[ovs-dev] [ovs-dev, 03/17] dpif-netdev: Don't try to output on a device without txqs.

Daniele Di Proietto diproiettod at vmware.com
Mon Nov 21 22:00:17 UTC 2016

On 21/11/2016 05:07, "Ilya Maximets" <i.maximets at samsung.com> wrote:

>On 16.11.2016 03:45, Daniele Di Proietto wrote:
>> Tunnel devices have 0 txqs and don't support netdev_send().  While
>> netdev_send() simply returns EOPNOTSUPP, the XPS logic is still executed
>> on output, and that might be confused by devices with no txqs.
>> 
>> It seems better to have different structures in the fast path for ports
>> that support netdev_{push,pop}_header (tunnel devices), and ports that
>> support netdev_send.  With this we can also remove a branch in
>> netdev_send().
>> 
>> This is also necessary for a future commit, which starts DPDK devices
>> without txqs.
>> 
>> Signed-off-by: Daniele Di Proietto <diproiettod at vmware.com>
>> ---
>>  lib/dpif-netdev.c | 72 ++++++++++++++++++++++++++++++++++++++++---------------
>>  lib/netdev.c      | 15 ++++++++----
>>  lib/netdev.h      |  1 +
>>  3 files changed, 64 insertions(+), 24 deletions(-)
>> 
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index 7b67b42..81366b2 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -422,7 +422,8 @@ struct rxq_poll {
>>      struct ovs_list node;
>>  };
>>  
>> -/* Contained by struct dp_netdev_pmd_thread's 'port_cache' or 'tx_ports'. */
>> +/* Contained by struct dp_netdev_pmd_thread's 'send_port_cache',
>> + * 'tnl_port_cache' or 'tx_ports'. */
>>  struct tx_port {
>>      struct dp_netdev_port *port;
>>      int qid;
>> @@ -504,11 +505,19 @@ struct dp_netdev_pmd_thread {
>>       * read by the pmd thread. */
>>      struct hmap tx_ports OVS_GUARDED;
>>  
>> -    /* Map of 'tx_port' used in the fast path. This is a thread-local copy of
>> -     * 'tx_ports'. The instance for cpu core NON_PMD_CORE_ID can be accessed
>> -     * by multiple threads, and thusly need to be protected by 'non_pmd_mutex'.
>> -     * Every other instance will only be accessed by its own pmd thread. */
>> -    struct hmap port_cache;
>> +    /* These are thread-local copies of 'tx_ports'.  One contains only tunnel
>> +     * ports (that support push_tunnel/pop_tunnel).  The other contains
>> +     * non-tunnel ports (that support send).
>> +     *
>> +     * These are kept separate to make sure that we don't try to execute
>> +     * OUTPUT on a tunnel device (which has 0 txqs) or PUSH/POP on a non-tunnel
>> +     * device.
>> +     *
>> +     * The instance for cpu core NON_PMD_CORE_ID can be accessed by multiple
>> +     * threads, and thusly needs to be protected by 'non_pmd_mutex'.  Every
>> +     * other instance will only be accessed by its own pmd thread. */
>> +    struct hmap tnl_port_cache;
>> +    struct hmap send_port_cache;
>>  
>>      /* Only a pmd thread can write on its own 'cycles' and 'stats'.
>>       * The main thread keeps 'stats_zero' and 'cycles_zero' as base
>> @@ -3055,7 +3064,10 @@ pmd_free_cached_ports(struct dp_netdev_pmd_thread *pmd)
>>      /* Free all used tx queue ids. */
>>      dpif_netdev_xps_revalidate_pmd(pmd, 0, true);
>>  
>> -    HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->port_cache) {
>> +    HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->tnl_port_cache) {
>> +        free(tx_port_cached);
>> +    }
>> +    HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->send_port_cache) {
>>          free(tx_port_cached);
>>      }
>>  }
>> @@ -3069,11 +3081,22 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
>>      struct tx_port *tx_port, *tx_port_cached;
>>  
>>      pmd_free_cached_ports(pmd);
>> -    hmap_shrink(&pmd->port_cache);
>> +    hmap_shrink(&pmd->send_port_cache);
>> +    hmap_shrink(&pmd->tnl_port_cache);
>>  
>>      HMAP_FOR_EACH (tx_port, node, &pmd->tx_ports) {
>> +        struct hmap *cache;
>> +
>> +        if (netdev_has_tunnel_push_pop(tx_port->port->netdev)) {
>> +            cache = &pmd->tnl_port_cache;
>> +        } else if (netdev_n_txq(tx_port->port->netdev)) {
>> +            cache = &pmd->send_port_cache;
>> +        } else {
>> +            continue;
>> +        }
>
>IMHO, this code introduces artificial limitations for netdev.
>What about something like this:
>
>        if (has_pop _OR_ has_push) {
>            insert to 'tnl_port_cache';
>        }
>
>        if (netdev_n_txq(tx_port->port->netdev)) {
>            insert to 'send_port_cache';
>        }
>?
>i.e. make all the checks independent.
>
>Otherwise, it must be documented in 'netdev-provider.h' that a netdev
>can have only tunnel-related functions (both 'push' and 'pop') or a
>send function.

I was leaning more towards documenting the limitation (currently we have
no use case for a netdev that supports both), but making the checks
independent is simpler and it avoids one assumption about the netdev
provider, so I went with that.
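
Concretely, this is just a sketch of what I have in mind (reusing the
existing loop and the 'tx_port'/'tx_port_cached' variables from the hunk
above, so not the final code):

    HMAP_FOR_EACH (tx_port, node, &pmd->tx_ports) {
        /* A tunnel device goes into 'tnl_port_cache'... */
        if (netdev_has_tunnel_push_pop(tx_port->port->netdev)) {
            tx_port_cached = xmemdup(tx_port, sizeof *tx_port_cached);
            hmap_insert(&pmd->tnl_port_cache, &tx_port_cached->node,
                        hash_port_no(tx_port_cached->port->port_no));
        }

        /* ...and, independently, any device with at least one txq goes
         * into 'send_port_cache'.  A device could end up in both. */
        if (netdev_n_txq(tx_port->port->netdev)) {
            tx_port_cached = xmemdup(tx_port, sizeof *tx_port_cached);
            hmap_insert(&pmd->send_port_cache, &tx_port_cached->node,
                        hash_port_no(tx_port_cached->port->port_no));
        }
    }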

I also realized that the checks in netdev_pop_header() and
netdev_push_header() are not necessary, so I removed them.

Thanks

>
>> +
>>          tx_port_cached = xmemdup(tx_port, sizeof *tx_port_cached);
>> -        hmap_insert(&pmd->port_cache, &tx_port_cached->node,
>> +        hmap_insert(cache, &tx_port_cached->node,
>>                      hash_port_no(tx_port_cached->port->port_no));
>>      }
>>  }
>> @@ -3309,7 +3332,8 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
>>      pmd->next_optimization = time_msec() + DPCLS_OPTIMIZATION_INTERVAL;
>>      ovs_list_init(&pmd->poll_list);
>>      hmap_init(&pmd->tx_ports);
>> -    hmap_init(&pmd->port_cache);
>> +    hmap_init(&pmd->tnl_port_cache);
>> +    hmap_init(&pmd->send_port_cache);
>>      /* init the 'flow_cache' since there is no
>>       * actual thread created for NON_PMD_CORE_ID. */
>>      if (core_id == NON_PMD_CORE_ID) {
>> @@ -3325,7 +3349,8 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
>>      struct dpcls *cls;
>>  
>>      dp_netdev_pmd_flow_flush(pmd);
>> -    hmap_destroy(&pmd->port_cache);
>> +    hmap_destroy(&pmd->send_port_cache);
>> +    hmap_destroy(&pmd->tnl_port_cache);
>>      hmap_destroy(&pmd->tx_ports);
>>      /* All flows (including their dpcls_rules) have been deleted already */
>>      CMAP_FOR_EACH (cls, node, &pmd->classifiers) {
>> @@ -3592,7 +3617,9 @@ static void
>>  dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
>>                               struct dp_netdev_port *port)
>>  {
>> -    struct tx_port *tx = xzalloc(sizeof *tx);
>> +    struct tx_port *tx;
>> +
>> +    tx = xzalloc(sizeof *tx);
>>  
>>      tx->port = port;
>>      tx->qid = -1;
>> @@ -4280,7 +4307,7 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
>>      struct dp_netdev_port *port;
>>      long long interval;
>>  
>> -    HMAP_FOR_EACH (tx, node, &pmd->port_cache) {
>> +    HMAP_FOR_EACH (tx, node, &pmd->send_port_cache) {
>>          if (!tx->port->dynamic_txqs) {
>>              continue;
>>          }
>> @@ -4344,10 +4371,17 @@ dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
>>  }
>>  
>>  static struct tx_port *
>> -pmd_tx_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
>> -                         odp_port_t port_no)
>> +pmd_tnl_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
>> +                          odp_port_t port_no)
>> +{
>> +    return tx_port_lookup(&pmd->tnl_port_cache, port_no);
>> +}
>> +
>> +static struct tx_port *
>> +pmd_send_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
>> +                           odp_port_t port_no)
>>  {
>> -    return tx_port_lookup(&pmd->port_cache, port_no);
>> +    return tx_port_lookup(&pmd->send_port_cache, port_no);
>>  }
>>  
>>  static int
>> @@ -4361,7 +4395,7 @@ push_tnl_action(const struct dp_netdev_pmd_thread *pmd,
>>  
>>      data = nl_attr_get(attr);
>>  
>> -    tun_port = pmd_tx_port_cache_lookup(pmd, u32_to_odp(data->tnl_port));
>> +    tun_port = pmd_tnl_port_cache_lookup(pmd, u32_to_odp(data->tnl_port));
>>      if (!tun_port) {
>>          err = -EINVAL;
>>          goto error;
>> @@ -4413,7 +4447,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
>>  
>>      switch ((enum ovs_action_attr)type) {
>>      case OVS_ACTION_ATTR_OUTPUT:
>> -        p = pmd_tx_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
>> +        p = pmd_send_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
>>          if (OVS_LIKELY(p)) {
>>              int tx_qid;
>>              bool dynamic_txqs;
>> @@ -4460,7 +4494,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
>>              struct dp_packet_batch *orig_packets_ = packets_;
>>              odp_port_t portno = nl_attr_get_odp_port(a);
>>  
>> -            p = pmd_tx_port_cache_lookup(pmd, portno);
>> +            p = pmd_tnl_port_cache_lookup(pmd, portno);
>>              if (p) {
>>                  struct dp_packet_batch tnl_pkt;
>>                  int i;
>> diff --git a/lib/netdev.c b/lib/netdev.c
>> index ad90ef6..9ab4e4c 100644
>> --- a/lib/netdev.c
>> +++ b/lib/netdev.c
>> @@ -117,6 +117,13 @@ netdev_is_pmd(const struct netdev *netdev)
>>      return netdev->netdev_class->is_pmd;
>>  }
>>  
>> +bool
>> +netdev_has_tunnel_push_pop(const struct netdev *netdev)
>> +{
>> +    return netdev->netdev_class->push_header
>> +           && netdev->netdev_class->pop_header;
>> +}
>> +
>>  static void
>>  netdev_initialize(void)
>>      OVS_EXCLUDED(netdev_mutex)
>> @@ -686,6 +693,9 @@ netdev_set_tx_multiq(struct netdev *netdev, unsigned int n_txq)
>>   * if a partial packet was transmitted or if a packet is too big or too small
>>   * to transmit on the device.
>>   *
>> + * The caller must make sure that 'netdev' supports sending by making sure that
>> + * 'netdev_n_txq(netdev)' returns >= 1.
>> + *
>>   * If the function returns a non-zero value, some of the packets might have
>>   * been sent anyway.
>>   *
>> @@ -710,11 +720,6 @@ int
>>  netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
>>              bool may_steal, bool concurrent_txq)
>>  {
>> -    if (!netdev->netdev_class->send) {
>> -        dp_packet_delete_batch(batch, may_steal);
>> -        return EOPNOTSUPP;
>> -    }
>> -
>>      int error = netdev->netdev_class->send(netdev, qid, batch, may_steal,
>>                                             concurrent_txq);
>>      if (!error) {
>> diff --git a/lib/netdev.h b/lib/netdev.h
>> index bad28c4..03059ca 100644
>> --- a/lib/netdev.h
>> +++ b/lib/netdev.h
>> @@ -109,6 +109,7 @@ bool netdev_is_reserved_name(const char *name);
>>  int netdev_n_txq(const struct netdev *netdev);
>>  int netdev_n_rxq(const struct netdev *netdev);
>>  bool netdev_is_pmd(const struct netdev *netdev);
>> +bool netdev_has_tunnel_push_pop(const struct netdev *netdev);
>>  
>>  /* Open and close. */
>>  int netdev_open(const char *name, const char *type, struct netdev **netdevp);
>> 

