[ovs-dev] [ovs-dev, 03/17] dpif-netdev: Don't try to output on a device without txqs.

Ilya Maximets i.maximets at samsung.com
Mon Nov 21 13:07:10 UTC 2016


On 16.11.2016 03:45, Daniele Di Proietto wrote:
> Tunnel devices have 0 txqs and don't support netdev_send().  While
> netdev_send() simply returns EOPNOTSUPP, the XPS logic is still executed
> on output, and that might be confused by devices with no txqs.
> 
> It seems better to have different structures in the fast path for ports
> that support netdev_{push,pop}_header (tunnel devices), and ports that
> support netdev_send.  With this we can also remove a branch in
> netdev_send().
> 
> This is also necessary for a future commit, which starts DPDK devices
> without txqs.
> 
> Signed-off-by: Daniele Di Proietto <diproiettod at vmware.com>
> ---
>  lib/dpif-netdev.c | 72 ++++++++++++++++++++++++++++++++++++++++---------------
>  lib/netdev.c      | 15 ++++++++----
>  lib/netdev.h      |  1 +
>  3 files changed, 64 insertions(+), 24 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 7b67b42..81366b2 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -422,7 +422,8 @@ struct rxq_poll {
>      struct ovs_list node;
>  };
>  
> -/* Contained by struct dp_netdev_pmd_thread's 'port_cache' or 'tx_ports'. */
> +/* Contained by struct dp_netdev_pmd_thread's 'send_port_cache',
> + * 'tnl_port_cache' or 'tx_ports'. */
>  struct tx_port {
>      struct dp_netdev_port *port;
>      int qid;
> @@ -504,11 +505,19 @@ struct dp_netdev_pmd_thread {
>       * read by the pmd thread. */
>      struct hmap tx_ports OVS_GUARDED;
>  
> -    /* Map of 'tx_port' used in the fast path. This is a thread-local copy of
> -     * 'tx_ports'. The instance for cpu core NON_PMD_CORE_ID can be accessed
> -     * by multiple threads, and thusly need to be protected by 'non_pmd_mutex'.
> -     * Every other instance will only be accessed by its own pmd thread. */
> -    struct hmap port_cache;
> +    /* These are thread-local copies of 'tx_ports'.  One contains only tunnel
> +     * ports (that support push_tunnel/pop_tunnel).  The other contains
> +     * non-tunnel ports (that support send).
> +     *
> +     * These are kept separate to make sure that we don't try to execute
> +     * OUTPUT on a tunnel device (which has 0 txqs) or PUSH/POP on a non-tunnel
> +     * device.
> +     *
> +     * The instance for cpu core NON_PMD_CORE_ID can be accessed by multiple
> +     * threads, and thusly needs to be protected by 'non_pmd_mutex'.  Every
> +     * other instance will only be accessed by its own pmd thread. */
> +    struct hmap tnl_port_cache;
> +    struct hmap send_port_cache;
>  
>      /* Only a pmd thread can write on its own 'cycles' and 'stats'.
>       * The main thread keeps 'stats_zero' and 'cycles_zero' as base
> @@ -3055,7 +3064,10 @@ pmd_free_cached_ports(struct dp_netdev_pmd_thread *pmd)
>      /* Free all used tx queue ids. */
>      dpif_netdev_xps_revalidate_pmd(pmd, 0, true);
>  
> -    HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->port_cache) {
> +    HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->tnl_port_cache) {
> +        free(tx_port_cached);
> +    }
> +    HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->send_port_cache) {
>          free(tx_port_cached);
>      }
>  }
> @@ -3069,11 +3081,22 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
>      struct tx_port *tx_port, *tx_port_cached;
>  
>      pmd_free_cached_ports(pmd);
> -    hmap_shrink(&pmd->port_cache);
> +    hmap_shrink(&pmd->send_port_cache);
> +    hmap_shrink(&pmd->tnl_port_cache);
>  
>      HMAP_FOR_EACH (tx_port, node, &pmd->tx_ports) {
> +        struct hmap *cache;
> +
> +        if (netdev_has_tunnel_push_pop(tx_port->port->netdev)) {
> +            cache = &pmd->tnl_port_cache;
> +        } else if (netdev_n_txq(tx_port->port->netdev)) {
> +            cache = &pmd->send_port_cache;
> +        } else {
> +            continue;
> +        }

IMHO, this code introduces artificial limitations on netdevs.
What about something like this:

        if (has_pop _OR_ has_push) {
            insert into 'tnl_port_cache';
        }

        if (netdev_n_txq(tx_port->port->netdev)) {
            insert into 'send_port_cache';
        }
?
i.e. make all the checks independent.
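
For illustration, a minimal sketch of what the independent checks could look
like inside pmd_load_cached_ports(), reusing the helpers already present in
the patch (it keeps netdev_has_tunnel_push_pop(), i.e. push _AND_ pop, for
simplicity, and it assumes a separate xmemdup() per cache, since a
'struct tx_port' node can only live in one hmap at a time):

    HMAP_FOR_EACH (tx_port, node, &pmd->tx_ports) {
        struct netdev *netdev = tx_port->port->netdev;

        if (netdev_has_tunnel_push_pop(netdev)) {
            /* Tunnel devices go into 'tnl_port_cache'. */
            tx_port_cached = xmemdup(tx_port, sizeof *tx_port_cached);
            hmap_insert(&pmd->tnl_port_cache, &tx_port_cached->node,
                        hash_port_no(tx_port_cached->port->port_no));
        }

        if (netdev_n_txq(netdev)) {
            /* Independently, devices with at least one txq go into
             * 'send_port_cache'. */
            tx_port_cached = xmemdup(tx_port, sizeof *tx_port_cached);
            hmap_insert(&pmd->send_port_cache, &tx_port_cached->node,
                        hash_port_no(tx_port_cached->port->port_no));
        }
    }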

Otherwise (i.e. if the checks stay coupled), it must be documented in
'netdev-provider.h' that a netdev may implement either the tunnel-related
functions (both 'push' and 'pop') or the send function, but not both.
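
If the coupled checks are kept instead, the assumption could be spelled out
in 'netdev-provider.h' with something along these lines (just a sketch of
possible wording, not text from the tree):

    /* A netdev implementation is expected to provide either the tunnel
     * functions ('push_header' and 'pop_header') or 'send', but not both:
     * dpif-netdev relies on this to decide whether a port belongs to the
     * per-thread tunnel port cache or the send port cache. */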

> +
>          tx_port_cached = xmemdup(tx_port, sizeof *tx_port_cached);
> -        hmap_insert(&pmd->port_cache, &tx_port_cached->node,
> +        hmap_insert(cache, &tx_port_cached->node,
>                      hash_port_no(tx_port_cached->port->port_no));
>      }
>  }
> @@ -3309,7 +3332,8 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
>      pmd->next_optimization = time_msec() + DPCLS_OPTIMIZATION_INTERVAL;
>      ovs_list_init(&pmd->poll_list);
>      hmap_init(&pmd->tx_ports);
> -    hmap_init(&pmd->port_cache);
> +    hmap_init(&pmd->tnl_port_cache);
> +    hmap_init(&pmd->send_port_cache);
>      /* init the 'flow_cache' since there is no
>       * actual thread created for NON_PMD_CORE_ID. */
>      if (core_id == NON_PMD_CORE_ID) {
> @@ -3325,7 +3349,8 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
>      struct dpcls *cls;
>  
>      dp_netdev_pmd_flow_flush(pmd);
> -    hmap_destroy(&pmd->port_cache);
> +    hmap_destroy(&pmd->send_port_cache);
> +    hmap_destroy(&pmd->tnl_port_cache);
>      hmap_destroy(&pmd->tx_ports);
>      /* All flows (including their dpcls_rules) have been deleted already */
>      CMAP_FOR_EACH (cls, node, &pmd->classifiers) {
> @@ -3592,7 +3617,9 @@ static void
>  dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
>                               struct dp_netdev_port *port)
>  {
> -    struct tx_port *tx = xzalloc(sizeof *tx);
> +    struct tx_port *tx;
> +
> +    tx = xzalloc(sizeof *tx);
>  
>      tx->port = port;
>      tx->qid = -1;
> @@ -4280,7 +4307,7 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
>      struct dp_netdev_port *port;
>      long long interval;
>  
> -    HMAP_FOR_EACH (tx, node, &pmd->port_cache) {
> +    HMAP_FOR_EACH (tx, node, &pmd->send_port_cache) {
>          if (!tx->port->dynamic_txqs) {
>              continue;
>          }
> @@ -4344,10 +4371,17 @@ dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
>  }
>  
>  static struct tx_port *
> -pmd_tx_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
> -                         odp_port_t port_no)
> +pmd_tnl_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
> +                          odp_port_t port_no)
> +{
> +    return tx_port_lookup(&pmd->tnl_port_cache, port_no);
> +}
> +
> +static struct tx_port *
> +pmd_send_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
> +                           odp_port_t port_no)
>  {
> -    return tx_port_lookup(&pmd->port_cache, port_no);
> +    return tx_port_lookup(&pmd->send_port_cache, port_no);
>  }
>  
>  static int
> @@ -4361,7 +4395,7 @@ push_tnl_action(const struct dp_netdev_pmd_thread *pmd,
>  
>      data = nl_attr_get(attr);
>  
> -    tun_port = pmd_tx_port_cache_lookup(pmd, u32_to_odp(data->tnl_port));
> +    tun_port = pmd_tnl_port_cache_lookup(pmd, u32_to_odp(data->tnl_port));
>      if (!tun_port) {
>          err = -EINVAL;
>          goto error;
> @@ -4413,7 +4447,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
>  
>      switch ((enum ovs_action_attr)type) {
>      case OVS_ACTION_ATTR_OUTPUT:
> -        p = pmd_tx_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
> +        p = pmd_send_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
>          if (OVS_LIKELY(p)) {
>              int tx_qid;
>              bool dynamic_txqs;
> @@ -4460,7 +4494,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
>              struct dp_packet_batch *orig_packets_ = packets_;
>              odp_port_t portno = nl_attr_get_odp_port(a);
>  
> -            p = pmd_tx_port_cache_lookup(pmd, portno);
> +            p = pmd_tnl_port_cache_lookup(pmd, portno);
>              if (p) {
>                  struct dp_packet_batch tnl_pkt;
>                  int i;
> diff --git a/lib/netdev.c b/lib/netdev.c
> index ad90ef6..9ab4e4c 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -117,6 +117,13 @@ netdev_is_pmd(const struct netdev *netdev)
>      return netdev->netdev_class->is_pmd;
>  }
>  
> +bool
> +netdev_has_tunnel_push_pop(const struct netdev *netdev)
> +{
> +    return netdev->netdev_class->push_header
> +           && netdev->netdev_class->pop_header;
> +}
> +
>  static void
>  netdev_initialize(void)
>      OVS_EXCLUDED(netdev_mutex)
> @@ -686,6 +693,9 @@ netdev_set_tx_multiq(struct netdev *netdev, unsigned int n_txq)
>   * if a partial packet was transmitted or if a packet is too big or too small
>   * to transmit on the device.
>   *
> + * The caller must make sure that 'netdev' supports sending by making sure that
> + * 'netdev_n_txq(netdev)' returns >= 1.
> + *
>   * If the function returns a non-zero value, some of the packets might have
>   * been sent anyway.
>   *
> @@ -710,11 +720,6 @@ int
>  netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
>              bool may_steal, bool concurrent_txq)
>  {
> -    if (!netdev->netdev_class->send) {
> -        dp_packet_delete_batch(batch, may_steal);
> -        return EOPNOTSUPP;
> -    }
> -
>      int error = netdev->netdev_class->send(netdev, qid, batch, may_steal,
>                                             concurrent_txq);
>      if (!error) {
> diff --git a/lib/netdev.h b/lib/netdev.h
> index bad28c4..03059ca 100644
> --- a/lib/netdev.h
> +++ b/lib/netdev.h
> @@ -109,6 +109,7 @@ bool netdev_is_reserved_name(const char *name);
>  int netdev_n_txq(const struct netdev *netdev);
>  int netdev_n_rxq(const struct netdev *netdev);
>  bool netdev_is_pmd(const struct netdev *netdev);
> +bool netdev_has_tunnel_push_pop(const struct netdev *netdev);
>  
>  /* Open and close. */
>  int netdev_open(const char *name, const char *type, struct netdev **netdevp);
> 

