[ovs-dev] [PATCH v2] dpif-netdev: proper tx queue id

Ilya Maximets i.maximets at samsung.com
Thu Aug 13 13:50:31 UTC 2015


ping.

On 06.08.2015 17:32, Ilya Maximets wrote:
> Currently tx_qid is equal to pmd->core_id. This leads to unexpected
> behavior if pmd-cpu-mask is different from '/(0*)(1|3|7)?(f*)/',
> e.g. if core_ids are not sequential, don't start from 0, or both.
> 
> Example:
> 	starting 2 pmd threads with 1 port, 2 rxqs per port,
> 	pmd-cpu-mask = 00000014 and let dev->real_n_txq = 2
> 
> 	In that case pmd_1->tx_qid = 2, pmd_2->tx_qid = 4 and
> 	txq_needs_locking = true (if the device doesn't have
> 	ovs_numa_get_n_cores() + 1 queues).
> 
> 	In that case, after truncating in netdev_dpdk_send__():
> 		'qid = qid % dev->real_n_txq;'
> 	pmd_1: qid = 2 % 2 = 0
> 	pmd_2: qid = 4 % 2 = 0
> 
> 	So, both threads will call dpdk_queue_pkts() with the same qid = 0.
> 	This is unexpected behavior when there are 2 tx queues in the device:
> 	queue #1 will not be used, and both threads will lock queue #0
> 	on each send.
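> 
> For illustration only (a standalone sketch with the values assumed from
> the example above, not part of the patch), the truncation collision
> looks like this:
> 
> 	#include <stdio.h>
> 
> 	int main(void)
> 	{
> 	    const int real_n_txq = 2;       /* tx queues on the device */
> 	    const int tx_qid[] = { 2, 4 };  /* tx_qid == core_id for cores 2 and 4 */
> 	    int i;
> 
> 	    for (i = 0; i < 2; i++) {
> 	        /* Mimics 'qid = qid % dev->real_n_txq' in netdev_dpdk_send__():
> 	         * 2 % 2 == 0 and 4 % 2 == 0, so both pmds end up on queue #0. */
> 	        printf("pmd_%d: qid = %d\n", i + 1, tx_qid[i] % real_n_txq);
> 	    }
> 	    return 0;
> 	}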
> 
> Fix that by introducing a per-pmd-thread hash map 'tx_queues' that
> stores all tx queues available to that pmd thread, keyed (hashed) by
> port_no. All tx_qids will be unique per port and sequential, which
> prevents the unexpected mapping after truncation described above.
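> 
> As an illustration only (again assuming the example configuration
> above: 1 port, 2 usable tx queues, 2 pmd threads on the numa node),
> a minimal sketch of the round-robin assignment that pmd_load_queues()
> below performs, and the resulting per-pmd tx_qids:
> 
> 	#include <stdio.h>
> 
> 	int main(void)
> 	{
> 	    const int n_pmds_on_numa = 2;
> 	    const int n_usable_txq = 2;  /* netdev_n_txq() - 1, last one reserved */
> 	    int pmd_index, tx_index, i;
> 
> 	    for (pmd_index = 0; pmd_index < n_pmds_on_numa; pmd_index++) {
> 	        tx_index = 0;
> 	        for (i = 0; i < n_usable_txq; i++) {
> 	            if (tx_index % n_pmds_on_numa == pmd_index) {
> 	                /* pmd_1 gets tx_qid 0, pmd_2 gets tx_qid 1, so the qids
> 	                 * stay distinct after 'qid % real_n_txq'. */
> 	                printf("pmd_%d: tx_qid = %d\n", pmd_index + 1, i);
> 	            }
> 	            tx_index++;
> 	        }
> 	    }
> 	    return 0;
> 	}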
> 
> The implemented infrastructure can be used in the future to choose
> among all tx queues available to that pmd thread.
> 
> Signed-off-by: Ilya Maximets <i.maximets at samsung.com>
> ---
>  lib/dpif-netdev.c | 110 ++++++++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 91 insertions(+), 19 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index c144352..309994c 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -365,6 +365,13 @@ struct dp_netdev_pmd_cycles {
>      atomic_ullong n[PMD_N_CYCLES];
>  };
>  
> +struct dp_netdev_pmd_txq {
> +    struct cmap_node node;        /* In owning dp_netdev_pmd_thread's */
> +                                  /* 'tx_queues'. */
> +    struct dp_netdev_port *port;
> +    int tx_qid;
> +};
> +
>  /* PMD: Poll modes drivers.  PMD accesses devices via polling to eliminate
>   * the performance overhead of interrupt processing.  Therefore netdev can
>   * not implement rx-wait for these devices.  dpif-netdev needs to poll
> @@ -420,8 +427,8 @@ struct dp_netdev_pmd_thread {
>                                      /* threads on same numa node. */
>      unsigned core_id;               /* CPU core id of this pmd thread. */
>      int numa_id;                    /* numa node id of this pmd thread. */
> -    int tx_qid;                     /* Queue id used by this pmd thread to
> -                                     * send packets on all netdevs */
> +    struct cmap tx_queues;          /* Queue ids used by this pmd thread to
> +                                     * send packets to ports */
>  
>      /* Only a pmd thread can write on its own 'cycles' and 'stats'.
>       * The main thread keeps 'stats_zero' and 'cycles_zero' as base
> @@ -2602,6 +2609,46 @@ dpif_netdev_wait(struct dpif *dpif)
>      seq_wait(tnl_conf_seq, dp->last_tnl_conf_seq);
>  }
>  
> +static void
> +pmd_flush_tx_queues(struct dp_netdev_pmd_thread *pmd)
> +{
> +    struct dp_netdev_pmd_txq *txq;
> +
> +    CMAP_FOR_EACH (txq, node, &pmd->tx_queues) {
> +        cmap_remove(&pmd->tx_queues, &txq->node,
> +                    hash_port_no(txq->port->port_no));
> +        port_unref(txq->port);
> +        free(txq);
> +    }
> +}
> +
> +static void OVS_UNUSED
> +pmd_tx_queues_print(struct dp_netdev_pmd_thread *pmd)
> +{
> +    struct dp_netdev_pmd_txq *txq;
> +
> +    CMAP_FOR_EACH (txq, node, &pmd->tx_queues) {
> +        VLOG_INFO("Core_id: %d, Port: %s, tx_qid: %d\n",
> +                   pmd->core_id, netdev_get_name(txq->port->netdev),
> +                   txq->tx_qid);
> +    }
> +}
> +
> +static struct dp_netdev_pmd_txq *
> +pmd_lookup_txq(const struct dp_netdev_pmd_thread *pmd,
> +               const struct dp_netdev_port *port)
> +{
> +    struct dp_netdev_pmd_txq *txq;
> +
> +    CMAP_FOR_EACH_WITH_HASH (txq, node, hash_port_no(port->port_no),
> +                             &pmd->tx_queues) {
> +        if (txq->port == port) {
> +            return txq;
> +        }
> +    }
> +    return NULL;
> +}
> +
>  struct rxq_poll {
>      struct dp_netdev_port *port;
>      struct netdev_rxq *rx;
> @@ -2613,16 +2660,20 @@ pmd_load_queues(struct dp_netdev_pmd_thread *pmd,
>  {
>      struct rxq_poll *poll_list = *ppoll_list;
>      struct dp_netdev_port *port;
> -    int n_pmds_on_numa, index, i;
> +    struct dp_netdev_pmd_txq *txq;
> +    int n_pmds_on_numa, rx_index, tx_index, i;
>  
>      /* Simple scheduler for netdev rx polling. */
> +    pmd_flush_tx_queues(pmd);
> +
>      for (i = 0; i < poll_cnt; i++) {
>          port_unref(poll_list[i].port);
>      }
>  
>      poll_cnt = 0;
>      n_pmds_on_numa = get_n_pmd_threads_on_numa(pmd->dp, pmd->numa_id);
> -    index = 0;
> +    rx_index = 0;
> +    tx_index = 0;
>  
>      CMAP_FOR_EACH (port, node, &pmd->dp->ports) {
>          /* Calls port_try_ref() to prevent the main thread
> @@ -2633,7 +2684,7 @@ pmd_load_queues(struct dp_netdev_pmd_thread *pmd,
>                  int i;
>  
>                  for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
> -                    if ((index % n_pmds_on_numa) == pmd->index) {
> +                    if ((rx_index % n_pmds_on_numa) == pmd->index) {
>                          poll_list = xrealloc(poll_list,
>                                          sizeof *poll_list * (poll_cnt + 1));
>  
> @@ -2642,7 +2693,20 @@ pmd_load_queues(struct dp_netdev_pmd_thread *pmd,
>                          poll_list[poll_cnt].rx = port->rxq[i];
>                          poll_cnt++;
>                      }
> -                    index++;
> +                    rx_index++;
> +                }
> +
> +                /* Last queue reserved for non pmd threads */
> +                for (i = 0; i < netdev_n_txq(port->netdev) - 1; i++) {
> +                    if ((tx_index % n_pmds_on_numa) == pmd->index) {
> +                        txq = xmalloc(sizeof *txq);
> +                        port_ref(port);
> +                        txq->port = port;
> +                        txq->tx_qid = i;
> +                        cmap_insert(&pmd->tx_queues, &txq->node,
> +                                    hash_port_no(port->port_no));
> +                    }
> +                    tx_index++;
>                  }
>              }
>              /* Unrefs the port_try_ref(). */
> @@ -2712,6 +2776,8 @@ reload:
>          goto reload;
>      }
>  
> +    pmd_flush_tx_queues(pmd);
> +
>      for (i = 0; i < poll_cnt; i++) {
>           port_unref(poll_list[i].port);
>      }
> @@ -2825,16 +2891,6 @@ dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos)
>      return next;
>  }
>  
> -static int
> -core_id_to_qid(unsigned core_id)
> -{
> -    if (core_id != NON_PMD_CORE_ID) {
> -        return core_id;
> -    } else {
> -        return ovs_numa_get_n_cores();
> -    }
> -}
> -
>  /* Configures the 'pmd' based on the input argument. */
>  static void
>  dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
> @@ -2843,7 +2899,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
>      pmd->dp = dp;
>      pmd->index = index;
>      pmd->core_id = core_id;
> -    pmd->tx_qid = core_id_to_qid(core_id);
>      pmd->numa_id = numa_id;
>  
>      ovs_refcount_init(&pmd->ref_cnt);
> @@ -2854,9 +2909,20 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
>      ovs_mutex_init(&pmd->flow_mutex);
>      dpcls_init(&pmd->cls);
>      cmap_init(&pmd->flow_table);
> -    /* init the 'flow_cache' since there is no
> +    cmap_init(&pmd->tx_queues);
> +    /* init the 'flow_cache' and 'tx_queues' since there is no
>       * actual thread created for NON_PMD_CORE_ID. */
>      if (core_id == NON_PMD_CORE_ID) {
> +        struct dp_netdev_port *port;
> +        CMAP_FOR_EACH (port, node, &pmd->dp->ports) {
> +            if (port_try_ref(port)) {
> +                struct dp_netdev_pmd_txq * txq = xmalloc(sizeof *txq);
> +                txq->port = port;
> +                txq->tx_qid = ovs_numa_get_n_cores();
> +                cmap_insert(&pmd->tx_queues, &txq->node,
> +                            hash_port_no(port->port_no));
> +            }
> +        }
>          emc_cache_init(&pmd->flow_cache);
>      }
>      cmap_insert(&dp->poll_threads, CONST_CAST(struct cmap_node *, &pmd->node),
> @@ -2869,6 +2935,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
>      dp_netdev_pmd_flow_flush(pmd);
>      dpcls_destroy(&pmd->cls);
>      cmap_destroy(&pmd->flow_table);
> +    cmap_destroy(&pmd->tx_queues);
>      ovs_mutex_destroy(&pmd->flow_mutex);
>      latch_destroy(&pmd->exit_latch);
>      xpthread_cond_destroy(&pmd->cond);
> @@ -2885,6 +2952,7 @@ dp_netdev_del_pmd(struct dp_netdev_pmd_thread *pmd)
>       * no actual thread uninit it for NON_PMD_CORE_ID. */
>      if (pmd->core_id == NON_PMD_CORE_ID) {
>          emc_cache_uninit(&pmd->flow_cache);
> +        pmd_flush_tx_queues(pmd);
>      } else {
>          latch_set(&pmd->exit_latch);
>          dp_netdev_reload_pmd__(pmd);
> @@ -3480,13 +3548,17 @@ dp_execute_cb(void *aux_, struct dp_packet **packets, int cnt,
>      struct dp_netdev *dp = pmd->dp;
>      int type = nl_attr_type(a);
>      struct dp_netdev_port *p;
> +    struct dp_netdev_pmd_txq *txq;
>      int i;
>  
>      switch ((enum ovs_action_attr)type) {
>      case OVS_ACTION_ATTR_OUTPUT:
>          p = dp_netdev_lookup_port(dp, u32_to_odp(nl_attr_get_u32(a)));
>          if (OVS_LIKELY(p)) {
> -            netdev_send(p->netdev, pmd->tx_qid, packets, cnt, may_steal);
> +            txq = pmd_lookup_txq(pmd, p);
> +            if (OVS_UNLIKELY(!txq))
> +                break;
> +            netdev_send(p->netdev, txq->tx_qid, packets, cnt, may_steal);
>              return;
>          }
>          break;
> 


