[ovs-dev] [PATCH v2] dpif-netdev: proper tx queue id

Daniele Di Proietto diproiettod at vmware.com
Mon Aug 17 16:45:56 UTC 2015


Sorry about the long wait.

I think this is the right direction, I like the idea of having a thread
local map of the txqs.  It will allow us to assign the txqs in a more
sophisticated way.

I see some unit tests failing with this patch applied.  Could you try
running the testsuite with 'make check'?

The txq distribution code is run only when a pmd device is added.
If a "system" or a "dummy" device is added its txq will not be used
by any pmd thread (or to the main thread).

Two more comments below.

On 06/08/2015 15:32, "Ilya Maximets" <i.maximets at samsung.com> wrote:

>Currently tx_qid is equal to pmd->core_id. This leads to unexpected
>behavior if pmd-cpu-mask different from '/(0*)(1|3|7)?(f*)/',
>e.g. if core_ids are not sequential, or doesn't start from 0, or both.
>
>Example:
>	starting 2 pmd threads with 1 port, 2 rxqs per port,
>	pmd-cpu-mask = 00000014 and let dev->real_n_txq = 2
>
>	It that case pmd_1->tx_qid = 2, pmd_2->tx_qid = 4 and
>	txq_needs_locking = true (if device hasn't ovs_numa_get_n_cores()+1
>	queues).
>
>	In that case, after truncating in netdev_dpdk_send__():
>		'qid = qid % dev->real_n_txq;'
>	pmd_1: qid = 2 % 2 = 0
>	pmd_2: qid = 4 % 2 = 0
>
>	So, both threads will call dpdk_queue_pkts() with same qid = 0.
>	This is unexpected behavior if there is 2 tx queues in device.
>	Queue #1 will not be used and both threads will lock queue #0
>	on each send.
>
>Fix that by introducing per pmd thread hash map 'tx_queues', where will
>be stored all available tx queues for that pmd thread with
>port_no as a key(hash). All tx_qid-s will be unique per port and
>sequential to prevent described unexpected mapping after truncating.
>
>Implemented infrastructure can be used in the future to choose
>between all tx queues available for that pmd thread.
>
>Signed-off-by: Ilya Maximets <i.maximets at samsung.com>
>---
> lib/dpif-netdev.c | 110
>++++++++++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 91 insertions(+), 19 deletions(-)
>
>diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>index c144352..309994c 100644
>--- a/lib/dpif-netdev.c
>+++ b/lib/dpif-netdev.c
>@@ -365,6 +365,13 @@ struct dp_netdev_pmd_cycles {
>     atomic_ullong n[PMD_N_CYCLES];
> };
> 
>+struct dp_netdev_pmd_txq {
>+    struct cmap_node node;        /* In owning dp_netdev_pmd_thread's */
>+                                  /* 'tx_queues'. */
>+    struct dp_netdev_port *port;
>+    int tx_qid;
>+};
>+
> /* PMD: Poll modes drivers.  PMD accesses devices via polling to
>eliminate
>  * the performance overhead of interrupt processing.  Therefore netdev
>can
>  * not implement rx-wait for these devices.  dpif-netdev needs to poll
>@@ -420,8 +427,8 @@ struct dp_netdev_pmd_thread {
>                                     /* threads on same numa node. */
>     unsigned core_id;               /* CPU core id of this pmd thread. */
>     int numa_id;                    /* numa node id of this pmd thread.
>*/
>-    int tx_qid;                     /* Queue id used by this pmd thread
>to
>-                                     * send packets on all netdevs */
>+    struct cmap tx_queues;          /* Queue ids used by this pmd thread
>to
>+                                     * send packets to ports */
> 
>     /* Only a pmd thread can write on its own 'cycles' and 'stats'.
>      * The main thread keeps 'stats_zero' and 'cycles_zero' as base
>@@ -2602,6 +2609,46 @@ dpif_netdev_wait(struct dpif *dpif)
>     seq_wait(tnl_conf_seq, dp->last_tnl_conf_seq);
> }
> 
>+static void
>+pmd_flush_tx_queues(struct dp_netdev_pmd_thread *pmd)
>+{
>+    struct dp_netdev_pmd_txq *txq;
>+
>+    CMAP_FOR_EACH (txq, node, &pmd->tx_queues) {
>+        cmap_remove(&pmd->tx_queues, &txq->node,
>+                    hash_port_no(txq->port->port_no));
>+        port_unref(txq->port);
>+        free(txq);
>+    }
>+}

The term "flush" used for txqs makes me think that the
packets in the device transmission queue will be sent.

>+
>+static void OVS_UNUSED
>+pmd_tx_queues_print(struct dp_netdev_pmd_thread *pmd)
>+{
>+    struct dp_netdev_pmd_txq *txq;
>+
>+    CMAP_FOR_EACH (txq, node, &pmd->tx_queues) {
>+        VLOG_INFO("Core_id: %d, Port: %s, tx_qid: %d\n",
>+                   pmd->core_id, netdev_get_name(txq->port->netdev),
>+                   txq->tx_qid);
>+    }
>+}
>+
>+static struct dp_netdev_pmd_txq *
>+pmd_lookup_txq(const struct dp_netdev_pmd_thread *pmd,
>+               const struct dp_netdev_port *port)
>+{
>+    struct dp_netdev_pmd_txq *txq;
>+
>+    CMAP_FOR_EACH_WITH_HASH (txq, node, hash_port_no(port->port_no),
>+                             &pmd->tx_queues) {
>+        if (txq->port == port) {
>+            return txq;
>+        }
>+    }
>+    return NULL;
>+}
>+
> struct rxq_poll {
>     struct dp_netdev_port *port;
>     struct netdev_rxq *rx;
>@@ -2613,16 +2660,20 @@ pmd_load_queues(struct dp_netdev_pmd_thread *pmd,
> {
>     struct rxq_poll *poll_list = *ppoll_list;
>     struct dp_netdev_port *port;
>-    int n_pmds_on_numa, index, i;
>+    struct dp_netdev_pmd_txq *txq;
>+    int n_pmds_on_numa, rx_index, tx_index, i;
> 
>     /* Simple scheduler for netdev rx polling. */
>+    pmd_flush_tx_queues(pmd);
>+
>     for (i = 0; i < poll_cnt; i++) {
>         port_unref(poll_list[i].port);
>     }
> 
>     poll_cnt = 0;
>     n_pmds_on_numa = get_n_pmd_threads_on_numa(pmd->dp, pmd->numa_id);
>-    index = 0;
>+    rx_index = 0;
>+    tx_index = 0;
> 
>     CMAP_FOR_EACH (port, node, &pmd->dp->ports) {
>         /* Calls port_try_ref() to prevent the main thread
>@@ -2633,7 +2684,7 @@ pmd_load_queues(struct dp_netdev_pmd_thread *pmd,
>                 int i;
> 
>                 for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
>-                    if ((index % n_pmds_on_numa) == pmd->index) {
>+                    if ((rx_index % n_pmds_on_numa) == pmd->index) {
>                         poll_list = xrealloc(poll_list,
>                                         sizeof *poll_list * (poll_cnt +
>1));
> 
>@@ -2642,7 +2693,20 @@ pmd_load_queues(struct dp_netdev_pmd_thread *pmd,
>                         poll_list[poll_cnt].rx = port->rxq[i];
>                         poll_cnt++;
>                     }
>-                    index++;
>+                    rx_index++;
>+                }
>+
>+                /* Last queue reserved for non pmd threads */
>+                for (i = 0; i < netdev_n_txq(port->netdev) - 1; i++) {
>+                    if ((tx_index % n_pmds_on_numa) == pmd->index) {
>+                        txq = xmalloc(sizeof *txq);
>+                        port_ref(port);
>+                        txq->port = port;
>+                        txq->tx_qid = i;
>+                        cmap_insert(&pmd->tx_queues, &txq->node,
>+                                    hash_port_no(port->port_no));
>+                    }
>+                    tx_index++;
>                 }
>             }
>             /* Unrefs the port_try_ref(). */
>@@ -2712,6 +2776,8 @@ reload:
>         goto reload;
>     }
> 
>+    pmd_flush_tx_queues(pmd);
>+
>     for (i = 0; i < poll_cnt; i++) {
>          port_unref(poll_list[i].port);
>     }
>@@ -2825,16 +2891,6 @@ dp_netdev_pmd_get_next(struct dp_netdev *dp,
>struct cmap_position *pos)
>     return next;
> }
> 
>-static int
>-core_id_to_qid(unsigned core_id)
>-{
>-    if (core_id != NON_PMD_CORE_ID) {
>-        return core_id;
>-    } else {
>-        return ovs_numa_get_n_cores();
>-    }
>-}
>-
> /* Configures the 'pmd' based on the input argument. */
> static void
> dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct
>dp_netdev *dp,
>@@ -2843,7 +2899,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread
>*pmd, struct dp_netdev *dp,
>     pmd->dp = dp;
>     pmd->index = index;
>     pmd->core_id = core_id;
>-    pmd->tx_qid = core_id_to_qid(core_id);
>     pmd->numa_id = numa_id;
> 
>     ovs_refcount_init(&pmd->ref_cnt);
>@@ -2854,9 +2909,20 @@ dp_netdev_configure_pmd(struct
>dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
>     ovs_mutex_init(&pmd->flow_mutex);
>     dpcls_init(&pmd->cls);
>     cmap_init(&pmd->flow_table);
>-    /* init the 'flow_cache' since there is no
>+    cmap_init(&pmd->tx_queues);
>+    /* init the 'flow_cache' and 'tx_queues' since there is no
>      * actual thread created for NON_PMD_CORE_ID. */
>     if (core_id == NON_PMD_CORE_ID) {
>+        struct dp_netdev_port *port;
>+        CMAP_FOR_EACH (port, node, &pmd->dp->ports) {
>+            if (port_try_ref(port)) {
>+                struct dp_netdev_pmd_txq * txq = xmalloc(sizeof *txq);
>+                txq->port = port;
>+                txq->tx_qid = ovs_numa_get_n_cores();
>+                cmap_insert(&pmd->tx_queues, &txq->node,
>+                            hash_port_no(port->port_no));
>+            }
>+        }
>         emc_cache_init(&pmd->flow_cache);
>     }
>     cmap_insert(&dp->poll_threads, CONST_CAST(struct cmap_node *,
>&pmd->node),
>@@ -2869,6 +2935,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread
>*pmd)
>     dp_netdev_pmd_flow_flush(pmd);
>     dpcls_destroy(&pmd->cls);
>     cmap_destroy(&pmd->flow_table);
>+    cmap_destroy(&pmd->tx_queues);
>     ovs_mutex_destroy(&pmd->flow_mutex);
>     latch_destroy(&pmd->exit_latch);
>     xpthread_cond_destroy(&pmd->cond);
>@@ -2885,6 +2952,7 @@ dp_netdev_del_pmd(struct dp_netdev_pmd_thread *pmd)
>      * no actual thread uninit it for NON_PMD_CORE_ID. */
>     if (pmd->core_id == NON_PMD_CORE_ID) {
>         emc_cache_uninit(&pmd->flow_cache);
>+        pmd_flush_tx_queues(pmd);
>     } else {
>         latch_set(&pmd->exit_latch);
>         dp_netdev_reload_pmd__(pmd);
>@@ -3480,13 +3548,17 @@ dp_execute_cb(void *aux_, struct dp_packet
>**packets, int cnt,
>     struct dp_netdev *dp = pmd->dp;
>     int type = nl_attr_type(a);
>     struct dp_netdev_port *p;
>+    struct dp_netdev_pmd_txq *txq;
>     int i;
> 
>     switch ((enum ovs_action_attr)type) {
>     case OVS_ACTION_ATTR_OUTPUT:
>         p = dp_netdev_lookup_port(dp, u32_to_odp(nl_attr_get_u32(a)));
>         if (OVS_LIKELY(p)) {
>-            netdev_send(p->netdev, pmd->tx_qid, packets, cnt, may_steal);
>+            txq = pmd_lookup_txq(pmd, p);
>+            if (OVS_UNLIKELY(!txq))
>+                break;
>+            netdev_send(p->netdev, txq->tx_qid, packets, cnt, may_steal);

I think that performing two lookups (for the port and for the txq) is more
expensive than necessary.  One solution could be to include a pointer to
the
netdev in the 'txq'.

>             return;
>         }
>         break;
>-- 
>2.1.4
>




More information about the dev mailing list