[ovs-dev] [PATCH v2 1/3] dpif-netdev: Incremental addition/deletion of PMD threads.

Kavanagh, Mark B mark.b.kavanagh at intel.com
Thu Jun 1 14:30:20 UTC 2017


>From: ovs-dev-bounces at openvswitch.org [mailto:ovs-dev-bounces at openvswitch.org] On Behalf Of
>Ilya Maximets
>Sent: Tuesday, May 30, 2017 3:12 PM
>To: dev at openvswitch.org; Darrell Ball <dball at vmware.com>
>Cc: Heetae Ahn <heetae82.ahn at samsung.com>; Ilya Maximets <i.maximets at samsung.com>
>Subject: [ovs-dev] [PATCH v2 1/3] dpif-netdev: Incremental addition/deletion of PMD threads.
>
>Currently, change of 'pmd-cpu-mask' is very heavy operation.
>It requires destroying of all the PMD threads and creating	
>them back. After that, all the threads will sleep until
>ports' redistribution finished.
>
>This patch adds ability to not stop the datapath while
>adjusting number/placement of PMD threads. All not affected
>threads will forward traffic without any additional latencies.
>
>id-pool created for static tx queue ids to keep them sequential
>in a flexible way. non-PMD thread will always have
>static_tx_qid = 0 as it was before.
>
>Signed-off-by: Ilya Maximets <i.maximets at samsung.com>

Hi Ilya,

Patch LGTM - just one comment inline.

I conducted some light testing and observed the following:
    - temporary dip in forwarding rate (~50%) when modifying pmd-cpu-mask (in previous implementation, forwarding stopped completely)
        - occasional drop to zero in forwarding rate when modifying pmd-cpu-mask (e.g. change from '2' to '12') 
    - temporary drop of ~1mpps when adding/deleting port (similar to existing implementation) 

I've also conducted the following sanity checks on the patch, and found no issues:
 - checkpatch.py
 - compilation with clang
 - compilation with gcc
 - sparse
 - make check (no additional tests fail after application of this patch)

Tested-by: Mark Kavanagh <mark.b.kavanagh at intel.com>

Thanks,
Mark

>---
> lib/dpif-netdev.c | 143 +++++++++++++++++++++++++++++++++++++++---------------
> tests/pmd.at      |   2 +-
> 2 files changed, 105 insertions(+), 40 deletions(-)
>
>diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>index b50164b..596d133 100644
>--- a/lib/dpif-netdev.c
>+++ b/lib/dpif-netdev.c
>@@ -48,6 +48,7 @@
> #include "fat-rwlock.h"
> #include "flow.h"
> #include "hmapx.h"
>+#include "id-pool.h"
> #include "latch.h"
> #include "netdev.h"
> #include "netdev-vport.h"
>@@ -277,6 +278,9 @@ struct dp_netdev {
>
>     /* Stores all 'struct dp_netdev_pmd_thread's. */
>     struct cmap poll_threads;
>+    /* id pool for per thread static_tx_qid. */
>+    struct id_pool *tx_qid_pool;
>+    struct ovs_mutex tx_qid_pool_mutex;
>
>     /* Protects the access of the 'struct dp_netdev_pmd_thread'
>      * instance for non-pmd thread. */
>@@ -562,7 +566,7 @@ struct dp_netdev_pmd_thread {
>     /* Queue id used by this pmd thread to send packets on all netdevs if
>      * XPS disabled for this netdev. All static_tx_qid's are unique and less
>      * than 'cmap_count(dp->poll_threads)'. */
>-    const int static_tx_qid;
>+    uint32_t static_tx_qid;
>
>     struct ovs_mutex port_mutex;    /* Mutex for 'poll_list' and 'tx_ports'. */
>     /* List of rx queues to poll. */
>@@ -642,6 +646,8 @@ static struct dp_netdev_pmd_thread *dp_netdev_get_pmd(struct dp_netdev
>*dp,
>                                                       unsigned core_id);
> static struct dp_netdev_pmd_thread *
> dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos);
>+static void dp_netdev_del_pmd(struct dp_netdev *dp,
>+                              struct dp_netdev_pmd_thread *pmd);
> static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp, bool non_pmd);
> static void dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd);
> static void dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
>@@ -1181,10 +1187,17 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
>     atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
>
>     cmap_init(&dp->poll_threads);
>+
>+    ovs_mutex_init(&dp->tx_qid_pool_mutex);
>+    /* We need 1 Tx queue for each possible core + 1 for non-PMD threads. */
>+    dp->tx_qid_pool = id_pool_create(0, ovs_numa_get_n_cores() + 1);
>+
>     ovs_mutex_init_recursive(&dp->non_pmd_mutex);
>     ovsthread_key_create(&dp->per_pmd_key, NULL);
>
>     ovs_mutex_lock(&dp->port_mutex);
>+    /* non-PMD will be created before all other threads and will
>+     * allocate static_tx_qid = 0. */
>     dp_netdev_set_nonpmd(dp);
>
>     error = do_add_port(dp, name, dpif_netdev_port_open_type(dp->class,
>@@ -1279,6 +1292,9 @@ dp_netdev_free(struct dp_netdev *dp)
>     dp_netdev_destroy_all_pmds(dp, true);
>     cmap_destroy(&dp->poll_threads);
>
>+    ovs_mutex_destroy(&dp->tx_qid_pool_mutex);
>+    id_pool_destroy(dp->tx_qid_pool);
>+
>     ovs_mutex_destroy(&dp->non_pmd_mutex);
>     ovsthread_key_delete(dp->per_pmd_key);
>
>@@ -3302,12 +3318,29 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp-
>>port_mutex)
> }
>
> static void
>+reload_affected_pmds(struct dp_netdev *dp)
>+{
>+    struct dp_netdev_pmd_thread *pmd;
>+
>+    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>+        if (pmd->need_reload) {
>+            dp_netdev_reload_pmd__(pmd);
>+            pmd->need_reload = false;
>+        }
>+    }
>+}
>+
>+static void
> reconfigure_pmd_threads(struct dp_netdev *dp)
>     OVS_REQUIRES(dp->port_mutex)
> {
>     struct dp_netdev_pmd_thread *pmd;
>     struct ovs_numa_dump *pmd_cores;
>+    struct ovs_numa_info_core *core;
>+    struct hmapx to_delete = HMAPX_INITIALIZER(&to_delete);
>+    struct hmapx_node *node;
>     bool changed = false;
>+    bool need_to_adjust_static_tx_qids = false;
>
>     /* The pmd threads should be started only if there's a pmd port in the
>      * datapath.  If the user didn't provide any "pmd-cpu-mask", we start
>@@ -3320,40 +3353,61 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
>         pmd_cores = ovs_numa_dump_n_cores_per_numa(NR_PMD_THREADS);
>     }
>
>-    /* Check for changed configuration */
>-    if (ovs_numa_dump_count(pmd_cores) != cmap_count(&dp->poll_threads) - 1) {
>-        changed = true;
>-    } else {
>-        CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>-            if (pmd->core_id != NON_PMD_CORE_ID
>-                && !ovs_numa_dump_contains_core(pmd_cores,
>-                                                pmd->numa_id,
>-                                                pmd->core_id)) {
>-                changed = true;
>-                break;
>-            }
>+    /* We need to adjust 'static_tx_qid's only if we're reducing number of
>+     * PMD threads. Otherwise, new threads will allocate all the freed ids. */

Can you elaborate on why the static_tx_qids need to be adjusted - it's still not clear to me from the comment.

>+    if (ovs_numa_dump_count(pmd_cores) < cmap_count(&dp->poll_threads) - 1) {
>+        need_to_adjust_static_tx_qids = true;
>+    }
>+
>+    /* Check for unwanted pmd threads */
>+    CMAP_FOR_EACH(pmd, node, &dp->poll_threads) {
>+        if (pmd->core_id == NON_PMD_CORE_ID) {
>+            continue;
>+        }
>+        if (!ovs_numa_dump_contains_core(pmd_cores, pmd->numa_id,
>+                                                    pmd->core_id)) {
>+            hmapx_add(&to_delete, pmd);
>+        } else if (need_to_adjust_static_tx_qids) {
>+            pmd->need_reload = true;
>         }
>     }
>
>-    /* Destroy the old and recreate the new pmd threads.  We don't perform an
>-     * incremental update because we would have to adjust 'static_tx_qid'. */
>-    if (changed) {
>-        struct ovs_numa_info_core *core;
>-        struct ovs_numa_info_numa *numa;
>+    HMAPX_FOR_EACH (node, &to_delete) {
>+        pmd = (struct dp_netdev_pmd_thread *) node->data;
>+        VLOG_INFO("PMD thread on numa_id: %d, core id: %2d destroyed.",
>+                  pmd->numa_id, pmd->core_id);
>+        dp_netdev_del_pmd(dp, pmd);
>+    }
>+    changed = !hmapx_is_empty(&to_delete);
>+    hmapx_destroy(&to_delete);
>
>-        /* Do not destroy the non pmd thread. */
>-        dp_netdev_destroy_all_pmds(dp, false);
>-        FOR_EACH_CORE_ON_DUMP (core, pmd_cores) {
>-            struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
>+    if (need_to_adjust_static_tx_qids) {
>+        /* 'static_tx_qid's are not sequential now.
>+         * Reload remaining threads to fix this. */
>+        reload_affected_pmds(dp);
>+    }
>
>+    /* Check for required new pmd threads */
>+    FOR_EACH_CORE_ON_DUMP(core, pmd_cores) {
>+        pmd = dp_netdev_get_pmd(dp, core->core_id);
>+        if (!pmd) {
>+            pmd = xzalloc(sizeof *pmd);
>             dp_netdev_configure_pmd(pmd, dp, core->core_id, core->numa_id);
>-
>             pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
>+            VLOG_INFO("PMD thread on numa_id: %d, core id: %2d created.",
>+                      pmd->numa_id, pmd->core_id);
>+            changed = true;
>+        } else {
>+            dp_netdev_pmd_unref(pmd);
>         }
>+    }
>+
>+    if (changed) {
>+        struct ovs_numa_info_numa *numa;
>
>         /* Log the number of pmd threads per numa node. */
>         FOR_EACH_NUMA_ON_DUMP (numa, pmd_cores) {
>-            VLOG_INFO("Created %"PRIuSIZE" pmd threads on numa node %d",
>+            VLOG_INFO("There are %"PRIuSIZE" pmd threads on numa node %d",
>                       numa->n_cores, numa->numa_id);
>         }
>     }
>@@ -3362,19 +3416,6 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
> }
>
> static void
>-reload_affected_pmds(struct dp_netdev *dp)
>-{
>-    struct dp_netdev_pmd_thread *pmd;
>-
>-    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>-        if (pmd->need_reload) {
>-            dp_netdev_reload_pmd__(pmd);
>-            pmd->need_reload = false;
>-        }
>-    }
>-}
>-
>-static void
> pmd_remove_stale_ports(struct dp_netdev *dp,
>                        struct dp_netdev_pmd_thread *pmd)
>     OVS_EXCLUDED(pmd->port_mutex)
>@@ -3673,6 +3714,28 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
>     }
> }
>
>+static void
>+pmd_alloc_static_tx_qid(struct dp_netdev_pmd_thread *pmd)
>+{
>+    ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex);
>+    if (!id_pool_alloc_id(pmd->dp->tx_qid_pool, &pmd->static_tx_qid)) {
>+        VLOG_ABORT("static_tx_qid allocation failed for PMD on core %2d"
>+                   ", numa_id %d.", pmd->core_id, pmd->numa_id);
>+    }
>+    ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex);
>+
>+    VLOG_DBG("static_tx_qid = %d allocated for PMD thread on core %2d"
>+             ", numa_id %d.", pmd->static_tx_qid, pmd->core_id, pmd->numa_id);
>+}
>+
>+static void
>+pmd_free_static_tx_qid(struct dp_netdev_pmd_thread *pmd)
>+{
>+    ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex);
>+    id_pool_free_id(pmd->dp->tx_qid_pool, pmd->static_tx_qid);
>+    ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex);
>+}
>+
> static int
> pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd,
>                           struct polled_queue **ppoll_list)
>@@ -3718,6 +3781,7 @@ pmd_thread_main(void *f_)
>     dpdk_set_lcore_id(pmd->core_id);
>     poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
> reload:
>+    pmd_alloc_static_tx_qid(pmd);
>     emc_cache_init(&pmd->flow_cache);
>
>     /* List port/core affinity */
>@@ -3766,6 +3830,7 @@ reload:
>     dp_netdev_pmd_reload_done(pmd);
>
>     emc_cache_uninit(&pmd->flow_cache);
>+    pmd_free_static_tx_qid(pmd);
>
>     if (!exiting) {
>         goto reload;
>@@ -4176,8 +4241,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct
>dp_netdev *dp,
>     pmd->numa_id = numa_id;
>     pmd->need_reload = false;
>
>-    *CONST_CAST(int *, &pmd->static_tx_qid) = cmap_count(&dp->poll_threads);
>-
>     ovs_refcount_init(&pmd->ref_cnt);
>     latch_init(&pmd->exit_latch);
>     pmd->reload_seq = seq_create();
>@@ -4198,6 +4261,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct
>dp_netdev *dp,
>      * actual thread created for NON_PMD_CORE_ID. */
>     if (core_id == NON_PMD_CORE_ID) {
>         emc_cache_init(&pmd->flow_cache);
>+        pmd_alloc_static_tx_qid(pmd);
>     }
>     cmap_insert(&dp->poll_threads, CONST_CAST(struct cmap_node *, &pmd->node),
>                 hash_int(core_id, 0));
>@@ -4240,6 +4304,7 @@ dp_netdev_del_pmd(struct dp_netdev *dp, struct dp_netdev_pmd_thread
>*pmd)
>         ovs_mutex_lock(&dp->non_pmd_mutex);
>         emc_cache_uninit(&pmd->flow_cache);
>         pmd_free_cached_ports(pmd);
>+        pmd_free_static_tx_qid(pmd);
>         ovs_mutex_unlock(&dp->non_pmd_mutex);
>     } else {
>         latch_set(&pmd->exit_latch);
>diff --git a/tests/pmd.at b/tests/pmd.at
>index 05755b3..39a3645 100644
>--- a/tests/pmd.at
>+++ b/tests/pmd.at
>@@ -38,7 +38,7 @@ dnl
> dnl Whaits for creation of 'n_threads' or at least 1 thread if $1 not
> dnl passed. Checking starts from line number 'line' in ovs-vswithd.log .
> m4_define([CHECK_PMD_THREADS_CREATED], [
>-    PATTERN="Created [[0-9]]* pmd threads on numa node $2"
>+    PATTERN="There are [[0-9]]* pmd threads on numa node $2"
>     line_st=$3
>     if [[ -z "$line_st" ]]
>     then
>--
>2.7.4
>
>_______________________________________________
>dev mailing list
>dev at openvswitch.org
>https://mail.openvswitch.org/mailman/listinfo/ovs-dev


More information about the dev mailing list