[ovs-dev] [PATCH] dpdk: Support running PMD threads on cores >= RTE_MAX_LCORE.

David Marchand david.marchand at redhat.com
Mon Dec 2 16:03:30 UTC 2019


Most DPDK components assume that rte_lcore_id() returns a valid lcore
id in the [0, RTE_MAX_LCORE) range (with the exception of the
LCORE_ID_ANY special value).

OVS does not currently check which value is set in
RTE_PER_LCORE(_lcore_id), which exposes us to potential crashes on the
DPDK side.
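
As an illustration (hypothetical code, not taken from DPDK or OVS
sources), DPDK commonly keeps per-lcore state in arrays dimensioned by
RTE_MAX_LCORE and indexes them with rte_lcore_id(), so an out-of-range
lcore id turns into an out-of-bounds access:

    #include <stdint.h>
    #include <rte_lcore.h>

    /* Hypothetical per-lcore counters, sized like DPDK per-lcore arrays. */
    static uint64_t pkt_count[RTE_MAX_LCORE];

    static inline void
    count_packet(void)
    {
        /* If RTE_PER_LCORE(_lcore_id) holds a raw CPU number that is
         * >= RTE_MAX_LCORE, this write lands outside the array. */
        pkt_count[rte_lcore_id()]++;
    }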

Introduce an lcore allocator in OVS for PMD threads and map them to
unused lcores from DPDK à la --lcores.
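
For reference (CPU numbers made up), the EAL --lcores option already
decouples lcore ids from physical CPUs, which is the kind of mapping
emulated here for PMD threads:

    # EAL lcore 0 pinned to physical CPU 1, lcore 1 pinned to CPU 35
    --lcores '0@1,1@35'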

The physical cores on which the PMD threads run remain important
information when debugging, so keep them in the PMD thread names, and
add an info log reporting the lcore to physical core mapping when each
PMD thread starts.

Synchronize DPDK internals on NUMA and cpuset for the PMD threads by
registering them via the rte_thread_set_affinity() helper.
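
Condensed sketch of that registration (illustrative name
register_pmd_affinity(), assuming a Linux build where rte_cpuset_t is
cpu_set_t): the PMD thread reuses the affinity already set by OVS and
hands it to DPDK, which records the per-lcore cpuset and matching NUMA
socket:

    #define _GNU_SOURCE
    #include <pthread.h>
    #include <rte_lcore.h>

    static void
    register_pmd_affinity(void)
    {
        cpu_set_t cpuset;

        CPU_ZERO(&cpuset);
        if (pthread_getaffinity_np(pthread_self(), sizeof cpuset,
                                   &cpuset) == 0) {
            /* DPDK copies the cpuset into its per-lcore state and
             * updates the per-lcore NUMA socket id. */
            rte_thread_set_affinity(&cpuset);
        }
    }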

Signed-off-by: David Marchand <david.marchand at redhat.com>
---
 lib/dpdk-stub.c   |  8 +++++-
 lib/dpdk.c        | 69 +++++++++++++++++++++++++++++++++++++++++++----
 lib/dpdk.h        |  3 ++-
 lib/dpif-netdev.c |  3 ++-
 4 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index c332c217c..90473bc8e 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -39,7 +39,13 @@ dpdk_init(const struct smap *ovs_other_config)
 }
 
 void
-dpdk_set_lcore_id(unsigned cpu OVS_UNUSED)
+dpdk_init_thread_context(unsigned cpu OVS_UNUSED)
+{
+    /* Nothing */
+}
+
+void
+dpdk_uninit_thread_context(void)
 {
     /* Nothing */
 }
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 21dd47e80..771baa413 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -33,6 +33,7 @@
 
 #include "dirs.h"
 #include "fatal-signal.h"
+#include "id-pool.h"
 #include "netdev-dpdk.h"
 #include "netdev-offload-provider.h"
 #include "openvswitch/dynamic-string.h"
@@ -55,6 +56,9 @@ static bool dpdk_initialized = false; /* Indicates successful initialization
                                        * of DPDK. */
 static bool per_port_memory = false; /* Status of per port memory support */
 
+static struct id_pool *lcore_id_pool;
+static struct ovs_mutex lcore_id_pool_mutex = OVS_MUTEX_INITIALIZER;
+
 static int
 process_vhost_flags(char *flag, const char *default_val, int size,
                     const struct smap *ovs_other_config,
@@ -346,7 +350,8 @@ dpdk_init__(const struct smap *ovs_other_config)
         }
     }
 
-    if (args_contains(&args, "-c") || args_contains(&args, "-l")) {
+    if (args_contains(&args, "-c") || args_contains(&args, "-l") ||
+        args_contains(&args, "--lcores")) {
         auto_determine = false;
     }
 
@@ -372,8 +377,8 @@ dpdk_init__(const struct smap *ovs_other_config)
              * thread affintity - default to core #0 */
             VLOG_ERR("Thread getaffinity failed. Using core #0");
         }
-        svec_add(&args, "-l");
-        svec_add_nocopy(&args, xasprintf("%d", cpu));
+        svec_add(&args, "--lcores");
+        svec_add_nocopy(&args, xasprintf("0@%d", cpu));
     }
 
     svec_terminate(&args);
@@ -429,6 +434,23 @@ dpdk_init__(const struct smap *ovs_other_config)
         }
     }
 
+    ovs_mutex_lock(&lcore_id_pool_mutex);
+    lcore_id_pool = id_pool_create(0, RTE_MAX_LCORE);
+    /* Empty the whole pool... */
+    for (uint32_t lcore = 0; lcore < RTE_MAX_LCORE; lcore++) {
+        uint32_t lcore_id;
+
+        id_pool_alloc_id(lcore_id_pool, &lcore_id);
+    }
+    /* ...and release the unused spots. */
+    for (uint32_t lcore = 0; lcore < RTE_MAX_LCORE; lcore++) {
+        if (rte_eal_lcore_role(lcore) != ROLE_OFF) {
+            continue;
+        }
+        id_pool_free_id(lcore_id_pool, lcore);
+    }
+    ovs_mutex_unlock(&lcore_id_pool_mutex);
+
     /* We are called from the main thread here */
     RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
 
@@ -522,11 +544,48 @@ dpdk_available(void)
 }
 
 void
-dpdk_set_lcore_id(unsigned cpu)
+dpdk_init_thread_context(unsigned cpu)
 {
+    cpu_set_t cpuset;
+    unsigned lcore;
+    int err;
+
     /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
     ovs_assert(cpu != NON_PMD_CORE_ID);
-    RTE_PER_LCORE(_lcore_id) = cpu;
+
+    ovs_mutex_lock(&lcore_id_pool_mutex);
+    if (lcore_id_pool == NULL || !id_pool_alloc_id(lcore_id_pool, &lcore)) {
+        lcore = NON_PMD_CORE_ID;
+    }
+    ovs_mutex_unlock(&lcore_id_pool_mutex);
+
+    RTE_PER_LCORE(_lcore_id) = lcore;
+
+    /* DPDK is not initialised or no lcore is left, nothing more to do. */
+    if (lcore == NON_PMD_CORE_ID) {
+        return;
+    }
+
+    CPU_ZERO(&cpuset);
+    err = pthread_getaffinity_np(pthread_self(), sizeof cpuset, &cpuset);
+    if (err) {
+        VLOG_ABORT("Thread getaffinity error: %s", ovs_strerror(err));
+    }
+
+    rte_thread_set_affinity(&cpuset);
+    VLOG_INFO("Initialised lcore %u for core %u", lcore, cpu);
+}
+
+void
+dpdk_uninit_thread_context(void)
+{
+    if (RTE_PER_LCORE(_lcore_id) == NON_PMD_CORE_ID) {
+        return;
+    }
+
+    ovs_mutex_lock(&lcore_id_pool_mutex);
+    id_pool_free_id(lcore_id_pool, RTE_PER_LCORE(_lcore_id));
+    ovs_mutex_unlock(&lcore_id_pool_mutex);
 }
 
 void
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 736a64279..404ac1a4b 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -36,7 +36,8 @@ struct smap;
 struct ovsrec_open_vswitch;
 
 void dpdk_init(const struct smap *ovs_other_config);
-void dpdk_set_lcore_id(unsigned cpu);
+void dpdk_init_thread_context(unsigned cpu);
+void dpdk_uninit_thread_context(void);
 const char *dpdk_get_vhost_sock_dir(void);
 bool dpdk_vhost_iommu_enabled(void);
 bool dpdk_vhost_postcopy_enabled(void);
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 5142bad1d..c40031a78 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -5472,7 +5472,7 @@ pmd_thread_main(void *f_)
     /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
     ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
     ovs_numa_thread_setaffinity_core(pmd->core_id);
-    dpdk_set_lcore_id(pmd->core_id);
+    dpdk_init_thread_context(pmd->core_id);
     poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
     dfc_cache_init(&pmd->flow_cache);
     pmd_alloc_static_tx_qid(pmd);
@@ -5592,6 +5592,7 @@ reload:
     dfc_cache_uninit(&pmd->flow_cache);
     free(poll_list);
     pmd_free_cached_ports(pmd);
+    dpdk_uninit_thread_context();
     return NULL;
 }
 
-- 
2.23.0


