[ovs-dev] [PATCH] dpdk: Support running PMD threads on cores > RTE_MAX_LCORE.
David Marchand
david.marchand at redhat.com
Mon Dec 2 16:03:30 UTC 2019
Most DPDK components make the assumption that rte_lcore_id() returns
a valid lcore_id in the [0, RTE_MAX_LCORE) range (with the exception of
the LCORE_ID_ANY special value).
OVS does not currently check which value is set in
RTE_PER_LCORE(_lcore_id), which exposes us to potential crashes on the
DPDK side.
Introduce a lcore allocator in OVS for PMD threads and map them to
unused lcores from DPDK à la --lcores.
The physical cores on which the PMD threads are running still
constitute important information when debugging, so keep those in the
PMD thread names, and add a new debug log when starting them.
Synchronize DPDK internals on NUMA and cpuset for the PMD threads by
registering them via the rte_thread_set_affinity() helper.
Signed-off-by: David Marchand <david.marchand at redhat.com>
---
lib/dpdk-stub.c | 8 +++++-
lib/dpdk.c | 69 +++++++++++++++++++++++++++++++++++++++++++----
lib/dpdk.h | 3 ++-
lib/dpif-netdev.c | 3 ++-
4 files changed, 75 insertions(+), 8 deletions(-)
diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index c332c217c..90473bc8e 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -39,7 +39,13 @@ dpdk_init(const struct smap *ovs_other_config)
}
void
-dpdk_set_lcore_id(unsigned cpu OVS_UNUSED)
+dpdk_init_thread_context(unsigned cpu OVS_UNUSED)
+{
+ /* Nothing */
+}
+
+void
+dpdk_uninit_thread_context(void)
{
/* Nothing */
}
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 21dd47e80..771baa413 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -33,6 +33,7 @@
#include "dirs.h"
#include "fatal-signal.h"
+#include "id-pool.h"
#include "netdev-dpdk.h"
#include "netdev-offload-provider.h"
#include "openvswitch/dynamic-string.h"
@@ -55,6 +56,9 @@ static bool dpdk_initialized = false; /* Indicates successful initialization
* of DPDK. */
static bool per_port_memory = false; /* Status of per port memory support */
+static struct id_pool *lcore_id_pool;
+static struct ovs_mutex lcore_id_pool_mutex = OVS_MUTEX_INITIALIZER;
+
static int
process_vhost_flags(char *flag, const char *default_val, int size,
const struct smap *ovs_other_config,
@@ -346,7 +350,8 @@ dpdk_init__(const struct smap *ovs_other_config)
}
}
- if (args_contains(&args, "-c") || args_contains(&args, "-l")) {
+ if (args_contains(&args, "-c") || args_contains(&args, "-l") ||
+ args_contains(&args, "--lcores")) {
auto_determine = false;
}
@@ -372,8 +377,8 @@ dpdk_init__(const struct smap *ovs_other_config)
* thread affintity - default to core #0 */
VLOG_ERR("Thread getaffinity failed. Using core #0");
}
- svec_add(&args, "-l");
- svec_add_nocopy(&args, xasprintf("%d", cpu));
+ svec_add(&args, "--lcores");
+ svec_add_nocopy(&args, xasprintf("0@%d", cpu));
}
svec_terminate(&args);
@@ -429,6 +434,23 @@ dpdk_init__(const struct smap *ovs_other_config)
}
}
+ ovs_mutex_lock(&lcore_id_pool_mutex);
+ lcore_id_pool = id_pool_create(0, RTE_MAX_LCORE);
+ /* Empty the whole pool... */
+ for (uint32_t lcore = 0; lcore < RTE_MAX_LCORE; lcore++) {
+ uint32_t lcore_id;
+
+ id_pool_alloc_id(lcore_id_pool, &lcore_id);
+ }
+ /* ...and release the unused spots. */
+ for (uint32_t lcore = 0; lcore < RTE_MAX_LCORE; lcore++) {
+ if (rte_eal_lcore_role(lcore) != ROLE_OFF) {
+ continue;
+ }
+ id_pool_free_id(lcore_id_pool, lcore);
+ }
+ ovs_mutex_unlock(&lcore_id_pool_mutex);
+
/* We are called from the main thread here */
RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
@@ -522,11 +544,48 @@ dpdk_available(void)
}
void
-dpdk_set_lcore_id(unsigned cpu)
+dpdk_init_thread_context(unsigned cpu)
{
+ cpu_set_t cpuset;
+ unsigned lcore;
+ int err;
+
/* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
ovs_assert(cpu != NON_PMD_CORE_ID);
- RTE_PER_LCORE(_lcore_id) = cpu;
+
+ ovs_mutex_lock(&lcore_id_pool_mutex);
+ if (lcore_id_pool == NULL || !id_pool_alloc_id(lcore_id_pool, &lcore)) {
+ lcore = NON_PMD_CORE_ID;
+ }
+ ovs_mutex_unlock(&lcore_id_pool_mutex);
+
+ RTE_PER_LCORE(_lcore_id) = lcore;
+
+ /* DPDK is not initialised, nothing more to do. */
+ if (lcore == NON_PMD_CORE_ID) {
+ return;
+ }
+
+ CPU_ZERO(&cpuset);
+ err = pthread_getaffinity_np(pthread_self(), sizeof cpuset, &cpuset);
+ if (err) {
+ VLOG_ABORT("Thread getaffinity error: %s", ovs_strerror(err));
+ }
+
+ rte_thread_set_affinity(&cpuset);
+ VLOG_INFO("Initialised lcore %u for core %u", lcore, cpu);
+}
+
+void
+dpdk_uninit_thread_context(void)
+{
+ if (RTE_PER_LCORE(_lcore_id) == NON_PMD_CORE_ID) {
+ return;
+ }
+
+ ovs_mutex_lock(&lcore_id_pool_mutex);
+ id_pool_free_id(lcore_id_pool, RTE_PER_LCORE(_lcore_id));
+ ovs_mutex_unlock(&lcore_id_pool_mutex);
}
void
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 736a64279..404ac1a4b 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -36,7 +36,8 @@ struct smap;
struct ovsrec_open_vswitch;
void dpdk_init(const struct smap *ovs_other_config);
-void dpdk_set_lcore_id(unsigned cpu);
+void dpdk_init_thread_context(unsigned cpu);
+void dpdk_uninit_thread_context(void);
const char *dpdk_get_vhost_sock_dir(void);
bool dpdk_vhost_iommu_enabled(void);
bool dpdk_vhost_postcopy_enabled(void);
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 5142bad1d..c40031a78 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -5472,7 +5472,7 @@ pmd_thread_main(void *f_)
/* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
ovs_numa_thread_setaffinity_core(pmd->core_id);
- dpdk_set_lcore_id(pmd->core_id);
+ dpdk_init_thread_context(pmd->core_id);
poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
dfc_cache_init(&pmd->flow_cache);
pmd_alloc_static_tx_qid(pmd);
@@ -5592,6 +5592,7 @@ reload:
dfc_cache_uninit(&pmd->flow_cache);
free(poll_list);
pmd_free_cached_ports(pmd);
+ dpdk_uninit_thread_context();
return NULL;
}
--
2.23.0
More information about the dev
mailing list