[ovs-dev] [PATCH dpdk-latest v3] dpdk: Support running PMD threads on any core.

David Marchand david.marchand at redhat.com
Fri Jun 26 12:30:17 UTC 2020


DPDK 20.08 introduced a new API that associates a non-EAL thread with a free
lcore. This new API does not change the thread characteristics (like CPU
affinity).
Using this new API, there is no longer any assumption that lcore X runs on
cpu X, which leaves OVS free to run its PMD threads on any cpu.

DPDK still limits the number of lcores to RTE_MAX_LCORE (128 on x86_64)
which should be enough for OVS (hopefully).

lcore/pmd thread mappings are logged at thread creation and
destruction.
A new command is added to help get DPDK's point of view of the lcores:

$ ovs-appctl dpdk/lcores-list
lcore 0, socket 0, role RTE, cpuset 0
lcore 1, socket 0, role NON_EAL, cpuset 1
lcore 2, socket 0, role NON_EAL, cpuset 15

Signed-off-by: David Marchand <david.marchand at redhat.com>
---
Changes since v2:
- introduced a new api in DPDK 20.08 (still being discussed), inbox thread at
  http://inbox.dpdk.org/dev/20200610144506.30505-1-david.marchand@redhat.com/T/#t
- this current patch depends on a patch on master I sent:
  https://patchwork.ozlabs.org/project/openvswitch/patch/20200626122738.28163-1-david.marchand@redhat.com/
- dropped 'dpdk-lcore-mask' compat handling,

Changes since v1:
- rewired existing configuration 'dpdk-lcore-mask' to use --lcores,
- switched to a bitmap to track lcores,
- added a command to dump current mapping (Flavio): used an experimental
  API to get DPDK lcores cpuset since it is the most reliable/portable
  information,
- used the same code for the logs when starting DPDK/PMD threads,
- addressed Ilya comments,

---
 lib/dpdk-stub.c   |  8 +++++++-
 lib/dpdk.c        | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 lib/dpdk.h        |  3 ++-
 lib/dpif-netdev.c |  3 ++-
 4 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index c332c217cb..90473bc8e7 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -39,7 +39,13 @@ dpdk_init(const struct smap *ovs_other_config)
 }
 
 void
-dpdk_set_lcore_id(unsigned cpu OVS_UNUSED)
+dpdk_init_thread_context(unsigned cpu OVS_UNUSED)
+{
+    /* Nothing */
+}
+
+void
+dpdk_uninit_thread_context(void)
 {
     /* Nothing */
 }
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 55ce9a9221..2a97786d14 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -358,6 +358,31 @@ dpdk_unixctl_log_set(struct unixctl_conn *conn, int argc, const char *argv[],
     unixctl_command_reply(conn, NULL);
 }
 
+#ifdef ALLOW_EXPERIMENTAL_API
+static void
+dpdk_unixctl_lcore_list(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                        const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+{
+    char *response = NULL;
+    FILE *stream;
+    size_t size;
+
+    stream = open_memstream(&response, &size);
+    if (!stream) {
+        response = xasprintf("Unable to open memstream: %s.",
+                             ovs_strerror(errno));
+        unixctl_command_reply_error(conn, response);
+        goto out;
+    }
+
+    rte_lcore_dump(stream);
+    fclose(stream);
+    unixctl_command_reply(conn, response);
+out:
+    free(response);
+}
+#endif
+
 static bool
 dpdk_init__(const struct smap *ovs_other_config)
 {
@@ -537,6 +562,10 @@ dpdk_init__(const struct smap *ovs_other_config)
                              dpdk_unixctl_log_list, NULL);
     unixctl_command_register("dpdk/log-set", "pattern:level", 0, INT_MAX,
                              dpdk_unixctl_log_set, NULL);
+#ifdef ALLOW_EXPERIMENTAL_API
+    unixctl_command_register("dpdk/lcores-list", "", 0, 0,
+                             dpdk_unixctl_lcore_list, NULL);
+#endif
 
     /* We are called from the main thread here */
     RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
@@ -613,18 +642,34 @@ dpdk_available(void)
 }
 
 void
-dpdk_set_lcore_id(unsigned cpu)
+dpdk_init_thread_context(unsigned cpu)
 {
     /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
     ovs_assert(cpu != NON_PMD_CORE_ID);
+#ifdef ALLOW_EXPERIMENTAL_API
+    rte_thread_register();
+#else
     if (cpu >= RTE_MAX_LCORE) {
         cpu = LCORE_ID_ANY;
     }
     RTE_PER_LCORE(_lcore_id) = cpu;
+#endif
     if (rte_lcore_id() == LCORE_ID_ANY) {
         ovs_abort(0, "PMD thread init failed, trying to use more cores than "
                   "DPDK supports (RTE_MAX_LCORE %u).", RTE_MAX_LCORE);
     }
+    VLOG_INFO("PMD thread is associated to DPDK lcore %u.", rte_lcore_id());
+}
+
+void
+dpdk_uninit_thread_context(void)
+{
+    unsigned int lcore_id = rte_lcore_id();
+
+#ifdef ALLOW_EXPERIMENTAL_API
+    rte_thread_unregister();
+#endif
+    VLOG_INFO("PMD thread released DPDK lcore %u.", lcore_id);
 }
 
 void
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 736a64279e..404ac1a4bf 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -36,7 +36,8 @@ struct smap;
 struct ovsrec_open_vswitch;
 
 void dpdk_init(const struct smap *ovs_other_config);
-void dpdk_set_lcore_id(unsigned cpu);
+void dpdk_init_thread_context(unsigned cpu);
+void dpdk_uninit_thread_context(void);
 const char *dpdk_get_vhost_sock_dir(void);
 bool dpdk_vhost_iommu_enabled(void);
 bool dpdk_vhost_postcopy_enabled(void);
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 1086efd47e..cd7e1569e0 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -5701,7 +5701,7 @@ pmd_thread_main(void *f_)
     /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
     ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
     ovs_numa_thread_setaffinity_core(pmd->core_id);
-    dpdk_set_lcore_id(pmd->core_id);
+    dpdk_init_thread_context(pmd->core_id);
     poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
     dfc_cache_init(&pmd->flow_cache);
     pmd_alloc_static_tx_qid(pmd);
@@ -5821,6 +5821,7 @@ reload:
     dfc_cache_uninit(&pmd->flow_cache);
     free(poll_list);
     pmd_free_cached_ports(pmd);
+    dpdk_uninit_thread_context();
     return NULL;
 }
 
-- 
2.23.0



More information about the dev mailing list