[ovs-dev] [PATCH v5] dpdk: Support running PMD threads on any core.

David Marchand david.marchand at redhat.com
Tue Dec 29 17:56:41 UTC 2020


DPDK 20.08 introduced a new API that associates a non-EAL thread to a free
lcore. This new API does not change the thread characteristics (like CPU
affinity).
Using this new API, there is no assumption on lcore X running on cpu X
anymore which leaves OVS free from running its PMD thread on any cpu.
The DPDK multiprocess feature is not compatible with this new API and is
disabled.

DPDK still limits the number of lcores to RTE_MAX_LCORE (128 on x86_64)
which should be enough for OVS pmd threads (hopefully).

DPDK lcore/OVS pmd threads mapping are logged at threads creation and
destruction.
A new command is added to help get DPDK point of view of the DPDK lcores:

$ ovs-appctl dpdk/lcores-list
lcore 0, socket 0, role RTE, cpuset 0
lcore 1, socket 0, role NON_EAL, cpuset 1
lcore 2, socket 0, role NON_EAL, cpuset 15

Signed-off-by: David Marchand <david.marchand at redhat.com>
---
Changes since v4:
- rebased on the master branch,
- disabled DPDK mp feature,
- updated DPDK documentation and manual with the new command,
- added notes in NEWS,

Changes since v3:
- rebased on current HEAD,
- switched back to simple warning rather than abort when registering a
  thread fails,

Changes since v2:
- introduced a new api in DPDK 20.08 (still being discussed), inbox thread at
  http://inbox.dpdk.org/dev/20200610144506.30505-1-david.marchand@redhat.com/T/#t
- this current patch depends on a patch on master I sent:
  https://patchwork.ozlabs.org/project/openvswitch/patch/20200626122738.28163-1-david.marchand@redhat.com/
- dropped 'dpdk-lcore-mask' compat handling,

Changes since v1:
- rewired existing configuration 'dpdk-lcore-mask' to use --lcores,
- switched to a bitmap to track lcores,
- added a command to dump current mapping (Flavio): used an experimental
  API to get DPDK lcores cpuset since it is the most reliable/portable
  information,
- used the same code for the logs when starting DPDK/PMD threads,
- addressed Ilya comments,

---
 Documentation/howto/dpdk.rst |  5 +++++
 NEWS                         |  2 ++
 lib/dpdk-stub.c              |  8 ++++++-
 lib/dpdk-unixctl.man         |  2 ++
 lib/dpdk.c                   | 42 ++++++++++++++++++++++++++++++++++--
 lib/dpdk.h                   |  3 ++-
 lib/dpif-netdev.c            |  3 ++-
 7 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index f0d45e47b6..8492f354ce 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -399,6 +399,11 @@ Supported actions for hardware offload are:
 - Modification of IPv6 (set_field:<ADDR>->ipv6_src/ipv6_dst/mod_nw_ttl).
 - Clone/output (tnl_push and output) for encapsulating over a tunnel.
 
+Multiprocess
+------------
+
+This DPDK feature is not supported and disabled during OVS initialization.
+
 Further Reading
 ---------------
 
diff --git a/NEWS b/NEWS
index d357da31d8..f59311ab63 100644
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,8 @@ Post-v2.14.0
    - DPDK:
      * Removed support for vhost-user dequeue zero-copy.
      * Add support for DPDK 20.11.
+     * Forbid use of DPDK multiprocess feature.
+     * Add support for running threads on cores > RTE_MAX_LCORE.
    - Userspace datapath:
      * Add the 'pmd' option to "ovs-appctl dpctl/dump-flows", which
        restricts a flow dump to a single PMD thread if set.
diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index b7d577870d..5bc996b665 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -39,7 +39,13 @@ dpdk_init(const struct smap *ovs_other_config)
 }
 
 void
-dpdk_set_lcore_id(unsigned cpu OVS_UNUSED)
+dpdk_init_thread_context(unsigned cpu OVS_UNUSED)
+{
+    /* Nothing */
+}
+
+void
+dpdk_uninit_thread_context(void)
 {
     /* Nothing */
 }
diff --git a/lib/dpdk-unixctl.man b/lib/dpdk-unixctl.man
index 2d6d576f24..c28bb4ef20 100644
--- a/lib/dpdk-unixctl.man
+++ b/lib/dpdk-unixctl.man
@@ -10,5 +10,7 @@ list of words separated by spaces: a word can be either a logging \fBlevel\fR
 \fBnotice\fR, \fBinfo\fR or \fBdebug\fR) or a \fBpattern\fR matching DPDK
 components (see \fBdpdk/log-list\fR command on \fBovs\-appctl\fR(8)) separated
 by a colon from the logging \fBlevel\fR to apply.
+.IP "\fBdpdk/lcore-list\fR"
+Lists the DPDK lcores and their cpu affinity.
 .RE
 .
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 319540394b..defac3f7de 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -14,6 +14,10 @@
  * limitations under the License.
  */
 
+#ifndef ALLOW_EXPERIMENTAL_API
+#define ALLOW_EXPERIMENTAL_API
+#endif
+
 #include <config.h>
 #include "dpdk.h"
 
@@ -502,6 +506,12 @@ dpdk_init__(const struct smap *ovs_other_config)
         return false;
     }
 
+    if (!rte_mp_disable()) {
+        VLOG_EMER("Could not disable multiprocess, DPDK won't be available.");
+        rte_eal_cleanup();
+        return false;
+    }
+
     if (VLOG_IS_DBG_ENABLED()) {
         size_t size;
         char *response = NULL;
@@ -525,6 +535,8 @@ dpdk_init__(const struct smap *ovs_other_config)
                              dpdk_unixctl_mem_stream, rte_log_dump);
     unixctl_command_register("dpdk/log-set", "{level | pattern:level}", 0,
                              INT_MAX, dpdk_unixctl_log_set, NULL);
+    unixctl_command_register("dpdk/lcores-list", "", 0, 0,
+                             dpdk_unixctl_mem_stream, rte_lcore_dump);
 
     /* We are called from the main thread here */
     RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
@@ -601,11 +613,37 @@ dpdk_available(void)
 }
 
 void
-dpdk_set_lcore_id(unsigned cpu)
+dpdk_init_thread_context(unsigned cpu)
 {
     /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
     ovs_assert(cpu != NON_PMD_CORE_ID);
-    RTE_PER_LCORE(_lcore_id) = cpu;
+
+    if (!dpdk_available()) {
+        return;
+    }
+
+    if (rte_thread_register() < 0) {
+        VLOG_WARN("This OVS pmd thread will share resources with the non-pmd "
+                  "thread: %s.", rte_strerror(rte_errno));
+    } else {
+        VLOG_INFO("PMD thread uses DPDK lcore %u.", rte_lcore_id());
+    }
+}
+
+void
+dpdk_uninit_thread_context(void)
+{
+    unsigned int lcore_id;
+
+    if (!dpdk_available()) {
+        return;
+    }
+
+    lcore_id = rte_lcore_id();
+    rte_thread_unregister();
+    if (lcore_id != LCORE_ID_ANY) {
+        VLOG_INFO("PMD thread released DPDK lcore %u.", lcore_id);
+    }
 }
 
 void
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 445a51d065..1bd16b31db 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -36,7 +36,8 @@ struct smap;
 struct ovsrec_open_vswitch;
 
 void dpdk_init(const struct smap *ovs_other_config);
-void dpdk_set_lcore_id(unsigned cpu);
+void dpdk_init_thread_context(unsigned cpu);
+void dpdk_uninit_thread_context(void);
 const char *dpdk_get_vhost_sock_dir(void);
 bool dpdk_vhost_iommu_enabled(void);
 bool dpdk_vhost_postcopy_enabled(void);
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 300861ca59..da9e1e8fcf 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -5926,7 +5926,7 @@ pmd_thread_main(void *f_)
     /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
     ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
     ovs_numa_thread_setaffinity_core(pmd->core_id);
-    dpdk_set_lcore_id(pmd->core_id);
+    dpdk_init_thread_context(pmd->core_id);
     poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
     dfc_cache_init(&pmd->flow_cache);
     pmd_alloc_static_tx_qid(pmd);
@@ -6061,6 +6061,7 @@ reload:
     dfc_cache_uninit(&pmd->flow_cache);
     free(poll_list);
     pmd_free_cached_ports(pmd);
+    dpdk_uninit_thread_context();
     return NULL;
 }
 
-- 
2.23.0



More information about the dev mailing list