[ovs-dev] [PATCH dpdk-latest v4] dpdk: Support running PMD threads on any core.
David Marchand
david.marchand at redhat.com
Mon Nov 2 13:10:45 UTC 2020
DPDK 20.08 introduced a new API that associates a non-EAL thread to a free
lcore. This new API does not change the thread characteristics (like CPU
affinity).
Using this new API, there is no assumption on lcore X running on cpu X
anymore which leaves OVS free from running its PMD thread on any cpu.
DPDK still limits the number of lcores to RTE_MAX_LCORE (128 on x86_64)
which should be enough for OVS pmd threads (hopefully).
DPDK lcore/OVS pmd threads mapping are logged at threads creation and
destruction.
A new command is added to help get DPDK point of view of the DPDK lcores:
$ ovs-appctl dpdk/lcores-list
lcore 0, socket 0, role RTE, cpuset 0
lcore 1, socket 0, role NON_EAL, cpuset 1
lcore 2, socket 0, role NON_EAL, cpuset 15
Signed-off-by: David Marchand <david.marchand at redhat.com>
---
Changes since v3:
- rebased on current HEAD,
- switched back to simple warning rather than abort when registering a
thread fails,
Changes since v2:
- introduced a new api in DPDK 20.08 (still being discussed), inbox thread at
http://inbox.dpdk.org/dev/20200610144506.30505-1-david.marchand@redhat.com/T/#t
- this current patch depends on a patch on master I sent:
https://patchwork.ozlabs.org/project/openvswitch/patch/20200626122738.28163-1-david.marchand@redhat.com/
- dropped 'dpdk-lcore-mask' compat handling,
Changes since v1:
- rewired existing configuration 'dpdk-lcore-mask' to use --lcores,
- switched to a bitmap to track lcores,
- added a command to dump current mapping (Flavio): used an experimental
API to get DPDK lcores cpuset since it is the most reliable/portable
information,
- used the same code for the logs when starting DPDK/PMD threads,
- addressed Ilya comments,
---
lib/dpdk-stub.c | 8 +++++++-
lib/dpdk.c | 36 ++++++++++++++++++++++++++++++++++--
lib/dpdk.h | 3 ++-
lib/dpif-netdev.c | 3 ++-
4 files changed, 45 insertions(+), 5 deletions(-)
diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index b7d577870d..5bc996b665 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -39,7 +39,13 @@ dpdk_init(const struct smap *ovs_other_config)
}
void
-dpdk_set_lcore_id(unsigned cpu OVS_UNUSED)
+dpdk_init_thread_context(unsigned cpu OVS_UNUSED)
+{
+ /* Nothing */
+}
+
+void
+dpdk_uninit_thread_context(void)
{
/* Nothing */
}
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 319540394b..7d3c0637c9 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -14,6 +14,10 @@
* limitations under the License.
*/
+#ifndef ALLOW_EXPERIMENTAL_API
+#define ALLOW_EXPERIMENTAL_API
+#endif
+
#include <config.h>
#include "dpdk.h"
@@ -525,6 +529,8 @@ dpdk_init__(const struct smap *ovs_other_config)
dpdk_unixctl_mem_stream, rte_log_dump);
unixctl_command_register("dpdk/log-set", "{level | pattern:level}", 0,
INT_MAX, dpdk_unixctl_log_set, NULL);
+ unixctl_command_register("dpdk/lcores-list", "", 0, 0,
+ dpdk_unixctl_mem_stream, rte_lcore_dump);
/* We are called from the main thread here */
RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
@@ -601,11 +607,37 @@ dpdk_available(void)
}
void
-dpdk_set_lcore_id(unsigned cpu)
+dpdk_init_thread_context(unsigned cpu)
{
/* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
ovs_assert(cpu != NON_PMD_CORE_ID);
- RTE_PER_LCORE(_lcore_id) = cpu;
+
+ if (!dpdk_available()) {
+ return;
+ }
+
+ if (rte_thread_register() < 0) {
+ VLOG_WARN("This OVS pmd thread will share resources with the non-pmd "
+ "thread: %s.", rte_strerror(rte_errno));
+ } else {
+ VLOG_INFO("PMD thread uses DPDK lcore %u.", rte_lcore_id());
+ }
+}
+
+void
+dpdk_uninit_thread_context(void)
+{
+ unsigned int lcore_id;
+
+ if (!dpdk_available()) {
+ return;
+ }
+
+ lcore_id = rte_lcore_id();
+ rte_thread_unregister();
+ if (lcore_id != LCORE_ID_ANY) {
+ VLOG_INFO("PMD thread released DPDK lcore %u.", lcore_id);
+ }
}
void
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 445a51d065..1bd16b31db 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -36,7 +36,8 @@ struct smap;
struct ovsrec_open_vswitch;
void dpdk_init(const struct smap *ovs_other_config);
-void dpdk_set_lcore_id(unsigned cpu);
+void dpdk_init_thread_context(unsigned cpu);
+void dpdk_uninit_thread_context(void);
const char *dpdk_get_vhost_sock_dir(void);
bool dpdk_vhost_iommu_enabled(void);
bool dpdk_vhost_postcopy_enabled(void);
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 300861ca59..da9e1e8fcf 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -5926,7 +5926,7 @@ pmd_thread_main(void *f_)
/* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
ovs_numa_thread_setaffinity_core(pmd->core_id);
- dpdk_set_lcore_id(pmd->core_id);
+ dpdk_init_thread_context(pmd->core_id);
poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
dfc_cache_init(&pmd->flow_cache);
pmd_alloc_static_tx_qid(pmd);
@@ -6061,6 +6061,7 @@ reload:
dfc_cache_uninit(&pmd->flow_cache);
free(poll_list);
pmd_free_cached_ports(pmd);
+ dpdk_uninit_thread_context();
return NULL;
}
--
2.23.0
More information about the dev
mailing list