[ovs-dev] [PATCH v2 1/3] dpif-netdev: Incremental addition/deletion of PMD threads.
Kavanagh, Mark B
mark.b.kavanagh at intel.com
Thu Jun 1 14:30:20 UTC 2017
>From: ovs-dev-bounces at openvswitch.org [mailto:ovs-dev-bounces at openvswitch.org] On Behalf Of
>Ilya Maximets
>Sent: Tuesday, May 30, 2017 3:12 PM
>To: dev at openvswitch.org; Darrell Ball <dball at vmware.com>
>Cc: Heetae Ahn <heetae82.ahn at samsung.com>; Ilya Maximets <i.maximets at samsung.com>
>Subject: [ovs-dev] [PATCH v2 1/3] dpif-netdev: Incremental addition/deletion of PMD threads.
>
>Currently, change of 'pmd-cpu-mask' is very heavy operation.
>It requires destroying of all the PMD threads and creating
>them back. After that, all the threads will sleep until
>ports' redistribution finished.
>
>This patch adds ability to not stop the datapath while
>adjusting number/placement of PMD threads. All not affected
>threads will forward traffic without any additional latencies.
>
>id-pool created for static tx queue ids to keep them sequential
>in a flexible way. non-PMD thread will always have
>static_tx_qid = 0 as it was before.
>
>Signed-off-by: Ilya Maximets <i.maximets at samsung.com>
Hi Ilya,
Patch LGTM - just one comment inline.
I conducted some light testing and observed the following:
- temporary dip in forwarding rate (~50%) when modifying pmd-cpu-mask (in previous implementation, forwarding stopped completely)
- occasional drop to zero in forwarding rate when modifying pmd-cpu-mask (e.g. change from '2' to '12')
- temporary drop of ~1mpps when adding/deleting port (similar to existing implementation)
I've also conducted the following sanity checks on the patch, and found no issues:
- checkpatch.py
- compilation with clang
- compilation with gcc
- sparse
- make check (no additional tests fail after application of this patch)
Tested-by: Mark Kavanagh <mark.b.kavanagh at intel.com>
Thanks,
Mark
>---
> lib/dpif-netdev.c | 143 +++++++++++++++++++++++++++++++++++++++---------------
> tests/pmd.at | 2 +-
> 2 files changed, 105 insertions(+), 40 deletions(-)
>
>diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>index b50164b..596d133 100644
>--- a/lib/dpif-netdev.c
>+++ b/lib/dpif-netdev.c
>@@ -48,6 +48,7 @@
> #include "fat-rwlock.h"
> #include "flow.h"
> #include "hmapx.h"
>+#include "id-pool.h"
> #include "latch.h"
> #include "netdev.h"
> #include "netdev-vport.h"
>@@ -277,6 +278,9 @@ struct dp_netdev {
>
> /* Stores all 'struct dp_netdev_pmd_thread's. */
> struct cmap poll_threads;
>+ /* id pool for per thread static_tx_qid. */
>+ struct id_pool *tx_qid_pool;
>+ struct ovs_mutex tx_qid_pool_mutex;
>
> /* Protects the access of the 'struct dp_netdev_pmd_thread'
> * instance for non-pmd thread. */
>@@ -562,7 +566,7 @@ struct dp_netdev_pmd_thread {
> /* Queue id used by this pmd thread to send packets on all netdevs if
> * XPS disabled for this netdev. All static_tx_qid's are unique and less
> * than 'cmap_count(dp->poll_threads)'. */
>- const int static_tx_qid;
>+ uint32_t static_tx_qid;
>
> struct ovs_mutex port_mutex; /* Mutex for 'poll_list' and 'tx_ports'. */
> /* List of rx queues to poll. */
>@@ -642,6 +646,8 @@ static struct dp_netdev_pmd_thread *dp_netdev_get_pmd(struct dp_netdev
>*dp,
> unsigned core_id);
> static struct dp_netdev_pmd_thread *
> dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos);
>+static void dp_netdev_del_pmd(struct dp_netdev *dp,
>+ struct dp_netdev_pmd_thread *pmd);
> static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp, bool non_pmd);
> static void dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd);
> static void dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
>@@ -1181,10 +1187,17 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
> atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
>
> cmap_init(&dp->poll_threads);
>+
>+ ovs_mutex_init(&dp->tx_qid_pool_mutex);
>+ /* We need 1 Tx queue for each possible core + 1 for non-PMD threads. */
>+ dp->tx_qid_pool = id_pool_create(0, ovs_numa_get_n_cores() + 1);
>+
> ovs_mutex_init_recursive(&dp->non_pmd_mutex);
> ovsthread_key_create(&dp->per_pmd_key, NULL);
>
> ovs_mutex_lock(&dp->port_mutex);
>+ /* non-PMD will be created before all other threads and will
>+ * allocate static_tx_qid = 0. */
> dp_netdev_set_nonpmd(dp);
>
> error = do_add_port(dp, name, dpif_netdev_port_open_type(dp->class,
>@@ -1279,6 +1292,9 @@ dp_netdev_free(struct dp_netdev *dp)
> dp_netdev_destroy_all_pmds(dp, true);
> cmap_destroy(&dp->poll_threads);
>
>+ ovs_mutex_destroy(&dp->tx_qid_pool_mutex);
>+ id_pool_destroy(dp->tx_qid_pool);
>+
> ovs_mutex_destroy(&dp->non_pmd_mutex);
> ovsthread_key_delete(dp->per_pmd_key);
>
>@@ -3302,12 +3318,29 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp-
>>port_mutex)
> }
>
> static void
>+reload_affected_pmds(struct dp_netdev *dp)
>+{
>+ struct dp_netdev_pmd_thread *pmd;
>+
>+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>+ if (pmd->need_reload) {
>+ dp_netdev_reload_pmd__(pmd);
>+ pmd->need_reload = false;
>+ }
>+ }
>+}
>+
>+static void
> reconfigure_pmd_threads(struct dp_netdev *dp)
> OVS_REQUIRES(dp->port_mutex)
> {
> struct dp_netdev_pmd_thread *pmd;
> struct ovs_numa_dump *pmd_cores;
>+ struct ovs_numa_info_core *core;
>+ struct hmapx to_delete = HMAPX_INITIALIZER(&to_delete);
>+ struct hmapx_node *node;
> bool changed = false;
>+ bool need_to_adjust_static_tx_qids = false;
>
> /* The pmd threads should be started only if there's a pmd port in the
> * datapath. If the user didn't provide any "pmd-cpu-mask", we start
>@@ -3320,40 +3353,61 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
> pmd_cores = ovs_numa_dump_n_cores_per_numa(NR_PMD_THREADS);
> }
>
>- /* Check for changed configuration */
>- if (ovs_numa_dump_count(pmd_cores) != cmap_count(&dp->poll_threads) - 1) {
>- changed = true;
>- } else {
>- CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>- if (pmd->core_id != NON_PMD_CORE_ID
>- && !ovs_numa_dump_contains_core(pmd_cores,
>- pmd->numa_id,
>- pmd->core_id)) {
>- changed = true;
>- break;
>- }
>+ /* We need to adjust 'static_tx_qid's only if we're reducing number of
>+ * PMD threads. Otherwise, new threads will allocate all the freed ids. */
Can you elaborate on why the static_tx_qids need to be adjusted - it's still not clear to me from the comment.
>+ if (ovs_numa_dump_count(pmd_cores) < cmap_count(&dp->poll_threads) - 1) {
>+ need_to_adjust_static_tx_qids = true;
>+ }
>+
>+ /* Check for unwanted pmd threads */
>+ CMAP_FOR_EACH(pmd, node, &dp->poll_threads) {
>+ if (pmd->core_id == NON_PMD_CORE_ID) {
>+ continue;
>+ }
>+ if (!ovs_numa_dump_contains_core(pmd_cores, pmd->numa_id,
>+ pmd->core_id)) {
>+ hmapx_add(&to_delete, pmd);
>+ } else if (need_to_adjust_static_tx_qids) {
>+ pmd->need_reload = true;
> }
> }
>
>- /* Destroy the old and recreate the new pmd threads. We don't perform an
>- * incremental update because we would have to adjust 'static_tx_qid'. */
>- if (changed) {
>- struct ovs_numa_info_core *core;
>- struct ovs_numa_info_numa *numa;
>+ HMAPX_FOR_EACH (node, &to_delete) {
>+ pmd = (struct dp_netdev_pmd_thread *) node->data;
>+ VLOG_INFO("PMD thread on numa_id: %d, core id: %2d destroyed.",
>+ pmd->numa_id, pmd->core_id);
>+ dp_netdev_del_pmd(dp, pmd);
>+ }
>+ changed = !hmapx_is_empty(&to_delete);
>+ hmapx_destroy(&to_delete);
>
>- /* Do not destroy the non pmd thread. */
>- dp_netdev_destroy_all_pmds(dp, false);
>- FOR_EACH_CORE_ON_DUMP (core, pmd_cores) {
>- struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
>+ if (need_to_adjust_static_tx_qids) {
>+ /* 'static_tx_qid's are not sequential now.
>+ * Reload remaining threads to fix this. */
>+ reload_affected_pmds(dp);
>+ }
>
>+ /* Check for required new pmd threads */
>+ FOR_EACH_CORE_ON_DUMP(core, pmd_cores) {
>+ pmd = dp_netdev_get_pmd(dp, core->core_id);
>+ if (!pmd) {
>+ pmd = xzalloc(sizeof *pmd);
> dp_netdev_configure_pmd(pmd, dp, core->core_id, core->numa_id);
>-
> pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
>+ VLOG_INFO("PMD thread on numa_id: %d, core id: %2d created.",
>+ pmd->numa_id, pmd->core_id);
>+ changed = true;
>+ } else {
>+ dp_netdev_pmd_unref(pmd);
> }
>+ }
>+
>+ if (changed) {
>+ struct ovs_numa_info_numa *numa;
>
> /* Log the number of pmd threads per numa node. */
> FOR_EACH_NUMA_ON_DUMP (numa, pmd_cores) {
>- VLOG_INFO("Created %"PRIuSIZE" pmd threads on numa node %d",
>+ VLOG_INFO("There are %"PRIuSIZE" pmd threads on numa node %d",
> numa->n_cores, numa->numa_id);
> }
> }
>@@ -3362,19 +3416,6 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
> }
>
> static void
>-reload_affected_pmds(struct dp_netdev *dp)
>-{
>- struct dp_netdev_pmd_thread *pmd;
>-
>- CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>- if (pmd->need_reload) {
>- dp_netdev_reload_pmd__(pmd);
>- pmd->need_reload = false;
>- }
>- }
>-}
>-
>-static void
> pmd_remove_stale_ports(struct dp_netdev *dp,
> struct dp_netdev_pmd_thread *pmd)
> OVS_EXCLUDED(pmd->port_mutex)
>@@ -3673,6 +3714,28 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
> }
> }
>
>+static void
>+pmd_alloc_static_tx_qid(struct dp_netdev_pmd_thread *pmd)
>+{
>+ ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex);
>+ if (!id_pool_alloc_id(pmd->dp->tx_qid_pool, &pmd->static_tx_qid)) {
>+ VLOG_ABORT("static_tx_qid allocation failed for PMD on core %2d"
>+ ", numa_id %d.", pmd->core_id, pmd->numa_id);
>+ }
>+ ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex);
>+
>+ VLOG_DBG("static_tx_qid = %d allocated for PMD thread on core %2d"
>+ ", numa_id %d.", pmd->static_tx_qid, pmd->core_id, pmd->numa_id);
>+}
>+
>+static void
>+pmd_free_static_tx_qid(struct dp_netdev_pmd_thread *pmd)
>+{
>+ ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex);
>+ id_pool_free_id(pmd->dp->tx_qid_pool, pmd->static_tx_qid);
>+ ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex);
>+}
>+
> static int
> pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd,
> struct polled_queue **ppoll_list)
>@@ -3718,6 +3781,7 @@ pmd_thread_main(void *f_)
> dpdk_set_lcore_id(pmd->core_id);
> poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
> reload:
>+ pmd_alloc_static_tx_qid(pmd);
> emc_cache_init(&pmd->flow_cache);
>
> /* List port/core affinity */
>@@ -3766,6 +3830,7 @@ reload:
> dp_netdev_pmd_reload_done(pmd);
>
> emc_cache_uninit(&pmd->flow_cache);
>+ pmd_free_static_tx_qid(pmd);
>
> if (!exiting) {
> goto reload;
>@@ -4176,8 +4241,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct
>dp_netdev *dp,
> pmd->numa_id = numa_id;
> pmd->need_reload = false;
>
>- *CONST_CAST(int *, &pmd->static_tx_qid) = cmap_count(&dp->poll_threads);
>-
> ovs_refcount_init(&pmd->ref_cnt);
> latch_init(&pmd->exit_latch);
> pmd->reload_seq = seq_create();
>@@ -4198,6 +4261,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct
>dp_netdev *dp,
> * actual thread created for NON_PMD_CORE_ID. */
> if (core_id == NON_PMD_CORE_ID) {
> emc_cache_init(&pmd->flow_cache);
>+ pmd_alloc_static_tx_qid(pmd);
> }
> cmap_insert(&dp->poll_threads, CONST_CAST(struct cmap_node *, &pmd->node),
> hash_int(core_id, 0));
>@@ -4240,6 +4304,7 @@ dp_netdev_del_pmd(struct dp_netdev *dp, struct dp_netdev_pmd_thread
>*pmd)
> ovs_mutex_lock(&dp->non_pmd_mutex);
> emc_cache_uninit(&pmd->flow_cache);
> pmd_free_cached_ports(pmd);
>+ pmd_free_static_tx_qid(pmd);
> ovs_mutex_unlock(&dp->non_pmd_mutex);
> } else {
> latch_set(&pmd->exit_latch);
>diff --git a/tests/pmd.at b/tests/pmd.at
>index 05755b3..39a3645 100644
>--- a/tests/pmd.at
>+++ b/tests/pmd.at
>@@ -38,7 +38,7 @@ dnl
> dnl Whaits for creation of 'n_threads' or at least 1 thread if $1 not
> dnl passed. Checking starts from line number 'line' in ovs-vswithd.log .
> m4_define([CHECK_PMD_THREADS_CREATED], [
>- PATTERN="Created [[0-9]]* pmd threads on numa node $2"
>+ PATTERN="There are [[0-9]]* pmd threads on numa node $2"
> line_st=$3
> if [[ -z "$line_st" ]]
> then
>--
>2.7.4
>
>_______________________________________________
>dev mailing list
>dev at openvswitch.org
>https://mail.openvswitch.org/mailman/listinfo/ovs-dev
More information about the dev
mailing list