[ovs-dev] [PATCH 00/17] DPDK/pmd reconfiguration refactor and bugfixes

Ilya Maximets i.maximets at samsung.com
Fri Nov 25 14:19:46 UTC 2016


Sorry, CC to list.

On 25.11.2016 17:15, Ilya Maximets wrote:
Hi, Daniele.

I've prepared some incremental changes for this path-set (only very
simple tests performed). Mostly for the largest patch.
I hope, you will find some of this changes useful.
I'll continue to test and review this path-set.

---
 lib/dpif-netdev.c | 65 ++++++++++++++++++++++++-------------------------------
 lib/ovs-numa.c    | 23 ++++++--------------
 lib/ovs-numa.h    |  1 +
 3 files changed, 36 insertions(+), 53 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 8fad4f3..31c5b5a 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3067,11 +3067,11 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
                 if (!pmd) {
                     VLOG_WARN("There is no PMD thread on core %d. Queue "
                               "%d on port \'%s\' will not be polled.",
-                              port->rxqs[qid].core_id, qid,
-                              netdev_get_name(port->netdev));
+                              q->core_id, qid, netdev_get_name(port->netdev));
                 } else {
                     q->pmd = pmd;
                     pmd->isolated = true;
+                    dp_netdev_pmd_unref(pmd);
                 }
             } else if (!pinned && q->core_id == RXQ_CORE_UNPINNED) {
                 if (!numa) {
@@ -3107,31 +3107,31 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
         pmd_cores = ovs_numa_dump_n_cores_per_numa(NR_PMD_THREADS);
     }
 
-    /* Check for unwanted pmd threads */
-    CMAP_FOR_EACH(pmd, node, &dp->poll_threads) {
-        if (pmd->core_id != NON_PMD_CORE_ID &&
-            !ovs_numa_dump_contains_core(pmd_cores,
-                                         pmd->numa_id,
-                                         pmd->core_id)) {
-            changed = true;
-        }
-    }
-
-    /* Check for required new pmd threads */
-    FOR_EACH_CORE_ON_DUMP(core, pmd_cores) {
-        pmd = dp_netdev_get_pmd(dp, core->core_id);
-        if (pmd) {
-            dp_netdev_pmd_unref(pmd);
-            continue;
-        }
+    /* Check for changed configuration */
+    if (ovs_numa_dump_count(pmd_cores) != cmap_count(&dp->poll_threads) - 1) {
         changed = true;
+    } else {
+        CMAP_FOR_EACH(pmd, node, &dp->poll_threads) {
+            if (pmd->core_id != NON_PMD_CORE_ID
+                && !ovs_numa_dump_contains_core(pmd_cores,
+                                                pmd->numa_id,
+                                                pmd->core_id)) {
+                changed = true;
+                break;
+            }
+        }
     }
 
     /* Destroy the old and recreate the new pmd threads.  We don't perform an
      * incremental update because we would have to adjust 'static_tx_qid'. */
     if (changed) {
-        struct ovs_numa_dump *all_numas;
-        struct ovs_numa_info *numa;
+        uint32_t *n_threads_on_numa;
+        int i, n_numas = ovs_numa_get_n_numas();
+
+        ovs_assert(n_numas != OVS_NUMA_UNSPEC);
+
+        /* This is not expensive because n_numas <= MAX_NUMA_NODES */
+        n_threads_on_numa = xzalloc(n_numas * sizeof *n_threads_on_numa);
 
         /* Do not destroy the non pmd thread. */
         dp_netdev_destroy_all_pmds(dp, false);
@@ -3141,26 +3141,17 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
             dp_netdev_configure_pmd(pmd, dp, core->core_id, core->numa_id);
 
             pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
+            n_threads_on_numa[core->numa_id]++;
         }
 
         /* Log the number of pmd threads per numa node. */
-        all_numas = ovs_numa_dump_n_cores_per_numa(1);
-
-        FOR_EACH_CORE_ON_DUMP(numa, all_numas) {
-            int n = 0;
-
-            FOR_EACH_CORE_ON_DUMP(core, pmd_cores) {
-                if (core->numa_id == numa->numa_id) {
-                    n++;
-                }
-            }
-
-            if (n) {
+        for (i = 0; i < n_numas; i++) {
+            if (n_threads_on_numa[i]) {
                 VLOG_INFO("Created %d pmd threads on numa node %d",
-                          n, numa->numa_id);
+                          n_threads_on_numa[i], i);
             }
         }
-        ovs_numa_dump_destroy(all_numas);
+        free(n_threads_on_numa);
     }
 
     ovs_numa_dump_destroy(pmd_cores);
@@ -3321,7 +3312,7 @@ reconfigure_datapath(struct dp_netdev *dp)
 
             if (q->pmd) {
                 ovs_mutex_lock(&q->pmd->port_mutex);
-                dp_netdev_add_rxq_to_pmd(q->pmd, &port->rxqs[qid]);
+                dp_netdev_add_rxq_to_pmd(q->pmd, q);
                 ovs_mutex_unlock(&q->pmd->port_mutex);
             }
         }
@@ -3728,7 +3719,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
     /* All flows (including their dpcls_rules) have been deleted already */
     CMAP_FOR_EACH (cls, node, &pmd->classifiers) {
         dpcls_destroy(cls);
-        free(cls);
+        ovsrcu_postpone(free, cls);
     }
     cmap_destroy(&pmd->classifiers);
     cmap_destroy(&pmd->flow_table);
diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c
index 6599542..b7c305f 100644
--- a/lib/ovs-numa.c
+++ b/lib/ovs-numa.c
@@ -406,21 +406,6 @@ ovs_numa_dump_cores_on_numa(int numa_id)
     return dump;
 }
 
-static struct cpu_core *
-get_cpu_core(int core_id)
-{
-    struct cpu_core *core;
-
-    HMAP_FOR_EACH_WITH_HASH(core, hmap_node, hash_int(core_id, 0),
-                            &all_cpu_cores) {
-        if (core->core_id == core_id) {
-            return core;
-        }
-    }
-
-    return NULL;
-}
-
 struct ovs_numa_dump *
 ovs_numa_dump_cores_with_cmask(const char *cmask)
 {
@@ -444,7 +429,7 @@ ovs_numa_dump_cores_with_cmask(const char *cmask)
 
         for (j = 0; j < 4; j++) {
             if ((bin >> j) & 0x1) {
-                struct cpu_core *core = get_cpu_core(core_id);
+                struct cpu_core *core = get_core_by_core_id(core_id);
 
                 if (core) {
                     struct ovs_numa_info *info = xmalloc(sizeof *info);
@@ -508,6 +493,12 @@ ovs_numa_dump_contains_core(const struct ovs_numa_dump *dump,
     return false;
 }
 
+size_t
+ovs_numa_dump_count(struct ovs_numa_dump *dump)
+{
+    return hmap_count(&dump->dump);
+}
+
 void
 ovs_numa_dump_destroy(struct ovs_numa_dump *dump)
 {
diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h
index 89b6a30..9c6f08f 100644
--- a/lib/ovs-numa.h
+++ b/lib/ovs-numa.h
@@ -49,6 +49,7 @@ int ovs_numa_get_n_cores_on_numa(int numa_id);
 struct ovs_numa_dump *ovs_numa_dump_cores_on_numa(int numa_id);
 struct ovs_numa_dump *ovs_numa_dump_cores_with_cmask(const char *cmask);
 struct ovs_numa_dump *ovs_numa_dump_n_cores_per_numa(int n);
+size_t ovs_numa_dump_count(struct ovs_numa_dump *);
 bool ovs_numa_dump_contains_core(const struct ovs_numa_dump *,
                                  int numa_id, unsigned core_id);
 void ovs_numa_dump_destroy(struct ovs_numa_dump *);
--


Best regards, Ilya Maximets.

> On 16.11.2016 03:45, diproiettod at vmware.com (Daniele Di Proietto) wrote:
>> The first two commits of the series are trivial bugfixes for dpif-netdev.
>>
>> Then the series fixes a long standing bug that caused a crash when the
>> admin link state of a port is changed while traffic is flowing.
>>
>> The next part makes use of reconfiguration for port add: this makes
>> the operation twice as fast and reduce some code duplication.  This part
>> conflicts with the port naming change, so I'm willing to postpone it, unless
>> we find it to be useful for the port naming change.
>>
>> The rest of the series refactors a lot of code if dpif-netdev:
>>
>> * We no longer start pmd threads on demand for each numa node.  This made
>>   the code very complicated and introduced a lot of bugs.
>> * The pmd threads state is now internal to dpif-netdev and it's not stored in
>>   ovs-numa.
>> * There's now a single function that handles pmd threads/ports changes: this
>>   reduces code duplication and makes port reconfiguration faster, as we don't
>>   have to bring down the whole datapath.
>>
>> Daniele Di Proietto (17):
>>   dpif-netdev: Fix memory leak.
>>   dpif-netdev: Take non_pmd_mutex to access tx cached ports.
>>   dpif-netdev: Don't try to output on a device without txqs.
>>   netdev-dpdk: Don't call rte_dev_stop() in update_flags().
>>   netdev-dpdk: Start the device only once on port-add.
>>   netdev-dpdk: Refactor construct and destruct.
>>   dpif-netdev: Use a boolean instead of pmd->port_seq.
>>   dpif-netdev: Block pmd threads if there are no ports.
>>   dpif-netdev: Create pmd threads for every numa node.
>>   dpif-netdev: Make 'static_tx_qid' const.
>>   dpctl: Avoid making assumptions on pmd threads.
>>   ovs-numa: New ovs_numa_dump_contains_core() function.
>>   ovs-numa: Add new dump types.
>>   dpif-netdev: core_id shouldn't be negative.
>>   dpif-netdev: Use hmap for poll_list in pmd threads.
>>   dpif-netdev: Centralized threads and queues handling code.
>>   ovs-numa: Remove unused functions.
>>
>>  lib/dpctl.c       |  107 +---
>>  lib/dpif-netdev.c | 1429 ++++++++++++++++++++++++++++-------------------------
>>  lib/dpif.c        |    6 +-
>>  lib/dpif.h        |   12 +-
>>  lib/netdev-dpdk.c |  173 +++----
>>  lib/netdev.c      |   21 +-
>>  lib/netdev.h      |    1 +
>>  lib/ovs-numa.c    |  227 +++------
>>  lib/ovs-numa.h    |   21 +-
>>  tests/pmd.at      |   49 +-
>>  vswitchd/bridge.c |    2 +
>>  11 files changed, 1030 insertions(+), 1018 deletions(-)
>>


More information about the dev mailing list