[ovs-dev] [PATCH 00/17] DPDK/pmd reconfiguration refactor and bugfixes
Ilya Maximets
i.maximets at samsung.com
Fri Nov 25 14:19:46 UTC 2016
Sorry, CC to list.
On 25.11.2016 17:15, Ilya Maximets wrote:
Hi, Daniele.
I've prepared some incremental changes for this patch-set (only very
simple tests performed). Mostly for the largest patch.
I hope you will find some of these changes useful.
I'll continue to test and review this patch-set.
---
lib/dpif-netdev.c | 65 ++++++++++++++++++++++++-------------------------------
lib/ovs-numa.c | 23 ++++++--------------
lib/ovs-numa.h | 1 +
3 files changed, 36 insertions(+), 53 deletions(-)
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 8fad4f3..31c5b5a 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3067,11 +3067,11 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
if (!pmd) {
VLOG_WARN("There is no PMD thread on core %d. Queue "
"%d on port \'%s\' will not be polled.",
- port->rxqs[qid].core_id, qid,
- netdev_get_name(port->netdev));
+ q->core_id, qid, netdev_get_name(port->netdev));
} else {
q->pmd = pmd;
pmd->isolated = true;
+ dp_netdev_pmd_unref(pmd);
}
} else if (!pinned && q->core_id == RXQ_CORE_UNPINNED) {
if (!numa) {
@@ -3107,31 +3107,31 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
pmd_cores = ovs_numa_dump_n_cores_per_numa(NR_PMD_THREADS);
}
- /* Check for unwanted pmd threads */
- CMAP_FOR_EACH(pmd, node, &dp->poll_threads) {
- if (pmd->core_id != NON_PMD_CORE_ID &&
- !ovs_numa_dump_contains_core(pmd_cores,
- pmd->numa_id,
- pmd->core_id)) {
- changed = true;
- }
- }
-
- /* Check for required new pmd threads */
- FOR_EACH_CORE_ON_DUMP(core, pmd_cores) {
- pmd = dp_netdev_get_pmd(dp, core->core_id);
- if (pmd) {
- dp_netdev_pmd_unref(pmd);
- continue;
- }
+ /* Check for changed configuration */
+ if (ovs_numa_dump_count(pmd_cores) != cmap_count(&dp->poll_threads) - 1) {
changed = true;
+ } else {
+ CMAP_FOR_EACH(pmd, node, &dp->poll_threads) {
+ if (pmd->core_id != NON_PMD_CORE_ID
+ && !ovs_numa_dump_contains_core(pmd_cores,
+ pmd->numa_id,
+ pmd->core_id)) {
+ changed = true;
+ break;
+ }
+ }
}
/* Destroy the old and recreate the new pmd threads. We don't perform an
* incremental update because we would have to adjust 'static_tx_qid'. */
if (changed) {
- struct ovs_numa_dump *all_numas;
- struct ovs_numa_info *numa;
+ uint32_t *n_threads_on_numa;
+ int i, n_numas = ovs_numa_get_n_numas();
+
+ ovs_assert(n_numas != OVS_NUMA_UNSPEC);
+
+ /* This is not expensive because n_numas <= MAX_NUMA_NODES */
+ n_threads_on_numa = xzalloc(n_numas * sizeof *n_threads_on_numa);
/* Do not destroy the non pmd thread. */
dp_netdev_destroy_all_pmds(dp, false);
@@ -3141,26 +3141,17 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
dp_netdev_configure_pmd(pmd, dp, core->core_id, core->numa_id);
pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
+ n_threads_on_numa[core->numa_id]++;
}
/* Log the number of pmd threads per numa node. */
- all_numas = ovs_numa_dump_n_cores_per_numa(1);
-
- FOR_EACH_CORE_ON_DUMP(numa, all_numas) {
- int n = 0;
-
- FOR_EACH_CORE_ON_DUMP(core, pmd_cores) {
- if (core->numa_id == numa->numa_id) {
- n++;
- }
- }
-
- if (n) {
+ for (i = 0; i < n_numas; i++) {
+ if (n_threads_on_numa[i]) {
VLOG_INFO("Created %d pmd threads on numa node %d",
- n, numa->numa_id);
+ n_threads_on_numa[i], i);
}
}
- ovs_numa_dump_destroy(all_numas);
+ free(n_threads_on_numa);
}
ovs_numa_dump_destroy(pmd_cores);
@@ -3321,7 +3312,7 @@ reconfigure_datapath(struct dp_netdev *dp)
if (q->pmd) {
ovs_mutex_lock(&q->pmd->port_mutex);
- dp_netdev_add_rxq_to_pmd(q->pmd, &port->rxqs[qid]);
+ dp_netdev_add_rxq_to_pmd(q->pmd, q);
ovs_mutex_unlock(&q->pmd->port_mutex);
}
}
@@ -3728,7 +3719,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
/* All flows (including their dpcls_rules) have been deleted already */
CMAP_FOR_EACH (cls, node, &pmd->classifiers) {
dpcls_destroy(cls);
- free(cls);
+ ovsrcu_postpone(free, cls);
}
cmap_destroy(&pmd->classifiers);
cmap_destroy(&pmd->flow_table);
diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c
index 6599542..b7c305f 100644
--- a/lib/ovs-numa.c
+++ b/lib/ovs-numa.c
@@ -406,21 +406,6 @@ ovs_numa_dump_cores_on_numa(int numa_id)
return dump;
}
-static struct cpu_core *
-get_cpu_core(int core_id)
-{
- struct cpu_core *core;
-
- HMAP_FOR_EACH_WITH_HASH(core, hmap_node, hash_int(core_id, 0),
- &all_cpu_cores) {
- if (core->core_id == core_id) {
- return core;
- }
- }
-
- return NULL;
-}
-
struct ovs_numa_dump *
ovs_numa_dump_cores_with_cmask(const char *cmask)
{
@@ -444,7 +429,7 @@ ovs_numa_dump_cores_with_cmask(const char *cmask)
for (j = 0; j < 4; j++) {
if ((bin >> j) & 0x1) {
- struct cpu_core *core = get_cpu_core(core_id);
+ struct cpu_core *core = get_core_by_core_id(core_id);
if (core) {
struct ovs_numa_info *info = xmalloc(sizeof *info);
@@ -508,6 +493,12 @@ ovs_numa_dump_contains_core(const struct ovs_numa_dump *dump,
return false;
}
+size_t
+ovs_numa_dump_count(struct ovs_numa_dump *dump)
+{
+ return hmap_count(&dump->dump);
+}
+
void
ovs_numa_dump_destroy(struct ovs_numa_dump *dump)
{
diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h
index 89b6a30..9c6f08f 100644
--- a/lib/ovs-numa.h
+++ b/lib/ovs-numa.h
@@ -49,6 +49,7 @@ int ovs_numa_get_n_cores_on_numa(int numa_id);
struct ovs_numa_dump *ovs_numa_dump_cores_on_numa(int numa_id);
struct ovs_numa_dump *ovs_numa_dump_cores_with_cmask(const char *cmask);
struct ovs_numa_dump *ovs_numa_dump_n_cores_per_numa(int n);
+size_t ovs_numa_dump_count(struct ovs_numa_dump *);
bool ovs_numa_dump_contains_core(const struct ovs_numa_dump *,
int numa_id, unsigned core_id);
void ovs_numa_dump_destroy(struct ovs_numa_dump *);
--
Best regards, Ilya Maximets.
> On 16.11.2016 03:45, diproiettod at vmware.com (Daniele Di Proietto) wrote:
>> The first two commits of the series are trivial bugfixes for dpif-netdev.
>>
>> Then the series fixes a long standing bug that caused a crash when the
>> admin link state of a port is changed while traffic is flowing.
>>
>> The next part makes use of reconfiguration for port add: this makes
>> the operation twice as fast and reduce some code duplication. This part
>> conflicts with the port naming change, so I'm willing to postpone it, unless
>> we find it to be useful for the port naming change.
>>
>> The rest of the series refactors a lot of code in dpif-netdev:
>>
>> * We no longer start pmd threads on demand for each numa node. This made
>> the code very complicated and introduced a lot of bugs.
>> * The pmd threads state is now internal to dpif-netdev and it's not stored in
>> ovs-numa.
>> * There's now a single function that handles pmd threads/ports changes: this
>> reduces code duplication and makes port reconfiguration faster, as we don't
>> have to bring down the whole datapath.
>>
>> Daniele Di Proietto (17):
>> dpif-netdev: Fix memory leak.
>> dpif-netdev: Take non_pmd_mutex to access tx cached ports.
>> dpif-netdev: Don't try to output on a device without txqs.
>> netdev-dpdk: Don't call rte_dev_stop() in update_flags().
>> netdev-dpdk: Start the device only once on port-add.
>> netdev-dpdk: Refactor construct and destruct.
>> dpif-netdev: Use a boolean instead of pmd->port_seq.
>> dpif-netdev: Block pmd threads if there are no ports.
>> dpif-netdev: Create pmd threads for every numa node.
>> dpif-netdev: Make 'static_tx_qid' const.
>> dpctl: Avoid making assumptions on pmd threads.
>> ovs-numa: New ovs_numa_dump_contains_core() function.
>> ovs-numa: Add new dump types.
>> dpif-netdev: core_id shouldn't be negative.
>> dpif-netdev: Use hmap for poll_list in pmd threads.
>> dpif-netdev: Centralized threads and queues handling code.
>> ovs-numa: Remove unused functions.
>>
>> lib/dpctl.c | 107 +---
>> lib/dpif-netdev.c | 1429 ++++++++++++++++++++++++++++-------------------------
>> lib/dpif.c | 6 +-
>> lib/dpif.h | 12 +-
>> lib/netdev-dpdk.c | 173 +++----
>> lib/netdev.c | 21 +-
>> lib/netdev.h | 1 +
>> lib/ovs-numa.c | 227 +++------
>> lib/ovs-numa.h | 21 +-
>> tests/pmd.at | 49 +-
>> vswitchd/bridge.c | 2 +
>> 11 files changed, 1030 insertions(+), 1018 deletions(-)
>>
More information about the dev
mailing list