[ovs-dev] [PATCH v3] dpif-netdev: Report overhead busy cycles per pmd.
David Marchand
david.marchand at redhat.com
Fri Jul 16 16:21:16 UTC 2021
Users complained that per rxq pmd usage was confusing: summing those
values per pmd would never reach 100% even if increasing traffic load
beyond pmd capacity.
This is because the dpif-netdev/pmd-rxq-show command only reports "pure"
rxq cycles while some cycles are used in the pmd mainloop and add up to
the total pmd load.
dpif-netdev/pmd-stats-show does report per pmd load usage.
This load is measured since the last dpif-netdev/pmd-stats-clear call.
On the other hand, the per rxq pmd usage reflects the pmd load on a 10s
sliding window, which makes it non-trivial to correlate.
Gather per pmd busy cycles with the same periodicity and report the
difference as overhead in dpif-netdev/pmd-rxq-show so that we have all
info in a single command.
Example:
$ ovs-appctl dpif-netdev/pmd-rxq-show
pmd thread numa_id 1 core_id 3:
isolated : true
port: dpdk0 queue-id: 0 (enabled) pmd usage: 90 %
overhead: 4 %
pmd thread numa_id 1 core_id 5:
isolated : false
port: vhost0 queue-id: 0 (enabled) pmd usage: 0 %
port: vhost1 queue-id: 0 (enabled) pmd usage: 93 %
port: vhost2 queue-id: 0 (enabled) pmd usage: 0 %
port: vhost6 queue-id: 0 (enabled) pmd usage: 0 %
overhead: 6 %
pmd thread numa_id 1 core_id 31:
isolated : true
port: dpdk1 queue-id: 0 (enabled) pmd usage: 86 %
overhead: 4 %
pmd thread numa_id 1 core_id 33:
isolated : false
port: vhost3 queue-id: 0 (enabled) pmd usage: 0 %
port: vhost4 queue-id: 0 (enabled) pmd usage: 0 %
port: vhost5 queue-id: 0 (enabled) pmd usage: 92 %
port: vhost7 queue-id: 0 (enabled) pmd usage: 0 %
overhead: 7 %
Signed-off-by: David Marchand <david.marchand at redhat.com>
---
Changes since v2:
- rebased on master, dynamically allocating added stats array to avoid
exposing internal dpif-netdev array size,
- fixed UT on FreeBSD,
- rebased on top of Kevin series to ease merging wrt UT update,
- GHA result: https://github.com/david-marchand/ovs/runs/3087888172
Changes since v1:
- fixed unit test and documentation update,
- moved documentation update under pmd-rxq-show command description,
- updated commitlog,
- renamed variables for better readability,
- avoided reporting a N/A overhead for idle PMD,
- reset overhead stats on PMD reconfigure,
---
Documentation/topics/dpdk/pmd.rst | 5 ++
lib/dpif-netdev-private-thread.h | 7 +-
lib/dpif-netdev.c | 105 +++++++++++++++++++++---------
tests/pmd.at | 9 +++
4 files changed, 93 insertions(+), 33 deletions(-)
diff --git a/Documentation/topics/dpdk/pmd.rst b/Documentation/topics/dpdk/pmd.rst
index 30040d7033..95fa7af128 100644
--- a/Documentation/topics/dpdk/pmd.rst
+++ b/Documentation/topics/dpdk/pmd.rst
@@ -195,6 +195,11 @@ queue::
due to traffic pattern or reconfig changes, will take one minute to be fully
reflected in the stats.
+.. versionchanged:: 2.16.0
+
+ An ``overhead`` statistic is shown per PMD: it represents the number of
+ cycles inherently consumed by the OVS PMD processing loop.
+
Rx queue to PMD assignment takes place whenever there are configuration changes
or can be triggered by using::
diff --git a/lib/dpif-netdev-private-thread.h b/lib/dpif-netdev-private-thread.h
index a4c092b692..a782d9678a 100644
--- a/lib/dpif-netdev-private-thread.h
+++ b/lib/dpif-netdev-private-thread.h
@@ -99,13 +99,18 @@ struct dp_netdev_pmd_thread {
long long int next_optimization;
/* End of the next time interval for which processing cycles
are stored for each polled rxq. */
- long long int rxq_next_cycle_store;
+ long long int next_cycle_store;
/* Last interval timestamp. */
uint64_t intrvl_tsc_prev;
/* Last interval cycles. */
atomic_ullong intrvl_cycles;
+ /* Write index for 'busy_cycles_intrvl'. */
+ unsigned int intrvl_idx;
+ /* Busy cycles in last PMD_INTERVAL_MAX intervals. */
+ atomic_ullong *busy_cycles_intrvl;
+
/* Current context of the PMD thread. */
struct dp_netdev_pmd_thread_ctx ctx;
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 05d414ad96..bb27c79857 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -155,11 +155,11 @@ static struct odp_support dp_netdev_support = {
/* Time in microseconds of the interval in which rxq processing cycles used
* in rxq to pmd assignments is measured and stored. */
-#define PMD_RXQ_INTERVAL_LEN 10000000LL
+#define PMD_INTERVAL_LEN 10000000LL
/* Number of intervals for which cycles are stored
* and used during rxq to pmd assignment. */
-#define PMD_RXQ_INTERVAL_MAX 6
+#define PMD_INTERVAL_MAX 6
/* Time in microseconds to try RCU quiescing. */
#define PMD_RCU_QUIESCE_INTERVAL 10000LL
@@ -379,9 +379,9 @@ struct dp_netdev_rxq {
/* Counters of cycles spent successfully polling and processing pkts. */
atomic_ullong cycles[RXQ_N_CYCLES];
- /* We store PMD_RXQ_INTERVAL_MAX intervals of data for an rxq and then
+ /* We store PMD_INTERVAL_MAX intervals of data for an rxq and then
sum them to yield the cycles used for an rxq. */
- atomic_ullong cycles_intrvl[PMD_RXQ_INTERVAL_MAX];
+ atomic_ullong cycles_intrvl[PMD_INTERVAL_MAX];
};
/* A port in a netdev-based datapath. */
@@ -791,6 +791,8 @@ pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
struct rxq_poll *list;
size_t n_rxq;
uint64_t total_cycles = 0;
+ uint64_t busy_cycles = 0;
+ uint64_t total_rxq_proc_cycles = 0;
ds_put_format(reply,
"pmd thread numa_id %d core_id %u:\n isolated : %s\n",
@@ -803,16 +805,27 @@ pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
/* Get the total pmd cycles for an interval. */
atomic_read_relaxed(&pmd->intrvl_cycles, &total_cycles);
/* Estimate the cycles to cover all intervals. */
- total_cycles *= PMD_RXQ_INTERVAL_MAX;
+ total_cycles *= PMD_INTERVAL_MAX;
+
+ for (int j = 0; j < PMD_INTERVAL_MAX; j++) {
+ uint64_t cycles;
+
+ atomic_read_relaxed(&pmd->busy_cycles_intrvl[j], &cycles);
+ busy_cycles += cycles;
+ }
+ if (busy_cycles > total_cycles) {
+ busy_cycles = total_cycles;
+ }
for (int i = 0; i < n_rxq; i++) {
struct dp_netdev_rxq *rxq = list[i].rxq;
const char *name = netdev_rxq_get_name(rxq->rx);
- uint64_t proc_cycles = 0;
+ uint64_t rxq_proc_cycles = 0;
- for (int j = 0; j < PMD_RXQ_INTERVAL_MAX; j++) {
- proc_cycles += dp_netdev_rxq_get_intrvl_cycles(rxq, j);
+ for (int j = 0; j < PMD_INTERVAL_MAX; j++) {
+ rxq_proc_cycles += dp_netdev_rxq_get_intrvl_cycles(rxq, j);
}
+ total_rxq_proc_cycles += rxq_proc_cycles;
ds_put_format(reply, " port: %-16s queue-id: %2d", name,
netdev_rxq_get_queue_id(list[i].rxq->rx));
ds_put_format(reply, " %s", netdev_rxq_enabled(list[i].rxq->rx)
@@ -820,13 +833,30 @@ pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
ds_put_format(reply, " pmd usage: ");
if (total_cycles) {
ds_put_format(reply, "%2"PRIu64"",
- proc_cycles * 100 / total_cycles);
+ rxq_proc_cycles * 100 / total_cycles);
ds_put_cstr(reply, " %");
} else {
ds_put_format(reply, "%s", "NOT AVAIL");
}
ds_put_cstr(reply, "\n");
}
+
+ if (n_rxq > 0) {
+ ds_put_cstr(reply, " overhead: ");
+ if (total_cycles) {
+ uint64_t overhead_cycles = 0;
+
+ if (total_rxq_proc_cycles < busy_cycles) {
+ overhead_cycles = busy_cycles - total_rxq_proc_cycles;
+ }
+ ds_put_format(reply, "%2"PRIu64" %%",
+ overhead_cycles * 100 / total_cycles);
+ } else {
+ ds_put_cstr(reply, "NOT AVAIL");
+ }
+ ds_put_cstr(reply, "\n");
+ }
+
ovs_mutex_unlock(&pmd->port_mutex);
free(list);
}
@@ -4521,7 +4551,7 @@ static void
dp_netdev_rxq_set_intrvl_cycles(struct dp_netdev_rxq *rx,
unsigned long long cycles)
{
- unsigned int idx = rx->intrvl_idx++ % PMD_RXQ_INTERVAL_MAX;
+ unsigned int idx = rx->intrvl_idx++ % PMD_INTERVAL_MAX;
atomic_store_relaxed(&rx->cycles_intrvl[idx], cycles);
}
@@ -4978,7 +5008,7 @@ sched_numa_list_assignments(struct sched_numa_list *numa_list,
struct sched_pmd *sched_pmd;
uint64_t proc_cycles = 0;
- for (int i = 0; i < PMD_RXQ_INTERVAL_MAX; i++) {
+ for (int i = 0; i < PMD_INTERVAL_MAX; i++) {
proc_cycles += dp_netdev_rxq_get_intrvl_cycles(rxq, i);
}
@@ -5238,7 +5268,7 @@ sched_numa_list_schedule(struct sched_numa_list *numa_list,
uint64_t cycle_hist = 0;
/* Sum the queue intervals and store the cycle history. */
- for (unsigned i = 0; i < PMD_RXQ_INTERVAL_MAX; i++) {
+ for (unsigned i = 0; i < PMD_INTERVAL_MAX; i++) {
cycle_hist += dp_netdev_rxq_get_intrvl_cycles(rxq, i);
}
dp_netdev_rxq_set_cycles(rxq, RXQ_CYCLES_PROC_HIST,
@@ -5418,7 +5448,7 @@ sched_numa_list_variance(struct sched_numa_list *numa_list)
if (total_cycles) {
/* Estimate the cycles to cover all intervals. */
- total_cycles *= PMD_RXQ_INTERVAL_MAX;
+ total_cycles *= PMD_INTERVAL_MAX;
percent_busy[n_proc++] = (sched_pmd->pmd_proc_cycles * 100)
/ total_cycles;
} else {
@@ -5935,7 +5965,7 @@ dpif_netdev_run(struct dpif *dpif)
pmd_alb->rebalance_poll_timer = now;
CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
if (atomic_count_get(&pmd->pmd_overloaded) >=
- PMD_RXQ_INTERVAL_MAX) {
+ PMD_INTERVAL_MAX) {
pmd_rebalance = true;
break;
}
@@ -6145,6 +6175,10 @@ reload:
pmd->intrvl_tsc_prev = 0;
atomic_store_relaxed(&pmd->intrvl_cycles, 0);
+ for (i = 0; i < PMD_INTERVAL_MAX; i++) {
+ atomic_store_relaxed(&pmd->busy_cycles_intrvl[i], 0);
+ }
+ pmd->intrvl_idx = 0;
cycles_counter_update(s);
pmd->next_rcu_quiesce = pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
@@ -6677,7 +6711,9 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
pmd_thread_ctx_time_update(pmd);
pmd->next_optimization = pmd->ctx.now + DPCLS_OPTIMIZATION_INTERVAL;
pmd->next_rcu_quiesce = pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
- pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN;
+ pmd->next_cycle_store = pmd->ctx.now + PMD_INTERVAL_LEN;
+ pmd->busy_cycles_intrvl = xzalloc(PMD_INTERVAL_MAX *
+ sizeof *pmd->busy_cycles_intrvl);
hmap_init(&pmd->poll_list);
hmap_init(&pmd->tx_ports);
hmap_init(&pmd->tnl_port_cache);
@@ -6716,6 +6752,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
hmap_destroy(&pmd->tx_ports);
cmap_destroy(&pmd->tx_bonds);
hmap_destroy(&pmd->poll_list);
+ free(pmd->busy_cycles_intrvl);
/* All flows (including their dpcls_rules) have been deleted already */
CMAP_FOR_EACH (cls, node, &pmd->classifiers) {
dpcls_destroy(cls);
@@ -8992,31 +9029,33 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
uint64_t tot_idle = 0, tot_proc = 0;
unsigned int pmd_load = 0;
- if (pmd->ctx.now > pmd->rxq_next_cycle_store) {
+ if (pmd->ctx.now > pmd->next_cycle_store) {
uint64_t curr_tsc;
uint8_t rebalance_load_trigger;
struct pmd_auto_lb *pmd_alb = &pmd->dp->pmd_alb;
- if (pmd_alb->is_enabled && !pmd->isolated
- && (pmd->perf_stats.counters.n[PMD_CYCLES_ITER_IDLE] >=
- pmd->prev_stats[PMD_CYCLES_ITER_IDLE])
- && (pmd->perf_stats.counters.n[PMD_CYCLES_ITER_BUSY] >=
- pmd->prev_stats[PMD_CYCLES_ITER_BUSY]))
- {
+ unsigned int idx;
+
+ if (pmd->perf_stats.counters.n[PMD_CYCLES_ITER_IDLE] >=
+ pmd->prev_stats[PMD_CYCLES_ITER_IDLE] &&
+ pmd->perf_stats.counters.n[PMD_CYCLES_ITER_BUSY] >=
+ pmd->prev_stats[PMD_CYCLES_ITER_BUSY]) {
tot_idle = pmd->perf_stats.counters.n[PMD_CYCLES_ITER_IDLE] -
pmd->prev_stats[PMD_CYCLES_ITER_IDLE];
tot_proc = pmd->perf_stats.counters.n[PMD_CYCLES_ITER_BUSY] -
pmd->prev_stats[PMD_CYCLES_ITER_BUSY];
- if (tot_proc) {
- pmd_load = ((tot_proc * 100) / (tot_idle + tot_proc));
- }
+ if (pmd_alb->is_enabled && !pmd->isolated) {
+ if (tot_proc) {
+ pmd_load = ((tot_proc * 100) / (tot_idle + tot_proc));
+ }
- atomic_read_relaxed(&pmd_alb->rebalance_load_thresh,
- &rebalance_load_trigger);
- if (pmd_load >= rebalance_load_trigger) {
- atomic_count_inc(&pmd->pmd_overloaded);
- } else {
- atomic_count_set(&pmd->pmd_overloaded, 0);
+ atomic_read_relaxed(&pmd_alb->rebalance_load_thresh,
+ &rebalance_load_trigger);
+ if (pmd_load >= rebalance_load_trigger) {
+ atomic_count_inc(&pmd->pmd_overloaded);
+ } else {
+ atomic_count_set(&pmd->pmd_overloaded, 0);
+ }
}
}
@@ -9039,9 +9078,11 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
atomic_store_relaxed(&pmd->intrvl_cycles,
curr_tsc - pmd->intrvl_tsc_prev);
}
+ idx = pmd->intrvl_idx++ % PMD_INTERVAL_MAX;
+ atomic_store_relaxed(&pmd->busy_cycles_intrvl[idx], tot_proc);
pmd->intrvl_tsc_prev = curr_tsc;
/* Start new measuring interval */
- pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN;
+ pmd->next_cycle_store = pmd->ctx.now + PMD_INTERVAL_LEN;
}
if (pmd->ctx.now > pmd->next_optimization) {
diff --git a/tests/pmd.at b/tests/pmd.at
index 08939bfef0..225d4ee3a4 100644
--- a/tests/pmd.at
+++ b/tests/pmd.at
@@ -73,6 +73,7 @@ AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | sed SED_NUMA_CORE_PATTERN], [0],
pmd thread numa_id <cleared> core_id <cleared>:
isolated : false
port: p0 queue-id: 0 (enabled) pmd usage: NOT AVAIL
+ overhead: NOT AVAIL
])
AT_CHECK([ovs-appctl dpif/show | sed 's/\(tx_queues=\)[[0-9]]*/\1<cleared>/g'], [0], [dnl
@@ -111,6 +112,7 @@ pmd thread numa_id <cleared> core_id <cleared>:
port: p0 queue-id: 5 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 6 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 7 (enabled) pmd usage: NOT AVAIL
+ overhead: NOT AVAIL
])
OVS_VSWITCHD_STOP
@@ -142,6 +144,7 @@ pmd thread numa_id <cleared> core_id <cleared>:
port: p0 queue-id: 5 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 6 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 7 (enabled) pmd usage: NOT AVAIL
+ overhead: NOT AVAIL
])
TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1))
@@ -190,6 +193,7 @@ pmd thread numa_id <cleared> core_id <cleared>:
port: p0 queue-id: 5 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 6 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 7 (enabled) pmd usage: NOT AVAIL
+ overhead: NOT AVAIL
])
OVS_VSWITCHD_STOP
@@ -221,6 +225,7 @@ pmd thread numa_id <cleared> core_id <cleared>:
port: p0 queue-id: 5 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 6 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 7 (enabled) pmd usage: NOT AVAIL
+ overhead: NOT AVAIL
])
# Force cross-numa polling
@@ -285,6 +290,7 @@ pmd thread numa_id 1 core_id 1:
port: p0 queue-id: 5 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 6 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 7 (enabled) pmd usage: NOT AVAIL
+ overhead: NOT AVAIL
pmd thread numa_id 0 core_id 2:
isolated : false
])
@@ -306,6 +312,7 @@ pmd thread numa_id 1 core_id 1:
port: p0 queue-id: 5 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 6 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 7 (enabled) pmd usage: NOT AVAIL
+ overhead: NOT AVAIL
pmd thread numa_id 0 core_id 2:
isolated : false
])
@@ -325,6 +332,7 @@ pmd thread numa_id 1 core_id 1:
port: p0 queue-id: 5 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 6 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 7 (enabled) pmd usage: NOT AVAIL
+ overhead: NOT AVAIL
pmd thread numa_id 0 core_id 2:
isolated : false
])
@@ -345,6 +353,7 @@ pmd thread numa_id 1 core_id 0:
port: p0 queue-id: 5 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 6 (enabled) pmd usage: NOT AVAIL
port: p0 queue-id: 7 (enabled) pmd usage: NOT AVAIL
+ overhead: NOT AVAIL
])
OVS_VSWITCHD_STOP
--
2.23.0
More information about the dev
mailing list