[ovs-dev] [PATCH] dpif-netdev: add cache hits to histogram and history stats for PMDs
Paolo Valerio
pvalerio at redhat.com
Fri Nov 20 16:37:11 UTC 2020
This patch records cache hits at every iteration of a PMD thread.
The new metrics covered are:
- EMC hits
- SMC hits
- Megaflow hits
The gathered data are shown in the histogram as well as in iteration
and millisecond cyclic histories.
Signed-off-by: Paolo Valerio <pvalerio at redhat.com>
---
NEWS | 2 +
lib/dpif-netdev-perf.c | 78 ++++++++++++++++++++++++++++++-------
lib/dpif-netdev-perf.h | 6 +++
lib/dpif-netdev-unixctl.man | 18 +++++++++
lib/dpif-netdev.c | 33 +++++++++++-----
5 files changed, 112 insertions(+), 25 deletions(-)
diff --git a/NEWS b/NEWS
index 185555848..ad2dbca79 100644
--- a/NEWS
+++ b/NEWS
@@ -16,6 +16,8 @@ Post-v2.14.0
restricts a flow dump to a single PMD thread if set.
* New 'options:dpdk-vf-mac' field for DPDK interface of VF ports,
that allows configuring the MAC address of a VF representor.
+ * ovs-appctl dpif-netdev/pmd-perf-show now includes EMC, SMC, and megaflow
+ hits in histogram and histories.
- The environment variable OVS_UNBOUND_CONF, if set, is now used
as the DNS resolver's (unbound) configuration file.
- Linux datapath:
diff --git a/lib/dpif-netdev-perf.c b/lib/dpif-netdev-perf.c
index 9560e7c3c..4be79345f 100644
--- a/lib/dpif-netdev-perf.c
+++ b/lib/dpif-netdev-perf.c
@@ -198,6 +198,13 @@ pmd_perf_stats_init(struct pmd_perf_stats *s)
* descriptors (maximum configurable length in Qemu), with the
* DPDK 17.11 virtio PMD in the guest. */
histogram_walls_set_log(&s->max_vhost_qfill, 0, 512);
+ /* Linear histogram for emc hits/it ranging from 0 to 100. */
+ histogram_walls_set_lin(&s->emc_hits, 0, 100);
+ /* Linear histogram for smc hits/it ranging from 0 to 100. */
+ histogram_walls_set_lin(&s->smc_hits, 0, 100);
+ /* Linear histogram for megaflow hits/it ranging from 0 to 100. */
+ histogram_walls_set_lin(&s->megaflow_hits, 0, 100);
+
s->iteration_cnt = 0;
s->start_ms = time_msec();
s->log_susp_it = UINT32_MAX;
@@ -289,13 +296,16 @@ pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s)
ds_put_cstr(str, "Histograms\n");
ds_put_format(str,
- " %-21s %-21s %-21s %-21s %-21s %-21s %-21s\n",
+ " %-21s %-21s %-21s %-21s %-21s %-21s %-21s"
+ " %-21s %-21s %-21s\n",
"cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
- "max vhost qlen", "upcalls/it", "cycles/upcall");
+ "max vhost qlen", "upcalls/it", "cycles/upcall",
+ "EMC hits/it", "SMC hits/it", "megaflow hits/it");
for (i = 0; i < NUM_BINS-1; i++) {
ds_put_format(str,
" %-9d %-11"PRIu64" %-9d %-11"PRIu64" %-9d %-11"PRIu64
" %-9d %-11"PRIu64" %-9d %-11"PRIu64" %-9d %-11"PRIu64
+ " %-9d %-11"PRIu64" %-9d %-11"PRIu64" %-9d %-11"PRIu64
" %-9d %-11"PRIu64"\n",
s->cycles.wall[i], s->cycles.bin[i],
s->pkts.wall[i],s->pkts.bin[i],
@@ -303,11 +313,15 @@ pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s)
s->pkts_per_batch.wall[i], s->pkts_per_batch.bin[i],
s->max_vhost_qfill.wall[i], s->max_vhost_qfill.bin[i],
s->upcalls.wall[i], s->upcalls.bin[i],
- s->cycles_per_upcall.wall[i], s->cycles_per_upcall.bin[i]);
+ s->cycles_per_upcall.wall[i], s->cycles_per_upcall.bin[i],
+ s->emc_hits.wall[i], s->emc_hits.bin[i],
+ s->smc_hits.wall[i], s->smc_hits.bin[i],
+ s->megaflow_hits.wall[i], s->megaflow_hits.bin[i]);
}
ds_put_format(str,
" %-9s %-11"PRIu64" %-9s %-11"PRIu64" %-9s %-11"PRIu64
" %-9s %-11"PRIu64" %-9s %-11"PRIu64" %-9s %-11"PRIu64
+ " %-9s %-11"PRIu64" %-9s %-11"PRIu64" %-9s %-11"PRIu64
" %-9s %-11"PRIu64"\n",
">", s->cycles.bin[i],
">", s->pkts.bin[i],
@@ -315,19 +329,27 @@ pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s)
">", s->pkts_per_batch.bin[i],
">", s->max_vhost_qfill.bin[i],
">", s->upcalls.bin[i],
- ">", s->cycles_per_upcall.bin[i]);
+ ">", s->cycles_per_upcall.bin[i],
+ ">", s->emc_hits.bin[i],
+ ">", s->smc_hits.bin[i],
+ ">", s->megaflow_hits.bin[i]);
if (s->totals.iterations > 0) {
ds_put_cstr(str,
"-----------------------------------------------------"
"-----------------------------------------------------"
- "------------------------------------------------\n");
+ "-----------------------------------------------------"
+ "-----------------------------------------------------"
+ "--------------\n");
ds_put_format(str,
- " %-21s %-21s %-21s %-21s %-21s %-21s %-21s\n",
+ " %-21s %-21s %-21s %-21s %-21s %-21s %-21s"
+ " %-21s %-21s %-21s\n",
"cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
- "vhost qlen", "upcalls/it", "cycles/upcall");
+ "vhost qlen", "upcalls/it", "cycles/upcall",
+ "EMC hits/it", "SMC hits/it", "megaflow hits/it");
ds_put_format(str,
" %-21"PRIu64" %-21.5f %-21"PRIu64
- " %-21.5f %-21.5f %-21.5f %-21"PRIu32"\n",
+ " %-21.5f %-21.5f %-21.5f %-21"PRIu32
+ " %-21.5f %-21.5f %-21.5f\n",
s->totals.cycles / s->totals.iterations,
1.0 * s->totals.pkts / s->totals.iterations,
s->totals.pkts
@@ -337,7 +359,10 @@ pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s)
1.0 * s->totals.max_vhost_qfill / s->totals.iterations,
1.0 * s->totals.upcalls / s->totals.iterations,
s->totals.upcalls
- ? s->totals.upcall_cycles / s->totals.upcalls : 0);
+ ? s->totals.upcall_cycles / s->totals.upcalls : 0,
+ 1.0 * s->totals.emc_hits / s->totals.iterations,
+ 1.0 * s->totals.smc_hits / s->totals.iterations,
+ 1.0 * s->totals.megaflow_hits / s->totals.iterations);
}
}
@@ -353,15 +378,17 @@ pmd_perf_format_iteration_history(struct ds *str, struct pmd_perf_stats *s,
return;
}
ds_put_format(str, " %-17s %-10s %-10s %-10s %-10s "
- "%-10s %-10s %-10s\n",
+ "%-10s %-10s %-10s %-10s %-10s %-10s\n",
"iter", "cycles", "packets", "cycles/pkt", "pkts/batch",
- "vhost qlen", "upcalls", "cycles/upcall");
+ "vhost qlen", "upcalls", "cycles/upcall", "EMC hits",
+ "SMC hits", "megaflow hits");
for (i = 1; i <= n_iter; i++) {
index = history_sub(s->iterations.idx, i);
is = &s->iterations.sample[index];
ds_put_format(str,
" %-17"PRIu64" %-11"PRIu64" %-11"PRIu32
" %-11"PRIu64" %-11"PRIu32" %-11"PRIu32
+ " %-11"PRIu32" %-14"PRIu32" %-11"PRIu32
" %-11"PRIu32" %-11"PRIu32"\n",
is->timestamp,
is->cycles,
@@ -370,7 +397,10 @@ pmd_perf_format_iteration_history(struct ds *str, struct pmd_perf_stats *s,
is->batches ? is->pkts / is->batches : 0,
is->max_vhost_qfill,
is->upcalls,
- is->upcalls ? is->upcall_cycles / is->upcalls : 0);
+ is->upcalls ? is->upcall_cycles / is->upcalls : 0,
+ is->emc_hits,
+ is->smc_hits,
+ is->megaflow_hits);
}
}
@@ -386,15 +416,18 @@ pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s, int n_ms)
}
ds_put_format(str,
" %-12s %-10s %-10s %-10s %-10s"
- " %-10s %-10s %-10s %-10s\n",
+ " %-10s %-10s %-10s %-10s %-10s"
+ " %-10s %-10s\n",
"ms", "iterations", "cycles/it", "Kpps", "cycles/pkt",
- "pkts/batch", "vhost qlen", "upcalls", "cycles/upcall");
+ "pkts/batch", "vhost qlen", "upcalls", "cycles/upcall",
+ "EMC hits", "SMC hits", "megaflow hits");
for (i = 1; i <= n_ms; i++) {
index = history_sub(s->milliseconds.idx, i);
is = &s->milliseconds.sample[index];
ds_put_format(str,
" %-12"PRIu64" %-11"PRIu32" %-11"PRIu64
" %-11"PRIu32" %-11"PRIu64" %-11"PRIu32
+ " %-11"PRIu32" %-11"PRIu32" %-14"PRIu32
" %-11"PRIu32" %-11"PRIu32" %-11"PRIu32"\n",
is->timestamp,
is->iterations,
@@ -405,7 +438,10 @@ pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s, int n_ms)
is->iterations
? is->max_vhost_qfill / is->iterations : 0,
is->upcalls,
- is->upcalls ? is->upcall_cycles / is->upcalls : 0);
+ is->upcalls ? is->upcall_cycles / is->upcalls : 0,
+ is->emc_hits,
+ is->smc_hits,
+ is->megaflow_hits);
}
}
@@ -452,6 +488,9 @@ pmd_perf_stats_clear_lock(struct pmd_perf_stats *s)
histogram_clear(&s->cycles_per_upcall);
histogram_clear(&s->pkts_per_batch);
histogram_clear(&s->max_vhost_qfill);
+ histogram_clear(&s->emc_hits);
+ histogram_clear(&s->smc_hits);
+ histogram_clear(&s->megaflow_hits);
history_init(&s->iterations);
history_init(&s->milliseconds);
s->start_ms = time_msec();
@@ -540,6 +579,9 @@ pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
}
histogram_add_sample(&s->upcalls, s->current.upcalls);
histogram_add_sample(&s->max_vhost_qfill, s->current.max_vhost_qfill);
+ histogram_add_sample(&s->emc_hits, s->current.emc_hits);
+ histogram_add_sample(&s->smc_hits, s->current.smc_hits);
+ histogram_add_sample(&s->megaflow_hits, s->current.megaflow_hits);
/* Add iteration samples to millisecond stats. */
cum_ms = history_current(&s->milliseconds);
@@ -553,6 +595,9 @@ pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
cum_ms->upcall_cycles += s->current.upcall_cycles;
cum_ms->batches += s->current.batches;
cum_ms->max_vhost_qfill += s->current.max_vhost_qfill;
+ cum_ms->emc_hits += s->current.emc_hits;
+ cum_ms->smc_hits += s->current.smc_hits;
+ cum_ms->megaflow_hits += s->current.megaflow_hits;
if (log_enabled) {
/* Log suspicious iterations. */
@@ -591,6 +636,9 @@ pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
s->totals.upcall_cycles += cum_ms->upcall_cycles;
s->totals.batches += cum_ms->batches;
s->totals.max_vhost_qfill += cum_ms->max_vhost_qfill;
+ s->totals.emc_hits += cum_ms->emc_hits;
+ s->totals.smc_hits += cum_ms->smc_hits;
+ s->totals.megaflow_hits += cum_ms->megaflow_hits;
cum_ms = history_next(&s->milliseconds);
cum_ms->timestamp = now;
}
diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h
index 72645b6b3..87b0fe7e4 100644
--- a/lib/dpif-netdev-perf.h
+++ b/lib/dpif-netdev-perf.h
@@ -120,6 +120,9 @@ struct iter_stats {
uint32_t upcall_cycles; /* Cycles spent in upcalls in it. or ms. */
uint32_t batches; /* Number of rx batches in iteration or ms. */
uint32_t max_vhost_qfill; /* Maximum fill level in iteration or ms. */
+ uint32_t emc_hits; /* EMC Hits in iteration or ms. */
+ uint32_t smc_hits; /* SMC Hits in iteration or ms. */
+ uint32_t megaflow_hits; /* Megaflow Hits in iteration or ms. */
};
#define HISTORY_LEN 1000 /* Length of recorded history
@@ -173,6 +176,9 @@ struct pmd_perf_stats {
struct histogram cycles_per_upcall;
struct histogram pkts_per_batch;
struct histogram max_vhost_qfill;
+ struct histogram emc_hits;
+ struct histogram smc_hits;
+ struct histogram megaflow_hits;
/* Iteration history buffer. */
struct history iterations;
/* Millisecond history buffer. */
diff --git a/lib/dpif-netdev-unixctl.man b/lib/dpif-netdev-unixctl.man
index 858d491df..3399ff067 100644
--- a/lib/dpif-netdev-unixctl.man
+++ b/lib/dpif-netdev-unixctl.man
@@ -58,6 +58,12 @@ max. vhostuser queue fill level
number of upcalls
.IP \(em
cycles spent in upcalls
+.IP \(em
+number of EMC hits
+.IP \(em
+number of SMC hits
+.IP \(em
+number of megaflow hits
.PD
.RE
.IP
@@ -84,6 +90,12 @@ upcalls
.IP \(em
cycles/upcall (logarithmic)
The histograms bins are divided linear or logarithmic.
+.IP \(em
+EMC hits/iteration
+.IP \(em
+SMC hits/iteration
+.IP \(em
+megaflow hits/iteration
.RE
.IP 2.
A cyclic history of the above metrics for 1024 iterations
@@ -105,6 +117,12 @@ avg. max vhost qlen
upcalls
.IP \(em
avg. cycles/upcall
+.IP \(em
+EMC hits
+.IP \(em
+SMC hits
+.IP \(em
+megaflow hits
.RE
.PD
.RE
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 300861ca5..dbf5cf85c 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -6936,6 +6936,7 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
size_t n_smc_hit = 0, n_missed = 0;
struct dfc_cache *cache = &pmd->flow_cache;
struct smc_cache *smc_cache = &cache->smc_cache;
+ struct pmd_perf_stats *stats = &pmd->perf_stats;
const struct cmap_node *flow_node;
int recv_idx;
uint16_t tcp_flags;
@@ -6990,7 +6991,11 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
missed_keys[n_missed++] = &keys[i];
}
- pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT, n_smc_hit);
+ pmd_perf_update_counter(stats, PMD_STAT_SMC_HIT, n_smc_hit);
+
+ if (pmd_perf_metrics_enabled(pmd)) {
+ stats->current.smc_hits += n_smc_hit;
+ }
}
/* Try to process all ('cnt') the 'packets' using only the datapath flow cache
@@ -7023,6 +7028,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
struct dfc_cache *cache = &pmd->flow_cache;
struct dp_packet *packet;
const size_t cnt = dp_packet_batch_size(packets_);
+ struct pmd_perf_stats *stats = &pmd->perf_stats;
uint32_t cur_min = pmd->ctx.emc_insert_min;
int i;
uint16_t tcp_flags;
@@ -7031,7 +7037,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
bool batch_enable = true;
atomic_read_relaxed(&pmd->dp->smc_enable_db, &smc_enable_db);
- pmd_perf_update_counter(&pmd->perf_stats,
+ pmd_perf_update_counter(stats,
md_is_valid ? PMD_STAT_RECIRC : PMD_STAT_RECV,
cnt);
@@ -7123,7 +7129,11 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
/* Count of packets which are not flow batched. */
*n_flows = map_cnt;
- pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT, n_emc_hit);
+ pmd_perf_update_counter(stats, PMD_STAT_EXACT_HIT, n_emc_hit);
+
+ if (pmd_perf_metrics_enabled(pmd)) {
+ stats->current.emc_hits += n_emc_hit;
+ }
if (!smc_enable_db) {
return dp_packet_batch_size(packets_);
@@ -7224,6 +7234,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
odp_port_t in_port)
{
const size_t cnt = dp_packet_batch_size(packets_);
+ struct pmd_perf_stats *stats = &pmd->perf_stats;
#if !defined(__CHECKER__) && !defined(_WIN32)
const size_t PKT_ARRAY_SIZE = cnt;
#else
@@ -7322,14 +7333,16 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
flow_map, recv_idx);
}
- pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_HIT,
+ if (pmd_perf_metrics_enabled(pmd)) {
+ stats->current.megaflow_hits +=
+ (cnt - upcall_ok_cnt - upcall_fail_cnt);
+ }
+
+ pmd_perf_update_counter(stats, PMD_STAT_MASKED_HIT,
cnt - upcall_ok_cnt - upcall_fail_cnt);
- pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_LOOKUP,
- lookup_cnt);
- pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MISS,
- upcall_ok_cnt);
- pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_LOST,
- upcall_fail_cnt);
+ pmd_perf_update_counter(stats, PMD_STAT_MASKED_LOOKUP, lookup_cnt);
+ pmd_perf_update_counter(stats, PMD_STAT_MISS, upcall_ok_cnt);
+ pmd_perf_update_counter(stats, PMD_STAT_LOST, upcall_fail_cnt);
}
/* Packets enter the datapath from a port (or from recirculation) here.
--
2.26.2
More information about the dev
mailing list