[ovs-dev] [PATCH] dpif-netdev: add cache hits to histogram and history stats for PMDs

Paolo Valerio pvalerio at redhat.com
Fri Nov 20 16:37:11 UTC 2020


This patch records cache hits for every iteration of a PMD thread.

The new metrics covered are:

  - EMC hits
  - SMC hits
  - Megaflow hits

The gathered data is shown in the histograms as well as in the
iteration and millisecond cyclic histories.

Signed-off-by: Paolo Valerio <pvalerio at redhat.com>
---
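For reviewers who have not looked at the perf histograms before, below
is a minimal, self-contained sketch of the linear bucketing that the
new counters reuse.  It is an illustration only: NUM_BINS and the
wall/bin layout follow lib/dpif-netdev-perf.h, but the helper bodies
are simplified stand-ins rather than copies of the OVS code.

/*
 * Simplified model of the histogram bucketing used by the PMD perf
 * stats.  NUM_BINS and the wall/bin layout follow
 * lib/dpif-netdev-perf.h; the arithmetic below is an illustrative
 * stand-in, not the OVS implementation.
 */
#include <inttypes.h>
#include <stdio.h>

#define NUM_BINS 32             /* Same bin count as dpif-netdev-perf.h. */

struct histogram {
    uint32_t wall[NUM_BINS];    /* Upper bound of each bucket. */
    uint64_t bin[NUM_BINS];     /* Number of samples per bucket. */
};

/* Linear bucket walls from 'min' to 'max'; the last bucket catches
 * everything above 'max' and is printed as ">" by pmd-perf-show. */
static void
histogram_walls_set_lin(struct histogram *hist, uint32_t min, uint32_t max)
{
    int i;

    for (i = 0; i < NUM_BINS - 1; i++) {
        hist->wall[i] = min + (i * (max - min)) / (NUM_BINS - 2);
    }
    hist->wall[NUM_BINS - 1] = UINT32_MAX;
}

static void
histogram_add_sample(struct histogram *hist, uint32_t val)
{
    int i;

    for (i = 0; i < NUM_BINS; i++) {
        if (val <= hist->wall[i]) {
            hist->bin[i]++;
            return;
        }
    }
}

int
main(void)
{
    struct histogram emc_hits = { { 0 }, { 0 } };
    int i;

    /* 0..100 EMC hits per iteration, as in pmd_perf_stats_init(). */
    histogram_walls_set_lin(&emc_hits, 0, 100);

    /* Pretend three iterations ended with 3, 42 and 250 EMC hits;
     * 250 exceeds every wall below UINT32_MAX and so lands in the
     * overflow (">") bucket. */
    histogram_add_sample(&emc_hits, 3);
    histogram_add_sample(&emc_hits, 42);
    histogram_add_sample(&emc_hits, 250);

    for (i = 0; i < NUM_BINS; i++) {
        if (emc_hits.bin[i]) {
            printf("wall %-9"PRIu32" count %"PRIu64"\n",
                   emc_hits.wall[i], emc_hits.bin[i]);
        }
    }
    return 0;
}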
 NEWS                        |  2 +
 lib/dpif-netdev-perf.c      | 78 ++++++++++++++++++++++++++++++-------
 lib/dpif-netdev-perf.h      |  6 +++
 lib/dpif-netdev-unixctl.man | 18 +++++++++
 lib/dpif-netdev.c           | 33 +++++++++++-----
 5 files changed, 112 insertions(+), 25 deletions(-)
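
Note on testing: as with the other detailed per-iteration metrics, the
new counters are only gathered when pmd_perf_metrics_enabled() reports
true, i.e. when 'other_config:pmd-perf-metrics=true' is set.  Once
enabled, the additional columns show up in the output of
'ovs-appctl dpif-netdev/pmd-perf-show'.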

diff --git a/NEWS b/NEWS
index 185555848..ad2dbca79 100644
--- a/NEWS
+++ b/NEWS
@@ -16,6 +16,8 @@ Post-v2.14.0
        restricts a flow dump to a single PMD thread if set.
      * New 'options:dpdk-vf-mac' field for DPDK interface of VF ports,
        that allows configuring the MAC address of a VF representor.
+     * 'ovs-appctl dpif-netdev/pmd-perf-show' now also reports EMC, SMC,
+       and megaflow hits in the histograms and cyclic histories.
    - The environment variable OVS_UNBOUND_CONF, if set, is now used
      as the DNS resolver's (unbound) configuration file.
    - Linux datapath:
diff --git a/lib/dpif-netdev-perf.c b/lib/dpif-netdev-perf.c
index 9560e7c3c..4be79345f 100644
--- a/lib/dpif-netdev-perf.c
+++ b/lib/dpif-netdev-perf.c
@@ -198,6 +198,13 @@ pmd_perf_stats_init(struct pmd_perf_stats *s)
      * descriptors (maximum configurable length in Qemu), with the
      * DPDK 17.11 virtio PMD in the guest. */
     histogram_walls_set_log(&s->max_vhost_qfill, 0, 512);
+    /* Linear histogram for EMC hits/it ranging from 0 to 100. */
+    histogram_walls_set_lin(&s->emc_hits, 0, 100);
+    /* Linear histogram for SMC hits/it ranging from 0 to 100. */
+    histogram_walls_set_lin(&s->smc_hits, 0, 100);
+    /* Linear histogram for megaflow hits/it ranging from 0 to 100. */
+    histogram_walls_set_lin(&s->megaflow_hits, 0, 100);
+
     s->iteration_cnt = 0;
     s->start_ms = time_msec();
     s->log_susp_it = UINT32_MAX;
@@ -289,13 +296,16 @@ pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s)
 
     ds_put_cstr(str, "Histograms\n");
     ds_put_format(str,
-                  "   %-21s  %-21s  %-21s  %-21s  %-21s  %-21s  %-21s\n",
+                  "   %-21s  %-21s  %-21s  %-21s  %-21s  %-21s  %-21s"
+                  "  %-21s  %-21s  %-21s\n",
                   "cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
-                  "max vhost qlen", "upcalls/it", "cycles/upcall");
+                  "max vhost qlen", "upcalls/it", "cycles/upcall",
+                  "EMC hits/it", "SMC hits/it", "megaflow hits/it");
     for (i = 0; i < NUM_BINS-1; i++) {
         ds_put_format(str,
             "   %-9d %-11"PRIu64"  %-9d %-11"PRIu64"  %-9d %-11"PRIu64
             "  %-9d %-11"PRIu64"  %-9d %-11"PRIu64"  %-9d %-11"PRIu64
+            "  %-9d %-11"PRIu64"  %-9d %-11"PRIu64"  %-9d %-11"PRIu64
             "  %-9d %-11"PRIu64"\n",
             s->cycles.wall[i], s->cycles.bin[i],
             s->pkts.wall[i],s->pkts.bin[i],
@@ -303,11 +313,15 @@ pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s)
             s->pkts_per_batch.wall[i], s->pkts_per_batch.bin[i],
             s->max_vhost_qfill.wall[i], s->max_vhost_qfill.bin[i],
             s->upcalls.wall[i], s->upcalls.bin[i],
-            s->cycles_per_upcall.wall[i], s->cycles_per_upcall.bin[i]);
+            s->cycles_per_upcall.wall[i], s->cycles_per_upcall.bin[i],
+            s->emc_hits.wall[i], s->emc_hits.bin[i],
+            s->smc_hits.wall[i], s->smc_hits.bin[i],
+            s->megaflow_hits.wall[i], s->megaflow_hits.bin[i]);
     }
     ds_put_format(str,
                   "   %-9s %-11"PRIu64"  %-9s %-11"PRIu64"  %-9s %-11"PRIu64
                   "  %-9s %-11"PRIu64"  %-9s %-11"PRIu64"  %-9s %-11"PRIu64
+                  "  %-9s %-11"PRIu64"  %-9s %-11"PRIu64"  %-9s %-11"PRIu64
                   "  %-9s %-11"PRIu64"\n",
                   ">", s->cycles.bin[i],
                   ">", s->pkts.bin[i],
@@ -315,19 +329,27 @@ pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s)
                   ">", s->pkts_per_batch.bin[i],
                   ">", s->max_vhost_qfill.bin[i],
                   ">", s->upcalls.bin[i],
-                  ">", s->cycles_per_upcall.bin[i]);
+                  ">", s->cycles_per_upcall.bin[i],
+                  ">", s->emc_hits.bin[i],
+                  ">", s->smc_hits.bin[i],
+                  ">", s->megaflow_hits.bin[i]);
     if (s->totals.iterations > 0) {
         ds_put_cstr(str,
                     "-----------------------------------------------------"
                     "-----------------------------------------------------"
-                    "------------------------------------------------\n");
+                    "-----------------------------------------------------"
+                    "-----------------------------------------------------"
+                    "--------------\n");
         ds_put_format(str,
-                      "   %-21s  %-21s  %-21s  %-21s  %-21s  %-21s  %-21s\n",
+                      "   %-21s  %-21s  %-21s  %-21s  %-21s  %-21s  %-21s"
+                      "  %-21s  %-21s  %-21s\n",
                       "cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
-                      "vhost qlen", "upcalls/it", "cycles/upcall");
+                      "vhost qlen", "upcalls/it", "cycles/upcall",
+                      "EMC hits/it", "SMC hits/it", "megaflow hits/it");
         ds_put_format(str,
                       "   %-21"PRIu64"  %-21.5f  %-21"PRIu64
-                      "  %-21.5f  %-21.5f  %-21.5f  %-21"PRIu32"\n",
+                      "  %-21.5f  %-21.5f  %-21.5f  %-21"PRIu32
+                      "  %-21.5f  %-21.5f  %-21.5f\n",
                       s->totals.cycles / s->totals.iterations,
                       1.0 * s->totals.pkts / s->totals.iterations,
                       s->totals.pkts
@@ -337,7 +359,10 @@ pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s)
                       1.0 * s->totals.max_vhost_qfill / s->totals.iterations,
                       1.0 * s->totals.upcalls / s->totals.iterations,
                       s->totals.upcalls
-                          ? s->totals.upcall_cycles / s->totals.upcalls : 0);
+                          ? s->totals.upcall_cycles / s->totals.upcalls : 0,
+                      1.0 * s->totals.emc_hits / s->totals.iterations,
+                      1.0 * s->totals.smc_hits / s->totals.iterations,
+                      1.0 * s->totals.megaflow_hits / s->totals.iterations);
     }
 }
 
@@ -353,15 +378,17 @@ pmd_perf_format_iteration_history(struct ds *str, struct pmd_perf_stats *s,
         return;
     }
     ds_put_format(str, "   %-17s   %-10s   %-10s   %-10s   %-10s   "
-                  "%-10s   %-10s   %-10s\n",
+                  "%-10s   %-10s   %-10s   %-10s   %-10s   %-10s\n",
                   "iter", "cycles", "packets", "cycles/pkt", "pkts/batch",
-                  "vhost qlen", "upcalls", "cycles/upcall");
+                  "vhost qlen", "upcalls", "cycles/upcall", "EMC hits",
+                  "SMC hits", "megaflow hits");
     for (i = 1; i <= n_iter; i++) {
         index = history_sub(s->iterations.idx, i);
         is = &s->iterations.sample[index];
         ds_put_format(str,
                       "   %-17"PRIu64"   %-11"PRIu64"  %-11"PRIu32
                       "  %-11"PRIu64"  %-11"PRIu32"  %-11"PRIu32
+                      "  %-11"PRIu32"  %-14"PRIu32"  %-11"PRIu32
                       "  %-11"PRIu32"  %-11"PRIu32"\n",
                       is->timestamp,
                       is->cycles,
@@ -370,7 +397,10 @@ pmd_perf_format_iteration_history(struct ds *str, struct pmd_perf_stats *s,
                       is->batches ? is->pkts / is->batches : 0,
                       is->max_vhost_qfill,
                       is->upcalls,
-                      is->upcalls ? is->upcall_cycles / is->upcalls : 0);
+                      is->upcalls ? is->upcall_cycles / is->upcalls : 0,
+                      is->emc_hits,
+                      is->smc_hits,
+                      is->megaflow_hits);
     }
 }
 
@@ -386,15 +416,18 @@ pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s, int n_ms)
     }
     ds_put_format(str,
                   "   %-12s   %-10s   %-10s   %-10s   %-10s"
-                  "   %-10s   %-10s   %-10s   %-10s\n",
+                  "   %-10s   %-10s   %-10s   %-10s   %-10s"
+                  "   %-10s   %-10s\n",
                   "ms", "iterations", "cycles/it", "Kpps", "cycles/pkt",
-                  "pkts/batch", "vhost qlen", "upcalls", "cycles/upcall");
+                  "pkts/batch", "vhost qlen", "upcalls", "cycles/upcall",
+                  "EMC hits", "SMC hits", "megaflow hits");
     for (i = 1; i <= n_ms; i++) {
         index = history_sub(s->milliseconds.idx, i);
         is = &s->milliseconds.sample[index];
         ds_put_format(str,
                       "   %-12"PRIu64"   %-11"PRIu32"  %-11"PRIu64
                       "  %-11"PRIu32"  %-11"PRIu64"  %-11"PRIu32
+                      "  %-11"PRIu32"  %-11"PRIu32"  %-14"PRIu32
                       "  %-11"PRIu32"  %-11"PRIu32"  %-11"PRIu32"\n",
                       is->timestamp,
                       is->iterations,
@@ -405,7 +438,10 @@ pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s, int n_ms)
                       is->iterations
                           ? is->max_vhost_qfill / is->iterations : 0,
                       is->upcalls,
-                      is->upcalls ? is->upcall_cycles / is->upcalls : 0);
+                      is->upcalls ? is->upcall_cycles / is->upcalls : 0,
+                      is->emc_hits,
+                      is->smc_hits,
+                      is->megaflow_hits);
     }
 }
 
@@ -452,6 +488,9 @@ pmd_perf_stats_clear_lock(struct pmd_perf_stats *s)
     histogram_clear(&s->cycles_per_upcall);
     histogram_clear(&s->pkts_per_batch);
     histogram_clear(&s->max_vhost_qfill);
+    histogram_clear(&s->emc_hits);
+    histogram_clear(&s->smc_hits);
+    histogram_clear(&s->megaflow_hits);
     history_init(&s->iterations);
     history_init(&s->milliseconds);
     s->start_ms = time_msec();
@@ -540,6 +579,9 @@ pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
     }
     histogram_add_sample(&s->upcalls, s->current.upcalls);
     histogram_add_sample(&s->max_vhost_qfill, s->current.max_vhost_qfill);
+    histogram_add_sample(&s->emc_hits, s->current.emc_hits);
+    histogram_add_sample(&s->smc_hits, s->current.smc_hits);
+    histogram_add_sample(&s->megaflow_hits, s->current.megaflow_hits);
 
     /* Add iteration samples to millisecond stats. */
     cum_ms = history_current(&s->milliseconds);
@@ -553,6 +595,9 @@ pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
     cum_ms->upcall_cycles += s->current.upcall_cycles;
     cum_ms->batches += s->current.batches;
     cum_ms->max_vhost_qfill += s->current.max_vhost_qfill;
+    cum_ms->emc_hits += s->current.emc_hits;
+    cum_ms->smc_hits += s->current.smc_hits;
+    cum_ms->megaflow_hits += s->current.megaflow_hits;
 
     if (log_enabled) {
         /* Log suspicious iterations. */
@@ -591,6 +636,9 @@ pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
             s->totals.upcall_cycles += cum_ms->upcall_cycles;
             s->totals.batches += cum_ms->batches;
             s->totals.max_vhost_qfill += cum_ms->max_vhost_qfill;
+            s->totals.emc_hits += cum_ms->emc_hits;
+            s->totals.smc_hits += cum_ms->smc_hits;
+            s->totals.megaflow_hits += cum_ms->megaflow_hits;
             cum_ms = history_next(&s->milliseconds);
             cum_ms->timestamp = now;
         }
diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h
index 72645b6b3..87b0fe7e4 100644
--- a/lib/dpif-netdev-perf.h
+++ b/lib/dpif-netdev-perf.h
@@ -120,6 +120,9 @@ struct iter_stats {
     uint32_t upcall_cycles;     /* Cycles spent in upcalls in it. or ms. */
     uint32_t batches;           /* Number of rx batches in iteration or ms. */
     uint32_t max_vhost_qfill;   /* Maximum fill level in iteration or ms. */
+    uint32_t emc_hits;          /* EMC hits in iteration or ms. */
+    uint32_t smc_hits;          /* SMC hits in iteration or ms. */
+    uint32_t megaflow_hits;     /* Megaflow hits in iteration or ms. */
 };
 
 #define HISTORY_LEN 1000        /* Length of recorded history
@@ -173,6 +176,9 @@ struct pmd_perf_stats {
     struct histogram cycles_per_upcall;
     struct histogram pkts_per_batch;
     struct histogram max_vhost_qfill;
+    struct histogram emc_hits;
+    struct histogram smc_hits;
+    struct histogram megaflow_hits;
     /* Iteration history buffer. */
     struct history iterations;
     /* Millisecond history buffer. */
diff --git a/lib/dpif-netdev-unixctl.man b/lib/dpif-netdev-unixctl.man
index 858d491df..3399ff067 100644
--- a/lib/dpif-netdev-unixctl.man
+++ b/lib/dpif-netdev-unixctl.man
@@ -58,6 +58,12 @@ max. vhostuser queue fill level
 number of upcalls
 .IP \(em
 cycles spent in upcalls
+.IP \(em
+number of EMC hits
+.IP \(em
+number of SMC hits
+.IP \(em
+number of megaflow hits
 .PD
 .RE
 .IP
@@ -84,6 +90,12 @@ upcalls
 .IP \(em
 cycles/upcall (logarithmic)
 The histograms bins are divided linear or logarithmic.
+.IP \(em
+EMC hits/iteration
+.IP \(em
+SMC hits/iteration
+.IP \(em
+megaflow hits/iteration
 .RE
 .IP 2.
 A cyclic history of the above metrics for 1024 iterations
@@ -105,6 +117,12 @@ avg. max vhost qlen
 upcalls
 .IP \(em
 avg. cycles/upcall
+.IP \(em
+EMC hits
+.IP \(em
+SMC hits
+.IP \(em
+megaflow hits
 .RE
 .PD
 .RE
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 300861ca5..dbf5cf85c 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -6936,6 +6936,7 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
     size_t n_smc_hit = 0, n_missed = 0;
     struct dfc_cache *cache = &pmd->flow_cache;
     struct smc_cache *smc_cache = &cache->smc_cache;
+    struct pmd_perf_stats *stats = &pmd->perf_stats;
     const struct cmap_node *flow_node;
     int recv_idx;
     uint16_t tcp_flags;
@@ -6990,7 +6991,11 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
         missed_keys[n_missed++] = &keys[i];
     }
 
-    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT, n_smc_hit);
+    pmd_perf_update_counter(stats, PMD_STAT_SMC_HIT, n_smc_hit);
+
+    if (pmd_perf_metrics_enabled(pmd)) {
+        stats->current.smc_hits += n_smc_hit;
+    }
 }
 
 /* Try to process all ('cnt') the 'packets' using only the datapath flow cache
@@ -7023,6 +7028,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
     struct dfc_cache *cache = &pmd->flow_cache;
     struct dp_packet *packet;
     const size_t cnt = dp_packet_batch_size(packets_);
+    struct pmd_perf_stats *stats = &pmd->perf_stats;
     uint32_t cur_min = pmd->ctx.emc_insert_min;
     int i;
     uint16_t tcp_flags;
@@ -7031,7 +7037,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
     bool batch_enable = true;
 
     atomic_read_relaxed(&pmd->dp->smc_enable_db, &smc_enable_db);
-    pmd_perf_update_counter(&pmd->perf_stats,
+    pmd_perf_update_counter(stats,
                             md_is_valid ? PMD_STAT_RECIRC : PMD_STAT_RECV,
                             cnt);
 
@@ -7123,7 +7129,11 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
     /* Count of packets which are not flow batched. */
     *n_flows = map_cnt;
 
-    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT, n_emc_hit);
+    pmd_perf_update_counter(stats, PMD_STAT_EXACT_HIT, n_emc_hit);
+
+    if (pmd_perf_metrics_enabled(pmd)) {
+        stats->current.emc_hits += n_emc_hit;
+    }
 
     if (!smc_enable_db) {
         return dp_packet_batch_size(packets_);
@@ -7224,6 +7234,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
                      odp_port_t in_port)
 {
     const size_t cnt = dp_packet_batch_size(packets_);
+    struct pmd_perf_stats *stats = &pmd->perf_stats;
 #if !defined(__CHECKER__) && !defined(_WIN32)
     const size_t PKT_ARRAY_SIZE = cnt;
 #else
@@ -7322,14 +7333,16 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
                                    flow_map, recv_idx);
     }
 
-    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_HIT,
+    if (pmd_perf_metrics_enabled(pmd)) {
+        stats->current.megaflow_hits +=
+            (cnt - upcall_ok_cnt - upcall_fail_cnt);
+    }
+
+    pmd_perf_update_counter(stats, PMD_STAT_MASKED_HIT,
                             cnt - upcall_ok_cnt - upcall_fail_cnt);
-    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_LOOKUP,
-                            lookup_cnt);
-    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MISS,
-                            upcall_ok_cnt);
-    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_LOST,
-                            upcall_fail_cnt);
+    pmd_perf_update_counter(stats, PMD_STAT_MASKED_LOOKUP, lookup_cnt);
+    pmd_perf_update_counter(stats, PMD_STAT_MISS, upcall_ok_cnt);
+    pmd_perf_update_counter(stats, PMD_STAT_LOST, upcall_fail_cnt);
 }
 
 /* Packets enter the datapath from a port (or from recirculation) here.
-- 
2.26.2


