[ovs-dev] [PATCH] dpif-netdev: display EMC used entries for PMDs
Kevin Traynor
ktraynor at redhat.com
Fri Feb 5 19:01:27 UTC 2021
Hi Paolo,
On 14/01/2021 13:19, Paolo Valerio wrote:
> adds "emc entries" to "ovs-appctl dpif-netdev/pmd-stats-show" in order
> to show the number of alive entries.
>
Thanks for working on this. Not a full review, but a few high level
comments. I would like to ask about the motivation - is it so a user can
judge the effectiveness of using the EMC, to decide whether to disable
it? Or to tune the EMC insertion rate?
If I run this and increase the flows, I see:
emc hits: 1961314
emc entries: 6032 (73.63%)
smc hits: 0
megaflow hits: 688946
If I look at pmd-perf-stats (below), it shows the % split between emc
and megaflow and I think I would use that to judge the effectiveness of
the emc. I'm not too fussed whether the EMC occupancy is high or low; I
just want to know if it is being effective at getting hits for my
packets, so I'm not sure that 'emc entries: 6032 (73.63%)' helps me
decide to enable/disable it.
- EMC hits: 1972766 ( 74.0 %)
- SMC hits: 0 ( 0.0 %)
- Megaflow hits: 693022 ( 26.0 %, 1.00 subtbl lookups/hit)
Of course it doesn't account for the differing cost between them, but if
emc hits were a low %, I'd want to experiment with disabling it.
Depending on your motivation, you might also want to take a look at the
really nice FDB stats that Eelco did; it might give you some ideas.
Kevin.
> Signed-off-by: Paolo Valerio <pvalerio at redhat.com>
> ---
> NEWS | 2 ++
> lib/dpif-netdev-perf.h | 2 ++
> lib/dpif-netdev.c | 76 +++++++++++++++++++++++++++++++-----------
> tests/pmd.at | 6 ++--
> 4 files changed, 65 insertions(+), 21 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 617fe8e6a..e5d53a83b 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -20,6 +20,8 @@ Post-v2.14.0
> * Add generic IP protocol support to conntrack. With this change, all
> none UDP, TCP, and ICMP traffic will be treated as general L3
> traffic, i.e. using 3 tupples.
> + * EMC alive entries counter has been added to command
> + "ovs-appctl dpif-netdev/pmd-stats-show"
> - The environment variable OVS_UNBOUND_CONF, if set, is now used
> as the DNS resolver's (unbound) configuration file.
> - Linux datapath:
> diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h
> index 72645b6b3..80f50eae9 100644
> --- a/lib/dpif-netdev-perf.h
> +++ b/lib/dpif-netdev-perf.h
> @@ -157,6 +157,8 @@ struct pmd_perf_stats {
> uint64_t last_tsc;
> /* Used to space certain checks in time. */
> uint64_t next_check_tsc;
> + /* Exact Match Cache used entries counter. */
> + atomic_uint32_t emc_n_entries;
> /* If non-NULL, outermost cycle timer currently running in PMD. */
> struct cycle_timer *cur_timer;
> /* Set of PMD counters with their zero offsets. */
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 300861ca5..3eb70ccd5 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -913,7 +913,7 @@ dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
> odp_port_t in_port);
>
> static inline bool emc_entry_alive(struct emc_entry *ce);
> -static void emc_clear_entry(struct emc_entry *ce);
> +static void emc_clear_entry(struct emc_entry *ce, struct pmd_perf_stats *s);
> static void smc_clear_entry(struct smc_bucket *b, int idx);
>
> static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
> @@ -955,13 +955,16 @@ dfc_cache_init(struct dfc_cache *flow_cache)
> }
>
> static void
> -emc_cache_uninit(struct emc_cache *flow_cache)
> +emc_cache_uninit(struct emc_cache *flow_cache, struct pmd_perf_stats *s)
> {
> int i;
>
> +
> for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) {
> - emc_clear_entry(&flow_cache->entries[i]);
> + emc_clear_entry(&flow_cache->entries[i], s);
> }
> +
> + atomic_store_relaxed(&s->emc_n_entries, 0);
> }
>
> static void
> @@ -977,21 +980,21 @@ smc_cache_uninit(struct smc_cache *smc)
> }
>
> static void
> -dfc_cache_uninit(struct dfc_cache *flow_cache)
> +dfc_cache_uninit(struct dfc_cache *flow_cache, struct pmd_perf_stats *s)
> {
> smc_cache_uninit(&flow_cache->smc_cache);
> - emc_cache_uninit(&flow_cache->emc_cache);
> + emc_cache_uninit(&flow_cache->emc_cache, s);
> }
>
> /* Check and clear dead flow references slowly (one entry at each
> * invocation). */
> static void
> -emc_cache_slow_sweep(struct emc_cache *flow_cache)
> +emc_cache_slow_sweep(struct emc_cache *flow_cache, struct pmd_perf_stats *s)
> {
> struct emc_entry *entry = &flow_cache->entries[flow_cache->sweep_idx];
>
> if (!emc_entry_alive(entry)) {
> - emc_clear_entry(entry);
> + emc_clear_entry(entry, s);
> }
> flow_cache->sweep_idx = (flow_cache->sweep_idx + 1) & EM_FLOW_HASH_MASK;
> }
> @@ -1093,15 +1096,26 @@ pmd_info_show_stats(struct ds *reply,
> " packets received: %"PRIu64"\n"
> " packet recirculations: %"PRIu64"\n"
> " avg. datapath passes per packet: %.02f\n"
> - " emc hits: %"PRIu64"\n"
> + " emc hits: %"PRIu64"\n",
> + total_packets, stats[PMD_STAT_RECIRC],
> + passes_per_pkt, stats[PMD_STAT_EXACT_HIT]
> + );
> +
> + if (pmd->core_id != NON_PMD_CORE_ID) {
> + uint32_t emc_entries;
> + atomic_read_relaxed(&pmd->perf_stats.emc_n_entries, &emc_entries);
> + ds_put_format(reply, " emc entries: %"PRIu32" (%.02f%%)\n",
> + emc_entries,
> + 100.0 * emc_entries / EM_FLOW_HASH_ENTRIES);
> + }
> +
> + ds_put_format(reply,
> " smc hits: %"PRIu64"\n"
> " megaflow hits: %"PRIu64"\n"
> " avg. subtable lookups per megaflow hit: %.02f\n"
> " miss with success upcall: %"PRIu64"\n"
> " miss with failed upcall: %"PRIu64"\n"
> " avg. packets per output batch: %.02f\n",
> - total_packets, stats[PMD_STAT_RECIRC],
> - passes_per_pkt, stats[PMD_STAT_EXACT_HIT],
> stats[PMD_STAT_SMC_HIT],
> stats[PMD_STAT_MASKED_HIT], lookups_per_hit,
> stats[PMD_STAT_MISS], stats[PMD_STAT_LOST],
> @@ -2004,6 +2018,26 @@ non_atomic_ullong_add(atomic_ullong *var, unsigned long long n)
> atomic_store_relaxed(var, tmp);
> }
>
> +static void
> +non_atomic_dec(atomic_uint32_t *var)
> +{
> + unsigned int tmp;
> +
> + atomic_read_relaxed(var, &tmp);
> + tmp--;
> + atomic_store_relaxed(var, tmp);
> +}
> +
> +static void
> +non_atomic_inc(atomic_uint32_t *var)
> +{
> + unsigned int tmp;
> +
> + atomic_read_relaxed(var, &tmp);
> + tmp++;
> + atomic_store_relaxed(var, tmp);
> +}
> +
> static int
> dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
> {
> @@ -3070,25 +3104,28 @@ emc_entry_alive(struct emc_entry *ce)
> }
>
> static void
> -emc_clear_entry(struct emc_entry *ce)
> +emc_clear_entry(struct emc_entry *ce, struct pmd_perf_stats *s)
> {
> if (ce->flow) {
> dp_netdev_flow_unref(ce->flow);
> ce->flow = NULL;
> + non_atomic_dec(&s->emc_n_entries);
> }
> }
>
> static inline void
> emc_change_entry(struct emc_entry *ce, struct dp_netdev_flow *flow,
> - const struct netdev_flow_key *key)
> + const struct netdev_flow_key *key, struct pmd_perf_stats *s)
> {
> if (ce->flow != flow) {
> if (ce->flow) {
> dp_netdev_flow_unref(ce->flow);
> + non_atomic_dec(&s->emc_n_entries);
> }
>
> if (dp_netdev_flow_ref(flow)) {
> ce->flow = flow;
> + non_atomic_inc(&s->emc_n_entries);
> } else {
> ce->flow = NULL;
> }
> @@ -3100,7 +3137,7 @@ emc_change_entry(struct emc_entry *ce, struct dp_netdev_flow *flow,
>
> static inline void
> emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key,
> - struct dp_netdev_flow *flow)
> + struct dp_netdev_flow *flow, struct pmd_perf_stats *s)
> {
> struct emc_entry *to_be_replaced = NULL;
> struct emc_entry *current_entry;
> @@ -3108,7 +3145,7 @@ emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key,
> EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, key->hash) {
> if (netdev_flow_key_equal(¤t_entry->key, key)) {
> /* We found the entry with the 'mf' miniflow */
> - emc_change_entry(current_entry, flow, NULL);
> + emc_change_entry(current_entry, flow, NULL, s);
> return;
> }
>
> @@ -3124,7 +3161,7 @@ emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key,
> /* We didn't find the miniflow in the cache.
> * The 'to_be_replaced' entry is where the new flow will be stored */
>
> - emc_change_entry(to_be_replaced, flow, key);
> + emc_change_entry(to_be_replaced, flow, key, s);
> }
>
> static inline void
> @@ -3139,7 +3176,7 @@ emc_probabilistic_insert(struct dp_netdev_pmd_thread *pmd,
> uint32_t min = pmd->ctx.emc_insert_min;
>
> if (min && random_uint32() <= min) {
> - emc_insert(&(pmd->flow_cache).emc_cache, key, flow);
> + emc_insert(&(pmd->flow_cache).emc_cache, key, flow, &pmd->perf_stats);
> }
> }
>
> @@ -6014,7 +6051,8 @@ reload:
> coverage_try_clear();
> dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt);
> if (!ovsrcu_try_quiesce()) {
> - emc_cache_slow_sweep(&((pmd->flow_cache).emc_cache));
> + emc_cache_slow_sweep(&((pmd->flow_cache).emc_cache),
> + &pmd->perf_stats);
> pmd->next_rcu_quiesce =
> pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
> }
> @@ -6058,7 +6096,7 @@ reload:
> }
>
> pmd_free_static_tx_qid(pmd);
> - dfc_cache_uninit(&pmd->flow_cache);
> + dfc_cache_uninit(&pmd->flow_cache, &pmd->perf_stats);
> free(poll_list);
> pmd_free_cached_ports(pmd);
> return NULL;
> @@ -6537,7 +6575,7 @@ dp_netdev_del_pmd(struct dp_netdev *dp, struct dp_netdev_pmd_thread *pmd)
> * but extra cleanup is necessary */
> if (pmd->core_id == NON_PMD_CORE_ID) {
> ovs_mutex_lock(&dp->non_pmd_mutex);
> - dfc_cache_uninit(&pmd->flow_cache);
> + dfc_cache_uninit(&pmd->flow_cache, &pmd->perf_stats);
> pmd_free_cached_ports(pmd);
> pmd_free_static_tx_qid(pmd);
> ovs_mutex_unlock(&dp->non_pmd_mutex);
> diff --git a/tests/pmd.at b/tests/pmd.at
> index cc5371d5a..45c69563c 100644
> --- a/tests/pmd.at
> +++ b/tests/pmd.at
> @@ -202,12 +202,13 @@ dummy at ovs-dummy: hit:0 missed:0
> p0 7/1: (dummy-pmd: configured_rx_queues=4, configured_tx_queues=<cleared>, requested_rx_queues=4, requested_tx_queues=<cleared>)
> ])
>
> -AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 9], [0], [dnl
> +AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 10], [0], [dnl
> pmd thread numa_id <cleared> core_id <cleared>:
> packets received: 0
> packet recirculations: 0
> avg. datapath passes per packet: 0.00
> emc hits: 0
> + emc entries: 0 (0.00%)
> smc hits: 0
> megaflow hits: 0
> avg. subtable lookups per megaflow hit: 0.00
> @@ -233,12 +234,13 @@ AT_CHECK([cat ovs-vswitchd.log | filter_flow_install | strip_xout], [0], [dnl
> recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:77,dst=50:54:00:00:01:78),eth_type(0x0800),ipv4(frag=no), actions: <del>
> ])
>
> -AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 9], [0], [dnl
> +AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 10], [0], [dnl
> pmd thread numa_id <cleared> core_id <cleared>:
> packets received: 20
> packet recirculations: 0
> avg. datapath passes per packet: 1.00
> emc hits: 19
> + emc entries: 1 (0.01%)
> smc hits: 0
> megaflow hits: 0
> avg. subtable lookups per megaflow hit: 0.00
>
More information about the dev
mailing list