[ovs-dev] [PATCH] dpif-netdev: display EMC used entries for PMDs

Paolo Valerio pvalerio at redhat.com
Mon Feb 8 14:34:13 UTC 2021


Hi Kevin,

Thank you for your feedback.

Kevin Traynor <ktraynor at redhat.com> writes:

> Hi Paolo,
>
> On 14/01/2021 13:19, Paolo Valerio wrote:
>> adds "emc entries" to "ovs-appctl dpif-netdev/pmd-stats-show" in order
>> to show the number of alive entries.
>> 
>
> Thanks for working on this. Not a full review, but a few high-level
> comments. I would like to ask about the motivation: is it so a user can
> judge the effectiveness of the EMC and decide whether to disable it? Or
> to tune the EMC insertion rate?
>

The purpose is more to give an extra bit of information, which can of
course also be used for tuning.
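
For example, if occupancy stays low while most traffic falls through to
the megaflow cache, or the EMC keeps thrashing under a large number of
flows, one could lower or disable EMC insertion via the documented
other_config:emc-insert-inv-prob knob (the values below are only
illustrative):

  # Insert roughly 1 out of every 500 processed packets
  # (the default inverse probability is 100).
  $ ovs-vsctl set Open_vSwitch . other_config:emc-insert-inv-prob=500

  # Disable EMC insertion altogether.
  $ ovs-vsctl set Open_vSwitch . other_config:emc-insert-inv-prob=0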

> If I run this and increase the flows, I see:
>   emc hits: 1961314
>   emc entries: 6032 (73.63%)
>   smc hits: 0
>   megaflow hits: 688946
>
> If I look at pmd-perf-stats (below), it shows the % split between emc
> and megaflow, and I think I would use that to judge the effectiveness of
> the emc. I'm not too fussed whether the emc occupancy is high or low; I
> just want to know if it is being effective at getting hits for my pkts,
> so I'm not sure that 'emc entries: 6032 (73.63%)' helps me decide to
> enable/disable it.
>
>   - EMC hits:             1972766  ( 74.0 %)
>   - SMC hits:                   0  (  0.0 %)
>   - Megaflow hits:         693022  ( 26.0 %, 1.00 subtbl lookups/hit)
>
> Of course it doesn't account for the differing cost between them, but if
> emc hits were a low %, I'd want to experiment with disabling it.
>

When the number of alive entries approaches a reasonably high value, and
you expect for any reason that the number of flows handled by that PMD
will grow significantly, it gives you an early hint that the EMC hit
rate may decrease.
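
To make the percentage concrete: the EMC holds EM_FLOW_HASH_ENTRIES
slots per PMD thread (1 << 13 = 8192 with the current defines), so the
line from your output above works out as:

  6032 / 8192 * 100 ~= 73.63%

i.e. that PMD's cache is already about three quarters full.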

> Depending on your motivation, you might also want to take a look at the
> really nice fdb stats that Eelco did; it might give some ideas.
>

Thanks for the heads up.

Paolo

> Kevin.
>
>> Signed-off-by: Paolo Valerio <pvalerio at redhat.com>
>> ---
>>  NEWS                   |  2 ++
>>  lib/dpif-netdev-perf.h |  2 ++
>>  lib/dpif-netdev.c      | 74 +++++++++++++++++++++++++++++++++++++++++++-----------
>>  tests/pmd.at           |  6 ++--
>>  4 files changed, 63 insertions(+), 21 deletions(-)
>> 
>> diff --git a/NEWS b/NEWS
>> index 617fe8e6a..e5d53a83b 100644
>> --- a/NEWS
>> +++ b/NEWS
>> @@ -20,6 +20,8 @@ Post-v2.14.0
>>       * Add generic IP protocol support to conntrack. With this change, all
>>         none UDP, TCP, and ICMP traffic will be treated as general L3
>>         traffic, i.e. using 3 tupples.
>> +     * EMC alive entries counter has been added to command
>> +       "ovs-appctl dpif-netdev/pmd-stats-show"
>>     - The environment variable OVS_UNBOUND_CONF, if set, is now used
>>       as the DNS resolver's (unbound) configuration file.
>>     - Linux datapath:
>> diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h
>> index 72645b6b3..80f50eae9 100644
>> --- a/lib/dpif-netdev-perf.h
>> +++ b/lib/dpif-netdev-perf.h
>> @@ -157,6 +157,8 @@ struct pmd_perf_stats {
>>      uint64_t last_tsc;
>>      /* Used to space certain checks in time. */
>>      uint64_t next_check_tsc;
>> +    /* Exact Match Cache used entries counter. */
>> +    atomic_uint32_t emc_n_entries;
>>      /* If non-NULL, outermost cycle timer currently running in PMD. */
>>      struct cycle_timer *cur_timer;
>>      /* Set of PMD counters with their zero offsets. */
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index 300861ca5..3eb70ccd5 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -913,7 +913,7 @@ dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
>>                             odp_port_t in_port);
>>  
>>  static inline bool emc_entry_alive(struct emc_entry *ce);
>> -static void emc_clear_entry(struct emc_entry *ce);
>> +static void emc_clear_entry(struct emc_entry *ce, struct pmd_perf_stats *s);
>>  static void smc_clear_entry(struct smc_bucket *b, int idx);
>>  
>>  static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
>> @@ -955,13 +955,15 @@ dfc_cache_init(struct dfc_cache *flow_cache)
>>  }
>>  
>>  static void
>> -emc_cache_uninit(struct emc_cache *flow_cache)
>> +emc_cache_uninit(struct emc_cache *flow_cache, struct pmd_perf_stats *s)
>>  {
>>      int i;
>>  
>>      for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) {
>> -        emc_clear_entry(&flow_cache->entries[i]);
>> +        emc_clear_entry(&flow_cache->entries[i], s);
>>      }
>> +
>> +    atomic_store_relaxed(&s->emc_n_entries, 0);
>>  }
>>  
>>  static void
>> @@ -977,21 +980,21 @@ smc_cache_uninit(struct smc_cache *smc)
>>  }
>>  
>>  static void
>> -dfc_cache_uninit(struct dfc_cache *flow_cache)
>> +dfc_cache_uninit(struct dfc_cache *flow_cache, struct pmd_perf_stats *s)
>>  {
>>      smc_cache_uninit(&flow_cache->smc_cache);
>> -    emc_cache_uninit(&flow_cache->emc_cache);
>> +    emc_cache_uninit(&flow_cache->emc_cache, s);
>>  }
>>  
>>  /* Check and clear dead flow references slowly (one entry at each
>>   * invocation).  */
>>  static void
>> -emc_cache_slow_sweep(struct emc_cache *flow_cache)
>> +emc_cache_slow_sweep(struct emc_cache *flow_cache, struct pmd_perf_stats *s)
>>  {
>>      struct emc_entry *entry = &flow_cache->entries[flow_cache->sweep_idx];
>>  
>>      if (!emc_entry_alive(entry)) {
>> -        emc_clear_entry(entry);
>> +        emc_clear_entry(entry, s);
>>      }
>>      flow_cache->sweep_idx = (flow_cache->sweep_idx + 1) & EM_FLOW_HASH_MASK;
>>  }
>> @@ -1093,15 +1096,25 @@ pmd_info_show_stats(struct ds *reply,
>>                    "  packets received: %"PRIu64"\n"
>>                    "  packet recirculations: %"PRIu64"\n"
>>                    "  avg. datapath passes per packet: %.02f\n"
>> -                  "  emc hits: %"PRIu64"\n"
>> +                  "  emc hits: %"PRIu64"\n",
>> +                  total_packets, stats[PMD_STAT_RECIRC],
>> +                  passes_per_pkt, stats[PMD_STAT_EXACT_HIT]);
>> +
>> +    if (pmd->core_id != NON_PMD_CORE_ID) {
>> +        uint32_t emc_entries;
>> +        atomic_read_relaxed(&pmd->perf_stats.emc_n_entries, &emc_entries);
>> +        ds_put_format(reply, "  emc entries: %"PRIu32" (%.02f%%)\n",
>> +                      emc_entries,
>> +                      100.0 * emc_entries / EM_FLOW_HASH_ENTRIES);
>> +    }
>> +
>> +    ds_put_format(reply,
>>                    "  smc hits: %"PRIu64"\n"
>>                    "  megaflow hits: %"PRIu64"\n"
>>                    "  avg. subtable lookups per megaflow hit: %.02f\n"
>>                    "  miss with success upcall: %"PRIu64"\n"
>>                    "  miss with failed upcall: %"PRIu64"\n"
>>                    "  avg. packets per output batch: %.02f\n",
>> -                  total_packets, stats[PMD_STAT_RECIRC],
>> -                  passes_per_pkt, stats[PMD_STAT_EXACT_HIT],
>>                    stats[PMD_STAT_SMC_HIT],
>>                    stats[PMD_STAT_MASKED_HIT], lookups_per_hit,
>>                    stats[PMD_STAT_MISS], stats[PMD_STAT_LOST],
>> @@ -2004,6 +2018,26 @@ non_atomic_ullong_add(atomic_ullong *var, unsigned long long n)
>>      atomic_store_relaxed(var, tmp);
>>  }
>>  
>> +static void
>> +non_atomic_dec(atomic_uint32_t *var)
>> +{
>> +    unsigned int tmp;
>> +
>> +    atomic_read_relaxed(var, &tmp);
>> +    tmp--;
>> +    atomic_store_relaxed(var, tmp);
>> +}
>> +
>> +static void
>> +non_atomic_inc(atomic_uint32_t *var)
>> +{
>> +    unsigned int tmp;
>> +
>> +    atomic_read_relaxed(var, &tmp);
>> +    tmp++;
>> +    atomic_store_relaxed(var, tmp);
>> +}
>> +
>>  static int
>>  dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
>>  {
>> @@ -3070,25 +3104,28 @@ emc_entry_alive(struct emc_entry *ce)
>>  }
>>  
>>  static void
>> -emc_clear_entry(struct emc_entry *ce)
>> +emc_clear_entry(struct emc_entry *ce, struct pmd_perf_stats *s)
>>  {
>>      if (ce->flow) {
>>          dp_netdev_flow_unref(ce->flow);
>>          ce->flow = NULL;
>> +        non_atomic_dec(&s->emc_n_entries);
>>      }
>>  }
>>  
>>  static inline void
>>  emc_change_entry(struct emc_entry *ce, struct dp_netdev_flow *flow,
>> -                 const struct netdev_flow_key *key)
>> +                 const struct netdev_flow_key *key, struct pmd_perf_stats *s)
>>  {
>>      if (ce->flow != flow) {
>>          if (ce->flow) {
>>              dp_netdev_flow_unref(ce->flow);
>> +            non_atomic_dec(&s->emc_n_entries);
>>          }
>>  
>>          if (dp_netdev_flow_ref(flow)) {
>>              ce->flow = flow;
>> +            non_atomic_inc(&s->emc_n_entries);
>>          } else {
>>              ce->flow = NULL;
>>          }
>> @@ -3100,7 +3137,7 @@ emc_change_entry(struct emc_entry *ce, struct dp_netdev_flow *flow,
>>  
>>  static inline void
>>  emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key,
>> -           struct dp_netdev_flow *flow)
>> +           struct dp_netdev_flow *flow, struct pmd_perf_stats *s)
>>  {
>>      struct emc_entry *to_be_replaced = NULL;
>>      struct emc_entry *current_entry;
>> @@ -3108,7 +3145,7 @@ emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key,
>>      EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, key->hash) {
>>          if (netdev_flow_key_equal(&current_entry->key, key)) {
>>              /* We found the entry with the 'mf' miniflow */
>> -            emc_change_entry(current_entry, flow, NULL);
>> +            emc_change_entry(current_entry, flow, NULL, s);
>>              return;
>>          }
>>  
>> @@ -3124,7 +3161,7 @@ emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key,
>>      /* We didn't find the miniflow in the cache.
>>       * The 'to_be_replaced' entry is where the new flow will be stored */
>>  
>> -    emc_change_entry(to_be_replaced, flow, key);
>> +    emc_change_entry(to_be_replaced, flow, key, s);
>>  }
>>  
>>  static inline void
>> @@ -3139,7 +3176,7 @@ emc_probabilistic_insert(struct dp_netdev_pmd_thread *pmd,
>>      uint32_t min = pmd->ctx.emc_insert_min;
>>  
>>      if (min && random_uint32() <= min) {
>> -        emc_insert(&(pmd->flow_cache).emc_cache, key, flow);
>> +        emc_insert(&(pmd->flow_cache).emc_cache, key, flow, &pmd->perf_stats);
>>      }
>>  }
>>  
>> @@ -6014,7 +6051,8 @@ reload:
>>              coverage_try_clear();
>>              dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt);
>>              if (!ovsrcu_try_quiesce()) {
>> -                emc_cache_slow_sweep(&((pmd->flow_cache).emc_cache));
>> +                emc_cache_slow_sweep(&((pmd->flow_cache).emc_cache),
>> +                                     &pmd->perf_stats);
>>                  pmd->next_rcu_quiesce =
>>                      pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
>>              }
>> @@ -6058,7 +6096,7 @@ reload:
>>      }
>>  
>>      pmd_free_static_tx_qid(pmd);
>> -    dfc_cache_uninit(&pmd->flow_cache);
>> +    dfc_cache_uninit(&pmd->flow_cache, &pmd->perf_stats);
>>      free(poll_list);
>>      pmd_free_cached_ports(pmd);
>>      return NULL;
>> @@ -6537,7 +6575,7 @@ dp_netdev_del_pmd(struct dp_netdev *dp, struct dp_netdev_pmd_thread *pmd)
>>       * but extra cleanup is necessary */
>>      if (pmd->core_id == NON_PMD_CORE_ID) {
>>          ovs_mutex_lock(&dp->non_pmd_mutex);
>> -        dfc_cache_uninit(&pmd->flow_cache);
>> +        dfc_cache_uninit(&pmd->flow_cache, &pmd->perf_stats);
>>          pmd_free_cached_ports(pmd);
>>          pmd_free_static_tx_qid(pmd);
>>          ovs_mutex_unlock(&dp->non_pmd_mutex);
>> diff --git a/tests/pmd.at b/tests/pmd.at
>> index cc5371d5a..45c69563c 100644
>> --- a/tests/pmd.at
>> +++ b/tests/pmd.at
>> @@ -202,12 +202,13 @@ dummy at ovs-dummy: hit:0 missed:0
>>      p0 7/1: (dummy-pmd: configured_rx_queues=4, configured_tx_queues=<cleared>, requested_rx_queues=4, requested_tx_queues=<cleared>)
>>  ])
>>  
>> -AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 9], [0], [dnl
>> +AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 10], [0], [dnl
>>  pmd thread numa_id <cleared> core_id <cleared>:
>>    packets received: 0
>>    packet recirculations: 0
>>    avg. datapath passes per packet: 0.00
>>    emc hits: 0
>> +  emc entries: 0 (0.00%)
>>    smc hits: 0
>>    megaflow hits: 0
>>    avg. subtable lookups per megaflow hit: 0.00
>> @@ -233,12 +234,13 @@ AT_CHECK([cat ovs-vswitchd.log | filter_flow_install | strip_xout], [0], [dnl
>>  recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:77,dst=50:54:00:00:01:78),eth_type(0x0800),ipv4(frag=no), actions: <del>
>>  ])
>>  
>> -AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 9], [0], [dnl
>> +AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 10], [0], [dnl
>>  pmd thread numa_id <cleared> core_id <cleared>:
>>    packets received: 20
>>    packet recirculations: 0
>>    avg. datapath passes per packet: 1.00
>>    emc hits: 19
>> +  emc entries: 1 (0.01%)
>>    smc hits: 0
>>    megaflow hits: 0
>>    avg. subtable lookups per megaflow hit: 0.00
>> 