[ovs-dev] [v6 03/11] dpif-netdev: Add study function to select the best mfex function

Eelco Chaudron echaudro at redhat.com
Wed Jul 7 12:23:57 UTC 2021



On 6 Jul 2021, at 15:11, Cian Ferriter wrote:

> From: Kumar Amber <kumar.amber at intel.com>
>
> The study function runs all the available implementations
> of miniflow_extract and makes a choice whose hitmask has
> maximum hits and sets the mfex to that function.
>
> Study can be run at runtime using the following command:
>
> $ ovs-appctl dpif-netdev/miniflow-parser-set study
>
> Signed-off-by: Kumar Amber <kumar.amber at intel.com>
> Co-authored-by: Harry van Haaren <harry.van.haaren at intel.com>
> Signed-off-by: Harry van Haaren <harry.van.haaren at intel.com>
>
> ---
>
> v5:
> - fix review comments(Ian, Flavio, Eelco)
> - add Atomic set in study
> ---
> ---
>  NEWS                              |   3 +
>  lib/automake.mk                   |   1 +
>  lib/dpif-netdev-extract-study.c   | 124 ++++++++++++++++++++++++++++++
>  lib/dpif-netdev-private-extract.c |  19 ++++-
>  lib/dpif-netdev-private-extract.h |  20 +++++
>  5 files changed, 165 insertions(+), 2 deletions(-)
>  create mode 100644 lib/dpif-netdev-extract-study.c
>
> diff --git a/NEWS b/NEWS
> index ccf9a0f1e..275aa1868 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -25,6 +25,9 @@ Post-v2.15.0
>       * Add command line option to switch between mfex function pointers.
>       * Add miniflow extract auto-validator function to compare different
>         miniflow extract implementations against default implementation.
> +    *  Add study function to miniflow function table which studies packet
> +       and automatically chooses the best miniflow implementation for that
> +       traffic.
>     - ovs-ctl:
>       * New option '--no-record-hostname' to disable hostname configuration
>         in ovsdb on startup.
> diff --git a/lib/automake.mk b/lib/automake.mk
> index 6657b9ae5..5223d321b 100644
> --- a/lib/automake.mk
> +++ b/lib/automake.mk
> @@ -107,6 +107,7 @@ lib_libopenvswitch_la_SOURCES = \
>  	lib/dp-packet.h \
>  	lib/dp-packet.c \
>  	lib/dpdk.h \
> +	lib/dpif-netdev-extract-study.c \
>  	lib/dpif-netdev-lookup.h \
>  	lib/dpif-netdev-lookup.c \
>  	lib/dpif-netdev-lookup-autovalidator.c \
> diff --git a/lib/dpif-netdev-extract-study.c b/lib/dpif-netdev-extract-study.c
> new file mode 100644
> index 000000000..32b76bd03
> --- /dev/null
> +++ b/lib/dpif-netdev-extract-study.c
> @@ -0,0 +1,124 @@
> +/*
> + * Copyright (c) 2021 Intel.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +#include <errno.h>
> +#include <stdint.h>
> +#include <string.h>
> +
> +#include "dpif-netdev-private-thread.h"
> +#include "openvswitch/vlog.h"
> +#include "ovs-thread.h"
> +
> +VLOG_DEFINE_THIS_MODULE(dpif_mfex_extract_study);
> +
> +/* Max count of packets to be compared. */
> +#define MFEX_MAX_COUNT (128)
> +
> +static uint32_t mfex_study_pkts_count = 0;
> +
> +/* Struct to hold miniflow study stats. */
> +struct study_stats {
> +    uint32_t pkt_count;
> +    uint32_t impl_hitcount[MFEX_IMPL_MAX];
> +};
> +
> +/* Define per thread data to hold the study stats. */
> +DEFINE_PER_THREAD_MALLOCED_DATA(struct study_stats *, study_stats);
> +
> +/* Allocate per thread PMD pointer space for study_stats. */
> +static inline struct study_stats *
> +mfex_study_get_study_stats_ptr(void)
> +{
> +    struct study_stats *stats = study_stats_get();
> +    if (OVS_UNLIKELY(!stats)) {
> +       stats = xzalloc(sizeof *stats);
> +       study_stats_set_unsafe(stats);
> +    }
> +    return stats;
> +}
> +
> +uint32_t
> +mfex_study_traffic(struct dp_packet_batch *packets,
> +                   struct netdev_flow_key *keys,
> +                   uint32_t keys_size, odp_port_t in_port,
> +                   struct dp_netdev_pmd_thread *pmd_handle)
> +{
> +    uint32_t hitmask = 0;
> +    uint32_t mask = 0;
> +    struct dp_netdev_pmd_thread *pmd = pmd_handle;
> +    struct dpif_miniflow_extract_impl *miniflow_funcs;
> +    uint32_t impl_count = dpif_mfex_impl_info_get(&miniflow_funcs);

This function returns an -errno on failure, so we should test the return.

> +    struct study_stats *stats = mfex_study_get_study_stats_ptr();
> +
> +    /* Run traffic optimized miniflow_extract to collect the hitmask
> +     * to be compared after certain packets have been hit to choose
> +     * the best miniflow_extract version for that traffic.
> +     */
> +    for (int i = MFEX_IMPL_MAX; i < impl_count; i++) {

See comment on patch 2 on using an explicit minimum value.

> +        if (miniflow_funcs[i].available) {

For consistency, and to safe one indent level, why not do the below, as you did in your other patch:
	
if (!miniflow_funcs[i].available) {
  Continue;
}

> +            hitmask = miniflow_funcs[i].extract_func(packets, keys, keys_size,
> +                                                     in_port, pmd_handle);
> +            stats->impl_hitcount[i] += count_1bits(hitmask);
> +
> +            /* If traffic is not classified then we dont overwrite the keys
> +             * array in minfiflow implementations so its safe to create a
> +             * mask for all those packets whose miniflow have been created.
> +             */
> +            mask |= hitmask;
> +        }
> +    }
> +    stats->pkt_count += dp_packet_batch_size(packets);
> +
> +    /* Choose the best implementation after a minimum packets have been
> +     * processed.
> +     */
> +    if (stats->pkt_count >= MFEX_MAX_COUNT) {
> +        uint32_t best_func_index = MFEX_IMPL_MAX;

See comment on MFEX_IMPL_START_IDX for above and for loop below.

> +        uint32_t max_hits = 0;
> +        for (int i = MFEX_IMPL_MAX; i < impl_count; i++) {
> +            if (stats->impl_hitcount[i] > max_hits) {
> +                max_hits = stats->impl_hitcount[i];
> +                best_func_index = i;
> +            }
> +        }
> +
> +        /* If 50% of the packets hit, enable the function. */
> +        if (max_hits >= (mfex_study_pkts_count / 2)) {
> +            miniflow_extract_func mf_func =
> +                        miniflow_funcs[best_func_index].extract_func;
> +            atomic_uintptr_t *pmd_func = (void *)&pmd->miniflow_extract_opt;
> +            atomic_store_relaxed(pmd_func, (uintptr_t) mf_func);
> +            VLOG_INFO("MFEX study chose impl %s: (hits %d/%d pkts)",
> +                      miniflow_funcs[best_func_index].name, max_hits,
> +                      stats->pkt_count);
> +        } else {
> +            /* Set the implementation to null for default miniflow. */
> +            miniflow_extract_func mf_func =
> +                        miniflow_funcs[MFEX_IMPL_SCALAR].extract_func;
> +            atomic_uintptr_t *pmd_func = (void *)&pmd->miniflow_extract_opt;
> +            atomic_store_relaxed(pmd_func, (uintptr_t) mf_func);
> +            VLOG_INFO("Not enough packets matched (%d/%d), disabling"
> +                      " optimized MFEX.", max_hits, stats->pkt_count);
> +        }

I still would like to see the hits for all hits when debugging is enabled.
Maybe something like

if (VLOG_IS_DBG_ENABLED()) {
	for each imp in i:
       VLOG_DBG(“MFEX study results for implementation %s: (hits %d/%d pkts)",
                      miniflow_funcs[i].name, stats->impl_hitcount[i],
                      stats->pkt_count);

}

> +        /* Reset stats so that study function can be called again
> +         * for next traffic type and optimal function ptr can be
> +         * chosen.
> +         */
> +        memset(stats, 0, sizeof(struct study_stats));
> +    }
> +    return mask;
> +}
> diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c
> index 62170ff6c..eaddeceaf 100644
> --- a/lib/dpif-netdev-private-extract.c
> +++ b/lib/dpif-netdev-private-extract.c
> @@ -47,6 +47,11 @@ static struct dpif_miniflow_extract_impl mfex_impls[] = {
>          .probe = NULL,
>          .extract_func = NULL,
>          .name = "scalar", },
> +
> +    [MFEX_IMPL_STUDY] = {
> +        .probe = NULL,
> +        .extract_func = mfex_study_traffic,
> +        .name = "study", },
>  };
>
>  BUILD_ASSERT_DECL(MFEX_IMPL_MAX >= ARRAY_SIZE(mfex_impls));
> @@ -88,7 +93,6 @@ dp_mfex_impl_set_default_by_name(const char *name)
>  {
>      miniflow_extract_func new_default;
>
> -

Guess this need to be fixed in the previous patch.

>      int32_t err = dp_mfex_impl_get_by_name(name, &new_default);
>
>      if (!err) {
> @@ -146,7 +150,6 @@ dp_mfex_impl_get_by_name(const char *name, miniflow_extract_func *out_func)
>      }
>
>      uint32_t i;
> -

Guess this need to be fixed in the previous patch.

>      for (i = 0; i < ARRAY_SIZE(mfex_impls); i++) {
>          if (strcmp(mfex_impls[i].name, name) == 0) {
>              /* Probe function is optional - so check it is set before exec. */
> @@ -163,6 +166,18 @@ dp_mfex_impl_get_by_name(const char *name, miniflow_extract_func *out_func)
>      return -EINVAL;
>  }
>
> +int32_t

Please make this an int.

> +dpif_mfex_impl_info_get(struct dpif_miniflow_extract_impl **out_ptr)
> +{
> +    if (out_ptr == NULL) {
> +        return -EINVAL;
> +    }
> +
> +    *out_ptr = mfex_impls;
> +
> +    return ARRAY_SIZE(mfex_impls);
> +}
> +
>  uint32_t
>  dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
>                                      struct netdev_flow_key *keys,
> diff --git a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h
> index 10525c378..cd46c94dd 100644
> --- a/lib/dpif-netdev-private-extract.h
> +++ b/lib/dpif-netdev-private-extract.h
> @@ -71,6 +71,7 @@ struct dpif_miniflow_extract_impl {
>  enum dpif_miniflow_extract_impl_idx {
>      MFEX_IMPL_AUTOVALIDATOR,
>      MFEX_IMPL_SCALAR,
> +    MFEX_IMPL_STUDY,
>      MFEX_IMPL_MAX
>  };
>
> @@ -94,6 +95,13 @@ miniflow_extract_func dp_mfex_impl_get_default(void);
>  /* Overrides the default MFEX with the user set MFEX. */
>  int32_t dp_mfex_impl_set_default_by_name(const char *name);
>
> +/* Retrieve the array of miniflow implementations for iteration.
> + * On error, returns a negative number.
> + * On success, returns the size of the arrays pointed to by the out parameter.
> + */
> +int32_t
> +dpif_mfex_impl_info_get(struct dpif_miniflow_extract_impl **out_ptr);
> +
>
>  /* Initializes the available miniflow extract implementations by probing for
>   * the CPU ISA requirements. As the runtime available CPU ISA does not change
> @@ -115,4 +123,16 @@ dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
>                                      uint32_t keys_size, odp_port_t in_port,
>                                      struct dp_netdev_pmd_thread *pmd_handle);
>
> +/* Retrieve the number of packets by studying packets using different miniflow
> + * implementations to choose the best implementation using the maximum hitmask
> + * count.
> + * On error, returns a zero for no packets.
> + * On success, returns mask of the packets hit.
> + */
> +uint32_t
> +mfex_study_traffic(struct dp_packet_batch *packets,
> +                   struct netdev_flow_key *keys,
> +                   uint32_t keys_size, odp_port_t in_port,
> +                   struct dp_netdev_pmd_thread *pmd_handle);
> +
>  #endif /* MFEX_AVX512_EXTRACT */
> -- 
> 2.32.0



More information about the dev mailing list