[ovs-dev] [v4 02/12] dpif-netdev: Add auto validation function for miniflow extract

Eelco Chaudron echaudro at redhat.com
Fri Jun 25 13:00:12 UTC 2021


Hi Kumar,

I plan to review this patch set, but I need to go over the dpif AVX512 
set first to get a better understanding.

However, I did run some performance tests on old hardware (as I do not 
have an AVX512 system) and notice some degradation (and improvements). 
This was a single run for both scenarios, with the following results 
(based on ovs_perf), on a single Intel(R) Xeon(R) CPU E5-2690 v4 @ 
2.60GHz:

        Number of flows 64      128     256     512     768     1024    
1514
Delta
        10              1.48%    1.72%   1.59%  -0.12%   0.44%   6.99%   
7.31%
        1000            1.06%   -0.73%  -1.46%  -1.42%  -2.54%  -0.20%  
-0.98%
        10000          -0.93%   -1.62%  -0.32%  -1.50%  -0.30%  -0.56%   
0.19%
        100000          0.39%   -0.05%  -0.60%  -0.51%  -0.90%   1.24%  
-1.10%


Master
        10              4767168 4601575 4382381 4127125 3594158 2932787 
2400479
        100             3804956 3612716 3547054 3127117 2950324 2615856 
2133892
        1000            3251959 3257535 2985693 2869970 2549086 2286262 
1979985
        10000           2671946 2624808 2536575 2412845 2190386 1952359 
1699142


Patch
        10              4838691 4682131 4453022 4122100 3609915 3153228 
2589748
        100             3845585 3586650 3496167 3083467 2877265 2610640 
2113108
        1000            3221894 3205732 2976203 2827620 2541349 2273468 
1983794
        10000           2682461 2623585 2521419 2400627 2170751 1976909 
1680607

Zero loss for master 5.8% (3,452,306pps) vs on Patch 5.7% 
(3,392,783pps).


Did you guys do any tests like this? I think it would be good not only 
to know the improvement but also the degradation of existing systems 
without AVX512.


I see Ian is currently reviewing the v4 and was wondering if you plan to 
send the v5 soon, if so, I hold off a bit, and do the v5 rather than 
doing the v4 and verify it’s not something Ian mentioned.

Cheers,


Eelco


On 17 Jun 2021, at 18:27, Kumar Amber wrote:

> This patch introduced the auto-validation function which
> allows users to compare the batch of packets obtained from
> different miniflow implementations against the linear
> miniflow extract and return a hitmask.
>
> The autovaidator function can be triggered at runtime using the
> following command:
>
> $ ovs-appctl dpif-netdev/miniflow-parser-set autovalidator
>
> Signed-off-by: Kumar Amber <kumar.amber at intel.com>
> Co-authored-by: Harry van Haaren <harry.van.haaren at intel.com>
> Signed-off-by: Harry van Haaren <harry.van.haaren at intel.com>
> ---
>  lib/dpif-netdev-private-extract.c | 141 
> ++++++++++++++++++++++++++++++
>  lib/dpif-netdev-private-extract.h |  15 ++++
>  lib/dpif-netdev.c                 |   2 +-
>  3 files changed, 157 insertions(+), 1 deletion(-)
>
> diff --git a/lib/dpif-netdev-private-extract.c 
> b/lib/dpif-netdev-private-extract.c
> index fcc56ef26..0741c19f9 100644
> --- a/lib/dpif-netdev-private-extract.c
> +++ b/lib/dpif-netdev-private-extract.c
> @@ -32,6 +32,11 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);
>
>  /* Implementations of available extract options. */
>  static struct dpif_miniflow_extract_impl mfex_impls[] = {
> +   {
> +        .probe = NULL,
> +        .extract_func = dpif_miniflow_extract_autovalidator,
> +        .name = "autovalidator",
> +    },
>      {
>          .probe = NULL,
>          .extract_func = NULL,
> @@ -84,3 +89,139 @@ dpif_miniflow_extract_info_get(struct 
> dpif_miniflow_extract_impl **out_ptr)
>      *out_ptr = mfex_impls;
>      return ARRAY_SIZE(mfex_impls);
>  }
> +
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
> +                                    struct netdev_flow_key *keys,
> +                                    uint32_t keys_size, odp_port_t 
> in_port,
> +                                    void *pmd_handle)
> +{
> +    const size_t cnt = dp_packet_batch_size(packets);
> +    uint16_t good_l2_5_ofs[NETDEV_MAX_BURST];
> +    uint16_t good_l3_ofs[NETDEV_MAX_BURST];
> +    uint16_t good_l4_ofs[NETDEV_MAX_BURST];
> +    uint16_t good_l2_pad_size[NETDEV_MAX_BURST];
> +    struct dp_packet *packet;
> +    struct dp_netdev_pmd_thread *pmd = pmd_handle;
> +    struct dpif_miniflow_extract_impl *miniflow_funcs;
> +
> +    int32_t mfunc_count = 
> dpif_miniflow_extract_info_get(&miniflow_funcs);
> +    if (mfunc_count < 0) {
> +        pmd->miniflow_extract_opt = NULL;
> +        VLOG_ERR("failed to get miniflow extract function 
> implementations\n");
> +        return 0;
> +    }
> +    ovs_assert(keys_size >= cnt);
> +    struct netdev_flow_key test_keys[NETDEV_MAX_BURST];
> +
> +    /* Run scalar miniflow_extract to get default result. */
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> +        pkt_metadata_init(&packet->md, in_port);
> +        miniflow_extract(packet, &keys[i].mf);
> +
> +        /* Store known good metadata to compare with optimized 
> metadata. */
> +        good_l2_5_ofs[i] = packet->l2_5_ofs;
> +        good_l3_ofs[i] = packet->l3_ofs;
> +        good_l4_ofs[i] = packet->l4_ofs;
> +        good_l2_pad_size[i] = packet->l2_pad_size;
> +    }
> +
> +    /* Iterate through each version of miniflow implementations. */
> +    for (int j = MFEX_IMPL_START_IDX; j < ARRAY_SIZE(mfex_impls); 
> j++) {
> +        if (!mfex_impls[j].available) {
> +            continue;
> +        }
> +
> +        /* Reset keys and offsets before each implementation. */
> +        memset(test_keys, 0, keys_size * sizeof(struct 
> netdev_flow_key));
> +        DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> +            dp_packet_reset_offsets(packet);
> +        }
> +        /* Call optimized miniflow for each batch of packet. */
> +        uint32_t hit_mask = mfex_impls[j].extract_func(packets, 
> test_keys,
> +                                            keys_size, in_port, 
> pmd_handle);
> +
> +        /* Do a miniflow compare for bits, blocks and offsets for all 
> the
> +         * classified packets in the hitmask marked by set bits. */
> +        while (hit_mask) {
> +            /* Index for the set bit. */
> +            uint32_t i = __builtin_ctz(hit_mask);
> +            /* Set the index in hitmask to Zero. */
> +            hit_mask &= (hit_mask - 1);
> +
> +            uint32_t failed = 0;
> +
> +            /* Check miniflow bits are equal. */
> +            if ((keys[i].mf.map.bits[0] != 
> test_keys[i].mf.map.bits[0]) ||
> +                (keys[i].mf.map.bits[1] != 
> test_keys[i].mf.map.bits[1])) {
> +                VLOG_ERR("Good 0x%llx 0x%llx\tTest 0x%llx 0x%llx\n",
> +                         keys[i].mf.map.bits[0], 
> keys[i].mf.map.bits[1],
> +                         test_keys[i].mf.map.bits[0],
> +                         test_keys[i].mf.map.bits[1]);
> +                failed = 1;
> +            }
> +
> +            if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
> +                uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
> +                VLOG_ERR("Autovalidation blocks failed for %s pkt 
> %d",
> +                         mfex_impls[j].name, i);
> +                VLOG_ERR("  Good hexdump:\n");
> +                uint64_t *good_block_ptr = (uint64_t *)&keys[i].buf;
> +                uint64_t *test_block_ptr = (uint64_t 
> *)&test_keys[i].buf;
> +                for (uint32_t b = 0; b < block_cnt; b++) {
> +                    VLOG_ERR("    %"PRIx64"\n", good_block_ptr[b]);
> +                }
> +                VLOG_ERR("  Test hexdump:\n");
> +                for (uint32_t b = 0; b < block_cnt; b++) {
> +                    VLOG_ERR("    %"PRIx64"\n", test_block_ptr[b]);
> +                }
> +                failed = 1;
> +            }
> +
> +            if ((packets->packets[i]->l2_pad_size != 
> good_l2_pad_size[i]) ||
> +                    (packets->packets[i]->l2_5_ofs != 
> good_l2_5_ofs[i]) ||
> +                    (packets->packets[i]->l3_ofs != good_l3_ofs[i]) 
> ||
> +                    (packets->packets[i]->l4_ofs != good_l4_ofs[i])) 
> {
> +                VLOG_ERR("Autovalidation packet offsets failed for %s 
> pkt %d",
> +                         mfex_impls[j].name, i);
> +                VLOG_ERR("  Good offsets: l2_pad_size %u, l2_5_ofs : 
> %u"
> +                         " l3_ofs %u, l4_ofs %u\n",
> +                         good_l2_pad_size[i], good_l2_5_ofs[i],
> +                         good_l3_ofs[i], good_l4_ofs[i]);
> +                VLOG_ERR("  Test offsets: l2_pad_size %u, l2_5_ofs : 
> %u"
> +                         " l3_ofs %u, l4_ofs %u\n",
> +                         packets->packets[i]->l2_pad_size,
> +                         packets->packets[i]->l2_5_ofs,
> +                         packets->packets[i]->l3_ofs,
> +                         packets->packets[i]->l4_ofs);
> +                failed = 1;
> +            }
> +
> +            if (failed) {
> +                /* Having dumped the debug info, disable 
> autovalidator. */
> +                VLOG_ERR("Autovalidation failed in %s pkt %d, 
> disabling.\n",
> +                         mfex_impls[j].name, i);
> +                /* Halt OVS here on debug builds. */
> +                ovs_assert(0);
> +                pmd->miniflow_extract_opt = NULL;
> +                break;
> +            }
> +        }
> +    }
> +
> +    /* preserve packet correctness by storing back the good offsets 
> in
> +     * packets back. */
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
> +        packet->l2_5_ofs = good_l2_5_ofs[i];
> +        packet->l3_ofs = good_l3_ofs[i];
> +        packet->l4_ofs = good_l4_ofs[i];
> +        packet->l2_pad_size = good_l2_pad_size[i];
> +    }
> +
> +    /* Returning zero implies no packets were hit by autovalidation. 
> This
> +     * simplifies unit-tests as changing 
> --enable-mfex-default-autovalidator
> +     * would pass/fail. By always returning zero, autovalidator is a 
> little
> +     * slower, but we gain consistency in testing.
> +     */
> +    return 0;
> +}
> diff --git a/lib/dpif-netdev-private-extract.h 
> b/lib/dpif-netdev-private-extract.h
> index b7b0b2be4..455a7b590 100644
> --- a/lib/dpif-netdev-private-extract.h
> +++ b/lib/dpif-netdev-private-extract.h
> @@ -24,6 +24,11 @@
>  /* Max size of dpif_miniflow_extract_impl array. */
>  #define MFEX_IMPLS_MAX_SIZE (16)
>
> +/* Skip the autovalidator study and null when iterating all available
> + * miniflow implementations.
> + */
> +#define MFEX_IMPL_START_IDX (1)
> +
>  /* Forward declarations. */
>  struct dp_packet;
>  struct miniflow;
> @@ -90,5 +95,15 @@ dpif_miniflow_extract_init(void);
>  int32_t
>  dpif_miniflow_extract_info_get(struct dpif_miniflow_extract_impl 
> **out_ptr);
>
> +/* Retrieve the hitmask of the batch of pakcets which is obtained by 
> comparing
> + * different miniflow implementations with linear miniflow extract.
> + * On error, returns a zero.
> + * On success, returns the number of packets in the batch compared.
> + */
> +uint32_t
> +dpif_miniflow_extract_autovalidator(struct dp_packet_batch *batch,
> +                                    struct netdev_flow_key *keys,
> +                                    uint32_t keys_size, odp_port_t 
> in_port,
> +                                    void *pmd_handle);
>
>  #endif /* DPIF_NETDEV_AVX512_EXTRACT */
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 567ebd952..4f4ab2790 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -1181,8 +1181,8 @@ dpif_miniflow_extract_impl_set(struct 
> unixctl_conn *conn, int argc,
>      struct ds reply = DS_EMPTY_INITIALIZER;
>      ds_put_format(&reply, "Miniflow implementation set to %s.\n", 
> mfex_name);
>      const char *reply_str = ds_cstr(&reply);
> -    unixctl_command_reply(conn, reply_str);
>      VLOG_INFO("%s", reply_str);
> +    unixctl_command_reply(conn, reply_str);
>      ds_destroy(&reply);
>  }
>
> -- 
> 2.25.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev


More information about the dev mailing list