[ovs-dev] [v13 04/12] dpif-avx512: Add ISA implementation of dpif.

Flavio Leitner fbl at sysclose.org
Thu Jun 24 04:06:03 UTC 2021


On Thu, Jun 17, 2021 at 05:18:17PM +0100, Cian Ferriter wrote:
> From: Harry van Haaren <harry.van.haaren at intel.com>
> 
> This commit adds the AVX512 implementation of DPIF functionality,
> specifically the dp_netdev_input_outer_avx512 function. This function
> only handles outer (no re-circulations), and is optimized to use the
> AVX512 ISA for packet batching and other DPIF work.
> 
> Sparse is not able to handle the AVX512 intrinsics, causing compile
> time failures, so it is disabled for this file.
> 
> Signed-off-by: Harry van Haaren <harry.van.haaren at intel.com>
> Co-authored-by: Cian Ferriter <cian.ferriter at intel.com>
> Signed-off-by: Cian Ferriter <cian.ferriter at intel.com>
> Co-authored-by: Kumar Amber <kumar.amber at intel.com>
> Signed-off-by: Kumar Amber <kumar.amber at intel.com>
> 
> ---
> 
> v13:
> - Squash "Add HWOL support" commit into this commit.
> - Add NEWS item about this feature here rather than in a later commit.
> - Add #define NUM_U64_IN_ZMM_REG 8.
> - Add comment describing operation of while loop handling HWOL->EMC->SMC
>   lookups in dp_netdev_input_outer_avx512().
> - Add EMC and SMC batch insert functions for better handling of EMC and
>   SMC in AVX512 DPIF.
> - Minor code refactor to address review comments.
> ---
>  NEWS                             |   2 +
>  lib/automake.mk                  |   5 +-
>  lib/dpif-netdev-avx512.c         | 327 +++++++++++++++++++++++++++++++
>  lib/dpif-netdev-private-dfc.h    |  25 +++
>  lib/dpif-netdev-private-dpif.h   |  32 +++
>  lib/dpif-netdev-private-thread.h |  11 +-
>  lib/dpif-netdev-private.h        |  25 +++
>  lib/dpif-netdev.c                | 103 ++++++++--
>  8 files changed, 514 insertions(+), 16 deletions(-)
>  create mode 100644 lib/dpif-netdev-avx512.c
>  create mode 100644 lib/dpif-netdev-private-dpif.h
> 
> diff --git a/NEWS b/NEWS
> index 96b3a61c8..6a4a7b76d 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -10,6 +10,8 @@ Post-v2.15.0
>       * Auto load balancing of PMDs now partially supports cross-NUMA polling
>         cases, e.g if all PMD threads are running on the same NUMA node.
>       * Refactor lib/dpif-netdev.c to multiple header files.
> +     * Add avx512 implementation of dpif which can process non recirculated
> +       packets. It supports partial HWOL, EMC, SMC and DPCLS lookups.
>     - ovs-ctl:
>       * New option '--no-record-hostname' to disable hostname configuration
>         in ovsdb on startup.
> diff --git a/lib/automake.mk b/lib/automake.mk
> index 3a33cdd5c..660cd07f0 100644
> --- a/lib/automake.mk
> +++ b/lib/automake.mk
> @@ -33,11 +33,13 @@ lib_libopenvswitchavx512_la_CFLAGS = \
>  	-mavx512f \
>  	-mavx512bw \
>  	-mavx512dq \
> +	-mbmi \
>  	-mbmi2 \
>  	-fPIC \
>  	$(AM_CFLAGS)
>  lib_libopenvswitchavx512_la_SOURCES = \
> -	lib/dpif-netdev-lookup-avx512-gather.c
> +	lib/dpif-netdev-lookup-avx512-gather.c \
> +	lib/dpif-netdev-avx512.c
>  lib_libopenvswitchavx512_la_LDFLAGS = \
>  	-static
>  endif
> @@ -114,6 +116,7 @@ lib_libopenvswitch_la_SOURCES = \
>  	lib/dpif-netdev-private-dfc.c \
>  	lib/dpif-netdev-private-dfc.h \
>  	lib/dpif-netdev-private-dpcls.h \
> +	lib/dpif-netdev-private-dpif.h \
>  	lib/dpif-netdev-private-flow.h \
>  	lib/dpif-netdev-private-hwol.h \
>  	lib/dpif-netdev-private-thread.h \
> diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
> new file mode 100644
> index 000000000..0e55b0be2
> --- /dev/null
> +++ b/lib/dpif-netdev-avx512.c
> @@ -0,0 +1,327 @@
> +/*
> + * Copyright (c) 2021 Intel Corporation.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifdef __x86_64__
> +/* Sparse cannot handle the AVX512 instructions. */
> +#if !defined(__CHECKER__)
> +
> +#include <config.h>
> +
> +#include "dpif-netdev.h"
> +#include "dpif-netdev-perf.h"
> +
> +#include "dpif-netdev-private.h"
> +#include "dpif-netdev-private-dpcls.h"
> +#include "dpif-netdev-private-flow.h"
> +#include "dpif-netdev-private-thread.h"
> +#include "dpif-netdev-private-hwol.h"
> +
> +#include "dp-packet.h"
> +#include "netdev.h"
> +
> +#include "immintrin.h"
> +
> +/* Each AVX512 register (zmm register in assembly notation) can contain up to
> + * 512 bits, which is equivalent to 8 uint64_t variables. This is the maximum
> + * number of miniflow blocks that can be processed in a single pass of the
> + * AVX512 code at a time.
> + */
> +#define NUM_U64_IN_ZMM_REG (8)
> +
> +/* Structure to contain per-packet metadata that must be attributed to the
> + * dp netdev flow. This is unfortunate to have to track per packet, however
> + * it's a bit awkward to maintain them in a performant way. This structure
> + * helps to keep two variables on a single cache line per packet.
> + */
> +struct pkt_flow_meta {
> +    uint16_t bytes;
> +    uint16_t tcp_flags;
> +};
> +
> +/* Structure of heap allocated memory for DPIF internals. */
> +struct dpif_userdata {
> +    OVS_ALIGNED_VAR(CACHE_LINE_SIZE)
> +        struct netdev_flow_key keys[NETDEV_MAX_BURST];
> +    OVS_ALIGNED_VAR(CACHE_LINE_SIZE)
> +        struct netdev_flow_key *key_ptrs[NETDEV_MAX_BURST];
> +    OVS_ALIGNED_VAR(CACHE_LINE_SIZE)
> +        struct pkt_flow_meta pkt_meta[NETDEV_MAX_BURST];
> +};
> +
> +int32_t
> +dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
> +                             struct dp_packet_batch *packets,
> +                             odp_port_t in_port)
> +{
> +    /* Allocate DPIF userdata. */
> +    if (OVS_UNLIKELY(!pmd->netdev_input_func_userdata)) {
> +        pmd->netdev_input_func_userdata =
> +                xmalloc_pagealign(sizeof(struct dpif_userdata));
> +    }
> +
> +    struct dpif_userdata *ud = pmd->netdev_input_func_userdata;
> +    struct netdev_flow_key *keys = ud->keys;
> +    struct netdev_flow_key **key_ptrs = ud->key_ptrs;
> +    struct pkt_flow_meta *pkt_meta = ud->pkt_meta;
> +
> +    /* The AVX512 DPIF implementation handles rules in a way that is optimized
> +     * for reducing data-movement between HWOL/EMC/SMC and DPCLS. This is
> +     * achieved by separating the rule arrays. Bitmasks are kept for each
> +     * packet, indicating if it matched in the HWOL/EMC/SMC array or DPCLS
> +     * array. Later the two arrays are merged by AVX-512 expand instructions.
> +     */
> +
> +    /* Stores the computed output: a rule pointer for each packet. */
> +    /* Used initially for HWOL/EMC/SMC. */
> +    struct dpcls_rule *rules[NETDEV_MAX_BURST];
> +    /* Used for DPCLS. */
> +    struct dpcls_rule *dpcls_rules[NETDEV_MAX_BURST];
> +
> +    uint32_t dpcls_key_idx = 0;
> +
> +    for (uint32_t i = 0; i < NETDEV_MAX_BURST; i += NUM_U64_IN_ZMM_REG) {
> +        _mm512_storeu_si512(&rules[i], _mm512_setzero_si512());
> +        _mm512_storeu_si512(&dpcls_rules[i], _mm512_setzero_si512());
> +    }
> +
> +    /* Prefetch each packet's metadata. */
> +    const size_t batch_size = dp_packet_batch_size(packets);
> +    for (int i = 0; i < batch_size; i++) {
> +        struct dp_packet *packet = packets->packets[i];
> +        OVS_PREFETCH(dp_packet_data(packet));
> +        pkt_metadata_prefetch_init(&packet->md);
> +    }
> +
> +    /* Check if EMC or SMC are enabled. */
> +    struct dfc_cache *cache = &pmd->flow_cache;
> +    const uint32_t emc_enabled = pmd->ctx.emc_insert_min != 0;
> +    const uint32_t smc_enabled = pmd->ctx.smc_enable_db;
> +
> +    uint32_t emc_hits = 0;
> +    uint32_t smc_hits = 0;
> +
> +    /* A 1 bit in this mask indicates a hit, so no DPCLS lookup on the pkt. */
> +    uint32_t hwol_emc_smc_hitmask = 0;
> +    uint32_t smc_hitmask = 0;
> +
> +    /* The below while loop is based on the 'iter' variable which has a number
> +     * of bits set representing packets that we want to process
> +     * (HWOL->MFEX->EMC->SMC). As each packet is processed, we clear (set to 0)
> +     * the bit representing that packet using '_blsr_u64()'. The
> +     * '__builtin_ctz()' will give us the correct index into the 'packets',
> +     * 'pkt_meta', 'keys' and 'rules' arrays.
> +     *
> +     * For one iteration of the while loop, here's some psuedocode as an
> +     * example where 'iter' is represented in binary:
> +     *
> +     * while (iter) { // iter = 1100
> +     *     uint32_t i = __builtin_ctz(iter); // i = 2
> +     *     iter = _blsr_u64(iter); // iter = 1000
> +     *     // do all processing (HWOL->MFEX->EMC->SMC)
> +     * }
> +     */
> +    uint32_t lookup_pkts_bitmask = (1ULL << batch_size) - 1;
> +    uint32_t iter = lookup_pkts_bitmask;
> +    while (iter) {
> +        uint32_t i = __builtin_ctz(iter);
> +        iter = _blsr_u64(iter);
> +
> +        /* Get packet pointer from bitmask and packet md. */
> +        struct dp_packet *packet = packets->packets[i];
> +        pkt_metadata_init(&packet->md, in_port);
> +
> +        struct dp_netdev_flow *f = NULL;
> +
> +        /* Check for partial hardware offload mark. */
> +        uint32_t mark;
> +        if (dp_packet_has_flow_mark(packet, &mark)) {
> +            f = mark_to_flow_find(pmd, mark);
> +            if (f) {
> +                rules[i] = &f->cr;
> +                pkt_meta[i].tcp_flags = parse_tcp_flags(packet);
> +                pkt_meta[i].bytes = dp_packet_size(packet);
> +                hwol_emc_smc_hitmask |= (1 << i);
> +                continue;
> +            }
> +        }
> +
> +        /* Do miniflow extract into keys. */
> +        struct netdev_flow_key *key = &keys[i];
> +        miniflow_extract(packet, &key->mf);
> +
> +        /* Cache TCP and byte values for all packets. */
> +        pkt_meta[i].bytes = dp_packet_size(packet);
> +        pkt_meta[i].tcp_flags = miniflow_get_tcp_flags(&key->mf);
> +
> +        key->len = netdev_flow_key_size(miniflow_n_values(&key->mf));
> +        key->hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet, &key->mf);
> +
> +        if (emc_enabled) {
> +            f = emc_lookup(&cache->emc_cache, key);
> +
> +            if (f) {
> +                rules[i] = &f->cr;
> +                emc_hits++;
> +                hwol_emc_smc_hitmask |= (1 << i);
> +                continue;
> +            }
> +        }
> +
> +        if (smc_enabled && !f) {
> +            f = smc_lookup_single(pmd, packet, key);
> +            if (f) {
> +                rules[i] = &f->cr;
> +                smc_hits++;
> +                smc_hitmask |= (1 << i);
> +                continue;
> +            }
> +        }
> +
> +        /* The flow pointer was not found in HWOL/EMC/SMC, so add it to the
> +         * dpcls input keys array for batch lookup later.
> +         */
> +        key_ptrs[dpcls_key_idx] = &keys[i];
> +        dpcls_key_idx++;
> +    }
> +
> +    hwol_emc_smc_hitmask |= smc_hitmask;
> +
> +    /* DPCLS handles any packets missed by HWOL/EMC/SMC. It operates on the
> +     * key_ptrs[] for input miniflows to match, storing results in the
> +     * dpcls_rules[] array.
> +     */
> +    if (dpcls_key_idx > 0) {
> +        struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
> +        if (OVS_UNLIKELY(!cls)) {
> +            return -1;
> +        }
> +        bool any_miss =
> +            !dpcls_lookup(cls, (const struct netdev_flow_key **) key_ptrs,
> +                          dpcls_rules, dpcls_key_idx, NULL);
> +        if (OVS_UNLIKELY(any_miss)) {
> +            return -1;
> +        }
> +
> +        /* Merge DPCLS rules and HWOL/EMC/SMC rules. */
> +        uint32_t dpcls_idx = 0;
> +        for (int i = 0; i < NETDEV_MAX_BURST; i += NUM_U64_IN_ZMM_REG) {
> +            /* Indexing here is somewhat complicated due to DPCLS output rule
> +             * load index depending on the hitmask of HWOL/EMC/SMC. More
> +             * packets from HWOL/EMC/SMC bitmask means less DPCLS rules are
> +             * used.
> +             */
> +            __m512i v_cache_rules = _mm512_loadu_si512(&rules[i]);
> +            __m512i v_merged_rules =
> +                        _mm512_mask_expandloadu_epi64(v_cache_rules,
> +                                                      ~hwol_emc_smc_hitmask,
> +                                                      &dpcls_rules[dpcls_idx]);
> +            _mm512_storeu_si512(&rules[i], v_merged_rules);
> +
> +            /* Update DPCLS load index and bitmask for HWOL/EMC/SMC hits.
> +             * There are NUM_U64_IN_ZMM_REG output pointers per register,
> +             * subtract the HWOL/EMC/SMC lanes equals the number of DPCLS rules
> +             * consumed.
> +             */
> +            uint32_t hitmask_FF = (hwol_emc_smc_hitmask & 0xFF);
> +            dpcls_idx += NUM_U64_IN_ZMM_REG - __builtin_popcountll(hitmask_FF);
> +            hwol_emc_smc_hitmask =
> +                (hwol_emc_smc_hitmask >> NUM_U64_IN_ZMM_REG);
> +        }
> +    }
> +
> +    /* At this point we have a 1:1 pkt to rules mapping, so update EMC/SMC
> +     * if required.
> +     */
> +    /* Insert SMC and DPCLS hits into EMC. */
> +    /* Insert DPCLS hits into SMC. */
> +    if (emc_enabled) {
> +        uint32_t emc_insert_mask = smc_hitmask | ~hwol_emc_smc_hitmask;
> +        emc_insert_mask &= lookup_pkts_bitmask;
> +        emc_probabilistic_insert_batch(pmd, keys, &rules[0], emc_insert_mask);
> +    }
> +    if (smc_enabled) {
> +        uint32_t smc_insert_mask = ~hwol_emc_smc_hitmask;
> +        smc_insert_mask &= lookup_pkts_bitmask;
> +        smc_insert_batch(pmd, keys, &rules[0], smc_insert_mask);
> +    }
> +
> +    /* At this point we don't return error anymore, so commit stats here. */
> +    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_RECV, batch_size);
> +    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT, emc_hits);
> +    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT, smc_hits);
> +    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_HIT,
> +                            dpcls_key_idx);
> +    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_LOOKUP,
> +                            dpcls_key_idx);
> +
> +    /* Initialize the "Action Batch" for each flow handled below. */
> +    struct dp_packet_batch action_batch;
> +    action_batch.trunc = 0;
> +
> +    while (lookup_pkts_bitmask) {
> +        uint32_t rule_pkt_idx = __builtin_ctz(lookup_pkts_bitmask);
> +        uint64_t needle = (uintptr_t) rules[rule_pkt_idx];
> +
> +        /* Parallel compare NUM_U64_IN_ZMM_REG flow* 's to the needle, create a
> +         * bitmask.
> +         */
> +        uint32_t batch_bitmask = 0;
> +        for (uint32_t j = 0; j < NETDEV_MAX_BURST; j += NUM_U64_IN_ZMM_REG) {
> +            /* Pre-calculate store addr. */
> +            uint32_t num_pkts_in_batch = __builtin_popcountll(batch_bitmask);
> +            void *store_addr = &action_batch.packets[num_pkts_in_batch];
> +
> +            /* Search for identical flow* in burst, update bitmask. */
> +            __m512i v_needle = _mm512_set1_epi64(needle);
> +            __m512i v_hay = _mm512_loadu_si512(&rules[j]);
> +            __mmask8 k_cmp_bits = _mm512_cmpeq_epi64_mask(v_needle, v_hay);
> +            uint32_t cmp_bits = k_cmp_bits;
> +            batch_bitmask |= cmp_bits << j;
> +
> +            /* Compress and store the batched packets. */
> +            struct dp_packet **packets_ptrs = &packets->packets[j];
> +            __m512i v_pkt_ptrs = _mm512_loadu_si512(packets_ptrs);
> +            _mm512_mask_compressstoreu_epi64(store_addr, cmp_bits, v_pkt_ptrs);
> +        }
> +
> +        /* Strip all packets in this batch from the lookup_pkts_bitmask. */
> +        lookup_pkts_bitmask &= (~batch_bitmask);
> +        action_batch.count = __builtin_popcountll(batch_bitmask);
> +
> +        /* Loop over all packets in this batch, to gather the byte and tcp_flag
> +         * values, and pass them to the execute function. It would be nice to
> +         * optimize this away, however it is not easy to refactor in dpif.
> +         */
> +        uint32_t bytes = 0;
> +        uint16_t tcp_flags = 0;
> +        uint32_t bitmask_iter = batch_bitmask;
> +        for (int i = 0; i < action_batch.count; i++) {
> +            uint32_t idx = __builtin_ctzll(bitmask_iter);
> +            bitmask_iter = _blsr_u64(bitmask_iter);
> +
> +            bytes += pkt_meta[idx].bytes;
> +            tcp_flags |= pkt_meta[idx].tcp_flags;
> +        }
> +
> +        dp_netdev_batch_execute(pmd, &action_batch, rules[rule_pkt_idx],
> +                                bytes, tcp_flags);
> +    }
> +
> +    return 0;
> +}
> +
> +#endif
> +#endif
> diff --git a/lib/dpif-netdev-private-dfc.h b/lib/dpif-netdev-private-dfc.h
> index 6a672d1b3..d5d4da7ea 100644
> --- a/lib/dpif-netdev-private-dfc.h
> +++ b/lib/dpif-netdev-private-dfc.h
> @@ -81,6 +81,14 @@ extern "C" {
>  #define DEFAULT_EM_FLOW_INSERT_MIN (UINT32_MAX /                     \
>                                      DEFAULT_EM_FLOW_INSERT_INV_PROB)
>  
> +/* Forward declaration for SMC function prototype that requires access to
> + * 'struct dp_netdev_pmd_thread'. */
> +struct dp_netdev_pmd_thread;
> +
> +/* Forward declaration for EMC and SMC batch insert function prototypes that
> + * require access to 'struct dpcls_rule'. */
> +struct dpcls_rule;
> +
>  struct emc_entry {
>      struct dp_netdev_flow *flow;
>      struct netdev_flow_key key;   /* key.hash used for emc hash value. */
> @@ -168,6 +176,23 @@ emc_lookup(struct emc_cache *cache, const struct netdev_flow_key *key)
>      return NULL;
>  }
>  
> +/* Insert a batch of keys/flows into the EMC and SMC caches. */
> +void
> +emc_probabilistic_insert_batch(struct dp_netdev_pmd_thread *pmd,
> +                               const struct netdev_flow_key *keys,
> +                               struct dpcls_rule **rules,
> +                               uint32_t emc_insert_mask);
> +
> +void
> +smc_insert_batch(struct dp_netdev_pmd_thread *pmd,
> +                               const struct netdev_flow_key *keys,
> +                               struct dpcls_rule **rules,
> +                               uint32_t smc_insert_mask);
> +
> +struct dp_netdev_flow *
> +smc_lookup_single(struct dp_netdev_pmd_thread *pmd,
> +                  struct dp_packet *packet,
> +                  struct netdev_flow_key *key);
>  
>  #ifdef  __cplusplus
>  }
> diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h
> new file mode 100644
> index 000000000..2fd7cc400
> --- /dev/null
> +++ b/lib/dpif-netdev-private-dpif.h
> @@ -0,0 +1,32 @@
> +/*
> + * Copyright (c) 2021 Intel Corporation.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef DPIF_NETDEV_PRIVATE_DPIF_H
> +#define DPIF_NETDEV_PRIVATE_DPIF_H 1
> +
> +#include "openvswitch/types.h"
> +
> +/* Forward declarations to avoid including files. */
> +struct dp_netdev_pmd_thread;
> +struct dp_packet_batch;
> +
> +/* Available implementations for dpif work. */
> +int32_t
> +dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
> +                             struct dp_packet_batch *packets,
> +                             odp_port_t in_port);
> +
> +#endif /* netdev-private.h */
> diff --git a/lib/dpif-netdev-private-thread.h b/lib/dpif-netdev-private-thread.h
> index 0d674ab83..17356d5e2 100644
> --- a/lib/dpif-netdev-private-thread.h
> +++ b/lib/dpif-netdev-private-thread.h
> @@ -45,14 +45,19 @@ struct dp_netdev_pmd_thread_ctx {
>      struct dp_netdev_rxq *last_rxq;
>      /* EMC insertion probability context for the current processing cycle. */
>      uint32_t emc_insert_min;
> +    /* Enable the SMC cache from ovsdb config. */
> +    bool smc_enable_db;
>  };
>  
>  /* Forward declaration for typedef. */
>  struct dp_netdev_pmd_thread;
>  
> -typedef void (*dp_netdev_input_func)(struct dp_netdev_pmd_thread *pmd,
> -                                     struct dp_packet_batch *packets,
> -                                     odp_port_t port_no);
> +/* Typedef for DPIF functions.
> + * Returns a bitmask of packets to handle, possibly including upcall/misses.
> + */
> +typedef int32_t (*dp_netdev_input_func)(struct dp_netdev_pmd_thread *pmd,
> +                                        struct dp_packet_batch *packets,
> +                                        odp_port_t port_no);
>  
>  /* PMD: Poll modes drivers.  PMD accesses devices via polling to eliminate
>   * the performance overhead of interrupt processing.  Therefore netdev can
> diff --git a/lib/dpif-netdev-private.h b/lib/dpif-netdev-private.h
> index d7b6fd7ec..0315b5bf6 100644
> --- a/lib/dpif-netdev-private.h
> +++ b/lib/dpif-netdev-private.h
> @@ -31,4 +31,29 @@
>  #include "dpif-netdev-private-dfc.h"
>  #include "dpif-netdev-private-thread.h"
>  
> +/* Allow other implementations to lookup the DPCLS instances. */
> +struct dpcls *
> +dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
> +                           odp_port_t in_port);
> +
> +/* Allow other implementations to call dpcls_lookup() for subtable search. */
> +bool
> +dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
> +             struct dpcls_rule **rules, const size_t cnt,
> +             int *num_lookups_p);
> +
> +/* Allow other implementations to execute actions on a batch. */
> +void
> +dp_netdev_batch_execute(struct dp_netdev_pmd_thread *pmd,
> +                        struct dp_packet_batch *packets,
> +                        struct dpcls_rule *rule,
> +                        uint32_t bytes,
> +                        uint16_t tcp_flags);
> +
> +/* Available implementations for dpif work. */
> +int32_t
> +dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
> +                             struct dp_packet_batch *packets,
> +                             odp_port_t in_port);
> +
>  #endif /* netdev-private.h */
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index e6486417e..1f15af882 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -183,10 +183,6 @@ static uint32_t dpcls_subtable_lookup_reprobe(struct dpcls *cls);
>  static void dpcls_insert(struct dpcls *, struct dpcls_rule *,
>                           const struct netdev_flow_key *mask);
>  static void dpcls_remove(struct dpcls *, struct dpcls_rule *);
> -static bool dpcls_lookup(struct dpcls *cls,
> -                         const struct netdev_flow_key *keys[],
> -                         struct dpcls_rule **rules, size_t cnt,
> -                         int *num_lookups_p);
>  
>  /* Set of supported meter flags */
>  #define DP_SUPPORTED_METER_FLAGS_MASK \
> @@ -483,7 +479,7 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
>                                        const struct flow *flow,
>                                        const struct nlattr *actions,
>                                        size_t actions_len);
> -static void dp_netdev_input(struct dp_netdev_pmd_thread *,
> +static int32_t dp_netdev_input(struct dp_netdev_pmd_thread *,
>                              struct dp_packet_batch *, odp_port_t port_no);
>  static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
>                                    struct dp_packet_batch *);
> @@ -555,7 +551,7 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
>                                 bool purge);
>  static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
>                                        struct tx_port *tx);
> -static inline struct dpcls *
> +inline struct dpcls *
>  dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
>                             odp_port_t in_port);
>  
> @@ -1920,7 +1916,7 @@ void dp_netdev_flow_unref(struct dp_netdev_flow *flow)
>      }
>  }
>  
> -static inline struct dpcls *
> +inline struct dpcls *
>  dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
>                             odp_port_t in_port)
>  {
> @@ -2714,13 +2710,46 @@ smc_insert(struct dp_netdev_pmd_thread *pmd,
>      bucket->flow_idx[i] = index;
>  }
>  
> +inline void
> +emc_probabilistic_insert_batch(struct dp_netdev_pmd_thread *pmd,
> +                               const struct netdev_flow_key *keys,
> +                               struct dpcls_rule **rules,
> +                               uint32_t emc_insert_mask)
> +{
> +    while (emc_insert_mask) {
> +        uint32_t i = __builtin_ctz(emc_insert_mask);

I got an error on Windows:

[...]
libtool: compile:  build-aux/cccl -DHAVE_CONFIG_H -I. -I ./include/windows -I ./datapath-windows/include -Ic:/PTHREADS-BUILT//include -O2 -I ./include -I ./include -I ./lib -I ./lib -IC:/OpenSSL-Win64/include -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-field-initializers -Wthread-safety -fno-strict-aliasing -Wswitch-bool -Wlogical-not-parentheses -Wsizeof-array-argument -Wbool-compare -Wshift-negative-value -Wduplicated-cond -Qunused-arguments -Wshadow -Wmultistatement-macros -Wcast-align=strict -Wno-null-pointer-arithmetic -Warray-bounds-pointer-arithmetic -g -DHAVE_AVX512F -c lib/dpif-netdev.c
libtool: compile: mv -f "dpif-netdev-lookup-autovalidator.obj" "lib/dpif-netdev-lookup-autovalidator.obj"
c:\PTHREADS-BUILT\include\_ptw32.h(120): warning C4005: 'HAVE_STRUCT_TIMESPEC': macro redefinition
c:\openvswitch_compile\config.h(207): note: see previous definition of 'HAVE_STRUCT_TIMESPEC'
c:\openvswitch_compile\lib\ovs-rcu.h(215): warning C4311: 'type cast': pointer truncation from 'void *' to 'long'
libtool: compile: mv -f "dpif-netdev-lookup-generic.obj" "lib/dpif-netdev-lookup-generic.obj"
dpif-netdev.c
\
	source='lib/dpif-netdev-private-dfc.c' object='lib/dpif-netdev-private-dfc.lo' libtool=yes \
	DEPDIR=.deps depmode=none /bin/sh ./build-aux/depcomp \
	/bin/sh ./libtool  --tag=CC   --mode=compile build-aux/cccl -DHAVE_CONFIG_H -I.   -I ./include/windows -I ./datapath-windows/include -Ic:/PTHREADS-BUILT//include -O2 -I ./include -I ./include -I ./lib -I ./lib -IC:/OpenSSL-Win64/include   -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-field-initializers -Wthread-safety -fno-strict-aliasing -Wswitch-bool -Wlogical-not-parentheses -Wsizeof-array-argument -Wbool-compare -Wshift-negative-value -Wduplicated-cond -Qunused-arguments -Wshadow -Wmultistatement-macros -Wcast-align=strict -Wno-null-pointer-arithmetic -Warray-bounds-pointer-arithmetic    -g -DHAVE_AVX512F -c -o lib/dpif-netdev-private-dfc.lo lib/dpif-netdev-private-dfc.c
c:\PTHREADS-BUILT\include\_ptw32.h(120): warning C4005: 'HAVE_STRUCT_TIMESPEC': macro redefinition
c:\openvswitch_compile\config.h(207): note: see previous definition of 'HAVE_STRUCT_TIMESPEC'
c:\openvswitch_compile\lib\ovs-rcu.h(215): warning C4311: 'type cast': pointer truncation from 'void *' to 'long'
c:\openvswitch_compile\config.h(207): warning C4005: 'HAVE_STRUCT_TIMESPEC': macro redefinition
c:\PTHREADS-BUILT\include\_ptw32.h(120): note: see previous definition of 'HAVE_STRUCT_TIMESPEC'
lib/dpif-netdev.c(2826): error C4013: '__builtin_ctz' undefined; assuming extern returning int
lib/dpif-netdev.c(2919): warning C4311: 'type cast': pointer truncation from 'const char *const ' to 'long'
\
	source='lib/dpif-netdev-private-dpif.c' object='lib/dpif-netdev-private-dpif.lo' libtool=yes \
	DEPDIR=.deps depmode=none /bin/sh ./build-aux/depcomp \
	/bin/sh ./libtool  --tag=CC   --mode=compile build-aux/cccl -DHAVE_CONFIG_H -I.   -I ./include/windows -I ./datapath-windows/include -Ic:/PTHREADS-BUILT//include -O2 -I ./include -I ./include -I ./lib -I ./lib -IC:/OpenSSL-Win64/include   -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare -Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-field-initializers -Wthread-safety -fno-strict-aliasing -Wswitch-bool -Wlogical-not-parentheses -Wsizeof-array-argument -Wbool-compare -Wshift-negative-value -Wduplicated-cond -Qunused-arguments -Wshadow -Wmultistatement-macros -Wcast-align=strict -Wno-null-pointer-arithmetic -Warray-bounds-pointer-arithmetic    -g -DHAVE_AVX512F -c -o lib/dpif-netdev-private-dpif.lo lib/dpif-netdev-private-dpif.c
make[2]: *** [lib/dpif-netdev.lo] Error 1
make[2]: *** Waiting for unfinished jobs....

Thanks,
fbl


> +        emc_insert_mask &= emc_insert_mask - 1;
> +        /* Get the require parameters for EMC/SMC from the rule */
> +        struct dp_netdev_flow *flow = dp_netdev_flow_cast(rules[i]);
> +        /* Insert the key into EMC/SMC. */
> +        emc_probabilistic_insert(pmd, &keys[i], flow);
> +    }
> +}
> +
> +inline void
> +smc_insert_batch(struct dp_netdev_pmd_thread *pmd,
> +                 const struct netdev_flow_key *keys,
> +                 struct dpcls_rule **rules,
> +                 uint32_t smc_insert_mask)
> +{
> +    while (smc_insert_mask) {
> +        uint32_t i = __builtin_ctz(smc_insert_mask);
> +        smc_insert_mask &= smc_insert_mask - 1;
> +        /* Get the require parameters for EMC/SMC from the rule */
> +        struct dp_netdev_flow *flow = dp_netdev_flow_cast(rules[i]);
> +        uint32_t hash = dp_netdev_flow_hash(&flow->ufid);
> +        /* Insert the key into EMC/SMC. */
> +        smc_insert(pmd, &keys[i], hash);
> +    }
> +}
> +
>  static struct dp_netdev_flow *
>  dp_netdev_pmd_lookup_flow(struct dp_netdev_pmd_thread *pmd,
>                            const struct netdev_flow_key *key,
>                            int *lookup_num_p)
>  {
>      struct dpcls *cls;
> -    struct dpcls_rule *rule;
> +    struct dpcls_rule *rule = NULL;
>      odp_port_t in_port = u32_to_odp(MINIFLOW_GET_U32(&key->mf,
>                                                       in_port.odp_port));
>      struct dp_netdev_flow *netdev_flow = NULL;
> @@ -4233,7 +4262,10 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
>          }
>  
>          /* Process packet batch. */
> -        pmd->netdev_input_func(pmd, &batch, port_no);
> +        int32_t ret = pmd->netdev_input_func(pmd, &batch, port_no);
> +        if (ret) {
> +            dp_netdev_input(pmd, &batch, port_no);
> +        }
>  
>          /* Assign processing cycles to rx queue. */
>          cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
> @@ -5251,6 +5283,8 @@ dpif_netdev_run(struct dpif *dpif)
>                      non_pmd->ctx.emc_insert_min = 0;
>                  }
>  
> +                non_pmd->ctx.smc_enable_db = dp->smc_enable_db;
> +
>                  for (i = 0; i < port->n_rxq; i++) {
>  
>                      if (!netdev_rxq_enabled(port->rxqs[i].rx)) {
> @@ -5522,6 +5556,8 @@ reload:
>                  pmd->ctx.emc_insert_min = 0;
>              }
>  
> +            pmd->ctx.smc_enable_db = pmd->dp->smc_enable_db;
> +
>              process_packets =
>                  dp_netdev_process_rxq_port(pmd, poll_list[i].rxq,
>                                             poll_list[i].port_no);
> @@ -6415,6 +6451,24 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
>                                actions->actions, actions->size);
>  }
>  
> +void
> +dp_netdev_batch_execute(struct dp_netdev_pmd_thread *pmd,
> +                        struct dp_packet_batch *packets,
> +                        struct dpcls_rule *rule,
> +                        uint32_t bytes,
> +                        uint16_t tcp_flags)
> +{
> +    /* Gets action* from the rule. */
> +    struct dp_netdev_flow *flow = dp_netdev_flow_cast(rule);
> +    struct dp_netdev_actions *actions = dp_netdev_flow_get_actions(flow);
> +
> +    dp_netdev_flow_used(flow, dp_packet_batch_size(packets), bytes,
> +                        tcp_flags, pmd->ctx.now / 1000);
> +    const uint32_t steal = 1;
> +    dp_netdev_execute_actions(pmd, packets, steal, &flow->flow,
> +                              actions->actions, actions->size);
> +}
> +
>  static inline void
>  dp_netdev_queue_batches(struct dp_packet *pkt,
>                          struct dp_netdev_flow *flow, uint16_t tcp_flags,
> @@ -6519,6 +6573,30 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
>      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT, n_smc_hit);
>  }
>  
> +struct dp_netdev_flow *
> +smc_lookup_single(struct dp_netdev_pmd_thread *pmd,
> +                  struct dp_packet *packet,
> +                  struct netdev_flow_key *key)
> +{
> +    const struct cmap_node *flow_node = smc_entry_get(pmd, key->hash);
> +
> +    if (OVS_LIKELY(flow_node != NULL)) {
> +        struct dp_netdev_flow *flow = NULL;
> +
> +        CMAP_NODE_FOR_EACH (flow, node, flow_node) {
> +            /* Since we dont have per-port megaflow to check the port
> +             * number, we need to verify that the input ports match. */
> +            if (OVS_LIKELY(dpcls_rule_matches_key(&flow->cr, key) &&
> +                flow->flow.in_port.odp_port == packet->md.in_port.odp_port)) {
> +
> +                return (void *) flow;
> +            }
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
>  /* Try to process all ('cnt') the 'packets' using only the datapath flow cache
>   * 'pmd->flow_cache'. If a flow is not found for a packet 'packets[i]', the
>   * miniflow is copied into 'keys' and the packet pointer is moved at the
> @@ -6924,12 +7002,13 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
>      }
>  }
>  
> -static void
> +static int32_t
>  dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
>                  struct dp_packet_batch *packets,
>                  odp_port_t port_no)
>  {
>      dp_netdev_input__(pmd, packets, false, port_no);
> +    return 0;
>  }
>  
>  static void
> @@ -8369,7 +8448,7 @@ dpcls_flow_key_gen_masks(const struct netdev_flow_key *tbl,
>  
>  /* Returns true if 'target' satisfies 'key' in 'mask', that is, if each 1-bit
>   * in 'mask' the values in 'key' and 'target' are the same. */
> -bool
> +inline bool ALWAYS_INLINE
>  dpcls_rule_matches_key(const struct dpcls_rule *rule,
>                         const struct netdev_flow_key *target)
>  {
> @@ -8395,7 +8474,7 @@ dpcls_rule_matches_key(const struct dpcls_rule *rule,
>   * priorities, instead returning any rule which matches the flow.
>   *
>   * Returns true if all miniflows found a corresponding rule. */
> -static bool
> +bool
>  dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
>               struct dpcls_rule **rules, const size_t cnt,
>               int *num_lookups_p)
> -- 
> 2.32.0
> 
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

-- 
fbl


More information about the dev mailing list