[ovs-dev] [v13 04/12] dpif-avx512: Add ISA implementation of dpif.
Ferriter, Cian
cian.ferriter at intel.com
Thu Jun 24 11:44:45 UTC 2021
Hi Flavio,
Thanks for the testing here. My responses are inline.
Cian
> -----Original Message-----
> From: Flavio Leitner <fbl at sysclose.org>
> Sent: Thursday 24 June 2021 05:06
> To: Ferriter, Cian <cian.ferriter at intel.com>
> Cc: ovs-dev at openvswitch.org; Amber, Kumar <kumar.amber at intel.com>; i.maximets at ovn.org
> Subject: Re: [ovs-dev] [v13 04/12] dpif-avx512: Add ISA implementation of dpif.
>
> On Thu, Jun 17, 2021 at 05:18:17PM +0100, Cian Ferriter wrote:
> > From: Harry van Haaren <harry.van.haaren at intel.com>
> >
> > This commit adds the AVX512 implementation of DPIF functionality,
> > specifically the dp_netdev_input_outer_avx512 function. This function
> > only handles outer (no re-circulations), and is optimized to use the
> > AVX512 ISA for packet batching and other DPIF work.
> >
> > Sparse is not able to handle the AVX512 intrinsics, causing compile
> > time failures, so it is disabled for this file.
> >
> > Signed-off-by: Harry van Haaren <harry.van.haaren at intel.com>
> > Co-authored-by: Cian Ferriter <cian.ferriter at intel.com>
> > Signed-off-by: Cian Ferriter <cian.ferriter at intel.com>
> > Co-authored-by: Kumar Amber <kumar.amber at intel.com>
> > Signed-off-by: Kumar Amber <kumar.amber at intel.com>
> >
> > ---
> >
> > v13:
> > - Squash "Add HWOL support" commit into this commit.
> > - Add NEWS item about this feature here rather than in a later commit.
> > - Add #define NUM_U64_IN_ZMM_REG 8.
> > - Add comment describing operation of while loop handling HWOL->EMC->SMC
> > lookups in dp_netdev_input_outer_avx512().
> > - Add EMC and SMC batch insert functions for better handling of EMC and
> > SMC in AVX512 DPIF.
> > - Minor code refactor to address review comments.
> > ---
> > NEWS | 2 +
> > lib/automake.mk | 5 +-
> > lib/dpif-netdev-avx512.c | 327 +++++++++++++++++++++++++++++++
> > lib/dpif-netdev-private-dfc.h | 25 +++
> > lib/dpif-netdev-private-dpif.h | 32 +++
> > lib/dpif-netdev-private-thread.h | 11 +-
> > lib/dpif-netdev-private.h | 25 +++
> > lib/dpif-netdev.c | 103 ++++++++--
> > 8 files changed, 514 insertions(+), 16 deletions(-)
> > create mode 100644 lib/dpif-netdev-avx512.c
> > create mode 100644 lib/dpif-netdev-private-dpif.h
> >
> > diff --git a/NEWS b/NEWS
> > index 96b3a61c8..6a4a7b76d 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -10,6 +10,8 @@ Post-v2.15.0
> > * Auto load balancing of PMDs now partially supports cross-NUMA polling
> > cases, e.g if all PMD threads are running on the same NUMA node.
> > * Refactor lib/dpif-netdev.c to multiple header files.
> > + * Add avx512 implementation of dpif which can process non recirculated
> > + packets. It supports partial HWOL, EMC, SMC and DPCLS lookups.
> > - ovs-ctl:
> > * New option '--no-record-hostname' to disable hostname configuration
> > in ovsdb on startup.
> > diff --git a/lib/automake.mk b/lib/automake.mk
> > index 3a33cdd5c..660cd07f0 100644
> > --- a/lib/automake.mk
> > +++ b/lib/automake.mk
> > @@ -33,11 +33,13 @@ lib_libopenvswitchavx512_la_CFLAGS = \
> > -mavx512f \
> > -mavx512bw \
> > -mavx512dq \
> > + -mbmi \
> > -mbmi2 \
> > -fPIC \
> > $(AM_CFLAGS)
> > lib_libopenvswitchavx512_la_SOURCES = \
> > - lib/dpif-netdev-lookup-avx512-gather.c
> > + lib/dpif-netdev-lookup-avx512-gather.c \
> > + lib/dpif-netdev-avx512.c
> > lib_libopenvswitchavx512_la_LDFLAGS = \
> > -static
> > endif
> > @@ -114,6 +116,7 @@ lib_libopenvswitch_la_SOURCES = \
> > lib/dpif-netdev-private-dfc.c \
> > lib/dpif-netdev-private-dfc.h \
> > lib/dpif-netdev-private-dpcls.h \
> > + lib/dpif-netdev-private-dpif.h \
> > lib/dpif-netdev-private-flow.h \
> > lib/dpif-netdev-private-hwol.h \
> > lib/dpif-netdev-private-thread.h \
> > diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c
> > new file mode 100644
> > index 000000000..0e55b0be2
> > --- /dev/null
> > +++ b/lib/dpif-netdev-avx512.c
> > @@ -0,0 +1,327 @@
> > +/*
> > + * Copyright (c) 2021 Intel Corporation.
> > + *
> > + * Licensed under the Apache License, Version 2.0 (the "License");
> > + * you may not use this file except in compliance with the License.
> > + * You may obtain a copy of the License at:
> > + *
> > + * http://www.apache.org/licenses/LICENSE-2.0
> > + *
> > + * Unless required by applicable law or agreed to in writing, software
> > + * distributed under the License is distributed on an "AS IS" BASIS,
> > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> > + * See the License for the specific language governing permissions and
> > + * limitations under the License.
> > + */
> > +
> > +#ifdef __x86_64__
> > +/* Sparse cannot handle the AVX512 instructions. */
> > +#if !defined(__CHECKER__)
> > +
> > +#include <config.h>
> > +
> > +#include "dpif-netdev.h"
> > +#include "dpif-netdev-perf.h"
> > +
> > +#include "dpif-netdev-private.h"
> > +#include "dpif-netdev-private-dpcls.h"
> > +#include "dpif-netdev-private-flow.h"
> > +#include "dpif-netdev-private-thread.h"
> > +#include "dpif-netdev-private-hwol.h"
> > +
> > +#include "dp-packet.h"
> > +#include "netdev.h"
> > +
> > +#include "immintrin.h"
> > +
> > +/* Each AVX512 register (zmm register in assembly notation) can contain up to
> > + * 512 bits, which is equivalent to 8 uint64_t variables. This is the maximum
> > + * number of miniflow blocks that can be processed in a single pass of the
> > + * AVX512 code at a time.
> > + */
> > +#define NUM_U64_IN_ZMM_REG (8)
> > +
> > +/* Structure to contain per-packet metadata that must be attributed to the
> > + * dp netdev flow. This is unfortunate to have to track per packet, however
> > + * it's a bit awkward to maintain them in a performant way. This structure
> > + * helps to keep two variables on a single cache line per packet.
> > + */
> > +struct pkt_flow_meta {
> > + uint16_t bytes;
> > + uint16_t tcp_flags;
> > +};
> > +
> > +/* Structure of heap allocated memory for DPIF internals. */
> > +struct dpif_userdata {
> > + OVS_ALIGNED_VAR(CACHE_LINE_SIZE)
> > + struct netdev_flow_key keys[NETDEV_MAX_BURST];
> > + OVS_ALIGNED_VAR(CACHE_LINE_SIZE)
> > + struct netdev_flow_key *key_ptrs[NETDEV_MAX_BURST];
> > + OVS_ALIGNED_VAR(CACHE_LINE_SIZE)
> > + struct pkt_flow_meta pkt_meta[NETDEV_MAX_BURST];
> > +};
> > +
> > +int32_t
> > +dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
> > + struct dp_packet_batch *packets,
> > + odp_port_t in_port)
> > +{
> > + /* Allocate DPIF userdata. */
> > + if (OVS_UNLIKELY(!pmd->netdev_input_func_userdata)) {
> > + pmd->netdev_input_func_userdata =
> > + xmalloc_pagealign(sizeof(struct dpif_userdata));
> > + }
> > +
> > + struct dpif_userdata *ud = pmd->netdev_input_func_userdata;
> > + struct netdev_flow_key *keys = ud->keys;
> > + struct netdev_flow_key **key_ptrs = ud->key_ptrs;
> > + struct pkt_flow_meta *pkt_meta = ud->pkt_meta;
> > +
> > + /* The AVX512 DPIF implementation handles rules in a way that is optimized
> > + * for reducing data-movement between HWOL/EMC/SMC and DPCLS. This is
> > + * achieved by separating the rule arrays. Bitmasks are kept for each
> > + * packet, indicating if it matched in the HWOL/EMC/SMC array or DPCLS
> > + * array. Later the two arrays are merged by AVX-512 expand instructions.
> > + */
> > +
> > + /* Stores the computed output: a rule pointer for each packet. */
> > + /* Used initially for HWOL/EMC/SMC. */
> > + struct dpcls_rule *rules[NETDEV_MAX_BURST];
> > + /* Used for DPCLS. */
> > + struct dpcls_rule *dpcls_rules[NETDEV_MAX_BURST];
> > +
> > + uint32_t dpcls_key_idx = 0;
> > +
> > + for (uint32_t i = 0; i < NETDEV_MAX_BURST; i += NUM_U64_IN_ZMM_REG) {
> > + _mm512_storeu_si512(&rules[i], _mm512_setzero_si512());
> > + _mm512_storeu_si512(&dpcls_rules[i], _mm512_setzero_si512());
> > + }
> > +
> > + /* Prefetch each packet's metadata. */
> > + const size_t batch_size = dp_packet_batch_size(packets);
> > + for (int i = 0; i < batch_size; i++) {
> > + struct dp_packet *packet = packets->packets[i];
> > + OVS_PREFETCH(dp_packet_data(packet));
> > + pkt_metadata_prefetch_init(&packet->md);
> > + }
> > +
> > + /* Check if EMC or SMC are enabled. */
> > + struct dfc_cache *cache = &pmd->flow_cache;
> > + const uint32_t emc_enabled = pmd->ctx.emc_insert_min != 0;
> > + const uint32_t smc_enabled = pmd->ctx.smc_enable_db;
> > +
> > + uint32_t emc_hits = 0;
> > + uint32_t smc_hits = 0;
> > +
> > + /* A 1 bit in this mask indicates a hit, so no DPCLS lookup on the pkt. */
> > + uint32_t hwol_emc_smc_hitmask = 0;
> > + uint32_t smc_hitmask = 0;
> > +
> > + /* The below while loop is based on the 'iter' variable which has a number
> > + * of bits set representing packets that we want to process
> > + * (HWOL->MFEX->EMC->SMC). As each packet is processed, we clear (set to 0)
> > + * the bit representing that packet using '_blsr_u64()'. The
> > + * '__builtin_ctz()' will give us the correct index into the 'packets',
> > + * 'pkt_meta', 'keys' and 'rules' arrays.
> > + *
> > + * For one iteration of the while loop, here's some psuedocode as an
> > + * example where 'iter' is represented in binary:
> > + *
> > + * while (iter) { // iter = 1100
> > + * uint32_t i = __builtin_ctz(iter); // i = 2
> > + * iter = _blsr_u64(iter); // iter = 1000
> > + * // do all processing (HWOL->MFEX->EMC->SMC)
> > + * }
> > + */
> > + uint32_t lookup_pkts_bitmask = (1ULL << batch_size) - 1;
> > + uint32_t iter = lookup_pkts_bitmask;
> > + while (iter) {
> > + uint32_t i = __builtin_ctz(iter);
> > + iter = _blsr_u64(iter);
> > +
> > + /* Get packet pointer from bitmask and packet md. */
> > + struct dp_packet *packet = packets->packets[i];
> > + pkt_metadata_init(&packet->md, in_port);
> > +
> > + struct dp_netdev_flow *f = NULL;
> > +
> > + /* Check for partial hardware offload mark. */
> > + uint32_t mark;
> > + if (dp_packet_has_flow_mark(packet, &mark)) {
> > + f = mark_to_flow_find(pmd, mark);
> > + if (f) {
> > + rules[i] = &f->cr;
> > + pkt_meta[i].tcp_flags = parse_tcp_flags(packet);
> > + pkt_meta[i].bytes = dp_packet_size(packet);
> > + hwol_emc_smc_hitmask |= (1 << i);
> > + continue;
> > + }
> > + }
> > +
> > + /* Do miniflow extract into keys. */
> > + struct netdev_flow_key *key = &keys[i];
> > + miniflow_extract(packet, &key->mf);
> > +
> > + /* Cache TCP and byte values for all packets. */
> > + pkt_meta[i].bytes = dp_packet_size(packet);
> > + pkt_meta[i].tcp_flags = miniflow_get_tcp_flags(&key->mf);
> > +
> > + key->len = netdev_flow_key_size(miniflow_n_values(&key->mf));
> > + key->hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet, &key->mf);
> > +
> > + if (emc_enabled) {
> > + f = emc_lookup(&cache->emc_cache, key);
> > +
> > + if (f) {
> > + rules[i] = &f->cr;
> > + emc_hits++;
> > + hwol_emc_smc_hitmask |= (1 << i);
> > + continue;
> > + }
> > + }
> > +
> > + if (smc_enabled && !f) {
> > + f = smc_lookup_single(pmd, packet, key);
> > + if (f) {
> > + rules[i] = &f->cr;
> > + smc_hits++;
> > + smc_hitmask |= (1 << i);
> > + continue;
> > + }
> > + }
> > +
> > + /* The flow pointer was not found in HWOL/EMC/SMC, so add it to the
> > + * dpcls input keys array for batch lookup later.
> > + */
> > + key_ptrs[dpcls_key_idx] = &keys[i];
> > + dpcls_key_idx++;
> > + }
> > +
> > + hwol_emc_smc_hitmask |= smc_hitmask;
> > +
> > + /* DPCLS handles any packets missed by HWOL/EMC/SMC. It operates on the
> > + * key_ptrs[] for input miniflows to match, storing results in the
> > + * dpcls_rules[] array.
> > + */
> > + if (dpcls_key_idx > 0) {
> > + struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
> > + if (OVS_UNLIKELY(!cls)) {
> > + return -1;
> > + }
> > + bool any_miss =
> > + !dpcls_lookup(cls, (const struct netdev_flow_key **) key_ptrs,
> > + dpcls_rules, dpcls_key_idx, NULL);
> > + if (OVS_UNLIKELY(any_miss)) {
> > + return -1;
> > + }
> > +
> > + /* Merge DPCLS rules and HWOL/EMC/SMC rules. */
> > + uint32_t dpcls_idx = 0;
> > + for (int i = 0; i < NETDEV_MAX_BURST; i += NUM_U64_IN_ZMM_REG) {
> > + /* Indexing here is somewhat complicated due to DPCLS output rule
> > + * load index depending on the hitmask of HWOL/EMC/SMC. More
> > + * packets from HWOL/EMC/SMC bitmask means less DPCLS rules are
> > + * used.
> > + */
> > + __m512i v_cache_rules = _mm512_loadu_si512(&rules[i]);
> > + __m512i v_merged_rules =
> > + _mm512_mask_expandloadu_epi64(v_cache_rules,
> > + ~hwol_emc_smc_hitmask,
> > + &dpcls_rules[dpcls_idx]);
> > + _mm512_storeu_si512(&rules[i], v_merged_rules);
> > +
> > + /* Update DPCLS load index and bitmask for HWOL/EMC/SMC hits.
> > + * There are NUM_U64_IN_ZMM_REG output pointers per register,
> > + * subtract the HWOL/EMC/SMC lanes equals the number of DPCLS rules
> > + * consumed.
> > + */
> > + uint32_t hitmask_FF = (hwol_emc_smc_hitmask & 0xFF);
> > + dpcls_idx += NUM_U64_IN_ZMM_REG - __builtin_popcountll(hitmask_FF);
> > + hwol_emc_smc_hitmask =
> > + (hwol_emc_smc_hitmask >> NUM_U64_IN_ZMM_REG);
> > + }
> > + }
> > +
> > + /* At this point we have a 1:1 pkt to rules mapping, so update EMC/SMC
> > + * if required.
> > + */
> > + /* Insert SMC and DPCLS hits into EMC. */
> > + /* Insert DPCLS hits into SMC. */
> > + if (emc_enabled) {
> > + uint32_t emc_insert_mask = smc_hitmask | ~hwol_emc_smc_hitmask;
> > + emc_insert_mask &= lookup_pkts_bitmask;
> > + emc_probabilistic_insert_batch(pmd, keys, &rules[0], emc_insert_mask);
> > + }
> > + if (smc_enabled) {
> > + uint32_t smc_insert_mask = ~hwol_emc_smc_hitmask;
> > + smc_insert_mask &= lookup_pkts_bitmask;
> > + smc_insert_batch(pmd, keys, &rules[0], smc_insert_mask);
> > + }
> > +
> > + /* At this point we don't return error anymore, so commit stats here. */
> > + pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_RECV, batch_size);
> > + pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT, emc_hits);
> > + pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT, smc_hits);
> > + pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_HIT,
> > + dpcls_key_idx);
> > + pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_LOOKUP,
> > + dpcls_key_idx);
> > +
> > + /* Initialize the "Action Batch" for each flow handled below. */
> > + struct dp_packet_batch action_batch;
> > + action_batch.trunc = 0;
> > +
> > + while (lookup_pkts_bitmask) {
> > + uint32_t rule_pkt_idx = __builtin_ctz(lookup_pkts_bitmask);
> > + uint64_t needle = (uintptr_t) rules[rule_pkt_idx];
> > +
> > + /* Parallel compare NUM_U64_IN_ZMM_REG flow* 's to the needle, create a
> > + * bitmask.
> > + */
> > + uint32_t batch_bitmask = 0;
> > + for (uint32_t j = 0; j < NETDEV_MAX_BURST; j += NUM_U64_IN_ZMM_REG) {
> > + /* Pre-calculate store addr. */
> > + uint32_t num_pkts_in_batch = __builtin_popcountll(batch_bitmask);
> > + void *store_addr = &action_batch.packets[num_pkts_in_batch];
> > +
> > + /* Search for identical flow* in burst, update bitmask. */
> > + __m512i v_needle = _mm512_set1_epi64(needle);
> > + __m512i v_hay = _mm512_loadu_si512(&rules[j]);
> > + __mmask8 k_cmp_bits = _mm512_cmpeq_epi64_mask(v_needle, v_hay);
> > + uint32_t cmp_bits = k_cmp_bits;
> > + batch_bitmask |= cmp_bits << j;
> > +
> > + /* Compress and store the batched packets. */
> > + struct dp_packet **packets_ptrs = &packets->packets[j];
> > + __m512i v_pkt_ptrs = _mm512_loadu_si512(packets_ptrs);
> > + _mm512_mask_compressstoreu_epi64(store_addr, cmp_bits, v_pkt_ptrs);
> > + }
> > +
> > + /* Strip all packets in this batch from the lookup_pkts_bitmask. */
> > + lookup_pkts_bitmask &= (~batch_bitmask);
> > + action_batch.count = __builtin_popcountll(batch_bitmask);
> > +
> > + /* Loop over all packets in this batch, to gather the byte and tcp_flag
> > + * values, and pass them to the execute function. It would be nice to
> > + * optimize this away, however it is not easy to refactor in dpif.
> > + */
> > + uint32_t bytes = 0;
> > + uint16_t tcp_flags = 0;
> > + uint32_t bitmask_iter = batch_bitmask;
> > + for (int i = 0; i < action_batch.count; i++) {
> > + uint32_t idx = __builtin_ctzll(bitmask_iter);
> > + bitmask_iter = _blsr_u64(bitmask_iter);
> > +
> > + bytes += pkt_meta[idx].bytes;
> > + tcp_flags |= pkt_meta[idx].tcp_flags;
> > + }
> > +
> > + dp_netdev_batch_execute(pmd, &action_batch, rules[rule_pkt_idx],
> > + bytes, tcp_flags);
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +#endif
> > +#endif
> > diff --git a/lib/dpif-netdev-private-dfc.h b/lib/dpif-netdev-private-dfc.h
> > index 6a672d1b3..d5d4da7ea 100644
> > --- a/lib/dpif-netdev-private-dfc.h
> > +++ b/lib/dpif-netdev-private-dfc.h
> > @@ -81,6 +81,14 @@ extern "C" {
> > #define DEFAULT_EM_FLOW_INSERT_MIN (UINT32_MAX / \
> > DEFAULT_EM_FLOW_INSERT_INV_PROB)
> >
> > +/* Forward declaration for SMC function prototype that requires access to
> > + * 'struct dp_netdev_pmd_thread'. */
> > +struct dp_netdev_pmd_thread;
> > +
> > +/* Forward declaration for EMC and SMC batch insert function prototypes that
> > + * require access to 'struct dpcls_rule'. */
> > +struct dpcls_rule;
> > +
> > struct emc_entry {
> > struct dp_netdev_flow *flow;
> > struct netdev_flow_key key; /* key.hash used for emc hash value. */
> > @@ -168,6 +176,23 @@ emc_lookup(struct emc_cache *cache, const struct netdev_flow_key *key)
> > return NULL;
> > }
> >
> > +/* Insert a batch of keys/flows into the EMC and SMC caches. */
> > +void
> > +emc_probabilistic_insert_batch(struct dp_netdev_pmd_thread *pmd,
> > + const struct netdev_flow_key *keys,
> > + struct dpcls_rule **rules,
> > + uint32_t emc_insert_mask);
> > +
> > +void
> > +smc_insert_batch(struct dp_netdev_pmd_thread *pmd,
> > + const struct netdev_flow_key *keys,
> > + struct dpcls_rule **rules,
> > + uint32_t smc_insert_mask);
> > +
> > +struct dp_netdev_flow *
> > +smc_lookup_single(struct dp_netdev_pmd_thread *pmd,
> > + struct dp_packet *packet,
> > + struct netdev_flow_key *key);
> >
> > #ifdef __cplusplus
> > }
> > diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h
> > new file mode 100644
> > index 000000000..2fd7cc400
> > --- /dev/null
> > +++ b/lib/dpif-netdev-private-dpif.h
> > @@ -0,0 +1,32 @@
> > +/*
> > + * Copyright (c) 2021 Intel Corporation.
> > + *
> > + * Licensed under the Apache License, Version 2.0 (the "License");
> > + * you may not use this file except in compliance with the License.
> > + * You may obtain a copy of the License at:
> > + *
> > + * http://www.apache.org/licenses/LICENSE-2.0
> > + *
> > + * Unless required by applicable law or agreed to in writing, software
> > + * distributed under the License is distributed on an "AS IS" BASIS,
> > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> > + * See the License for the specific language governing permissions and
> > + * limitations under the License.
> > + */
> > +
> > +#ifndef DPIF_NETDEV_PRIVATE_DPIF_H
> > +#define DPIF_NETDEV_PRIVATE_DPIF_H 1
> > +
> > +#include "openvswitch/types.h"
> > +
> > +/* Forward declarations to avoid including files. */
> > +struct dp_netdev_pmd_thread;
> > +struct dp_packet_batch;
> > +
> > +/* Available implementations for dpif work. */
> > +int32_t
> > +dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
> > + struct dp_packet_batch *packets,
> > + odp_port_t in_port);
> > +
> > +#endif /* netdev-private.h */
> > diff --git a/lib/dpif-netdev-private-thread.h b/lib/dpif-netdev-private-thread.h
> > index 0d674ab83..17356d5e2 100644
> > --- a/lib/dpif-netdev-private-thread.h
> > +++ b/lib/dpif-netdev-private-thread.h
> > @@ -45,14 +45,19 @@ struct dp_netdev_pmd_thread_ctx {
> > struct dp_netdev_rxq *last_rxq;
> > /* EMC insertion probability context for the current processing cycle. */
> > uint32_t emc_insert_min;
> > + /* Enable the SMC cache from ovsdb config. */
> > + bool smc_enable_db;
> > };
> >
> > /* Forward declaration for typedef. */
> > struct dp_netdev_pmd_thread;
> >
> > -typedef void (*dp_netdev_input_func)(struct dp_netdev_pmd_thread *pmd,
> > - struct dp_packet_batch *packets,
> > - odp_port_t port_no);
> > +/* Typedef for DPIF functions.
> > + * Returns a bitmask of packets to handle, possibly including upcall/misses.
> > + */
> > +typedef int32_t (*dp_netdev_input_func)(struct dp_netdev_pmd_thread *pmd,
> > + struct dp_packet_batch *packets,
> > + odp_port_t port_no);
> >
> > /* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate
> > * the performance overhead of interrupt processing. Therefore netdev can
> > diff --git a/lib/dpif-netdev-private.h b/lib/dpif-netdev-private.h
> > index d7b6fd7ec..0315b5bf6 100644
> > --- a/lib/dpif-netdev-private.h
> > +++ b/lib/dpif-netdev-private.h
> > @@ -31,4 +31,29 @@
> > #include "dpif-netdev-private-dfc.h"
> > #include "dpif-netdev-private-thread.h"
> >
> > +/* Allow other implementations to lookup the DPCLS instances. */
> > +struct dpcls *
> > +dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
> > + odp_port_t in_port);
> > +
> > +/* Allow other implementations to call dpcls_lookup() for subtable search. */
> > +bool
> > +dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
> > + struct dpcls_rule **rules, const size_t cnt,
> > + int *num_lookups_p);
> > +
> > +/* Allow other implementations to execute actions on a batch. */
> > +void
> > +dp_netdev_batch_execute(struct dp_netdev_pmd_thread *pmd,
> > + struct dp_packet_batch *packets,
> > + struct dpcls_rule *rule,
> > + uint32_t bytes,
> > + uint16_t tcp_flags);
> > +
> > +/* Available implementations for dpif work. */
> > +int32_t
> > +dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
> > + struct dp_packet_batch *packets,
> > + odp_port_t in_port);
> > +
> > #endif /* netdev-private.h */
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> > index e6486417e..1f15af882 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -183,10 +183,6 @@ static uint32_t dpcls_subtable_lookup_reprobe(struct dpcls *cls);
> > static void dpcls_insert(struct dpcls *, struct dpcls_rule *,
> > const struct netdev_flow_key *mask);
> > static void dpcls_remove(struct dpcls *, struct dpcls_rule *);
> > -static bool dpcls_lookup(struct dpcls *cls,
> > - const struct netdev_flow_key *keys[],
> > - struct dpcls_rule **rules, size_t cnt,
> > - int *num_lookups_p);
> >
> > /* Set of supported meter flags */
> > #define DP_SUPPORTED_METER_FLAGS_MASK \
> > @@ -483,7 +479,7 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
> > const struct flow *flow,
> > const struct nlattr *actions,
> > size_t actions_len);
> > -static void dp_netdev_input(struct dp_netdev_pmd_thread *,
> > +static int32_t dp_netdev_input(struct dp_netdev_pmd_thread *,
> > struct dp_packet_batch *, odp_port_t port_no);
> > static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
> > struct dp_packet_batch *);
> > @@ -555,7 +551,7 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
> > bool purge);
> > static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
> > struct tx_port *tx);
> > -static inline struct dpcls *
> > +inline struct dpcls *
> > dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
> > odp_port_t in_port);
> >
> > @@ -1920,7 +1916,7 @@ void dp_netdev_flow_unref(struct dp_netdev_flow *flow)
> > }
> > }
> >
> > -static inline struct dpcls *
> > +inline struct dpcls *
> > dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
> > odp_port_t in_port)
> > {
> > @@ -2714,13 +2710,46 @@ smc_insert(struct dp_netdev_pmd_thread *pmd,
> > bucket->flow_idx[i] = index;
> > }
> >
> > +inline void
> > +emc_probabilistic_insert_batch(struct dp_netdev_pmd_thread *pmd,
> > + const struct netdev_flow_key *keys,
> > + struct dpcls_rule **rules,
> > + uint32_t emc_insert_mask)
> > +{
> > + while (emc_insert_mask) {
> > + uint32_t i = __builtin_ctz(emc_insert_mask);
>
> I got an error on Windows:
>
> [...]
> libtool: compile: build-aux/cccl -DHAVE_CONFIG_H -I. -I ./include/windows -I ./datapath-
> windows/include -Ic:/PTHREADS-BUILT//include -O2 -I ./include -I ./include -I ./lib -I ./lib -
> IC:/OpenSSL-Win64/include -Wstrict-prototypes -Wall -Wextra -Wno-sign-compare -Wpointer-arith -Wformat
> -Wformat-security -Wswitch-enum -Wunused-parameter -Wbad-function-cast -Wcast-align -Wstrict-
> prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-field-initializers -Wthread-safety -
> fno-strict-aliasing -Wswitch-bool -Wlogical-not-parentheses -Wsizeof-array-argument -Wbool-compare -
> Wshift-negative-value -Wduplicated-cond -Qunused-arguments -Wshadow -Wmultistatement-macros -Wcast-
> align=strict -Wno-null-pointer-arithmetic -Warray-bounds-pointer-arithmetic -g -DHAVE_AVX512F -c
> lib/dpif-netdev.c
> libtool: compile: mv -f "dpif-netdev-lookup-autovalidator.obj" "lib/dpif-netdev-lookup-
> autovalidator.obj"
> c:\PTHREADS-BUILT\include\_ptw32.h(120): warning C4005: 'HAVE_STRUCT_TIMESPEC': macro redefinition
> c:\openvswitch_compile\config.h(207): note: see previous definition of 'HAVE_STRUCT_TIMESPEC'
> c:\openvswitch_compile\lib\ovs-rcu.h(215): warning C4311: 'type cast': pointer truncation from 'void
> *' to 'long'
> libtool: compile: mv -f "dpif-netdev-lookup-generic.obj" "lib/dpif-netdev-lookup-generic.obj"
> dpif-netdev.c
> \
> source='lib/dpif-netdev-private-dfc.c' object='lib/dpif-netdev-private-dfc.lo' libtool=yes \
> DEPDIR=.deps depmode=none /bin/sh ./build-aux/depcomp \
> /bin/sh ./libtool --tag=CC --mode=compile build-aux/cccl -DHAVE_CONFIG_H -I. -I
> ./include/windows -I ./datapath-windows/include -Ic:/PTHREADS-BUILT//include -O2 -I ./include -I
> ./include -I ./lib -I ./lib -IC:/OpenSSL-Win64/include -Wstrict-prototypes -Wall -Wextra -Wno-sign-
> compare -Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter -Wbad-function-
> cast -Wcast-align -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-field-
> initializers -Wthread-safety -fno-strict-aliasing -Wswitch-bool -Wlogical-not-parentheses -Wsizeof-
> array-argument -Wbool-compare -Wshift-negative-value -Wduplicated-cond -Qunused-arguments -Wshadow -
> Wmultistatement-macros -Wcast-align=strict -Wno-null-pointer-arithmetic -Warray-bounds-pointer-
> arithmetic -g -DHAVE_AVX512F -c -o lib/dpif-netdev-private-dfc.lo lib/dpif-netdev-private-dfc.c
> c:\PTHREADS-BUILT\include\_ptw32.h(120): warning C4005: 'HAVE_STRUCT_TIMESPEC': macro redefinition
> c:\openvswitch_compile\config.h(207): note: see previous definition of 'HAVE_STRUCT_TIMESPEC'
> c:\openvswitch_compile\lib\ovs-rcu.h(215): warning C4311: 'type cast': pointer truncation from 'void
> *' to 'long'
> c:\openvswitch_compile\config.h(207): warning C4005: 'HAVE_STRUCT_TIMESPEC': macro redefinition
> c:\PTHREADS-BUILT\include\_ptw32.h(120): note: see previous definition of 'HAVE_STRUCT_TIMESPEC'
> lib/dpif-netdev.c(2826): error C4013: '__builtin_ctz' undefined; assuming extern returning int
> lib/dpif-netdev.c(2919): warning C4311: 'type cast': pointer truncation from 'const char *const ' to
> 'long'
> \
> source='lib/dpif-netdev-private-dpif.c' object='lib/dpif-netdev-private-dpif.lo' libtool=yes \
> DEPDIR=.deps depmode=none /bin/sh ./build-aux/depcomp \
> /bin/sh ./libtool --tag=CC --mode=compile build-aux/cccl -DHAVE_CONFIG_H -I. -I
> ./include/windows -I ./datapath-windows/include -Ic:/PTHREADS-BUILT//include -O2 -I ./include -I
> ./include -I ./lib -I ./lib -IC:/OpenSSL-Win64/include -Wstrict-prototypes -Wall -Wextra -Wno-sign-
> compare -Wpointer-arith -Wformat -Wformat-security -Wswitch-enum -Wunused-parameter -Wbad-function-
> cast -Wcast-align -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-field-
> initializers -Wthread-safety -fno-strict-aliasing -Wswitch-bool -Wlogical-not-parentheses -Wsizeof-
> array-argument -Wbool-compare -Wshift-negative-value -Wduplicated-cond -Qunused-arguments -Wshadow -
> Wmultistatement-macros -Wcast-align=strict -Wno-null-pointer-arithmetic -Warray-bounds-pointer-
> arithmetic -g -DHAVE_AVX512F -c -o lib/dpif-netdev-private-dpif.lo lib/dpif-netdev-private-dpif.c
> make[2]: *** [lib/dpif-netdev.lo] Error 1
> make[2]: *** Waiting for unfinished jobs....
>
Thanks for testing and finding this. We don't have the Windows machines set up to test this.
We need to use OVS's raw_ctz(). This will wrap the uses of __builtin_ctz and __builtin_ctzll. This should fix the above error. I'll fix this.
> Thanks,
> fbl
>
>
> > + emc_insert_mask &= emc_insert_mask - 1;
> > + /* Get the require parameters for EMC/SMC from the rule */
> > + struct dp_netdev_flow *flow = dp_netdev_flow_cast(rules[i]);
> > + /* Insert the key into EMC/SMC. */
> > + emc_probabilistic_insert(pmd, &keys[i], flow);
> > + }
> > +}
> > +
> > +inline void
> > +smc_insert_batch(struct dp_netdev_pmd_thread *pmd,
> > + const struct netdev_flow_key *keys,
> > + struct dpcls_rule **rules,
> > + uint32_t smc_insert_mask)
> > +{
> > + while (smc_insert_mask) {
> > + uint32_t i = __builtin_ctz(smc_insert_mask);
> > + smc_insert_mask &= smc_insert_mask - 1;
> > + /* Get the require parameters for EMC/SMC from the rule */
> > + struct dp_netdev_flow *flow = dp_netdev_flow_cast(rules[i]);
> > + uint32_t hash = dp_netdev_flow_hash(&flow->ufid);
> > + /* Insert the key into EMC/SMC. */
> > + smc_insert(pmd, &keys[i], hash);
> > + }
> > +}
> > +
> > static struct dp_netdev_flow *
> > dp_netdev_pmd_lookup_flow(struct dp_netdev_pmd_thread *pmd,
> > const struct netdev_flow_key *key,
> > int *lookup_num_p)
> > {
> > struct dpcls *cls;
> > - struct dpcls_rule *rule;
> > + struct dpcls_rule *rule = NULL;
> > odp_port_t in_port = u32_to_odp(MINIFLOW_GET_U32(&key->mf,
> > in_port.odp_port));
> > struct dp_netdev_flow *netdev_flow = NULL;
> > @@ -4233,7 +4262,10 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
> > }
> >
> > /* Process packet batch. */
> > - pmd->netdev_input_func(pmd, &batch, port_no);
> > + int32_t ret = pmd->netdev_input_func(pmd, &batch, port_no);
> > + if (ret) {
> > + dp_netdev_input(pmd, &batch, port_no);
> > + }
> >
> > /* Assign processing cycles to rx queue. */
> > cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
> > @@ -5251,6 +5283,8 @@ dpif_netdev_run(struct dpif *dpif)
> > non_pmd->ctx.emc_insert_min = 0;
> > }
> >
> > + non_pmd->ctx.smc_enable_db = dp->smc_enable_db;
> > +
> > for (i = 0; i < port->n_rxq; i++) {
> >
> > if (!netdev_rxq_enabled(port->rxqs[i].rx)) {
> > @@ -5522,6 +5556,8 @@ reload:
> > pmd->ctx.emc_insert_min = 0;
> > }
> >
> > + pmd->ctx.smc_enable_db = pmd->dp->smc_enable_db;
> > +
> > process_packets =
> > dp_netdev_process_rxq_port(pmd, poll_list[i].rxq,
> > poll_list[i].port_no);
> > @@ -6415,6 +6451,24 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
> > actions->actions, actions->size);
> > }
> >
> > +void
> > +dp_netdev_batch_execute(struct dp_netdev_pmd_thread *pmd,
> > + struct dp_packet_batch *packets,
> > + struct dpcls_rule *rule,
> > + uint32_t bytes,
> > + uint16_t tcp_flags)
> > +{
> > + /* Gets action* from the rule. */
> > + struct dp_netdev_flow *flow = dp_netdev_flow_cast(rule);
> > + struct dp_netdev_actions *actions = dp_netdev_flow_get_actions(flow);
> > +
> > + dp_netdev_flow_used(flow, dp_packet_batch_size(packets), bytes,
> > + tcp_flags, pmd->ctx.now / 1000);
> > + const uint32_t steal = 1;
> > + dp_netdev_execute_actions(pmd, packets, steal, &flow->flow,
> > + actions->actions, actions->size);
> > +}
> > +
> > static inline void
> > dp_netdev_queue_batches(struct dp_packet *pkt,
> > struct dp_netdev_flow *flow, uint16_t tcp_flags,
> > @@ -6519,6 +6573,30 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
> > pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT, n_smc_hit);
> > }
> >
> > +struct dp_netdev_flow *
> > +smc_lookup_single(struct dp_netdev_pmd_thread *pmd,
> > + struct dp_packet *packet,
> > + struct netdev_flow_key *key)
> > +{
> > + const struct cmap_node *flow_node = smc_entry_get(pmd, key->hash);
> > +
> > + if (OVS_LIKELY(flow_node != NULL)) {
> > + struct dp_netdev_flow *flow = NULL;
> > +
> > + CMAP_NODE_FOR_EACH (flow, node, flow_node) {
> > + /* Since we dont have per-port megaflow to check the port
> > + * number, we need to verify that the input ports match. */
> > + if (OVS_LIKELY(dpcls_rule_matches_key(&flow->cr, key) &&
> > + flow->flow.in_port.odp_port == packet->md.in_port.odp_port)) {
> > +
> > + return (void *) flow;
> > + }
> > + }
> > + }
> > +
> > + return NULL;
> > +}
> > +
> > /* Try to process all ('cnt') the 'packets' using only the datapath flow cache
> > * 'pmd->flow_cache'. If a flow is not found for a packet 'packets[i]', the
> > * miniflow is copied into 'keys' and the packet pointer is moved at the
> > @@ -6924,12 +7002,13 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
> > }
> > }
> >
> > -static void
> > +static int32_t
> > dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
> > struct dp_packet_batch *packets,
> > odp_port_t port_no)
> > {
> > dp_netdev_input__(pmd, packets, false, port_no);
> > + return 0;
> > }
> >
> > static void
> > @@ -8369,7 +8448,7 @@ dpcls_flow_key_gen_masks(const struct netdev_flow_key *tbl,
> >
> > /* Returns true if 'target' satisfies 'key' in 'mask', that is, if each 1-bit
> > * in 'mask' the values in 'key' and 'target' are the same. */
> > -bool
> > +inline bool ALWAYS_INLINE
> > dpcls_rule_matches_key(const struct dpcls_rule *rule,
> > const struct netdev_flow_key *target)
> > {
> > @@ -8395,7 +8474,7 @@ dpcls_rule_matches_key(const struct dpcls_rule *rule,
> > * priorities, instead returning any rule which matches the flow.
> > *
> > * Returns true if all miniflows found a corresponding rule. */
> > -static bool
> > +bool
> > dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
> > struct dpcls_rule **rules, const size_t cnt,
> > int *num_lookups_p)
> > --
> > 2.32.0
> >
> > _______________________________________________
> > dev mailing list
> > dev at openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
> --
> fbl
More information about the dev
mailing list