[ovs-dev] [v1 v1 4/6] dpif-netdev: Add avx512 miniflow extract for traffic ip/udp.

Kumar Amber kumar.amber at intel.com
Wed Apr 28 07:55:52 UTC 2021


This patch introduces avx512 optimized function
pointer for IP/UDP traffic type and supporting
functions in dpif-netdev-extract-avx512.

Signed-off-by: Harry van Haaren <harry.van.haaren at intel.com>
Co-authored-by: Kumar Amber <kumar.amber at intel.com>
Signed-off-by: Kumar Amber <kumar.amber at intel.com>
---
 lib/automake.mk                   |   1 +
 lib/dpdk.c                        |   1 +
 lib/dpif-netdev-extract-avx512.c  | 218 ++++++++++++++++++++++++++++++
 lib/dpif-netdev-private-extract.c |   5 +
 lib/dpif-netdev-private-extract.h |  11 ++
 5 files changed, 236 insertions(+)
 create mode 100644 lib/dpif-netdev-extract-avx512.c

diff --git a/lib/automake.mk b/lib/automake.mk
index 783730fc7..8e2d84e29 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -113,6 +113,7 @@ lib_libopenvswitch_la_SOURCES = \
 	lib/dpif-netdev-lookup-generic.c \
 	lib/dpif-netdev.c \
 	lib/dpif-netdev.h \
+	lib/dpif-netdev-extract-avx512.c \
 	lib/dpif-netdev-extract-study.c \
 	lib/dpif-netdev-private-dfc.h \
 	lib/dpif-netdev-private-dpcls.h \
diff --git a/lib/dpdk.c b/lib/dpdk.c
index a9494a40f..e0c76abe3 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -657,6 +657,7 @@ dpdk_get_cpu_has_isa(const char *arch, const char *feature)
     CHECK_CPU_FEATURE(feature, "avx512f", RTE_CPUFLAG_AVX512F);
     CHECK_CPU_FEATURE(feature, "avx512vpopcntdq", RTE_CPUFLAG_AVX512VPOPCNTDQ);
     CHECK_CPU_FEATURE(feature, "bmi2", RTE_CPUFLAG_BMI2);
+    CHECK_CPU_FEATURE(feature, "avx512bw", RTE_CPUFLAG_AVX512BW);
 #endif
 
     VLOG_WARN("Unknown CPU arch,feature: %s,%s. Returning not supported.\n",
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
new file mode 100644
index 000000000..169775f4b
--- /dev/null
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2021 Intel.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <config.h>
+#include <errno.h>
+#include <immintrin.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "dpdk.h"
+#include "dpif-netdev-private-dpcls.h"
+#include "dpif-netdev-private-extract.h"
+#include "flow.h"
+
+/* This file contains optimized implementations of miniflow_extract()
+ * for specific common traffic patterns. The optimizations allow for
+ * quick probing of a specific packet type, and if a match with a specific
+ * type is found, a shuffle like proceedure builds up the required miniflow.
+ *
+ * The functionality here can be easily auto-validated and tested against the
+ * scalar miniflow_extract() function. As such, manual review of the code by
+ * the community (although welcome) is not required. Confidence in the
+ * correctness of the code can be had from the autovalidation.
+ */
+
+/* Generator for EtherType masks and values. */
+#define PATTERN_ETHERTYPE_GEN(type_b0, type_b1) \
+  0, 0, 0, 0, 0, 0, /* Ether MAC DST */                                 \
+  0, 0, 0, 0, 0, 0, /* Ether MAC SRC */                                 \
+  type_b0, type_b1, /* EtherType */
+
+#define PATTERN_ETHERTYPE_MASK PATTERN_ETHERTYPE_GEN(0xFF, 0xFF)
+#define PATTERN_ETHERTYPE_IPV4 PATTERN_ETHERTYPE_GEN(0x08, 0x00)
+
+/* Generator for checking IPv4 ver, ihl, and proto */
+#define PATTERN_IPV4_GEN(VER_IHL, FLAG_OFF_B0, FLAG_OFF_B1, PROTO) \
+  VER_IHL, /* Version and IHL */                                        \
+  0, 0, 0, /* DSCP, ECN, Total Lenght */                                \
+  0, 0, /* Identification */                                            \
+  /* Flags/Fragment offset: don't match MoreFrag (MF) or FragOffset */  \
+  FLAG_OFF_B0, FLAG_OFF_B1,                                             \
+  0, /* TTL */                                                          \
+  PROTO, /* Protocol */                                                 \
+  0, 0, /* Header checksum */                                           \
+  0, 0, 0, 0, /* Src IP */                                              \
+  0, 0, 0, 0, /* Dst IP */
+
+#define PATTERN_IPV4_MASK PATTERN_IPV4_GEN(0xFF, 0xFE, 0xFF, 0xFF)
+#define PATTERN_IPV4_UDP PATTERN_IPV4_GEN(0x45, 0, 0, 0x11)
+
+#define NU 0
+#define PATTERN_IPV4_UDP_SHUFFLE \
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, NU, NU, /* Ether */ \
+  26, 27, 28, 29, 30, 31, 32, 33, NU, NU, NU, NU, 20, 15, 22, 23, /* IPv4 */  \
+  34, 35, 36, 37, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* UDP */
+
+/* Masks for Ether()/IP()/UDP() traffic */
+static const uint8_t eth_ip_udp_mask[64] = {
+    PATTERN_ETHERTYPE_MASK PATTERN_IPV4_MASK
+};
+static const uint8_t eth_ip_udp_values[64] = {
+    PATTERN_ETHERTYPE_IPV4 PATTERN_IPV4_UDP
+};
+static const uint8_t eth_ip_udp_shuf[64] = {
+    PATTERN_IPV4_UDP_SHUFFLE
+};
+
+static inline __m512i
+__attribute__((target("avx512bw")))
+_mm512_maskz_permutex2var_epi8_skx(__mmask64 k_mask,
+                                   __m512i v_data_0,
+                                   __m512i v_shuf_idxs,
+                                   __m512i v_data_1)
+{
+    /* Manipulate shuffle indexes for u16 size. */
+    __mmask64 k_mask_odd_lanes = 0xAAAAAAAAAAAAAAAA;
+    /* clear away ODD lane bytes. Cannot be done above due to no u8 shift */
+    __m512i v_shuf_idx_evn = _mm512_mask_blend_epi8(k_mask_odd_lanes,
+                v_shuf_idxs, _mm512_setzero_si512());
+    v_shuf_idx_evn = _mm512_srli_epi16(v_shuf_idx_evn, 1);
+
+    __m512i v_shuf_idx_odd = _mm512_srli_epi16(v_shuf_idxs, 9);
+
+    /* Shuffle each half at 16-bit width */
+    __m512i v_shuf1 = _mm512_permutex2var_epi16(v_data_0, v_shuf_idx_evn,
+                                                v_data_1);
+    __m512i v_shuf2 = _mm512_permutex2var_epi16(v_data_0, v_shuf_idx_odd,
+                                                v_data_1);
+
+    /* Find if the shuffle index was odd, via mask and compare */
+    uint16_t index_odd_mask = 0x1;
+    const __m512i v_index_mask_u16 = _mm512_set1_epi16(index_odd_mask);
+
+    /* EVEN lanes, find if u8 index was odd,  result as u16 bitmask */
+    __m512i v_idx_even_masked = _mm512_and_si512(v_shuf_idxs,
+                                                 v_index_mask_u16);
+    __mmask32 evn_rotate_mask = _mm512_cmpeq_epi16_mask(v_idx_even_masked,
+                                                        v_index_mask_u16);
+
+    /* ODD lanes, find if u8 index was odd, result as u16 bitmask */
+    __m512i v_shuf_idx_srli8 = _mm512_srli_epi16(v_shuf_idxs, 8);
+    __m512i v_idx_odd_masked = _mm512_and_si512(v_shuf_idx_srli8,
+                                                v_index_mask_u16);
+    __mmask32 odd_rotate_mask = _mm512_cmpeq_epi16_mask(v_idx_odd_masked,
+                                                        v_index_mask_u16);
+    odd_rotate_mask = ~odd_rotate_mask;
+
+    /* Rotate and blend results from each index */
+    __m512i v_shuf_res_evn = _mm512_mask_srli_epi16(v_shuf1, evn_rotate_mask,
+                                                    v_shuf1, 8);
+    __m512i v_shuf_res_odd = _mm512_mask_slli_epi16(v_shuf2, odd_rotate_mask,
+                                                    v_shuf2, 8);
+
+    /* If shuffle index was odd, blend shifted version */
+    __m512i v_shuf_result = _mm512_mask_blend_epi8(k_mask_odd_lanes,
+                                               v_shuf_res_evn, v_shuf_res_odd);
+
+    __m512i v_zeros = _mm512_setzero_si512();
+    __m512i v_result_kmskd = _mm512_mask_blend_epi8(k_mask, v_zeros,
+                                                    v_shuf_result);
+
+    return v_result_kmskd;
+}
+
+static inline void
+__attribute__((target("avx512bw")))
+avx512_ipv4_udp_store(const uint8_t *pkt, struct miniflow *mf,
+                          uint32_t in_port)
+{
+    int64_t u0b = 0x18a0000000000000;
+    int64_t u1b = 0x0000000000040401;
+    __m128i v_bits = {u0b, u1b};
+
+    /* Store mf Bits */
+    uint64_t *bits = (void *)&mf->map.bits[0];
+    uint64_t *blocks = miniflow_values(mf);
+    _mm_storeu_si128((void *) bits, v_bits);
+
+    /* Load packet and shuffle */
+    __m512i v_pkt0 = _mm512_loadu_si512(&pkt[0]);
+    __m512i v_eth_ip_udp_shuf = _mm512_loadu_si512(eth_ip_udp_shuf);
+
+    /* Shuffle pkt and store blocks */
+    __mmask64 k_shufzero = 0b0000111111110000111111110011111111111111;
+    __m512i v_zeros = _mm512_setzero_si512();
+    __m512i v_blk0 = _mm512_maskz_permutex2var_epi8_skx(k_shufzero,
+                                        v_pkt0, v_eth_ip_udp_shuf, v_zeros);
+
+    _mm512_storeu_si512(&blocks[2], v_blk0);
+
+    uint64_t inp = ((uint64_t) in_port) << 32;
+    blocks[0] = inp;
+}
+
+static inline uint32_t
+__attribute__((target("avx512bw")))
+avx512_ipv4_udp_probe(const uint8_t *pkt, uint32_t len)
+{
+    /* Packet data is masked to known IPv4/UDP parse length. */
+    uint64_t klen = UINT64_MAX;
+    if (len < 64) {
+        klen = (1ULL << len) - 1;
+    }
+
+    __m512i v_pkt0 = _mm512_maskz_loadu_epi8(klen, &pkt[0]);
+    __m512i v_eth_ip_udp_mask = _mm512_loadu_si512(eth_ip_udp_mask);
+    __m512i v_eth_ip_udp_vals = _mm512_loadu_si512(eth_ip_udp_values);
+    __m512i v_pkt0_masked = _mm512_and_si512(v_pkt0, v_eth_ip_udp_mask);
+    __mmask64 k_cmp = _mm512_cmpeq_epi8_mask(v_pkt0_masked, v_eth_ip_udp_vals);
+
+    return (k_cmp == -1);
+}
+
+uint32_t
+__attribute__((target("avx512bw")))
+mfex_avx512_ipv4_udp(struct dp_packet_batch *packets,
+                     struct netdev_flow_key *keys,
+                     uint32_t keys_size OVS_UNUSED, odp_port_t in_port,
+                     void *pmd_handle OVS_UNUSED)
+{
+    uint32_t hitmask = 0;
+    struct dp_packet *packet;
+    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
+        const uint32_t size = dp_packet_size(packet);
+        const uint8_t *pkt = dp_packet_data(packet);
+        uint32_t match = avx512_ipv4_udp_probe(pkt, size);
+        if (match) {
+            avx512_ipv4_udp_store(pkt, &keys[i].mf, in_port);
+            hitmask |= 1 << i;
+        }
+    }
+    return hitmask;
+}
+
+int32_t
+mfex_avx512_probe(void)
+{
+    int avx512f_available = dpdk_get_cpu_has_isa("x86_64", "avx512f");
+    int bmi2_available = dpdk_get_cpu_has_isa("x86_64", "bmi2");
+    int avx512bw_available = dpdk_get_cpu_has_isa("x86_64", "avx512bw");
+    if (!avx512f_available || !avx512bw_available || !bmi2_available) {
+        return -ENOTSUP;
+    }
+
+    return 0;
+}
diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c
index 76c24c2f8..060c1939a 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -47,6 +47,11 @@ static struct dpif_miniflow_extract_impl mfex_impls[] = {
         .extract_func = mfex_study_traffic,
         .name = "study",
     },
+    {
+        .probe = mfex_avx512_probe,
+        .extract_func = mfex_avx512_ipv4_udp,
+        .name = "avx512_ip_udp",
+    },
 };
 
 BUILD_ASSERT_DECL(MFEX_IMPLS_MAX_SIZE > ARRAY_SIZE(mfex_impls));
diff --git a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h
index 3ada413bb..e7b45c2b1 100644
--- a/lib/dpif-netdev-private-extract.h
+++ b/lib/dpif-netdev-private-extract.h
@@ -118,4 +118,15 @@ mfex_study_traffic(struct dp_packet_batch *packets,
                    uint32_t keys_size, odp_port_t in_port,
                    void *pmd_handle);
 
+/* Probe function to detect CPU ISA for SKX. */
+int32_t
+mfex_avx512_probe(void);
+
+/* Traffic specific AVX512 Eth/Ipv4/Udp traffic type for SKX. */
+uint32_t
+mfex_avx512_ipv4_udp(struct dp_packet_batch *packets,
+                         struct netdev_flow_key *keys,
+                         uint32_t keys_size, odp_port_t in_port,
+                         void *pmd_handle);
+
 #endif /* DPIF_NETDEV_AVX512_EXTRACT */
-- 
2.25.1



More information about the dev mailing list