[ovs-dev] [v4 11/12] dpif-netdev/mfex: add more AVX512 traffic profiles

Kumar Amber kumar.amber at intel.com
Thu Jun 17 16:27:53 UTC 2021


From: Harry van Haaren <harry.van.haaren at intel.com>

This commit adds 3 new traffic profile implementations to the
existing avx512 miniflow extract infrastructure. The profiles added are:
- Ether()/IP()/TCP()
- Ether()/Dot1Q()/IP()/UDP()
- Ether()/Dot1Q()/IP()/TCP()

The design of the avx512 code here is for scalability to add more
traffic profiles, as well as enabling CPU ISA. Note that an implementation
is primarily adding static const data, which the compiler then specializes
away when the profile specific function is declared below.

As a result, the code is relatively maintainable, and scalable for new
traffic profiles as well as new ISA, and does not lower performance
compared with manually written code for each profile/ISA.

Note that confidence in the correctness of each implementation is
achieved through autovalidation, unit tests with known packets, and
fuzz tested packets.

Signed-off-by: Harry van Haaren <harry.van.haaren at intel.com>

---

Hi Readers,

If you have a traffic profile you'd like to see accelerated using
avx512 code, please send me an email and we can collaborate on adding
support for it!

Regards, -Harry
---
 lib/dpif-netdev-extract-avx512.c  | 155 ++++++++++++++++++++++++++++++
 lib/dpif-netdev-private-extract.c |  31 ++++++
 lib/dpif-netdev-private-extract.h |   4 +
 3 files changed, 190 insertions(+)

diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 1145ac8a9..0e0f6e295 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -117,6 +117,13 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a)
 
 #define PATTERN_ETHERTYPE_MASK PATTERN_ETHERTYPE_GEN(0xFF, 0xFF)
 #define PATTERN_ETHERTYPE_IPV4 PATTERN_ETHERTYPE_GEN(0x08, 0x00)
+#define PATTERN_ETHERTYPE_DT1Q PATTERN_ETHERTYPE_GEN(0x81, 0x00)
+
+/* VLAN (Dot1Q) patterns and masks. */
+#define PATTERN_DT1Q_MASK                                               \
+  0x00, 0x00, 0xFF, 0xFF,
+#define PATTERN_DT1Q_IPV4                                               \
+  0x00, 0x00, 0x08, 0x00,
 
 /* Generator for checking IPv4 ver, ihl, and proto */
 #define PATTERN_IPV4_GEN(VER_IHL, FLAG_OFF_B0, FLAG_OFF_B1, PROTO) \
@@ -142,6 +149,29 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a)
   34, 35, 36, 37, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* UDP */   \
   NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. */
 
+/* TCP shuffle: tcp_ctl bits require mask/processing, not included here. */
+#define PATTERN_IPV4_TCP_SHUFFLE \
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, NU, NU, /* Ether */ \
+  26, 27, 28, 29, 30, 31, 32, 33, NU, NU, NU, NU, 20, 15, 22, 23, /* IPv4 */  \
+  NU, NU, NU, NU, NU, NU, NU, NU, 34, 35, 36, 37, NU, NU, NU, NU, /* TCP */   \
+  NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. */
+
+#define PATTERN_DT1Q_IPV4_UDP_SHUFFLE                                         \
+  /* Ether (2 blocks): Note that *VLAN* type is written here. */              \
+  0,  1,  2,  3,  4,  5,  6,  7, 8,  9, 10, 11, 16, 17,  0,  0,               \
+  /* VLAN (1 block): Note that the *EtherHdr->Type* is written here. */       \
+  12, 13, 14, 15, 0, 0, 0, 0,                                                 \
+  30, 31, 32, 33, 34, 35, 36, 37, 0, 0, 0, 0, 24, 19, 26, 27,     /* IPv4 */  \
+  38, 39, 40, 41, NU, NU, NU, NU, /* UDP */
+
+#define PATTERN_DT1Q_IPV4_TCP_SHUFFLE                                         \
+  /* Ether (2 blocks): Note that *VLAN* type is written here. */              \
+  0,  1,  2,  3,  4,  5,  6,  7, 8,  9, 10, 11, 16, 17,  0,  0,               \
+  /* VLAN (1 block): Note that the *EtherHdr->Type* is written here. */       \
+  12, 13, 14, 15, 0, 0, 0, 0,                                                 \
+  30, 31, 32, 33, 34, 35, 36, 37, 0, 0, 0, 0, 24, 19, 26, 27,     /* IPv4 */  \
+  NU, NU, NU, NU, NU, NU, NU, NU, 38, 39, 40, 41, NU, NU, NU, NU, /* TCP */   \
+  NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. */
 
 /* Generation of K-mask bitmask values, to zero out data in result. Note that
  * these correspond 1:1 to the above "*_SHUFFLE" values, and bit used must be
@@ -151,12 +181,22 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a)
  * Note the ULL suffix allows shifting by 32 or more without integer overflow.
  */
 #define KMASK_ETHER     0x1FFFULL
+#define KMASK_DT1Q      0x000FULL
 #define KMASK_IPV4      0xF0FFULL
 #define KMASK_UDP       0x000FULL
+#define KMASK_TCP       0x0F00ULL
 
 #define PATTERN_IPV4_UDP_KMASK \
     (KMASK_ETHER | (KMASK_IPV4 << 16) | (KMASK_UDP << 32))
 
+#define PATTERN_IPV4_TCP_KMASK \
+    (KMASK_ETHER | (KMASK_IPV4 << 16) | (KMASK_TCP << 32))
+
+#define PATTERN_DT1Q_IPV4_UDP_KMASK \
+    (KMASK_ETHER | (KMASK_DT1Q << 16) | (KMASK_IPV4 << 24) | (KMASK_UDP << 40))
+
+#define PATTERN_DT1Q_IPV4_TCP_KMASK \
+    (KMASK_ETHER | (KMASK_DT1Q << 16) | (KMASK_IPV4 << 24) | (KMASK_TCP << 40))
 
 /* This union allows initializing static data as u8, but easily loading it
  * into AVX512 registers too. The union ensures proper alignment for the zmm.
@@ -194,6 +234,9 @@ struct mfex_profile {
 
 enum MFEX_PROFILES {
     PROFILE_ETH_IPV4_UDP,
+    PROFILE_ETH_IPV4_TCP,
+    PROFILE_ETH_VLAN_IPV4_UDP,
+    PROFILE_ETH_VLAN_IPV4_TCP,
     PROFILE_COUNT,
 };
 
@@ -215,6 +258,56 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] =
         },
         .dp_pkt_min_size = 42,
     },
+
+    [PROFILE_ETH_IPV4_TCP] = {
+        .probe_mask.u8_data = { PATTERN_ETHERTYPE_MASK PATTERN_IPV4_MASK },
+        .probe_data.u8_data = { PATTERN_ETHERTYPE_IPV4 PATTERN_IPV4_TCP},
+
+        .store_shuf.u8_data = { PATTERN_IPV4_TCP_SHUFFLE },
+        .store_kmsk = PATTERN_IPV4_TCP_KMASK,
+
+        .mf_bits = { 0x18a0000000000000, 0x0000000000044401},
+        .dp_pkt_offs = {
+            0, UINT16_MAX, 14, 34,
+        },
+        .dp_pkt_min_size = 54,
+    },
+
+    [PROFILE_ETH_VLAN_IPV4_UDP] = {
+        .probe_mask.u8_data = {
+            PATTERN_ETHERTYPE_MASK PATTERN_DT1Q_MASK PATTERN_IPV4_MASK
+        },
+        .probe_data.u8_data = {
+            PATTERN_ETHERTYPE_DT1Q PATTERN_DT1Q_IPV4 PATTERN_IPV4_UDP
+        },
+
+        .store_shuf.u8_data = { PATTERN_DT1Q_IPV4_UDP_SHUFFLE },
+        .store_kmsk = PATTERN_DT1Q_IPV4_UDP_KMASK,
+
+        .mf_bits = { 0x38a0000000000000, 0x0000000000040401},
+        .dp_pkt_offs = {
+            14, UINT16_MAX, 18, 38,
+        },
+        .dp_pkt_min_size = 46,
+    },
+
+    [PROFILE_ETH_VLAN_IPV4_TCP] = {
+        .probe_mask.u8_data = {
+            PATTERN_ETHERTYPE_MASK PATTERN_DT1Q_MASK PATTERN_IPV4_MASK
+        },
+        .probe_data.u8_data = {
+            PATTERN_ETHERTYPE_DT1Q PATTERN_DT1Q_IPV4 PATTERN_IPV4_TCP
+        },
+
+        .store_shuf.u8_data = { PATTERN_DT1Q_IPV4_TCP_SHUFFLE },
+        .store_kmsk = PATTERN_DT1Q_IPV4_TCP_KMASK,
+
+        .mf_bits = { 0x38a0000000000000, 0x0000000000044401},
+        .dp_pkt_offs = {
+            14, UINT16_MAX, 18, 38,
+        },
+        .dp_pkt_min_size = 46,
+    },
 };
 
 
@@ -233,6 +326,28 @@ mfex_ipv4_set_l2_pad_size(struct dp_packet *pkt, struct ip_header *nh,
         return 0;
 }
 
+/* Fixup the VLAN CFI and PCP, reading the PCP from the input to this function,
+ * and storing the output CFI bit bitwise-OR-ed with the PCP to miniflow.
+ */
+static void
+mfex_vlan_pcp(const uint8_t vlan_pcp, uint64_t *block)
+{
+    /* Bitwise-OR in the CFI flag, keeping other data the same. */
+    uint8_t *cfi_byte = (uint8_t *) block;
+    cfi_byte[2] = 0x10 | vlan_pcp;
+}
+
+/* Process TCP flags using known LE endian-ness as this is AVX512 code. */
+#define TCP_FLAGS_BE32(tcp_ctl) ((OVS_FORCE ovs_be32) TCP_FLAGS_BE16(tcp_ctl))
+
+static void
+mfex_handle_tcp_flags(const struct tcp_header *tcp, uint64_t *block)
+{
+    uint16_t ctl = (OVS_FORCE uint16_t) TCP_FLAGS_BE16(tcp->tcp_ctl);
+    uint64_t ctl_u64 = ctl;
+    *block = ctl_u64 << 32;
+}
+
 /* Generic loop to process any mfex profile. This code is specialized into
  * multiple actual MFEX implementation functions. Its marked ALWAYS_INLINE
  * to ensure the compiler specializes each instance. The code is marked "hot"
@@ -321,6 +436,43 @@ mfex_avx512_process(struct dp_packet_batch *packets,
             ovs_assert(0); /* avoid compiler warning on missing ENUM */
             break;
 
+        case PROFILE_ETH_VLAN_IPV4_TCP: {
+                mfex_vlan_pcp(pkt[14], &keys[i].buf[4]);
+
+                uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN;
+                struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN];
+                if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) {
+                    continue;
+                }
+
+                /* Process TCP flags, and store to blocks. */
+                const struct tcp_header *tcp = (void *)&pkt[38];
+                mfex_handle_tcp_flags(tcp, &blocks[7]);
+            } break;
+
+        case PROFILE_ETH_VLAN_IPV4_UDP: {
+                mfex_vlan_pcp(pkt[14], &keys[i].buf[4]);
+
+                uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN;
+                struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN];
+                if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) {
+                    continue;
+                }
+            } break;
+
+        case PROFILE_ETH_IPV4_TCP: {
+                /* Process TCP flags, and store to blocks. */
+                const struct tcp_header *tcp = (void *)&pkt[34];
+                mfex_handle_tcp_flags(tcp, &blocks[6]);
+
+                /* Handle dynamic l2_pad_size. */
+                uint32_t size_from_ipv4 = size - sizeof(struct eth_header);
+                struct ip_header *nh = (void *)&pkt[sizeof(struct eth_header)];
+                if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) {
+                    continue;
+                }
+            } break;
+
         case PROFILE_ETH_IPV4_UDP: {
                 /* Handle dynamic l2_pad_size. */
                 uint32_t size_from_ipv4 = size - sizeof(struct eth_header);
@@ -370,6 +522,9 @@ mfex_avx512_##name(struct dp_packet_batch *packets,                     \
  * as required.
  */
 DECLARE_MFEX_FUNC(ip_udp,PROFILE_ETH_IPV4_UDP)
+DECLARE_MFEX_FUNC(ip_tcp,PROFILE_ETH_IPV4_TCP)
+DECLARE_MFEX_FUNC(dot1q_ip_udp,PROFILE_ETH_VLAN_IPV4_UDP)
+DECLARE_MFEX_FUNC(dot1q_ip_tcp,PROFILE_ETH_VLAN_IPV4_TCP)
 
 
 static int32_t
diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c
index 106a83867..65072eb38 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -60,6 +60,37 @@ static struct dpif_miniflow_extract_impl mfex_impls[] = {
         .extract_func = mfex_avx512_ip_udp,
         .name = "avx512_ipv4_udp",
     },
+    {
+        .probe = mfex_avx512_vbmi_probe,
+        .extract_func = mfex_avx512_vbmi_ip_tcp,
+        .name = "avx512_vbmi_ipv4_tcp",
+    },
+    {
+        .probe = mfex_avx512_probe,
+        .extract_func = mfex_avx512_ip_tcp,
+        .name = "avx512_ipv4_tcp",
+    },
+
+    {
+        .probe = mfex_avx512_vbmi_probe,
+        .extract_func = mfex_avx512_vbmi_dot1q_ip_udp,
+        .name = "avx512_vbmi_dot1q_ipv4_udp",
+    },
+    {
+        .probe = mfex_avx512_probe,
+        .extract_func = mfex_avx512_dot1q_ip_udp,
+        .name = "avx512_dot1q_ipv4_udp",
+    },
+    {
+        .probe = mfex_avx512_vbmi_probe,
+        .extract_func = mfex_avx512_vbmi_dot1q_ip_tcp,
+        .name = "avx512_vbmi_dot1q_ipv4_tcp",
+    },
+    {
+        .probe = mfex_avx512_probe,
+        .extract_func = mfex_avx512_dot1q_ip_tcp,
+        .name = "avx512_dot1q_ipv4_tcp",
+    },
 #endif
 };
 
diff --git a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h
index f32be202a..b9a59c5a0 100644
--- a/lib/dpif-netdev-private-extract.h
+++ b/lib/dpif-netdev-private-extract.h
@@ -152,6 +152,10 @@ int32_t mfex_avx512_vbmi_probe(void);
                         odp_port_t in_port, void *pmd_handle);
 
 DECLARE_AVX512_MFEX_PROTOTYPE(ip_udp);
+DECLARE_AVX512_MFEX_PROTOTYPE(ip_tcp);
+DECLARE_AVX512_MFEX_PROTOTYPE(dot1q_ip_udp);
+DECLARE_AVX512_MFEX_PROTOTYPE(dot1q_ip_tcp);
+
 #endif /* __x86_64__ */
 
 
-- 
2.25.1



More information about the dev mailing list