[ovs-dev] [v4 11/12] dpif-netdev/mfex: add more AVX512 traffic profiles

Flavio Leitner fbl at sysclose.org
Wed Jun 30 20:37:55 UTC 2021


Hi,

On Thu, Jun 17, 2021 at 09:57:53PM +0530, Kumar Amber wrote:
> From: Harry van Haaren <harry.van.haaren at intel.com>
> 
> This commit adds 3 new traffic profile implementations to the
> existing avx512 miniflow extract infrastructure. The profiles added are:
> - Ether()/IP()/TCP()
> - Ether()/Dot1Q()/IP()/UDP()
> - Ether()/Dot1Q()/IP()/TCP()
> 
> The design of the avx512 code here is for scalability to add more
> traffic profiles, as well as enabling CPU ISA. Note that an implementation
> is primarily adding static const data, which the compiler then specializes
> away when the profile specific function is declared below.
> 
> As a result, the code is relatively maintainable, and scalable for new
> traffic profiles as well as new ISA, and does not lower performance
> compared with manually written code for each profile/ISA.
> 
> Note that confidence in the correctness of each implementation is
> achieved through autovalidation, unit tests with known packets, and
> fuzz tested packets.
> 
> Signed-off-by: Harry van Haaren <harry.van.haaren at intel.com>
> 
> ---
> 
> Hi Readers,
> 
> If you have a traffic profile you'd like to see accelerated using
> avx512 code, please send me an email and we can collaborate on adding
> support for it!
> 
> Regards, -Harry
> ---
>  lib/dpif-netdev-extract-avx512.c  | 155 ++++++++++++++++++++++++++++++
>  lib/dpif-netdev-private-extract.c |  31 ++++++
>  lib/dpif-netdev-private-extract.h |   4 +
>  3 files changed, 190 insertions(+)
> 
> diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
> index 1145ac8a9..0e0f6e295 100644
> --- a/lib/dpif-netdev-extract-avx512.c
> +++ b/lib/dpif-netdev-extract-avx512.c
> @@ -117,6 +117,13 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a)
>  
>  #define PATTERN_ETHERTYPE_MASK PATTERN_ETHERTYPE_GEN(0xFF, 0xFF)
>  #define PATTERN_ETHERTYPE_IPV4 PATTERN_ETHERTYPE_GEN(0x08, 0x00)
> +#define PATTERN_ETHERTYPE_DT1Q PATTERN_ETHERTYPE_GEN(0x81, 0x00)
> +
> +/* VLAN (Dot1Q) patterns and masks. */
> +#define PATTERN_DT1Q_MASK                                               \
> +  0x00, 0x00, 0xFF, 0xFF,
> +#define PATTERN_DT1Q_IPV4                                               \
> +  0x00, 0x00, 0x08, 0x00,
>  
>  /* Generator for checking IPv4 ver, ihl, and proto */
>  #define PATTERN_IPV4_GEN(VER_IHL, FLAG_OFF_B0, FLAG_OFF_B1, PROTO) \
> @@ -142,6 +149,29 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a)
>    34, 35, 36, 37, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* UDP */   \
>    NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. */
>  
> +/* TCP shuffle: tcp_ctl bits require mask/processing, not included here. */
> +#define PATTERN_IPV4_TCP_SHUFFLE \
> +   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, NU, NU, /* Ether */ \
> +  26, 27, 28, 29, 30, 31, 32, 33, NU, NU, NU, NU, 20, 15, 22, 23, /* IPv4 */  \
> +  NU, NU, NU, NU, NU, NU, NU, NU, 34, 35, 36, 37, NU, NU, NU, NU, /* TCP */   \
> +  NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. */
> +
> +#define PATTERN_DT1Q_IPV4_UDP_SHUFFLE                                         \
> +  /* Ether (2 blocks): Note that *VLAN* type is written here. */              \
> +  0,  1,  2,  3,  4,  5,  6,  7, 8,  9, 10, 11, 16, 17,  0,  0,               \
> +  /* VLAN (1 block): Note that the *EtherHdr->Type* is written here. */       \
> +  12, 13, 14, 15, 0, 0, 0, 0,                                                 \
> +  30, 31, 32, 33, 34, 35, 36, 37, 0, 0, 0, 0, 24, 19, 26, 27,     /* IPv4 */  \
> +  38, 39, 40, 41, NU, NU, NU, NU, /* UDP */
> +
> +#define PATTERN_DT1Q_IPV4_TCP_SHUFFLE                                         \
> +  /* Ether (2 blocks): Note that *VLAN* type is written here. */              \
> +  0,  1,  2,  3,  4,  5,  6,  7, 8,  9, 10, 11, 16, 17,  0,  0,               \
> +  /* VLAN (1 block): Note that the *EtherHdr->Type* is written here. */       \
> +  12, 13, 14, 15, 0, 0, 0, 0,                                                 \
> +  30, 31, 32, 33, 34, 35, 36, 37, 0, 0, 0, 0, 24, 19, 26, 27,     /* IPv4 */  \
> +  NU, NU, NU, NU, NU, NU, NU, NU, 38, 39, 40, 41, NU, NU, NU, NU, /* TCP */   \
> +  NU, NU, NU, NU, NU, NU, NU, NU, /* Unused. */
>  
>  /* Generation of K-mask bitmask values, to zero out data in result. Note that
>   * these correspond 1:1 to the above "*_SHUFFLE" values, and bit used must be
> @@ -151,12 +181,22 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a)
>   * Note the ULL suffix allows shifting by 32 or more without integer overflow.
>   */
>  #define KMASK_ETHER     0x1FFFULL
> +#define KMASK_DT1Q      0x000FULL
>  #define KMASK_IPV4      0xF0FFULL
>  #define KMASK_UDP       0x000FULL
> +#define KMASK_TCP       0x0F00ULL
>  
>  #define PATTERN_IPV4_UDP_KMASK \
>      (KMASK_ETHER | (KMASK_IPV4 << 16) | (KMASK_UDP << 32))
>  
> +#define PATTERN_IPV4_TCP_KMASK \
> +    (KMASK_ETHER | (KMASK_IPV4 << 16) | (KMASK_TCP << 32))
> +
> +#define PATTERN_DT1Q_IPV4_UDP_KMASK \
> +    (KMASK_ETHER | (KMASK_DT1Q << 16) | (KMASK_IPV4 << 24) | (KMASK_UDP << 40))
> +
> +#define PATTERN_DT1Q_IPV4_TCP_KMASK \
> +    (KMASK_ETHER | (KMASK_DT1Q << 16) | (KMASK_IPV4 << 24) | (KMASK_TCP << 40))
>  
>  /* This union allows initializing static data as u8, but easily loading it
>   * into AVX512 registers too. The union ensures proper alignment for the zmm.
> @@ -194,6 +234,9 @@ struct mfex_profile {
>  
>  enum MFEX_PROFILES {
>      PROFILE_ETH_IPV4_UDP,
> +    PROFILE_ETH_IPV4_TCP,
> +    PROFILE_ETH_VLAN_IPV4_UDP,
> +    PROFILE_ETH_VLAN_IPV4_TCP,
>      PROFILE_COUNT,
>  };
>  
> @@ -215,6 +258,56 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] =
>          },
>          .dp_pkt_min_size = 42,
>      },
> +
> +    [PROFILE_ETH_IPV4_TCP] = {
> +        .probe_mask.u8_data = { PATTERN_ETHERTYPE_MASK PATTERN_IPV4_MASK },
> +        .probe_data.u8_data = { PATTERN_ETHERTYPE_IPV4 PATTERN_IPV4_TCP},
> +
> +        .store_shuf.u8_data = { PATTERN_IPV4_TCP_SHUFFLE },
> +        .store_kmsk = PATTERN_IPV4_TCP_KMASK,
> +
> +        .mf_bits = { 0x18a0000000000000, 0x0000000000044401},
> +        .dp_pkt_offs = {
> +            0, UINT16_MAX, 14, 34,
> +        },
> +        .dp_pkt_min_size = 54,
> +    },
> +
> +    [PROFILE_ETH_VLAN_IPV4_UDP] = {
> +        .probe_mask.u8_data = {
> +            PATTERN_ETHERTYPE_MASK PATTERN_DT1Q_MASK PATTERN_IPV4_MASK
> +        },
> +        .probe_data.u8_data = {
> +            PATTERN_ETHERTYPE_DT1Q PATTERN_DT1Q_IPV4 PATTERN_IPV4_UDP
> +        },
> +
> +        .store_shuf.u8_data = { PATTERN_DT1Q_IPV4_UDP_SHUFFLE },
> +        .store_kmsk = PATTERN_DT1Q_IPV4_UDP_KMASK,
> +
> +        .mf_bits = { 0x38a0000000000000, 0x0000000000040401},
> +        .dp_pkt_offs = {
> +            14, UINT16_MAX, 18, 38,
> +        },
> +        .dp_pkt_min_size = 46,
> +    },
> +
> +    [PROFILE_ETH_VLAN_IPV4_TCP] = {
> +        .probe_mask.u8_data = {
> +            PATTERN_ETHERTYPE_MASK PATTERN_DT1Q_MASK PATTERN_IPV4_MASK
> +        },
> +        .probe_data.u8_data = {
> +            PATTERN_ETHERTYPE_DT1Q PATTERN_DT1Q_IPV4 PATTERN_IPV4_TCP
> +        },
> +
> +        .store_shuf.u8_data = { PATTERN_DT1Q_IPV4_TCP_SHUFFLE },
> +        .store_kmsk = PATTERN_DT1Q_IPV4_TCP_KMASK,
> +
> +        .mf_bits = { 0x38a0000000000000, 0x0000000000044401},
> +        .dp_pkt_offs = {
> +            14, UINT16_MAX, 18, 38,
> +        },
> +        .dp_pkt_min_size = 46,
> +    },
>  };
>  
>  
> @@ -233,6 +326,28 @@ mfex_ipv4_set_l2_pad_size(struct dp_packet *pkt, struct ip_header *nh,
>          return 0;
>  }
>  
> +/* Fixup the VLAN CFI and PCP, reading the PCP from the input to this function,
> + * and storing the output CFI bit bitwise-OR-ed with the PCP to miniflow.
> + */
> +static void
> +mfex_vlan_pcp(const uint8_t vlan_pcp, uint64_t *block)
> +{
> +    /* Bitwise-OR in the CFI flag, keeping other data the same. */
> +    uint8_t *cfi_byte = (uint8_t *) block;
> +    cfi_byte[2] = 0x10 | vlan_pcp;

Trying to reduce the magic numbers around, can we use OVS's VLAN_CFI
instead of 0x10?

> +}
> +
> +/* Process TCP flags using known LE endian-ness as this is AVX512 code. */
> +#define TCP_FLAGS_BE32(tcp_ctl) ((OVS_FORCE ovs_be32) TCP_FLAGS_BE16(tcp_ctl))

The above is not used, right?

> +
> +static void
> +mfex_handle_tcp_flags(const struct tcp_header *tcp, uint64_t *block)
> +{
> +    uint16_t ctl = (OVS_FORCE uint16_t) TCP_FLAGS_BE16(tcp->tcp_ctl);

Why casting to uint16_t?

> +    uint64_t ctl_u64 = ctl;
> +    *block = ctl_u64 << 32;
> +}
> +
>  /* Generic loop to process any mfex profile. This code is specialized into
>   * multiple actual MFEX implementation functions. Its marked ALWAYS_INLINE
>   * to ensure the compiler specializes each instance. The code is marked "hot"
> @@ -321,6 +436,43 @@ mfex_avx512_process(struct dp_packet_batch *packets,
>              ovs_assert(0); /* avoid compiler warning on missing ENUM */
>              break;
>  
> +        case PROFILE_ETH_VLAN_IPV4_TCP: {
> +                mfex_vlan_pcp(pkt[14], &keys[i].buf[4]);

Maybe cast pkt[2*ETH_ADDR_LEN] to struct flow_vlan_hdr and pass to that,
then use a flow_vlan_hdr->tci to improve readability?

> +
> +                uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN;
> +                struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN];
> +                if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) {
> +                    continue;
> +                }
> +
> +                /* Process TCP flags, and store to blocks. */
> +                const struct tcp_header *tcp = (void *)&pkt[38];
> +                mfex_handle_tcp_flags(tcp, &blocks[7]);
> +            } break;
> +
> +        case PROFILE_ETH_VLAN_IPV4_UDP: {
> +                mfex_vlan_pcp(pkt[14], &keys[i].buf[4]);
> +
> +                uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN;
> +                struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN];
> +                if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) {
> +                    continue;
> +                }
> +            } break;
> +
> +        case PROFILE_ETH_IPV4_TCP: {
> +                /* Process TCP flags, and store to blocks. */
> +                const struct tcp_header *tcp = (void *)&pkt[34];
> +                mfex_handle_tcp_flags(tcp, &blocks[6]);
> +
> +                /* Handle dynamic l2_pad_size. */
> +                uint32_t size_from_ipv4 = size - sizeof(struct eth_header);
> +                struct ip_header *nh = (void *)&pkt[sizeof(struct eth_header)];
> +                if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) {
> +                    continue;
> +                }
> +            } break;
> +
>          case PROFILE_ETH_IPV4_UDP: {
>                  /* Handle dynamic l2_pad_size. */
>                  uint32_t size_from_ipv4 = size - sizeof(struct eth_header);
> @@ -370,6 +522,9 @@ mfex_avx512_##name(struct dp_packet_batch *packets,                     \
>   * as required.
>   */
>  DECLARE_MFEX_FUNC(ip_udp,PROFILE_ETH_IPV4_UDP)
> +DECLARE_MFEX_FUNC(ip_tcp,PROFILE_ETH_IPV4_TCP)
> +DECLARE_MFEX_FUNC(dot1q_ip_udp,PROFILE_ETH_VLAN_IPV4_UDP)
> +DECLARE_MFEX_FUNC(dot1q_ip_tcp,PROFILE_ETH_VLAN_IPV4_TCP)

I forgot to mention this in the previous patch, but please
add a space after the comma.

>  
>  
>  static int32_t
> diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c
> index 106a83867..65072eb38 100644
> --- a/lib/dpif-netdev-private-extract.c
> +++ b/lib/dpif-netdev-private-extract.c
> @@ -60,6 +60,37 @@ static struct dpif_miniflow_extract_impl mfex_impls[] = {
>          .extract_func = mfex_avx512_ip_udp,
>          .name = "avx512_ipv4_udp",
>      },
> +    {
> +        .probe = mfex_avx512_vbmi_probe,
> +        .extract_func = mfex_avx512_vbmi_ip_tcp,
> +        .name = "avx512_vbmi_ipv4_tcp",
> +    },
> +    {
> +        .probe = mfex_avx512_probe,
> +        .extract_func = mfex_avx512_ip_tcp,
> +        .name = "avx512_ipv4_tcp",
> +    },
> +
> +    {
> +        .probe = mfex_avx512_vbmi_probe,
> +        .extract_func = mfex_avx512_vbmi_dot1q_ip_udp,
> +        .name = "avx512_vbmi_dot1q_ipv4_udp",
> +    },
> +    {
> +        .probe = mfex_avx512_probe,
> +        .extract_func = mfex_avx512_dot1q_ip_udp,
> +        .name = "avx512_dot1q_ipv4_udp",
> +    },
> +    {
> +        .probe = mfex_avx512_vbmi_probe,
> +        .extract_func = mfex_avx512_vbmi_dot1q_ip_tcp,
> +        .name = "avx512_vbmi_dot1q_ipv4_tcp",
> +    },
> +    {
> +        .probe = mfex_avx512_probe,
> +        .extract_func = mfex_avx512_dot1q_ip_tcp,
> +        .name = "avx512_dot1q_ipv4_tcp",
> +    },
>  #endif
>  };
>  
> diff --git a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h
> index f32be202a..b9a59c5a0 100644
> --- a/lib/dpif-netdev-private-extract.h
> +++ b/lib/dpif-netdev-private-extract.h
> @@ -152,6 +152,10 @@ int32_t mfex_avx512_vbmi_probe(void);
>                          odp_port_t in_port, void *pmd_handle);
>  
>  DECLARE_AVX512_MFEX_PROTOTYPE(ip_udp);
> +DECLARE_AVX512_MFEX_PROTOTYPE(ip_tcp);
> +DECLARE_AVX512_MFEX_PROTOTYPE(dot1q_ip_udp);
> +DECLARE_AVX512_MFEX_PROTOTYPE(dot1q_ip_tcp);
> +
>  #endif /* __x86_64__ */
>  
>  
> -- 
> 2.25.1
> 
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

-- 
fbl


More information about the dev mailing list