[ovs-dev] [PATCH v2] flow: miniflow_extract metadata branchless optimization

Yanqin Wei (Arm Technology China) Yanqin.Wei at arm.com
Thu Aug 29 09:21:17 UTC 2019


Hi Ben,
Thanks for the feedback.  It is indeed related with userspace datapath. 

Hi Ilya, 
Could you take a look at this patch when you have time? 

I knew first-pass and recirculating traffic share the same packet handling. It makes code common and maintainable. 
But if we can introduce some data-oriented and well-defined flag to bypass some time-consuming handling, it can improve performance a lot.

Best Regards,
Wei Yanqin

-----Original Message-----
From: Ben Pfaff <blp at ovn.org> 
Sent: Thursday, August 29, 2019 6:11 AM
To: Yanqin Wei (Arm Technology China) <Yanqin.Wei at arm.com>; Ilya Maximets <i.maximets at samsung.com>
Cc: dev at openvswitch.org; nd <nd at arm.com>; Gavin Hu (Arm Technology China) <Gavin.Hu at arm.com>
Subject: Re: [ovs-dev] [PATCH v2] flow: miniflow_extract metadata branchless optimization

This fails to apply to current master.

Whereas most of the time I'd be comfortable with reviewing 'flow'
patches myself, this one is closed related to the dpif-netdev code and I'd prefer to have someone who understands that code well, as well as the tradeoffs between performance and maintainability, review it.  Ilya (added to the To line) is a good choice.

On Thu, Aug 22, 2019 at 06:09:16PM +0800, Yanqin Wei wrote:
> "miniflow_extract" is a branch heavy implementation for packet header 
> and metadata parsing. There is a lot of meta data handling for all traffic.
> But this should not be applicable for packets from interface.
> This patch adds a layer of inline encapsulation to miniflow_extract 
> and introduces constant "md_valid" input parameter as a branch condition.
> The new branch will be removed by the compiler at compile time. Two 
> instances of miniflow_extract with different branches will be generated.
> 
> This patch is tested on an arm64 platform. It improves more than 3.5% 
> performance in P2P forwarding cases.
> 
> Reviewed-by: Gavin Hu <Gavin.Hu at arm.com>
> Signed-off-by: Yanqin Wei <Yanqin.Wei at arm.com>
> ---
>  lib/dpif-netdev.c |  13 +++---
>  lib/flow.c        | 116 ++++++++++++++++++++++++++++++++----------------------
>  lib/flow.h        |   2 +
>  3 files changed, 79 insertions(+), 52 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 
> d0a1c58..6686b14 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -6508,12 +6508,15 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
>              }
>          }
>  
> -        miniflow_extract(packet, &key->mf);
> +        if (!md_is_valid) {
> +            miniflow_extract_firstpass(packet, &key->mf);
> +            key->hash =
> +                dpif_netdev_packet_get_rss_hash_orig_pkt(packet, &key->mf);
> +        } else {
> +            miniflow_extract(packet, &key->mf);
> +            key->hash = dpif_netdev_packet_get_rss_hash(packet, &key->mf);
> +        }
>          key->len = 0; /* Not computed yet. */
> -        key->hash =
> -                (md_is_valid == false)
> -                ? dpif_netdev_packet_get_rss_hash_orig_pkt(packet, &key->mf)
> -                : dpif_netdev_packet_get_rss_hash(packet, &key->mf);
>  
>          /* If EMC is disabled skip emc_lookup */
>          flow = (cur_min != 0) ? emc_lookup(&cache->emc_cache, key) : 
> NULL; diff --git a/lib/flow.c b/lib/flow.c index e54fd2e..e5b554b 
> 100644
> --- a/lib/flow.c
> +++ b/lib/flow.c
> @@ -707,7 +707,8 @@ ipv6_sanity_check(const struct 
> ovs_16aligned_ip6_hdr *nh, size_t size)  }
>  
>  /* Initializes 'dst' from 'packet' and 'md', taking the packet type 
> into
> - * account.  'dst' must have enough space for FLOW_U64S * 8 bytes.
> + * account.  'dst' must have enough space for FLOW_U64S * 8 bytes. 
> + Metadata
> + * initialization should be bypassed if "md_valid" is false.
>   *
>   * Initializes the layer offsets as follows:
>   *
> @@ -732,8 +733,9 @@ ipv6_sanity_check(const struct ovs_16aligned_ip6_hdr *nh, size_t size)
>   *      present and the packet has at least the content used for the fields
>   *      of interest for the flow, otherwise UINT16_MAX.
>   */
> -void
> -miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
> +static inline ALWAYS_INLINE void
> +miniflow_extract__(struct dp_packet *packet, struct miniflow *dst,
> +                    const bool md_valid)
>  {
>      /* Add code to this function (or its callees) to extract new fields. */
>      BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); @@ -752,54 +754,60 @@ 
> miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
>      ovs_be16 ct_tp_src = 0, ct_tp_dst = 0;
>  
>      /* Metadata. */
> -    if (flow_tnl_dst_is_set(&md->tunnel)) {
> -        miniflow_push_words(mf, tunnel, &md->tunnel,
> -                            offsetof(struct flow_tnl, metadata) /
> -                            sizeof(uint64_t));
> -
> -        if (!(md->tunnel.flags & FLOW_TNL_F_UDPIF)) {
> -            if (md->tunnel.metadata.present.map) {
> -                miniflow_push_words(mf, tunnel.metadata, &md->tunnel.metadata,
> -                                    sizeof md->tunnel.metadata /
> -                                    sizeof(uint64_t));
> -            }
> -        } else {
> -            if (md->tunnel.metadata.present.len) {
> -                miniflow_push_words(mf, tunnel.metadata.present,
> -                                    &md->tunnel.metadata.present, 1);
> -                miniflow_push_words(mf, tunnel.metadata.opts.gnv,
> -                                    md->tunnel.metadata.opts.gnv,
> -                                    DIV_ROUND_UP(md->tunnel.metadata.present.len,
> -                                                 sizeof(uint64_t)));
> +    if (md_valid) {
> +        if (flow_tnl_dst_is_set(&md->tunnel)) {
> +            miniflow_push_words(mf, tunnel, &md->tunnel,
> +                                offsetof(struct flow_tnl, metadata) /
> +                                sizeof(uint64_t));
> +
> +            if (!(md->tunnel.flags & FLOW_TNL_F_UDPIF)) {
> +                if (md->tunnel.metadata.present.map) {
> +                    miniflow_push_words(mf, tunnel.metadata,
> +                                        &md->tunnel.metadata,
> +                                        sizeof md->tunnel.metadata /
> +                                        sizeof(uint64_t));
> +                }
> +            } else {
> +                if (md->tunnel.metadata.present.len) {
> +                    miniflow_push_words(mf, tunnel.metadata.present,
> +                                        &md->tunnel.metadata.present, 1);
> +                    miniflow_push_words(mf, tunnel.metadata.opts.gnv,
> +                                        md->tunnel.metadata.opts.gnv,
> +                                        DIV_ROUND_UP(
> +                                               md->tunnel.metadata.present.len,
> +                                               sizeof(uint64_t)));
> +                }
>              }
>          }
> -    }
> -    if (md->skb_priority || md->pkt_mark) {
> -        miniflow_push_uint32(mf, skb_priority, md->skb_priority);
> -        miniflow_push_uint32(mf, pkt_mark, md->pkt_mark);
> -    }
> -    miniflow_push_uint32(mf, dp_hash, md->dp_hash);
> -    miniflow_push_uint32(mf, in_port, odp_to_u32(md->in_port.odp_port));
> -    if (md->ct_state) {
> -        miniflow_push_uint32(mf, recirc_id, md->recirc_id);
> -        miniflow_push_uint8(mf, ct_state, md->ct_state);
> -        ct_nw_proto_p = miniflow_pointer(mf, ct_nw_proto);
> -        miniflow_push_uint8(mf, ct_nw_proto, 0);
> -        miniflow_push_uint16(mf, ct_zone, md->ct_zone);
> -    } else if (md->recirc_id) {
> -        miniflow_push_uint32(mf, recirc_id, md->recirc_id);
> -        miniflow_pad_to_64(mf, recirc_id);
> -    }
> -
> -    if (md->ct_state) {
> -        miniflow_push_uint32(mf, ct_mark, md->ct_mark);
> -        miniflow_push_be32(mf, packet_type, packet_type);
> -
> -        if (!ovs_u128_is_zero(md->ct_label)) {
> -            miniflow_push_words(mf, ct_label, &md->ct_label,
> -                                sizeof md->ct_label / sizeof(uint64_t));
> +        if (md->skb_priority || md->pkt_mark) {
> +            miniflow_push_uint32(mf, skb_priority, md->skb_priority);
> +            miniflow_push_uint32(mf, pkt_mark, md->pkt_mark);
> +        }
> +        miniflow_push_uint32(mf, dp_hash, md->dp_hash);
> +        miniflow_push_uint32(mf, in_port, odp_to_u32(md->in_port.odp_port));
> +        if (md->ct_state) {
> +            miniflow_push_uint32(mf, recirc_id, md->recirc_id);
> +            miniflow_push_uint8(mf, ct_state, md->ct_state);
> +            ct_nw_proto_p = miniflow_pointer(mf, ct_nw_proto);
> +            miniflow_push_uint8(mf, ct_nw_proto, 0);
> +            miniflow_push_uint16(mf, ct_zone, md->ct_zone);
> +        } else if (md->recirc_id) {
> +            miniflow_push_uint32(mf, recirc_id, md->recirc_id);
> +            miniflow_pad_to_64(mf, recirc_id);
> +        }
> +
> +        if (md->ct_state) {
> +            miniflow_push_uint32(mf, ct_mark, md->ct_mark);
> +            miniflow_push_be32(mf, packet_type, packet_type);
> +
> +            if (!ovs_u128_is_zero(md->ct_label)) {
> +                miniflow_push_words(mf, ct_label, &md->ct_label,
> +                                    sizeof md->ct_label / sizeof(uint64_t));
> +            }
>          }
>      } else {
> +        miniflow_push_uint32(mf, dp_hash, md->dp_hash);
> +        miniflow_push_uint32(mf, in_port, 
> + odp_to_u32(md->in_port.odp_port));
>          miniflow_pad_from_64(mf, packet_type);
>          miniflow_push_be32(mf, packet_type, packet_type);
>      }
> @@ -865,6 +873,7 @@ miniflow_extract(struct dp_packet *packet, struct 
> miniflow *dst)
>  
>          /* Push both source and destination address at once. */
>          miniflow_push_words(mf, nw_src, &nh->ip_src, 1);
> +
>          if (ct_nw_proto_p && !md->ct_orig_tuple_ipv6) {
>              *ct_nw_proto_p = md->ct_orig_tuple.ipv4.ipv4_proto;
>              if (*ct_nw_proto_p) {
> @@ -900,6 +909,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
>                              sizeof nh->ip6_src / 8);
>          miniflow_push_words(mf, ipv6_dst, &nh->ip6_dst,
>                              sizeof nh->ip6_dst / 8);
> +
>          if (ct_nw_proto_p && md->ct_orig_tuple_ipv6) {
>              *ct_nw_proto_p = md->ct_orig_tuple.ipv6.ipv6_proto;
>              if (*ct_nw_proto_p) {
> @@ -1076,6 +1086,18 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
>      dst->map = mf.map;
>  }
>  
> +void
> +miniflow_extract(struct dp_packet *packet, struct miniflow *dst) {
> +    miniflow_extract__(packet, dst, true); }
> +
> +void
> +miniflow_extract_firstpass(struct dp_packet *packet, struct miniflow 
> +*dst) {
> +    miniflow_extract__(packet, dst, false); }
> +
>  ovs_be16
>  parse_dl_type(const struct eth_header *data_, size_t size)  { diff 
> --git a/lib/flow.h b/lib/flow.h index 7298c71..2d38574 100644
> --- a/lib/flow.h
> +++ b/lib/flow.h
> @@ -541,6 +541,8 @@ struct pkt_metadata;
>   * 'dst->map' is ignored on input and set on output to indicate which fields
>   * were extracted. */
>  void miniflow_extract(struct dp_packet *packet, struct miniflow 
> *dst);
> +void miniflow_extract_firstpass(struct dp_packet *packet,
> +                                struct miniflow *dst);
>  void miniflow_map_init(struct miniflow *, const struct flow *);  void 
> flow_wc_map(const struct flow *, struct flowmap *);  size_t 
> miniflow_alloc(struct miniflow *dsts[], size_t n,
> --
> 2.7.4
> 
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev


More information about the dev mailing list