[ovs-dev] [PATCH v10 3/7] dpif-netdev: retrieve flow directly from the flow mark

Stokes, Ian ian.stokes at intel.com
Mon May 21 17:30:29 UTC 2018


> From: Yuanhan Liu <yliu at fridaylinux.org>
> 
> So that we could skip some very costly CPU operations, including but not
> limited to miniflow_extract, emc lookup, dpcls lookup, etc. Thus,
> performance could be greatly improved.
> 
> A PHY-PHY forwarding test with 1000 mega flows (udp,tp_src=1000-1999) and
> 1 million streams (tp_src=1000-1999, tp_dst=2000-2999) shows more than a
> 260% performance boost.
> 
> Note that though the heavy miniflow_extract is skipped, we still have to
> do per-packet checking, because we have to check the tcp_flags.
> 
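
As I read the diff below, the per-packet fast path added to emc_processing()
boils down to roughly the following (paraphrasing the hunk in this patch, so
the names are the patch's own):

    uint32_t mark;

    if (dp_packet_has_flow_mark(packet, &mark)) {
        flow = mark_to_flow_find(pmd, mark);
        if (flow) {
            /* miniflow_extract()/EMC/dpcls are all skipped; only the TCP
             * flags still have to be read from the packet. */
            tcp_flags = parse_tcp_flags(packet);
            dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
                                    n_batches);
            continue;
        }
    }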

I flagged this back in v8, but it must have been missed: the patch causes a compilation failure in the Travis OVS build. This should be addressed.

lib/flow.c:1070:16: error: incorrect type in return expression (different base types)
lib/flow.c:1070:16:    expected unsigned short
lib/flow.c:1070:16:    got restricted ovs_be16
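
The offending line looks like the TCP_FLAGS_BE16() return at the end of
parse_tcp_flags(), which hands an ovs_be16 back through a uint16_t return
type. Untested, but something along these lines should keep sparse happy and
also match the host-order value that miniflow_get_tcp_flags() yields on the
EMC path:

    if (!(nw_frag & FLOW_NW_FRAG_LATER) && nw_proto == IPPROTO_TCP &&
        size >= TCP_HEADER_LEN) {
        const struct tcp_header *tcp = data;

        /* TCP_FLAGS() converts to host byte order, so it matches the
         * declared uint16_t return type. */
        return TCP_FLAGS(tcp->tcp_ctl);
    }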

Thanks
Ian

> Co-authored-by: Finn Christensen <fc at napatech.com>
> Signed-off-by: Yuanhan Liu <yliu at fridaylinux.org>
> Signed-off-by: Finn Christensen <fc at napatech.com>
> Signed-off-by: Shahaf Shuler <shahafs at mellanox.com>
> ---
>  lib/dp-packet.h   | 13 ++++++++++++
>  lib/dpif-netdev.c | 44 ++++++++++++++++++++++++++++++++++------
>  lib/flow.c        | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/flow.h        |  1 +
>  4 files changed, 106 insertions(+), 6 deletions(-)
> 
> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
> index 21c8ca525..dd3f17b9f 100644
> --- a/lib/dp-packet.h
> +++ b/lib/dp-packet.h
> @@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p)
>  #define reset_dp_packet_checksum_ol_flags(arg)
>  #endif
> 
> +static inline bool
> +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
> +                        uint32_t *mark OVS_UNUSED)
> +{
> +#ifdef DPDK_NETDEV
> +    if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
> +        *mark = p->mbuf.hash.fdir.hi;
> +        return true;
> +    }
> +#endif
> +    return false;
> +}
> +
>  enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
> 
>  struct dp_packet_batch {
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 0f69d7d96..4748f70fa 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -2118,6 +2118,23 @@ flow_mark_flush(struct dp_netdev_pmd_thread *pmd)
>      }
>  }
> 
> +static struct dp_netdev_flow *
> +mark_to_flow_find(const struct dp_netdev_pmd_thread *pmd,
> +                  const uint32_t mark)
> +{
> +    struct dp_netdev_flow *flow;
> +
> +    CMAP_FOR_EACH_WITH_HASH (flow, mark_node, hash_int(mark, 0),
> +                             &flow_mark.mark_to_flow) {
> +        if (flow->mark == mark && flow->pmd_id == pmd->core_id &&
> +            flow->dead == false) {
> +            return flow;
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
>  static void
>  dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
>                            struct dp_netdev_flow *flow)
> @@ -5364,10 +5381,10 @@ struct packet_batch_per_flow {
>  static inline void
> packet_batch_per_flow_update(struct packet_batch_per_flow *batch,
>                               struct dp_packet *packet,
> -                             const struct miniflow *mf)
> +                             uint16_t tcp_flags)
>  {
>      batch->byte_count += dp_packet_size(packet);
> -    batch->tcp_flags |= miniflow_get_tcp_flags(mf);
> +    batch->tcp_flags |= tcp_flags;
>      batch->array.packets[batch->array.count++] = packet;
>  }
> 
> @@ -5401,7 +5418,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
> 
>  static inline void
>  dp_netdev_queue_batches(struct dp_packet *pkt,
> -                        struct dp_netdev_flow *flow, const struct miniflow *mf,
> +                        struct dp_netdev_flow *flow, uint16_t tcp_flags,
>                          struct packet_batch_per_flow *batches,
>                          size_t *n_batches)
>  {
> @@ -5412,7 +5429,7 @@ dp_netdev_queue_batches(struct dp_packet *pkt,
>          packet_batch_per_flow_init(batch, flow);
>      }
> 
> -    packet_batch_per_flow_update(batch, pkt, mf);
> +    packet_batch_per_flow_update(batch, pkt, tcp_flags);
>  }
> 
>  /* Try to process all ('cnt') the 'packets' using only the exact match cache
> @@ -5443,6 +5460,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
>      const size_t cnt = dp_packet_batch_size(packets_);
>      uint32_t cur_min;
>      int i;
> +    uint16_t tcp_flags;
> 
>      atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
>      pmd_perf_update_counter(&pmd->perf_stats,
> @@ -5451,6 +5469,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
> 
>      DP_PACKET_BATCH_REFILL_FOR_EACH (i, cnt, packet, packets_) {
>          struct dp_netdev_flow *flow;
> +        uint32_t mark;
> 
>          if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
>              dp_packet_delete(packet);
> @@ -5458,6 +5477,16 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
>              continue;
>          }
> 
> +        if (dp_packet_has_flow_mark(packet, &mark)) {
> +            flow = mark_to_flow_find(pmd, mark);
> +            if (flow) {
> +                tcp_flags = parse_tcp_flags(packet);
> +                dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
> +                                        n_batches);
> +                continue;
> +            }
> +        }
> +
>          if (i != cnt - 1) {
>              struct dp_packet **packets = packets_->packets;
>              /* Prefetch next packet data and metadata. */
> @@ -5483,7 +5512,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
>              flow = NULL;
>          }
>          if (OVS_LIKELY(flow)) {
> -            dp_netdev_queue_batches(packet, flow, &key->mf, batches,
> +            tcp_flags = miniflow_get_tcp_flags(&key->mf);
> +            dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
>                                      n_batches);
>          } else {
>              /* Exact match cache missed. Group missed packets together at
> @@ -5670,7 +5700,9 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
>          flow = dp_netdev_flow_cast(rules[i]);
> 
>          emc_probabilistic_insert(pmd, &keys[i], flow);
> -        dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches);
> +        dp_netdev_queue_batches(packet, flow,
> +                                miniflow_get_tcp_flags(&keys[i].mf),
> +                                batches, n_batches);
>      }
> 
>      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_HIT,
> diff --git a/lib/flow.c b/lib/flow.c
> index 2c99a5ff4..5413a4ceb 100644
> --- a/lib/flow.c
> +++ b/lib/flow.c
> @@ -1019,6 +1019,60 @@ parse_dl_type(const struct eth_header *data_, size_t size)
>      return parse_ethertype(&data, &size);
>  }
> 
> +uint16_t
> +parse_tcp_flags(struct dp_packet *packet)
> +{
> +    const void *data = dp_packet_data(packet);
> +    size_t size = dp_packet_size(packet);
> +    ovs_be16 dl_type;
> +    uint8_t nw_frag = 0, nw_proto = 0;
> +
> +    if (packet->packet_type != htonl(PT_ETH)) {
> +        return 0;
> +    }
> +
> +    data_pull(&data, &size, ETH_ADDR_LEN * 2);
> +    dl_type = parse_ethertype(&data, &size);
> +    if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
> +        const struct ip_header *nh = data;
> +        int ip_len;
> +        uint16_t tot_len;
> +
> +        if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, &tot_len))) {
> +            return 0;
> +        }
> +        nw_proto = nh->ip_proto;
> +        nw_frag = ipv4_get_nw_frag(nh);
> +
> +        size = tot_len;   /* Never pull padding. */
> +        data_pull(&data, &size, ip_len);
> +    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
> +        const struct ovs_16aligned_ip6_hdr *nh = data;
> +
> +        if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) {
> +            return 0;
> +        }
> +        data_pull(&data, &size, sizeof *nh);
> +
> +        size = ntohs(nh->ip6_plen); /* Never pull padding. */
> +        if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) {
> +            return 0;
> +        }
> +        nw_proto = nh->ip6_nxt;
> +    } else {
> +        return 0;
> +    }
> +
> +    if (!(nw_frag & FLOW_NW_FRAG_LATER) && nw_proto == IPPROTO_TCP &&
> +        size >= TCP_HEADER_LEN) {
> +        const struct tcp_header *tcp = data;
> +
> +        return TCP_FLAGS_BE16(tcp->tcp_ctl);
> +    }
> +
> +    return 0;
> +}
> +
>  /* For every bit of a field that is wildcarded in 'wildcards', sets the
>   * corresponding bit in 'flow' to zero. */
>  void
> diff --git a/lib/flow.h b/lib/flow.h
> index af829319e..888f8cc9b 100644
> --- a/lib/flow.h
> +++ b/lib/flow.h
> @@ -132,6 +132,7 @@ bool parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
>                           uint8_t *nw_frag);
>  ovs_be16 parse_dl_type(const struct eth_header *data_, size_t size);
>  bool parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key);
> +uint16_t parse_tcp_flags(struct dp_packet *packet);
> 
>  static inline uint64_t
>  flow_get_xreg(const struct flow *flow, int idx)
> --
> 2.12.0


