[ovs-dev] [PATCH v2 2/8] dpif-netdev: retrieve flow directly from the flow mark

Darrell Ball dball at vmware.com
Wed Sep 13 05:20:58 UTC 2017



On 9/5/17, 2:23 AM, "Yuanhan Liu" <yliu at fridaylinux.org> wrote:

    So that we could skip the heavy emc processing, notably, the
    miniflow_extract function. A simple PHY-PHY forwarding testing
    shows 53% performance improvement.
    
    Note that though the heavy miniflow_extract is skipped

[Darrell] How much of the increased performance is due to skipping
               miniflow_extract?



, we
    still have to do per-packet checking, because we have to check
    the tcp_flags.
    
    Co-authored-by: Finn Christensen <fc at napatech.com>
    Signed-off-by: Yuanhan Liu <yliu at fridaylinux.org>
    Signed-off-by: Finn Christensen <fc at napatech.com>
    ---
    
    v2: update tcp_flags, which also fixes the build warnings
    ---
     lib/dp-packet.h   | 13 ++++++++++
     lib/dpif-netdev.c | 27 ++++++++++++++-----
     lib/flow.c        | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
     lib/flow.h        |  1 +
     4 files changed, 113 insertions(+), 6 deletions(-)
    
    diff --git a/lib/dp-packet.h b/lib/dp-packet.h
    index 046f3ab..a7a062f 100644
    --- a/lib/dp-packet.h
    +++ b/lib/dp-packet.h
    @@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p)
     #define reset_dp_packet_checksum_ol_flags(arg)
     #endif
     
    +static inline bool
    +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
    +                        uint32_t *mark OVS_UNUSED)
    +{
    +#ifdef DPDK_NETDEV
    +    if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
    +        *mark = p->mbuf.hash.fdir.hi;
    +        return true;
    +    }
    +#endif
    +    return false;
    +}
    +
     enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
     
     struct dp_packet_batch {
    diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
    index f3b7f25..a95b8d4 100644
    --- a/lib/dpif-netdev.c
    +++ b/lib/dpif-netdev.c
    @@ -4883,10 +4883,10 @@ struct packet_batch_per_flow {
     static inline void
     packet_batch_per_flow_update(struct packet_batch_per_flow *batch,
                                  struct dp_packet *packet,
    -                             const struct miniflow *mf)
    +                             uint16_t tcp_flags)
     {
         batch->byte_count += dp_packet_size(packet);
    -    batch->tcp_flags |= miniflow_get_tcp_flags(mf);
    +    batch->tcp_flags |= tcp_flags;
         batch->array.packets[batch->array.count++] = packet;
     }
     
    @@ -4921,7 +4921,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
     
     static inline void
     dp_netdev_queue_batches(struct dp_packet *pkt,
    -                        struct dp_netdev_flow *flow, const struct miniflow *mf,
    +                        struct dp_netdev_flow *flow, uint16_t tcp_flags,
                             struct packet_batch_per_flow *batches,
                             size_t *n_batches)
     {
    @@ -4932,7 +4932,7 @@ dp_netdev_queue_batches(struct dp_packet *pkt,
             packet_batch_per_flow_init(batch, flow);
         }
     
    -    packet_batch_per_flow_update(batch, pkt, mf);
    +    packet_batch_per_flow_update(batch, pkt, tcp_flags);
     }
     
     /* Try to process all ('cnt') the 'packets' using only the exact match cache
    @@ -4960,11 +4960,13 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
         const size_t size = dp_packet_batch_size(packets_);
         uint32_t cur_min;
         int i;
    +    uint16_t tcp_flags;
     
         atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
     
         DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) {
             struct dp_netdev_flow *flow;
    +        uint32_t flow_mark;
     
             if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
                 dp_packet_delete(packet);
    @@ -4972,6 +4974,16 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
                 continue;
             }
     
    +        if (dp_packet_has_flow_mark(packet, &flow_mark)) {
    +            flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark);
    +            if (flow) {
    +                tcp_flags = parse_tcp_flags(packet);
    +                dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
    +                                        n_batches);
    +                continue;
    +            }
    +        }
    +

[Darrell] As mentioned, you would move dp_netdev_pmd_find_flow_by_mark() to the following code;
              also, maybe, you can get rid of parse_tcp_flags() as a result.


             if (i != size - 1) {
                 struct dp_packet **packets = packets_->packets;
                 /* Prefetch next packet data and metadata. */
    @@ -4989,7 +5001,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
             /* If EMC is disabled skip emc_lookup */
             flow = (cur_min == 0) ? NULL: emc_lookup(flow_cache, key);
             if (OVS_LIKELY(flow)) {
    -            dp_netdev_queue_batches(packet, flow, &key->mf, batches,
    +            tcp_flags = miniflow_get_tcp_flags(&key->mf);
    +            dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
                                         n_batches);
             } else {
                 /* Exact match cache missed. Group missed packets together at
    @@ -5166,7 +5179,9 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
             flow = dp_netdev_flow_cast(rules[i]);
     
             emc_probabilistic_insert(pmd, &keys[i], flow);
    -        dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches);
    +        dp_netdev_queue_batches(packet, flow,
    +                                miniflow_get_tcp_flags(&keys[i].mf),
    +                                batches, n_batches);
         }
     
         dp_netdev_count_packet(pmd, DP_STAT_MASKED_HIT, cnt - miss_cnt);
    diff --git a/lib/flow.c b/lib/flow.c
    index b2b10aa..912c538 100644
    --- a/lib/flow.c
    +++ b/lib/flow.c
    @@ -991,6 +991,84 @@ parse_dl_type(const struct eth_header *data_, size_t size)
         return parse_ethertype(&data, &size);
     }
     

[Darrell] Hopefully we can get rid of the following function, which would remove the changes to flow.c/.h.
               If not, we would have to splice out the common code.


    +uint16_t
    +parse_tcp_flags(struct dp_packet *packet)
    +{
    +    const void *data = dp_packet_data(packet);
    +    size_t size = dp_packet_size(packet);
    +    ovs_be16 dl_type;
    +    uint8_t nw_frag = 0, nw_proto = 0;
    +
    +    if (packet->packet_type != htonl(PT_ETH)) {
    +        return 0;
    +    }
    +
    +    data_pull(&data, &size, ETH_ADDR_LEN * 2);
    +    dl_type = parse_ethertype(&data, &size);
    +    if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
    +        const struct ip_header *nh = data;
    +        int ip_len;
    +
    +        if (OVS_UNLIKELY(size < IP_HEADER_LEN)) {
    +            return 0;
    +        }
    +        ip_len = IP_IHL(nh->ip_ihl_ver) * 4;
    +
    +        if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN)) {
    +            return 0;
    +        }
    +        if (OVS_UNLIKELY(size < ip_len)) {
    +            return 0;
    +        }
    +
    +        if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) {
    +            nw_frag = FLOW_NW_FRAG_ANY;
    +            if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {
    +                nw_frag |= FLOW_NW_FRAG_LATER;
    +            }
    +        }
    +        nw_proto = nh->ip_proto;
    +        data_pull(&data, &size, ip_len);
    +    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
    +        const struct ovs_16aligned_ip6_hdr *nh;
    +        uint16_t plen;
    +
    +        if (OVS_UNLIKELY(size < sizeof *nh)) {
    +            return 0;
    +        }
    +        nh = data_pull(&data, &size, sizeof *nh);
    +
    +        plen = ntohs(nh->ip6_plen);
    +        if (OVS_UNLIKELY(plen > size)) {
    +            return 0;
    +        }
    +        /* Jumbo Payload option not supported yet. */
    +        if (OVS_UNLIKELY(size - plen > UINT8_MAX)) {
    +            return 0;
    +        }
    +        size = plen;
    +
    +        nw_proto = nh->ip6_nxt;
    +        if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) {
    +            return 0;
    +        }
    +    } else {
    +        return 0;
    +    }
    +
    +    if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) {
    +        if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) {
    +            if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {
    +                const struct tcp_header *tcp = data;
    +
    +                return TCP_FLAGS_BE32(tcp->tcp_ctl);
    +            }
    +        }
    +    }
    +
    +    return 0;
    +}
    +
     /* For every bit of a field that is wildcarded in 'wildcards', sets the
      * corresponding bit in 'flow' to zero. */
     void
    diff --git a/lib/flow.h b/lib/flow.h
    index 6ae5a67..f113ec4 100644
    --- a/lib/flow.h
    +++ b/lib/flow.h
    @@ -130,6 +130,7 @@ bool parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
                              uint8_t *nw_frag);
     ovs_be16 parse_dl_type(const struct eth_header *data_, size_t size);
     bool parse_nsh(const void **datap, size_t *sizep, struct flow_nsh *key);
    +uint16_t parse_tcp_flags(struct dp_packet *packet);
     
     static inline uint64_t
     flow_get_xreg(const struct flow *flow, int idx)
    -- 
    2.7.4
    
    





More information about the dev mailing list