[ovs-dev] [PATCH v6 2/6] dpif-netdev: retrieve flow directly from the flow mark
Stokes, Ian
ian.stokes at intel.com
Thu Mar 15 11:55:43 UTC 2018
> -----Original Message-----
> From: Stokes, Ian
> Sent: Wednesday, March 7, 2018 5:00 PM
> To: 'Yuanhan Liu' <yliu at fridaylinux.org>; dev at openvswitch.org
> Cc: Finn Christensen <fc at napatech.com>
> Subject: RE: [PATCH v6 2/6] dpif-netdev: retrieve flow directly from the
> flow mark
>
Adding Shahaf Shuler
> > So that we could skip some very costly CPU operations, including but
> > not limited to miniflow_extract, emc lookup, dpcls lookup, etc. Thus,
> > performance could be greatly improved.
> >
> > A PHY-PHY forwarding with 1000 mega flows (udp,tp_src=1000-1999) and
> > 1 million streams (tp_src=1000-1999, tp_dst=2000-2999) shows a more
> > than 260% performance boost.
> >
> > Note that though the heavy miniflow_extract is skipped, we still have
> > to do per-packet checking, because we have to check the tcp_flags.
> >
>
> Hi Yuanhan,
>
> This patch breaks a number of tests in the OVS Travis nightly build, these
> will have to be fixed as each patch must be able to compile independently.
>
> I've included a link to the Travis build where the failures occur (note
> the tests passed without issue for patch 1 of the series).
>
> https://travis-ci.org/istokes/ovs/builds/350372444
>
> Few more comments below.
>
> > Co-authored-by: Finn Christensen <fc at napatech.com>
> > Signed-off-by: Yuanhan Liu <yliu at fridaylinux.org>
> > Signed-off-by: Finn Christensen <fc at napatech.com>
> >
> > ---
> > v5: - do not fetch the flow if the flow has been dead
> > ---
> > lib/dp-packet.h | 13 +++++
> > lib/dpif-netdev.c | 43 ++++++++++++---
> > lib/flow.c | 155 +++++++++++++++++++++++++++++++++++++++++++----
> --
> > -----
> > lib/flow.h | 1 +
> > 4 files changed, 174 insertions(+), 38 deletions(-)
> >
> > diff --git a/lib/dp-packet.h b/lib/dp-packet.h index b4b721c..ea1194c
> > 100644
> > --- a/lib/dp-packet.h
> > +++ b/lib/dp-packet.h
> > @@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct
> > dp_packet
> > *p) #define reset_dp_packet_checksum_ol_flags(arg)
> > #endif
> >
> > +static inline bool
> > +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
> > + uint32_t *mark OVS_UNUSED) { #ifdef DPDK_NETDEV
> > + if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
> > + *mark = p->mbuf.hash.fdir.hi;
> > + return true;
> > + }
> > +#endif
> > + return false;
> > +}
> > +
> > enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch.
> > */
> >
> > struct dp_packet_batch {
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index
> > a514de8..e16f5ce
> > 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -2013,6 +2013,22 @@ flow_mark_flush(struct dp_netdev_pmd_thread *pmd)
> > }
> > }
> >
> > +static struct dp_netdev_flow *
> > +mark_to_flow_find(const struct dp_netdev_pmd_thread *pmd,
> > + const uint32_t mark) {
> > + struct dp_netdev_flow *flow;
> > +
> > + CMAP_FOR_EACH_WITH_HASH (flow, mark_node, mark,
> > &flow_mark.mark_to_flow) {
> > + if (flow->mark == mark && flow->pmd_id == pmd->core_id &&
> > + flow->dead == false) {
> > + return flow;
> > + }
> > + }
> > +
> > + return NULL;
> > +}
> > +
> > static void
> > dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
> > struct dp_netdev_flow *flow) @@ -5203,10
> > +5219,10 @@ struct packet_batch_per_flow { static inline void
> > packet_batch_per_flow_update(struct packet_batch_per_flow *batch,
> > struct dp_packet *packet,
> > - const struct miniflow *mf)
> > + uint16_t tcp_flags)
> > {
> > batch->byte_count += dp_packet_size(packet);
> > - batch->tcp_flags |= miniflow_get_tcp_flags(mf);
> > + batch->tcp_flags |= tcp_flags;
> > batch->array.packets[batch->array.count++] = packet; }
> >
> > @@ -5240,7 +5256,7 @@ packet_batch_per_flow_execute(struct
> > packet_batch_per_flow *batch,
> >
> > static inline void
> > dp_netdev_queue_batches(struct dp_packet *pkt,
> > - struct dp_netdev_flow *flow, const struct
> > miniflow *mf,
> > + struct dp_netdev_flow *flow, uint16_t
> > + tcp_flags,
> > struct packet_batch_per_flow *batches,
> > size_t *n_batches) { @@ -5251,7 +5267,7 @@
> > dp_netdev_queue_batches(struct dp_packet *pkt,
> > packet_batch_per_flow_init(batch, flow);
> > }
> >
> > - packet_batch_per_flow_update(batch, pkt, mf);
> > + packet_batch_per_flow_update(batch, pkt, tcp_flags);
> > }
> >
> > /* Try to process all ('cnt') the 'packets' using only the exact
> > match cache @@ -5282,6 +5298,7 @@ emc_processing(struct
> > dp_netdev_pmd_thread *pmd,
> > const size_t cnt = dp_packet_batch_size(packets_);
> > uint32_t cur_min;
> > int i;
> > + uint16_t tcp_flags;
> >
> > atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
> > pmd_perf_update_counter(&pmd->perf_stats,
> > @@ -5290,6 +5307,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
> >
> > DP_PACKET_BATCH_REFILL_FOR_EACH (i, cnt, packet, packets_) {
> > struct dp_netdev_flow *flow;
> > + uint32_t flow_mark;
>
> This will cause a compilation failure
>
> lib/dpif-netdev.c:5310:18: error: declaration of 'flow_mark' shadows a
> global declaration [-Werror=shadow]
> uint32_t flow_mark;
> ^
> lib/dpif-netdev.c:1860:25: error: shadowed declaration is here [-
> Werror=shadow] static struct flow_mark flow_mark = {
>
> >
> > if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
> > dp_packet_delete(packet); @@ -5297,6 +5315,16 @@
> > emc_processing(struct dp_netdev_pmd_thread *pmd,
> > continue;
> > }
> >
> > + if (dp_packet_has_flow_mark(packet, &flow_mark)) {
> > + flow = mark_to_flow_find(pmd, flow_mark);
> > + if (flow) {
> > + tcp_flags = parse_tcp_flags(packet);
> > + dp_netdev_queue_batches(packet, flow, tcp_flags,
> batches,
> > + n_batches);
> > + continue;
> > + }
> > + }
> > +
> > if (i != cnt - 1) {
> > struct dp_packet **packets = packets_->packets;
> > /* Prefetch next packet data and metadata. */ @@ -5322,7
> > +5350,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
> > flow = NULL;
> > }
> > if (OVS_LIKELY(flow)) {
> > - dp_netdev_queue_batches(packet, flow, &key->mf, batches,
> > + tcp_flags = miniflow_get_tcp_flags(&key->mf);
> > + dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
> > n_batches);
> > } else {
> > /* Exact match cache missed. Group missed packets
> > together at @@ -5501,7 +5530,9 @@ fast_path_processing(struct
> > dp_netdev_pmd_thread *pmd,
> > flow = dp_netdev_flow_cast(rules[i]);
> >
> > emc_probabilistic_insert(pmd, &keys[i], flow);
> > - dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches,
> > n_batches);
> > + dp_netdev_queue_batches(packet, flow,
> > + miniflow_get_tcp_flags(&keys[i].mf),
> > + batches, n_batches);
> > }
> >
> > pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_HIT,
> > diff --git a/lib/flow.c b/lib/flow.c index f9d7c2a..80718d4 100644
> > --- a/lib/flow.c
> > +++ b/lib/flow.c
> > @@ -624,6 +624,70 @@ flow_extract(struct dp_packet *packet, struct
> > flow
> > *flow)
> > miniflow_expand(&m.mf, flow);
> > }
> >
>
> I'm not a fan of the sanity checks for IPv4, IPv6, fragmentation, etc. being
> included in this patch.
>
> I understand the motivation of doing this but nowhere is it mentioned in
> the commit message that these changes occur.
>
> As the miniflow extract function is an important function that affects
> users regardless of HWOL usage, I think these changes should be applied in
> a separate patch and called out clearly in the commit so that users are
> aware.
>
> > +static inline bool
> > +ipv4_sanity_check(const struct ip_header *nh, size_t size,
> > + int *ip_lenp, uint16_t *tot_lenp) {
> > + int ip_len;
> > + uint16_t tot_len;
> > +
> > + if (OVS_UNLIKELY(size < IP_HEADER_LEN)) {
> > + return false;
> > + }
> > + ip_len = IP_IHL(nh->ip_ihl_ver) * 4;
> > +
> > + if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN || size < ip_len)) {
> > + return false;
> > + }
> > +
> > + tot_len = ntohs(nh->ip_tot_len);
> > + if (OVS_UNLIKELY(tot_len > size || ip_len > tot_len ||
> > + size - tot_len > UINT8_MAX)) {
> > + return false;
> > + }
> > +
> > + *ip_lenp = ip_len;
> > + *tot_lenp = tot_len;
> > +
> > + return true;
> > +}
> > +
> > +static inline uint8_t
> > +ipv4_get_nw_frag(const struct ip_header *nh) {
> > + uint8_t nw_frag = 0;
> > +
> > + if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) {
> > + nw_frag = FLOW_NW_FRAG_ANY;
> > + if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {
> > + nw_frag |= FLOW_NW_FRAG_LATER;
> > + }
> > + }
> > +
> > + return nw_frag;
> > +}
> > +
> > +static inline bool
> > +ipv6_sanity_check(const struct ovs_16aligned_ip6_hdr *nh, size_t
> > +size) {
> > + uint16_t plen;
> > +
> > + if (OVS_UNLIKELY(size < sizeof *nh)) {
> > + return false;
> > + }
> > +
> > + plen = ntohs(nh->ip6_plen);
> > + if (OVS_UNLIKELY(plen > size)) {
> > + return false;
> > + }
> > + /* Jumbo Payload option not supported yet. */
> > + if (OVS_UNLIKELY(size - plen > UINT8_MAX)) {
> > + return false;
> > + }
> > +
> > + return true;
> > +}
> > +
> > /* Caller is responsible for initializing 'dst' with enough storage for
> > * FLOW_U64S * 8 bytes. */
> > void
> > @@ -748,22 +812,7 @@ miniflow_extract(struct dp_packet *packet, struct
> > miniflow *dst)
> > int ip_len;
> > uint16_t tot_len;
> >
> > - if (OVS_UNLIKELY(size < IP_HEADER_LEN)) {
> > - goto out;
> > - }
> > - ip_len = IP_IHL(nh->ip_ihl_ver) * 4;
> > -
> > - if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN)) {
> > - goto out;
> > - }
> > - if (OVS_UNLIKELY(size < ip_len)) {
> > - goto out;
> > - }
> > - tot_len = ntohs(nh->ip_tot_len);
> > - if (OVS_UNLIKELY(tot_len > size || ip_len > tot_len)) {
> > - goto out;
> > - }
> > - if (OVS_UNLIKELY(size - tot_len > UINT8_MAX)) {
> > + if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len,
> > + &tot_len))) {
> > goto out;
> > }
> > dp_packet_set_l2_pad_size(packet, size - tot_len); @@ -786,31
> > +835,19 @@ miniflow_extract(struct dp_packet *packet, struct miniflow
> > *dst)
> > nw_tos = nh->ip_tos;
> > nw_ttl = nh->ip_ttl;
> > nw_proto = nh->ip_proto;
> > - if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) {
> > - nw_frag = FLOW_NW_FRAG_ANY;
> > - if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {
> > - nw_frag |= FLOW_NW_FRAG_LATER;
> > - }
> > - }
> > + nw_frag = ipv4_get_nw_frag(nh);
> > data_pull(&data, &size, ip_len);
> > } else if (dl_type == htons(ETH_TYPE_IPV6)) {
> > - const struct ovs_16aligned_ip6_hdr *nh;
> > + const struct ovs_16aligned_ip6_hdr *nh = data;
> > ovs_be32 tc_flow;
> > uint16_t plen;
> >
> > - if (OVS_UNLIKELY(size < sizeof *nh)) {
> > + if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) {
> > goto out;
> > }
> > - nh = data_pull(&data, &size, sizeof *nh);
> > + data_pull(&data, &size, sizeof *nh);
> >
> > plen = ntohs(nh->ip6_plen);
> > - if (OVS_UNLIKELY(plen > size)) {
> > - goto out;
> > - }
> > - /* Jumbo Payload option not supported yet. */
> > - if (OVS_UNLIKELY(size - plen > UINT8_MAX)) {
> > - goto out;
> > - }
> > dp_packet_set_l2_pad_size(packet, size - plen);
> > size = plen; /* Never pull padding. */
> >
> > @@ -982,6 +1019,60 @@ parse_dl_type(const struct eth_header *data_,
> > size_t
> > size)
> > return parse_ethertype(&data, &size); }
> >
> > +uint16_t
> > +parse_tcp_flags(struct dp_packet *packet) {
> > + const void *data = dp_packet_data(packet);
> > + size_t size = dp_packet_size(packet);
> > + ovs_be16 dl_type;
> > + uint8_t nw_frag = 0, nw_proto = 0;
> > +
> > + if (packet->packet_type != htonl(PT_ETH)) {
> > + return 0;
> > + }
> > +
> > + data_pull(&data, &size, ETH_ADDR_LEN * 2);
> > + dl_type = parse_ethertype(&data, &size);
> > + if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
> > + const struct ip_header *nh = data;
> > + int ip_len;
> > + uint16_t tot_len;
> > +
> > + if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len,
> > &tot_len))) {
> > + return 0;
> > + }
> > + nw_proto = nh->ip_proto;
> > + nw_frag = ipv4_get_nw_frag(nh);
> > +
> > + size = tot_len; /* Never pull padding. */
> > + data_pull(&data, &size, ip_len);
> > + } else if (dl_type == htons(ETH_TYPE_IPV6)) {
> > + const struct ovs_16aligned_ip6_hdr *nh = data;
> > +
> > + if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) {
> > + return 0;
> > + }
> > + data_pull(&data, &size, sizeof *nh);
> > +
> > + size = ntohs(nh->ip6_plen); /* Never pull padding. */
> > + if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag))
> {
> > + return 0;
> > + }
> > + nw_proto = nh->ip6_nxt;
> > + } else {
> > + return 0;
> > + }
> > +
> > + if (!(nw_frag & FLOW_NW_FRAG_LATER) && nw_proto == IPPROTO_TCP &&
> > + size >= TCP_HEADER_LEN) {
> > + const struct tcp_header *tcp = data;
> > +
> > + return TCP_FLAGS_BE32(tcp->tcp_ctl);
> > + }
> > +
> > + return 0;
> > +}
> > +
> > /* For every bit of a field that is wildcarded in 'wildcards', sets the
> > * corresponding bit in 'flow' to zero. */ void diff --git
> > a/lib/flow.h b/lib/flow.h index eb1e2bf..6be4199 100644
> > --- a/lib/flow.h
> > +++ b/lib/flow.h
> > @@ -130,6 +130,7 @@ bool parse_ipv6_ext_hdrs(const void **datap,
> > size_t *sizep, uint8_t *nw_proto,
> > uint8_t *nw_frag);
> > ovs_be16 parse_dl_type(const struct eth_header *data_, size_t size);
> > bool parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh
> > *key);
> > +uint16_t parse_tcp_flags(struct dp_packet *packet);
> >
> > static inline uint64_t
> > flow_get_xreg(const struct flow *flow, int idx)
> > --
> > 2.7.4
More information about the dev
mailing list