[ovs-dev] [PATCH v3] ipv6 tunneling: Fix for performance drop introduced by ipv6 tunnel support.

Daniele Di Proietto diproiettod at vmware.com
Fri Jan 29 02:00:59 UTC 2016


Hi Sugesh,

Sorry for the delay, I've been trying to test different approaches.

I tested the patch and I see that the performance is up again. I'm not
sure whether introducing the 'dl_type' flag is going to be very easy to
maintain, since we have to remember to update when we touch the structure.
Unfortunately that structure is used everywhere in OVS, not just in the
fast path.

I have prepared a workaround to speed up the metadata initialization here:

http://openvswitch.org/pipermail/dev/2016-January/065263.html

In my setup it restores the throughput to the same numbers. It complicates
the code slightly, but the modifications are local to dpif-netdev.

What do you think?

Thanks,

Daniele

On 19/01/2016 09:23, "Sugesh Chandran" <sugesh.chandran at intel.com> wrote:

>Adding a new field called dl_type in flow tunnel structure to verify the
>validity
>of tunnel metadata. This field avoids the need of resetting and
>validating the
>entire ipv4/ipv6 tunnel destination address which caused a serious
>performance
>drop.
>
>Fixes: 3ae91c019019 ("tunneling: add IPv6 support to
>netdev_tunnel_config")
>Signed-off-by: Sugesh Chandran <sugesh.chandran at intel.com>
>---
> lib/flow.c                   |  7 ++---
> lib/match.c                  | 12 ++++----
> lib/meta-flow.c              | 10 +++++--
> lib/netdev-vport.c           | 10 +++----
> lib/odp-util.c               | 10 +++----
> lib/packets.c                |  7 ++++-
> lib/packets.h                | 69
>++++++++++++++++++++++++++++++++------------
> ofproto/ofproto-dpif-ipfix.c |  3 +-
> ofproto/ofproto-dpif-rid.c   |  3 +-
> ofproto/ofproto-dpif-sflow.c |  6 ++--
> ofproto/tunnel.c             | 12 ++++----
> 11 files changed, 96 insertions(+), 53 deletions(-)
>
>diff --git a/lib/flow.c b/lib/flow.c
>index 5668d0c..33aadd1 100644
>--- a/lib/flow.c
>+++ b/lib/flow.c
>@@ -818,15 +818,14 @@ flow_get_metadata(const struct flow *flow, struct
>match *flow_metadata)
>     if (flow->tunnel.ip_src) {
>         match_set_tun_src(flow_metadata, flow->tunnel.ip_src);
>     }
>-    if (flow->tunnel.ip_dst) {
>+    if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
>         match_set_tun_dst(flow_metadata, flow->tunnel.ip_dst);
>+    } else if (flow->tunnel.dl_type == htons(ETH_TYPE_IPV6)) {
>+        match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
>     }
>     if (ipv6_addr_is_set(&flow->tunnel.ipv6_src)) {
>         match_set_tun_ipv6_src(flow_metadata, &flow->tunnel.ipv6_src);
>     }
>-    if (ipv6_addr_is_set(&flow->tunnel.ipv6_dst)) {
>-        match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
>-    }
>     if (flow->tunnel.gbp_id != htons(0)) {
>         match_set_tun_gbp_id(flow_metadata, flow->tunnel.gbp_id);
>     }
>diff --git a/lib/match.c b/lib/match.c
>index 95d34bc..97a623e 100644
>--- a/lib/match.c
>+++ b/lib/match.c
>@@ -186,8 +186,8 @@ match_set_tun_dst(struct match *match, ovs_be32 dst)
> void
> match_set_tun_dst_masked(struct match *match, ovs_be32 dst, ovs_be32
>mask)
> {
>-    match->wc.masks.tunnel.ip_dst = mask;
>-    match->flow.tunnel.ip_dst = dst & mask;
>+    set_ipv4_dst_tnl(&match->wc.masks.tunnel, mask);
>+    set_ipv4_dst_tnl(&match->flow.tunnel, dst & mask);
> }
> 
> void
>@@ -208,16 +208,16 @@ match_set_tun_ipv6_src_masked(struct match *match,
>const struct in6_addr *src,
> void
> match_set_tun_ipv6_dst(struct match *match, const struct in6_addr *dst)
> {
>-    match->flow.tunnel.ipv6_dst = *dst;
>-    match->wc.masks.tunnel.ipv6_dst = in6addr_exact;
>+    set_ipv6_dst_tnl(&match->flow.tunnel, *dst);
>+    set_ipv6_dst_tnl(&match->wc.masks.tunnel, in6addr_exact);
> }
> 
> void
> match_set_tun_ipv6_dst_masked(struct match *match, const struct in6_addr
>*dst,
>                               const struct in6_addr *mask)
> {
>-    match->flow.tunnel.ipv6_dst = ipv6_addr_bitand(dst, mask);
>-    match->wc.masks.tunnel.ipv6_dst = *mask;
>+    set_ipv6_dst_tnl(&match->flow.tunnel, ipv6_addr_bitand(dst, mask));
>+    set_ipv6_dst_tnl(&match->wc.masks.tunnel, *mask);
> }
> 
> void
>diff --git a/lib/meta-flow.c b/lib/meta-flow.c
>index 6bd0b99..9d939be 100644
>--- a/lib/meta-flow.c
>+++ b/lib/meta-flow.c
>@@ -1180,13 +1180,13 @@ mf_set_flow_value(const struct mf_field *mf,
>         flow->tunnel.ip_src = value->be32;
>         break;
>     case MFF_TUN_DST:
>-        flow->tunnel.ip_dst = value->be32;
>+        set_ipv4_dst_tnl(&flow->tunnel, value->be32);
>         break;
>     case MFF_TUN_IPV6_SRC:
>         flow->tunnel.ipv6_src = value->ipv6;
>         break;
>     case MFF_TUN_IPV6_DST:
>-        flow->tunnel.ipv6_dst = value->ipv6;
>+        set_ipv6_dst_tnl(&flow->tunnel, value->ipv6);
>         break;
>     case MFF_TUN_FLAGS:
>         flow->tunnel.flags = (flow->tunnel.flags & ~FLOW_TNL_PUB_F_MASK)
>|
>@@ -1503,6 +1503,12 @@ mf_set_wild(const struct mf_field *mf, struct
>match *match, char **err_str)
>                sizeof match->wc.masks.tunnel.ipv6_dst);
>         memset(&match->flow.tunnel.ipv6_dst, 0,
>                sizeof match->flow.tunnel.ipv6_dst);
>+        /* What if flow have a valid ipv4 tunnel data??
>+         * Reset the dl_type only if thats not the case.
>+         */
>+        match->wc.masks.tunnel.dl_type = (match->wc.masks.tunnel.dl_type
>==
>+                                          htons(ETH_TYPE_IP)) ?
>+                                          match->wc.masks.tunnel.dl_type
>: 0;
>         break;
>     case MFF_TUN_FLAGS:
>         match_set_tun_flags_masked(match, 0, 0);
>diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
>index 88f5022..94ddf59 100644
>--- a/lib/netdev-vport.c
>+++ b/lib/netdev-vport.c
>@@ -877,7 +877,7 @@ ip_extract_tnl_md(struct dp_packet *packet, struct
>flow_tnl *tnl,
> {
>     void *nh;
>     struct ip_header *ip;
>-    struct ovs_16aligned_ip6_hdr *ip6;
>+    struct ip6_hdr *ip6;
>     void *l4;
>     int l3_size;
> 
>@@ -919,19 +919,19 @@ ip_extract_tnl_md(struct dp_packet *packet, struct
>flow_tnl *tnl,
>         ip_dst = get_16aligned_be32(&ip->ip_dst);
> 
>         tnl->ip_src = ip_src;
>-        tnl->ip_dst = ip_dst;
>         tnl->ip_tos = ip->ip_tos;
>         tnl->ip_ttl = ip->ip_ttl;
>+        set_ipv4_dst_tnl(tnl, ip_dst);
> 
>         *hlen += IP_HEADER_LEN;
> 
>     } else if (IP_VER(ip->ip_ihl_ver) == 6) {
> 
>-        memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.be16, sizeof
>ip6->ip6_src);
>-        memcpy(tnl->ipv6_dst.s6_addr, ip6->ip6_dst.be16, sizeof
>ip6->ip6_dst);
>+        memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.s6_addr,
>+               sizeof ip6->ip6_src);
>         tnl->ip_tos = 0;
>         tnl->ip_ttl = ip6->ip6_hlim;
>-
>+        set_ipv6_dst_tnl(tnl, *(struct in6_addr *)&ip6->ip6_dst);
>         *hlen += IPV6_HEADER_LEN;
> 
>     } else {
>diff --git a/lib/odp-util.c b/lib/odp-util.c
>index f16e113..309bb9c 100644
>--- a/lib/odp-util.c
>+++ b/lib/odp-util.c
>@@ -510,7 +510,7 @@ format_odp_tnl_push_header(struct ds *ds, struct
>ovs_action_push_tnl *data)
>                       gnh->oam ? "oam," : "",
>                       gnh->critical ? "crit," : "",
>                       ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
>- 
>+
>         if (gnh->opt_len) {
>             ds_put_cstr(ds, ",options(");
>             format_geneve_opts(gnh->options, NULL, gnh->opt_len * 4,
>@@ -1864,13 +1864,13 @@ odp_tun_key_from_attr__(const struct nlattr *attr,
>             tun->ip_src = nl_attr_get_be32(a);
>             break;
>         case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
>-            tun->ip_dst = nl_attr_get_be32(a);
>+            set_ipv4_dst_tnl(tun, nl_attr_get_be32(a));
>             break;
>         case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
>             tun->ipv6_src = nl_attr_get_in6_addr(a);
>             break;
>         case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
>-            tun->ipv6_dst = nl_attr_get_in6_addr(a);
>+            set_ipv6_dst_tnl(tun, nl_attr_get_in6_addr(a));
>             break;
>         case OVS_TUNNEL_KEY_ATTR_TOS:
>             tun->ip_tos = nl_attr_get_u8(a);
>@@ -1961,13 +1961,13 @@ tun_key_to_attr(struct ofpbuf *a, const struct
>flow_tnl *tun_key,
>     if (tun_key->ip_src) {
>         nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
>tun_key->ip_src);
>     }
>-    if (tun_key->ip_dst) {
>+    if (tun_key->dl_type == htons(ETH_TYPE_IP)) {
>         nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
>tun_key->ip_dst);
>     }
>     if (ipv6_addr_is_set(&tun_key->ipv6_src)) {
>         nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
>&tun_key->ipv6_src);
>     }
>-    if (ipv6_addr_is_set(&tun_key->ipv6_dst)) {
>+    if (tun_key->dl_type == htons(ETH_TYPE_IPV6)) {
>         nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
>&tun_key->ipv6_dst);
>     }
>     if (tun_key->ip_tos) {
>diff --git a/lib/packets.c b/lib/packets.c
>index d82341d..4f00c0e 100644
>--- a/lib/packets.c
>+++ b/lib/packets.c
>@@ -39,7 +39,12 @@ const struct in6_addr in6addr_all_hosts =
>IN6ADDR_ALL_HOSTS_INIT;
> struct in6_addr
> flow_tnl_dst(const struct flow_tnl *tnl)
> {
>-    return tnl->ip_dst ? in6_addr_mapped_ipv4(tnl->ip_dst) :
>tnl->ipv6_dst;
>+    if (tnl->dl_type == htons(ETH_TYPE_IP)) {
>+        return in6_addr_mapped_ipv4(tnl->ip_dst);
>+    } else if (tnl->dl_type == htons(ETH_TYPE_IPV6)) {
>+        return tnl->ipv6_dst;
>+    }
>+    return in6addr_any;
> }
> 
> struct in6_addr
>diff --git a/lib/packets.h b/lib/packets.h
>index 834e8a4..7e8dd91 100644
>--- a/lib/packets.h
>+++ b/lib/packets.h
>@@ -35,8 +35,26 @@
> struct dp_packet;
> struct ds;
> 
>+#define ETH_TYPE_IP            0x0800
>+#define ETH_TYPE_ARP           0x0806
>+#define ETH_TYPE_TEB           0x6558
>+#define ETH_TYPE_VLAN_8021Q    0x8100
>+#define ETH_TYPE_VLAN          ETH_TYPE_VLAN_8021Q
>+#define ETH_TYPE_VLAN_8021AD   0x88a8
>+#define ETH_TYPE_IPV6          0x86dd
>+#define ETH_TYPE_LACP          0x8809
>+#define ETH_TYPE_RARP          0x8035
>+#define ETH_TYPE_MPLS          0x8847
>+#define ETH_TYPE_MPLS_MCAST    0x8848
>+
> /* Tunnel information used in flow key and metadata. */
> struct flow_tnl {
>+    /* The tunnel destination ip/ipv6 address are not initializing to
>zero
>+     * due to the performance constrains. the ethernet type field
>'dl_type' is
>+     * used to validate the destination addresses. 'dl_type' is set to
>zero
>+     * for a invalid flow_tnl entry.
>+     */
>+    ovs_be16 dl_type;
>     ovs_be32 ip_dst;
>     struct in6_addr ipv6_dst;
>     ovs_be32 ip_src;
>@@ -49,7 +67,7 @@ struct flow_tnl {
>     ovs_be16 tp_dst;
>     ovs_be16 gbp_id;
>     uint8_t  gbp_flags;
>-    uint8_t  pad1[5];        /* Pad to 64 bits. */
>+    uint8_t  pad1[1];        /* Pad to 64 bits. */
>     struct tun_metadata metadata;
> };
> 
>@@ -76,10 +94,36 @@ struct flow_tnl {
> 
> static inline bool ipv6_addr_is_set(const struct in6_addr *addr);
> 
>+static inline void
>+set_ipv4_dst_tnl(struct flow_tnl *tnl, ovs_be32 ip_dst)
>+{
>+    if (tnl) {
>+        tnl->ip_dst = ip_dst;
>+        if (tnl->ip_dst) {
>+            tnl->dl_type = htons(ETH_TYPE_IP);
>+        } else {
>+            tnl->dl_type = 0;
>+        }
>+    }
>+}
>+
>+static inline void
>+set_ipv6_dst_tnl(struct flow_tnl *tnl, struct in6_addr ipv6_dst)
>+{
>+    if (tnl) {
>+        tnl->ipv6_dst = ipv6_dst;
>+        if (ipv6_addr_is_set(&tnl->ipv6_dst)) {
>+            tnl->dl_type = htons(ETH_TYPE_IPV6);
>+        } else {
>+            tnl->dl_type = 0;
>+        }
>+    }
>+}
>+
> static inline bool
> flow_tnl_dst_is_set(const struct flow_tnl *tnl)
> {
>-    return tnl->ip_dst || ipv6_addr_is_set(&tnl->ipv6_dst);
>+    return (tnl->dl_type != 0);
> }
> 
> struct in6_addr flow_tnl_dst(const struct flow_tnl *tnl);
>@@ -90,8 +134,8 @@ static inline size_t
> flow_tnl_size(const struct flow_tnl *src)
> {
>     if (!flow_tnl_dst_is_set(src)) {
>-        /* Covers ip_dst and ipv6_dst only. */
>-        return offsetof(struct flow_tnl, ip_src);
>+        /* Covers dl_type field only. */
>+        return offsetof(struct flow_tnl, ip_dst);
>     }
>     if (src->flags & FLOW_TNL_F_UDPIF) {
>         /* Datapath format, cover all options we have. */
>@@ -154,11 +198,10 @@ static inline void
> pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
> {
>     /* It can be expensive to zero out all of the tunnel metadata.
>However,
>-     * we can just zero out ip_dst and the rest of the data will never be
>+     * we can just zero out dl_type and the rest of the data will never
>be
>      * looked at. */
>     memset(md, 0, offsetof(struct pkt_metadata, in_port));
>-    md->tunnel.ip_dst = 0;
>-    md->tunnel.ipv6_dst = in6addr_any;
>+    md->tunnel.dl_type = 0;
> 
>     md->in_port.odp_port = port;
> }
>@@ -347,18 +390,6 @@ ovs_be32 set_mpls_lse_values(uint8_t ttl, uint8_t
>tc, uint8_t bos,
> #define ETH_ADDR_SCAN_ARGS(EA) \
>     &(EA).ea[0], &(EA).ea[1], &(EA).ea[2], &(EA).ea[3], &(EA).ea[4],
>&(EA).ea[5]
> 
>-#define ETH_TYPE_IP            0x0800
>-#define ETH_TYPE_ARP           0x0806
>-#define ETH_TYPE_TEB           0x6558
>-#define ETH_TYPE_VLAN_8021Q    0x8100
>-#define ETH_TYPE_VLAN          ETH_TYPE_VLAN_8021Q
>-#define ETH_TYPE_VLAN_8021AD   0x88a8
>-#define ETH_TYPE_IPV6          0x86dd
>-#define ETH_TYPE_LACP          0x8809
>-#define ETH_TYPE_RARP          0x8035
>-#define ETH_TYPE_MPLS          0x8847
>-#define ETH_TYPE_MPLS_MCAST    0x8848
>-
> static inline bool eth_type_mpls(ovs_be16 eth_type)
> {
>     return eth_type == htons(ETH_TYPE_MPLS) ||
>diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c
>index a610c53..ca06583 100644
>--- a/ofproto/ofproto-dpif-ipfix.c
>+++ b/ofproto/ofproto-dpif-ipfix.c
>@@ -1721,7 +1721,8 @@ dpif_ipfix_bridge_sample(struct dpif_ipfix *di,
>const struct dp_packet *packet,
>      * of matched packets. */
>     packet_delta_count = UINT32_MAX / di->bridge_exporter.probability;
>     if (di->bridge_exporter.options->enable_tunnel_sampling) {
>-        if (output_odp_port == ODPP_NONE && flow->tunnel.ip_dst) {
>+        if (output_odp_port == ODPP_NONE &&
>+                flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
>             /* Input tunnel. */
>             tunnel_key = &flow->tunnel;
>             tunnel_port = dpif_ipfix_find_port(di, input_odp_port);
>diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
>index d142933..735b5f3 100644
>--- a/ofproto/ofproto-dpif-rid.c
>+++ b/ofproto/ofproto-dpif-rid.c
>@@ -297,8 +297,7 @@ uint32_t
> recirc_alloc_id(struct ofproto_dpif *ofproto)
> {
>     struct flow_tnl tunnel;
>-    tunnel.ip_dst = htonl(0);
>-    tunnel.ipv6_dst = in6addr_any;
>+    tunnel.dl_type = 0;
>     struct recirc_state state = {
>         .table_id = TBL_INTERNAL,
>         .ofproto = ofproto,
>diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
>index f11699c..ea21a1a 100644
>--- a/ofproto/ofproto-dpif-sflow.c
>+++ b/ofproto/ofproto-dpif-sflow.c
>@@ -899,7 +899,7 @@ sflow_read_tnl_push_action(const struct nlattr *attr,
>     /* IPv4 */
>     /* Cannot assume alignment so just use memcpy. */
>     sflow_actions->tunnel.ip_src = get_16aligned_be32(&ip->ip_src);
>-    sflow_actions->tunnel.ip_dst = get_16aligned_be32(&ip->ip_dst);
>+    set_ipv4_dst_tnl(&sflow_actions->tunnel,
>get_16aligned_be32(&ip->ip_dst));
>     sflow_actions->tunnel.ip_tos = ip->ip_tos;
>     sflow_actions->tunnel.ip_ttl = ip->ip_ttl;
>     /* The tnl_push action can supply the ip_protocol too. */
>@@ -991,7 +991,7 @@ sflow_read_set_action(const struct nlattr *attr,
>                 sflow_actions->tunnel.ip_src = key->ipv4_src;
>             }
>             if (key->ipv4_dst) {
>-                sflow_actions->tunnel.ip_dst = key->ipv4_dst;
>+                set_ipv4_dst_tnl(&sflow_actions->tunnel, key->ipv4_dst);
>             }
>             if (key->ipv4_proto) {
>                 sflow_actions->tunnel_ipproto = key->ipv4_proto;
>@@ -1287,7 +1287,7 @@ dpif_sflow_received(struct dpif_sflow *ds, const
>struct dp_packet *packet,
>     fs.output = cookie->sflow.output;
> 
>     /* Input tunnel. */
>-    if (flow->tunnel.ip_dst) {
>+    if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
> 	memset(&tnlInElem, 0, sizeof(tnlInElem));
> 	tnlInElem.tag = SFLFLOW_EX_IPV4_TUNNEL_INGRESS;
> 	tnlInProto = dpif_sflow_tunnel_proto(in_dsp->tunnel_type);
>diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c
>index 5cf6c75..b2bd254 100644
>--- a/ofproto/tunnel.c
>+++ b/ofproto/tunnel.c
>@@ -362,12 +362,12 @@ tnl_wc_init(struct flow *flow, struct
>flow_wildcards *wc)
> {
>     if (tnl_port_should_receive(flow)) {
>         wc->masks.tunnel.tun_id = OVS_BE64_MAX;
>-        if (flow->tunnel.ip_dst) {
>+        if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
>             wc->masks.tunnel.ip_src = OVS_BE32_MAX;
>-            wc->masks.tunnel.ip_dst = OVS_BE32_MAX;
>-        } else {
>+            set_ipv4_dst_tnl(&wc->masks.tunnel, OVS_BE32_MAX);
>+        } else if (flow->tunnel.dl_type == htons(ETH_TYPE_IPV6)) {
>             wc->masks.tunnel.ipv6_src = in6addr_exact;
>-            wc->masks.tunnel.ipv6_dst = in6addr_exact;
>+            set_ipv6_dst_tnl(&wc->masks.tunnel, in6addr_exact);
>         }
>         wc->masks.tunnel.flags = (FLOW_TNL_F_DONT_FRAGMENT |
>                                   FLOW_TNL_F_CSUM |
>@@ -424,7 +424,9 @@ tnl_port_send(const struct ofport_dpif *ofport,
>struct flow *flow,
>     if (!cfg->ip_dst_flow) {
>         flow->tunnel.ip_dst =
>in6_addr_get_mapped_ipv4(&tnl_port->match.ipv6_dst);
>         if (!flow->tunnel.ip_dst) {
>-            flow->tunnel.ipv6_dst = tnl_port->match.ipv6_dst;
>+            set_ipv6_dst_tnl(&flow->tunnel, tnl_port->match.ipv6_dst);
>+        } else {
>+            set_ipv4_dst_tnl(&flow->tunnel, flow->tunnel.ip_dst);
>         }
>     }
>     flow->pkt_mark = tnl_port->match.pkt_mark;
>-- 
>1.9.1
>
>_______________________________________________
>dev mailing list
>dev at openvswitch.org
>http://openvswitch.org/mailman/listinfo/dev




More information about the dev mailing list