[ovs-dev] [PATCH v2] ipv6 tunneling: Fix for performance drop introduced by ipv6 tunnel support.

Sugesh Chandran sugesh.chandran at intel.com
Thu Jan 14 13:27:45 UTC 2016


Adding a new field called dl_type in flow tunnel structure to verify the validity
of tunnel metadata. This field avoids the need of resetting and validating the
entire ipv4/ipv6 tunnel destination address which caused a serious performance
drop.

Fixes: 3ae91c019019 ("tunneling: add IPv6 support to netdev_tunnel_config")
Signed-off-by: Sugesh Chandran <sugesh.chandran at intel.com>
---
 lib/flow.c                   |  7 ++---
 lib/match.c                  | 12 ++++----
 lib/meta-flow.c              | 10 +++++--
 lib/netdev-vport.c           | 10 +++----
 lib/odp-util.c               | 10 +++----
 lib/packets.c                |  7 ++++-
 lib/packets.h                | 69 ++++++++++++++++++++++++++++++++------------
 ofproto/ofproto-dpif-ipfix.c |  3 +-
 ofproto/ofproto-dpif-rid.c   |  3 +-
 ofproto/ofproto-dpif-sflow.c |  6 ++--
 ofproto/tunnel.c             | 12 ++++----
 11 files changed, 96 insertions(+), 53 deletions(-)

diff --git a/lib/flow.c b/lib/flow.c
index 5668d0c..f9a645b 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -818,15 +818,14 @@ flow_get_metadata(const struct flow *flow, struct match *flow_metadata)
     if (flow->tunnel.ip_src) {
         match_set_tun_src(flow_metadata, flow->tunnel.ip_src);
     }
-    if (flow->tunnel.ip_dst) {
+    if (flow->tunnel.dl_type == ETH_TYPE_IP) {
         match_set_tun_dst(flow_metadata, flow->tunnel.ip_dst);
+    } else if (flow->tunnel.dl_type == ETH_TYPE_IPV6) {
+        match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
     }
     if (ipv6_addr_is_set(&flow->tunnel.ipv6_src)) {
         match_set_tun_ipv6_src(flow_metadata, &flow->tunnel.ipv6_src);
     }
-    if (ipv6_addr_is_set(&flow->tunnel.ipv6_dst)) {
-        match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
-    }
     if (flow->tunnel.gbp_id != htons(0)) {
         match_set_tun_gbp_id(flow_metadata, flow->tunnel.gbp_id);
     }
diff --git a/lib/match.c b/lib/match.c
index 95d34bc..97a623e 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -186,8 +186,8 @@ match_set_tun_dst(struct match *match, ovs_be32 dst)
 void
 match_set_tun_dst_masked(struct match *match, ovs_be32 dst, ovs_be32 mask)
 {
-    match->wc.masks.tunnel.ip_dst = mask;
-    match->flow.tunnel.ip_dst = dst & mask;
+    set_ipv4_dst_tnl(&match->wc.masks.tunnel, mask);
+    set_ipv4_dst_tnl(&match->flow.tunnel, dst & mask);
 }
 
 void
@@ -208,16 +208,16 @@ match_set_tun_ipv6_src_masked(struct match *match, const struct in6_addr *src,
 void
 match_set_tun_ipv6_dst(struct match *match, const struct in6_addr *dst)
 {
-    match->flow.tunnel.ipv6_dst = *dst;
-    match->wc.masks.tunnel.ipv6_dst = in6addr_exact;
+    set_ipv6_dst_tnl(&match->flow.tunnel, *dst);
+    set_ipv6_dst_tnl(&match->wc.masks.tunnel, in6addr_exact);
 }
 
 void
 match_set_tun_ipv6_dst_masked(struct match *match, const struct in6_addr *dst,
                               const struct in6_addr *mask)
 {
-    match->flow.tunnel.ipv6_dst = ipv6_addr_bitand(dst, mask);
-    match->wc.masks.tunnel.ipv6_dst = *mask;
+    set_ipv6_dst_tnl(&match->flow.tunnel, ipv6_addr_bitand(dst, mask));
+    set_ipv6_dst_tnl(&match->wc.masks.tunnel, *mask);
 }
 
 void
diff --git a/lib/meta-flow.c b/lib/meta-flow.c
index 6bd0b99..7c854cf 100644
--- a/lib/meta-flow.c
+++ b/lib/meta-flow.c
@@ -1180,13 +1180,13 @@ mf_set_flow_value(const struct mf_field *mf,
         flow->tunnel.ip_src = value->be32;
         break;
     case MFF_TUN_DST:
-        flow->tunnel.ip_dst = value->be32;
+        set_ipv4_dst_tnl(&flow->tunnel, value->be32);
         break;
     case MFF_TUN_IPV6_SRC:
         flow->tunnel.ipv6_src = value->ipv6;
         break;
     case MFF_TUN_IPV6_DST:
-        flow->tunnel.ipv6_dst = value->ipv6;
+        set_ipv6_dst_tnl(&flow->tunnel, value->ipv6);
         break;
     case MFF_TUN_FLAGS:
         flow->tunnel.flags = (flow->tunnel.flags & ~FLOW_TNL_PUB_F_MASK) |
@@ -1503,6 +1503,12 @@ mf_set_wild(const struct mf_field *mf, struct match *match, char **err_str)
                sizeof match->wc.masks.tunnel.ipv6_dst);
         memset(&match->flow.tunnel.ipv6_dst, 0,
                sizeof match->flow.tunnel.ipv6_dst);
+        /* What if flow have a valid ipv4 tunnel data??
+         * Reset the dl_type only if thats not the case.
+         */
+        match->wc.masks.tunnel.dl_type = (match->wc.masks.tunnel.dl_type ==
+                                          ETH_TYPE_IP) ?
+                                          match->wc.masks.tunnel.dl_type : 0;
         break;
     case MFF_TUN_FLAGS:
         match_set_tun_flags_masked(match, 0, 0);
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 88f5022..94ddf59 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -877,7 +877,7 @@ ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
 {
     void *nh;
     struct ip_header *ip;
-    struct ovs_16aligned_ip6_hdr *ip6;
+    struct ip6_hdr *ip6;
     void *l4;
     int l3_size;
 
@@ -919,19 +919,19 @@ ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
         ip_dst = get_16aligned_be32(&ip->ip_dst);
 
         tnl->ip_src = ip_src;
-        tnl->ip_dst = ip_dst;
         tnl->ip_tos = ip->ip_tos;
         tnl->ip_ttl = ip->ip_ttl;
+        set_ipv4_dst_tnl(tnl, ip_dst);
 
         *hlen += IP_HEADER_LEN;
 
     } else if (IP_VER(ip->ip_ihl_ver) == 6) {
 
-        memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.be16, sizeof ip6->ip6_src);
-        memcpy(tnl->ipv6_dst.s6_addr, ip6->ip6_dst.be16, sizeof ip6->ip6_dst);
+        memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.s6_addr,
+               sizeof ip6->ip6_src);
         tnl->ip_tos = 0;
         tnl->ip_ttl = ip6->ip6_hlim;
-
+        set_ipv6_dst_tnl(tnl, *(struct in6_addr *)&ip6->ip6_dst);
         *hlen += IPV6_HEADER_LEN;
 
     } else {
diff --git a/lib/odp-util.c b/lib/odp-util.c
index f16e113..b6611da 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -510,7 +510,7 @@ format_odp_tnl_push_header(struct ds *ds, struct ovs_action_push_tnl *data)
                       gnh->oam ? "oam," : "",
                       gnh->critical ? "crit," : "",
                       ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
- 
+
         if (gnh->opt_len) {
             ds_put_cstr(ds, ",options(");
             format_geneve_opts(gnh->options, NULL, gnh->opt_len * 4,
@@ -1864,13 +1864,13 @@ odp_tun_key_from_attr__(const struct nlattr *attr,
             tun->ip_src = nl_attr_get_be32(a);
             break;
         case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
-            tun->ip_dst = nl_attr_get_be32(a);
+            set_ipv4_dst_tnl(tun, nl_attr_get_be32(a));
             break;
         case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
             tun->ipv6_src = nl_attr_get_in6_addr(a);
             break;
         case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
-            tun->ipv6_dst = nl_attr_get_in6_addr(a);
+            set_ipv6_dst_tnl(tun, nl_attr_get_in6_addr(a));
             break;
         case OVS_TUNNEL_KEY_ATTR_TOS:
             tun->ip_tos = nl_attr_get_u8(a);
@@ -1961,13 +1961,13 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key,
     if (tun_key->ip_src) {
         nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ip_src);
     }
-    if (tun_key->ip_dst) {
+    if (tun_key->dl_type == ETH_TYPE_IP) {
         nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ip_dst);
     }
     if (ipv6_addr_is_set(&tun_key->ipv6_src)) {
         nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_SRC, &tun_key->ipv6_src);
     }
-    if (ipv6_addr_is_set(&tun_key->ipv6_dst)) {
+    if (tun_key->dl_type == ETH_TYPE_IPV6) {
         nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_DST, &tun_key->ipv6_dst);
     }
     if (tun_key->ip_tos) {
diff --git a/lib/packets.c b/lib/packets.c
index d82341d..94d1b42 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -39,7 +39,12 @@ const struct in6_addr in6addr_all_hosts = IN6ADDR_ALL_HOSTS_INIT;
 struct in6_addr
 flow_tnl_dst(const struct flow_tnl *tnl)
 {
-    return tnl->ip_dst ? in6_addr_mapped_ipv4(tnl->ip_dst) : tnl->ipv6_dst;
+    if (tnl->dl_type == ETH_TYPE_IP) {
+        return in6_addr_mapped_ipv4(tnl->ip_dst);
+    } else if (tnl->dl_type == ETH_TYPE_IPV6) {
+        return tnl->ipv6_dst;
+    }
+    return in6addr_any;
 }
 
 struct in6_addr
diff --git a/lib/packets.h b/lib/packets.h
index 834e8a4..d2b5f27 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -35,8 +35,26 @@
 struct dp_packet;
 struct ds;
 
+#define ETH_TYPE_IP            0x0800
+#define ETH_TYPE_ARP           0x0806
+#define ETH_TYPE_TEB           0x6558
+#define ETH_TYPE_VLAN_8021Q    0x8100
+#define ETH_TYPE_VLAN          ETH_TYPE_VLAN_8021Q
+#define ETH_TYPE_VLAN_8021AD   0x88a8
+#define ETH_TYPE_IPV6          0x86dd
+#define ETH_TYPE_LACP          0x8809
+#define ETH_TYPE_RARP          0x8035
+#define ETH_TYPE_MPLS          0x8847
+#define ETH_TYPE_MPLS_MCAST    0x8848
+
 /* Tunnel information used in flow key and metadata. */
 struct flow_tnl {
+    /* The tunnel destination ip/ipv6 address are not initializing to zero
+     * due to the performance constrains. the ethernet type field 'dl_type' is
+     * used to validate the destination addresses. 'dl_type' is set to zero
+     * for a invalid flow_tnl entry.
+     */
+    ovs_be16 dl_type;
     ovs_be32 ip_dst;
     struct in6_addr ipv6_dst;
     ovs_be32 ip_src;
@@ -49,7 +67,7 @@ struct flow_tnl {
     ovs_be16 tp_dst;
     ovs_be16 gbp_id;
     uint8_t  gbp_flags;
-    uint8_t  pad1[5];        /* Pad to 64 bits. */
+    uint8_t  pad1[1];        /* Pad to 64 bits. */
     struct tun_metadata metadata;
 };
 
@@ -76,10 +94,36 @@ struct flow_tnl {
 
 static inline bool ipv6_addr_is_set(const struct in6_addr *addr);
 
+static inline void
+set_ipv4_dst_tnl(struct flow_tnl *tnl, ovs_be32 ip_dst)
+{
+    if (tnl) {
+        tnl->ip_dst = ip_dst;
+        if (tnl->ip_dst) {
+            tnl->dl_type = ETH_TYPE_IP;
+        } else {
+            tnl->dl_type = 0;
+        }
+    }
+}
+
+static inline void
+set_ipv6_dst_tnl(struct flow_tnl *tnl, struct in6_addr ipv6_dst)
+{
+    if (tnl) {
+        tnl->ipv6_dst = ipv6_dst;
+        if (ipv6_addr_is_set(&tnl->ipv6_dst)) {
+            tnl->dl_type = ETH_TYPE_IPV6;
+        } else {
+            tnl->dl_type = 0;
+        }
+    }
+}
+
 static inline bool
 flow_tnl_dst_is_set(const struct flow_tnl *tnl)
 {
-    return tnl->ip_dst || ipv6_addr_is_set(&tnl->ipv6_dst);
+    return (tnl->dl_type != 0);
 }
 
 struct in6_addr flow_tnl_dst(const struct flow_tnl *tnl);
@@ -90,8 +134,8 @@ static inline size_t
 flow_tnl_size(const struct flow_tnl *src)
 {
     if (!flow_tnl_dst_is_set(src)) {
-        /* Covers ip_dst and ipv6_dst only. */
-        return offsetof(struct flow_tnl, ip_src);
+        /* Covers dl_type field only. */
+        return offsetof(struct flow_tnl, ip_dst);
     }
     if (src->flags & FLOW_TNL_F_UDPIF) {
         /* Datapath format, cover all options we have. */
@@ -154,11 +198,10 @@ static inline void
 pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
 {
     /* It can be expensive to zero out all of the tunnel metadata. However,
-     * we can just zero out ip_dst and the rest of the data will never be
+     * we can just zero out dl_type and the rest of the data will never be
      * looked at. */
     memset(md, 0, offsetof(struct pkt_metadata, in_port));
-    md->tunnel.ip_dst = 0;
-    md->tunnel.ipv6_dst = in6addr_any;
+    md->tunnel.dl_type = 0;
 
     md->in_port.odp_port = port;
 }
@@ -347,18 +390,6 @@ ovs_be32 set_mpls_lse_values(uint8_t ttl, uint8_t tc, uint8_t bos,
 #define ETH_ADDR_SCAN_ARGS(EA) \
     &(EA).ea[0], &(EA).ea[1], &(EA).ea[2], &(EA).ea[3], &(EA).ea[4], &(EA).ea[5]
 
-#define ETH_TYPE_IP            0x0800
-#define ETH_TYPE_ARP           0x0806
-#define ETH_TYPE_TEB           0x6558
-#define ETH_TYPE_VLAN_8021Q    0x8100
-#define ETH_TYPE_VLAN          ETH_TYPE_VLAN_8021Q
-#define ETH_TYPE_VLAN_8021AD   0x88a8
-#define ETH_TYPE_IPV6          0x86dd
-#define ETH_TYPE_LACP          0x8809
-#define ETH_TYPE_RARP          0x8035
-#define ETH_TYPE_MPLS          0x8847
-#define ETH_TYPE_MPLS_MCAST    0x8848
-
 static inline bool eth_type_mpls(ovs_be16 eth_type)
 {
     return eth_type == htons(ETH_TYPE_MPLS) ||
diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c
index a610c53..e022fc1 100644
--- a/ofproto/ofproto-dpif-ipfix.c
+++ b/ofproto/ofproto-dpif-ipfix.c
@@ -1721,7 +1721,8 @@ dpif_ipfix_bridge_sample(struct dpif_ipfix *di, const struct dp_packet *packet,
      * of matched packets. */
     packet_delta_count = UINT32_MAX / di->bridge_exporter.probability;
     if (di->bridge_exporter.options->enable_tunnel_sampling) {
-        if (output_odp_port == ODPP_NONE && flow->tunnel.ip_dst) {
+        if (output_odp_port == ODPP_NONE &&
+                flow->tunnel.dl_type == ETH_TYPE_IP) {
             /* Input tunnel. */
             tunnel_key = &flow->tunnel;
             tunnel_port = dpif_ipfix_find_port(di, input_odp_port);
diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
index d142933..735b5f3 100644
--- a/ofproto/ofproto-dpif-rid.c
+++ b/ofproto/ofproto-dpif-rid.c
@@ -297,8 +297,7 @@ uint32_t
 recirc_alloc_id(struct ofproto_dpif *ofproto)
 {
     struct flow_tnl tunnel;
-    tunnel.ip_dst = htonl(0);
-    tunnel.ipv6_dst = in6addr_any;
+    tunnel.dl_type = 0;
     struct recirc_state state = {
         .table_id = TBL_INTERNAL,
         .ofproto = ofproto,
diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
index f11699c..6cf6da5 100644
--- a/ofproto/ofproto-dpif-sflow.c
+++ b/ofproto/ofproto-dpif-sflow.c
@@ -899,7 +899,7 @@ sflow_read_tnl_push_action(const struct nlattr *attr,
     /* IPv4 */
     /* Cannot assume alignment so just use memcpy. */
     sflow_actions->tunnel.ip_src = get_16aligned_be32(&ip->ip_src);
-    sflow_actions->tunnel.ip_dst = get_16aligned_be32(&ip->ip_dst);
+    set_ipv4_dst_tnl(&sflow_actions->tunnel, get_16aligned_be32(&ip->ip_dst));
     sflow_actions->tunnel.ip_tos = ip->ip_tos;
     sflow_actions->tunnel.ip_ttl = ip->ip_ttl;
     /* The tnl_push action can supply the ip_protocol too. */
@@ -991,7 +991,7 @@ sflow_read_set_action(const struct nlattr *attr,
                 sflow_actions->tunnel.ip_src = key->ipv4_src;
             }
             if (key->ipv4_dst) {
-                sflow_actions->tunnel.ip_dst = key->ipv4_dst;
+                set_ipv4_dst_tnl(&sflow_actions->tunnel, key->ipv4_dst);
             }
             if (key->ipv4_proto) {
                 sflow_actions->tunnel_ipproto = key->ipv4_proto;
@@ -1287,7 +1287,7 @@ dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
     fs.output = cookie->sflow.output;
 
     /* Input tunnel. */
-    if (flow->tunnel.ip_dst) {
+    if (flow->tunnel.dl_type == ETH_TYPE_IP) {
 	memset(&tnlInElem, 0, sizeof(tnlInElem));
 	tnlInElem.tag = SFLFLOW_EX_IPV4_TUNNEL_INGRESS;
 	tnlInProto = dpif_sflow_tunnel_proto(in_dsp->tunnel_type);
diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c
index 5cf6c75..f031324 100644
--- a/ofproto/tunnel.c
+++ b/ofproto/tunnel.c
@@ -362,12 +362,12 @@ tnl_wc_init(struct flow *flow, struct flow_wildcards *wc)
 {
     if (tnl_port_should_receive(flow)) {
         wc->masks.tunnel.tun_id = OVS_BE64_MAX;
-        if (flow->tunnel.ip_dst) {
+        if (flow->tunnel.dl_type == ETH_TYPE_IP) {
             wc->masks.tunnel.ip_src = OVS_BE32_MAX;
-            wc->masks.tunnel.ip_dst = OVS_BE32_MAX;
-        } else {
+            set_ipv4_dst_tnl(&wc->masks.tunnel, OVS_BE32_MAX);
+        } else if (flow->tunnel.dl_type == ETH_TYPE_IPV6) {
             wc->masks.tunnel.ipv6_src = in6addr_exact;
-            wc->masks.tunnel.ipv6_dst = in6addr_exact;
+            set_ipv6_dst_tnl(&wc->masks.tunnel, in6addr_exact);
         }
         wc->masks.tunnel.flags = (FLOW_TNL_F_DONT_FRAGMENT |
                                   FLOW_TNL_F_CSUM |
@@ -424,7 +424,9 @@ tnl_port_send(const struct ofport_dpif *ofport, struct flow *flow,
     if (!cfg->ip_dst_flow) {
         flow->tunnel.ip_dst = in6_addr_get_mapped_ipv4(&tnl_port->match.ipv6_dst);
         if (!flow->tunnel.ip_dst) {
-            flow->tunnel.ipv6_dst = tnl_port->match.ipv6_dst;
+            set_ipv6_dst_tnl(&flow->tunnel, tnl_port->match.ipv6_dst);
+        } else {
+            set_ipv4_dst_tnl(&flow->tunnel, flow->tunnel.ip_dst);
         }
     }
     flow->pkt_mark = tnl_port->match.pkt_mark;
-- 
1.9.1




More information about the dev mailing list