[ovs-dev] [PATCH v4 18/28] datapath: compat: Use dst-cache for Geneve and VxLAN tunnels.

Pravin B Shelar pshelar at ovn.org
Fri Jul 8 00:23:54 UTC 2016


It partialy backport commit:
    commit d71785ffc7e7cae3fbdc4ea8a9d05b7a1c59f7b8
    Author: Paolo Abeni <pabeni at redhat.com>

    net: add dst_cache to ovs vxlan lwtunnel

    In case of UDP traffic with datagram length
    below MTU this give about 2% performance increase
    when tunneling over ipv4 and about 60% when tunneling
    over ipv6

    Signed-off-by: Paolo Abeni <pabeni at redhat.com>
    Suggested-and-acked-by: Hannes Frederic Sowa <hannes at stressinduktion.org>
    Signed-off-by: David S. Miller <davem at davemloft.net>

Bug fix commit db3c6139e6e ("bpf, vxlan, geneve, gre: fix usage of
dst_cache on xmit"). is also included. Geneve changes
were added in 468dfffcd762cbb2777ec5a76bc21e3748ebf47e ("geneve: add
dst caching support")

Signed-off-by: Pravin B Shelar <pshelar at ovn.org>
---
 datapath/flow_netlink.c                        |  5 +++
 datapath/linux/compat/geneve.c                 | 42 ++++++++++++++++++++++++++
 datapath/linux/compat/gso.h                    |  3 ++
 datapath/linux/compat/include/net/ip_tunnels.h | 18 +++++++++++
 datapath/linux/compat/include/net/vxlan.h      |  2 ++
 datapath/linux/compat/vxlan.c                  | 35 ++++++++++++++++++---
 6 files changed, 101 insertions(+), 4 deletions(-)

diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index cbfa233..b6020ab 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -1964,6 +1964,11 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	if (!tun_dst)
 		return -ENOMEM;
 
+	err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
+	if (err) {
+		dst_release((struct dst_entry *)tun_dst);
+		return err;
+	}
 	a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
 			 sizeof(*ovs_tun), log);
 	if (IS_ERR(a)) {
diff --git a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c
index f5daefb..061ceb5 100644
--- a/datapath/linux/compat/geneve.c
+++ b/datapath/linux/compat/geneve.c
@@ -19,6 +19,7 @@
 #include <linux/if_vlan.h>
 
 #include <net/addrconf.h>
+#include <net/dst_cache.h>
 #include <net/dst_metadata.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
@@ -88,6 +89,7 @@ struct geneve_dev {
 	__be16		   dst_port;
 	bool		   collect_md;
 	u32		   flags;
+	struct dst_cache   dst_cache;
 };
 
 /* Geneve device flags */
@@ -301,15 +303,27 @@ drop:
 /* Setup stats when device is created */
 static int geneve_init(struct net_device *dev)
 {
+	struct geneve_dev *geneve = netdev_priv(dev);
+	int err;
+
 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 	if (!dev->tstats)
 		return -ENOMEM;
 
+	err = dst_cache_init(&geneve->dst_cache, GFP_KERNEL);
+	if (err) {
+		free_percpu(dev->tstats);
+		return err;
+	}
+
 	return 0;
 }
 
 static void geneve_uninit(struct net_device *dev)
 {
+	struct geneve_dev *geneve = netdev_priv(dev);
+
+	dst_cache_destroy(&geneve->dst_cache);
 	free_percpu(dev->tstats);
 }
 
@@ -799,7 +813,9 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
 				       struct flowi4 *fl4,
 				       struct ip_tunnel_info *info)
 {
+	bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
 	struct geneve_dev *geneve = netdev_priv(dev);
+	struct dst_cache *dst_cache;
 	struct rtable *rt = NULL;
 	__u8 tos;
 
@@ -811,16 +827,25 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
 		fl4->daddr = info->key.u.ipv4.dst;
 		fl4->saddr = info->key.u.ipv4.src;
 		fl4->flowi4_tos = RT_TOS(info->key.tos);
+		dst_cache = &info->dst_cache;
 	} else {
 		tos = geneve->tos;
 		if (tos == 1) {
 			const struct iphdr *iip = ip_hdr(skb);
 
 			tos = ip_tunnel_get_dsfield(iip, skb);
+			use_cache = false;
 		}
 
 		fl4->flowi4_tos = RT_TOS(tos);
 		fl4->daddr = geneve->remote.sin.sin_addr.s_addr;
+		dst_cache = &geneve->dst_cache;
+	}
+
+	if (use_cache) {
+		rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
+		if (rt)
+			return rt;
 	}
 
 	rt = ip_route_output_key(geneve->net, fl4);
@@ -833,6 +858,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
 		ip_rt_put(rt);
 		return ERR_PTR(-ELOOP);
 	}
+	if (use_cache)
+		dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
 	return rt;
 }
 
@@ -842,9 +869,11 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
 					   struct flowi6 *fl6,
 					   struct ip_tunnel_info *info)
 {
+	bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
 	struct geneve_dev *geneve = netdev_priv(dev);
 	struct geneve_sock *gs6 = geneve->sock6;
 	struct dst_entry *dst = NULL;
+	struct dst_cache *dst_cache;
 	__u8 prio;
 
 	memset(fl6, 0, sizeof(*fl6));
@@ -856,17 +885,26 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
 		fl6->saddr = info->key.u.ipv6.src;
 		fl6->flowlabel = ip6_make_flowinfo(RT_TOS(info->key.tos),
 						   info->key.label);
+		dst_cache = &info->dst_cache;
 	} else {
 		prio = geneve->tos;
 		if (prio == 1) {
 			const struct iphdr *iip = ip_hdr(skb);
 
 			prio = ip_tunnel_get_dsfield(iip, skb);
+			use_cache = false;
 		}
 
 		fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio),
 						   geneve->label);
 		fl6->daddr = geneve->remote.sin6.sin6_addr;
+		dst_cache = &geneve->dst_cache;
+	}
+
+	if (use_cache) {
+		dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
+		if (dst)
+			return dst;
 	}
 
 #ifdef HAVE_IPV6_DST_LOOKUP_NET
@@ -887,6 +925,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
 		return ERR_PTR(-ELOOP);
 	}
 
+	if (use_cache)
+		dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
 	return dst;
 }
 #endif
@@ -1374,6 +1414,8 @@ static int geneve_configure(struct net *net, struct net_device *dev,
 			return -EPERM;
 	}
 
+	dst_cache_reset(&geneve->dst_cache);
+
 	err = register_netdevice(dev);
 	if (err)
 		return err;
diff --git a/datapath/linux/compat/gso.h b/datapath/linux/compat/gso.h
index f082be1..9a38a19 100644
--- a/datapath/linux/compat/gso.h
+++ b/datapath/linux/compat/gso.h
@@ -191,6 +191,9 @@ static inline void ovs_dst_hold(void *dst)
 
 static inline void ovs_dst_release(struct dst_entry *dst)
 {
+	struct metadata_dst *tun_dst = (struct metadata_dst *) dst;
+
+	dst_cache_destroy(&tun_dst->u.tun_info.dst_cache);
 	kfree(dst);
 }
 
diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h
index 7fe6a04..e3f9b60 100644
--- a/datapath/linux/compat/include/net/ip_tunnels.h
+++ b/datapath/linux/compat/include/net/ip_tunnels.h
@@ -197,6 +197,24 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
 
 #define ip_tunnel_collect_metadata() true
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
+#define TUNNEL_NOCACHE 0
+
+static inline bool
+ip_tunnel_dst_cache_usable(const struct sk_buff *skb,
+			   const struct ip_tunnel_info *info)
+{
+	if (skb->mark)
+		return false;
+	if (!info)
+		return true;
+	if (info->key.tun_flags & TUNNEL_NOCACHE)
+		return false;
+
+	return true;
+}
+#endif
+
 #define ip_tunnel rpl_ip_tunnel
 
 struct ip_tunnel {
diff --git a/datapath/linux/compat/include/net/vxlan.h b/datapath/linux/compat/include/net/vxlan.h
index a6a5f30..8212d3a 100644
--- a/datapath/linux/compat/include/net/vxlan.h
+++ b/datapath/linux/compat/include/net/vxlan.h
@@ -25,6 +25,7 @@ static inline void rpl_vxlan_cleanup_module(void)
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/udp.h>
+#include <net/dst_cache.h>
 #include <net/dst_metadata.h>
 
 #include "compat.h"
@@ -227,6 +228,7 @@ struct vxlan_rdst {
 	u32			 remote_ifindex;
 	struct list_head	 list;
 	struct rcu_head		 rcu;
+	struct dst_cache	 dst_cache;
 };
 
 struct vxlan_config {
diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c
index 5d05047..bb230cb 100644
--- a/datapath/linux/compat/vxlan.c
+++ b/datapath/linux/compat/vxlan.c
@@ -907,11 +907,21 @@ out_free:
 static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
 				      struct sk_buff *skb, int oif, u8 tos,
 				      __be32 daddr, __be32 *saddr,
+				      struct dst_cache *dst_cache,
 				      const struct ip_tunnel_info *info)
 {
+	bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
 	struct rtable *rt = NULL;
 	struct flowi4 fl4;
 
+	if (tos && !info)
+		use_cache = false;
+	if (use_cache) {
+		rt = dst_cache_get_ip4(dst_cache, saddr);
+		if (rt)
+			return rt;
+	}
+
 	memset(&fl4, 0, sizeof(fl4));
 	fl4.flowi4_oif = oif;
 	fl4.flowi4_tos = RT_TOS(tos);
@@ -923,6 +933,8 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
 	rt = ip_route_output_key(vxlan->net, &fl4);
 	if (!IS_ERR(rt)) {
 		*saddr = fl4.saddr;
+		if (use_cache)
+			dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
 	}
 	return rt;
 }
@@ -933,12 +945,22 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
 					  __be32 label,
 					  const struct in6_addr *daddr,
 					  struct in6_addr *saddr,
+					  struct dst_cache *dst_cache,
 					  const struct ip_tunnel_info *info)
 {
+	bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
 	struct dst_entry *ndst;
 	struct flowi6 fl6;
 	int err;
 
+	if (tos && !info)
+		use_cache = false;
+	if (use_cache) {
+		ndst = dst_cache_get_ip6(dst_cache, saddr);
+		if (ndst)
+			return ndst;
+	}
+
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_oif = oif;
 	fl6.daddr = *daddr;
@@ -963,6 +985,8 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
 		return ERR_PTR(err);
 
 	*saddr = fl6.saddr;
+	if (use_cache)
+		dst_cache_set_ip6(dst_cache, ndst, saddr);
 	return ndst;
 }
 #endif
@@ -978,6 +1002,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
 static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 			   struct vxlan_rdst *rdst, bool did_rsc)
 {
+	struct dst_cache *dst_cache;
 	struct ip_tunnel_info *info;
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct sock *sk;
@@ -1002,6 +1027,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
 		vni = rdst->remote_vni;
 		dst = &rdst->remote_ip;
+		dst_cache = &rdst->dst_cache;
 	} else {
 		if (!info) {
 			WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
@@ -1016,6 +1042,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		else
 			remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
 		dst = &remote_ip;
+		dst_cache = &info->dst_cache;
 	}
 
 	if (vxlan_addr_any(dst)) {
@@ -1063,7 +1090,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		rt = vxlan_get_route(vxlan, skb,
 				     rdst ? rdst->remote_ifindex : 0, tos,
 				     dst->sin.sin_addr.s_addr, &saddr,
-				     info);
+				     dst_cache, info);
 		if (IS_ERR(rt)) {
 			netdev_dbg(dev, "no route to %pI4\n",
 				   &dst->sin.sin_addr.s_addr);
@@ -1121,7 +1148,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		ndst = vxlan6_get_route(vxlan, skb,
 					rdst ? rdst->remote_ifindex : 0, tos,
 					label, &dst->sin6.sin6_addr, &saddr,
-					info);
+					dst_cache, info);
 		if (IS_ERR(ndst)) {
 			netdev_dbg(dev, "no route to %pI6\n",
 				   &dst->sin6.sin6_addr);
@@ -1409,7 +1436,7 @@ int ovs_vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 			return -EINVAL;
 		rt = vxlan_get_route(vxlan, skb, 0, info->key.tos,
 				     info->key.u.ipv4.dst,
-				     &info->key.u.ipv4.src, info);
+				     &info->key.u.ipv4.src, NULL, info);
 		if (IS_ERR(rt))
 			return PTR_ERR(rt);
 		ip_rt_put(rt);
@@ -1421,7 +1448,7 @@ int ovs_vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 			return -EINVAL;
 		ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos,
 					info->key.label, &info->key.u.ipv6.dst,
-					&info->key.u.ipv6.src, info);
+					&info->key.u.ipv6.src, NULL, info);
 		if (IS_ERR(ndst))
 			return PTR_ERR(ndst);
 		dst_release(ndst);
-- 
1.9.1




More information about the dev mailing list