[ovs-dev] [PATCH v4 18/28] datapath: compat: Use dst-cache for Geneve and VxLAN tunnels.
Pravin B Shelar
pshelar at ovn.org
Fri Jul 8 00:23:54 UTC 2016
This partially backports the upstream commit:
commit d71785ffc7e7cae3fbdc4ea8a9d05b7a1c59f7b8
Author: Paolo Abeni <pabeni at redhat.com>
net: add dst_cache to ovs vxlan lwtunnel
In case of UDP traffic with datagram length
below MTU this give about 2% performance increase
when tunneling over ipv4 and about 60% when tunneling
over ipv6
Signed-off-by: Paolo Abeni <pabeni at redhat.com>
Suggested-and-acked-by: Hannes Frederic Sowa <hannes at stressinduktion.org>
Signed-off-by: David S. Miller <davem at davemloft.net>
The bug fix commit db3c6139e6e ("bpf, vxlan, geneve, gre: fix usage of
dst_cache on xmit") is also included. The Geneve changes
were added in 468dfffcd762cbb2777ec5a76bc21e3748ebf47e ("geneve: add
dst caching support").
Signed-off-by: Pravin B Shelar <pshelar at ovn.org>
---
datapath/flow_netlink.c | 5 +++
datapath/linux/compat/geneve.c | 42 ++++++++++++++++++++++++++
datapath/linux/compat/gso.h | 3 ++
datapath/linux/compat/include/net/ip_tunnels.h | 18 +++++++++++
datapath/linux/compat/include/net/vxlan.h | 2 ++
datapath/linux/compat/vxlan.c | 35 ++++++++++++++++++---
6 files changed, 101 insertions(+), 4 deletions(-)
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index cbfa233..b6020ab 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -1964,6 +1964,11 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
if (!tun_dst)
return -ENOMEM;
+ err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
+ if (err) {
+ dst_release((struct dst_entry *)tun_dst);
+ return err;
+ }
a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
sizeof(*ovs_tun), log);
if (IS_ERR(a)) {
diff --git a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c
index f5daefb..061ceb5 100644
--- a/datapath/linux/compat/geneve.c
+++ b/datapath/linux/compat/geneve.c
@@ -19,6 +19,7 @@
#include <linux/if_vlan.h>
#include <net/addrconf.h>
+#include <net/dst_cache.h>
#include <net/dst_metadata.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -88,6 +89,7 @@ struct geneve_dev {
__be16 dst_port;
bool collect_md;
u32 flags;
+ struct dst_cache dst_cache;
};
/* Geneve device flags */
@@ -301,15 +303,27 @@ drop:
/* Setup stats when device is created */
static int geneve_init(struct net_device *dev)
{
+ struct geneve_dev *geneve = netdev_priv(dev);
+ int err;
+
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
+ err = dst_cache_init(&geneve->dst_cache, GFP_KERNEL);
+ if (err) {
+ free_percpu(dev->tstats);
+ return err;
+ }
+
return 0;
}
static void geneve_uninit(struct net_device *dev)
{
+ struct geneve_dev *geneve = netdev_priv(dev);
+
+ dst_cache_destroy(&geneve->dst_cache);
free_percpu(dev->tstats);
}
@@ -799,7 +813,9 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
struct flowi4 *fl4,
struct ip_tunnel_info *info)
{
+ bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct geneve_dev *geneve = netdev_priv(dev);
+ struct dst_cache *dst_cache;
struct rtable *rt = NULL;
__u8 tos;
@@ -811,16 +827,25 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
fl4->daddr = info->key.u.ipv4.dst;
fl4->saddr = info->key.u.ipv4.src;
fl4->flowi4_tos = RT_TOS(info->key.tos);
+ dst_cache = &info->dst_cache;
} else {
tos = geneve->tos;
if (tos == 1) {
const struct iphdr *iip = ip_hdr(skb);
tos = ip_tunnel_get_dsfield(iip, skb);
+ use_cache = false;
}
fl4->flowi4_tos = RT_TOS(tos);
fl4->daddr = geneve->remote.sin.sin_addr.s_addr;
+ dst_cache = &geneve->dst_cache;
+ }
+
+ if (use_cache) {
+ rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
+ if (rt)
+ return rt;
}
rt = ip_route_output_key(geneve->net, fl4);
@@ -833,6 +858,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
ip_rt_put(rt);
return ERR_PTR(-ELOOP);
}
+ if (use_cache)
+ dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
return rt;
}
@@ -842,9 +869,11 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
struct flowi6 *fl6,
struct ip_tunnel_info *info)
{
+ bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct geneve_dev *geneve = netdev_priv(dev);
struct geneve_sock *gs6 = geneve->sock6;
struct dst_entry *dst = NULL;
+ struct dst_cache *dst_cache;
__u8 prio;
memset(fl6, 0, sizeof(*fl6));
@@ -856,17 +885,26 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
fl6->saddr = info->key.u.ipv6.src;
fl6->flowlabel = ip6_make_flowinfo(RT_TOS(info->key.tos),
info->key.label);
+ dst_cache = &info->dst_cache;
} else {
prio = geneve->tos;
if (prio == 1) {
const struct iphdr *iip = ip_hdr(skb);
prio = ip_tunnel_get_dsfield(iip, skb);
+ use_cache = false;
}
fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio),
geneve->label);
fl6->daddr = geneve->remote.sin6.sin6_addr;
+ dst_cache = &geneve->dst_cache;
+ }
+
+ if (use_cache) {
+ dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
+ if (dst)
+ return dst;
}
#ifdef HAVE_IPV6_DST_LOOKUP_NET
@@ -887,6 +925,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
return ERR_PTR(-ELOOP);
}
+ if (use_cache)
+ dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
return dst;
}
#endif
@@ -1374,6 +1414,8 @@ static int geneve_configure(struct net *net, struct net_device *dev,
return -EPERM;
}
+ dst_cache_reset(&geneve->dst_cache);
+
err = register_netdevice(dev);
if (err)
return err;
diff --git a/datapath/linux/compat/gso.h b/datapath/linux/compat/gso.h
index f082be1..9a38a19 100644
--- a/datapath/linux/compat/gso.h
+++ b/datapath/linux/compat/gso.h
@@ -191,6 +191,9 @@ static inline void ovs_dst_hold(void *dst)
static inline void ovs_dst_release(struct dst_entry *dst)
{
+ struct metadata_dst *tun_dst = (struct metadata_dst *) dst;
+
+ dst_cache_destroy(&tun_dst->u.tun_info.dst_cache);
kfree(dst);
}
diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h
index 7fe6a04..e3f9b60 100644
--- a/datapath/linux/compat/include/net/ip_tunnels.h
+++ b/datapath/linux/compat/include/net/ip_tunnels.h
@@ -197,6 +197,24 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
#define ip_tunnel_collect_metadata() true
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
+#define TUNNEL_NOCACHE 0
+
+static inline bool
+ip_tunnel_dst_cache_usable(const struct sk_buff *skb,
+ const struct ip_tunnel_info *info)
+{
+ if (skb->mark)
+ return false;
+ if (!info)
+ return true;
+ if (info->key.tun_flags & TUNNEL_NOCACHE)
+ return false;
+
+ return true;
+}
+#endif
+
#define ip_tunnel rpl_ip_tunnel
struct ip_tunnel {
diff --git a/datapath/linux/compat/include/net/vxlan.h b/datapath/linux/compat/include/net/vxlan.h
index a6a5f30..8212d3a 100644
--- a/datapath/linux/compat/include/net/vxlan.h
+++ b/datapath/linux/compat/include/net/vxlan.h
@@ -25,6 +25,7 @@ static inline void rpl_vxlan_cleanup_module(void)
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/udp.h>
+#include <net/dst_cache.h>
#include <net/dst_metadata.h>
#include "compat.h"
@@ -227,6 +228,7 @@ struct vxlan_rdst {
u32 remote_ifindex;
struct list_head list;
struct rcu_head rcu;
+ struct dst_cache dst_cache;
};
struct vxlan_config {
diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c
index 5d05047..bb230cb 100644
--- a/datapath/linux/compat/vxlan.c
+++ b/datapath/linux/compat/vxlan.c
@@ -907,11 +907,21 @@ out_free:
static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
struct sk_buff *skb, int oif, u8 tos,
__be32 daddr, __be32 *saddr,
+ struct dst_cache *dst_cache,
const struct ip_tunnel_info *info)
{
+ bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct rtable *rt = NULL;
struct flowi4 fl4;
+ if (tos && !info)
+ use_cache = false;
+ if (use_cache) {
+ rt = dst_cache_get_ip4(dst_cache, saddr);
+ if (rt)
+ return rt;
+ }
+
memset(&fl4, 0, sizeof(fl4));
fl4.flowi4_oif = oif;
fl4.flowi4_tos = RT_TOS(tos);
@@ -923,6 +933,8 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
rt = ip_route_output_key(vxlan->net, &fl4);
if (!IS_ERR(rt)) {
*saddr = fl4.saddr;
+ if (use_cache)
+ dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
}
return rt;
}
@@ -933,12 +945,22 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
__be32 label,
const struct in6_addr *daddr,
struct in6_addr *saddr,
+ struct dst_cache *dst_cache,
const struct ip_tunnel_info *info)
{
+ bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct dst_entry *ndst;
struct flowi6 fl6;
int err;
+ if (tos && !info)
+ use_cache = false;
+ if (use_cache) {
+ ndst = dst_cache_get_ip6(dst_cache, saddr);
+ if (ndst)
+ return ndst;
+ }
+
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif;
fl6.daddr = *daddr;
@@ -963,6 +985,8 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
return ERR_PTR(err);
*saddr = fl6.saddr;
+ if (use_cache)
+ dst_cache_set_ip6(dst_cache, ndst, saddr);
return ndst;
}
#endif
@@ -978,6 +1002,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_rdst *rdst, bool did_rsc)
{
+ struct dst_cache *dst_cache;
struct ip_tunnel_info *info;
struct vxlan_dev *vxlan = netdev_priv(dev);
struct sock *sk;
@@ -1002,6 +1027,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
vni = rdst->remote_vni;
dst = &rdst->remote_ip;
+ dst_cache = &rdst->dst_cache;
} else {
if (!info) {
WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
@@ -1016,6 +1042,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
else
remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
dst = &remote_ip;
+ dst_cache = &info->dst_cache;
}
if (vxlan_addr_any(dst)) {
@@ -1063,7 +1090,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
rt = vxlan_get_route(vxlan, skb,
rdst ? rdst->remote_ifindex : 0, tos,
dst->sin.sin_addr.s_addr, &saddr,
- info);
+ dst_cache, info);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n",
&dst->sin.sin_addr.s_addr);
@@ -1121,7 +1148,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
ndst = vxlan6_get_route(vxlan, skb,
rdst ? rdst->remote_ifindex : 0, tos,
label, &dst->sin6.sin6_addr, &saddr,
- info);
+ dst_cache, info);
if (IS_ERR(ndst)) {
netdev_dbg(dev, "no route to %pI6\n",
&dst->sin6.sin6_addr);
@@ -1409,7 +1436,7 @@ int ovs_vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
return -EINVAL;
rt = vxlan_get_route(vxlan, skb, 0, info->key.tos,
info->key.u.ipv4.dst,
- &info->key.u.ipv4.src, info);
+ &info->key.u.ipv4.src, NULL, info);
if (IS_ERR(rt))
return PTR_ERR(rt);
ip_rt_put(rt);
@@ -1421,7 +1448,7 @@ int ovs_vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
return -EINVAL;
ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos,
info->key.label, &info->key.u.ipv6.dst,
- &info->key.u.ipv6.src, info);
+ &info->key.u.ipv6.src, NULL, info);
if (IS_ERR(ndst))
return PTR_ERR(ndst);
dst_release(ndst);
--
1.9.1
More information about the dev
mailing list