[ovs-dev] [PATCH v2] vxlan: Optimize vxlan rcv

Pravin B Shelar pshelar at nicira.com
Fri Oct 11 19:40:13 UTC 2013


The vxlan_udp_encap_recv() function looks up the vxlan_sock struct on
every received packet by using the UDP port number. We can use
sk->sk_user_data to store the vxlan_sock and avoid the lookup.

This commit also allows us to get rid of the socket hash table.

Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
---
 datapath/linux/Modules.mk                |    1 +
 datapath/linux/compat/include/net/sock.h |   13 ++++
 datapath/linux/compat/vxlan.c            |  114 ++----------------------------
 3 files changed, 20 insertions(+), 108 deletions(-)
 create mode 100644 datapath/linux/compat/include/net/sock.h

diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index 057e1d5..fee132e 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -64,5 +64,6 @@ openvswitch_headers += \
 	linux/compat/include/net/ipv6.h \
 	linux/compat/include/net/net_namespace.h \
 	linux/compat/include/net/netlink.h \
+	linux/compat/include/net/sock.h \
 	linux/compat/include/net/vxlan.h \
 	linux/compat/include/net/sctp/checksum.h
diff --git a/datapath/linux/compat/include/net/sock.h b/datapath/linux/compat/include/net/sock.h
new file mode 100644
index 0000000..2900704
--- /dev/null
+++ b/datapath/linux/compat/include/net/sock.h
@@ -0,0 +1,13 @@
+#ifndef __NET_SOCK_WRAPPER_H
+#define __NET_SOCK_WRAPPER_H 1
+
+#include_next <net/sock.h>
+
+#ifndef __sk_user_data
+#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
+
+#define rcu_dereference_sk_user_data(sk)       rcu_dereference(__sk_user_data((sk)))
+#define rcu_assign_sk_user_data(sk, ptr)       rcu_assign_pointer(__sk_user_data((sk)), ptr)
+#endif
+
+#endif
diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c
index 09d0fd7..64877e0 100644
--- a/datapath/linux/compat/vxlan.c
+++ b/datapath/linux/compat/vxlan.c
@@ -52,14 +52,10 @@
 #include <net/vxlan.h>
 
 #include "compat.h"
+#include "datapath.h"
 #include "gso.h"
 #include "vlan.h"
 
-#define PORT_HASH_BITS	8
-#define PORT_HASH_SIZE  (1<<PORT_HASH_BITS)
-
-/* IP header + UDP + VXLAN + Ethernet header */
-#define VXLAN_HEADROOM (20 + 8 + 8 + 14)
 #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
 
 #define VXLAN_FLAGS 0x08000000	/* struct vxlanhdr.vx_flags required value. */
@@ -70,38 +66,6 @@ struct vxlanhdr {
 	__be32 vx_vni;
 };
 
-static int vxlan_net_id;
-
-static int vxlan_init_module(void);
-static void vxlan_cleanup_module(void);
-
-/* per-network namespace private data for this module */
-struct vxlan_net {
-	struct hlist_head sock_list[PORT_HASH_SIZE];
-	spinlock_t  sock_lock;
-};
-
-/* Socket hash table head */
-static inline struct hlist_head *vs_head(struct net *net, __be16 port)
-{
-	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
-
-	return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
-}
-
-/* Find VXLAN socket based on network namespace and UDP port */
-
-static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
-{
-	struct vxlan_sock *vs;
-
-	hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
-		if (inet_sport(vs->sock->sk) == port)
-			return vs;
-	}
-	return NULL;
-}
-
 /* Callback from net/ipv4/udp.c to receive packets */
 static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
@@ -124,7 +88,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
 		goto drop;
 
-	vs = vxlan_find_sock(sock_net(sk), inet_sport(sk));
+	vs = rcu_dereference_sk_user_data(sk);
 	if (!vs)
 		goto drop;
 
@@ -275,13 +239,11 @@ static void vxlan_del_work(struct work_struct *work)
 
 	sk_release_kernel(vs->sock->sk);
 	call_rcu(&vs->rcu, rcu_free_vs);
-	vxlan_cleanup_module();
 }
 
 static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
 					      vxlan_rcv_t *rcv, void *data)
 {
-	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 	struct vxlan_sock *vs;
 	struct sock *sk;
 	struct sockaddr_in vxlan_addr = {
@@ -325,9 +287,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
 
 	/* Disable multicast loopback */
 	inet_sk(sk)->mc_loop = 0;
-	spin_lock(&vn->sock_lock);
-	hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
-	spin_unlock(&vn->sock_lock);
+	rcu_assign_sk_user_data(vs->sock->sk, vs);
 
 	/* Mark socket as an encapsulation socket. */
 	udp_sk(sk)->encap_type = 1;
@@ -340,75 +300,13 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 				  vxlan_rcv_t *rcv, void *data,
 				  bool no_share)
 {
-	struct vxlan_net *vn;
-	struct vxlan_sock *vs;
-	int err;
-
-	err = vxlan_init_module();
-	if (err)
-		return ERR_PTR(err);
-
-	vn = net_generic(net, vxlan_net_id);
-	vs = vxlan_socket_create(net, port, rcv, data);
-	return vs;
+	return vxlan_socket_create(net, port, rcv, data);
 }
 
 void vxlan_sock_release(struct vxlan_sock *vs)
 {
-	struct vxlan_net *vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);
-
-	spin_lock(&vn->sock_lock);
-	hlist_del_rcu(&vs->hlist);
-	spin_unlock(&vn->sock_lock);
+	ASSERT_OVSL();
+	rcu_assign_sk_user_data(vs->sock->sk, NULL);
 
 	queue_work(system_wq, &vs->del_work);
 }
-
-static int vxlan_init_net(struct net *net)
-{
-	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
-	unsigned int h;
-
-	spin_lock_init(&vn->sock_lock);
-
-	for (h = 0; h < PORT_HASH_SIZE; ++h)
-		INIT_HLIST_HEAD(&vn->sock_list[h]);
-
-	return 0;
-}
-
-static struct pernet_operations vxlan_net_ops = {
-	.init = vxlan_init_net,
-	.id   = &vxlan_net_id,
-	.size = sizeof(struct vxlan_net),
-};
-
-static int refcnt;
-static DEFINE_MUTEX(init_lock);
-DEFINE_COMPAT_PNET_REG_FUNC(device);
-
-static int vxlan_init_module(void)
-{
-	int err = 0;
-
-	mutex_lock(&init_lock);
-	if (refcnt)
-		goto out;
-	err = register_pernet_device(&vxlan_net_ops);
-out:
-	if (!err)
-		refcnt++;
-	mutex_unlock(&init_lock);
-	return err;
-}
-
-static void vxlan_cleanup_module(void)
-{
-	mutex_lock(&init_lock);
-	refcnt--;
-	if (refcnt)
-		goto out;
-	unregister_pernet_device(&vxlan_net_ops);
-out:
-	mutex_unlock(&init_lock);
-}
-- 
1.7.1




More information about the dev mailing list