[ovs-dev] [PATCH net-next 4/7] vxlan: Add vxlan protocol handler.

Pravin B Shelar pshelar at nicira.com
Sat Mar 30 16:18:28 UTC 2013


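Add a receive-side multiplexer to the vxlan driver so that several packet
handlers can be registered per network namespace, each matched by UDP
destination port. This is required for Open vSwitch VXLAN support.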

Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
---
 drivers/net/vxlan.c |  208 ++++++++++++++++++++++++++++++++++++++-------------
 include/net/vxlan.h |   17 ++++
 2 files changed, 172 insertions(+), 53 deletions(-)
 create mode 100644 include/net/vxlan.h

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 62a4438..bcfa933 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -42,6 +42,7 @@
 #include <net/inet_ecn.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <net/vxlan.h>
 
 #define VXLAN_VERSION	"0.1"
 
@@ -66,19 +67,24 @@ struct vxlanhdr {
 };
 
 /* UDP port for VXLAN traffic. */
-static unsigned int vxlan_port __read_mostly = 8472;
-module_param_named(udp_port, vxlan_port, uint, 0444);
+static unsigned int vxlan_portno __read_mostly = 8472;
+module_param_named(udp_port, vxlan_portno, uint, 0444);
 MODULE_PARM_DESC(udp_port, "Destination UDP port");
 
 static bool log_ecn_error = true;
 module_param(log_ecn_error, bool, 0644);
 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
+#define MAX_VXLAN_PORTS	8
+
 /* per-net private data for this module */
 static unsigned int vxlan_net_id;
+
+static DEFINE_MUTEX(vxlan_mutex);
 struct vxlan_net {
-	struct socket	  *sock;	/* UDP encap socket */
 	struct hlist_head vni_list[VNI_HASH_SIZE];
+	struct vxlan_port __rcu *vxlan_ports[MAX_VXLAN_PORTS]; /* registered rx handlers; updated under vxlan_mutex */
+	struct vxlan_port port;	/* this driver's own handler; port.sock replaces the old vn->sock */
 };
 
 struct vxlan_rdst {
@@ -191,7 +197,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 	if (send_ip && nla_put_be32(skb, NDA_DST, rdst->remote_ip))
 		goto nla_put_failure;
 
-	if (rdst->remote_port && rdst->remote_port != vxlan_port &&
+	if (rdst->remote_port && rdst->remote_port != vxlan_portno &&
 	    nla_put_be16(skb, NDA_PORT, rdst->remote_port))
 		goto nla_put_failure;
 	if (rdst->remote_vni != vxlan->vni &&
@@ -458,7 +464,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			return -EINVAL;
 		port = nla_get_u32(tb[NDA_PORT]);
 	} else
-		port = vxlan_port;
+		port = vxlan_portno;
 
 	if (tb[NDA_VNI]) {
 		if (nla_len(tb[NDA_VNI]) != sizeof(u32))
@@ -570,7 +576,7 @@ static void vxlan_snoop(struct net_device *dev,
 		err = vxlan_fdb_create(vxlan, src_mac, src_ip,
 				       NUD_REACHABLE,
 				       NLM_F_EXCL|NLM_F_CREATE,
-				       vxlan_port, vxlan->vni, 0);
+				       vxlan_portno, vxlan->vni, 0);
 		spin_unlock(&vxlan->hash_lock);
 	}
 }
@@ -603,7 +609,7 @@ static int vxlan_join_group(struct net_device *dev)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
-	struct sock *sk = vn->sock->sk;
+	struct sock *sk = vn->port.sock->sk;
 	struct ip_mreqn mreq = {
 		.imr_multiaddr.s_addr	= vxlan->gaddr,
 		.imr_ifindex		= vxlan->link,
@@ -631,7 +637,7 @@ static int vxlan_leave_group(struct net_device *dev)
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 	int err = 0;
-	struct sock *sk = vn->sock->sk;
+	struct sock *sk = vn->port.sock->sk;
 	struct ip_mreqn mreq = {
 		.imr_multiaddr.s_addr	= vxlan->gaddr,
 		.imr_ifindex		= vxlan->link,
@@ -654,12 +660,9 @@ static int vxlan_leave_group(struct net_device *dev)
 /* Callback from net/ipv4/udp.c to receive packets */
 static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
-	struct iphdr *oip;
+	struct vxlan_net *vn = net_generic(dev_net(skb->dev), vxlan_net_id);
 	struct vxlanhdr *vxh;
-	struct vxlan_dev *vxlan;
-	struct pcpu_tstats *stats;
-	__u32 vni;
-	int err;
+	int i;
 
 	/* pop off outer UDP header */
 	__skb_pull(skb, sizeof(struct udphdr));
@@ -678,13 +681,43 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	__skb_pull(skb, sizeof(struct vxlanhdr));
+	rcu_read_lock();
+	for (i = 0; i < MAX_VXLAN_PORTS; i++) {
+		struct vxlan_port *port = rcu_dereference(vn->vxlan_ports[i]);
+		int ret;
+
+		if (!port)
+			continue;
+		if (port->portno != udp_hdr(skb)->dest)
+			continue;
+		ret = port->vx_rcv(port, skb, vxh->vx_vni);
+		if (ret == PACKET_RCVD) {
+			rcu_read_unlock();
+			return 0;
+		}
+	}
+	rcu_read_unlock();
+error:
+	/* Put UDP header back */
+	__skb_push(skb, sizeof(struct udphdr));
+	return 1;
+}
+
+static int vxlan_rcv(struct vxlan_port *port, struct sk_buff *skb,
+		     __be32 _vni)
+{
+	struct vxlan_dev *vxlan;
+	struct net *net = dev_net(skb->dev);
+	struct iphdr *oip;
+	struct pcpu_tstats *stats;
+	int err;
+	int vni;
 
 	/* Is this VNI defined? */
-	vni = ntohl(vxh->vx_vni) >> 8;
-	vxlan = vxlan_find_vni(sock_net(sk), vni);
+	vni = ntohl(_vni) >> 8;
+	vxlan = vxlan_find_vni(net, vni);
 	if (!vxlan) {
-		netdev_dbg(skb->dev, "unknown vni %d\n", vni);
-		goto drop;
+		return PACKET_REJECT;
 	}
 
 	if (!pskb_may_pull(skb, ETH_HLEN)) {
@@ -741,16 +774,11 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 
 	netif_rx(skb);
 
-	return 0;
-error:
-	/* Put UDP header back */
-	__skb_push(skb, sizeof(struct udphdr));
-
-	return 1;
+	return PACKET_RCVD;
 drop:
 	/* Consume bad packet */
 	kfree_skb(skb);
-	return 0;
+	return PACKET_RCVD;
 }
 
 static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
@@ -869,10 +897,9 @@ static void vxlan_sock_free(struct sk_buff *skb)
 }
 
 /* On transmit, associate with the tunnel socket */
-static void vxlan_set_owner(struct net_device *dev, struct sk_buff *skb)
+static void vxlan_set_owner(const struct vxlan_port *port, struct sk_buff *skb)
 {
-	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
-	struct sock *sk = vn->sock->sk;
+	struct sock *sk = port->sock->sk;
 
 	skb_orphan(skb);
 	sock_hold(sk);
@@ -915,6 +942,7 @@ static int handle_offloads(struct sk_buff *skb)
 static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 				  struct vxlan_rdst *rdst, bool did_rsc)
 {
+	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct rtable *rt;
 	const struct iphdr *old_iph;
@@ -929,7 +957,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 	__be16 df = 0;
 	__u8 tos, ttl;
 
-	dst_port = rdst->remote_port ? rdst->remote_port : vxlan_port;
+	dst_port = rdst->remote_port ? rdst->remote_port : vxlan_portno;
 	vni = rdst->remote_vni;
 	dst = rdst->remote_ip;
 
@@ -1032,7 +1060,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
 	nf_reset(skb);
 
-	vxlan_set_owner(dev, skb);
+	vxlan_set_owner(&vn->port, skb);
 
 	if (handle_offloads(skb))
 		goto drop;
@@ -1077,7 +1105,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 	f = vxlan_find_mac(vxlan, eth->h_dest);
 	if (f == NULL) {
 		did_rsc = false;
-		group.remote_port = vxlan_port;
+		group.remote_port = vxlan_portno;
 		group.remote_vni = vxlan->vni;
 		group.remote_ip = vxlan->gaddr;
 		group.remote_ifindex = vxlan->link;
@@ -1526,37 +1554,35 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
 	.fill_info	= vxlan_fill_info,
 };
 
-static __net_init int vxlan_init_net(struct net *net)
+struct socket *vxlan_create_socket(struct net *net, __be16 portno)
 {
-	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+	struct socket *sock;
 	struct sock *sk;
 	struct sockaddr_in vxlan_addr = {
 		.sin_family = AF_INET,
 		.sin_addr.s_addr = htonl(INADDR_ANY),
 	};
 	int rc;
-	unsigned h;
 
 	/* Create UDP socket for encapsulation receive. */
-	rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &vn->sock);
+	rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
 	if (rc < 0) {
 		pr_debug("UDP socket create failed\n");
-		return rc;
+		return ERR_PTR(rc);
 	}
 	/* Put in proper namespace */
-	sk = vn->sock->sk;
+	sk = sock->sk;
 	sk_change_net(sk, net);
 
-	vxlan_addr.sin_port = htons(vxlan_port);
+	vxlan_addr.sin_port = portno;
 
-	rc = kernel_bind(vn->sock, (struct sockaddr *) &vxlan_addr,
+	rc = kernel_bind(sock, (struct sockaddr *) &vxlan_addr,
 			 sizeof(vxlan_addr));
 	if (rc < 0) {
 		pr_debug("bind for UDP socket %pI4:%u (%d)\n",
 			 &vxlan_addr.sin_addr, ntohs(vxlan_addr.sin_port), rc);
 		sk_release_kernel(sk);
-		vn->sock = NULL;
-		return rc;
+		return ERR_PTR(rc);
 	}
 
 	/* Disable multicast loopback */
@@ -1567,28 +1593,104 @@ static __net_init int vxlan_init_net(struct net *net)
 	udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
 	udp_encap_enable();
 
-	for (h = 0; h < VNI_HASH_SIZE; ++h)
-		INIT_HLIST_HEAD(&vn->vni_list[h]);
+	return sock;
+}
 
-	return 0;
+int vxlan_add_handler(struct net *net, struct vxlan_port *new)
+{	/* Register @new as a receive handler in @net's per-netns handler table. */
+	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+	struct socket *sock = NULL;	/* socket of an existing handler on the same port, if any */
+	int i, p = -1;			/* p: first free table slot, -1 while none found */
+	int err;
+
+	mutex_lock(&vxlan_mutex);	/* also taken by vxlan_del_handler() */
+	for (i = 0; i < MAX_VXLAN_PORTS; i++) {
+		struct vxlan_port *port = vn->vxlan_ports[i];	/* NOTE(review): plain read of an __rcu pointer; sparse may want rcu_dereference_protected() */
+
+		if (!port) {
+			if (p < 0)
+				p = i;
+			continue;
+		}
+		if (port->portno == new->portno)
+			sock = port->sock;	/* share it instead of creating a second socket for the port */
+	}
+
+	if (p < 0) {
+		err = -EBUSY;	/* all MAX_VXLAN_PORTS slots are occupied */
+		goto out;
+	}
+
+	if (!sock) {
+		sock = vxlan_create_socket(net, new->portno);	/* returns ERR_PTR() on failure */
+		if (IS_ERR(sock)) {
+			err = PTR_ERR(sock);
+			goto out;
+		}
+	}
+
+	new->sock = sock;
+	rcu_assign_pointer(vn->vxlan_ports[p], new);	/* publish to the rcu_dereference() in vxlan_udp_encap_recv() */
+	err = 0;
+out:
+	mutex_unlock(&vxlan_mutex);
+	return err;	/* 0, -EBUSY, or the vxlan_create_socket() error */
 }
+EXPORT_SYMBOL_GPL(vxlan_add_handler);
 
-static __net_exit void vxlan_exit_net(struct net *net)
+void vxlan_del_handler(struct net *net, const struct vxlan_port *del)
+{	/* Unregister @del; release its socket once no other handler uses the same port. */
+	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+	bool inuse = false;	/* set when another handler still uses del->portno */
+	int i;
+
+	mutex_lock(&vxlan_mutex);
+
+	/* Clear @del's slot and check whether its socket is still used. */
+	for (i = 0; i < MAX_VXLAN_PORTS; i++) {
+		struct vxlan_port *port = vn->vxlan_ports[i];
+
+		if (!port)
+			continue;
+
+		if (port == del) {
+			RCU_INIT_POINTER(vn->vxlan_ports[i], NULL);
+			synchronize_net();	/* let in-flight RCU readers in the receive path finish with @del */
+			continue;
+		}
+		if (port->portno == del->portno)
+			inuse = true;
+	}
+
+	if (!inuse)	/* NOTE(review): runs even if @del was never found in the table; del->sock is assumed non-NULL */
+		sk_release_kernel(del->sock->sk);
+	mutex_unlock(&vxlan_mutex);
+}
+EXPORT_SYMBOL_GPL(vxlan_del_handler);
+
+static __net_init int vxlan_init_net(struct net *net)
 {
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
-	struct vxlan_dev *vxlan;
 	unsigned h;
+	int err;
+
+	vn->port.portno = htons(vxlan_portno);	/* udp_port module parameter, in network byte order */
+	vn->port.vx_rcv = vxlan_rcv;		/* VNI lookup and delivery for vxlan netdevs */
+	/* Register this driver's own handler; vxlan_add_handler() also fills in vn->port.sock. */
+	err = vxlan_add_handler(net, &vn->port);
+	if (err)
+		return err;	/* handler table full or socket creation failed */
 
-	rtnl_lock();
 	for (h = 0; h < VNI_HASH_SIZE; ++h)
-		hlist_for_each_entry(vxlan, &vn->vni_list[h], hlist)
-			dev_close(vxlan->dev);
-	rtnl_unlock();
+		INIT_HLIST_HEAD(&vn->vni_list[h]);
 
-	if (vn->sock) {
-		sk_release_kernel(vn->sock->sk);
-		vn->sock = NULL;
-	}
+	return 0;
+}
+
+static __net_exit void vxlan_exit_net(struct net *net)
+{	/* Per-netns teardown: unregister the driver's own receive handler. */
+	struct vxlan_net *vn = net_generic(net, vxlan_net_id);	/* NOTE(review): the dev_close() pass over vni_list that this patch removes is not redone here; confirm intended */
+	vxlan_del_handler(net, &vn->port);	/* releases vn->port.sock unless another handler shares the port */
+}
 
 static struct pernet_operations vxlan_net_ops = {
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
new file mode 100644
index 0000000..dae9619
--- /dev/null
+++ b/include/net/vxlan.h
@@ -0,0 +1,17 @@
+#ifndef __NET_IP_VXLAN_H
+#define __NET_IP_VXLAN_H 1
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/udp.h>
+
+struct vxlan_port {	/* one rx handler; vx_rcv() is passed the raw vx_vni header field as key */
+	int (*vx_rcv)(struct vxlan_port *port, struct sk_buff *skb, __be32 key);
+	void *user_data;	/* not touched by vxlan.c in this patch; presumably handler-private, confirm */
+	struct socket *sock;	/* set by vxlan_add_handler(); shared by handlers with the same portno */
+	__be16 portno;		/* UDP destination port to match, network byte order */
+};
+
+int vxlan_add_handler(struct net *net, struct vxlan_port *);
+void vxlan_del_handler(struct net *net, const struct vxlan_port *port);
+#endif
-- 
1.7.1




More information about the dev mailing list