[ovs-dev] [PATCH net-next 4/7] vxlan: Add vxlan protocol handler.
Pravin B Shelar
pshelar at nicira.com
Sat Mar 30 16:18:28 UTC 2013
Add a vxlan receive multiplexer so that several packet handlers can be
registered per UDP port. This is required for openvswitch vxlan support.
Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
---
drivers/net/vxlan.c | 208 ++++++++++++++++++++++++++++++++++++++-------------
include/net/vxlan.h | 17 ++++
2 files changed, 172 insertions(+), 53 deletions(-)
create mode 100644 include/net/vxlan.h
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 62a4438..bcfa933 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -42,6 +42,7 @@
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/vxlan.h>
#define VXLAN_VERSION "0.1"
@@ -66,19 +67,24 @@ struct vxlanhdr {
};
/* UDP port for VXLAN traffic. */
-static unsigned int vxlan_port __read_mostly = 8472;
-module_param_named(udp_port, vxlan_port, uint, 0444);
+static unsigned int vxlan_portno __read_mostly = 8472;
+module_param_named(udp_port, vxlan_portno, uint, 0444);
MODULE_PARM_DESC(udp_port, "Destination UDP port");
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+#define MAX_VXLAN_PORTS 8
+
/* per-net private data for this module */
static unsigned int vxlan_net_id;
+
+static DEFINE_MUTEX(vxlan_mutex);
struct vxlan_net {
- struct socket *sock; /* UDP encap socket */
struct hlist_head vni_list[VNI_HASH_SIZE];
+ struct vxlan_port __rcu *vxlan_ports[MAX_VXLAN_PORTS]; /* rx handlers */
+ struct vxlan_port port; /* this module's own handler (vxlan_rcv) */
};
struct vxlan_rdst {
@@ -191,7 +197,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
if (send_ip && nla_put_be32(skb, NDA_DST, rdst->remote_ip))
goto nla_put_failure;
- if (rdst->remote_port && rdst->remote_port != vxlan_port &&
+ if (rdst->remote_port && rdst->remote_port != vxlan_portno &&
nla_put_be16(skb, NDA_PORT, rdst->remote_port))
goto nla_put_failure;
if (rdst->remote_vni != vxlan->vni &&
@@ -458,7 +464,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
return -EINVAL;
port = nla_get_u32(tb[NDA_PORT]);
} else
- port = vxlan_port;
+ port = vxlan_portno;
if (tb[NDA_VNI]) {
if (nla_len(tb[NDA_VNI]) != sizeof(u32))
@@ -570,7 +576,7 @@ static void vxlan_snoop(struct net_device *dev,
err = vxlan_fdb_create(vxlan, src_mac, src_ip,
NUD_REACHABLE,
NLM_F_EXCL|NLM_F_CREATE,
- vxlan_port, vxlan->vni, 0);
+ vxlan_portno, vxlan->vni, 0);
spin_unlock(&vxlan->hash_lock);
}
}
@@ -603,7 +609,7 @@ static int vxlan_join_group(struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
- struct sock *sk = vn->sock->sk;
+ struct sock *sk = vn->port.sock->sk;
struct ip_mreqn mreq = {
.imr_multiaddr.s_addr = vxlan->gaddr,
.imr_ifindex = vxlan->link,
@@ -631,7 +637,7 @@ static int vxlan_leave_group(struct net_device *dev)
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
int err = 0;
- struct sock *sk = vn->sock->sk;
+ struct sock *sk = vn->port.sock->sk;
struct ip_mreqn mreq = {
.imr_multiaddr.s_addr = vxlan->gaddr,
.imr_ifindex = vxlan->link,
@@ -654,12 +660,9 @@ static int vxlan_leave_group(struct net_device *dev)
/* Callback from net/ipv4/udp.c to receive packets */
static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
- struct iphdr *oip;
+ struct vxlan_net *vn = net_generic(dev_net(skb->dev), vxlan_net_id);
struct vxlanhdr *vxh;
- struct vxlan_dev *vxlan;
- struct pcpu_tstats *stats;
- __u32 vni;
- int err;
+ int i;
/* pop off outer UDP header */
__skb_pull(skb, sizeof(struct udphdr));
@@ -678,13 +681,43 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
}
__skb_pull(skb, sizeof(struct vxlanhdr));
+ rcu_read_lock();
+ for (i = 0; i < MAX_VXLAN_PORTS; i++) {
+ struct vxlan_port *port = rcu_dereference(vn->vxlan_ports[i]);
+ int ret;
+
+ if (!port)
+ continue;
+ if (port->portno != udp_hdr(skb)->dest)
+ continue;
+ ret = port->vx_rcv(port, skb, vxh->vx_vni);
+ if (ret == PACKET_RCVD) {
+ rcu_read_unlock();
+ return 0;
+ }
+ }
+ rcu_read_unlock();
+error:
+ /* Put UDP header back */
+ __skb_push(skb, sizeof(struct udphdr));
+ return 1;
+}
+
+static int vxlan_rcv(struct vxlan_port *port, struct sk_buff *skb,
+ __be32 _vni)
+{
+ struct vxlan_dev *vxlan;
+ struct net *net = dev_net(skb->dev);
+ struct iphdr *oip;
+ struct pcpu_tstats *stats;
+ int err;
+ int vni;
/* Is this VNI defined? */
- vni = ntohl(vxh->vx_vni) >> 8;
- vxlan = vxlan_find_vni(sock_net(sk), vni);
+ vni = ntohl(_vni) >> 8;
+ vxlan = vxlan_find_vni(net, vni);
if (!vxlan) {
- netdev_dbg(skb->dev, "unknown vni %d\n", vni);
- goto drop;
+ return PACKET_REJECT;
}
if (!pskb_may_pull(skb, ETH_HLEN)) {
@@ -741,16 +774,11 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
netif_rx(skb);
- return 0;
-error:
- /* Put UDP header back */
- __skb_push(skb, sizeof(struct udphdr));
-
- return 1;
+ return PACKET_RCVD;
drop:
/* Consume bad packet */
kfree_skb(skb);
- return 0;
+ return PACKET_RCVD;
}
static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
@@ -869,10 +897,9 @@ static void vxlan_sock_free(struct sk_buff *skb)
}
/* On transmit, associate with the tunnel socket */
-static void vxlan_set_owner(struct net_device *dev, struct sk_buff *skb)
+static void vxlan_set_owner(const struct vxlan_port *port, struct sk_buff *skb)
{
- struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
- struct sock *sk = vn->sock->sk;
+ struct sock *sk = port->sock->sk;
skb_orphan(skb);
sock_hold(sk);
@@ -915,6 +942,7 @@ static int handle_offloads(struct sk_buff *skb)
static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_rdst *rdst, bool did_rsc)
{
+ struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev);
struct rtable *rt;
const struct iphdr *old_iph;
@@ -929,7 +957,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
__be16 df = 0;
__u8 tos, ttl;
- dst_port = rdst->remote_port ? rdst->remote_port : vxlan_port;
+ dst_port = rdst->remote_port ? rdst->remote_port : vxlan_portno;
vni = rdst->remote_vni;
dst = rdst->remote_ip;
@@ -1032,7 +1060,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
nf_reset(skb);
- vxlan_set_owner(dev, skb);
+ vxlan_set_owner(&vn->port, skb);
if (handle_offloads(skb))
goto drop;
@@ -1077,7 +1105,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
f = vxlan_find_mac(vxlan, eth->h_dest);
if (f == NULL) {
did_rsc = false;
- group.remote_port = vxlan_port;
+ group.remote_port = vxlan_portno;
group.remote_vni = vxlan->vni;
group.remote_ip = vxlan->gaddr;
group.remote_ifindex = vxlan->link;
@@ -1526,37 +1554,35 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
.fill_info = vxlan_fill_info,
};
-static __net_init int vxlan_init_net(struct net *net)
+struct socket *vxlan_create_socket(struct net *net, __be16 portno)
{
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+ struct socket *sock;
struct sock *sk;
struct sockaddr_in vxlan_addr = {
.sin_family = AF_INET,
.sin_addr.s_addr = htonl(INADDR_ANY),
};
int rc;
- unsigned h;
/* Create UDP socket for encapsulation receive. */
- rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &vn->sock);
+ rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
if (rc < 0) {
pr_debug("UDP socket create failed\n");
- return rc;
+ return ERR_PTR(rc);
}
/* Put in proper namespace */
- sk = vn->sock->sk;
+ sk = sock->sk;
sk_change_net(sk, net);
- vxlan_addr.sin_port = htons(vxlan_port);
+ vxlan_addr.sin_port = portno;
- rc = kernel_bind(vn->sock, (struct sockaddr *) &vxlan_addr,
+ rc = kernel_bind(sock, (struct sockaddr *) &vxlan_addr,
sizeof(vxlan_addr));
if (rc < 0) {
pr_debug("bind for UDP socket %pI4:%u (%d)\n",
&vxlan_addr.sin_addr, ntohs(vxlan_addr.sin_port), rc);
sk_release_kernel(sk);
- vn->sock = NULL;
- return rc;
+ return ERR_PTR(rc);
}
/* Disable multicast loopback */
@@ -1567,28 +1593,104 @@ static __net_init int vxlan_init_net(struct net *net)
udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
udp_encap_enable();
- for (h = 0; h < VNI_HASH_SIZE; ++h)
- INIT_HLIST_HEAD(&vn->vni_list[h]);
+ return sock;
+}
- return 0;
+/* Register @new for @net, reusing the socket of any handler already bound to
+ * new->portno.  Returns 0, -EBUSY if every slot is taken, or a socket error.
+ */
+int vxlan_add_handler(struct net *net, struct vxlan_port *new)
+{
+	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+	struct socket *sock = NULL;
+	int i, p = -1, err = 0;
+
+	mutex_lock(&vxlan_mutex);
+	for (i = 0; i < MAX_VXLAN_PORTS; i++) {
+		struct vxlan_port *port;
+
+		port = rcu_dereference_protected(vn->vxlan_ports[i],
+					lockdep_is_held(&vxlan_mutex));
+		if (!port) {
+			if (p < 0)
+				p = i;	/* first free slot */
+		} else if (port->portno == new->portno) {
+			sock = port->sock;
+		}
+	}
+	if (p < 0) {
+		err = -EBUSY;
+		goto out;
+	}
+	if (!sock) {
+		sock = vxlan_create_socket(net, new->portno);
+		if (IS_ERR(sock)) {
+			err = PTR_ERR(sock);
+			goto out;
+		}
+	}
+	new->sock = sock;
+	/* Publish last so RCU readers see a fully set-up handler. */
+	rcu_assign_pointer(vn->vxlan_ports[p], new);
+out:
+	mutex_unlock(&vxlan_mutex);
+	return err;
}
+EXPORT_SYMBOL_GPL(vxlan_add_handler);
-static __net_exit void vxlan_exit_net(struct net *net)
+/* Unregister @del from @net.  The shared UDP socket is released only once no
+ * other handler uses del->portno; a @del that is not registered is ignored.
+ */
+void vxlan_del_handler(struct net *net, const struct vxlan_port *del)
+{
+	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+	struct vxlan_port *port;
+	bool found = false, inuse = false;
+	int i;
+
+	mutex_lock(&vxlan_mutex);
+	for (i = 0; i < MAX_VXLAN_PORTS; i++) {
+		port = rcu_dereference_protected(vn->vxlan_ports[i],
+					lockdep_is_held(&vxlan_mutex));
+		if (port && port == del) {
+			RCU_INIT_POINTER(vn->vxlan_ports[i], NULL);
+			found = true;
+		} else if (port && port->portno == del->portno) {
+			inuse = true;	/* someone else still needs the socket */
+		}
+	}
+	if (found) {
+		synchronize_net();	/* wait out receivers still using @del */
+		if (!inuse)
+			sk_release_kernel(del->sock->sk);
+	}
+	mutex_unlock(&vxlan_mutex);
+}
+EXPORT_SYMBOL_GPL(vxlan_del_handler);
+
+static __net_init int vxlan_init_net(struct net *net)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
- struct vxlan_dev *vxlan;
unsigned h;
+ int err;
+
+ vn->port.portno = htons(vxlan_portno); /* udp_port module param, network order */
+ vn->port.vx_rcv = vxlan_rcv; /* built-in handler: looks up the device by VNI */
+
+ err = vxlan_add_handler(net, &vn->port); /* on success also sets vn->port.sock */
+ if (err)
+ return err;
- rtnl_lock();
for (h = 0; h < VNI_HASH_SIZE; ++h)
- hlist_for_each_entry(vxlan, &vn->vni_list[h], hlist)
- dev_close(vxlan->dev);
- rtnl_unlock();
+ INIT_HLIST_HEAD(&vn->vni_list[h]); /* NOTE(review): runs after the handler is already registered */
- if (vn->sock) {
- sk_release_kernel(vn->sock->sk);
- vn->sock = NULL;
- }
+ return 0;
+}
+
+static __net_exit void vxlan_exit_net(struct net *net)
+{
+ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+ vxlan_del_handler(net, &vn->port); /* drops the built-in handler; frees its socket if unshared */
}
static struct pernet_operations vxlan_net_ops = {
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
new file mode 100644
index 0000000..dae9619
--- /dev/null
+++ b/include/net/vxlan.h
@@ -0,0 +1,17 @@
+#ifndef __NET_IP_VXLAN_H
+#define __NET_IP_VXLAN_H 1
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/udp.h>
+
+struct vxlan_port {	/* vx_rcv: return PACKET_RCVD once skb is consumed */
+	int (*vx_rcv)(struct vxlan_port *port, struct sk_buff *skb, __be32 key);
+	void *user_data;	/* for the handler; unused by vxlan core here */
+	struct socket *sock;	/* filled in by vxlan_add_handler() */
+	__be16 portno;		/* UDP destination port, network byte order */
+};
+
+int vxlan_add_handler(struct net *net, struct vxlan_port *port);
+void vxlan_del_handler(struct net *net, const struct vxlan_port *port);
+#endif
--
1.7.1
More information about the dev
mailing list