[ovs-dev] [PATCH v4] datapath: Add support for VXLAN tunnels to Open vSwitch

Kyle Mestery (kmestery) kmestery at cisco.com
Wed Nov 28 15:59:25 UTC 2012


On Nov 27, 2012, at 9:24 PM, Chris Wright <chrisw at sous-sol.org> wrote:

> * Kyle Mestery (kmestery at cisco.com) wrote:
>> @@ -23,6 +23,7 @@
>> /kmemdup.c
>> /loop_counter.c
>> /modules.order
>> +/net_namespace.c
>> /netdevice.c
>> /net_namespace.c
>> /random32.c
> 
> spurious change?
> 
>> @@ -39,5 +40,6 @@
>> /vport-internal_dev.c
>> /vport-netdev.c
>> /vport-patch.c
>> +/vport-vxlan.c
>> /vport.c
>> /workqueue.c
>> diff --git a/datapath/tunnel.c b/datapath/tunnel.c
>> index fb4854a..05a73df 100644
>> --- a/datapath/tunnel.c
>> +++ b/datapath/tunnel.c
>> @@ -1042,6 +1042,7 @@ static const struct nla_policy tnl_policy[OVS_TUNNEL_ATTR_MAX + 1] = {
>> 	[OVS_TUNNEL_ATTR_IN_KEY]   = { .type = NLA_U64 },
>> 	[OVS_TUNNEL_ATTR_TOS]      = { .type = NLA_U8 },
>> 	[OVS_TUNNEL_ATTR_TTL]      = { .type = NLA_U8 },
>> +	[OVS_TUNNEL_ATTR_DST_PORT] = { .type = NLA_U16 },
>> };
>> 
>> /* Sets OVS_TUNNEL_ATTR_* fields in 'mutable', which must initially be
>> @@ -1087,6 +1088,9 @@ static int tnl_set_config(struct net *net, struct nlattr *options,
>> 	if (a[OVS_TUNNEL_ATTR_TTL])
>> 		mutable->ttl = nla_get_u8(a[OVS_TUNNEL_ATTR_TTL]);
>> 
>> +	if (a[OVS_TUNNEL_ATTR_DST_PORT])
>> +		mutable->dst_port = nla_get_u16(a[OVS_TUNNEL_ATTR_DST_PORT]);
>> +
>> 	if (!a[OVS_TUNNEL_ATTR_IN_KEY]) {
>> 		mutable->key.tunnel_type |= TNL_T_KEY_MATCH;
>> 		mutable->flags |= TNL_F_IN_KEY_MATCH;
>> @@ -1242,6 +1246,9 @@ int ovs_tnl_get_options(const struct vport *vport, struct sk_buff *skb)
>> 		goto nla_put_failure;
>> 	if (mutable->ttl && nla_put_u8(skb, OVS_TUNNEL_ATTR_TTL, mutable->ttl))
>> 		goto nla_put_failure;
>> +	if (mutable->dst_port && nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT,
>> +					     mutable->dst_port))
>> +		goto nla_put_failure;
>> 
>> 	return 0;
>> 
>> diff --git a/datapath/tunnel.h b/datapath/tunnel.h
>> index c268057..c0b50e7 100644
>> --- a/datapath/tunnel.h
>> +++ b/datapath/tunnel.h
>> @@ -42,6 +42,7 @@
>> #define TNL_T_PROTO_GRE		0
>> #define TNL_T_PROTO_GRE64	1
>> #define TNL_T_PROTO_CAPWAP	2
>> +#define TNL_T_PROTO_VXLAN	3
>> 
>> /* These flags are only needed when calling tnl_find_port(). */
>> #define TNL_T_KEY_EXACT		(1 << 10)
>> @@ -116,6 +117,7 @@ struct tnl_mutable_config {
>> 	u32	flags;
>> 	u8	tos;
>> 	u8	ttl;
>> +	u16	dst_port;
>> 
>> 	/* Multicast configuration. */
>> 	int	mlink;
>> diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
>> new file mode 100644
>> index 0000000..88e03d5
>> --- /dev/null
>> +++ b/datapath/vport-vxlan.c
>> @@ -0,0 +1,459 @@
>> + /*
>> + * Copyright (c) 2011 Nicira, Inc.
>> + * Copyright (c) 2012 Cisco Systems, Inc.
>> + * Distributed under the terms of the GNU GPL version 2.
>> + *
>> + * Significant portions of this file may be copied from parts of the Linux
>> + * kernel, by Linus Torvalds and others.
> 
> Only this and capwap have this unusual header bit.
> 
>> + */
>> +
>> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>> +
>> +#include <linux/version.h>
>> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
>> +
>> +#include <linux/in.h>
>> +#include <linux/ip.h>
>> +#include <linux/jhash.h>
>> +#include <linux/list.h>
>> +#include <linux/net.h>
>> +#include <linux/udp.h>
>> +
>> +#include <net/icmp.h>
>> +#include <net/ip.h>
>> +#include <net/udp.h>
>> +
>> +#include "datapath.h"
>> +#include "tunnel.h"
>> +#include "vport.h"
>> +#include "vport-generic.h"
>> +
>> +/* Default to the OTV port, per the VXLAN IETF draft. */
>> +#define VXLAN_DST_PORT 8472
>> +
>> +#define VXLAN_FLAGS 0x08000000  /* struct vxlanhdr.vx_flags required value. */
>> +
>> +/**
>> + * struct vxlanhdr - VXLAN header
>> + * @vx_flags: Must have the exact value %VXLAN_FLAGS.
>> + * @vx_vni: VXLAN Network Identifier (VNI) in top 24 bits, low 8 bits zeroed.
>> + */
>> +struct vxlanhdr {
>> +	__be32 vx_flags;
>> +	__be32 vx_vni;
>> +};
>> +
>> +#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
>> +
>> +static inline int vxlan_hdr_len(const struct tnl_mutable_config *mutable,
>> +				const struct ovs_key_ipv4_tunnel *tun_key)
>> +{
>> +	return VXLAN_HLEN;
>> +}
>> +
>> +/**
>> + * struct vxlan_port - Keeps track of open UDP ports
>> + * @port: The UDP port number.
>> + * @socket: The socket created for this port number.
>> + * @count: How many ports are using this socket/port.
>> + * @hash_node: Hash node.
>> + */
>> +struct vxlan_port {
>> +	u16 port;
>> +	struct socket *vxlan_rcv_socket;
>> +	int count;
>> +
>> +	/* Protected by RTNL lock. */
>> +	struct hlist_node hash_node;
>> +};
>> +
>> +/* Protected by RTNL lock. */
>> +static struct hlist_head *vxlan_ports;
>> +#define VXLAN_SOCK_HASH_BUCKETS 64
>> +
>> +/**
>> + * struct vxlan_if - Maps port names to UDP port numbers
>> + * @port: The UDP port number this interface is using.
>> + * @ifname: The name of the interface.
>> + * @hash_node: Hash node.
>> + */
>> +struct vxlan_if {
>> +	u16 port;
>> +	char ifname[IFNAMSIZ];
>> +
>> +	/* Protected by RTNL lock. */
>> +	struct hlist_node hash_node;
>> +};
>> +
>> +/* Protected by RTNL lock. */
>> +static struct hlist_head *vxlan_ifs;
>> +#define VXLAN_IF_HASH_BUCKETS 64
>> +
>> +static struct hlist_head *vxlan_hash_bucket(struct net *net, u16 port)
>> +{
>> +	unsigned int hash = jhash(&port, sizeof(port), (unsigned long) net);
>> +	return &vxlan_ports[hash & (VXLAN_SOCK_HASH_BUCKETS - 1)];
>> +}
>> +
>> +static struct vxlan_port *vxlan_port_exists(struct net *net, u16 port)
>> +{
>> +	struct hlist_head *bucket = vxlan_hash_bucket(net, port);
>> +	struct vxlan_port *vxlan_port;
>> +	struct hlist_node *node;
>> +
>> +	hlist_for_each_entry(vxlan_port, node, bucket, hash_node) {
>> +		if (vxlan_port->port == port)
>> +			return vxlan_port;
>> +	}
>> +
>> +	return NULL;
>> +}
>> +
>> +static struct hlist_head *vxlanif_hash_bucket(struct net *net, const char *name)
>> +{
>> +	unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
>> +	return &vxlan_ifs[hash & (VXLAN_IF_HASH_BUCKETS - 1)];
>> +}
>> +
>> +static struct vxlan_if *vxlan_if_by_name(struct net *net, const char *name)
>> +{
>> +	struct hlist_head *bucket = vxlanif_hash_bucket(net, name);
>> +	struct vxlan_if *vxlan_if;
>> +	struct hlist_node *node;
>> +
>> +	hlist_for_each_entry(vxlan_if, node, bucket, hash_node) {
>> +		if (!strcmp(vxlan_if->ifname, name))
>> +			return vxlan_if;
>> +	}
>> +
>> +	return NULL;
>> +}
>> +
>> +static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb)
>> +{
>> +	return (struct vxlanhdr *)(udp_hdr(skb) + 1);
>> +}
>> +
>> +/* The below used as the min/max for the UDP port range */
>> +#define VXLAN_SRC_PORT_MIN      32768
>> +#define VXLAN_SRC_PORT_MAX      61000
> 
> This is consistent with Linux defaults, but inconsistent with comments
> added below in vswitch.xml.  Any reason not to use inet_get_local_port_range()?
> 
>> +/* Compute source port for outgoing packet.
>> + * Currently we use the flow hash.
>> + */
>> +static u16 get_src_port(struct sk_buff *skb)
>> +{
>> +	unsigned int range = (VXLAN_SRC_PORT_MAX - VXLAN_SRC_PORT_MIN) + 1;
>> +	u32 hash = OVS_CB(skb)->flow->hash;
>> +
>> +	return (__force u16)(((u64) hash * range) >> 32) + VXLAN_SRC_PORT_MIN;
>> +}
>> +
>> +static struct sk_buff *vxlan_build_header(const struct vport *vport,
>> +					  const struct tnl_mutable_config *mutable,
>> +					  struct dst_entry *dst,
>> +					  struct sk_buff *skb,
>> +					  int tunnel_hlen)
>> +{
>> +	struct udphdr *udph = udp_hdr(skb);
>> +	struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1);
>> +	const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
>> +	__be64 out_key;
>> +
>> +	if (tun_key->ipv4_dst)
>> +		out_key = tun_key->tun_id;
>> +	else
>> +		out_key = mutable->out_key;
>> +
>> +	if (mutable->dst_port)
>> +		udph->dest = htons(mutable->dst_port);
>> +	else
>> +		udph->dest = htons(VXLAN_DST_PORT);
>> +	udph->source = htons(get_src_port(skb));
>> +	udph->check = 0;
>> +	udph->len = htons(skb->len - skb_transport_offset(skb));
>> +
>> +	vxh->vx_flags = htonl(VXLAN_FLAGS);
>> +	vxh->vx_vni = htonl(be64_to_cpu(out_key) << 8);
>> +
>> +	/*
>> +	 * Allow our local IP stack to fragment the outer packet even if the
>> +	 * DF bit is set as a last resort.  We also need to force selection of
>> +	 * an IP ID here because Linux will otherwise leave it at 0 if the
>> +	 * packet originally had DF set.
>> +	 */
>> +	skb->local_df = 1;
>> +	__ip_select_ident(ip_hdr(skb), dst, 0);
>> +
>> +	return skb;
>> +}
>> +
>> +/* Called with rcu_read_lock and BH disabled. */
>> +static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
>> +{
>> +	struct vport *vport;
>> +	struct vxlanhdr *vxh;
>> +	const struct tnl_mutable_config *mutable;
>> +	struct iphdr *iph;
>> +	struct ovs_key_ipv4_tunnel tun_key;
>> +	int tunnel_type;
>> +	__be64 key;
>> +	u32 tunnel_flags = 0;
>> +
>> +	if (unlikely(!pskb_may_pull(skb, VXLAN_HLEN + ETH_HLEN)))
>> +		goto error;
>> +
>> +	vxh = vxlan_hdr(skb);
>> +	if (unlikely(vxh->vx_flags != htonl(VXLAN_FLAGS) ||
>> +		     vxh->vx_vni & htonl(0xff)))
>> +		goto error;
>> +
>> +	__skb_pull(skb, VXLAN_HLEN);
>> +	skb_postpull_rcsum(skb, skb_transport_header(skb), VXLAN_HLEN + ETH_HLEN);
>> +
>> +	key = cpu_to_be64(ntohl(vxh->vx_vni) >> 8);
>> +
>> +	tunnel_type = TNL_T_PROTO_VXLAN;
>> +
>> +	iph = ip_hdr(skb);
>> +	vport = ovs_tnl_find_port(dev_net(skb->dev), iph->daddr, iph->saddr,
>> +		key, tunnel_type, &mutable);
>> +	if (unlikely(!vport)) {
>> +		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
>> +		goto error;
>> +	}
>> +
>> +	if (mutable->flags & TNL_F_IN_KEY_MATCH || !mutable->key.daddr)
>> +		tunnel_flags = OVS_TNL_F_KEY;
>> +	else
>> +		key = 0;
>> +
>> +	/* Save outer tunnel values */
>> +	tnl_tun_key_init(&tun_key, iph, key, tunnel_flags);
>> +	OVS_CB(skb)->tun_key = &tun_key;
>> +
>> +	ovs_tnl_rcv(vport, skb);
>> +	goto out;
>> +
>> +error:
>> +	kfree_skb(skb);
>> +out:
>> +	return 0;
>> +}
>> +
>> +/* Random value.  Irrelevant as long as it's not 0 since we set the handler. */
>> +#define UDP_ENCAP_VXLAN 10
> 
> Linux upstream is using 1 (like you said, being non-zero is the only
> functional requirement, but consistency is developer friendly ;) ).
> 
>> +static int vxlan_socket_init(struct vxlan_port *vxlan_port)
>> +{
>> +	int err;
>> +	struct sockaddr_in sin;
>> +
>> +	err = sock_create(AF_INET, SOCK_DGRAM, 0, &vxlan_port->vxlan_rcv_socket);
>> +	if (err)
>> +		goto error;
>> +
>> +	sin.sin_family = AF_INET;
>> +	sin.sin_addr.s_addr = htonl(INADDR_ANY);
>> +	sin.sin_port = htons(vxlan_port->port);
>> +
>> +	err = kernel_bind(vxlan_port->vxlan_rcv_socket, (struct sockaddr *)&sin,
>> +			  sizeof(struct sockaddr_in));
>> +	if (err)
>> +		goto error_sock;
>> +
>> +	udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_type = UDP_ENCAP_VXLAN;
>> +	udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_rcv = vxlan_rcv;
>> +
>> +	udp_encap_enable();
> 
> This is a new function; did you provide a fallback/noop (since it looks
> like it's meant to build back to older >=2.6.26 kernels)?
> 
>> +
>> +	return 0;
>> +
>> +error_sock:
>> +	sock_release(vxlan_port->vxlan_rcv_socket);
>> +error:
>> +	pr_warn("cannot register vxlan protocol handler\n");
>> +	return err;
>> +}
>> +
>> +static const struct nla_policy vxlan_policy[OVS_TUNNEL_ATTR_MAX + 1] = {
>> +	[OVS_TUNNEL_ATTR_DST_PORT] = { .type = NLA_U16 },
>> +};
>> +
>> +static int vxlan_tunnel_setup(struct net *net, const char *linkname,
>> +			     struct nlattr *options)
>> +{
>> +	struct nlattr *a[OVS_TUNNEL_ATTR_MAX + 1];
>> +	int err;
>> +	u16 dst_port;
>> +	struct vxlan_port *vxlan_port;
>> +	struct vxlan_if *vxlan_if;
>> +
>> +	if (!options) {
>> +		err = -EINVAL;
>> +		goto out;
>> +	}
>> +
>> +	err = nla_parse_nested(a, OVS_TUNNEL_ATTR_MAX, options, vxlan_policy);
> 
> This is already parsed in tnl_set_config() later.  So I'm not sure why
> it's done twice during ->set_options() and ->create().
> 
> 
>> +	if (err)
>> +		goto out;
>> +
>> +	if (a[OVS_TUNNEL_ATTR_DST_PORT])
>> +		dst_port = nla_get_u16(a[OVS_TUNNEL_ATTR_DST_PORT]);
>> +	else
>> +		dst_port = VXLAN_DST_PORT;
>> +
>> +	/* Verify if we already have a socket created for this port */
>> +	vxlan_port = vxlan_port_exists(net, dst_port);
>> +	if (vxlan_port) {
>> +		vxlan_port->count++;
>> +		err = 0;
>> +		goto out;
>> +	}
>> +
>> +	/* Add a new socket for this port */
>> +	vxlan_port = kmalloc(sizeof(struct vxlan_port), GFP_KERNEL);
>> +	if (!vxlan_port) {
>> +		err = -ENOMEM;
>> +		goto out;
>> +	}
>> +	memset (vxlan_port, 0, sizeof(struct vxlan_port));
> 
> kzalloc()
> 
>> +
>> +	vxlan_port->port = dst_port;
>> +	vxlan_port->count++;
>> +	hlist_add_head(&vxlan_port->hash_node,
>> +		       vxlan_hash_bucket(net, dst_port));
> 
> A little unusual to have a hashtable for this.  Is this expected to be
> temporary until IANA port is allocated?
> 
The idea is to allow destination port configuration on a per-tunnel basis,
so I needed to track this. As Jesse said, maybe a hash table was overkill
here, but that's a design point I think (e.g. I could have done a linked
list instead).

>> +
>> +	err = vxlan_socket_init(vxlan_port);
>> +	if (err)
>> +		goto error_vxlan_if;
>> +
>> +	vxlan_if = kmalloc(sizeof(struct vxlan_if), GFP_KERNEL);
>> +	if (!vxlan_if) {
>> +		err = -ENOMEM;
>> +		goto error_vxlan_if;
>> +	}
>> +	memset(vxlan_if, 0, sizeof(*vxlan_if));
> 
> kzalloc()
> 
>> +	vxlan_if->port = dst_port;
>> +	memcpy(vxlan_if->ifname, linkname, IFNAMSIZ);
>> +	hlist_add_head(&vxlan_if->hash_node,
>> +		       vxlanif_hash_bucket(net, linkname));
>> +
>> +out:
>> +	return err;
>> +error_vxlan_if:
>> +	hlist_del(&vxlan_port->hash_node);
>> +	kfree(vxlan_port);
>> +	goto out;
>> +}
>> +
>> +static int vxlan_set_options(struct vport *vport, struct nlattr *options)
>> +{
>> +	int err;
>> +	const char *vname = vport->ops->get_name(vport);
>> +
>> +	err = vxlan_tunnel_setup(ovs_dp_get_net(vport->dp), vname, options);
>> +	if (err)
>> +		goto out;
>> +
>> +	err = ovs_tnl_set_options(vport, options);
>> +
>> +out:
>> +	return err;
>> +}
>> +
>> +static const struct tnl_ops ovs_vxlan_tnl_ops = {
>> +	.tunnel_type	= TNL_T_PROTO_VXLAN,
>> +	.ipproto	= IPPROTO_UDP,
>> +	.hdr_len	= vxlan_hdr_len,
>> +	.build_header	= vxlan_build_header,
>> +};
>> +
>> +void vxlan_tnl_destroy(struct vport *vport)
>> +{
>> +	struct vxlan_if *vxlan_if;
>> +	struct vxlan_port *vxlan_port;
>> +	const char *vname = vport->ops->get_name(vport);
>> +
>> +	vxlan_if = vxlan_if_by_name(ovs_dp_get_net(vport->dp), vname);
>> +	if (!vxlan_if)
>> +		goto out;
>> +
>> +	vxlan_port = vxlan_port_exists(ovs_dp_get_net(vport->dp),
>> +					 vxlan_if->port);
>> +	if (!vxlan_port)
>> +		goto out_if;
> 
> Are the above two actually valid failure cases on destroy?
> 
>> +	if (!--vxlan_port->count) {
>> +		sock_release(vxlan_port->vxlan_rcv_socket);
>> +		hlist_del(&vxlan_port->hash_node);
>> +		kfree(vxlan_port);
>> +	}
>> +
>> +out_if:
>> +	hlist_del(&vxlan_if->hash_node);
>> +	kfree(vxlan_if);
>> +out:
>> +	ovs_tnl_destroy(vport);
>> +}
>> +
>> +static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
>> +{
>> +	int err;
>> +
>> +	err = vxlan_tunnel_setup(ovs_dp_get_net(parms->dp), parms->name,
>> +						parms->options);
>> +	return ovs_tnl_create(parms, &ovs_vxlan_vport_ops, &ovs_vxlan_tnl_ops);
>> +}
>> +
>> +static int vxlan_init(void)
>> +{
>> +	int err;
>> +
>> +	vxlan_ifs = kzalloc(VXLAN_IF_HASH_BUCKETS * sizeof(struct hlist_head),
>> +			    GFP_KERNEL);
>> +	if (!vxlan_ifs) {
>> +		err = -ENOMEM;
>> +		goto out;
>> +	}
>> +
>> +	vxlan_ports = kzalloc(VXLAN_SOCK_HASH_BUCKETS * sizeof(struct hlist_head),
>> +				GFP_KERNEL);
> 
> Suppose these hash tables could just be statically allocated...
> 
>> +	if (!vxlan_ports) {
>> +		err = -ENOMEM;
>> +		goto free_ifs;
>> +	}
>> +
>> +out:
>> +	return 0;
>> +free_ifs:
>> +	kfree(vxlan_ifs);
>> +	goto out;
>> +}
>> +
>> +static void vxlan_exit(void)
>> +{
>> +	kfree(vxlan_ports);
>> +	kfree(vxlan_ifs);
> 
> ...which would obviate the need for ->exit()
> 
>> +}
>> +
>> +const struct vport_ops ovs_vxlan_vport_ops = {
>> +	.type		= OVS_VPORT_TYPE_VXLAN,
>> +	.flags		= VPORT_F_TUN_ID,
>> +	.init		= vxlan_init,
>> +	.exit		= vxlan_exit,
>> +	.create		= vxlan_tnl_create,
>> +	.destroy	= vxlan_tnl_destroy,
>> +	.set_addr	= ovs_tnl_set_addr,
>> +	.get_name	= ovs_tnl_get_name,
>> +	.get_addr	= ovs_tnl_get_addr,
>> +	.get_options	= ovs_tnl_get_options,
>> +	.set_options	= vxlan_set_options,
>> +	.get_dev_flags	= ovs_vport_gen_get_dev_flags,
>> +	.is_running	= ovs_vport_gen_is_running,
>> +	.get_operstate	= ovs_vport_gen_get_operstate,
>> +	.send		= ovs_tnl_send,
>> +};
>> +#else
>> +#warning VXLAN tunneling will not be available on kernels before 2.6.26
>> +#endif /* Linux kernel < 2.6.26 */
>> diff --git a/datapath/vport.c b/datapath/vport.c
>> index 4934ac1..a1c7542 100644
>> --- a/datapath/vport.c
>> +++ b/datapath/vport.c
>> @@ -45,6 +45,7 @@ static const struct vport_ops *base_vport_ops_list[] = {
>> 	&ovs_gre64_vport_ops,
>> #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
>> 	&ovs_capwap_vport_ops,
>> +	&ovs_vxlan_vport_ops,
>> #endif
>> };
>> 
>> diff --git a/datapath/vport.h b/datapath/vport.h
>> index 5a7caf5..5080629 100644
>> --- a/datapath/vport.h
>> +++ b/datapath/vport.h
>> @@ -257,5 +257,6 @@ extern const struct vport_ops ovs_gre_vport_ops;
>> extern const struct vport_ops ovs_gre_ft_vport_ops;
>> extern const struct vport_ops ovs_gre64_vport_ops;
>> extern const struct vport_ops ovs_capwap_vport_ops;
>> +extern const struct vport_ops ovs_vxlan_vport_ops;
>> 
>> #endif /* vport.h */
>> diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
>> index e7d4b49..2ae5681 100644
>> --- a/include/linux/openvswitch.h
>> +++ b/include/linux/openvswitch.h
>> @@ -186,6 +186,7 @@ enum ovs_vport_type {
>> 	OVS_VPORT_TYPE_PATCH = 100, /* virtual tunnel connecting two vports */
>> 	OVS_VPORT_TYPE_GRE,      /* GRE tunnel */
>> 	OVS_VPORT_TYPE_CAPWAP,   /* CAPWAP tunnel */
>> +	OVS_VPORT_TYPE_VXLAN,    /* VXLAN tunnel */
>> 	OVS_VPORT_TYPE_GRE64 = 104, /* GRE tunnel with 64-bit keys */
>> 	__OVS_VPORT_TYPE_MAX
>> };
>> diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
>> index 88eba19..11b761d 100644
>> --- a/include/openflow/nicira-ext.h
>> +++ b/include/openflow/nicira-ext.h
>> @@ -1578,9 +1578,11 @@ OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24);
>> 
>> /* Tunnel ID.
>>  *
>> - * For a packet received via GRE tunnel including a (32-bit) key, the key is
>> - * stored in the low 32-bits and the high bits are zeroed.  For other packets,
>> - * the value is 0.
>> + * For a packet received via a GRE or VXLAN tunnel including a (32-bit) key, the
>> + * key is stored in the low 32-bits and the high bits are zeroed.  For other
>> + * packets, the value is 0.
> 
> Confine to 80 columns since you're touching it already?
> 
>> + *
>> + * All zero bits, for packets not received via a keyed tunnel.
>>  *
>>  * Prereqs: None.
>>  *
>> diff --git a/include/openvswitch/tunnel.h b/include/openvswitch/tunnel.h
>> index 42c3621..23d8ba7 100644
>> --- a/include/openvswitch/tunnel.h
>> +++ b/include/openvswitch/tunnel.h
>> @@ -57,6 +57,7 @@ enum {
>> 	OVS_TUNNEL_ATTR_IN_KEY,   /* __be64 key to match on input. */
>> 	OVS_TUNNEL_ATTR_TOS,      /* 8-bit TOS value. */
>> 	OVS_TUNNEL_ATTR_TTL,      /* 8-bit TTL value. */
>> +	OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by VXLAN. */
>> 	__OVS_TUNNEL_ATTR_MAX
>> };
>> 
>> diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
>> index 5171171..3dbb798 100644
>> --- a/lib/netdev-vport.c
>> +++ b/lib/netdev-vport.c
>> @@ -173,6 +173,13 @@ netdev_vport_get_netdev_type(const struct dpif_linux_vport *vport)
>>     case OVS_VPORT_TYPE_CAPWAP:
>>         return "capwap";
>> 
>> +    case OVS_VPORT_TYPE_VXLAN:
>> +        if (tnl_port_config_from_nlattr(vport->options, vport->options_len,
>> +                                        a)) {
>> +            break;
> 
> Should tnl_port_config_from_nlattr() grow an optional OVS_TUNNEL_ATTR_DST_PORT
> check?  And similarly, I seem to be missing the vport_class added to
> netdev_vport_register().
> 
>> --- a/vswitchd/vswitch.xml
>> +++ b/vswitchd/vswitch.xml
>> @@ -1247,6 +1246,23 @@
>>             February 2013.
>>           </dd>
>> 
>> +          <dt><code>vxlan</code></dt>
>> +          <dd>
>> +	    <p>
>> +	      An Ethernet tunnel over the experimental, UDP-based VXLAN
>> +	      protocol described at
>> +	      <code>http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-02</code>.
>> +	      VXLAN is currently supported only with the Linux kernel datapath
>> +	      with kernel version 2.6.26 or later.
>> +	    </p>
>> +	    <p>
>> +	      As an experimental protocol, VXLAN has no officially assigned UDP
>> +	      port.  Open vSwitch currently uses UDP destination port 8472.
>> +	      The source port used for VXLAN traffic varies on a per-flow basis
>> +	      between 32768 and 65535 to allow load balancing.
> 
> Inconsistent with code (and should use the sysctls as the base for the
> local port range).
> 
>> @@ -1427,11 +1447,19 @@
>>         </column>
>>       </group>
>> 
>> -      <group title="Tunnel Options: ipsec_gre only">
>> +      <group title="Tunnel Options: ipsec_gre and ipsec_vxlan only">
> 
> Oops, the ipsec_vxlan references were not completely removed...
> 
>>         <p>
>> -          Only <code>ipsec_gre</code> interfaces support these options.
>> +          Only <code>ipsec_gre</code> and <code>ipsec_vxlan</code> interfaces
>> +          support these options.
>>         </p>




More information about the dev mailing list