[ovs-dev] [PATCH v4] datapath: Add support for VXLAN tunnels to Open vSwitch

Chris Wright chrisw at sous-sol.org
Wed Nov 28 03:24:44 UTC 2012


* Kyle Mestery (kmestery at cisco.com) wrote:
> @@ -23,6 +23,7 @@
>  /kmemdup.c
>  /loop_counter.c
>  /modules.order
> +/net_namespace.c
>  /netdevice.c
>  /net_namespace.c
>  /random32.c

Spurious change?  /net_namespace.c is already listed two lines below.

> @@ -39,5 +40,6 @@
>  /vport-internal_dev.c
>  /vport-netdev.c
>  /vport-patch.c
> +/vport-vxlan.c
>  /vport.c
>  /workqueue.c
> diff --git a/datapath/tunnel.c b/datapath/tunnel.c
> index fb4854a..05a73df 100644
> --- a/datapath/tunnel.c
> +++ b/datapath/tunnel.c
> @@ -1042,6 +1042,7 @@ static const struct nla_policy tnl_policy[OVS_TUNNEL_ATTR_MAX + 1] = {
>  	[OVS_TUNNEL_ATTR_IN_KEY]   = { .type = NLA_U64 },
>  	[OVS_TUNNEL_ATTR_TOS]      = { .type = NLA_U8 },
>  	[OVS_TUNNEL_ATTR_TTL]      = { .type = NLA_U8 },
> +	[OVS_TUNNEL_ATTR_DST_PORT] = { .type = NLA_U16 },
>  };
>  
>  /* Sets OVS_TUNNEL_ATTR_* fields in 'mutable', which must initially be
> @@ -1087,6 +1088,9 @@ static int tnl_set_config(struct net *net, struct nlattr *options,
>  	if (a[OVS_TUNNEL_ATTR_TTL])
>  		mutable->ttl = nla_get_u8(a[OVS_TUNNEL_ATTR_TTL]);
>  
> +	if (a[OVS_TUNNEL_ATTR_DST_PORT])
> +		mutable->dst_port = nla_get_u16(a[OVS_TUNNEL_ATTR_DST_PORT]);
> +
>  	if (!a[OVS_TUNNEL_ATTR_IN_KEY]) {
>  		mutable->key.tunnel_type |= TNL_T_KEY_MATCH;
>  		mutable->flags |= TNL_F_IN_KEY_MATCH;
> @@ -1242,6 +1246,9 @@ int ovs_tnl_get_options(const struct vport *vport, struct sk_buff *skb)
>  		goto nla_put_failure;
>  	if (mutable->ttl && nla_put_u8(skb, OVS_TUNNEL_ATTR_TTL, mutable->ttl))
>  		goto nla_put_failure;
> +	if (mutable->dst_port && nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT,
> +					     mutable->dst_port))
> +		goto nla_put_failure;
>  
>  	return 0;
>  
> diff --git a/datapath/tunnel.h b/datapath/tunnel.h
> index c268057..c0b50e7 100644
> --- a/datapath/tunnel.h
> +++ b/datapath/tunnel.h
> @@ -42,6 +42,7 @@
>  #define TNL_T_PROTO_GRE		0
>  #define TNL_T_PROTO_GRE64	1
>  #define TNL_T_PROTO_CAPWAP	2
> +#define TNL_T_PROTO_VXLAN	3
>  
>  /* These flags are only needed when calling tnl_find_port(). */
>  #define TNL_T_KEY_EXACT		(1 << 10)
> @@ -116,6 +117,7 @@ struct tnl_mutable_config {
>  	u32	flags;
>  	u8	tos;
>  	u8	ttl;
> +	u16	dst_port;
>  
>  	/* Multicast configuration. */
>  	int	mlink;
> diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
> new file mode 100644
> index 0000000..88e03d5
> --- /dev/null
> +++ b/datapath/vport-vxlan.c
> @@ -0,0 +1,459 @@
> + /*
> + * Copyright (c) 2011 Nicira, Inc.
> + * Copyright (c) 2012 Cisco Systems, Inc.
> + * Distributed under the terms of the GNU GPL version 2.
> + *
> + * Significant portions of this file may be copied from parts of the Linux
> + * kernel, by Linus Torvalds and others.

Only this and capwap have this unusual header bit.

> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/version.h>
> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
> +
> +#include <linux/in.h>
> +#include <linux/ip.h>
> +#include <linux/jhash.h>
> +#include <linux/list.h>
> +#include <linux/net.h>
> +#include <linux/udp.h>
> +
> +#include <net/icmp.h>
> +#include <net/ip.h>
> +#include <net/udp.h>
> +
> +#include "datapath.h"
> +#include "tunnel.h"
> +#include "vport.h"
> +#include "vport-generic.h"
> +
> +/* Default to the OTV port, per the VXLAN IETF draft. */
> +#define VXLAN_DST_PORT 8472
> +
> +#define VXLAN_FLAGS 0x08000000  /* struct vxlanhdr.vx_flags required value. */
> +
> +/**
> + * struct vxlanhdr - VXLAN header
> + * @vx_flags: Must have the exact value %VXLAN_FLAGS.
> + * @vx_vni: VXLAN Network Identifier (VNI) in top 24 bits, low 8 bits zeroed.
> + */
> +struct vxlanhdr {
> +	__be32 vx_flags;
> +	__be32 vx_vni;
> +};
> +
> +#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
> +
> +static inline int vxlan_hdr_len(const struct tnl_mutable_config *mutable,
> +				const struct ovs_key_ipv4_tunnel *tun_key)
> +{
> +	return VXLAN_HLEN;
> +}
> +
> +/**
> + * struct vxlan_port - Keeps track of open UDP ports
> + * @port: The UDP port number.
> + * @socket: The socket created for this port number.
> + * @count: How many ports are using this socket/port.
> + * @hash_node: Hash node.
> + */
> +struct vxlan_port {
> +	u16 port;
> +	struct socket *vxlan_rcv_socket;
> +	int count;
> +
> +	/* Protected by RTNL lock. */
> +	struct hlist_node hash_node;
> +};
> +
> +/* Protected by RTNL lock. */
> +static struct hlist_head *vxlan_ports;
> +#define VXLAN_SOCK_HASH_BUCKETS 64
> +
> +/**
> + * struct vxlan_if - Maps port names to UDP port numbers
> + * @port: The UDP port number this interface is using.
> + * @ifname: The name of the interface.
> + * @hash_node: Hash node.
> + */
> +struct vxlan_if {
> +	u16 port;
> +	char ifname[IFNAMSIZ];
> +
> +	/* Protected by RTNL lock. */
> +	struct hlist_node hash_node;
> +};
> +
> +/* Protected by RTNL lock. */
> +static struct hlist_head *vxlan_ifs;
> +#define VXLAN_IF_HASH_BUCKETS 64
> +
> +static struct hlist_head *vxlan_hash_bucket(struct net *net, u16 port)
> +{
> +	unsigned int hash = jhash(&port, sizeof(port), (unsigned long) net);
> +	return &vxlan_ports[hash & (VXLAN_SOCK_HASH_BUCKETS - 1)];
> +}
> +
> +static struct vxlan_port *vxlan_port_exists(struct net *net, u16 port)
> +{
> +	struct hlist_head *bucket = vxlan_hash_bucket(net, port);
> +	struct vxlan_port *vxlan_port;
> +	struct hlist_node *node;
> +
> +	hlist_for_each_entry(vxlan_port, node, bucket, hash_node) {
> +		if (vxlan_port->port == port)
> +			return vxlan_port;
> +	}
> +
> +	return NULL;
> +}
> +
> +static struct hlist_head *vxlanif_hash_bucket(struct net *net, const char *name)
> +{
> +	unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
> +	return &vxlan_ifs[hash & (VXLAN_IF_HASH_BUCKETS - 1)];
> +}
> +
> +static struct vxlan_if *vxlan_if_by_name(struct net *net, const char *name)
> +{
> +	struct hlist_head *bucket = vxlanif_hash_bucket(net, name);
> +	struct vxlan_if *vxlan_if;
> +	struct hlist_node *node;
> +
> +	hlist_for_each_entry(vxlan_if, node, bucket, hash_node) {
> +		if (!strcmp(vxlan_if->ifname, name))
> +			return vxlan_if;
> +	}
> +
> +	return NULL;
> +}
> +
> +static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb)
> +{
> +	return (struct vxlanhdr *)(udp_hdr(skb) + 1);
> +}
> +
> +/* The below used as the min/max for the UDP port range */
> +#define VXLAN_SRC_PORT_MIN      32768
> +#define VXLAN_SRC_PORT_MAX      61000

This is consistent with Linux defaults, but inconsistent with comments
added below in vswitch.xml.  Any reason not to use inet_get_local_port_range()?

> +/* Compute source port for outgoing packet.
> + * Currently we use the flow hash.
> + */
> +static u16 get_src_port(struct sk_buff *skb)
> +{
> +	unsigned int range = (VXLAN_SRC_PORT_MAX - VXLAN_SRC_PORT_MIN) + 1;
> +	u32 hash = OVS_CB(skb)->flow->hash;
> +
> +	return (__force u16)(((u64) hash * range) >> 32) + VXLAN_SRC_PORT_MIN;
> +}
> +
> +static struct sk_buff *vxlan_build_header(const struct vport *vport,
> +					  const struct tnl_mutable_config *mutable,
> +					  struct dst_entry *dst,
> +					  struct sk_buff *skb,
> +					  int tunnel_hlen)
> +{
> +	struct udphdr *udph = udp_hdr(skb);
> +	struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1);
> +	const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
> +	__be64 out_key;
> +
> +	if (tun_key->ipv4_dst)
> +		out_key = tun_key->tun_id;
> +	else
> +		out_key = mutable->out_key;
> +
> +	if (mutable->dst_port)
> +		udph->dest = htons(mutable->dst_port);
> +	else
> +		udph->dest = htons(VXLAN_DST_PORT);
> +	udph->source = htons(get_src_port(skb));
> +	udph->check = 0;
> +	udph->len = htons(skb->len - skb_transport_offset(skb));
> +
> +	vxh->vx_flags = htonl(VXLAN_FLAGS);
> +	vxh->vx_vni = htonl(be64_to_cpu(out_key) << 8);
> +
> +	/*
> +	 * Allow our local IP stack to fragment the outer packet even if the
> +	 * DF bit is set as a last resort.  We also need to force selection of
> +	 * an IP ID here because Linux will otherwise leave it at 0 if the
> +	 * packet originally had DF set.
> +	 */
> +	skb->local_df = 1;
> +	__ip_select_ident(ip_hdr(skb), dst, 0);
> +
> +	return skb;
> +}
> +
> +/* Called with rcu_read_lock and BH disabled. */
> +static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
> +{
> +	struct vport *vport;
> +	struct vxlanhdr *vxh;
> +	const struct tnl_mutable_config *mutable;
> +	struct iphdr *iph;
> +	struct ovs_key_ipv4_tunnel tun_key;
> +	int tunnel_type;
> +	__be64 key;
> +	u32 tunnel_flags = 0;
> +
> +	if (unlikely(!pskb_may_pull(skb, VXLAN_HLEN + ETH_HLEN)))
> +		goto error;
> +
> +	vxh = vxlan_hdr(skb);
> +	if (unlikely(vxh->vx_flags != htonl(VXLAN_FLAGS) ||
> +		     vxh->vx_vni & htonl(0xff)))
> +		goto error;
> +
> +	__skb_pull(skb, VXLAN_HLEN);
> +	skb_postpull_rcsum(skb, skb_transport_header(skb), VXLAN_HLEN + ETH_HLEN);
> +
> +	key = cpu_to_be64(ntohl(vxh->vx_vni) >> 8);
> +
> +	tunnel_type = TNL_T_PROTO_VXLAN;
> +
> +	iph = ip_hdr(skb);
> +	vport = ovs_tnl_find_port(dev_net(skb->dev), iph->daddr, iph->saddr,
> +		key, tunnel_type, &mutable);
> +	if (unlikely(!vport)) {
> +		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
> +		goto error;
> +	}
> +
> +	if (mutable->flags & TNL_F_IN_KEY_MATCH || !mutable->key.daddr)
> +		tunnel_flags = OVS_TNL_F_KEY;
> +	else
> +		key = 0;
> +
> +	/* Save outer tunnel values */
> +	tnl_tun_key_init(&tun_key, iph, key, tunnel_flags);
> +	OVS_CB(skb)->tun_key = &tun_key;
> +
> +	ovs_tnl_rcv(vport, skb);
> +	goto out;
> +
> +error:
> +	kfree_skb(skb);
> +out:
> +	return 0;
> +}
> +
> +/* Random value.  Irrelevant as long as it's not 0 since we set the handler. */
> +#define UDP_ENCAP_VXLAN 10

Linux upstream is using 1.  Like you said, !0 is the only functional
requirement, but consistency is developer friendly ;)

> +static int vxlan_socket_init(struct vxlan_port *vxlan_port)
> +{
> +	int err;
> +	struct sockaddr_in sin;
> +
> +	err = sock_create(AF_INET, SOCK_DGRAM, 0, &vxlan_port->vxlan_rcv_socket);
> +	if (err)
> +		goto error;
> +
> +	sin.sin_family = AF_INET;
> +	sin.sin_addr.s_addr = htonl(INADDR_ANY);
> +	sin.sin_port = htons(vxlan_port->port);
> +
> +	err = kernel_bind(vxlan_port->vxlan_rcv_socket, (struct sockaddr *)&sin,
> +			  sizeof(struct sockaddr_in));
> +	if (err)
> +		goto error_sock;
> +
> +	udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_type = UDP_ENCAP_VXLAN;
> +	udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_rcv = vxlan_rcv;
> +
> +	udp_encap_enable();

This is a new function; did you provide a fallback/noop (since it looks
like it's meant to build back to older >=2.6.26 kernels)?

> +
> +	return 0;
> +
> +error_sock:
> +	sock_release(vxlan_port->vxlan_rcv_socket);
> +error:
> +	pr_warn("cannot register vxlan protocol handler\n");
> +	return err;
> +}
> +
> +static const struct nla_policy vxlan_policy[OVS_TUNNEL_ATTR_MAX + 1] = {
> +	[OVS_TUNNEL_ATTR_DST_PORT] = { .type = NLA_U16 },
> +};
> +
> +static int vxlan_tunnel_setup(struct net *net, const char *linkname,
> +			     struct nlattr *options)
> +{
> +	struct nlattr *a[OVS_TUNNEL_ATTR_MAX + 1];
> +	int err;
> +	u16 dst_port;
> +	struct vxlan_port *vxlan_port;
> +	struct vxlan_if *vxlan_if;
> +
> +	if (!options) {
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	err = nla_parse_nested(a, OVS_TUNNEL_ATTR_MAX, options, vxlan_policy);

This is already parsed in tnl_set_config() later.  So I'm not sure why
it's done twice during ->set_options() and ->create().


> +	if (err)
> +		goto out;
> +
> +	if (a[OVS_TUNNEL_ATTR_DST_PORT])
> +		dst_port = nla_get_u16(a[OVS_TUNNEL_ATTR_DST_PORT]);
> +	else
> +		dst_port = VXLAN_DST_PORT;
> +
> +	/* Verify if we already have a socket created for this port */
> +	vxlan_port = vxlan_port_exists(net, dst_port);
> +	if (vxlan_port) {
> +		vxlan_port->count++;
> +		err = 0;
> +		goto out;
> +	}
> +
> +	/* Add a new socket for this port */
> +	vxlan_port = kmalloc(sizeof(struct vxlan_port), GFP_KERNEL);
> +	if (!vxlan_port) {
> +		err = -ENOMEM;
> +		goto out;
> +	}
> +	memset (vxlan_port, 0, sizeof(struct vxlan_port));

Use kzalloc() instead of kmalloc() + memset().

> +
> +	vxlan_port->port = dst_port;
> +	vxlan_port->count++;
> +	hlist_add_head(&vxlan_port->hash_node,
> +		       vxlan_hash_bucket(net, dst_port));

A little unusual to have a hashtable for this.  Is this expected to be
temporary until IANA port is allocated?

> +
> +	err = vxlan_socket_init(vxlan_port);
> +	if (err)
> +		goto error_vxlan_if;
> +
> +	vxlan_if = kmalloc(sizeof(struct vxlan_if), GFP_KERNEL);
> +	if (!vxlan_if) {
> +		err = -ENOMEM;
> +		goto error_vxlan_if;
> +	}
> +	memset(vxlan_if, 0, sizeof(*vxlan_if));

Use kzalloc() instead of kmalloc() + memset().

> +	vxlan_if->port = dst_port;
> +	memcpy(vxlan_if->ifname, linkname, IFNAMSIZ);
> +	hlist_add_head(&vxlan_if->hash_node,
> +		       vxlanif_hash_bucket(net, linkname));
> +
> +out:
> +	return err;
> +error_vxlan_if:
> +	hlist_del(&vxlan_port->hash_node);
> +	kfree(vxlan_port);
> +	goto out;
> +}
> +
> +static int vxlan_set_options(struct vport *vport, struct nlattr *options)
> +{
> +	int err;
> +	const char *vname = vport->ops->get_name(vport);
> +
> +	err = vxlan_tunnel_setup(ovs_dp_get_net(vport->dp), vname, options);
> +	if (err)
> +		goto out;
> +
> +	err = ovs_tnl_set_options(vport, options);
> +
> +out:
> +	return err;
> +}
> +
> +static const struct tnl_ops ovs_vxlan_tnl_ops = {
> +	.tunnel_type	= TNL_T_PROTO_VXLAN,
> +	.ipproto	= IPPROTO_UDP,
> +	.hdr_len	= vxlan_hdr_len,
> +	.build_header	= vxlan_build_header,
> +};
> +
> +void vxlan_tnl_destroy(struct vport *vport)
> +{
> +	struct vxlan_if *vxlan_if;
> +	struct vxlan_port *vxlan_port;
> +	const char *vname = vport->ops->get_name(vport);
> +
> +	vxlan_if = vxlan_if_by_name(ovs_dp_get_net(vport->dp), vname);
> +	if (!vxlan_if)
> +		goto out;
> +
> +	vxlan_port = vxlan_port_exists(ovs_dp_get_net(vport->dp),
> +					 vxlan_if->port);
> +	if (!vxlan_port)
> +		goto out_if;

Are the above two actually valid failure cases on destroy?

> +	if (!--vxlan_port->count) {
> +		sock_release(vxlan_port->vxlan_rcv_socket);
> +		hlist_del(&vxlan_port->hash_node);
> +		kfree(vxlan_port);
> +	}
> +
> +out_if:
> +	hlist_del(&vxlan_if->hash_node);
> +	kfree(vxlan_if);
> +out:
> +	ovs_tnl_destroy(vport);
> +}
> +
> +static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
> +{
> +	int err;
> +
> +	err = vxlan_tunnel_setup(ovs_dp_get_net(parms->dp), parms->name,
> +						parms->options);
> +	return ovs_tnl_create(parms, &ovs_vxlan_vport_ops, &ovs_vxlan_tnl_ops);
> +}
> +
> +static int vxlan_init(void)
> +{
> +	int err;
> +
> +	vxlan_ifs = kzalloc(VXLAN_IF_HASH_BUCKETS * sizeof(struct hlist_head),
> +			    GFP_KERNEL);
> +	if (!vxlan_ifs) {
> +		err = -ENOMEM;
> +		goto out;
> +	}
> +
> +	vxlan_ports = kzalloc(VXLAN_SOCK_HASH_BUCKETS * sizeof(struct hlist_head),
> +				GFP_KERNEL);

Suppose these hash tables could just be statically allocated...

> +	if (!vxlan_ports) {
> +		err = -ENOMEM;
> +		goto free_ifs;
> +	}
> +
> +out:
> +	return 0;
> +free_ifs:
> +	kfree(vxlan_ifs);
> +	goto out;
> +}
> +
> +static void vxlan_exit(void)
> +{
> +	kfree(vxlan_ports);
> +	kfree(vxlan_ifs);

...which would obviate the need for ->exit()

> +}
> +
> +const struct vport_ops ovs_vxlan_vport_ops = {
> +	.type		= OVS_VPORT_TYPE_VXLAN,
> +	.flags		= VPORT_F_TUN_ID,
> +	.init		= vxlan_init,
> +	.exit		= vxlan_exit,
> +	.create		= vxlan_tnl_create,
> +	.destroy	= vxlan_tnl_destroy,
> +	.set_addr	= ovs_tnl_set_addr,
> +	.get_name	= ovs_tnl_get_name,
> +	.get_addr	= ovs_tnl_get_addr,
> +	.get_options	= ovs_tnl_get_options,
> +	.set_options	= vxlan_set_options,
> +	.get_dev_flags	= ovs_vport_gen_get_dev_flags,
> +	.is_running	= ovs_vport_gen_is_running,
> +	.get_operstate	= ovs_vport_gen_get_operstate,
> +	.send		= ovs_tnl_send,
> +};
> +#else
> +#warning VXLAN tunneling will not be available on kernels before 2.6.26
> +#endif /* Linux kernel < 2.6.26 */
> diff --git a/datapath/vport.c b/datapath/vport.c
> index 4934ac1..a1c7542 100644
> --- a/datapath/vport.c
> +++ b/datapath/vport.c
> @@ -45,6 +45,7 @@ static const struct vport_ops *base_vport_ops_list[] = {
>  	&ovs_gre64_vport_ops,
>  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
>  	&ovs_capwap_vport_ops,
> +	&ovs_vxlan_vport_ops,
>  #endif
>  };
>  
> diff --git a/datapath/vport.h b/datapath/vport.h
> index 5a7caf5..5080629 100644
> --- a/datapath/vport.h
> +++ b/datapath/vport.h
> @@ -257,5 +257,6 @@ extern const struct vport_ops ovs_gre_vport_ops;
>  extern const struct vport_ops ovs_gre_ft_vport_ops;
>  extern const struct vport_ops ovs_gre64_vport_ops;
>  extern const struct vport_ops ovs_capwap_vport_ops;
> +extern const struct vport_ops ovs_vxlan_vport_ops;
>  
>  #endif /* vport.h */
> diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
> index e7d4b49..2ae5681 100644
> --- a/include/linux/openvswitch.h
> +++ b/include/linux/openvswitch.h
> @@ -186,6 +186,7 @@ enum ovs_vport_type {
>  	OVS_VPORT_TYPE_PATCH = 100, /* virtual tunnel connecting two vports */
>  	OVS_VPORT_TYPE_GRE,      /* GRE tunnel */
>  	OVS_VPORT_TYPE_CAPWAP,   /* CAPWAP tunnel */
> +	OVS_VPORT_TYPE_VXLAN,    /* VXLAN tunnel */
>  	OVS_VPORT_TYPE_GRE64 = 104, /* GRE tunnel with 64-bit keys */
>  	__OVS_VPORT_TYPE_MAX
>  };
> diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
> index 88eba19..11b761d 100644
> --- a/include/openflow/nicira-ext.h
> +++ b/include/openflow/nicira-ext.h
> @@ -1578,9 +1578,11 @@ OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24);
>  
>  /* Tunnel ID.
>   *
> - * For a packet received via GRE tunnel including a (32-bit) key, the key is
> - * stored in the low 32-bits and the high bits are zeroed.  For other packets,
> - * the value is 0.
> + * For a packet received via a GRE or VXLAN tunnel including a (32-bit) key, the
> + * key is stored in the low 32-bits and the high bits are zeroed.  For other
> + * packets, the value is 0.

Confine to 80 columns since you're touching it already?

> + *
> + * All zero bits, for packets not received via a keyed tunnel.
>   *
>   * Prereqs: None.
>   *
> diff --git a/include/openvswitch/tunnel.h b/include/openvswitch/tunnel.h
> index 42c3621..23d8ba7 100644
> --- a/include/openvswitch/tunnel.h
> +++ b/include/openvswitch/tunnel.h
> @@ -57,6 +57,7 @@ enum {
>  	OVS_TUNNEL_ATTR_IN_KEY,   /* __be64 key to match on input. */
>  	OVS_TUNNEL_ATTR_TOS,      /* 8-bit TOS value. */
>  	OVS_TUNNEL_ATTR_TTL,      /* 8-bit TTL value. */
> +	OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by VXLAN. */
>  	__OVS_TUNNEL_ATTR_MAX
>  };
>  
> diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
> index 5171171..3dbb798 100644
> --- a/lib/netdev-vport.c
> +++ b/lib/netdev-vport.c
> @@ -173,6 +173,13 @@ netdev_vport_get_netdev_type(const struct dpif_linux_vport *vport)
>      case OVS_VPORT_TYPE_CAPWAP:
>          return "capwap";
>  
> +    case OVS_VPORT_TYPE_VXLAN:
> +        if (tnl_port_config_from_nlattr(vport->options, vport->options_len,
> +                                        a)) {
> +            break;

Should tnl_port_config_from_nlattr() grow an optional OVS_TUNNEL_ATTR_DST_PORT
check?  And similarly, I seem to be missing the vport_class added to
netdev_vport_register().

> --- a/vswitchd/vswitch.xml
> +++ b/vswitchd/vswitch.xml
> @@ -1247,6 +1246,23 @@
>              February 2013.
>            </dd>
>  
> +          <dt><code>vxlan</code></dt>
> +          <dd>
> +	    <p>
> +	      An Ethernet tunnel over the experimental, UDP-based VXLAN
> +	      protocol described at
> +	      <code>http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-02</code>.
> +	      VXLAN is currently supported only with the Linux kernel datapath
> +	      with kernel version 2.6.26 or later.
> +	    </p>
> +	    <p>
> +	      As an experimental protocol, VXLAN has no officially assigned UDP
> +	      port.  Open vSwitch currently uses UDP destination port 8472.
> +	      The source port used for VXLAN traffic varies on a per-flow basis
> +	      between 32768 and 65535 to allow load balancing.

Inconsistent with code, which uses 32768-61000 (and should use the sysctls
as the base for the local port range).

> @@ -1427,11 +1447,19 @@
>          </column>
>        </group>
>  
> -      <group title="Tunnel Options: ipsec_gre only">
> +      <group title="Tunnel Options: ipsec_gre and ipsec_vxlan only">

Oops, not completely removed...

>          <p>
> -          Only <code>ipsec_gre</code> interfaces support these options.
> +          Only <code>ipsec_gre</code> and <code>ipsec_vxlan</code> interfaces
> +          support these options.
>          </p>



More information about the dev mailing list