[ovs-dev] [PATCH v4] datapath: Add support for VXLAN tunnels to Open vSwitch
Kyle Mestery (kmestery)
kmestery at cisco.com
Wed Nov 28 15:59:25 UTC 2012
On Nov 27, 2012, at 9:24 PM, Chris Wright <chrisw at sous-sol.org> wrote:
> * Kyle Mestery (kmestery at cisco.com) wrote:
>> @@ -23,6 +23,7 @@
>> /kmemdup.c
>> /loop_counter.c
>> /modules.order
>> +/net_namespace.c
>> /netdevice.c
>> /net_namespace.c
>> /random32.c
>
> spurious change?
>
>> @@ -39,5 +40,6 @@
>> /vport-internal_dev.c
>> /vport-netdev.c
>> /vport-patch.c
>> +/vport-vxlan.c
>> /vport.c
>> /workqueue.c
>> diff --git a/datapath/tunnel.c b/datapath/tunnel.c
>> index fb4854a..05a73df 100644
>> --- a/datapath/tunnel.c
>> +++ b/datapath/tunnel.c
>> @@ -1042,6 +1042,7 @@ static const struct nla_policy tnl_policy[OVS_TUNNEL_ATTR_MAX + 1] = {
>> [OVS_TUNNEL_ATTR_IN_KEY] = { .type = NLA_U64 },
>> [OVS_TUNNEL_ATTR_TOS] = { .type = NLA_U8 },
>> [OVS_TUNNEL_ATTR_TTL] = { .type = NLA_U8 },
>> + [OVS_TUNNEL_ATTR_DST_PORT] = { .type = NLA_U16 },
>> };
>>
>> /* Sets OVS_TUNNEL_ATTR_* fields in 'mutable', which must initially be
>> @@ -1087,6 +1088,9 @@ static int tnl_set_config(struct net *net, struct nlattr *options,
>> if (a[OVS_TUNNEL_ATTR_TTL])
>> mutable->ttl = nla_get_u8(a[OVS_TUNNEL_ATTR_TTL]);
>>
>> + if (a[OVS_TUNNEL_ATTR_DST_PORT])
>> + mutable->dst_port = nla_get_u16(a[OVS_TUNNEL_ATTR_DST_PORT]);
>> +
>> if (!a[OVS_TUNNEL_ATTR_IN_KEY]) {
>> mutable->key.tunnel_type |= TNL_T_KEY_MATCH;
>> mutable->flags |= TNL_F_IN_KEY_MATCH;
>> @@ -1242,6 +1246,9 @@ int ovs_tnl_get_options(const struct vport *vport, struct sk_buff *skb)
>> goto nla_put_failure;
>> if (mutable->ttl && nla_put_u8(skb, OVS_TUNNEL_ATTR_TTL, mutable->ttl))
>> goto nla_put_failure;
>> + if (mutable->dst_port && nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT,
>> + mutable->dst_port))
>> + goto nla_put_failure;
>>
>> return 0;
>>
>> diff --git a/datapath/tunnel.h b/datapath/tunnel.h
>> index c268057..c0b50e7 100644
>> --- a/datapath/tunnel.h
>> +++ b/datapath/tunnel.h
>> @@ -42,6 +42,7 @@
>> #define TNL_T_PROTO_GRE 0
>> #define TNL_T_PROTO_GRE64 1
>> #define TNL_T_PROTO_CAPWAP 2
>> +#define TNL_T_PROTO_VXLAN 3
>>
>> /* These flags are only needed when calling tnl_find_port(). */
>> #define TNL_T_KEY_EXACT (1 << 10)
>> @@ -116,6 +117,7 @@ struct tnl_mutable_config {
>> u32 flags;
>> u8 tos;
>> u8 ttl;
>> + u16 dst_port;
>>
>> /* Multicast configuration. */
>> int mlink;
>> diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
>> new file mode 100644
>> index 0000000..88e03d5
>> --- /dev/null
>> +++ b/datapath/vport-vxlan.c
>> @@ -0,0 +1,459 @@
>> + /*
>> + * Copyright (c) 2011 Nicira, Inc.
>> + * Copyright (c) 2012 Cisco Systems, Inc.
>> + * Distributed under the terms of the GNU GPL version 2.
>> + *
>> + * Significant portions of this file may be copied from parts of the Linux
>> + * kernel, by Linus Torvalds and others.
>
> Only this and capwap have this unusual header bit.
>
>> + */
>> +
>> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>> +
>> +#include <linux/version.h>
>> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
>> +
>> +#include <linux/in.h>
>> +#include <linux/ip.h>
>> +#include <linux/jhash.h>
>> +#include <linux/list.h>
>> +#include <linux/net.h>
>> +#include <linux/udp.h>
>> +
>> +#include <net/icmp.h>
>> +#include <net/ip.h>
>> +#include <net/udp.h>
>> +
>> +#include "datapath.h"
>> +#include "tunnel.h"
>> +#include "vport.h"
>> +#include "vport-generic.h"
>> +
>> +/* Default to the OTV port, per the VXLAN IETF draft. */
>> +#define VXLAN_DST_PORT 8472
>> +
>> +#define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */
>> +
>> +/**
>> + * struct vxlanhdr - VXLAN header
>> + * @vx_flags: Must have the exact value %VXLAN_FLAGS.
>> + * @vx_vni: VXLAN Network Identifier (VNI) in top 24 bits, low 8 bits zeroed.
>> + */
>> +struct vxlanhdr {
>> + __be32 vx_flags;
>> + __be32 vx_vni;
>> +};
>> +
>> +#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
>> +
>> +static inline int vxlan_hdr_len(const struct tnl_mutable_config *mutable,
>> + const struct ovs_key_ipv4_tunnel *tun_key)
>> +{
>> + return VXLAN_HLEN;
>> +}
>> +
>> +/**
>> + * struct vxlan_port - Keeps track of open UDP ports
>> + * @port: The UDP port number.
>> + * @socket: The socket created for this port number.
>> + * @count: How many ports are using this socket/port.
>> + * @hash_node: Hash node.
>> + */
>> +struct vxlan_port {
>> + u16 port;
>> + struct socket *vxlan_rcv_socket;
>> + int count;
>> +
>> + /* Protected by RTNL lock. */
>> + struct hlist_node hash_node;
>> +};
>> +
>> +/* Protected by RTNL lock. */
>> +static struct hlist_head *vxlan_ports;
>> +#define VXLAN_SOCK_HASH_BUCKETS 64
>> +
>> +/**
>> + * struct vxlan_if - Maps port names to UDP port numbers
>> + * @port: The UDP port number this interface is using.
>> + * @ifname: The name of the interface.
>> + * @hash_node: Hash node.
>> + */
>> +struct vxlan_if {
>> + u16 port;
>> + char ifname[IFNAMSIZ];
>> +
>> + /* Protected by RTNL lock. */
>> + struct hlist_node hash_node;
>> +};
>> +
>> +/* Protected by RTNL lock. */
>> +static struct hlist_head *vxlan_ifs;
>> +#define VXLAN_IF_HASH_BUCKETS 64
>> +
>> +static struct hlist_head *vxlan_hash_bucket(struct net *net, u16 port)
>> +{
>> + unsigned int hash = jhash(&port, sizeof(port), (unsigned long) net);
>> + return &vxlan_ports[hash & (VXLAN_SOCK_HASH_BUCKETS - 1)];
>> +}
>> +
>> +static struct vxlan_port *vxlan_port_exists(struct net *net, u16 port)
>> +{
>> + struct hlist_head *bucket = vxlan_hash_bucket(net, port);
>> + struct vxlan_port *vxlan_port;
>> + struct hlist_node *node;
>> +
>> + hlist_for_each_entry(vxlan_port, node, bucket, hash_node) {
>> + if (vxlan_port->port == port)
>> + return vxlan_port;
>> + }
>> +
>> + return NULL;
>> +}
>> +
>> +static struct hlist_head *vxlanif_hash_bucket(struct net *net, const char *name)
>> +{
>> + unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
>> + return &vxlan_ifs[hash & (VXLAN_IF_HASH_BUCKETS - 1)];
>> +}
>> +
>> +static struct vxlan_if *vxlan_if_by_name(struct net *net, const char *name)
>> +{
>> + struct hlist_head *bucket = vxlanif_hash_bucket(net, name);
>> + struct vxlan_if *vxlan_if;
>> + struct hlist_node *node;
>> +
>> + hlist_for_each_entry(vxlan_if, node, bucket, hash_node) {
>> + if (!strcmp(vxlan_if->ifname, name))
>> + return vxlan_if;
>> + }
>> +
>> + return NULL;
>> +}
>> +
>> +static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb)
>> +{
>> + return (struct vxlanhdr *)(udp_hdr(skb) + 1);
>> +}
>> +
>> +/* The below used as the min/max for the UDP port range */
>> +#define VXLAN_SRC_PORT_MIN 32768
>> +#define VXLAN_SRC_PORT_MAX 61000
>
> This is consistent with Linux defaults, but inconsistent with comments
> added below in vswitch.xml. Any reason not to use inet_get_local_port_range()?
>
>> +/* Compute source port for outgoing packet.
>> + * Currently we use the flow hash.
>> + */
>> +static u16 get_src_port(struct sk_buff *skb)
>> +{
>> + unsigned int range = (VXLAN_SRC_PORT_MAX - VXLAN_SRC_PORT_MIN) + 1;
>> + u32 hash = OVS_CB(skb)->flow->hash;
>> +
>> + return (__force u16)(((u64) hash * range) >> 32) + VXLAN_SRC_PORT_MIN;
>> +}
>> +
>> +static struct sk_buff *vxlan_build_header(const struct vport *vport,
>> + const struct tnl_mutable_config *mutable,
>> + struct dst_entry *dst,
>> + struct sk_buff *skb,
>> + int tunnel_hlen)
>> +{
>> + struct udphdr *udph = udp_hdr(skb);
>> + struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1);
>> + const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
>> + __be64 out_key;
>> +
>> + if (tun_key->ipv4_dst)
>> + out_key = tun_key->tun_id;
>> + else
>> + out_key = mutable->out_key;
>> +
>> + if (mutable->dst_port)
>> + udph->dest = htons(mutable->dst_port);
>> + else
>> + udph->dest = htons(VXLAN_DST_PORT);
>> + udph->source = htons(get_src_port(skb));
>> + udph->check = 0;
>> + udph->len = htons(skb->len - skb_transport_offset(skb));
>> +
>> + vxh->vx_flags = htonl(VXLAN_FLAGS);
>> + vxh->vx_vni = htonl(be64_to_cpu(out_key) << 8);
>> +
>> + /*
>> + * Allow our local IP stack to fragment the outer packet even if the
>> + * DF bit is set as a last resort. We also need to force selection of
>> + * an IP ID here because Linux will otherwise leave it at 0 if the
>> + * packet originally had DF set.
>> + */
>> + skb->local_df = 1;
>> + __ip_select_ident(ip_hdr(skb), dst, 0);
>> +
>> + return skb;
>> +}
>> +
>> +/* Called with rcu_read_lock and BH disabled. */
>> +static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
>> +{
>> + struct vport *vport;
>> + struct vxlanhdr *vxh;
>> + const struct tnl_mutable_config *mutable;
>> + struct iphdr *iph;
>> + struct ovs_key_ipv4_tunnel tun_key;
>> + int tunnel_type;
>> + __be64 key;
>> + u32 tunnel_flags = 0;
>> +
>> + if (unlikely(!pskb_may_pull(skb, VXLAN_HLEN + ETH_HLEN)))
>> + goto error;
>> +
>> + vxh = vxlan_hdr(skb);
>> + if (unlikely(vxh->vx_flags != htonl(VXLAN_FLAGS) ||
>> + vxh->vx_vni & htonl(0xff)))
>> + goto error;
>> +
>> + __skb_pull(skb, VXLAN_HLEN);
>> + skb_postpull_rcsum(skb, skb_transport_header(skb), VXLAN_HLEN + ETH_HLEN);
>> +
>> + key = cpu_to_be64(ntohl(vxh->vx_vni) >> 8);
>> +
>> + tunnel_type = TNL_T_PROTO_VXLAN;
>> +
>> + iph = ip_hdr(skb);
>> + vport = ovs_tnl_find_port(dev_net(skb->dev), iph->daddr, iph->saddr,
>> + key, tunnel_type, &mutable);
>> + if (unlikely(!vport)) {
>> + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
>> + goto error;
>> + }
>> +
>> + if (mutable->flags & TNL_F_IN_KEY_MATCH || !mutable->key.daddr)
>> + tunnel_flags = OVS_TNL_F_KEY;
>> + else
>> + key = 0;
>> +
>> + /* Save outer tunnel values */
>> + tnl_tun_key_init(&tun_key, iph, key, tunnel_flags);
>> + OVS_CB(skb)->tun_key = &tun_key;
>> +
>> + ovs_tnl_rcv(vport, skb);
>> + goto out;
>> +
>> +error:
>> + kfree_skb(skb);
>> +out:
>> + return 0;
>> +}
>> +
>> +/* Random value. Irrelevant as long as it's not 0 since we set the handler. */
>> +#define UDP_ENCAP_VXLAN 10
>
> Linux upstream uses 1. Like you said, being !0 is the only functional
> requirement, but consistency is developer friendly ;)
>
>> +static int vxlan_socket_init(struct vxlan_port *vxlan_port)
>> +{
>> + int err;
>> + struct sockaddr_in sin;
>> +
>> + err = sock_create(AF_INET, SOCK_DGRAM, 0, &vxlan_port->vxlan_rcv_socket);
>> + if (err)
>> + goto error;
>> +
>> + sin.sin_family = AF_INET;
>> + sin.sin_addr.s_addr = htonl(INADDR_ANY);
>> + sin.sin_port = htons(vxlan_port->port);
>> +
>> + err = kernel_bind(vxlan_port->vxlan_rcv_socket, (struct sockaddr *)&sin,
>> + sizeof(struct sockaddr_in));
>> + if (err)
>> + goto error_sock;
>> +
>> + udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_type = UDP_ENCAP_VXLAN;
>> + udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_rcv = vxlan_rcv;
>> +
>> + udp_encap_enable();
>
> This is a new function; did you provide a fallback/noop (since it looks
> like this is meant to build on older, >=2.6.26, kernels)?
>
>> +
>> + return 0;
>> +
>> +error_sock:
>> + sock_release(vxlan_port->vxlan_rcv_socket);
>> +error:
>> + pr_warn("cannot register vxlan protocol handler\n");
>> + return err;
>> +}
>> +
>> +static const struct nla_policy vxlan_policy[OVS_TUNNEL_ATTR_MAX + 1] = {
>> + [OVS_TUNNEL_ATTR_DST_PORT] = { .type = NLA_U16 },
>> +};
>> +
>> +static int vxlan_tunnel_setup(struct net *net, const char *linkname,
>> + struct nlattr *options)
>> +{
>> + struct nlattr *a[OVS_TUNNEL_ATTR_MAX + 1];
>> + int err;
>> + u16 dst_port;
>> + struct vxlan_port *vxlan_port;
>> + struct vxlan_if *vxlan_if;
>> +
>> + if (!options) {
>> + err = -EINVAL;
>> + goto out;
>> + }
>> +
>> + err = nla_parse_nested(a, OVS_TUNNEL_ATTR_MAX, options, vxlan_policy);
>
> This is already parsed in tnl_set_config() later. So I'm not sure why
> it's done twice during ->set_options() and ->create().
>
>
>> + if (err)
>> + goto out;
>> +
>> + if (a[OVS_TUNNEL_ATTR_DST_PORT])
>> + dst_port = nla_get_u16(a[OVS_TUNNEL_ATTR_DST_PORT]);
>> + else
>> + dst_port = VXLAN_DST_PORT;
>> +
>> + /* Verify if we already have a socket created for this port */
>> + vxlan_port = vxlan_port_exists(net, dst_port);
>> + if (vxlan_port) {
>> + vxlan_port->count++;
>> + err = 0;
>> + goto out;
>> + }
>> +
>> + /* Add a new socket for this port */
>> + vxlan_port = kmalloc(sizeof(struct vxlan_port), GFP_KERNEL);
>> + if (!vxlan_port) {
>> + err = -ENOMEM;
>> + goto out;
>> + }
>> + memset (vxlan_port, 0, sizeof(struct vxlan_port));
>
> kzalloc()
>
>> +
>> + vxlan_port->port = dst_port;
>> + vxlan_port->count++;
>> + hlist_add_head(&vxlan_port->hash_node,
>> + vxlan_hash_bucket(net, dst_port));
>
> A little unusual to have a hashtable for this. Is this expected to be
> temporary until IANA port is allocated?
>
The idea is to allow destination port configuration on a per-tunnel basis,
so I needed to track this. As Jesse said, maybe a hash table was overkill
here, but that's a design point I think (e.g. I could have done a linked
list instead).
>> +
>> + err = vxlan_socket_init(vxlan_port);
>> + if (err)
>> + goto error_vxlan_if;
>> +
>> + vxlan_if = kmalloc(sizeof(struct vxlan_if), GFP_KERNEL);
>> + if (!vxlan_if) {
>> + err = -ENOMEM;
>> + goto error_vxlan_if;
>> + }
>> + memset(vxlan_if, 0, sizeof(*vxlan_if));
>
> kzalloc()
>
>> + vxlan_if->port = dst_port;
>> + memcpy(vxlan_if->ifname, linkname, IFNAMSIZ);
>> + hlist_add_head(&vxlan_if->hash_node,
>> + vxlanif_hash_bucket(net, linkname));
>> +
>> +out:
>> + return err;
>> +error_vxlan_if:
>> + hlist_del(&vxlan_port->hash_node);
>> + kfree(vxlan_port);
>> + goto out;
>> +}
>> +
>> +static int vxlan_set_options(struct vport *vport, struct nlattr *options)
>> +{
>> + int err;
>> + const char *vname = vport->ops->get_name(vport);
>> +
>> + err = vxlan_tunnel_setup(ovs_dp_get_net(vport->dp), vname, options);
>> + if (err)
>> + goto out;
>> +
>> + err = ovs_tnl_set_options(vport, options);
>> +
>> +out:
>> + return err;
>> +}
>> +
>> +static const struct tnl_ops ovs_vxlan_tnl_ops = {
>> + .tunnel_type = TNL_T_PROTO_VXLAN,
>> + .ipproto = IPPROTO_UDP,
>> + .hdr_len = vxlan_hdr_len,
>> + .build_header = vxlan_build_header,
>> +};
>> +
>> +void vxlan_tnl_destroy(struct vport *vport)
>> +{
>> + struct vxlan_if *vxlan_if;
>> + struct vxlan_port *vxlan_port;
>> + const char *vname = vport->ops->get_name(vport);
>> +
>> + vxlan_if = vxlan_if_by_name(ovs_dp_get_net(vport->dp), vname);
>> + if (!vxlan_if)
>> + goto out;
>> +
>> + vxlan_port = vxlan_port_exists(ovs_dp_get_net(vport->dp),
>> + vxlan_if->port);
>> + if (!vxlan_port)
>> + goto out_if;
>
> Are the above two actually valid failure cases on destroy?
>
>> + if (!--vxlan_port->count) {
>> + sock_release(vxlan_port->vxlan_rcv_socket);
>> + hlist_del(&vxlan_port->hash_node);
>> + kfree(vxlan_port);
>> + }
>> +
>> +out_if:
>> + hlist_del(&vxlan_if->hash_node);
>> + kfree(vxlan_if);
>> +out:
>> + ovs_tnl_destroy(vport);
>> +}
>> +
>> +static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
>> +{
>> + int err;
>> +
>> + err = vxlan_tunnel_setup(ovs_dp_get_net(parms->dp), parms->name,
>> + parms->options);
>> + return ovs_tnl_create(parms, &ovs_vxlan_vport_ops, &ovs_vxlan_tnl_ops);
>> +}
>> +
>> +static int vxlan_init(void)
>> +{
>> + int err;
>> +
>> + vxlan_ifs = kzalloc(VXLAN_IF_HASH_BUCKETS * sizeof(struct hlist_head),
>> + GFP_KERNEL);
>> + if (!vxlan_ifs) {
>> + err = -ENOMEM;
>> + goto out;
>> + }
>> +
>> + vxlan_ports = kzalloc(VXLAN_SOCK_HASH_BUCKETS * sizeof(struct hlist_head),
>> + GFP_KERNEL);
>
> Suppose these hash tables could just be statically allocated...
>
>> + if (!vxlan_ports) {
>> + err = -ENOMEM;
>> + goto free_ifs;
>> + }
>> +
>> +out:
>> + return 0;
>> +free_ifs:
>> + kfree(vxlan_ifs);
>> + goto out;
>> +}
>> +
>> +static void vxlan_exit(void)
>> +{
>> + kfree(vxlan_ports);
>> + kfree(vxlan_ifs);
>
> ...which would obviate the need for ->exit()
>
>> +}
>> +
>> +const struct vport_ops ovs_vxlan_vport_ops = {
>> + .type = OVS_VPORT_TYPE_VXLAN,
>> + .flags = VPORT_F_TUN_ID,
>> + .init = vxlan_init,
>> + .exit = vxlan_exit,
>> + .create = vxlan_tnl_create,
>> + .destroy = vxlan_tnl_destroy,
>> + .set_addr = ovs_tnl_set_addr,
>> + .get_name = ovs_tnl_get_name,
>> + .get_addr = ovs_tnl_get_addr,
>> + .get_options = ovs_tnl_get_options,
>> + .set_options = vxlan_set_options,
>> + .get_dev_flags = ovs_vport_gen_get_dev_flags,
>> + .is_running = ovs_vport_gen_is_running,
>> + .get_operstate = ovs_vport_gen_get_operstate,
>> + .send = ovs_tnl_send,
>> +};
>> +#else
>> +#warning VXLAN tunneling will not be available on kernels before 2.6.26
>> +#endif /* Linux kernel < 2.6.26 */
>> diff --git a/datapath/vport.c b/datapath/vport.c
>> index 4934ac1..a1c7542 100644
>> --- a/datapath/vport.c
>> +++ b/datapath/vport.c
>> @@ -45,6 +45,7 @@ static const struct vport_ops *base_vport_ops_list[] = {
>> &ovs_gre64_vport_ops,
>> #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
>> &ovs_capwap_vport_ops,
>> + &ovs_vxlan_vport_ops,
>> #endif
>> };
>>
>> diff --git a/datapath/vport.h b/datapath/vport.h
>> index 5a7caf5..5080629 100644
>> --- a/datapath/vport.h
>> +++ b/datapath/vport.h
>> @@ -257,5 +257,6 @@ extern const struct vport_ops ovs_gre_vport_ops;
>> extern const struct vport_ops ovs_gre_ft_vport_ops;
>> extern const struct vport_ops ovs_gre64_vport_ops;
>> extern const struct vport_ops ovs_capwap_vport_ops;
>> +extern const struct vport_ops ovs_vxlan_vport_ops;
>>
>> #endif /* vport.h */
>> diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
>> index e7d4b49..2ae5681 100644
>> --- a/include/linux/openvswitch.h
>> +++ b/include/linux/openvswitch.h
>> @@ -186,6 +186,7 @@ enum ovs_vport_type {
>> OVS_VPORT_TYPE_PATCH = 100, /* virtual tunnel connecting two vports */
>> OVS_VPORT_TYPE_GRE, /* GRE tunnel */
>> OVS_VPORT_TYPE_CAPWAP, /* CAPWAP tunnel */
>> + OVS_VPORT_TYPE_VXLAN, /* VXLAN tunnel */
>> OVS_VPORT_TYPE_GRE64 = 104, /* GRE tunnel with 64-bit keys */
>> __OVS_VPORT_TYPE_MAX
>> };
>> diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
>> index 88eba19..11b761d 100644
>> --- a/include/openflow/nicira-ext.h
>> +++ b/include/openflow/nicira-ext.h
>> @@ -1578,9 +1578,11 @@ OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24);
>>
>> /* Tunnel ID.
>> *
>> - * For a packet received via GRE tunnel including a (32-bit) key, the key is
>> - * stored in the low 32-bits and the high bits are zeroed. For other packets,
>> - * the value is 0.
>> + * For a packet received via a GRE or VXLAN tunnel including a (32-bit) key, the
>> + * key is stored in the low 32-bits and the high bits are zeroed. For other
>> + * packets, the value is 0.
>
> Confine to 80 columns since you're touching it already?
>
>> + *
>> + * All zero bits, for packets not received via a keyed tunnel.
>> *
>> * Prereqs: None.
>> *
>> diff --git a/include/openvswitch/tunnel.h b/include/openvswitch/tunnel.h
>> index 42c3621..23d8ba7 100644
>> --- a/include/openvswitch/tunnel.h
>> +++ b/include/openvswitch/tunnel.h
>> @@ -57,6 +57,7 @@ enum {
>> OVS_TUNNEL_ATTR_IN_KEY, /* __be64 key to match on input. */
>> OVS_TUNNEL_ATTR_TOS, /* 8-bit TOS value. */
>> OVS_TUNNEL_ATTR_TTL, /* 8-bit TTL value. */
>> + OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by VXLAN. */
>> __OVS_TUNNEL_ATTR_MAX
>> };
>>
>> diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
>> index 5171171..3dbb798 100644
>> --- a/lib/netdev-vport.c
>> +++ b/lib/netdev-vport.c
>> @@ -173,6 +173,13 @@ netdev_vport_get_netdev_type(const struct dpif_linux_vport *vport)
>> case OVS_VPORT_TYPE_CAPWAP:
>> return "capwap";
>>
>> + case OVS_VPORT_TYPE_VXLAN:
>> + if (tnl_port_config_from_nlattr(vport->options, vport->options_len,
>> + a)) {
>> + break;
>
> Should tnl_port_config_from_nlattr() grow an optional OVS_TUNNEL_ATTR_DST_PORT
> check? And similarly, I seem to be missing the vport_class added to
> netdev_vport_register().
>
>> --- a/vswitchd/vswitch.xml
>> +++ b/vswitchd/vswitch.xml
>> @@ -1247,6 +1246,23 @@
>> February 2013.
>> </dd>
>>
>> + <dt><code>vxlan</code></dt>
>> + <dd>
>> + <p>
>> + An Ethernet tunnel over the experimental, UDP-based VXLAN
>> + protocol described at
>> + <code>http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-02</code>.
>> + VXLAN is currently supported only with the Linux kernel datapath
>> + with kernel version 2.6.26 or later.
>> + </p>
>> + <p>
>> + As an experimental protocol, VXLAN has no officially assigned UDP
>> + port. Open vSwitch currently uses UDP destination port 8472.
>> + The source port used for VXLAN traffic varies on a per-flow basis
>> + between 32768 and 65535 to allow load balancing.
>
> Inconsistent with code (and should use the sysctls as the base for the
> local port range).
>
>> @@ -1427,11 +1447,19 @@
>> </column>
>> </group>
>>
>> - <group title="Tunnel Options: ipsec_gre only">
>> + <group title="Tunnel Options: ipsec_gre and ipsec_vxlan only">
>
> Oops, not completely removed...
>
>> <p>
>> - Only <code>ipsec_gre</code> interfaces support these options.
>> + Only <code>ipsec_gre</code> and <code>ipsec_vxlan</code> interfaces
>> + support these options.
>> </p>
More information about the dev
mailing list