[ovs-dev] [PATCH net-next v3 2/2] datapath: Restructure vxlan tunneling.

Kyle Mestery (kmestery) kmestery at cisco.com
Fri Jul 19 20:33:39 UTC 2013


On Jul 19, 2013, at 3:31 PM, Pravin Shelar <pshelar at nicira.com> wrote:
> On Fri, Jul 19, 2013 at 6:28 AM, Kyle Mestery (kmestery)
> <kmestery at cisco.com> wrote:
>> 
>> On Jul 18, 2013, at 5:22 PM, Pravin B Shelar <pshelar at nicira.com> wrote:
>> 
>>> The following patch restructures vxlan tunneling so that it is more
>>> in sync with the upstream vxlan tunneling code.
>>> 
>>> Signed-off-by: Pravin Shelar <pshelar at nicira.com>
>>> ---
>>> v3-v2:
>>> - Moved kernel version in flow_dissector check to top.
>>> v1-v2:
>>> - Added create flag to vxlan-port add.
>>> - Moved rxhash functions to flow_dissector.c
>>> ---
>>> datapath/compat.h                             |    6 +
>>> datapath/linux/Modules.mk                     |    6 +-
>>> datapath/linux/compat/flow_dissector.c        |  203 +++++++++++
>>> datapath/linux/compat/include/linux/in.h      |   20 ++
>>> datapath/linux/compat/include/linux/skbuff.h  |   22 ++
>>> datapath/linux/compat/include/net/flow_keys.h |   22 ++
>>> datapath/linux/compat/include/net/ip.h        |    7 +
>>> datapath/linux/compat/include/net/ipv6.h      |   15 +
>>> datapath/linux/compat/include/net/vxlan.h     |   43 +++
>>> datapath/linux/compat/vxlan.c                 |  457 +++++++++++++++++++++++++
>>> datapath/vport-vxlan.c                        |  221 +++++--------
>>> 11 files changed, 877 insertions(+), 145 deletions(-)
>>> create mode 100644 datapath/linux/compat/flow_dissector.c
>>> create mode 100644 datapath/linux/compat/include/net/flow_keys.h
>>> create mode 100644 datapath/linux/compat/include/net/vxlan.h
>>> create mode 100644 datapath/linux/compat/vxlan.c
>>> 
>>> diff --git a/datapath/compat.h b/datapath/compat.h
>>> index a6a01d5..4dfd192 100644
>>> --- a/datapath/compat.h
>>> +++ b/datapath/compat.h
>>> @@ -100,4 +100,10 @@ static inline void skb_set_mark(struct sk_buff *skb, u32 mark)
>>> #define rt_dst(rt) (rt->u.dst)
>>> #endif
>>> 
>>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33)
>>> +#define inet_sport(sk)       (inet_sk(sk)->sport)
>>> +#else
>>> +#define inet_sport(sk)       (inet_sk(sk)->inet_sport)
>>> +#endif
>>> +
>>> #endif /* compat.h */
>>> diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
>>> index dcacc79..edaeabb 100644
>>> --- a/datapath/linux/Modules.mk
>>> +++ b/datapath/linux/Modules.mk
>>> @@ -3,6 +3,7 @@ openvswitch_sources += \
>>>      linux/compat/dev-openvswitch.c \
>>>      linux/compat/exthdrs_core.c \
>>>      linux/compat/flex_array.c \
>>> +     linux/compat/flow_dissector.c \
>>>      linux/compat/gre.c \
>>>      linux/compat/gso.c \
>>>      linux/compat/genetlink-openvswitch.c \
>>> @@ -14,6 +15,7 @@ openvswitch_sources += \
>>>      linux/compat/reciprocal_div.c \
>>>      linux/compat/skbuff-openvswitch.c \
>>>      linux/compat/time.c     \
>>> +     linux/compat/vxlan.c    \
>>>      linux/compat/workqueue.c
>>> openvswitch_headers += \
>>>      linux/compat/gso.h \
>>> @@ -65,6 +67,7 @@ openvswitch_headers += \
>>>      linux/compat/include/linux/workqueue.h \
>>>      linux/compat/include/net/checksum.h \
>>>      linux/compat/include/net/dst.h \
>>> +     linux/compat/include/net/flow_keys.h \
>>>      linux/compat/include/net/genetlink.h \
>>>      linux/compat/include/net/gre.h \
>>>      linux/compat/include/net/inet_frag.h \
>>> @@ -76,4 +79,5 @@ openvswitch_headers += \
>>>      linux/compat/include/net/protocol.h \
>>>      linux/compat/include/net/route.h \
>>>      linux/compat/include/net/sock.h \
>>> -     linux/compat/include/net/netns/generic.h
>>> +     linux/compat/include/net/netns/generic.h \
>>> +     linux/compat/include/net/vxlan.h
>>> diff --git a/datapath/linux/compat/flow_dissector.c b/datapath/linux/compat/flow_dissector.c
>>> new file mode 100644
>>> index 0000000..c2078d6
>>> --- /dev/null
>>> +++ b/datapath/linux/compat/flow_dissector.c
>>> @@ -0,0 +1,203 @@
>>> +
>>> +#include <linux/version.h>
>>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
>>> +#include <linux/ip.h>
>>> +#include <linux/ipv6.h>
>>> +#include <linux/if_vlan.h>
>>> +#include <net/ip.h>
>>> +#include <net/ipv6.h>
>>> +#include <linux/igmp.h>
>>> +#include <linux/icmp.h>
>>> +#include <linux/sctp.h>
>>> +#include <linux/dccp.h>
>>> +#include <linux/if_tunnel.h>
>>> +#include <linux/if_pppox.h>
>>> +#include <linux/ppp_defs.h>
>>> +#include <net/flow_keys.h>
>>> +
>> This file appears to be missing license and copyright information.
>> 
>>> +
>>> +/* copy saddr & daddr, possibly using 64bit load/store
>>> + * Equivalent to :   flow->src = iph->saddr;
>>> + *                   flow->dst = iph->daddr;
>>> + */
>>> +static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph)
>>> +{
>>> +     BUILD_BUG_ON(offsetof(typeof(*flow), dst) !=
>>> +                  offsetof(typeof(*flow), src) + sizeof(flow->src));
>>> +     memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
>>> +}
>>> +
>>> +static bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
>>> +{
>>> +     int poff, nhoff = skb_network_offset(skb);
>>> +     u8 ip_proto;
>>> +     __be16 proto = skb->protocol;
>>> +
>>> +     memset(flow, 0, sizeof(*flow));
>>> +
>>> +again:
>>> +     switch (proto) {
>>> +     case __constant_htons(ETH_P_IP): {
>>> +             const struct iphdr *iph;
>>> +             struct iphdr _iph;
>>> +ip:
>>> +             iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
>>> +             if (!iph)
>>> +                     return false;
>>> +
>>> +             if (ip_is_fragment(iph))
>>> +                     ip_proto = 0;
>>> +             else
>>> +                     ip_proto = iph->protocol;
>>> +             iph_to_flow_copy_addrs(flow, iph);
>>> +             nhoff += iph->ihl * 4;
>>> +             break;
>>> +     }
>>> +     case __constant_htons(ETH_P_IPV6): {
>>> +             const struct ipv6hdr *iph;
>>> +             struct ipv6hdr _iph;
>>> +ipv6:
>>> +             iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
>>> +             if (!iph)
>>> +                     return false;
>>> +
>>> +             ip_proto = iph->nexthdr;
>>> +             flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
>>> +             flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
>>> +             nhoff += sizeof(struct ipv6hdr);
>>> +             break;
>>> +     }
>>> +     case __constant_htons(ETH_P_8021Q): {
>>> +             const struct vlan_hdr *vlan;
>>> +             struct vlan_hdr _vlan;
>>> +
>>> +             vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan);
>>> +             if (!vlan)
>>> +                     return false;
>>> +
>>> +             proto = vlan->h_vlan_encapsulated_proto;
>>> +             nhoff += sizeof(*vlan);
>>> +             goto again;
>>> +     }
>>> +     case __constant_htons(ETH_P_PPP_SES): {
>>> +             struct {
>>> +                     struct pppoe_hdr hdr;
>>> +                     __be16 proto;
>>> +             } *hdr, _hdr;
>>> +             hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
>>> +             if (!hdr)
>>> +                     return false;
>>> +             proto = hdr->proto;
>>> +             nhoff += PPPOE_SES_HLEN;
>>> +             switch (proto) {
>>> +             case __constant_htons(PPP_IP):
>>> +                     goto ip;
>>> +             case __constant_htons(PPP_IPV6):
>>> +                     goto ipv6;
>>> +             default:
>>> +                     return false;
>>> +             }
>>> +     }
>>> +     default:
>>> +             return false;
>>> +     }
>>> +
>>> +     switch (ip_proto) {
>>> +     case IPPROTO_GRE: {
>>> +             struct gre_hdr {
>>> +                     __be16 flags;
>>> +                     __be16 proto;
>>> +             } *hdr, _hdr;
>>> +
>>> +             hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
>>> +             if (!hdr)
>>> +                     return false;
>>> +             /*
>>> +              * Only look inside GRE if version zero and no
>>> +              * routing
>>> +              */
>>> +             if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) {
>>> +                     proto = hdr->proto;
>>> +                     nhoff += 4;
>>> +                     if (hdr->flags & GRE_CSUM)
>>> +                             nhoff += 4;
>>> +                     if (hdr->flags & GRE_KEY)
>>> +                             nhoff += 4;
>>> +                     if (hdr->flags & GRE_SEQ)
>>> +                             nhoff += 4;
>>> +                     if (proto == htons(ETH_P_TEB)) {
>>> +                             const struct ethhdr *eth;
>>> +                             struct ethhdr _eth;
>>> +
>>> +                             eth = skb_header_pointer(skb, nhoff,
>>> +                                                      sizeof(_eth), &_eth);
>>> +                             if (!eth)
>>> +                                     return false;
>>> +                             proto = eth->h_proto;
>>> +                             nhoff += sizeof(*eth);
>>> +                     }
>>> +                     goto again;
>>> +             }
>>> +             break;
>>> +     }
>>> +     case IPPROTO_IPIP:
>>> +             goto again;
>>> +     default:
>>> +             break;
>>> +     }
>>> +
>>> +     flow->ip_proto = ip_proto;
>>> +     poff = proto_ports_offset(ip_proto);
>>> +     if (poff >= 0) {
>>> +             __be32 *ports, _ports;
>>> +
>>> +             nhoff += poff;
>>> +             ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports);
>>> +             if (ports)
>>> +                     flow->ports = *ports;
>>> +     }
>>> +
>>> +     flow->thoff = (u16) nhoff;
>>> +
>>> +     return true;
>>> +}
>>> +
>>> +static u32 hashrnd __read_mostly;
>>> +
>>> +static void init_hashrnd(void)
>>> +{
>>> +     if (likely(hashrnd))
>>> +             return;
>>> +     get_random_bytes(&hashrnd, sizeof(hashrnd));
>>> +}
>>> +
>>> +u32 __skb_get_rxhash(struct sk_buff *skb)
>>> +{
>>> +     struct flow_keys keys;
>>> +     u32 hash;
>>> +
>>> +     if (!skb_flow_dissect(skb, &keys))
>>> +             return 0;
>>> +
>>> +     /* get a consistent hash (same value on both flow directions) */
>>> +     if (((__force u32)keys.dst < (__force u32)keys.src) ||
>>> +         (((__force u32)keys.dst == (__force u32)keys.src) &&
>>> +          ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
>>> +             swap(keys.dst, keys.src);
>>> +             swap(keys.port16[0], keys.port16[1]);
>>> +     }
>>> +
>>> +     init_hashrnd();
>>> +
>>> +     hash = jhash_3words((__force u32)keys.dst,
>>> +                         (__force u32)keys.src,
>>> +                         (__force u32)keys.ports, hashrnd);
>>> +     if (!hash)
>>> +             hash = 1;
>>> +
>>> +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,34)
>>> +     skb->rxhash = hash;
>>> +#endif
>>> +     return hash;
>>> +}
>>> +#endif
>>> diff --git a/datapath/linux/compat/include/linux/in.h b/datapath/linux/compat/include/linux/in.h
>>> index f91a832..fa2e026 100644
>>> --- a/datapath/linux/compat/include/linux/in.h
>>> +++ b/datapath/linux/compat/include/linux/in.h
>>> @@ -3,6 +3,26 @@
>>> 
>>> #include_next <linux/in.h>
>>> 
>>> +#include <linux/module.h>
>>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
>>> +static inline int proto_ports_offset(int proto)
>>> +{
>>> +     switch (proto) {
>>> +     case IPPROTO_TCP:
>>> +     case IPPROTO_UDP:
>>> +     case IPPROTO_DCCP:
>>> +     case IPPROTO_ESP:       /* SPI */
>>> +     case IPPROTO_SCTP:
>>> +     case IPPROTO_UDPLITE:
>>> +             return 0;
>>> +     case IPPROTO_AH:        /* SPI */
>>> +             return 4;
>>> +     default:
>>> +             return -EINVAL;
>>> +     }
>>> +}
>>> +#endif
>>> +
>>> #ifndef HAVE_IPV4_IS_MULTICAST
>>> 
>>> static inline bool ipv4_is_loopback(__be32 addr)
>>> diff --git a/datapath/linux/compat/include/linux/skbuff.h b/datapath/linux/compat/include/linux/skbuff.h
>>> index d485b39..c9c103d 100644
>>> --- a/datapath/linux/compat/include/linux/skbuff.h
>>> +++ b/datapath/linux/compat/include/linux/skbuff.h
>>> @@ -251,4 +251,26 @@ static inline void skb_reset_mac_len(struct sk_buff *skb)
>>>      skb->mac_len = skb->network_header - skb->mac_header;
>>> }
>>> #endif
>>> +
>>> +static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
>>> +{
>>> +     might_sleep_if(pri & __GFP_WAIT);
>>> +
>>> +     if (skb_cloned(skb))
>>> +             return pskb_expand_head(skb, 0, 0, pri);
>>> +
>>> +     return 0;
>>> +}
>>> +
>>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
>>> +extern u32 __skb_get_rxhash(struct sk_buff *skb);
>>> +static inline __u32 skb_get_rxhash(struct sk_buff *skb)
>>> +{
>>> +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,34)
>>> +     if (!skb->rxhash)
>>> +#endif
>>> +     return __skb_get_rxhash(skb);
>>> +}
>>> +#endif
>>> +
>>> #endif
>>> diff --git a/datapath/linux/compat/include/net/flow_keys.h b/datapath/linux/compat/include/net/flow_keys.h
>>> new file mode 100644
>>> index 0000000..4de17d1
>>> --- /dev/null
>>> +++ b/datapath/linux/compat/include/net/flow_keys.h
>>> @@ -0,0 +1,22 @@
>>> +#ifndef _NET_FLOW_KEYS_WRAPPER_H
>>> +#define _NET_FLOW_KEYS_WRAPPER_H
>>> +
>>> +#include <linux/version.h>
>>> +
>>> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0)
>>> +#include_next <net/flow_keys.h>
>>> +#else
>>> +struct flow_keys {
>>> +     /* (src,dst) must be grouped, in the same way than in IP header */
>>> +     __be32 src;
>>> +     __be32 dst;
>>> +     union {
>>> +             __be32 ports;
>>> +             __be16 port16[2];
>>> +     };
>>> +     u16 thoff;
>>> +     u8 ip_proto;
>>> +};
>>> +#endif
>>> +
>>> +#endif
>>> diff --git a/datapath/linux/compat/include/net/ip.h b/datapath/linux/compat/include/net/ip.h
>>> index b18b968..1dccdea 100644
>>> --- a/datapath/linux/compat/include/net/ip.h
>>> +++ b/datapath/linux/compat/include/net/ip.h
>>> @@ -11,4 +11,11 @@ extern int         ip_local_out(struct sk_buff *skb);
>>> 
>>> #endif /* linux kernel < 2.6.25 */
>>> 
>>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0)
>>> +static inline bool ip_is_fragment(const struct iphdr *iph)
>>> +{
>>> +     return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
>>> +}
>>> +#endif
>>> +
>>> #endif
>>> diff --git a/datapath/linux/compat/include/net/ipv6.h b/datapath/linux/compat/include/net/ipv6.h
>>> index d1e3248..7ab234a 100644
>>> --- a/datapath/linux/compat/include/net/ipv6.h
>>> +++ b/datapath/linux/compat/include/net/ipv6.h
>>> @@ -23,4 +23,19 @@ enum {
>>> extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
>>>                       int target, unsigned short *fragoff, int *fragflg);
>>> 
>>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
>>> +static inline u32 ipv6_addr_hash(const struct in6_addr *a)
>>> +{
>>> +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
>>> +     const unsigned long *ul = (const unsigned long *)a;
>>> +     unsigned long x = ul[0] ^ ul[1];
>>> +
>>> +     return (u32)(x ^ (x >> 32));
>>> +#else
>>> +     return (__force u32)(a->s6_addr32[0] ^ a->s6_addr32[1] ^
>>> +                          a->s6_addr32[2] ^ a->s6_addr32[3]);
>>> +#endif
>>> +}
>>> +#endif
>>> +
>>> #endif
>>> diff --git a/datapath/linux/compat/include/net/vxlan.h b/datapath/linux/compat/include/net/vxlan.h
>>> new file mode 100644
>>> index 0000000..102bc0c
>>> --- /dev/null
>>> +++ b/datapath/linux/compat/include/net/vxlan.h
>>> @@ -0,0 +1,43 @@
>>> +#ifndef __NET_VXLAN_WRAPPER_H
>>> +#define __NET_VXLAN_WRAPPER_H  1
>>> +
>>> +#include <linux/skbuff.h>
>>> +#include <linux/netdevice.h>
>>> +#include <linux/udp.h>
>>> +
>>> +/* per UDP socket information */
>>> +struct vxlan_sock {
>>> +     struct hlist_node hlist;
>>> +     struct rcu_head   rcu;
>>> +     struct socket     *sock;
>>> +     struct list_head  handler_list;
>>> +};
>>> +
>>> +struct vxlan_handler;
>>> +typedef int (vxlan_rcv_t)(struct vxlan_handler *vh, struct sk_buff *skb, __be32 key);
>>> +
>>> +struct vxlan_handler {
>>> +     vxlan_rcv_t       *rcv;
>>> +     struct list_head   node;
>>> +     void              *data;
>>> +     struct vxlan_sock *vs;
>>> +     atomic_t           refcnt;
>>> +     struct rcu_head    rcu;
>>> +     struct work_struct del_work;
>>> +     int                priority;
>>> +};
>>> +
>>> +void vxlan_handler_put(struct vxlan_handler *vh);
>>> +
>>> +struct vxlan_handler *vxlan_handler_add(struct net *net,
>>> +                                     __be16 portno, vxlan_rcv_t *rcv,
>>> +                                     void *data, int priority, bool create);
>>> +
>>> +int vxlan_xmit_skb(struct net *net, struct vxlan_handler *vh,
>>> +                struct rtable *rt, struct sk_buff *skb,
>>> +                __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
>>> +                __be16 src_port, __be16 dst_port, __be32 vni);
>>> +
>>> +__be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb);
>>> +
>>> +#endif
>>> diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c
>>> new file mode 100644
>>> index 0000000..b41ecc2
>>> --- /dev/null
>>> +++ b/datapath/linux/compat/vxlan.c
>>> @@ -0,0 +1,457 @@
>>> +#include <linux/kernel.h>
>>> +#include <linux/types.h>
>>> +#include <linux/module.h>
>>> +#include <linux/errno.h>
>>> +#include <linux/slab.h>
>>> +#include <linux/skbuff.h>
>>> +#include <linux/rculist.h>
>>> +#include <linux/netdevice.h>
>>> +#include <linux/in.h>
>>> +#include <linux/ip.h>
>>> +#include <linux/udp.h>
>>> +#include <linux/igmp.h>
>>> +#include <linux/etherdevice.h>
>>> +#include <linux/if_ether.h>
>>> +#include <linux/if_vlan.h>
>>> +#include <linux/hash.h>
>>> +#include <linux/ethtool.h>
>>> +#include <net/arp.h>
>>> +#include <net/ndisc.h>
>>> +#include <net/ip.h>
>>> +#include <net/ip_tunnels.h>
>>> +#include <net/icmp.h>
>>> +#include <net/udp.h>
>>> +#include <net/rtnetlink.h>
>>> +#include <net/route.h>
>>> +#include <net/dsfield.h>
>>> +#include <net/inet_ecn.h>
>>> +#include <net/net_namespace.h>
>>> +#include <net/netns/generic.h>
>>> +#include <net/vxlan.h>
>>> +
>> Same thing here, no license or copyright.
>> 
> 
> Most of the files in the compat directory do not have a copyright notice;
> anyway, I will add it.

I wasn't sure why that was, actually; I just thought I'd point it out. Thanks for adding it, though, Pravin!

Kyle


More information about the dev mailing list