[ovs-dev] [PATCH v4] gre: Restructure tunneling.

Pravin B Shelar pshelar at nicira.com
Tue Jun 4 20:31:32 UTC 2013


The following patch restructures the ovs tunneling and gre vport
implementation to bring ovs tunneling more in sync with
upstream kernel tunneling.  As a result, the tunneling code is
simplified, since most of the protocol processing on send and
recv is pushed to kernel tunneling.  For the external ovs
module, the code is moved to the kernel compatibility code.

Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
---
v4-v3:
 - Fix coding style.
 - Fix __build_header() parameters.
 - Using looser check of skb->ip_summed at __skb_gso_segment.
v2-v3:
 - Move rcv packet datapath processing to ip_tunnel code.
 - Use TUNNEL_* flags.
 - better check gre64 vport.
 - return skb if there is no gre port created.
v1-v2:
 - Fix Modules path for ip_tunnels_core.c
---
 datapath/compat.h                              |    6 +
 datapath/flow.c                                |   12 +-
 datapath/flow.h                                |    9 +-
 datapath/linux/Modules.mk                      |    6 +
 datapath/linux/compat/gre.c                    |  352 ++++++++++++++++++++++++
 datapath/linux/compat/gso.c                    |  175 ++++++++++++
 datapath/linux/compat/gso.h                    |   46 +++
 datapath/linux/compat/include/linux/err.h      |    9 +
 datapath/linux/compat/include/linux/if_ether.h |    4 +
 datapath/linux/compat/include/net/gre.h        |  109 ++++++++
 datapath/linux/compat/include/net/ip_tunnels.h |   54 ++++
 datapath/linux/compat/ip_tunnels_core.c        |  115 ++++++++
 datapath/tunnel.c                              |   14 +-
 datapath/tunnel.h                              |   11 +-
 datapath/vport-gre.c                           |  341 +++++++++---------------
 datapath/vport-lisp.c                          |    2 +-
 datapath/vport-vxlan.c                         |    2 +-
 17 files changed, 1025 insertions(+), 242 deletions(-)
 create mode 100644 datapath/linux/compat/gre.c
 create mode 100644 datapath/linux/compat/gso.c
 create mode 100644 datapath/linux/compat/gso.h
 create mode 100644 datapath/linux/compat/include/net/gre.h
 create mode 100644 datapath/linux/compat/include/net/ip_tunnels.h
 create mode 100644 datapath/linux/compat/ip_tunnels_core.c

diff --git a/datapath/compat.h b/datapath/compat.h
index c7fd225..6095323 100644
--- a/datapath/compat.h
+++ b/datapath/compat.h
@@ -93,5 +93,11 @@ static inline void skb_set_mark(struct sk_buff *skb, u32 mark)
 	skb->mark = mark;
 }
 #endif /* after 2.6.20 */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
+#define rt_dst(rt) (rt->dst)
+#else
+#define rt_dst(rt) (rt->u.dst)
+#endif
+
 
 #endif /* compat.h */
diff --git a/datapath/flow.c b/datapath/flow.c
index 3ce926e..a4479cd 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -1013,7 +1013,7 @@ int ipv4_tun_from_nlattr(const struct nlattr *attr,
 		switch (type) {
 		case OVS_TUNNEL_KEY_ATTR_ID:
 			tun_key->tun_id = nla_get_be64(a);
-			tun_key->tun_flags |= OVS_TNL_F_KEY;
+			tun_key->tun_flags |= TUNNEL_KEY;
 			break;
 		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
 			tun_key->ipv4_src = nla_get_be32(a);
@@ -1029,10 +1029,10 @@ int ipv4_tun_from_nlattr(const struct nlattr *attr,
 			ttl = true;
 			break;
 		case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
-			tun_key->tun_flags |= OVS_TNL_F_DONT_FRAGMENT;
+			tun_key->tun_flags |= TUNNEL_DONT_FRAGMENT;
 			break;
 		case OVS_TUNNEL_KEY_ATTR_CSUM:
-			tun_key->tun_flags |= OVS_TNL_F_CSUM;
+			tun_key->tun_flags |= TUNNEL_CSUM;
 			break;
 		default:
 			return -EINVAL;
@@ -1060,7 +1060,7 @@ int ipv4_tun_to_nlattr(struct sk_buff *skb,
 	if (!nla)
 		return -EMSGSIZE;
 
-	if (tun_key->tun_flags & OVS_TNL_F_KEY &&
+	if (tun_key->tun_flags & TUNNEL_KEY &&
 	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id))
 		return -EMSGSIZE;
 	if (tun_key->ipv4_src &&
@@ -1073,10 +1073,10 @@ int ipv4_tun_to_nlattr(struct sk_buff *skb,
 		return -EMSGSIZE;
 	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ipv4_ttl))
 		return -EMSGSIZE;
-	if ((tun_key->tun_flags & OVS_TNL_F_DONT_FRAGMENT) &&
+	if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) &&
 		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
 		return -EMSGSIZE;
-	if ((tun_key->tun_flags & OVS_TNL_F_CSUM) &&
+	if ((tun_key->tun_flags & TUNNEL_CSUM) &&
 		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
 		return -EMSGSIZE;
 
diff --git a/datapath/flow.h b/datapath/flow.h
index dba66cf..dfffed7 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -30,7 +30,9 @@
 #include <linux/jiffies.h>
 #include <linux/time.h>
 #include <linux/flex_array.h>
+
 #include <net/inet_ecn.h>
+#include <net/ip_tunnels.h>
 
 struct sk_buff;
 
@@ -40,11 +42,6 @@ struct sw_flow_actions {
 	struct nlattr actions[];
 };
 
-/* Tunnel flow flags. */
-#define OVS_TNL_F_DONT_FRAGMENT		(1 << 0)
-#define OVS_TNL_F_CSUM			(1 << 1)
-#define OVS_TNL_F_KEY			(1 << 2)
-
 /* Used to memset ovs_key_ipv4_tunnel padding. */
 #define OVS_TUNNEL_KEY_SIZE					\
         (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + 	\
@@ -54,7 +51,7 @@ struct ovs_key_ipv4_tunnel {
 	__be64 tun_id;
 	__be32 ipv4_src;
 	__be32 ipv4_dst;
-	u16  tun_flags;
+	__be16 tun_flags;
 	u8   ipv4_tos;
 	u8   ipv4_ttl;
 };
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index 1434a2d..a62d444 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -3,8 +3,11 @@ openvswitch_sources += \
 	linux/compat/dev-openvswitch.c \
 	linux/compat/exthdrs_core.c \
 	linux/compat/flex_array.c \
+	linux/compat/gre.c \
+	linux/compat/gso.c \
 	linux/compat/genetlink-openvswitch.c \
 	linux/compat/ip_output-openvswitch.c \
+	linux/compat/ip_tunnels_core.c \
 	linux/compat/kmemdup.c \
 	linux/compat/netdevice.c \
 	linux/compat/net_namespace.c \
@@ -13,6 +16,7 @@ openvswitch_sources += \
 	linux/compat/time.c	\
 	linux/compat/workqueue.c
 openvswitch_headers += \
+	linux/compat/gso.h \
 	linux/compat/include/asm/percpu.h \
 	linux/compat/include/linux/compiler.h \
 	linux/compat/include/linux/compiler-gcc.h \
@@ -61,8 +65,10 @@ openvswitch_headers += \
 	linux/compat/include/net/checksum.h \
 	linux/compat/include/net/dst.h \
 	linux/compat/include/net/genetlink.h \
+	linux/compat/include/net/gre.h \
 	linux/compat/include/net/inet_frag.h \
 	linux/compat/include/net/ip.h \
+	linux/compat/include/net/ip_tunnels.h \
 	linux/compat/include/net/ipv6.h \
 	linux/compat/include/net/net_namespace.h \
 	linux/compat/include/net/netlink.h \
diff --git a/datapath/linux/compat/gre.c b/datapath/linux/compat/gre.c
new file mode 100644
index 0000000..63c298b
--- /dev/null
+++ b/datapath/linux/compat/gre.c
@@ -0,0 +1,352 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/module.h>
+#include <linux/if.h>
+#include <linux/if_tunnel.h>
+#include <linux/icmp.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/kmod.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+
+#include <net/gre.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/xfrm.h>
+
+#include "gso.h"
+
+static struct gre_cisco_protocol __rcu *gre_cisco_proto;
+
+static void gre_csum_fix(struct sk_buff *skb)
+{
+	struct gre_base_hdr *greh;
+	__be32 *options;
+	int gre_offset = skb_transport_offset(skb);
+
+	greh = (struct gre_base_hdr *)skb_transport_header(skb);
+	options = ((__be32 *)greh + 1);
+
+	*options = 0;
+	*(__sum16 *)options = csum_fold(skb_checksum(skb, gre_offset,
+						     skb->len - gre_offset, 0));
+}
+
+struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
+{
+	int err;
+
+	skb_reset_inner_headers(skb);
+
+	if (skb_is_gso(skb)) {
+		if (gre_csum)
+			OVS_GSO_CB(skb)->fix_segment = gre_csum_fix;
+	} else {
+		if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) {
+			err = skb_checksum_help(skb);
+			if (err)
+				goto error;
+
+		} else if (skb->ip_summed != CHECKSUM_PARTIAL)
+			skb->ip_summed = CHECKSUM_NONE;
+	}
+	return skb;
+error:
+	kfree_skb(skb);
+	return ERR_PTR(err);
+}
+
+static bool is_gre_gso(struct sk_buff *skb)
+{
+	return skb_is_gso(skb);
+}
+
+void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+		      int hdr_len)
+{
+	struct gre_base_hdr *greh;
+
+	__skb_push(skb, hdr_len);
+
+	greh = (struct gre_base_hdr *)skb->data;
+	greh->flags = tnl_flags_to_gre_flags(tpi->flags);
+	greh->protocol = tpi->proto;
+
+	if (tpi->flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
+		__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
+
+		if (tpi->flags & TUNNEL_SEQ) {
+			*ptr = tpi->seq;
+			ptr--;
+		}
+		if (tpi->flags & TUNNEL_KEY) {
+			*ptr = tpi->key;
+			ptr--;
+		}
+		if (tpi->flags & TUNNEL_CSUM && !is_gre_gso(skb)) {
+			*ptr = 0;
+			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
+						skb->len, 0));
+		}
+	}
+}
+
+static __sum16 check_checksum(struct sk_buff *skb)
+{
+	__sum16 csum = 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		csum = csum_fold(skb->csum);
+
+		if (!csum)
+			break;
+		/* Fall through. */
+
+	case CHECKSUM_NONE:
+		skb->csum = 0;
+		csum = __skb_checksum_complete(skb);
+		skb->ip_summed = CHECKSUM_COMPLETE;
+		break;
+	}
+
+	return csum;
+}
+
+static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+			    bool *csum_err)
+{
+	unsigned int ip_hlen = ip_hdrlen(skb);
+	struct gre_base_hdr *greh;
+	__be32 *options;
+	int hdr_len;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
+		return -EINVAL;
+
+	greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
+	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
+		return -EINVAL;
+
+	tpi->flags = gre_flags_to_tnl_flags(greh->flags);
+	hdr_len = ip_gre_calc_hlen(tpi->flags);
+
+	if (!pskb_may_pull(skb, hdr_len))
+		return -EINVAL;
+
+	greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
+	tpi->proto = greh->protocol;
+
+	options = (__be32 *)(greh + 1);
+	if (greh->flags & GRE_CSUM) {
+		if (check_checksum(skb)) {
+			*csum_err = true;
+			return -EINVAL;
+		}
+		options++;
+	}
+
+	if (greh->flags & GRE_KEY) {
+		tpi->key = *options;
+		options++;
+	} else
+		tpi->key = 0;
+
+	if (unlikely(greh->flags & GRE_SEQ)) {
+		tpi->seq = *options;
+		options++;
+	} else
+		tpi->seq = 0;
+
+	/* WCCP version 1 and 2 protocol decoding.
+	 * - Change protocol to IP
+	 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+	 */
+	if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+		tpi->proto = htons(ETH_P_IP);
+		if ((*(u8 *)options & 0xF0) != 0x40) {
+			hdr_len += 4;
+			if (!pskb_may_pull(skb, hdr_len))
+				return -EINVAL;
+		}
+	}
+
+	return iptunnel_pull_header(skb, hdr_len, tpi->proto);
+}
+
+static int gre_cisco_rcv(struct sk_buff *skb)
+{
+	struct tnl_ptk_info tpi;
+	bool csum_err = false;
+	struct gre_cisco_protocol *proto;
+
+	rcu_read_lock();
+	proto = rcu_dereference(gre_cisco_proto);
+	if (!proto)
+		goto drop;
+
+	if (parse_gre_header(skb, &tpi, &csum_err) < 0)
+		goto drop;
+	proto->handler(skb, &tpi);
+	rcu_read_unlock();
+	return 0;
+
+drop:
+	rcu_read_unlock();
+	kfree_skb(skb);
+	return 0;
+}
+
+static const struct gre_protocol ipgre_protocol = {
+	.handler	=	gre_cisco_rcv,
+};
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
+
+int gre_add_protocol(const struct gre_protocol *proto, u8 version)
+{
+	if (version >= GREPROTO_MAX)
+		return -EINVAL;
+
+	return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ?
+		0 : -EBUSY;
+}
+
+int gre_del_protocol(const struct gre_protocol *proto, u8 version)
+{
+	int ret;
+
+	if (version >= GREPROTO_MAX)
+		return -EINVAL;
+
+	ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ?
+		0 : -EBUSY;
+
+	if (ret)
+		return ret;
+
+	synchronize_net();
+	return 0;
+}
+
+static int gre_rcv(struct sk_buff *skb)
+{
+	const struct gre_protocol *proto;
+	u8 ver;
+	int ret;
+
+	if (!pskb_may_pull(skb, 12))
+		goto drop;
+
+	ver = skb->data[1] & 0x7f;
+	if (ver >= GREPROTO_MAX)
+		goto drop;
+
+	rcu_read_lock();
+	proto = rcu_dereference(gre_proto[ver]);
+	if (!proto || !proto->handler)
+		goto drop_unlock;
+	ret = proto->handler(skb);
+	rcu_read_unlock();
+	return ret;
+
+drop_unlock:
+	rcu_read_unlock();
+drop:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+static const struct net_protocol net_gre_protocol = {
+	.handler     = gre_rcv,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32)
+	.netns_ok    = 1,
+#endif
+};
+#endif
+
+static int gre_compat_init(void)
+{
+	int err;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+	if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
+		pr_err("can't add protocol\n");
+		return -EAGAIN;
+	}
+#endif
+	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
+	if (err) {
+		pr_warn("cannot register gre protocol handler\n");
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+		inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
+#endif
+	}
+
+	return err;
+}
+
+static int gre_compat_exit(void)
+{
+	int ret;
+
+	ret = gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
+	if (ret)
+		return ret;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+	ret = inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
+	if (ret)
+		return ret;
+#endif
+	return 0;
+}
+
+int gre_cisco_register(struct gre_cisco_protocol *newp)
+{
+	int err;
+
+	err = gre_compat_init();
+	if (err)
+		return err;
+
+	return (cmpxchg((struct gre_cisco_protocol **)&gre_cisco_proto, NULL, newp) == NULL) ?
+		0 : -EBUSY;
+}
+
+int gre_cisco_unregister(struct gre_cisco_protocol *proto)
+{
+	int ret;
+
+	ret = (cmpxchg((struct gre_cisco_protocol **)&gre_cisco_proto, proto, NULL) == proto) ?
+		0 : -EINVAL;
+
+	if (ret)
+		return ret;
+
+	synchronize_net();
+	ret = gre_compat_exit();
+	return ret;
+}
diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c
new file mode 100644
index 0000000..5a3a786
--- /dev/null
+++ b/datapath/linux/compat/gso.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/module.h>
+#include <linux/if.h>
+#include <linux/if_tunnel.h>
+#include <linux/icmp.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/kmod.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+
+#include <net/gre.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/xfrm.h>
+
+#include "gso.h"
+
+static __be16 skb_network_protocol(struct sk_buff *skb)
+{
+	__be16 type = skb->protocol;
+	int vlan_depth = ETH_HLEN;
+
+	while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
+		struct vlan_hdr *vh;
+
+		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
+			return 0;
+
+		vh = (struct vlan_hdr *)(skb->data + vlan_depth);
+		type = vh->h_vlan_encapsulated_proto;
+		vlan_depth += VLAN_HLEN;
+	}
+
+	return type;
+}
+
+static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
+					   netdev_features_t features,
+					   bool tx_path)
+{
+	struct iphdr *iph	= ip_hdr(skb);
+	int tnl_hlen		= skb_inner_network_offset(skb);
+	struct sk_buff *skb1	= skb;
+	struct ovs_gso_cb cb	= *OVS_GSO_CB(skb);
+	struct sk_buff *segs;
+
+	__skb_pull(skb, tnl_hlen);
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	skb->protocol = skb_network_protocol(skb);
+
+	segs = __skb_gso_segment(skb, 0, tx_path);
+	if (!segs || IS_ERR(segs))
+		goto free;
+
+	skb = segs;
+	while (skb) {
+		__skb_push(skb, tnl_hlen);
+		skb_reset_mac_header(skb);
+		skb_reset_network_header(skb);
+		skb_set_transport_header(skb, sizeof(struct iphdr));
+		skb->mac_len = 0;
+
+		memcpy(ip_hdr(skb), iph, tnl_hlen);
+		*OVS_GSO_CB(skb) = cb;
+		if (OVS_GSO_CB(skb)->fix_segment)
+			OVS_GSO_CB(skb)->fix_segment(skb);
+		skb = skb->next;
+	}
+free:
+	consume_skb(skb1);
+	return segs;
+}
+
+static bool need_linearize(const struct sk_buff *skb)
+{
+	int i;
+
+	if (unlikely(skb_shinfo(skb)->frag_list))
+		return true;
+
+	/*
+	 * Generally speaking we should linearize if there are paged frags.
+	 * However, if all of the refcounts are 1 we know nobody else can
+	 * change them from underneath us and we can skip the linearization.
+	 */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+		if (unlikely(page_count(skb_frag_page(&skb_shinfo(skb)->frags[i])) > 1))
+			return true;
+
+	return false;
+}
+
+/*
+ * Replacement for ip_local_out(): segments GSO packets (or resolves a
+ * pending partial checksum) in software, then sends each resulting packet.
+ * skb is released on every path.  Returns 0 if the packet was dropped
+ * before transmission, otherwise NETDEV_TX_OK or the last transmit error.
+ */
+int rpl_ip_local_out(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	int ret = NETDEV_TX_OK;
+
+	if (skb_is_gso(skb)) {
+		skb = tnl_skb_gso_segment(skb, 0, false);
+		if (!skb || IS_ERR(skb))
+			return 0;
+	}  else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		if (unlikely(need_linearize(skb)) && __skb_linearize(skb))
+			goto drop;
+		if (unlikely(skb_checksum_help(skb)))
+			goto drop;
+	}
+
+	while (skb) {
+		struct sk_buff *next_skb = skb->next;
+		int err;
+
+		if (next_skb)
+			skb_dst_set(skb, dst_clone(dst));
+		else
+			skb_dst_set(skb, dst);
+
+		skb->next = NULL;
+
+		/*
+		 * Allow our local IP stack to fragment the outer packet even
+		 * if the DF bit is set as a last resort.  We also need to
+		 * force selection of an IP ID here because Linux will
+		 * otherwise leave it at 0 if the packet originally had DF set.
+		 */
+		skb->local_df = 1;
+		tunnel_ip_select_ident(skb,
+		       (const struct iphdr *)skb_inner_network_header(skb),
+			skb_dst(skb));
+
+		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+
+#undef ip_local_out
+		err = ip_local_out(skb);
+		if (unlikely(net_xmit_eval(err)))
+			ret = err;
+
+		skb = next_skb;
+	}
+	return ret;
+
+drop:
+	/* The caller handed over ownership of skb, so free it on failure. */
+	kfree_skb(skb);
+	return 0;
+}
diff --git a/datapath/linux/compat/gso.h b/datapath/linux/compat/gso.h
new file mode 100644
index 0000000..3157041
--- /dev/null
+++ b/datapath/linux/compat/gso.h
@@ -0,0 +1,46 @@
+#ifndef __LINUX_GSO_WRAPPER_H
+#define __LINUX_GSO_WRAPPER_H
+
+#include <linux/skbuff.h>
+#include <net/protocol.h>
+
+#include "datapath.h"
+
+struct ovs_gso_cb {
+	struct ovs_skb_cb dp_cb;
+	sk_buff_data_t	inner_network_header;
+	void (*fix_segment)(struct sk_buff *);
+};
+#define OVS_GSO_CB(skb) ((struct ovs_gso_cb *)(skb)->cb)
+
+#define skb_inner_network_header rpl_skb_inner_network_header
+
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb)
+{
+	return skb->head + OVS_GSO_CB(skb)->inner_network_header;
+}
+#else
+static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb)
+{
+	return OVS_GSO_CB(skb)->inner_network_header;
+}
+#endif
+
+#define skb_inner_network_offset rpl_skb_inner_network_offset
+static inline int skb_inner_network_offset(const struct sk_buff *skb)
+{
+	return skb_inner_network_header(skb) - skb->data;
+}
+
+#define skb_reset_inner_headers rpl_skb_reset_inner_headers
+static inline void skb_reset_inner_headers(struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(struct ovs_gso_cb) > 48);
+	OVS_GSO_CB(skb)->inner_network_header = skb->network_header;
+	OVS_GSO_CB(skb)->fix_segment = NULL;
+}
+
+#define ip_local_out rpl_ip_local_out
+int ip_local_out(struct sk_buff *skb);
+#endif
diff --git a/datapath/linux/compat/include/linux/err.h b/datapath/linux/compat/include/linux/err.h
index 50faf2a..d640298 100644
--- a/datapath/linux/compat/include/linux/err.h
+++ b/datapath/linux/compat/include/linux/err.h
@@ -18,4 +18,13 @@ static inline void *ERR_CAST(const void *ptr)
 }
 #endif /* HAVE_ERR_CAST */
 
+#ifndef IS_ERR_VALUE
+#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
+static inline long __must_check IS_ERR_OR_NULL(const void *ptr)
+{
+	return !ptr || IS_ERR_VALUE((unsigned long)ptr);
+}
+#endif
+
+
 #endif
diff --git a/datapath/linux/compat/include/linux/if_ether.h b/datapath/linux/compat/include/linux/if_ether.h
index 85b0d22..e22ea96 100644
--- a/datapath/linux/compat/include/linux/if_ether.h
+++ b/datapath/linux/compat/include/linux/if_ether.h
@@ -20,4 +20,8 @@
 #define ETH_P_802_3_MIN        0x0600
 #endif
 
+#ifndef ETH_P_8021AD
+#define ETH_P_8021AD    0x88A8          /* 802.1ad Service VLAN         */
+#endif
+
 #endif
diff --git a/datapath/linux/compat/include/net/gre.h b/datapath/linux/compat/include/net/gre.h
new file mode 100644
index 0000000..3285ccd
--- /dev/null
+++ b/datapath/linux/compat/include/net/gre.h
@@ -0,0 +1,109 @@
+#ifndef __LINUX_GRE_WRAPPER_H
+#define __LINUX_GRE_WRAPPER_H
+
+#include <linux/skbuff.h>
+#include <net/ip_tunnels.h>
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,37)
+#include_next <net/gre.h>
+
+#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,37) */
+
+#define GREPROTO_CISCO		0
+#define GREPROTO_PPTP		1
+#define GREPROTO_MAX		2
+#define GRE_IP_PROTO_MAX	2
+
+struct gre_protocol {
+	int  (*handler)(struct sk_buff *skb);
+	void (*err_handler)(struct sk_buff *skb, u32 info);
+};
+
+int gre_add_protocol(const struct gre_protocol *proto, u8 version);
+int gre_del_protocol(const struct gre_protocol *proto, u8 version);
+
+#endif
+
+#define GRE_IP_PROTO_MAX	2
+
+struct gre_base_hdr {
+	__be16 flags;
+	__be16 protocol;
+};
+#define GRE_HEADER_SECTION 4
+
+#define MAX_GRE_PROTO_PRIORITY 255
+struct gre_cisco_protocol {
+	int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi);
+	int (*err_handler)(struct sk_buff *skb, u32 info,
+			   const struct tnl_ptk_info *tpi);
+	u8 priority;
+};
+
+#define gre_build_header rpl_gre_build_header
+void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+		      int hdr_len);
+
+#define gre_handle_offloads rpl_gre_handle_offloads
+struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum);
+
+int gre_cisco_register(struct gre_cisco_protocol *proto);
+int gre_cisco_unregister(struct gre_cisco_protocol *proto);
+
+static inline int ip_gre_calc_hlen(__be16 o_flags)
+{
+	int addend = 4;
+
+	if (o_flags & TUNNEL_CSUM)
+		addend += 4;
+	if (o_flags & TUNNEL_KEY)
+		addend += 4;
+	if (o_flags & TUNNEL_SEQ)
+		addend += 4;
+	return addend;
+}
+
+static inline __be16 gre_flags_to_tnl_flags(__be16 flags)
+{
+	__be16 tflags = 0;
+
+	if (flags & GRE_CSUM)
+		tflags |= TUNNEL_CSUM;
+	if (flags & GRE_ROUTING)
+		tflags |= TUNNEL_ROUTING;
+	if (flags & GRE_KEY)
+		tflags |= TUNNEL_KEY;
+	if (flags & GRE_SEQ)
+		tflags |= TUNNEL_SEQ;
+	if (flags & GRE_STRICT)
+		tflags |= TUNNEL_STRICT;
+	if (flags & GRE_REC)
+		tflags |= TUNNEL_REC;
+	if (flags & GRE_VERSION)
+		tflags |= TUNNEL_VERSION;
+
+	return tflags;
+}
+
+static inline __be16 tnl_flags_to_gre_flags(__be16 tflags)
+{
+	__be16 flags = 0;
+
+	if (tflags & TUNNEL_CSUM)
+		flags |= GRE_CSUM;
+	if (tflags & TUNNEL_ROUTING)
+		flags |= GRE_ROUTING;
+	if (tflags & TUNNEL_KEY)
+		flags |= GRE_KEY;
+	if (tflags & TUNNEL_SEQ)
+		flags |= GRE_SEQ;
+	if (tflags & TUNNEL_STRICT)
+		flags |= GRE_STRICT;
+	if (tflags & TUNNEL_REC)
+		flags |= GRE_REC;
+	if (tflags & TUNNEL_VERSION)
+		flags |= GRE_VERSION;
+
+	return flags;
+}
+#endif
diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h
new file mode 100644
index 0000000..ad17c9d
--- /dev/null
+++ b/datapath/linux/compat/include/net/ip_tunnels.h
@@ -0,0 +1,54 @@
+#ifndef __NET_IP_TUNNELS_WRAPPER_H
+#define __NET_IP_TUNNELS_WRAPPER_H 1
+
+#include <linux/if_tunnel.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <net/dsfield.h>
+#include <net/flow.h>
+#include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/rtnetlink.h>
+
+#define TUNNEL_CSUM	__cpu_to_be16(0x01)
+#define TUNNEL_ROUTING	__cpu_to_be16(0x02)
+#define TUNNEL_KEY	__cpu_to_be16(0x04)
+#define TUNNEL_SEQ	__cpu_to_be16(0x08)
+#define TUNNEL_STRICT	__cpu_to_be16(0x10)
+#define TUNNEL_REC	__cpu_to_be16(0x20)
+#define TUNNEL_VERSION	__cpu_to_be16(0x40)
+#define TUNNEL_NO_KEY	__cpu_to_be16(0x80)
+#define TUNNEL_DONT_FRAGMENT	__cpu_to_be16(0x0100)
+
+struct tnl_ptk_info {
+	__be16 flags;
+	__be16 proto;
+	__be32 key;
+	__be32 seq;
+};
+
+#define PACKET_RCVD	0
+#define PACKET_REJECT	1
+
+static inline void tunnel_ip_select_ident(struct sk_buff *skb,
+					  const struct iphdr  *old_iph,
+					  struct dst_entry *dst)
+{
+	struct iphdr *iph = ip_hdr(skb);
+
+	/* Use inner packet iph-id if possible. */
+	if (skb->protocol == htons(ETH_P_IP) && old_iph->id)
+		iph->id = old_iph->id;
+	else
+		__ip_select_ident(iph, dst,
+				(skb_shinfo(skb)->gso_segs ?: 1) - 1);
+}
+
+int iptunnel_xmit(struct net *net, struct rtable *rt,
+		  struct sk_buff *skb,
+		  __be32 src, __be32 dst, __u8 proto,
+		  __u8 tos, __u8 ttl, __be16 df);
+
+int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
+#endif /* __NET_IP_TUNNELS_WRAPPER_H */
diff --git a/datapath/linux/compat/ip_tunnels_core.c b/datapath/linux/compat/ip_tunnels_core.c
new file mode 100644
index 0000000..a8e62ee
--- /dev/null
+++ b/datapath/linux/compat/ip_tunnels_core.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/in.h>
+#include <linux/in_route.h>
+#include <linux/inetdevice.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <linux/workqueue.h>
+#include <linux/rculist.h>
+#include <net/ip_tunnels.h>
+#include <net/route.h>
+#include <net/xfrm.h>
+
+#include "gso.h"
+#include "compat.h"
+
+int iptunnel_xmit(struct net *net, struct rtable *rt,
+		  struct sk_buff *skb,
+		  __be32 src, __be32 dst, __u8 proto,
+		  __u8 tos, __u8 ttl, __be16 df)
+{
+	int pkt_len = skb->len;
+	struct iphdr *iph;
+	int err;
+
+	nf_reset(skb);
+	secpath_reset(skb);
+	skb_clear_rxhash(skb);
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt_dst(rt));
+#if 0
+	/* Do not clear ovs_skb_cb.  It will be done in gso code. */
+	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+#endif
+
+	/* Push down and install the IP header. */
+	__skb_push(skb, sizeof(struct iphdr));
+	skb_reset_network_header(skb);
+
+	iph = ip_hdr(skb);
+
+	iph->version	=	4;
+	iph->ihl	=	sizeof(struct iphdr) >> 2;
+	iph->frag_off	=	df;
+	iph->protocol	=	proto;
+	iph->tos	=	tos;
+	iph->daddr	=	dst;
+	iph->saddr	=	src;
+	iph->ttl	=	ttl;
+	tunnel_ip_select_ident(skb,
+			       (const struct iphdr *)skb_inner_network_header(skb),
+			       &rt_dst(rt));
+
+	err = ip_local_out(skb);
+	if (unlikely(net_xmit_eval(err)))
+		pkt_len = 0;
+	return pkt_len;
+}
+
+int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
+{
+	if (inner_proto == htons(ETH_P_TEB)) {
+		struct ethhdr *eh;
+
+		if (unlikely(!pskb_may_pull(skb, hdr_len + ETH_HLEN)))
+			return -ENOMEM;
+
+		__skb_pull(skb, hdr_len);
+		skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN);
+
+		eh = (struct ethhdr *)skb->data;
+
+		if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN))
+			skb->protocol = eh->h_proto;
+		else
+			skb->protocol = htons(ETH_P_802_2);
+
+	} else {
+		if (unlikely(!pskb_may_pull(skb, hdr_len)))
+			return -ENOMEM;
+
+		__skb_pull(skb, hdr_len);
+		skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len);
+		skb->protocol = inner_proto;
+	}
+
+	nf_reset(skb);
+	secpath_reset(skb);
+	skb_clear_rxhash(skb);
+	skb_dst_drop(skb);
+	vlan_set_tci(skb, 0);
+	skb_set_queue_mapping(skb, 0);
+	skb->pkt_type = PACKET_HOST;
+	return 0;
+}
diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index 8c93e18..9102786 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -37,12 +37,6 @@
 #include "vlan.h"
 #include "vport.h"
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
-#define rt_dst(rt) (rt->dst)
-#else
-#define rt_dst(rt) (rt->u.dst)
-#endif
-
 /**
  *	ovs_tnl_rcv - ingress point for generic tunnel code
  *
@@ -85,9 +79,9 @@ void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb,
 	ovs_vport_receive(vport, skb, tun_key);
 }
 
-static struct rtable *find_route(struct net *net,
-		__be32 *saddr, __be32 daddr, u8 ipproto,
-		u8 tos, u32 skb_mark)
+struct rtable *find_route(struct net *net,
+			  __be32 *saddr, __be32 daddr, u8 ipproto,
+			  u8 tos, u32 skb_mark)
 {
 	struct rtable *rt;
 	/* Tunnel configuration keeps DSCP part of TOS bits, But Linux
@@ -289,7 +283,7 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb,
 		iph->tos	= OVS_CB(skb)->tun_key->ipv4_tos;
 		iph->ttl	= OVS_CB(skb)->tun_key->ipv4_ttl;
 		iph->frag_off	= OVS_CB(skb)->tun_key->tun_flags &
-				  OVS_TNL_F_DONT_FRAGMENT ?  htons(IP_DF) : 0;
+				  TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
 		/*
 		 * Allow our local IP stack to fragment the outer packet even
 		 * if the DF bit is set as a last resort.  We also need to
diff --git a/datapath/tunnel.h b/datapath/tunnel.h
index 89c4e16..17de7c4 100644
--- a/datapath/tunnel.h
+++ b/datapath/tunnel.h
@@ -26,6 +26,11 @@
 #include "flow.h"
 #include "vport.h"
 
+struct rtable *find_route(struct net *net,
+			  __be32 *saddr, __be32 daddr, u8 ipproto,
+			  u8 tos, u32 skb_mark);
+
+u16 ovs_tnl_get_src_port(struct sk_buff *skb);
 
 int ovs_tnl_send(struct vport *vport, struct sk_buff *skb,
 		 u8 ipproto, int tunnel_hlen,
@@ -35,10 +40,10 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb,
 
 void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb,
 		 struct ovs_key_ipv4_tunnel *tun_key);
-u16 ovs_tnl_get_src_port(struct sk_buff *skb);
 
 static inline void tnl_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
-				    const struct iphdr *iph, __be64 tun_id, u32 tun_flags)
+					 const struct iphdr *iph, __be64 tun_id,
+					 __be16 tun_flags)
 {
 	tun_key->tun_id = tun_id;
 	tun_key->ipv4_src = iph->saddr;
@@ -52,4 +57,4 @@ static inline void tnl_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
 	       sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE);
 }
 
-#endif /* tunnel.h */
+#endif /* TUNNEL_H */
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index add17d9..e98f423 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -24,50 +24,29 @@
 #include <linux/if_tunnel.h>
 #include <linux/if_vlan.h>
 #include <linux/in.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/in_route.h>
+#include <linux/inetdevice.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/workqueue.h>
+#include <linux/rculist.h>
+#include <net/route.h>
+#include <net/xfrm.h>
+
 
 #include <net/icmp.h>
 #include <net/ip.h>
+#include <net/ip_tunnels.h>
+#include <net/gre.h>
 #include <net/protocol.h>
 
 #include "datapath.h"
 #include "tunnel.h"
 #include "vport.h"
 
-/*
- * The GRE header is composed of a series of sections: a base and then a variable
- * number of options.
- */
-#define GRE_HEADER_SECTION 4
-
-struct gre_base_hdr {
-	__be16 flags;
-	__be16 protocol;
-};
-
-static int gre_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
-{
-	int len = GRE_HEADER_SECTION;
-
-	if (tun_key->tun_flags & OVS_TNL_F_KEY)
-		len += GRE_HEADER_SECTION;
-	if (tun_key->tun_flags & OVS_TNL_F_CSUM)
-		len += GRE_HEADER_SECTION;
-	return len;
-}
-
-static int gre64_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
-{
-	/* Set key for GRE64 tunnels, even when key if is zero. */
-	int len = GRE_HEADER_SECTION +		/* GRE Hdr */
-		  GRE_HEADER_SECTION +		/* GRE Key */
-		  GRE_HEADER_SECTION;		/* GRE SEQ */
-
-	if (tun_key->tun_flags & OVS_TNL_F_CSUM)
-		len += GRE_HEADER_SECTION;
-
-	return len;
-}
-
 /* Returns the least-significant 32 bits of a __be64. */
 static __be32 be64_get_low32(__be64 x)
 {
@@ -78,61 +57,30 @@ static __be32 be64_get_low32(__be64 x)
 #endif
 }
 
-static __be32 be64_get_high32(__be64 x)
+static __be16 filter_tnl_flags(__be16 flags)
 {
-#ifdef __BIG_ENDIAN
-	return (__force __be32)((__force u64)x >> 32);
-#else
-	return (__force __be32)x;
-#endif
+	return flags & (TUNNEL_CSUM | TUNNEL_KEY);
 }
 
-static void __gre_build_header(struct sk_buff *skb,
-			       int tunnel_hlen,
-			       bool is_gre64)
+static struct sk_buff *__build_header(struct sk_buff *skb,
+				      int tunnel_hlen,
+				      __be32 seq, __be16 gre64_flag)
 {
 	const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
-	__be32 *options = (__be32 *)(skb_network_header(skb) + tunnel_hlen
-			- GRE_HEADER_SECTION);
-	struct gre_base_hdr *greh = (struct gre_base_hdr *) skb_transport_header(skb);
-	greh->protocol = htons(ETH_P_TEB);
-	greh->flags = 0;
-
-	/* Work backwards over the options so the checksum is last. */
-	if (tun_key->tun_flags & OVS_TNL_F_KEY || is_gre64) {
-		greh->flags |= GRE_KEY;
-		if (is_gre64) {
-			/* Set higher 32 bits to seq. */
-			*options = be64_get_high32(tun_key->tun_id);
-			options--;
-			greh->flags |= GRE_SEQ;
-		}
-		*options = be64_get_low32(tun_key->tun_id);
-		options--;
-	}
+	struct tnl_ptk_info tpi;
 
-	if (tun_key->tun_flags & OVS_TNL_F_CSUM) {
-		greh->flags |= GRE_CSUM;
-		*options = 0;
-		*(__sum16 *)options = csum_fold(skb_checksum(skb,
-						skb_transport_offset(skb),
-						skb->len - skb_transport_offset(skb),
-						0));
-	}
-}
+	skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
+	if (IS_ERR(skb))
+		return NULL;
 
-static void gre_build_header(const struct vport *vport,
-			     struct sk_buff *skb,
-			     int tunnel_hlen)
-{
-	__gre_build_header(skb, tunnel_hlen, false);
-}
+	tpi.flags = filter_tnl_flags(tun_key->tun_flags) | gre64_flag;
 
-static void gre64_build_header(const struct vport *vport,
-			       struct sk_buff *skb,
-			       int tunnel_hlen)
-{
-	__gre_build_header(skb, tunnel_hlen, true);
+	tpi.proto = htons(ETH_P_TEB);
+	tpi.key = be64_get_low32(tun_key->tun_id);
+	tpi.seq = seq;
+	gre_build_header(skb, &tpi, tunnel_hlen);
+
+	return skb;
 }
 
 static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
@@ -144,149 +92,98 @@ static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
 #endif
 }
 
-static int parse_header(struct iphdr *iph, __be16 *flags, __be64 *tun_id,
-			bool *is_gre64)
-{
-	/* IP and ICMP protocol handlers check that the IHL is valid. */
-	struct gre_base_hdr *greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2));
-	__be32 *options = (__be32 *)(greh + 1);
-	int hdr_len;
-
-	*flags = greh->flags;
-
-	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
-		return -EINVAL;
-
-	if (unlikely(greh->protocol != htons(ETH_P_TEB)))
-		return -EINVAL;
-
-	hdr_len = GRE_HEADER_SECTION;
-
-	if (greh->flags & GRE_CSUM) {
-		hdr_len += GRE_HEADER_SECTION;
-		options++;
-	}
-
-	if (greh->flags & GRE_KEY) {
-		__be32 seq;
-		__be32 gre_key;
-
-		gre_key = *options;
-		hdr_len += GRE_HEADER_SECTION;
-		options++;
-
-		if (greh->flags & GRE_SEQ) {
-			seq = *options;
-			*is_gre64 = true;
-		} else {
-			seq = 0;
-			*is_gre64 = false;
-		}
-		*tun_id = key_to_tunnel_id(gre_key, seq);
-	} else {
-		*tun_id = 0;
-		/* Ignore GRE seq if there is no key present. */
-		*is_gre64 = false;
-	}
-
-	if (greh->flags & GRE_SEQ)
-		hdr_len += GRE_HEADER_SECTION;
-
-	return hdr_len;
-}
-
-static bool check_checksum(struct sk_buff *skb)
-{
-	struct iphdr *iph = ip_hdr(skb);
-	struct gre_base_hdr *greh = (struct gre_base_hdr *)(iph + 1);
-	__sum16 csum = 0;
-
-	if (greh->flags & GRE_CSUM) {
-		switch (skb->ip_summed) {
-		case CHECKSUM_COMPLETE:
-			csum = csum_fold(skb->csum);
-
-			if (!csum)
-				break;
-			/* Fall through. */
-
-		case CHECKSUM_NONE:
-			skb->csum = 0;
-			csum = __skb_checksum_complete(skb);
-			skb->ip_summed = CHECKSUM_COMPLETE;
-			break;
-		}
-	}
-
-	return (csum == 0);
-}
-
-static u32 gre_flags_to_tunnel_flags(__be16 gre_flags, bool is_gre64)
-{
-	u32 tunnel_flags = 0;
-
-	if (gre_flags & GRE_KEY || is_gre64)
-		tunnel_flags = OVS_TNL_F_KEY;
-
-	if (gre_flags & GRE_CSUM)
-		tunnel_flags |= OVS_TNL_F_CSUM;
-
-	return tunnel_flags;
-}
-
 /* Called with rcu_read_lock and BH disabled. */
-static int gre_rcv(struct sk_buff *skb)
+static int gre_rcv(struct sk_buff *skb,
+		   const struct tnl_ptk_info *tpi)
 {
+	struct ovs_key_ipv4_tunnel tun_key;
 	struct ovs_net *ovs_net;
 	struct vport *vport;
-	int hdr_len;
-	struct iphdr *iph;
-	struct ovs_key_ipv4_tunnel tun_key;
-	__be16 gre_flags;
-	u32 tnl_flags;
 	__be64 key;
-	bool is_gre64;
-
-	if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr) + ETH_HLEN)))
-		goto error;
-	if (unlikely(!check_checksum(skb)))
-		goto error;
-
-	hdr_len = parse_header(ip_hdr(skb), &gre_flags, &key, &is_gre64);
-	if (unlikely(hdr_len < 0))
-		goto error;
 
 	ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
-	if (is_gre64)
+	if ((tpi->flags & TUNNEL_KEY) && (tpi->flags & TUNNEL_SEQ))
 		vport = rcu_dereference(ovs_net->vport_net.gre64_vport);
 	else
 		vport = rcu_dereference(ovs_net->vport_net.gre_vport);
 	if (unlikely(!vport))
-		goto error;
+		return PACKET_REJECT;
+
+	key = key_to_tunnel_id(tpi->key, tpi->seq);
+	tnl_tun_key_init(&tun_key, ip_hdr(skb), key, filter_tnl_flags(tpi->flags));
+
+	ovs_vport_receive(vport, skb, &tun_key);
+	return PACKET_RCVD;
+}
 
-	if (unlikely(!pskb_may_pull(skb, hdr_len + ETH_HLEN)))
+static int __send(struct vport *vport, struct sk_buff *skb,
+		  int tunnel_hlen,
+		  __be32 seq, __be16 gre64_flag)
+{
+	struct net *net = ovs_dp_get_net(vport->dp);
+	struct rtable *rt;
+	int min_headroom;
+	__be16 df;
+	__be32 saddr;
+	int err;
+
+	forward_ip_summed(skb, true);
+
+	/* Route lookup (saddr is passed by pointer and reused for xmit). */
+	saddr = OVS_CB(skb)->tun_key->ipv4_src;
+	rt = find_route(net,
+			&saddr,
+			OVS_CB(skb)->tun_key->ipv4_dst,
+			IPPROTO_GRE,
+			OVS_CB(skb)->tun_key->ipv4_tos,
+			skb_get_mark(skb));
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
 		goto error;
+	}
 
-	iph = ip_hdr(skb);
-	tnl_flags = gre_flags_to_tunnel_flags(gre_flags, is_gre64);
-	tnl_tun_key_init(&tun_key, iph, key, tnl_flags);
+	min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
+			+ tunnel_hlen + sizeof(struct iphdr)
+			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+
+	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
+		int head_delta = SKB_DATA_ALIGN(min_headroom -
+						skb_headroom(skb) +
+						16);
+		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
+					0, GFP_ATOMIC);
+		if (unlikely(err))
+			goto err_free_rt;
+	}
 
-	__skb_pull(skb, hdr_len);
-	skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN);
+	if (unlikely(vlan_deaccel_tag(skb))) {
+		err = -ENOMEM;
+		goto err_free_rt;
+	}
+
+	/* Push tunnel header; NULL means offload setup failed. */
+	skb = __build_header(skb, tunnel_hlen, seq, gre64_flag);
+	if (unlikely(!skb)) {
+		err = 0;
+		goto err_free_rt;
+	}
 
-	ovs_tnl_rcv(vport, skb, &tun_key);
-	return 0;
+	df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
+		htons(IP_DF) : 0;
 
+	return iptunnel_xmit(net, rt, skb, saddr,
+			     OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
+			     OVS_CB(skb)->tun_key->ipv4_tos,
+			     OVS_CB(skb)->tun_key->ipv4_ttl, df);
+err_free_rt:
+	ip_rt_put(rt);
 error:
-	kfree_skb(skb);
-	return 0;
+	return err;
 }
 
-static const struct net_protocol gre_protocol_handlers = {
-	.handler	=	gre_rcv,
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32)
-	.netns_ok	=	1,
-#endif
+static struct gre_cisco_protocol gre_protocol = {
+	.handler	= gre_rcv,
+	.priority	= 1,
 };
 
 static int gre_ports;
@@ -298,7 +195,7 @@ static int gre_init(void)
 	if (gre_ports > 1)
 		return 0;
 
-	err = inet_add_protocol(&gre_protocol_handlers, IPPROTO_GRE);
+	err = gre_cisco_register(&gre_protocol);
 	if (err)
 		pr_warn("cannot register gre protocol handler\n");
 
@@ -311,7 +208,7 @@ static void gre_exit(void)
 	if (gre_ports > 0)
 		return;
 
-	inet_del_protocol(&gre_protocol_handlers, IPPROTO_GRE);
+	gre_cisco_unregister(&gre_protocol);
 }
 
 static const char *gre_get_name(const struct vport *vport)
@@ -361,15 +258,16 @@ static void gre_tnl_destroy(struct vport *vport)
 	gre_exit();
 }
 
-static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
+static int gre_send(struct vport *vport, struct sk_buff *skb)
 {
 	int hlen;
 
 	if (unlikely(!OVS_CB(skb)->tun_key))
 		return -EINVAL;
 
-	hlen = gre_hdr_len(OVS_CB(skb)->tun_key);
-	return ovs_tnl_send(vport, skb, IPPROTO_GRE, hlen, gre_build_header);
+	hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags);
+
+	return __send(vport, skb, hlen, 0, 0);
 }
 
 const struct vport_ops ovs_gre_vport_ops = {
@@ -377,7 +275,7 @@ const struct vport_ops ovs_gre_vport_ops = {
 	.create		= gre_create,
 	.destroy	= gre_tnl_destroy,
 	.get_name	= gre_get_name,
-	.send		= gre_tnl_send,
+	.send		= gre_send,
 };
 
 /* GRE64 vport. */
@@ -422,15 +320,28 @@ static void gre64_tnl_destroy(struct vport *vport)
 	gre_exit();
 }
 
-static int gre64_tnl_send(struct vport *vport, struct sk_buff *skb)
+static __be32 be64_get_high32(__be64 x)
+{
+#ifdef __BIG_ENDIAN
+	return (__force __be32)((__force u64)x >> 32);
+#else
+	return (__force __be32)x;
+#endif
+}
+
+static int gre64_send(struct vport *vport, struct sk_buff *skb)
 {
 	int hlen;
+	__be32 seq;
 
 	if (unlikely(!OVS_CB(skb)->tun_key))
 		return -EINVAL;
 
-	hlen = gre64_hdr_len(OVS_CB(skb)->tun_key);
-	return ovs_tnl_send(vport, skb, IPPROTO_GRE, hlen, gre64_build_header);
+	/* GRE64 always sends the key (even when zero) plus the seq word. */
+	hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags | TUNNEL_KEY)
+	       + GRE_HEADER_SECTION;
+	seq = be64_get_high32(OVS_CB(skb)->tun_key->tun_id);
+	return __send(vport, skb, hlen, seq, TUNNEL_KEY | TUNNEL_SEQ);
 }
 
 const struct vport_ops ovs_gre64_vport_ops = {
@@ -438,5 +349,5 @@ const struct vport_ops ovs_gre64_vport_ops = {
 	.create		= gre64_create,
 	.destroy	= gre64_tnl_destroy,
 	.get_name	= gre_get_name,
-	.send		= gre64_tnl_send,
+	.send		= gre64_send,
 };
diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c
index 3d0a315..4342e75 100644
--- a/datapath/vport-lisp.c
+++ b/datapath/vport-lisp.c
@@ -218,7 +218,7 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
 
 	/* Save outer tunnel values */
 	iph = ip_hdr(skb);
-	tnl_tun_key_init(&tun_key, iph, key, OVS_TNL_F_KEY);
+	tnl_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
 
 	/* Drop non-IP inner packets */
 	inner_iph = (struct iphdr *)(lisph + 1);
diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
index d140c3b..64d7541 100644
--- a/datapath/vport-vxlan.c
+++ b/datapath/vport-vxlan.c
@@ -136,7 +136,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
 
 	/* Save outer tunnel values */
 	iph = ip_hdr(skb);
-	tnl_tun_key_init(&tun_key, iph, key, OVS_TNL_F_KEY);
+	tnl_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
 
 	ovs_tnl_rcv(vport_from_priv(vxlan_vport), skb, &tun_key);
 	goto out;
-- 
1.7.1




More information about the dev mailing list