[ovs-dev] [PATCH v5] gre: Restructure tunneling.

Pravin B Shelar pshelar at nicira.com
Thu Jun 13 00:46:56 UTC 2013


The following patch restructures the ovs tunneling and gre vport
implementation to bring ovs tunneling more in sync with upstream
kernel tunneling.  As a result, the tunneling code is simplified,
since most of the protocol processing on send and recv is pushed
to kernel tunneling.  For the external ovs module, that code is
moved to the kernel compatibility code.

Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
---
v4-v5:
 - Added context to error msg.
 - remove cb restore from gso.
 - use standard skb_needs_linearize().
 - clearer dst refcnt.
 - fixed gso ip-id
 - removed IS_ERR_VALUE
v3-v4:
 - Fix coding style.
 - Fix __build_header() parameters.
 - Using looser check of skb->ip_summed at __skb_gso_segment.
v2-v3:
 - Move rcv packet datapath processing to ip_tunnel code.
 - Use TUNNEL_* flags.
 - better check gre64 vport.
 - return skb if there is no gre port created.
v1-v2:
 - Fix Modules path for ip_tunnels_core.c
---
 datapath/compat.h                               |    6 +
 datapath/flow.c                                 |   12 +-
 datapath/flow.h                                 |    9 +-
 datapath/linux/Modules.mk                       |    6 +
 datapath/linux/compat/gre.c                     |  352 +++++++++++++++++++++++
 datapath/linux/compat/gso.c                     |  157 ++++++++++
 datapath/linux/compat/gso.h                     |   72 +++++
 datapath/linux/compat/include/linux/if_ether.h  |    4 +
 datapath/linux/compat/include/linux/netdevice.h |   13 +
 datapath/linux/compat/include/net/gre.h         |  102 +++++++
 datapath/linux/compat/include/net/ip_tunnels.h  |   54 ++++
 datapath/linux/compat/ip_tunnels_core.c         |  112 +++++++
 datapath/tunnel.c                               |   14 +-
 datapath/tunnel.h                               |   11 +-
 datapath/vport-gre.c                            |  349 +++++++++--------------
 datapath/vport-lisp.c                           |    2 +-
 datapath/vport-vxlan.c                          |    2 +-
 17 files changed, 1036 insertions(+), 241 deletions(-)
 create mode 100644 datapath/linux/compat/gre.c
 create mode 100644 datapath/linux/compat/gso.c
 create mode 100644 datapath/linux/compat/gso.h
 create mode 100644 datapath/linux/compat/include/net/gre.h
 create mode 100644 datapath/linux/compat/include/net/ip_tunnels.h
 create mode 100644 datapath/linux/compat/ip_tunnels_core.c

diff --git a/datapath/compat.h b/datapath/compat.h
index c7fd225..a6a01d5 100644
--- a/datapath/compat.h
+++ b/datapath/compat.h
@@ -94,4 +94,10 @@ static inline void skb_set_mark(struct sk_buff *skb, u32 mark)
 }
 #endif /* after 2.6.20 */
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
+#define rt_dst(rt) ((rt)->dst)		/* lvalue: callers take &rt_dst(rt) */
+#else
+#define rt_dst(rt) ((rt)->u.dst)	/* lvalue: callers take &rt_dst(rt) */
+#endif
+
 #endif /* compat.h */
diff --git a/datapath/flow.c b/datapath/flow.c
index 7604405..1f5a8e5 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -1015,7 +1015,7 @@ int ipv4_tun_from_nlattr(const struct nlattr *attr,
 		switch (type) {
 		case OVS_TUNNEL_KEY_ATTR_ID:
 			tun_key->tun_id = nla_get_be64(a);
-			tun_key->tun_flags |= OVS_TNL_F_KEY;
+			tun_key->tun_flags |= TUNNEL_KEY;
 			break;
 		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
 			tun_key->ipv4_src = nla_get_be32(a);
@@ -1031,10 +1031,10 @@ int ipv4_tun_from_nlattr(const struct nlattr *attr,
 			ttl = true;
 			break;
 		case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
-			tun_key->tun_flags |= OVS_TNL_F_DONT_FRAGMENT;
+			tun_key->tun_flags |= TUNNEL_DONT_FRAGMENT;
 			break;
 		case OVS_TUNNEL_KEY_ATTR_CSUM:
-			tun_key->tun_flags |= OVS_TNL_F_CSUM;
+			tun_key->tun_flags |= TUNNEL_CSUM;
 			break;
 		default:
 			return -EINVAL;
@@ -1062,7 +1062,7 @@ int ipv4_tun_to_nlattr(struct sk_buff *skb,
 	if (!nla)
 		return -EMSGSIZE;
 
-	if (tun_key->tun_flags & OVS_TNL_F_KEY &&
+	if (tun_key->tun_flags & TUNNEL_KEY &&
 	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id))
 		return -EMSGSIZE;
 	if (tun_key->ipv4_src &&
@@ -1075,10 +1075,10 @@ int ipv4_tun_to_nlattr(struct sk_buff *skb,
 		return -EMSGSIZE;
 	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ipv4_ttl))
 		return -EMSGSIZE;
-	if ((tun_key->tun_flags & OVS_TNL_F_DONT_FRAGMENT) &&
+	if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) &&
 		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
 		return -EMSGSIZE;
-	if ((tun_key->tun_flags & OVS_TNL_F_CSUM) &&
+	if ((tun_key->tun_flags & TUNNEL_CSUM) &&
 		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
 		return -EMSGSIZE;
 
diff --git a/datapath/flow.h b/datapath/flow.h
index dba66cf..dfffed7 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -30,7 +30,9 @@
 #include <linux/jiffies.h>
 #include <linux/time.h>
 #include <linux/flex_array.h>
+
 #include <net/inet_ecn.h>
+#include <net/ip_tunnels.h>
 
 struct sk_buff;
 
@@ -40,11 +42,6 @@ struct sw_flow_actions {
 	struct nlattr actions[];
 };
 
-/* Tunnel flow flags. */
-#define OVS_TNL_F_DONT_FRAGMENT		(1 << 0)
-#define OVS_TNL_F_CSUM			(1 << 1)
-#define OVS_TNL_F_KEY			(1 << 2)
-
 /* Used to memset ovs_key_ipv4_tunnel padding. */
 #define OVS_TUNNEL_KEY_SIZE					\
         (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + 	\
@@ -54,7 +51,7 @@ struct ovs_key_ipv4_tunnel {
 	__be64 tun_id;
 	__be32 ipv4_src;
 	__be32 ipv4_dst;
-	u16  tun_flags;
+	__be16 tun_flags;
 	u8   ipv4_tos;
 	u8   ipv4_ttl;
 };
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index 1434a2d..a62d444 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -3,8 +3,11 @@ openvswitch_sources += \
 	linux/compat/dev-openvswitch.c \
 	linux/compat/exthdrs_core.c \
 	linux/compat/flex_array.c \
+	linux/compat/gre.c \
+	linux/compat/gso.c \
 	linux/compat/genetlink-openvswitch.c \
 	linux/compat/ip_output-openvswitch.c \
+	linux/compat/ip_tunnels_core.c \
 	linux/compat/kmemdup.c \
 	linux/compat/netdevice.c \
 	linux/compat/net_namespace.c \
@@ -13,6 +16,7 @@ openvswitch_sources += \
 	linux/compat/time.c	\
 	linux/compat/workqueue.c
 openvswitch_headers += \
+	linux/compat/gso.h \
 	linux/compat/include/asm/percpu.h \
 	linux/compat/include/linux/compiler.h \
 	linux/compat/include/linux/compiler-gcc.h \
@@ -61,8 +65,10 @@ openvswitch_headers += \
 	linux/compat/include/net/checksum.h \
 	linux/compat/include/net/dst.h \
 	linux/compat/include/net/genetlink.h \
+	linux/compat/include/net/gre.h \
 	linux/compat/include/net/inet_frag.h \
 	linux/compat/include/net/ip.h \
+	linux/compat/include/net/ip_tunnels.h \
 	linux/compat/include/net/ipv6.h \
 	linux/compat/include/net/net_namespace.h \
 	linux/compat/include/net/netlink.h \
diff --git a/datapath/linux/compat/gre.c b/datapath/linux/compat/gre.c
new file mode 100644
index 0000000..fbb9fb9
--- /dev/null
+++ b/datapath/linux/compat/gre.c
@@ -0,0 +1,352 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/module.h>
+#include <linux/if.h>
+#include <linux/if_tunnel.h>
+#include <linux/icmp.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/kmod.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+
+#include <net/gre.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/xfrm.h>
+
+#include "gso.h"
+
+static struct gre_cisco_protocol __rcu *gre_cisco_proto;
+
+static void gre_csum_fix(struct sk_buff *skb)
+{
+	struct gre_base_hdr *greh;
+	__be32 *options;
+	int gre_offset = skb_transport_offset(skb);
+
+	greh = (struct gre_base_hdr *)skb_transport_header(skb);
+	options = ((__be32 *)greh + 1);
+
+	*options = 0;
+	*(__sum16 *)options = csum_fold(skb_checksum(skb, gre_offset,
+						     skb->len - gre_offset, 0));
+}
+
+struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
+{
+	int err;
+
+	skb_reset_inner_headers(skb);
+
+	if (skb_is_gso(skb)) {
+		if (gre_csum)
+			OVS_GSO_CB(skb)->fix_segment = gre_csum_fix;
+	} else {
+		if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) {
+			err = skb_checksum_help(skb);
+			if (err)
+				goto error;
+
+		} else if (skb->ip_summed != CHECKSUM_PARTIAL)
+			skb->ip_summed = CHECKSUM_NONE;
+	}
+	return skb;
+error:
+	kfree_skb(skb);
+	return ERR_PTR(err);
+}
+
+static bool is_gre_gso(struct sk_buff *skb)
+{
+	return skb_is_gso(skb);
+}
+
+void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+		      int hdr_len)
+{
+	struct gre_base_hdr *greh;
+
+	__skb_push(skb, hdr_len);
+
+	greh = (struct gre_base_hdr *)skb->data;
+	greh->flags = tnl_flags_to_gre_flags(tpi->flags);
+	greh->protocol = tpi->proto;
+
+	if (tpi->flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
+		__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
+
+		if (tpi->flags & TUNNEL_SEQ) {
+			*ptr = tpi->seq;
+			ptr--;
+		}
+		if (tpi->flags & TUNNEL_KEY) {
+			*ptr = tpi->key;
+			ptr--;
+		}
+		if (tpi->flags & TUNNEL_CSUM && !is_gre_gso(skb)) {
+			*ptr = 0;
+			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
+						skb->len, 0));
+		}
+	}
+}
+
+static __sum16 check_checksum(struct sk_buff *skb)
+{
+	__sum16 csum = 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		csum = csum_fold(skb->csum);
+
+		if (!csum)
+			break;
+		/* Fall through. */
+
+	case CHECKSUM_NONE:
+		skb->csum = 0;
+		csum = __skb_checksum_complete(skb);
+		skb->ip_summed = CHECKSUM_COMPLETE;
+		break;
+	}
+
+	return csum;
+}
+
+static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+			    bool *csum_err)
+{
+	unsigned int ip_hlen = ip_hdrlen(skb);
+	struct gre_base_hdr *greh;
+	__be32 *options;
+	int hdr_len;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
+		return -EINVAL;
+
+	greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
+	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
+		return -EINVAL;
+
+	tpi->flags = gre_flags_to_tnl_flags(greh->flags);
+	hdr_len = ip_gre_calc_hlen(tpi->flags);
+
+	if (!pskb_may_pull(skb, hdr_len))
+		return -EINVAL;
+
+	greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
+	tpi->proto = greh->protocol;
+
+	options = (__be32 *)(greh + 1);
+	if (greh->flags & GRE_CSUM) {
+		if (check_checksum(skb)) {
+			*csum_err = true;
+			return -EINVAL;
+		}
+		options++;
+	}
+
+	if (greh->flags & GRE_KEY) {
+		tpi->key = *options;
+		options++;
+	} else
+		tpi->key = 0;
+
+	if (unlikely(greh->flags & GRE_SEQ)) {
+		tpi->seq = *options;
+		options++;
+	} else
+		tpi->seq = 0;
+
+	/* WCCP version 1 and 2 protocol decoding.
+	 * - Change protocol to IP
+	 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+	 */
+	if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+		tpi->proto = htons(ETH_P_IP);
+		if ((*(u8 *)options & 0xF0) != 0x40) {
+			hdr_len += 4;
+			if (!pskb_may_pull(skb, hdr_len))
+				return -EINVAL;
+		}
+	}
+
+	return iptunnel_pull_header(skb, hdr_len, tpi->proto);
+}
+
+static int gre_cisco_rcv(struct sk_buff *skb)
+{
+	struct tnl_ptk_info tpi;
+	bool csum_err = false;
+	struct gre_cisco_protocol *proto;
+
+	rcu_read_lock();
+	proto = rcu_dereference(gre_cisco_proto);
+	if (!proto)
+		goto drop;
+
+	if (parse_gre_header(skb, &tpi, &csum_err) < 0)
+		goto drop;
+	proto->handler(skb, &tpi);
+	rcu_read_unlock();
+	return 0;
+
+drop:
+	rcu_read_unlock();
+	kfree_skb(skb);
+	return 0;
+}
+
+static const struct gre_protocol ipgre_protocol = {
+	.handler	=	gre_cisco_rcv,
+};
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
+
+int gre_add_protocol(const struct gre_protocol *proto, u8 version)
+{
+	if (version >= GREPROTO_MAX)
+		return -EINVAL;
+
+	return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ?
+		0 : -EBUSY;
+}
+
+int gre_del_protocol(const struct gre_protocol *proto, u8 version)
+{
+	int ret;
+
+	if (version >= GREPROTO_MAX)
+		return -EINVAL;
+
+	ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ?
+		0 : -EBUSY;
+
+	if (ret)
+		return ret;
+
+	synchronize_net();
+	return 0;
+}
+
+static int gre_rcv(struct sk_buff *skb)
+{
+	const struct gre_protocol *proto;
+	u8 ver;
+	int ret;
+
+	if (!pskb_may_pull(skb, 12))
+		goto drop;
+
+	ver = skb->data[1] & 0x7f;
+	if (ver >= GREPROTO_MAX)
+		goto drop;
+
+	rcu_read_lock();
+	proto = rcu_dereference(gre_proto[ver]);
+	if (!proto || !proto->handler)
+		goto drop_unlock;
+	ret = proto->handler(skb);
+	rcu_read_unlock();
+	return ret;
+
+drop_unlock:
+	rcu_read_unlock();
+drop:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+static const struct net_protocol net_gre_protocol = {
+	.handler     = gre_rcv,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32)
+	.netns_ok    = 1,
+#endif
+};
+#endif
+
+static int gre_compat_init(void)
+{
+	int err;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+	if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
+		pr_err("%s: cannot register gre protocol handler\n", __func__);
+		return -EAGAIN;
+	}
+#endif
+	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
+	if (err) {
+		pr_warn("%s: cannot register gre_cisco protocol handler\n", __func__);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+		inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
+#endif
+	}
+
+	return err;
+}
+
+static int gre_compat_exit(void)
+{
+	int ret;
+
+	ret = gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
+	if (ret)
+		return ret;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+	ret = inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
+	if (ret)
+		return ret;
+#endif
+	return 0;
+}
+
+int gre_cisco_register(struct gre_cisco_protocol *newp)
+{
+	int err;
+
+	err = gre_compat_init();
+	if (err)
+		return err;
+
+	return (cmpxchg((struct gre_cisco_protocol **)&gre_cisco_proto, NULL, newp) == NULL) ?
+		0 : -EBUSY;
+}
+
+int gre_cisco_unregister(struct gre_cisco_protocol *proto)
+{
+	int ret;
+
+	ret = (cmpxchg((struct gre_cisco_protocol **)&gre_cisco_proto, proto, NULL) == proto) ?
+		0 : -EINVAL;
+
+	if (ret)
+		return ret;
+
+	synchronize_net();
+	ret = gre_compat_exit();
+	return ret;
+}
diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c
new file mode 100644
index 0000000..8cb2e06
--- /dev/null
+++ b/datapath/linux/compat/gso.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/module.h>
+#include <linux/if.h>
+#include <linux/if_tunnel.h>
+#include <linux/icmp.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/kmod.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+
+#include <net/gre.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/xfrm.h>
+
+#include "gso.h"
+
+static __be16 skb_network_protocol(struct sk_buff *skb)
+{
+	__be16 type = skb->protocol;
+	int vlan_depth = ETH_HLEN;
+
+	while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
+		struct vlan_hdr *vh;
+
+		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
+			return 0;
+
+		vh = (struct vlan_hdr *)(skb->data + vlan_depth);
+		type = vh->h_vlan_encapsulated_proto;
+		vlan_depth += VLAN_HLEN;
+	}
+
+	return type;
+}
+
+static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
+					   netdev_features_t features,
+					   bool tx_path)
+{
+	struct iphdr *iph = ip_hdr(skb);
+	int pkt_hlen = skb_inner_network_offset(skb); /* inner l2 + tunnel hdr. */
+	int mac_offset = skb_inner_mac_offset(skb);
+	struct sk_buff *skb1 = skb;
+	struct sk_buff *segs;
+	__be16 proto = skb->protocol;
+
+	/* setup whole inner packet to get protocol. */
+	__skb_pull(skb, mac_offset);
+	skb->protocol = skb_network_protocol(skb);
+
+	/* setup l3 packet to gso, to get around segmentation bug on older kernel.*/
+	__skb_pull(skb, (pkt_hlen - mac_offset));
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+
+	segs = __skb_gso_segment(skb, 0, tx_path);
+	if (!segs || IS_ERR(segs))
+		goto free;
+
+	skb = segs;
+	while (skb) {
+		__skb_push(skb, pkt_hlen);
+		skb_reset_mac_header(skb);
+		skb_reset_network_header(skb);
+		skb_set_transport_header(skb, sizeof(struct iphdr));
+		skb->mac_len = 0;
+
+		memcpy(ip_hdr(skb), iph, pkt_hlen);
+		if (OVS_GSO_CB(skb)->fix_segment)
+			OVS_GSO_CB(skb)->fix_segment(skb);
+
+		skb->protocol = proto;
+		skb = skb->next;
+	}
+free:
+	consume_skb(skb1);
+	return segs;
+}
+
+/* Segment GSO skbs / resolve CHECKSUM_PARTIAL here, then send each skb with the kernel's ip_local_out(). */
+int rpl_ip_local_out(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	int ret = NETDEV_TX_OK;
+	int id = -1;	/* < 0: keep the IP id already in the header; set only for GSO. */
+
+	if (skb_is_gso(skb)) {
+		struct iphdr *iph;
+
+		iph = ip_hdr(skb);
+		id = ntohs(iph->id);	/* segments get consecutive ids starting here */
+		skb = tnl_skb_gso_segment(skb, 0, false);
+		if (!skb || IS_ERR(skb))
+			return 0;
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		int err;
+
+		/* Use default features for dst device. */
+		if (unlikely(skb_needs_linearize(skb, dst->dev->features))) {
+			err = __skb_linearize(skb);
+			if (unlikely(err))
+				return 0;
+		}
+
+		err = skb_checksum_help(skb);
+		if (unlikely(err))
+			return 0;
+	}
+
+	while (skb) {
+		struct sk_buff *next_skb = skb->next;
+		struct iphdr *iph;
+		int err;
+
+		if (next_skb)
+			dst_clone(dst);	/* one dst reference per skb still to be sent */
+
+		skb->next = NULL;
+
+		iph = ip_hdr(skb);
+		if (id >= 0)
+			iph->id = htons(id++);
+
+		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+
+#undef ip_local_out
+		err = ip_local_out(skb);
+		if (unlikely(net_xmit_eval(err)))
+			ret = err;
+
+		skb = next_skb;
+	}
+	return ret;
+}
diff --git a/datapath/linux/compat/gso.h b/datapath/linux/compat/gso.h
new file mode 100644
index 0000000..44fd213
--- /dev/null
+++ b/datapath/linux/compat/gso.h
@@ -0,0 +1,72 @@
+#ifndef __LINUX_GSO_WRAPPER_H
+#define __LINUX_GSO_WRAPPER_H
+
+#include <linux/skbuff.h>
+#include <net/protocol.h>
+
+#include "datapath.h"
+
+struct ovs_gso_cb {
+	struct ovs_skb_cb dp_cb;
+	sk_buff_data_t	inner_network_header;
+	sk_buff_data_t	inner_mac_header;
+	void (*fix_segment)(struct sk_buff *);
+};
+#define OVS_GSO_CB(skb) ((struct ovs_gso_cb *)(skb)->cb)
+
+#define skb_inner_network_header rpl_skb_inner_network_header
+
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+#define skb_inner_network_header rpl_skb_inner_network_header
+static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb)
+{
+	return skb->head + OVS_GSO_CB(skb)->inner_network_header;
+}
+
+#define skb_inner_mac_header rpl_skb_inner_mac_header
+static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb)
+{
+	return skb->head + OVS_GSO_CB(skb)->inner_mac_header;
+}
+
+#else
+
+#define skb_inner_network_header rpl_skb_inner_network_header
+static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb)
+{
+	return OVS_GSO_CB(skb)->inner_network_header;
+}
+
+#define skb_inner_mac_header rpl_skb_inner_mac_header
+static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb)
+{
+	return OVS_GSO_CB(skb)->inner_mac_header;
+}
+
+#endif
+
+#define skb_inner_network_offset rpl_skb_inner_network_offset
+static inline int skb_inner_network_offset(const struct sk_buff *skb)
+{
+	return skb_inner_network_header(skb) - skb->data;
+}
+
+#define skb_inner_mac_offset rpl_skb_inner_mac_offset
+static inline int skb_inner_mac_offset(const struct sk_buff *skb)
+{
+	return skb_inner_mac_header(skb) - skb->data;
+}
+
+#define skb_reset_inner_headers rpl_skb_reset_inner_headers
+static inline void skb_reset_inner_headers(struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(struct ovs_gso_cb) > FIELD_SIZEOF(struct sk_buff, cb));
+	OVS_GSO_CB(skb)->inner_network_header = skb->network_header;
+	OVS_GSO_CB(skb)->inner_mac_header = skb->mac_header;
+
+	OVS_GSO_CB(skb)->fix_segment = NULL;
+}
+
+#define ip_local_out rpl_ip_local_out
+int ip_local_out(struct sk_buff *skb);
+#endif
diff --git a/datapath/linux/compat/include/linux/if_ether.h b/datapath/linux/compat/include/linux/if_ether.h
index 85b0d22..e22ea96 100644
--- a/datapath/linux/compat/include/linux/if_ether.h
+++ b/datapath/linux/compat/include/linux/if_ether.h
@@ -20,4 +20,8 @@
 #define ETH_P_802_3_MIN        0x0600
 #endif
 
+#ifndef ETH_P_8021AD
+#define ETH_P_8021AD    0x88A8          /* 802.1ad Service VLAN         */
+#endif
+
 #endif
diff --git a/datapath/linux/compat/include/linux/netdevice.h b/datapath/linux/compat/include/linux/netdevice.h
index f8240df..644e7d7 100644
--- a/datapath/linux/compat/include/linux/netdevice.h
+++ b/datapath/linux/compat/include/linux/netdevice.h
@@ -167,4 +167,17 @@ static inline struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 }
 #endif
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+#define skb_has_frag_list skb_has_frags
+#endif
+
+static inline int skb_needs_linearize(struct sk_buff *skb,
+				      int features)
+{
+	return skb_is_nonlinear(skb) &&
+		((skb_has_frag_list(skb) &&
+		  !(features & NETIF_F_FRAGLIST)) ||
+		 (skb_shinfo(skb)->nr_frags &&
+		  !(features & NETIF_F_SG)));
+}
 #endif
diff --git a/datapath/linux/compat/include/net/gre.h b/datapath/linux/compat/include/net/gre.h
new file mode 100644
index 0000000..139e4ab
--- /dev/null
+++ b/datapath/linux/compat/include/net/gre.h
@@ -0,0 +1,102 @@
+#ifndef __LINUX_GRE_WRAPPER_H
+#define __LINUX_GRE_WRAPPER_H
+
+#include <linux/skbuff.h>
+#include <net/ip_tunnels.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
+#include_next <net/gre.h>
+
+#else /* LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) */
+
+#define GREPROTO_CISCO		0
+#define GREPROTO_MAX		2
+
+struct gre_protocol {
+	int  (*handler)(struct sk_buff *skb);
+};
+
+int gre_add_protocol(const struct gre_protocol *proto, u8 version);
+int gre_del_protocol(const struct gre_protocol *proto, u8 version);
+
+#endif /* same version boundary as the definitions in compat/gre.c */
+
+struct gre_base_hdr {
+	__be16 flags;
+	__be16 protocol;
+};
+#define GRE_HEADER_SECTION 4
+
+#define MAX_GRE_PROTO_PRIORITY 255
+struct gre_cisco_protocol {
+	int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi);
+	u8 priority;
+};
+
+#define gre_build_header rpl_gre_build_header
+void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+		      int hdr_len);
+
+#define gre_handle_offloads rpl_gre_handle_offloads
+struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum);
+
+int gre_cisco_register(struct gre_cisco_protocol *proto);
+int gre_cisco_unregister(struct gre_cisco_protocol *proto);
+
+static inline int ip_gre_calc_hlen(__be16 o_flags)
+{
+	int addend = 4;
+
+	if (o_flags & TUNNEL_CSUM)
+		addend += 4;
+	if (o_flags & TUNNEL_KEY)
+		addend += 4;
+	if (o_flags & TUNNEL_SEQ)
+		addend += 4;
+	return addend;
+}
+
+static inline __be16 gre_flags_to_tnl_flags(__be16 flags)
+{
+	__be16 tflags = 0;
+
+	if (flags & GRE_CSUM)
+		tflags |= TUNNEL_CSUM;
+	if (flags & GRE_ROUTING)
+		tflags |= TUNNEL_ROUTING;
+	if (flags & GRE_KEY)
+		tflags |= TUNNEL_KEY;
+	if (flags & GRE_SEQ)
+		tflags |= TUNNEL_SEQ;
+	if (flags & GRE_STRICT)
+		tflags |= TUNNEL_STRICT;
+	if (flags & GRE_REC)
+		tflags |= TUNNEL_REC;
+	if (flags & GRE_VERSION)
+		tflags |= TUNNEL_VERSION;
+
+	return tflags;
+}
+
+static inline __be16 tnl_flags_to_gre_flags(__be16 tflags)
+{
+	__be16 flags = 0;
+
+	if (tflags & TUNNEL_CSUM)
+		flags |= GRE_CSUM;
+	if (tflags & TUNNEL_ROUTING)
+		flags |= GRE_ROUTING;
+	if (tflags & TUNNEL_KEY)
+		flags |= GRE_KEY;
+	if (tflags & TUNNEL_SEQ)
+		flags |= GRE_SEQ;
+	if (tflags & TUNNEL_STRICT)
+		flags |= GRE_STRICT;
+	if (tflags & TUNNEL_REC)
+		flags |= GRE_REC;
+	if (tflags & TUNNEL_VERSION)
+		flags |= GRE_VERSION;
+
+	return flags;
+}
+#endif
diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h
new file mode 100644
index 0000000..ad17c9d
--- /dev/null
+++ b/datapath/linux/compat/include/net/ip_tunnels.h
@@ -0,0 +1,54 @@
+#ifndef __NET_IP_TUNNELS_WRAPPER_H
+#define __NET_IP_TUNNELS_WRAPPER_H 1
+
+#include <linux/if_tunnel.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <net/dsfield.h>
+#include <net/flow.h>
+#include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/rtnetlink.h>
+
+#define TUNNEL_CSUM	__cpu_to_be16(0x01)
+#define TUNNEL_ROUTING	__cpu_to_be16(0x02)
+#define TUNNEL_KEY	__cpu_to_be16(0x04)
+#define TUNNEL_SEQ	__cpu_to_be16(0x08)
+#define TUNNEL_STRICT	__cpu_to_be16(0x10)
+#define TUNNEL_REC	__cpu_to_be16(0x20)
+#define TUNNEL_VERSION	__cpu_to_be16(0x40)
+#define TUNNEL_NO_KEY	__cpu_to_be16(0x80)
+#define TUNNEL_DONT_FRAGMENT	__cpu_to_be16(0x0100)
+
+struct tnl_ptk_info {
+	__be16 flags;
+	__be16 proto;
+	__be32 key;
+	__be32 seq;
+};
+
+#define PACKET_RCVD	0
+#define PACKET_REJECT	1
+
+static inline void tunnel_ip_select_ident(struct sk_buff *skb,
+					  const struct iphdr  *old_iph,
+					  struct dst_entry *dst)
+{
+	struct iphdr *iph = ip_hdr(skb);
+
+	/* Use inner packet iph-id if possible. */
+	if (skb->protocol == htons(ETH_P_IP) && old_iph->id)
+		iph->id = old_iph->id;
+	else
+		__ip_select_ident(iph, dst,
+				(skb_shinfo(skb)->gso_segs ?: 1) - 1);
+}
+
+int iptunnel_xmit(struct net *net, struct rtable *rt,
+		  struct sk_buff *skb,
+		  __be32 src, __be32 dst, __u8 proto,
+		  __u8 tos, __u8 ttl, __be16 df);
+
+int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
+#endif /* __NET_IP_TUNNELS_WRAPPER_H */
diff --git a/datapath/linux/compat/ip_tunnels_core.c b/datapath/linux/compat/ip_tunnels_core.c
new file mode 100644
index 0000000..58bedd1
--- /dev/null
+++ b/datapath/linux/compat/ip_tunnels_core.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/in.h>
+#include <linux/in_route.h>
+#include <linux/inetdevice.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <linux/workqueue.h>
+#include <linux/rculist.h>
+#include <net/ip_tunnels.h>
+#include <net/route.h>
+#include <net/xfrm.h>
+
+#include "gso.h"
+#include "compat.h"
+
+int iptunnel_xmit(struct net *net, struct rtable *rt,
+		  struct sk_buff *skb,
+		  __be32 src, __be32 dst, __u8 proto,
+		  __u8 tos, __u8 ttl, __be16 df)
+{
+	int pkt_len = skb->len;
+	struct iphdr *iph;
+	int err;
+
+	nf_reset(skb);
+	secpath_reset(skb);
+	skb_clear_rxhash(skb);
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt_dst(rt));
+#if 0
+	/* Do not clear ovs_skb_cb.  It will be done in gso code. */
+	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+#endif
+
+	/* Push down and install the IP header. */
+	__skb_push(skb, sizeof(struct iphdr));
+	skb_reset_network_header(skb);
+
+	iph = ip_hdr(skb);
+
+	iph->version	=	4;
+	iph->ihl	=	sizeof(struct iphdr) >> 2;
+	iph->frag_off	=	df;
+	iph->protocol	=	proto;
+	iph->tos	=	tos;
+	iph->daddr	=	dst;
+	iph->saddr	=	src;
+	iph->ttl	=	ttl;
+	tunnel_ip_select_ident(skb,
+			       (const struct iphdr *)skb_inner_network_header(skb),
+			       &rt_dst(rt));
+
+	err = ip_local_out(skb);
+	if (unlikely(net_xmit_eval(err)))
+		pkt_len = 0;
+	return pkt_len;
+}
+
+int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
+{
+	if (unlikely(!pskb_may_pull(skb, hdr_len)))
+		return -ENOMEM;
+
+	skb_pull_rcsum(skb, hdr_len);
+
+	if (inner_proto == htons(ETH_P_TEB)) {
+		struct ethhdr *eh;
+
+		if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
+			return -ENOMEM;
+
+		eh = (struct ethhdr *)skb->data;
+
+		if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN))
+			skb->protocol = eh->h_proto;
+		else
+			skb->protocol = htons(ETH_P_802_2);
+
+	} else {
+		skb->protocol = inner_proto;
+	}
+
+	nf_reset(skb);
+	secpath_reset(skb);
+	skb_clear_rxhash(skb);
+	skb_dst_drop(skb);
+	vlan_set_tci(skb, 0);
+	skb_set_queue_mapping(skb, 0);
+	skb->pkt_type = PACKET_HOST;
+	return 0;
+}
diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index 8c93e18..9102786 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -37,12 +37,6 @@
 #include "vlan.h"
 #include "vport.h"
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
-#define rt_dst(rt) (rt->dst)
-#else
-#define rt_dst(rt) (rt->u.dst)
-#endif
-
 /**
  *	ovs_tnl_rcv - ingress point for generic tunnel code
  *
@@ -85,9 +79,9 @@ void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb,
 	ovs_vport_receive(vport, skb, tun_key);
 }
 
-static struct rtable *find_route(struct net *net,
-		__be32 *saddr, __be32 daddr, u8 ipproto,
-		u8 tos, u32 skb_mark)
+struct rtable *find_route(struct net *net,
+			  __be32 *saddr, __be32 daddr, u8 ipproto,
+			  u8 tos, u32 skb_mark)
 {
 	struct rtable *rt;
 	/* Tunnel configuration keeps DSCP part of TOS bits, But Linux
@@ -289,7 +283,7 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb,
 		iph->tos	= OVS_CB(skb)->tun_key->ipv4_tos;
 		iph->ttl	= OVS_CB(skb)->tun_key->ipv4_ttl;
 		iph->frag_off	= OVS_CB(skb)->tun_key->tun_flags &
-				  OVS_TNL_F_DONT_FRAGMENT ?  htons(IP_DF) : 0;
+				  TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
 		/*
 		 * Allow our local IP stack to fragment the outer packet even
 		 * if the DF bit is set as a last resort.  We also need to
diff --git a/datapath/tunnel.h b/datapath/tunnel.h
index 89c4e16..17de7c4 100644
--- a/datapath/tunnel.h
+++ b/datapath/tunnel.h
@@ -26,6 +26,11 @@
 #include "flow.h"
 #include "vport.h"
 
+struct rtable *find_route(struct net *net,
+			  __be32 *saddr, __be32 daddr, u8 ipproto,
+			  u8 tos, u32 skb_mark);
+
+u16 ovs_tnl_get_src_port(struct sk_buff *skb);
 
 int ovs_tnl_send(struct vport *vport, struct sk_buff *skb,
 		 u8 ipproto, int tunnel_hlen,
@@ -35,10 +40,10 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb,
 
 void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb,
 		 struct ovs_key_ipv4_tunnel *tun_key);
-u16 ovs_tnl_get_src_port(struct sk_buff *skb);
 
 static inline void tnl_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
-				    const struct iphdr *iph, __be64 tun_id, u32 tun_flags)
+					 const struct iphdr *iph, __be64 tun_id,
+					 __be16 tun_flags)
 {
 	tun_key->tun_id = tun_id;
 	tun_key->ipv4_src = iph->saddr;
@@ -52,4 +57,4 @@ static inline void tnl_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
 	       sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE);
 }
 
-#endif /* tunnel.h */
+#endif /* TUNNEL_H */
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index dae35ac..fda6481 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -24,50 +24,29 @@
 #include <linux/if_tunnel.h>
 #include <linux/if_vlan.h>
 #include <linux/in.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/in_route.h>
+#include <linux/inetdevice.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/workqueue.h>
+#include <linux/rculist.h>
+#include <net/route.h>
+#include <net/xfrm.h>
+
 
 #include <net/icmp.h>
 #include <net/ip.h>
+#include <net/ip_tunnels.h>
+#include <net/gre.h>
 #include <net/protocol.h>
 
 #include "datapath.h"
 #include "tunnel.h"
 #include "vport.h"
 
-/*
- * The GRE header is composed of a series of sections: a base and then a variable
- * number of options.
- */
-#define GRE_HEADER_SECTION 4
-
-struct gre_base_hdr {
-	__be16 flags;
-	__be16 protocol;
-};
-
-static int gre_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
-{
-	int len = GRE_HEADER_SECTION;
-
-	if (tun_key->tun_flags & OVS_TNL_F_KEY)
-		len += GRE_HEADER_SECTION;
-	if (tun_key->tun_flags & OVS_TNL_F_CSUM)
-		len += GRE_HEADER_SECTION;
-	return len;
-}
-
-static int gre64_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
-{
-	/* Set key for GRE64 tunnels, even when key if is zero. */
-	int len = GRE_HEADER_SECTION +		/* GRE Hdr */
-		  GRE_HEADER_SECTION +		/* GRE Key */
-		  GRE_HEADER_SECTION;		/* GRE SEQ */
-
-	if (tun_key->tun_flags & OVS_TNL_F_CSUM)
-		len += GRE_HEADER_SECTION;
-
-	return len;
-}
-
 /* Returns the least-significant 32 bits of a __be64. */
 static __be32 be64_get_low32(__be64 x)
 {
@@ -78,61 +57,30 @@ static __be32 be64_get_low32(__be64 x)
 #endif
 }
 
-static __be32 be64_get_high32(__be64 x)
+static __be16 filter_tnl_flags(__be16 flags)
 {
-#ifdef __BIG_ENDIAN
-	return (__force __be32)((__force u64)x >> 32);
-#else
-	return (__force __be32)x;
-#endif
+	return flags & (TUNNEL_CSUM | TUNNEL_KEY);
 }
 
-static void __gre_build_header(struct sk_buff *skb,
-			       int tunnel_hlen,
-			       bool is_gre64)
+static struct sk_buff *__build_header(struct sk_buff *skb,
+				      int tunnel_hlen,
+				      __be32 seq, __be16 gre64_flag)
 {
 	const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
-	__be32 *options = (__be32 *)(skb_network_header(skb) + tunnel_hlen
-			- GRE_HEADER_SECTION);
-	struct gre_base_hdr *greh = (struct gre_base_hdr *) skb_transport_header(skb);
-	greh->protocol = htons(ETH_P_TEB);
-	greh->flags = 0;
-
-	/* Work backwards over the options so the checksum is last. */
-	if (tun_key->tun_flags & OVS_TNL_F_KEY || is_gre64) {
-		greh->flags |= GRE_KEY;
-		if (is_gre64) {
-			/* Set higher 32 bits to seq. */
-			*options = be64_get_high32(tun_key->tun_id);
-			options--;
-			greh->flags |= GRE_SEQ;
-		}
-		*options = be64_get_low32(tun_key->tun_id);
-		options--;
-	}
+	struct tnl_ptk_info tpi;
 
-	if (tun_key->tun_flags & OVS_TNL_F_CSUM) {
-		greh->flags |= GRE_CSUM;
-		*options = 0;
-		*(__sum16 *)options = csum_fold(skb_checksum(skb,
-						skb_transport_offset(skb),
-						skb->len - skb_transport_offset(skb),
-						0));
-	}
-}
+	skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
+	if (IS_ERR(skb))
+		return NULL;
 
-static void gre_build_header(const struct vport *vport,
-			     struct sk_buff *skb,
-			     int tunnel_hlen)
-{
-	__gre_build_header(skb, tunnel_hlen, false);
-}
+	tpi.flags = filter_tnl_flags(tun_key->tun_flags) | gre64_flag;
 
-static void gre64_build_header(const struct vport *vport,
-			       struct sk_buff *skb,
-			       int tunnel_hlen)
-{
-	__gre_build_header(skb, tunnel_hlen, true);
+	tpi.proto = htons(ETH_P_TEB);
+	tpi.key = be64_get_low32(tun_key->tun_id);
+	tpi.seq = seq;
+	gre_build_header(skb, &tpi, tunnel_hlen);
+
+	return skb;
 }
 
 static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
@@ -144,148 +92,107 @@ static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
 #endif
 }
 
-static int parse_header(struct iphdr *iph, __be16 *flags, __be64 *tun_id,
-			bool *is_gre64)
-{
-	/* IP and ICMP protocol handlers check that the IHL is valid. */
-	struct gre_base_hdr *greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2));
-	__be32 *options = (__be32 *)(greh + 1);
-	int hdr_len;
-
-	*flags = greh->flags;
-
-	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
-		return -EINVAL;
-
-	if (unlikely(greh->protocol != htons(ETH_P_TEB)))
-		return -EINVAL;
-
-	hdr_len = GRE_HEADER_SECTION;
-
-	if (greh->flags & GRE_CSUM) {
-		hdr_len += GRE_HEADER_SECTION;
-		options++;
-	}
-
-	if (greh->flags & GRE_KEY) {
-		__be32 seq;
-		__be32 gre_key;
-
-		gre_key = *options;
-		hdr_len += GRE_HEADER_SECTION;
-		options++;
-
-		if (greh->flags & GRE_SEQ) {
-			seq = *options;
-			*is_gre64 = true;
-		} else {
-			seq = 0;
-			*is_gre64 = false;
-		}
-		*tun_id = key_to_tunnel_id(gre_key, seq);
-	} else {
-		*tun_id = 0;
-		/* Ignore GRE seq if there is no key present. */
-		*is_gre64 = false;
-	}
-
-	if (greh->flags & GRE_SEQ)
-		hdr_len += GRE_HEADER_SECTION;
-
-	return hdr_len;
-}
-
-static bool check_checksum(struct sk_buff *skb)
-{
-	struct iphdr *iph = ip_hdr(skb);
-	struct gre_base_hdr *greh = (struct gre_base_hdr *)(iph + 1);
-	__sum16 csum = 0;
-
-	if (greh->flags & GRE_CSUM) {
-		switch (skb->ip_summed) {
-		case CHECKSUM_COMPLETE:
-			csum = csum_fold(skb->csum);
-
-			if (!csum)
-				break;
-			/* Fall through. */
-
-		case CHECKSUM_NONE:
-			skb->csum = 0;
-			csum = __skb_checksum_complete(skb);
-			skb->ip_summed = CHECKSUM_COMPLETE;
-			break;
-		}
-	}
-
-	return (csum == 0);
-}
-
-static u32 gre_flags_to_tunnel_flags(__be16 gre_flags, bool is_gre64)
-{
-	u32 tunnel_flags = 0;
-
-	if (gre_flags & GRE_KEY || is_gre64)
-		tunnel_flags = OVS_TNL_F_KEY;
-
-	if (gre_flags & GRE_CSUM)
-		tunnel_flags |= OVS_TNL_F_CSUM;
-
-	return tunnel_flags;
-}
-
 /* Called with rcu_read_lock and BH disabled. */
-static int gre_rcv(struct sk_buff *skb)
+static int gre_rcv(struct sk_buff *skb,
+		   const struct tnl_ptk_info *tpi)
 {
+	struct ovs_key_ipv4_tunnel tun_key;
 	struct ovs_net *ovs_net;
 	struct vport *vport;
-	int hdr_len;
-	struct iphdr *iph;
-	struct ovs_key_ipv4_tunnel tun_key;
-	__be16 gre_flags;
-	u32 tnl_flags;
 	__be64 key;
-	bool is_gre64;
-
-	if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr) + ETH_HLEN)))
-		goto error;
-	if (unlikely(!check_checksum(skb)))
-		goto error;
-
-	hdr_len = parse_header(ip_hdr(skb), &gre_flags, &key, &is_gre64);
-	if (unlikely(hdr_len < 0))
-		goto error;
 
 	ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
-	if (is_gre64)
+	if ((tpi->flags & TUNNEL_KEY) && (tpi->flags & TUNNEL_SEQ))
 		vport = rcu_dereference(ovs_net->vport_net.gre64_vport);
 	else
 		vport = rcu_dereference(ovs_net->vport_net.gre_vport);
 	if (unlikely(!vport))
-		goto error;
+		return PACKET_REJECT;
 
-	if (unlikely(!pskb_may_pull(skb, hdr_len + ETH_HLEN)))
+	key = key_to_tunnel_id(tpi->key, tpi->seq);
+	tnl_tun_key_init(&tun_key, ip_hdr(skb), key, filter_tnl_flags(tpi->flags));
+
+	ovs_vport_receive(vport, skb, &tun_key);
+	return PACKET_RCVD;
+}
+
+static int __send(struct vport *vport, struct sk_buff *skb,
+		  int tunnel_hlen,
+		  __be32 seq, __be16 gre64_flag)
+{
+	struct net *net = ovs_dp_get_net(vport->dp);
+	struct rtable *rt;
+	int min_headroom;
+	__be16 df;
+	__be32 saddr;
+	int err;
+
+	forward_ip_summed(skb, true);
+
+	/* Route lookup */
+	saddr = OVS_CB(skb)->tun_key->ipv4_src;
+	rt = find_route(ovs_dp_get_net(vport->dp),
+			&saddr,
+			OVS_CB(skb)->tun_key->ipv4_dst,
+			IPPROTO_GRE,
+			OVS_CB(skb)->tun_key->ipv4_tos,
+			skb_get_mark(skb));
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
 		goto error;
+	}
 
-	iph = ip_hdr(skb);
-	tnl_flags = gre_flags_to_tunnel_flags(gre_flags, is_gre64);
-	tnl_tun_key_init(&tun_key, iph, key, tnl_flags);
+	min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
+			+ tunnel_hlen + sizeof(struct iphdr)
+			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+
+	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
+		int head_delta = SKB_DATA_ALIGN(min_headroom -
+						skb_headroom(skb) +
+						16);
+		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
+					0, GFP_ATOMIC);
+		if (unlikely(err))
+			goto err_free_rt;
+	}
 
-	skb_pull_rcsum(skb, hdr_len);
+	if (unlikely(vlan_deaccel_tag(skb))) {
+		err = -ENOMEM;
+		goto err_free_rt;
+	}
 
-	ovs_tnl_rcv(vport, skb, &tun_key);
-	return 0;
+	/* Push Tunnel header. */
+	skb = __build_header(skb, tunnel_hlen, seq, gre64_flag);
+	if (unlikely(!skb)) {
+		err = 0;
+		goto err_free_rt;
+	}
 
+	df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
+		htons(IP_DF) : 0;
+
+	/*
+	 * As a last resort, allow our local IP stack to fragment the
+	 * outer packet even if the DF bit is set.  We also need to
+	 * force selection of an IP ID here because Linux will
+	 * otherwise leave it at 0 if the packet originally had DF set.
+	 */
+
+	skb->local_df = 1;
+
+	return iptunnel_xmit(net, rt, skb, saddr,
+			     OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
+			     OVS_CB(skb)->tun_key->ipv4_tos,
+			     OVS_CB(skb)->tun_key->ipv4_ttl, df);
+err_free_rt:
+	ip_rt_put(rt);
 error:
-	kfree_skb(skb);
-	return 0;
+	return err;
 }
 
-static const struct net_protocol gre_protocol_handlers = {
-	.handler	=	gre_rcv,
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32)
-	.netns_ok	=	1,
-#endif
+static struct gre_cisco_protocol gre_protocol = {
+	.handler	= gre_rcv,
+	.priority	= 1,
 };
 
 static int gre_ports;
@@ -297,7 +204,7 @@ static int gre_init(void)
 	if (gre_ports > 1)
 		return 0;
 
-	err = inet_add_protocol(&gre_protocol_handlers, IPPROTO_GRE);
+	err = gre_cisco_register(&gre_protocol);
 	if (err)
 		pr_warn("cannot register gre protocol handler\n");
 
@@ -310,7 +217,7 @@ static void gre_exit(void)
 	if (gre_ports > 0)
 		return;
 
-	inet_del_protocol(&gre_protocol_handlers, IPPROTO_GRE);
+	gre_cisco_unregister(&gre_protocol);
 }
 
 static const char *gre_get_name(const struct vport *vport)
@@ -360,15 +267,16 @@ static void gre_tnl_destroy(struct vport *vport)
 	gre_exit();
 }
 
-static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
+static int gre_send(struct vport *vport, struct sk_buff *skb)
 {
 	int hlen;
 
 	if (unlikely(!OVS_CB(skb)->tun_key))
 		return -EINVAL;
 
-	hlen = gre_hdr_len(OVS_CB(skb)->tun_key);
-	return ovs_tnl_send(vport, skb, IPPROTO_GRE, hlen, gre_build_header);
+	hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags);
+
+	return __send(vport, skb, hlen, 0, 0);
 }
 
 const struct vport_ops ovs_gre_vport_ops = {
@@ -376,7 +284,7 @@ const struct vport_ops ovs_gre_vport_ops = {
 	.create		= gre_create,
 	.destroy	= gre_tnl_destroy,
 	.get_name	= gre_get_name,
-	.send		= gre_tnl_send,
+	.send		= gre_send,
 };
 
 /* GRE64 vport. */
@@ -421,15 +329,28 @@ static void gre64_tnl_destroy(struct vport *vport)
 	gre_exit();
 }
 
-static int gre64_tnl_send(struct vport *vport, struct sk_buff *skb)
+static __be32 be64_get_high32(__be64 x)
+{
+#ifdef __BIG_ENDIAN
+	return (__force __be32)((__force u64)x >> 32);
+#else
+	return (__force __be32)x;
+#endif
+}
+
+static int gre64_send(struct vport *vport, struct sk_buff *skb)
 {
 	int hlen;
+	__be32 seq;
 
 	if (unlikely(!OVS_CB(skb)->tun_key))
 		return -EINVAL;
 
-	hlen = gre64_hdr_len(OVS_CB(skb)->tun_key);
-	return ovs_tnl_send(vport, skb, IPPROTO_GRE, hlen, gre64_build_header);
+	hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags)
+	       + GRE_HEADER_SECTION;
+
+	seq = be64_get_high32(OVS_CB(skb)->tun_key->tun_id);
+	return __send(vport, skb, hlen, seq, TUNNEL_SEQ);
 }
 
 const struct vport_ops ovs_gre64_vport_ops = {
@@ -437,5 +358,5 @@ const struct vport_ops ovs_gre64_vport_ops = {
 	.create		= gre64_create,
 	.destroy	= gre64_tnl_destroy,
 	.get_name	= gre_get_name,
-	.send		= gre64_tnl_send,
+	.send		= gre64_send,
 };
diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c
index 209df59..54c10ae 100644
--- a/datapath/vport-lisp.c
+++ b/datapath/vport-lisp.c
@@ -218,7 +218,7 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
 
 	/* Save outer tunnel values */
 	iph = ip_hdr(skb);
-	tnl_tun_key_init(&tun_key, iph, key, OVS_TNL_F_KEY);
+	tnl_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
 
 	/* Drop non-IP inner packets */
 	inner_iph = (struct iphdr *)(lisph + 1);
diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
index 708283e..7ff51fd 100644
--- a/datapath/vport-vxlan.c
+++ b/datapath/vport-vxlan.c
@@ -135,7 +135,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
 
 	/* Save outer tunnel values */
 	iph = ip_hdr(skb);
-	tnl_tun_key_init(&tun_key, iph, key, OVS_TNL_F_KEY);
+	tnl_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
 
 	ovs_tnl_rcv(vport_from_priv(vxlan_vport), skb, &tun_key);
 	goto out;
-- 
1.7.1




More information about the dev mailing list