[ovs-dev] [PATCH 02/41] gre: introduce native tunnel support for ERSPAN

Greg Rose gvrose8192 at gmail.com
Thu May 17 21:14:54 UTC 2018


From: William Tu <u9012063 at gmail.com>

Upstream commit:
    commit 84e54fe0a5eaed696dee4019c396f8396f5a908b
    Author: William Tu <u9012063 at gmail.com>
    Date:   Tue Aug 22 09:40:28 2017 -0700

    gre: introduce native tunnel support for ERSPAN

    The patch adds ERSPAN type II tunnel support.  The implementation
    is based on the draft at [1].  One of the purposes is to allow a
    Linux box to receive ERSPAN monitoring traffic sent from a Cisco
    switch by creating an ERSPAN tunnel device.  In addition, the
    patch adds ERSPAN TX, so a Linux virtual switch can redirect
    monitored traffic to the ERSPAN tunnel device; the traffic is
    encapsulated into ERSPAN and sent out.

    The implementation reuses the tunnel key as the ERSPAN session ID,
    and the 'erspan' field as the ERSPAN Index field:
    ./ip link add dev ers11 type erspan seq key 100 erspan 123 \
    			local 172.16.1.200 remote 172.16.1.100

    To use the above device as ERSPAN receiver, configure
    Nexus 5000 switch as below:

    monitor session 100 type erspan-source
      erspan-id 123
      vrf default
      destination ip 172.16.1.200
      source interface Ethernet1/11 both
      source interface Ethernet1/12 both
      no shut
    monitor erspan origin ip-address 172.16.1.100 global

    [1] https://tools.ietf.org/html/draft-foschiano-erspan-01
    [2] iproute2 patch: http://marc.info/?l=linux-netdev&m=150306086924951&w=2
    [3] test script: http://marc.info/?l=linux-netdev&m=150231021807304&w=2

    Signed-off-by: William Tu <u9012063 at gmail.com>
    Signed-off-by: Meenakshi Vohra <mvohra at vmware.com>
    Cc: Alexey Kuznetsov <kuznet at ms2.inr.ac.ru>
    Cc: Hideaki YOSHIFUJI <yoshfuji at linux-ipv6.org>
    Signed-off-by: David S. Miller <davem at davemloft.net>

This commit also backports heavily from the upstream gre, ip_gre and
ip_tunnel modules to provide the necessary ERSPAN IP GRE
infrastructure, and implements a variety of compatibility layer
changes to support it.

Cc: William Tu <u9012063 at gmail.com>
Signed-off-by: Greg Rose <gvrose8192 at gmail.com>
---
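
The commit message above reuses the tunnel key as the 10-bit ERSPAN
session ID and the 'erspan' attribute as the 20-bit index.  For
intuition, here is a minimal user-space sketch of how those values
pack into the ERSPAN Type II header words, mirroring
erspan_build_header() in the diff below (illustration only, not part
of the patch):

    #include <stdint.h>
    #include <stdio.h>

    #define ERSPAN_VERSION 0x1
    #define VER_OFFSET     12
    #define ID_MASK        0x03ff
    #define INDEX_MASK     0xfffff

    int main(void)
    {
        uint32_t key = 100;    /* 'key 100' from the ip link example */
        uint32_t index = 123;  /* 'erspan 123' from the same example */

        /* First halfword: 4-bit version, 12-bit VLAN (zero here). */
        uint16_t ver_vlan = ERSPAN_VERSION << VER_OFFSET;
        /* Second halfword: COS/En/T bits (zero here) plus session ID. */
        uint16_t session_id = key & ID_MASK;
        /* Metadata word: 20-bit index. */
        uint32_t md_index = index & INDEX_MASK;

        printf("ver_vlan=0x%04x session_id=0x%04x index=0x%05x\n",
               ver_vlan, session_id, md_index);
        return 0;
    }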
 acinclude.m4                                     |   49 +-
 datapath/linux/Modules.mk                        |    3 +-
 datapath/linux/compat/gre.c                      |  222 +++--
 datapath/linux/compat/include/linux/if_ether.h   |    4 +
 datapath/linux/compat/include/linux/skbuff.h     |   29 +
 datapath/linux/compat/include/net/dst_metadata.h |   23 +-
 datapath/linux/compat/include/net/erspan.h       |   65 ++
 datapath/linux/compat/include/net/gre.h          |   13 +-
 datapath/linux/compat/include/net/ip_tunnels.h   |  158 +++-
 datapath/linux/compat/ip_gre.c                   | 1069 ++++++++++++++++++++--
 datapath/linux/compat/ip_tunnel.c                |  493 +++++++++-
 datapath/linux/compat/ip_tunnels_core.c          |   41 +
 12 files changed, 1935 insertions(+), 234 deletions(-)
 create mode 100644 datapath/linux/compat/include/net/erspan.h

diff --git a/acinclude.m4 b/acinclude.m4
index a2444af..8962862 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -818,7 +818,54 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
   OVS_GREP_IFELSE([$KSRC/include/net/inet_frag.h],
                   frag_percpu_counter_batch[],
                   [OVS_DEFINE([HAVE_FRAG_PERCPU_COUNTER_BATCH])])
-
+  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h],
+                  [null_compute_pseudo],
+                  [OVS_DEFINE([HAVE_NULL_COMPUTE_PSEUDO])])
+  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h],
+                  [__skb_checksum_convert],
+                  [OVS_DEFINE([HAVE_SKB_CHECKSUM_CONVERT])])
+  OVS_FIND_FIELD_IFELSE([$KSRC/include/linux/netdevice.h], [net_device],
+                        [max_mtu],
+                        [OVS_DEFINE([HAVE_NET_DEVICE_MAX_MTU])])
+  OVS_GREP_IFELSE([$KSRC/include/net/erspan.h],
+                  [__LINUX_ERSPAN_H],
+                  [OVS_DEFINE([HAVE_LINUX_ERSPAN_H])])
+  OVS_FIND_PARAM_IFELSE([$KSRC/net/ipv6/ip6_gre.c],
+                        [ip6gre_tunnel_validate], [extack],
+                        [OVS_DEFINE([HAVE_IP6GRE_EXTACK])])
+  OVS_FIND_FIELD_IFELSE([$KSRC/include/net/ip6_tunnel.h], [__ip6_tnl_parm],
+                        [erspan_ver],
+                        [OVS_DEFINE([HAVE_IP6_TNL_PARM_ERSPAN_VER])])
+  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h],
+                  [SKB_GSO_IPXIP6],
+                  [OVS_DEFINE([HAVE_SKB_GSO_IPXIP6])])
+  OVS_FIND_PARAM_IFELSE([$KSRC/include/net/ipv6.h],
+                        [ip6_make_flowlabel], [fl6],
+                        [OVS_DEFINE([HAVE_IP6_MAKE_FLOWLABEL_FL6])])
+  OVS_FIND_FIELD_IFELSE([$KSRC/include/net/ipv6.h], [netns_sysctl_ipv6],
+                        [auto_flowlabels],
+                        [OVS_DEFINE([HAVE_NETNS_SYSCTL_IPV6_AUTO_FLOWLABELS])])
+  OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h],
+                  [netif_keep_dst],
+                  [OVS_DEFINE([HAVE_NETIF_KEEP_DST])])
+  OVS_FIND_FIELD_IFELSE([$KSRC/include/linux/netdevice.h], [net_device_ops],
+                        [ndo_get_iflink],
+                        [OVS_DEFINE([HAVE_NDO_GET_IFLINK])])
+  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h],
+                  [skb_set_inner_ipproto],
+                  [OVS_DEFINE([HAVE_SKB_SET_INNER_IPPROTO])])
+  OVS_GREP_IFELSE([$KSRC/include/uapi/linux/if_tunnel.h],
+                  [tunnel_encap_types],
+                  [OVS_DEFINE([HAVE_TUNNEL_ENCAP_TYPES])])
+  OVS_GREP_IFELSE([$KSRC/include/uapi/linux/if_tunnel.h],
+                  [IFLA_IPTUN_ENCAP_TYPE],
+                  [OVS_DEFINE([HAVE_IFLA_IPTUN_ENCAP_TYPE])])
+  OVS_GREP_IFELSE([$KSRC/include/uapi/linux/if_tunnel.h],
+                  [IFLA_IPTUN_COLLECT_METADATA],
+                  [OVS_DEFINE([HAVE_IFLA_IPTUN_COLLECT_METADATA])])
+  OVS_GREP_IFELSE([$KSRC/net/ipv4/gre_demux.c],
+                  [parse_gre_header],
+                  [OVS_DEFINE([HAVE_DEMUX_PARSE_GRE_HEADER])])
 
   if cmp -s datapath/linux/kcompat.h.new \
             datapath/linux/kcompat.h >/dev/null 2>&1; then
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index 0dbc1ed..e0a90c3 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -104,5 +104,6 @@ openvswitch_headers += \
 	linux/compat/include/net/netfilter/nf_conntrack_zones.h \
 	linux/compat/include/net/netfilter/nf_nat.h \
 	linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h \
-	linux/compat/include/net/sctp/checksum.h
+	linux/compat/include/net/sctp/checksum.h \
+	linux/compat/include/net/erspan.h
 EXTRA_DIST += linux/compat/build-aux/export-check-whitelist
diff --git a/datapath/linux/compat/gre.c b/datapath/linux/compat/gre.c
index a341fa3..08a5a30 100644
--- a/datapath/linux/compat/gre.c
+++ b/datapath/linux/compat/gre.c
@@ -41,91 +41,25 @@
 #ifndef USE_UPSTREAM_TUNNEL
 #if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX)
 
-#ifndef HAVE_GRE_HANDLE_OFFLOADS
-
-#ifndef HAVE_GRE_CISCO_REGISTER
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
-
-#define GREPROTO_CISCO		0
-#define GREPROTO_MAX		1
-
-struct gre_protocol {
-	int  (*handler)(struct sk_buff *skb);
-};
-static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
-
-static int gre_rcv(struct sk_buff *skb)
-{
-	const struct gre_protocol *proto;
-	u8 ver;
-	int ret;
-
-	if (!pskb_may_pull(skb, 12))
-		goto drop;
-
-	ver = skb->data[1] & 0x7f;
-	if (ver >= GREPROTO_MAX)
-		goto drop;
-
-	rcu_read_lock();
-	proto = rcu_dereference(gre_proto[ver]);
-	if (!proto || !proto->handler)
-		goto drop_unlock;
-	ret = proto->handler(skb);
-	rcu_read_unlock();
-	return ret;
-
-drop_unlock:
-	rcu_read_unlock();
-drop:
-	kfree_skb(skb);
-	return NET_RX_DROP;
-}
-
-static const struct net_protocol net_gre_protocol = {
-	.handler     = gre_rcv,
-	.netns_ok    = 1,
-};
-
-static int gre_add_protocol(const struct gre_protocol *proto, u8 version)
-{
-	if (version >= GREPROTO_MAX)
-		return -EINVAL;
-
-	if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
-		pr_err("%s: cannot register gre protocol handler\n", __func__);
-		return -EAGAIN;
-	}
-
-	return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ?
-		0 : -EBUSY;
-}
-
-static int gre_del_protocol(const struct gre_protocol *proto, u8 version)
+#define ip_gre_calc_hlen rpl_ip_gre_calc_hlen
+#define gre_calc_hlen rpl_ip_gre_calc_hlen
+static int rpl_ip_gre_calc_hlen(__be16 o_flags)
 {
-	int ret;
-
-	if (version >= GREPROTO_MAX)
-		return -EINVAL;
-
-	ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ?
-		0 : -EBUSY;
-
-	if (ret)
-		return ret;
-
-	synchronize_net();
-
-	ret = inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
-	if (ret)
-		return ret;
+	int addend = 4;
 
-	return 0;
+	if (o_flags & TUNNEL_CSUM)
+		addend += 4;
+	if (o_flags & TUNNEL_KEY)
+		addend += 4;
+	if (o_flags & TUNNEL_SEQ)
+		addend += 4;
+	return addend;
 }
 
-#endif
+#ifndef HAVE_GRE_HANDLE_OFFLOADS
+#ifndef HAVE_GRE_CISCO_REGISTER
 
+#ifdef HAVE_DEMUX_PARSE_GRE_HEADER
 static __sum16 check_checksum(struct sk_buff *skb)
 {
 	__sum16 csum = 0;
@@ -148,20 +82,6 @@ static __sum16 check_checksum(struct sk_buff *skb)
 	return csum;
 }
 
-#define ip_gre_calc_hlen rpl_ip_gre_calc_hlen
-static int ip_gre_calc_hlen(__be16 o_flags)
-{
-	int addend = 4;
-
-	if (o_flags & TUNNEL_CSUM)
-		addend += 4;
-	if (o_flags & TUNNEL_KEY)
-		addend += 4;
-	if (o_flags & TUNNEL_SEQ)
-		addend += 4;
-	return addend;
-}
-
 #define gre_flags_to_tnl_flags rpl_gre_flags_to_tnl_flags
 static __be16 gre_flags_to_tnl_flags(__be16 flags)
 {
@@ -202,13 +122,12 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 
 	tpi->flags = gre_flags_to_tnl_flags(greh->flags);
 	hdr_len = ip_gre_calc_hlen(tpi->flags);
+	tpi->hdr_len = hdr_len;
+	tpi->proto = greh->protocol;
 
 	if (!pskb_may_pull(skb, hdr_len))
 		return -EINVAL;
 
-	greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
-	tpi->proto = greh->protocol;
-
 	options = (__be32 *)(greh + 1);
 	if (greh->flags & GRE_CSUM) {
 		if (check_checksum(skb)) {
@@ -246,20 +165,25 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 	return iptunnel_pull_header(skb, hdr_len, tpi->proto, false);
 }
 
+#endif /* HAVE_DEMUX_PARSE_GRE_HEADER */
+
 static struct gre_cisco_protocol __rcu *gre_cisco_proto;
 static int gre_cisco_rcv(struct sk_buff *skb)
 {
 	struct tnl_ptk_info tpi;
-	bool csum_err = false;
 	struct gre_cisco_protocol *proto;
 
 	rcu_read_lock();
 	proto = rcu_dereference(gre_cisco_proto);
 	if (!proto)
 		goto drop;
-
-	if (parse_gre_header(skb, &tpi, &csum_err) < 0)
-		goto drop;
+#ifdef HAVE_DEMUX_PARSE_GRE_HEADER
+	{
+		bool csum_err = false;
+		if (parse_gre_header(skb, &tpi, &csum_err) < 0)
+			goto drop;
+	}
+#endif
 	proto->handler(skb, &tpi);
 	rcu_read_unlock();
 	return 0;
@@ -309,5 +233,101 @@ EXPORT_SYMBOL_GPL(rpl_gre_cisco_unregister);
 #endif /* !HAVE_GRE_CISCO_REGISTER */
 #endif
 
+void rpl_gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+			  int hdr_len)
+{
+	struct gre_base_hdr *greh;
+
+	skb_push(skb, hdr_len);
+
+	skb_reset_transport_header(skb);
+	greh = (struct gre_base_hdr *)skb->data;
+	greh->flags = tnl_flags_to_gre_flags(tpi->flags);
+	greh->protocol = tpi->proto;
+
+	if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
+		__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
+
+		if (tpi->flags&TUNNEL_SEQ) {
+			*ptr = tpi->seq;
+			ptr--;
+		}
+		if (tpi->flags&TUNNEL_KEY) {
+			*ptr = tpi->key;
+			ptr--;
+		}
+		if (tpi->flags&TUNNEL_CSUM &&
+		    !(skb_shinfo(skb)->gso_type &
+		      (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) {
+			*ptr = 0;
+			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
+								 skb->len, 0));
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(rpl_gre_build_header);
+
+/* Fills in tpi and returns header length to be pulled. */
+int rpl_gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+		     bool *csum_err, __be16 proto, int nhs)
+{
+	const struct gre_base_hdr *greh;
+	__be32 *options;
+	int hdr_len;
+
+	if (unlikely(!pskb_may_pull(skb, nhs + sizeof(struct gre_base_hdr))))
+		return -EINVAL;
+
+	greh = (struct gre_base_hdr *)(skb->data + nhs);
+	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
+		return -EINVAL;
+
+	tpi->flags = gre_flags_to_tnl_flags(greh->flags);
+	hdr_len = gre_calc_hlen(tpi->flags);
+
+	if (!pskb_may_pull(skb, nhs + hdr_len))
+		return -EINVAL;
+
+	greh = (struct gre_base_hdr *)(skb->data + nhs);
+	tpi->proto = greh->protocol;
+
+	options = (__be32 *)(greh + 1);
+	if (greh->flags & GRE_CSUM) {
+		if (skb_checksum_simple_validate(skb)) {
+			*csum_err = true;
+			return -EINVAL;
+		}
+
+		skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
+					 null_compute_pseudo);
+		options++;
+	}
+
+	if (greh->flags & GRE_KEY) {
+		tpi->key = *options;
+		options++;
+	} else {
+		tpi->key = 0;
+	}
+	if (unlikely(greh->flags & GRE_SEQ)) {
+		tpi->seq = *options;
+		options++;
+	} else {
+		tpi->seq = 0;
+	}
+	/* WCCP version 1 and 2 protocol decoding.
+	 * - Change protocol to IPv4/IPv6
+	 * - When dealing with WCCPv2, skip extra 4 bytes in GRE header
+	 */
+	if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+		tpi->proto = proto;
+		if ((*(u8 *)options & 0xF0) != 0x40)
+			hdr_len += 4;
+	}
+	tpi->hdr_len = hdr_len;
+	return hdr_len;
+}
+EXPORT_SYMBOL(rpl_gre_parse_header);
+
 #endif /* CONFIG_NET_IPGRE_DEMUX */
 #endif /* USE_UPSTREAM_TUNNEL */
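
A worked example of the variable GRE header length computed by
rpl_ip_gre_calc_hlen() above: the base header (flags + protocol) is 4
bytes, and each of the CSUM, KEY and SEQ flags adds another 4.  ERSPAN
sets only SEQ, which yields the fixed 8-byte GRE header that the new
erspan.h below describes.  A standalone sketch, with the flag values
simplified to host order (illustration only):

    #include <stdio.h>

    #define TUNNEL_CSUM 0x01
    #define TUNNEL_KEY  0x04
    #define TUNNEL_SEQ  0x08

    static int calc_hlen(int flags)
    {
        int addend = 4;             /* flags + protocol */

        if (flags & TUNNEL_CSUM)
            addend += 4;            /* checksum + reserved */
        if (flags & TUNNEL_KEY)
            addend += 4;            /* key */
        if (flags & TUNNEL_SEQ)
            addend += 4;            /* sequence number */
        return addend;
    }

    int main(void)
    {
        printf("seq only (ERSPAN): %d\n", calc_hlen(TUNNEL_SEQ));
        printf("csum+key+seq:      %d\n",
               calc_hlen(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ));
        return 0;
    }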
diff --git a/datapath/linux/compat/include/linux/if_ether.h b/datapath/linux/compat/include/linux/if_ether.h
index c989c6e..aaa88db 100644
--- a/datapath/linux/compat/include/linux/if_ether.h
+++ b/datapath/linux/compat/include/linux/if_ether.h
@@ -23,6 +23,10 @@
 #define ETH_P_NSH       0x894F          /* Network Service Header */
 #endif
 
+#ifndef ETH_P_ERSPAN
+#define ETH_P_ERSPAN	0x88BE		/* ERSPAN TYPE II */
+#endif
+
+#ifndef ETH_P_ERSPAN2
+#define ETH_P_ERSPAN2	0x22EB		/* ERSPAN version 2 (type III) */
+#endif
+
 #define inner_eth_hdr rpl_inner_eth_hdr
 static inline struct ethhdr *inner_eth_hdr(const struct sk_buff *skb)
 {
diff --git a/datapath/linux/compat/include/linux/skbuff.h b/datapath/linux/compat/include/linux/skbuff.h
index 149ef28..45778bd 100644
--- a/datapath/linux/compat/include/linux/skbuff.h
+++ b/datapath/linux/compat/include/linux/skbuff.h
@@ -21,6 +21,35 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 #define ignore_df local_df
 #endif
 
+
+#ifndef HAVE_NULL_COMPUTE_PSEUDO
+static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
+{
+	return 0;
+}
+#endif
+
+#ifndef HAVE_SKB_CHECKSUM_CONVERT
+static inline bool __skb_checksum_convert_check(struct sk_buff *skb)
+{
+	return (skb->ip_summed == CHECKSUM_NONE && skb->csum_valid);
+}
+
+static inline void __skb_checksum_convert(struct sk_buff *skb,
+					  __sum16 check, __wsum pseudo)
+{
+	skb->csum = ~pseudo;
+	skb->ip_summed = CHECKSUM_COMPLETE;
+}
+
+#define skb_checksum_try_convert(skb, proto, check, compute_pseudo)	\
+do {									\
+	if (__skb_checksum_convert_check(skb))				\
+		__skb_checksum_convert(skb, check,			\
+				       compute_pseudo(skb, proto));	\
+} while (0)
+
+#endif
 #ifndef HAVE_SKB_COPY_FROM_LINEAR_DATA_OFFSET
 static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb,
 						    const int offset, void *to,
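
The null_compute_pseudo() shim added above reflects that the GRE
checksum, unlike TCP or UDP, covers no pseudo-header, so converting a
validated skb to CHECKSUM_COMPLETE folds in a pseudo-header sum of
zero.  This is exactly how rpl_gre_parse_header() in gre.c above
consumes it:

    skb_checksum_try_convert(skb, IPPROTO_GRE, 0, null_compute_pseudo);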
diff --git a/datapath/linux/compat/include/net/dst_metadata.h b/datapath/linux/compat/include/net/dst_metadata.h
index e401eb4..93ea954 100644
--- a/datapath/linux/compat/include/net/dst_metadata.h
+++ b/datapath/linux/compat/include/net/dst_metadata.h
@@ -1,6 +1,11 @@
 #ifndef __NET_DST_METADATA_WRAPPER_H
 #define __NET_DST_METADATA_WRAPPER_H 1
 
+enum metadata_type {
+	METADATA_IP_TUNNEL,
+	METADATA_HW_PORT_MUX,
+};
+
 #ifdef USE_UPSTREAM_TUNNEL
 #include_next <net/dst_metadata.h>
 #else
@@ -11,19 +16,26 @@
 #include <net/ipv6.h>
 #include <net/ip_tunnels.h>
 
+struct hw_port_info {
+	struct net_device *lower_dev;
+	u32 port_id;
+};
+
 struct metadata_dst {
-	unsigned long dst;
+	struct dst_entry 	dst;
+	enum metadata_type	type;
 	union {
 		struct ip_tunnel_info	tun_info;
+		struct hw_port_info	port_info;
 	} u;
 };
 
 static void __metadata_dst_init(struct metadata_dst *md_dst, u8 optslen)
 {
-	unsigned long *dst;
+	struct dst_entry *dst;
 
 	dst = &md_dst->dst;
-	*dst = 0;
+
 #if 0
 	dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE,
 			DST_METADATA | DST_NOCACHE | DST_NOCOUNT);
@@ -105,11 +117,6 @@ void ovs_ip_tunnel_rcv(struct net_device *dev, struct sk_buff *skb,
 		      struct metadata_dst *tun_dst);
 
 #ifndef HAVE_METADATA_DST_ALLOC_WITH_METADATA_TYPE
-enum metadata_type {
-	METADATA_IP_TUNNEL,
-	METADATA_HW_PORT_MUX,
-};
-
 static inline struct metadata_dst *
 rpl_metadata_dst_alloc(u8 optslen, enum metadata_type type, gfp_t flags)
 {
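
With the dst_entry and type members in place, a tunnel-metadata dst
can be allocated and filled as in this short usage sketch against the
compat API above (illustration only):

    struct metadata_dst *md;

    md = rpl_metadata_dst_alloc(0, METADATA_IP_TUNNEL, GFP_ATOMIC);
    if (!md)
        return -ENOMEM;
    /* The flow key and options then populate md->u.tun_info. */
    md->u.tun_info.mode = IP_TUNNEL_INFO_TX;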
diff --git a/datapath/linux/compat/include/net/erspan.h b/datapath/linux/compat/include/net/erspan.h
new file mode 100644
index 0000000..6c5d3a7
--- /dev/null
+++ b/datapath/linux/compat/include/net/erspan.h
@@ -0,0 +1,65 @@
+#ifndef USE_UPSTREAM_TUNNEL
+#ifndef __LINUX_ERSPAN_H
+#define __LINUX_ERSPAN_H
+
+/*
+ * GRE header for ERSPAN encapsulation (8 octets [34:41]) -- 8 bytes
+ *       0                   1                   2                   3
+ *      0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |0|0|0|1|0|00000|000000000|00000|    Protocol Type for ERSPAN   |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |      Sequence Number (increments per packet per session)      |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ *  Note that in the above GRE header [RFC1701] out of the C, R, K, S,
+ *  s, Recur, Flags, Version fields only S (bit 03) is set to 1. The
+ *  other fields are set to zero, so only a sequence number follows.
+ *
+ *  ERSPAN Type II header (8 octets [42:49])
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |  Ver  |          VLAN         | COS | En|T|    Session ID     |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |      Reserved         |                  Index                |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB
+ */
+
+#define ERSPAN_VERSION	0x1
+
+#define VER_MASK	0xf000
+#define VLAN_MASK	0x0fff
+#define COS_MASK	0xe000
+#define EN_MASK		0x1800
+#define T_MASK		0x0400
+#define ID_MASK		0x03ff
+#define INDEX_MASK	0xfffff
+
+enum erspan_encap_type {
+	ERSPAN_ENCAP_NOVLAN = 0x0,	/* originally without VLAN tag */
+	ERSPAN_ENCAP_ISL = 0x1,		/* originally ISL encapsulated */
+	ERSPAN_ENCAP_8021Q = 0x2,	/* originally 802.1Q encapsulated */
+	ERSPAN_ENCAP_INFRAME = 0x3,	/* VLAN tag preserved in frame */
+};
+
+struct erspan_metadata {
+	__be32 index;   /* type II */
+};
+
+struct erspanhdr {
+	__be16 ver_vlan;
+#define VER_OFFSET  12
+	__be16 session_id;
+#define COS_OFFSET  13
+#define EN_OFFSET   11
+#define T_OFFSET    10
+	struct erspan_metadata md;
+};
+
+#endif
+#else
+#include_next <net/erspan.h>
+#endif
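
One note on this header: the receive path in ip_gre.c below calls a
get_session_id() helper that is not defined anywhere in this patch.
A plausible definition, assuming it simply masks off the 10-bit
session ID (hypothetical, shown for clarity):

    static inline u16 get_session_id(const struct erspanhdr *ershdr)
    {
        /* The low 10 bits of the second halfword carry the ID. */
        return ntohs(ershdr->session_id) & ID_MASK;
    }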
diff --git a/datapath/linux/compat/include/net/gre.h b/datapath/linux/compat/include/net/gre.h
index 764b9f1..ead86f6 100644
--- a/datapath/linux/compat/include/net/gre.h
+++ b/datapath/linux/compat/include/net/gre.h
@@ -28,11 +28,7 @@ static inline struct net_device *rpl_gretap_fb_dev_create(
 #endif
 
 #else
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37) || \
-   defined(HAVE_GRE_CISCO_REGISTER)
 #include_next <net/gre.h>
-#endif
 
 #ifndef HAVE_GRE_CISCO_REGISTER
 
@@ -62,6 +58,10 @@ struct gre_base_hdr {
 
 #endif /* HAVE_GRE_CISCO_REGISTER */
 
+#define gre_build_header rpl_gre_build_header
+void rpl_gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+			  int hdr_len);
+
 int rpl_ipgre_init(void);
 void rpl_ipgre_fini(void);
 
@@ -69,6 +69,10 @@ void rpl_ipgre_fini(void);
 struct net_device *rpl_gretap_fb_dev_create(struct net *net, const char *name,
 					u8 name_assign_type);
 
+#define gre_parse_header rpl_gre_parse_header
+int rpl_gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+			 bool *csum_err, __be16 proto, int nhs);
+
 #define gre_fb_xmit rpl_gre_fb_xmit
 netdev_tx_t rpl_gre_fb_xmit(struct sk_buff *skb);
 #endif /* USE_UPSTREAM_TUNNEL */
@@ -79,4 +83,5 @@ netdev_tx_t rpl_gre_fb_xmit(struct sk_buff *skb);
 #define gre_fill_metadata_dst ovs_gre_fill_metadata_dst
 int ovs_gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
 
+
 #endif
diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h
index 68dbd7c..2685de7 100644
--- a/datapath/linux/compat/include/net/ip_tunnels.h
+++ b/datapath/linux/compat/include/net/ip_tunnels.h
@@ -8,6 +8,11 @@
  * Only function that do not depend on ip_tunnel structure can
  * be used. Those needs to be explicitly defined in this header file. */
 #include_next <net/ip_tunnels.h>
+
+#ifndef TUNNEL_ERSPAN_OPT
+#define TUNNEL_ERSPAN_OPT	__cpu_to_be16(0x4000)
+#endif
+#define ovs_ip_tunnel_encap ip_tunnel_encap
 #else
 
 #include <linux/if_tunnel.h>
@@ -18,6 +23,21 @@
 #include <net/inet_ecn.h>
 #include <net/ip.h>
 #include <net/rtnetlink.h>
+#include <net/gro_cells.h>
+
+#ifndef MAX_IPTUN_ENCAP_OPS
+#define MAX_IPTUN_ENCAP_OPS 8
+#endif
+
+#ifndef HAVE_TUNNEL_ENCAP_TYPES
+enum tunnel_encap_types {
+	TUNNEL_ENCAP_NONE,
+	TUNNEL_ENCAP_FOU,
+	TUNNEL_ENCAP_GUE,
+};
+
+#define HAVE_TUNNEL_ENCAP_TYPES 1
+#endif
 
 #define __iptunnel_pull_header rpl___iptunnel_pull_header
 int rpl___iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
@@ -41,13 +61,17 @@ int ovs_iptunnel_handle_offloads(struct sk_buff *skb,
  */
 #define iptunnel_handle_offloads rpl_iptunnel_handle_offloads
 struct sk_buff *rpl_iptunnel_handle_offloads(struct sk_buff *skb,
-					 bool csum_help,
-					 int gso_type_mask);
+					     bool csum_help,
+					     int gso_type_mask);
 
 #define iptunnel_xmit rpl_iptunnel_xmit
 void rpl_iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
 		       __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl,
 		       __be16 df, bool xnet);
+#define ip_tunnel_xmit rpl_ip_tunnel_xmit
+void rpl_ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+			const struct iphdr *tnl_params, const u8 protocol);
+
 
 #ifndef TUNNEL_CSUM
 #define TUNNEL_CSUM	__cpu_to_be16(0x01)
@@ -64,12 +88,17 @@ struct tnl_ptk_info {
 	__be16 proto;
 	__be32 key;
 	__be32 seq;
+	int hdr_len;
 };
 
 #define PACKET_RCVD	0
 #define PACKET_REJECT	1
+#define PACKET_NEXT	2
 #endif
 
+#define IP_TNL_HASH_BITS   7
+#define IP_TNL_HASH_SIZE   (1 << IP_TNL_HASH_BITS)
+
 #ifndef TUNNEL_DONT_FRAGMENT
 #define TUNNEL_DONT_FRAGMENT	__cpu_to_be16(0x0100)
 #endif
@@ -91,6 +120,9 @@ struct tnl_ptk_info {
 #undef TUNNEL_OPTIONS_PRESENT
 #define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
 
+/* Keep error state on tunnel for 30 sec */
+#define IPTUNNEL_ERR_TIMEO	(30*HZ)
+
 /* Used to memset ip_tunnel padding. */
 #define IP_TUNNEL_KEY_SIZE	offsetofend(struct ip_tunnel_key, tp_dst)
 
@@ -131,6 +163,30 @@ struct ip_tunnel_info {
 	u8			mode;
 };
 
+/* 6rd prefix/relay information */
+#ifdef CONFIG_IPV6_SIT_6RD
+struct ip_tunnel_6rd_parm {
+	struct in6_addr		prefix;
+	__be32			relay_prefix;
+	u16			prefixlen;
+	u16			relay_prefixlen;
+};
+#endif
+
+struct ip_tunnel_encap {
+	u16			type;
+	u16			flags;
+	__be16			sport;
+	__be16			dport;
+};
+
+struct ip_tunnel_prl_entry {
+	struct ip_tunnel_prl_entry __rcu *next;
+	__be32				addr;
+	u16				flags;
+	struct rcu_head			rcu_head;
+};
+
 static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info *tun_info)
 {
 	return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET;
@@ -203,39 +259,115 @@ ip_tunnel_dst_cache_usable(const struct sk_buff *skb,
 }
 #endif
 
-#define ip_tunnel rpl_ip_tunnel
+#define ip_tunnel_dst rpl_ip_tunnel_dst
+struct rpl_ip_tunnel_dst {
+	struct dst_entry __rcu		*dst;
+	__be32				saddr;
+};
 
+#define ip_tunnel rpl_ip_tunnel
 struct ip_tunnel {
+	struct ip_tunnel __rcu	*next;
+	struct hlist_node hash_node;
 	struct net_device	*dev;
 	struct net		*net;	/* netns for packet i/o */
 
-	int		err_count;	/* Number of arrived ICMP errors */
 	unsigned long	err_time;	/* Time when the last ICMP error
-					 * arrived
-					 */
+					 * arrived */
+	int		err_count;	/* Number of arrived ICMP errors */
 
 	/* These four fields used only by GRE */
 	u32		i_seqno;	/* The last seen seqno	*/
 	u32		o_seqno;	/* The last output seqno */
 	int		tun_hlen;	/* Precalculated header length */
-	int		mlink;
+
+	/* These four fields used only by ERSPAN */
+	u32		index;		/* ERSPAN type II index */
+	u8		erspan_ver;	/* ERSPAN version */
+	u8		dir;		/* ERSPAN direction */
+	u16		hwid;		/* ERSPAN hardware ID */
+
+	struct dst_cache dst_cache;
 
 	struct ip_tunnel_parm parms;
 
+	int		mlink;
 	int		encap_hlen;	/* Encap header length (FOU,GUE) */
 	int		hlen;		/* tun_hlen + encap_hlen */
+	struct ip_tunnel_encap encap;
 
-	int		ip_tnl_net_id;
-	bool		collect_md;
+	/* for SIT */
+#ifdef CONFIG_IPV6_SIT_6RD
+	struct ip_tunnel_6rd_parm ip6rd;
+#endif
+	struct ip_tunnel_prl_entry __rcu *prl;	/* potential router list */
+	unsigned int		prl_count;	/* # of entries in PRL */
+	unsigned int		ip_tnl_net_id;
+	struct gro_cells	gro_cells;
+	__u32			fwmark;
+	bool			collect_md;
+	bool			ignore_df;
 };
 
 #define ip_tunnel_net rpl_ip_tunnel_net
 struct ip_tunnel_net {
+	struct net_device *fb_tunnel_dev;
+	struct hlist_head tunnels[IP_TNL_HASH_SIZE];
 	struct ip_tunnel __rcu *collect_md_tun;
-	struct rtnl_link_ops *rtnl_ops;
 };
 
 
+struct ip_tunnel_encap_ops {
+	size_t (*encap_hlen)(struct ip_tunnel_encap *e);
+	int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
+			    const u8 *protocol, struct flowi4 *fl4);
+};
+
+extern const struct ip_tunnel_encap_ops __rcu *
+		rpl_iptun_encaps[MAX_IPTUN_ENCAP_OPS];
+
+#define ip_encap_hlen rpl_ip_encap_hlen
+static inline int rpl_ip_encap_hlen(struct ip_tunnel_encap *e)
+{
+	const struct ip_tunnel_encap_ops *ops;
+	int hlen = -EINVAL;
+
+	if (e->type == TUNNEL_ENCAP_NONE)
+		return 0;
+
+	if (e->type >= MAX_IPTUN_ENCAP_OPS)
+		return -EINVAL;
+
+	rcu_read_lock();
+	ops = rcu_dereference(rpl_iptun_encaps[e->type]);
+	if (likely(ops && ops->encap_hlen))
+		hlen = ops->encap_hlen(e);
+	rcu_read_unlock();
+
+	return hlen;
+}
+
+static inline int ovs_ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
+				      const u8 *protocol, struct flowi4 *fl4)
+{
+	const struct ip_tunnel_encap_ops *ops;
+	int ret = -EINVAL;
+
+	if (t->encap.type == TUNNEL_ENCAP_NONE)
+		return 0;
+
+	if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
+		return -EINVAL;
+
+	rcu_read_lock();
+	ops = rcu_dereference(rpl_iptun_encaps[t->encap.type]);
+	if (likely(ops && ops->build_header))
+		ret = ops->build_header(skb, &t->encap, protocol, fl4);
+	rcu_read_unlock();
+
+	return ret;
+}
+
 #ifndef HAVE_PCPU_SW_NETSTATS
 #define ip_tunnel_get_stats64 rpl_ip_tunnel_get_stats64
 #else
@@ -322,6 +454,12 @@ struct net *rpl_ip_tunnel_get_link_net(const struct net_device *dev);
 #define __ip_tunnel_change_mtu rpl___ip_tunnel_change_mtu
 int rpl___ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
 
+#define ip_tunnel_lookup rpl_ip_tunnel_lookup
+struct ip_tunnel *rpl_ip_tunnel_lookup(struct ip_tunnel_net *itn,
+				       int link, __be16 flags,
+				       __be32 remote, __be32 local,
+				       __be32 key);
+
 static inline int iptunnel_pull_offloads(struct sk_buff *skb)
 {
 	if (skb_is_gso(skb)) {
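
To see how the header pieces above compose on transmit, a short
sketch using the fields of the ip_tunnel definition above
(illustration only; rpl_ip_encap_hlen() returns 0 when no FOU/GUE
encap is configured):

    /* Outer header budget for a GRE/ERSPAN device:
     * IPv4 header + GRE header + optional FOU/GUE encap header.
     */
    static int tunnel_total_hlen(struct ip_tunnel *t)
    {
        int encap = rpl_ip_encap_hlen(&t->encap);

        if (encap < 0)
            return encap;       /* unknown encap type */
        return sizeof(struct iphdr) + t->tun_hlen + encap;
    }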
diff --git a/datapath/linux/compat/ip_gre.c b/datapath/linux/compat/ip_gre.c
index 4304862..3001f3f 100644
--- a/datapath/linux/compat/ip_gre.c
+++ b/datapath/linux/compat/ip_gre.c
@@ -52,6 +52,7 @@
 #include <net/rtnetlink.h>
 #include <net/gre.h>
 #include <net/dst_metadata.h>
+#include <net/erspan.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
@@ -63,6 +64,10 @@
 #include "vport-netdev.h"
 
 static int gre_tap_net_id __read_mostly;
+static int ipgre_net_id __read_mostly;
+static unsigned int erspan_net_id __read_mostly;
+
+static struct rtnl_link_ops ipgre_link_ops __read_mostly;
 
 #define ip_gre_calc_hlen rpl_ip_gre_calc_hlen
 static int ip_gre_calc_hlen(__be16 o_flags)
@@ -78,100 +83,308 @@ static int ip_gre_calc_hlen(__be16 o_flags)
 	return addend;
 }
 
-#define tnl_flags_to_gre_flags rpl_tnl_flags_to_gre_flags
-static __be16 tnl_flags_to_gre_flags(__be16 tflags)
+/* Returns the least-significant 32 bits of a __be64. */
+static __be32 tunnel_id_to_key(__be64 x)
 {
-	__be16 flags = 0;
-
-	if (tflags & TUNNEL_CSUM)
-		flags |= GRE_CSUM;
-	if (tflags & TUNNEL_ROUTING)
-		flags |= GRE_ROUTING;
-	if (tflags & TUNNEL_KEY)
-		flags |= GRE_KEY;
-	if (tflags & TUNNEL_SEQ)
-		flags |= GRE_SEQ;
-	if (tflags & TUNNEL_STRICT)
-		flags |= GRE_STRICT;
-	if (tflags & TUNNEL_REC)
-		flags |= GRE_REC;
-	if (tflags & TUNNEL_VERSION)
-		flags |= GRE_VERSION;
+#ifdef __BIG_ENDIAN
+	return (__force __be32)x;
+#else
+	return (__force __be32)((__force u64)x >> 32);
+#endif
+}
 
-	return flags;
+/* Called with rcu_read_lock and BH disabled. */
+static int gre_err(struct sk_buff *skb, u32 info,
+		   const struct tnl_ptk_info *tpi)
+{
+	return PACKET_REJECT;
 }
 
-static __be64 key_to_tunnel_id(__be32 key)
+static struct dst_ops md_dst_ops = {
+	.family =		AF_UNSPEC,
+};
+
+#ifndef DST_METADATA
+#define DST_METADATA 0x0080
+#endif
+
+static void rpl__metadata_dst_init(struct metadata_dst *md_dst,
+				enum metadata_type type, u8 optslen)
+
 {
-#ifdef __BIG_ENDIAN
-	return (__force __be64)((__force u32)key);
-#else
-	return (__force __be64)((__force u64)key << 32);
+	struct dst_entry *dst;
+
+	dst = &md_dst->dst;
+	dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE,
+		 DST_METADATA | DST_NOCOUNT);
+
+#if 0
+	/* unused in OVS */
+	dst->input = dst_md_discard;
+	dst->output = dst_md_discard_out;
 #endif
+	memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
+	md_dst->type = type;
 }
 
-/* Returns the least-significant 32 bits of a __be64. */
-static __be32 tunnel_id_to_key(__be64 x)
+static struct metadata_dst *erspan_rpl_metadata_dst_alloc(u8 optslen, enum metadata_type type,
+					gfp_t flags)
 {
-#ifdef __BIG_ENDIAN
-	return (__force __be32)x;
-#else
-	return (__force __be32)((__force u64)x >> 32);
-#endif
+	struct metadata_dst *md_dst;
+
+	md_dst = kmalloc(sizeof(*md_dst) + optslen, flags);
+	if (!md_dst)
+		return NULL;
+
+	rpl__metadata_dst_init(md_dst, type, optslen);
+
+	return md_dst;
+}
+static inline struct metadata_dst *rpl_tun_rx_dst(int md_size)
+{
+	struct metadata_dst *tun_dst;
+
+	tun_dst = erspan_rpl_metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC);
+	if (!tun_dst)
+		return NULL;
+
+	tun_dst->u.tun_info.options_len = 0;
+	tun_dst->u.tun_info.mode = 0;
+	return tun_dst;
+}
+static inline struct metadata_dst *rpl__ip_tun_set_dst(__be32 saddr,
+						    __be32 daddr,
+						    __u8 tos, __u8 ttl,
+						    __be16 tp_dst,
+						    __be16 flags,
+						    __be64 tunnel_id,
+						    int md_size)
+{
+	struct metadata_dst *tun_dst;
+
+	tun_dst = rpl_tun_rx_dst(md_size);
+	if (!tun_dst)
+		return NULL;
+
+	ip_tunnel_key_init(&tun_dst->u.tun_info.key,
+			   saddr, daddr, tos, ttl,
+			   0, 0, tp_dst, tunnel_id, flags);
+	return tun_dst;
 }
 
-static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
+static inline struct metadata_dst *rpl_ip_tun_rx_dst(struct sk_buff *skb,
+						 __be16 flags,
+						 __be64 tunnel_id,
+						 int md_size)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+
+	return rpl__ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl,
+				0, flags, tunnel_id, md_size);
+}
+
+static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+		      int gre_hdr_len)
 {
 	struct net *net = dev_net(skb->dev);
-	struct metadata_dst tun_dst;
+	struct metadata_dst *tun_dst = NULL;
 	struct ip_tunnel_net *itn;
-	const struct iphdr *iph;
 	struct ip_tunnel *tunnel;
+	struct erspanhdr *ershdr;
+	const struct iphdr *iph;
+	__be32 session_id;
+	__be32 index;
+	int len;
 
-	if (tpi->proto != htons(ETH_P_TEB))
-		return PACKET_REJECT;
+	itn = net_generic(net, erspan_net_id);
+	iph = ip_hdr(skb);
+	len = gre_hdr_len + sizeof(*ershdr);
 
-	itn = net_generic(net, gre_tap_net_id);
+	if (unlikely(!pskb_may_pull(skb, len)))
+		return -ENOMEM;
 
 	iph = ip_hdr(skb);
-	tunnel = rcu_dereference(itn->collect_md_tun);
+	ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
+
+	/* The original GRE header does not have a key field,
+	 * so use the ERSPAN 10-bit session ID as the key.
+	 */
+	tpi->key = cpu_to_be32(get_session_id(ershdr));
+	index = ershdr->md.index;
+	/* OVS doesn't set tunnel key - so don't bother with it */
+	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
+				  tpi->flags,
+				  iph->saddr, iph->daddr, 0);
+
 	if (tunnel) {
-		__be16 flags;
-		__be64 tun_id;
-		int err;
-
-		if (iptunnel_pull_offloads(skb))
-			return PACKET_REJECT;
-
-		skb_pop_mac_header(skb);
-		flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
-		tun_id = key_to_tunnel_id(tpi->key);
-		ovs_ip_tun_rx_dst(&tun_dst, skb, flags, tun_id, 0);
-
-		skb_reset_network_header(skb);
-		err = IP_ECN_decapsulate(iph, skb);
-		if (unlikely(err)) {
-			if (err > 1) {
-				++tunnel->dev->stats.rx_frame_errors;
-				++tunnel->dev->stats.rx_errors;
+		if (__iptunnel_pull_header(skb,
+					   gre_hdr_len + sizeof(*ershdr),
+					   htons(ETH_P_TEB),
+					   false, false) < 0)
+			goto drop;
+
+		if (tunnel->collect_md) {
+			struct ip_tunnel_info *info;
+			struct erspan_metadata *md;
+			__be64 tun_id;
+			__be16 flags;
+
+			tpi->flags |= TUNNEL_KEY;
+			flags = tpi->flags;
+			tun_id = key32_to_tunnel_id(tpi->key);
+
+			tun_dst = rpl_ip_tun_rx_dst(skb, flags, tun_id, sizeof(*md));
+			if (!tun_dst)
 				return PACKET_REJECT;
-			}
+
+			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+			md->index = index;
+
+			info = &tun_dst->u.tun_info;
+			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+			info->options_len = sizeof(*md);
+		}
+
+		skb_reset_mac_header(skb);
+		ovs_ip_tunnel_rcv(tunnel->dev, skb, tun_dst);
+		kfree(tun_dst);
+		return PACKET_RCVD;
+	}
+drop:
+	kfree_skb(skb);
+	return PACKET_RCVD;
+}
+
+
+static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
+{
+	struct metadata_dst tun_dst;
+	const struct iphdr *iph;
+	struct ip_tunnel *tunnel;
+
+	iph = ip_hdr(skb);
+	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
+				  iph->saddr, iph->daddr, tpi->key);
+
+	if (tunnel) {
+		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
+					   raw_proto, false) < 0)
+			goto drop;
+
+		if (tunnel->dev->type != ARPHRD_NONE)
+			skb_pop_mac_header(skb);
+		else
+			skb_reset_mac_header(skb);
+		if (tunnel->collect_md) {
+			__be16 flags;
+			__be64 tun_id;
+
+			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
+			tun_id = key32_to_tunnel_id(tpi->key);
+			ovs_ip_tun_rx_dst(&tun_dst, skb, flags, tun_id, 0);
 		}
 
 		ovs_ip_tunnel_rcv(tunnel->dev, skb, &tun_dst);
 		return PACKET_RCVD;
 	}
-	return PACKET_REJECT;
+	return PACKET_NEXT;
+
+drop:
+	kfree_skb(skb);
+	return PACKET_RCVD;
+}
+
+
+static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+		     int hdr_len)
+{
+	struct net *net = dev_net(skb->dev);
+	struct ip_tunnel_net *itn;
+	int res;
+
+	if (tpi->proto == htons(ETH_P_TEB))
+		itn = net_generic(net, gre_tap_net_id);
+	else
+		itn = net_generic(net, ipgre_net_id);
+
+	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
+	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
+		/* ipgre tunnels in collect metadata mode should also
+		 * receive ETH_P_TEB traffic.
+		 */
+		itn = net_generic(net, ipgre_net_id);
+		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
+	}
+	return res;
 }
 
-static int gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
+static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
+		       const struct iphdr *tnl_params,
+		       __be16 proto)
 {
-	if (ipgre_rcv(skb, tpi) == PACKET_RCVD)
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	struct tnl_ptk_info tpi;
+
+	tpi.flags = tunnel->parms.o_flags;
+	tpi.proto = proto;
+	tpi.key = tunnel->parms.o_key;
+	if (tunnel->parms.o_flags & TUNNEL_SEQ)
+		tunnel->o_seqno++;
+	tpi.seq = htonl(tunnel->o_seqno);
+
+	/* Push GRE header. */
+	gre_build_header(skb, &tpi, tunnel->hlen);
+
+	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
+}
+
+#ifndef HAVE_DEMUX_PARSE_GRE_HEADER
+static int gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *unused_tpi)
+{
+	struct tnl_ptk_info tpi;
+	bool csum_err = false;
+	int hdr_len;
+
+	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
+	if (hdr_len < 0)
+		goto drop;
+
+	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
+		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
+			return 0;
+	}
+
+	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
+		return 0;
+drop:
+
+	kfree_skb(skb);
+	return 0;
+}
+#else
+static int gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *__tpi)
+{
+	struct tnl_ptk_info tpi = *__tpi;
+
+	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
+		     tpi.proto == htons(ETH_P_ERSPAN2))) {
+		if (erspan_rcv(skb, &tpi, 0) == PACKET_RCVD)
+			return 0;
+		goto drop;
+	}
+
+	if (ipgre_rcv(skb, &tpi, 0) == PACKET_RCVD)
 		return 0;
 
+drop:
+
 	kfree_skb(skb);
 	return 0;
 }
+#endif
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
 /* gre_handle_offloads() has a different return type on older kernels. */
@@ -342,6 +555,83 @@ err_free_skb:
 }
 EXPORT_SYMBOL(rpl_gre_fb_xmit);
 
+static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+			   __be16 proto)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	struct ip_tunnel_info *tun_info;
+	const struct ip_tunnel_key *key;
+	struct erspan_metadata *md;
+	struct rtable *rt = NULL;
+	struct tnl_ptk_info tpi;
+	bool truncate = false;
+	struct flowi4 fl;
+	int tunnel_hlen;
+	int version;
+	__be16 df;
+
+	tun_info = skb_tunnel_info(skb);
+	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+		     ip_tunnel_info_af(tun_info) != AF_INET))
+		goto err_free_skb;
+
+	key = &tun_info->key;
+	md = ip_tunnel_info_opts(tun_info);
+	if (!md)
+		goto err_free_rt;
+
+	/* ERSPAN has fixed 8 byte GRE header */
+	version = md->version;
+	tunnel_hlen = 8 + erspan_hdr_len(version);
+
+	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
+	if (!rt)
+		return;
+
+	if (gre_handle_offloads(skb, false))
+		goto err_free_rt;
+
+	if (skb->len > dev->mtu + dev->hard_header_len) {
+		pskb_trim(skb, dev->mtu + dev->hard_header_len);
+		truncate = true;
+	}
+
+	if (version == 1) {
+		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
+				    ntohl(md->u.index), truncate, true);
+		tpi.hdr_len = ERSPAN_V1_MDSIZE;
+		tpi.proto = htons(ETH_P_ERSPAN);
+	} else if (version == 2) {
+		erspan_build_header_v2(skb,
+				       ntohl(tunnel_id_to_key32(key->tun_id)),
+				       md->u.md2.dir,
+				       get_hwid(&md->u.md2),
+				       truncate, true);
+		tpi.hdr_len = ERSPAN_V2_MDSIZE;
+		tpi.proto = htons(ETH_P_ERSPAN2);
+	} else {
+		goto err_free_rt;
+	}
+
+	tpi.flags = TUNNEL_SEQ;
+	tpi.key = tunnel_id_to_key32(key->tun_id);
+	tpi.seq = htonl(tunnel->o_seqno++);
+
+	gre_build_header(skb, &tpi, 8);
+
+	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
+
+	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
+		      key->tos, key->ttl, df, false);
+	return;
+
+err_free_rt:
+	ip_rt_put(rt);
+err_free_skb:
+	kfree_skb(skb);
+	dev->stats.tx_dropped++;
+}
+
 #define GRE_FEATURES	(NETIF_F_SG |		\
 			 NETIF_F_FRAGLIST |	\
 			 NETIF_F_HIGHDMA |	\
@@ -354,23 +644,27 @@ static void __gre_tunnel_init(struct net_device *dev)
 	int t_hlen;
 
 	tunnel = netdev_priv(dev);
-	tunnel->parms.iph.protocol = IPPROTO_GRE;
 	tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
+	tunnel->parms.iph.protocol = IPPROTO_GRE;
 
 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
 
 	t_hlen = tunnel->hlen + sizeof(struct iphdr);
 
-	dev->needed_headroom	= LL_MAX_HEADER + t_hlen + 4;
-	dev->mtu		= ETH_DATA_LEN - t_hlen - 4;
-
 	dev->features		|= GRE_FEATURES;
 	dev->hw_features	|= GRE_FEATURES;
 
 	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
-		/* TCP offload with GRE SEQ is not supported. */
-		dev->features    |= NETIF_F_GSO_SOFTWARE;
-		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+		/* TCP offload with GRE SEQ is not supported, nor
+		 * can we support 2 levels of outer headers requiring
+		 * an update.
+		 */
+		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
+		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
+			dev->features    |= NETIF_F_GSO_SOFTWARE;
+			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+		}
+
 		/* Can use a lockless transmit, unless we generate
 		 * output sequences
 		 */
@@ -378,19 +672,31 @@ static void __gre_tunnel_init(struct net_device *dev)
 	}
 }
 
-/* Called with rcu_read_lock and BH disabled. */
-static int gre_err(struct sk_buff *skb, u32 info,
-		   const struct tnl_ptk_info *tpi)
-{
-	return PACKET_REJECT;
-}
-
 static struct gre_cisco_protocol ipgre_protocol = {
 	.handler        = gre_rcv,
 	.err_handler    = gre_err,
 	.priority       = 1,
 };
 
+static int __net_init ipgre_init_net(struct net *net)
+{
+	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
+}
+
+static void __net_exit ipgre_exit_net(struct net *net)
+{
+	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
+
+	ip_tunnel_delete_net(itn, &ipgre_link_ops);
+}
+
+static struct pernet_operations ipgre_net_ops = {
+	.init = ipgre_init_net,
+	.exit = ipgre_exit_net,
+	.id   = &ipgre_net_id,
+	.size = sizeof(struct ip_tunnel_net),
+};
+
 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
 {
 	__be16 flags;
@@ -433,14 +739,129 @@ out:
 	return ipgre_tunnel_validate(tb, data);
 }
 
-static void ipgre_netlink_parms(struct net_device *dev,
-				struct nlattr *data[],
-				struct nlattr *tb[],
-				struct ip_tunnel_parm *parms)
+enum {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
+	IFLA_GRE_ENCAP_TYPE = IFLA_GRE_FLAGS + 1,
+	IFLA_GRE_ENCAP_FLAGS,
+	IFLA_GRE_ENCAP_SPORT,
+	IFLA_GRE_ENCAP_DPORT,
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,3,0)
+	IFLA_GRE_COLLECT_METADATA = IFLA_GRE_ENCAP_DPORT + 1,
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0)
+	IFLA_GRE_IGNORE_DF = IFLA_GRE_COLLECT_METADATA + 1,
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,12,0)
+	IFLA_GRE_FWMARK = IFLA_GRE_IGNORE_DF + 1,
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,15,0)
+	IFLA_GRE_ERSPAN_INDEX = IFLA_GRE_FWMARK + 1,
+#endif
+};
+
+#define RPL_IFLA_GRE_MAX (IFLA_GRE_ERSPAN_INDEX + 1)
+
+static int erspan_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	__be16 flags = 0;
+	int ret;
+
+	if (!data)
+		return 0;
+
+	ret = ipgre_tap_validate(tb, data);
+	if (ret)
+		return ret;
+
+	/* ERSPAN should only have GRE sequence and key flag */
+	flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+	flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+	if (flags != (GRE_SEQ | GRE_KEY))
+		return -EINVAL;
+
+	/* The ERSPAN session ID is only 10 bits.  Since we reuse the
+	 * 32-bit key field as the ID, check its range.
+	 */
+	if (data[IFLA_GRE_IKEY] &&
+	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
+		return -EINVAL;
+
+	if (data[IFLA_GRE_OKEY] &&
+	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int ipgre_netlink_parms(struct net_device *dev,
+			       struct nlattr *data[],
+			       struct nlattr *tb[],
+			       struct ip_tunnel_parm *parms)
 {
+	struct ip_tunnel *t = netdev_priv(dev);
+
 	memset(parms, 0, sizeof(*parms));
 
 	parms->iph.protocol = IPPROTO_GRE;
+
+	if (!data)
+		return 0;
+
+	if (data[IFLA_GRE_LINK])
+		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
+
+	if (data[IFLA_GRE_IFLAGS])
+		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
+
+	if (data[IFLA_GRE_OFLAGS])
+		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
+
+	if (data[IFLA_GRE_IKEY])
+		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
+
+	if (data[IFLA_GRE_OKEY])
+		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
+
+	if (data[IFLA_GRE_LOCAL])
+		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
+
+	if (data[IFLA_GRE_REMOTE])
+		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
+
+	if (data[IFLA_GRE_TTL])
+		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
+
+	if (data[IFLA_GRE_TOS])
+		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
+
+	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
+		if (t->ignore_df)
+			return -EINVAL;
+		parms->iph.frag_off = htons(IP_DF);
+	}
+
+	if (data[IFLA_GRE_COLLECT_METADATA]) {
+		t->collect_md = true;
+		if (dev->type == ARPHRD_IPGRE)
+			dev->type = ARPHRD_NONE;
+	}
+
+	if (data[IFLA_GRE_IGNORE_DF]) {
+		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
+		  && (parms->iph.frag_off & htons(IP_DF)))
+			return -EINVAL;
+		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
+	}
+
+	if (data[IFLA_GRE_ERSPAN_INDEX]) {
+		t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+
+		if (t->index & ~INDEX_MASK)
+			return -EINVAL;
+	}
+
+	return 0;
 }
 
 static int gre_tap_init(struct net_device *dev)
@@ -462,6 +883,87 @@ static netdev_tx_t gre_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
+static inline u8 tos_to_cos(u8 tos)
+{
+	u8 dscp, cos;
+
+	dscp = tos >> 2;
+	cos = dscp >> 3;
+	return cos;
+}
+
+static void erspan_build_header(struct sk_buff *skb,
+				__be32 id, u32 index, bool truncate)
+{
+	struct iphdr *iphdr = ip_hdr(skb);
+	struct ethhdr *eth = eth_hdr(skb);
+	enum erspan_encap_type enc_type;
+	struct erspanhdr *ershdr;
+	struct qtag_prefix {
+		__be16 eth_type;
+		__be16 tci;
+	} *qp;
+	u16 vlan_tci = 0;
+
+	enc_type = ERSPAN_ENCAP_NOVLAN;
+
+	/* If the mirrored packet has a vlan tag, extract the tci and
+	 * preserve the vlan header in the mirrored frame.
+	 */
+	if (eth->h_proto == htons(ETH_P_8021Q)) {
+		qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
+		vlan_tci = ntohs(qp->tci);
+		enc_type = ERSPAN_ENCAP_INFRAME;
+	}
+
+	skb_push(skb, sizeof(*ershdr));
+	ershdr = (struct erspanhdr *)skb->data;
+	memset(ershdr, 0, sizeof(*ershdr));
+
+	ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
+				 (ERSPAN_VERSION << VER_OFFSET));
+	ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
+			   ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
+			   (enc_type << EN_OFFSET & EN_MASK) |
+			   ((truncate << T_OFFSET) & T_MASK));
+	ershdr->md.index = htonl(index & INDEX_MASK);
+}
+
+static netdev_tx_t erspan_xmit(struct sk_buff *skb,
+			       struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	bool truncate = false;
+
+	if (gre_handle_offloads(skb, false))
+		goto free_skb;
+
+	if (skb_cow_head(skb, dev->needed_headroom))
+		goto free_skb;
+
+	if (skb->len > dev->mtu) {
+		pskb_trim(skb, dev->mtu);
+		truncate = true;
+	}
+
+	/* Push ERSPAN header */
+	erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
+	tunnel->parms.o_flags &= ~TUNNEL_KEY;
+	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
+	return NETDEV_TX_OK;
+
+free_skb:
+	kfree_skb(skb);
+	dev->stats.tx_dropped++;
+	return NETDEV_TX_OK;
+}
+
+static netdev_tx_t __erspan_fb_xmit(struct sk_buff *skb)
+{
+	erspan_fb_xmit(skb, skb->dev, skb->protocol);
+	return NETDEV_TX_OK;
+}
+
 int ovs_gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 {
 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
@@ -481,9 +983,140 @@ int ovs_gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(ovs_gre_fill_metadata_dst);
 
+static int erspan_tunnel_init(struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	int t_hlen;
+
+	tunnel->tun_hlen = 8;
+	tunnel->parms.iph.protocol = IPPROTO_GRE;
+	t_hlen = tunnel->hlen + sizeof(struct iphdr) + sizeof(struct erspanhdr);
+
+	dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
+	dev->mtu = ETH_DATA_LEN - t_hlen - 4;
+	dev->features		|= GRE_FEATURES;
+	dev->hw_features	|= GRE_FEATURES;
+	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
+
+	return ip_tunnel_init(dev);
+}
+
+static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
+			unsigned short type,
+			const void *daddr, const void *saddr, unsigned int len)
+{
+	struct ip_tunnel *t = netdev_priv(dev);
+	struct iphdr *iph;
+	struct gre_base_hdr *greh;
+
+	iph = (struct iphdr *)__skb_push(skb, t->hlen + sizeof(*iph));
+	greh = (struct gre_base_hdr *)(iph+1);
+	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
+	greh->protocol = htons(type);
+
+	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
+
+	/* Set the source hardware address. */
+	if (saddr)
+		memcpy(&iph->saddr, saddr, 4);
+	if (daddr)
+		memcpy(&iph->daddr, daddr, 4);
+	if (iph->daddr)
+		return t->hlen + sizeof(*iph);
+
+	return -(t->hlen + sizeof(*iph));
+}
+
+static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
+{
+	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
+	memcpy(haddr, &iph->saddr, 4);
+	return 4;
+}
+
+static const struct header_ops ipgre_header_ops = {
+	.create	= ipgre_header,
+	.parse	= ipgre_header_parse,
+};
+
+static int ipgre_tunnel_init(struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	struct iphdr *iph = &tunnel->parms.iph;
+
+	__gre_tunnel_init(dev);
+
+	memcpy(dev->dev_addr, &iph->saddr, 4);
+	memcpy(dev->broadcast, &iph->daddr, 4);
+
+	dev->flags		= IFF_NOARP;
+	netif_keep_dst(dev);
+	dev->addr_len		= 4;
+
+	if (!tunnel->collect_md) {
+		dev->header_ops = &ipgre_header_ops;
+	}
+
+	return ip_tunnel_init(dev);
+}
+
+static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
+			      struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	const struct iphdr *tnl_params;
+
+	if (tunnel->collect_md) {
+		gre_fb_xmit(skb);
+		return NETDEV_TX_OK;
+	}
+
+	if (dev->header_ops) {
+		/* Need space for new headers */
+		if (skb_cow_head(skb, dev->needed_headroom -
+				      (tunnel->hlen + sizeof(struct iphdr))))
+			goto free_skb;
+
+		tnl_params = (const struct iphdr *)skb->data;
+
+		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
+		 * to gre header.
+		 */
+		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
+		skb_reset_mac_header(skb);
+	} else {
+		if (skb_cow_head(skb, dev->needed_headroom))
+			goto free_skb;
+
+		tnl_params = &tunnel->parms.iph;
+	}
+
+	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
+		goto free_skb;
+
+	__gre_xmit(skb, dev, tnl_params, skb->protocol);
+	return NETDEV_TX_OK;
+
+free_skb:
+	kfree_skb(skb);
+	dev->stats.tx_dropped++;
+	return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops ipgre_netdev_ops = {
+	.ndo_init		= ipgre_tunnel_init,
+	.ndo_uninit		= rpl_ip_tunnel_uninit,
+	.ndo_start_xmit		= ipgre_xmit,
+	.ndo_change_mtu		= ip_tunnel_change_mtu,
+	.ndo_get_stats64	= ip_tunnel_get_stats64,
+#ifdef HAVE_GET_LINK_NET
+	.ndo_get_iflink		= ip_tunnel_get_iflink,
+#endif
+};
+
 static const struct net_device_ops gre_tap_netdev_ops = {
 	.ndo_init		= gre_tap_init,
-	.ndo_uninit		= ip_tunnel_uninit,
+	.ndo_uninit		= rpl_ip_tunnel_uninit,
 	.ndo_start_xmit		= gre_dev_xmit,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
@@ -492,15 +1125,38 @@ static const struct net_device_ops gre_tap_netdev_ops = {
 #else
 	.ndo_change_mtu		= ip_tunnel_change_mtu,
 #endif
-	.ndo_get_stats64	= ip_tunnel_get_stats64,
+	.ndo_get_stats64	= rpl_ip_tunnel_get_stats64,
 #ifdef HAVE_NDO_GET_IFLINK
-	.ndo_get_iflink		= ip_tunnel_get_iflink,
+	.ndo_get_iflink		= rpl_ip_tunnel_get_iflink,
 #endif
 #ifdef HAVE_NDO_FILL_METADATA_DST
 	.ndo_fill_metadata_dst  = gre_fill_metadata_dst,
 #endif
 };
 
+static const struct net_device_ops erspan_netdev_ops = {
+	.ndo_init		= erspan_tunnel_init,
+	.ndo_uninit		= rpl_ip_tunnel_uninit,
+	.ndo_start_xmit		= erspan_xmit,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_change_mtu		= ip_tunnel_change_mtu,
+	.ndo_get_stats64	= rpl_ip_tunnel_get_stats64,
+#ifdef HAVE_NDO_GET_IFLINK
+	.ndo_get_iflink		= rpl_ip_tunnel_get_iflink,
+#endif
+#ifdef HAVE_NDO_FILL_METADATA_DST
+	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
+#endif
+};
+
+static void ipgre_tunnel_setup(struct net_device *dev)
+{
+	dev->netdev_ops		= &ipgre_netdev_ops;
+	dev->type		= ARPHRD_IPGRE;
+	ip_tunnel_setup(dev, ipgre_net_id);
+}
+
 static void ipgre_tap_setup(struct net_device *dev)
 {
 	ether_setup(dev);
@@ -509,6 +1165,16 @@ static void ipgre_tap_setup(struct net_device *dev)
 	ip_tunnel_setup(dev, gre_tap_net_id);
 }
 
+static void erspan_setup(struct net_device *dev)
+{
+	eth_hw_addr_random(dev);
+	ether_setup(dev);
+	dev->netdev_ops = &erspan_netdev_ops;
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	ip_tunnel_setup(dev, erspan_net_id);
+}
+
 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
 			 struct nlattr *tb[], struct nlattr *data[])
 {
@@ -554,6 +1220,8 @@ static size_t ipgre_get_size(const struct net_device *dev)
 		nla_total_size(2) +
 		/* IFLA_GRE_COLLECT_METADATA */
 		nla_total_size(0) +
+		/* IFLA_GRE_ERSPAN_INDEX */
+		nla_total_size(4) +
 		0;
 }
 
@@ -575,13 +1243,17 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 		       !!(p->iph.frag_off & htons(IP_DF))))
 		goto nla_put_failure;
 
+	if (t->index)
+		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
+			goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
+static const struct nla_policy ipgre_policy[RPL_IFLA_GRE_MAX + 1] = {
 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
@@ -592,11 +1264,28 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
+	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
+};
+
+static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
+	.kind		= "gre",
+	.maxtype	= RPL_IFLA_GRE_MAX,
+	.policy		= ipgre_policy,
+	.priv_size	= sizeof(struct ip_tunnel),
+	.setup		= ipgre_tunnel_setup,
+	.validate	= ipgre_tunnel_validate,
+	.newlink	= ipgre_newlink,
+	.dellink	= ip_tunnel_dellink,
+	.get_size	= ipgre_get_size,
+	.fill_info	= ipgre_fill_info,
+#ifdef HAVE_GET_LINK_NET
+	.get_link_net	= ip_tunnel_get_link_net,
+#endif
 };
 
 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
 	.kind		= "ovs_gretap",
-	.maxtype	= IFLA_GRE_MAX,
+	.maxtype	= RPL_IFLA_GRE_MAX,
 	.policy		= ipgre_policy,
 	.priv_size	= sizeof(struct ip_tunnel),
 	.setup		= ipgre_tap_setup,
@@ -610,6 +1299,22 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
 #endif
 };
 
+static struct rtnl_link_ops erspan_link_ops __read_mostly = {
+	.kind		= "erspan",
+	.maxtype	= RPL_IFLA_GRE_MAX,
+	.policy		= ipgre_policy,
+	.priv_size	= sizeof(struct ip_tunnel),
+	.setup		= erspan_setup,
+	.validate	= erspan_validate,
+	.newlink	= ipgre_newlink,
+	.dellink	= ip_tunnel_dellink,
+	.get_size	= ipgre_get_size,
+	.fill_info	= ipgre_fill_info,
+#ifdef HAVE_GET_LINK_NET
+	.get_link_net	= ip_tunnel_get_link_net,
+#endif
+};
+
 struct net_device *rpl_gretap_fb_dev_create(struct net *net, const char *name,
 					u8 name_assign_type)
 {
@@ -650,6 +1355,26 @@ out:
 }
 EXPORT_SYMBOL_GPL(rpl_gretap_fb_dev_create);
 
+static int __net_init erspan_init_net(struct net *net)
+{
+	return ip_tunnel_init_net(net, erspan_net_id,
+				  &erspan_link_ops, NULL);
+}
+
+static void __net_exit erspan_exit_net(struct net *net)
+{
+	struct ip_tunnel_net *itn = net_generic(net, erspan_net_id);
+
+	ip_tunnel_delete_net(itn, &erspan_link_ops);
+}
+
+static struct pernet_operations erspan_net_ops = {
+	.init = erspan_init_net,
+	.exit = erspan_exit_net,
+	.id   = &erspan_net_id,
+	.size = sizeof(struct ip_tunnel_net),
+};
+
 static int __net_init ipgre_tap_init_net(struct net *net)
 {
 	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
@@ -669,6 +1394,158 @@ static struct pernet_operations ipgre_tap_net_ops = {
 	.size = sizeof(struct ip_tunnel_net),
 };
 
+static struct net_device *erspan_fb_dev_create(struct net *net,
+					       const char *name,
+					       u8 name_assign_type)
+{
+	struct nlattr *tb[IFLA_MAX + 1];
+	struct net_device *dev;
+	LIST_HEAD(list_kill);
+	struct ip_tunnel *t;
+	int err;
+
+	memset(&tb, 0, sizeof(tb));
+
+	dev = rtnl_create_link(net, (char *)name, name_assign_type,
+			       &erspan_link_ops, tb);
+	if (IS_ERR(dev))
+		return dev;
+
+	t = netdev_priv(dev);
+	t->collect_md = true;
+	/* Configure flow based GRE device. */
+	err = ipgre_newlink(net, dev, tb, NULL);
+	if (err < 0) {
+		free_netdev(dev);
+		return ERR_PTR(err);
+	}
+
+	/* openvswitch users expect packet sizes to be unrestricted,
+	 * so set the largest MTU we can.
+	 */
+	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
+	if (err)
+		goto out;
+
+	return dev;
+out:
+	ip_tunnel_dellink(dev, &list_kill);
+	unregister_netdevice_many(&list_kill);
+	return ERR_PTR(err);
+}
+
+static struct vport_ops ovs_erspan_vport_ops;
+
+static struct vport *erspan_tnl_create(const struct vport_parms *parms)
+{
+	struct net *net = ovs_dp_get_net(parms->dp);
+	struct net_device *dev;
+	struct vport *vport;
+	int err;
+
+	vport = ovs_vport_alloc(0, &ovs_erspan_vport_ops, parms);
+	if (IS_ERR(vport))
+		return vport;
+
+	rtnl_lock();
+	dev = erspan_fb_dev_create(net, parms->name, NET_NAME_USER);
+	if (IS_ERR(dev)) {
+		rtnl_unlock();
+		ovs_vport_free(vport);
+		return ERR_CAST(dev);
+	}
+
+	err = dev_change_flags(dev, dev->flags | IFF_UP);
+	if (err < 0) {
+		rtnl_delete_link(dev);
+		rtnl_unlock();
+		ovs_vport_free(vport);
+		return ERR_PTR(err);
+	}
+
+	rtnl_unlock();
+	return vport;
+}
+
+static struct vport *erspan_create(const struct vport_parms *parms)
+{
+	struct vport *vport;
+
+	vport = erspan_tnl_create(parms);
+	if (IS_ERR(vport))
+		return vport;
+
+	return ovs_netdev_link(vport, parms->name);
+}
+
+#ifndef OVS_VPORT_TYPE_ERSPAN
+/* Until integration is done... */
+#define OVS_VPORT_TYPE_ERSPAN  107  /* ERSPAN tunnel. */
+#endif
+static struct vport_ops ovs_erspan_vport_ops = {
+	.type		= OVS_VPORT_TYPE_ERSPAN,
+	.create		= erspan_create,
+	.send		= __erspan_fb_xmit,
+#ifndef USE_UPSTREAM_TUNNEL
+	.fill_metadata_dst = gre_fill_metadata_dst,
+#endif
+	.destroy	= ovs_netdev_tunnel_destroy,
+};
+
+static struct vport_ops ovs_ipgre_vport_ops;
+
+static struct vport *ipgre_tnl_create(const struct vport_parms *parms)
+{
+	struct net *net = ovs_dp_get_net(parms->dp);
+	struct net_device *dev;
+	struct vport *vport;
+	int err;
+
+	vport = ovs_vport_alloc(0, &ovs_ipgre_vport_ops, parms);
+	if (IS_ERR(vport))
+		return vport;
+
+	rtnl_lock();
+	dev = gretap_fb_dev_create(net, parms->name, NET_NAME_USER);
+	if (IS_ERR(dev)) {
+		rtnl_unlock();
+		ovs_vport_free(vport);
+		return ERR_CAST(dev);
+	}
+
+	err = dev_change_flags(dev, dev->flags | IFF_UP);
+	if (err < 0) {
+		rtnl_delete_link(dev);
+		rtnl_unlock();
+		ovs_vport_free(vport);
+		return ERR_PTR(err);
+	}
+
+	rtnl_unlock();
+	return vport;
+}
+
+static struct vport *ipgre_create(const struct vport_parms *parms)
+{
+	struct vport *vport;
+
+	vport = ipgre_tnl_create(parms);
+	if (IS_ERR(vport))
+		return vport;
+
+	return ovs_netdev_link(vport, parms->name);
+}
+
+static struct vport_ops ovs_ipgre_vport_ops = {
+	.type		= OVS_VPORT_TYPE_GRE,
+	.create		= ipgre_create,
+	.send		= gre_fb_xmit,
+#ifndef USE_UPSTREAM_TUNNEL
+	.fill_metadata_dst = gre_fill_metadata_dst,
+#endif
+	.destroy	= ovs_netdev_tunnel_destroy,
+};
+
 int rpl_ipgre_init(void)
 {
 	int err;
@@ -677,22 +1554,31 @@ int rpl_ipgre_init(void)
 	if (err < 0)
 		goto pnet_tap_faied;
 
+	err = register_pernet_device(&erspan_net_ops);
+	if (err < 0)
+		goto pnet_erspan_failed;
+
+	err = register_pernet_device(&ipgre_net_ops);
+	if (err < 0)
+		goto pnet_ipgre_failed;
+
 	err = gre_cisco_register(&ipgre_protocol);
 	if (err < 0) {
 		pr_info("%s: can't add protocol\n", __func__);
 		goto add_proto_failed;
 	}
 
-	err = rtnl_link_register(&ipgre_tap_ops);
-	if (err < 0)
-		goto tap_ops_failed;
-
 	pr_info("GRE over IPv4 tunneling driver\n");
+
+	ovs_vport_ops_register(&ovs_ipgre_vport_ops);
+	ovs_vport_ops_register(&ovs_erspan_vport_ops);
 	return 0;
 
-tap_ops_failed:
-	gre_cisco_unregister(&ipgre_protocol);
 add_proto_failed:
+	unregister_pernet_device(&ipgre_net_ops);
+pnet_ipgre_failed:
+	unregister_pernet_device(&erspan_net_ops);
+pnet_erspan_failed:
 	unregister_pernet_device(&ipgre_tap_net_ops);
 pnet_tap_faied:
 	pr_err("Error while initializing GRE %d\n", err);
@@ -701,8 +1587,11 @@ pnet_tap_faied:
 
 void rpl_ipgre_fini(void)
 {
-	rtnl_link_unregister(&ipgre_tap_ops);
+	ovs_vport_ops_unregister(&ovs_erspan_vport_ops);
+	ovs_vport_ops_unregister(&ovs_ipgre_vport_ops);
 	gre_cisco_unregister(&ipgre_protocol);
+	unregister_pernet_device(&ipgre_net_ops);
+	unregister_pernet_device(&erspan_net_ops);
 	unregister_pernet_device(&ipgre_tap_net_ops);
 }
 
diff --git a/datapath/linux/compat/ip_tunnel.c b/datapath/linux/compat/ip_tunnel.c
index ebd1544..58870bc 100644
--- a/datapath/linux/compat/ip_tunnel.c
+++ b/datapath/linux/compat/ip_tunnel.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013 Nicira, Inc.
+ * Copyright (c) 2013,2018 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -18,7 +18,6 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/kconfig.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/skbuff.h>
@@ -52,7 +51,6 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
-#include <net/udp.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
@@ -63,18 +61,107 @@
 #include "compat.h"
 
 #ifndef USE_UPSTREAM_TUNNEL
+const struct ip_tunnel_encap_ops __rcu *
+		rpl_iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
+
+static unsigned int rpl_ip_tunnel_hash(__be32 key, __be32 remote)
+{
+	return hash_32((__force u32)key ^ (__force u32)remote,
+			 IP_TNL_HASH_BITS);
+}
+
+static bool rpl_ip_tunnel_key_match(const struct ip_tunnel_parm *p,
+				    __be16 flags, __be32 key)
+{
+	if (p->i_flags & TUNNEL_KEY) {
+		if (flags & TUNNEL_KEY)
+			return key == p->i_key;
+		else
+			/* key expected, none present */
+			return false;
+	} else
+		return !(flags & TUNNEL_KEY);
+}
+
+static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
+				    struct ip_tunnel_parm *parms)
+{
+	unsigned int h;
+	__be32 remote;
+	__be32 i_key = parms->i_key;
+
+	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
+		remote = parms->iph.daddr;
+	else
+		remote = 0;
+
+	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
+		i_key = 0;
+
+	h = rpl_ip_tunnel_hash(i_key, remote);
+	return &itn->tunnels[h];
+}
+
 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
 {
+	struct hlist_head *head = ip_bucket(itn, &t->parms);
+
 	if (t->collect_md)
 		rcu_assign_pointer(itn->collect_md_tun, t);
-	else
-		WARN_ONCE(1, "%s: collect md not set\n", t->dev->name);
+	hlist_add_head_rcu(&t->hash_node, head);
 }
 
 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
 {
 	if (t->collect_md)
 		rcu_assign_pointer(itn->collect_md_tun, NULL);
+	hlist_del_init_rcu(&t->hash_node);
+}
+
+static struct net_device *__ip_tunnel_create(struct net *net,
+					     const struct rtnl_link_ops *ops,
+					     struct ip_tunnel_parm *parms)
+{
+	int err;
+	struct ip_tunnel *tunnel;
+	struct net_device *dev;
+	char name[IFNAMSIZ];
+
+	if (parms->name[0])
+		strlcpy(name, parms->name, IFNAMSIZ);
+	else {
+		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
+			err = -E2BIG;
+			goto failed;
+		}
+		strlcpy(name, ops->kind, IFNAMSIZ);
+		strncat(name, "%d", 2);
+	}
+
+	ASSERT_RTNL();
+	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
+	if (!dev) {
+		err = -ENOMEM;
+		goto failed;
+	}
+	dev_net_set(dev, net);
+
+	dev->rtnl_link_ops = ops;
+
+	tunnel = netdev_priv(dev);
+	tunnel->parms = *parms;
+	tunnel->net = net;
+
+	err = register_netdevice(dev);
+	if (err)
+		goto failed_free;
+
+	return dev;
+
+failed_free:
+	free_netdev(dev);
+failed:
+	return ERR_PTR(err);
 }
 
 static inline void init_tunnel_flow(struct flowi4 *fl4,
@@ -118,6 +205,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
 		}
 		if (dev->type != ARPHRD_ETHER)
 			dev->flags |= IFF_POINTOPOINT;
+
+		dst_cache_reset(&tunnel->dst_cache);
 	}
 
 	if (!tdev && tunnel->parms.link)
@@ -162,6 +251,222 @@ int rpl_ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 	return rpl___ip_tunnel_change_mtu(dev, new_mtu, true);
 }
 
+static int rpl_tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
+			       struct rtable *rt, __be16 df,
+			       const struct iphdr *inner_iph)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
+	int mtu;
+
+	if (df)
+		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
+					- sizeof(struct iphdr) - tunnel->hlen;
+	else
+		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+
+	if (skb_dst(skb))
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (!skb_is_gso(skb) &&
+		    (inner_iph->frag_off & htons(IP_DF)) &&
+		    mtu < pkt_size) {
+			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+			return -E2BIG;
+		}
+	}
+#if IS_ENABLED(CONFIG_IPV6)
+	else if (skb->protocol == htons(ETH_P_IPV6)) {
+		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
+
+		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
+			   mtu >= IPV6_MIN_MTU) {
+			if ((tunnel->parms.iph.daddr &&
+			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
+			    rt6->rt6i_dst.plen == 128) {
+				rt6->rt6i_flags |= RTF_MODIFIED;
+				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
+			}
+		}
+
+		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
+					mtu < pkt_size) {
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+			return -E2BIG;
+		}
+	}
+#endif
+	return 0;
+}
+
+void rpl_ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+		        const struct iphdr *tnl_params, const u8 protocol)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	const struct iphdr *inner_iph;
+	struct flowi4 fl4;
+	u8     tos, ttl;
+	__be16 df;
+	struct rtable *rt;		/* Route to the other host */
+	unsigned int max_headroom;	/* The extra header space needed */
+	__be32 dst;
+	bool connected;
+
+	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
+	connected = (tunnel->parms.iph.daddr != 0);
+
+	dst = tnl_params->daddr;
+	if (dst == 0) {
+		/* NBMA tunnel */
+
+		if (skb_dst(skb) == NULL) {
+			dev->stats.tx_fifo_errors++;
+			goto tx_error;
+		}
+
+		if (skb->protocol == htons(ETH_P_IP)) {
+			rt = skb_rtable(skb);
+			dst = rt_nexthop(rt, inner_iph->daddr);
+		}
+#if IS_ENABLED(CONFIG_IPV6)
+		else if (skb->protocol == htons(ETH_P_IPV6)) {
+			const struct in6_addr *addr6;
+			struct neighbour *neigh;
+			bool do_tx_error_icmp;
+			int addr_type;
+
+			neigh = dst_neigh_lookup(skb_dst(skb),
+						 &ipv6_hdr(skb)->daddr);
+			if (neigh == NULL)
+				goto tx_error;
+
+			addr6 = (const struct in6_addr *)&neigh->primary_key;
+			addr_type = ipv6_addr_type(addr6);
+
+			if (addr_type == IPV6_ADDR_ANY) {
+				addr6 = &ipv6_hdr(skb)->daddr;
+				addr_type = ipv6_addr_type(addr6);
+			}
+
+			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
+				do_tx_error_icmp = true;
+			else {
+				do_tx_error_icmp = false;
+				dst = addr6->s6_addr32[3];
+			}
+			neigh_release(neigh);
+			if (do_tx_error_icmp)
+				goto tx_error_icmp;
+		}
+#endif
+		else
+			goto tx_error;
+
+		connected = false;
+	}
+
+	tos = tnl_params->tos;
+	if (tos & 0x1) {
+		tos &= ~0x1;
+		if (skb->protocol == htons(ETH_P_IP)) {
+			tos = inner_iph->tos;
+			connected = false;
+		} else if (skb->protocol == htons(ETH_P_IPV6)) {
+			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
+			connected = false;
+		}
+	}
+
+	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
+			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
+
+	if (ovs_ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
+		goto tx_error;
+
+	rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
+			 NULL;
+
+	if (!rt) {
+		rt = ip_route_output_key(tunnel->net, &fl4);
+
+		if (IS_ERR(rt)) {
+			dev->stats.tx_carrier_errors++;
+			goto tx_error;
+		}
+		if (connected)
+			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
+					  fl4.saddr);
+	}
+
+	if (rt->dst.dev == dev) {
+		ip_rt_put(rt);
+		dev->stats.collisions++;
+		goto tx_error;
+	}
+
+	if (rpl_tnl_update_pmtu(dev, skb, rt,
+				tnl_params->frag_off, inner_iph)) {
+		ip_rt_put(rt);
+		goto tx_error;
+	}
+
+	if (tunnel->err_count > 0) {
+		if (time_before(jiffies,
+				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
+			tunnel->err_count--;
+
+			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+			dst_link_failure(skb);
+		} else
+			tunnel->err_count = 0;
+	}
+
+	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
+	ttl = tnl_params->ttl;
+	if (ttl == 0) {
+		if (skb->protocol == htons(ETH_P_IP))
+			ttl = inner_iph->ttl;
+#if IS_ENABLED(CONFIG_IPV6)
+		else if (skb->protocol == htons(ETH_P_IPV6))
+			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
+#endif
+		else
+			ttl = ip4_dst_hoplimit(&rt->dst);
+	}
+
+	df = tnl_params->frag_off;
+	if (skb->protocol == htons(ETH_P_IP))
+		df |= (inner_iph->frag_off&htons(IP_DF));
+
+	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+			+ rt->dst.header_len;
+	if (max_headroom > dev->needed_headroom)
+		dev->needed_headroom = max_headroom;
+
+	if (skb_cow_head(skb, dev->needed_headroom)) {
+		ip_rt_put(rt);
+		dev->stats.tx_dropped++;
+		kfree_skb(skb);
+		return;
+	}
+
+	iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
+		      tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+
+	return;
+
+#if IS_ENABLED(CONFIG_IPV6)
+tx_error_icmp:
+	dst_link_failure(skb);
+#endif
+tx_error:
+	dev->stats.tx_errors++;
+	kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(rpl_ip_tunnel_xmit);
+
 static void ip_tunnel_dev_free(struct net_device *dev)
 {
 	free_percpu(dev->tstats);
@@ -183,24 +488,63 @@ int rpl_ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
 				  struct rtnl_link_ops *ops, char *devname)
 {
 	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
+	struct ip_tunnel_parm parms;
+	unsigned int i;
 
-	itn->collect_md_tun = NULL;
-	itn->rtnl_ops = ops;
-	return 0;
+	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&itn->tunnels[i]);
+
+	if (!ops) {
+		itn->fb_tunnel_dev = NULL;
+		return 0;
+	}
+
+	memset(&parms, 0, sizeof(parms));
+	if (devname)
+		strlcpy(parms.name, devname, IFNAMSIZ);
+
+	rtnl_lock();
+	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
+	/* FB netdevice is special: we have one, and only one per netns.
+	 * Allowing to move it to another netns is clearly unsafe.
+	 */
+	if (!IS_ERR(itn->fb_tunnel_dev)) {
+		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
+		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
+	}
+	rtnl_unlock();
+
+	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
 }
 
 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
 			      struct rtnl_link_ops *ops)
 {
-	struct ip_tunnel *t;
-
-	t = rtnl_dereference(itn->collect_md_tun);
-	if (!t)
-		return;
-	unregister_netdevice_queue(t->dev, head);
+	struct net *net = dev_net(itn->fb_tunnel_dev);
+	struct net_device *dev, *aux;
+	int h;
+
+	for_each_netdev_safe(net, dev, aux)
+		if (dev->rtnl_link_ops == ops)
+			unregister_netdevice_queue(dev, head);
+
+	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
+		struct ip_tunnel *t;
+		struct hlist_node *n;
+		struct hlist_head *thead = &itn->tunnels[h];
+
+		hlist_for_each_entry_safe(t, n, thead, hash_node)
+			/* If dev is in the same netns, it has already
+			 * been added to the list by the previous loop.
+			 */
+			if (!net_eq(dev_net(t->dev), net))
+				unregister_netdevice_queue(t->dev, head);
+	}
 }
 
-void rpl_ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
+void rpl_ip_tunnel_delete_net(struct ip_tunnel_net *itn,
+			      struct rtnl_link_ops *ops)
 {
 	LIST_HEAD(list);
 
@@ -251,20 +595,41 @@ int rpl_ip_tunnel_init(struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct iphdr *iph = &tunnel->parms.iph;
+	int err;
 
-	dev->destructor	= ip_tunnel_dev_free;
-	dev->tstats = (typeof(dev->tstats)) netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+#ifndef HAVE_NEEDS_FREE_NETDEV
+	dev->destructor = ip_tunnel_dev_free;
+#else
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ip_tunnel_dev_free;
+#endif
+	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 	if (!dev->tstats)
 		return -ENOMEM;
+
+	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
+	if (err) {
+		free_percpu(dev->tstats);
+		return err;
+	}
+
+	err = gro_cells_init(&tunnel->gro_cells, dev);
+	if (err) {
+		dst_cache_destroy(&tunnel->dst_cache);
+		free_percpu(dev->tstats);
+		return err;
+	}
+
 	tunnel->dev = dev;
 	tunnel->net = dev_net(dev);
 	strcpy(tunnel->parms.name, dev->name);
 	iph->version		= 4;
 	iph->ihl		= 5;
 
-	if (tunnel->collect_md)
+	if (tunnel->collect_md) {
 		dev->features |= NETIF_F_NETNS_LOCAL;
-
+		netif_keep_dst(dev);
+	}
 	return 0;
 }
 
@@ -300,4 +665,93 @@ struct net *rpl_ip_tunnel_get_link_net(const struct net_device *dev)
 	return tunnel->net;
 }
 
+struct ip_tunnel *rpl_ip_tunnel_lookup(struct ip_tunnel_net *itn,
+				       int link, __be16 flags,
+				       __be32 remote, __be32 local,
+				       __be32 key)
+{
+	unsigned int hash;
+	struct ip_tunnel *t, *cand = NULL;
+	struct hlist_head *head;
+
+	hash = rpl_ip_tunnel_hash(key, remote);
+	head = &itn->tunnels[hash];
+
+	hlist_for_each_entry_rcu(t, head, hash_node) {
+		if (local != t->parms.iph.saddr ||
+		    remote != t->parms.iph.daddr ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (!rpl_ip_tunnel_key_match(&t->parms, flags, key))
+			continue;
+
+		if (t->parms.link == link)
+			return t;
+		else
+			cand = t;
+	}
+
+	hlist_for_each_entry_rcu(t, head, hash_node) {
+		if (remote != t->parms.iph.daddr ||
+		    t->parms.iph.saddr != 0 ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (!rpl_ip_tunnel_key_match(&t->parms, flags, key))
+			continue;
+
+		if (t->parms.link == link)
+			return t;
+		else if (!cand)
+			cand = t;
+	}
+
+	hash = rpl_ip_tunnel_hash(key, 0);
+	head = &itn->tunnels[hash];
+
+	hlist_for_each_entry_rcu(t, head, hash_node) {
+		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
+		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
+			continue;
+
+		if (!(t->dev->flags & IFF_UP))
+			continue;
+
+		if (!rpl_ip_tunnel_key_match(&t->parms, flags, key))
+			continue;
+
+		if (t->parms.link == link)
+			return t;
+		else if (!cand)
+			cand = t;
+	}
+
+	if (flags & TUNNEL_NO_KEY)
+		goto skip_key_lookup;
+
+	hlist_for_each_entry_rcu(t, head, hash_node) {
+		if (t->parms.i_key != key ||
+		    t->parms.iph.saddr != 0 ||
+		    t->parms.iph.daddr != 0 ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (t->parms.link == link)
+			return t;
+		else if (!cand)
+			cand = t;
+	}
+
+skip_key_lookup:
+	if (cand)
+		return cand;
+
+	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
+		return netdev_priv(itn->fb_tunnel_dev);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(rpl_ip_tunnel_lookup);
+
 #endif
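The rpl_ip_tunnel_lookup() added above scans a hash bucket several times with
progressively weaker match criteria (exact saddr/daddr/key, then daddr only,
then local or multicast address, then key only) and finally falls back to the
per-netns fallback device. The bucket index is
hash_32(key ^ remote, IP_TNL_HASH_BITS). A standalone sketch of that bucket
computation, with the constants assumed from recent include/linux/hash.h and
net/ip_tunnels.h rather than taken from this patch:

    #include <stdint.h>
    #include <stdio.h>

    #define GOLDEN_RATIO_32   0x61C88647u  /* assumed from include/linux/hash.h */
    #define IP_TNL_HASH_BITS  7            /* 128 buckets, assumed from ip_tunnels.h */

    /* Mirrors the kernel's multiplicative hash_32(). */
    static uint32_t hash_32(uint32_t val, unsigned int bits)
    {
        return (val * GOLDEN_RATIO_32) >> (32 - bits);
    }

    int main(void)
    {
        uint32_t key = 100;           /* e.g. the ERSPAN session ID/key */
        uint32_t remote = 0xac1001c8; /* 172.16.1.200, host byte order here */

        printf("bucket = %u\n", hash_32(key ^ remote, IP_TNL_HASH_BITS));
        return 0;
    }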
diff --git a/datapath/linux/compat/ip_tunnels_core.c b/datapath/linux/compat/ip_tunnels_core.c
index 7ade6c1..90e838a 100644
--- a/datapath/linux/compat/ip_tunnels_core.c
+++ b/datapath/linux/compat/ip_tunnels_core.c
@@ -129,6 +129,47 @@ error:
 }
 EXPORT_SYMBOL_GPL(ovs_iptunnel_handle_offloads);
 
+struct sk_buff *rpl_iptunnel_handle_offloads(struct sk_buff *skb,
+					     bool csum_help,
+					     int gso_type_mask)
+{
+	int err;
+
+	if (likely(!skb->encapsulation)) {
+		skb_reset_inner_headers(skb);
+		skb->encapsulation = 1;
+	}
+
+	if (skb_is_gso(skb)) {
+		err = skb_unclone(skb, GFP_ATOMIC);
+		if (unlikely(err))
+			goto error;
+		skb_shinfo(skb)->gso_type |= gso_type_mask;
+		return skb;
+	}
+
+	/* If packet is not gso and we are resolving any partial checksum,
+	 * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL
+	 * on the outer header without confusing devices that implement
+	 * NETIF_F_IP_CSUM with encapsulation.
+	 */
+	if (csum_help)
+		skb->encapsulation = 0;
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) {
+		err = skb_checksum_help(skb);
+		if (unlikely(err))
+			goto error;
+	} else if (skb->ip_summed != CHECKSUM_PARTIAL)
+		skb->ip_summed = CHECKSUM_NONE;
+
+	return skb;
+error:
+	kfree_skb(skb);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(rpl_iptunnel_handle_offloads);
+
 int rpl___iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
 			       __be16 inner_proto, bool raw_proto, bool xnet)
 {
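rpl_iptunnel_handle_offloads() above follows the upstream helper's contract:
for GSO skbs it only ORs in the gso_type mask; for non-GSO skbs it either
resolves a partial checksum (when csum_help is set) or downgrades ip_summed to
CHECKSUM_NONE, and it frees the skb and returns an ERR_PTR on failure. A
hedged sketch of the expected caller pattern (kernel context only;
gre_prepare() is a hypothetical name, not part of this patch):

    static int gre_prepare(struct sk_buff *skb, bool csum)
    {
        /* Pick the GSO type the same way upstream GRE does. */
        skb = rpl_iptunnel_handle_offloads(skb, csum,
                                           csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
        if (IS_ERR(skb))
            return PTR_ERR(skb);  /* skb has already been freed */

        /* ... push the GRE header, then hand off to iptunnel_xmit() ... */
        return 0;
    }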
-- 
1.8.3.1


