[ovs-dev] [PATCH 1/3] v7: datapath: Add support for tun_key to Open vSwitch datapath

Pravin B Shelar pshelar at nicira.com
Mon Oct 15 01:41:06 UTC 2012


This patch was originally posted by Kyle. I fixed a few issues found in the
earlier version.

V7:
- Fix according to comments posted on V6.
V6:
- Address more comments from Jesse.
V5:
- Address another round of comments from Jesse.
V4:
- Address 2 comments from Jesse:
  - When processing actions, if OVS_CB(skb)->tun_key is NULL, point it at one
    on the stack temporarily. This goes away when we remove the ability to set
    tun_id outside the scope of tun_key.
  - Move tun_key to the end of struct sw_flow_key.
V3:
- Fix issues found during review by Jesse.
- Add a NEWS entry about the tunnel code no longer assuming symmetric input and
  output tunnel keys.

V2:
- Fix blank line addition/removal found by Simon.
- Fix hex printing output found by Simon.

--8<--------------------------cut here-------------------------->8--

This is a first pass at providing a tun_key which can be
used as the basis for flow-based tunneling. The
tun_key includes and replaces the tun_id in both struct
ovs_skb_cb and struct sw_flow_key.

This patch allows all existing tun_id behaviour to still work. Existing
users of tun_id are redirected to tun_key->tun_id to retain compatibility.
However, when the userspace code is updated to make use of the new tun_key,
the old behaviour will be deprecated and removed.

NOTE: With these changes, the tunneling code no longer assumes input and
output keys are symmetric.  If they are not, PMTUD needs to be disabled
for tunneling to work.

Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
CC: Kyle Mestery <kmestery at cisco.com>
Cc: Simon Horman <horms at verge.net.au>
Cc: Jesse Gross <jesse at nicira.com>
---
 NEWS                        |    3 +
 datapath/actions.c          |   36 +++++--
 datapath/datapath.c         |    9 +-
 datapath/datapath.h         |    5 +-
 datapath/flow.c             |   86 +++++++++++++++--
 datapath/flow.h             |   12 ++-
 datapath/tunnel.c           |  217 ++++++++++++++++++++++++++-----------------
 datapath/tunnel.h           |   26 ++++--
 datapath/vport-capwap.c     |   92 ++++++++++++++----
 datapath/vport-gre.c        |  128 +++++++++++++++++++------
 datapath/vport.c            |    2 +-
 include/linux/openvswitch.h |   18 +++-
 lib/dpif-netdev.c           |    1 +
 lib/odp-util.c              |   15 ++-
 lib/odp-util.h              |    3 +-
 15 files changed, 487 insertions(+), 166 deletions(-)

diff --git a/NEWS b/NEWS
index d841cb3..5ab6a6a 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,8 @@
 post-v1.8.0
 ------------------------
+    - The tunneling code no longer assumes input and output keys are symmetric.
+      If they are not, PMTUD needs to be disabled for tunneling to work. Note
+      this only applies to flow-based keys.
     - FreeBSD is now a supported platform, thanks to code contributions from
       Gaetano Catalli, Ed Maste, and Giuseppe Lettieri.
     - ovs-bugtool: New --ovs option to report only OVS related information.
diff --git a/datapath/actions.c b/datapath/actions.c
index ec9b595..db85642 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -37,7 +37,8 @@
 #include "vport.h"
 
 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
-			const struct nlattr *attr, int len, bool keep_skb);
+			      const struct nlattr *attr, int len,
+			      struct ovs_key_ipv4_tunnel *tun_key, bool keep_skb);
 
 static int make_writable(struct sk_buff *skb, int write_len)
 {
@@ -329,11 +330,14 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
 	}
 
 	return do_execute_actions(dp, skb, nla_data(acts_list),
-						 nla_len(acts_list), true);
+				  nla_len(acts_list),
+				  OVS_CB(skb)->tun_key,
+				  true);
 }
 
 static int execute_set_action(struct sk_buff *skb,
-				 const struct nlattr *nested_attr)
+				 const struct nlattr *nested_attr,
+				 struct ovs_key_ipv4_tunnel *tun_key)
 {
 	int err = 0;
 
@@ -343,7 +347,21 @@ static int execute_set_action(struct sk_buff *skb,
 		break;
 
 	case OVS_KEY_ATTR_TUN_ID:
-		OVS_CB(skb)->tun_id = nla_get_be64(nested_attr);
+		if (!OVS_CB(skb)->tun_key) {
+			/* If tun_key is NULL for this skb, assign it to
+			 * a value the caller passed in for action processing
+			 * and output. This can disappear once we drop support
+			 * for setting tun_id outside of tun_key.
+			 */
+			memset(tun_key, 0, sizeof(struct ovs_key_ipv4_tunnel));
+			OVS_CB(skb)->tun_key = tun_key;
+		}
+
+		OVS_CB(skb)->tun_key->tun_id = nla_get_be64(nested_attr);
+		break;
+
+	case OVS_KEY_ATTR_IPV4_TUNNEL:
+		OVS_CB(skb)->tun_key = nla_data(nested_attr);
 		break;
 
 	case OVS_KEY_ATTR_ETHERNET:
@@ -368,7 +386,8 @@ static int execute_set_action(struct sk_buff *skb,
 
 /* Execute a list of actions against 'skb'. */
 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
-			const struct nlattr *attr, int len, bool keep_skb)
+			const struct nlattr *attr, int len,
+			struct ovs_key_ipv4_tunnel *tun_key, bool keep_skb)
 {
 	/* Every output action needs a separate clone of 'skb', but the common
 	 * case is just a single output action, so that doing a clone and
@@ -407,7 +426,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			break;
 
 		case OVS_ACTION_ATTR_SET:
-			err = execute_set_action(skb, nla_data(a));
+			err = execute_set_action(skb, nla_data(a), tun_key);
 			break;
 
 		case OVS_ACTION_ATTR_SAMPLE:
@@ -458,6 +477,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
 	struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
 	struct loop_counter *loop;
 	int error;
+	struct ovs_key_ipv4_tunnel tun_key;
 
 	/* Check whether we've looped too much. */
 	loop = &__get_cpu_var(loop_counters);
@@ -469,9 +489,9 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
 		goto out_loop;
 	}
 
-	OVS_CB(skb)->tun_id = 0;
+	OVS_CB(skb)->tun_key = NULL;
 	error = do_execute_actions(dp, skb, acts->actions,
-					 acts->actions_len, false);
+					 acts->actions_len, &tun_key, false);
 
 	/* Check whether sub-actions looped too much. */
 	if (unlikely(loop->looping))
diff --git a/datapath/datapath.c b/datapath/datapath.c
index a6915fb..3f963be 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -587,12 +587,19 @@ static int validate_set(const struct nlattr *a,
 
 	switch (key_type) {
 	const struct ovs_key_ipv4 *ipv4_key;
+	const struct ovs_key_ipv4_tunnel *tun_key;
 
 	case OVS_KEY_ATTR_PRIORITY:
 	case OVS_KEY_ATTR_TUN_ID:
 	case OVS_KEY_ATTR_ETHERNET:
 		break;
 
+	case OVS_KEY_ATTR_IPV4_TUNNEL:
+		tun_key = nla_data(ovs_key);
+		if (!tun_key->ipv4_dst)
+			return -EINVAL;
+		break;
+
 	case OVS_KEY_ATTR_IPV4:
 		if (flow_key->eth.type != htons(ETH_P_IP))
 			return -EINVAL;
@@ -785,7 +792,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 
 	err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority,
 					     &flow->key.phy.in_port,
-					     &flow->key.phy.tun_id,
+					     &flow->key.tun.tun_key,
 					     a[OVS_PACKET_ATTR_KEY]);
 	if (err)
 		goto err_flow_put;
diff --git a/datapath/datapath.h b/datapath/datapath.h
index affbf0e..c5df12d 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -96,7 +96,8 @@ struct datapath {
 /**
  * struct ovs_skb_cb - OVS data in skb CB
  * @flow: The flow associated with this packet.  May be %NULL if no flow.
- * @tun_id: ID of the tunnel that encapsulated this packet.  It is 0 if the
+ * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
+ * packet is not being tunneled.
  * @ip_summed: Consistently stores L4 checksumming status across different
  * kernel versions.
  * @csum_start: Stores the offset from which to start checksumming independent
@@ -107,7 +108,7 @@ struct datapath {
  */
 struct ovs_skb_cb {
 	struct sw_flow		*flow;
-	__be64			tun_id;
+	struct ovs_key_ipv4_tunnel  *tun_key;
 #ifdef NEED_CSUM_NORMALIZE
 	enum csum_type		ip_summed;
 	u16			csum_start;
diff --git a/datapath/flow.c b/datapath/flow.c
index d07337c..bec4ebf 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -629,7 +629,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 	memset(key, 0, sizeof(*key));
 
 	key->phy.priority = skb->priority;
-	key->phy.tun_id = OVS_CB(skb)->tun_id;
+	if (OVS_CB(skb)->tun_key)
+		key->tun.tun_key = *OVS_CB(skb)->tun_key;
 	key->phy.in_port = in_port;
 
 	skb_reset_mac_header(skb);
@@ -847,6 +848,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 
 	/* Not upstream. */
 	[OVS_KEY_ATTR_TUN_ID] = sizeof(__be64),
+	[OVS_KEY_ATTR_IPV4_TUNNEL] = sizeof(struct ovs_key_ipv4_tunnel),
 };
 
 static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
@@ -1022,9 +1024,39 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 		swkey->phy.in_port = DP_MAX_PORTS;
 	}
 
-	if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) {
-		swkey->phy.tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]);
+	if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID) &&
+	    attrs & (1ULL << OVS_KEY_ATTR_IPV4_TUNNEL)) {
+		struct ovs_key_ipv4_tunnel *tun_key;
+		__be64 tun_id;
+
+		tun_key = nla_data(a[OVS_KEY_ATTR_IPV4_TUNNEL]);
+
+		if (!tun_key->ipv4_dst)
+			return -EINVAL;
+		if (!(tun_key->tun_flags & FLOW_TNL_F_KEY))
+			return -EINVAL;
+
+		tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]);
+		if (tun_id != tun_key->tun_id)
+			return -EINVAL;
+
+		swkey->tun.tun_key = *tun_key;
+		attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID);
+		attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4_TUNNEL);
+	} else if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) {
+		swkey->tun.tun_key.tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]);
+		swkey->tun.tun_key.tun_flags |= FLOW_TNL_F_KEY;
+
 		attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID);
+	} else if (attrs & (1ULL << OVS_KEY_ATTR_IPV4_TUNNEL)) {
+		struct ovs_key_ipv4_tunnel *tun_key;
+		tun_key = nla_data(a[OVS_KEY_ATTR_IPV4_TUNNEL]);
+
+		if (!tun_key->ipv4_dst)
+			return -EINVAL;
+
+		swkey->tun.tun_key = *tun_key;
+		attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4_TUNNEL);
 	}
 
 	/* Data attributes. */
@@ -1162,14 +1194,16 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
  * get the metadata, that is, the parts of the flow key that cannot be
  * extracted from the packet itself.
  */
-int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id,
+int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
+				   struct ovs_key_ipv4_tunnel *tun_key,
 				   const struct nlattr *attr)
 {
 	const struct nlattr *nla;
 	int rem;
+	__be64 tun_id;
 
 	*in_port = DP_MAX_PORTS;
-	*tun_id = 0;
+	memset(tun_key, 0, sizeof(*tun_key));
 	*priority = 0;
 
 	nla_for_each_nested(nla, attr, rem) {
@@ -1185,7 +1219,35 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id,
 				break;
 
 			case OVS_KEY_ATTR_TUN_ID:
-				*tun_id = nla_get_be64(nla);
+				tun_id = nla_get_be64(nla);
+
+				if (tun_key->ipv4_dst) {
+					if (!(tun_key->tun_flags & FLOW_TNL_F_KEY))
+						return -EINVAL;
+					if (tun_key->tun_id != tun_id)
+						return -EINVAL;
+					break;
+				}
+				tun_key->tun_id = tun_id;
+				tun_key->tun_flags |= FLOW_TNL_F_KEY;
+
+				break;
+
+			case OVS_KEY_ATTR_IPV4_TUNNEL:
+				if (tun_key->tun_flags & FLOW_TNL_F_KEY) {
+					tun_id = tun_key->tun_id;
+
+					memcpy(tun_key, nla_data(nla), sizeof(*tun_key));
+					if (!(tun_key->tun_flags & FLOW_TNL_F_KEY))
+						return -EINVAL;
+
+					if (tun_key->tun_id != tun_id)
+						return -EINVAL;
+				} else
+					memcpy(tun_key, nla_data(nla), sizeof(*tun_key));
+
+				if (!tun_key->ipv4_dst)
+					return -EINVAL;
 				break;
 
 			case OVS_KEY_ATTR_IN_PORT:
@@ -1210,8 +1272,16 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 	    nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
 		goto nla_put_failure;
 
-	if (swkey->phy.tun_id != cpu_to_be64(0) &&
-	    nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, swkey->phy.tun_id))
+	if (swkey->tun.tun_key.ipv4_dst) {
+		struct ovs_key_ipv4_tunnel *tun_key;
+		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4_TUNNEL,
+				  sizeof(*tun_key));
+		if (!nla)
+			goto nla_put_failure;
+		tun_key = nla_data(nla);
+		memcpy(tun_key, &swkey->tun.tun_key, sizeof(*tun_key));
+	} else if ((swkey->tun.tun_key.tun_flags & FLOW_TNL_F_KEY) &&
+	    nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, swkey->tun.tun_key.tun_id))
 		goto nla_put_failure;
 
 	if (swkey->phy.in_port != DP_MAX_PORTS &&
diff --git a/datapath/flow.h b/datapath/flow.h
index 02c563a..c52e029 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -42,11 +42,13 @@ struct sw_flow_actions {
 
 struct sw_flow_key {
 	struct {
-		__be64	tun_id;		/* Encapsulating tunnel ID. */
 		u32	priority;	/* Packet QoS priority. */
 		u16	in_port;	/* Input switch port (or DP_MAX_PORTS). */
 	} phy;
 	struct {
+		struct ovs_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel key. */
+	} tun;
+	struct {
 		u8     src[ETH_ALEN];	/* Ethernet source address. */
 		u8     dst[ETH_ALEN];	/* Ethernet destination address. */
 		__be16 tci;		/* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
@@ -150,6 +152,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
  *                         ------  ---  ------  -----
  *  OVS_KEY_ATTR_PRIORITY      4    --     4      8
  *  OVS_KEY_ATTR_TUN_ID        8    --     4     12
+ *  OVS_KEY_ATTR_IPV4_TUNNEL  24    --     4     28
  *  OVS_KEY_ATTR_IN_PORT       4    --     4      8
  *  OVS_KEY_ATTR_ETHERNET     12    --     4     16
  *  OVS_KEY_ATTR_ETHERTYPE     2     2     4      8  (outer VLAN ethertype)
@@ -160,14 +163,15 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
  *  OVS_KEY_ATTR_ICMPV6        2     2     4      8
  *  OVS_KEY_ATTR_ND           28    --     4     32
  *  -------------------------------------------------
- *  total                                       156
+ *  total                                       184
  */
-#define FLOW_BUFSIZE 156
+#define FLOW_BUFSIZE 184
 
 int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
 int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 		      const struct nlattr *);
-int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id,
+int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
+				   struct ovs_key_ipv4_tunnel *tun_key,
 				   const struct nlattr *);
 
 #define MAX_ACTIONS_BUFSIZE	(16 * 1024)
diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index d651c11..72ead8f 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -367,9 +367,9 @@ struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr,
 	return NULL;
 }
 
-static void ecn_decapsulate(struct sk_buff *skb, u8 tos)
+static void ecn_decapsulate(struct sk_buff *skb)
 {
-	if (unlikely(INET_ECN_is_ce(tos))) {
+	if (unlikely(INET_ECN_is_ce(OVS_CB(skb)->tun_key->ipv4_tos))) {
 		__be16 protocol = skb->protocol;
 
 		skb_set_network_header(skb, ETH_HLEN);
@@ -416,7 +416,7 @@ static void ecn_decapsulate(struct sk_buff *skb, u8 tos)
  * - skb->csum does not include the inner Ethernet header.
  * - The layer pointers are undefined.
  */
-void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos)
+void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb)
 {
 	struct ethhdr *eh;
 
@@ -433,7 +433,7 @@ void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos)
 	skb_clear_rxhash(skb);
 	secpath_reset(skb);
 
-	ecn_decapsulate(skb, tos);
+	ecn_decapsulate(skb);
 	vlan_set_tci(skb, 0);
 
 	if (unlikely(compute_ip_summed(skb, false))) {
@@ -613,7 +613,7 @@ static void ipv6_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
 
 bool ovs_tnl_frag_needed(struct vport *vport,
 			 const struct tnl_mutable_config *mutable,
-			 struct sk_buff *skb, unsigned int mtu, __be64 flow_key)
+			 struct sk_buff *skb, unsigned int mtu)
 {
 	unsigned int eth_hdr_len = ETH_HLEN;
 	unsigned int total_length = 0, header_length = 0, payload_length;
@@ -697,17 +697,6 @@ bool ovs_tnl_frag_needed(struct vport *vport,
 		ipv6_build_icmp(skb, nskb, mtu, payload_length);
 #endif
 
-	/*
-	 * Assume that flow based keys are symmetric with respect to input
-	 * and output and use the key that we were going to put on the
-	 * outgoing packet for the fake received packet.  If the keys are
-	 * not symmetric then PMTUD needs to be disabled since we won't have
-	 * any way of synthesizing packets.
-	 */
-	if ((mutable->flags & (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) ==
-	    (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION))
-		OVS_CB(nskb)->tun_id = flow_key;
-
 	if (unlikely(compute_ip_summed(nskb, false))) {
 		kfree_skb(nskb);
 		return false;
@@ -721,14 +710,26 @@ bool ovs_tnl_frag_needed(struct vport *vport,
 static bool check_mtu(struct sk_buff *skb,
 		      struct vport *vport,
 		      const struct tnl_mutable_config *mutable,
-		      const struct rtable *rt, __be16 *frag_offp)
+		      const struct rtable *rt, __be16 *frag_offp,
+		      int tunnel_hlen)
 {
-	bool df_inherit = mutable->flags & TNL_F_DF_INHERIT;
-	bool pmtud = mutable->flags & TNL_F_PMTUD;
-	__be16 frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : 0;
+	bool df_inherit;
+	bool pmtud;
+	__be16 frag_off;
 	int mtu = 0;
 	unsigned int packet_length = skb->len - ETH_HLEN;
 
+	if (OVS_CB(skb)->tun_key->ipv4_dst) {
+		df_inherit = false;
+		pmtud = false;
+		frag_off = OVS_CB(skb)->tun_key->tun_flags & FLOW_TNL_F_DONT_FRAGMENT ?
+				  htons(IP_DF) : 0;
+	} else {
+		df_inherit = mutable->flags & TNL_F_DF_INHERIT;
+		pmtud = mutable->flags & TNL_F_PMTUD;
+		frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : 0;
+	}
+
 	/* Allow for one level of tagging in the packet length. */
 	if (!vlan_tx_tag_present(skb) &&
 	    eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
@@ -746,7 +747,7 @@ static bool check_mtu(struct sk_buff *skb,
 
 		mtu = dst_mtu(&rt_dst(rt))
 			- ETH_HLEN
-			- mutable->tunnel_hlen
+			- tunnel_hlen
 			- vlan_header;
 	}
 
@@ -760,8 +761,7 @@ static bool check_mtu(struct sk_buff *skb,
 			mtu = max(mtu, IP_MIN_MTU);
 
 			if (packet_length > mtu &&
-			    ovs_tnl_frag_needed(vport, mutable, skb, mtu,
-						OVS_CB(skb)->tun_id))
+			    ovs_tnl_frag_needed(vport, mutable, skb, mtu))
 				return false;
 		}
 	}
@@ -777,8 +777,7 @@ static bool check_mtu(struct sk_buff *skb,
 			mtu = max(mtu, IPV6_MIN_MTU);
 
 			if (packet_length > mtu &&
-			    ovs_tnl_frag_needed(vport, mutable, skb, mtu,
-						OVS_CB(skb)->tun_id))
+			    ovs_tnl_frag_needed(vport, mutable, skb, mtu))
 				return false;
 		}
 	}
@@ -790,6 +789,7 @@ static bool check_mtu(struct sk_buff *skb,
 
 static void create_tunnel_header(const struct vport *vport,
 				 const struct tnl_mutable_config *mutable,
+				 const struct ovs_key_ipv4_tunnel *tun_key,
 				 const struct rtable *rt, void *header)
 {
 	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
@@ -806,7 +806,7 @@ static void create_tunnel_header(const struct vport *vport,
 	if (!iph->ttl)
 		iph->ttl = ip4_dst_hoplimit(&rt_dst(rt));
 
-	tnl_vport->tnl_ops->build_header(vport, mutable, iph + 1);
+	tnl_vport->tnl_ops->build_header(vport, mutable, tun_key, iph + 1);
 }
 
 static void *get_cached_header(const struct tnl_cache *cache)
@@ -907,14 +907,21 @@ static struct tnl_cache *build_cache(struct vport *vport,
 				     struct rtable *rt)
 {
 	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+	static const struct ovs_key_ipv4_tunnel tun_key;
 	struct tnl_cache *cache;
 	void *cache_data;
 	int cache_len;
 	struct hh_cache *hh;
+	int tunnel_hlen;
 
 	if (!(mutable->flags & TNL_F_HDR_CACHE))
 		return NULL;
 
+	tunnel_hlen = tnl_vport->tnl_ops->hdr_len(mutable, &tun_key) +
+				sizeof(struct iphdr);
+	if (tunnel_hlen < 0)
+		return NULL;
+
 	/*
 	 * If there is no entry in the ARP cache or if this device does not
 	 * support hard header caching just fall back to the IP stack.
@@ -937,7 +944,7 @@ static struct tnl_cache *build_cache(struct vport *vport,
 	else
 		cache = NULL;
 
-	cache_len = LL_RESERVED_SPACE(rt_dst(rt).dev) + mutable->tunnel_hlen;
+	cache_len = LL_RESERVED_SPACE(rt_dst(rt).dev) + tunnel_hlen;
 
 	cache = kzalloc(ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN) +
 			cache_len, GFP_ATOMIC);
@@ -946,9 +953,9 @@ static struct tnl_cache *build_cache(struct vport *vport,
 
 	create_eth_hdr(cache, hh);
 	cache_data = get_cached_header(cache) + cache->hh_len;
-	cache->len = cache->hh_len + mutable->tunnel_hlen;
+	cache->len = cache->hh_len + tunnel_hlen;
 
-	create_tunnel_header(vport, mutable, rt, cache_data);
+	create_tunnel_header(vport, mutable, &tun_key, rt, cache_data);
 
 	cache->mutable_seq = mutable->seq;
 	cache->rt = rt;
@@ -1000,15 +1007,16 @@ unlock:
 }
 
 static struct rtable *__find_route(const struct tnl_mutable_config *mutable,
-				   u8 ipproto, u8 tos)
+				   __be32 saddr, __be32 daddr, u8 ipproto,
+				   u8 tos)
 {
 	/* Tunnel configuration keeps DSCP part of TOS bits, But Linux
 	 * router expect RT_TOS bits only. */
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39)
 	struct flowi fl = { .nl_u = { .ip4_u = {
-					.daddr = mutable->key.daddr,
-					.saddr = mutable->key.saddr,
+					.daddr = daddr,
+					.saddr = saddr,
 					.tos   = RT_TOS(tos) } },
 					.proto = ipproto };
 	struct rtable *rt;
@@ -1018,8 +1026,8 @@ static struct rtable *__find_route(const struct tnl_mutable_config *mutable,
 
 	return rt;
 #else
-	struct flowi4 fl = { .daddr = mutable->key.daddr,
-			     .saddr = mutable->key.saddr,
+	struct flowi4 fl = { .daddr = daddr,
+			     .saddr = saddr,
 			     .flowi4_tos = RT_TOS(tos),
 			     .flowi4_proto = ipproto };
 
@@ -1029,7 +1037,8 @@ static struct rtable *__find_route(const struct tnl_mutable_config *mutable,
 
 static struct rtable *find_route(struct vport *vport,
 				 const struct tnl_mutable_config *mutable,
-				 u8 tos, struct tnl_cache **cache)
+				 __be32 saddr, __be32 daddr, u8 tos,
+				 struct tnl_cache **cache)
 {
 	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
 	struct tnl_cache *cur_cache = rcu_dereference(tnl_vport->cache);
@@ -1037,17 +1046,17 @@ static struct rtable *find_route(struct vport *vport,
 	*cache = NULL;
 	tos = RT_TOS(tos);
 
-	if (likely(tos == RT_TOS(mutable->tos) &&
-	    check_cache_valid(cur_cache, mutable))) {
+	if (tos == RT_TOS(mutable->tos) &&
+	    check_cache_valid(cur_cache, mutable)) {
 		*cache = cur_cache;
 		return cur_cache->rt;
 	} else {
 		struct rtable *rt;
 
-		rt = __find_route(mutable, tnl_vport->tnl_ops->ipproto, tos);
+		rt = __find_route(mutable, saddr, daddr,
+				  tnl_vport->tnl_ops->ipproto, tos);
 		if (IS_ERR(rt))
 			return NULL;
-
 		if (likely(tos == RT_TOS(mutable->tos)))
 			*cache = build_cache(vport, mutable, rt);
 
@@ -1076,13 +1085,14 @@ static bool need_linearize(const struct sk_buff *skb)
 
 static struct sk_buff *handle_offloads(struct sk_buff *skb,
 				       const struct tnl_mutable_config *mutable,
-				       const struct rtable *rt)
+				       const struct rtable *rt,
+				       int tunnel_hlen)
 {
 	int min_headroom;
 	int err;
 
 	min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
-			+ mutable->tunnel_hlen
+			+ tunnel_hlen
 			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
 
 	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
@@ -1137,14 +1147,14 @@ error:
 }
 
 static int send_frags(struct sk_buff *skb,
-		      const struct tnl_mutable_config *mutable)
+		      int tunnel_hlen)
 {
 	int sent_len;
 
 	sent_len = 0;
 	while (skb) {
 		struct sk_buff *next = skb->next;
-		int frag_len = skb->len - mutable->tunnel_hlen;
+		int frag_len = skb->len - tunnel_hlen;
 		int err;
 
 		skb->next = NULL;
@@ -1173,15 +1183,17 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
 {
 	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
 	const struct tnl_mutable_config *mutable = rcu_dereference(tnl_vport->mutable);
-
 	enum vport_err_type err = VPORT_E_TX_ERROR;
 	struct rtable *rt;
 	struct dst_entry *unattached_dst = NULL;
 	struct tnl_cache *cache;
+	struct ovs_key_ipv4_tunnel tun_key;
 	int sent_len = 0;
+	int tunnel_hlen;
 	__be16 frag_off = 0;
+	__be32 daddr;
+	__be32 saddr;
 	u8 ttl;
-	u8 inner_tos;
 	u8 tos;
 
 	/* Validate the protocol headers before we try to use them. */
@@ -1207,30 +1219,76 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
 	}
 #endif
 
-	/* ToS */
-	if (skb->protocol == htons(ETH_P_IP))
-		inner_tos = ip_hdr(skb)->tos;
+	/* If OVS_CB(skb)->tun_key is NULL, point it at the local tun_key here,
+	 * and zero it out.
+	 */
+	if (!OVS_CB(skb)->tun_key) {
+		memset(&tun_key, 0, sizeof(tun_key));
+		OVS_CB(skb)->tun_key = &tun_key;
+	}
+
+	tunnel_hlen = tnl_vport->tnl_ops->hdr_len(mutable, OVS_CB(skb)->tun_key) +
+				sizeof(struct iphdr);
+
+	if (tunnel_hlen < 0) {
+		err = VPORT_E_TX_DROPPED;
+		goto error_free;
+	}
+
+	if (OVS_CB(skb)->tun_key->ipv4_dst) {
+		daddr = OVS_CB(skb)->tun_key->ipv4_dst;
+		saddr = OVS_CB(skb)->tun_key->ipv4_src;
+		tos = OVS_CB(skb)->tun_key->ipv4_tos;
+		ttl = OVS_CB(skb)->tun_key->ipv4_ttl;
+	} else {
+		u8 inner_tos;
+		daddr = mutable->key.daddr;
+		saddr = mutable->key.saddr;
+
+		if (!daddr) {
+			/* Trying to send a packet from a null port without
+			 * tunnel info? Drop this packet. */
+			err = VPORT_E_TX_DROPPED;
+			goto error_free;
+		}
+
+		/* ToS */
+		if (skb->protocol == htons(ETH_P_IP))
+			inner_tos = ip_hdr(skb)->tos;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	else if (skb->protocol == htons(ETH_P_IPV6))
-		inner_tos = ipv6_get_dsfield(ipv6_hdr(skb));
+		else if (skb->protocol == htons(ETH_P_IPV6))
+			inner_tos = ipv6_get_dsfield(ipv6_hdr(skb));
 #endif
-	else
-		inner_tos = 0;
+		else
+			inner_tos = 0;
 
-	if (mutable->flags & TNL_F_TOS_INHERIT)
-		tos = inner_tos;
-	else
-		tos = mutable->tos;
+		if (mutable->flags & TNL_F_TOS_INHERIT)
+			tos = inner_tos;
+		else
+			tos = mutable->tos;
+
+		tos = INET_ECN_encapsulate(tos, inner_tos);
+
+		/* TTL */
+		ttl = mutable->ttl;
+		if (mutable->flags & TNL_F_TTL_INHERIT) {
+			if (skb->protocol == htons(ETH_P_IP))
+				ttl = ip_hdr(skb)->ttl;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+			else if (skb->protocol == htons(ETH_P_IPV6))
+				ttl = ipv6_hdr(skb)->hop_limit;
+#endif
+		}
+
+	}
 
 	/* Route lookup */
-	rt = find_route(vport, mutable, tos, &cache);
+	rt = find_route(vport, mutable, saddr, daddr, tos, &cache);
 	if (unlikely(!rt))
 		goto error_free;
 	if (unlikely(!cache))
 		unattached_dst = &rt_dst(rt);
 
-	tos = INET_ECN_encapsulate(tos, inner_tos);
-
 	/* Reset SKB */
 	nf_reset(skb);
 	secpath_reset(skb);
@@ -1238,12 +1296,12 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
 	skb_clear_rxhash(skb);
 
 	/* Offloading */
-	skb = handle_offloads(skb, mutable, rt);
+	skb = handle_offloads(skb, mutable, rt, tunnel_hlen);
 	if (IS_ERR(skb))
 		goto error;
 
 	/* MTU */
-	if (unlikely(!check_mtu(skb, vport, mutable, rt, &frag_off))) {
+	if (unlikely(!check_mtu(skb, vport, mutable, rt, &frag_off, tunnel_hlen))) {
 		err = VPORT_E_TX_DROPPED;
 		goto error_free;
 	}
@@ -1252,25 +1310,19 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
 	 * If we are over the MTU, allow the IP stack to handle fragmentation.
 	 * Fragmentation is a slow path anyways.
 	 */
-	if (unlikely(skb->len + mutable->tunnel_hlen > dst_mtu(&rt_dst(rt)) &&
+	if (unlikely(skb->len + tunnel_hlen > dst_mtu(&rt_dst(rt)) &&
 		     cache)) {
 		unattached_dst = &rt_dst(rt);
 		dst_hold(unattached_dst);
 		cache = NULL;
 	}
 
-	/* TTL */
-	ttl = mutable->ttl;
-	if (!ttl)
-		ttl = ip4_dst_hoplimit(&rt_dst(rt));
-
-	if (mutable->flags & TNL_F_TTL_INHERIT) {
-		if (skb->protocol == htons(ETH_P_IP))
-			ttl = ip_hdr(skb)->ttl;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-		else if (skb->protocol == htons(ETH_P_IPV6))
-			ttl = ipv6_hdr(skb)->hop_limit;
-#endif
+	/* TTL Fixup. */
+	if (!OVS_CB(skb)->tun_key->ipv4_dst) {
+		if (!(mutable->flags & TNL_F_TTL_INHERIT)) {
+			if (!ttl)
+				ttl = ip4_dst_hoplimit(&rt_dst(rt));
+		}
 	}
 
 	while (skb) {
@@ -1288,8 +1340,8 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
 			skb_set_network_header(skb, cache->hh_len);
 
 		} else {
-			skb_push(skb, mutable->tunnel_hlen);
-			create_tunnel_header(vport, mutable, rt, skb->data);
+			skb_push(skb, tunnel_hlen);
+			create_tunnel_header(vport, mutable, OVS_CB(skb)->tun_key, rt, skb->data);
 			skb_reset_network_header(skb);
 
 			if (next_skb)
@@ -1308,7 +1360,7 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
 		ip_select_ident(iph, &rt_dst(rt), NULL);
 
 		skb = tnl_vport->tnl_ops->update_header(vport, mutable,
-							&rt_dst(rt), skb);
+							&rt_dst(rt), skb, tunnel_hlen);
 		if (unlikely(!skb))
 			goto next;
 
@@ -1341,7 +1393,7 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
 					sent_len += orig_len;
 			}
 		} else
-			sent_len += send_frags(skb, mutable);
+			sent_len += send_frags(skb, tunnel_hlen);
 
 next:
 		skb = next_skb;
@@ -1427,12 +1479,6 @@ static int tnl_set_config(struct net *net, struct nlattr *options,
 	else
 		mutable->out_key = nla_get_be64(a[OVS_TUNNEL_ATTR_OUT_KEY]);
 
-	mutable->tunnel_hlen = tnl_ops->hdr_len(mutable);
-	if (mutable->tunnel_hlen < 0)
-		return mutable->tunnel_hlen;
-
-	mutable->tunnel_hlen += sizeof(struct iphdr);
-
 	old_vport = port_table_lookup(&mutable->key, &old_mutable);
 	if (old_vport && old_vport != cur_vport)
 		return -EEXIST;
@@ -1442,7 +1488,8 @@ static int tnl_set_config(struct net *net, struct nlattr *options,
 		struct net_device *dev;
 		struct rtable *rt;
 
-		rt = __find_route(mutable, tnl_ops->ipproto, mutable->tos);
+		rt = __find_route(mutable, mutable->key.saddr, mutable->key.daddr,
+				  tnl_ops->ipproto, mutable->tos);
 		if (IS_ERR(rt))
 			return -EADDRNOTAVAIL;
 		dev = rt_dst(rt).dev;
diff --git a/datapath/tunnel.h b/datapath/tunnel.h
index d2a87f2..951a6f1 100644
--- a/datapath/tunnel.h
+++ b/datapath/tunnel.h
@@ -109,8 +109,6 @@ struct tnl_mutable_config {
 
 	unsigned seq;
 
-	unsigned tunnel_hlen;
-
 	unsigned char eth_addr[ETH_ALEN];
 
 	/* Configured via OVS_TUNNEL_ATTR_* attributes. */
@@ -132,7 +130,8 @@ struct tnl_ops {
 	 * build_header() (i.e. excludes the IP header).  Returns a negative
 	 * error code if the configuration is invalid.
 	 */
-	int (*hdr_len)(const struct tnl_mutable_config *);
+	int (*hdr_len)(const struct tnl_mutable_config *,
+		       const struct ovs_key_ipv4_tunnel *);
 
 	/*
 	 * Builds the static portion of the tunnel header, which is stored in
@@ -143,7 +142,8 @@ struct tnl_ops {
 	 * called for every packet, so try not to make it too slow.
 	 */
 	void (*build_header)(const struct vport *,
-			     const struct tnl_mutable_config *, void *header);
+			     const struct tnl_mutable_config *,
+			     const struct ovs_key_ipv4_tunnel *, void *header);
 
 	/*
 	 * Updates the cached header of a packet to match the actual packet
@@ -155,7 +155,8 @@ struct tnl_ops {
 	 */
 	struct sk_buff *(*update_header)(const struct vport *,
 					 const struct tnl_mutable_config *,
-					 struct dst_entry *, struct sk_buff *);
+					 struct dst_entry *, struct sk_buff *,
+					 int tunnel_hlen);
 };
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
@@ -270,14 +271,14 @@ int ovs_tnl_set_addr(struct vport *vport, const unsigned char *addr);
 const char *ovs_tnl_get_name(const struct vport *vport);
 const unsigned char *ovs_tnl_get_addr(const struct vport *vport);
 int ovs_tnl_send(struct vport *vport, struct sk_buff *skb);
-void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos);
+void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb);
 
 struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr,
 				__be64 key, int tunnel_type,
 				const struct tnl_mutable_config **mutable);
 bool ovs_tnl_frag_needed(struct vport *vport,
 			 const struct tnl_mutable_config *mutable,
-			 struct sk_buff *skb, unsigned int mtu, __be64 flow_key);
+			 struct sk_buff *skb, unsigned int mtu);
 void ovs_tnl_free_linked_skbs(struct sk_buff *skb);
 
 int ovs_tnl_init(void);
@@ -287,4 +288,15 @@ static inline struct tnl_vport *tnl_vport_priv(const struct vport *vport)
 	return vport_priv(vport);
 }
 
+static inline void tnl_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
+				    const struct iphdr *iph, __be64 tun_id, u32 tun_flags)
+{
+	tun_key->tun_id = tun_id;
+	tun_key->ipv4_src = iph->saddr;
+	tun_key->ipv4_dst = iph->daddr;
+	tun_key->ipv4_tos = iph->tos;
+	tun_key->ipv4_ttl = iph->ttl;
+	tun_key->tun_flags = tun_flags;
+}
+
 #endif /* tunnel.h */
diff --git a/datapath/vport-capwap.c b/datapath/vport-capwap.c
index 05a099d..f6c34ab 100644
--- a/datapath/vport-capwap.c
+++ b/datapath/vport-capwap.c
@@ -155,16 +155,52 @@ static struct inet_frags frag_state = {
 	.secret_interval = CAPWAP_FRAG_SECRET_INTERVAL,
 };
 
-static int capwap_hdr_len(const struct tnl_mutable_config *mutable)
+static int get_capwap_param(const struct tnl_mutable_config *mutable,
+			const struct ovs_key_ipv4_tunnel *tun_key,
+			u32 *flags,  __be64 *out_key)
+{
+	if (tun_key->ipv4_dst) {
+		*flags = 0;
+
+		if (tun_key->tun_flags & FLOW_TNL_F_KEY)
+			*flags = TNL_F_OUT_KEY_ACTION;
+		if (tun_key->tun_flags & FLOW_TNL_F_CSUM)
+			*flags |= TNL_F_CSUM;
+		*out_key = tun_key->tun_id;
+	} else {
+		*flags = mutable->flags;
+		if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
+			if (likely(tun_key->tun_flags & FLOW_TNL_F_KEY)) {
+				*out_key = tun_key->tun_id;
+			} else {
+				*out_key = 0;
+				return -EINVAL;
+			}
+		} else
+			*out_key = mutable->out_key;
+
+	}
+	return 0;
+}
+
+static int capwap_hdr_len(const struct tnl_mutable_config *mutable,
+			  const struct ovs_key_ipv4_tunnel *tun_key)
 {
 	int size = CAPWAP_MIN_HLEN;
+	u32 flags;
+	__be64 out_key;
+	int err;
+
+	err = get_capwap_param(mutable, tun_key, &flags, &out_key);
+	if (err)
+		return err;
 
 	/* CAPWAP has no checksums. */
-	if (mutable->flags & TNL_F_CSUM)
+	if (flags & TNL_F_CSUM)
 		return -EINVAL;
 
 	/* if keys are specified, then add WSI field */
-	if (mutable->out_key || (mutable->flags & TNL_F_OUT_KEY_ACTION)) {
+	if (out_key || (flags & TNL_F_OUT_KEY_ACTION)) {
 		size += sizeof(struct capwaphdr_wsi) +
 			sizeof(struct capwaphdr_wsi_key);
 	}
@@ -174,10 +210,15 @@ static int capwap_hdr_len(const struct tnl_mutable_config *mutable)
 
 static void capwap_build_header(const struct vport *vport,
 				const struct tnl_mutable_config *mutable,
+				const struct ovs_key_ipv4_tunnel *tun_key,
 				void *header)
 {
 	struct udphdr *udph = header;
 	struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1);
+	u32 flags;
+	__be64 out_key;
+
+	get_capwap_param(mutable, tun_key, &flags, &out_key);
 
 	udph->source = htons(CAPWAP_SRC_PORT);
 	udph->dest = htons(CAPWAP_DST_PORT);
@@ -186,7 +227,7 @@ static void capwap_build_header(const struct vport *vport,
 	cwh->frag_id = 0;
 	cwh->frag_off = 0;
 
-	if (mutable->out_key || (mutable->flags & TNL_F_OUT_KEY_ACTION)) {
+	if (out_key || (flags & TNL_F_OUT_KEY_ACTION)) {
 		struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1);
 
 		cwh->begin = CAPWAP_KEYED;
@@ -197,9 +238,9 @@ static void capwap_build_header(const struct vport *vport,
 		wsi->flags = CAPWAP_WSI_F_KEY64;
 		wsi->reserved_padding = 0;
 
-		if (mutable->out_key) {
+		if (out_key) {
 			struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1);
-			opt->key = mutable->out_key;
+			opt->key = out_key;
 		}
 	} else {
 		/* make packet readable by old capwap code */
@@ -210,30 +251,39 @@ static void capwap_build_header(const struct vport *vport,
 static struct sk_buff *capwap_update_header(const struct vport *vport,
 					    const struct tnl_mutable_config *mutable,
 					    struct dst_entry *dst,
-					    struct sk_buff *skb)
+					    struct sk_buff *skb,
+					    int tunnel_hlen)
 {
+	struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
 	struct udphdr *udph = udp_hdr(skb);
+	u32 flags;
+	__be64 out_key;
+
+	if (get_capwap_param(mutable, tun_key, &flags, &out_key)) {
+		kfree_skb(skb);
+		return NULL;
+	}
 
-	if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
+	if (flags & TNL_F_OUT_KEY_ACTION) {
 		/* first field in WSI is key */
 		struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1);
 		struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1);
 		struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1);
 
-		opt->key = OVS_CB(skb)->tun_id;
+		opt->key = out_key;
 	}
 
 	udph->len = htons(skb->len - skb_transport_offset(skb));
 
 	if (unlikely(skb->len - skb_network_offset(skb) > dst_mtu(dst))) {
-		unsigned int hlen = skb_transport_offset(skb) + capwap_hdr_len(mutable);
+		unsigned int hlen = skb_transport_offset(skb) + capwap_hdr_len(mutable, tun_key);
 		skb = fragment(skb, vport, dst, hlen);
 	}
 
 	return skb;
 }
 
-static int process_capwap_wsi(struct sk_buff *skb, __be64 *key)
+static int process_capwap_wsi(struct sk_buff *skb, __be64 *key, bool *key_preset)
 {
 	struct capwaphdr *cwh = capwap_hdr(skb);
 	struct capwaphdr_wsi *wsi;
@@ -270,12 +320,15 @@ static int process_capwap_wsi(struct sk_buff *skb, __be64 *key)
 
 		opt = (struct capwaphdr_wsi_key *)(wsi + 1);
 		*key = opt->key;
+		*key_preset = true;
+	} else {
+		*key_preset = false;
 	}
 
 	return 0;
 }
 
-static struct sk_buff *process_capwap_proto(struct sk_buff *skb, __be64 *key)
+static struct sk_buff *process_capwap_proto(struct sk_buff *skb, __be64 *key, bool *key_preset)
 {
 	struct capwaphdr *cwh = capwap_hdr(skb);
 	int hdr_len = sizeof(struct udphdr);
@@ -301,7 +354,7 @@ static struct sk_buff *process_capwap_proto(struct sk_buff *skb, __be64 *key)
 		cwh = capwap_hdr(skb);
 	}
 
-	if ((cwh->begin & CAPWAP_F_WSI) && process_capwap_wsi(skb, key))
+	if ((cwh->begin & CAPWAP_F_WSI) && process_capwap_wsi(skb, key, key_preset))
 		goto error;
 
 	return skb;
@@ -316,12 +369,14 @@ static int capwap_rcv(struct sock *sk, struct sk_buff *skb)
 	struct vport *vport;
 	const struct tnl_mutable_config *mutable;
 	struct iphdr *iph;
+	struct ovs_key_ipv4_tunnel tun_key;
 	__be64 key = 0;
+	bool key_preset = false;
 
 	if (unlikely(!pskb_may_pull(skb, CAPWAP_MIN_HLEN + ETH_HLEN)))
 		goto error;
 
-	skb = process_capwap_proto(skb, &key);
+	skb = process_capwap_proto(skb, &key, &key_preset);
 	if (unlikely(!skb))
 		goto out;
 
@@ -334,11 +389,14 @@ static int capwap_rcv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	if (mutable->flags & TNL_F_IN_KEY_MATCH)
-		OVS_CB(skb)->tun_id = key;
+		key_preset = true;
 	else
-		OVS_CB(skb)->tun_id = 0;
+		key_preset = false;
+
+	tnl_tun_key_init(&tun_key, iph, key, key_preset ? FLOW_TNL_F_KEY : 0);
+	OVS_CB(skb)->tun_key = &tun_key;
 
-	ovs_tnl_rcv(vport, skb, iph->tos);
+	ovs_tnl_rcv(vport, skb);
 	goto out;
 
 error:
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index e3a190f..ec788d5 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -45,22 +45,61 @@ struct gre_base_hdr {
 	__be16 protocol;
 };
 
-static int gre_hdr_len(const struct tnl_mutable_config *mutable)
+static int get_gre_param(const struct tnl_mutable_config *mutable,
+			const struct ovs_key_ipv4_tunnel *tun_key,
+			u32 *flags, u32 *tunnel_type, __be64 *out_key)
+{
+	if (tun_key->ipv4_dst) {
+		*flags = 0;
+
+		if (tun_key->tun_flags & FLOW_TNL_F_KEY)
+			*flags = TNL_F_OUT_KEY_ACTION;
+		if (tun_key->tun_flags & FLOW_TNL_F_CSUM)
+			*flags |= TNL_F_CSUM;
+		*tunnel_type = TNL_T_PROTO_GRE;
+		*out_key = tun_key->tun_id;
+	} else {
+		*flags = mutable->flags;
+		*tunnel_type = mutable->key.tunnel_type;
+		if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
+			if (likely(tun_key->tun_flags & FLOW_TNL_F_KEY)) {
+				*out_key = tun_key->tun_id;
+			} else {
+				*out_key = 0;
+				return -EINVAL;
+			}
+		} else
+			*out_key = mutable->out_key;
+
+	}
+	return 0;
+}
+
+static int gre_hdr_len(const struct tnl_mutable_config *mutable,
+		       const struct ovs_key_ipv4_tunnel *tun_key)
 {
 	int len;
+	u32 flags;
+	u32 tunnel_type;
+	__be64 out_key;
+	int err;
+
+	err = get_gre_param(mutable, tun_key, &flags, &tunnel_type, &out_key);
+	if (err)
+		return err;
 
 	len = GRE_HEADER_SECTION;
 
-	if (mutable->flags & TNL_F_CSUM)
+	if (flags & TNL_F_CSUM)
 		len += GRE_HEADER_SECTION;
 
 	/* Set key for GRE64 tunnels, even when key if is zero. */
-	if (mutable->out_key ||
-	    mutable->key.tunnel_type & TNL_T_PROTO_GRE64 ||
-	    mutable->flags & TNL_F_OUT_KEY_ACTION) {
+	if (out_key ||
+	    tunnel_type & TNL_T_PROTO_GRE64 ||
+	    flags & TNL_F_OUT_KEY_ACTION) {
 
 		len += GRE_HEADER_SECTION;
-		if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64)
+		if (tunnel_type & TNL_T_PROTO_GRE64)
 			len += GRE_HEADER_SECTION;
 	}
 	return len;
@@ -88,32 +127,38 @@ static __be32 be64_get_high32(__be64 x)
 
 static void gre_build_header(const struct vport *vport,
 			     const struct tnl_mutable_config *mutable,
+			     const struct ovs_key_ipv4_tunnel *tun_key,
 			     void *header)
 {
 	struct gre_base_hdr *greh = header;
 	__be32 *options = (__be32 *)(greh + 1);
+	u32 flags;
+	u32 tunnel_type;
+	__be64 out_key;
+
+	get_gre_param(mutable, tun_key, &flags, &tunnel_type, &out_key);
 
 	greh->protocol = htons(ETH_P_TEB);
 	greh->flags = 0;
 
-	if (mutable->flags & TNL_F_CSUM) {
+	if (flags & TNL_F_CSUM) {
 		greh->flags |= GRE_CSUM;
 		*options = 0;
 		options++;
 	}
 
-	if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
+	if (flags & TNL_F_OUT_KEY_ACTION) {
 		greh->flags |= GRE_KEY;
-		if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64)
+		if (tunnel_type & TNL_T_PROTO_GRE64)
 			greh->flags |= GRE_SEQ;
 
-	} else if (mutable->out_key ||
-		   mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
+	} else if (out_key ||
+		   tunnel_type & TNL_T_PROTO_GRE64) {
 		greh->flags |= GRE_KEY;
-		*options = be64_get_low32(mutable->out_key);
-		if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
+		*options = be64_get_low32(out_key);
+		if (tunnel_type & TNL_T_PROTO_GRE64) {
 			options++;
-			*options = be64_get_high32(mutable->out_key);
+			*options = be64_get_high32(out_key);
 			greh->flags |= GRE_SEQ;
 		}
 	}
@@ -122,28 +167,38 @@ static void gre_build_header(const struct vport *vport,
 static struct sk_buff *gre_update_header(const struct vport *vport,
 					 const struct tnl_mutable_config *mutable,
 					 struct dst_entry *dst,
-					 struct sk_buff *skb)
+					 struct sk_buff *skb,
+					 int tunnel_hlen)
 {
-	__be32 *options = (__be32 *)(skb_network_header(skb) + mutable->tunnel_hlen
+	u32 flags;
+	u32 tunnel_type;
+	__be64 out_key;
+	struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
+	__be32 *options = (__be32 *)(skb_network_header(skb) + tunnel_hlen
 					       - GRE_HEADER_SECTION);
 
+	if (get_gre_param(mutable, tun_key, &flags, &tunnel_type, &out_key)) {
+		kfree_skb(skb);
+		return NULL;
+	}
+
 	/* Work backwards over the options so the checksum is last. */
-	if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
-		if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
+	if (flags & TNL_F_OUT_KEY_ACTION) {
+		if (tunnel_type & TNL_T_PROTO_GRE64) {
 			/* Set higher 32 bits to seq. */
-			*options = be64_get_high32(OVS_CB(skb)->tun_id);
+			*options = be64_get_high32(out_key);
 			options--;
 		}
-		*options = be64_get_low32(OVS_CB(skb)->tun_id);
+		*options = be64_get_low32(out_key);
 		options--;
-	} else if (mutable->out_key ||
-		   mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
+	} else if (out_key ||
+		   tunnel_type & TNL_T_PROTO_GRE64) {
 		options--;
-		if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64)
+		if (tunnel_type & TNL_T_PROTO_GRE64)
 			options--;
 	}
 
-	if (mutable->flags & TNL_F_CSUM)
+	if (flags & TNL_F_CSUM)
 		*(__sum16 *)options = csum_fold(skb_checksum(skb,
 						skb_transport_offset(skb),
 						skb->len - skb_transport_offset(skb),
@@ -335,7 +390,7 @@ static void gre_err(struct sk_buff *skb, u32 info)
 #endif
 
 	__skb_pull(skb, tunnel_hdr_len);
-	ovs_tnl_frag_needed(vport, mutable, skb, mtu, key);
+	ovs_tnl_frag_needed(vport, mutable, skb, mtu);
 	__skb_push(skb, tunnel_hdr_len);
 
 out:
@@ -370,6 +425,20 @@ static bool check_checksum(struct sk_buff *skb)
 	return (csum == 0);
 }
 
+static u32 gre_flags_to_tunnel_flags(const struct tnl_mutable_config *mutable,
+				     __be16 gre_flags)
+{
+	u32 tunnel_flags = 0;
+
+	if ((mutable->flags & TNL_F_IN_KEY_MATCH) && (gre_flags & GRE_KEY))
+		tunnel_flags = FLOW_TNL_F_KEY;
+
+	if (gre_flags & GRE_CSUM)
+		tunnel_flags |= FLOW_TNL_F_CSUM;
+
+	return tunnel_flags;
+}
+
 /* Called with rcu_read_lock and BH disabled. */
 static int gre_rcv(struct sk_buff *skb)
 {
@@ -377,6 +446,7 @@ static int gre_rcv(struct sk_buff *skb)
 	const struct tnl_mutable_config *mutable;
 	int hdr_len;
 	struct iphdr *iph;
+	struct ovs_key_ipv4_tunnel tun_key;
 	__be16 flags;
 	__be64 key;
 	u32 tunnel_type;
@@ -401,15 +471,13 @@ static int gre_rcv(struct sk_buff *skb)
 		goto error;
 	}
 
-	if (mutable->flags & TNL_F_IN_KEY_MATCH)
-		OVS_CB(skb)->tun_id = key;
-	else
-		OVS_CB(skb)->tun_id = 0;
+	tnl_tun_key_init(&tun_key, iph, key, gre_flags_to_tunnel_flags(mutable, flags));
+	OVS_CB(skb)->tun_key = &tun_key;
 
 	__skb_pull(skb, hdr_len);
 	skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN);
 
-	ovs_tnl_rcv(vport, skb, iph->tos);
+	ovs_tnl_rcv(vport, skb);
 	return 0;
 
 error:
diff --git a/datapath/vport.c b/datapath/vport.c
index af1c066..d9c8cfd 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -463,7 +463,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
 		OVS_CB(skb)->flow = NULL;
 
 	if (!(vport->ops->flags & VPORT_F_TUN_ID))
-		OVS_CB(skb)->tun_id = 0;
+		OVS_CB(skb)->tun_key = NULL;
 
 	ovs_dp_process_received_packet(vport, skb);
 }
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index 294f6d0..2c98490 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -279,7 +279,8 @@ enum ovs_key_attr {
 	OVS_KEY_ATTR_ICMPV6,    /* struct ovs_key_icmpv6 */
 	OVS_KEY_ATTR_ARP,       /* struct ovs_key_arp */
 	OVS_KEY_ATTR_ND,        /* struct ovs_key_nd */
-	OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */
+	OVS_KEY_ATTR_IPV4_TUNNEL = 62,  /* struct ovs_key_ipv4_tunnel */
+	OVS_KEY_ATTR_TUN_ID = 63,  /* be64 tunnel ID */
 	__OVS_KEY_ATTR_MAX
 };
 
@@ -361,6 +362,21 @@ struct ovs_key_nd {
 	__u8  nd_tll[6];
 };
 
+/* Values for ovs_key_ipv4_tunnel->tun_flags */
+#define FLOW_TNL_F_DONT_FRAGMENT (1 << 0)
+#define FLOW_TNL_F_CSUM (1 << 1)
+#define FLOW_TNL_F_KEY (1 << 2)
+
+struct ovs_key_ipv4_tunnel {
+	__be64 tun_id;
+	__u32  tun_flags;
+	__be32 ipv4_src;
+	__be32 ipv4_dst;
+	__u8   ipv4_tos;
+	__u8   ipv4_ttl;
+	__u8   pad[2];
+};
+
 /**
  * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
  * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index c9e3210..797cb06 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1179,6 +1179,7 @@ execute_set_action(struct ofpbuf *packet, const struct nlattr *a)
     case OVS_KEY_ATTR_TUN_ID:
     case OVS_KEY_ATTR_PRIORITY:
     case OVS_KEY_ATTR_IPV6:
+    case OVS_KEY_ATTR_IPV4_TUNNEL:
         /* not implemented */
         break;
 
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 257d7a7..9ed17ed 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -93,6 +93,8 @@ ovs_key_attr_to_string(enum ovs_key_attr attr)
     case OVS_KEY_ATTR_UNSPEC: return "unspec";
     case OVS_KEY_ATTR_ENCAP: return "encap";
     case OVS_KEY_ATTR_PRIORITY: return "priority";
+    case OVS_KEY_ATTR_TUN_ID: return "tun_id";
+    case OVS_KEY_ATTR_IPV4_TUNNEL: return "ipv4_tunnel";
     case OVS_KEY_ATTR_IN_PORT: return "in_port";
     case OVS_KEY_ATTR_ETHERNET: return "eth";
     case OVS_KEY_ATTR_VLAN: return "vlan";
@@ -105,7 +107,6 @@ ovs_key_attr_to_string(enum ovs_key_attr attr)
     case OVS_KEY_ATTR_ICMPV6: return "icmpv6";
     case OVS_KEY_ATTR_ARP: return "arp";
     case OVS_KEY_ATTR_ND: return "nd";
-    case OVS_KEY_ATTR_TUN_ID: return "tun_id";
 
     case __OVS_KEY_ATTR_MAX:
     default:
@@ -602,6 +603,7 @@ odp_flow_key_attr_len(uint16_t type)
     case OVS_KEY_ATTR_ENCAP: return -2;
     case OVS_KEY_ATTR_PRIORITY: return 4;
     case OVS_KEY_ATTR_TUN_ID: return 8;
+    case OVS_KEY_ATTR_IPV4_TUNNEL: return sizeof(struct ovs_key_ipv4_tunnel);
     case OVS_KEY_ATTR_IN_PORT: return 4;
     case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet);
     case OVS_KEY_ATTR_VLAN: return sizeof(ovs_be16);
@@ -668,6 +670,7 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds)
     const struct ovs_key_icmpv6 *icmpv6_key;
     const struct ovs_key_arp *arp_key;
     const struct ovs_key_nd *nd_key;
+    const struct ovs_key_ipv4_tunnel *ipv4_tun_key;
     enum ovs_key_attr attr = nl_attr_type(a);
     int expected_len;
 
@@ -698,6 +701,16 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds)
         ds_put_format(ds, "(%#"PRIx64")", ntohll(nl_attr_get_be64(a)));
         break;
 
+    case OVS_KEY_ATTR_IPV4_TUNNEL:
+        ipv4_tun_key = nl_attr_get(a);
+        ds_put_format(ds, "(tun_id=0x%"PRIx64",flags=0x%"PRIx32
+                      ",src="IP_FMT",dst="IP_FMT",tos=0x%"PRIx8",ttl=%"PRIu8")",
+                      ntohll(ipv4_tun_key->tun_id), ipv4_tun_key->tun_flags,
+                      IP_ARGS(&ipv4_tun_key->ipv4_src),
+                      IP_ARGS(&ipv4_tun_key->ipv4_dst),
+                      ipv4_tun_key->ipv4_tos, ipv4_tun_key->ipv4_ttl);
+        break;
+
     case OVS_KEY_ATTR_IN_PORT:
         ds_put_format(ds, "(%"PRIu32")", nl_attr_get_u32(a));
         break;
diff --git a/lib/odp-util.h b/lib/odp-util.h
index 16f2b15..57073ba 100644
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -80,6 +80,7 @@ int odp_actions_from_string(const char *, const struct simap *port_names,
  *                         ------  ---  ------  -----
  *  OVS_KEY_ATTR_PRIORITY      4    --     4      8
  *  OVS_KEY_ATTR_TUN_ID        8    --     4     12
+ *  OVS_KEY_ATTR_IPV4_TUNNEL  24    --     4     28
  *  OVS_KEY_ATTR_IN_PORT       4    --     4      8
  *  OVS_KEY_ATTR_ETHERNET     12    --     4     16
  *  OVS_KEY_ATTR_ETHERTYPE     2     2     4      8  (outer VLAN ethertype)
@@ -90,7 +91,7 @@ int odp_actions_from_string(const char *, const struct simap *port_names,
  *  OVS_KEY_ATTR_ICMPV6        2     2     4      8
  *  OVS_KEY_ATTR_ND           28    --     4     32
  *  -------------------------------------------------
- *  total                                       156
+ *  total                                       184
  *
  * We include some slack space in case the calculation isn't quite right or we
  * add another field and forget to adjust this value.
-- 
1.7.10




More information about the dev mailing list