[ovs-dev] [PATCH v8 3/5] datapath: add layer 3 flow/port support

Lorand Jakab lojakab at cisco.com
Mon Nov 17 17:24:11 UTC 2014


Implement the pop_eth and push_eth actions in the kernel, and add
support for layer 3 flows.
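
For reference, push_eth() takes a struct ovs_action_push_eth argument.
Its layout is assumed to be roughly the following, matching how the
fields are used in actions.c; the authoritative definition lives in the
openvswitch uAPI header updated elsewhere in this series:

	/* Assumed layout only; see the uAPI header for the real definition. */
	struct ovs_action_push_eth {
		struct ovs_key_ethernet addresses;	/* eth_src[ETH_ALEN], eth_dst[ETH_ALEN] */
		__be16 eth_type;			/* Ethertype of the pushed header */
	};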

Signed-off-by: Lorand Jakab <lojakab at cisco.com>
---
 datapath/actions.c            | 87 ++++++++++++++++++++++++++++++++++++-------
 datapath/flow.c               | 45 ++++++++++++----------
 datapath/flow.h               |  4 +-
 datapath/flow_netlink.c       | 83 +++++++++++++++++++++++++++++++++++++----
 datapath/vport-geneve.c       |  7 +++-
 datapath/vport-gre.c          |  7 +++-
 datapath/vport-internal_dev.c |  2 +-
 datapath/vport-lisp.c         | 34 ++++-------------
 datapath/vport-netdev.c       |  2 +-
 datapath/vport-vxlan.c        |  7 +++-
 datapath/vport.c              |  6 ++-
 datapath/vport.h              |  2 +-
 12 files changed, 211 insertions(+), 75 deletions(-)
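
As an illustration only (not part of the patch), userspace could hand the
datapath a push_eth action with libnl roughly as follows; the attribute
constant and struct layout come from this series, while "msg" and the MAC
addresses are placeholders:

	/* Hypothetical sketch: append OVS_ACTION_ATTR_PUSH_ETH to an
	 * OVS_FLOW_ATTR_ACTIONS nest being built in "msg". */
	struct ovs_action_push_eth push = {
		.addresses.eth_src = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 },
		.addresses.eth_dst = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 },
		.eth_type = htons(ETH_P_IP),
	};

	nla_put(msg, OVS_ACTION_ATTR_PUSH_ETH, sizeof(push), &push);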

diff --git a/datapath/actions.c b/datapath/actions.c
index 5a1dbe2..76c225e 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -251,6 +251,27 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
 	return 0;
 }
 
+/* De-accelerate any hardware accelerated VLAN tag present in skb. */
+static int deaccel_vlan_tx_tag(struct sk_buff *skb)
+{
+	u16 current_tag;
+
+	/* push down current VLAN tag */
+	current_tag = vlan_tx_tag_get(skb);
+
+	if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
+		return -ENOMEM;
+
+	/* Update mac_len for subsequent MPLS actions */
+	skb->mac_len += VLAN_HLEN;
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_add(skb->csum, csum_partial(skb->data
+				+ (2 * ETH_ALEN), VLAN_HLEN, 0));
+
+	return 0;
+}
+
 static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 {
 	__be16 tci;
@@ -287,19 +308,10 @@ static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
 		     const struct ovs_action_push_vlan *vlan)
 {
 	if (unlikely(vlan_tx_tag_present(skb))) {
-		u16 current_tag;
-
-		/* push down current VLAN tag */
-		current_tag = vlan_tx_tag_get(skb);
-
-		if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
-			return -ENOMEM;
-		/* Update mac_len for subsequent MPLS actions */
-		skb->mac_len += VLAN_HLEN;
-
-		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->csum = csum_add(skb->csum, csum_partial(skb->data
-					+ (2 * ETH_ALEN), VLAN_HLEN, 0));
+		int err;
+		err = deaccel_vlan_tx_tag(skb);
+		if (unlikely(err))
+			return err;
 
 		invalidate_flow_key(key);
 	} else {
@@ -329,6 +341,47 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *key,
 	return 0;
 }
 
+static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	skb_pull_rcsum(skb, ETH_HLEN);
+	skb_reset_mac_header(skb);
+	skb->mac_len -= ETH_HLEN;
+
+	invalidate_flow_key(key);
+	return 0;
+}
+
+static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
+		    const struct ovs_action_push_eth *ethh)
+{
+	/* De-accelerate any hardware accelerated VLAN tag added to a previous
+	 * Ethernet header */
+	if (unlikely(vlan_tx_tag_present(skb))) {
+		int err;
+		err = deaccel_vlan_tx_tag(skb);
+		if (unlikely(err))
+			return err;
+	}
+
+	/* Add the new Ethernet header */
+	if (skb_cow_head(skb, ETH_HLEN) < 0)
+		return -ENOMEM;
+
+	skb_push(skb, ETH_HLEN);
+	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
+
+	ether_addr_copy(eth_hdr(skb)->h_source, ethh->addresses.eth_src);
+	ether_addr_copy(eth_hdr(skb)->h_dest, ethh->addresses.eth_dst);
+	eth_hdr(skb)->h_proto = ethh->eth_type;
+
+	ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
+
+	skb->protocol = ethh->eth_type;
+	invalidate_flow_key(key);
+	return 0;
+}
+
 static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
 			__be32 *addr, __be32 new_addr)
 {
@@ -884,6 +937,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			err = pop_vlan(skb, key);
 			break;
 
+		case OVS_ACTION_ATTR_PUSH_ETH:
+			err = push_eth(skb, key, nla_data(a));
+			break;
+
+		case OVS_ACTION_ATTR_POP_ETH:
+			err = pop_eth(skb, key);
+			break;
+
 		case OVS_ACTION_ATTR_RECIRC:
 			err = execute_recirc(dp, skb, key, a, rem);
 			if (nla_is_last(a, rem)) {
diff --git a/datapath/flow.c b/datapath/flow.c
index 69b13b3..b01f7bd 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -459,28 +459,31 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 
 	skb_reset_mac_header(skb);
 
-	/* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
-	 * header in the linear data area.
-	 */
-	eth = eth_hdr(skb);
-	ether_addr_copy(key->eth.src, eth->h_source);
-	ether_addr_copy(key->eth.dst, eth->h_dest);
+	/* Link layer. */
+	if (key->phy.is_layer3) {
+		key->eth.tci = 0;
+		key->eth.type = skb->protocol;
+	} else {
+		eth = eth_hdr(skb);
+		ether_addr_copy(key->eth.src, eth->h_source);
+		ether_addr_copy(key->eth.dst, eth->h_dest);
 
-	__skb_pull(skb, 2 * ETH_ALEN);
-	/* We are going to push all headers that we pull, so no need to
-	 * update skb->csum here.
-	 */
+		__skb_pull(skb, 2 * ETH_ALEN);
+		/* We are going to push all headers that we pull, so no need to
+		 * update skb->csum here.
+		 */
 
-	key->eth.tci = 0;
-	if (vlan_tx_tag_present(skb))
-		key->eth.tci = htons(vlan_get_tci(skb));
-	else if (eth->h_proto == htons(ETH_P_8021Q))
-		if (unlikely(parse_vlan(skb, key)))
-			return -ENOMEM;
+		key->eth.tci = 0;
+		if (vlan_tx_tag_present(skb))
+			key->eth.tci = htons(vlan_get_tci(skb));
+		else if (eth->h_proto == htons(ETH_P_8021Q))
+			if (unlikely(parse_vlan(skb, key)))
+				return -ENOMEM;
 
-	key->eth.type = parse_ethertype(skb);
-	if (unlikely(key->eth.type == htons(0)))
-		return -ENOMEM;
+		key->eth.type = parse_ethertype(skb);
+		if (unlikely(key->eth.type == htons(0)))
+			return -ENOMEM;
+	}
 
 	skb_reset_network_header(skb);
 	skb_reset_mac_len(skb);
@@ -682,7 +685,8 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
 }
 
 int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
-			 struct sk_buff *skb, struct sw_flow_key *key)
+			 struct sk_buff *skb, struct sw_flow_key *key,
+			 bool is_layer3)
 {
 	/* Extract metadata from packet. */
 	if (tun_info) {
@@ -706,6 +710,7 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
 	key->phy.priority = skb->priority;
 	key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
 	key->phy.skb_mark = skb->mark;
+	key->phy.is_layer3 = is_layer3;
 	key->ovs_flow_hash = 0;
 	key->recirc_id = 0;
 
diff --git a/datapath/flow.h b/datapath/flow.h
index 2bbf789..919363b 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -127,6 +127,7 @@ struct sw_flow_key {
 		u32	priority;	/* Packet QoS priority. */
 		u32	skb_mark;	/* SKB mark. */
 		u16	in_port;	/* Input switch port (or DP_MAX_PORTS). */
+		bool	is_layer3;	/* Packet has no Ethernet header */
 	} __packed phy; /* Safe when right after 'tun_key'. */
 	u32 ovs_flow_hash;		/* Datapath computed hash value.  */
 	u32 recirc_id;			/* Recirculation ID.  */
@@ -253,7 +254,8 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
 int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
 int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
 			 struct sk_buff *skb,
-			 struct sw_flow_key *key);
+			 struct sw_flow_key *key,
+			 bool is_layer3);
 /* Extract key from packet coming from userspace. */
 int ovs_flow_key_extract_userspace(const struct nlattr *attr,
 				   struct sk_buff *skb,
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 503cf63..54510c8 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -113,7 +113,7 @@ static void update_range(struct sw_flow_match *match,
 static bool match_validate(const struct sw_flow_match *match,
 			   u64 key_attrs, u64 mask_attrs, bool log)
 {
-	u64 key_expected = 1ULL << OVS_KEY_ATTR_ETHERNET;
+	u64 key_expected = 0;
 	u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
 
 	/* The following mask attributes allowed only if they
@@ -136,6 +136,10 @@ static bool match_validate(const struct sw_flow_match *match,
 		       | (1ULL << OVS_KEY_ATTR_IN_PORT)
 		       | (1ULL << OVS_KEY_ATTR_ETHERTYPE));
 
+	/* If Ethertype is present, expect MAC addresses */
+	if (key_attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))
+		key_expected |= 1ULL << OVS_KEY_ATTR_ETHERNET;
+
 	/* Check key attributes. */
 	if (match->key->eth.type == htons(ETH_P_ARP)
 			|| match->key->eth.type == htons(ETH_P_RARP)) {
@@ -679,6 +683,15 @@ static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
 			return -EINVAL;
 		*attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL);
 	}
+	if (is_mask)
+		/* Always exact match is_layer3 */
+		SW_FLOW_KEY_PUT(match, phy.is_layer3, true, is_mask);
+	else {
+		if (*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET))
+			SW_FLOW_KEY_PUT(match, phy.is_layer3, false, is_mask);
+		else
+			SW_FLOW_KEY_PUT(match, phy.is_layer3, true, is_mask);
+	}
 	return 0;
 }
 
@@ -742,6 +755,17 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
 	if (attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
 		const struct ovs_key_ipv4 *ipv4_key;
 
+		/* Add eth.type value for layer 3 flows */
+		if (!(attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))) {
+			__be16 eth_type;
+
+			if (is_mask)
+				eth_type = htons(0xffff);
+			else
+				eth_type = htons(ETH_P_IP);
+			SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
+		}
+
 		ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
 		if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
 			OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
@@ -766,6 +790,18 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
 	if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) {
 		const struct ovs_key_ipv6 *ipv6_key;
 
+		/* Add eth.type value for layer 3 flows */
+		if (!(attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))) {
+			__be16 eth_type;
+
+			if (is_mask) {
+				eth_type = htons(0xffff);
+			} else {
+				eth_type = htons(ETH_P_IPV6);
+			}
+			SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
+		}
+
 		ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
 		if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
 			OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
@@ -1135,7 +1171,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
 		     const struct sw_flow_key *output, struct sk_buff *skb)
 {
 	struct ovs_key_ethernet *eth_key;
-	struct nlattr *nla, *encap;
+	struct nlattr *nla, *encap = NULL;
 	bool is_mask = (swkey != output);
 
 	if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
@@ -1174,6 +1210,9 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
 	if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
 		goto nla_put_failure;
 
+	if (swkey->phy.is_layer3)
+		goto noethernet;
+
 	nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
 	if (!nla)
 		goto nla_put_failure;
@@ -1191,8 +1230,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
 		encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
 		if (!swkey->eth.tci)
 			goto unencap;
-	} else
-		encap = NULL;
+	}
 
 	if (swkey->eth.type == htons(ETH_P_802_2)) {
 		/*
@@ -1211,6 +1249,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
 	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
 		goto nla_put_failure;
 
+noethernet:
 	if (swkey->eth.type == htons(ETH_P_IP)) {
 		struct ovs_key_ipv4 *ipv4_key;
 
@@ -1623,7 +1662,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 static int validate_set(const struct nlattr *a,
 			const struct sw_flow_key *flow_key,
 			struct sw_flow_actions **sfa,
-			bool *set_tun, __be16 eth_type, bool log)
+			bool *set_tun, __be16 eth_type, bool log,
+			bool is_layer3)
 {
 	const struct nlattr *ovs_key = nla_data(a);
 	int key_type = nla_type(ovs_key);
@@ -1644,7 +1684,11 @@ static int validate_set(const struct nlattr *a,
 
 	case OVS_KEY_ATTR_PRIORITY:
 	case OVS_KEY_ATTR_SKB_MARK:
+		break;
+
 	case OVS_KEY_ATTR_ETHERNET:
+		if (is_layer3)
+			return -EINVAL;
 		break;
 
 	case OVS_KEY_ATTR_TUNNEL:
@@ -1705,6 +1749,8 @@ static int validate_set(const struct nlattr *a,
 		return validate_tp_port(flow_key, eth_type);
 
 	case OVS_KEY_ATTR_MPLS:
+		if (is_layer3)
+			return -EINVAL;
 		if (!eth_p_mpls(eth_type))
 			return -EINVAL;
 		break;
@@ -1766,6 +1812,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
 	const struct nlattr *a;
 	bool out_tnl_port = false;
 	int rem, err;
+	bool is_layer3 = key->phy.is_layer3;
 
 	if (depth >= SAMPLE_ACTION_DEPTH)
 		return -EOVERFLOW;
@@ -1776,6 +1823,8 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
 			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
 			[OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
 			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+			[OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
+			[OVS_ACTION_ATTR_POP_ETH] = 0,
 			[OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
 			[OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
 			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
@@ -1824,11 +1873,31 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
 			break;
 		}
 
+		case OVS_ACTION_ATTR_POP_ETH:
+			if (is_layer3)
+				return -EINVAL;
+			if (vlan_tci & htons(VLAN_TAG_PRESENT))
+				return -EINVAL;
+			is_layer3 = true;
+			break;
+
+		case OVS_ACTION_ATTR_PUSH_ETH:
+			/* For now disallow pushing an Ethernet header if one
+			 * is already present */
+			if (!is_layer3)
+				return -EINVAL;
+			is_layer3 = false;
+			break;
+
 		case OVS_ACTION_ATTR_POP_VLAN:
+			if (is_layer3)
+				return -EINVAL;
 			vlan_tci = htons(0);
 			break;
 
 		case OVS_ACTION_ATTR_PUSH_VLAN:
+			if (is_layer3)
+				return -EINVAL;
 			vlan = nla_data(a);
 			if (vlan->vlan_tpid != htons(ETH_P_8021Q))
 				return -EINVAL;
@@ -1883,8 +1952,8 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
 			break;
 
 		case OVS_ACTION_ATTR_SET:
-			err = validate_set(a, key, sfa,
-					   &out_tnl_port, eth_type, log);
+			err = validate_set(a, key, sfa, &out_tnl_port,
+					   eth_type, log, is_layer3);
 			if (err)
 				return err;
 
diff --git a/datapath/vport-geneve.c b/datapath/vport-geneve.c
index 7c08577..adb8d4f 100644
--- a/datapath/vport-geneve.c
+++ b/datapath/vport-geneve.c
@@ -200,7 +200,7 @@ static int geneve_rcv(struct sock *sk, struct sk_buff *skb)
 				key, flags,
 				geneveh->options, opts_len);
 
-	ovs_vport_receive(vport_from_priv(geneve_port), skb, &tun_info);
+	ovs_vport_receive(vport_from_priv(geneve_port), skb, &tun_info, false);
 	goto out;
 
 error:
@@ -368,6 +368,11 @@ static int geneve_send(struct vport *vport, struct sk_buff *skb)
 	if (unlikely(!OVS_CB(skb)->egress_tun_info))
 		return -EINVAL;
 
+	/* Reject layer 3 packets */
+	if (unlikely(skb->protocol == htons(ETH_P_IP) ||
+	    skb->protocol == htons(ETH_P_IPV6)))
+		return -EINVAL;
+
 	tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
 
 	/* Route lookup */
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index 41c025d..7732c47 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -114,7 +114,7 @@ static int gre_rcv(struct sk_buff *skb,
 	ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), 0, 0, key,
 			       filter_tnl_flags(tpi->flags), NULL, 0);
 
-	ovs_vport_receive(vport, skb, &tun_info);
+	ovs_vport_receive(vport, skb, &tun_info, false);
 	return PACKET_RCVD;
 }
 
@@ -289,6 +289,11 @@ static int gre_send(struct vport *vport, struct sk_buff *skb)
 	if (unlikely(!OVS_CB(skb)->egress_tun_info))
 		return -EINVAL;
 
+	/* Reject layer 3 packets */
+	if (unlikely(skb->protocol == htons(ETH_P_IP) ||
+	    skb->protocol == htons(ETH_P_IPV6)))
+		return -EINVAL;
+
 	hlen = ip_gre_calc_hlen(OVS_CB(skb)->egress_tun_info->tunnel.tun_flags);
 
 	return __send(vport, skb, hlen, 0, 0);
diff --git a/datapath/vport-internal_dev.c b/datapath/vport-internal_dev.c
index a1c4949..faf8378 100644
--- a/datapath/vport-internal_dev.c
+++ b/datapath/vport-internal_dev.c
@@ -77,7 +77,7 @@ static struct net_device_stats *internal_dev_sys_stats(struct net_device *netdev
 static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	rcu_read_lock();
-	ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL);
+	ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL, false);
 	rcu_read_unlock();
 	return 0;
 }
diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c
index f3d450f..ff1f6c1 100644
--- a/datapath/vport-lisp.c
+++ b/datapath/vport-lisp.c
@@ -232,8 +232,6 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
 	struct iphdr *iph, *inner_iph;
 	struct ovs_tunnel_info tun_info;
 	__be64 key;
-	struct ethhdr *ethh;
-	__be16 protocol;
 
 	lisp_port = lisp_find_port(dev_net(skb->dev), udp_hdr(skb)->dest);
 	if (unlikely(!lisp_port))
@@ -259,26 +257,16 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
 	inner_iph = (struct iphdr *)(lisph + 1);
 	switch (inner_iph->version) {
 	case 4:
-		protocol = htons(ETH_P_IP);
+		skb->protocol = htons(ETH_P_IP);
 		break;
 	case 6:
-		protocol = htons(ETH_P_IPV6);
+		skb->protocol = htons(ETH_P_IPV6);
 		break;
 	default:
 		goto error;
 	}
-	skb->protocol = protocol;
 
-	/* Add Ethernet header */
-	ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN);
-	memset(ethh, 0, ETH_HLEN);
-	ethh->h_dest[0] = 0x02;
-	ethh->h_source[0] = 0x02;
-	ethh->h_proto = protocol;
-
-	ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
-
-	ovs_vport_receive(vport_from_priv(lisp_port), skb, &tun_info);
+	ovs_vport_receive(vport_from_priv(lisp_port), skb, &tun_info, true);
 	goto out;
 
 error:
@@ -450,13 +438,12 @@ static int lisp_send(struct vport *vport, struct sk_buff *skb)
 	if (unlikely(!OVS_CB(skb)->egress_tun_info))
 		return -EINVAL;
 
-	tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
+	if (unlikely((skb->protocol != htons(ETH_P_IP) &&
+	    skb->protocol != htons(ETH_P_IPV6)) ||
+	    vlan_tx_tag_present(skb)))
+		return -EINVAL;
 
-	if (skb->protocol != htons(ETH_P_IP) &&
-	    skb->protocol != htons(ETH_P_IPV6)) {
-		kfree_skb(skb);
-		return 0;
-	}
+	tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
 
 	/* Route lookup */
 	saddr = tun_key->ipv4_src;
@@ -483,11 +470,6 @@ static int lisp_send(struct vport *vport, struct sk_buff *skb)
 			goto err_free_rt;
 	}
 
-	/* Reset l2 headers. */
-	skb_pull(skb, network_offset);
-	skb_reset_mac_header(skb);
-	vlan_set_tci(skb, 0);
-
 	skb_reset_inner_headers(skb);
 
 	__skb_push(skb, LISP_HLEN);
diff --git a/datapath/vport-netdev.c b/datapath/vport-netdev.c
index 9c0908a..72bba4e 100644
--- a/datapath/vport-netdev.c
+++ b/datapath/vport-netdev.c
@@ -211,7 +211,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
 	skb_push(skb, ETH_HLEN);
 	ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
 
-	ovs_vport_receive(vport, skb, NULL);
+	ovs_vport_receive(vport, skb, NULL, false);
 	return;
 
 error:
diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
index 8689853..d3a193b 100644
--- a/datapath/vport-vxlan.c
+++ b/datapath/vport-vxlan.c
@@ -72,7 +72,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
 			       udp_hdr(skb)->source, udp_hdr(skb)->dest,
 			       key, TUNNEL_KEY, NULL, 0);
 
-	ovs_vport_receive(vport, skb, &tun_info);
+	ovs_vport_receive(vport, skb, &tun_info, false);
 }
 
 static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
@@ -156,6 +156,11 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 		goto error;
 	}
 
+	/* Reject layer 3 packets */
+	if (unlikely(skb->protocol == htons(ETH_P_IP) ||
+	    skb->protocol == htons(ETH_P_IPV6)))
+		return -EINVAL;
+
 	tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
 
 	/* Route lookup */
diff --git a/datapath/vport.c b/datapath/vport.c
index 274e47f..7b588bd 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -438,13 +438,15 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
  * @vport: vport that received the packet
  * @skb: skb that was received
  * @tun_info: tunnel (if any) that carried packet
+ * @is_layer3: packet is layer 3
  *
  * Must be called with rcu_read_lock.  The packet cannot be shared and
  * skb->data should point to the Ethernet header.  The caller must have already
  * called compute_ip_summed() to initialize the checksumming fields.
  */
 void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
-		       const struct ovs_tunnel_info *tun_info)
+		       const struct ovs_tunnel_info *tun_info,
+		       bool is_layer3)
 {
 	struct pcpu_sw_netstats *stats;
 	struct sw_flow_key key;
@@ -459,7 +461,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
 	ovs_skb_init_inner_protocol(skb);
 	OVS_CB(skb)->input_vport = vport;
 	OVS_CB(skb)->egress_tun_info = NULL;
-	error = ovs_flow_key_extract(tun_info, skb, &key);
+	error = ovs_flow_key_extract(tun_info, skb, &key, is_layer3);
 	if (unlikely(error)) {
 		kfree_skb(skb);
 		return;
diff --git a/datapath/vport.h b/datapath/vport.h
index a08a7ce..2fd837c 100644
--- a/datapath/vport.h
+++ b/datapath/vport.h
@@ -219,7 +219,7 @@ static inline struct vport *vport_from_priv(void *priv)
 }
 
 void ovs_vport_receive(struct vport *, struct sk_buff *,
-		       const struct ovs_tunnel_info *);
+		       const struct ovs_tunnel_info *, bool is_layer3);
 
 /* List of statically compiled vport implementations.  Don't forget to also
  * add yours to the list at the top of vport.c.
-- 
1.9.3 (Apple Git-50)