[ovs-dev] [PATCH 11/16] datapath: Add basic MPLS support to kernel

Simon Horman horms at verge.net.au
Wed Feb 6 13:54:02 UTC 2013


Allow datapath to recognize and extract MPLS labels into flow keys
and execute actions which push, pop, and set labels on packets.

Based heavily on work by Leo Alterman and Ravi K.

Cc: Ravi K <rkerur at gmail.com>
Cc: Leo Alterman <lalterman at nicira.com>
Reviewed-by: Isaku Yamahata <yamahata at valinux.co.jp>
Signed-off-by: Simon Horman <horms at verge.net.au>

---

v2.18
* No change

v2.17
* As suggested by Ben Pfaff
  - Use consistent terminology for MPLS.
    + Consistently refer to the MPLS component of a packet as the
      MPLS label stack and entries in the stack as MPLS label stack entries
      (LSE).  An MPLS label is a component of an MPLS label stack entry.
      The other components are the traffic class (TC), time to live (TTL)
      and bottom of stack (BoS) bit.
  - Rename compose_.*mpls_ functions as execute_.*mpls_

v2.16
* No change

v2.15
* As suggested by Ben Pfaff
  - Use OVS_ACTION_SET to set OVS_KEY_ATTR_MPLS instead of
    OVS_ACTION_ATTR_SET_MPLS

v2.14
* Remove the include/linux/openvswitch.h portion which added
  new key and action attributes. This is
  now present in "User-Space MPLS actions and matches",
  which is now a dependency of this patch

v2.13
* As suggested by Jarno Rajahalme
  - Rename mpls_bos element of ovs_skb_cb as l2_size as it is set and used
    regardless of if an MPLS stack is present or not. Update the name of
    helper functions and documentation accordingly.
  - Ensure that skb_cb_mpls_bos() never returns NULL
* Correct endianness in eth_p_mpls()

v2.12
* Update skb and network header on MPLS extraction in ovs_flow_extract()
* Use NULL in skb_cb_mpls_bos()
* Add eth_p_mpls helper

v2.10 - v2.11
* No change

v2.9
* datapath: Always update the mpls bos if vlan_pop is successful

  Regardless of the details of how a successful
  vlan_pop is achieved, the mpls bos needs to be updated.

  Without this fix it has been observed that the following
  results in malformed packets

v2.8
* No change

v2.7
* Rebase

v2.6
* As suggested by Yamahata-san
  - Do not guard against label == 0 for
    OVS_ACTION_ATTR_SET_MPLS in validate_actions().
    A label of 0 is valid
  - Remove comment stipulating that if
    the top_label element of struct sw_flow_key is 0 then
    there is no MPLS label. An MPLS label of 0 is valid
    and the correct check is whether the ethertype is
    ntohs(ETH_TYPE_MPLS) or ntohs(ETH_TYPE_MPLS_MCAST)

v2.4 - v2.5
* No change

v2.3
* s/mpls_stack/mpls_bos/
  This is in keeping with the naming used in the OpenFlow 1.3 specification

v2.2
* Call skb_reset_mac_header() in skb_cb_set_mpls_stack()
  eth_hdr(skb) is non-NULL when called in skb_cb_set_mpls_stack().
* Add a call to skb_cb_set_mpls_stack() in ovs_packet_cmd_execute().
  I apologise that I have mislaid my notes on this but
  it avoids a kernel panic. I can investigate again if necessary.
* Use struct ovs_action_push_mpls instead of
  __be16 to decode OVS_ACTION_ATTR_PUSH_MPLS in validate_actions(). This is
  consistent with the data format for the attribute.
* Indentation fix in skb_cb_mpls_stack(). [cosmetic]

v2.1
* Manual rebase
---
 datapath/actions.c  |   79 +++++++++++++++++++++++++++++++++++++++++++++++++++
 datapath/datapath.c |   57 +++++++++++++++++++++++++++++++++++++
 datapath/datapath.h |    9 ++++++
 datapath/flow.c     |   31 ++++++++++++++++++++
 datapath/flow.h     |   13 +++++++++
 datapath/vport.c    |    2 ++
 6 files changed, 191 insertions(+)

diff --git a/datapath/actions.c b/datapath/actions.c
index f638ffc..60522be 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -49,6 +49,64 @@ static int make_writable(struct sk_buff *skb, int write_len)
 	return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
 }
 
+static __be16 get_ethertype(const struct sk_buff *skb)
+{
+	struct ethhdr *hdr = (struct ethhdr *)(skb_cb_mpls_bos(skb) - ETH_HLEN);
+	return hdr->h_proto;
+}
+
+static void set_ethertype(struct sk_buff *skb, const __be16 ethertype)
+{
+	struct ethhdr *hdr = (struct ethhdr *)(skb_cb_mpls_bos(skb) - ETH_HLEN);
+	hdr->h_proto = ethertype;
+}
+
+static int push_mpls(struct sk_buff *skb, const struct ovs_action_push_mpls *mpls)
+{
+	u32 l2_size;
+	__be32 *new_mpls_lse;
+
+	if (skb_cow_head(skb, MPLS_HLEN) < 0) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+
+	l2_size = skb_cb_l2_size(skb);
+	skb_push(skb, MPLS_HLEN);
+	memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), l2_size);
+	skb_reset_mac_header(skb);
+
+	new_mpls_lse = (__be32 *)(skb_mac_header(skb) + l2_size);
+	*new_mpls_lse = mpls->mpls_lse;
+
+	set_ethertype(skb, mpls->mpls_ethertype);
+	return 0;
+}
+
+static int pop_mpls(struct sk_buff *skb, const __be16 *ethertype)
+{
+	__be16 current_ethertype = get_ethertype(skb);
+	if (eth_p_mpls(current_ethertype)) {
+		u32 l2_size = skb_cb_l2_size(skb);
+
+		memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), l2_size);
+
+		skb_pull(skb, MPLS_HLEN);
+		skb_reset_mac_header(skb);
+
+		set_ethertype(skb, *ethertype);
+	}
+	return 0;
+}
+
+static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
+{
+	__be16 current_ethertype = get_ethertype(skb);
+	if (eth_p_mpls(current_ethertype))
+		memcpy(skb_cb_mpls_bos(skb), mpls_lse, sizeof(__be32));
+	return 0;
+}
+
 /* remove VLAN header from packet and update csum accordingly. */
 static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
 {
@@ -73,6 +131,9 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
 	skb->mac_header += VLAN_HLEN;
 	skb_reset_mac_len(skb);
 
+	/* update pointer to MPLS label stack */
+	OVS_CB(skb)->l2_size -= VLAN_HLEN;
+
 	return 0;
 }
 
@@ -102,6 +163,7 @@ static int pop_vlan(struct sk_buff *skb)
 		return err;
 
 	__vlan_hwaccel_put_tag(skb, ntohs(tci));
+
 	return 0;
 }
 
@@ -116,6 +178,9 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
 		if (!__vlan_put_tag(skb, current_tag))
 			return -ENOMEM;
 
+		/* update pointer to MPLS label stack */
+		OVS_CB(skb)->l2_size += VLAN_HLEN;
+
 		if (get_ip_summed(skb) == OVS_CSUM_COMPLETE)
 			skb->csum = csum_add(skb->csum, csum_partial(skb->data
 					+ ETH_HLEN, VLAN_HLEN, 0));
@@ -478,6 +543,10 @@ static int execute_set_action(struct sk_buff *skb,
 	case OVS_KEY_ATTR_UDP:
 		err = set_udp(skb, nla_data(nested_attr));
 		break;
+
+	case OVS_KEY_ATTR_MPLS:
+		err = set_mpls(skb, nla_data(nested_attr));
+		break;
 	}
 
 	return err;
@@ -514,6 +583,16 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			output_userspace(dp, skb, a);
 			break;
 
+		case OVS_ACTION_ATTR_PUSH_MPLS:
+			err = push_mpls(skb, nla_data(a));
+			if (unlikely(err)) /* skb already freed. */
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_POP_MPLS:
+			err = pop_mpls(skb, nla_data(a));
+			break;
+
 		case OVS_ACTION_ATTR_PUSH_VLAN:
 			err = push_vlan(skb, nla_data(a));
 			if (unlikely(err)) /* skb already freed. */
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 04a5e7f..897024d 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -71,6 +71,45 @@ static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
 
 int ovs_net_id __read_mostly;
 
+int (*ovs_dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
+EXPORT_SYMBOL(ovs_dp_ioctl_hook);
+
+/* Cache the L2 header length (Ethernet plus stacked 802.1Q tags) in
+ * OVS_CB(skb)->l2_size; stores 0 when h_proto is below ETH_TYPE_MIN. */
+void skb_cb_set_l2_size(struct sk_buff *skb)
+{
+	int nh_ofs = sizeof(struct ethhdr);
+	struct ethhdr *eth;
+	__be16 dl_type;
+
+	skb_reset_mac_header(skb);
+	eth = eth_hdr(skb);
+
+	/* h_proto is big-endian: convert it before the ordered comparison. */
+	if (unlikely(ntohs(eth->h_proto) < ETH_TYPE_MIN)) {
+		OVS_CB(skb)->l2_size = 0;
+		return;
+	}
+	dl_type = eth->h_proto;
+	while (dl_type == htons(ETH_P_8021Q) &&
+	       skb->len >= nh_ofs + sizeof(struct vlan_hdr)) {
+		struct vlan_hdr *vh = (struct vlan_hdr *)(skb->data + nh_ofs);
+		dl_type = vh->h_vlan_encapsulated_proto;
+		nh_ofs += sizeof(struct vlan_hdr);
+	}
+	OVS_CB(skb)->l2_size = nh_ofs;
+}
+
+unsigned char *skb_cb_mpls_bos(const struct sk_buff *skb)
+{
+	return skb_mac_header(skb) + OVS_CB(skb)->l2_size;
+}
+
+ptrdiff_t skb_cb_l2_size(const struct sk_buff *skb)
+{
+	return OVS_CB(skb)->l2_size;
+}
+
 /**
  * DOC: Locking:
  *
@@ -667,6 +706,11 @@ static int validate_set(const struct nlattr *a,
 
 		return validate_tp_port(flow_key);
 
+	case OVS_KEY_ATTR_MPLS:
+		if (!eth_p_mpls(flow_key->eth.type))
+			return -EINVAL;
+		break;
+
 	default:
 		return -EINVAL;
 	}
@@ -725,6 +769,8 @@ static int validate_and_copy_actions(const struct nlattr *attr,
 		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
 			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
 			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+			[OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
+			[OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
 			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
 			[OVS_ACTION_ATTR_POP_VLAN] = 0,
 			[OVS_ACTION_ATTR_SET] = (u32)-1,
@@ -755,6 +801,15 @@ static int validate_and_copy_actions(const struct nlattr *attr,
 				return -EINVAL;
 			break;
 
+		case OVS_ACTION_ATTR_PUSH_MPLS: {
+			const struct ovs_action_push_mpls *mpls = nla_data(a);
+			if (!eth_p_mpls(mpls->mpls_ethertype))
+				return -EINVAL;
+			break;
+		}
+
+		case OVS_ACTION_ATTR_POP_MPLS:
+			break;
 
 		case OVS_ACTION_ATTR_POP_VLAN:
 			break;
@@ -870,6 +925,8 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	packet->priority = flow->key.phy.priority;
 	skb_set_mark(packet, flow->key.phy.skb_mark);
 
+	skb_cb_set_l2_size(packet);
+
 	rcu_read_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
 	err = -ENODEV;
diff --git a/datapath/datapath.h b/datapath/datapath.h
index 2b93348..11c908e 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -95,6 +95,10 @@ struct datapath {
  * @flow: The flow associated with this packet.  May be %NULL if no flow.
  * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
  * packet is not being tunneled.
+ * @l2_size: Length of the packet's Ethernet header, including any VLAN headers.
+ * This is the offset from the beginning of the Ethernet frame at which an
+ * MPLS label stack would start, if one is present.  It is 0 when there is
+ * no L2 header.
  * @ip_summed: Consistently stores L4 checksumming status across different
  * kernel versions.
  * @csum_start: Stores the offset from which to start checksumming independent
@@ -106,6 +110,7 @@ struct datapath {
 struct ovs_skb_cb {
 	struct sw_flow		*flow;
 	struct ovs_key_ipv4_tunnel  *tun_key;
+	ptrdiff_t		l2_size;
 #ifdef NEED_CSUM_NORMALIZE
 	enum csum_type		ip_summed;
 	u16			csum_start;
@@ -189,4 +194,8 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 portid, u32 seq,
 					 u8 cmd);
 
 int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
+
+void skb_cb_set_l2_size(struct sk_buff *skb);
+unsigned char *skb_cb_mpls_bos(const struct sk_buff *skb);
+ptrdiff_t skb_cb_l2_size(const struct sk_buff *skb);
 #endif /* datapath.h */
diff --git a/datapath/flow.c b/datapath/flow.c
index fad9e19..27e1920 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -728,6 +728,17 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 			memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
 			key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
 		}
+	} else if (eth_p_mpls(key->eth.type)) {
+		error = check_header(skb, MPLS_HLEN);
+		if (unlikely(error))
+			goto out;
+
+		key_len = SW_FLOW_KEY_OFFSET(mpls.top_lse);
+		memcpy(&key->mpls.top_lse, skb_network_header(skb), MPLS_HLEN);
+
+		/* Update network header */
+		skb_set_network_header(skb, skb_network_header(skb) -
+				       skb->data + MPLS_HLEN);
 	} else if (key->eth.type == htons(ETH_P_IPV6)) {
 		int nh_len;             /* IPv6 Header + Extensions */
 
@@ -838,6 +849,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 	[OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
 	[OVS_KEY_ATTR_VLAN] = sizeof(__be16),
 	[OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
+	[OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
 	[OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
 	[OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
 	[OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
@@ -1274,6 +1286,16 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 		swkey->ip.proto = ntohs(arp_key->arp_op);
 		memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
 		memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
+	} else if (eth_p_mpls(swkey->eth.type)) {
+		const struct ovs_key_mpls *mpls_key;
+
+		if (!(attrs & (1ULL << OVS_KEY_ATTR_MPLS)))
+			return -EINVAL;
+		attrs &= ~(1ULL << OVS_KEY_ATTR_MPLS);
+
+		key_len = SW_FLOW_KEY_OFFSET(mpls.top_lse);
+		mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
+		swkey->mpls.top_lse = mpls_key->mpls_top_lse;
 	}
 
 	if (attrs)
@@ -1473,6 +1495,15 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 		arp_key->arp_op = htons(swkey->ip.proto);
 		memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN);
 		memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN);
+	} else if (eth_p_mpls(swkey->eth.type)) {
+		struct ovs_key_mpls *mpls_key;
+
+		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
+		if (!nla)
+			goto nla_put_failure;
+		mpls_key = nla_data(nla);
+		memset(mpls_key, 0, sizeof(struct ovs_key_mpls));
+		mpls_key->mpls_top_lse = swkey->mpls.top_lse;
 	}
 
 	if ((swkey->eth.type == htons(ETH_P_IP) ||
diff --git a/datapath/flow.h b/datapath/flow.h
index 6949640..d8e350c 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -73,6 +73,9 @@ struct sw_flow_key {
 		__be16 type;		/* Ethernet frame type. */
 	} eth;
 	struct {
+		__be32 top_lse;		/* top label stack entry */
+	} mpls;
+	struct {
 		u8     proto;		/* IP protocol or lower 8 bits of ARP opcode. */
 		u8     tos;		/* IP ToS. */
 		u8     ttl;		/* IP TTL/hop limit. */
@@ -143,6 +146,10 @@ struct arp_eth_header {
 	unsigned char       ar_tip[4];		/* target IP address        */
 } __packed;
 
+#define ETH_TYPE_MIN 0x600
+
+#define MPLS_HLEN 4
+
 int ovs_flow_init(void);
 void ovs_flow_exit(void);
 
@@ -234,4 +241,10 @@ int ipv4_tun_from_nlattr(const struct nlattr *attr,
 int ipv4_tun_to_nlattr(struct sk_buff *skb,
 			const struct ovs_key_ipv4_tunnel *tun_key);
 
+static inline bool eth_p_mpls(__be16 eth_type)
+{
+	return eth_type == htons(ETH_P_MPLS_UC) ||
+		eth_type == htons(ETH_P_MPLS_MC);
+}
+
 #endif /* flow.h */
diff --git a/datapath/vport.c b/datapath/vport.c
index 9c0942b..82aba8c 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -422,6 +422,8 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
 	if (!(vport->ops->flags & VPORT_F_TUN_ID))
 		OVS_CB(skb)->tun_key = NULL;
 
+	skb_cb_set_l2_size(skb);
+
 	ovs_dp_process_received_packet(vport, skb);
 }
 
-- 
1.7.10.4




More information about the dev mailing list