[ovs-dev] [PATCH 1/4] datapath: Add basic MPLS support to kernel

Simon Horman horms at verge.net.au
Tue Oct 9 07:08:32 UTC 2012


Allow datapath to recognize and extract MPLS labels into flow keys
and execute actions which push, pop, and set labels on packets.

Based heavily on work by Leo Alterman.

Cc: Leo Alterman <lalterman at nicira.com>
Signed-off-by: Simon Horman <horms at verge.net.au>

---

v2.2
* Call skb_reset_mac_header() in skb_cb_set_mpls_stack()
  eth_hdr(skb) is non-NULL when called in skb_cb_set_mpls_stack().
* Add a call to skb_cb_set_mpls_stack() in ovs_packet_cmd_execute().
  I apologise that I have mislaid my notes on this but
  it avoids a kernel panic. I can investigate again if necessary.
* Use struct ovs_action_push_mpls instead of
  __be16 to decode OVS_ACTION_ATTR_PUSH_MPLS in validate_actions(). This is
  consistent with the data format for the attribute.
* Indentation fix in skb_cb_mpls_stack(). [cosmetic]

v2.1
* Manual rebase

Previous version(s) by Leo Alterman
---
 datapath/actions.c          |   81 +++++++++++++++++++++++++++++++++++++++++++
 datapath/datapath.c         |   61 ++++++++++++++++++++++++++++++++
 datapath/datapath.h         |    8 +++++
 datapath/flow.c             |   30 ++++++++++++++++
 datapath/flow.h             |    7 ++++
 datapath/vport.c            |    2 ++
 include/linux/openvswitch.h |   32 +++++++++++++++++
 7 files changed, 221 insertions(+)

diff --git a/datapath/actions.c b/datapath/actions.c
index ec9b595..c1d1a7f 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -47,6 +47,67 @@ static int make_writable(struct sk_buff *skb, int write_len)
 	return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
 }
 
+static __be16 get_ethertype(const struct sk_buff *skb)
+{
+	struct ethhdr *hdr = (struct ethhdr *)(skb_cb_mpls_stack(skb) - ETH_HLEN);
+	return hdr->h_proto;
+}
+
+static void set_ethertype(struct sk_buff *skb, const __be16 ethertype)
+{
+	struct ethhdr *hdr = (struct ethhdr *)(skb_cb_mpls_stack(skb) - ETH_HLEN);
+	hdr->h_proto = ethertype;
+}
+
+static int push_mpls(struct sk_buff *skb, const struct ovs_action_push_mpls *mpls)
+{
+	u32 l2_size;
+	__be32 *new_mpls_label;
+
+	if (skb_cow_head(skb, MPLS_HLEN) < 0) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+
+	l2_size = skb_cb_mpls_stack_offset(skb);
+	skb_push(skb, MPLS_HLEN);
+	memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), l2_size);
+	skb_reset_mac_header(skb);
+
+	new_mpls_label = (__be32 *)(skb_mac_header(skb) + l2_size);
+	*new_mpls_label = mpls->mpls_label;
+
+	set_ethertype(skb, mpls->mpls_ethertype);
+	return 0;
+}
+
+static int pop_mpls(struct sk_buff *skb, const __be16 *ethertype)
+{
+	__be16 current_ethertype = get_ethertype(skb);
+	if (current_ethertype == htons(ETH_P_MPLS_UC) ||
+		current_ethertype == htons(ETH_P_MPLS_MC)) {
+		u32 l2_size = skb_cb_mpls_stack_offset(skb);
+
+		memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), l2_size);
+
+		skb_pull(skb, MPLS_HLEN);
+		skb_reset_mac_header(skb);
+
+		set_ethertype(skb, *ethertype);
+	}
+	return 0;
+}
+
+static int set_mpls(struct sk_buff *skb, const __be32 *mpls_label)
+{
+	__be16 current_ethertype = get_ethertype(skb);
+	if (current_ethertype == htons(ETH_P_MPLS_UC) ||
+		current_ethertype == htons(ETH_P_MPLS_MC)) {
+		memcpy(skb_cb_mpls_stack(skb), mpls_label, sizeof(__be32));
+	}
+	return 0;
+}
+
 /* remove VLAN header from packet and update csum accordingly. */
 static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
 {
@@ -100,6 +161,9 @@ static int pop_vlan(struct sk_buff *skb)
 		return err;
 
 	__vlan_hwaccel_put_tag(skb, ntohs(tci));
+
+	/* update pointer to MPLS label stack */
+	skb_cb_set_mpls_stack(skb);
 	return 0;
 }
 
@@ -120,6 +184,9 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
 
 	}
 	__vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
+
+	/* update pointer to MPLS label stack */
+	skb_cb_set_mpls_stack(skb);
 	return 0;
 }
 
@@ -396,6 +463,20 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			output_userspace(dp, skb, a);
 			break;
 
+		case OVS_ACTION_ATTR_PUSH_MPLS:
+			err = push_mpls(skb, nla_data(a));
+			if (unlikely(err)) /* skb already freed. */
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_POP_MPLS:
+			err = pop_mpls(skb, nla_data(a));
+			break;
+
+		case OVS_ACTION_ATTR_SET_MPLS:
+			err = set_mpls(skb, nla_data(a));
+			break;
+
 		case OVS_ACTION_ATTR_PUSH_VLAN:
 			err = push_vlan(skb, nla_data(a));
 			if (unlikely(err)) /* skb already freed. */
diff --git a/datapath/datapath.c b/datapath/datapath.c
index a6915fb..8ff244c 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -74,6 +74,43 @@ int ovs_net_id __read_mostly;
 int (*ovs_dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
 EXPORT_SYMBOL(ovs_dp_ioctl_hook);
 
+void skb_cb_set_mpls_stack(struct sk_buff *skb)
+{
+	struct ethhdr *eth;
+	int nh_ofs;
+	__be16 dl_type = 0;
+
+	skb_reset_mac_header(skb);
+
+	eth = eth_hdr(skb);
+	nh_ofs = sizeof(struct ethhdr);
+	if (likely(eth->h_proto >= htons(ETH_TYPE_MIN))) {
+		dl_type = eth->h_proto;
+
+		while (dl_type == htons(ETH_P_8021Q) &&
+				skb->len >= nh_ofs + sizeof(struct vlan_hdr)) {
+			struct vlan_hdr *vh = (struct vlan_hdr*)(skb->data + nh_ofs);
+			dl_type = vh->h_vlan_encapsulated_proto;
+			nh_ofs += sizeof(struct vlan_hdr);
+		}
+
+		OVS_CB(skb)->mpls_stack = nh_ofs;
+	} else {
+		OVS_CB(skb)->mpls_stack = 0;
+	}
+}
+
+unsigned char *skb_cb_mpls_stack(const struct sk_buff *skb)
+{
+	return OVS_CB(skb)->mpls_stack ?
+		skb_mac_header(skb) + OVS_CB(skb)->mpls_stack : 0;
+}
+
+ptrdiff_t skb_cb_mpls_stack_offset(const struct sk_buff *skb)
+{
+	return OVS_CB(skb)->mpls_stack;
+}
+
 /**
  * DOC: Locking:
  *
@@ -663,12 +700,17 @@ static int validate_actions(const struct nlattr *attr,
 		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
 			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
 			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+			[OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
+			[OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
+			[OVS_ACTION_ATTR_SET_MPLS] = sizeof(__be32),
 			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
 			[OVS_ACTION_ATTR_POP_VLAN] = 0,
 			[OVS_ACTION_ATTR_SET] = (u32)-1,
 			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1
 		};
 		const struct ovs_action_push_vlan *vlan;
+		__be16 mpls_ethertype;
+		__be32 mpls_label;
 		int type = nla_type(a);
 
 		if (type > OVS_ACTION_ATTR_MAX ||
@@ -691,6 +733,23 @@ static int validate_actions(const struct nlattr *attr,
 				return -EINVAL;
 			break;
 
+		case OVS_ACTION_ATTR_PUSH_MPLS: {
+			const struct ovs_action_push_mpls *mpls = nla_data(a);
+			if (mpls->mpls_ethertype != htons(ETH_P_MPLS_UC) &&
+			    mpls->mpls_ethertype != htons(ETH_P_MPLS_MC))
+				return -EINVAL;
+			break;
+		}
+
+		case OVS_ACTION_ATTR_POP_MPLS:
+			break;
+
+		case OVS_ACTION_ATTR_SET_MPLS:
+			mpls_label = nla_get_be32(a);
+			if (mpls_label == htonl(0)) {
+				return -EINVAL;
+			}
+			break;
 
 		case OVS_ACTION_ATTR_POP_VLAN:
 			break;
@@ -805,6 +864,8 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	OVS_CB(packet)->flow = flow;
 	packet->priority = flow->key.phy.priority;
 
+	skb_cb_set_mpls_stack(packet);
+
 	rcu_read_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
 	err = -ENODEV;
diff --git a/datapath/datapath.h b/datapath/datapath.h
index affbf0e..1801ccd 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -97,6 +97,9 @@ struct datapath {
  * struct ovs_skb_cb - OVS data in skb CB
  * @flow: The flow associated with this packet.  May be %NULL if no flow.
  * @tun_id: ID of the tunnel that encapsulated this packet.  It is 0 if the
+ * packet is not being tunneled.
+ * @mpls_stack: Offset of the packet's MPLS stack from the beginning of the
+ * ethernet frame.  It is 0 if no MPLS stack is present.
  * @ip_summed: Consistently stores L4 checksumming status across different
  * kernel versions.
  * @csum_start: Stores the offset from which to start checksumming independent
@@ -108,6 +111,7 @@ struct datapath {
 struct ovs_skb_cb {
 	struct sw_flow		*flow;
 	__be64			tun_id;
+	ptrdiff_t	mpls_stack;
 #ifdef NEED_CSUM_NORMALIZE
 	enum csum_type		ip_summed;
 	u16			csum_start;
@@ -192,4 +196,8 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
 					 u8 cmd);
 
 int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
+
+void skb_cb_set_mpls_stack(struct sk_buff *skb);
+unsigned char *skb_cb_mpls_stack(const struct sk_buff *skb);
+ptrdiff_t skb_cb_mpls_stack_offset(const struct sk_buff *skb);
 #endif /* datapath.h */
diff --git a/datapath/flow.c b/datapath/flow.c
index d07337c..40df8ff 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -739,6 +739,14 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 				key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
 			}
 		}
+	} else if (key->eth.type == htons(ETH_P_MPLS_UC) ||
+				key->eth.type == htons(ETH_P_MPLS_MC)) {
+		error = check_header(skb, MPLS_HLEN);
+		if (unlikely(error))
+			goto out;
+
+		key_len = SW_FLOW_KEY_OFFSET(mpls.top_label);
+		memcpy(&key->mpls.top_label, skb_network_header(skb), MPLS_HLEN);
 	} else if (key->eth.type == htons(ETH_P_IPV6)) {
 		int nh_len;             /* IPv6 Header + Extensions */
 
@@ -836,6 +844,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 	[OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
 	[OVS_KEY_ATTR_VLAN] = sizeof(__be16),
 	[OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
+	[OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
 	[OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
 	[OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
 	[OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
@@ -1141,6 +1150,17 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 		swkey->ip.proto = ntohs(arp_key->arp_op);
 		memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
 		memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
+	} else if (swkey->eth.type == htons(ETH_P_MPLS_UC) || 
+				swkey->eth.type == htons(ETH_P_MPLS_MC)) {
+		const struct ovs_key_mpls *mpls_key;
+
+		if (!(attrs & (1 << OVS_KEY_ATTR_MPLS)))
+			return -EINVAL;
+		attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
+
+		key_len = SW_FLOW_KEY_OFFSET(mpls.top_label);
+		mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
+		swkey->mpls.top_label = mpls_key->mpls_top_label;
 	}
 
 	if (attrs)
@@ -1284,6 +1304,16 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 		arp_key->arp_op = htons(swkey->ip.proto);
 		memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN);
 		memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN);
+	} else if (swkey->eth.type == htons(ETH_P_MPLS_UC) || 
+				swkey->eth.type == htons(ETH_P_MPLS_MC)) {
+		struct ovs_key_mpls *mpls_key;
+
+		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
+		if (!nla)
+			goto nla_put_failure;
+		mpls_key = nla_data(nla);
+		memset(mpls_key, 0, sizeof(struct ovs_key_mpls));
+		mpls_key->mpls_top_label = swkey->mpls.top_label;
 	}
 
 	if ((swkey->eth.type == htons(ETH_P_IP) ||
diff --git a/datapath/flow.h b/datapath/flow.h
index 02c563a..eb3fe49 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -53,6 +53,9 @@ struct sw_flow_key {
 		__be16 type;		/* Ethernet frame type. */
 	} eth;
 	struct {
+		__be32 top_label;	/* 0 if no MPLS, top label from stack otherwise */
+	} mpls;
+	struct {
 		u8     proto;		/* IP protocol or lower 8 bits of ARP opcode. */
 		u8     tos;		/* IP ToS. */
 		u8     ttl;		/* IP TTL/hop limit. */
@@ -126,6 +129,10 @@ struct arp_eth_header {
 	unsigned char       ar_tip[4];		/* target IP address        */
 } __packed;
 
+#define ETH_TYPE_MIN 0x600
+
+#define MPLS_HLEN 4
+
 int ovs_flow_init(void);
 void ovs_flow_exit(void);
 
diff --git a/datapath/vport.c b/datapath/vport.c
index dc7adfa..0ae4801 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -464,6 +464,8 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
 	if (!(vport->ops->flags & VPORT_F_TUN_ID))
 		OVS_CB(skb)->tun_id = 0;
 
+	skb_cb_set_mpls_stack(skb);
+
 	ovs_dp_process_received_packet(vport, skb);
 }
 
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index f5c9cca..78960d1 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -278,6 +278,7 @@ enum ovs_key_attr {
 	OVS_KEY_ATTR_ICMPV6,    /* struct ovs_key_icmpv6 */
 	OVS_KEY_ATTR_ARP,       /* struct ovs_key_arp */
 	OVS_KEY_ATTR_ND,        /* struct ovs_key_nd */
+	OVS_KEY_ATTR_MPLS,      /* struct ovs_key_mpls */
 	OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */
 	__OVS_KEY_ATTR_MAX
 };
@@ -307,6 +308,10 @@ struct ovs_key_ethernet {
 	__u8	 eth_dst[6];
 };
 
+struct ovs_key_mpls {
+	__be32 mpls_top_label;
+};
+
 struct ovs_key_ipv4 {
 	__be32 ipv4_src;
 	__be32 ipv4_dst;
@@ -437,6 +442,20 @@ enum ovs_userspace_attr {
 #define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
 
 /**
+ * struct ovs_action_push_mpls - %OVS_ACTION_ATTR_PUSH_MPLS action argument.
+ * @mpls_label: MPLS label to push.
+ * @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame.
+ *
+ * The only values @mpls_ethertype should ever be given are %ETH_P_MPLS_UC and
+ * %ETH_P_MPLS_MC, indicating MPLS unicast or multicast. Any other values would
+ * produce a corrupt packet.
+ */
+struct ovs_action_push_mpls {
+	__be32 mpls_label;
+	__be16 mpls_ethertype; /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC */ 
+};
+
+/**
  * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
  * @vlan_tpid: Tag protocol identifier (TPID) to push.
  * @vlan_tci: Tag control identifier (TCI) to push.  The CFI bit must be set
@@ -461,6 +480,16 @@ struct ovs_action_push_vlan {
  * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header.  The
  * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its
  * value.
+ * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label onto the top of the packet
+ * MPLS stack. Set the ethertype of the encapsulating frame to either
+ * %ETH_P_MPLS_UC or %ETH_P_MPLS_MC to indicate the new packet contents.
+ * @OVS_ACTION_ATTR_POP_MPLS: Pop an MPLS label off of the packet's MPLS stack.
+ * Set the encapsulating frame's ethertype to indicate the new packet contents
+ * (this could potentially still be %ETH_P_MPLS_* if there are remaining MPLS
+ * labels).  If there are no MPLS labels as determined by ethertype, no action
+ * is taken.
+ * @OVS_ACTION_ATTR_SET_MPLS: Set the value of the top-most label in the MPLS
+ * label stack.  If there are no MPLS labels in the packet, no action is taken.
  * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
  * packet.
  * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
@@ -477,6 +506,9 @@ enum ovs_action_attr {
 	OVS_ACTION_ATTR_OUTPUT,	      /* u32 port number. */
 	OVS_ACTION_ATTR_USERSPACE,    /* Nested OVS_USERSPACE_ATTR_*. */
 	OVS_ACTION_ATTR_SET,          /* One nested OVS_KEY_ATTR_*. */
+	OVS_ACTION_ATTR_PUSH_MPLS,    /* struct ovs_action_push_mpls. */
+	OVS_ACTION_ATTR_POP_MPLS,     /* __be16 ethertype. */
+	OVS_ACTION_ATTR_SET_MPLS,     /* __be32 MPLS label */
 	OVS_ACTION_ATTR_PUSH_VLAN,    /* struct ovs_action_push_vlan. */
 	OVS_ACTION_ATTR_POP_VLAN,     /* No argument. */
 	OVS_ACTION_ATTR_SAMPLE,       /* Nested OVS_SAMPLE_ATTR_*. */
-- 
1.7.10.4




More information about the dev mailing list