[ovs-dev] [PATCH] Datapath: Change in openvswitch kernel module to support MPLS label depth of 3 in ingress direction.

Gregory Rose gvrose8192 at gmail.com
Fri Nov 22 16:32:53 UTC 2019


On 11/21/2019 10:07 PM, Martin Varghese wrote:
> From: Martin Varghese <martin.varghese at nokia.com>
>
> Upstream commit:
>      commit fbdcdd78da7c95f1b970d371e1b23cbd3aa990f3
>      Author: Martin Varghese <martin.varghese at nokia.com>
>      Date:   Mon Nov 4 07:27:44 2019 +0530
>
>      Change in Openvswitch to support MPLS label depth of 3 in ingress
>      direction
>
>      The openvswitch kernel module supported an MPLS label depth of 1
>      in the ingress direction, though the userspace OVS supports a max
>      depth of 3 labels. This change enables the openvswitch module to
>      support a max depth of 3 labels in the ingress direction.
>
>      Signed-off-by: Martin Varghese <martin.varghese at nokia.com>
>      Acked-by: Pravin B Shelar <pshelar at ovn.org>
>      Signed-off-by: David S. Miller <davem at davemloft.net>
>
> Signed-off-by: Martin Varghese <martin.varghese at nokia.com>

Thanks Martin.

Tested-by: Greg Rose <gvrose8192 at gmail.com>
Reviewed-by: Greg Rose <gvrose8192 at gmail.com>



> ---
>   datapath/actions.c      |  2 +-
>   datapath/flow.c         | 20 ++++++++----
>   datapath/flow.h         |  8 +++--
>   datapath/flow_netlink.c | 85 ++++++++++++++++++++++++++++++++++++-------------
>   tests/system-traffic.at | 39 +++++++++++++++++++++++
>   5 files changed, 122 insertions(+), 32 deletions(-)
>
> diff --git a/datapath/actions.c b/datapath/actions.c
> index a44e804..fbf4457 100644
> --- a/datapath/actions.c
> +++ b/datapath/actions.c
> @@ -276,7 +276,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
>   	}
>   
>   	stack->label_stack_entry = lse;
> -	flow_key->mpls.top_lse = lse;
> +	flow_key->mpls.lse[0] = lse;
>   	return 0;
>   }
>   
> diff --git a/datapath/flow.c b/datapath/flow.c
> index 916f7f4..6dc7402 100644
> --- a/datapath/flow.c
> +++ b/datapath/flow.c
> @@ -659,27 +659,35 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
>   			memset(&key->ipv4, 0, sizeof(key->ipv4));
>   		}
>   	} else if (eth_p_mpls(key->eth.type)) {
> -		size_t stack_len = MPLS_HLEN;
> +		u8 label_count = 1;
>   
> +		memset(&key->mpls, 0, sizeof(key->mpls));
>   		skb_set_inner_network_header(skb, skb->mac_len);
>   		while (1) {
>   			__be32 lse;
>   
> -			error = check_header(skb, skb->mac_len + stack_len);
> +			error = check_header(skb, skb->mac_len +
> +					     label_count * MPLS_HLEN);
>   			if (unlikely(error))
>   				return 0;
>   
>   			memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
>   
> -			if (stack_len == MPLS_HLEN)
> -				memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
> +			if (label_count <= MPLS_LABEL_DEPTH)
> +				memcpy(&key->mpls.lse[label_count - 1], &lse,
> +				       MPLS_HLEN);
>   
> -			skb_set_inner_network_header(skb, skb->mac_len + stack_len);
> +			skb_set_inner_network_header(skb, skb->mac_len +
> +						     label_count * MPLS_HLEN);
>   			if (lse & htonl(MPLS_LS_S_MASK))
>   				break;
>   
> -			stack_len += MPLS_HLEN;
> +			label_count++;
>   		}
> +		if (label_count > MPLS_LABEL_DEPTH)
> +			label_count = MPLS_LABEL_DEPTH;
> +
> +		key->mpls.num_labels_mask = GENMASK(label_count - 1, 0);
>   	} else if (key->eth.type == htons(ETH_P_IPV6)) {
>   		int nh_len;             /* IPv6 Header + Extensions */
>   
> diff --git a/datapath/flow.h b/datapath/flow.h
> index 5560300..4ad5363 100644
> --- a/datapath/flow.h
> +++ b/datapath/flow.h
> @@ -43,6 +43,7 @@ enum sw_flow_mac_proto {
>   	MAC_PROTO_ETHERNET,
>   };
>   #define SW_FLOW_KEY_INVALID	0x80
> +#define MPLS_LABEL_DEPTH       3
>   
>   /* Store options at the end of the array if they are less than the
>    * maximum size. This allows us to get the benefits of variable length
> @@ -98,9 +99,6 @@ struct sw_flow_key {
>   					 */
>   	union {
>   		struct {
> -			__be32 top_lse;	/* top label stack entry */
> -		} mpls;
> -		struct {
>   			u8     proto;	/* IP protocol or lower 8 bits of ARP opcode. */
>   			u8     tos;	    /* IP ToS. */
>   			u8     ttl;	    /* IP TTL/hop limit. */
> @@ -148,6 +146,10 @@ struct sw_flow_key {
>   				} nd;
>   			};
>   		} ipv6;
> +		struct {
> +			u32 num_labels_mask;    /* labels present bitmap of effective length MPLS_LABEL_DEPTH */
> +			__be32 lse[MPLS_LABEL_DEPTH];     /* label stack entry  */
> +		} mpls;
>   		struct ovs_key_nsh nsh;         /* network service header */
>   	};
>   	struct {
> diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
> index 35f13d7..9fc1a19 100644
> --- a/datapath/flow_netlink.c
> +++ b/datapath/flow_netlink.c
> @@ -438,7 +438,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
>   	[OVS_KEY_ATTR_DP_HASH]	 = { .len = sizeof(u32) },
>   	[OVS_KEY_ATTR_TUNNEL]	 = { .len = OVS_ATTR_NESTED,
>   				     .next = ovs_tunnel_key_lens, },
> -	[OVS_KEY_ATTR_MPLS]	 = { .len = sizeof(struct ovs_key_mpls) },
> +	[OVS_KEY_ATTR_MPLS]	 = { .len = OVS_ATTR_VARIABLE },
>   	[OVS_KEY_ATTR_CT_STATE]	 = { .len = sizeof(u32) },
>   	[OVS_KEY_ATTR_CT_ZONE]	 = { .len = sizeof(u16) },
>   	[OVS_KEY_ATTR_CT_MARK]	 = { .len = sizeof(u32) },
> @@ -1619,10 +1619,27 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
>   
>   	if (attrs & (1ULL << OVS_KEY_ATTR_MPLS)) {
>   		const struct ovs_key_mpls *mpls_key;
> +		u32 hdr_len;
> +		u32 label_count, label_count_mask, i;
> +
>   
>   		mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
> -		SW_FLOW_KEY_PUT(match, mpls.top_lse,
> -				mpls_key->mpls_lse, is_mask);
> +		hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
> +		label_count = hdr_len / sizeof(struct ovs_key_mpls);
> +
> +		if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
> +		    hdr_len % sizeof(struct ovs_key_mpls))
> +			return -EINVAL;
> +
> +		label_count_mask =  GENMASK(label_count - 1, 0);
> +
> +		for (i = 0 ; i < label_count; i++)
> +			SW_FLOW_KEY_PUT(match, mpls.lse[i],
> +					mpls_key[i].mpls_lse, is_mask);
> +
> +		SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
> +				label_count_mask, is_mask);
> +
>   
>   		attrs &= ~(1ULL << OVS_KEY_ATTR_MPLS);
>   	}
> @@ -2104,13 +2121,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
>   		ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
>   		ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
>   	} else if (eth_p_mpls(swkey->eth.type)) {
> +		u8 num_labels, i;
>   		struct ovs_key_mpls *mpls_key;
>   
> -		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
> +		num_labels = hweight_long(output->mpls.num_labels_mask);
> +		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
> +				  num_labels * sizeof(*mpls_key));
>   		if (!nla)
>   			goto nla_put_failure;
> +
>   		mpls_key = nla_data(nla);
> -		mpls_key->mpls_lse = output->mpls.top_lse;
> +		for (i = 0; i < num_labels; i++)
> +			mpls_key[i].mpls_lse = output->mpls.lse[i];
>   	}
>   
>   	if ((swkey->eth.type == htons(ETH_P_IP) ||
> @@ -2400,13 +2422,14 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
>   static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>   				  const struct sw_flow_key *key,
>   				  struct sw_flow_actions **sfa,
> -				  __be16 eth_type, __be16 vlan_tci, bool log);
> +				  __be16 eth_type, __be16 vlan_tci,
> +				  u32 mpls_label_count, bool log);
>   
>   static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>   				    const struct sw_flow_key *key,
>   				    struct sw_flow_actions **sfa,
>   				    __be16 eth_type, __be16 vlan_tci,
> -				    bool log, bool last)
> +				    u32 mpls_label_count, bool log, bool last)
>   {
>   	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
>   	const struct nlattr *probability, *actions;
> @@ -2457,7 +2480,7 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>   		return err;
>   
>   	err = __ovs_nla_copy_actions(net, actions, key, sfa,
> -				     eth_type, vlan_tci, log);
> +				     eth_type, vlan_tci, mpls_label_count, log);
>   
>   	if (err)
>   		return err;
> @@ -2472,7 +2495,7 @@ static int validate_and_copy_clone(struct net *net,
>   				   const struct sw_flow_key *key,
>   				   struct sw_flow_actions **sfa,
>   				   __be16 eth_type, __be16 vlan_tci,
> -				   bool log, bool last)
> +				   u32 mpls_label_count, bool log, bool last)
>   {
>   	int start, err;
>   	u32 exec;
> @@ -2492,7 +2515,7 @@ static int validate_and_copy_clone(struct net *net,
>   		return err;
>   
>   	err = __ovs_nla_copy_actions(net, attr, key, sfa,
> -				     eth_type, vlan_tci, log);
> +				     eth_type, vlan_tci, mpls_label_count, log);
>   	if (err)
>   		return err;
>   
> @@ -2859,6 +2882,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
>   					   const struct sw_flow_key *key,
>   					   struct sw_flow_actions **sfa,
>   					   __be16 eth_type, __be16 vlan_tci,
> +					   u32 mpls_label_count,
>   					   bool log, bool last)
>   {
>   	const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
> @@ -2906,7 +2930,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
>   		return nested_acts_start;
>   
>   	err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
> -				     eth_type, vlan_tci, log);
> +				     eth_type, vlan_tci, mpls_label_count, log);
>   
>   	if (err)
>   		return err;
> @@ -2919,7 +2943,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
>   		return nested_acts_start;
>   
>   	err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
> -				     eth_type, vlan_tci, log);
> +				     eth_type, vlan_tci, mpls_label_count, log);
>   
>   	if (err)
>   		return err;
> @@ -2946,7 +2970,8 @@ static int copy_action(const struct nlattr *from,
>   static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>   				  const struct sw_flow_key *key,
>   				  struct sw_flow_actions **sfa,
> -				  __be16 eth_type, __be16 vlan_tci, bool log)
> +				  __be16 eth_type, __be16 vlan_tci,
> +				  u32 mpls_label_count, bool log)
>   {
>   	u8 mac_proto = ovs_key_mac_proto(key);
>   	const struct nlattr *a;
> @@ -3059,26 +3084,35 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>   			     !eth_p_mpls(eth_type)))
>   				return -EINVAL;
>   			eth_type = mpls->mpls_ethertype;
> +			mpls_label_count++;
>   			break;
>   		}
>   
> -		case OVS_ACTION_ATTR_POP_MPLS:
> +		case OVS_ACTION_ATTR_POP_MPLS: {
> +			__be16  proto;
>   			if (vlan_tci & htons(VLAN_CFI_MASK) ||
>   			    !eth_p_mpls(eth_type))
>   				return -EINVAL;
>   
> -			/* Disallow subsequent L2.5+ set and mpls_pop actions
> -			 * as there is no check here to ensure that the new
> -			 * eth_type is valid and thus set actions could
> -			 * write off the end of the packet or otherwise
> -			 * corrupt it.
> +			/* Disallow subsequent L2.5+ set actions and mpls_pop
> +			 * actions once the last MPLS label in the packet is
> +			 * popped as there is no check here to ensure that
> +			 * the new eth type is valid and thus set actions could
> +			 * write off the end of the packet or otherwise corrupt
> +			 * it.
>   			 *
>   			 * Support for these actions is planned using packet
>   			 * recirculation.
>   			 */
> -			eth_type = htons(0);
> -			break;
> +			proto = nla_get_be16(a);
> +			mpls_label_count--;
>   
> +			if (!eth_p_mpls(proto) || !mpls_label_count)
> +				eth_type = htons(0);
> +			else
> +				eth_type =  proto;
> +			break;
> +		}
>   		case OVS_ACTION_ATTR_SET:
>   			err = validate_set(a, key, sfa,
>   					   &skip_copy, mac_proto, eth_type,
> @@ -3100,6 +3134,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>   
>   			err = validate_and_copy_sample(net, a, key, sfa,
>   						       eth_type, vlan_tci,
> +						       mpls_label_count,
>   						       log, last);
>   			if (err)
>   				return err;
> @@ -3170,6 +3205,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>   
>   			err = validate_and_copy_clone(net, a, key, sfa,
>   						      eth_type, vlan_tci,
> +						      mpls_label_count,
>   						      log, last);
>   			if (err)
>   				return err;
> @@ -3183,6 +3219,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>                           err = validate_and_copy_check_pkt_len(net, a, key, sfa,
>                                                                 eth_type,
>                                                                 vlan_tci, log,
> +							      mpls_label_count,
>                                                                 last);
>                           if (err)
>                                   return err;
> @@ -3213,14 +3250,18 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>   			 struct sw_flow_actions **sfa, bool log)
>   {
>   	int err;
> +	u32 mpls_label_count = 0;
>   
>   	*sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
>   	if (IS_ERR(*sfa))
>   		return PTR_ERR(*sfa);
>   
> +	if (eth_p_mpls(key->eth.type))
> +		mpls_label_count = hweight_long(key->mpls.num_labels_mask);
> +
>   	(*sfa)->orig_len = nla_len(attr);
>   	err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
> -				     key->eth.vlan.tci, log);
> +				     key->eth.vlan.tci, mpls_label_count, log);
>   	if (err)
>   		ovs_nla_free_flow_actions(*sfa);
>   
> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> index 870a05e..cde7429 100644
> --- a/tests/system-traffic.at
> +++ b/tests/system-traffic.at
> @@ -992,6 +992,45 @@ NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w 2 10.1.1.1 | FORMAT_PING], [0],
>   
>   OVS_TRAFFIC_VSWITCHD_STOP
>   AT_CLEANUP
> +
> +AT_SETUP([datapath - multiple mpls label pop])
> +OVS_TRAFFIC_VSWITCHD_START([_ADD_BR([br1])])
> +
> +ADD_NAMESPACES(at_ns0, at_ns1)
> +
> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
> +ADD_VETH(p1, at_ns1, br1, "10.1.1.2/24")
> +
> +AT_CHECK([ip link add patch0 type veth peer name patch1])
> +on_exit 'ip link del patch0'
> +
> +AT_CHECK([ip link set dev patch0 up])
> +AT_CHECK([ip link set dev patch1 up])
> +AT_CHECK([ovs-vsctl add-port br0 patch0])
> +AT_CHECK([ovs-vsctl add-port br1 patch1])
> +
> +AT_DATA([flows.txt], [dnl
> +table=0,priority=100,dl_type=0x0800 actions=push_mpls:0x8847,set_mpls_label:3,push_mpls:0x8847,set_mpls_label:2,push_mpls:0x8847,set_mpls_label:1,resubmit(,3)
> +table=0,priority=100,dl_type=0x8847,mpls_label=1 actions=pop_mpls:0x8847,resubmit(,1)
> +table=1,priority=100,dl_type=0x8847,mpls_label=2 actions=pop_mpls:0x8847,resubmit(,2)
> +table=2,priority=100,dl_type=0x8847,mpls_label=3 actions=pop_mpls:0x0800,resubmit(,3)
> +table=0,priority=10 actions=resubmit(,3)
> +table=3,priority=10 actions=normal
> +])
> +
> +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
> +AT_CHECK([ovs-ofctl add-flows br1 flows.txt])
> +
> +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w 2 10.1.1.1 | FORMAT_PING], [0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +OVS_TRAFFIC_VSWITCHD_STOP
> +AT_CLEANUP
> +
>   AT_SETUP([datapath - basic truncate action])
>   AT_SKIP_IF([test $HAVE_NC = no])
>   OVS_TRAFFIC_VSWITCHD_START()



More information about the dev mailing list