[ovs-dev] [PATCH v5 11/13] datapath: Allow masks for set actions.

Jarno Rajahalme jrajahalme at nicira.com
Fri Sep 5 23:05:18 UTC 2014


Masked set action allows more megaflow wildcarding.  Masked set action
is now supported for all writeable key types, except for the tunnel
key.

The set tunnel action is an exception as any input tunnel info is
cleared before action processing starts, so there is no tunnel info to
mask.

The kernel module converts all (non-tunnel) set actions to masked set
actions.  This makes action processing more uniform, and results in
less branching and duplicating the action processing code.  When
returning actions to userspace, the fully masked set actions are
converted to normal set actions.  To make this mapping consistent, we
reject fully masked set actions at flow set-up time.

Signed-off-by: Jarno Rajahalme <jrajahalme at nicira.com>
---
v5: Fixed issues spotted by Jesse.

 datapath/actions.c      |  333 ++++++++++++++++++++++++++++++-----------------
 datapath/flow_netlink.c |  161 ++++++++++++++++++++---
 2 files changed, 357 insertions(+), 137 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index 43ca2a0..243b672 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -225,9 +225,15 @@ static int pop_mpls(struct sk_buff *skb, const __be16 ethertype)
 	return 0;
 }
 
-static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
+/* 'KEY' must not have any bits set outside of the 'MASK' */
+#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
+#define SET_MASKED(OLD, KEY, MASK) (OLD) = MASKED(OLD, KEY, MASK)
+
+static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse,
+		    const __be32 *mask)
 {
 	__be32 *stack = (__be32 *)mac_header_end(skb);
+	__be32 lse = MASKED(*stack, *mpls_lse, *mask);
 	int err;
 
 	err = make_writable(skb, skb->mac_len + MPLS_HLEN);
@@ -235,13 +241,13 @@ static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
 		return err;
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		__be32 diff[] = { ~(*stack), *mpls_lse };
+		__be32 diff[] = { ~(*stack), lse };
 		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
 					  ~skb->csum);
 	}
 
-	*stack = *mpls_lse;
-	flow_key_set_mpls_top_lse(skb, *stack);
+	*stack = lse;
+	flow_key_set_mpls_top_lse(skb, lse);
 	return 0;
 }
 
@@ -331,8 +337,21 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
 	return 0;
 }
 
+/* 'src' is already properly masked. */
+static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
+{
+	u16 *dst = (u16 *)dst_;
+	const u16 *src = (const u16 *)src_;
+	const u16 *mask = (const u16 *)mask_;
+
+	SET_MASKED(dst[0], src[0], mask[0]);
+	SET_MASKED(dst[1], src[1], mask[1]);
+	SET_MASKED(dst[2], src[2], mask[2]);
+}
+
 static int set_eth_addr(struct sk_buff *skb,
-			const struct ovs_key_ethernet *eth_key)
+			const struct ovs_key_ethernet *key,
+			const struct ovs_key_ethernet *mask)
 {
 	int err;
 	err = make_writable(skb, ETH_HLEN);
@@ -341,13 +360,15 @@ static int set_eth_addr(struct sk_buff *skb,
 
 	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
 
-	ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src);
-	ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst);
+	ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
+			       mask->eth_src);
+	ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
+			       mask->eth_dst);
 
 	ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
 
-	flow_key_set_eth_src(skb, eth_key->eth_src);
-	flow_key_set_eth_dst(skb, eth_key->eth_dst);
+	flow_key_set_eth_src(skb, eth_hdr(skb)->h_source);
+	flow_key_set_eth_dst(skb, eth_hdr(skb)->h_dest);
 	return 0;
 }
 
@@ -405,6 +426,15 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
 	}
 }
 
+static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
+			   const __be32 mask[4], __be32 masked[4])
+{
+	masked[0] = MASKED(old[0], addr[0], mask[0]);
+	masked[1] = MASKED(old[1], addr[1], mask[1]);
+	masked[2] = MASKED(old[2], addr[2], mask[2]);
+	masked[3] = MASKED(old[3], addr[3], mask[3]);
+}
+
 static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
 			  __be32 addr[4], const __be32 new_addr[4],
 			  bool recalculate_csum)
@@ -416,28 +446,28 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
 	memcpy(addr, new_addr, sizeof(__be32[4]));
 }
 
-static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc)
+static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
 {
-	nh->priority = tc >> 4;
-	nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4);
+	/* Bits 21-24 are always unmasked, so this retains their values. */
+	SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
+	SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
+	SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
 }
 
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl)
+static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
+		       u8 mask)
 {
-	nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16;
-	nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8;
-	nh->flow_lbl[2] = fl & 0x000000FF;
-}
+	new_ttl = MASKED(nh->ttl, new_ttl, mask);
 
-static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
-{
 	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
 	nh->ttl = new_ttl;
 }
 
-static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
+static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *key,
+		    const struct ovs_key_ipv4 *mask)
 {
 	struct iphdr *nh;
+	__be32 new_addr;
 	int err;
 
 	err = make_writable(skb, skb_network_offset(skb) +
@@ -447,35 +477,47 @@ static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
 
 	nh = ip_hdr(skb);
 
-	if (ipv4_key->ipv4_src != nh->saddr) {
-		set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
-		flow_key_set_ipv4_src(skb, ipv4_key->ipv4_src);
-	}
+	/* Setting an IP addresses is typically only a side effect of
+	 * matching on them in the current userspace implementation, so it
+	 * makes sense to check if the value actually changed. */
+	if (mask->ipv4_src) {
+		new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);
 
-	if (ipv4_key->ipv4_dst != nh->daddr) {
-		set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
-		flow_key_set_ipv4_dst(skb, ipv4_key->ipv4_dst);
+		if (unlikely(new_addr != nh->saddr)) {
+			set_ip_addr(skb, nh, &nh->saddr, new_addr);
+			flow_key_set_ipv4_src(skb, new_addr);
+		}
 	}
+	if (mask->ipv4_dst) {
+		new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);
 
-	if (ipv4_key->ipv4_tos != nh->tos) {
-		ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
+		if (unlikely(new_addr != nh->daddr)) {
+			set_ip_addr(skb, nh, &nh->daddr, new_addr);
+			flow_key_set_ipv4_dst(skb, new_addr);
+		}
+	}
+	if (mask->ipv4_tos) {
+		ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
 		flow_key_set_ip_tos(skb, nh->tos);
 	}
-
-	if (ipv4_key->ipv4_ttl != nh->ttl) {
-		set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
-		flow_key_set_ip_ttl(skb, ipv4_key->ipv4_ttl);
+	if (mask->ipv4_ttl) {
+		set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
+		flow_key_set_ip_ttl(skb, nh->ttl);
 	}
 
 	return 0;
 }
 
-static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
+static bool is_ipv6_mask_nonzero(const __be32 addr[4])
+{
+	return !!(addr[0] | addr[1] | addr[2] | addr[3]);
+}
+
+static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *key,
+		    const struct ovs_key_ipv6 *mask)
 {
 	struct ipv6hdr *nh;
 	int err;
-	__be32 *saddr;
-	__be32 *daddr;
 
 	err = make_writable(skb, skb_network_offset(skb) +
 			    sizeof(struct ipv6hdr));
@@ -483,38 +525,56 @@ static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
 		return err;
 
 	nh = ipv6_hdr(skb);
-	saddr = (__be32 *)&nh->saddr;
-	daddr = (__be32 *)&nh->daddr;
 
-	if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) {
-		set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
-			      ipv6_key->ipv6_src, true);
-		flow_key_set_ipv6_src(skb, ipv6_key->ipv6_src);
-	}
+	/* Setting an IP addresses is typically only a side effect of
+	 * matching on them in the current userspace implementation, so it
+	 * makes sense to check if the value actually changed. */
+	if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
+		__be32 *saddr = (__be32 *)&nh->saddr;
+		__be32 masked[4];
 
-	if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
+		mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);
+
+		if (unlikely(memcmp(saddr, masked, sizeof masked))) {
+			set_ipv6_addr(skb, key->ipv6_proto, saddr, masked,
+				      true);
+			flow_key_set_ipv6_src(skb, masked);
+		}
+	}
+	if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
 		unsigned int offset = 0;
 		int flags = OVS_IP6T_FH_F_SKIP_RH;
 		bool recalc_csum = true;
+		__be32 *daddr = (__be32 *)&nh->daddr;
+		__be32 masked[4];
 
-		if (ipv6_ext_hdr(nh->nexthdr))
-			recalc_csum = ipv6_find_hdr(skb, &offset,
-						    NEXTHDR_ROUTING, NULL,
-						    &flags) != NEXTHDR_ROUTING;
-
-		set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
-			      ipv6_key->ipv6_dst, recalc_csum);
-		flow_key_set_ipv6_dst(skb, ipv6_key->ipv6_dst);
-	}
-
-	set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
-	flow_key_set_ip_tos(skb, ipv6_get_dsfield(nh));
+		mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);
 
-	set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
-	flow_key_set_ipv6_fl(skb, nh);
+		if (unlikely(memcmp(daddr, masked, sizeof masked))) {
+			if (ipv6_ext_hdr(nh->nexthdr))
+				recalc_csum = (ipv6_find_hdr(skb, &offset,
+							     NEXTHDR_ROUTING,
+							     NULL, &flags)
+					       != NEXTHDR_ROUTING);
 
-	nh->hop_limit = ipv6_key->ipv6_hlimit;
-	flow_key_set_ip_ttl(skb, ipv6_key->ipv6_hlimit);
+			set_ipv6_addr(skb, key->ipv6_proto, daddr, masked,
+				      recalc_csum);
+			flow_key_set_ipv6_dst(skb, masked);
+		}
+	}
+	if (mask->ipv6_tclass) {
+		ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
+		flow_key_set_ip_tos(skb, ipv6_get_dsfield(nh));
+	}
+	if (mask->ipv6_label) {
+		set_ipv6_fl(nh, ntohl(key->ipv6_label),
+			    ntohl(mask->ipv6_label));
+		flow_key_set_ipv6_fl(skb, nh);
+	}
+	if (mask->ipv6_hlimit) {
+		SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit);
+		flow_key_set_ip_ttl(skb, nh->hop_limit);
+	}
 	return 0;
 }
 
@@ -524,27 +584,13 @@ static void set_tp_port(struct sk_buff *skb, __be16 *port,
 {
 	inet_proto_csum_replace2(check, skb, *port, new_port, 0);
 	*port = new_port;
-	skb_clear_hash(skb);
-}
-
-static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port)
-{
-	struct udphdr *uh = udp_hdr(skb);
-
-	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
-		set_tp_port(skb, port, new_port, &uh->check);
-
-		if (!uh->check)
-			uh->check = CSUM_MANGLED_0;
-	} else {
-		*port = new_port;
-		skb_clear_hash(skb);
-	}
 }
 
-static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key)
+static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *key,
+		   const struct ovs_key_udp *mask)
 {
 	struct udphdr *uh;
+	__be16 src, dst;
 	int err;
 
 	err = make_writable(skb, skb_transport_offset(skb) +
@@ -553,22 +599,39 @@ static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key)
 		return err;
 
 	uh = udp_hdr(skb);
-	if (udp_port_key->udp_src != uh->source) {
-		set_udp_port(skb, &uh->source, udp_port_key->udp_src);
-		flow_key_set_tp_src(skb, udp_port_key->udp_src);
-	}
+        /* Either of the masks is non-zero, so do not bother checking them. */
+	src = MASKED(uh->source, key->udp_src, mask->udp_src);
+	dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst);
 
-	if (udp_port_key->udp_dst != uh->dest) {
-		set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);
-		flow_key_set_tp_dst(skb, udp_port_key->udp_dst);
+	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
+		if (likely(src != uh->source)) {
+			set_tp_port(skb, &uh->source, src, &uh->check);
+			flow_key_set_tp_src(skb, src);
+		}
+		if (likely(dst != uh->dest)) {
+			set_tp_port(skb, &uh->dest, dst, &uh->check);
+			flow_key_set_tp_dst(skb, dst);
+		}
+
+		if (unlikely(!uh->check))
+			uh->check = CSUM_MANGLED_0;
+	} else {
+		uh->source = src;
+		uh->dest = dst;
+		flow_key_set_tp_src(skb, src);
+		flow_key_set_tp_dst(skb, dst);
 	}
 
+	skb_clear_hash(skb);
+
 	return 0;
 }
 
-static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
+static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *key,
+		   const struct ovs_key_tcp *mask)
 {
 	struct tcphdr *th;
+	__be16 src, dst;
 	int err;
 
 	err = make_writable(skb, skb_transport_offset(skb) +
@@ -577,50 +640,50 @@ static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
 		return err;
 
 	th = tcp_hdr(skb);
-	if (tcp_port_key->tcp_src != th->source) {
-		set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
-		flow_key_set_tp_src(skb, tcp_port_key->tcp_src);
-	}
 
-	if (tcp_port_key->tcp_dst != th->dest) {
-		set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
-		flow_key_set_tp_dst(skb, tcp_port_key->tcp_dst);
+	src = MASKED(th->source, key->tcp_src, mask->tcp_src);
+	if (likely(src != th->source)) {
+		set_tp_port(skb, &th->source, src, &th->check);
+		flow_key_set_tp_src(skb, src);
 	}
+	dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
+	if (likely(dst != th->dest)) {
+		set_tp_port(skb, &th->dest, dst, &th->check);
+		flow_key_set_tp_dst(skb, dst);
+	}
+	skb_clear_hash(skb);
 
 	return 0;
 }
 
-static int set_sctp(struct sk_buff *skb,
-		    const struct ovs_key_sctp *sctp_port_key)
+static int set_sctp(struct sk_buff *skb, const struct ovs_key_sctp *key,
+		    const struct ovs_key_sctp *mask)
 {
+	unsigned int sctphoff = skb_transport_offset(skb);
 	struct sctphdr *sh;
+	__le32 old_correct_csum, new_csum, old_csum;
 	int err;
-	unsigned int sctphoff = skb_transport_offset(skb);
 
 	err = make_writable(skb, sctphoff + sizeof(struct sctphdr));
 	if (unlikely(err))
 		return err;
 
 	sh = sctp_hdr(skb);
-	if (sctp_port_key->sctp_src != sh->source ||
-	    sctp_port_key->sctp_dst != sh->dest) {
-		__le32 old_correct_csum, new_csum, old_csum;
 
-		old_csum = sh->checksum;
-		old_correct_csum = sctp_compute_cksum(skb, sctphoff);
+	old_csum = sh->checksum;
+	old_correct_csum = sctp_compute_cksum(skb, sctphoff);
 
-		sh->source = sctp_port_key->sctp_src;
-		sh->dest = sctp_port_key->sctp_dst;
+	sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src);
+	sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);
 
-		new_csum = sctp_compute_cksum(skb, sctphoff);
+	new_csum = sctp_compute_cksum(skb, sctphoff);
 
-		/* Carry any checksum errors through. */
-		sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
+	/* Carry any checksum errors through. */
+	sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
 
-		skb_clear_hash(skb);
-		flow_key_set_tp_src(skb, sctp_port_key->sctp_src);
-		flow_key_set_tp_dst(skb, sctp_port_key->sctp_dst);
-	}
+	skb_clear_hash(skb);
+	flow_key_set_tp_src(skb, sh->source);
+	flow_key_set_tp_dst(skb, sh->dest);
 
 	return 0;
 }
@@ -749,52 +812,74 @@ static void execute_hash(struct sk_buff *skb, const struct nlattr *attr)
 	key->ovs_flow_hash = hash;
 }
 
-static int execute_set_action(struct sk_buff *skb,
-				 const struct nlattr *nested_attr)
+static int execute_set_action(struct sk_buff *skb, const struct nlattr *a)
+{
+	/* Only tunnel set execution is supported without a mask. */
+	if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
+		OVS_CB(skb)->egress_tun_info = nla_data(a);
+		return 0;
+	}
+
+	return -EINVAL;
+
+}
+
+/* Mask is at the midpoint of the data. */
+#define get_mask(a, type) ((const type *)nla_data(a) + 1)
+
+static int execute_masked_set_action(struct sk_buff *skb,
+				     const struct nlattr *a)
 {
 	int err = 0;
 
-	switch (nla_type(nested_attr)) {
+	switch (nla_type(a)) {
 	case OVS_KEY_ATTR_PRIORITY:
-		skb->priority = nla_get_u32(nested_attr);
+		SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32));
 		flow_key_set_priority(skb, skb->priority);
 		break;
 
 	case OVS_KEY_ATTR_SKB_MARK:
-		skb->mark = nla_get_u32(nested_attr);
+		SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32));
 		flow_key_set_skb_mark(skb, skb->mark);
 		break;
 
 	case OVS_KEY_ATTR_TUNNEL_INFO:
-		OVS_CB(skb)->egress_tun_info = nla_data(nested_attr);
+		/* Masked data not supported for tunnel. */
+		err = -EINVAL;
 		break;
 
 	case OVS_KEY_ATTR_ETHERNET:
-		err = set_eth_addr(skb, nla_data(nested_attr));
+		err = set_eth_addr(skb, nla_data(a),
+				   get_mask(a, struct ovs_key_ethernet));
 		break;
 
 	case OVS_KEY_ATTR_IPV4:
-		err = set_ipv4(skb, nla_data(nested_attr));
+		err = set_ipv4(skb, nla_data(a),
+			       get_mask(a, struct ovs_key_ipv4));
 		break;
 
 	case OVS_KEY_ATTR_IPV6:
-		err = set_ipv6(skb, nla_data(nested_attr));
+		err = set_ipv6(skb, nla_data(a),
+			       get_mask(a, struct ovs_key_ipv6));
 		break;
 
 	case OVS_KEY_ATTR_TCP:
-		err = set_tcp(skb, nla_data(nested_attr));
+		err = set_tcp(skb, nla_data(a),
+			      get_mask(a, struct ovs_key_tcp));
 		break;
 
 	case OVS_KEY_ATTR_UDP:
-		err = set_udp(skb, nla_data(nested_attr));
+		err = set_udp(skb, nla_data(a),
+			      get_mask(a, struct ovs_key_udp));
 		break;
 
 	case OVS_KEY_ATTR_SCTP:
-		err = set_sctp(skb, nla_data(nested_attr));
+		err = set_sctp(skb, nla_data(a),
+			       get_mask(a, struct ovs_key_sctp));
 		break;
 
 	case OVS_KEY_ATTR_MPLS:
-		err = set_mpls(skb, nla_data(nested_attr));
+		err = set_mpls(skb, nla_data(a), get_mask(a, __be32));
 		break;
 	}
 
@@ -905,6 +990,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			err = execute_set_action(skb, nla_data(a));
 			break;
 
+		case OVS_ACTION_ATTR_SET_MASKED:
+			err = execute_masked_set_action(skb, nla_data(a));
+			break;
+
 		case OVS_ACTION_ATTR_SAMPLE:
 			err = sample(dp, skb, a);
 			break;
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 69d1919..ec32a00 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -325,6 +325,20 @@ static bool is_all_zero(const u8 *fp, size_t size)
 	return true;
 }
 
+static bool is_all_ones(const u8 *fp, size_t size)
+{
+	int i;
+
+	if (!fp)
+		return false;
+
+	for (i = 0; i < size; i++)
+		if (~fp[i])
+			return false;
+
+	return true;
+}
+
 static int __parse_flow_nlattrs(const struct nlattr *attr,
 				const struct nlattr *a[],
 				u64 *attrsp, bool nz)
@@ -1592,23 +1606,43 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	return err;
 }
 
+/* Return false if there are any non-masked bits set.
+ * Mask follows data immediately, before any netlink padding. */
+static bool validate_masked(u8 *data, int len)
+{
+	u8 *mask = data + len;
+
+	while (len--)
+		if (*data++ & ~*mask++)
+			return false;
+	return true;
+}
+
 static int validate_set(const struct nlattr *a,
 			const struct sw_flow_key *flow_key,
 			struct sw_flow_actions **sfa,
-			bool *set_tun, __be16 eth_type)
+			bool *skip_copy, __be16 eth_type, bool masked)
 {
 	const struct nlattr *ovs_key = nla_data(a);
 	int key_type = nla_type(ovs_key);
+	size_t key_len;
 
 	/* There can be only one key in a action */
 	if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
 		return -EINVAL;
 
+	key_len = nla_len(ovs_key);
+	if (masked)
+		key_len /= 2;
+
 	if (key_type > OVS_KEY_ATTR_MAX ||
-	    (ovs_key_lens[key_type] != nla_len(ovs_key) &&
+	    (ovs_key_lens[key_type] != key_len &&
 	     ovs_key_lens[key_type] != -1))
 		return -EINVAL;
 
+	if (masked && !validate_masked(nla_data(ovs_key), key_len))
+		return -EINVAL;
+
 	switch (key_type) {
 	const struct ovs_key_ipv4 *ipv4_key;
 	const struct ovs_key_ipv6 *ipv6_key;
@@ -1620,7 +1654,9 @@ static int validate_set(const struct nlattr *a,
 		break;
 
 	case OVS_KEY_ATTR_TUNNEL:
-		*set_tun = true;
+		if (masked)
+			return -EINVAL; /* Masked tunnel set not supported. */
+		*skip_copy = true;
 		err = validate_and_copy_set_tun(a, sfa);
 		if (err)
 			return err;
@@ -1634,12 +1670,20 @@ static int validate_set(const struct nlattr *a,
 			return -EINVAL;
 
 		ipv4_key = nla_data(ovs_key);
-		if (ipv4_key->ipv4_proto != flow_key->ip.proto)
-			return -EINVAL;
 
-		if (ipv4_key->ipv4_frag != flow_key->ip.frag)
-			return -EINVAL;
+		if (masked) {
+			const struct ovs_key_ipv4 *mask = ipv4_key + 1;
+
+			/* Non-writeable fields. */
+			if (mask->ipv4_proto || mask->ipv4_frag)
+				return -EINVAL;
+		} else {
+			if (ipv4_key->ipv4_proto != flow_key->ip.proto)
+				return -EINVAL;
 
+			if (ipv4_key->ipv4_frag != flow_key->ip.frag)
+				return -EINVAL;
+		}
 		break;
 
 	case OVS_KEY_ATTR_IPV6:
@@ -1650,12 +1694,23 @@ static int validate_set(const struct nlattr *a,
 			return -EINVAL;
 
 		ipv6_key = nla_data(ovs_key);
-		if (ipv6_key->ipv6_proto != flow_key->ip.proto)
-			return -EINVAL;
+		if (masked) {
+			const struct ovs_key_ipv6 *mask = ipv6_key + 1;
 
-		if (ipv6_key->ipv6_frag != flow_key->ip.frag)
-			return -EINVAL;
+			/* Non-writeable fields. */
+			if (mask->ipv6_proto || mask->ipv6_frag)
+				return -EINVAL;
 
+			/* Invalid bits in the flow label mask? */
+			if (ntohl(mask->ipv6_label) & 0xFFF00000)
+				return -EINVAL;
+		} else {
+			if (ipv6_key->ipv6_proto != flow_key->ip.proto)
+				return -EINVAL;
+
+			if (ipv6_key->ipv6_frag != flow_key->ip.frag)
+				return -EINVAL;
+		}
 		if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
 			return -EINVAL;
 
@@ -1665,13 +1720,19 @@ static int validate_set(const struct nlattr *a,
 		if (flow_key->ip.proto != IPPROTO_TCP)
 			return -EINVAL;
 
-		return validate_tp_port(flow_key, eth_type);
+		err = validate_tp_port(flow_key, eth_type);
+		if (err)
+			return err;
+		break;
 
 	case OVS_KEY_ATTR_UDP:
 		if (flow_key->ip.proto != IPPROTO_UDP)
 			return -EINVAL;
 
-		return validate_tp_port(flow_key, eth_type);
+		err = validate_tp_port(flow_key, eth_type);
+		if (err)
+			return err;
+		break;
 
 	case OVS_KEY_ATTR_MPLS:
 		if (!eth_p_mpls(eth_type))
@@ -1682,12 +1743,45 @@ static int validate_set(const struct nlattr *a,
 		if (flow_key->ip.proto != IPPROTO_SCTP)
 			return -EINVAL;
 
-		return validate_tp_port(flow_key, eth_type);
+		err = validate_tp_port(flow_key, eth_type);
+		if (err)
+			return err;
+		break;
 
 	default:
 		return -EINVAL;
 	}
 
+	/* Convert non-masked non-tunnel set actions to masked set actions. */
+	if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
+		int start, len = key_len * 2;
+		struct nlattr *at;
+
+		*skip_copy = true;
+
+		start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET_MASKED);
+		if (start < 0)
+			return start;
+
+		at = __add_action(sfa, key_type, NULL, len);
+		if (IS_ERR(at))
+			return PTR_ERR(at);
+
+		memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
+		/* Even though all-ones mask includes non-writeable fields,
+		 * which we do not allow above, we will not actually set them
+		 * when we execute the masked set action. */
+		memset(nla_data(at) + key_len, 0xff, key_len);    /* Mask. */
+
+		add_nested_action_end(*sfa, start);
+	} else if (masked) {
+		/* Reject fully masked masked set actions so that the above
+		 * conversion can be unabiguously reverted when sending
+		 * actions back to userspace. */
+		if (is_all_ones(nla_data(ovs_key) + key_len, key_len))
+			return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -1749,6 +1843,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
 			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
 			[OVS_ACTION_ATTR_POP_VLAN] = 0,
 			[OVS_ACTION_ATTR_SET] = (u32)-1,
+			[OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
 			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
 			[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
 		};
@@ -1843,7 +1938,15 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
 			break;
 
 		case OVS_ACTION_ATTR_SET:
-			err = validate_set(a, key, sfa, &skip_copy, eth_type);
+			err = validate_set(a, key, sfa, &skip_copy, eth_type,
+					   false);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_SET_MASKED:
+			err = validate_set(a, key, sfa, &skip_copy, eth_type,
+					   true);
 			if (err)
 				return err;
 			break;
@@ -1872,6 +1975,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
 	return 0;
 }
 
+/* 'key' must be the masked key. */
 int ovs_nla_copy_actions(const struct nlattr *attr,
 			 const struct sw_flow_key *key,
 			 struct sw_flow_actions **sfa)
@@ -1959,6 +2063,27 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
 	return 0;
 }
 
+static int masked_set_action_to_attr(const struct nlattr *a,
+				     struct sk_buff *skb)
+{
+	const struct nlattr *ovs_key = nla_data(a);
+	size_t key_len = nla_len(ovs_key) / 2;
+
+	if (is_all_ones(nla_data(ovs_key) + key_len, key_len)) {
+		/* Revert the conversion we did from a non-masked set action to
+		 * masked set action. */
+		if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a) - key_len,
+			    ovs_key))
+			return -EMSGSIZE;
+	} else {
+		if (nla_put(skb, OVS_ACTION_ATTR_SET_MASKED, nla_len(a),
+			    ovs_key))
+			return -EMSGSIZE;
+	}
+
+	return 0;
+}
+
 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
 {
 	const struct nlattr *a;
@@ -1974,6 +2099,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
 				return err;
 			break;
 
+		case OVS_ACTION_ATTR_SET_MASKED:
+			err = masked_set_action_to_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
 		case OVS_ACTION_ATTR_SAMPLE:
 			err = sample_action_to_attr(a, skb);
 			if (err)
-- 
1.7.10.4




More information about the dev mailing list