[ovs-dev] [PATCH 04/18] datapath: Allow inner_flows

Simon Horman horms at verge.net.au
Thu Dec 27 05:23:05 UTC 2012


Allow a flow to be visible in two forms, an outer flow and an inner flow.
This may occur if the actions allow further decoding of a packet to
provide a more fine-grained match. In this case the first-pass
coarse-grained match will hit the outer-flow and the second-pass
fined-grained match will hit the inner-flow.

Inner-flows are not visible to user-space. Rather, they are just an
internal representation to handle the case of actions allowing further
packet decoding. An inner-flow is associated with its outer-flow and
deleted when the outer-flow is deleted. An outer-flow may have more than
one inner-flow but an inner-flow may only have one outer-flow.

For example:

In the case of MPLS, L3 and L4 information may not initially be decoded
from the frame as the ethernet type of the frame is an MPLS type and no
information is known about the type of the inner frame.

However, the type of the inner frame may be provided by an mpls_pop action
in which case L3 and L4 information may be decoded providing a finer
grained match than is otherwise possible.

Signed-off-by: Simon Horman <horms at verge.net.au>

---

v2.12
* No change

v2.11
* First post
---
 datapath/datapath.c |  197 +++++++++++++++++++++++++++++++++++++--------------
 datapath/flow.c     |   69 +++++++++++++-----
 datapath/flow.h     |   19 ++++-
 3 files changed, 211 insertions(+), 74 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 4a15079..1932d83 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -341,7 +341,7 @@ void ovs_dp_detach_port(struct vport *p)
 void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 {
 	struct datapath *dp = p->dp;
-	struct sw_flow *flow;
+	struct sw_flow *flow, *outer_flow = NULL;
 	struct dp_stats_percpu *stats;
 	u64 *stats_counter;
 	int error;
@@ -377,6 +377,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 					kfree_skb(skb);
 					return;
 				}
+				outer_flow = flow;
 				flow = ovs_flow_tbl_lookup(table, &key, key_len);
 			}
 		}
@@ -398,6 +399,8 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 	}
 
 	stats_counter = &stats->n_hit;
+	if (unlikely(outer_flow))
+		ovs_flow_used(outer_flow, skb);
 	ovs_flow_used(OVS_CB(skb)->flow, skb);
 	ovs_execute_actions(dp, skb);
 
@@ -1104,43 +1107,24 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
 	return skb;
 }
 
-static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
+enum flow_type {
+	flow_inner,
+	flow_outer,
+	flow_singleton,
+};
+
+static int ovs_flow_cmd_new_or_set__(struct sk_buff *skb,
+				     struct genl_info *info,
+				     struct nlattr **a, struct datapath *dp,
+				     struct sw_flow_key key, int key_len,
+				     enum flow_type flow_type,
+				     struct sw_flow **flowp)
 {
-	struct nlattr **a = info->attrs;
-	struct ovs_header *ovs_header = info->userhdr;
-	struct sw_flow_key key;
-	struct sw_flow *flow;
-	struct sk_buff *reply;
-	struct datapath *dp;
-	struct flow_table *table;
 	int error;
-	int key_len;
-
-	/* Extract key. */
-	error = -EINVAL;
-	if (!a[OVS_FLOW_ATTR_KEY])
-		goto error;
-	error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
-	if (error)
-		goto error;
-
-	/* Validate actions. */
-	if (a[OVS_FLOW_ATTR_ACTIONS]) {
-		error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key,  0);
-		if (error)
-			goto error;
-	} else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
-		error = -EINVAL;
-		goto error;
-	}
-
-	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
-	error = -ENODEV;
-	if (!dp)
-		goto error;
+	struct sk_buff *reply = NULL;
+	struct flow_table *table = genl_dereference(dp->table);
+	struct sw_flow *flow = ovs_flow_tbl_lookup(table, &key, key_len);
 
-	table = genl_dereference(dp->table);
-	flow = ovs_flow_tbl_lookup(table, &key, key_len);
 	if (!flow) {
 		struct sw_flow_actions *acts;
 
@@ -1168,6 +1152,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 			goto error;
 		}
 		clear_stats(flow);
+		if (flow_type == flow_inner) {
+			flow->flags = SW_FLOW_F_NO_DUMP;
+			*flowp = flow;
+		}
 
 		/* Obtain actions. */
 		acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
@@ -1176,16 +1164,24 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 			goto error_free_flow;
 		rcu_assign_pointer(flow->sf_acts, acts);
 
+		/* Add inner flow without locking */
+		if (flow_type == flow_outer && *flowp)
+			list_add_rcu(&(*flowp)->inner_flows,
+				     &flow->inner_flows);
+
 		/* Put flow in bucket. */
 		ovs_flow_tbl_insert(table, flow, &key, key_len);
 
-		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
-						info->snd_seq,
-						OVS_FLOW_CMD_NEW);
+		if (flow_type == flow_inner)
+			reply = ovs_flow_cmd_build_info(flow, dp,
+							info->snd_portid,
+							info->snd_seq,
+							OVS_FLOW_CMD_NEW);
 	} else {
 		/* We found a matching flow. */
 		struct sw_flow_actions *old_acts;
 		struct nlattr *acts_attrs;
+		bool may_clear_stats = true;
 
 		/* Bail out if we're not allowed to modify an existing flow.
 		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
@@ -1195,8 +1191,16 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 		 */
 		error = -EEXIST;
 		if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
-		    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
-			goto error;
+		    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) {
+			/* If this is an outer_flow then it may duplicate
+			 * the outer_flow of another flow's inner_flow.
+			 * Just accept this and more on.
+			 */
+			if (flow_type != flow_outer)
+				goto error;
+			may_clear_stats = false;
+			goto update;
+		}
 
 		/* Update actions. */
 		old_acts = rcu_dereference_protected(flow->sf_acts,
@@ -1220,14 +1224,23 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
 					       info->snd_seq, OVS_FLOW_CMD_NEW);
 
-		/* Clear stats. */
-		if (a[OVS_FLOW_ATTR_CLEAR]) {
+update:
+		if (a[OVS_FLOW_ATTR_CLEAR] || flow_type == flow_outer) {
 			spin_lock_bh(&flow->lock);
-			clear_stats(flow);
+			/* Clear stats. */
+			if (a[OVS_FLOW_ATTR_CLEAR] && may_clear_stats)
+				clear_stats(flow);
+			/* Add inner flow */
+			if (flow_type == flow_outer)
+				list_add_rcu(&(*flowp)->inner_flows,
+					     &flow->inner_flows);
 			spin_unlock_bh(&flow->lock);
 		}
 	}
 
+	if (!reply)
+		return 0;
+
 	if (!IS_ERR(reply))
 		genl_notify(reply, genl_info_net(info), info->snd_portid,
 			   ovs_dp_flow_multicast_group.id, info->nlhdr,
@@ -1243,6 +1256,66 @@ error:
 	return error;
 }
 
+static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct sw_flow_key key;
+	struct sw_flow *inner_flow = NULL;
+	struct datapath *dp;
+	int error;
+	int key_len[2];
+
+	/* Extract key. */
+	error = -EINVAL;
+	if (!a[OVS_FLOW_ATTR_KEY])
+		goto error;
+	error = ovs_flow_from_nlattrs(&key, key_len, a[OVS_FLOW_ATTR_KEY]);
+	if (error)
+		goto error;
+
+	/* Validate actions. */
+	if (a[OVS_FLOW_ATTR_ACTIONS]) {
+		error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key,  0);
+		if (error)
+			goto error;
+	} else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
+		error = -EINVAL;
+		goto error;
+	}
+
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	error = -ENODEV;
+	if (!dp)
+		goto error;
+
+	if (key_len[1]) {
+		error = ovs_flow_cmd_new_or_set__(skb, info, a, dp,
+						  key, key_len[1],
+						  flow_inner,
+						  &inner_flow);
+		if (error)
+			goto error;
+	}
+
+	error = ovs_flow_cmd_new_or_set__(skb, info, a, dp,
+					  key, key_len[0],
+					  key_len[1] ? flow_outer :
+					  flow_singleton,
+					  &inner_flow);
+	if (error) {
+		if (key_len[1]) {
+			struct flow_table *table = genl_dereference(dp->table);
+			ovs_flow_tbl_remove(table, inner_flow);
+			ovs_flow_deferred_free(inner_flow);
+		}
+		goto error;
+	}
+
+error:
+	return error;
+}
+
 static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr **a = info->attrs;
@@ -1253,11 +1326,11 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	struct flow_table *table;
 	int err;
-	int key_len;
+	int key_len[2];
 
 	if (!a[OVS_FLOW_ATTR_KEY])
 		return -EINVAL;
-	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+	err = ovs_flow_from_nlattrs(&key, key_len, a[OVS_FLOW_ATTR_KEY]);
 	if (err)
 		return err;
 
@@ -1266,7 +1339,8 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 		return -ENODEV;
 
 	table = genl_dereference(dp->table);
-	flow = ovs_flow_tbl_lookup(table, &key, key_len);
+	flow = ovs_flow_tbl_lookup(table, &key,
+				   key_len[1] ? key_len[1] : key_len[0]);
 	if (!flow)
 		return -ENOENT;
 
@@ -1284,11 +1358,11 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	struct ovs_header *ovs_header = info->userhdr;
 	struct sw_flow_key key;
 	struct sk_buff *reply;
-	struct sw_flow *flow;
+	struct sw_flow *notify_flow, *outer_flow, *inner_flow;
 	struct datapath *dp;
 	struct flow_table *table;
 	int err;
-	int key_len;
+	int key_len[2];
 
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
 	if (!dp)
@@ -1297,26 +1371,39 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	if (!a[OVS_FLOW_ATTR_KEY])
 		return flush_flows(dp);
 
-	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+	err = ovs_flow_from_nlattrs(&key, key_len, a[OVS_FLOW_ATTR_KEY]);
 	if (err)
 		return err;
 
 	table = genl_dereference(dp->table);
-	flow = ovs_flow_tbl_lookup(table, &key, key_len);
-	if (!flow)
+	notify_flow = outer_flow = ovs_flow_tbl_lookup(table, &key, key_len[0]);
+	if (!outer_flow)
 		return -ENOENT;
 
-	reply = ovs_flow_cmd_alloc_info(flow);
+	if (key_len[1]) {
+		notify_flow = inner_flow = ovs_flow_tbl_lookup(table, &key,
+							       key_len[1]);
+		if (!inner_flow)
+			return -ENOENT;
+	} else {
+		inner_flow = NULL;
+	}
+
+	reply = ovs_flow_cmd_alloc_info(notify_flow);
 	if (!reply)
 		return -ENOMEM;
 
-	ovs_flow_tbl_remove(table, flow);
+	ovs_flow_tbl_remove(table, outer_flow);
+	if (inner_flow)
+		ovs_flow_tbl_remove(table, inner_flow);
 
-	err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
+	err = ovs_flow_cmd_fill_info(notify_flow, dp, reply, info->snd_portid,
 				     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
 	BUG_ON(err < 0);
 
-	ovs_flow_deferred_free(flow);
+	ovs_flow_deferred_free(outer_flow);
+	if (inner_flow)
+		ovs_flow_deferred_free(inner_flow);
 
 	genl_notify(reply, genl_info_net(info), info->snd_portid,
 		    ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
diff --git a/datapath/flow.c b/datapath/flow.c
index b54f68c..c6d6010 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -226,7 +226,9 @@ struct sw_flow *ovs_flow_alloc(void)
 		return ERR_PTR(-ENOMEM);
 
 	spin_lock_init(&flow->lock);
+	INIT_LIST_HEAD(&flow->inner_flows);
 	flow->sf_acts = NULL;
+	flow->flags = 0;
 
 	return flow;
 }
@@ -343,7 +345,7 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la
 		i = 0;
 		head = flex_array_get(table->buckets, *bucket);
 		hlist_for_each_entry_rcu(flow, n, head, hash_node[ver]) {
-			if (i < *last) {
+			if (i < *last || flow->flags | SW_FLOW_F_NO_DUMP) {
 				i++;
 				continue;
 			}
@@ -875,13 +877,25 @@ void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
 	__flow_tbl_insert(table, flow);
 }
 
-void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
+static void __flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
 {
 	hlist_del_rcu(&flow->hash_node[table->node_ver]);
 	table->count--;
 	BUG_ON(table->count < 0);
 }
 
+void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
+{
+	struct sw_flow *inner_flow;
+
+	__flow_tbl_remove(table, flow);
+
+	list_for_each_entry_rcu(inner_flow, &flow->inner_flows, inner_flows) {
+		__flow_tbl_remove(table, inner_flow);
+		ovs_flow_deferred_free(inner_flow);
+	}
+}
+
 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
 const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 	[OVS_KEY_ATTR_ENCAP] = -1,
@@ -1048,7 +1062,7 @@ static int parse_flow_nlattrs(const struct nlattr *attr,
  * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
  * sequence.
  */
-int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
+int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int key_lenp[2],
 		      const struct nlattr *attr)
 {
 	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
@@ -1056,6 +1070,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 	int key_len;
 	u64 attrs;
 	int err;
+	__be16 eth_type;
 
 	memset(swkey, 0, sizeof(struct sw_flow_key));
 	key_len = SW_FLOW_KEY_OFFSET(eth);
@@ -1169,7 +1184,29 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 		swkey->eth.type = htons(ETH_P_802_2);
 	}
 
-	if (swkey->eth.type == htons(ETH_P_IP)) {
+	eth_type = swkey->eth.type;
+	if (eth_p_mpls(eth_type)) {
+		const struct ovs_key_mpls *mpls_key;
+
+		if (!(attrs & (1 << OVS_KEY_ATTR_MPLS)))
+			return -EINVAL;
+		attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
+
+		key_len = SW_FLOW_KEY_OFFSET(mpls.top_label);
+		mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
+		swkey->mpls.top_label = mpls_key->mpls_top_label;
+
+		if (attrs & (1 << OVS_KEY_ATTR_IPV4))
+			eth_type = htons(ETH_P_IP);
+		if (attrs & (1 << OVS_KEY_ATTR_IPV6))
+			eth_type = htons(ETH_P_IPV6);
+		if (attrs & (1 << OVS_KEY_ATTR_ARP))
+			eth_type = htons(ETH_P_ARP);
+	}
+
+	key_lenp[0] = key_len;
+
+	if (eth_type == htons(ETH_P_IP)) {
 		const struct ovs_key_ipv4 *ipv4_key;
 
 		if (!(attrs & (1 << OVS_KEY_ATTR_IPV4)))
@@ -1192,7 +1229,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 			if (err)
 				return err;
 		}
-	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+	} else if (eth_type == htons(ETH_P_IPV6)) {
 		const struct ovs_key_ipv6 *ipv6_key;
 
 		if (!(attrs & (1 << OVS_KEY_ATTR_IPV6)))
@@ -1218,8 +1255,8 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 			if (err)
 				return err;
 		}
-	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
-		   swkey->eth.type == htons(ETH_P_RARP)) {
+	} else if (eth_type == htons(ETH_P_ARP) ||
+		   eth_type == htons(ETH_P_RARP)) {
 		const struct ovs_key_arp *arp_key;
 
 		if (!(attrs & (1 << OVS_KEY_ATTR_ARP)))
@@ -1235,21 +1272,17 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 		swkey->ip.proto = ntohs(arp_key->arp_op);
 		memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
 		memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
-	} else if (eth_p_mpls(swkey->eth.type)) {
-		const struct ovs_key_mpls *mpls_key;
-
-		if (!(attrs & (1 << OVS_KEY_ATTR_MPLS)))
-			return -EINVAL;
-		attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
-
-		key_len = SW_FLOW_KEY_OFFSET(mpls.top_label);
-		mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
-		swkey->mpls.top_label = mpls_key->mpls_top_label;
 	}
 
 	if (attrs)
 		return -EINVAL;
-	*key_lenp = key_len;
+
+	if (eth_type == swkey->eth.type) {
+		key_lenp[0] = key_len;
+		key_lenp[1] = 0;
+	} else {
+		key_lenp[1] = key_len;
+	}
 
 	return 0;
 }
diff --git a/datapath/flow.h b/datapath/flow.h
index 66410b4..f353805 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -98,6 +98,8 @@ struct sw_flow_key {
 	};
 };
 
+#define SW_FLOW_F_NO_DUMP 0x01
+
 struct sw_flow {
 	struct rcu_head rcu;
 	struct hlist_node hash_node[2];
@@ -107,10 +109,25 @@ struct sw_flow {
 	struct sw_flow_actions __rcu *sf_acts;
 
 	spinlock_t lock;	/* Lock for values below. */
+
+	struct list_head inner_flows;	/* Inner flows.
+					 * For an inner flow this is its
+					 * entry in the hlist of its outer
+					 * flow.
+					 * For an outer flow this is the
+					 * list of inner flows
+					 * For a singleton flow this is
+					 * unused
+					 */
+
 	unsigned long used;	/* Last used time (in jiffies). */
 	u64 packet_count;	/* Number of packets matched. */
 	u64 byte_count;		/* Number of bytes matched. */
 	u8 tcp_flags;		/* Union of seen TCP flags. */
+	/* End values that need loc*/
+
+	u8 flags;		/* SW_FLOW_F_* */
+
 };
 
 struct arp_eth_header {
@@ -142,7 +159,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *);
 void ovs_flow_deferred_free_acts(struct sw_flow_actions *);
 
 int ovs_flow_extract_l3_onwards(struct sk_buff *, struct sw_flow_key *,
-				int *key_lenp, __be16 eth_type);
+				int key_lenp[2], __be16 eth_type);
 int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
 		     int *key_lenp);
 void ovs_flow_used(struct sw_flow *, struct sk_buff *);
-- 
1.7.10.4




More information about the dev mailing list