[ovs-dev] [PATCHv6 11/14] datapath: Add support for unique flow identifiers.

Joe Stringer <joestringer@nicira.com>
Fri Sep 26 09:28:15 UTC 2014


If a datapath is created with the flag OVS_DP_F_INDEX_BY_UID, then an
additional table_instance is added to the flow_table, indexed by unique
identifiers ("UIDs"). Userspace implementations can specify a UID of
between 32 and 128 bits along with a flow operation, as shorthand for
the flow key. This allows revalidation performance improvements of up
to 50%.

If a datapath is created using OVS_DP_F_INDEX_BY_UID and a UID is not
specified at flow setup time, then that operation will fail. If
OVS_UID_F_* flags are specified for an operation, then they modify what
is returned in the reply. For instance, OVS_UID_F_SKIP_KEY allows the
datapath to skip returning the key (e.g., during dump, to reduce memory
copying). Illustrative userspace sketches follow the version notes
below.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
---
v6: Fix documentation for supporting UIDs between 32-128 bits.
    Minor style fixes.
    Rebase.
v5: No change.
v4: Fix memory leaks.
    Log when older userspace triggers the missing-UID error described above.
v3: Initial post.
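
Illustrative sketch (not part of this patch): userspace opts in to UID
indexing by setting the new feature bit in the existing
OVS_DP_ATTR_USER_FEATURES attribute when creating the datapath. A
minimal libnl-3 fragment, assuming "msg" already carries the Generic
Netlink and ovs_header headers for OVS_DP_CMD_NEW:

    #include <errno.h>
    #include <netlink/msg.h>
    #include <netlink/attr.h>
    #include <linux/openvswitch.h>

    static int request_uid_indexing(struct nl_msg *msg)
    {
            /* OVS_DP_F_INDEX_BY_UID is the feature bit added by this
             * patch; the other flag shown is pre-existing. */
            NLA_PUT_U32(msg, OVS_DP_ATTR_USER_FEATURES,
                        OVS_DP_F_VPORT_PIDS | OVS_DP_F_INDEX_BY_UID);
            return 0;

    nla_put_failure:
            return -EMSGSIZE;
    }
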
---
 datapath/datapath.c                               |  215 +++++++++++++------
 datapath/flow.h                                   |   12 +-
 datapath/flow_netlink.c                           |   60 ++++++
 datapath/flow_netlink.h                           |    2 +
 datapath/flow_table.c                             |  230 +++++++++++++++++----
 datapath/flow_table.h                             |    5 +-
 datapath/linux/compat/include/linux/openvswitch.h |   29 +++
 7 files changed, 449 insertions(+), 104 deletions(-)
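
A second illustrative sketch (same libnl-3 assumptions and headers as
above, plus <stdint.h> for uint32_t): attaching a nested
OVS_FLOW_ATTR_UID to a flow request, carrying a 128-bit identifier
together with OVS_UID_F_* flags that ask the kernel to omit the key and
mask from its reply. Names and values here are illustrative only.

    /* Sketch only: 4, 8 and 12 octet identifiers are also accepted. */
    static int put_flow_uid(struct nl_msg *msg, const uint32_t uid[4])
    {
            struct nlattr *nest;

            nest = nla_nest_start(msg, OVS_FLOW_ATTR_UID);
            if (!nest)
                    return -EMSGSIZE;

            NLA_PUT_U32(msg, OVS_UID_ATTR_FLAGS,
                        OVS_UID_F_SKIP_KEY | OVS_UID_F_SKIP_MASK);
            NLA_PUT(msg, OVS_UID_ATTR_ID, 16, uid);

            nla_nest_end(msg, nest);
            return 0;

    nla_put_failure:
            return -EMSGSIZE;
    }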

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 45e7c56..7f6717b 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -675,36 +675,41 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
 		+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
 		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
 		+ nla_total_size(8) /* OVS_FLOW_ATTR_USED */
+		+ nla_total_size(ovs_uid_attr_size()) /* OVS_FLOW_ATTR_UID */
 		+ nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
 }
 
 /* Called with ovs_mutex or RCU read lock. */
 static int ovs_flow_cmd_fill_match(const struct sw_flow *flow,
-				   struct sk_buff *skb)
+				   struct sk_buff *skb, u32 uid_flags)
 {
 	struct nlattr *nla;
 	int err;
 
 	/* Fill flow key. */
-	nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
-	if (!nla)
-		return -EMSGSIZE;
+	if (!(uid_flags & OVS_UID_F_SKIP_KEY)) {
+		nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
+		if (!nla)
+			return -EMSGSIZE;
 
-	err = ovs_nla_put_flow(&flow->unmasked_key,
-			       &flow->unmasked_key, skb);
-	if (err)
-		return err;
-	nla_nest_end(skb, nla);
+		err = ovs_nla_put_flow(&flow->unmasked_key,
+				       &flow->unmasked_key, skb);
+		if (err)
+			return err;
+		nla_nest_end(skb, nla);
+	}
 
 	/* Fill flow mask. */
-	nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
-	if (!nla)
-		return -EMSGSIZE;
+	if (!(uid_flags & OVS_UID_F_SKIP_MASK)) {
+		nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
+		if (!nla)
+			return -EMSGSIZE;
 
-	err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
-	if (err)
-		return err;
-	nla_nest_end(skb, nla);
+		err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
+		if (err)
+			return err;
+		nla_nest_end(skb, nla);
+	}
 
 	return 0;
 }
@@ -735,6 +740,30 @@ static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
 }
 
 /* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_uid(const struct sw_flow *flow,
+				 struct sk_buff *skb)
+{
+	struct nlattr *start;
+	const struct sw_flow_id *sfid = &flow->uid;
+
+	if (!sfid->uid)
+		return 0;
+
+	start = nla_nest_start(skb, OVS_FLOW_ATTR_UID);
+	if (start) {
+		int err;
+
+		err = nla_put(skb, OVS_UID_ATTR_ID, sfid->uid_len, sfid->uid);
+		if (err)
+			return err;
+		nla_nest_end(skb, start);
+	} else
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
 static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
 				     struct sk_buff *skb, int skb_orig_len)
 {
@@ -777,7 +806,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
 /* Called with ovs_mutex or RCU read lock. */
 static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
 				  struct sk_buff *skb, u32 portid,
-				  u32 seq, u32 flags, u8 cmd)
+				  u32 seq, u32 flags, u8 cmd, u32 uid_flags)
 {
 	const int skb_orig_len = skb->len;
 	struct ovs_header *ovs_header;
@@ -788,18 +817,24 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
 		return -EMSGSIZE;
 	ovs_header->dp_ifindex = dp_ifindex;
 
-	err = ovs_flow_cmd_fill_match(flow, skb);
+	err = ovs_flow_cmd_fill_match(flow, skb, uid_flags);
 	if (err)
 		goto error;
 
-	err = ovs_flow_cmd_fill_stats(flow, skb);
+	err = ovs_flow_cmd_fill_uid(flow, skb);
 	if (err)
 		goto error;
 
-	err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
+	err = ovs_flow_cmd_fill_stats(flow, skb);
 	if (err)
 		goto error;
 
+	if (!(uid_flags & OVS_UID_F_SKIP_ACTIONS)) {
+		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
+		if (err)
+			goto error;
+	}
+
 	return genlmsg_end(skb, ovs_header);
 
 error:
@@ -831,7 +866,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct datapath *dp,
 					       const struct sw_flow *flow,
 					       int dp_ifindex,
 					       struct genl_info *info, u8 cmd,
-					       bool always)
+					       bool always, u32 uid_flags)
 {
 	struct sk_buff *skb;
 	int retval;
@@ -843,7 +878,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct datapath *dp,
 
 	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
 					info->snd_portid, info->snd_seq, 0,
-					cmd);
+					cmd, uid_flags);
 	BUG_ON(retval < 0);
 	return skb;
 }
@@ -858,6 +893,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	struct sw_flow_actions *acts;
 	struct sw_flow_match match;
+	struct sw_flow_id sfid;
+	u32 uid_flags;
 	int error;
 
 	/* Must have key and actions. */
@@ -888,6 +925,13 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 
 	ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
 
+	/* Extract uid. */
+	error = ovs_nla_get_uid(a[OVS_FLOW_ATTR_UID], &sfid, &uid_flags);
+	if (!error)
+		error = ovs_flow_uid(&new_flow->uid, &sfid);
+	if (error)
+		goto err_kfree_flow;
+
 	/* Validate actions. */
 	error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
 				     &acts);
@@ -908,6 +952,13 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		error = -ENODEV;
 		goto err_unlock_ovs;
 	}
+
+	if (rcu_access_pointer(dp->table.uid_ti) && !new_flow->uid.uid) {
+		OVS_NLERR("Flow table indexes by UID but UID is not specified.\n");
+		error = -EINVAL;
+		goto err_unlock_ovs;
+	}
+
 	/* Check if this is a duplicate flow */
 	flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key);
 	if (likely(!flow)) {
@@ -925,7 +976,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 						       ovs_header->dp_ifindex,
 						       reply, info->snd_portid,
 						       info->snd_seq, 0,
-						       OVS_FLOW_CMD_NEW);
+						       OVS_FLOW_CMD_NEW,
+						       uid_flags);
 			BUG_ON(error < 0);
 		}
 		ovs_unlock();
@@ -946,9 +998,10 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		/* The unmasked key has to be the same for flow updates. */
 		if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
 			/* Look for any overlapping flow. */
-			flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
-			if (!flow) {
-				error = -ENOENT;
+			flow = ovs_flow_tbl_lookup_exact(&dp->table, &sfid,
+							 &match);
+			if (unlikely(IS_ERR(flow))) {
+				error = PTR_ERR(flow);
 				goto err_unlock_ovs;
 			}
 		}
@@ -961,7 +1014,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 						       ovs_header->dp_ifindex,
 						       reply, info->snd_portid,
 						       info->snd_seq, 0,
-						       OVS_FLOW_CMD_NEW);
+						       OVS_FLOW_CMD_NEW,
+						       uid_flags);
 			BUG_ON(error < 0);
 		}
 		ovs_unlock();
@@ -1009,26 +1063,32 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct nlattr **a = info->attrs;
 	struct ovs_header *ovs_header = info->userhdr;
 	struct sw_flow_key key;
-	struct sw_flow *flow;
+	struct sw_flow *flow = NULL;
 	struct sw_flow_mask mask;
 	struct sk_buff *reply = NULL;
 	struct datapath *dp;
 	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
-	struct sw_flow_match match;
+	struct sw_flow_match match, *matchp;
+	struct sw_flow_id uid;
+	u32 uid_flags;
 	int error;
 
 	/* Extract key. */
 	error = -EINVAL;
-	if (!a[OVS_FLOW_ATTR_KEY]) {
-		OVS_NLERR("Flow key attribute not present in set flow.\n");
+	if (!a[OVS_FLOW_ATTR_UID] && !a[OVS_FLOW_ATTR_KEY]) {
+		OVS_NLERR("Flow index attribute not present in set flow.\n");
 		goto error;
 	}
-
-	ovs_match_init(&match, &key, &mask);
-	error = ovs_nla_get_match(&match,
-				  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+	error = ovs_nla_get_uid(a[OVS_FLOW_ATTR_UID], &uid, &uid_flags);
 	if (error)
 		goto error;
+	if (a[OVS_FLOW_ATTR_KEY]) {
+		ovs_match_init(&match, &key, &mask);
+		error = ovs_nla_get_match(&match,
+					  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+		if (error)
+			goto error;
+	}
 
 	/* Validate actions. */
 	if (a[OVS_FLOW_ATTR_ACTIONS]) {
@@ -1053,9 +1113,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 		goto err_unlock_ovs;
 	}
 	/* Check that the flow exists. */
-	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
-	if (unlikely(!flow)) {
-		error = -ENOENT;
+	matchp = a[OVS_FLOW_ATTR_KEY] ? &match : NULL;
+	flow = ovs_flow_tbl_lookup_exact(&dp->table, &uid, matchp);
+	if (unlikely(IS_ERR(flow))) {
+		error = PTR_ERR(flow);
 		goto err_unlock_ovs;
 	}
 
@@ -1069,14 +1130,16 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 						       ovs_header->dp_ifindex,
 						       reply, info->snd_portid,
 						       info->snd_seq, 0,
-						       OVS_FLOW_CMD_NEW);
+						       OVS_FLOW_CMD_NEW,
+						       uid_flags);
 			BUG_ON(error < 0);
 		}
 	} else {
 		/* Could not alloc without acts before locking. */
 		reply = ovs_flow_cmd_build_info(dp, flow,
 						ovs_header->dp_ifindex,
-						info, OVS_FLOW_CMD_NEW, false);
+						info, OVS_FLOW_CMD_NEW, false,
+						uid_flags);
 		if (unlikely(IS_ERR(reply))) {
 			error = PTR_ERR(reply);
 			goto err_unlock_ovs;
@@ -1109,20 +1172,27 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	struct ovs_header *ovs_header = info->userhdr;
 	struct sw_flow_key key;
 	struct sk_buff *reply;
-	struct sw_flow *flow;
+	struct sw_flow *flow = NULL;
 	struct datapath *dp;
-	struct sw_flow_match match;
+	struct sw_flow_match match, *matchp;
+	struct sw_flow_id uid;
+	u32 uid_flags;
 	int err;
 
-	if (!a[OVS_FLOW_ATTR_KEY]) {
-		OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
+	err = -EINVAL;
+	if (!a[OVS_FLOW_ATTR_UID] && !a[OVS_FLOW_ATTR_KEY]) {
+		OVS_NLERR("Flow get message rejected, Index attribute missing.\n");
 		return -EINVAL;
 	}
-
-	ovs_match_init(&match, &key, NULL);
-	err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+	err = ovs_nla_get_uid(a[OVS_FLOW_ATTR_UID], &uid, &uid_flags);
 	if (err)
 		return err;
+	if (a[OVS_FLOW_ATTR_KEY]) {
+		ovs_match_init(&match, &key, NULL);
+		err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+		if (err)
+			return err;
+	}
 
 	ovs_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1131,14 +1201,15 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 		goto unlock;
 	}
 
-	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
-	if (!flow) {
-		err = -ENOENT;
+	matchp = a[OVS_FLOW_ATTR_KEY] ? &match : NULL;
+	flow = ovs_flow_tbl_lookup_exact(&dp->table, &uid, matchp);
+	if (unlikely(IS_ERR(flow))) {
+		err = PTR_ERR(flow);
 		goto unlock;
 	}
 
 	reply = ovs_flow_cmd_build_info(dp, flow, ovs_header->dp_ifindex, info,
-					OVS_FLOW_CMD_NEW, true);
+					OVS_FLOW_CMD_NEW, true, uid_flags);
 	if (IS_ERR(reply)) {
 		err = PTR_ERR(reply);
 		goto unlock;
@@ -1157,12 +1228,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	struct ovs_header *ovs_header = info->userhdr;
 	struct sw_flow_key key;
 	struct sk_buff *reply;
-	struct sw_flow *flow;
+	struct sw_flow *flow = NULL;
 	struct datapath *dp;
-	struct sw_flow_match match;
+	struct sw_flow_match match, *matchp;
+	struct sw_flow_id uid;
+	u32 uid_flags;
 	int err;
 
-	if (likely(a[OVS_FLOW_ATTR_KEY])) {
+	err = ovs_nla_get_uid(a[OVS_FLOW_ATTR_UID], &uid, &uid_flags);
+	if (err)
+		return err;
+	if (a[OVS_FLOW_ATTR_KEY]) {
 		ovs_match_init(&match, &key, NULL);
 		err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
 		if (unlikely(err))
@@ -1175,13 +1251,16 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 		err = -ENODEV;
 		goto unlock;
 	}
-	if (unlikely(!a[OVS_FLOW_ATTR_KEY])) {
+
+	if (unlikely(!a[OVS_FLOW_ATTR_UID] && !a[OVS_FLOW_ATTR_KEY])) {
 		err = ovs_flow_tbl_flush(&dp->table);
 		goto unlock;
 	}
-	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
-	if (unlikely(!flow)) {
-		err = -ENOENT;
+
+	matchp = a[OVS_FLOW_ATTR_KEY] ? &match : NULL;
+	flow = ovs_flow_tbl_lookup_exact(&dp->table, &uid, matchp);
+	if (unlikely(IS_ERR(flow))) {
+		err = PTR_ERR(flow);
 		goto unlock;
 	}
 
@@ -1198,7 +1277,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 						     ovs_header->dp_ifindex,
 						     reply, info->snd_portid,
 						     info->snd_seq, 0,
-						     OVS_FLOW_CMD_DEL);
+						     OVS_FLOW_CMD_DEL, uid_flags);
 			rcu_read_unlock();
 			BUG_ON(err < 0);
 			ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
@@ -1219,8 +1298,15 @@ unlock:
 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
+	struct nlattr *nla, *uid;
 	struct table_instance *ti;
 	struct datapath *dp;
+	u32 uid_flags = 0;
+
+	nla = nlmsg_attrdata(cb->nlh, sizeof(*ovs_header));
+	uid = nla_find_nested(nla, OVS_FLOW_ATTR_UID);
+	if (uid && ovs_nla_get_uid(uid, NULL, &uid_flags))
+		OVS_NLERR("Error occurred parsing UID flags on dump");
 
 	rcu_read_lock();
 	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1243,7 +1329,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
 					   NETLINK_CB(cb->skb).portid,
 					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					   OVS_FLOW_CMD_NEW) < 0)
+					   OVS_FLOW_CMD_NEW, uid_flags) < 0)
 			break;
 
 		cb->args[0] = bucket;
@@ -1257,6 +1343,7 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
 	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
 	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
 	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
+	[OVS_FLOW_ATTR_UID] = { .type = NLA_NESTED },
 };
 
 static struct genl_ops dp_flow_genl_ops[] = {
@@ -1399,6 +1486,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	struct vport *vport;
 	struct ovs_net *ovs_net;
+	bool support_uid;
 	int err, i;
 
 	err = -EINVAL;
@@ -1416,8 +1504,11 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 
 	ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
 
+	ovs_dp_change(dp, a);
+
 	/* Allocate table. */
-	err = ovs_flow_tbl_init(&dp->table);
+	support_uid = dp->user_features & OVS_DP_F_INDEX_BY_UID;
+	err = ovs_flow_tbl_init(&dp->table, support_uid);
 	if (err)
 		goto err_free_dp;
 
@@ -1445,8 +1536,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	parms.port_no = OVSP_LOCAL;
 	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
 
-	ovs_dp_change(dp, a);
-
 	/* So far only local changes have been made, now need the lock. */
 	ovs_lock();
 
diff --git a/datapath/flow.h b/datapath/flow.h
index eb9246a..816e69e 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -199,6 +199,11 @@ struct sw_flow_match {
 	struct sw_flow_mask *mask;
 };
 
+struct sw_flow_id {
+	u32 *uid;
+	u32 uid_len;
+};
+
 struct sw_flow_actions {
 	struct rcu_head rcu;
 	u32 actions_len;
@@ -215,11 +220,14 @@ struct flow_stats {
 
 struct sw_flow {
 	struct rcu_head rcu;
-	struct hlist_node hash_node[2];
-	u32 hash;
+	struct {
+		struct hlist_node node[2];
+		u32 hash;
+	} flow_hash, uid_hash;
 	int stats_last_writer;		/* NUMA-node id of the last writer on
 					 * 'stats[0]'.
 					 */
+	struct sw_flow_id uid;
 	struct sw_flow_key key;
 	struct sw_flow_key unmasked_key;
 	struct sw_flow_mask *mask;
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index a3f34f1..6958d87 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -1071,6 +1071,66 @@ free_newmask:
 	return err;
 }
 
+/* Support UIDs up to 128 bits. */
+#define MAX_UID_BUFSIZE		(128 / 8)
+
+size_t ovs_uid_attr_size(void)
+{
+	/* Whenever adding new OVS_UID_ATTR_* fields, we should consider
+	 * updating this function.  */
+	return    nla_total_size(4)		    /* OVS_UID_ATTR_FLAGS */
+		+ nla_total_size(MAX_UID_BUFSIZE);  /* OVS_UID_ATTR_ID */
+}
+
+int ovs_nla_get_uid(const struct nlattr *attr, struct sw_flow_id *sfid,
+		    u32 *flags)
+{
+	static const struct nla_policy ovs_uid_policy[OVS_UID_ATTR_MAX + 1] = {
+		[OVS_UID_ATTR_FLAGS] = { .type = NLA_U32 },
+		[OVS_UID_ATTR_ID] = { .len = sizeof(u32) },
+	};
+	const struct nlattr *a[OVS_UID_ATTR_MAX + 1];
+	int err;
+
+	if (sfid) {
+		sfid->uid = NULL;
+		sfid->uid_len = 0;
+	}
+	if (flags)
+		*flags = 0;
+
+	if (!attr)
+		return 0;
+
+	err = nla_parse_nested((struct nlattr **)a, OVS_UID_ATTR_MAX, attr,
+			       ovs_uid_policy);
+	if (err)
+		return err;
+
+	if (sfid) {
+		if (a[OVS_UID_ATTR_ID]) {
+			size_t len;
+
+			len = nla_len(a[OVS_UID_ATTR_ID]);
+			if (len > MAX_UID_BUFSIZE) {
+				OVS_NLERR("Flow uid size (%zu bytes) exceeds "
+					  "maximum (%u bytes)\n", len,
+					  MAX_UID_BUFSIZE);
+				return -EINVAL;
+			}
+			sfid->uid = nla_data(a[OVS_UID_ATTR_ID]);
+			sfid->uid_len = len;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	if (flags && a[OVS_UID_ATTR_FLAGS])
+		*flags = nla_get_u32(a[OVS_UID_ATTR_FLAGS]);
+
+	return 0;
+}
+
 /**
  * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
  * @key: Receives extracted in_port, priority, tun_key and skb_mark.
diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h
index 90bbe37..0158c69 100644
--- a/datapath/flow_netlink.h
+++ b/datapath/flow_netlink.h
@@ -39,6 +39,7 @@
 
 size_t ovs_tun_key_attr_size(void);
 size_t ovs_key_attr_size(void);
+size_t ovs_uid_attr_size(void);
 
 void ovs_match_init(struct sw_flow_match *match,
 		    struct sw_flow_key *key, struct sw_flow_mask *mask);
@@ -52,6 +53,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
 		      const struct nlattr *);
 int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
 				  const struct ovs_tunnel_info *);
+int ovs_nla_get_uid(const struct nlattr *, struct sw_flow_id *, u32 *flags);
 
 int ovs_nla_copy_actions(const struct nlattr *attr,
 			 const struct sw_flow_key *key,
diff --git a/datapath/flow_table.c b/datapath/flow_table.c
index 4efef13..6a92994 100644
--- a/datapath/flow_table.c
+++ b/datapath/flow_table.c
@@ -80,6 +80,20 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
 		*d++ = *s++ & *m++;
 }
 
+int ovs_flow_uid(struct sw_flow_id *dst, const struct sw_flow_id *src)
+{
+	if (src->uid_len) {
+		dst->uid = kmalloc(src->uid_len, GFP_KERNEL);
+		if (!dst->uid)
+			return -ENOMEM;
+
+		memcpy(dst->uid, src->uid, src->uid_len);
+		dst->uid_len = src->uid_len;
+	}
+
+	return 0;
+}
+
 struct sw_flow *ovs_flow_alloc(void)
 {
 	struct sw_flow *flow;
@@ -90,6 +104,8 @@ struct sw_flow *ovs_flow_alloc(void)
 	if (!flow)
 		return ERR_PTR(-ENOMEM);
 
+	flow->uid.uid = NULL;
+	flow->uid.uid_len = 0;
 	flow->sf_acts = NULL;
 	flow->mask = NULL;
 	flow->stats_last_writer = NUMA_NO_NODE;
@@ -147,6 +163,7 @@ static void flow_free(struct sw_flow *flow)
 	int node;
 
 	kfree((struct sw_flow_actions __force *)flow->sf_acts);
+	kfree((struct sw_flow_id __force *)flow->uid.uid);
 	for_each_node(node)
 		if (flow->stats[node])
 			kmem_cache_free(flow_stats_cache,
@@ -263,7 +280,7 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
 	return 0;
 }
 
-int ovs_flow_tbl_init(struct flow_table *table)
+int ovs_flow_tbl_init(struct flow_table *table, bool support_uid)
 {
 	struct table_instance *ti;
 	struct mask_array *ma;
@@ -281,12 +298,25 @@ int ovs_flow_tbl_init(struct flow_table *table)
 	if (!ti)
 		goto free_mask_array;
 
+	if (support_uid) {
+		struct table_instance *uid_ti;
+
+		uid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
+		if (!uid_ti)
+			goto free_ti;
+		rcu_assign_pointer(table->uid_ti, uid_ti);
+	} else {
+		rcu_assign_pointer(table->uid_ti, NULL);
+	}
+
 	rcu_assign_pointer(table->ti, ti);
 	rcu_assign_pointer(table->mask_array, ma);
 	table->last_rehash = jiffies;
 	table->count = 0;
 	return 0;
 
+free_ti:
+	__table_instance_destroy(ti);
 free_mask_array:
 	kfree(ma);
 free_mask_cache:
@@ -301,7 +331,8 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
 	__table_instance_destroy(ti);
 }
 
-static void table_instance_destroy(struct table_instance *ti, bool deferred)
+static void table_instance_destroy(struct table_instance *ti, bool deferred,
+				   bool uid)
 {
 	int i;
 
@@ -317,9 +348,14 @@ static void table_instance_destroy(struct table_instance *ti, bool deferred)
 		struct hlist_node *n;
 		int ver = ti->node_ver;
 
-		hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
-			hlist_del_rcu(&flow->hash_node[ver]);
-			ovs_flow_free(flow, deferred);
+		if (uid) {
+			hlist_for_each_entry_safe(flow, n, head, uid_hash.node[ver])
+				hlist_del_rcu(&flow->uid_hash.node[ver]);
+		} else {
+			hlist_for_each_entry_safe(flow, n, head, flow_hash.node[ver]) {
+				hlist_del_rcu(&flow->flow_hash.node[ver]);
+				ovs_flow_free(flow, deferred);
+			}
 		}
 	}
 
@@ -335,10 +371,12 @@ skip_flows:
 void ovs_flow_tbl_destroy(struct flow_table *table)
 {
 	struct table_instance *ti = (struct table_instance __force *)table->ti;
+	struct table_instance *uid_ti = (struct table_instance __force *)table->uid_ti;
 
 	free_percpu(table->mask_cache);
 	kfree((struct mask_array __force *)table->mask_array);
-	table_instance_destroy(ti, false);
+	table_instance_destroy(ti, false, false);
+	table_instance_destroy(uid_ti, false, true);
 }
 
 struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -353,7 +391,7 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
 	while (*bucket < ti->n_buckets) {
 		i = 0;
 		head = flex_array_get(ti->buckets, *bucket);
-		hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
+		hlist_for_each_entry_rcu(flow, head, flow_hash.node[ver]) {
 			if (i < *last) {
 				i++;
 				continue;
@@ -379,12 +417,20 @@ static void table_instance_insert(struct table_instance *ti, struct sw_flow *flo
 {
 	struct hlist_head *head;
 
-	head = find_bucket(ti, flow->hash);
-	hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head);
+	head = find_bucket(ti, flow->flow_hash.hash);
+	hlist_add_head_rcu(&flow->flow_hash.node[ti->node_ver], head);
+}
+
+static void uid_table_instance_insert(struct table_instance *ti, struct sw_flow *flow)
+{
+	struct hlist_head *head;
+
+	head = find_bucket(ti, flow->uid_hash.hash);
+	hlist_add_head_rcu(&flow->uid_hash.node[ti->node_ver], head);
 }
 
 static void flow_table_copy_flows(struct table_instance *old,
-				  struct table_instance *new)
+				  struct table_instance *new, bool uid)
 {
 	int old_ver;
 	int i;
@@ -399,42 +445,81 @@ static void flow_table_copy_flows(struct table_instance *old,
 
 		head = flex_array_get(old->buckets, i);
 
-		hlist_for_each_entry(flow, head, hash_node[old_ver])
-			table_instance_insert(new, flow);
+		if (uid)
+			hlist_for_each_entry(flow, head, uid_hash.node[old_ver])
+				uid_table_instance_insert(new, flow);
+		else
+			hlist_for_each_entry(flow, head, flow_hash.node[old_ver])
+				table_instance_insert(new, flow);
 	}
 
 	old->keep_flows = true;
 }
 
-static struct table_instance *table_instance_rehash(struct table_instance *ti,
-					    int n_buckets)
+static int flow_table_instance_alloc(struct table_instance **ti,
+				     struct table_instance **uid_ti,
+				     int n_buckets)
 {
-	struct table_instance *new_ti;
+	struct table_instance *new_ti, *new_uid_ti;
 
 	new_ti = table_instance_alloc(n_buckets);
 	if (!new_ti)
-		return NULL;
+		return -ENOMEM;
 
-	flow_table_copy_flows(ti, new_ti);
+	if (uid_ti) {
+		new_uid_ti = table_instance_alloc(n_buckets);
+		if (!new_uid_ti) {
+			__table_instance_destroy(new_ti);
+			return -ENOMEM;
+		}
+		*uid_ti = new_uid_ti;
+	}
 
-	return new_ti;
+	*ti = new_ti;
+	return 0;
+}
+
+static int flow_table_rehash(struct table_instance *old_ti,
+			     struct table_instance *old_uid_ti, int n_buckets,
+			     struct table_instance **new_ti,
+			     struct table_instance **new_uid_ti)
+{
+	int err;
+
+	err = flow_table_instance_alloc(new_ti, old_uid_ti ? new_uid_ti : NULL,
+					n_buckets);
+	if (err)
+		return err;
+
+	flow_table_copy_flows(old_ti, *new_ti, false);
+	if (old_uid_ti)
+		flow_table_copy_flows(old_uid_ti, *new_uid_ti, true);
+
+	return 0;
 }
 
 int ovs_flow_tbl_flush(struct flow_table *flow_table)
 {
-	struct table_instance *old_ti;
-	struct table_instance *new_ti;
+	struct table_instance *old_ti, *new_ti, *old_uid_ti;
+	struct table_instance *new_uid_ti = NULL;
+	int err;
 
 	old_ti = ovsl_dereference(flow_table->ti);
-	new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
-	if (!new_ti)
-		return -ENOMEM;
+	old_uid_ti = ovsl_dereference(flow_table->uid_ti);
+	if (old_uid_ti)
+		err = flow_table_instance_alloc(&new_ti, &new_uid_ti, TBL_MIN_BUCKETS);
+	else
+		err = flow_table_instance_alloc(&new_ti, NULL, TBL_MIN_BUCKETS);
+	if (err)
+		return err;
 
 	rcu_assign_pointer(flow_table->ti, new_ti);
+	rcu_assign_pointer(flow_table->uid_ti, new_uid_ti);
 	flow_table->last_rehash = jiffies;
 	flow_table->count = 0;
 
-	table_instance_destroy(old_ti, true);
+	table_instance_destroy(old_ti, true, false);
+	table_instance_destroy(old_uid_ti, true, true);
 	return 0;
 }
 
@@ -507,8 +592,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
 	hash = flow_hash(&masked_key, key_start, key_end);
 	head = find_bucket(ti, hash);
 	(*n_mask_hit)++;
-	hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) {
-		if (flow->mask == mask && flow->hash == hash &&
+	hlist_for_each_entry_rcu(flow, head, flow_hash.node[ti->node_ver]) {
+		if (flow->mask == mask && flow->flow_hash.hash == hash &&
 		    flow_cmp_masked_key(flow, &masked_key,
 					  key_start, key_end))
 			return flow;
@@ -626,8 +711,8 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
 	return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index);
 }
 
-struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
-					  const struct sw_flow_match *match)
+static struct sw_flow *lookup_exact(struct flow_table *tbl,
+				    const struct sw_flow_match *match)
 {
 	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
 	int i;
@@ -649,6 +734,58 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
 	return NULL;
 }
 
+static u32 uid_hash(const struct sw_flow_id *sfid)
+{
+	return arch_fast_hash2(sfid->uid, sfid->uid_len >> 2, 0);
+}
+
+static bool flow_cmp_uid(const struct sw_flow *flow,
+			 const struct sw_flow_id *sfid)
+{
+	if (flow->uid.uid_len != sfid->uid_len)
+		return false;
+
+	return !memcmp(flow->uid.uid, sfid->uid, sfid->uid_len);
+}
+
+static struct sw_flow *lookup_uid(struct flow_table *tbl,
+				  const struct sw_flow_id *uid)
+{
+	struct table_instance *ti = rcu_dereference_ovsl(tbl->uid_ti);
+	struct sw_flow *flow;
+	struct hlist_head *head;
+	u32 hash;
+
+	BUG_ON(!ti);
+	hash = uid_hash(uid);
+	head = find_bucket(ti, hash);
+	hlist_for_each_entry_rcu(flow, head, uid_hash.node[ti->node_ver]) {
+		if (flow->uid_hash.hash == hash && flow_cmp_uid(flow, uid))
+			return flow;
+	}
+	return NULL;
+}
+
+struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
+					  const struct sw_flow_id *uid,
+					  const struct sw_flow_match *match)
+{
+	struct sw_flow *flow;
+
+	if (rcu_access_pointer(tbl->uid_ti) && uid->uid_len) {
+		flow = lookup_uid(tbl, uid);
+	} else if (match) {
+		flow = lookup_exact(tbl, match);
+	} else {
+		OVS_NLERR("Flow key attribute not present in flow lookup.\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!flow)
+		return ERR_PTR(-ENOENT);
+	return flow;
+}
+
 int ovs_flow_tbl_num_masks(const struct flow_table *table)
 {
 	struct mask_array *ma;
@@ -657,9 +794,13 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table)
 	return ma->count;
 }
 
-static struct table_instance *table_instance_expand(struct table_instance *ti)
+static int flow_table_expand(struct table_instance *old_ti,
+			     struct table_instance *old_uid_ti,
+			     struct table_instance **new_ti,
+			     struct table_instance **new_uid_ti)
 {
-	return table_instance_rehash(ti, ti->n_buckets * 2);
+	return flow_table_rehash(old_ti, old_uid_ti, old_ti->n_buckets * 2,
+				 new_ti, new_uid_ti);
 }
 
 static void tbl_mask_array_delete_mask(struct mask_array *ma,
@@ -709,9 +850,12 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
 void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
 {
 	struct table_instance *ti = ovsl_dereference(table->ti);
+	struct table_instance *uid_ti = ovsl_dereference(table->uid_ti);
 
 	BUG_ON(table->count == 0);
-	hlist_del_rcu(&flow->hash_node[ti->node_ver]);
+	hlist_del_rcu(&flow->flow_hash.node[ti->node_ver]);
+	if (uid_ti)
+		hlist_del_rcu(&flow->uid_hash.node[uid_ti->node_ver]);
 	table->count--;
 
 	/* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
@@ -816,29 +960,39 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
 int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
 			const struct sw_flow_mask *mask)
 {
-	struct table_instance *new_ti = NULL;
-	struct table_instance *ti;
+	struct table_instance *new_ti = NULL, *new_uid_ti = NULL;
+	struct table_instance *ti, *uid_ti;
 	int err;
 
 	err = flow_mask_insert(table, flow, mask);
 	if (err)
 		return err;
 
-	flow->hash = flow_hash(&flow->key, flow->mask->range.start,
-			flow->mask->range.end);
+	flow->flow_hash.hash = flow_hash(&flow->key, flow->mask->range.start,
+					 flow->mask->range.end);
 	ti = ovsl_dereference(table->ti);
 	table_instance_insert(ti, flow);
 	table->count++;
 
+	uid_ti = ovsl_dereference(table->uid_ti);
+	if (uid_ti) {
+		BUG_ON(!flow->uid.uid);
+		flow->uid_hash.hash = uid_hash(&flow->uid);
+		uid_table_instance_insert(uid_ti, flow);
+	}
+
 	/* Expand table, if necessary, to make room. */
 	if (table->count > ti->n_buckets)
-		new_ti = table_instance_expand(ti);
+		flow_table_expand(ti, uid_ti, &new_ti, &new_uid_ti);
 	else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
-		new_ti = table_instance_rehash(ti, ti->n_buckets);
+		flow_table_rehash(ti, uid_ti, ti->n_buckets,
+				  &new_ti, &new_uid_ti);
 
 	if (new_ti) {
 		rcu_assign_pointer(table->ti, new_ti);
-		table_instance_destroy(ti, true);
+		rcu_assign_pointer(table->uid_ti, new_uid_ti);
+		table_instance_destroy(ti, true, false);
+		table_instance_destroy(uid_ti, true, true);
 		table->last_rehash = jiffies;
 	}
 	return 0;
diff --git a/datapath/flow_table.h b/datapath/flow_table.h
index 9eb4af9..2e6582f 100644
--- a/datapath/flow_table.h
+++ b/datapath/flow_table.h
@@ -60,6 +60,7 @@ struct flow_table {
 	struct table_instance __rcu *ti;
 	struct mask_cache_entry __percpu *mask_cache;
 	struct mask_array __rcu *mask_array;
+	struct table_instance __rcu *uid_ti;
 	unsigned long last_rehash;
 	unsigned int count;
 };
@@ -72,7 +73,7 @@ void ovs_flow_exit(void);
 struct sw_flow *ovs_flow_alloc(void);
 void ovs_flow_free(struct sw_flow *, bool deferred);
 
-int ovs_flow_tbl_init(struct flow_table *);
+int ovs_flow_tbl_init(struct flow_table *, bool support_uid);
 int ovs_flow_tbl_count(const struct flow_table *table);
 void ovs_flow_tbl_destroy(struct flow_table *table);
 int ovs_flow_tbl_flush(struct flow_table *flow_table);
@@ -90,11 +91,13 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
 struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
 				    const struct sw_flow_key *);
 struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *,
+					  const struct sw_flow_id *sfid,
 					  const struct sw_flow_match *match);
 
 bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
 			       const struct sw_flow_match *match);
 
+int ovs_flow_uid(struct sw_flow_id *dst, const struct sw_flow_id *src);
 void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
 		       const struct sw_flow_mask *mask);
 #endif /* flow_table.h */
diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
index 306ea86..af463a5 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -142,6 +142,9 @@ struct ovs_vport_stats {
 /* Allow datapath to associate multiple Netlink PIDs to each vport */
 #define OVS_DP_F_VPORT_PIDS	(1 << 1)
 
+/* Force the datapath to index flows by userspace flow ID. */
+#define OVS_DP_F_INDEX_BY_UID   (1 << 2)
+
 /* Fixed logical ports. */
 #define OVSP_LOCAL      ((__u32)0)
 
@@ -470,6 +473,10 @@ struct ovs_key_nd {
  * a wildcarded match. Omitting attribute is treated as wildcarding all
  * corresponding fields. Optional for all requests. If not present,
  * all flow key bits are exact match bits.
+ * @OVS_FLOW_ATTR_UID: Nested %OVS_UID_ATTR_* attributes specifying unique
+ * identifiers for flows and providing alternative semantics for flow
+ * installation and retrieval. Required for all requests if the datapath is
+ * created with %OVS_DP_F_INDEX_BY_UID.
  *
  * These attributes follow the &struct ovs_header within the Generic Netlink
  * payload for %OVS_FLOW_* commands.
@@ -483,12 +490,34 @@ enum ovs_flow_attr {
 	OVS_FLOW_ATTR_USED,      /* u64 msecs last used in monotonic time. */
 	OVS_FLOW_ATTR_CLEAR,     /* Flag to clear stats, tcp_flags, used. */
 	OVS_FLOW_ATTR_MASK,      /* Sequence of OVS_KEY_ATTR_* attributes. */
+	OVS_FLOW_ATTR_UID,       /* Unique flow identifier. */
 	__OVS_FLOW_ATTR_MAX
 };
 
 #define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1)
 
 /**
+ * enum ovs_uid_attr - Unique identifier types.
+ *
+ * @OVS_UID_ATTR_FLAGS: A 32-bit value specifying changes to the behaviour of
+ * the current %OVS_FLOW_CMD_* request. Optional for all requests.
+ * @OVS_UID_ATTR_ID: A unique identifier for a flow.
+ */
+enum ovs_uid_attr {
+	OVS_UID_ATTR_UNSPEC,
+	OVS_UID_ATTR_FLAGS,	/* u32 of OVS_UID_F_* */
+	OVS_UID_ATTR_ID,	/* variable size: 4, 8, 12 or 16 octets. */
+	__OVS_UID_ATTR_MAX
+};
+
+#define OVS_UID_ATTR_MAX (__OVS_UID_ATTR_MAX - 1)
+
+/* Skip attributes for notifications. */
+#define OVS_UID_F_SKIP_KEY	(1 << 0)
+#define OVS_UID_F_SKIP_MASK	(1 << 1)
+#define OVS_UID_F_SKIP_ACTIONS	(1 << 2)
+
+/**
  * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
  * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
  * @OVS_ACTION_ATTR_SAMPLE.  A value of 0 samples no packets, a value of
-- 
1.7.10.4