[ovs-dev] [PATCHv5 09/12] datapath: Add support for OVS_FLOW_ATTR_UID.

Joe Stringer joestringer at nicira.com
Mon Sep 15 02:25:15 UTC 2014


If a datapath is created with the flag OVS_DP_F_INDEX_BY_UID, then an
additional table_instance is added to the flow_table, which is indexed
by unique identifiers ("UID"). This can be manipulated using the flow
key as before, or by using the new UID field. If both are specified,
then UID takes precedence. UIDs up to 128 bits may be specified for any
flow operation.

If a datapath is created using OVS_DP_F_INDEX_BY_UID and a UID is not
specified at flow setup time, then that operation will fail. If
OVS_UID_F_* flags are specified for an operation, then they will modify
what is returned through the operation. For instance, OVS_UID_F_SKIP_KEY
allows the datapath to skip returning the key.

Signed-off-by: Joe Stringer <joestringer at nicira.com>
---
v5: No change.
v4: Fix memory leaks.
    Log when triggering the older userspace issue above.
v3: Initial post.
---
 datapath/datapath.c                               |  215 +++++++++++++------
 datapath/flow.h                                   |   12 +-
 datapath/flow_netlink.c                           |   60 ++++++
 datapath/flow_netlink.h                           |    2 +
 datapath/flow_table.c                             |  229 +++++++++++++++++----
 datapath/flow_table.h                             |    5 +-
 datapath/linux/compat/include/linux/openvswitch.h |    3 +
 lib/dpif-linux.c                                  |    1 +
 8 files changed, 423 insertions(+), 104 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index ed9d7bd..6f5bf9e 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -670,36 +670,41 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
 		+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
 		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
 		+ nla_total_size(8) /* OVS_FLOW_ATTR_USED */
+		+ nla_total_size(ovs_uid_attr_size()) /* OVS_FLOW_ATTR_UID */
 		+ nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
 }
 
 /* Called with ovs_mutex or RCU read lock. */
 static int ovs_flow_cmd_fill_match(const struct sw_flow *flow,
-				   struct sk_buff *skb)
+				   struct sk_buff *skb, u32 uid_flags)
 {
 	struct nlattr *nla;
 	int err;
 
 	/* Fill flow key. */
-	nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
-	if (!nla)
-		return -EMSGSIZE;
+	if (!(uid_flags & OVS_UID_F_SKIP_KEY)) {
+		nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
+		if (!nla)
+			return -EMSGSIZE;
 
-	err = ovs_nla_put_flow(&flow->unmasked_key,
-			       &flow->unmasked_key, skb);
-	if (err)
-		return err;
-	nla_nest_end(skb, nla);
+		err = ovs_nla_put_flow(&flow->unmasked_key,
+				       &flow->unmasked_key, skb);
+		if (err)
+			return err;
+		nla_nest_end(skb, nla);
+	}
 
 	/* Fill flow mask. */
-	nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
-	if (!nla)
-		return -EMSGSIZE;
+	if (!(uid_flags & OVS_UID_F_SKIP_MASK)) {
+		nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
+		if (!nla)
+			return -EMSGSIZE;
 
-	err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
-	if (err)
-		return err;
-	nla_nest_end(skb, nla);
+		err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
+		if (err)
+			return err;
+		nla_nest_end(skb, nla);
+	}
 
 	return 0;
 }
@@ -730,6 +735,30 @@ static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
 }
 
 /* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_uid(const struct sw_flow *flow,
+				 struct sk_buff *skb)
+{
+	struct nlattr *start;
+	const struct sw_flow_id *sfid = &flow->uid;
+
+	if (!sfid->uid)
+		return 0;
+
+	start = nla_nest_start(skb, OVS_FLOW_ATTR_UID);
+	if (start) {
+		int err;
+
+		err = nla_put(skb, OVS_UID_ATTR_ID, sfid->uid_len, sfid->uid);
+		if (err)
+			return err;
+		nla_nest_end(skb, start);
+	} else
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
 static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
 				     struct sk_buff *skb, int skb_orig_len)
 {
@@ -772,7 +801,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
 /* Called with ovs_mutex or RCU read lock. */
 static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
 				  struct sk_buff *skb, u32 portid,
-				  u32 seq, u32 flags, u8 cmd)
+				  u32 seq, u32 flags, u8 cmd, u32 uid_flags)
 {
 	const int skb_orig_len = skb->len;
 	struct ovs_header *ovs_header;
@@ -783,18 +812,24 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
 		return -EMSGSIZE;
 	ovs_header->dp_ifindex = dp_ifindex;
 
-	err = ovs_flow_cmd_fill_match(flow, skb);
+	err = ovs_flow_cmd_fill_match(flow, skb, uid_flags);
 	if (err)
 		goto error;
 
-	err = ovs_flow_cmd_fill_stats(flow, skb);
+	err = ovs_flow_cmd_fill_uid(flow, skb);
 	if (err)
 		goto error;
 
-	err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
+	err = ovs_flow_cmd_fill_stats(flow, skb);
 	if (err)
 		goto error;
 
+	if (!(uid_flags & OVS_UID_F_SKIP_ACTIONS)) {
+		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
+		if (err)
+			goto error;
+	}
+
 	return genlmsg_end(skb, ovs_header);
 
 error:
@@ -825,7 +860,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct datapath *dp,
 					       const struct sw_flow *flow,
 					       int dp_ifindex,
 					       struct genl_info *info, u8 cmd,
-					       bool always)
+					       bool always, u32 uid_flags)
 {
 	struct sk_buff *skb;
 	int retval;
@@ -837,7 +872,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct datapath *dp,
 
 	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
 					info->snd_portid, info->snd_seq, 0,
-					cmd);
+					cmd, uid_flags);
 	BUG_ON(retval < 0);
 	return skb;
 }
@@ -852,6 +887,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	struct sw_flow_actions *acts;
 	struct sw_flow_match match;
+	struct sw_flow_id sfid;
+	u32 uid_flags;
 	int error;
 
 	/* Must have key and actions. */
@@ -882,6 +919,13 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 
 	ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
 
+	/* Extract uid. */
+	error = ovs_nla_get_uid(a[OVS_FLOW_ATTR_UID], &sfid, &uid_flags);
+	if (!error)
+		error = ovs_flow_uid(&new_flow->uid, &sfid);
+	if (error)
+		goto err_kfree_flow;
+
 	/* Validate actions. */
 	error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
 				     &acts);
@@ -902,6 +946,13 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		error = -ENODEV;
 		goto err_unlock_ovs;
 	}
+
+	if (rcu_access_pointer(dp->table.uid_ti) && !new_flow->uid.uid) {
+		OVS_NLERR("Flow table indexes by UID but UID is not specified.\n");
+		error = -EINVAL;
+		goto err_unlock_ovs;
+	}
+
 	/* Check if this is a duplicate flow */
 	flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key);
 	if (likely(!flow)) {
@@ -919,7 +970,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 						       ovs_header->dp_ifindex,
 						       reply, info->snd_portid,
 						       info->snd_seq, 0,
-						       OVS_FLOW_CMD_NEW);
+						       OVS_FLOW_CMD_NEW,
+						       uid_flags);
 			BUG_ON(error < 0);
 		}
 		ovs_unlock();
@@ -940,9 +992,10 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		/* The unmasked key has to be the same for flow updates. */
 		if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
 			/* Look for any overlapping flow. */
-			flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
-			if (!flow) {
-				error = -ENOENT;
+			flow = ovs_flow_tbl_lookup_exact(&dp->table, &sfid,
+							 &match);
+			if (unlikely(IS_ERR(flow))) {
+				error = PTR_ERR(flow);
 				goto err_unlock_ovs;
 			}
 		}
@@ -955,7 +1008,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 						       ovs_header->dp_ifindex,
 						       reply, info->snd_portid,
 						       info->snd_seq, 0,
-						       OVS_FLOW_CMD_NEW);
+						       OVS_FLOW_CMD_NEW,
+						       uid_flags);
 			BUG_ON(error < 0);
 		}
 		ovs_unlock();
@@ -1003,26 +1057,32 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct nlattr **a = info->attrs;
 	struct ovs_header *ovs_header = info->userhdr;
 	struct sw_flow_key key;
-	struct sw_flow *flow;
+	struct sw_flow *flow = NULL;
 	struct sw_flow_mask mask;
 	struct sk_buff *reply = NULL;
 	struct datapath *dp;
 	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
-	struct sw_flow_match match;
+	struct sw_flow_match match, *matchp;
+	struct sw_flow_id uid;
+	u32 uid_flags;
 	int error;
 
 	/* Extract key. */
 	error = -EINVAL;
-	if (!a[OVS_FLOW_ATTR_KEY]) {
-		OVS_NLERR("Flow key attribute not present in set flow.\n");
+	if (!a[OVS_FLOW_ATTR_UID] && !a[OVS_FLOW_ATTR_KEY]) {
+		OVS_NLERR("Flow index attribute not present in set flow.\n");
 		goto error;
 	}
-
-	ovs_match_init(&match, &key, &mask);
-	error = ovs_nla_get_match(&match,
-				  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+	error = ovs_nla_get_uid(a[OVS_FLOW_ATTR_UID], &uid, &uid_flags);
 	if (error)
 		goto error;
+	if (a[OVS_FLOW_ATTR_KEY]) {
+		ovs_match_init(&match, &key, &mask);
+		error = ovs_nla_get_match(&match,
+					  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+		if (error)
+			goto error;
+	}
 
 	/* Validate actions. */
 	if (a[OVS_FLOW_ATTR_ACTIONS]) {
@@ -1047,9 +1107,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 		goto err_unlock_ovs;
 	}
 	/* Check that the flow exists. */
-	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
-	if (unlikely(!flow)) {
-		error = -ENOENT;
+	matchp = a[OVS_FLOW_ATTR_KEY] ? &match : NULL;
+	flow = ovs_flow_tbl_lookup_exact(&dp->table, &uid, matchp);
+	if (unlikely(IS_ERR(flow))) {
+		error = PTR_ERR(flow);
 		goto err_unlock_ovs;
 	}
 
@@ -1063,14 +1124,16 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 						       ovs_header->dp_ifindex,
 						       reply, info->snd_portid,
 						       info->snd_seq, 0,
-						       OVS_FLOW_CMD_NEW);
+						       OVS_FLOW_CMD_NEW,
+						       uid_flags);
 			BUG_ON(error < 0);
 		}
 	} else {
 		/* Could not alloc without acts before locking. */
 		reply = ovs_flow_cmd_build_info(dp, flow,
 						ovs_header->dp_ifindex,
-						info, OVS_FLOW_CMD_NEW, false);
+						info, OVS_FLOW_CMD_NEW, false,
+						uid_flags);
 		if (unlikely(IS_ERR(reply))) {
 			error = PTR_ERR(reply);
 			goto err_unlock_ovs;
@@ -1103,20 +1166,27 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	struct ovs_header *ovs_header = info->userhdr;
 	struct sw_flow_key key;
 	struct sk_buff *reply;
-	struct sw_flow *flow;
+	struct sw_flow *flow = NULL;
 	struct datapath *dp;
-	struct sw_flow_match match;
+	struct sw_flow_match match, *matchp;
+	struct sw_flow_id uid;
+	u32 uid_flags;
 	int err;
 
-	if (!a[OVS_FLOW_ATTR_KEY]) {
-		OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
+	err = -EINVAL;
+	if (!a[OVS_FLOW_ATTR_UID] && !a[OVS_FLOW_ATTR_KEY]) {
+		OVS_NLERR("Flow get message rejected, Index attribute missing.\n");
 		return -EINVAL;
 	}
-
-	ovs_match_init(&match, &key, NULL);
-	err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+	err = ovs_nla_get_uid(a[OVS_FLOW_ATTR_UID], &uid, &uid_flags);
 	if (err)
 		return err;
+	if (a[OVS_FLOW_ATTR_KEY]) {
+		ovs_match_init(&match, &key, NULL);
+		err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+		if (err)
+			return err;
+	}
 
 	ovs_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1125,14 +1195,15 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 		goto unlock;
 	}
 
-	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
-	if (!flow) {
-		err = -ENOENT;
+	matchp = a[OVS_FLOW_ATTR_KEY] ? &match : NULL;
+	flow = ovs_flow_tbl_lookup_exact(&dp->table, &uid, matchp);
+	if (unlikely(IS_ERR(flow))) {
+		err = PTR_ERR(flow);
 		goto unlock;
 	}
 
 	reply = ovs_flow_cmd_build_info(dp, flow, ovs_header->dp_ifindex, info,
-					OVS_FLOW_CMD_NEW, true);
+					OVS_FLOW_CMD_NEW, true, uid_flags);
 	if (IS_ERR(reply)) {
 		err = PTR_ERR(reply);
 		goto unlock;
@@ -1151,12 +1222,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	struct ovs_header *ovs_header = info->userhdr;
 	struct sw_flow_key key;
 	struct sk_buff *reply;
-	struct sw_flow *flow;
+	struct sw_flow *flow = NULL;
 	struct datapath *dp;
-	struct sw_flow_match match;
+	struct sw_flow_match match, *matchp;
+	struct sw_flow_id uid;
+	u32 uid_flags;
 	int err;
 
-	if (likely(a[OVS_FLOW_ATTR_KEY])) {
+	err = ovs_nla_get_uid(a[OVS_FLOW_ATTR_UID], &uid, &uid_flags);
+	if (err)
+		return err;
+	if (a[OVS_FLOW_ATTR_KEY]) {
 		ovs_match_init(&match, &key, NULL);
 		err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
 		if (unlikely(err))
@@ -1169,13 +1245,16 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 		err = -ENODEV;
 		goto unlock;
 	}
-	if (unlikely(!a[OVS_FLOW_ATTR_KEY])) {
+
+	if (unlikely(!a[OVS_FLOW_ATTR_UID] && !a[OVS_FLOW_ATTR_KEY])) {
 		err = ovs_flow_tbl_flush(&dp->table);
 		goto unlock;
 	}
-	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
-	if (unlikely(!flow)) {
-		err = -ENOENT;
+
+	matchp = a[OVS_FLOW_ATTR_KEY] ? &match : NULL;
+	flow = ovs_flow_tbl_lookup_exact(&dp->table, &uid, matchp);
+	if (unlikely(IS_ERR(flow))) {
+		err = PTR_ERR(flow);
 		goto unlock;
 	}
 
@@ -1192,7 +1271,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 						     ovs_header->dp_ifindex,
 						     reply, info->snd_portid,
 						     info->snd_seq, 0,
-						     OVS_FLOW_CMD_DEL);
+						     OVS_FLOW_CMD_DEL, uid_flags);
 			rcu_read_unlock();
 			BUG_ON(err < 0);
 			ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
@@ -1213,8 +1292,15 @@ unlock:
 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
+	struct nlattr *nla, *uid;
 	struct table_instance *ti;
 	struct datapath *dp;
+	u32 uid_flags = 0;
+
+	nla = nlmsg_attrdata(cb->nlh, sizeof(*ovs_header));
+	uid = nla_find_nested(nla, OVS_FLOW_ATTR_UID);
+	if (uid && ovs_nla_get_uid(uid, NULL, &uid_flags))
+		OVS_NLERR("Error occurred parsing UID flags on dump");
 
 	rcu_read_lock();
 	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1237,7 +1323,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
 					   NETLINK_CB(cb->skb).portid,
 					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					   OVS_FLOW_CMD_NEW) < 0)
+					   OVS_FLOW_CMD_NEW, uid_flags) < 0)
 			break;
 
 		cb->args[0] = bucket;
@@ -1251,6 +1337,7 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
 	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
 	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
 	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
+	[OVS_FLOW_ATTR_UID] = { .type = NLA_NESTED },
 };
 
 static struct genl_ops dp_flow_genl_ops[] = {
@@ -1393,6 +1480,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	struct vport *vport;
 	struct ovs_net *ovs_net;
+	bool support_uid;
 	int err, i;
 
 	err = -EINVAL;
@@ -1410,8 +1498,11 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 
 	ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
 
+	ovs_dp_change(dp, a);
+
 	/* Allocate table. */
-	err = ovs_flow_tbl_init(&dp->table);
+	support_uid = dp->user_features & OVS_DP_F_INDEX_BY_UID;
+	err = ovs_flow_tbl_init(&dp->table, support_uid);
 	if (err)
 		goto err_free_dp;
 
@@ -1439,8 +1530,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	parms.port_no = OVSP_LOCAL;
 	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
 
-	ovs_dp_change(dp, a);
-
 	/* So far only local changes have been made, now need the lock. */
 	ovs_lock();
 
diff --git a/datapath/flow.h b/datapath/flow.h
index 44ed10d..bd86a69 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -199,6 +199,11 @@ struct sw_flow_match {
 	struct sw_flow_mask *mask;
 };
 
+struct sw_flow_id {
+	u32 *uid;
+	u32 uid_len;
+};
+
 struct sw_flow_actions {
 	struct rcu_head rcu;
 	u32 actions_len;
@@ -215,11 +220,14 @@ struct flow_stats {
 
 struct sw_flow {
 	struct rcu_head rcu;
-	struct hlist_node hash_node[2];
-	u32 hash;
+	struct {
+		struct hlist_node node[2];
+		u32 hash;
+	} flow_hash, uid_hash;
 	int stats_last_writer;		/* NUMA-node id of the last writer on
 					 * 'stats[0]'.
 					 */
+	struct sw_flow_id uid;
 	struct sw_flow_key key;
 	struct sw_flow_key unmasked_key;
 	struct sw_flow_mask *mask;
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 6c74841..29db876 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -1071,6 +1071,66 @@ free_newmask:
 	return err;
 }
 
+/* Support UIDs up to 128 bits. XXX: (256 / 8) below allows 256 bits -- confirm intended OVS_UID_ATTR_ID maximum. */
+#define MAX_UID_BUFSIZE		(256 / 8)
+
+size_t ovs_uid_attr_size(void)
+{
+	/* Whenever adding new OVS_UID_ATTR_ FIELDS, we should consider
+	 * updating this function.  */
+	return    nla_total_size(4)		    /* OVS_UID_ATTR_FLAGS */
+		+ nla_total_size(MAX_UID_BUFSIZE);  /* OVS_UID_ATTR_ID */
+}
+
+int ovs_nla_get_uid(const struct nlattr *attr, struct sw_flow_id *sfid,
+		    u32 *flags)
+{
+	static const struct nla_policy ovs_uid_policy[OVS_UID_ATTR_MAX + 1] = {
+		[OVS_UID_ATTR_FLAGS] = { .type = NLA_U32 },
+		[OVS_UID_ATTR_ID] = { .len = sizeof(u32) },
+	};
+	const struct nlattr *a[OVS_UID_ATTR_MAX + 1];
+	int err;
+
+	if (sfid) {
+		sfid->uid = NULL;
+		sfid->uid_len = 0;
+	}
+	if (flags)
+		*flags = 0;
+
+	if (!attr)
+		return 0;
+
+	err = nla_parse_nested((struct nlattr **)a, OVS_UID_ATTR_MAX, attr,
+			       ovs_uid_policy);
+	if (err)
+		return err;
+
+	if (sfid) {
+		if (a[OVS_UID_ATTR_ID]) {
+			size_t len;
+
+			len = nla_len(a[OVS_UID_ATTR_ID]);
+			if (len > MAX_UID_BUFSIZE) {
+				OVS_NLERR("Flow uid size (%zu bytes) exceeds "
+					  "maximum (%u bytes)\n", len,
+					  MAX_UID_BUFSIZE);
+					return -EINVAL;
+			}
+			sfid->uid = nla_data(a[OVS_UID_ATTR_ID]);
+			sfid->uid_len = len;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	if (flags && a[OVS_UID_ATTR_FLAGS])
+		*flags = nla_get_u32(a[OVS_UID_ATTR_FLAGS]);
+
+	return 0;
+}
+
 /**
  * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
  * @key: Receives extracted in_port, priority, tun_key and skb_mark.
diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h
index 90bbe37..0158c69 100644
--- a/datapath/flow_netlink.h
+++ b/datapath/flow_netlink.h
@@ -39,6 +39,7 @@
 
 size_t ovs_tun_key_attr_size(void);
 size_t ovs_key_attr_size(void);
+size_t ovs_uid_attr_size(void);
 
 void ovs_match_init(struct sw_flow_match *match,
 		    struct sw_flow_key *key, struct sw_flow_mask *mask);
@@ -52,6 +53,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
 		      const struct nlattr *);
 int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
 				  const struct ovs_tunnel_info *);
+int ovs_nla_get_uid(const struct nlattr *, struct sw_flow_id *, u32 *flags);
 
 int ovs_nla_copy_actions(const struct nlattr *attr,
 			 const struct sw_flow_key *key,
diff --git a/datapath/flow_table.c b/datapath/flow_table.c
index 10bf830..52006e6 100644
--- a/datapath/flow_table.c
+++ b/datapath/flow_table.c
@@ -80,6 +80,20 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
 		*d++ = *s++ & *m++;
 }
 
+int ovs_flow_uid(struct sw_flow_id *dst, const struct sw_flow_id *src)
+{
+	if (src->uid_len) {
+		dst->uid = kmalloc(src->uid_len, GFP_KERNEL);
+		if (!dst->uid)
+			return -ENOMEM;
+
+		memcpy(dst->uid, src->uid, src->uid_len);
+		dst->uid_len = src->uid_len;
+	}
+
+	return 0;
+}
+
 struct sw_flow *ovs_flow_alloc(void)
 {
 	struct sw_flow *flow;
@@ -90,6 +104,8 @@ struct sw_flow *ovs_flow_alloc(void)
 	if (!flow)
 		return ERR_PTR(-ENOMEM);
 
+	flow->uid.uid = NULL;
+	flow->uid.uid_len = 0;
 	flow->sf_acts = NULL;
 	flow->mask = NULL;
 	flow->stats_last_writer = NUMA_NO_NODE;
@@ -147,6 +163,7 @@ static void flow_free(struct sw_flow *flow)
 	int node;
 
 	kfree((struct sw_flow_actions __force *)flow->sf_acts);
+	kfree((struct sw_flow_id __force *)flow->uid.uid);
 	for_each_node(node)
 		if (flow->stats[node])
 			kmem_cache_free(flow_stats_cache,
@@ -263,7 +280,7 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
 	return 0;
 }
 
-int ovs_flow_tbl_init(struct flow_table *table)
+int ovs_flow_tbl_init(struct flow_table *table, bool support_uid)
 {
 	struct table_instance *ti;
 	struct mask_array *ma;
@@ -281,12 +298,24 @@ int ovs_flow_tbl_init(struct flow_table *table)
 	if (!ti)
 		goto free_mask_array;
 
+	if (support_uid) {
+		struct table_instance *uid_ti;
+
+		uid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
+		if (!uid_ti)
+			goto free_ti;
+		rcu_assign_pointer(table->uid_ti, uid_ti);
+	} else
+		rcu_assign_pointer(table->uid_ti, NULL);
+
 	rcu_assign_pointer(table->ti, ti);
 	rcu_assign_pointer(table->mask_array, ma);
 	table->last_rehash = jiffies;
 	table->count = 0;
 	return 0;
 
+free_ti:
+	__table_instance_destroy(ti);
 free_mask_array:
 	kfree(ma);
 free_mask_cache:
@@ -301,7 +330,8 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
 	__table_instance_destroy(ti);
 }
 
-static void table_instance_destroy(struct table_instance *ti, bool deferred)
+static void table_instance_destroy(struct table_instance *ti, bool deferred,
+				   bool uid)
 {
 	int i;
 
@@ -317,9 +347,14 @@ static void table_instance_destroy(struct table_instance *ti, bool deferred)
 		struct hlist_node *n;
 		int ver = ti->node_ver;
 
-		hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
-			hlist_del_rcu(&flow->hash_node[ver]);
-			ovs_flow_free(flow, deferred);
+		if (uid) {
+			hlist_for_each_entry_safe(flow, n, head, uid_hash.node[ver])
+				hlist_del_rcu(&flow->uid_hash.node[ver]);
+		} else {
+			hlist_for_each_entry_safe(flow, n, head, flow_hash.node[ver]) {
+				hlist_del_rcu(&flow->flow_hash.node[ver]);
+				ovs_flow_free(flow, deferred);
+			}
 		}
 	}
 
@@ -335,10 +370,12 @@ skip_flows:
 void ovs_flow_tbl_destroy(struct flow_table *table)
 {
 	struct table_instance *ti = (struct table_instance __force *)table->ti;
+	struct table_instance *uid_ti = (struct table_instance __force *)table->uid_ti;
 
 	free_percpu(table->mask_cache);
 	kfree((struct mask_array __force *)table->mask_array);
-	table_instance_destroy(ti, false);
+	table_instance_destroy(ti, false, false);
+	table_instance_destroy(uid_ti, false, true);
 }
 
 struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -353,7 +390,7 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
 	while (*bucket < ti->n_buckets) {
 		i = 0;
 		head = flex_array_get(ti->buckets, *bucket);
-		hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
+		hlist_for_each_entry_rcu(flow, head, flow_hash.node[ver]) {
 			if (i < *last) {
 				i++;
 				continue;
@@ -379,12 +416,20 @@ static void table_instance_insert(struct table_instance *ti, struct sw_flow *flo
 {
 	struct hlist_head *head;
 
-	head = find_bucket(ti, flow->hash);
-	hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head);
+	head = find_bucket(ti, flow->flow_hash.hash);
+	hlist_add_head_rcu(&flow->flow_hash.node[ti->node_ver], head);
+}
+
+static void uid_table_instance_insert(struct table_instance *ti, struct sw_flow *flow)
+{
+	struct hlist_head *head;
+
+	head = find_bucket(ti, flow->uid_hash.hash);
+	hlist_add_head_rcu(&flow->uid_hash.node[ti->node_ver], head);
 }
 
 static void flow_table_copy_flows(struct table_instance *old,
-				  struct table_instance *new)
+				  struct table_instance *new, bool uid)
 {
 	int old_ver;
 	int i;
@@ -399,42 +444,81 @@ static void flow_table_copy_flows(struct table_instance *old,
 
 		head = flex_array_get(old->buckets, i);
 
-		hlist_for_each_entry(flow, head, hash_node[old_ver])
-			table_instance_insert(new, flow);
+		if (uid)
+			hlist_for_each_entry(flow, head, uid_hash.node[old_ver])
+				uid_table_instance_insert(new, flow);
+		else
+			hlist_for_each_entry(flow, head, flow_hash.node[old_ver])
+				table_instance_insert(new, flow);
 	}
 
 	old->keep_flows = true;
 }
 
-static struct table_instance *table_instance_rehash(struct table_instance *ti,
-					    int n_buckets)
+static int flow_table_instance_alloc(struct table_instance **ti,
+				     struct table_instance **uid_ti,
+				     int n_buckets)
 {
-	struct table_instance *new_ti;
+	struct table_instance *new_ti, *new_uid_ti;
 
 	new_ti = table_instance_alloc(n_buckets);
 	if (!new_ti)
-		return NULL;
+		return -ENOMEM;
 
-	flow_table_copy_flows(ti, new_ti);
+	if (uid_ti) {
+		new_uid_ti = table_instance_alloc(n_buckets);
+		if (!new_uid_ti) {
+			__table_instance_destroy(new_ti);
+			return -ENOMEM;
+		}
+		*uid_ti = new_uid_ti;
+	}
+
+	*ti = new_ti;
+	return 0;
+}
+
+static int flow_table_rehash(struct table_instance *old_ti,
+			     struct table_instance *old_uid_ti, int n_buckets,
+			     struct table_instance **new_ti,
+			     struct table_instance **new_uid_ti)
+{
+	int err;
 
-	return new_ti;
+	err = flow_table_instance_alloc(new_ti, old_uid_ti ? new_uid_ti : NULL,
+					n_buckets);
+	if (err)
+		return err;
+
+	flow_table_copy_flows(old_ti, *new_ti, false);
+	if (old_uid_ti)
+		flow_table_copy_flows(old_uid_ti, *new_uid_ti, true);
+
+	return 0;
 }
 
 int ovs_flow_tbl_flush(struct flow_table *flow_table)
 {
-	struct table_instance *old_ti;
-	struct table_instance *new_ti;
+	struct table_instance *old_ti, *new_ti, *old_uid_ti;
+	struct table_instance *new_uid_ti = NULL;
+	int err;
 
 	old_ti = ovsl_dereference(flow_table->ti);
-	new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
-	if (!new_ti)
-		return -ENOMEM;
+	old_uid_ti = ovsl_dereference(flow_table->uid_ti);
+	if (old_uid_ti)
+		err = flow_table_instance_alloc(&new_ti, &new_uid_ti, TBL_MIN_BUCKETS);
+	else
+		err = flow_table_instance_alloc(&new_ti, NULL, TBL_MIN_BUCKETS);
+	if (err)
+		return err;
 
 	rcu_assign_pointer(flow_table->ti, new_ti);
+	rcu_assign_pointer(flow_table->uid_ti, new_uid_ti);
 	flow_table->last_rehash = jiffies;
 	flow_table->count = 0;
 
-	table_instance_destroy(old_ti, true);
+	table_instance_destroy(old_ti, true, false);
+	table_instance_destroy(old_uid_ti, true, true);
 	return 0;
 }
 
@@ -507,8 +591,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
 	hash = flow_hash(&masked_key, key_start, key_end);
 	head = find_bucket(ti, hash);
 	(*n_mask_hit)++;
-	hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) {
-		if (flow->mask == mask && flow->hash == hash &&
+	hlist_for_each_entry_rcu(flow, head, flow_hash.node[ti->node_ver]) {
+		if (flow->mask == mask && flow->flow_hash.hash == hash &&
 		    flow_cmp_masked_key(flow, &masked_key,
 					  key_start, key_end))
 			return flow;
@@ -626,8 +710,8 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
 	return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index);
 }
 
-struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
-					  struct sw_flow_match *match)
+static struct sw_flow *lookup_exact(struct flow_table *tbl,
+				    struct sw_flow_match *match)
 {
 	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
 	int i;
@@ -649,6 +733,58 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
 	return NULL;
 }
 
+static u32 uid_hash(const struct sw_flow_id *sfid)
+{
+	return arch_fast_hash2(sfid->uid, sfid->uid_len >> 2, 0);
+}
+
+static bool flow_cmp_uid(const struct sw_flow *flow,
+			 const struct sw_flow_id *sfid)
+{
+	if (flow->uid.uid_len != sfid->uid_len)
+		return false;
+
+	return !memcmp(flow->uid.uid, sfid->uid, sfid->uid_len);
+}
+
+static struct sw_flow *lookup_uid(struct flow_table *tbl,
+				  const struct sw_flow_id *uid)
+{
+	struct table_instance *ti = rcu_dereference_ovsl(tbl->uid_ti);
+	struct sw_flow *flow;
+	struct hlist_head *head;
+	u32 hash;
+
+	BUG_ON(!ti);
+	hash = uid_hash(uid);
+	head = find_bucket(ti, hash);
+	hlist_for_each_entry_rcu(flow, head, uid_hash.node[ti->node_ver]) {
+		if (flow->uid_hash.hash == hash && flow_cmp_uid(flow, uid))
+			return flow;
+	}
+	return NULL;
+}
+
+struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
+					  struct sw_flow_id *uid,
+					  struct sw_flow_match *match)
+{
+	struct sw_flow *flow;
+
+	if (rcu_access_pointer(tbl->uid_ti) && uid->uid_len) {
+		flow = lookup_uid(tbl, uid);
+	} else if (match) {
+		flow = lookup_exact(tbl, match);
+	} else {
+		OVS_NLERR("Flow key attribute not present in flow lookup.\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!flow)
+		return ERR_PTR(-ENOENT);
+	return flow;
+}
+
 int ovs_flow_tbl_num_masks(const struct flow_table *table)
 {
 	struct mask_array *ma;
@@ -657,9 +793,13 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table)
 	return ma->count;
 }
 
-static struct table_instance *table_instance_expand(struct table_instance *ti)
+static int flow_table_expand(struct table_instance *old_ti,
+			     struct table_instance *old_uid_ti,
+			     struct table_instance **new_ti,
+			     struct table_instance **new_uid_ti)
 {
-	return table_instance_rehash(ti, ti->n_buckets * 2);
+	return flow_table_rehash(old_ti, old_uid_ti, old_ti->n_buckets * 2,
+				 new_ti, new_uid_ti);
 }
 
 static void tbl_mask_array_delete_mask(struct mask_array *ma,
@@ -709,9 +849,12 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
 void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
 {
 	struct table_instance *ti = ovsl_dereference(table->ti);
+	struct table_instance *uid_ti = ovsl_dereference(table->uid_ti);
 
 	BUG_ON(table->count == 0);
-	hlist_del_rcu(&flow->hash_node[ti->node_ver]);
+	hlist_del_rcu(&flow->flow_hash.node[ti->node_ver]);
+	if (uid_ti)
+		hlist_del_rcu(&flow->uid_hash.node[uid_ti->node_ver]);
 	table->count--;
 
 	/* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
@@ -816,29 +959,39 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
 int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
 			struct sw_flow_mask *mask)
 {
-	struct table_instance *new_ti = NULL;
-	struct table_instance *ti;
+	struct table_instance *new_ti = NULL, *new_uid_ti = NULL;
+	struct table_instance *ti, *uid_ti;
 	int err;
 
 	err = flow_mask_insert(table, flow, mask);
 	if (err)
 		return err;
 
-	flow->hash = flow_hash(&flow->key, flow->mask->range.start,
-			flow->mask->range.end);
+	flow->flow_hash.hash = flow_hash(&flow->key, flow->mask->range.start,
+					 flow->mask->range.end);
 	ti = ovsl_dereference(table->ti);
 	table_instance_insert(ti, flow);
 	table->count++;
 
+	uid_ti = ovsl_dereference(table->uid_ti);
+	if (uid_ti) {
+		BUG_ON(!flow->uid.uid);
+		flow->uid_hash.hash = uid_hash(&flow->uid);
+		uid_table_instance_insert(uid_ti, flow);
+	}
+
 	/* Expand table, if necessary, to make room. */
 	if (table->count > ti->n_buckets)
-		new_ti = table_instance_expand(ti);
+		flow_table_expand(ti, uid_ti, &new_ti, &new_uid_ti);
 	else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
-		new_ti = table_instance_rehash(ti, ti->n_buckets);
+		flow_table_rehash(ti, uid_ti, ti->n_buckets,
+				  &new_ti, &new_uid_ti);
 
 	if (new_ti) {
 		rcu_assign_pointer(table->ti, new_ti);
-		table_instance_destroy(ti, true);
+		rcu_assign_pointer(table->uid_ti, new_uid_ti);
+		table_instance_destroy(ti, true, false);
+		table_instance_destroy(uid_ti, true, true);
 		table->last_rehash = jiffies;
 	}
 	return 0;
diff --git a/datapath/flow_table.h b/datapath/flow_table.h
index a05d36a..27aa1a4 100644
--- a/datapath/flow_table.h
+++ b/datapath/flow_table.h
@@ -60,6 +60,7 @@ struct flow_table {
 	struct table_instance __rcu *ti;
 	struct mask_cache_entry __percpu *mask_cache;
 	struct mask_array __rcu *mask_array;
+	struct table_instance __rcu *uid_ti;
 	unsigned long last_rehash;
 	unsigned int count;
 };
@@ -72,7 +73,7 @@ void ovs_flow_exit(void);
 struct sw_flow *ovs_flow_alloc(void);
 void ovs_flow_free(struct sw_flow *, bool deferred);
 
-int ovs_flow_tbl_init(struct flow_table *);
+int ovs_flow_tbl_init(struct flow_table *, bool support_uid);
 int ovs_flow_tbl_count(struct flow_table *table);
 void ovs_flow_tbl_destroy(struct flow_table *table);
 int ovs_flow_tbl_flush(struct flow_table *flow_table);
@@ -90,11 +91,13 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
 struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
 				    const struct sw_flow_key *);
 struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *,
+					  struct sw_flow_id *sfid,
 					  struct sw_flow_match *match);
 
 bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
 			       struct sw_flow_match *match);
 
+int ovs_flow_uid(struct sw_flow_id *dst, const struct sw_flow_id *src);
 void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
 		       const struct sw_flow_mask *mask);
 #endif /* flow_table.h */
diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
index 21b81ae..4b05d20 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -142,6 +142,9 @@ struct ovs_vport_stats {
 /* Allow datapath to associate multiple Netlink PIDs to each vport */
 #define OVS_DP_F_VPORT_PIDS	(1 << 1)
 
+/* Force the datapath to index flows by userspace flow ID. */
+#define OVS_DP_F_INDEX_BY_UID   (1 << 2)
+
 /* Fixed logical ports. */
 #define OVSP_LOCAL      ((__u32)0)
 
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 2405ccd..4510adb 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -254,6 +254,7 @@ dpif_linux_open(const struct dpif_class *class OVS_UNUSED, const char *name,
     dp_request.name = name;
     dp_request.user_features |= OVS_DP_F_UNALIGNED;
     dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
+    dp_request.user_features |= OVS_DP_F_INDEX_BY_UID;
     error = dpif_linux_dp_transact(&dp_request, &dp, &buf);
     if (error) {
         return error;
-- 
1.7.10.4




More information about the dev mailing list