[ovs-dev] [PATCHv9 09/12] datapath: Add support for unique flow identifiers.
Joe Stringer
joestringer at nicira.com
Fri Oct 31 23:55:43 UTC 2014
If a datapath is created with the flag OVS_DP_F_INDEX_BY_UFID, then an
additional table_instance is added to the flow_table, which is indexed
by unique identifiers ("UFID"). Userspace implementations can specify a
UFID of up to 128 bits along with a flow operation as shorthand for the
key. This allows revalidation performance improvements of up to 50%.
If a datapath is created using OVS_DP_F_INDEX_BY_UFID and a UFID is not
specified at flow setup time, then that operation will fail. If
OVS_UFID_F_* flags are specified for an operation, then they will modify
what is returned through the operation. For instance, OVS_UFID_F_SKIP_KEY
allows the datapath to skip returning the key (e.g., during a dump, to reduce
memory copying).
Signed-off-by: Joe Stringer <joestringer at nicira.com>
---
v9: No change.
v8: Rename UID -> UFID "unique flow identifier".
Fix null dereference when adding flow without uid or mask.
If a UFID is specified without a match and the lookup fails, return ENOENT.
Rebase.
v7: Remove OVS_DP_F_INDEX_BY_UID.
Rework UID serialisation for variable-length UID.
Log error if uid not specified and OVS_UID_F_SKIP_KEY is set.
Rebase against "probe" logging changes.
v6: Fix documentation for supporting UIDs between 32-128 bits.
Minor style fixes.
Rebase.
v5: No change.
v4: Fix memory leaks.
Log when triggering the older userspace issue above.
v3: Initial post.
---
datapath/datapath.c | 238 +++++++++++++++------
datapath/flow.h | 13 +-
datapath/flow_netlink.c | 42 ++++
datapath/flow_netlink.h | 1 +
datapath/flow_table.c | 230 ++++++++++++++++----
datapath/flow_table.h | 3 +
datapath/linux/compat/include/linux/openvswitch.h | 25 +++
7 files changed, 442 insertions(+), 110 deletions(-)
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 6820a95..924898c 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -674,11 +674,18 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
}
}
-static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
+static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
+ const struct sw_flow_id *sfid)
{
+ size_t ufid_size = 0;
+
+ if (!sfid->kernel_only)
+ ufid_size = nla_total_size(sfid->ufid_len);
+
return NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */
+ nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */
+ + ufid_size /* OVS_FLOW_ATTR_UFID */
+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
+ nla_total_size(8) /* OVS_FLOW_ATTR_USED */
@@ -687,31 +694,42 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_match(const struct sw_flow *flow,
- struct sk_buff *skb)
+ struct sk_buff *skb, u32 ufid_flags)
{
struct nlattr *nla;
int err;
- /* Fill flow key. */
- nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
- if (!nla)
- return -EMSGSIZE;
+ /* Fill flow key.
+ * If userspace didn't specify a UFID, then ignore the SKIP_KEY flag. */
+ if (!(ufid_flags & OVS_UFID_F_SKIP_KEY) ||
+ flow->ufid.kernel_only) {
+ nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
+ if (!nla)
+ return -EMSGSIZE;
- err = ovs_nla_put_flow(&flow->unmasked_key,
- &flow->unmasked_key, skb);
- if (err)
- return err;
- nla_nest_end(skb, nla);
+ err = ovs_nla_put_flow(&flow->unmasked_key,
+ &flow->unmasked_key, skb);
+ if (err)
+ return err;
+ nla_nest_end(skb, nla);
+
+ if (flow->ufid.kernel_only &&
+ (ufid_flags & OVS_UFID_F_SKIP_KEY))
+ OVS_NLERR(true, "Skip key requested for flow that does "
+ "not have a userspace-specified UFID.");
+ }
/* Fill flow mask. */
- nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
- if (!nla)
- return -EMSGSIZE;
+ if (!(ufid_flags & OVS_UFID_F_SKIP_MASK)) {
+ nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
+ if (!nla)
+ return -EMSGSIZE;
- err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
- if (err)
- return err;
- nla_nest_end(skb, nla);
+ err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
+ if (err)
+ return err;
+ nla_nest_end(skb, nla);
+ }
return 0;
}
@@ -742,6 +760,32 @@ static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
}
/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_ufid(const struct sw_flow *flow,
+ struct sk_buff *skb)
+{
+ struct nlattr *start;
+ const struct sw_flow_id *sfid = &flow->ufid;
+
+ BUG_ON(!sfid->ufid_len);
+ if (sfid->kernel_only)
+ return 0;
+
+ start = nla_nest_start(skb, OVS_FLOW_ATTR_UFID);
+ if (start) {
+ int err;
+
+ err = nla_put(skb, OVS_UFID_ATTR_ID, sfid->ufid_len,
+ sfid->ufid);
+ if (err)
+ return err;
+ nla_nest_end(skb, start);
+ } else
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
struct sk_buff *skb, int skb_orig_len)
{
@@ -784,7 +828,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
struct sk_buff *skb, u32 portid,
- u32 seq, u32 flags, u8 cmd)
+ u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
const int skb_orig_len = skb->len;
struct ovs_header *ovs_header;
@@ -795,18 +839,24 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
return -EMSGSIZE;
ovs_header->dp_ifindex = dp_ifindex;
- err = ovs_flow_cmd_fill_match(flow, skb);
+ err = ovs_flow_cmd_fill_match(flow, skb, ufid_flags);
if (err)
goto error;
- err = ovs_flow_cmd_fill_stats(flow, skb);
+ err = ovs_flow_cmd_fill_ufid(flow, skb);
if (err)
goto error;
- err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
+ err = ovs_flow_cmd_fill_stats(flow, skb);
if (err)
goto error;
+ if (!(ufid_flags & OVS_UFID_F_SKIP_ACTIONS)) {
+ err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
+ if (err)
+ goto error;
+ }
+
return genlmsg_end(skb, ovs_header);
error:
@@ -816,6 +866,7 @@ error:
/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
+ const struct sw_flow_id *sfid,
struct genl_info *info,
bool always)
{
@@ -825,7 +876,8 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act
GROUP_ID(&ovs_dp_flow_multicast_group)))
return NULL;
- skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL);
+ skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts, sfid), info,
+ GFP_KERNEL);
if (!skb)
return ERR_PTR(-ENOMEM);
@@ -838,19 +890,19 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct datapath *dp,
const struct sw_flow *flow,
int dp_ifindex,
struct genl_info *info, u8 cmd,
- bool always)
+ bool always, u32 ufid_flags)
{
struct sk_buff *skb;
int retval;
- skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info,
- always);
+ skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
+ &flow->ufid, info, always);
if (IS_ERR_OR_NULL(skb))
return skb;
retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
info->snd_portid, info->snd_seq, 0,
- cmd);
+ cmd, ufid_flags);
BUG_ON(retval < 0);
return skb;
}
@@ -865,6 +917,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct sw_flow_actions *acts;
struct sw_flow_match match;
+ struct sw_flow_id sfid;
+ u32 ufid_flags;
int error;
bool log = !a[OVS_FLOW_ATTR_PROBE];
@@ -898,6 +952,13 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
+ /* Extract ufid. */
+ error = ovs_nla_get_ufid(a[OVS_FLOW_ATTR_UFID], &sfid, &ufid_flags);
+ if (!error)
+ error = ovs_flow_ufid(new_flow, &sfid);
+ if (error)
+ goto err_kfree_flow;
+
/* Validate actions. */
error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
&acts, log);
@@ -909,7 +970,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_kfree_flow;
}
- reply = ovs_flow_cmd_alloc_info(acts, info, false);
+ reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false);
if (IS_ERR(reply)) {
error = PTR_ERR(reply);
goto err_kfree_acts;
@@ -921,6 +982,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
error = -ENODEV;
goto err_unlock_ovs;
}
+
/* Check if this is a duplicate flow */
flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key);
if (likely(!flow)) {
@@ -938,7 +1000,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
- OVS_FLOW_CMD_NEW);
+ OVS_FLOW_CMD_NEW,
+ ufid_flags);
BUG_ON(error < 0);
}
ovs_unlock();
@@ -959,9 +1022,10 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
/* The unmasked key has to be the same for flow updates. */
if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
/* Look for any overlapping flow. */
- flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
- if (!flow) {
- error = -ENOENT;
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &sfid,
+ &match);
+ if (unlikely(IS_ERR(flow))) {
+ error = PTR_ERR(flow);
goto err_unlock_ovs;
}
}
@@ -974,7 +1038,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
- OVS_FLOW_CMD_NEW);
+ OVS_FLOW_CMD_NEW,
+ ufid_flags);
BUG_ON(error < 0);
}
ovs_unlock();
@@ -1024,27 +1089,34 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
struct sw_flow_key key;
- struct sw_flow *flow;
+ struct sw_flow *flow = NULL;
struct sw_flow_mask mask;
struct sk_buff *reply = NULL;
struct datapath *dp;
struct sw_flow_actions *old_acts = NULL, *acts = NULL;
- struct sw_flow_match match;
+ struct sw_flow_match match, *matchp;
+ struct sw_flow_id ufid;
+ u32 ufid_flags;
int error;
bool log = !a[OVS_FLOW_ATTR_PROBE];
/* Extract key. */
error = -EINVAL;
- if (!a[OVS_FLOW_ATTR_KEY]) {
- OVS_NLERR(log, "Flow key attribute not present in set flow.");
+ if (!a[OVS_FLOW_ATTR_UFID] && !a[OVS_FLOW_ATTR_KEY]) {
+ OVS_NLERR(log,
+ "Flow index attribute not present in set flow.\n");
goto error;
}
-
- ovs_match_init(&match, &key, &mask);
- error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
- a[OVS_FLOW_ATTR_MASK], log);
+ error = ovs_nla_get_ufid(a[OVS_FLOW_ATTR_UFID], &ufid, &ufid_flags);
if (error)
goto error;
+ if (a[OVS_FLOW_ATTR_KEY]) {
+ ovs_match_init(&match, &key, &mask);
+ error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
+ a[OVS_FLOW_ATTR_MASK], log);
+ if (error)
+ goto error;
+ }
/* Validate actions. */
if (a[OVS_FLOW_ATTR_ACTIONS]) {
@@ -1056,7 +1128,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
}
/* Can allocate before locking if have acts. */
- reply = ovs_flow_cmd_alloc_info(acts, info, false);
+ reply = ovs_flow_cmd_alloc_info(acts, &ufid, info, false);
if (IS_ERR(reply)) {
error = PTR_ERR(reply);
goto err_kfree_acts;
@@ -1070,9 +1142,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
goto err_unlock_ovs;
}
/* Check that the flow exists. */
- flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
- if (unlikely(!flow)) {
- error = -ENOENT;
+ matchp = a[OVS_FLOW_ATTR_KEY] ? &match : NULL;
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &ufid, matchp);
+ if (unlikely(IS_ERR(flow))) {
+ error = PTR_ERR(flow);
goto err_unlock_ovs;
}
@@ -1086,14 +1159,16 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
- OVS_FLOW_CMD_NEW);
+ OVS_FLOW_CMD_NEW,
+ ufid_flags);
BUG_ON(error < 0);
}
} else {
/* Could not alloc without acts before locking. */
reply = ovs_flow_cmd_build_info(dp, flow,
ovs_header->dp_ifindex,
- info, OVS_FLOW_CMD_NEW, false);
+ info, OVS_FLOW_CMD_NEW, false,
+ ufid_flags);
if (unlikely(IS_ERR(reply))) {
error = PTR_ERR(reply);
goto err_unlock_ovs;
@@ -1126,22 +1201,29 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
struct ovs_header *ovs_header = info->userhdr;
struct sw_flow_key key;
struct sk_buff *reply;
- struct sw_flow *flow;
+ struct sw_flow *flow = NULL;
struct datapath *dp;
- struct sw_flow_match match;
+ struct sw_flow_match match, *matchp;
+ struct sw_flow_id ufid;
+ u32 ufid_flags;
int err;
bool log = !a[OVS_FLOW_ATTR_PROBE];
- if (!a[OVS_FLOW_ATTR_KEY]) {
+ err = -EINVAL;
+ if (!a[OVS_FLOW_ATTR_UFID] && !a[OVS_FLOW_ATTR_KEY]) {
OVS_NLERR(log,
- "Flow get message rejected, Key attribute missing.");
+ "Flow get message rejected, Index attribute missing.\n");
return -EINVAL;
}
-
- ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log);
+ err = ovs_nla_get_ufid(a[OVS_FLOW_ATTR_UFID], &ufid, &ufid_flags);
if (err)
return err;
+ if (a[OVS_FLOW_ATTR_KEY]) {
+ ovs_match_init(&match, &key, NULL);
+ err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log);
+ if (err)
+ return err;
+ }
ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1150,14 +1232,15 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
goto unlock;
}
- flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
- if (!flow) {
- err = -ENOENT;
+ matchp = a[OVS_FLOW_ATTR_KEY] ? &match : NULL;
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &ufid, matchp);
+ if (unlikely(IS_ERR(flow))) {
+ err = PTR_ERR(flow);
goto unlock;
}
reply = ovs_flow_cmd_build_info(dp, flow, ovs_header->dp_ifindex, info,
- OVS_FLOW_CMD_NEW, true);
+ OVS_FLOW_CMD_NEW, true, ufid_flags);
if (IS_ERR(reply)) {
err = PTR_ERR(reply);
goto unlock;
@@ -1176,18 +1259,26 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct ovs_header *ovs_header = info->userhdr;
struct sw_flow_key key;
struct sk_buff *reply;
- struct sw_flow *flow;
+ struct sw_flow *flow = NULL;
struct datapath *dp;
- struct sw_flow_match match;
+ struct sw_flow_match match, *matchp;
+ struct sw_flow_id ufid;
+ u32 ufid_flags;
int err;
bool log = !a[OVS_FLOW_ATTR_PROBE];
- if (likely(a[OVS_FLOW_ATTR_KEY])) {
+ err = ovs_nla_get_ufid(a[OVS_FLOW_ATTR_UFID], &ufid, &ufid_flags);
+ if (err)
+ return err;
+ if (a[OVS_FLOW_ATTR_KEY]) {
ovs_match_init(&match, &key, NULL);
err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
log);
if (unlikely(err))
return err;
+ matchp = &match;
+ } else {
+ matchp = NULL;
}
ovs_lock();
@@ -1196,13 +1287,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
err = -ENODEV;
goto unlock;
}
- if (unlikely(!a[OVS_FLOW_ATTR_KEY])) {
+
+ if (unlikely(!a[OVS_FLOW_ATTR_UFID] && !a[OVS_FLOW_ATTR_KEY])) {
err = ovs_flow_tbl_flush(&dp->table);
goto unlock;
}
- flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
- if (unlikely(!flow)) {
- err = -ENOENT;
+
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &ufid, matchp);
+ if (unlikely(IS_ERR(flow))) {
+ err = PTR_ERR(flow);
goto unlock;
}
@@ -1210,7 +1303,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
ovs_unlock();
reply = ovs_flow_cmd_alloc_info(rcu_dereference_raw(flow->sf_acts),
- info, false);
+ &ufid, info, false);
if (likely(reply)) {
if (likely(!IS_ERR(reply))) {
@@ -1219,7 +1312,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
- OVS_FLOW_CMD_DEL);
+ OVS_FLOW_CMD_DEL,
+ ufid_flags);
rcu_read_unlock();
BUG_ON(err < 0);
ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
@@ -1240,8 +1334,15 @@ unlock:
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
+ struct nlattr *nla, *ufid;
struct table_instance *ti;
struct datapath *dp;
+ u32 ufid_flags = 0;
+
+ nla = nlmsg_attrdata(cb->nlh, sizeof(*ovs_header));
+ ufid = nla_find_nested(nla, OVS_FLOW_ATTR_UFID);
+ if (ufid && ovs_nla_get_ufid(ufid, NULL, &ufid_flags))
+ OVS_NLERR(true, "Error occurred parsing UFID flags on dump");
rcu_read_lock();
dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1264,7 +1365,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- OVS_FLOW_CMD_NEW) < 0)
+ OVS_FLOW_CMD_NEW, ufid_flags) < 0)
break;
cb->args[0] = bucket;
@@ -1280,6 +1381,7 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
[OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
+ [OVS_FLOW_ATTR_UFID] = { .type = NLA_NESTED },
};
static struct genl_ops dp_flow_genl_ops[] = {
diff --git a/datapath/flow.h b/datapath/flow.h
index c78b864..8744043 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -199,6 +199,12 @@ struct sw_flow_match {
struct sw_flow_mask *mask;
};
+struct sw_flow_id {
+ u8 *ufid;
+ u8 ufid_len;
+ bool kernel_only; /* True if userspace omits the UFID field. */
+};
+
struct sw_flow_actions {
struct rcu_head rcu;
u32 actions_len;
@@ -215,11 +221,14 @@ struct flow_stats {
struct sw_flow {
struct rcu_head rcu;
- struct hlist_node hash_node[2];
- u32 hash;
+ struct {
+ struct hlist_node node[2];
+ u32 hash;
+ } flow_hash, ufid_hash;
int stats_last_writer; /* NUMA-node id of the last writer on
* 'stats[0]'.
*/
+ struct sw_flow_id ufid;
struct sw_flow_key key;
struct sw_flow_key unmasked_key;
struct sw_flow_mask *mask;
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 37b0bdd..f46632d 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -1111,6 +1111,48 @@ free_newmask:
return err;
}
+int ovs_nla_get_ufid(const struct nlattr *attr, struct sw_flow_id *sfid,
+ u32 *flags)
+{
+ static const struct nla_policy ovs_ufid_policy[OVS_UFID_ATTR_MAX + 1] = {
+ [OVS_UFID_ATTR_FLAGS] = { .type = NLA_U32 },
+ [OVS_UFID_ATTR_ID] = { .len = sizeof(u32) },
+ };
+ const struct nlattr *a[OVS_UFID_ATTR_MAX + 1];
+ int err;
+
+ if (sfid) {
+ sfid->ufid = NULL;
+ sfid->ufid_len = 0;
+ sfid->kernel_only = true;
+ }
+ if (flags)
+ *flags = 0;
+
+ if (!attr)
+ return 0;
+
+ err = nla_parse_nested((struct nlattr **)a, OVS_UFID_ATTR_MAX, attr,
+ ovs_ufid_policy);
+ if (err)
+ return err;
+
+ if (sfid) {
+ if (a[OVS_UFID_ATTR_ID]) {
+ sfid->ufid = nla_data(a[OVS_UFID_ATTR_ID]);
+ sfid->ufid_len = nla_len(a[OVS_UFID_ATTR_ID]);
+ sfid->kernel_only = false;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ if (flags && a[OVS_UFID_ATTR_FLAGS])
+ *flags = nla_get_u32(a[OVS_UFID_ATTR_FLAGS]);
+
+ return 0;
+}
+
/**
* ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
* @key: Receives extracted in_port, priority, tun_key and skb_mark.
diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h
index 577f12b..380b6df 100644
--- a/datapath/flow_netlink.h
+++ b/datapath/flow_netlink.h
@@ -52,6 +52,7 @@ int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key,
const struct nlattr *mask, bool log);
int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
const struct ovs_tunnel_info *);
+int ovs_nla_get_ufid(const struct nlattr *, struct sw_flow_id *, u32 *flags);
int ovs_nla_copy_actions(const struct nlattr *attr,
const struct sw_flow_key *key,
diff --git a/datapath/flow_table.c b/datapath/flow_table.c
index ad410fd..b2d6ce3 100644
--- a/datapath/flow_table.c
+++ b/datapath/flow_table.c
@@ -90,6 +90,9 @@ struct sw_flow *ovs_flow_alloc(void)
if (!flow)
return ERR_PTR(-ENOMEM);
+ flow->ufid.ufid = NULL;
+ flow->ufid.ufid_len = 0;
+ flow->ufid.kernel_only = true;
flow->sf_acts = NULL;
flow->mask = NULL;
flow->stats_last_writer = NUMA_NO_NODE;
@@ -147,6 +150,7 @@ static void flow_free(struct sw_flow *flow)
int node;
kfree(rcu_dereference_raw(flow->sf_acts));
+ kfree(rcu_dereference_raw(flow->ufid.ufid));
for_each_node(node)
if (flow->stats[node])
kmem_cache_free(flow_stats_cache,
@@ -265,7 +269,7 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
int ovs_flow_tbl_init(struct flow_table *table)
{
- struct table_instance *ti;
+ struct table_instance *ti, *ufid_ti;
struct mask_array *ma;
table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
@@ -277,16 +281,23 @@ int ovs_flow_tbl_init(struct flow_table *table)
if (!ma)
goto free_mask_cache;
+ ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
+ if (!ufid_ti)
+ goto free_mask_array;
+
ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ti)
- goto free_mask_array;
+ goto free_ti;
rcu_assign_pointer(table->ti, ti);
+ rcu_assign_pointer(table->ufid_ti, ufid_ti);
rcu_assign_pointer(table->mask_array, ma);
table->last_rehash = jiffies;
table->count = 0;
return 0;
+free_ti:
+ __table_instance_destroy(ufid_ti);
free_mask_array:
kfree(ma);
free_mask_cache:
@@ -301,7 +312,8 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
__table_instance_destroy(ti);
}
-static void table_instance_destroy(struct table_instance *ti, bool deferred)
+static void table_instance_destroy(struct table_instance *ti, bool deferred,
+ bool ufid)
{
int i;
@@ -317,9 +329,14 @@ static void table_instance_destroy(struct table_instance *ti, bool deferred)
struct hlist_node *n;
int ver = ti->node_ver;
- hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
- hlist_del_rcu(&flow->hash_node[ver]);
- ovs_flow_free(flow, deferred);
+ if (ufid) {
+ hlist_for_each_entry_safe(flow, n, head, ufid_hash.node[ver])
+ hlist_del_rcu(&flow->ufid_hash.node[ver]);
+ } else {
+ hlist_for_each_entry_safe(flow, n, head, flow_hash.node[ver]) {
+ hlist_del_rcu(&flow->flow_hash.node[ver]);
+ ovs_flow_free(flow, deferred);
+ }
}
}
@@ -336,10 +353,12 @@ skip_flows:
void ovs_flow_tbl_destroy(struct flow_table *table)
{
struct table_instance *ti = rcu_dereference_raw(table->ti);
+ struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
free_percpu(table->mask_cache);
- kfree(rcu_dereference_raw(table->mask_array));
- table_instance_destroy(ti, false);
+ kfree((struct mask_array __force *)table->mask_array);
+ table_instance_destroy(ti, false, false);
+ table_instance_destroy(ufid_ti, false, true);
}
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -354,7 +373,7 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
while (*bucket < ti->n_buckets) {
i = 0;
head = flex_array_get(ti->buckets, *bucket);
- hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
+ hlist_for_each_entry_rcu(flow, head, flow_hash.node[ver]) {
if (i < *last) {
i++;
continue;
@@ -380,12 +399,21 @@ static void table_instance_insert(struct table_instance *ti, struct sw_flow *flo
{
struct hlist_head *head;
- head = find_bucket(ti, flow->hash);
- hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head);
+ head = find_bucket(ti, flow->flow_hash.hash);
+ hlist_add_head_rcu(&flow->flow_hash.node[ti->node_ver], head);
+}
+
+static void ufid_table_instance_insert(struct table_instance *ti,
+ struct sw_flow *flow)
+{
+ struct hlist_head *head;
+
+ head = find_bucket(ti, flow->ufid_hash.hash);
+ hlist_add_head_rcu(&flow->ufid_hash.node[ti->node_ver], head);
}
static void flow_table_copy_flows(struct table_instance *old,
- struct table_instance *new)
+ struct table_instance *new, bool ufid)
{
int old_ver;
int i;
@@ -400,42 +428,75 @@ static void flow_table_copy_flows(struct table_instance *old,
head = flex_array_get(old->buckets, i);
- hlist_for_each_entry(flow, head, hash_node[old_ver])
- table_instance_insert(new, flow);
+ if (ufid)
+ hlist_for_each_entry(flow, head, ufid_hash.node[old_ver])
+ ufid_table_instance_insert(new, flow);
+ else
+ hlist_for_each_entry(flow, head, flow_hash.node[old_ver])
+ table_instance_insert(new, flow);
}
old->keep_flows = true;
}
-static struct table_instance *table_instance_rehash(struct table_instance *ti,
- int n_buckets)
+static int flow_table_instance_alloc(struct table_instance **ti,
+ struct table_instance **ufid_ti,
+ int n_buckets)
{
- struct table_instance *new_ti;
+ struct table_instance *new_ti, *new_ufid_ti;
new_ti = table_instance_alloc(n_buckets);
if (!new_ti)
- return NULL;
+ return -ENOMEM;
+
+ new_ufid_ti = table_instance_alloc(n_buckets);
+ if (!new_ufid_ti) {
+ __table_instance_destroy(new_ti);
+ return -ENOMEM;
+ }
+
+ *ti = new_ti;
+ *ufid_ti = new_ufid_ti;
+ return 0;
+}
- flow_table_copy_flows(ti, new_ti);
+static int flow_table_rehash(struct table_instance *old_ti,
+ struct table_instance *old_ufid_ti, int n_buckets,
+ struct table_instance **new_ti,
+ struct table_instance **new_ufid_ti)
+{
+ int err;
+
+ err = flow_table_instance_alloc(new_ti, old_ufid_ti ? new_ufid_ti : NULL,
+ n_buckets);
+ if (err)
+ return err;
- return new_ti;
+ flow_table_copy_flows(old_ti, *new_ti, false);
+ flow_table_copy_flows(old_ufid_ti, *new_ufid_ti, true);
+
+ return 0;
}
int ovs_flow_tbl_flush(struct flow_table *flow_table)
{
- struct table_instance *old_ti;
- struct table_instance *new_ti;
+ struct table_instance *old_ti, *new_ti, *old_ufid_ti;
+ struct table_instance *new_ufid_ti = NULL;
+ int err;
old_ti = ovsl_dereference(flow_table->ti);
- new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
- if (!new_ti)
- return -ENOMEM;
+ old_ufid_ti = ovsl_dereference(flow_table->ufid_ti);
+ err = flow_table_instance_alloc(&new_ti, &new_ufid_ti, TBL_MIN_BUCKETS);
+ if (err)
+ return err;
rcu_assign_pointer(flow_table->ti, new_ti);
+ rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti);
flow_table->last_rehash = jiffies;
flow_table->count = 0;
- table_instance_destroy(old_ti, true);
+ table_instance_destroy(old_ti, true, false);
+ table_instance_destroy(old_ufid_ti, true, true);
return 0;
}
@@ -508,8 +569,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
hash = flow_hash(&masked_key, key_start, key_end);
head = find_bucket(ti, hash);
(*n_mask_hit)++;
- hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) {
- if (flow->mask == mask && flow->hash == hash &&
+ hlist_for_each_entry_rcu(flow, head, flow_hash.node[ti->node_ver]) {
+ if (flow->mask == mask && flow->flow_hash.hash == hash &&
flow_cmp_masked_key(flow, &masked_key,
key_start, key_end))
return flow;
@@ -627,8 +688,8 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index);
}
-struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
- const struct sw_flow_match *match)
+static struct sw_flow *lookup_exact(struct flow_table *tbl,
+ const struct sw_flow_match *match)
{
struct mask_array *ma = ovsl_dereference(tbl->mask_array);
int i;
@@ -650,6 +711,54 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
return NULL;
}
+static u32 ufid_hash(const struct sw_flow_id *sfid)
+{
+ return arch_fast_hash(sfid->ufid, sfid->ufid_len, 0);
+}
+
+static bool flow_cmp_ufid(const struct sw_flow *flow,
+ const struct sw_flow_id *sfid)
+{
+ if (flow->ufid.ufid_len != sfid->ufid_len)
+ return false;
+
+ return !memcmp(flow->ufid.ufid, sfid->ufid, sfid->ufid_len);
+}
+
+static struct sw_flow *lookup_ufid(struct flow_table *tbl,
+ const struct sw_flow_id *ufid)
+{
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti);
+ struct sw_flow *flow;
+ struct hlist_head *head;
+ u32 hash;
+
+ hash = ufid_hash(ufid);
+ head = find_bucket(ti, hash);
+ hlist_for_each_entry_rcu(flow, head, ufid_hash.node[ti->node_ver]) {
+ if (flow->ufid_hash.hash == hash && flow_cmp_ufid(flow, ufid))
+ return flow;
+ }
+ return NULL;
+}
+
+struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
+ const struct sw_flow_id *ufid,
+ const struct sw_flow_match *match)
+{
+ struct sw_flow *flow = NULL;
+
+ BUG_ON(!ufid->ufid_len && !match);
+ if (ufid->ufid_len)
+ flow = lookup_ufid(tbl, ufid);
+ if (!flow && match)
+ flow = lookup_exact(tbl, match);
+ if (!flow)
+ return ERR_PTR(-ENOENT);
+
+ return flow;
+}
+
int ovs_flow_tbl_num_masks(const struct flow_table *table)
{
struct mask_array *ma;
@@ -658,9 +767,13 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table)
return ma->count;
}
-static struct table_instance *table_instance_expand(struct table_instance *ti)
+static int flow_table_expand(struct table_instance *old_ti,
+ struct table_instance *old_ufid_ti,
+ struct table_instance **new_ti,
+ struct table_instance **new_ufid_ti)
{
- return table_instance_rehash(ti, ti->n_buckets * 2);
+ return flow_table_rehash(old_ti, old_ufid_ti, old_ti->n_buckets * 2,
+ new_ti, new_ufid_ti);
}
static void tbl_mask_array_delete_mask(struct mask_array *ma,
@@ -710,9 +823,11 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
struct table_instance *ti = ovsl_dereference(table->ti);
+ struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
BUG_ON(table->count == 0);
- hlist_del_rcu(&flow->hash_node[ti->node_ver]);
+ hlist_del_rcu(&flow->flow_hash.node[ti->node_ver]);
+ hlist_del_rcu(&flow->ufid_hash.node[ufid_ti->node_ver]);
table->count--;
/* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
@@ -818,34 +933,69 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
const struct sw_flow_mask *mask)
{
- struct table_instance *new_ti = NULL;
- struct table_instance *ti;
+ struct table_instance *new_ti = NULL, *new_ufid_ti = NULL;
+ struct table_instance *ti, *ufid_ti;
int err;
err = flow_mask_insert(table, flow, mask);
if (err)
return err;
- flow->hash = flow_hash(&flow->key, flow->mask->range.start,
- flow->mask->range.end);
+ flow->flow_hash.hash = flow_hash(&flow->key, flow->mask->range.start,
+ flow->mask->range.end);
ti = ovsl_dereference(table->ti);
table_instance_insert(ti, flow);
+ flow->ufid_hash.hash = ufid_hash(&flow->ufid);
+ ufid_ti = ovsl_dereference(table->ufid_ti);
+ ufid_table_instance_insert(ufid_ti, flow);
table->count++;
/* Expand table, if necessary, to make room. */
if (table->count > ti->n_buckets)
- new_ti = table_instance_expand(ti);
+ flow_table_expand(ti, ufid_ti, &new_ti, &new_ufid_ti);
else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
- new_ti = table_instance_rehash(ti, ti->n_buckets);
+ flow_table_rehash(ti, ufid_ti, ti->n_buckets,
+ &new_ti, &new_ufid_ti);
if (new_ti) {
rcu_assign_pointer(table->ti, new_ti);
- table_instance_destroy(ti, true);
+ rcu_assign_pointer(table->ufid_ti, new_ufid_ti);
+ table_instance_destroy(ti, true, false);
+ table_instance_destroy(ufid_ti, true, true);
table->last_rehash = jiffies;
}
return 0;
}
+/* Initializes 'flow->ufid'. */
+int ovs_flow_ufid(struct sw_flow *flow, const struct sw_flow_id *src)
+{
+ struct sw_flow_id *dst = &flow->ufid;
+ size_t len = src->ufid_len ? src->ufid_len : sizeof(u32);
+
+ dst->ufid = kmalloc(len, GFP_KERNEL);
+ if (!dst->ufid)
+ return -ENOMEM;
+
+ if (src->ufid_len) {
+ /* Use userspace-specified flow-id. */
+ memcpy(dst->ufid, src->ufid, src->ufid_len);
+ dst->ufid_len = src->ufid_len;
+ dst->kernel_only = false;
+ } else {
+ u32 hash;
+
+ /* Generate flow-id for internal use. */
+ hash = flow_hash(&flow->unmasked_key, 0,
+ sizeof(flow->unmasked_key));
+ memcpy(dst->ufid, &hash, sizeof(hash));
+ dst->ufid_len = sizeof(hash);
+ dst->kernel_only = true;
+ }
+
+ return 0;
+}
+
/* Initializes the flow module.
* Returns zero if successful or a negative error code.
*/
diff --git a/datapath/flow_table.h b/datapath/flow_table.h
index 9eb4af9..69453b4 100644
--- a/datapath/flow_table.h
+++ b/datapath/flow_table.h
@@ -60,6 +60,7 @@ struct flow_table {
struct table_instance __rcu *ti;
struct mask_cache_entry __percpu *mask_cache;
struct mask_array __rcu *mask_array;
+ struct table_instance __rcu *ufid_ti;
unsigned long last_rehash;
unsigned int count;
};
@@ -90,11 +91,13 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
const struct sw_flow_key *);
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *,
+ const struct sw_flow_id *sfid,
const struct sw_flow_match *match);
bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
const struct sw_flow_match *match);
+int ovs_flow_ufid(struct sw_flow *flow, const struct sw_flow_id *src);
void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
const struct sw_flow_mask *mask);
#endif /* flow_table.h */
diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
index b2257e6..988a4a7 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -470,6 +470,9 @@ struct ovs_key_nd {
* a wildcarded match. Omitting attribute is treated as wildcarding all
* corresponding fields. Optional for all requests. If not present,
* all flow key bits are exact match bits.
+ * @OVS_FLOW_ATTR_UFID: Nested %OVS_UFID_ATTR_* attributes specifying unique
+ * identifiers for flows and providing alternative semantics for flow
+ * installation and retrieval.
*
* These attributes follow the &struct ovs_header within the Generic Netlink
* payload for %OVS_FLOW_* commands.
@@ -485,12 +488,34 @@ enum ovs_flow_attr {
OVS_FLOW_ATTR_MASK, /* Sequence of OVS_KEY_ATTR_* attributes. */
OVS_FLOW_ATTR_PROBE, /* Flow operation is a feature probe, error
* logging should be suppressed. */
+ OVS_FLOW_ATTR_UFID, /* Unique flow identifier. */
__OVS_FLOW_ATTR_MAX
};
#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1)
/**
+ * enum ovs_ufid_attr - Unique identifier types.
+ *
+ * @OVS_UFID_ATTR_FLAGS: A 32-bit value specifying changes to the behaviour of
+ * the current %OVS_FLOW_CMD_* request. Optional for all requests.
+ * @OVS_UFID_ATTR_ID: A unique identifier for a flow.
+ */
+enum ovs_ufid_attr {
+ OVS_UFID_ATTR_UNSPEC,
+ OVS_UFID_ATTR_FLAGS, /* u32 of OVS_UFID_F_* */
+ OVS_UFID_ATTR_ID, /* variable length identifier. */
+ __OVS_UFID_ATTR_MAX
+};
+
+#define OVS_UFID_ATTR_MAX (__OVS_UFID_ATTR_MAX - 1)
+
+/* Skip attributes for notifications. */
+#define OVS_UFID_F_SKIP_KEY (1 << 0)
+#define OVS_UFID_F_SKIP_MASK (1 << 1)
+#define OVS_UFID_F_SKIP_ACTIONS (1 << 2)
+
+/**
* enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
* @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
* @OVS_ACTION_ATTR_SAMPLE. A value of 0 samples no packets, a value of
--
1.7.10.4
More information about the dev
mailing list