[ovs-dev] [PATCHv2 2/2] datapath: Cache netlink flow key, mask on flow_put.

Joe Stringer joestringer at nicira.com
Wed Jul 16 00:15:41 UTC 2014


Converting the flow key and mask back into netlink format during flow
dump is fairly expensive. By caching the userspace-provided versions of
these during flow setup, and copying the memory directly during flow
dump, we are able to support up to 33% more flows in the datapath.

Previous top-5 perf results for the revalidator thread with many flows:
+ 14.00%  ovs-vswitchd  [kernel.kallsyms]   [k] memcpy
+  9.56%  ovs-vswitchd  [kernel.kallsyms]   [k] ovs_nla_put_flow
+  7.86%  ovs-vswitchd  [kernel.kallsyms]   [k] memset
+  3.84%  ovs-vswitchd  ovs-vswitchd        [.] revalidate.isra.14
+  3.52%  ovs-vswitchd  [kernel.kallsyms]   [k] __nla_reserve

Perf results for the thread after this patch:
+  7.62%  ovs-vswitchd  [kernel.kallsyms]   [k] memcpy
+  7.44%  ovs-vswitchd  [kernel.kallsyms]   [k] ovs_flow_cmd_fill_info
+  6.71%  ovs-vswitchd  ovs-vswitchd        [.] revalidate.isra.14
+  3.64%  ovs-vswitchd  [kernel.kallsyms]   [k] _raw_spin_lock_bh
+  3.43%  ovs-vswitchd  [kernel.kallsyms]   [k] ovs_flow_tbl_dump_next

Suggested-by: Jesse Gross <jesse at nicira.com>
Signed-off-by: Joe Stringer <joestringer at nicira.com>
---
v2: Copy mask from userspace rather than constructing from sw_flow.
RFC: First post.

This version gets rid of the contention introduced in v1 by copying the
netlink key/mask outside of the flow setup lock. Between this and the
reduction in cache construction cost, the flow setup rate has been
restored to roughly the same as master in my tests with 1 revalidator
and 15 handler threads.
---
 datapath/datapath.c     |   27 +++++++++++++---
 datapath/flow.h         |    1 +
 datapath/flow_netlink.c |   79 +++++++++++++++++++++++++++++++++++++++++++++++
 datapath/flow_netlink.h |    2 ++
 datapath/flow_table.c   |   12 ++++++-
 datapath/flow_table.h   |    2 +-
 6 files changed, 116 insertions(+), 7 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index b1c757c..ffe720e 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -535,7 +535,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 		packet->protocol = htons(ETH_P_802_2);
 
 	/* Build an sw_flow for sending this packet. */
-	flow = ovs_flow_alloc();
+	flow = ovs_flow_alloc(0);
 	err = PTR_ERR(flow);
 	if (IS_ERR(flow))
 		goto err_kfree_skb;
@@ -653,11 +653,16 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
 	}
 }
 
+static size_t ovs_nl_match_size(void)
+{
+	return nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */
+		+ nla_total_size(ovs_key_attr_size()); /* OVS_FLOW_ATTR_MASK */
+}
+
 static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
 {
 	return NLMSG_ALIGN(sizeof(struct ovs_header))
-		+ nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */
-		+ nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */
+		+ ovs_nl_match_size()
 		+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
 		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
 		+ nla_total_size(8) /* OVS_FLOW_ATTR_USED */
@@ -667,11 +672,20 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
 /* Called with ovs_mutex or RCU read lock. */
 static int ovs_flow_cmd_fill_match(struct datapath *dp,
 				   const struct sw_flow *flow,
+				   const struct sk_buff *nl_cache,
 				   struct sk_buff *skb)
 {
 	struct nlattr *nla;
 	int err;
 
+	if (nl_cache) {
+		if (skb_tailroom(skb) < nl_cache->len)
+			return -EMSGSIZE;
+
+		return skb_copy_bits(nl_cache, 0, skb_put(skb, nl_cache->len),
+				     nl_cache->len);
+	}
+
 	/* Fill flow key. */
 	nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
 	if (!nla)
@@ -776,7 +790,7 @@ static int ovs_flow_cmd_fill_info(struct datapath *dp,
 		return -EMSGSIZE;
 	ovs_header->dp_ifindex = dp_ifindex;
 
-	err = ovs_flow_cmd_fill_match(dp, flow, skb);
+	err = ovs_flow_cmd_fill_match(dp, flow, flow->nl_match_cache, skb);
 	if (err)
 		goto error;
 
@@ -860,7 +874,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 
 	/* Most of the time we need to allocate a new flow, do it before
 	 * locking. */
-	new_flow = ovs_flow_alloc();
+	new_flow = ovs_flow_alloc(ovs_nl_match_size());
 	if (IS_ERR(new_flow)) {
 		error = PTR_ERR(new_flow);
 		goto error;
@@ -894,6 +908,9 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		goto err_kfree_acts;
 	}
 
+	ovs_nla_copy_flow(a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK],
+			  new_flow->nl_match_cache);
+
 	ovs_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
 	if (unlikely(!dp)) {
diff --git a/datapath/flow.h b/datapath/flow.h
index f6afa48..f1bf289 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -189,6 +189,7 @@ struct sw_flow {
 	struct sw_flow_key unmasked_key;
 	struct sw_flow_mask *mask;
 	struct sw_flow_actions __rcu *sf_acts;
+	struct sk_buff *nl_match_cache;
 	struct flow_stats __rcu *stats[]; /* One for each NUMA node.  First one
 					   * is allocated at flow creation time,
 					   * the rest are allocated on demand
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 08b628a..e21390d 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -862,6 +862,50 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val)
 	nlattr_set(attr, val, true);
 }
 
+/* We expect the nlattr stream to be already validated */
+static void nlattr_set_unwildcarded(struct nlattr *mask)
+{
+	struct nlattr *a;
+	int rem;
+
+	nla_for_each_nested(a, mask, rem) {
+		enum ovs_key_attr type = nla_type(a);
+
+		switch (type) {
+		case OVS_KEY_ATTR_IN_PORT:
+		case OVS_KEY_ATTR_ETHERTYPE:
+			/* Wildcards not supported. */
+			memset(nla_data(a), 0xff, nla_len(a));
+			break;
+
+		case OVS_KEY_ATTR_UNSPEC:
+		case OVS_KEY_ATTR_ENCAP:
+		case OVS_KEY_ATTR_PRIORITY:
+		case OVS_KEY_ATTR_ETHERNET:
+		case OVS_KEY_ATTR_VLAN:
+		case OVS_KEY_ATTR_IPV4:
+		case OVS_KEY_ATTR_IPV6:
+		case OVS_KEY_ATTR_TCP:
+		case OVS_KEY_ATTR_UDP:
+		case OVS_KEY_ATTR_ICMP:
+		case OVS_KEY_ATTR_ICMPV6:
+		case OVS_KEY_ATTR_ARP:
+		case OVS_KEY_ATTR_ND:
+		case OVS_KEY_ATTR_SKB_MARK:
+		case OVS_KEY_ATTR_TUNNEL:
+		case OVS_KEY_ATTR_SCTP:
+		case OVS_KEY_ATTR_TCP_FLAGS:
+		case OVS_KEY_ATTR_DP_HASH:
+		case OVS_KEY_ATTR_RECIRC_ID:
+		case OVS_KEY_ATTR_MPLS:
+		case OVS_KEY_ATTR_TUNNEL_INFO:
+		case __OVS_KEY_ATTR_MAX:
+			/* Wildcards supported. */
+			break;
+		}
+	}
+}
+
 /**
  * ovs_nla_get_match - parses Netlink attributes into a flow key and
  * mask. In case the 'mask' is NULL, the flow is treated as exact match
@@ -1001,6 +1045,41 @@ free_newmask:
 }
 
 /**
+ * ovs_nla_copy_flow - makes a copy of the flow key and mask in netlink format.
+ * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
+ * sequence.
+ * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
+ * attribute specifies the mask field of the wildcarded flow. No mask indicates
+ * exact match.
+ * @skb: Receives netlink-formatted key and mask.
+ *
+ * This copies the netlink-formatted key and mask, adjusting the mask for
+ * wildcards currently supported. No validation of key or mask is performed.
+ */
+void ovs_nla_copy_flow(const struct nlattr *key, const struct nlattr *mask,
+		       struct sk_buff *skb)
+{
+	struct nlattr *new_mask;
+	unsigned char *payload;
+	size_t key_len, mask_len;
+
+	key_len = nla_total_size(nla_len(key));
+	nla_append(skb, key_len, key);
+
+	mask_len = mask ? nla_total_size(nla_len(mask)) : key_len;
+	new_mask = nla_reserve(skb, OVS_FLOW_ATTR_MASK, mask_len);
+	payload = nla_data(new_mask);
+	if (mask) {
+		nla_memcpy(payload, mask, mask_len);
+		nlattr_set_unwildcarded(new_mask);
+	} else {
+		/* Construct an exact match mask from the key. */
+		nla_memcpy(payload, key, mask_len);
+		mask_set_nlattr(new_mask, 0xff);
+	}
+}
+
+/**
  * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
  * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
  * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h
index 274e43d..c16486d 100644
--- a/datapath/flow_netlink.h
+++ b/datapath/flow_netlink.h
@@ -49,6 +49,8 @@ int ovs_nla_get_flow_metadata(struct sw_flow *flow,
 int ovs_nla_get_match(struct sw_flow_match *match,
 		      const struct nlattr *,
 		      const struct nlattr *);
+void ovs_nla_copy_flow(const struct nlattr *, const struct nlattr *,
+		       struct sk_buff *);
 
 int ovs_nla_copy_actions(const struct nlattr *attr,
 			 const struct sw_flow_key *key,
diff --git a/datapath/flow_table.c b/datapath/flow_table.c
index 9ab1020..879c4c3 100644
--- a/datapath/flow_table.c
+++ b/datapath/flow_table.c
@@ -80,7 +80,7 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
 		*d++ = *s++ & *m++;
 }
 
-struct sw_flow *ovs_flow_alloc(void)
+struct sw_flow *ovs_flow_alloc(size_t match_len)
 {
 	struct sw_flow *flow;
 	struct flow_stats *stats;
@@ -92,6 +92,7 @@ struct sw_flow *ovs_flow_alloc(void)
 
 	flow->sf_acts = NULL;
 	flow->mask = NULL;
+	flow->nl_match_cache = NULL;
 	flow->stats_last_writer = NUMA_NO_NODE;
 
 	/* Initialize the default stat node. */
@@ -100,6 +101,12 @@ struct sw_flow *ovs_flow_alloc(void)
 	if (!stats)
 		goto err;
 
+	if (match_len) {
+		flow->nl_match_cache = alloc_skb(match_len, GFP_KERNEL);
+		if (!flow->nl_match_cache)
+			goto err_free_stats;
+	}
+
 	spin_lock_init(&stats->lock);
 
 	RCU_INIT_POINTER(flow->stats[0], stats);
@@ -109,6 +116,8 @@ struct sw_flow *ovs_flow_alloc(void)
 			RCU_INIT_POINTER(flow->stats[node], NULL);
 
 	return flow;
+err_free_stats:
+	kmem_cache_free(flow_stats_cache, flow);
 err:
 	kmem_cache_free(flow_cache, flow);
 	return ERR_PTR(-ENOMEM);
@@ -146,6 +155,7 @@ static void flow_free(struct sw_flow *flow)
 {
 	int node;
 
+	kfree_skb(flow->nl_match_cache);
 	kfree((struct sw_flow_actions __force *)flow->sf_acts);
 	for_each_node(node)
 		if (flow->stats[node])
diff --git a/datapath/flow_table.h b/datapath/flow_table.h
index a05d36a..5edf1c5 100644
--- a/datapath/flow_table.h
+++ b/datapath/flow_table.h
@@ -69,7 +69,7 @@ extern struct kmem_cache *flow_stats_cache;
 int ovs_flow_init(void);
 void ovs_flow_exit(void);
 
-struct sw_flow *ovs_flow_alloc(void);
+struct sw_flow *ovs_flow_alloc(size_t match_len);
 void ovs_flow_free(struct sw_flow *, bool deferred);
 
 int ovs_flow_tbl_init(struct flow_table *);
-- 
1.7.10.4




More information about the dev mailing list