[ovs-dev] [PATCH V2 04/16] datapath: Optimize operations for OvS flow_stats.

Greg Rose gvrose8192 at gmail.com
Mon Sep 11 21:10:57 UTC 2017


Upstream commit:
    commit c4b2bf6b4a35348fe6d1eb06928eb68d7b9d99a9
    Author: Tonghao Zhang <xiangxia.m.yue at gmail.com>
    Date:   Mon Jul 17 23:28:06 2017 -0700

    openvswitch: Optimize operations for OvS flow_stats.

    When calling flow_free() to free a flow, we call cpumask_next() many
    times (once per CPU in cpu_possible_mask, e.g. 128 by default). That
    consumes a noticeable amount of CPU time if flow_free() is called
    frequently, for example when all packets are sent to userspace via
    upcall and OvS sends them back via netlink to
    ovs_packet_cmd_execute() (which calls flow_free()).

    The test topology is shown below. VM01 sends TCP packets to VM02,
    and OvS forwards the packets. While testing, we use perf to report
    the system performance.

    VM01 --- OvS-VM --- VM02

    Without this patch, perf-top shows the following: flow_free()
    accounts for 3.02% of CPU usage.

        4.23%  [kernel]            [k] _raw_spin_unlock_irqrestore
        3.62%  [kernel]            [k] __do_softirq
        3.16%  [kernel]            [k] __memcpy
        3.02%  [kernel]            [k] flow_free
        2.42%  libc-2.17.so        [.] __memcpy_ssse3_back
        2.18%  [kernel]            [k] copy_user_generic_unrolled
        2.17%  [kernel]            [k] find_next_bit

    With this patch applied, perf-top shows the following: flow_free()
    is no longer on the list.

        4.11%  [kernel]            [k] _raw_spin_unlock_irqrestore
        3.79%  [kernel]            [k] __do_softirq
        3.46%  [kernel]            [k] __memcpy
        2.73%  libc-2.17.so        [.] __memcpy_ssse3_back
        2.25%  [kernel]            [k] copy_user_generic_unrolled
        1.89%  libc-2.17.so        [.] _int_malloc
        1.53%  ovs-vswitchd        [.] xlate_actions

    With this patch, the TCP throughput (we don't use Megaflow Cache
    + Microflow Cache) between VMs rises from 1.18Gbs/sec to 1.30Gbs/sec
    (roughly a 10% performance improvement).

    This patch adds a cpumask struct: cpu_used_mask stores the IDs of
    the CPUs that the flow has used. We then only check the flow_stats
    on the CPUs that were used, so it is unnecessary to check all
    possible CPUs when getting, clearing, and updating the flow_stats.
    Adding cpu_used_mask to struct sw_flow doesn't increase the number
    of cache lines.

    Signed-off-by: Tonghao Zhang <xiangxia.m.yue at gmail.com>
    Acked-by: Pravin B Shelar <pshelar at ovn.org>
    Signed-off-by: David S. Miller <davem at davemloft.net>

Signed-off-by: Greg Rose <gvrose8192 at gmail.com>
---
 datapath/flow.c       | 7 ++++---
 datapath/flow.h       | 2 ++
 datapath/flow_table.c | 4 +++-
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/datapath/flow.c b/datapath/flow.c
index 30e4d21..5da7e3e 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -71,7 +71,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 			   const struct sk_buff *skb)
 {
 	struct flow_stats *stats;
-	int cpu = smp_processor_id();
+	unsigned int cpu = smp_processor_id();
 	int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
 
 	stats = rcu_dereference(flow->stats[cpu]);
@@ -116,6 +116,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 
 					rcu_assign_pointer(flow->stats[cpu],
 							   new_stats);
+					cpumask_set_cpu(cpu, &flow->cpu_used_mask);
 					goto unlock;
 				}
 			}
@@ -143,7 +144,7 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
 	memset(ovs_stats, 0, sizeof(*ovs_stats));
 
 	/* We open code this to make sure cpu 0 is always considered */
-	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
 		struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
 
 		if (stats) {
@@ -167,7 +168,7 @@ void ovs_flow_stats_clear(struct sw_flow *flow)
 	int cpu;
 
 	/* We open code this to make sure cpu 0 is always considered */
-	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
 		struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
 
 		if (stats) {
diff --git a/datapath/flow.h b/datapath/flow.h
index 07af912..0796b09 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -31,6 +31,7 @@
 #include <linux/jiffies.h>
 #include <linux/time.h>
 #include <linux/flex_array.h>
+#include <linux/cpumask.h>
 #include <net/inet_ecn.h>
 #include <net/ip_tunnels.h>
 #include <net/dst_metadata.h>
@@ -218,6 +219,7 @@ struct sw_flow {
 					 */
 	struct sw_flow_key key;
 	struct sw_flow_id id;
+	struct cpumask cpu_used_mask;
 	struct sw_flow_mask *mask;
 	struct sw_flow_actions __rcu *sf_acts;
 	struct flow_stats __rcu *stats[]; /* One for each CPU.  First one
diff --git a/datapath/flow_table.c b/datapath/flow_table.c
index 6fe3739..47057a1 100644
--- a/datapath/flow_table.c
+++ b/datapath/flow_table.c
@@ -104,6 +104,8 @@ struct sw_flow *ovs_flow_alloc(void)
 
 	RCU_INIT_POINTER(flow->stats[0], stats);
 
+	cpumask_set_cpu(0, &flow->cpu_used_mask);
+
 	return flow;
 err:
 	kmem_cache_free(flow_cache, flow);
@@ -147,7 +149,7 @@ static void flow_free(struct sw_flow *flow)
 	if (flow->sf_acts)
 		ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts);
 	/* We open code this to make sure cpu 0 is always considered */
-	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask))
+	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
 		if (flow->stats[cpu])
 			kmem_cache_free(flow_stats_cache,
 					rcu_dereference_raw(flow->stats[cpu]));
-- 
1.8.3.1



More information about the dev mailing list