[ovs-dev] [PATCH] datapath: Per cpu flow stats.
Andy Zhou
azhou at nicira.com
Fri Oct 18 15:25:15 UTC 2013
Do we still need a spinlock when we have per-cpu counters?
On Fri, Oct 18, 2013 at 8:11 AM, Pravin B Shelar <pshelar at nicira.com> wrote:
> With mega flow implementation ovs flow can be shared between
> multiple CPUs which makes stats updates highly contended
> operation. Following patch allocates separate stats for each
> CPU to make stats update scalable.
>
> Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
> ---
> datapath/datapath.c | 54 ++++++++++++++++++------------------------------
> datapath/flow.c | 50 +++++++++++++++++++++++++++++++++++++++------
> datapath/flow.h | 20 +++++++++++------
> datapath/flow_table.c | 14 ++++++++++--
> 4 files changed, 87 insertions(+), 51 deletions(-)
>
> diff --git a/datapath/datapath.c b/datapath/datapath.c
> index 9e6df12..25f25f2 100644
> --- a/datapath/datapath.c
> +++ b/datapath/datapath.c
> @@ -252,9 +252,9 @@ void ovs_dp_process_received_packet(struct vport *p,
> struct sk_buff *skb)
> OVS_CB(skb)->flow = flow;
> OVS_CB(skb)->pkt_key = &key;
>
> - stats_counter = &stats->n_hit;
> - ovs_flow_used(OVS_CB(skb)->flow, skb);
> + ovs_flow_stats_update(OVS_CB(skb)->flow, skb);
> ovs_execute_actions(dp, skb);
> + stats_counter = &stats->n_hit;
>
> out:
> /* Update datapath statistics. */
> @@ -454,14 +454,6 @@ out:
> return err;
> }
>
> -static void clear_stats(struct sw_flow *flow)
> -{
> - flow->used = 0;
> - flow->tcp_flags = 0;
> - flow->packet_count = 0;
> - flow->byte_count = 0;
> -}
> -
> static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
> {
> struct ovs_header *ovs_header = info->userhdr;
> @@ -629,11 +621,9 @@ static int ovs_flow_cmd_fill_info(struct sw_flow
> *flow, struct datapath *dp,
> {
> const int skb_orig_len = skb->len;
> struct nlattr *start;
> - struct ovs_flow_stats stats;
> + struct sw_flow_stats flow_stats;
> struct ovs_header *ovs_header;
> struct nlattr *nla;
> - unsigned long used;
> - u8 tcp_flags;
> int err;
>
> ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
> flags, cmd);
> @@ -662,24 +652,24 @@ static int ovs_flow_cmd_fill_info(struct sw_flow
> *flow, struct datapath *dp,
>
> nla_nest_end(skb, nla);
>
> - spin_lock_bh(&flow->lock);
> - used = flow->used;
> - stats.n_packets = flow->packet_count;
> - stats.n_bytes = flow->byte_count;
> - tcp_flags = flow->tcp_flags;
> - spin_unlock_bh(&flow->lock);
> -
> - if (used &&
> - nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
> + ovs_flow_stats_get(flow, &flow_stats);
> + if (flow_stats.used &&
> + nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(flow_stats.used)))
> goto nla_put_failure;
>
> - if (stats.n_packets &&
> - nla_put(skb, OVS_FLOW_ATTR_STATS,
> - sizeof(struct ovs_flow_stats), &stats))
> - goto nla_put_failure;
> + if (flow_stats.packet_count) {
> + struct ovs_flow_stats stats = {
> + .n_packets = flow_stats.packet_count,
> + .n_bytes = flow_stats.byte_count,
> + };
>
> - if (tcp_flags &&
> - nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags))
> + if (nla_put(skb, OVS_FLOW_ATTR_STATS,
> + sizeof(struct ovs_flow_stats), &stats))
> + goto nla_put_failure;
> + }
> +
> + if (flow_stats.tcp_flags &&
> + nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, flow_stats.tcp_flags))
> goto nla_put_failure;
>
> /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions
> if
> @@ -809,7 +799,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff
> *skb, struct genl_info *info)
> error = PTR_ERR(flow);
> goto err_unlock_ovs;
> }
> - clear_stats(flow);
>
> flow->key = masked_key;
> flow->unmasked_key = key;
> @@ -855,11 +844,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff
> *skb, struct genl_info *info)
> info->snd_seq,
> OVS_FLOW_CMD_NEW);
>
> /* Clear stats. */
> - if (a[OVS_FLOW_ATTR_CLEAR]) {
> - spin_lock_bh(&flow->lock);
> - clear_stats(flow);
> - spin_unlock_bh(&flow->lock);
> - }
> + if (a[OVS_FLOW_ATTR_CLEAR])
> + ovs_sw_flow_stats_clear(flow);
> }
> ovs_unlock();
>
> diff --git a/datapath/flow.c b/datapath/flow.c
> index faa4e15..1ee01ec 100644
> --- a/datapath/flow.c
> +++ b/datapath/flow.c
> @@ -62,8 +62,9 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
> #define TCP_FLAGS_OFFSET 13
> #define TCP_FLAG_MASK 0x3f
>
> -void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
> +void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
> {
> + struct sw_flow_stats *stats = &flow->stats[smp_processor_id()];
> u8 tcp_flags = 0;
>
> if ((flow->key.eth.type == htons(ETH_P_IP) ||
> @@ -74,12 +75,47 @@ void ovs_flow_used(struct sw_flow *flow, struct
> sk_buff *skb)
> tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
> }
>
> - spin_lock(&flow->lock);
> - flow->used = jiffies;
> - flow->packet_count++;
> - flow->byte_count += skb->len;
> - flow->tcp_flags |= tcp_flags;
> - spin_unlock(&flow->lock);
> + spin_lock(&stats->lock);
> + stats->used = jiffies;
> + stats->packet_count++;
> + stats->byte_count += skb->len;
> + stats->tcp_flags |= tcp_flags;
> + spin_unlock(&stats->lock);
> +}
> +
> +void ovs_flow_stats_get(struct sw_flow *flow, struct sw_flow_stats *res)
> +{
> + int cpu;
> +
> + memset(res, 0, sizeof(*res));
> +
> + for_each_possible_cpu(cpu) {
> + struct sw_flow_stats *stats = &flow->stats[cpu];
> +
> + spin_lock(&stats->lock);
> + if (time_after(stats->used, res->used))
> + res->used = stats->used;
> + res->packet_count += stats->packet_count;
> + res->byte_count += stats->byte_count;
> + res->tcp_flags |= stats->tcp_flags;
> + spin_unlock(&stats->lock);
> + }
> +}
> +
> +void ovs_sw_flow_stats_clear(struct sw_flow *flow)
> +{
> + int cpu;
> +
> + for_each_possible_cpu(cpu) {
> + struct sw_flow_stats *stats = &flow->stats[cpu];
> +
> + spin_lock(&stats->lock);
> + stats->used = 0;
> + stats->packet_count = 0;
> + stats->byte_count = 0;
> + stats->tcp_flags = 0;
> + spin_unlock(&stats->lock);
> + }
> }
>
> static int check_header(struct sk_buff *skb, int len)
> diff --git a/datapath/flow.h b/datapath/flow.h
> index 91a3022..987e9e5 100644
> --- a/datapath/flow.h
> +++ b/datapath/flow.h
> @@ -19,6 +19,7 @@
> #ifndef FLOW_H
> #define FLOW_H 1
>
> +#include <linux/cache.h>
> #include <linux/kernel.h>
> #include <linux/netlink.h>
> #include <linux/openvswitch.h>
> @@ -146,6 +147,14 @@ struct sw_flow_actions {
> struct nlattr actions[];
> };
>
> +struct sw_flow_stats {
> + u64 packet_count; /* Number of packets matched. */
> + u64 byte_count; /* Number of bytes matched. */
> + unsigned long used; /* Last used time (in jiffies). */
> + spinlock_t lock; /* Protects all stats fields in this struct. */
> + u8 tcp_flags; /* Union of seen TCP flags. */
> +} ____cacheline_aligned;
> +
> struct sw_flow {
> struct rcu_head rcu;
> struct hlist_node hash_node[2];
> @@ -155,12 +164,7 @@ struct sw_flow {
> struct sw_flow_key unmasked_key;
> struct sw_flow_mask *mask;
> struct sw_flow_actions __rcu *sf_acts;
> -
> - spinlock_t lock; /* Lock for values below. */
> - unsigned long used; /* Last used time (in jiffies). */
> - u64 packet_count; /* Number of packets matched. */
> - u64 byte_count; /* Number of bytes matched. */
> - u8 tcp_flags; /* Union of seen TCP flags. */
> + struct sw_flow_stats stats[];
> };
>
> struct arp_eth_header {
> @@ -177,7 +181,9 @@ struct arp_eth_header {
> unsigned char ar_tip[4]; /* target IP address
> */
> } __packed;
>
> -void ovs_flow_used(struct sw_flow *, struct sk_buff *);
> +void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb);
> +void ovs_flow_stats_get(struct sw_flow *flow, struct sw_flow_stats *res);
> +void ovs_sw_flow_stats_clear(struct sw_flow *flow);
> u64 ovs_flow_used_time(unsigned long flow_jiffies);
>
> int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
> diff --git a/datapath/flow_table.c b/datapath/flow_table.c
> index 98eb809..23ed9ee 100644
> --- a/datapath/flow_table.c
> +++ b/datapath/flow_table.c
> @@ -76,15 +76,19 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const
> struct sw_flow_key *src,
> struct sw_flow *ovs_flow_alloc(void)
> {
> struct sw_flow *flow;
> + int cpu;
>
> flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
> if (!flow)
> return ERR_PTR(-ENOMEM);
>
> - spin_lock_init(&flow->lock);
> flow->sf_acts = NULL;
> flow->mask = NULL;
>
> + memset(flow->stats, 0, num_possible_cpus() * sizeof(struct sw_flow_stats));
> + for_each_possible_cpu(cpu)
> + spin_lock_init(&flow->stats[cpu].lock);
> +
> return flow;
> }
>
> @@ -561,11 +565,15 @@ int ovs_flow_tbl_insert(struct flow_table *table,
> struct sw_flow *flow,
> * Returns zero if successful or a negative error code. */
> int ovs_flow_init(void)
> {
> + int flow_size;
> +
> BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
> BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
>
> - flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
> - 0, NULL);
> + flow_size = sizeof(struct sw_flow) +
> + (num_possible_cpus() * sizeof(struct sw_flow_stats));
> +
> + flow_cache = kmem_cache_create("sw_flow", flow_size, 0, 0, NULL);
> if (flow_cache == NULL)
> return -ENOMEM;
>
> --
> 1.7.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.openvswitch.org/pipermail/ovs-dev/attachments/20131018/2293d1f0/attachment-0003.html>
More information about the dev
mailing list