[ovs-dev] [PATCH] datapath: Per cpu flow stats.

Andy Zhou azhou at nicira.com
Fri Oct 18 15:25:15 UTC 2013


Do we still need a spinlock when we have per-cpu counters?


On Fri, Oct 18, 2013 at 8:11 AM, Pravin B Shelar <pshelar at nicira.com> wrote:

> With mega flow implementation ovs flow can be shared between
> multiple CPUs which makes stats updates highly contended
> operation. Following patch allocates separate stats for each
> CPU to make stats update scalable.
>
> Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
> ---
>  datapath/datapath.c   |   54
> ++++++++++++++++++------------------------------
>  datapath/flow.c       |   50 +++++++++++++++++++++++++++++++++++++++------
>  datapath/flow.h       |   20 +++++++++++------
>  datapath/flow_table.c |   14 ++++++++++--
>  4 files changed, 87 insertions(+), 51 deletions(-)
>
> diff --git a/datapath/datapath.c b/datapath/datapath.c
> index 9e6df12..25f25f2 100644
> --- a/datapath/datapath.c
> +++ b/datapath/datapath.c
> @@ -252,9 +252,9 @@ void ovs_dp_process_received_packet(struct vport *p,
> struct sk_buff *skb)
>         OVS_CB(skb)->flow = flow;
>         OVS_CB(skb)->pkt_key = &key;
>
> -       stats_counter = &stats->n_hit;
> -       ovs_flow_used(OVS_CB(skb)->flow, skb);
> +       ovs_flow_stats_update(OVS_CB(skb)->flow, skb);
>         ovs_execute_actions(dp, skb);
> +       stats_counter = &stats->n_hit;
>
>  out:
>         /* Update datapath statistics. */
> @@ -454,14 +454,6 @@ out:
>         return err;
>  }
>
> -static void clear_stats(struct sw_flow *flow)
> -{
> -       flow->used = 0;
> -       flow->tcp_flags = 0;
> -       flow->packet_count = 0;
> -       flow->byte_count = 0;
> -}
> -
>  static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info
> *info)
>  {
>         struct ovs_header *ovs_header = info->userhdr;
> @@ -629,11 +621,9 @@ static int ovs_flow_cmd_fill_info(struct sw_flow
> *flow, struct datapath *dp,
>  {
>         const int skb_orig_len = skb->len;
>         struct nlattr *start;
> -       struct ovs_flow_stats stats;
> +       struct sw_flow_stats flow_stats;
>         struct ovs_header *ovs_header;
>         struct nlattr *nla;
> -       unsigned long used;
> -       u8 tcp_flags;
>         int err;
>
>         ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
> flags, cmd);
> @@ -662,24 +652,24 @@ static int ovs_flow_cmd_fill_info(struct sw_flow
> *flow, struct datapath *dp,
>
>         nla_nest_end(skb, nla);
>
> -       spin_lock_bh(&flow->lock);
> -       used = flow->used;
> -       stats.n_packets = flow->packet_count;
> -       stats.n_bytes = flow->byte_count;
> -       tcp_flags = flow->tcp_flags;
> -       spin_unlock_bh(&flow->lock);
> -
> -       if (used &&
> -           nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
> +       ovs_flow_stats_get(flow, &flow_stats);
> +       if (flow_stats.used &&
> +           nla_put_u64(skb, OVS_FLOW_ATTR_USED,
> ovs_flow_used_time(flow_stats.used)))
>                 goto nla_put_failure;
>
> -       if (stats.n_packets &&
> -           nla_put(skb, OVS_FLOW_ATTR_STATS,
> -                   sizeof(struct ovs_flow_stats), &stats))
> -               goto nla_put_failure;
> +       if (flow_stats.packet_count) {
> +               struct ovs_flow_stats stats = {
> +                       .n_packets = flow_stats.packet_count,
> +                       .n_bytes = flow_stats.byte_count,
> +               };
>
> -       if (tcp_flags &&
> -           nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags))
> +               if (nla_put(skb, OVS_FLOW_ATTR_STATS,
> +                           sizeof(struct ovs_flow_stats), &stats))
> +                       goto nla_put_failure;
> +       }
> +
> +       if (flow_stats.tcp_flags &&
> +           nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, flow_stats.tcp_flags))
>                 goto nla_put_failure;
>
>         /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions
> if
> @@ -809,7 +799,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff
> *skb, struct genl_info *info)
>                         error = PTR_ERR(flow);
>                         goto err_unlock_ovs;
>                 }
> -               clear_stats(flow);
>
>                 flow->key = masked_key;
>                 flow->unmasked_key = key;
> @@ -855,11 +844,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff
> *skb, struct genl_info *info)
>                                                info->snd_seq,
> OVS_FLOW_CMD_NEW);
>
>                 /* Clear stats. */
> -               if (a[OVS_FLOW_ATTR_CLEAR]) {
> -                       spin_lock_bh(&flow->lock);
> -                       clear_stats(flow);
> -                       spin_unlock_bh(&flow->lock);
> -               }
> +               if (a[OVS_FLOW_ATTR_CLEAR])
> +                       ovs_sw_flow_stats_clear(flow);
>         }
>         ovs_unlock();
>
> diff --git a/datapath/flow.c b/datapath/flow.c
> index faa4e15..1ee01ec 100644
> --- a/datapath/flow.c
> +++ b/datapath/flow.c
> @@ -62,8 +62,9 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
>  #define TCP_FLAGS_OFFSET 13
>  #define TCP_FLAG_MASK 0x3f
>
> -void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
> +void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
>  {
> +       struct sw_flow_stats *stats = &flow->stats[smp_processor_id()];
>         u8 tcp_flags = 0;
>
>         if ((flow->key.eth.type == htons(ETH_P_IP) ||
> @@ -74,12 +75,47 @@ void ovs_flow_used(struct sw_flow *flow, struct
> sk_buff *skb)
>                 tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
>         }
>
> -       spin_lock(&flow->lock);
> -       flow->used = jiffies;
> -       flow->packet_count++;
> -       flow->byte_count += skb->len;
> -       flow->tcp_flags |= tcp_flags;
> -       spin_unlock(&flow->lock);
> +       spin_lock(&stats->lock);
> +       stats->used = jiffies;
> +       stats->packet_count++;
> +       stats->byte_count += skb->len;
> +       stats->tcp_flags |= tcp_flags;
> +       spin_unlock(&stats->lock);
> +}
> +
> +void ovs_flow_stats_get(struct sw_flow *flow, struct sw_flow_stats *res)
> +{
> +       int cpu;
> +
> +       memset(res, 0, sizeof(*res));
> +
> +       for_each_possible_cpu(cpu) {
> +               struct sw_flow_stats *stats = &flow->stats[cpu];
> +
> +               spin_lock(&stats->lock);
> +               if (time_after(stats->used, res->used))
> +                       res->used = stats->used;
> +               res->packet_count += stats->packet_count;
> +               res->byte_count += stats->byte_count;
> +               res->tcp_flags |= stats->tcp_flags;
> +               spin_unlock(&stats->lock);
> +       }
> +}
> +
> +void ovs_sw_flow_stats_clear(struct sw_flow *flow)
> +{
> +       int cpu;
> +
> +       for_each_possible_cpu(cpu) {
> +               struct sw_flow_stats *stats = &flow->stats[cpu];
> +
> +               spin_lock(&stats->lock);
> +               stats->used = 0;
> +               stats->packet_count = 0;
> +               stats->byte_count = 0;
> +               stats->tcp_flags = 0;
> +               spin_unlock(&stats->lock);
> +       }
>  }
>
>  static int check_header(struct sk_buff *skb, int len)
> diff --git a/datapath/flow.h b/datapath/flow.h
> index 91a3022..987e9e5 100644
> --- a/datapath/flow.h
> +++ b/datapath/flow.h
> @@ -19,6 +19,7 @@
>  #ifndef FLOW_H
>  #define FLOW_H 1
>
> +#include <linux/cache.h>
>  #include <linux/kernel.h>
>  #include <linux/netlink.h>
>  #include <linux/openvswitch.h>
> @@ -146,6 +147,14 @@ struct sw_flow_actions {
>         struct nlattr actions[];
>  };
>
> +struct sw_flow_stats {
> +       u64 packet_count;               /* Number of packets matched. */
> +       u64 byte_count;                 /* Number of bytes matched. */
> +       unsigned long used;             /* Last used time (in jiffies). */
> +       spinlock_t lock;                /* Lock for values below. */
> +       u8 tcp_flags;                   /* Union of seen TCP flags. */
> +} ____cacheline_aligned;
> +
>  struct sw_flow {
>         struct rcu_head rcu;
>         struct hlist_node hash_node[2];
> @@ -155,12 +164,7 @@ struct sw_flow {
>         struct sw_flow_key unmasked_key;
>         struct sw_flow_mask *mask;
>         struct sw_flow_actions __rcu *sf_acts;
> -
> -       spinlock_t lock;        /* Lock for values below. */
> -       unsigned long used;     /* Last used time (in jiffies). */
> -       u64 packet_count;       /* Number of packets matched. */
> -       u64 byte_count;         /* Number of bytes matched. */
> -       u8 tcp_flags;           /* Union of seen TCP flags. */
> +       struct sw_flow_stats stats[];
>  };
>
>  struct arp_eth_header {
> @@ -177,7 +181,9 @@ struct arp_eth_header {
>         unsigned char       ar_tip[4];          /* target IP address
>  */
>  } __packed;
>
> -void ovs_flow_used(struct sw_flow *, struct sk_buff *);
> +void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb);
> +void ovs_flow_stats_get(struct sw_flow *flow, struct sw_flow_stats *res);
> +void ovs_sw_flow_stats_clear(struct sw_flow *flow);
>  u64 ovs_flow_used_time(unsigned long flow_jiffies);
>
>  int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
> diff --git a/datapath/flow_table.c b/datapath/flow_table.c
> index 98eb809..23ed9ee 100644
> --- a/datapath/flow_table.c
> +++ b/datapath/flow_table.c
> @@ -76,15 +76,19 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const
> struct sw_flow_key *src,
>  struct sw_flow *ovs_flow_alloc(void)
>  {
>         struct sw_flow *flow;
> +       int cpu;
>
>         flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
>         if (!flow)
>                 return ERR_PTR(-ENOMEM);
>
> -       spin_lock_init(&flow->lock);
>         flow->sf_acts = NULL;
>         flow->mask = NULL;
>
> +       memset(flow->stats, 0, num_possible_cpus() * sizeof(struct
> sw_flow_stats));
> +       for_each_possible_cpu(cpu)
> +               spin_lock_init(&flow->stats[cpu].lock);
> +
>         return flow;
>  }
>
> @@ -561,11 +565,15 @@ int ovs_flow_tbl_insert(struct flow_table *table,
> struct sw_flow *flow,
>   * Returns zero if successful or a negative error code. */
>  int ovs_flow_init(void)
>  {
> +       int flow_size;
> +
>         BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
>         BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
>
> -       flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow),
> 0,
> -                                       0, NULL);
> +       flow_size = sizeof(struct sw_flow) +
> +                   (num_possible_cpus() * sizeof(struct sw_flow_stats));
> +
> +       flow_cache = kmem_cache_create("sw_flow", flow_size, 0, 0, NULL);
>         if (flow_cache == NULL)
>                 return -ENOMEM;
>
> --
> 1.7.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.openvswitch.org/pipermail/ovs-dev/attachments/20131018/2293d1f0/attachment-0003.html>


More information about the dev mailing list