[ovs-dev] [PATCH v10 2/2] dpif-netdev: Refactor cycle counting

Stokes, Ian ian.stokes at intel.com
Mon Jan 15 15:00:51 UTC 2018


> Acked-by: Ilya Maximets <i.maximets at samsung.com>

Thanks to everyone for the work put into this. I've merged this to the DPDK_MERGE branch now.

Thanks
Ian
> 
> On 15.01.2018 14:27, Jan Scheurich wrote:
> > Simplify the historically grown TSC cycle counting in PMD threads.
> > Cycles are currently counted for the following purposes:
> >
> > 1. Measure PMD utilization
> >
> > PMD utilization is defined as the ratio of cycles spent in busy
> > iterations (at least one packet received or sent) over the total
> > number of cycles.
> >
> > This is already done in pmd_perf_start_iteration() and
> > pmd_perf_end_iteration() based on a TSC timestamp saved in current
> > iteration at start_iteration() and the actual TSC at end_iteration().
> > No dependency on intermediate cycle accounting.
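> >
> > For illustration, a minimal sketch of how the utilization ratio can be
> > derived from these two counters (the helper itself is hypothetical and
> > not part of this patch):
> >
> >     /* Hypothetical helper: PMD utilization is busy / (busy + idle),
> >      * using the PMD_CYCLES_ITER_* counters that are updated at the
> >      * end of each iteration. */
> >     static double
> >     pmd_utilization(const uint64_t stats[PMD_N_STATS])
> >     {
> >         uint64_t busy = stats[PMD_CYCLES_ITER_BUSY];
> >         uint64_t idle = stats[PMD_CYCLES_ITER_IDLE];
> >
> >         return (busy + idle) ? (double) busy / (busy + idle) : 0.0;
> >     }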
> >
> > 2. Measure the processing load per RX queue
> >
> > This comprises the cycles spent on polling and processing packets received
> > from the rx queue and the cycles spent on delayed sending of these
> > packets to tx queues (with time-based batching).
> >
> > The previous scheme using cycles_count_start(),
> > cycles_count_intermediate() and cycles_count_end(), originally
> > introduced to simplify cycle counting and save calls to
> > rte_get_tsc_cycles(), was rather obscuring things.
> >
> > Replace it with a nestable cycle_timer with start and stop functions
> > to embrace a code segment to be timed. The timed code may contain
> > arbitrary nested cycle_timers. The duration of nested timers is
> > excluded from the outer timer.
> >
> > The caller must ensure that each call to cycle_timer_start() is
> > followed by a call to cycle_timer_stop(). Failure to do so will lead
> > to an assertion failure or a memory leak.
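> >
> > A minimal sketch of nested usage (illustrative only; the upcall step is
> > an assumed example of nested work):
> >
> >     struct cycle_timer outer, inner;
> >
> >     cycle_timer_start(&pmd->perf_stats, &outer);
> >     /* ... poll the rx queue and process packets ... */
> >     cycle_timer_start(&pmd->perf_stats, &inner);
> >     /* ... e.g. time-consuming nested work such as an upcall ... */
> >     uint64_t inner_cycles = cycle_timer_stop(&pmd->perf_stats, &inner);
> >     /* 'outer' resumes here; 'inner_cycles' is excluded from it. */
> >     uint64_t outer_cycles = cycle_timer_stop(&pmd->perf_stats, &outer);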
> >
> > The new cycle_timer is used to measure the processing cycles per rx
> > queue.
> > This is not yet strictly necessary but will be made use of in a
> > subsequent commit.
> >
> > All cycle count functions and data are relocated to module
> > dpif-netdev-perf.
> >
> > Signed-off-by: Jan Scheurich <jan.scheurich at ericsson.com>
> > ---
> >  lib/dpif-netdev-perf.h | 110 +++++++++++++++++++++++++++++++++++++++++----
> >  lib/dpif-netdev.c      | 118 ++++++++++++++-----------------------------------
> >  2 files changed, 134 insertions(+), 94 deletions(-)
> >
> > diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h
> > index 53d60d3..5993c25 100644
> > --- a/lib/dpif-netdev-perf.h
> > +++ b/lib/dpif-netdev-perf.h
> > @@ -23,6 +23,11 @@
> >  #include <string.h>
> >  #include <math.h>
> >
> > +#ifdef DPDK_NETDEV
> > +#include <rte_config.h>
> > +#include <rte_cycles.h>
> > +#endif
> > +
> >  #include "openvswitch/vlog.h"
> >  #include "ovs-atomic.h"
> >  #include "timeval.h"
> > @@ -59,10 +64,6 @@ enum pmd_stat_type {
> >                               * recirculation. */
> >      PMD_STAT_SENT_PKTS,     /* Packets that have been sent. */
> >      PMD_STAT_SENT_BATCHES,  /* Number of batches sent. */
> > -    PMD_CYCLES_POLL_IDLE,   /* Cycles spent unsuccessful polling. */
> > -    PMD_CYCLES_POLL_BUSY,   /* Cycles spent successfully polling and
> > -                             * processing polled packets. */
> > -    PMD_CYCLES_OVERHEAD,    /* Cycles spent for other tasks. */
> >      PMD_CYCLES_ITER_IDLE,   /* Cycles spent in idle iterations. */
> >      PMD_CYCLES_ITER_BUSY,   /* Cycles spent in busy iterations. */
> >      PMD_N_STATS
> > @@ -85,11 +86,95 @@ struct pmd_counters {
> >
> >  struct pmd_perf_stats {
> >      /* Start of the current PMD iteration in TSC cycles.*/
> > +    uint64_t start_it_tsc;
> > +    /* Latest TSC time stamp taken in PMD. */
> >      uint64_t last_tsc;
> > +    /* If non-NULL, outermost cycle timer currently running in PMD. */
> > +    struct cycle_timer *cur_timer;
> >      /* Set of PMD counters with their zero offsets. */
> >      struct pmd_counters counters;
> >  };
> >
> > +/* Support for accurate timing of PMD execution on TSC clock cycle level.
> > + * These functions are intended to be invoked in the context of pmd
> > + * threads. */
> > +
> > +/* Read the TSC cycle register and cache it. Any function not requiring clock
> > + * cycle accuracy should read the cached value using cycles_counter_get() to
> > + * avoid the overhead of reading the TSC register. */
> > +
> > +static inline uint64_t
> > +cycles_counter_update(struct pmd_perf_stats *s)
> > +{
> > +#ifdef DPDK_NETDEV
> > +    return s->last_tsc = rte_get_tsc_cycles();
> > +#else
> > +    return s->last_tsc = 0;
> > +#endif
> > +}
> > +
> > +static inline uint64_t
> > +cycles_counter_get(struct pmd_perf_stats *s)
> > +{
> > +    return s->last_tsc;
> > +}
> > +
> > +/* A nestable timer for measuring execution time in TSC cycles.
> > + *
> > + * Usage:
> > + * struct cycle_timer timer;
> > + *
> > + * cycle_timer_start(pmd, &timer);
> > + * <Timed execution>
> > + * uint64_t cycles = cycle_timer_stop(pmd, &timer);
> > + *
> > + * The caller must guarantee that a call to cycle_timer_start() is always
> > + * paired with a call to cycle_timer_stop().
> > + *
> > + * It is possible to have nested cycle timers within the timed code. The
> > + * execution time measured by the nested timers is excluded from the time
> > + * measured by the embracing timer.
> > + */
> > +
> > +struct cycle_timer {
> > +    uint64_t start;
> > +    uint64_t suspended;
> > +    struct cycle_timer *interrupted;
> > +};
> > +
> > +static inline void
> > +cycle_timer_start(struct pmd_perf_stats *s,
> > +                  struct cycle_timer *timer)
> > +{
> > +    struct cycle_timer *cur_timer = s->cur_timer;
> > +    uint64_t now = cycles_counter_update(s);
> > +
> > +    if (cur_timer) {
> > +        cur_timer->suspended = now;
> > +    }
> > +    timer->interrupted = cur_timer;
> > +    timer->start = now;
> > +    timer->suspended = 0;
> > +    s->cur_timer = timer;
> > +}
> > +
> > +static inline uint64_t
> > +cycle_timer_stop(struct pmd_perf_stats *s,
> > +                 struct cycle_timer *timer)
> > +{
> > +    /* Assert that this is the current cycle timer. */
> > +    ovs_assert(s->cur_timer == timer);
> > +    uint64_t now = cycles_counter_update(s);
> > +    struct cycle_timer *intr_timer = timer->interrupted;
> > +
> > +    if (intr_timer) {
> > +        /* Adjust the start offset by the suspended cycles. */
> > +        intr_timer->start += now - intr_timer->suspended;
> > +    }
> > +    /* Restore suspended timer, if any. */
> > +    s->cur_timer = intr_timer;
> > +    return now - timer->start;
> > +}
> > +
> >  void pmd_perf_stats_init(struct pmd_perf_stats *s);
> >  void pmd_perf_stats_clear(struct pmd_perf_stats *s);
> >  void pmd_perf_read_counters(struct pmd_perf_stats *s,
> > @@ -115,16 +200,23 @@ pmd_perf_update_counter(struct pmd_perf_stats *s,
> >  }
> >
> >  static inline void
> > -pmd_perf_start_iteration(struct pmd_perf_stats *s, uint64_t now_tsc)
> > +pmd_perf_start_iteration(struct pmd_perf_stats *s)
> >  {
> > -    s->last_tsc = now_tsc;
> > +    if (OVS_LIKELY(s->last_tsc)) {
> > +        /* We assume here that last_tsc was updated immediately prior, at
> > +         * the end of the previous iteration, or just before the first
> > +         * iteration. */
> > +        s->start_it_tsc = s->last_tsc;
> > +    } else {
> > +        /* In case last_tsc has never been set before. */
> > +        s->start_it_tsc = cycles_counter_update(s);
> > +    }
> >  }
> >
> >  static inline void
> > -pmd_perf_end_iteration(struct pmd_perf_stats *s, uint64_t now_tsc,
> > -                       int rx_packets)
> > +pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets)
> >  {
> > -    uint64_t cycles = now_tsc - s->last_tsc;
> > +    uint64_t cycles = cycles_counter_update(s) - s->start_it_tsc;
> >
> >      if (rx_packets > 0) {
> >          pmd_perf_update_counter(s, PMD_CYCLES_ITER_BUSY, cycles);
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> > index deb63c6..e371d11 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -32,10 +32,6 @@
> >  #include <sys/stat.h>
> >  #include <unistd.h>
> >
> > -#ifdef DPDK_NETDEV
> > -#include <rte_cycles.h>
> > -#endif
> > -
> >  #include "bitmap.h"
> >  #include "cmap.h"
> >  #include "conntrack.h"
> > @@ -509,8 +505,6 @@ struct tx_port {
> >  struct dp_netdev_pmd_thread_ctx {
> >      /* Latest measured time. See 'pmd_thread_ctx_time_update()'. */
> >      long long now;
> > -    /* Used to count cycles. See 'cycles_count_end()' */
> > -    unsigned long long last_cycles;
> >  };
> >
> >  /* PMD: Poll modes drivers.  PMD accesses devices via polling to eliminate
> > @@ -3111,64 +3105,20 @@ dp_netdev_actions_free(struct dp_netdev_actions *actions)
> >      free(actions);
> >  }
> >  

> > -static inline unsigned long long
> > -cycles_counter(void)
> > -{
> > -#ifdef DPDK_NETDEV
> > -    return rte_get_tsc_cycles();
> > -#else
> > -    return 0;
> > -#endif
> > -}
> > -
> > -/* Fake mutex to make sure that the calls to cycles_count_* are balanced */
> > -extern struct ovs_mutex cycles_counter_fake_mutex;
> > -
> > -/* Start counting cycles.  Must be followed by 'cycles_count_end()' */
> > -static inline void
> > -cycles_count_start(struct dp_netdev_pmd_thread *pmd)
> > -    OVS_ACQUIRES(&cycles_counter_fake_mutex)
> > -    OVS_NO_THREAD_SAFETY_ANALYSIS
> > -{
> > -    pmd->ctx.last_cycles = cycles_counter();
> > -}
> > -
> > -/* Stop counting cycles and add them to the counter 'type' */
> > -static inline void
> > -cycles_count_end(struct dp_netdev_pmd_thread *pmd,
> > -                 enum pmd_stat_type type)
> > -    OVS_RELEASES(&cycles_counter_fake_mutex)
> > -    OVS_NO_THREAD_SAFETY_ANALYSIS
> > -{
> > -    unsigned long long interval = cycles_counter() - pmd->ctx.last_cycles;
> > -
> > -    pmd_perf_update_counter(&pmd->perf_stats, type, interval);
> > -}
> > -
> > -/* Calculate the intermediate cycle result and add to the counter 'type' */
> > -static inline void
> > -cycles_count_intermediate(struct dp_netdev_pmd_thread *pmd,
> > -                          struct dp_netdev_rxq *rxq,
> > -                          enum pmd_stat_type type)
> > -    OVS_NO_THREAD_SAFETY_ANALYSIS
> > +static void
> > +dp_netdev_rxq_set_cycles(struct dp_netdev_rxq *rx,
> > +                         enum rxq_cycles_counter_type type,
> > +                         unsigned long long cycles)
> >  {
> > -    unsigned long long new_cycles = cycles_counter();
> > -    unsigned long long interval = new_cycles - pmd->ctx.last_cycles;
> > -    pmd->ctx.last_cycles = new_cycles;
> > -
> > -    pmd_perf_update_counter(&pmd->perf_stats, type, interval);
> > -    if (rxq && (type == PMD_CYCLES_POLL_BUSY)) {
> > -        /* Add to the amount of current processing cycles. */
> > -        non_atomic_ullong_add(&rxq->cycles[RXQ_CYCLES_PROC_CURR], interval);
> > -    }
> > +   atomic_store_relaxed(&rx->cycles[type], cycles);
> >  }
> >
> >  static void
> > -dp_netdev_rxq_set_cycles(struct dp_netdev_rxq *rx,
> > +dp_netdev_rxq_add_cycles(struct dp_netdev_rxq *rx,
> >                           enum rxq_cycles_counter_type type,
> >                           unsigned long long cycles)
> >  {
> > -   atomic_store_relaxed(&rx->cycles[type], cycles);
> > +    non_atomic_ullong_add(&rx->cycles[type], cycles);
> >  }
> >
> >  static uint64_t
> > @@ -3234,27 +3184,40 @@ dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd)
> >
> >  static int
> >  dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
> > -                           struct netdev_rxq *rx,
> > +                           struct dp_netdev_rxq *rxq,
> >                             odp_port_t port_no)
> >  {
> >      struct dp_packet_batch batch;
> > +    struct cycle_timer timer;
> >      int error;
> >      int batch_cnt = 0;
> >
> > +    /* Measure duration for polling and processing rx burst. */
> > +    cycle_timer_start(&pmd->perf_stats, &timer);
> >      dp_packet_batch_init(&batch);
> > -    error = netdev_rxq_recv(rx, &batch);
> > +    error = netdev_rxq_recv(rxq->rx, &batch);
> >      if (!error) {
> > +        /* At least one packet received. */
> >          *recirc_depth_get() = 0;
> >          pmd_thread_ctx_time_update(pmd);
> >
> >          batch_cnt = batch.count;
> >          dp_netdev_input(pmd, &batch, port_no);
> >          dp_netdev_pmd_flush_output_packets(pmd);
> > -    } else if (error != EAGAIN && error != EOPNOTSUPP) {
> > -        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> >
> > -        VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
> > -                    netdev_rxq_get_name(rx), ovs_strerror(error));
> > +        /* Assign processing cycles to rx queue. */
> > +        uint64_t cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
> > +        dp_netdev_rxq_add_cycles(rxq, RXQ_CYCLES_PROC_CURR, cycles);
> > +
> > +    } else {
> > +        /* Discard cycles. */
> > +        cycle_timer_stop(&pmd->perf_stats, &timer);
> > +        if (error != EAGAIN && error != EOPNOTSUPP) {
> > +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> > +
> > +            VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
> > +                    netdev_rxq_get_name(rxq->rx), ovs_strerror(error));
> > +        }
> >      }
> >
> >      return batch_cnt;
> > @@ -3880,30 +3843,22 @@ dpif_netdev_run(struct dpif *dpif)
> >      struct dp_netdev *dp = get_dp_netdev(dpif);
> >      struct dp_netdev_pmd_thread *non_pmd;
> >      uint64_t new_tnl_seq;
> > -    int process_packets = 0;
> >
> >      ovs_mutex_lock(&dp->port_mutex);
> >      non_pmd = dp_netdev_get_pmd(dp, NON_PMD_CORE_ID);
> >      if (non_pmd) {
> >          ovs_mutex_lock(&dp->non_pmd_mutex);
> > -        cycles_count_start(non_pmd);
> >          HMAP_FOR_EACH (port, node, &dp->ports) {
> >              if (!netdev_is_pmd(port->netdev)) {
> >                  int i;
> >
> >                  for (i = 0; i < port->n_rxq; i++) {
> > -                    process_packets =
> > -                        dp_netdev_process_rxq_port(non_pmd,
> > -                                                   port->rxqs[i].rx,
> > -                                                   port->port_no);
> > -                    cycles_count_intermediate(non_pmd, NULL,
> > -                                              process_packets
> > -                                              ? PMD_CYCLES_POLL_BUSY
> > -                                              : PMD_CYCLES_POLL_IDLE);
> > +                    dp_netdev_process_rxq_port(non_pmd,
> > +                                               &port->rxqs[i],
> > +                                               port->port_no);
> >                  }
> >              }
> >          }
> > -        cycles_count_end(non_pmd, PMD_CYCLES_POLL_IDLE);
> >          pmd_thread_ctx_time_update(non_pmd);
> >          dpif_netdev_xps_revalidate_pmd(non_pmd, false);
> >          ovs_mutex_unlock(&dp->non_pmd_mutex);
> > @@ -4082,18 +4037,14 @@ reload:
> >          lc = UINT_MAX;
> >      }
> >
> > -    cycles_count_start(pmd);
> > +    cycles_counter_update(s);
> >      for (;;) {
> >          uint64_t iter_packets = 0;
> > -        pmd_perf_start_iteration(s, pmd->ctx.last_cycles);
> > +        pmd_perf_start_iteration(s);
> >          for (i = 0; i < poll_cnt; i++) {
> >              process_packets =
> > -                dp_netdev_process_rxq_port(pmd, poll_list[i].rxq->rx,
> > +                dp_netdev_process_rxq_port(pmd, poll_list[i].rxq,
> >                                             poll_list[i].port_no);
> > -            cycles_count_intermediate(pmd, poll_list[i].rxq,
> > -                                      process_packets
> > -                                      ? PMD_CYCLES_POLL_BUSY
> > -                                      : PMD_CYCLES_POLL_IDLE);
> >              iter_packets += process_packets;
> >          }
> >
> > @@ -4115,13 +4066,10 @@ reload:
> >              if (reload) {
> >                  break;
> >              }
> > -            cycles_count_intermediate(pmd, NULL, PMD_CYCLES_OVERHEAD);
> >          }
> > -        pmd_perf_end_iteration(s, pmd->ctx.last_cycles, iter_packets);
> > +        pmd_perf_end_iteration(s, iter_packets);
> >      }
> >
> > -    cycles_count_end(pmd, PMD_CYCLES_OVERHEAD);
> > -
> >      poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
> >      exiting = latch_is_set(&pmd->exit_latch);
> >      /* Signal here to make sure the pmd finishes
> >

