[ovs-dev] [PATCH] Do RCU synchronization at fixed interval in PMD main loop.

Nitin Katiyar nitin.katiyar at ericsson.com
Tue Aug 13 05:12:55 UTC 2019


Hi
A gentle reminder. Please review and provide the feedback.

Regards,
Nitin

> -----Original Message-----
> From: Nitin Katiyar
> Sent: Wednesday, August 07, 2019 7:52 PM
> To: ovs-dev at openvswitch.org
> Cc: Nitin Katiyar <nitin.katiyar at ericsson.com>; Anju Thomas
> <anju.thomas at ericsson.com>
> Subject: [PATCH] Do RCU synchronization at fixed interval in PMD main loop.
> 
> Each PMD updates the global sequence number for RCU synchronization
> purpose with other OVS threads. This is done at every 1025th iteration in
> PMD main loop.
> 
> If the PMD thread is responsible for polling a large number of queues that are
> carrying traffic, it spends a lot of time processing packets and this results in
> significant delay in performing the housekeeping activities.
> 
> If the OVS main thread is waiting to synchronize with the PMD threads and if
> those threads delay performing housekeeping activities for more than 3 sec
> then LACP processing will be impacted and it will lead to LACP flaps. Similarly,
> other control protocols run by the OVS main thread are impacted.
> 
> For example, a PMD thread polling 200 ports/queues with an average of 1600
> processing cycles per packet with batch size of 32 may take 10240000
> (200 * 1600 * 32) cycles per iteration. In system with 2.0 GHz CPU it means
> more than 5 ms per iteration. So, for 1024 iterations to complete it would be
> more than 5 seconds.
> 
> This gets worse when there are PMD threads which are less loaded.
> It reduces the possibility of getting the mutex lock in ovsrcu_try_quiesce() by a
> heavily loaded PMD, and the next attempt to quiesce would be after 1024 iterations.
> 
> With this patch, PMD RCU synchronization will be performed at a fixed time
> interval instead of after a fixed number of iterations. This ensures that even
> if the packet processing load is high, RCU synchronization will not be delayed
> for long.
> 
> Co-authored-by: Anju Thomas <anju.thomas at ericsson.com>
> 
> Signed-off-by: Nitin Katiyar <nitin.katiyar at ericsson.com>
> Signed-off-by: Anju Thomas <anju.thomas at ericsson.com>
> ---
>  lib/dpif-netdev-perf.c | 16 ----------------  lib/dpif-netdev-perf.h | 17
> +++++++++++++++++
>  lib/dpif-netdev.c      | 27 +++++++++++++++++++++++++++
>  3 files changed, 44 insertions(+), 16 deletions(-)
> 
> diff --git a/lib/dpif-netdev-perf.c b/lib/dpif-netdev-perf.c index
> e7ed49e..c888e5d 100644
> --- a/lib/dpif-netdev-perf.c
> +++ b/lib/dpif-netdev-perf.c
> @@ -43,22 +43,6 @@ uint64_t iter_cycle_threshold;
> 
>  static struct vlog_rate_limit latency_rl = VLOG_RATE_LIMIT_INIT(600, 600);
> 
> -#ifdef DPDK_NETDEV
> -static uint64_t
> -get_tsc_hz(void)
> -{
> -    return rte_get_tsc_hz();
> -}
> -#else
> -/* This function is only invoked from PMD threads which depend on DPDK.
> - * A dummy function is sufficient when building without DPDK_NETDEV. */ -
> static uint64_t
> -get_tsc_hz(void)
> -{
> -    return 1;
> -}
> -#endif
> -
>  /* Histogram functions. */
> 
>  static void
> diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h index
> 244813f..3f2ee1c 100644
> --- a/lib/dpif-netdev-perf.h
> +++ b/lib/dpif-netdev-perf.h
> @@ -187,6 +187,23 @@ struct pmd_perf_stats {
>      char *log_reason;
>  };
> 
> +#ifdef DPDK_NETDEV
> +static inline uint64_t
> +get_tsc_hz(void)
> +{
> +    return rte_get_tsc_hz();
> +}
> +#else
> +/* This function is only invoked from PMD threads which depend on DPDK.
> + * A dummy function is sufficient when building without DPDK_NETDEV. */
> +static inline uint64_t
> +get_tsc_hz(void)
> +{
> +    return 1;
> +}
> +#endif
> +
> +
>  #ifdef __linux__
>  static inline uint64_t
>  rdtsc_syscall(struct pmd_perf_stats *s) diff --git a/lib/dpif-netdev.c b/lib/dpif-
> netdev.c index d0a1c58..c3d6835 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -751,6 +751,9 @@ struct dp_netdev_pmd_thread {
> 
>      /* Set to true if the pmd thread needs to be reloaded. */
>      bool need_reload;
> +
> +    /* Last time (in tsc) when PMD was last quiesced */
> +    uint64_t last_rcu_quiesced;
>  };
> 
>  /* Interface to netdev-based datapath. */ @@ -5445,6 +5448,7 @@
> pmd_thread_main(void *f_)
>      int poll_cnt;
>      int i;
>      int process_packets = 0;
> +    uint64_t rcu_quiesce_interval = 0;
> 
>      poll_list = NULL;
> 
> @@ -5486,6 +5490,13 @@ reload:
>      pmd->intrvl_tsc_prev = 0;
>      atomic_store_relaxed(&pmd->intrvl_cycles, 0);
>      cycles_counter_update(s);
> +
> +    if (get_tsc_hz() > 1) {
> +        /* Calculate ~10 ms interval. */
> +        rcu_quiesce_interval = get_tsc_hz() / 100;
> +        pmd->last_rcu_quiesced = cycles_counter_get(s);
> +    }
> +
>      /* Protect pmd stats from external clearing while polling. */
>      ovs_mutex_lock(&pmd->perf_stats.stats_mutex);
>      for (;;) {
> @@ -5493,6 +5504,19 @@ reload:
> 
>          pmd_perf_start_iteration(s);
> 
> +        /* Do RCU synchronization at fixed interval instead of doing it
> +         * at fixed number of iterations. This ensures that synchronization
> +         * would not be delayed long even at high load of packet
> +         * processing. */
> +
> +        if (rcu_quiesce_interval &&
> +            ((cycles_counter_get(s) - pmd->last_rcu_quiesced) >
> +             rcu_quiesce_interval)) {
> +            if (!ovsrcu_try_quiesce()) {
> +                pmd->last_rcu_quiesced = cycles_counter_get(s);
> +            }
> +        }
> +
>          for (i = 0; i < poll_cnt; i++) {
> 
>              if (!poll_list[i].rxq_enabled) { @@ -5527,6 +5551,9 @@ reload:
>              dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt);
>              if (!ovsrcu_try_quiesce()) {
>                  emc_cache_slow_sweep(&((pmd->flow_cache).emc_cache));
> +                if (rcu_quiesce_interval) {
> +                    pmd->last_rcu_quiesced = cycles_counter_get(s);
> +                }
>              }
> 
>              for (i = 0; i < poll_cnt; i++) {
> --
> 1.9.1



More information about the dev mailing list