[ovs-dev] [PATCH] Do RCU synchronization at fixed interval in PMD main loop.
Nitin Katiyar
nitin.katiyar at ericsson.com
Tue Aug 13 05:12:55 UTC 2019
Hi
A gentle reminder. Please review and provide the feedback.
Regards,
Nitin
> -----Original Message-----
> From: Nitin Katiyar
> Sent: Wednesday, August 07, 2019 7:52 PM
> To: ovs-dev at openvswitch.org
> Cc: Nitin Katiyar <nitin.katiyar at ericsson.com>; Anju Thomas
> <anju.thomas at ericsson.com>
> Subject: [PATCH] Do RCU synchronization at fixed interval in PMD main loop.
>
> Each PMD updates the global sequence number for RCU synchronization
> purpose with other OVS threads. This is done at every 1025th iteration in
> PMD main loop.
>
> If the PMD thread is responsible for polling large number of queues that are
> carrying traffic, it spends a lot of time processing packets and this results in
> significant delay in performing the housekeeping activities.
>
> If the OVS main thread is waiting to synchronize with the PMD threads and if
> those threads delay performing housekeeping activities for more than 3 sec
> then LACP processing will be impacted and it will lead to LACP flaps. Similarly,
> other control protocols run by the OVS main thread are impacted.
>
> E.g., a PMD thread polling 200 ports/queues with an average of 1600
> processing cycles per packet with batch size of 32 may take 10240000
> (200 * 1600 * 32) cycles per iteration. In system with 2.0 GHz CPU it means
> more than 5 ms per iteration. So, for 1024 iterations to complete it would be
> more than 5 seconds.
>
> This gets worse when there are PMD threads which are less loaded.
> It reduces possibility of getting mutex lock in ovsrcu_try_quiesce() by heavily
> loaded PMD and next attempt to quiesce would be after 1024 iterations.
>
> With this patch, PMD RCU synchronization will be performed at a fixed
> time interval instead of after a fixed number of iterations. This will ensure that even if
> the packet processing load is high the RCU synchronization will not be delayed
> long.
>
> Co-authored-by: Anju Thomas <anju.thomas at ericsson.com>
>
> Signed-off-by: Nitin Katiyar <nitin.katiyar at ericsson.com>
> Signed-off-by: Anju Thomas <anju.thomas at ericsson.com>
> ---
> lib/dpif-netdev-perf.c | 16 ----------------
> lib/dpif-netdev-perf.h | 17 +++++++++++++++++
> lib/dpif-netdev.c      | 27 +++++++++++++++++++++++++++
> 3 files changed, 44 insertions(+), 16 deletions(-)
>
> diff --git a/lib/dpif-netdev-perf.c b/lib/dpif-netdev-perf.c
> index e7ed49e..c888e5d 100644
> --- a/lib/dpif-netdev-perf.c
> +++ b/lib/dpif-netdev-perf.c
> @@ -43,22 +43,6 @@ uint64_t iter_cycle_threshold;
>
> static struct vlog_rate_limit latency_rl = VLOG_RATE_LIMIT_INIT(600, 600);
>
> -#ifdef DPDK_NETDEV
> -static uint64_t
> -get_tsc_hz(void)
> -{
> - return rte_get_tsc_hz();
> -}
> -#else
> -/* This function is only invoked from PMD threads which depend on DPDK.
> - * A dummy function is sufficient when building without DPDK_NETDEV. */
> -static uint64_t
> -get_tsc_hz(void)
> -{
> - return 1;
> -}
> -#endif
> -
> /* Histogram functions. */
>
> static void
> diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h
> index 244813f..3f2ee1c 100644
> --- a/lib/dpif-netdev-perf.h
> +++ b/lib/dpif-netdev-perf.h
> @@ -187,6 +187,23 @@ struct pmd_perf_stats {
> char *log_reason;
> };
>
> +#ifdef DPDK_NETDEV
> +static inline uint64_t
> +get_tsc_hz(void)
> +{
> + return rte_get_tsc_hz();
> +}
> +#else
> +/* This function is only invoked from PMD threads which depend on DPDK.
> + * A dummy function is sufficient when building without DPDK_NETDEV. */
> +static inline uint64_t
> +get_tsc_hz(void)
> +{
> + return 1;
> +}
> +#endif
> +
> +
> #ifdef __linux__
> static inline uint64_t
> rdtsc_syscall(struct pmd_perf_stats *s)
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index d0a1c58..c3d6835 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -751,6 +751,9 @@ struct dp_netdev_pmd_thread {
>
> /* Set to true if the pmd thread needs to be reloaded. */
> bool need_reload;
> +
> + /* Last time (in tsc) when PMD was last quiesced */
> + uint64_t last_rcu_quiesced;
> };
>
> /* Interface to netdev-based datapath. */
> @@ -5445,6 +5448,7 @@ pmd_thread_main(void *f_)
> int poll_cnt;
> int i;
> int process_packets = 0;
> + uint64_t rcu_quiesce_interval = 0;
>
> poll_list = NULL;
>
> @@ -5486,6 +5490,13 @@ reload:
> pmd->intrvl_tsc_prev = 0;
> atomic_store_relaxed(&pmd->intrvl_cycles, 0);
> cycles_counter_update(s);
> +
> + if (get_tsc_hz() > 1) {
> + /* Calculate ~10 ms interval. */
> + rcu_quiesce_interval = get_tsc_hz() / 100;
> + pmd->last_rcu_quiesced = cycles_counter_get(s);
> + }
> +
> /* Protect pmd stats from external clearing while polling. */
> ovs_mutex_lock(&pmd->perf_stats.stats_mutex);
> for (;;) {
> @@ -5493,6 +5504,19 @@ reload:
>
> pmd_perf_start_iteration(s);
>
> + /* Do RCU synchronization at fixed interval instead of doing it
> + * at fixed number of iterations. This ensures that synchronization
> + * would not be delayed long even at high load of packet
> + * processing. */
> +
> + if (rcu_quiesce_interval &&
> + ((cycles_counter_get(s) - pmd->last_rcu_quiesced) >
> + rcu_quiesce_interval)) {
> + if (!ovsrcu_try_quiesce()) {
> + pmd->last_rcu_quiesced = cycles_counter_get(s);
> + }
> + }
> +
> for (i = 0; i < poll_cnt; i++) {
>
> if (!poll_list[i].rxq_enabled) {
> @@ -5527,6 +5551,9 @@ reload:
> dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt);
> if (!ovsrcu_try_quiesce()) {
> emc_cache_slow_sweep(&((pmd->flow_cache).emc_cache));
> + if (rcu_quiesce_interval) {
> + pmd->last_rcu_quiesced = cycles_counter_get(s);
> + }
> }
>
> for (i = 0; i < poll_cnt; i++) {
> --
> 1.9.1
More information about the dev
mailing list