[ovs-dev] [PATCH V2] dpif-netdev: Do RCU synchronization at fixed interval in PMD main loop.

Nitin Katiyar nitin.katiyar at ericsson.com
Wed Aug 28 08:30:00 UTC 2019


Hi,
Please provide your feedback.

Regards,
Nitin

> -----Original Message-----
> From: Nitin Katiyar
> Sent: Thursday, August 22, 2019 10:24 PM
> To: ovs-dev at openvswitch.org
> Cc: Nitin Katiyar <nitin.katiyar at ericsson.com>; Anju Thomas
> <anju.thomas at ericsson.com>
> Subject: [PATCH V2] dpif-netdev: Do RCU synchronization at fixed interval in
> PMD main loop.
> 
> Each PMD updates the global sequence number used for RCU synchronization
> with other OVS threads. This is done every 1025th iteration of the PMD
> main loop.
> 
> If a PMD thread is responsible for polling a large number of queues that
> are carrying traffic, it spends a lot of time processing packets, which
> significantly delays its housekeeping activities.
> 
> If the OVS main thread is waiting to synchronize with the PMD threads and
> those threads delay their housekeeping for more than 3 seconds, LACP
> processing is impacted and LACP flaps result. Other control protocols run
> by the OVS main thread are affected in the same way.
> 
> For example, a PMD thread polling 200 ports/queues with an average of
> 1600 processing cycles per packet and a batch size of 32 may take
> 10240000 (200 * 1600 * 32) cycles per iteration. On a system with a
> 2.0 GHz CPU this is more than 5 ms per iteration, so completing 1024
> iterations takes more than 5 seconds.
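> 
> (Worked out explicitly: 200 * 1600 * 32 = 10240000 cycles per iteration;
> 10240000 cycles / 2.0 GHz = 5.12 ms; 1024 * 5.12 ms is roughly 5.2 s
> between quiesce attempts.)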
> 
> This gets worse when some PMD threads are lightly loaded: they reduce the
> chance that a heavily loaded PMD gets the mutex lock in
> ovsrcu_try_quiesce(), and that PMD's next attempt to quiesce only comes
> after another 1024 iterations.
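> 
> For reference, the relevant shape of ovsrcu_try_quiesce() in
> lib/ovs-rcu.c is roughly the following sketch (details elided); it only
> quiesces when the global sequence lock can be taken without blocking:
> 
>     int ret = EBUSY;
> 
>     if (!seq_try_lock()) {
>         /* ...publish this thread's seqno, bump the global sequence
>          * number, release the lock... */
>         ret = 0;
>     }
>     return ret;
> 
> A heavily loaded PMD that loses this race gets EBUSY back and, with the
> current code, does not retry until its next 1025-iteration housekeeping
> pass.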
> 
> With this patch, PMD RCU synchronization is attempted at a fixed time
> interval instead of after a fixed number of iterations. This ensures that
> RCU synchronization is not delayed for long even when the packet
> processing load is high.
> 
> Co-authored-by: Anju Thomas <anju.thomas at ericsson.com>
> Signed-off-by: Anju Thomas <anju.thomas at ericsson.com>
> Signed-off-by: Nitin Katiyar <nitin.katiyar at ericsson.com>
> ---
>  lib/dpif-netdev.c | 22 ++++++++++++++++++++++
>  1 file changed, 22 insertions(+)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index d0a1c58..63b6cb9 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -228,6 +228,9 @@ struct dfc_cache {
>   * and used during rxq to pmd assignment. */
>  #define PMD_RXQ_INTERVAL_MAX 6
> 
> +/* Time in microseconds to try RCU quiescing. */
> +#define PMD_RCU_QUIESCE_INTERVAL 10000LL
> +
>  struct dpcls {
>      struct cmap_node node;      /* Within dp_netdev_pmd_thread.classifiers */
>      odp_port_t in_port;
> @@ -751,6 +754,9 @@ struct dp_netdev_pmd_thread {
> 
>      /* Set to true if the pmd thread needs to be reloaded. */
>      bool need_reload;
> +
> +    /* Next time when PMD should try RCU quiescing. */
> +    long long next_rcu_quiesce;
>  };
> 
>  /* Interface to netdev-based datapath. */
> @@ -5486,6 +5492,8 @@ reload:
>      pmd->intrvl_tsc_prev = 0;
>      atomic_store_relaxed(&pmd->intrvl_cycles, 0);
>      cycles_counter_update(s);
> +
> +    pmd->next_rcu_quiesce = pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
>      /* Protect pmd stats from external clearing while polling. */
>      ovs_mutex_lock(&pmd->perf_stats.stats_mutex);
>      for (;;) {
> @@ -5493,6 +5501,17 @@ reload:
> 
>          pmd_perf_start_iteration(s);
> 
> +        /* Do RCU synchronization at a fixed interval instead of after a
> +         * fixed number of iterations. This ensures that synchronization
> +         * is not delayed for long even under a high packet processing
> +         * load. */
> +        if (pmd->ctx.now > pmd->next_rcu_quiesce) {
> +            if (!ovsrcu_try_quiesce()) {
> +                pmd->next_rcu_quiesce =
> +                    pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
> +            }
> +        }
> +
>          for (i = 0; i < poll_cnt; i++) {
> 
>              if (!poll_list[i].rxq_enabled) {
> @@ -5527,6 +5546,8 @@ reload:
>              dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt);
>              if (!ovsrcu_try_quiesce()) {
>                  emc_cache_slow_sweep(&((pmd->flow_cache).emc_cache));
> +                pmd->next_rcu_quiesce =
> +                    pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
>              }
> 
>              for (i = 0; i < poll_cnt; i++) {
> @@ -5986,6 +6007,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
>      pmd->ctx.last_rxq = NULL;
>      pmd_thread_ctx_time_update(pmd);
>      pmd->next_optimization = pmd->ctx.now + DPCLS_OPTIMIZATION_INTERVAL;
> +    pmd->next_rcu_quiesce = pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
>      pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN;
>      hmap_init(&pmd->poll_list);
>      hmap_init(&pmd->tx_ports);
> --
> 1.9.1


