[ovs-dev] [urcu-fixes 4/4] ovs-rcu: Log a helpful warning when ovsrcu_synchronize() stalls.

Alex Wang alexw at nicira.com
Mon Apr 28 21:10:27 UTC 2014


Acked-by: Alex Wang <alexw at nicira.com>


On Mon, Apr 28, 2014 at 9:06 AM, Ben Pfaff <blp at nicira.com> wrote:

> This made it easier for me to find a thread that was causing stalls.
>
> Signed-off-by: Ben Pfaff <blp at nicira.com>
> ---
>  lib/ovs-rcu.c    |   24 +++++++++++++++++++++++-
>  lib/ovs-thread.c |    4 +++-
>  2 files changed, 26 insertions(+), 2 deletions(-)
>
> diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c
> index c1ac61a..8a12564 100644
> --- a/lib/ovs-rcu.c
> +++ b/lib/ovs-rcu.c
> @@ -21,6 +21,10 @@
>  #include "ovs-thread.h"
>  #include "poll-loop.h"
>  #include "seq.h"
> +#include "timeval.h"
> +#include "vlog.h"
> +
> +VLOG_DEFINE_THIS_MODULE(ovs_rcu);
>
>  struct ovsrcu_cb {
>      void (*function)(void *aux);
> @@ -34,11 +38,12 @@ struct ovsrcu_cbset {
>  };
>
>  struct ovsrcu_perthread {
> -    struct list list_node;           /* In global list. */
> +    struct list list_node;      /* In global list. */
>
>      struct ovs_mutex mutex;
>      uint64_t seqno;
>      struct ovsrcu_cbset *cbset;
> +    char name[16];              /* This thread's name. */
>  };
>
>  static struct seq *global_seqno;
> @@ -70,6 +75,8 @@ ovsrcu_perthread_get(void)
>          ovs_mutex_init(&perthread->mutex);
>          perthread->seqno = seq_read(global_seqno);
>          perthread->cbset = NULL;
> +        ovs_strlcpy(perthread->name, get_subprogram_name(),
> +                    sizeof perthread->name);
>
>          ovs_mutex_lock(&ovsrcu_threads_mutex);
>          list_push_back(&ovsrcu_threads, &perthread->list_node);
> @@ -144,7 +151,9 @@ ovsrcu_is_quiescent(void)
>  static void
>  ovsrcu_synchronize(void)
>  {
> +    unsigned int warning_threshold = 1000;
>      uint64_t target_seqno;
> +    long long int start;
>
>      if (single_threaded()) {
>          return;
> @@ -152,15 +161,20 @@ ovsrcu_synchronize(void)
>
>      target_seqno = seq_read(global_seqno);
>      ovsrcu_quiesce_start();
> +    start = time_msec();
>
>      for (;;) {
>          uint64_t cur_seqno = seq_read(global_seqno);
>          struct ovsrcu_perthread *perthread;
> +        char stalled_thread[16];
> +        unsigned int elapsed;
>          bool done = true;
>
>          ovs_mutex_lock(&ovsrcu_threads_mutex);
>          LIST_FOR_EACH (perthread, list_node, &ovsrcu_threads) {
>              if (perthread->seqno <= target_seqno) {
> +                ovs_strlcpy(stalled_thread, perthread->name,
> +                            sizeof stalled_thread);
>                  done = false;
>                  break;
>              }
> @@ -171,6 +185,14 @@ ovsrcu_synchronize(void)
>              break;
>          }
>
> +        elapsed = time_msec() - start;
> +        if (elapsed >= warning_threshold) {
> +            VLOG_WARN("blocked %u ms waiting for %s to quiesce",
> +                      elapsed, stalled_thread);
> +            warning_threshold *= 2;
> +        }
> +        poll_timer_wait_until(start + warning_threshold);
> +
>          seq_wait(global_seqno, cur_seqno);
>          poll_block();
>      }
> diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c
> index d835b39..19edf8f 100644
> --- a/lib/ovs-thread.c
> +++ b/lib/ovs-thread.c
> @@ -274,9 +274,11 @@ ovsthread_wrapper(void *aux_)
>      aux = *auxp;
>      free(auxp);
>
> +    /* The order of the following calls is important, because
> +     * ovsrcu_quiesce_end() saves a copy of the thread name. */
>      set_subprogram_name("%s%u", aux.name, id);
> -
>      ovsrcu_quiesce_end();
> +
>      return aux.start(aux.arg);
>  }
>
> --
> 1.7.10.4
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.openvswitch.org/pipermail/ovs-dev/attachments/20140428/c89ca9db/attachment-0005.html>


More information about the dev mailing list