[ovs-dev] [urcu-fixes 4/4] ovs-rcu: Log a helpful warning when ovsrcu_synchronize() stalls.
Alex Wang
alexw at nicira.com
Mon Apr 28 21:10:27 UTC 2014
Acked-by: Alex Wang <alexw at nicira.com>
On Mon, Apr 28, 2014 at 9:06 AM, Ben Pfaff <blp at nicira.com> wrote:
> This made it easier for me to find a thread that was causing stalls.
>
> Signed-off-by: Ben Pfaff <blp at nicira.com>
> ---
> lib/ovs-rcu.c | 24 +++++++++++++++++++++++-
> lib/ovs-thread.c | 4 +++-
> 2 files changed, 26 insertions(+), 2 deletions(-)
>
> diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c
> index c1ac61a..8a12564 100644
> --- a/lib/ovs-rcu.c
> +++ b/lib/ovs-rcu.c
> @@ -21,6 +21,10 @@
> #include "ovs-thread.h"
> #include "poll-loop.h"
> #include "seq.h"
> +#include "timeval.h"
> +#include "vlog.h"
> +
> +VLOG_DEFINE_THIS_MODULE(ovs_rcu);
>
> struct ovsrcu_cb {
> void (*function)(void *aux);
> @@ -34,11 +38,12 @@ struct ovsrcu_cbset {
> };
>
> struct ovsrcu_perthread {
> - struct list list_node; /* In global list. */
> + struct list list_node; /* In global list. */
>
> struct ovs_mutex mutex;
> uint64_t seqno;
> struct ovsrcu_cbset *cbset;
> + char name[16]; /* This thread's name. */
> };
>
> static struct seq *global_seqno;
> @@ -70,6 +75,8 @@ ovsrcu_perthread_get(void)
> ovs_mutex_init(&perthread->mutex);
> perthread->seqno = seq_read(global_seqno);
> perthread->cbset = NULL;
> + ovs_strlcpy(perthread->name, get_subprogram_name(),
> + sizeof perthread->name);
>
> ovs_mutex_lock(&ovsrcu_threads_mutex);
> list_push_back(&ovsrcu_threads, &perthread->list_node);
> @@ -144,7 +151,9 @@ ovsrcu_is_quiescent(void)
> static void
> ovsrcu_synchronize(void)
> {
> + unsigned int warning_threshold = 1000;
> uint64_t target_seqno;
> + long long int start;
>
> if (single_threaded()) {
> return;
> @@ -152,15 +161,20 @@ ovsrcu_synchronize(void)
>
> target_seqno = seq_read(global_seqno);
> ovsrcu_quiesce_start();
> + start = time_msec();
>
> for (;;) {
> uint64_t cur_seqno = seq_read(global_seqno);
> struct ovsrcu_perthread *perthread;
> + char stalled_thread[16];
> + unsigned int elapsed;
> bool done = true;
>
> ovs_mutex_lock(&ovsrcu_threads_mutex);
> LIST_FOR_EACH (perthread, list_node, &ovsrcu_threads) {
> if (perthread->seqno <= target_seqno) {
> + ovs_strlcpy(stalled_thread, perthread->name,
> + sizeof stalled_thread);
> done = false;
> break;
> }
> @@ -171,6 +185,14 @@ ovsrcu_synchronize(void)
> break;
> }
>
> + elapsed = time_msec() - start;
> + if (elapsed >= warning_threshold) {
> + VLOG_WARN("blocked %u ms waiting for %s to quiesce",
> + elapsed, stalled_thread);
> + warning_threshold *= 2;
> + }
> + poll_timer_wait_until(start + warning_threshold);
> +
> seq_wait(global_seqno, cur_seqno);
> poll_block();
> }
> diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c
> index d835b39..19edf8f 100644
> --- a/lib/ovs-thread.c
> +++ b/lib/ovs-thread.c
> @@ -274,9 +274,11 @@ ovsthread_wrapper(void *aux_)
> aux = *auxp;
> free(auxp);
>
> + /* The order of the following calls is important, because
> + * ovsrcu_quiesce_end() saves a copy of the thread name. */
> set_subprogram_name("%s%u", aux.name, id);
> -
> ovsrcu_quiesce_end();
> +
> return aux.start(aux.arg);
> }
>
> --
> 1.7.10.4
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.openvswitch.org/pipermail/ovs-dev/attachments/20140428/c89ca9db/attachment-0005.html>
More information about the dev
mailing list