[ovs-dev] [PATCH 1/1] ovs-thread: Fix thread id for threads not started with ovs_thread_create()

Ilya Maximets i.maximets at samsung.com
Fri May 25 13:30:22 UTC 2018


> When ping-ponging a live VM migration between two machines running
> OVS-DPDK, every now and then the ping misses would increase
> dramatically. For example:
> 
> ===========Stream Rate: 3Mpps===========
> No Stream_Rate Downtime Totaltime Ping_Loss Moongen_Loss
>  0       3Mpps      128     13974       115      7168374
>  1       3Mpps      145     13620        17      1169770
>  2       3Mpps      140     14499       116      7141175
>  3       3Mpps      142     13358        16      1150606
>  4       3Mpps      136     14004        16      1124020
>  5       3Mpps      139     15494       214     13170452
>  6       3Mpps      136     15610       217     13282413
>  7       3Mpps      146     13194        17      1167512
>  8       3Mpps      148     12871        16      1162655
>  9       3Mpps      137     15615       214     13170656
> 
> I identified this issue as being introduced by OVS commit
> https://github.com/openvswitch/ovs/commit/f3e7ec254738364101eed8f04b1d954cb510615c
> and, more specifically, by DPDK commit
> http://dpdk.org/browse/dpdk/commit/?id=af14759181240120f76c82f894982e8f33f0ba2a
> 
> With these combined changes, OVS no longer starts the vhost socket
> polling thread at startup; instead, DPDK starts it on its own when the
> first vhost client is started.
> 
> Figuring out why this happens kept me puzzled for quite some time...
> What happens is that the callbacks invoked from the vhost thread call
> ovsrcu_synchronize() as part of destroy_device(). This ends up calling
> seq_wait__(), and the problem is with this part:
> 
>    static void
>    seq_wait__(struct seq *seq, uint64_t value, const char *where)
>        OVS_REQUIRES(seq_mutex)
>    {
>>       unsigned int id = ovsthread_id_self();
>        uint32_t hash = hash_int(id, 0);
>        struct seq_waiter *waiter;
>
>        HMAP_FOR_EACH_IN_BUCKET (waiter, hmap_node, hash, &seq->waiters) {
>>           if (waiter->ovsthread_id == id) {
>                if (waiter->value != value) {
>                    /* The current value is different from the value we've already
>                     * waited for, */
>                    poll_immediate_wake_at(where);
>                } else {
>                    /* Already waiting on 'value', nothing more to do. */
>                }
>>               return;
>            }
>        }
>
> 
> By default, all threads created outside of OVS get thread id 0, which
> is the same id as the main OVS thread. So, for example, in the function
> above, if the main thread is already waiting, we won't add ourselves as
> a waiter.

Good catch. Thanks for working on this. I guess this issue could even cause
a crash, because the vhost device could be freed before other threads stop
working with it.
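
To make the failure mode concrete, below is a minimal standalone sketch
(hypothetical code, not from the OVS tree) of a waiter table keyed by
thread id, mimicking the duplicate-id check in seq_wait__() quoted above:
when two threads share id 0, the second registration is silently dropped,
so that thread's wait never takes effect.
------------------------------------------------------------------------
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_WAITERS 8

struct waiter {
    unsigned int thread_id;
    bool in_use;
};

static struct waiter waiters[MAX_WAITERS];
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

/* Mimics the duplicate-id check in seq_wait__(): if a waiter with the
 * same id is already registered, return without adding a new entry. */
static bool
register_waiter(unsigned int id)
{
    bool added = false;
    int i;

    pthread_mutex_lock(&mutex);
    for (i = 0; i < MAX_WAITERS; i++) {
        if (waiters[i].in_use && waiters[i].thread_id == id) {
            pthread_mutex_unlock(&mutex);
            return false;       /* Duplicate id: wait silently dropped. */
        }
    }
    for (i = 0; i < MAX_WAITERS; i++) {
        if (!waiters[i].in_use) {
            waiters[i].thread_id = id;
            waiters[i].in_use = true;
            added = true;
            break;
        }
    }
    pthread_mutex_unlock(&mutex);
    return added;
}

int
main(void)
{
    /* The main OVS thread has id 0; a thread created behind OVS's back
     * (e.g. by DPDK) also defaults to id 0, so its registration is lost
     * and ovsrcu_synchronize() can return before it is actually safe. */
    printf("main thread registered:  %d\n", register_waiter(0));
    printf("vhost thread registered: %d\n", register_waiter(0));
    return 0;
}
------------------------------------------------------------------------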

> 
> The fix below assigns UINT_MAX to non-OVS created threads, which fixes
> this specific issue. However, if more non-OVS threads get added, the
> issue might arise again.
> 
> Currently, I do not see another solution that will work unless DPDK
> adds some framework/callback support for when new threads get created.

What do you think about allocating ids on demand inside 'ovsthread_id_self()'?
This will work for any number of threads. Something like this:
------------------------------------------------------------------------
diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c
index f8bc06d..ff3a1df 100644
--- a/lib/ovs-thread.c
+++ b/lib/ovs-thread.c
@@ -315,7 +315,7 @@ ovs_barrier_block(struct ovs_barrier *barrier)
     }
 }
 
-DEFINE_EXTERN_PER_THREAD_DATA(ovsthread_id, 0);
+DEFINE_EXTERN_PER_THREAD_DATA(ovsthread_id, UINT_MAX);
 
 struct ovsthread_aux {
     void *(*start)(void *);
@@ -323,24 +323,28 @@ struct ovsthread_aux {
     char name[16];
 };
 
+void
+ovsthread_id_init(void)
+{
+    static atomic_count next_id = ATOMIC_COUNT_INIT(0);
+
+    unsigned int id = atomic_count_inc(&next_id);
+
+    *ovsthread_id_get() = id;
+}
+
 static void *
 ovsthread_wrapper(void *aux_)
 {
-    static atomic_count next_id = ATOMIC_COUNT_INIT(1);
-
     struct ovsthread_aux *auxp = aux_;
     struct ovsthread_aux aux;
-    unsigned int id;
-
-    id = atomic_count_inc(&next_id);
-    *ovsthread_id_get() = id;
 
     aux = *auxp;
     free(auxp);
 
     /* The order of the following calls is important, because
      * ovsrcu_quiesce_end() saves a copy of the thread name. */
-    char *subprogram_name = xasprintf("%s%u", aux.name, id);
+    char *subprogram_name = xasprintf("%s%u", aux.name, ovsthread_id_self());
     set_subprogram_name(subprogram_name);
     free(subprogram_name);
     ovsrcu_quiesce_end();
diff --git a/lib/ovs-thread.h b/lib/ovs-thread.h
index 03fd804..ada09d1 100644
--- a/lib/ovs-thread.h
+++ b/lib/ovs-thread.h
@@ -467,12 +467,21 @@ void *ovsthread_getspecific(ovsthread_key_t);
 
 DECLARE_EXTERN_PER_THREAD_DATA(unsigned int, ovsthread_id);
 
+void ovsthread_id_init(void);
+
 /* Returns a per-thread identifier unique within the lifetime of the
  * process. */
 static inline unsigned int
 ovsthread_id_self(void)
 {
-    return *ovsthread_id_get();
+    unsigned int id = *ovsthread_id_get();
+
+    if (OVS_UNLIKELY(id == UINT_MAX)) {
+        ovsthread_id_init();
+        id = *ovsthread_id_get();
+    }
+
+    return id;
 }
 
 /* Simulated global counter.
------------------------------------------------------------------------
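
For reference, the same on-demand scheme can be sketched outside of
OVS's per-thread-data macros using plain C11 thread-local storage and
atomics (hypothetical standalone code, for illustration only): each
thread's id starts out as UINT_MAX ("unset") and is assigned from a
global counter on first use, so even threads created behind the
library's back get unique ids.
------------------------------------------------------------------------
#include <limits.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_uint next_id = 0;
static _Thread_local unsigned int thread_id = UINT_MAX;

static unsigned int
thread_id_self(void)
{
    if (thread_id == UINT_MAX) {
        /* First call from this thread: allocate an id on demand. */
        thread_id = atomic_fetch_add(&next_id, 1);
    }
    return thread_id;
}

static void *
worker(void *arg)
{
    (void) arg;
    printf("worker id: %u\n", thread_id_self());
    return NULL;
}

int
main(void)
{
    pthread_t threads[3];
    int i;

    printf("main id: %u\n", thread_id_self());
    for (i = 0; i < 3; i++) {
        pthread_create(&threads[i], NULL, worker, NULL);
    }
    for (i = 0; i < 3; i++) {
        pthread_join(threads[i], NULL);
    }
    return 0;
}
------------------------------------------------------------------------
The only cost of this approach is that UINT_MAX itself is sacrificed as
the "unset" sentinel, so ids stay unique until the counter approaches
UINT_MAX allocations.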

