[ovs-dev] [BUG] upcall handler thread crash

Jarno Rajahalme jarno at ovn.org
Mon Feb 6 22:20:49 UTC 2017


> On Feb 5, 2017, at 10:49 PM, wangyunjian <wangyunjian at huawei.com> wrote:
> 
> My ovs version is openvswitch-2.5.0(http://openvswitch.org/releases/openvswitch-2.5.0.tar.gz). 
> I had modified the code as follows and getted other crash. Do it need a lock to protect the operations
> of mbridge->mbundles hmap(xbridge->xport hmap) between ovs-vswichd thread and the upcall handler(revalidator) thread?
> 
> 413 static struct mbundle *
> 414 mbundle_lookup(const struct mbridge *mbridge, struct ofbundle *ofbundle)
> 415 {
> 416     struct mbundle *mbundle;
> 417 
> 418     HMAP_FOR_EACH_IN_BUCKET (mbundle, hmap_node, hash_pointer(ofbundle, 0),
> 419                              &mbridge->mbundles) {
> 420         xsleep(2);                                                                                                         //only add xsleep(2)

xsleep() causes the thread to quiesce, basically telling the main thread it is OK to delete the bridge, so the new crash you see is not a bug, but caused by your change.

Instead, try it out with these changes:

diff --git a/ofproto/ofproto-dpif-mirror.c b/ofproto/ofproto-dpif-mirror.c
index 6f8079a..15a398f 100644
--- a/ofproto/ofproto-dpif-mirror.c
+++ b/ofproto/ofproto-dpif-mirror.c
@@ -18,7 +18,7 @@
 
 #include <errno.h>
 
-#include "hmap.h"
+#include "cmap.h"
 #include "hmapx.h"
 #include "ofproto.h"
 #include "vlan-bitmap.h"
@@ -31,7 +31,7 @@ BUILD_ASSERT_DECL(sizeof(mirror_mask_t) * CHAR_BIT >= MAX_MIRRORS);
 
 struct mbridge {
     struct mirror *mirrors[MAX_MIRRORS];
-    struct hmap mbundles;
+    struct cmap mbundles;
 
     bool need_revalidate;
     bool has_mirrors;
@@ -40,7 +40,7 @@ struct mbridge {
 };
 
 struct mbundle {
-    struct hmap_node hmap_node; /* In parent 'mbridge' map. */
+    struct cmap_node cmap_node; /* In parent 'mbridge' map. */
     struct ofbundle *ofbundle;
 
     mirror_mask_t src_mirrors;  /* Mirrors triggered when packet received. */
@@ -84,7 +84,7 @@ mbridge_create(void)
     mbridge = xzalloc(sizeof *mbridge);
     ovs_refcount_init(&mbridge->ref_cnt);
 
-    hmap_init(&mbridge->mbundles);
+    cmap_init(&mbridge->mbundles);
     return mbridge;
 }
 
@@ -101,7 +101,7 @@ mbridge_ref(const struct mbridge *mbridge_)
 void
 mbridge_unref(struct mbridge *mbridge)
 {
-    struct mbundle *mbundle, *next;
+    struct mbundle *mbundle;
     size_t i;
 
     if (!mbridge) {
@@ -115,11 +115,11 @@ mbridge_unref(struct mbridge *mbridge)
             }
         }
 
-        HMAP_FOR_EACH_SAFE (mbundle, next, hmap_node, &mbridge->mbundles) {
+        CMAP_FOR_EACH (mbundle, cmap_node, &mbridge->mbundles) {
             mbridge_unregister_bundle(mbridge, mbundle->ofbundle);
         }
 
-        hmap_destroy(&mbridge->mbundles);
+        cmap_destroy(&mbridge->mbundles);
         free(mbridge);
     }
 }
@@ -147,7 +147,7 @@ mbridge_register_bundle(struct mbridge *mbridge, struct ofbundle *ofbundle)
 
     mbundle = xzalloc(sizeof *mbundle);
     mbundle->ofbundle = ofbundle;
-    hmap_insert(&mbridge->mbundles, &mbundle->hmap_node,
+    cmap_insert(&mbridge->mbundles, &mbundle->cmap_node,
                 hash_pointer(ofbundle, 0));
 }
 
@@ -173,8 +173,9 @@ mbridge_unregister_bundle(struct mbridge *mbridge, struct ofbundle *ofbundle)
         }
     }
 
-    hmap_remove(&mbridge->mbundles, &mbundle->hmap_node);
-    free(mbundle);
+    cmap_remove(&mbridge->mbundles, &mbundle->cmap_node,
+                hash_pointer(ofbundle, 0));
+    ovsrcu_postpone(free, mbundle);
 }
 
 mirror_mask_t
@@ -269,7 +270,7 @@ mirror_set(struct mbridge *mbridge, void *aux, const char *name,
 
     /* Update mbundles. */
     mirror_bit = MIRROR_MASK_C(1) << mirror->idx;
-    HMAP_FOR_EACH (mbundle, hmap_node, &mirror->mbridge->mbundles) {
+    CMAP_FOR_EACH (mbundle, cmap_node, &mirror->mbridge->mbundles) {
         if (hmapx_contains(&mirror->srcs, mbundle)) {
             mbundle->src_mirrors |= mirror_bit;
         } else {
@@ -308,7 +309,7 @@ mirror_destroy(struct mbridge *mbridge, void *aux)
     }
 
     mirror_bit = MIRROR_MASK_C(1) << mirror->idx;
-    HMAP_FOR_EACH (mbundle, hmap_node, &mbridge->mbundles) {
+    CMAP_FOR_EACH (mbundle, cmap_node, &mbridge->mbundles) {
         mbundle->src_mirrors &= ~mirror_bit;
         mbundle->dst_mirrors &= ~mirror_bit;
         mbundle->mirror_out &= ~mirror_bit;
@@ -414,9 +415,9 @@ static struct mbundle *
 mbundle_lookup(const struct mbridge *mbridge, struct ofbundle *ofbundle)
 {
     struct mbundle *mbundle;
+    uint32_t hash = hash_pointer(ofbundle, 0);
 
-    HMAP_FOR_EACH_IN_BUCKET (mbundle, hmap_node, hash_pointer(ofbundle, 0),
-                             &mbridge->mbundles) {
+    CMAP_FOR_EACH_WITH_HASH (mbundle, cmap_node, hash, &mbridge->mbundles) {
         if (mbundle->ofbundle == ofbundle) {
             return mbundle;
         }
@@ -424,7 +425,7 @@ mbundle_lookup(const struct mbridge *mbridge, struct ofbundle *ofbundle)
     return NULL;
 }
 
-/* Looks up each of the 'n_ofbundlees' pointers in 'ofbundlees' as mbundles and
+/* Looks up each of the 'n_ofbundles' pointers in 'ofbundles' as mbundles and
  * adds the ones that are found to 'mbundles'. */
 static void
 mbundle_lookup_multiple(const struct mbridge *mbridge,


  Jarno


> 421         if (mbundle->ofbundle == ofbundle) {
> 422             return mbundle;
> 423         }
> 424     }
> 425     return NULL;
> 426 }
> 427 
> 
> Call Trace:
> Program received signal SIGSEGV, Segmentation fault.
> [Switching to Thread 0x7fadfefd1700 (LWP 25029)]
> mbundle_lookup (mbridge=<optimized out>, ofbundle=0x7fae03a55210) at ofproto/ofproto-dpif-mirror.c:421
> 421	        if (mbundle->ofbundle == ofbundle) {
> (gdb) bt
> #0  mbundle_lookup (mbridge=<optimized out>, ofbundle=0x7fae03a55210) at ofproto/ofproto-dpif-mirror.c:421
> #1  0x00007fae029ec519 in mirror_bundle_out (mbridge=<optimized out>, ofbundle=<optimized out>) at ofproto/ofproto-dpif-mirror.c:183
> #2  0x00007fae029fba61 in xbundle_mirror_out (xbridge=<optimized out>, xbundle=0x7fae03a4e910) at ofproto/ofproto-dpif-xlate.c:1524
> #3  xlate_normal (ctx=0x7fadfef94420) at ofproto/ofproto-dpif-xlate.c:2360
> #4  xlate_output_action (ctx=ctx at entry=0x7fadfef94420, port=<optimized out>, max_len=<optimized out>, may_packet_in=may_packet_in at entry=true) at ofproto/ofproto-dpif-xlate.c:3829
> #5  0x00007fae029f854f in do_xlate_actions (ofpacts=ofpacts at entry=0x7fade800e5a8, ofpacts_len=ofpacts_len at entry=8, ctx=ctx at entry=0x7fadfef94420) at ofproto/ofproto-dpif-xlate.c:4384
> #6  0x00007fae029fd6ec in xlate_actions (xin=xin at entry=0x7fadfef955c0, xout=xout at entry=0x7fadfefb3178) at ofproto/ofproto-dpif-xlate.c:5272
> #7  0x00007fae029f16f6 in upcall_xlate (wc=0x7fadfefb31d0, odp_actions=0x7fadfefb3190, upcall=0x7fadfefb3120, udpif=0x7fae03a81040) at ofproto/ofproto-dpif-upcall.c:1068
> #8  process_upcall (udpif=udpif at entry=0x7fae03a81040, upcall=upcall at entry=0x7fadfefb3120, odp_actions=odp_actions at entry=0x7fadfefb3190, wc=wc at entry=0x7fadfefb31d0) at ofproto/ofproto-dpif-upcall.c:1206
> #9  0x00007fae029f3960 in recv_upcalls (handler=0x7fae03a4f5c8, handler=0x7fae03a4f5c8) at ofproto/ofproto-dpif-upcall.c:778
> #10 0x00007fae029f3e1c in udpif_upcall_handler (arg=0x7fae03a4f5c8) at ofproto/ofproto-dpif-upcall.c:696
> #11 0x00007fae02a7e596 in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:340
> #12 0x00007fae01d14dc5 in start_thread () from /usr/lib64/libpthread.so.0
> #13 0x00007fae0132271d in clone () from /usr/lib64/libc.so.6
> (gdb) p mbundle
> $1 = (struct mbundle *) 0xcccccccccccccccc
> 
> Program received signal SIGSEGV, Segmentation fault.
> [Switching to Thread 0x7fadfefd1700 (LWP 3570)]
> 0x00007fae029f68a0 in get_ofp_port (xbridge=xbridge at entry=0x7fae03a81680, ofp_port=ofp_port at entry=1) at ofproto/ofproto-dpif-xlate.c:1400
> 1400	        if (xport->ofp_port == ofp_port) {
> (gdb) bt
> #0  0x00007fae029f68a0 in get_ofp_port (xbridge=xbridge at entry=0x7fae03a81680, ofp_port=ofp_port at entry=1) at ofproto/ofproto-dpif-xlate.c:1400
> #1  0x00007fae029f692f in lookup_input_bundle (xbridge=0x7fae03a81680, in_port=1, warn=<optimized out>, in_xportp=in_xportp at entry=0x7fadfef93e90) at ofproto/ofproto-dpif-xlate.c:1550
> #2  0x00007fae029fba23 in xlate_normal (ctx=0x7fadfef94420) at ofproto/ofproto-dpif-xlate.c:2339
> #3  xlate_output_action (ctx=ctx at entry=0x7fadfef94420, port=<optimized out>, max_len=<optimized out>, may_packet_in=may_packet_in at entry=true) at ofproto/ofproto-dpif-xlate.c:3829
> #4  0x00007fae029f854f in do_xlate_actions (ofpacts=ofpacts at entry=0x7fae03a7f0a8, ofpacts_len=ofpacts_len at entry=8, ctx=ctx at entry=0x7fadfef94420) at ofproto/ofproto-dpif-xlate.c:4384
> #5  0x00007fae029fd6ec in xlate_actions (xin=xin at entry=0x7fadfef955c0, xout=xout at entry=0x7fadfefb3178) at ofproto/ofproto-dpif-xlate.c:5272
> #6  0x00007fae029f16f6 in upcall_xlate (wc=0x7fadfefb31d0, odp_actions=0x7fadfefb3190, upcall=0x7fadfefb3120, udpif=0x7fae03a4fb60) at ofproto/ofproto-dpif-upcall.c:1068
> #7  process_upcall (udpif=udpif at entry=0x7fae03a4fb60, upcall=upcall at entry=0x7fadfefb3120, odp_actions=odp_actions at entry=0x7fadfefb3190, wc=wc at entry=0x7fadfefb31d0) at ofproto/ofproto-dpif-upcall.c:1206
> #8  0x00007fae029f3960 in recv_upcalls (handler=0x7fae03a7e7b8, handler=0x7fae03a7e7b8) at ofproto/ofproto-dpif-upcall.c:778
> #9  0x00007fae029f3e1c in udpif_upcall_handler (arg=0x7fae03a7e7b8) at ofproto/ofproto-dpif-upcall.c:696
> #10 0x00007fae02a7e596 in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:340
> #11 0x00007fae01d14dc5 in start_thread () from /usr/lib64/libpthread.so.0
> #12 0x00007fae0132271d in clone () from /usr/lib64/libc.so.6
> (gdb) p xport
> $1 = (struct xport *) 0x19
> 
>> From: nickcooper-zhangtonghao [mailto:nic at opencloud.tech] 
>> Sent: Saturday, February 04, 2017 11:48 PM
>> To: wangyunjian <wangyunjian at huawei.com>
>> Cc: blp at ovn.org; dev at openvswitch.org; caihe <caihe at huawei.com>; Huangjian (J) <huangjian.huangjian at huawei.com>
>> Subject: Re: [ovs-dev] [BUG] upcall handler thread crash
>> 
>> Hi, what’s the OvS version you tested. I didn’t get the crash with master version.
>> The test script is described as below.
>> 
>> ovs-vsctl add-br br0
>> ovs-vsctl add-port br0 eth1
>> for i in `seq 0 1000`;
>> do
>>    ovs-vsctl add-port br0 eth2
>>    ovs-vsctl del-port br0 eth2
>> done
>> 
>> 
>> Thanks.
>> Nick
>> 
>> On Feb 4, 2017, at 7:21 PM, wangyunjian <wangyunjian at huawei.com> wrote:
>> 
>> Recently, write a script add and delete port repeatly, ovs upcall handler thread crash with the following trace.
>> In the code bellow, weather the operations of mbridge->mbundles hmap should with a lock to protect content between ovs-vswichd thread and the upcall handler thread:
>> 
>> static struct mbundle *
>> mbundle_lookup(const struct mbridge *mbridge, struct ofbundle *ofbundle)
>> {
>>   struct mbundle *mbundle;
>> 
>>   HMAP_FOR_EACH_IN_BUCKET (mbundle, hmap_node, hash_pointer(ofbundle, 0),
>>                            &mbridge->mbundles) {
>>       if (mbundle->ofbundle == ofbundle) {
>>           return mbundle;
>>       }
>>   }
>>   return NULL;
>> }
>> 
>> Call Trace:
>> #0  0x000000000044d838 in mbundle_lookup (mbridge=0x7fbf68000cc0, ofbundle=0x7fbf7007c3d0) at ofproto/ofproto_dpif_mirror.c:472
>> #1  0x000000000044da15 in mirror_bundle_out (mbridge=<optimized out>, ofbundle=<optimized out>) at ofproto/ofproto_dpif_mirror.c:192
>> #2  0x0000000000448658 in xbundle_mirror_out (xbridge=0x7fbf5c6468a0, xbundle=0x7fbf3d48a160) at ofproto/ofproto_dpif_xlate.c:1556
>> #3  xlate_normal_flood (ctx=ctx at entry=0x7fbf7729e3d0, in_xbundle=in_xbundle at entry=0x7fbf5c22f870, vlan=vlan at entry=100) at ofproto/ofproto_dpif_xlate.c:2525
>> #4  0x0000000000448e7e in xlate_normal (ctx=0x7fbf7729e3d0) at ofproto/ofproto_dpif_xlate.c:2724
>> #5 xlate_output_action (ctx=ctx at entry=0x7fbf7729e3d0, port=<optimized out>, max_len=<optimized out>, may_packet_in=may_packet_in at entry=true) at ofproto/ofproto_dpif_xlate.c:4061
>> #6 0x0000000000445147 in do_xlate_actions (ofpacts=0x6eb7288, ofpacts_len=16, ctx=ctx at entry=0x7fbf7729e3d0) at ofproto/ofproto_dpif_xlate.c:4616
>> #7 0x0000000000446481 in xlate_recursively (rule=0x6eb7100, ctx=0x7fbf7729e3d0) at ofproto/ofproto_dpif_xlate.c:3445
>> #8 xlate_table_action (ctx=0x7fbf7729e3d0, in_port=<optimized out>, table_id=<optimized out>, may_packet_in=<optimized out>, honor_table_miss=<optimized out>) at ofproto/ofproto_dpif_xlate.c:3513
>> #9 0x00000000004475a2 in compose_output_action__ (ctx=ctx at entry=0x7fbf7729e3d0, ofp_port=<optimized out>, xr=<optimized out>, check_stp=check_stp at entry=true) at ofproto/ofproto_dpif_xlate.c:3206
>> #10 0x0000000000447bbf in compose_output_action (xr=<optimized out>, ofp_port=<optimized out>, ctx=0x7fbf7729e3d0) at ofproto/ofproto_dpif_xlate.c:3426
>> #11 output_normal (ctx=ctx at entry=0x7fbf7729e3d0, out_xbundle=out_xbundle at entry=0x7fbf5cdf4aa0, vlan=vlan at entry=0) at ofproto/ofproto_dpif_xlate.c:2073
>> #12 0x00000000004486ae in xlate_normal_flood (ctx=ctx at entry=0x7fbf7729e3d0, in_xbundle=in_xbundle at entry=0x7fbf5e1d44d0, vlan=vlan at entry=0) at ofproto/ofproto_dpif_xlate.c:2529
>> #13 0x0000000000448e7e in xlate_normal (ctx=0x7fbf7729e3d0) at ofproto/ofproto_dpif_xlate.c:2724
>> #14 xlate_output_action (ctx=ctx at entry=0x7fbf7729e3d0, port=<optimized out>, max_len=<optimized out>, may_packet_in=may_packet_in at entry=true) at ofproto/ofproto_dpif_xlate.c:4061
>> #15 0x0000000000445147 in do_xlate_actions (ofpacts=ofpacts at entry=0x7fbf70005ae8, ofpacts_len=ofpacts_len at entry=8, ctx=ctx at entry=0x7fbf7729e3d0) at ofproto/ofproto_dpif_xlate.c:4616
>> #16 0x000000000044a739 in xlate_actions (xin=xin at entry=0x7fbf7729f570, xout=xout at entry=0x7fbf772c0b18) at ofproto/ofproto_dpif_xlate.c:5509
>> #17 0x000000000043e4b6 in upcall_xlate (wc=0x7fbf772c0b70, odp_actions=0x7fbf772c0b30, upcall=0x7fbf772c0ac0, udpif=0x6e164a0) at ofproto/ofproto_dpif_upcall.c:1082
>> #18 process_upcall (udpif=udpif at entry=0x6e164a0, upcall=upcall at entry=0x7fbf772c0ac0, odp_actions=odp_actions at entry=0x7fbf772c0b30, wc=wc at entry=0x7fbf772c0b70) at ofproto/ofproto_dpif_upcall.c:1220
>> #19 0x00000000004407d3 in recv_upcalls (handler=0x7fbf58944810, handler=0x7fbf58944810) at ofproto/ofproto_dpif_upcall.c:784
>> #20 0x0000000000440cca in udpif_upcall_handler (arg=0x7fbf58944810) at ofproto/ofproto_dpif_upcall.c:701
>> #21 0x00000000004c95e4 in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs_thread.c:649
>> #22 0x00007fbf7aeaedc5 in start_thread () from /usr/lib64/libpthread.so.0
>> #23 0x00007fbf79a5e71d in clone () from /usr/lib64/libc.so.6
>> 
>> #0  0x000000000044d838 in mbundle_lookup (mbridge=0x7fbf68000cc0, ofbundle=0x7fbf7007c3d0)
>> 472	        if (mbundle->ofbundle == ofbundle) {
>> (gdb) p mbundle
>> $1 = (struct mbundle *) 0x31
>> 
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev



More information about the dev mailing list