[ovs-dev] [PATCH V3] ofproto-dpif-monitor: Fix deadlock.

Alex Wang alexw at nicira.com
Tue Apr 29 18:06:29 UTC 2014


Commit 6b59b543 (ovs-thread: Use fair (but nonrecursive)
rwlocks on glibc.) changed the rwlocks to nonrecursive,
writer-biased lock.  It also made the following deadlock
possible.

Assume BFD is used on both end of a link.  Consider the
following events:
1. Handler at one end received the BFD control packet with
   POLL flag set while holding the read lock of 'xlate_rwlock'.
   Since a BFD control packet with FINAL flag set should be
   sent back immediately, it calls the
   ofproto_dpif_monitor_port_send_soon(), in which, it tries
   to grab the 'monitor_mutex'.
2. The main thread needs to configure the ofproto-dpif-xlate
   module.  It tries to grab the write lock of 'xlate_rwlock'
   and is blocked by event 1.
3. The monitor thread, after acquired the 'monitor_mutex',
   wants to acquire the read lock of 'xlate_rwlock'.

Since the rwlock is now writer-biased, the attempt of acquiring
read lock in event 3 will be blocked by event 2.  This will
subsequently cause the block of event 1, since monitor thread
is holding the 'monitor_mutex'.  So the deadlock happens.

This commit resolves the above issue by removing the requirement of
acquiring 'monitor_mutex' in ofproto_dpif_monitor_port_send_soon().

Signed-off-by: Alex Wang <alexw at nicira.com>

---
V2 -> V3:
- Rebase.

PATCH -> V2:
- Figured out how to implement the compound designated initializer
  for guarded-list.
- Refine comments.
---
 lib/guarded-list.h             |    5 ++
 ofproto/ofproto-dpif-monitor.c |  135 +++++++++++++++++++++++++++-------------
 ofproto/ofproto-dpif-monitor.h |    1 -
 ofproto/ofproto-dpif-xlate.c   |    6 +-
 4 files changed, 97 insertions(+), 50 deletions(-)

diff --git a/lib/guarded-list.h b/lib/guarded-list.h
index 625865d..df17cbc 100644
--- a/lib/guarded-list.h
+++ b/lib/guarded-list.h
@@ -28,6 +28,11 @@ struct guarded_list {
     size_t n;
 };
 
+#define GUARDED_LIST_INITIALIZER(LIST) { \
+    .mutex = OVS_MUTEX_INITIALIZER, \
+    .list = LIST_INITIALIZER(&((LIST)->list)), \
+    .n = 0 }
+
 void guarded_list_init(struct guarded_list *);
 void guarded_list_destroy(struct guarded_list *);
 
diff --git a/ofproto/ofproto-dpif-monitor.c b/ofproto/ofproto-dpif-monitor.c
index 7c84e4b..764c7b1 100644
--- a/ofproto/ofproto-dpif-monitor.c
+++ b/ofproto/ofproto-dpif-monitor.c
@@ -21,6 +21,7 @@
 
 #include "bfd.h"
 #include "cfm.h"
+#include "guarded-list.h"
 #include "hash.h"
 #include "heap.h"
 #include "hmap.h"
@@ -52,12 +53,24 @@ struct mport {
     uint8_t hw_addr[OFP_ETH_ALEN];    /* Hardware address. */
 };
 
+/* Entry of the 'send_soon' list.  Contains the pointer to the
+ * 'ofport_dpif'.  Note, the pointed object is not protected, so
+ * users should always use the mport_find() to convert it to 'mport'. */
+struct send_soon_entry {
+    struct list list_node;            /* In send_soon. */
+    const struct ofport_dpif *ofport;
+};
+
 /* hmap that contains "struct mport"s. */
 static struct hmap monitor_hmap = HMAP_INITIALIZER(&monitor_hmap);
 
 /* heap for ordering mport based on bfd/cfm wakeup time. */
 static struct heap monitor_heap;
 
+/* guarded-list for storing the mports that need to send bfd/cfm control
+ * packet soon. */
+static struct guarded_list send_soon = GUARDED_LIST_INITIALIZER(&send_soon);
+
 /* The monitor thread id. */
 static pthread_t monitor_tid;
 /* True if the monitor thread is running. */
@@ -67,7 +80,9 @@ static struct latch monitor_exit_latch;
 static struct ovs_mutex monitor_mutex = OVS_MUTEX_INITIALIZER;
 
 static void *monitor_main(void *);
+static void monitor_check_send_soon(struct ofpbuf *);
 static void monitor_run(void);
+static void monitor_mport_run(struct mport *, struct ofpbuf *);
 
 static void mport_register(const struct ofport_dpif *, struct bfd *,
                            struct cfm *, uint8_t[ETH_ADDR_LEN])
@@ -172,9 +187,8 @@ monitor_main(void * args OVS_UNUSED)
  * reconfigured monitoring ports are run in a timely manner. */
 #define MONITOR_INTERVAL_MSEC 100
 
-/* Checks the sending of control packets on mports that have timed out.
- * Sends the control packets if needed.  Executes bfd and cfm periodic
- * functions (run, wait) on those mports. */
+/* Checks the 'send_soon' list and the heap for mports that have timed
+ * out bfd/cfm sessions. */
 static void
 monitor_run(void)
 {
@@ -184,39 +198,26 @@ monitor_run(void)
 
     ofpbuf_use_stub(&packet, stub, sizeof stub);
     ovs_mutex_lock(&monitor_mutex);
+
+    /* The monitor_check_send_soon() needs to be run twice.  The first
+     * time is for preventing the same 'mport' from being processed twice
+     * (i.e. once from heap, the other from the 'send_soon' array).
+     * The second run is to cover the case when the control packet is sent
+     * via patch port and the other end needs to send back immediately. */
+    monitor_check_send_soon(&packet);
+
     prio_now = MSEC_TO_PRIO(time_msec());
     /* Peeks the top of heap and checks if we should run this mport. */
     while (!heap_is_empty(&monitor_heap)
            && heap_max(&monitor_heap)->priority >= prio_now) {
-        long long int next_wake_time;
         struct mport *mport;
 
         mport = CONTAINER_OF(heap_max(&monitor_heap), struct mport, heap_node);
-        if (mport->cfm && cfm_should_send_ccm(mport->cfm)) {
-            ofpbuf_clear(&packet);
-            cfm_compose_ccm(mport->cfm, &packet, mport->hw_addr);
-            ofproto_dpif_send_packet(mport->ofport, &packet);
-        }
-        if (mport->bfd && bfd_should_send_packet(mport->bfd)) {
-            ofpbuf_clear(&packet);
-            bfd_put_packet(mport->bfd, &packet, mport->hw_addr);
-            ofproto_dpif_send_packet(mport->ofport, &packet);
-        }
-        if (mport->cfm) {
-            cfm_run(mport->cfm);
-            cfm_wait(mport->cfm);
-        }
-        if (mport->bfd) {
-            bfd_run(mport->bfd);
-            bfd_wait(mport->bfd);
-        }
-        /* Computes the next wakeup time for this mport. */
-        next_wake_time = MIN(bfd_wake_time(mport->bfd),
-                             cfm_wake_time(mport->cfm));
-        heap_change(&monitor_heap, &mport->heap_node,
-                    MSEC_TO_PRIO(next_wake_time));
+        monitor_mport_run(mport, &packet);
     }
 
+    monitor_check_send_soon(&packet);
+
     /* Waits on the earliest next wakeup time. */
     if (!heap_is_empty(&monitor_heap)) {
         long long int next_timeout, next_mport_wakeup;
@@ -228,6 +229,61 @@ monitor_run(void)
     ovs_mutex_unlock(&monitor_mutex);
     ofpbuf_uninit(&packet);
 }
+
+/* Checks the 'send_soon' list for any mport that needs to send cfm/bfd
+ * control packet immediately, and calls monitor_mport_run(). */
+static void
+monitor_check_send_soon(struct ofpbuf *packet)
+    OVS_REQUIRES(monitor_mutex)
+{
+    while (!guarded_list_is_empty(&send_soon)) {
+        struct send_soon_entry *entry;
+        struct mport *mport;
+
+        entry = CONTAINER_OF(guarded_list_pop_front(&send_soon),
+                             struct send_soon_entry, list_node);
+        mport = mport_find(entry->ofport);
+        if (mport) {
+            monitor_mport_run(mport, packet);
+        }
+        free(entry);
+    }
+}
+
+/* Checks the sending of control packet on 'mport'.  Sends the control
+ * packet if needed.  Executes bfd and cfm periodic functions (run, wait)
+ * on 'mport'.  And changes the location of 'mport' in heap based on next
+ * timeout. */
+static void
+monitor_mport_run(struct mport *mport, struct ofpbuf *packet)
+    OVS_REQUIRES(monitor_mutex)
+{
+    long long int next_wake_time;
+
+    if (mport->cfm && cfm_should_send_ccm(mport->cfm)) {
+        ofpbuf_clear(packet);
+        cfm_compose_ccm(mport->cfm, packet, mport->hw_addr);
+        ofproto_dpif_send_packet(mport->ofport, packet);
+    }
+    if (mport->bfd && bfd_should_send_packet(mport->bfd)) {
+        ofpbuf_clear(packet);
+        bfd_put_packet(mport->bfd, packet, mport->hw_addr);
+        ofproto_dpif_send_packet(mport->ofport, packet);
+    }
+    if (mport->cfm) {
+        cfm_run(mport->cfm);
+        cfm_wait(mport->cfm);
+    }
+    if (mport->bfd) {
+        bfd_run(mport->bfd);
+        bfd_wait(mport->bfd);
+    }
+    /* Computes the next wakeup time for this mport. */
+    next_wake_time = MIN(bfd_wake_time(mport->bfd),
+                         cfm_wake_time(mport->cfm));
+    heap_change(&monitor_heap, &mport->heap_node,
+                MSEC_TO_PRIO(next_wake_time));
+}
 
 
 /* Creates the mport in monitor module if either bfd or cfm
@@ -262,25 +318,16 @@ ofproto_dpif_monitor_port_update(const struct ofport_dpif *ofport,
     }
 }
 
-/* Moves the mport on top of the heap.  This is necessary when
- * for example, bfd POLL is received and the mport should
- * immediately send FINAL back. */
-void
-ofproto_dpif_monitor_port_send_soon_safe(const struct ofport_dpif *ofport)
-{
-    ovs_mutex_lock(&monitor_mutex);
-    ofproto_dpif_monitor_port_send_soon(ofport);
-    ovs_mutex_unlock(&monitor_mutex);
-}
-
+/* Registers the 'ofport' in the 'send_soon' list.  We cannot directly
+ * insert the corresponding mport to the 'send_soon' list, since the
+ * 'send_soon' list is not updated when the mport is removed.
+ *
+ * Reader of the 'send_soon' list is responsible for freeing the entry. */
 void
 ofproto_dpif_monitor_port_send_soon(const struct ofport_dpif *ofport)
-    OVS_REQUIRES(monitor_mutex)
 {
-    struct mport *mport;
+    struct send_soon_entry *entry = xzalloc(sizeof *entry);
+    entry->ofport = ofport;
 
-    mport = mport_find(ofport);
-    if (mport) {
-        heap_change(&monitor_heap, &mport->heap_node, LLONG_MAX);
-    }
+    guarded_list_push_back(&send_soon, &entry->list_node, SIZE_MAX);
 }
diff --git a/ofproto/ofproto-dpif-monitor.h b/ofproto/ofproto-dpif-monitor.h
index 12aa2bb..2c466c2 100644
--- a/ofproto/ofproto-dpif-monitor.h
+++ b/ofproto/ofproto-dpif-monitor.h
@@ -25,7 +25,6 @@ struct cfm;
 struct ofport_dpif;
 
 void ofproto_dpif_monitor_port_send_soon(const struct ofport_dpif *);
-void ofproto_dpif_monitor_port_send_soon_safe(const struct ofport_dpif *);
 
 void ofproto_dpif_monitor_port_update(const struct ofport_dpif *,
                                       struct bfd *, struct cfm *,
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 4a9ba52..ffb22d6 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -1773,11 +1773,7 @@ process_special(struct xlate_ctx *ctx, const struct flow *flow,
             bfd_process_packet(xport->bfd, flow, packet);
             /* If POLL received, immediately sends FINAL back. */
             if (bfd_should_send_packet(xport->bfd)) {
-                if (xport->peer) {
-                    ofproto_dpif_monitor_port_send_soon(xport->ofport);
-                } else {
-                    ofproto_dpif_monitor_port_send_soon_safe(xport->ofport);
-                }
+                ofproto_dpif_monitor_port_send_soon(xport->ofport);
             }
         }
         return SLOW_BFD;
-- 
1.7.9.5




More information about the dev mailing list