[ovs-dev] [PATCH 2/2] rconn: Treat draining a message from the send queue as activity.

Ben Pfaff blp at nicira.com
Mon Aug 6 20:55:04 UTC 2012


Until now, the rconn module has used messages received from the
controller as the sole means to determine that the connection is up.
This can interact badly with the OVS connection manager in ofproto,
which stops reading and processing messages from the receive queue
when there is a backlog in the send queue for a given connection
(because reading and processes messages is the main cause of messages
getting pushed onto the send queue).  So, if a send queue backlog
lasts more than twice the inactivity probe interval, then the
connection drops, whether the controller is sending messages or not.
Dumping a large flow table can trigger this behavior if the controller
becomes temporarily busy or if the network between OVS and a
controller is slow.  The problem can easily repeat itself, since upon
reconnection the controller will generally dump the flow table.

This commit fixes the problem by expanding the definition of
"activity" to include successfully sending an OpenFlow message that
was previously queued.

Reported-by: Natasha Gude <natasha at nicira.com>
Signed-off-by: Ben Pfaff <blp at nicira.com>
---
 lib/rconn.c |   23 ++++++++++++++---------
 1 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/lib/rconn.c b/lib/rconn.c
index a32f042..eae6a7e 100644
--- a/lib/rconn.c
+++ b/lib/rconn.c
@@ -80,7 +80,6 @@ struct rconn {
     int backoff;
     int max_backoff;
     time_t backoff_deadline;
-    time_t last_received;
     time_t last_connected;
     time_t last_disconnected;
     unsigned int packets_sent;
@@ -105,11 +104,15 @@ struct rconn {
     time_t creation_time;
     unsigned long int total_time_connected;
 
-    /* Throughout this file, "probe" is shorthand for "inactivity probe".
-     * When nothing has been received from the peer for a while, we send out
-     * an echo request as an inactivity probe packet.  We should receive back
-     * a response. */
+    /* Throughout this file, "probe" is shorthand for "inactivity probe".  When
+     * no activity has been observed from the peer for a while, we send out an
+     * echo request as an inactivity probe packet.  We should receive back a
+     * response.
+     *
+     * "Activity" is defined as either receiving an OpenFlow message from the
+     * peer or successfully sending a message that had been in 'txq'. */
     int probe_interval;         /* Secs of inactivity before sending probe. */
+    time_t last_activity;       /* Last time we saw some activity. */
 
     /* When we create a vconn we obtain these values, to save them past the end
      * of the vconn's lifetime.  Otherwise, in-band control will only allow
@@ -179,7 +182,6 @@ rconn_create(int probe_interval, int max_backoff, uint8_t dscp)
     rc->backoff = 0;
     rc->max_backoff = max_backoff ? max_backoff : 8;
     rc->backoff_deadline = TIME_MIN;
-    rc->last_received = time_now();
     rc->last_connected = TIME_MIN;
     rc->last_disconnected = TIME_MIN;
     rc->seqno = 0;
@@ -195,6 +197,8 @@ rconn_create(int probe_interval, int max_backoff, uint8_t dscp)
     rc->creation_time = time_now();
     rc->total_time_connected = 0;
 
+    rc->last_activity = time_now();
+
     rconn_set_probe_interval(rc, probe_interval);
     rconn_set_dscp(rc, dscp);
 
@@ -419,6 +423,7 @@ do_tx_work(struct rconn *rc)
         if (error) {
             break;
         }
+        rc->last_activity = time_now();
     }
     if (list_is_empty(&rc->txq)) {
         poll_immediate_wake();
@@ -429,7 +434,7 @@ static unsigned int
 timeout_ACTIVE(const struct rconn *rc)
 {
     if (rc->probe_interval) {
-        unsigned int base = MAX(rc->last_received, rc->state_entered);
+        unsigned int base = MAX(rc->last_activity, rc->state_entered);
         unsigned int arg = base + rc->probe_interval - rc->state_entered;
         return arg;
     }
@@ -440,7 +445,7 @@ static void
 run_ACTIVE(struct rconn *rc)
 {
     if (timed_out(rc)) {
-        unsigned int base = MAX(rc->last_received, rc->state_entered);
+        unsigned int base = MAX(rc->last_activity, rc->state_entered);
         VLOG_DBG("%s: idle %u seconds, sending inactivity probe",
                  rc->name, (unsigned int) (time_now() - base));
 
@@ -543,7 +548,7 @@ rconn_recv(struct rconn *rc)
                 rc->probably_admitted = true;
                 rc->last_admitted = time_now();
             }
-            rc->last_received = time_now();
+            rc->last_activity = time_now();
             rc->packets_received++;
             if (rc->state == S_IDLE) {
                 state_transition(rc, S_ACTIVE);
-- 
1.7.2.5




More information about the dev mailing list