[ovs-dev] [PATCH] ovsdb-idl: Prevent occasional hang when multiple database clients race.

Ben Pfaff blp at nicira.com
Fri Oct 28 18:09:23 UTC 2011


When a client of the IDL tries to commit a read-modify-write transaction
but the database has changed in the meantime, the IDL tells its client to
wait for the IDL to change and then try the transaction again by returning
TXN_TRY_AGAIN.  The "wait for the IDL to change" part is important because
there's no point in retrying the transaction before the IDL has received
the database updates (the transaction would fail in the same way all over
again).

However, the logic was incomplete: the database update can be received
*before* the reply to the transaction RPC (I think that in the current
ovsdb-server implementation this will always happen, in fact).  When this
happens, the right thing to do is to retry the transaction immediately;
if we wait, then we're waiting for an additional change to the database
that may never come, causing an indefinite hang.

This commit therefore breaks the "try again" IDL commit status code
into two, one that means "try again immediately" and another that means
"wait for a change then try again".  When an update is processed after a
transaction is committed but before the reply is received, the "try again
now" tells the IDL client not to wait for another database change before
retrying its transaction.

Bug #5980.
Reported-by: Ram Jothikumar <rjothikumar at nicira.com>
Reproduced-by: Alex Yip <alex at nicira.com>
---
This also belongs on branch-1.2 and branch-1.3 after it's merged.

 lib/ovsdb-idl.c       |   20 ++++++++++++++------
 lib/ovsdb-idl.h       |    6 ++++--
 python/ovs/db/idl.py  |   21 +++++++++++++++------
 tests/ovsdb-idl.at    |    2 +-
 utilities/ovs-vsctl.c |   26 +++++++++++++++++---------
 5 files changed, 51 insertions(+), 24 deletions(-)

diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index 56b4328..439a40a 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -91,6 +91,7 @@ struct ovsdb_idl_txn {
     char *error;
     bool dry_run;
     struct ds comment;
+    unsigned int commit_seqno;
 
     /* Increments. */
     char *inc_table;
@@ -1173,8 +1174,10 @@ ovsdb_idl_txn_status_to_string(enum ovsdb_idl_txn_status status)
         return "aborted";
     case TXN_SUCCESS:
         return "success";
-    case TXN_TRY_AGAIN:
-        return "try again";
+    case TXN_AGAIN_WAIT:
+        return "wait then try again";
+    case TXN_AGAIN_NOW:
+        return "try again now";
     case TXN_NOT_LOCKED:
         return "not locked";
     case TXN_ERROR:
@@ -1197,6 +1200,7 @@ ovsdb_idl_txn_create(struct ovsdb_idl *idl)
     txn->error = NULL;
     txn->dry_run = false;
     ds_init(&txn->comment);
+    txn->commit_seqno = txn->idl->change_seqno;
 
     txn->inc_table = NULL;
     txn->inc_column = NULL;
@@ -1587,7 +1591,7 @@ ovsdb_idl_txn_commit(struct ovsdb_idl_txn *txn)
                     json_hash(txn->request_id, 0));
         txn->status = TXN_INCOMPLETE;
     } else {
-        txn->status = TXN_TRY_AGAIN;
+        txn->status = TXN_AGAIN_WAIT;
     }
 
     ovsdb_idl_txn_disassemble(txn);
@@ -1762,7 +1766,9 @@ ovsdb_idl_txn_write(const struct ovsdb_idl_row *row_,
  * prerequisite to completing the transaction.  That is, if 'column' in 'row_'
  * changed (or if 'row_' was deleted) between the time that the IDL originally
  * read its contents and the time that the transaction commits, then the
- * transaction aborts and ovsdb_idl_txn_commit() returns TXN_TRY_AGAIN.
+ * transaction aborts and ovsdb_idl_txn_commit() returns TXN_AGAIN_WAIT or
+ * TXN_AGAIN_NOW (depending on whether the database change has already been
+ * received).
  *
  * The intention is that, to ensure that no transaction commits based on dirty
  * reads, an application should call ovsdb_idl_txn_verify() on each data item
@@ -1890,7 +1896,7 @@ ovsdb_idl_txn_abort_all(struct ovsdb_idl *idl)
     struct ovsdb_idl_txn *txn;
 
     HMAP_FOR_EACH (txn, hmap_node, &idl->outstanding_txns) {
-        ovsdb_idl_txn_complete(txn, TXN_TRY_AGAIN);
+        ovsdb_idl_txn_complete(txn, TXN_AGAIN_WAIT);
     }
 }
 
@@ -2091,7 +2097,9 @@ ovsdb_idl_txn_process_reply(struct ovsdb_idl *idl,
 
         status = (hard_errors ? TXN_ERROR
                   : lock_errors ? TXN_NOT_LOCKED
-                  : soft_errors ? TXN_TRY_AGAIN
+                  : soft_errors ? (txn->commit_seqno == idl->change_seqno
+                                   ? TXN_AGAIN_WAIT
+                                   : TXN_AGAIN_NOW)
                   : TXN_SUCCESS);
     }
 
diff --git a/lib/ovsdb-idl.h b/lib/ovsdb-idl.h
index e7ae6f1..320a1ef 100644
--- a/lib/ovsdb-idl.h
+++ b/lib/ovsdb-idl.h
@@ -120,9 +120,11 @@ enum ovsdb_idl_txn_status {
     TXN_INCOMPLETE,             /* Commit in progress, please wait. */
     TXN_ABORTED,                /* ovsdb_idl_txn_abort() called. */
     TXN_SUCCESS,                /* Commit successful. */
-    TXN_TRY_AGAIN,              /* Commit failed because a "verify" operation
+    TXN_AGAIN_WAIT,             /* Commit failed because a "verify" operation
                                  * reported an inconsistency, due to a network
-                                 * problem, or other transient failure. */
+                                 * problem, or other transient failure.  Wait
+                                 * for a change, then try again. */
+    TXN_AGAIN_NOW,              /* Same as above but try again immediately. */
     TXN_NOT_LOCKED,             /* Server hasn't given us the lock yet. */
     TXN_ERROR                   /* Commit failed due to a hard error. */
 };
diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py
index cd72fe4..9c637bf 100644
--- a/python/ovs/db/idl.py
+++ b/python/ovs/db/idl.py
@@ -441,7 +441,7 @@ class Idl:
     def __txn_abort_all(self):
         while self._outstanding_txns:
             txn = self._outstanding_txns.popitem()[1]
-            txn._status = Transaction.TRY_AGAIN
+            txn._status = Transaction.AGAIN_
 
     def __txn_process_reply(self, msg):
         txn = self._outstanding_txns.pop(msg.id, None)
@@ -561,7 +561,9 @@ class Row(object):
         if 'column_name' changed in this row (or if this row was deleted)
         between the time that the IDL originally read its contents and the time
         that the transaction commits, then the transaction aborts and
-        Transaction.commit() returns Transaction.TRY_AGAIN.
+        Transaction.commit() returns Transaction.AGAIN_WAIT or
+        Transaction.AGAIN_NOW (depending on whether the database change has
+        already been received).
 
         The intention is that, to ensure that no transaction commits based on
         dirty reads, an application should call Row.verify() on each data item
@@ -620,9 +622,12 @@ class Transaction(object):
     INCOMPLETE = "incomplete"    # Commit in progress, please wait.
     ABORTED = "aborted"          # ovsdb_idl_txn_abort() called.
     SUCCESS = "success"          # Commit successful.
-    TRY_AGAIN = "try again"      # Commit failed because a "verify" operation
+    AGAIN_WAIT = "wait then try again"
+                                 # Commit failed because a "verify" operation
                                  # reported an inconsistency, due to a network
-                                 # problem, or other transient failure.
+                                 # problem, or other transient failure.  Wait
+                                 # for a change, then try again.
+    AGAIN_NOW = "try again now"  # Same as AGAIN_WAIT but try again right away.
     NOT_LOCKED = "not locked"    # Server hasn't given us the lock yet.
     ERROR = "error"              # Commit failed due to a hard error.
 
@@ -657,6 +662,7 @@ class Transaction(object):
         self._status = Transaction.UNCOMMITTED
         self._error = None
         self._comments = []
+        self._commit_seqno = self.idl.change_seqno
 
         self._inc_table = None
         self._inc_column = None
@@ -827,7 +833,7 @@ class Transaction(object):
                 self.idl._outstanding_txns[self._request_id] = self
                 self._status = Transaction.INCOMPLETE
             else:
-                self._status = Transaction.TRY_AGAIN
+                self._status = Transaction.AGAIN_WAIT
 
         self.__disassemble()
         return self._status
@@ -982,7 +988,10 @@ class Transaction(object):
             elif lock_errors:
                 self._status = Transaction.NOT_LOCKED
             elif soft_errors:
-                self._status = Transaction.TRY_AGAIN
+                if self._commit_seqno == self.idl.change_seqno:
+                    self._status = Transaction.AGAIN_WAIT
+                else:
+                    self._status = Transaction.AGAIN_NOW
             else:
                 self._status = Transaction.SUCCESS
 
diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at
index cce6197..a8ce3ca 100644
--- a/tests/ovsdb-idl.at
+++ b/tests/ovsdb-idl.at
@@ -221,7 +221,7 @@ OVSDB_CHECK_IDL([simple idl, handling verification failure],
 000: i=1 r=2 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2>
 001: commit, status=success
 002: {"error":null,"result":[{"count":1}]}
-003: commit, status=try again
+003: commit, status=try again now
 004: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1>
 004: i=1 r=5 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2>
 005: commit, status=success
diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c
index a8b1011..1f0f485 100644
--- a/utilities/ovs-vsctl.c
+++ b/utilities/ovs-vsctl.c
@@ -138,9 +138,9 @@ static void parse_command(int argc, char *argv[], struct vsctl_command *);
 static const struct vsctl_command_syntax *find_command(const char *name);
 static void run_prerequisites(struct vsctl_command[], size_t n_commands,
                               struct ovsdb_idl *);
-static void do_vsctl(const char *args,
-                     struct vsctl_command *, size_t n_commands,
-                     struct ovsdb_idl *);
+static enum ovsdb_idl_txn_status do_vsctl(const char *args,
+                                          struct vsctl_command *, size_t n,
+                                          struct ovsdb_idl *);
 
 static const struct vsctl_table_class *get_table(const char *table_name);
 static void set_column(const struct vsctl_table_class *,
@@ -156,6 +156,7 @@ int
 main(int argc, char *argv[])
 {
     extern struct vlog_module VLM_reconnect;
+    enum ovsdb_idl_txn_status status;
     struct ovsdb_idl *idl;
     struct vsctl_command *commands;
     size_t n_commands;
@@ -184,13 +185,16 @@ main(int argc, char *argv[])
     run_prerequisites(commands, n_commands, idl);
 
     /* Now execute the commands. */
+    status = TXN_AGAIN_WAIT;
     for (;;) {
-        if (ovsdb_idl_run(idl)) {
-            do_vsctl(args, commands, n_commands, idl);
+        if (ovsdb_idl_run(idl) || status == TXN_AGAIN_NOW) {
+            status = do_vsctl(args, commands, n_commands, idl);
         }
 
-        ovsdb_idl_wait(idl);
-        poll_block();
+        if (status != TXN_AGAIN_NOW) {
+            ovsdb_idl_wait(idl);
+            poll_block();
+        }
     }
 }
 
@@ -3578,7 +3582,7 @@ run_prerequisites(struct vsctl_command *commands, size_t n_commands,
     }
 }
 
-static void
+static enum ovsdb_idl_txn_status
 do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands,
          struct ovsdb_idl *idl)
 {
@@ -3625,6 +3629,7 @@ do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands,
         vsctl_context_done(&ctx, c);
 
         if (ctx.try_again) {
+            status = TXN_AGAIN_WAIT;
             goto try_again;
         }
     }
@@ -3681,7 +3686,8 @@ do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands,
     case TXN_SUCCESS:
         break;
 
-    case TXN_TRY_AGAIN:
+    case TXN_AGAIN_WAIT:
+    case TXN_AGAIN_NOW:
         goto try_again;
 
     case TXN_ERROR:
@@ -3765,6 +3771,8 @@ try_again:
         free(c->table);
     }
     free(error);
+
+    return status;
 }
 
 static const struct vsctl_command_syntax all_commands[] = {
-- 
1.7.4.4




More information about the dev mailing list