[ovs-dev] [idl 2/4] ovsdb-idl: Simplify transaction retry.
Ethan Jackson
ethan at nicira.com
Wed Apr 4 20:55:10 UTC 2012
Looks good to me, thanks.
Ethan
On Tue, Mar 27, 2012 at 17:00, Ben Pfaff <blp at nicira.com> wrote:
> Originally the IDL transaction state machine had a return value
> TXN_TRY_AGAIN to signal the client to wait for a change in the database and
> then retry its transaction. However, this logic was incomplete, because
> it was possible for the database to change before the reply to the
> transaction RPC was received, in which case the client would wait for a
> further change. Commit 4fdfe5ccf84c (ovsdb-idl: Prevent occasional hang
> when multiple database clients race.) fixed the problem by breaking
> TXN_TRY_AGAIN into two status codes, TXN_AGAIN_WAIT that meant to wait for
> a further change and TXN_AGAIN_NOW that meant that a change had already
> occurred so try again immediately.
>
> This is correct enough, but it is more complicated than necessary. It is
> simpler and just as correct to use a single "try again" status that
> requires the client to wait for a change relative to the database contents
> *before* the transaction was committed. This commit makes that change.
> It also changes ovsdb_idl_run()'s return type from bool to void because
> its return type is hardly useful anymore.
>
> Signed-off-by: Ben Pfaff <blp at nicira.com>
> ---
> lib/ovsdb-idl.c | 46 ++++++++++------------------------------------
> lib/ovsdb-idl.h | 13 ++++++-------
> python/ovs/db/idl.py | 17 +++++------------
> tests/test-ovsdb.c | 14 +++++++++++---
> utilities/ovs-vsctl.c | 35 ++++++++++++++++++++---------------
> vswitchd/bridge.c | 10 +++++++---
> 6 files changed, 59 insertions(+), 76 deletions(-)
>
> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
> index 19ae16f..fdb82ba 100644
> --- a/lib/ovsdb-idl.c
> +++ b/lib/ovsdb-idl.c
> @@ -1,4 +1,4 @@
> -/* Copyright (c) 2009, 2010, 2011 Nicira Networks.
> +/* Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
> *
> * Licensed under the Apache License, Version 2.0 (the "License");
> * you may not use this file except in compliance with the License.
> @@ -270,32 +270,12 @@ ovsdb_idl_clear(struct ovsdb_idl *idl)
> }
> }
>
> -/* Processes a batch of messages from the database server on 'idl'. Returns
> - * true if the database as seen through 'idl' changed, false if it did not
> - * change. The initial fetch of the entire contents of the remote database is
> - * considered to be one kind of change. If 'idl' has been configured to
> - * acquire a database lock (with ovsdb_idl_set_lock()), then successfully
> - * acquiring the lock is also considered to be a change.
> - *
> - * When this function returns false, the client may continue to use any data
> - * structures it obtained from 'idl' in the past. But when it returns true,
> - * the client must not access any of these data structures again, because they
> - * could have freed or reused for other purposes.
> - *
> - * This function can return occasional false positives, that is, report that
> - * the database changed even though it didn't. This happens if the connection
> - * to the database drops and reconnects, which causes the database contents to
> - * be reloaded even if they didn't change. (It could also happen if the
> - * database server sends out a "change" that reflects what we already thought
> - * was in the database, but the database server is not supposed to do that.)
> - *
> - * As an alternative to checking the return value, the client may check for
> - * changes in the value returned by ovsdb_idl_get_seqno().
> - */
> -bool
> +/* Processes a batch of messages from the database server on 'idl'. This may
> + * cause the IDL's contents to change. The client may check for that with
> + * ovsdb_idl_get_seqno(). */
> +void
> ovsdb_idl_run(struct ovsdb_idl *idl)
> {
> - unsigned int initial_change_seqno = idl->change_seqno;
> int i;
>
> assert(!idl->txn);
> @@ -366,8 +346,6 @@ ovsdb_idl_run(struct ovsdb_idl *idl)
> }
> jsonrpc_msg_destroy(msg);
> }
> -
> - return initial_change_seqno != idl->change_seqno;
> }
>
> /* Arranges for poll_block() to wake up when ovsdb_idl_run() has something to
> @@ -1179,10 +1157,8 @@ ovsdb_idl_txn_status_to_string(enum ovsdb_idl_txn_status status)
> return "aborted";
> case TXN_SUCCESS:
> return "success";
> - case TXN_AGAIN_WAIT:
> - return "wait then try again";
> - case TXN_AGAIN_NOW:
> - return "try again now";
> + case TXN_TRY_AGAIN:
> + return "try again";
> case TXN_NOT_LOCKED:
> return "not locked";
> case TXN_ERROR:
> @@ -1596,7 +1572,7 @@ ovsdb_idl_txn_commit(struct ovsdb_idl_txn *txn)
> json_hash(txn->request_id, 0));
> txn->status = TXN_INCOMPLETE;
> } else {
> - txn->status = TXN_AGAIN_WAIT;
> + txn->status = TXN_TRY_AGAIN;
> }
>
> ovsdb_idl_txn_disassemble(txn);
> @@ -1901,7 +1877,7 @@ ovsdb_idl_txn_abort_all(struct ovsdb_idl *idl)
> struct ovsdb_idl_txn *txn;
>
> HMAP_FOR_EACH (txn, hmap_node, &idl->outstanding_txns) {
> - ovsdb_idl_txn_complete(txn, TXN_AGAIN_WAIT);
> + ovsdb_idl_txn_complete(txn, TXN_TRY_AGAIN);
> }
> }
>
> @@ -2102,9 +2078,7 @@ ovsdb_idl_txn_process_reply(struct ovsdb_idl *idl,
>
> status = (hard_errors ? TXN_ERROR
> : lock_errors ? TXN_NOT_LOCKED
> - : soft_errors ? (txn->commit_seqno == idl->change_seqno
> - ? TXN_AGAIN_WAIT
> - : TXN_AGAIN_NOW)
> + : soft_errors ? TXN_TRY_AGAIN
> : TXN_SUCCESS);
> }
>
> diff --git a/lib/ovsdb-idl.h b/lib/ovsdb-idl.h
> index 320a1ef..9b49e62 100644
> --- a/lib/ovsdb-idl.h
> +++ b/lib/ovsdb-idl.h
> @@ -1,4 +1,4 @@
> -/* Copyright (c) 2009, 2010, 2011 Nicira Networks.
> +/* Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
> *
> * Licensed under the Apache License, Version 2.0 (the "License");
> * you may not use this file except in compliance with the License.
> @@ -47,7 +47,7 @@ struct ovsdb_idl *ovsdb_idl_create(const char *remote,
> bool monitor_everything_by_default);
> void ovsdb_idl_destroy(struct ovsdb_idl *);
>
> -bool ovsdb_idl_run(struct ovsdb_idl *);
> +void ovsdb_idl_run(struct ovsdb_idl *);
> void ovsdb_idl_wait(struct ovsdb_idl *);
>
> void ovsdb_idl_set_lock(struct ovsdb_idl *, const char *lock_name);
> @@ -66,9 +66,9 @@ void ovsdb_idl_force_reconnect(struct ovsdb_idl *);
> * always appear to the client to be the default value for its type.
> *
> * If OVSDB_IDL_MONITOR is set, then the column is replicated. Its value will
> - * reflect the value in the database. If OVSDB_IDL_ALERT is also set, then
> - * ovsdb_idl_run() will return "true", and the value returned by
> - * ovsdb_idl_get_seqno() will change, when the column's value changes.
> + * reflect the value in the database. If OVSDB_IDL_ALERT is also set, then the
> + * value returned by ovsdb_idl_get_seqno() will change when the column's value
> + * changes.
> *
> * The possible mode combinations are:
> *
> @@ -120,11 +120,10 @@ enum ovsdb_idl_txn_status {
> TXN_INCOMPLETE, /* Commit in progress, please wait. */
> TXN_ABORTED, /* ovsdb_idl_txn_abort() called. */
> TXN_SUCCESS, /* Commit successful. */
> - TXN_AGAIN_WAIT, /* Commit failed because a "verify" operation
> + TXN_TRY_AGAIN, /* Commit failed because a "verify" operation
> * reported an inconsistency, due to a network
> * problem, or other transient failure. Wait
> * for a change, then try again. */
> - TXN_AGAIN_NOW, /* Same as above but try again immediately. */
> TXN_NOT_LOCKED, /* Server hasn't given us the lock yet. */
> TXN_ERROR /* Commit failed due to a hard error. */
> };
> diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py
> index 5639120..9760fc6 100644
> --- a/python/ovs/db/idl.py
> +++ b/python/ovs/db/idl.py
> @@ -447,7 +447,7 @@ class Idl:
> def __txn_abort_all(self):
> while self._outstanding_txns:
> txn = self._outstanding_txns.popitem()[1]
> - txn._status = Transaction.AGAIN_WAIT
> + txn._status = Transaction.TRY_AGAIN
>
> def __txn_process_reply(self, msg):
> txn = self._outstanding_txns.pop(msg.id, None)
> @@ -567,9 +567,7 @@ class Row(object):
> if 'column_name' changed in this row (or if this row was deleted)
> between the time that the IDL originally read its contents and the time
> that the transaction commits, then the transaction aborts and
> - Transaction.commit() returns Transaction.AGAIN_WAIT or
> - Transaction.AGAIN_NOW (depending on whether the database change has
> - already been received).
> + Transaction.commit() returns Transaction.TRY_AGAIN.
>
> The intention is that, to ensure that no transaction commits based on
> dirty reads, an application should call Row.verify() on each data item
> @@ -628,12 +626,10 @@ class Transaction(object):
> INCOMPLETE = "incomplete" # Commit in progress, please wait.
> ABORTED = "aborted" # ovsdb_idl_txn_abort() called.
> SUCCESS = "success" # Commit successful.
> - AGAIN_WAIT = "wait then try again"
> - # Commit failed because a "verify" operation
> + TRY_AGAIN = "try again" # Commit failed because a "verify" operation
> # reported an inconsistency, due to a network
> # problem, or other transient failure. Wait
> # for a change, then try again.
> - AGAIN_NOW = "try again now" # Same as AGAIN_WAIT but try again right away.
> NOT_LOCKED = "not locked" # Server hasn't given us the lock yet.
> ERROR = "error" # Commit failed due to a hard error.
>
> @@ -839,7 +835,7 @@ class Transaction(object):
> self.idl._outstanding_txns[self._request_id] = self
> self._status = Transaction.INCOMPLETE
> else:
> - self._status = Transaction.AGAIN_WAIT
> + self._status = Transaction.TRY_AGAIN
>
> self.__disassemble()
> return self._status
> @@ -994,10 +990,7 @@ class Transaction(object):
> elif lock_errors:
> self._status = Transaction.NOT_LOCKED
> elif soft_errors:
> - if self._commit_seqno == self.idl.change_seqno:
> - self._status = Transaction.AGAIN_WAIT
> - else:
> - self._status = Transaction.AGAIN_NOW
> + self._status = Transaction.TRY_AGAIN
> else:
> self._status = Transaction.SUCCESS
>
> diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c
> index 65d6e5f..cc01bbe 100644
> --- a/tests/test-ovsdb.c
> +++ b/tests/test-ovsdb.c
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (c) 2009, 2010, 2011 Nicira Networks.
> + * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
> *
> * Licensed under the Apache License, Version 2.0 (the "License");
> * you may not use this file except in compliance with the License.
> @@ -1893,7 +1893,11 @@ do_idl(int argc, char *argv[])
> arg++;
> } else {
> /* Wait for update. */
> - while (ovsdb_idl_get_seqno(idl) == seqno && !ovsdb_idl_run(idl)) {
> + for (;;) {
> + ovsdb_idl_run(idl);
> + if (ovsdb_idl_get_seqno(idl) != seqno) {
> + break;
> + }
> jsonrpc_run(rpc);
>
> ovsdb_idl_wait(idl);
> @@ -1933,7 +1937,11 @@ do_idl(int argc, char *argv[])
> if (rpc) {
> jsonrpc_close(rpc);
> }
> - while (ovsdb_idl_get_seqno(idl) == seqno && !ovsdb_idl_run(idl)) {
> + for (;;) {
> + ovsdb_idl_run(idl);
> + if (ovsdb_idl_get_seqno(idl) != seqno) {
> + break;
> + }
> ovsdb_idl_wait(idl);
> poll_block();
> }
> diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c
> index 0caf57e..9a86f78 100644
> --- a/utilities/ovs-vsctl.c
> +++ b/utilities/ovs-vsctl.c
> @@ -138,9 +138,8 @@ static void parse_command(int argc, char *argv[], struct vsctl_command *);
> static const struct vsctl_command_syntax *find_command(const char *name);
> static void run_prerequisites(struct vsctl_command[], size_t n_commands,
> struct ovsdb_idl *);
> -static enum ovsdb_idl_txn_status do_vsctl(const char *args,
> - struct vsctl_command *, size_t n,
> - struct ovsdb_idl *);
> +static void do_vsctl(const char *args, struct vsctl_command *, size_t n,
> + struct ovsdb_idl *);
>
> static const struct vsctl_table_class *get_table(const char *table_name);
> static void set_column(const struct vsctl_table_class *,
> @@ -156,9 +155,9 @@ int
> main(int argc, char *argv[])
> {
> extern struct vlog_module VLM_reconnect;
> - enum ovsdb_idl_txn_status status;
> struct ovsdb_idl *idl;
> struct vsctl_command *commands;
> + unsigned int seqno;
> size_t n_commands;
> char *args;
>
> @@ -184,14 +183,23 @@ main(int argc, char *argv[])
> idl = the_idl = ovsdb_idl_create(db, &ovsrec_idl_class, false);
> run_prerequisites(commands, n_commands, idl);
>
> - /* Now execute the commands. */
> - status = TXN_AGAIN_WAIT;
> + /* Execute the commands.
> + *
> + * 'seqno' is the database sequence number for which we last tried to
> + * execute our transaction. There's no point in trying to commit more than
> + * once for any given sequence number, because if the transaction fails
> + * it's because the database changed and we need to obtain an up-to-date
> + * view of the database before we try the transaction again. */
> + seqno = ovsdb_idl_get_seqno(idl);
> for (;;) {
> - if (ovsdb_idl_run(idl) || status == TXN_AGAIN_NOW) {
> - status = do_vsctl(args, commands, n_commands, idl);
> + ovsdb_idl_run(idl);
> +
> + if (seqno != ovsdb_idl_get_seqno(idl)) {
> + seqno = ovsdb_idl_get_seqno(idl);
> + do_vsctl(args, commands, n_commands, idl);
> }
>
> - if (status != TXN_AGAIN_NOW) {
> + if (seqno == ovsdb_idl_get_seqno(idl)) {
> ovsdb_idl_wait(idl);
> poll_block();
> }
> @@ -3663,7 +3671,7 @@ run_prerequisites(struct vsctl_command *commands, size_t n_commands,
> }
> }
>
> -static enum ovsdb_idl_txn_status
> +static void
> do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands,
> struct ovsdb_idl *idl)
> {
> @@ -3710,7 +3718,7 @@ do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands,
> vsctl_context_done(&ctx, c);
>
> if (ctx.try_again) {
> - status = TXN_AGAIN_WAIT;
> + status = TXN_TRY_AGAIN;
> goto try_again;
> }
> }
> @@ -3767,8 +3775,7 @@ do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands,
> case TXN_SUCCESS:
> break;
>
> - case TXN_AGAIN_WAIT:
> - case TXN_AGAIN_NOW:
> + case TXN_TRY_AGAIN:
> goto try_again;
>
> case TXN_ERROR:
> @@ -3852,8 +3859,6 @@ try_again:
> free(c->table);
> }
> free(error);
> -
> - return status;
> }
>
> static const struct vsctl_command_syntax all_commands[] = {
> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> index e15d57b..354044c 100644
> --- a/vswitchd/bridge.c
> +++ b/vswitchd/bridge.c
> @@ -129,6 +129,9 @@ static struct hmap all_bridges = HMAP_INITIALIZER(&all_bridges);
> /* OVSDB IDL used to obtain configuration. */
> static struct ovsdb_idl *idl;
>
> +/* Most recently processed IDL sequence number. */
> +static unsigned int idl_seqno;
> +
> /* Each time this timer expires, the bridge fetches systems and interface
> * statistics and pushes them into the database. */
> #define STATS_INTERVAL (5 * 1000) /* In milliseconds. */
> @@ -248,6 +251,7 @@ bridge_init(const char *remote)
> {
> /* Create connection to database. */
> idl = ovsdb_idl_create(remote, &ovsrec_idl_class, true);
> + idl_seqno = ovsdb_idl_get_seqno(idl);
> ovsdb_idl_set_lock(idl, "ovs_vswitchd");
>
> ovsdb_idl_omit_alert(idl, &ovsrec_open_vswitch_col_cur_cfg);
> @@ -1852,11 +1856,10 @@ bridge_run(void)
> const struct ovsrec_open_vswitch *cfg;
>
> bool vlan_splinters_changed;
> - bool database_changed;
> struct bridge *br;
>
> /* (Re)configure if necessary. */
> - database_changed = ovsdb_idl_run(idl);
> + ovsdb_idl_run(idl);
> if (ovsdb_idl_is_lock_contended(idl)) {
> static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> struct bridge *br, *next_br;
> @@ -1903,7 +1906,8 @@ bridge_run(void)
> }
> }
>
> - if (database_changed || vlan_splinters_changed) {
> + if (ovsdb_idl_get_seqno(idl) != idl_seqno || vlan_splinters_changed) {
> + idl_seqno = ovsdb_idl_get_seqno(idl);
> if (cfg) {
> struct ovsdb_idl_txn *txn = ovsdb_idl_txn_create(idl);
>
> --
> 1.7.2.5
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
More information about the dev
mailing list