[ovs-dev] [PATCH v3 2/5] ovsdb-idl.c: Allows retry even when using a single remote.

Han Zhou zhouhan at gmail.com
Mon Aug 19 16:29:57 UTC 2019


From: Han Zhou <hzhou8 at ebay.com>

When clustered mode is used, the client needs to retry connecting
to new servers when certain failures happen. Today it is allowed to
retry new connection only if multiple remotes are used, which prevents
using LB VIP with clustered nodes. This patch makes sure the retry
logic works when using LB VIP: although same IP is used for retrying,
the LB can actually redirect the connection to a new node.

Signed-off-by: Han Zhou <hzhou8 at ebay.com>
---
v2 -> v3: Minor fix for test case.

 lib/ovsdb-idl.c        | 11 +++-------
 tests/ovsdb-cluster.at | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/test-ovsdb.c     |  1 +
 3 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index 1a6a4af..190143f 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -657,12 +657,8 @@ ovsdb_idl_state_to_string(enum ovsdb_idl_state state)
 static void
 ovsdb_idl_retry_at(struct ovsdb_idl *idl, const char *where)
 {
-    if (idl->session && jsonrpc_session_get_n_remotes(idl->session) > 1) {
-        ovsdb_idl_force_reconnect(idl);
-        ovsdb_idl_transition_at(idl, IDL_S_RETRY, where);
-    } else {
-        ovsdb_idl_transition_at(idl, IDL_S_ERROR, where);
-    }
+    ovsdb_idl_force_reconnect(idl);
+    ovsdb_idl_transition_at(idl, IDL_S_RETRY, where);
 }
 
 static void
@@ -1895,8 +1891,7 @@ ovsdb_idl_check_server_db(struct ovsdb_idl *idl)
     if (!database) {
         VLOG_INFO_RL(&rl, "%s: server does not have %s database",
                      server_name, idl->data.class_->database);
-    } else if (!strcmp(database->model, "clustered")
-               && jsonrpc_session_get_n_remotes(idl->session) > 1) {
+    } else if (!strcmp(database->model, "clustered")) {
         uint64_t index = database->n_index ? *database->index : 0;
 
         if (!database->schema) {
diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at
index 4701272..02e9926 100644
--- a/tests/ovsdb-cluster.at
+++ b/tests/ovsdb-cluster.at
@@ -63,6 +63,63 @@ m4_define([OVSDB_CHECK_EXECUTION],
 EXECUTION_EXAMPLES
 
 
+AT_BANNER([OVSDB - disconnect from cluster])
+
+# When a node is disconnected from the cluster, the IDL should disconnect
+# and retry even if it uses a single remote, because the remote IP can be
+# a VIP on a load-balance.
+AT_SETUP([OVSDB cluster - disconnect from cluster, single remote])
+AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
+
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+ordinal_schema > schema
+AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
+cid=`ovsdb-tool db-cid s1.db`
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+for i in `seq 2 3`; do
+    AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
+done
+
+on_exit 'kill `cat *.pid`'
+for i in `seq 3`; do
+    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
+done
+for i in `seq 3`; do
+    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
+done
+
+AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
+      {"op": "insert",
+       "table": "simple",
+       "row": {"i": 1}}]]'], [0], [ignore], [ignore])
+
+# Connect to a single remote s3.  Use "wait" to trigger a non-op transaction so
+# that test-ovsdb will not quit.
+
+test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -v -t10 idl unix:s3.ovsdb '[["idltest",
+      {"op": "wait",
+       "table": "simple",
+       "where": [["i", "==", 1]],
+       "columns": ["i"],
+       "until": "==",
+       "rows": [{"i": 1}]}]]' > test-ovsdb.log 2>&1 &
+echo $! > test-ovsdb.pid
+
+OVS_WAIT_UNTIL([grep "000: i=1" test-ovsdb.log])
+
+# Shutdown the other 2 servers so that s3 is disconnected from the cluster.
+for i in 2 1; do
+    OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
+done
+
+# The test-ovsdb should detect the disconnect and retry.
+OVS_WAIT_UNTIL([grep disconnect test-ovsdb.log])
+
+OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s3], [s3.pid])
+
+AT_CLEANUP
+
+
 OVS_START_SHELL_HELPERS
 # ovsdb_cluster_failure_test SCHEMA_FUNC OUTPUT TRANSACTION...
 ovsdb_cluster_failure_test () {
diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c
index 187eb28..b1a4be3 100644
--- a/tests/test-ovsdb.c
+++ b/tests/test-ovsdb.c
@@ -2412,6 +2412,7 @@ do_idl(struct ovs_cmdl_context *ctx)
     track = ((struct test_ovsdb_pvt_context *)(ctx->pvt))->track;
 
     idl = ovsdb_idl_create(ctx->argv[1], &idltest_idl_class, true, true);
+    ovsdb_idl_set_leader_only(idl, false);
     if (ctx->argc > 2) {
         struct stream *stream;
 
-- 
2.1.0



More information about the dev mailing list