[ovs-dev] [PATCH v2 4/4] raft.c: Set candidate_retrying if no leader elected since last election.
Han Zhou
zhouhan at gmail.com
Wed Aug 14 22:37:03 UTC 2019
From: Han Zhou <hzhou8 at ebay.com>
candidate_retrying is used to determine if the current node is disconnected
from the cluster when the node is in candidate role. However, a node
can flap between candidate and follower role before a leader is elected
when majority of the cluster is down, so is_connected() will flap, too, which
confuses clients.
This patch avoids the flapping with the help of a new member had_leader,
so that if no leader was elected since last election, we know we are
still retrying, and remain disconnected from the cluster.
Signed-off-by: Han Zhou <hzhou8 at ebay.com>
---
v1 -> v2: Fixed the condition in raft_is_connected(), and added a test case.
Moved this patch from 3/4 to 4/4 in the series.
ovsdb/raft.c | 29 ++++++++++++++++++-----
tests/ovsdb-cluster.at | 64 ++++++++++++++++++++++++++++++++++++++++----------
2 files changed, 74 insertions(+), 19 deletions(-)
diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index 64b8150..c2bd0ed 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -286,8 +286,11 @@ struct raft {
/* Candidates only. Reinitialized at start of election. */
int n_votes; /* Number of votes for me. */
- bool candidate_retrying; /* The first round of election timed-out and it
- is now retrying. */
+ bool candidate_retrying; /* The earlier election timed-out and we are
+ now retrying. */
+ bool had_leader; /* There has been leader elected since last
+ election initiated. This is to help setting
+ candidate_retrying. */
};
/* All Raft structures. */
@@ -345,6 +348,7 @@ static bool raft_handle_write_error(struct raft *, struct ovsdb_error *);
static void raft_run_reconfigure(struct raft *);
+static void raft_set_leader(struct raft *, const struct uuid *sid);
static struct raft_server *
raft_find_server(const struct raft *raft, const struct uuid *sid)
{
@@ -997,11 +1001,13 @@ raft_get_sid(const struct raft *raft)
bool
raft_is_connected(const struct raft *raft)
{
- return (!(raft->role == RAFT_CANDIDATE && raft->candidate_retrying)
+ bool ret = (!raft->candidate_retrying
&& !raft->joining
&& !raft->leaving
&& !raft->left
&& !raft->failed);
+ VLOG_DBG("raft_is_connected: %s\n", ret? "true": "false");
+ return ret;
}
/* Returns true if 'raft' is the cluster leader. */
@@ -1616,8 +1622,11 @@ raft_start_election(struct raft *raft, bool leadership_transfer)
}
ovs_assert(raft->role != RAFT_LEADER);
- raft->candidate_retrying = (raft->role == RAFT_CANDIDATE);
raft->role = RAFT_CANDIDATE;
+ /* If there was no leader elected since last election, we know we are
+ * retrying now. */
+ raft->candidate_retrying = !raft->had_leader;
+ raft->had_leader = false;
raft->n_votes = 0;
@@ -2487,6 +2496,14 @@ raft_server_init_leader(struct raft *raft, struct raft_server *s)
}
static void
+raft_set_leader(struct raft *raft, const struct uuid *sid)
+{
+ raft->leader_sid = *sid;
+ raft->had_leader = true;
+ raft->candidate_retrying = false;
+}
+
+static void
raft_become_leader(struct raft *raft)
{
log_all_commands(raft);
@@ -2498,7 +2515,7 @@ raft_become_leader(struct raft *raft)
ovs_assert(raft->role != RAFT_LEADER);
raft->role = RAFT_LEADER;
- raft->leader_sid = raft->sid;
+ raft_set_leader(raft, &raft->sid);
raft_reset_election_timer(raft);
raft_reset_ping_timer(raft);
@@ -2892,7 +2909,7 @@ raft_update_leader(struct raft *raft, const struct uuid *sid)
raft_get_nickname(raft, sid, buf, sizeof buf),
raft->term);
}
- raft->leader_sid = *sid;
+ raft_set_leader(raft, sid);
/* Record the leader to the log. This is not used by the algorithm
* (although it could be, for quick restart), but it is used for
diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at
index 7146fe6..f124244 100644
--- a/tests/ovsdb-cluster.at
+++ b/tests/ovsdb-cluster.at
@@ -66,23 +66,30 @@ EXECUTION_EXAMPLES
AT_BANNER([OVSDB - disconnect from cluster])
OVS_START_SHELL_HELPERS
-# ovsdb_test_cluster_disconnect LEADER_OR_FOLLOWER
+# ovsdb_test_cluster_disconnect N_SERVERS LEADER_OR_FOLLOWER [CHECK_FLAPPING]
+# Test server disconnected from the cluster.
+# N_SERVERS: Number of servers in the cluster.
+# LEADER_OR_FOLLOWER: The role of the server that is disconnected from the
+# cluster: "leader" or "follower".
+# CHECK_FLAPPING: Whether to check if is_disconnected flapped. "yes", "no".
ovsdb_test_cluster_disconnect () {
- leader_or_follower=$1
+ n=$1
+ leader_or_follower=$2
+ check_flapping=$3
schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
ordinal_schema > schema
AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
cid=`ovsdb-tool db-cid s1.db`
schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
- for i in `seq 2 3`; do
+ for i in `seq 2 $n`; do
AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
done
on_exit 'kill `cat *.pid`'
- for i in `seq 3`; do
+ for i in `seq $n`; do
AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
done
- for i in `seq 3`; do
+ for i in `seq $n`; do
AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
done
@@ -96,14 +103,18 @@ ovsdb_test_cluster_disconnect () {
# a VIP on a load-balance. So we use single remote to test here.
if test $leader_or_follower == "leader"; then
target=1
- shutdown="2 3"
+ shutdown=`seq $(($n/2 + 1)) $n`
+ cleanup=`seq $(($n/2))`
else
- target=3
+ target=$n
- # shutdown follower before the leader so that there is no chance for s3
- # become leader during the process.
- shutdown="2 1"
+ # shutdown followers before the leader (s1) so that there is no chance for
+ # s$n to become leader during the process.
+ shutdown="`seq 2 $(($n/2 + 1))` 1"
+ cleanup=`seq $(($n/2 + 2)) $n`
fi
+ echo shutdown=$shutdown
+ echo cleanup=$cleanup
# Connect to $target. Use "wait" to trigger a non-op transaction so
# that test-ovsdb will not quit.
@@ -119,6 +130,11 @@ ovsdb_test_cluster_disconnect () {
OVS_WAIT_UNTIL([grep "000: i=1" test-ovsdb.log])
+ # Start collecting raft_is_connected logs for $target before shutting down
+ # any servers.
+ tail -f s$target.log > raft_is_connected.log &
+ on_exit 'kill `echo $!`'
+
# Shutdown the other servers so that $target is disconnected from the cluster.
for i in $shutdown; do
OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
@@ -127,18 +143,40 @@ ovsdb_test_cluster_disconnect () {
# The test-ovsdb should detect the disconnect and retry.
OVS_WAIT_UNTIL([grep disconnect test-ovsdb.log])
- OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$target], [s$target.pid])
+ # The $target debug log should show raft_is_connected: false.
+ OVS_WAIT_UNTIL([grep "raft_is_connected: false" raft_is_connected.log])
+
+ # Save the current count of "raft_is_connected: true"
+ count_old=`grep "raft_is_connected: true" raft_is_connected.log | wc -l`
+ echo count_old $count_old
+
+ if test X$check_flapping == X"yes"; then
+ sleep 10
+ fi
+ # Make sure raft_is_connected didn't flap from false to true.
+ count_new=`grep "raft_is_connected: true" raft_is_connected.log | wc -l`
+ echo count_new $count_new
+ AT_CHECK([test $count_new == $count_old])
+
+ for i in $cleanup; do
+ OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
+ done
}
OVS_END_SHELL_HELPERS
AT_SETUP([OVSDB cluster - follower disconnect from cluster, single remote])
AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
-ovsdb_test_cluster_disconnect follower
+ovsdb_test_cluster_disconnect 3 follower
AT_CLEANUP
AT_SETUP([OVSDB cluster - leader disconnect from cluster, single remote])
AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
-ovsdb_test_cluster_disconnect leader
+ovsdb_test_cluster_disconnect 3 leader
+AT_CLEANUP
+
+AT_SETUP([OVSDB cluster - leader disconnect from cluster, check flapping])
+AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
+ovsdb_test_cluster_disconnect 5 leader yes
AT_CLEANUP
--
2.1.0
More information about the dev
mailing list