[ovs-dev] [PATCH ovn] Fix ha chassis failover issues for stale ha chassis entries
numans at ovn.org
numans at ovn.org
Tue Nov 5 18:09:59 UTC 2019
From: Numan Siddique <numans at ovn.org>
If ha chassis rows of an HA chassis group become stale i.e the HA_Chassis.chassis
column is empty (because ovn-controller is not running in that chassis)
except one row and when ha_chassis_group_is_active()
is called on that ovn-controller, then it returns false. Ideally it should
become active since its the only active chassis. This patch fixes this issue.
Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1762777
Reported-by: Daniel Alvarez <dalvarez at redhat.com>
Signed-off-by: Numan Siddique <numans at ovn.org>
---
controller/ha-chassis.c | 22 ++++++++++++++++++++++
tests/ovn.at | 20 +++++++++++++++++++-
2 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/controller/ha-chassis.c b/controller/ha-chassis.c
index 6d9426a5c..46d5b50fc 100644
--- a/controller/ha-chassis.c
+++ b/controller/ha-chassis.c
@@ -142,6 +142,20 @@ ha_chassis_destroy_ordered(struct ha_chassis_ordered *ordered_ha_ch)
}
}
+/* Returns the number of ha_chassis whose chassis column is
+ * set. */
+static size_t
+get_active_ha_chassis(const struct sbrec_ha_chassis_group *ha_ch_grp)
+{
+ size_t n_active_ha_chassis = 0;
+ for (size_t i = 0; i < ha_ch_grp->n_ha_chassis; i++) {
+ if (ha_ch_grp->ha_chassis[i]->chassis) {
+ n_active_ha_chassis++;
+ }
+ }
+
+ return n_active_ha_chassis;
+}
/* Returns true if the local_chassis is the master of
* the HA chassis group, false otherwise. */
@@ -159,6 +173,14 @@ ha_chassis_group_is_active(
return (ha_ch_grp->ha_chassis[0]->chassis == local_chassis);
}
+ if (get_active_ha_chassis(ha_ch_grp) == 1) {
+ /* Only 1 ha_chassis of this chassis group has the 'chasss'
+ * column set, which can only be true for this 'local_chassis'.
+ * So return true - as the local_chassis can be the master of
+ * this HA chassis group. */
+ return true;
+ }
+
if (sset_is_empty(active_tunnels)) {
/* If active tunnel sset is empty, it means it has lost
* connectivity with other chassis. */
diff --git a/tests/ovn.at b/tests/ovn.at
index 410f4b514..cb7903db8 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -13413,7 +13413,25 @@ OVS_WAIT_UNTIL(
logical_port=ls1-lp_ext1`
test "$chassis" = "$hv1_uuid"])
-OVN_CLEANUP([hv1],[hv2],[hv3])
+# Stop ovn-controllers on hv1 and hv3.
+as hv1 ovn-appctl -t ovn-controller exit
+as hv3 ovn-appctl -t ovn-controller exit
+
+# hv2 should be master and claim ls1-lp_ext1
+OVS_WAIT_UNTIL(
+ [chassis=`ovn-sbctl --bare --columns chassis find port_binding \
+logical_port=ls1-lp_ext1`
+ test "$chassis" = "$hv2_uuid"])
+
+as hv1
+OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as hv3
+OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+OVN_CLEANUP([hv2])
AT_CLEANUP
AT_SETUP([ovn -- Address Set Incremental Processing])
--
2.23.0
More information about the dev
mailing list