[ovs-dev] [PATCH] ovn-northd: Fix the HA_Chassis sync issue in OVN SB DB
nusiddiq at redhat.com
nusiddiq at redhat.com
Thu Apr 25 19:01:39 UTC 2019
From: Numan Siddique <nusiddiq at redhat.com>
ovn-northd deletes and recreates HA_Chassis rows (which belong
to a HA_Chassis_Group) whenever the HA_Chassis_Group/Gateway_Chassis
rows in Northbound DB are out of sync. If a Chassis table row in
Southbound DB is deleted and if this row is referenced by HA_Chassis
row (in Southbound DB), then the present code syncs the HA_Chassis
rows continously and this causes the ovn-controller's to wake up
and results in 100% cpu usage.
This was a simple case which the commit
1be1e0e5e0d1 ("ovn: Add generic HA chassis group") missed out addressing.
This patch fixes this issue.
Fixes: 1be1e0e5e0d1 ("ovn: Add generic HA chassis group")
Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2019-April/048580.html
Reported-by: Daniel Alvarez Sanchez (dalvarez at redhat.com)
Signed-off-by: Numan Siddique <nusiddiq at redhat.com>
---
ovn/northd/ovn-northd.c | 92 +++++++++++++++++++++++++++++++--------
tests/ovn-northd.at | 95 ++++++++++++++++++++++++++++++++++-------
tests/ovn.at | 6 ++-
3 files changed, 158 insertions(+), 35 deletions(-)
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index 7e0bf13d2..de0c06d4b 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -2020,7 +2020,8 @@ get_nat_addresses(const struct ovn_port *op, size_t *n)
static bool
sbpb_gw_chassis_needs_update(
const struct sbrec_port_binding *pb,
- const struct nbrec_logical_router_port *lrp)
+ const struct nbrec_logical_router_port *lrp,
+ struct ovsdb_idl_index *sbrec_chassis_by_name)
{
if (!lrp || !pb) {
return false;
@@ -2048,13 +2049,34 @@ sbpb_gw_chassis_needs_update(
for (size_t j = 0; j < pb->ha_chassis_group->n_ha_chassis; j++) {
struct sbrec_ha_chassis *sbha_ch =
pb->ha_chassis_group->ha_chassis[j];
+ const char *chassis_name = smap_get(&sbha_ch->external_ids,
+ "chassis-name");
+ if (!chassis_name) {
+ return true;
+ }
+
+ if (strcmp(chassis_name, nbgw_ch->chassis_name)) {
+ continue;
+ }
+
+ found = true;
+
+ if (nbgw_ch->priority != sbha_ch->priority) {
+ return true;
+ }
+
if (sbha_ch->chassis &&
- !strcmp(nbgw_ch->chassis_name, sbha_ch->chassis->name)) {
- if (nbgw_ch->priority == sbha_ch->priority) {
- found = true;
- break;
- }
- /* Priority has changed. Return true. */
+ strcmp(nbgw_ch->chassis_name, sbha_ch->chassis->name)) {
+ /* sbha_ch->chassis's name is different from the one
+ * in sbha_ch->external_ids:chassis-name. */
+ return true;
+ }
+
+ if (!sbha_ch->chassis &&
+ chassis_lookup_by_name(sbrec_chassis_by_name,
+ nbgw_ch->chassis_name)) {
+ /* sbha_ch->chassis is NULL, but the chassis is
+ * present in Chassis table. */
return true;
}
}
@@ -2070,19 +2092,28 @@ sbpb_gw_chassis_needs_update(
static struct sbrec_ha_chassis *
create_sb_ha_chassis(struct northd_context *ctx,
- const struct sbrec_chassis *chassis, int priority)
+ const struct sbrec_chassis *chassis,
+ const char *chassis_name, int priority)
{
struct sbrec_ha_chassis *sb_ha_chassis =
sbrec_ha_chassis_insert(ctx->ovnsb_txn);
sbrec_ha_chassis_set_chassis(sb_ha_chassis, chassis);
sbrec_ha_chassis_set_priority(sb_ha_chassis, priority);
+ /* Store the chassis_name in external_ids. If the chassis
+ * entry doesn't exist in the Chassis table then we can
+ * figure out the chassis to which this ha_chassis
+ * maps to. */
+ const struct smap external_ids =
+ SMAP_CONST1(&external_ids, "chassis-name", chassis_name);
+ sbrec_ha_chassis_set_external_ids(sb_ha_chassis, &external_ids);
return sb_ha_chassis;
}
static bool
chassis_group_list_changed(
const struct nbrec_ha_chassis_group *nb_ha_grp,
- const struct sbrec_ha_chassis_group *sb_ha_grp)
+ const struct sbrec_ha_chassis_group *sb_ha_grp,
+ struct ovsdb_idl_index *sbrec_chassis_by_name)
{
if (nb_ha_grp->n_ha_chassis != sb_ha_grp->n_ha_chassis) {
return true;
@@ -2100,21 +2131,38 @@ chassis_group_list_changed(
const struct nbrec_ha_chassis *nb_ha_chassis;
for (size_t i = 0; i < sb_ha_grp->n_ha_chassis; i++) {
sb_ha_chassis = sb_ha_grp->ha_chassis[i];
- if (!sb_ha_chassis->chassis) {
- /* This can happen, if ovn-controller exits gracefully in
- * a chassis, in which case, chassis row for that chassis
- * would be NULL. */
+ const char *chassis_name = smap_get(&sb_ha_chassis->external_ids,
+ "chassis-name");
+
+ if (!chassis_name) {
changed = true;
break;
}
nb_ha_chassis = shash_find_and_delete(&nb_ha_chassis_list,
- sb_ha_chassis->chassis->name);
+ chassis_name);
if (!nb_ha_chassis ||
nb_ha_chassis->priority != sb_ha_chassis->priority) {
changed = true;
break;
}
+
+ if (sb_ha_chassis->chassis &&
+ strcmp(sb_ha_chassis->chassis->name, chassis_name)) {
+ /* sb_ha_chassis->chassis's name is different from the one
+ * in sb_ha_chassis->external_ids:chassis-name. */
+ changed = true;
+ break;
+ }
+
+ if (!sb_ha_chassis->chassis &&
+ chassis_lookup_by_name(sbrec_chassis_by_name,
+ chassis_name)) {
+ /* sb_ha_chassis->chassis is NULL, but the chassis is
+ * present in Chassis table. */
+ changed = true;
+ break;
+ }
}
struct shash_node *node, *next;
@@ -2145,7 +2193,8 @@ sync_ha_chassis_group_for_sbpb(struct northd_context *ctx,
}
if (new_sb_chassis_group ||
- chassis_group_list_changed(nb_ha_grp, sb_ha_grp)) {
+ chassis_group_list_changed(nb_ha_grp, sb_ha_grp,
+ sbrec_chassis_by_name)) {
struct sbrec_ha_chassis **sb_ha_chassis = NULL;
size_t n_ha_chassis = nb_ha_grp->n_ha_chassis;
sb_ha_chassis = xcalloc(n_ha_chassis, sizeof *sb_ha_chassis);
@@ -2162,6 +2211,10 @@ sync_ha_chassis_group_for_sbpb(struct northd_context *ctx,
sbrec_ha_chassis_set_chassis(sb_ha_chassis[i], chassis);
sbrec_ha_chassis_set_priority(sb_ha_chassis[i],
nb_ha_chassis->priority);
+ const struct smap external_ids =
+ SMAP_CONST1(&external_ids, "chassis-name",
+ nb_ha_chassis->chassis_name);
+ sbrec_ha_chassis_set_external_ids(sb_ha_chassis[i], &external_ids);
}
sbrec_ha_chassis_group_set_ha_chassis(sb_ha_grp, sb_ha_chassis,
n_ha_chassis);
@@ -2206,7 +2259,8 @@ copy_gw_chassis_from_nbrp_to_sbpb(
lrp_gwc->chassis_name);
sb_ha_chassis[n_sb_ha_ch] =
- create_sb_ha_chassis(ctx, chassis, lrp_gwc->priority);
+ create_sb_ha_chassis(ctx, chassis, lrp_gwc->chassis_name,
+ lrp_gwc->priority);
n_sb_ha_ch++;
}
@@ -2271,7 +2325,8 @@ ovn_port_update_sbrec(struct northd_context *ctx,
/* Legacy gateway_chassis support.
* Create ha_chassis_group for the Northbound gateway_chassis
* associated with the lrp. */
- if (sbpb_gw_chassis_needs_update(op->sb, op->nbrp)) {
+ if (sbpb_gw_chassis_needs_update(op->sb, op->nbrp,
+ sbrec_chassis_by_name)) {
copy_gw_chassis_from_nbrp_to_sbpb(ctx,
sbrec_chassis_by_name,
op->nbrp, op->sb);
@@ -2303,7 +2358,8 @@ ovn_port_update_sbrec(struct northd_context *ctx,
if (sb_ha_ch_grp->n_ha_chassis != 1) {
struct sbrec_ha_chassis **sb_ha_ch =
xcalloc(1, sizeof *sb_ha_ch);
- sb_ha_ch[0] = create_sb_ha_chassis(ctx, chassis, 0);
+ sb_ha_ch[0] = create_sb_ha_chassis(ctx, chassis,
+ chassis->name, 0);
sbrec_ha_chassis_group_set_ha_chassis(sb_ha_ch_grp,
sb_ha_ch, 1);
}
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 94848672e..9588c76c9 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -1,5 +1,5 @@
AT_BANNER([OVN northd])
-AT_SETUP([ovn -- check Gateway_Chassis propagation from NBDB to SBDB])
+AT_SETUP([ovn -- check from NBDB to SBDB])
AT_SKIP_IF([test $HAVE_PYTHON = no])
ovn_start
@@ -65,6 +65,34 @@ ha_ch_list=`echo $ha_ch_list | sed 's/ //g'`
AT_CHECK([test "$ha_ch_list" = "$ha_ch"])
+# Delete chassis - gw2 in SB DB.
+# ovn-northd should not recreate ha_chassis rows
+# repeatedly when gw2 is deleted.
+ovn-sbctl chassis-del gw2
+
+ha_ch_list_1=''
+for i in `ovn-sbctl --bare --columns _uuid list ha_chassis | sort`
+do
+ ha_ch_list_1="$ha_ch_list_1 $i"
+done
+
+# Trim the spaces.
+ha_ch_list_1=`echo $ha_ch_list_1 | sed 's/ //g'`
+
+ha_ch_list_2=''
+for i in `ovn-sbctl --bare --columns _uuid list ha_chassis | sort`
+do
+ ha_ch_list_2="$ha_ch_list_2 $i"
+done
+
+# Trim the spaces.
+ha_ch_list_2=`echo $ha_ch_list_2 | sed 's/ //g'`
+
+AT_CHECK([test "$ha_ch_list_1" = "$ha_ch_list_2"])
+
+# Add back the gw2 chassis
+ovn-sbctl chassis-add gw2 geneve 1.2.4.8
+
# delete the 2nd Gateway_Chassis on NBDB for alice port
gw_ch=`ovn-sbctl --bare --columns gateway_chassis find port_binding \
logical_port="cr-alice"`
@@ -431,7 +459,31 @@ ovn-nbctl set logical_router_port lr0-public ha_chassis_group=$hagrp1_uuid
OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
ha_chassis_group name="hagrp1" | wc -l`])
-AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
+
+# Make sure that ovn-northd doesn't recreate the ha_chassis
+# records if the chassis record is missing in SB DB.
+
+ha_ch_list_1=''
+for i in `ovn-sbctl --bare --columns _uuid list ha_chassis | sort`
+do
+ ha_ch_list_1="$ha_ch_list_1 $i"
+done
+
+# Trim the spaces.
+ha_ch_list_1=`echo $ha_ch_list_1 | sed 's/ //g'`
+
+ha_ch_list_2=''
+for i in `ovn-sbctl --bare --columns _uuid list ha_chassis | sort`
+do
+ ha_ch_list_2="$ha_ch_list_2 $i"
+done
+
+# Trim the spaces.
+ha_ch_list_2=`echo $ha_ch_list_2 | sed 's/ //g'`
+
+AT_CHECK([test "$ha_ch_list_1" = "$ha_ch_list_2"])
# 2 HA chassis should be created with 'chassis' column empty because
# we have not added hv1 and hv2 chassis to the SB DB.
@@ -439,7 +491,8 @@ AT_CHECK([test 2 = `ovn-sbctl list ha_chassis | grep chassis | awk '{print $3}'
| grep -v '-' | wc -l`])
# We should have 1 ha chassis with 'chassis' column set for hv1
-AT_CHECK([test 1 = `ovn-sbctl list ha_chassis | grep chassis | awk '{print $3}' \
+AT_CHECK([test 1 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | awk '{print $3}' \
| grep '-' | wc -l`])
# Create another logical router port and associate to the same ha_chasis_group
@@ -452,7 +505,8 @@ ovn-nbctl set logical_router_port lr1-public ha_chassis_group=$hagrp1_uuid
OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
ha_chassis_group name="hagrp1" | wc -l`])
-AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
# Change the priority of ch1 - ha chassis in NB DB. It should get
# reflected in SB DB.
@@ -464,11 +518,13 @@ ha_chassis | grep 100 | wc -l`])
# Delete ch1 HA chassis in NB DB.
ovn-nbctl --wait=sb ha-chassis-group-remove-chassis hagrp1 ch1
-OVS_WAIT_UNTIL([test 2 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+OVS_WAIT_UNTIL([test 2 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
# Add back the ha chassis
ovn-nbctl --wait=sb ha-chassis-group-add-chassis hagrp1 ch1 40
-OVS_WAIT_UNTIL([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+OVS_WAIT_UNTIL([test 3 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
# Delete lr0-public. We should still have 1 HA chassis group and
# 3 HA chassis in SB DB.
@@ -477,7 +533,8 @@ ovn-nbctl --wait=sb lrp-del lr0-public
OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
ha_chassis_group name="hagrp1" | wc -l`])
-AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
# Delete lr1-public. There should be no HA chassis group in SB DB.
ovn-nbctl --wait=sb lrp-del lr1-public
@@ -494,7 +551,8 @@ ovn-nbctl set logical_router_port lr0-public ha_chassis_group=$hagrp1_uuid
OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
ha_chassis_group name="hagrp1" | wc -l`])
-AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
# Create a Gateway chassis. ovn-northd should ignore this.
ovn-nbctl lrp-set-gateway-chassis lr0-public ch-1 20
@@ -507,7 +565,8 @@ ha_chassis_group | wc -l`])
OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
ha_chassis_group name="hagrp1" | wc -l`])
-AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
# Now delete HA chassis group. ovn-northd should create HA chassis group
# with the Gateway chassis name
@@ -529,7 +588,8 @@ OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
ha_chassis_group name="lr0-public" | wc -l`])
ovn-sbctl --bare --columns _uuid find ha_chassis
-OVS_WAIT_UNTIL([test 2 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+OVS_WAIT_UNTIL([test 2 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
# Test if 'ref_chassis' column is properly set or not in
# SB DB ha_chassis_group.
@@ -709,7 +769,8 @@ OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
ha_chassis_group name="lr0-public" | wc -l`])
ovn-sbctl --bare --columns _uuid find ha_chassis
-OVS_WAIT_UNTIL([test 2 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+OVS_WAIT_UNTIL([test 2 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
# Delete the gateway chassis. HA chassis group should be created in SB DB
# for the redirect-chassis option.
@@ -724,7 +785,8 @@ OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
ha_chassis_group name="lr0-public_ch1" | wc -l`])
ovn-sbctl --bare --columns _uuid find ha_chassis
-OVS_WAIT_UNTIL([test 1 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+OVS_WAIT_UNTIL([test 1 = `ovn-sbctl list ha_chassis | grep chassis |
+grep -v chassis-name | wc -l`])
# Clear the redirect-chassis option.
ovn-nbctl clear logical_router_port lr0-public options
@@ -775,7 +837,8 @@ ovn-nbctl lsp-set-type sw0-pext1 external
OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
ha_chassis_group name="hagrp1" | wc -l`])
-AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
sb_hagrp1_uuid=`ovn-sbctl --bare --columns _uuid find ha_chassis_group \
name=hagrp1`
@@ -790,7 +853,8 @@ ovn-nbctl set logical_switch_port sw0-pext2 ha_chassis_group=$hagrp1_uuid
OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
ha_chassis_group name="hagrp1" | wc -l`])
-AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis |
+grep -v chassis-name | wc -l`])
AT_CHECK([test "$sb_hagrp1_uuid" = `ovn-sbctl --bare --columns \
ha_chassis_group find port_binding logical_port=sw0-pext1`])
@@ -814,7 +878,8 @@ logical_port=sw0-pext1) = x], [0], [])
OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
ha_chassis_group name="hagrp1" | wc -l`])
-AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
# Clear ha_chassis_group for sw0-pext2
ovn-nbctl --wait=sb clear logical_switch_port sw0-pext2 ha_chassis_group
diff --git a/tests/ovn.at b/tests/ovn.at
index 2db1ed49d..592f491fd 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -10207,7 +10207,8 @@ ha_chassis_group name="outside"`
AT_CHECK([test $ha_chassi_grp_name = outside])
# There should be 2 ha_chassis rows in SB DB.
-AT_CHECK([ovn-sbctl list ha_chassis | grep chassis | awk '{print $3}' \
+AT_CHECK([ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | awk '{print $3}' \
| grep '-' | wc -l ], [0], [2
])
@@ -10504,7 +10505,8 @@ ovn-nbctl set Logical_Router_Port outside ha_chassis_group=$hagrp1_uuid
OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns _uuid \
find ha_chassis_group | wc -l`])
-OVS_WAIT_UNTIL([test 2 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+OVS_WAIT_UNTIL([test 2 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
OVS_WAIT_UNTIL([as hv1 ovs-ofctl dump-flows br-int table=32 | \
grep active_backup | grep slaves:$hv1_gw1_ofport,$hv1_gw2_ofport \
--
2.20.1
More information about the dev
mailing list