[ovs-dev] [PATCH] ovn-northd: Fix the HA_Chassis sync issue in OVN SB DB

nusiddiq at redhat.com nusiddiq at redhat.com
Thu Apr 25 19:01:39 UTC 2019


From: Numan Siddique <nusiddiq at redhat.com>

ovn-northd deletes and recreates HA_Chassis rows (which belong
to a HA_Chassis_Group) whenever the HA_Chassis_Group/Gateway_Chassis
rows in Northbound DB are out of sync. If a Chassis table row in
Southbound DB is deleted and if this row is referenced by HA_Chassis
row (in Southbound DB), then the present code syncs the HA_Chassis
rows continously and this causes the ovn-controller's to wake up
and results in 100% cpu usage.

This was a simple case which the commit
1be1e0e5e0d1 ("ovn: Add generic HA chassis group") missed out addressing.

This patch fixes this issue.

Fixes: 1be1e0e5e0d1 ("ovn: Add generic HA chassis group")
Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2019-April/048580.html
Reported-by: Daniel Alvarez Sanchez (dalvarez at redhat.com)
Signed-off-by: Numan Siddique <nusiddiq at redhat.com>
---
 ovn/northd/ovn-northd.c | 92 +++++++++++++++++++++++++++++++--------
 tests/ovn-northd.at     | 95 ++++++++++++++++++++++++++++++++++-------
 tests/ovn.at            |  6 ++-
 3 files changed, 158 insertions(+), 35 deletions(-)

diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index 7e0bf13d2..de0c06d4b 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -2020,7 +2020,8 @@ get_nat_addresses(const struct ovn_port *op, size_t *n)
 static bool
 sbpb_gw_chassis_needs_update(
     const struct sbrec_port_binding *pb,
-    const struct nbrec_logical_router_port *lrp)
+    const struct nbrec_logical_router_port *lrp,
+    struct ovsdb_idl_index *sbrec_chassis_by_name)
 {
     if (!lrp || !pb) {
         return false;
@@ -2048,13 +2049,34 @@ sbpb_gw_chassis_needs_update(
         for (size_t j = 0; j < pb->ha_chassis_group->n_ha_chassis; j++) {
             struct sbrec_ha_chassis *sbha_ch =
                 pb->ha_chassis_group->ha_chassis[j];
+            const char *chassis_name = smap_get(&sbha_ch->external_ids,
+                                                "chassis-name");
+            if (!chassis_name) {
+                return true;
+            }
+
+            if (strcmp(chassis_name, nbgw_ch->chassis_name)) {
+                continue;
+            }
+
+            found = true;
+
+            if (nbgw_ch->priority != sbha_ch->priority) {
+                return true;
+            }
+
             if (sbha_ch->chassis &&
-                !strcmp(nbgw_ch->chassis_name, sbha_ch->chassis->name)) {
-                if (nbgw_ch->priority == sbha_ch->priority) {
-                    found = true;
-                    break;
-                }
-                /* Priority has changed. Return true. */
+                strcmp(nbgw_ch->chassis_name, sbha_ch->chassis->name)) {
+                /* sbha_ch->chassis's name is different from the one
+                 * in sbha_ch->external_ids:chassis-name. */
+                return true;
+            }
+
+            if (!sbha_ch->chassis &&
+                chassis_lookup_by_name(sbrec_chassis_by_name,
+                                       nbgw_ch->chassis_name)) {
+                /* sbha_ch->chassis is NULL, but the chassis is
+                 * present in Chassis table. */
                 return true;
             }
         }
@@ -2070,19 +2092,28 @@ sbpb_gw_chassis_needs_update(
 
 static struct sbrec_ha_chassis *
 create_sb_ha_chassis(struct northd_context *ctx,
-                     const struct sbrec_chassis *chassis, int priority)
+                     const struct sbrec_chassis *chassis,
+                     const char *chassis_name, int priority)
 {
     struct sbrec_ha_chassis *sb_ha_chassis =
         sbrec_ha_chassis_insert(ctx->ovnsb_txn);
     sbrec_ha_chassis_set_chassis(sb_ha_chassis, chassis);
     sbrec_ha_chassis_set_priority(sb_ha_chassis, priority);
+    /* Store the chassis_name in external_ids. If the chassis
+     * entry doesn't exist in the Chassis table then we can
+     * figure out the chassis to which this ha_chassis
+     * maps to. */
+    const struct smap external_ids =
+        SMAP_CONST1(&external_ids, "chassis-name", chassis_name);
+    sbrec_ha_chassis_set_external_ids(sb_ha_chassis, &external_ids);
     return sb_ha_chassis;
 }
 
 static bool
 chassis_group_list_changed(
     const struct nbrec_ha_chassis_group *nb_ha_grp,
-    const struct sbrec_ha_chassis_group *sb_ha_grp)
+    const struct sbrec_ha_chassis_group *sb_ha_grp,
+    struct ovsdb_idl_index *sbrec_chassis_by_name)
 {
     if (nb_ha_grp->n_ha_chassis != sb_ha_grp->n_ha_chassis) {
         return true;
@@ -2100,21 +2131,38 @@ chassis_group_list_changed(
     const struct nbrec_ha_chassis *nb_ha_chassis;
     for (size_t i = 0; i < sb_ha_grp->n_ha_chassis; i++) {
         sb_ha_chassis = sb_ha_grp->ha_chassis[i];
-        if (!sb_ha_chassis->chassis) {
-            /* This can happen, if ovn-controller exits gracefully in
-             * a chassis, in which case, chassis row for that chassis
-             * would be NULL. */
+        const char *chassis_name = smap_get(&sb_ha_chassis->external_ids,
+                                            "chassis-name");
+
+        if (!chassis_name) {
             changed = true;
             break;
         }
 
         nb_ha_chassis = shash_find_and_delete(&nb_ha_chassis_list,
-                                              sb_ha_chassis->chassis->name);
+                                              chassis_name);
         if (!nb_ha_chassis ||
             nb_ha_chassis->priority != sb_ha_chassis->priority) {
             changed = true;
             break;
         }
+
+        if (sb_ha_chassis->chassis &&
+            strcmp(sb_ha_chassis->chassis->name, chassis_name)) {
+            /* sb_ha_chassis->chassis's name is different from the one
+             * in sb_ha_chassis->external_ids:chassis-name. */
+            changed = true;
+            break;
+        }
+
+        if (!sb_ha_chassis->chassis &&
+            chassis_lookup_by_name(sbrec_chassis_by_name,
+                                   chassis_name)) {
+            /* sb_ha_chassis->chassis is NULL, but the chassis is
+             * present in Chassis table. */
+            changed = true;
+            break;
+        }
     }
 
     struct shash_node *node, *next;
@@ -2145,7 +2193,8 @@ sync_ha_chassis_group_for_sbpb(struct northd_context *ctx,
     }
 
     if (new_sb_chassis_group ||
-        chassis_group_list_changed(nb_ha_grp, sb_ha_grp)) {
+        chassis_group_list_changed(nb_ha_grp, sb_ha_grp,
+                                   sbrec_chassis_by_name)) {
         struct sbrec_ha_chassis **sb_ha_chassis = NULL;
         size_t n_ha_chassis = nb_ha_grp->n_ha_chassis;
         sb_ha_chassis = xcalloc(n_ha_chassis, sizeof *sb_ha_chassis);
@@ -2162,6 +2211,10 @@ sync_ha_chassis_group_for_sbpb(struct northd_context *ctx,
             sbrec_ha_chassis_set_chassis(sb_ha_chassis[i], chassis);
             sbrec_ha_chassis_set_priority(sb_ha_chassis[i],
                                           nb_ha_chassis->priority);
+            const struct smap external_ids =
+                SMAP_CONST1(&external_ids, "chassis-name",
+                            nb_ha_chassis->chassis_name);
+            sbrec_ha_chassis_set_external_ids(sb_ha_chassis[i], &external_ids);
         }
         sbrec_ha_chassis_group_set_ha_chassis(sb_ha_grp, sb_ha_chassis,
                                               n_ha_chassis);
@@ -2206,7 +2259,8 @@ copy_gw_chassis_from_nbrp_to_sbpb(
                                    lrp_gwc->chassis_name);
 
         sb_ha_chassis[n_sb_ha_ch] =
-            create_sb_ha_chassis(ctx, chassis, lrp_gwc->priority);
+            create_sb_ha_chassis(ctx, chassis, lrp_gwc->chassis_name,
+                                 lrp_gwc->priority);
         n_sb_ha_ch++;
     }
 
@@ -2271,7 +2325,8 @@ ovn_port_update_sbrec(struct northd_context *ctx,
                 /* Legacy gateway_chassis support.
                  * Create ha_chassis_group for the Northbound gateway_chassis
                  * associated with the lrp. */
-                if (sbpb_gw_chassis_needs_update(op->sb, op->nbrp)) {
+                if (sbpb_gw_chassis_needs_update(op->sb, op->nbrp,
+                                                 sbrec_chassis_by_name)) {
                     copy_gw_chassis_from_nbrp_to_sbpb(ctx,
                                                       sbrec_chassis_by_name,
                                                       op->nbrp, op->sb);
@@ -2303,7 +2358,8 @@ ovn_port_update_sbrec(struct northd_context *ctx,
                     if (sb_ha_ch_grp->n_ha_chassis != 1) {
                         struct sbrec_ha_chassis **sb_ha_ch =
                             xcalloc(1, sizeof *sb_ha_ch);
-                        sb_ha_ch[0] = create_sb_ha_chassis(ctx, chassis, 0);
+                        sb_ha_ch[0] = create_sb_ha_chassis(ctx, chassis,
+                                                           chassis->name, 0);
                         sbrec_ha_chassis_group_set_ha_chassis(sb_ha_ch_grp,
                                                               sb_ha_ch, 1);
                     }
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 94848672e..9588c76c9 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -1,5 +1,5 @@
 AT_BANNER([OVN northd])
-AT_SETUP([ovn -- check Gateway_Chassis propagation from NBDB to SBDB])
+AT_SETUP([ovn -- check   from NBDB to SBDB])
 AT_SKIP_IF([test $HAVE_PYTHON = no])
 ovn_start
 
@@ -65,6 +65,34 @@ ha_ch_list=`echo $ha_ch_list | sed 's/ //g'`
 
 AT_CHECK([test "$ha_ch_list" = "$ha_ch"])
 
+# Delete chassis - gw2 in SB DB.
+# ovn-northd should not recreate ha_chassis rows
+# repeatedly when gw2 is deleted.
+ovn-sbctl chassis-del gw2
+
+ha_ch_list_1=''
+for i in `ovn-sbctl --bare --columns _uuid list ha_chassis | sort`
+do
+    ha_ch_list_1="$ha_ch_list_1 $i"
+done
+
+# Trim the spaces.
+ha_ch_list_1=`echo $ha_ch_list_1 | sed 's/ //g'`
+
+ha_ch_list_2=''
+for i in `ovn-sbctl --bare --columns _uuid list ha_chassis | sort`
+do
+    ha_ch_list_2="$ha_ch_list_2 $i"
+done
+
+# Trim the spaces.
+ha_ch_list_2=`echo $ha_ch_list_2 | sed 's/ //g'`
+
+AT_CHECK([test "$ha_ch_list_1" = "$ha_ch_list_2"])
+
+# Add back the gw2 chassis
+ovn-sbctl chassis-add gw2 geneve 1.2.4.8
+
 # delete the 2nd Gateway_Chassis on NBDB for alice port
 gw_ch=`ovn-sbctl --bare --columns gateway_chassis find port_binding \
 logical_port="cr-alice"`
@@ -431,7 +459,31 @@ ovn-nbctl set logical_router_port lr0-public ha_chassis_group=$hagrp1_uuid
 OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
 ha_chassis_group name="hagrp1" | wc -l`])
 
-AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
+
+# Make sure that ovn-northd doesn't recreate the ha_chassis
+# records if the chassis record is missing in SB DB.
+
+ha_ch_list_1=''
+for i in `ovn-sbctl --bare --columns _uuid list ha_chassis | sort`
+do
+    ha_ch_list_1="$ha_ch_list_1 $i"
+done
+
+# Trim the spaces.
+ha_ch_list_1=`echo $ha_ch_list_1 | sed 's/ //g'`
+
+ha_ch_list_2=''
+for i in `ovn-sbctl --bare --columns _uuid list ha_chassis | sort`
+do
+    ha_ch_list_2="$ha_ch_list_2 $i"
+done
+
+# Trim the spaces.
+ha_ch_list_2=`echo $ha_ch_list_2 | sed 's/ //g'`
+
+AT_CHECK([test "$ha_ch_list_1" = "$ha_ch_list_2"])
 
 # 2 HA chassis should be created with 'chassis' column empty because
 # we have not added hv1 and hv2 chassis to the SB DB.
@@ -439,7 +491,8 @@ AT_CHECK([test 2 = `ovn-sbctl list ha_chassis | grep chassis | awk '{print $3}'
 | grep -v '-' | wc -l`])
 
 # We should have 1 ha chassis with 'chassis' column set for hv1
-AT_CHECK([test 1 = `ovn-sbctl list ha_chassis | grep chassis | awk '{print $3}' \
+AT_CHECK([test 1 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | awk '{print $3}' \
 | grep '-' | wc -l`])
 
 # Create another logical router port and associate to the same ha_chasis_group
@@ -452,7 +505,8 @@ ovn-nbctl set logical_router_port lr1-public ha_chassis_group=$hagrp1_uuid
 OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
 ha_chassis_group name="hagrp1" | wc -l`])
 
-AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
 
 # Change the priority of ch1 - ha chassis in NB DB. It should get
 # reflected in SB DB.
@@ -464,11 +518,13 @@ ha_chassis | grep 100 | wc -l`])
 # Delete ch1 HA chassis in NB DB.
 ovn-nbctl --wait=sb ha-chassis-group-remove-chassis hagrp1 ch1
 
-OVS_WAIT_UNTIL([test 2 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+OVS_WAIT_UNTIL([test 2 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
 
 # Add back the ha chassis
 ovn-nbctl --wait=sb ha-chassis-group-add-chassis hagrp1 ch1 40
-OVS_WAIT_UNTIL([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+OVS_WAIT_UNTIL([test 3 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
 
 # Delete lr0-public. We should still have 1 HA chassis group and
 # 3 HA chassis in SB DB.
@@ -477,7 +533,8 @@ ovn-nbctl --wait=sb lrp-del lr0-public
 OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
 ha_chassis_group name="hagrp1" | wc -l`])
 
-AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
 
 # Delete lr1-public. There should be no HA chassis group in SB DB.
 ovn-nbctl --wait=sb lrp-del lr1-public
@@ -494,7 +551,8 @@ ovn-nbctl set logical_router_port lr0-public ha_chassis_group=$hagrp1_uuid
 OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
 ha_chassis_group name="hagrp1" | wc -l`])
 
-AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
 
 # Create a Gateway chassis. ovn-northd should ignore this.
 ovn-nbctl lrp-set-gateway-chassis lr0-public ch-1 20
@@ -507,7 +565,8 @@ ha_chassis_group | wc -l`])
 OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
 ha_chassis_group name="hagrp1" | wc -l`])
 
-AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
 
 # Now delete HA chassis group. ovn-northd should create HA chassis group
 # with the Gateway chassis name
@@ -529,7 +588,8 @@ OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
 ha_chassis_group name="lr0-public" | wc -l`])
 
 ovn-sbctl --bare --columns _uuid find ha_chassis
-OVS_WAIT_UNTIL([test 2 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+OVS_WAIT_UNTIL([test 2 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
 
 # Test if 'ref_chassis' column is properly set or not in
 # SB DB ha_chassis_group.
@@ -709,7 +769,8 @@ OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
 ha_chassis_group name="lr0-public" | wc -l`])
 
 ovn-sbctl --bare --columns _uuid find ha_chassis
-OVS_WAIT_UNTIL([test 2 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+OVS_WAIT_UNTIL([test 2 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
 
 # Delete the gateway chassis. HA chassis group should be created in SB DB
 # for the redirect-chassis option.
@@ -724,7 +785,8 @@ OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
 ha_chassis_group name="lr0-public_ch1" | wc -l`])
 
 ovn-sbctl --bare --columns _uuid find ha_chassis
-OVS_WAIT_UNTIL([test 1 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+OVS_WAIT_UNTIL([test 1 = `ovn-sbctl list ha_chassis | grep chassis |
+grep -v chassis-name | wc -l`])
 
 # Clear the redirect-chassis option.
 ovn-nbctl clear logical_router_port lr0-public options
@@ -775,7 +837,8 @@ ovn-nbctl lsp-set-type sw0-pext1 external
 OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
 ha_chassis_group name="hagrp1" | wc -l`])
 
-AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
 
 sb_hagrp1_uuid=`ovn-sbctl --bare --columns _uuid find ha_chassis_group \
 name=hagrp1`
@@ -790,7 +853,8 @@ ovn-nbctl set logical_switch_port sw0-pext2 ha_chassis_group=$hagrp1_uuid
 OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
 ha_chassis_group name="hagrp1" | wc -l`])
 
-AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis |
+grep -v chassis-name | wc -l`])
 AT_CHECK([test "$sb_hagrp1_uuid" = `ovn-sbctl --bare --columns \
 ha_chassis_group find port_binding logical_port=sw0-pext1`])
 
@@ -814,7 +878,8 @@ logical_port=sw0-pext1) = x], [0], [])
 OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns name find \
 ha_chassis_group name="hagrp1" | wc -l`])
 
-AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+AT_CHECK([test 3 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
 
 # Clear ha_chassis_group for sw0-pext2
 ovn-nbctl --wait=sb clear logical_switch_port sw0-pext2 ha_chassis_group
diff --git a/tests/ovn.at b/tests/ovn.at
index 2db1ed49d..592f491fd 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -10207,7 +10207,8 @@ ha_chassis_group name="outside"`
 AT_CHECK([test $ha_chassi_grp_name = outside])
 
 # There should be 2 ha_chassis rows in SB DB.
-AT_CHECK([ovn-sbctl list ha_chassis | grep chassis | awk '{print $3}' \
+AT_CHECK([ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | awk '{print $3}' \
 | grep '-' | wc -l ], [0], [2
 ])
 
@@ -10504,7 +10505,8 @@ ovn-nbctl set Logical_Router_Port outside ha_chassis_group=$hagrp1_uuid
 OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns _uuid \
 find ha_chassis_group | wc -l`])
 
-OVS_WAIT_UNTIL([test 2 = `ovn-sbctl list ha_chassis | grep chassis | wc -l`])
+OVS_WAIT_UNTIL([test 2 = `ovn-sbctl list ha_chassis | grep chassis | \
+grep -v chassis-name | wc -l`])
 
 OVS_WAIT_UNTIL([as hv1 ovs-ofctl dump-flows br-int table=32 | \
 grep active_backup | grep slaves:$hv1_gw1_ofport,$hv1_gw2_ofport \
-- 
2.20.1



More information about the dev mailing list