[ovs-dev] [PATCH ovn branch-20.03] chassis: Fix the way encaps are updated for a chassis record.

Dumitru Ceara dceara at redhat.com
Mon Sep 7 10:10:45 UTC 2020


ovn-controller always stores the last configured system-id/chassis-id in
memory regardless if the connection to the SB is up or down. This is OK
as long as the change can be committed successfully when the SB DB
connection comes back up.

Without this change, if the chassis-id changes while the SB connection is
down, ovn-controller will fail to create the new record but nevertheless
update its in-memory chassis-id. When the SB connection is restored
ovn-controller tries to find the record corresponding to the chassis-id
it stored in memory. This fails causing ovn-controller to try to insert
a new record. But at this point a constraint violation is hit in the SB
because the Encap records of the "stale" chassis still exist in the DB,
along with the old chassis record.

This commit changes the way we search for a "stale" chassis record in the
SB to cover the above mentioned case. Also, in such cases there's no need
to recreate the Encaps, it's sufficient to update the chassis_name field.

Fixes: 5344f24ecb1a ("ovn-controller: Refactor chassis.c to abstract the string parsing")
Signed-off-by: Dumitru Ceara <dceara at redhat.com>

(cherry-picked from master commit 94a32fca2d2b825fece0ef5b1873459bd9857dd3)
---
 controller/chassis.c    | 60 ++++++++++++++++++++++++++++++-------------------
 tests/ovn-controller.at | 17 ++++++++++++++
 2 files changed, 54 insertions(+), 23 deletions(-)

diff --git a/controller/chassis.c b/controller/chassis.c
index 522893e..d525463 100644
--- a/controller/chassis.c
+++ b/controller/chassis.c
@@ -380,10 +380,7 @@ chassis_tunnels_changed(const struct sset *encap_type_set,
 {
     size_t encap_type_count = 0;
 
-    for (int i = 0; i < chassis_rec->n_encaps; i++) {
-        if (strcmp(chassis_rec->name, chassis_rec->encaps[i]->chassis_name)) {
-            return true;
-        }
+    for (size_t i = 0; i < chassis_rec->n_encaps; i++) {
 
         if (!sset_contains(encap_type_set, chassis_rec->encaps[i]->type)) {
             return true;
@@ -456,6 +453,19 @@ chassis_build_encaps(struct ovsdb_idl_txn *ovnsb_idl_txn,
 }
 
 /*
+ * Updates encaps for a given chassis. This can happen when the chassis
+ * name has changed. Also, the only thing we support updating is the
+ * chassis_name. For other changes the encaps will be recreated.
+ */
+static void
+chassis_update_encaps(const struct sbrec_chassis *chassis)
+{
+    for (size_t i = 0; i < chassis->n_encaps; i++) {
+        sbrec_encap_set_chassis_name(chassis->encaps[i], chassis->name);
+    }
+}
+
+/*
  * Returns a pointer to a chassis record from 'chassis_table' that
  * matches at least one tunnel config.
  */
@@ -486,9 +496,10 @@ chassis_get_stale_record(const struct sbrec_chassis_table *chassis_table,
 /* If this is a chassis config update after we initialized the record once
  * then we should always be able to find it with the ID we saved in
  * chassis_state.
- * Otherwise (i.e., first time we create the record) then we check if there's
- * a stale record from a previous controller run that didn't end gracefully
- * and reuse it. If not then we create a new record.
+ * Otherwise (i.e., first time we create the record or if the system-id
+ * changed) then we check if there's a stale record from a previous
+ * controller run that didn't end gracefully and reuse it. If not then we
+ * create a new record.
  */
 static const struct sbrec_chassis *
 chassis_get_record(struct ovsdb_idl_txn *ovnsb_idl_txn,
@@ -497,29 +508,31 @@ chassis_get_record(struct ovsdb_idl_txn *ovnsb_idl_txn,
                    const struct ovs_chassis_cfg *ovs_cfg,
                    const char *chassis_id)
 {
-    const struct sbrec_chassis *chassis_rec;
+    const struct sbrec_chassis *chassis = NULL;
 
     if (chassis_info_id_inited(&chassis_state)) {
-        chassis_rec = chassis_lookup_by_name(sbrec_chassis_by_name,
-                                             chassis_info_id(&chassis_state));
-        if (!chassis_rec) {
-            VLOG_DBG("Could not find Chassis, will create it"
-                     ": stored (%s) ovs (%s)",
+        chassis = chassis_lookup_by_name(sbrec_chassis_by_name,
+                                         chassis_info_id(&chassis_state));
+        if (!chassis) {
+            VLOG_DBG("Could not find Chassis, will check if the id changed: "
+                     "stored (%s) ovs (%s)",
                      chassis_info_id(&chassis_state), chassis_id);
-            if (ovnsb_idl_txn) {
-                /* Recreate the chassis record.  */
-                chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
-            }
         }
-    } else {
-        chassis_rec =
-            chassis_get_stale_record(chassis_table, ovs_cfg, chassis_id);
+    }
 
-        if (!chassis_rec && ovnsb_idl_txn) {
-            chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
+    if (!chassis) {
+        chassis = chassis_get_stale_record(chassis_table, ovs_cfg, chassis_id);
+    }
+
+    if (!chassis) {
+        /* Recreate the chassis record. */
+        VLOG_DBG("Could not find Chassis, will create it: %s", chassis_id);
+        if (ovnsb_idl_txn) {
+            return sbrec_chassis_insert(ovnsb_idl_txn);
         }
     }
-    return chassis_rec;
+
+    return chassis;
 }
 
 /* Update a Chassis record based on the config in the ovs config. */
@@ -567,6 +580,7 @@ chassis_update(const struct sbrec_chassis *chassis_rec,
                                 &ovs_cfg->encap_ip_set, ovs_cfg->encap_csum,
                                 chassis_rec);
     if (!tunnels_changed) {
+        chassis_update_encaps(chassis_rec);
         return;
     }
 
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index 63b2581..1c7aa58 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -196,6 +196,23 @@ OVS_WAIT_UNTIL([
     test "${sysid}" = "${chassis_id}"
 ])
 
+# Simulate system-id changing while ovn-controller is disconnected from the
+# SB.
+valid_remote=$(ovs-vsctl get Open_vSwitch . external_ids:ovn-remote)
+invalid_remote=tcp:0.0.0.0:4242
+ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${invalid_remote}
+expected_state="not connected"
+OVS_WAIT_UNTIL([
+    test "${expected_state}" = "$(ovn-appctl -t ovn-controller connection-status)"
+])
+sysid=${sysid}-bar
+ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}"
+ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${valid_remote}
+OVS_WAIT_UNTIL([
+    chassis_id=$(ovn-sbctl get Chassis "${sysid}" name)
+    test "${sysid}" = "${chassis_id}"
+])
+
 # Gracefully terminate daemons
 OVN_CLEANUP_SBOX([hv])
 OVN_CLEANUP_VSWITCH([main])
-- 
1.8.3.1



More information about the dev mailing list