[ovs-dev] [PATCH ovn] [OVN] Avoid nb_cfg update notification flooding

lmartins at redhat.com lmartins at redhat.com
Fri Mar 13 13:18:50 UTC 2020


From: Lucas Alvares Gomes <lucasagomes at gmail.com>

nb_cfg as a mechanism to "ping" OVN control plane is very useful
in many ways. However, the current implementation will trigger
update notifications flooding in the whole control plane. Each
HV updates to SB the nb_cfg number and all these updates are
notified to all the other HVs, which is O(n^2). Although updates
are batched in fewers notifications than n^2, it still generates
significant load on SB DB and also on ovn-controllers.

To solve this problem and make the mechanism more useful in large
scale producation deployment, this patch separates the per HV
*private* data (write only by the owning chassis and not
interesting to any other HVs) from the Chassis table to a separate
table, so that each HV can conditionally monitor and get updates
only for its own record.

Test result shows great improvement:
In a test environment with 1K sandbox HVs, and 10K ports created
on 40 lswitches and 8 lrouters, do the sync test when the system
is idle, with command:

    time ovn-nbctl --wait=hv sync

Original result:
real    4m52.926s
user    0m0.328s
sys     0m0.004s

With this patch:
real    0m11.405s
user    0m0.316s
sys     0m0.016s

Also, regarding backwards compatibility note that the nb_cfg from the
Chassis table is no longer updated. If any system is relying on this
mechanism they should start using the nb_cfg from the Chassis_Private
table from now on.

Co-authored-by: Han Zhou <hzhou8 at ebay.com>
Signed-off-by: Han Zhou <hzhou8 at ebay.com>
Signed-off-by: Lucas Alvares Gomes <lucasagomes at gmail.com>
---
 controller/chassis.c        | 19 +++++++++++++++++--
 controller/chassis.h        |  8 ++++++--
 controller/ovn-controller.c | 37 ++++++++++++++++++++++++++++++------
 lib/chassis-index.c         | 25 ++++++++++++++++++++++++
 lib/chassis-index.h         |  6 ++++++
 northd/ovn-northd.c         | 27 +++++++++++++++++++++-----
 ovn-sb.ovsschema            | 13 +++++++++++--
 ovn-sb.xml                  | 38 +++++++++++++++++++++++++++++++++----
 tests/ovn-controller.at     | 26 +++++++++++++++++++++++++
 9 files changed, 178 insertions(+), 21 deletions(-)

diff --git a/controller/chassis.c b/controller/chassis.c
index 522893ead..99ea6b8fc 100644
--- a/controller/chassis.c
+++ b/controller/chassis.c
@@ -585,14 +585,18 @@ chassis_update(const struct sbrec_chassis *chassis_rec,
 const struct sbrec_chassis *
 chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
             struct ovsdb_idl_index *sbrec_chassis_by_name,
+            struct ovsdb_idl_index *sbrec_chassis_private_by_name,
             const struct ovsrec_open_vswitch_table *ovs_table,
             const struct sbrec_chassis_table *chassis_table,
             const char *chassis_id,
             const struct ovsrec_bridge *br_int,
-            const struct sset *transport_zones)
+            const struct sset *transport_zones,
+            const struct sbrec_chassis_private **chassis_private)
 {
     struct ovs_chassis_cfg ovs_cfg;
 
+    *chassis_private = NULL;
+
     /* Get the chassis config from the ovs table. */
     ovs_chassis_cfg_init(&ovs_cfg);
     if (!chassis_parse_ovs_config(ovs_table, br_int, &ovs_cfg)) {
@@ -616,6 +620,15 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                                   chassis_id);
     }
 
+    const struct sbrec_chassis_private *chassis_private_rec =
+        chassis_private_lookup_by_name(sbrec_chassis_private_by_name,
+                                       chassis_id);
+    if (!chassis_private_rec && ovnsb_idl_txn) {
+        chassis_private_rec = sbrec_chassis_private_insert(ovnsb_idl_txn);
+        sbrec_chassis_private_set_name(chassis_private_rec, chassis_id);
+    }
+    *chassis_private = chassis_private_rec;
+
     ovs_chassis_cfg_destroy(&ovs_cfg);
     return chassis_rec;
 }
@@ -669,7 +682,8 @@ chassis_get_mac(const struct sbrec_chassis *chassis_rec,
  * required. */
 bool
 chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
-                const struct sbrec_chassis *chassis_rec)
+                const struct sbrec_chassis *chassis_rec,
+                const struct sbrec_chassis_private *chassis_private_rec)
 {
     if (!chassis_rec) {
         return true;
@@ -679,6 +693,7 @@ chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
                                   "ovn-controller: unregistering chassis '%s'",
                                   chassis_rec->name);
         sbrec_chassis_delete(chassis_rec);
+        sbrec_chassis_private_delete(chassis_private_rec);
     }
     return false;
 }
diff --git a/controller/chassis.h b/controller/chassis.h
index 178d2957e..81055b403 100644
--- a/controller/chassis.h
+++ b/controller/chassis.h
@@ -17,6 +17,7 @@
 #define OVN_CHASSIS_H 1
 
 #include <stdbool.h>
+#include "lib/ovn-sb-idl.h"
 
 struct ovsdb_idl;
 struct ovsdb_idl_index;
@@ -33,12 +34,15 @@ void chassis_register_ovs_idl(struct ovsdb_idl *);
 const struct sbrec_chassis *chassis_run(
     struct ovsdb_idl_txn *ovnsb_idl_txn,
     struct ovsdb_idl_index *sbrec_chassis_by_name,
+    struct ovsdb_idl_index *sbrec_chassis_private_by_name,
     const struct ovsrec_open_vswitch_table *,
     const struct sbrec_chassis_table *,
     const char *chassis_id, const struct ovsrec_bridge *br_int,
-    const struct sset *transport_zones);
+    const struct sset *transport_zones,
+    const struct sbrec_chassis_private **chassis_private);
 bool chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
-                     const struct sbrec_chassis *);
+                     const struct sbrec_chassis *,
+                     const struct sbrec_chassis_private *);
 bool chassis_get_mac(const struct sbrec_chassis *chassis,
                      const char *bridge_mapping,
                      struct eth_addr *chassis_mac);
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 2893eaac1..1542b2ad7 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -155,6 +155,7 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
     struct ovsdb_idl_condition ce =  OVSDB_IDL_CONDITION_INIT(&ce);
     struct ovsdb_idl_condition ip_mcast = OVSDB_IDL_CONDITION_INIT(&ip_mcast);
     struct ovsdb_idl_condition igmp = OVSDB_IDL_CONDITION_INIT(&igmp);
+    struct ovsdb_idl_condition chprv = OVSDB_IDL_CONDITION_INIT(&chprv);
 
     if (monitor_all) {
         ovsdb_idl_condition_add_clause_true(&pb);
@@ -165,6 +166,7 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
         ovsdb_idl_condition_add_clause_true(&ce);
         ovsdb_idl_condition_add_clause_true(&ip_mcast);
         ovsdb_idl_condition_add_clause_true(&igmp);
+        ovsdb_idl_condition_add_clause_true(&chprv);
         goto out;
     }
 
@@ -196,6 +198,10 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
                                                   &chassis->header_.uuid);
         sbrec_igmp_group_add_clause_chassis(&igmp, OVSDB_F_EQ,
                                             &chassis->header_.uuid);
+
+        /* Monitors Chassis_Private record for current chassis only */
+        sbrec_chassis_private_add_clause_name(&chprv, OVSDB_F_EQ,
+                                              chassis->name);
     }
     if (local_ifaces) {
         const char *name;
@@ -229,6 +235,7 @@ out:
     sbrec_controller_event_set_condition(ovnsb_idl, &ce);
     sbrec_ip_multicast_set_condition(ovnsb_idl, &ip_mcast);
     sbrec_igmp_group_set_condition(ovnsb_idl, &igmp);
+    sbrec_chassis_private_set_condition(ovnsb_idl, &chprv);
     ovsdb_idl_condition_destroy(&pb);
     ovsdb_idl_condition_destroy(&lf);
     ovsdb_idl_condition_destroy(&mb);
@@ -237,6 +244,7 @@ out:
     ovsdb_idl_condition_destroy(&ce);
     ovsdb_idl_condition_destroy(&ip_mcast);
     ovsdb_idl_condition_destroy(&igmp);
+    ovsdb_idl_condition_destroy(&chprv);
 }
 
 static const char *
@@ -1759,6 +1767,8 @@ main(int argc, char *argv[])
 
     struct ovsdb_idl_index *sbrec_chassis_by_name
         = chassis_index_create(ovnsb_idl_loop.idl);
+    struct ovsdb_idl_index *sbrec_chassis_private_by_name
+        = chassis_private_index_create(ovnsb_idl_loop.idl);
     struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath
         = mcast_group_index_create(ovnsb_idl_loop.idl);
     struct ovsdb_idl_index *sbrec_port_binding_by_name
@@ -1784,7 +1794,8 @@ main(int argc, char *argv[])
         = igmp_group_index_create(ovnsb_idl_loop.idl);
 
     ovsdb_idl_track_add_all(ovnsb_idl_loop.idl);
-    ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
+    ovsdb_idl_omit_alert(ovnsb_idl_loop.idl,
+                         &sbrec_chassis_private_col_nb_cfg);
 
     /* Omit the external_ids column of all the tables except for -
      *  - DNS. pinctrl.c uses the external_ids column of DNS,
@@ -1820,6 +1831,10 @@ main(int argc, char *argv[])
     ovsdb_idl_omit(ovnsb_idl_loop.idl, &sbrec_connection_col_status);
     ovsdb_idl_omit(ovnsb_idl_loop.idl, &sbrec_connection_col_target);
 
+    /* Do not monitor Chassis_Private external_ids */
+    ovsdb_idl_omit(ovnsb_idl_loop.idl,
+                   &sbrec_chassis_private_col_external_ids);
+
     update_sb_monitors(ovnsb_idl_loop.idl, NULL, NULL, NULL, false);
 
     stopwatch_create(CONTROLLER_LOOP_STOPWATCH_NAME, SW_MS);
@@ -1997,10 +2012,13 @@ main(int argc, char *argv[])
                 process_br_int(ovs_idl_txn, bridge_table, ovs_table);
             const char *chassis_id = get_ovs_chassis_id(ovs_table);
             const struct sbrec_chassis *chassis = NULL;
+            const struct sbrec_chassis_private *chassis_private = NULL;
             if (chassis_id) {
                 chassis = chassis_run(ovnsb_idl_txn, sbrec_chassis_by_name,
+                                      sbrec_chassis_private_by_name,
                                       ovs_table, chassis_table, chassis_id,
-                                      br_int, &transport_zones);
+                                      br_int, &transport_zones,
+                                      &chassis_private);
             }
 
             if (br_int) {
@@ -2125,10 +2143,10 @@ main(int argc, char *argv[])
                 engine_set_force_recompute(false);
             }
 
-            if (ovnsb_idl_txn && chassis) {
+            if (ovnsb_idl_txn && chassis_private) {
                 int64_t cur_cfg = ofctrl_get_cur_cfg();
-                if (cur_cfg && cur_cfg != chassis->nb_cfg) {
-                    sbrec_chassis_set_nb_cfg(chassis, cur_cfg);
+                if (cur_cfg && cur_cfg != chassis_private->nb_cfg) {
+                    sbrec_chassis_private_set_nb_cfg(chassis_private, cur_cfg);
                 }
             }
 
@@ -2231,10 +2249,17 @@ main(int argc, char *argv[])
                    ? chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id)
                    : NULL);
 
+            const struct sbrec_chassis_private *chassis_private
+                = (chassis_id
+                   ? chassis_private_lookup_by_name(
+                         sbrec_chassis_private_by_name, chassis_id)
+                   : NULL);
+
             /* Run all of the cleanup functions, even if one of them returns
              * false. We're done if all of them return true. */
             done = binding_cleanup(ovnsb_idl_txn, port_binding_table, chassis);
-            done = chassis_cleanup(ovnsb_idl_txn, chassis) && done;
+            done = chassis_cleanup(ovnsb_idl_txn,
+                                   chassis, chassis_private) && done;
             done = encaps_cleanup(ovs_idl_txn, br_int) && done;
             done = igmp_group_cleanup(ovnsb_idl_txn, sbrec_igmp_group) && done;
             if (done) {
diff --git a/lib/chassis-index.c b/lib/chassis-index.c
index 39066f4cc..d760124b4 100644
--- a/lib/chassis-index.c
+++ b/lib/chassis-index.c
@@ -40,6 +40,31 @@ chassis_lookup_by_name(struct ovsdb_idl_index *sbrec_chassis_by_name,
     return retval;
 }
 
+struct ovsdb_idl_index *
+chassis_private_index_create(struct ovsdb_idl *idl)
+{
+    return ovsdb_idl_index_create1(idl, &sbrec_chassis_private_col_name);
+}
+
+/* Finds and returns the chassis with the given 'name', or NULL if no such
+ * chassis exists. */
+const struct sbrec_chassis_private *
+chassis_private_lookup_by_name(
+    struct ovsdb_idl_index *sbrec_chassis_private_by_name,
+    const char *name)
+{
+    struct sbrec_chassis_private *target =
+        sbrec_chassis_private_index_init_row(sbrec_chassis_private_by_name);
+    sbrec_chassis_private_index_set_name(target, name);
+
+    struct sbrec_chassis_private *retval = sbrec_chassis_private_index_find(
+        sbrec_chassis_private_by_name, target);
+
+    sbrec_chassis_private_index_destroy_row(target);
+
+    return retval;
+}
+
 struct ovsdb_idl_index *
 ha_chassis_group_index_create(struct ovsdb_idl *idl)
 {
diff --git a/lib/chassis-index.h b/lib/chassis-index.h
index 302e5f0fd..b9b331f34 100644
--- a/lib/chassis-index.h
+++ b/lib/chassis-index.h
@@ -23,6 +23,12 @@ struct ovsdb_idl_index *chassis_index_create(struct ovsdb_idl *);
 const struct sbrec_chassis *chassis_lookup_by_name(
     struct ovsdb_idl_index *sbrec_chassis_by_name, const char *name);
 
+struct ovsdb_idl_index *chassis_private_index_create(struct ovsdb_idl *);
+
+const struct sbrec_chassis_private *
+chassis_private_lookup_by_name(
+    struct ovsdb_idl_index *sbrec_chassis_private_by_name, const char *name);
+
 struct ovsdb_idl_index *ha_chassis_group_index_create(struct ovsdb_idl *idl);
 const struct sbrec_ha_chassis_group *ha_chassis_group_lookup_by_name(
     struct ovsdb_idl_index *sbrec_ha_chassis_grp_by_name, const char *name);
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index d42a9892a..cb2a7e0d6 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -11174,6 +11174,11 @@ static const char *rbac_chassis_auth[] =
 static const char *rbac_chassis_update[] =
     {"nb_cfg", "external_ids", "encaps", "vtep_logical_switches"};
 
+static const char *rbac_chassis_private_auth[] =
+    {"chassis_name"};
+static const char *rbac_chassis_private_update[] =
+    {"nb_cfg"};
+
 static const char *rbac_encap_auth[] =
     {"chassis_name"};
 static const char *rbac_encap_update[] =
@@ -11211,6 +11216,14 @@ static struct rbac_perm_cfg {
         .update = rbac_chassis_update,
         .n_update = ARRAY_SIZE(rbac_chassis_update),
         .row = NULL
+    },{
+        .table = "Chassis_Private",
+        .auth = rbac_chassis_private_auth,
+        .n_auth = ARRAY_SIZE(rbac_chassis_private_auth),
+        .insdel = true,
+        .update = rbac_chassis_private_update,
+        .n_update = ARRAY_SIZE(rbac_chassis_private_update),
+        .row = NULL
     },{
         .table = "Encap",
         .auth = rbac_encap_auth,
@@ -11380,11 +11393,10 @@ update_northbound_cfg(struct northd_context *ctx,
     /* Update northbound hv_cfg if appropriate. */
     if (nbg) {
         /* Find minimum nb_cfg among all chassis. */
-        const struct sbrec_chassis *chassis;
+        const struct sbrec_chassis_private *chassis;
         int64_t hv_cfg = nbg->nb_cfg;
-        SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) {
-            if (!smap_get_bool(&chassis->external_ids, "is-remote", false) &&
-                chassis->nb_cfg < hv_cfg) {
+        SBREC_CHASSIS_PRIVATE_FOR_EACH (chassis, ctx->ovnsb_idl) {
+            if (chassis->nb_cfg < hv_cfg) {
                 hv_cfg = chassis->nb_cfg;
             }
         }
@@ -11670,10 +11682,15 @@ main(int argc, char *argv[])
     ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_band_col_burst_size);
 
     ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
-    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
     ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_name);
     ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_external_ids);
 
+    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis_private);
+    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
+                         &sbrec_chassis_private_col_nb_cfg);
+    add_column_noalert(ovnsb_idl_loop.idl,
+                       &sbrec_chassis_private_col_external_ids);
+
     ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_ha_chassis);
     add_column_noalert(ovnsb_idl_loop.idl,
                        &sbrec_ha_chassis_col_chassis);
diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
index d89f8dbbb..fa53c6a4c 100644
--- a/ovn-sb.ovsschema
+++ b/ovn-sb.ovsschema
@@ -1,7 +1,7 @@
 {
     "name": "OVN_Southbound",
-    "version": "2.7.0",
-    "cksum": "4286723485 21693",
+    "version": "2.8.0",
+    "cksum": "3504843097 22072",
     "tables": {
         "SB_Global": {
             "columns": {
@@ -43,6 +43,15 @@
                                               "max": "unlimited"}}},
             "isRoot": true,
             "indexes": [["name"]]},
+        "Chassis_Private": {
+            "columns": {
+                "name": {"type": "string"},
+                "nb_cfg": {"type": {"key": "integer"}},
+                "external_ids": {
+                    "type": {"key": "string", "value": "string",
+                             "min": 0, "max": "unlimited"}}},
+            "isRoot": true,
+            "indexes": [["name"]]},
         "Encap": {
             "columns": {
                 "type": {"type": {"key": {
diff --git a/ovn-sb.xml b/ovn-sb.xml
index 3ae9d4f92..77c5b1f38 100644
--- a/ovn-sb.xml
+++ b/ovn-sb.xml
@@ -256,10 +256,8 @@
     </column>
 
     <column name="nb_cfg">
-      Sequence number for the configuration.  When <code>ovn-controller</code>
-      updates the configuration of a chassis from the contents of the
-      southbound database, it copies <ref table="SB_Global" column="nb_cfg"/>
-      from the <ref table="SB_Global"/> table into this column.
+      Deprecated. This column is replaced by the <ref table="Chassis_Private"
+      column="nb_cfg"/> column of the <ref table="Chassis_Private"/> table.
     </column>
 
     <column name="external_ids" key="ovn-bridge-mappings">
@@ -366,6 +364,38 @@
     </group>
   </table>
 
+  <table name="Chassis_Private" title="Chassis Private">
+    <p>
+      Each row in this table maintains per chassis private data that are
+      accessed only by the owning chassis (write only) and ovn-northd, not by
+      any other chassis.  These data are stored in this separate table instead
+      of the <ref table="Chassis"/> table for performance considerations:
+      the rows in this table can be conditionally monitored by chassises so
+      that each chassis only get update notifications for its own row, to avoid
+      unnecessary chassis private data update flooding in a large scale
+      deployment.  (Future: this separation can be avoided if ovsdb conditional
+      monitoring is supported on a set of columns)
+    </p>
+
+    <column name="name">
+      The name of the chassis that owns these chassis-private data.
+    </column>
+
+    <column name="nb_cfg">
+      Sequence number for the configuration.  When <code>ovn-controller</code>
+      updates the configuration of a chassis from the contents of the
+      southbound database, it copies <ref table="SB_Global" column="nb_cfg"/>
+      from the <ref table="SB_Global"/> table into this column.
+    </column>
+
+    <group title="Common Columns">
+      The overall purpose of these columns is described under <code>Common
+      Columns</code> at the beginning of this document.
+
+      <column name="external_ids"/>
+    </group>
+  </table>
+
   <table name="Encap" title="Encapsulation Types">
     <p>
       The <ref column="encaps" table="Chassis"/> column in the <ref
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index 63b2581c0..8c6e03611 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -321,3 +321,29 @@ as ovn-sb
 OVS_APP_EXIT_AND_WAIT([ovsdb-server])
 
 AT_CLEANUP
+
+# Checks that ovn-controller increments the nb_cfg value in the Chassis_Private table
+AT_SETUP([ovn-controller - Bump Chassis_Private nb_cfg value])
+AT_KEYWORDS([ovn])
+ovn_start
+
+net_add n1
+sim_add hv
+as hv
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+
+OVS_WAIT_UNTIL([test xhv = x`ovn-sbctl --columns name --bare find chassis`])
+
+# Bump the NB_Global nb_cfg value
+nb_global_id=$(ovn-nbctl --columns _uuid --bare find nb_global)
+ovn-nbctl set NB_Global ${nb_global_id} nb_cfg=999
+
+# ovn-controller should bump the nb_cfg in the chassis_private table
+OVS_WAIT_UNTIL([test x999 = x`ovn-sbctl --columns nb_cfg --bare find chassis_private`])
+
+# Assert that the the nb_cfg from the Chassis table was not incremented
+OVS_WAIT_UNTIL([test x0 = x`ovn-sbctl --columns nb_cfg --bare find chassis`])
+
+OVN_CLEANUP([hv])
+AT_CLEANUP
-- 
2.25.1



More information about the dev mailing list