[ovs-dev] [PATCH ovn branch-20.06] chassis: Do not try to guess system-id changes.
Numan Siddique
numans at ovn.org
Mon Dec 21 12:09:38 UTC 2020
On Mon, Dec 14, 2020 at 6:31 PM Dumitru Ceara <dceara at redhat.com> wrote:
>
> When the OVS system-id changes ovn-controller needs external (CMS) help
> in order to update its own Chassis/Chassis_private records, i.e., the
> CMS has to ensure that either ovn-controller is stopped (so that
> ovn-controller cleans up its old Chassis/Chassis_private records) or
> that after the system-id is changed, the stale Chassis/Chassis_private
> records are destroyed externally.
>
> This patch reverts the previous efforts to have ovn-controller reuse
> stale Chassis records and documents how the system-id change operation
> needs to be executed. The main problem with reusing stale records is
> that there's no safe way to make it work when RBAC is enabled.
>
> Suggestedy-by: Han Zhou <hzhou at ovn.org>
> Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-September/374608.html
> Fixes: f26c4a530bca ("ovn-controller: Fix chassis ovn-sbdb record init")
> Fixes: 4465f553ee70 ("ovn-controller: Update stale chassis entry at init")
> Fixes: 94a32fca2d2b ("chassis: Fix the way encaps are updated for a chassis record.")
> Fixes: dce1af31b550 ("chassis: Fix chassis_private record updates when the system-id changes.")
> Signed-off-by: Dumitru Ceara <dceara at redhat.com>
> Acked-by: Mark Michelson <mmichels at redhat.com>
> (cherry picked from commit fc359bfe934290baeeeed1bc78a3f2a919421fa3)
Hi Dumitru,
I applied all your backport patches to the respective branches last
week and I missed updating here.
Thanks
Numan
> ---
> controller/chassis.c | 132 +++-------------------------------------
> controller/chassis.h | 2 -
> controller/ovn-controller.8.xml | 7 ++-
> controller/ovn-controller.c | 20 +++---
> tests/ovn-controller.at | 29 ++++-----
> 5 files changed, 40 insertions(+), 150 deletions(-)
>
> diff --git a/controller/chassis.c b/controller/chassis.c
> index 68e0510..c272c20 100644
> --- a/controller/chassis.c
> +++ b/controller/chassis.c
> @@ -37,44 +37,6 @@ VLOG_DEFINE_THIS_MODULE(chassis);
> #endif /* HOST_NAME_MAX */
>
> /*
> - * Structure to hold chassis specific state (currently just chassis-id)
> - * to avoid database lookups when changes happen while the controller is
> - * running.
> - */
> -struct chassis_info {
> - /* Last ID we initialized the Chassis SB record with. */
> - struct ds id;
> -
> - /* True if Chassis SB record is initialized, false otherwise. */
> - uint32_t id_inited : 1;
> -};
> -
> -static struct chassis_info chassis_state = {
> - .id = DS_EMPTY_INITIALIZER,
> - .id_inited = false,
> -};
> -
> -static void
> -chassis_info_set_id(struct chassis_info *info, const char *id)
> -{
> - ds_clear(&info->id);
> - ds_put_cstr(&info->id, id);
> - info->id_inited = true;
> -}
> -
> -static bool
> -chassis_info_id_inited(const struct chassis_info *info)
> -{
> - return info->id_inited;
> -}
> -
> -static const char *
> -chassis_info_id(const struct chassis_info *info)
> -{
> - return ds_cstr_ro(&info->id);
> -}
> -
> -/*
> * Structure for storing the chassis config parsed from the ovs table.
> */
> struct ovs_chassis_cfg {
> @@ -400,6 +362,9 @@ chassis_tunnels_changed(const struct sset *encap_type_set,
> bool changed = false;
>
> for (size_t i = 0; i < chassis_rec->n_encaps; i++) {
> + if (strcmp(chassis_rec->name, chassis_rec->encaps[i]->chassis_name)) {
> + return true;
> + }
>
> if (!sset_contains(encap_type_set, chassis_rec->encaps[i]->type)) {
> changed = true;
> @@ -480,54 +445,11 @@ chassis_build_encaps(struct ovsdb_idl_txn *ovnsb_idl_txn,
> return encaps;
> }
>
> -/*
> - * Updates encaps for a given chassis. This can happen when the chassis
> - * name has changed. Also, the only thing we support updating is the
> - * chassis_name. For other changes the encaps will be recreated.
> - */
> -static void
> -chassis_update_encaps(const struct sbrec_chassis *chassis)
> -{
> - for (size_t i = 0; i < chassis->n_encaps; i++) {
> - sbrec_encap_set_chassis_name(chassis->encaps[i], chassis->name);
> - }
> -}
> -
> -/*
> - * Returns a pointer to a chassis record from 'chassis_table' that
> - * matches at least one tunnel config.
> - */
> -static const struct sbrec_chassis *
> -chassis_get_stale_record(const struct sbrec_chassis_table *chassis_table,
> - const struct ovs_chassis_cfg *ovs_cfg,
> - const char *chassis_id)
> -{
> - const struct sbrec_chassis *chassis_rec;
> -
> - SBREC_CHASSIS_TABLE_FOR_EACH (chassis_rec, chassis_table) {
> - for (size_t i = 0; i < chassis_rec->n_encaps; i++) {
> - if (sset_contains(&ovs_cfg->encap_type_set,
> - chassis_rec->encaps[i]->type) &&
> - sset_contains(&ovs_cfg->encap_ip_set,
> - chassis_rec->encaps[i]->ip)) {
> - return chassis_rec;
> - }
> - if (strcmp(chassis_rec->name, chassis_id) == 0) {
> - return chassis_rec;
> - }
> - }
> - }
> -
> - return NULL;
> -}
> -
> /* If this is a chassis config update after we initialized the record once
> * then we should always be able to find it with the ID we saved in
> * chassis_state.
> * Otherwise (i.e., first time we create the record or if the system-id
> - * changed) then we check if there's a stale record from a previous
> - * controller run that didn't end gracefully and reuse it. If not then we
> - * create a new record.
> + * changed) we create a new record.
> *
> * Sets '*chassis_rec' to point to the local chassis record.
> * Returns true if this record was already in the database, false if it was
> @@ -536,33 +458,16 @@ chassis_get_stale_record(const struct sbrec_chassis_table *chassis_table,
> static bool
> chassis_get_record(struct ovsdb_idl_txn *ovnsb_idl_txn,
> struct ovsdb_idl_index *sbrec_chassis_by_name,
> - const struct sbrec_chassis_table *chassis_table,
> - const struct ovs_chassis_cfg *ovs_cfg,
> const char *chassis_id,
> const struct sbrec_chassis **chassis_rec)
> {
> - const struct sbrec_chassis *chassis = NULL;
> -
> - if (chassis_info_id_inited(&chassis_state)) {
> - chassis = chassis_lookup_by_name(sbrec_chassis_by_name,
> - chassis_info_id(&chassis_state));
> - if (!chassis) {
> - VLOG_DBG("Could not find Chassis, will check if the id changed: "
> - "stored (%s) ovs (%s)",
> - chassis_info_id(&chassis_state), chassis_id);
> - }
> - }
> -
> - if (!chassis) {
> - chassis = chassis_get_stale_record(chassis_table, ovs_cfg, chassis_id);
> - }
> + const struct sbrec_chassis *chassis =
> + chassis = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
>
> - if (!chassis) {
> - /* Recreate the chassis record. */
> + if (!chassis && ovnsb_idl_txn) {
> + /* Create the chassis record. */
> VLOG_DBG("Could not find Chassis, will create it: %s", chassis_id);
> - if (ovnsb_idl_txn) {
> - *chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
> - }
> + *chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
> return false;
> }
>
> @@ -628,7 +533,6 @@ chassis_update(const struct sbrec_chassis *chassis_rec,
> &ovs_cfg->encap_ip_set, ovs_cfg->encap_csum,
> chassis_rec);
> if (!tunnels_changed) {
> - chassis_update_encaps(chassis_rec);
> return updated;
> }
>
> @@ -649,7 +553,6 @@ const struct sbrec_chassis *
> chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> struct ovsdb_idl_index *sbrec_chassis_by_name,
> const struct ovsrec_open_vswitch_table *ovs_table,
> - const struct sbrec_chassis_table *chassis_table,
> const char *chassis_id,
> const struct ovsrec_bridge *br_int,
> const struct sset *transport_zones)
> @@ -664,8 +567,7 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>
> const struct sbrec_chassis *chassis_rec = NULL;
> bool existed = chassis_get_record(ovnsb_idl_txn, sbrec_chassis_by_name,
> - chassis_table, &ovs_cfg, chassis_id,
> - &chassis_rec);
> + chassis_id, &chassis_rec);
>
> /* If we found (or created) a record, update it with the correct config
> * and store the current chassis_id for fast lookup in case it gets
> @@ -675,7 +577,6 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> bool updated = chassis_update(chassis_rec, ovnsb_idl_txn, &ovs_cfg,
> chassis_id, transport_zones);
>
> - chassis_info_set_id(&chassis_state, chassis_id);
> if (!existed || updated) {
> ovsdb_idl_txn_add_comment(ovnsb_idl_txn,
> "ovn-controller: %s chassis '%s'",
> @@ -750,16 +651,3 @@ chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> }
> return false;
> }
> -
> -/*
> - * Returns the last initialized chassis-id.
> - */
> -const char *
> -chassis_get_id(void)
> -{
> - if (chassis_info_id_inited(&chassis_state)) {
> - return chassis_info_id(&chassis_state);
> - }
> -
> - return NULL;
> -}
> diff --git a/controller/chassis.h b/controller/chassis.h
> index 178d295..de3971e 100644
> --- a/controller/chassis.h
> +++ b/controller/chassis.h
> @@ -34,7 +34,6 @@ const struct sbrec_chassis *chassis_run(
> struct ovsdb_idl_txn *ovnsb_idl_txn,
> struct ovsdb_idl_index *sbrec_chassis_by_name,
> const struct ovsrec_open_vswitch_table *,
> - const struct sbrec_chassis_table *,
> const char *chassis_id, const struct ovsrec_bridge *br_int,
> const struct sset *transport_zones);
> bool chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> @@ -42,7 +41,6 @@ bool chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> bool chassis_get_mac(const struct sbrec_chassis *chassis,
> const char *bridge_mapping,
> struct eth_addr *chassis_mac);
> -const char *chassis_get_id(void);
> const char * get_chassis_mac_mappings(const struct smap *ext_ids);
>
>
> diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
> index 92e0a6e..3e75733 100644
> --- a/controller/ovn-controller.8.xml
> +++ b/controller/ovn-controller.8.xml
> @@ -68,7 +68,12 @@
> </p>
> <dl>
> <dt><code>external_ids:system-id</code></dt>
> - <dd>The chassis name to use in the Chassis table.</dd>
> + <dd>The chassis name to use in the Chassis table. Changing the
> + <code>system-id</code> while <code>ovn-controller</code> is running is
> + not directly supported. Users have two options: either first
> + gracefully stop <code>ovn-controller</code> or manually delete the
> + stale <code>Chassis</code> record after changing the
> + <code>system-id</code>.</dd>
>
> <dt><code>external_ids:hostname</code></dt>
> <dd>The hostname to use in the Chassis table.</dd>
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index ed1a99e..43b6dc7 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -1543,7 +1543,7 @@ static void init_physical_ctx(struct engine_node *node,
> (struct ovsrec_bridge_table *)EN_OVSDB_GET(
> engine_get_input("OVS_bridge", node));
> const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
> - const char *chassis_id = chassis_get_id();
> + const char *chassis_id = get_ovs_chassis_id(ovs_table);
> const struct sbrec_chassis *chassis = NULL;
> struct ovsdb_idl_index *sbrec_chassis_by_name =
> engine_ovsdb_node_get_index(
> @@ -1619,7 +1619,11 @@ static void init_lflow_ctx(struct engine_node *node,
> (struct sbrec_multicast_group_table *)EN_OVSDB_GET(
> engine_get_input("SB_multicast_group", node));
>
> - const char *chassis_id = chassis_get_id();
> + struct ovsrec_open_vswitch_table *ovs_table =
> + (struct ovsrec_open_vswitch_table *)EN_OVSDB_GET(
> + engine_get_input("OVS_open_vswitch", node));
> +
> + const char *chassis_id = get_ovs_chassis_id(ovs_table);
> const struct sbrec_chassis *chassis = NULL;
> struct ovsdb_idl_index *sbrec_chassis_by_name =
> engine_ovsdb_node_get_index(
> @@ -1700,7 +1704,7 @@ en_flow_output_run(struct engine_node *node, void *data)
> (struct ovsrec_bridge_table *)EN_OVSDB_GET(
> engine_get_input("OVS_bridge", node));
> const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
> - const char *chassis_id = chassis_get_id();
> + const char *chassis_id = get_ovs_chassis_id(ovs_table);
>
> struct ovsdb_idl_index *sbrec_chassis_by_name =
> engine_ovsdb_node_get_index(
> @@ -1858,7 +1862,7 @@ _flow_output_resource_ref_handler(struct engine_node *node, void *data,
> (struct ovsrec_bridge_table *)EN_OVSDB_GET(
> engine_get_input("OVS_bridge", node));
> const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
> - const char *chassis_id = chassis_get_id();
> + const char *chassis_id = get_ovs_chassis_id(ovs_table);
>
> struct ovsdb_idl_index *sbrec_chassis_by_name =
> engine_ovsdb_node_get_index(
> @@ -2367,16 +2371,14 @@ main(int argc, char *argv[])
> ovsrec_bridge_table_get(ovs_idl_loop.idl);
> const struct ovsrec_open_vswitch_table *ovs_table =
> ovsrec_open_vswitch_table_get(ovs_idl_loop.idl);
> - const struct sbrec_chassis_table *chassis_table =
> - sbrec_chassis_table_get(ovnsb_idl_loop.idl);
> const struct ovsrec_bridge *br_int =
> process_br_int(ovs_idl_txn, bridge_table, ovs_table);
> const char *chassis_id = get_ovs_chassis_id(ovs_table);
> const struct sbrec_chassis *chassis = NULL;
> if (chassis_id) {
> chassis = chassis_run(ovnsb_idl_txn, sbrec_chassis_by_name,
> - ovs_table, chassis_table, chassis_id,
> - br_int, &transport_zones);
> + ovs_table, chassis_id, br_int,
> + &transport_zones);
> }
>
> if (br_int) {
> @@ -2603,7 +2605,7 @@ loop_done:
>
> const struct ovsrec_bridge *br_int = get_br_int(bridge_table,
> ovs_table);
> - const char *chassis_id = chassis_get_id();
> + const char *chassis_id = get_ovs_chassis_id(ovs_table);
> const struct sbrec_chassis *chassis
> = (chassis_id
> ? chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id)
> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> index d440359..7fcbd30 100644
> --- a/tests/ovn-controller.at
> +++ b/tests/ovn-controller.at
> @@ -187,30 +187,27 @@ OVS_WAIT_UNTIL([
> test "${expected_iface_types}" = "${chassis_iface_types}"
> ])
>
> -# Change the value of external_ids:system-id and make sure it's mirrored
> -# in the Chassis record in the OVN_Southbound database.
> +# Change the value of external_ids:system-id.
> +# This requires operator intervention and removal of the stale chassis record.
> +# Until that happens ovn-controller fails to create the records due to
> +# constraint violation on the Encap table.
> sysid=${sysid}-foo
> ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}"
> +
> OVS_WAIT_UNTIL([
> - chassis_id=$(ovn-sbctl get Chassis "${sysid}" name)
> - test "${sysid}" = "${chassis_id}"
> + grep -q 'Transaction causes multiple rows in \\"Encap\\" table to have identical values (geneve and \\"192.168.0.1\\") for index on columns \\"type\\" and \\"ip\\".' hv/ovn-controller.log
> ])
>
> -# Simulate system-id changing while ovn-controller is disconnected from the
> -# SB.
> -valid_remote=$(ovs-vsctl get Open_vSwitch . external_ids:ovn-remote)
> -invalid_remote=tcp:0.0.0.0:4242
> -ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${invalid_remote}
> -expected_state="not connected"
> +# Destroy the stale entry manually and ovn-controller should now be able
> +# to create new ones.
> +ovn-sbctl destroy chassis .
> OVS_WAIT_UNTIL([
> - test "${expected_state}" = "$(ovn-appctl -t ovn-controller connection-status)"
> + test $(ovn-sbctl --columns _uuid find chassis name=${sysid} | wc -l) -eq 1
> ])
> -sysid=${sysid}-bar
> -ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}"
> -ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${valid_remote}
> +
> +# Only one Chassis record should exist.
> OVS_WAIT_UNTIL([
> - chassis_id=$(ovn-sbctl get Chassis "${sysid}" name)
> - test "${sysid}" = "${chassis_id}"
> + test $(ovn-sbctl --columns _uuid list chassis | wc -l) -eq 1
> ])
>
> # Gracefully terminate daemons
> --
> 1.8.3.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
More information about the dev
mailing list