[ovs-dev] [PATCH ovn branch-20.06] chassis: Do not try to guess system-id changes.

Numan Siddique numans at ovn.org
Mon Dec 21 12:09:38 UTC 2020


On Mon, Dec 14, 2020 at 6:31 PM Dumitru Ceara <dceara at redhat.com> wrote:
>
> When the OVS system-id changes ovn-controller needs external (CMS) help
> in order to update its own Chassis/Chassis_private records, i.e., the
> CMS has to ensure that either ovn-controller is stopped (so that
> ovn-controller cleans up its old Chassis/Chassis_private records) or
> that after the system-id is changed, the stale Chassis/Chassis_private
> records are destroyed externally.
>
> This patch reverts the previous efforts to have ovn-controller reuse
> stale Chassis records and documents how the system-id change operation
> needs to be executed.  The main problem with reusing stale records is
> that there's no safe way to make it work when RBAC is enabled.
>
> Suggestedy-by: Han Zhou <hzhou at ovn.org>
> Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-September/374608.html
> Fixes: f26c4a530bca ("ovn-controller: Fix chassis ovn-sbdb record init")
> Fixes: 4465f553ee70 ("ovn-controller: Update stale chassis entry at init")
> Fixes: 94a32fca2d2b ("chassis: Fix the way encaps are updated for a chassis record.")
> Fixes: dce1af31b550 ("chassis: Fix chassis_private record updates when the system-id changes.")
> Signed-off-by: Dumitru Ceara <dceara at redhat.com>
> Acked-by: Mark Michelson <mmichels at redhat.com>
> (cherry picked from commit fc359bfe934290baeeeed1bc78a3f2a919421fa3)

Hi Dumitru,

I applied all your backport patches to the respective branches last
week and I missed updating here.

Thanks
Numan

> ---
>  controller/chassis.c            | 132 +++-------------------------------------
>  controller/chassis.h            |   2 -
>  controller/ovn-controller.8.xml |   7 ++-
>  controller/ovn-controller.c     |  20 +++---
>  tests/ovn-controller.at         |  29 ++++-----
>  5 files changed, 40 insertions(+), 150 deletions(-)
>
> diff --git a/controller/chassis.c b/controller/chassis.c
> index 68e0510..c272c20 100644
> --- a/controller/chassis.c
> +++ b/controller/chassis.c
> @@ -37,44 +37,6 @@ VLOG_DEFINE_THIS_MODULE(chassis);
>  #endif /* HOST_NAME_MAX */
>
>  /*
> - * Structure to hold chassis specific state (currently just chassis-id)
> - * to avoid database lookups when changes happen while the controller is
> - * running.
> - */
> -struct chassis_info {
> -    /* Last ID we initialized the Chassis SB record with. */
> -    struct ds id;
> -
> -    /* True if Chassis SB record is initialized, false otherwise. */
> -    uint32_t id_inited : 1;
> -};
> -
> -static struct chassis_info chassis_state = {
> -    .id = DS_EMPTY_INITIALIZER,
> -    .id_inited = false,
> -};
> -
> -static void
> -chassis_info_set_id(struct chassis_info *info, const char *id)
> -{
> -    ds_clear(&info->id);
> -    ds_put_cstr(&info->id, id);
> -    info->id_inited = true;
> -}
> -
> -static bool
> -chassis_info_id_inited(const struct chassis_info *info)
> -{
> -    return info->id_inited;
> -}
> -
> -static const char *
> -chassis_info_id(const struct chassis_info *info)
> -{
> -    return ds_cstr_ro(&info->id);
> -}
> -
> -/*
>   * Structure for storing the chassis config parsed from the ovs table.
>   */
>  struct ovs_chassis_cfg {
> @@ -400,6 +362,9 @@ chassis_tunnels_changed(const struct sset *encap_type_set,
>      bool  changed = false;
>
>      for (size_t i = 0; i < chassis_rec->n_encaps; i++) {
> +        if (strcmp(chassis_rec->name, chassis_rec->encaps[i]->chassis_name)) {
> +            return true;
> +        }
>
>          if (!sset_contains(encap_type_set, chassis_rec->encaps[i]->type)) {
>              changed = true;
> @@ -480,54 +445,11 @@ chassis_build_encaps(struct ovsdb_idl_txn *ovnsb_idl_txn,
>      return encaps;
>  }
>
> -/*
> - * Updates encaps for a given chassis. This can happen when the chassis
> - * name has changed. Also, the only thing we support updating is the
> - * chassis_name. For other changes the encaps will be recreated.
> - */
> -static void
> -chassis_update_encaps(const struct sbrec_chassis *chassis)
> -{
> -    for (size_t i = 0; i < chassis->n_encaps; i++) {
> -        sbrec_encap_set_chassis_name(chassis->encaps[i], chassis->name);
> -    }
> -}
> -
> -/*
> - * Returns a pointer to a chassis record from 'chassis_table' that
> - * matches at least one tunnel config.
> - */
> -static const struct sbrec_chassis *
> -chassis_get_stale_record(const struct sbrec_chassis_table *chassis_table,
> -                         const struct ovs_chassis_cfg *ovs_cfg,
> -                         const char *chassis_id)
> -{
> -    const struct sbrec_chassis *chassis_rec;
> -
> -    SBREC_CHASSIS_TABLE_FOR_EACH (chassis_rec, chassis_table) {
> -        for (size_t i = 0; i < chassis_rec->n_encaps; i++) {
> -            if (sset_contains(&ovs_cfg->encap_type_set,
> -                              chassis_rec->encaps[i]->type) &&
> -                    sset_contains(&ovs_cfg->encap_ip_set,
> -                                  chassis_rec->encaps[i]->ip)) {
> -                return chassis_rec;
> -            }
> -            if (strcmp(chassis_rec->name, chassis_id) == 0) {
> -                return chassis_rec;
> -            }
> -        }
> -    }
> -
> -    return NULL;
> -}
> -
>  /* If this is a chassis config update after we initialized the record once
>   * then we should always be able to find it with the ID we saved in
>   * chassis_state.
>   * Otherwise (i.e., first time we create the record or if the system-id
> - * changed) then we check if there's a stale record from a previous
> - * controller run that didn't end gracefully and reuse it. If not then we
> - * create a new record.
> + * changed) we create a new record.
>   *
>   * Sets '*chassis_rec' to point to the local chassis record.
>   * Returns true if this record was already in the database, false if it was
> @@ -536,33 +458,16 @@ chassis_get_stale_record(const struct sbrec_chassis_table *chassis_table,
>  static bool
>  chassis_get_record(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                     struct ovsdb_idl_index *sbrec_chassis_by_name,
> -                   const struct sbrec_chassis_table *chassis_table,
> -                   const struct ovs_chassis_cfg *ovs_cfg,
>                     const char *chassis_id,
>                     const struct sbrec_chassis **chassis_rec)
>  {
> -    const struct sbrec_chassis *chassis = NULL;
> -
> -    if (chassis_info_id_inited(&chassis_state)) {
> -        chassis = chassis_lookup_by_name(sbrec_chassis_by_name,
> -                                         chassis_info_id(&chassis_state));
> -        if (!chassis) {
> -            VLOG_DBG("Could not find Chassis, will check if the id changed: "
> -                     "stored (%s) ovs (%s)",
> -                     chassis_info_id(&chassis_state), chassis_id);
> -        }
> -    }
> -
> -    if (!chassis) {
> -        chassis = chassis_get_stale_record(chassis_table, ovs_cfg, chassis_id);
> -    }
> +    const struct sbrec_chassis *chassis =
> +        chassis = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
>
> -    if (!chassis) {
> -        /* Recreate the chassis record. */
> +    if (!chassis && ovnsb_idl_txn) {
> +        /* Create the chassis record. */
>          VLOG_DBG("Could not find Chassis, will create it: %s", chassis_id);
> -        if (ovnsb_idl_txn) {
> -            *chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
> -        }
> +        *chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
>          return false;
>      }
>
> @@ -628,7 +533,6 @@ chassis_update(const struct sbrec_chassis *chassis_rec,
>                                  &ovs_cfg->encap_ip_set, ovs_cfg->encap_csum,
>                                  chassis_rec);
>      if (!tunnels_changed) {
> -        chassis_update_encaps(chassis_rec);
>          return updated;
>      }
>
> @@ -649,7 +553,6 @@ const struct sbrec_chassis *
>  chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>              struct ovsdb_idl_index *sbrec_chassis_by_name,
>              const struct ovsrec_open_vswitch_table *ovs_table,
> -            const struct sbrec_chassis_table *chassis_table,
>              const char *chassis_id,
>              const struct ovsrec_bridge *br_int,
>              const struct sset *transport_zones)
> @@ -664,8 +567,7 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>
>      const struct sbrec_chassis *chassis_rec = NULL;
>      bool existed = chassis_get_record(ovnsb_idl_txn, sbrec_chassis_by_name,
> -                                      chassis_table, &ovs_cfg, chassis_id,
> -                                      &chassis_rec);
> +                                      chassis_id, &chassis_rec);
>
>      /* If we found (or created) a record, update it with the correct config
>       * and store the current chassis_id for fast lookup in case it gets
> @@ -675,7 +577,6 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>          bool updated = chassis_update(chassis_rec, ovnsb_idl_txn, &ovs_cfg,
>                                        chassis_id, transport_zones);
>
> -        chassis_info_set_id(&chassis_state, chassis_id);
>          if (!existed || updated) {
>              ovsdb_idl_txn_add_comment(ovnsb_idl_txn,
>                                        "ovn-controller: %s chassis '%s'",
> @@ -750,16 +651,3 @@ chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
>      }
>      return false;
>  }
> -
> -/*
> - * Returns the last initialized chassis-id.
> - */
> -const char *
> -chassis_get_id(void)
> -{
> -    if (chassis_info_id_inited(&chassis_state)) {
> -        return chassis_info_id(&chassis_state);
> -    }
> -
> -    return NULL;
> -}
> diff --git a/controller/chassis.h b/controller/chassis.h
> index 178d295..de3971e 100644
> --- a/controller/chassis.h
> +++ b/controller/chassis.h
> @@ -34,7 +34,6 @@ const struct sbrec_chassis *chassis_run(
>      struct ovsdb_idl_txn *ovnsb_idl_txn,
>      struct ovsdb_idl_index *sbrec_chassis_by_name,
>      const struct ovsrec_open_vswitch_table *,
> -    const struct sbrec_chassis_table *,
>      const char *chassis_id, const struct ovsrec_bridge *br_int,
>      const struct sset *transport_zones);
>  bool chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> @@ -42,7 +41,6 @@ bool chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
>  bool chassis_get_mac(const struct sbrec_chassis *chassis,
>                       const char *bridge_mapping,
>                       struct eth_addr *chassis_mac);
> -const char *chassis_get_id(void);
>  const char * get_chassis_mac_mappings(const struct smap *ext_ids);
>
>
> diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
> index 92e0a6e..3e75733 100644
> --- a/controller/ovn-controller.8.xml
> +++ b/controller/ovn-controller.8.xml
> @@ -68,7 +68,12 @@
>      </p>
>      <dl>
>        <dt><code>external_ids:system-id</code></dt>
> -      <dd>The chassis name to use in the Chassis table.</dd>
> +      <dd>The chassis name to use in the Chassis table.  Changing the
> +      <code>system-id</code> while <code>ovn-controller</code> is running is
> +      not directly supported.  Users have two options: either first
> +      gracefully stop <code>ovn-controller</code> or manually delete the
> +      stale <code>Chassis</code> record after changing the
> +      <code>system-id</code>.</dd>
>
>        <dt><code>external_ids:hostname</code></dt>
>        <dd>The hostname to use in the Chassis table.</dd>
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index ed1a99e..43b6dc7 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -1543,7 +1543,7 @@ static void init_physical_ctx(struct engine_node *node,
>          (struct ovsrec_bridge_table *)EN_OVSDB_GET(
>              engine_get_input("OVS_bridge", node));
>      const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
> -    const char *chassis_id = chassis_get_id();
> +    const char *chassis_id = get_ovs_chassis_id(ovs_table);
>      const struct sbrec_chassis *chassis = NULL;
>      struct ovsdb_idl_index *sbrec_chassis_by_name =
>          engine_ovsdb_node_get_index(
> @@ -1619,7 +1619,11 @@ static void init_lflow_ctx(struct engine_node *node,
>          (struct sbrec_multicast_group_table *)EN_OVSDB_GET(
>              engine_get_input("SB_multicast_group", node));
>
> -    const char *chassis_id = chassis_get_id();
> +    struct ovsrec_open_vswitch_table *ovs_table =
> +        (struct ovsrec_open_vswitch_table *)EN_OVSDB_GET(
> +            engine_get_input("OVS_open_vswitch", node));
> +
> +    const char *chassis_id = get_ovs_chassis_id(ovs_table);
>      const struct sbrec_chassis *chassis = NULL;
>      struct ovsdb_idl_index *sbrec_chassis_by_name =
>          engine_ovsdb_node_get_index(
> @@ -1700,7 +1704,7 @@ en_flow_output_run(struct engine_node *node, void *data)
>          (struct ovsrec_bridge_table *)EN_OVSDB_GET(
>              engine_get_input("OVS_bridge", node));
>      const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
> -    const char *chassis_id = chassis_get_id();
> +    const char *chassis_id = get_ovs_chassis_id(ovs_table);
>
>      struct ovsdb_idl_index *sbrec_chassis_by_name =
>          engine_ovsdb_node_get_index(
> @@ -1858,7 +1862,7 @@ _flow_output_resource_ref_handler(struct engine_node *node, void *data,
>          (struct ovsrec_bridge_table *)EN_OVSDB_GET(
>              engine_get_input("OVS_bridge", node));
>      const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
> -    const char *chassis_id = chassis_get_id();
> +    const char *chassis_id = get_ovs_chassis_id(ovs_table);
>
>      struct ovsdb_idl_index *sbrec_chassis_by_name =
>          engine_ovsdb_node_get_index(
> @@ -2367,16 +2371,14 @@ main(int argc, char *argv[])
>                  ovsrec_bridge_table_get(ovs_idl_loop.idl);
>              const struct ovsrec_open_vswitch_table *ovs_table =
>                  ovsrec_open_vswitch_table_get(ovs_idl_loop.idl);
> -            const struct sbrec_chassis_table *chassis_table =
> -                sbrec_chassis_table_get(ovnsb_idl_loop.idl);
>              const struct ovsrec_bridge *br_int =
>                  process_br_int(ovs_idl_txn, bridge_table, ovs_table);
>              const char *chassis_id = get_ovs_chassis_id(ovs_table);
>              const struct sbrec_chassis *chassis = NULL;
>              if (chassis_id) {
>                  chassis = chassis_run(ovnsb_idl_txn, sbrec_chassis_by_name,
> -                                      ovs_table, chassis_table, chassis_id,
> -                                      br_int, &transport_zones);
> +                                      ovs_table, chassis_id, br_int,
> +                                      &transport_zones);
>              }
>
>              if (br_int) {
> @@ -2603,7 +2605,7 @@ loop_done:
>
>              const struct ovsrec_bridge *br_int = get_br_int(bridge_table,
>                                                              ovs_table);
> -            const char *chassis_id = chassis_get_id();
> +            const char *chassis_id = get_ovs_chassis_id(ovs_table);
>              const struct sbrec_chassis *chassis
>                  = (chassis_id
>                     ? chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id)
> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> index d440359..7fcbd30 100644
> --- a/tests/ovn-controller.at
> +++ b/tests/ovn-controller.at
> @@ -187,30 +187,27 @@ OVS_WAIT_UNTIL([
>      test "${expected_iface_types}" = "${chassis_iface_types}"
>  ])
>
> -# Change the value of external_ids:system-id and make sure it's mirrored
> -# in the Chassis record in the OVN_Southbound database.
> +# Change the value of external_ids:system-id.
> +# This requires operator intervention and removal of the stale chassis record.
> +# Until that happens ovn-controller fails to create the records due to
> +# constraint violation on the Encap table.
>  sysid=${sysid}-foo
>  ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}"
> +
>  OVS_WAIT_UNTIL([
> -    chassis_id=$(ovn-sbctl get Chassis "${sysid}" name)
> -    test "${sysid}" = "${chassis_id}"
> +    grep -q 'Transaction causes multiple rows in \\"Encap\\" table to have identical values (geneve and \\"192.168.0.1\\") for index on columns \\"type\\" and \\"ip\\".' hv/ovn-controller.log
>  ])
>
> -# Simulate system-id changing while ovn-controller is disconnected from the
> -# SB.
> -valid_remote=$(ovs-vsctl get Open_vSwitch . external_ids:ovn-remote)
> -invalid_remote=tcp:0.0.0.0:4242
> -ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${invalid_remote}
> -expected_state="not connected"
> +# Destroy the stale entry manually and ovn-controller should now be able
> +# to create new ones.
> +ovn-sbctl destroy chassis .
>  OVS_WAIT_UNTIL([
> -    test "${expected_state}" = "$(ovn-appctl -t ovn-controller connection-status)"
> +    test $(ovn-sbctl --columns _uuid find chassis name=${sysid} | wc -l) -eq 1
>  ])
> -sysid=${sysid}-bar
> -ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}"
> -ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${valid_remote}
> +
> +# Only one Chassis record should exist.
>  OVS_WAIT_UNTIL([
> -    chassis_id=$(ovn-sbctl get Chassis "${sysid}" name)
> -    test "${sysid}" = "${chassis_id}"
> +    test $(ovn-sbctl --columns _uuid list chassis | wc -l) -eq 1
>  ])
>
>  # Gracefully terminate daemons
> --
> 1.8.3.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>


More information about the dev mailing list