[ovs-dev] [PATCH ovn] ovn-controller: Propagate nb-cfg-ts to local OVSDB.

Mark Michelson mmichels at redhat.com
Wed Jun 16 20:13:26 UTC 2021


Hi Dumitru, I have a few comments down below:

On 6/16/21 7:03 AM, Dumitru Ceara wrote:
> Also store the timestamp when ovn-controller started up.  This helps
> implementing alerts on the CMS side to detect whether ovn-controller is
> still alive and functioning well.
> 
> Reported-at: https://bugzilla.redhat.com/1924751
> Reported-by: Casey Callendrello <cdc at redhat.com>
> Signed-off-by: Dumitru Ceara <dceara at redhat.com>
> ---
>   controller/ovn-controller.8.xml | 25 +++++++++++++++++++++++++
>   controller/ovn-controller.c     | 29 +++++++++++++++++++++++------
>   tests/ovn-controller.at         | 11 +++++++++++
>   3 files changed, 59 insertions(+), 6 deletions(-)
> 
> diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
> index 8886df568..6610e08cf 100644
> --- a/controller/ovn-controller.8.xml
> +++ b/controller/ovn-controller.8.xml
> @@ -418,6 +418,18 @@
>           </p>
>         </dd>
>   
> +      <dt>
> +        <code>external-ids:ovn-startup-ts</code> in the <code>Bridge</code>
> +        table
> +      </dt>
> +
> +      <dd>
> +        <p>
> +          This key represents the timestamp at which
> +          <code>ovn-controller</code> process was started.
> +        </p>
> +      </dd>
> +

When documenting timestamps, it's typically a good idea to specify the 
units. When I hear "timestamp", my automatic assumption is UNIX 
timestamp, measured in seconds. But the code below is actually setting a 
millisecond-precision timestamp.

>         <dt>
>           <code>external-ids:ovn-nb-cfg</code> in the <code>Bridge</code> table
>         </dt>
> @@ -429,6 +441,19 @@
>             flows have been successfully installed in OVS.
>           </p>
>         </dd>
> +
> +      <dt>
> +        <code>external-ids:ovn-nb-cfg_ts</code> in the <code>Bridge</code>

This name doesn't match the code. The code sets external-ids:ovn-nb-cfg-ts.

> +        table
> +      </dt>
> +
> +      <dd>
> +        <p>
> +          This key represents the timestamp of the last known
> +          <code>OVN_Southbound.SB_Global.nb_cfg</code> value for which all
> +          flows have been successfully installed in OVS.
> +        </p>
> +      </dd>
>       </dl>
>   
>       <h1>OVN Southbound Database Usage</h1>
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index addb08755..2f8ceff9f 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -94,6 +94,8 @@ static unixctl_cb_func debug_delay_nb_cfg_report;
>   #define CONTROLLER_LOOP_STOPWATCH_NAME "ovn-controller-flow-generation"
>   
>   #define OVS_NB_CFG_NAME "ovn-nb-cfg"
> +#define OVS_NB_CFG_TS_NAME "ovn-nb-cfg-ts"
> +#define OVS_STARTUP_TS_NAME "ovn-startup-ts"
>   
>   static char *parse_options(int argc, char *argv[]);
>   OVS_NO_RETURN static void usage(void);
> @@ -788,19 +790,30 @@ static void
>   store_nb_cfg(struct ovsdb_idl_txn *sb_txn, struct ovsdb_idl_txn *ovs_txn,
>                const struct sbrec_chassis_private *chassis,
>                const struct ovsrec_bridge *br_int,
> -             unsigned int delay_nb_cfg_report)
> +             unsigned int delay_nb_cfg_report, int64_t startup_ts)
>   {
>       struct ofctrl_acked_seqnos *acked_nb_cfg_seqnos =
>           ofctrl_acked_seqnos_get(ofctrl_seq_type_nb_cfg);
>       uint64_t cur_cfg = acked_nb_cfg_seqnos->last_acked;
>   
> +    if (ovs_txn && br_int
> +            && startup_ts != smap_get_ullong(&br_int->external_ids,
> +                                             OVS_STARTUP_TS_NAME, 0)) {
> +        char *startup_ts_str = xasprintf("%"PRId64, startup_ts);
> +        ovsrec_bridge_update_external_ids_setkey(br_int, OVS_STARTUP_TS_NAME,
> +                                                 startup_ts_str);
> +        free(startup_ts_str);
> +    }

Micro-optimization suggestion: Declare a static bool in store_nb_cfg() 
that indicates when the startup_ts has been written to the OVS DB. Then 
check it at the beginning of this if statement to avoid the smap lookup 
of br_int->external_ids when we know it won't be necessary any more.

> +
>       if (!cur_cfg) {
>           goto done;
>       }
>   
> +    long long ts_now = time_wall_msec();
> +
>       if (sb_txn && chassis && cur_cfg != chassis->nb_cfg) {
>           sbrec_chassis_private_set_nb_cfg(chassis, cur_cfg);
> -        sbrec_chassis_private_set_nb_cfg_timestamp(chassis, time_wall_msec());
> +        sbrec_chassis_private_set_nb_cfg_timestamp(chassis, ts_now);
>   
>           if (delay_nb_cfg_report) {
>               VLOG_INFO("Sleep for %u sec", delay_nb_cfg_report);
> @@ -808,12 +821,15 @@ store_nb_cfg(struct ovsdb_idl_txn *sb_txn, struct ovsdb_idl_txn *ovs_txn,
>           }
>       }
>   
> -    if (ovs_txn && br_int &&
> -            cur_cfg != smap_get_ullong(&br_int->external_ids,
> -                                       OVS_NB_CFG_NAME, 0)) {
> +    if (ovs_txn && br_int && cur_cfg != smap_get_ullong(&br_int->external_ids,
> +                                                        OVS_NB_CFG_NAME, 0)) {
> +        char *cur_cfg_ts_str = xasprintf("%lld", ts_now);
>           char *cur_cfg_str = xasprintf("%"PRId64, cur_cfg);
>           ovsrec_bridge_update_external_ids_setkey(br_int, OVS_NB_CFG_NAME,
>                                                    cur_cfg_str);
> +        ovsrec_bridge_update_external_ids_setkey(br_int, OVS_NB_CFG_TS_NAME,
> +                                                 cur_cfg_ts_str);
> +        free(cur_cfg_ts_str);
>           free(cur_cfg_str);
>       }
>   
> @@ -2987,6 +3003,7 @@ main(int argc, char *argv[])
>       /* Main loop. */
>       exiting = false;
>       restart = false;
> +    int64_t startup_ts = time_wall_msec();
>       bool sb_monitor_all = false;
>       while (!exiting) {
>           memory_run();
> @@ -3234,7 +3251,7 @@ main(int argc, char *argv[])
>               }
>   
>               store_nb_cfg(ovnsb_idl_txn, ovs_idl_txn, chassis_private,
> -                         br_int, delay_nb_cfg_report);
> +                         br_int, delay_nb_cfg_report, startup_ts);
>   
>               if (pending_pkt.conn) {
>                   struct ed_type_addr_sets *as_data =
> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> index 72c07b3fa..e0440843b 100644
> --- a/tests/ovn-controller.at
> +++ b/tests/ovn-controller.at
> @@ -447,6 +447,17 @@ check ovn-nbctl --wait=hv sync
>   as hv1
>   OVS_WAIT_UNTIL([ovs-vsctl get Bridge br-int external_ids:ovn-nb-cfg], [0], [1])
>   
> +nb_cfg_ts=$(fetch_column Chassis_Private nb_cfg_timestamp name=hv1)
> +as hv1
> +AT_CHECK_UNQUOTED([ovs-vsctl get Bridge br-int external_ids:ovn-nb-cfg-ts], [0], [dnl
> +"${nb_cfg_ts}"
> +])
> +
> +# This will fail if the timestamp overflows but that's a risk that should be
> +# fine to take.

This could also fail because CLOCK_REALTIME can potentially move 
backwards in odd circumstances. I'm not sure of a way to work around 
this other than to use CLOCK_MONOTONIC. But that doesn't result in 
user-friendly timestamps.

So, I guess just leave as-is, and if we see the failure rate on this 
test get high, we can rethink it.

> +startup_ts=$(as hv1 ovs-vsctl get Bridge br-int external_ids:ovn-startup-ts | xargs)
> +AT_CHECK([test "${nb_cfg_ts}" -ge "${startup_ts}"])
> +
>   OVN_CLEANUP([hv1])
>   AT_CLEANUP
>   ])
> 



More information about the dev mailing list