[ovs-dev] [PATCH v2] ovsdb-idl: Force reconnect when missing updates encountered.

Mark Michelson mmichels at redhat.com
Fri Apr 3 12:01:18 UTC 2020


Acked-by: Mark Michelson <mmichels at redhat.com>

On 4/3/20 3:37 AM, Dumitru Ceara wrote:
> When processing update2 messages, if the IDL detects that previous
> updates were missed, clear db->last_id and trigger a reconnect such that
> the IDL refreshes its complete view of the database.
> 
> Such scenarios can happen for example when bugs in
> ovsdb-server/ovsdb-idl are encountered. One such situation is reported
> in https://bugzilla.redhat.com/show_bug.cgi?id=1808580.
> 
> This commit doesn't fix the root cause of the bug above but adds the
> recovery mechanism to avoid staying in an inconsistent state for ever.
> 
> CC: Andy Zhou <azhou at ovn.org>
> Fixes: db2b5757328c ("lib: add monitor2 support in ovsdb-idl.")
> Signed-off-by: Dumitru Ceara <dceara at redhat.com>
> 
> ---
> V2:
> - Address Mark's comments:
>    - change the error log message to reflect the action taken.
>    - use ovsdb_error() instead of ovsdb_syntax_error().
> ---
>   lib/ovsdb-idl.c | 36 ++++++++++++++++++++++++++++++++----
>   1 file changed, 32 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
> index 1535ad7..dfc5a47 100644
> --- a/lib/ovsdb-idl.c
> +++ b/lib/ovsdb-idl.c
> @@ -324,7 +324,8 @@ static bool ovsdb_idl_process_update(struct ovsdb_idl_table *,
>   static bool ovsdb_idl_process_update2(struct ovsdb_idl_table *,
>                                         const struct uuid *,
>                                         const char *operation,
> -                                      const struct json *row);
> +                                      const struct json *row,
> +                                      bool *inconsistent);
>   static void ovsdb_idl_insert_row(struct ovsdb_idl_row *, const struct json *);
>   static void ovsdb_idl_delete_row(struct ovsdb_idl_row *);
>   static bool ovsdb_idl_modify_row(struct ovsdb_idl_row *, const struct json *);
> @@ -2318,10 +2319,25 @@ ovsdb_idl_db_parse_update__(struct ovsdb_idl_db *db,
>                       row = shash_find_data(json_object(row_update), operation);
>   
>                       if (row)  {
> +                        bool inconsistent = false;
> +
>                           if (ovsdb_idl_process_update2(table, &uuid, operation,
> -                                                      row)) {
> +                                                      row, &inconsistent)) {
>                               db->change_seqno++;
>                           }
> +
> +                        /* If the db is in an inconsistent state, clear the
> +                         * db->last_id and retry to retrieve the complete DB.
> +                         */
> +                        if (inconsistent) {
> +                            memset(&db->last_id, 0, sizeof db->last_id);
> +                            ovsdb_idl_retry(db->idl);
> +                            return ovsdb_error(NULL,
> +                                               "<row_update2> received for "
> +                                               "inconsistent IDL: "
> +                                               "reconnecting IDL with "
> +                                               "fast resync disabled");
> +                        }
>                           break;
>                       }
>                   }
> @@ -2445,16 +2461,26 @@ ovsdb_idl_process_update(struct ovsdb_idl_table *table,
>   }
>   
>   /* Returns true if a column with mode OVSDB_IDL_MODE_RW changed, false
> - * otherwise. */
> + * otherwise.
> + *
> + * NOTE: When processing the "modify" updates, the IDL can determine that
> + * previous updates were missed (e.g., due to bugs) and that rows that don't
> + * exist locally should be updated. This indicates that the
> + * IDL is in an inconsistent state and, unlike in ovsdb_idl_process_update(),
> + * the missing rows cannot be inserted. If this is the case, 'inconsistent'
> + * is set to true to indicate the catastrophic failure.
> + */
>   static bool
>   ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
>                             const struct uuid *uuid,
>                             const char *operation,
> -                          const struct json *json_row)
> +                          const struct json *json_row,
> +                          bool *inconsistent)
>   {
>       struct ovsdb_idl_row *row;
>   
>       row = ovsdb_idl_get_row(table, uuid);
> +    *inconsistent = false;
>       if (!strcmp(operation, "delete")) {
>           /* Delete row. */
>           if (row && !ovsdb_idl_row_is_orphan(row)) {
> @@ -2486,11 +2512,13 @@ ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
>                   VLOG_WARN_RL(&semantic_rl, "cannot modify missing but "
>                                "referenced row "UUID_FMT" in table %s",
>                                UUID_ARGS(uuid), table->class_->name);
> +                *inconsistent = true;
>                   return false;
>               }
>           } else {
>               VLOG_WARN_RL(&semantic_rl, "cannot modify missing row "UUID_FMT" "
>                            "in table %s", UUID_ARGS(uuid), table->class_->name);
> +            *inconsistent = true;
>               return false;
>           }
>       } else {
> 



More information about the dev mailing list