[ovs-dev] [PATCH] ovsdb: Don't let transaction history grow larger than the database.

Dumitru Ceara dceara at redhat.com
Tue Nov 2 16:34:22 UTC 2021


On 10/14/21 4:58 PM, Ilya Maximets wrote:
> If user frequently changes a lot of rows in a database, transaction
> history could grow way larger than the database itself.  This wastes
> a lot of memory and also makes monitor_cond_since slower than
> usual monotor_cond if the transaction id is old enough, because
> re-construction of the changes from a history is slower than just
> creation of initial database snapshot.  This is also the case if
> user deleted a lot of data, so transaction history still holds all of
> it while the database itself doesn't.
> 
> In case of current lb-per-service model in ovn-kubernetes, each
> load-balancer is added to every logical switch/router.  Such a
> transaction touches more than a half of a OVN_Northbound database.

Hi Ilya

This is also being fixed in ovn-kubernetes and load balancer groups will
be used instead; this will avoid generating such inefficient
transactions.  Nevertheless, limiting transaction history size makes
sense in general.

I have a tiny comment below; with it addressed:

Acked-by: Dumitru Ceara <dceara at redhat.com>

Thanks,
Dumitru

> And each of these transactions is added to the transaction history.
> Since transaction history depth is 100, in worst case scenario,
> it will hold 100 copies of a database increasing memory consumption
> dramatically.  In tests with 3000 LBs and 120 LSs, memory goes up
> to 3 GB, while holding at 30 MB if transaction history disabled in
> the code.
> 
> Fixing that by keeping count of the number of ovsdb_atom's in the
> database and not allowing the total number of atoms in transaction
> history to grow larger than this value.  Counting atoms is fairly
> cheap because we don't need to iterate over them, so it doesn't have
> significant performance impact.  It would be ideal to measure the
> size of individual atoms, but that will hit the performance.
> Counting cells instead of atoms is not sufficient, because OVN
> users are adding hundreds or thousands of atoms to a single cell,
> so they are largely different in size.
> 
> Signed-off-by: Ilya Maximets <i.maximets at ovn.org>
> ---
>  ovsdb/ovsdb.c       |  5 ++++
>  ovsdb/ovsdb.h       |  3 +++
>  ovsdb/transaction.c | 56 +++++++++++++++++++++++++++++++++++++++++++--
>  3 files changed, 62 insertions(+), 2 deletions(-)
> 
> diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c
> index 126d16a2f..e6d866182 100644
> --- a/ovsdb/ovsdb.c
> +++ b/ovsdb/ovsdb.c
> @@ -422,6 +422,8 @@ ovsdb_create(struct ovsdb_schema *schema, struct ovsdb_storage *storage)
>      ovs_list_init(&db->triggers);
>      db->run_triggers_now = db->run_triggers = false;
>  
> +    db->n_atoms = 0;
> +
>      db->is_relay = false;
>      ovs_list_init(&db->txn_forward_new);
>      hmap_init(&db->txn_forward_sent);
> @@ -518,6 +520,9 @@ ovsdb_get_memory_usage(const struct ovsdb *db, struct simap *usage)
>      }
>  
>      simap_increase(usage, "cells", cells);
> +    simap_increase(usage, "atoms", db->n_atoms);
> +    simap_increase(usage, "txn-history", db->n_txn_history);
> +    simap_increase(usage, "txn-history-atoms", db->n_txn_history_atoms);
>  
>      if (db->storage) {
>          ovsdb_storage_get_memory_usage(db->storage, usage);
> diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h
> index 4a7bd0f0e..ec2d235ec 100644
> --- a/ovsdb/ovsdb.h
> +++ b/ovsdb/ovsdb.h
> @@ -90,8 +90,11 @@ struct ovsdb {
>      /* History trasanctions for incremental monitor transfer. */
>      bool need_txn_history;     /* Need to maintain history of transactions. */
>      unsigned int n_txn_history; /* Current number of history transactions. */
> +    unsigned int n_txn_history_atoms; /* Total number of atoms in history. */
>      struct ovs_list txn_history; /* Contains "struct ovsdb_txn_history_node. */
>  
> +    size_t n_atoms;  /* Total number of ovsdb atoms in the database. */
> +
>      /* Relay mode. */
>      bool is_relay;  /* True, if database is in relay mode. */
>      /* List that holds transactions waiting to be forwarded to the server. */
> diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c
> index dcccc61c0..a7ca09b58 100644
> --- a/ovsdb/transaction.c
> +++ b/ovsdb/transaction.c
> @@ -41,6 +41,9 @@ struct ovsdb_txn {
>      struct ovs_list txn_tables; /* Contains "struct ovsdb_txn_table"s. */
>      struct ds comment;
>      struct uuid txnid; /* For clustered mode only. It is the eid. */
> +    size_t n_atoms;    /* Number of atoms in all transaction rows. */
> +    ssize_t n_atoms_diff;  /* Difference between number of added and
> +                            * removed atoms. */
>  };
>  
>  /* A table modified by a transaction. */
> @@ -842,6 +845,37 @@ check_index_uniqueness(struct ovsdb_txn *txn OVS_UNUSED,
>      return NULL;
>  }
>  
> +static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
> +count_atoms(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
> +{
> +    struct ovsdb_table *table = txn_row->table;
> +    ssize_t n_atoms_old = 0, n_atoms_new = 0;
> +    struct shash_node *node;
> +
> +    SHASH_FOR_EACH (node, &table->schema->columns) {
> +        const struct ovsdb_column *column = node->data;
> +        const struct ovsdb_type *type = &column->type;
> +        unsigned int idx = column->index;
> +
> +        if (txn_row->old) {
> +            n_atoms_old += txn_row->old->fields[idx].n;
> +            if (type->value.type != OVSDB_TYPE_VOID) {
> +                n_atoms_old += txn_row->old->fields[idx].n;
> +            }
> +        }
> +        if (txn_row->new) {
> +            n_atoms_new += txn_row->new->fields[idx].n;
> +            if (type->value.type != OVSDB_TYPE_VOID) {
> +                n_atoms_new += txn_row->new->fields[idx].n;
> +            }
> +        }
> +    }
> +
> +    txn->n_atoms += n_atoms_old + n_atoms_new;
> +    txn->n_atoms_diff += n_atoms_new - n_atoms_old;
> +    return NULL;
> +}
> +
>  static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
>  update_version(struct ovsdb_txn *txn OVS_UNUSED, struct ovsdb_txn_row *txn_row)
>  {
> @@ -910,6 +944,12 @@ ovsdb_txn_precommit(struct ovsdb_txn *txn)
>          return error;
>      }
>  
> +    /* Count atoms. */
> +    error = for_each_txn_row(txn, count_atoms);
> +    if (error) {
> +        return error;

This should never happen, maybe it's better to change this to something
similar to what we do for update_version:

return OVSDB_WRAP_BUG("can't happen", error);



More information about the dev mailing list