[ovs-dev] [PATCH] ovsdb: Don't let transaction history grow larger than the database.

Ilya Maximets i.maximets at ovn.org
Thu Oct 14 14:58:22 UTC 2021


If user frequently changes a lot of rows in a database, transaction
history could grow way larger than the database itself.  This wastes
a lot of memory and also makes monitor_cond_since slower than
usual monotor_cond if the transaction id is old enough, because
re-construction of the changes from a history is slower than just
creation of initial database snapshot.  This is also the case if
user deleted a lot of data, so transaction history still holds all of
it while the database itself doesn't.

In case of current lb-per-service model in ovn-kubernetes, each
load-balancer is added to every logical switch/router.  Such a
transaction touches more than a half of a OVN_Northbound database.
And each of these transactions is added to the transaction history.
Since transaction history depth is 100, in worst case scenario,
it will hold 100 copies of a database increasing memory consumption
dramatically.  In tests with 3000 LBs and 120 LSs, memory goes up
to 3 GB, while holding at 30 MB if transaction history disabled in
the code.

Fixing that by keeping count of the number of ovsdb_atom's in the
database and not allowing the total number of atoms in transaction
history to grow larger than this value.  Counting atoms is fairly
cheap because we don't need to iterate over them, so it doesn't have
significant performance impact.  It would be ideal to measure the
size of individual atoms, but that will hit the performance.
Counting cells instead of atoms is not sufficient, because OVN
users are adding hundreds or thousands of atoms to a single cell,
so they are largely different in size.

Signed-off-by: Ilya Maximets <i.maximets at ovn.org>
---
 ovsdb/ovsdb.c       |  5 ++++
 ovsdb/ovsdb.h       |  3 +++
 ovsdb/transaction.c | 56 +++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c
index 126d16a2f..e6d866182 100644
--- a/ovsdb/ovsdb.c
+++ b/ovsdb/ovsdb.c
@@ -422,6 +422,8 @@ ovsdb_create(struct ovsdb_schema *schema, struct ovsdb_storage *storage)
     ovs_list_init(&db->triggers);
     db->run_triggers_now = db->run_triggers = false;
 
+    db->n_atoms = 0;
+
     db->is_relay = false;
     ovs_list_init(&db->txn_forward_new);
     hmap_init(&db->txn_forward_sent);
@@ -518,6 +520,9 @@ ovsdb_get_memory_usage(const struct ovsdb *db, struct simap *usage)
     }
 
     simap_increase(usage, "cells", cells);
+    simap_increase(usage, "atoms", db->n_atoms);
+    simap_increase(usage, "txn-history", db->n_txn_history);
+    simap_increase(usage, "txn-history-atoms", db->n_txn_history_atoms);
 
     if (db->storage) {
         ovsdb_storage_get_memory_usage(db->storage, usage);
diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h
index 4a7bd0f0e..ec2d235ec 100644
--- a/ovsdb/ovsdb.h
+++ b/ovsdb/ovsdb.h
@@ -90,8 +90,11 @@ struct ovsdb {
     /* History trasanctions for incremental monitor transfer. */
     bool need_txn_history;     /* Need to maintain history of transactions. */
     unsigned int n_txn_history; /* Current number of history transactions. */
+    unsigned int n_txn_history_atoms; /* Total number of atoms in history. */
     struct ovs_list txn_history; /* Contains "struct ovsdb_txn_history_node. */
 
+    size_t n_atoms;  /* Total number of ovsdb atoms in the database. */
+
     /* Relay mode. */
     bool is_relay;  /* True, if database is in relay mode. */
     /* List that holds transactions waiting to be forwarded to the server. */
diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c
index dcccc61c0..a7ca09b58 100644
--- a/ovsdb/transaction.c
+++ b/ovsdb/transaction.c
@@ -41,6 +41,9 @@ struct ovsdb_txn {
     struct ovs_list txn_tables; /* Contains "struct ovsdb_txn_table"s. */
     struct ds comment;
     struct uuid txnid; /* For clustered mode only. It is the eid. */
+    size_t n_atoms;    /* Number of atoms in all transaction rows. */
+    ssize_t n_atoms_diff;  /* Difference between number of added and
+                            * removed atoms. */
 };
 
 /* A table modified by a transaction. */
@@ -842,6 +845,37 @@ check_index_uniqueness(struct ovsdb_txn *txn OVS_UNUSED,
     return NULL;
 }
 
+static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
+count_atoms(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
+{
+    struct ovsdb_table *table = txn_row->table;
+    ssize_t n_atoms_old = 0, n_atoms_new = 0;
+    struct shash_node *node;
+
+    SHASH_FOR_EACH (node, &table->schema->columns) {
+        const struct ovsdb_column *column = node->data;
+        const struct ovsdb_type *type = &column->type;
+        unsigned int idx = column->index;
+
+        if (txn_row->old) {
+            n_atoms_old += txn_row->old->fields[idx].n;
+            if (type->value.type != OVSDB_TYPE_VOID) {
+                n_atoms_old += txn_row->old->fields[idx].n;
+            }
+        }
+        if (txn_row->new) {
+            n_atoms_new += txn_row->new->fields[idx].n;
+            if (type->value.type != OVSDB_TYPE_VOID) {
+                n_atoms_new += txn_row->new->fields[idx].n;
+            }
+        }
+    }
+
+    txn->n_atoms += n_atoms_old + n_atoms_new;
+    txn->n_atoms_diff += n_atoms_new - n_atoms_old;
+    return NULL;
+}
+
 static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
 update_version(struct ovsdb_txn *txn OVS_UNUSED, struct ovsdb_txn_row *txn_row)
 {
@@ -910,6 +944,12 @@ ovsdb_txn_precommit(struct ovsdb_txn *txn)
         return error;
     }
 
+    /* Count atoms. */
+    error = for_each_txn_row(txn, count_atoms);
+    if (error) {
+        return error;
+    }
+
     /* Update _version for rows that changed.  */
     error = for_each_txn_row(txn, update_version);
     if (error) {
@@ -925,6 +965,8 @@ ovsdb_txn_clone(const struct ovsdb_txn *txn)
     struct ovsdb_txn *txn_cloned = xzalloc(sizeof *txn_cloned);
     ovs_list_init(&txn_cloned->txn_tables);
     txn_cloned->txnid = txn->txnid;
+    txn_cloned->n_atoms = txn->n_atoms;
+    txn_cloned->n_atoms_diff = txn->n_atoms_diff;
 
     struct ovsdb_txn_table *t;
     LIST_FOR_EACH (t, node, &txn->txn_tables) {
@@ -983,6 +1025,7 @@ ovsdb_txn_add_to_history(struct ovsdb_txn *txn)
         node->txn = ovsdb_txn_clone(txn);
         ovs_list_push_back(&txn->db->txn_history, &node->node);
         txn->db->n_txn_history++;
+        txn->db->n_txn_history_atoms += txn->n_atoms;
     }
 }
 
@@ -993,6 +1036,7 @@ ovsdb_txn_complete(struct ovsdb_txn *txn)
     if (!ovsdb_txn_is_empty(txn)) {
 
         txn->db->run_triggers_now = txn->db->run_triggers = true;
+        txn->db->n_atoms += txn->n_atoms_diff;
         ovsdb_monitors_commit(txn->db, txn);
         ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_update_weak_refs));
         ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_row_commit));
@@ -1448,12 +1492,18 @@ ovsdb_txn_history_run(struct ovsdb *db)
     if (!db->need_txn_history) {
         return;
     }
-    /* Remove old histories to limit the size of the history */
-    while (db->n_txn_history > 100) {
+    /* Remove old histories to limit the size of the history.  Removing until
+     * the number of ovsdb atoms in history becomes less than the number of
+     * atoms in the database, because it will be faster to just get a database
+     * snapshot than re-constructing changes from the history that big. */
+    while (db->n_txn_history &&
+           (db->n_txn_history > 100 ||
+            db->n_txn_history_atoms > db->n_atoms)) {
         struct ovsdb_txn_history_node *txn_h_node = CONTAINER_OF(
                 ovs_list_pop_front(&db->txn_history),
                 struct ovsdb_txn_history_node, node);
 
+        db->n_txn_history_atoms -= txn_h_node->txn->n_atoms;
         ovsdb_txn_destroy_cloned(txn_h_node->txn);
         free(txn_h_node);
         db->n_txn_history--;
@@ -1465,6 +1515,7 @@ ovsdb_txn_history_init(struct ovsdb *db, bool need_txn_history)
 {
     db->need_txn_history = need_txn_history;
     db->n_txn_history = 0;
+    db->n_txn_history_atoms = 0;
     ovs_list_init(&db->txn_history);
 }
 
@@ -1483,4 +1534,5 @@ ovsdb_txn_history_destroy(struct ovsdb *db)
         free(txn_h_node);
     }
     db->n_txn_history = 0;
+    db->n_txn_history_atoms = 0;
 }
-- 
2.31.1



More information about the dev mailing list