[ovs-dev] [PATCH] ovsdb: Use previous snapshot size as an additional factor for compaction.

Ben Pfaff blp at ovn.org
Thu Mar 31 20:01:31 UTC 2016


Until now, the minimum database size before automatically compacting has
been 10 MB, regardless of the inherent size of the data in the database.
A couple of people have pointed out that this won't scale well to larger
databases.  This commit changes this criterion to 4 times the previously
compacted size of the database, with 10 MB as a minimum.

The 4x factor is suggested by Diego Ongaro's thesis, "Consensus: Bridging
Theory and Practice", section 5.1.2 "When to snapshot".

Signed-off-by: Ben Pfaff <blp at ovn.org>
---
 ovsdb/file.c            | 33 ++++++++++++++++++++++++++-------
 ovsdb/ovsdb-server.1.in |  7 +++++--
 2 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/ovsdb/file.c b/ovsdb/file.c
index 8c3c31b..2894f30 100644
--- a/ovsdb/file.c
+++ b/ovsdb/file.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2016 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -73,6 +73,7 @@ static struct ovsdb_error *ovsdb_file_create(struct ovsdb *,
                                              struct ovsdb_log *,
                                              const char *file_name,
                                              unsigned int n_transactions,
+                                             off_t snapshot_size,
                                              struct ovsdb_file **filep);
 
 /* Opens database 'file_name' and stores a pointer to the new database in
@@ -182,7 +183,6 @@ ovsdb_file_open__(const char *file_name,
                   struct ovsdb_file **filep)
 {
     enum ovsdb_log_open_mode open_mode;
-    unsigned int n_transactions;
     struct ovsdb_schema *schema = NULL;
     struct ovsdb_error *error;
     struct ovsdb_log *log;
@@ -201,7 +201,16 @@ ovsdb_file_open__(const char *file_name,
 
     db = ovsdb_create(schema ? schema : ovsdb_schema_clone(alternate_schema));
 
-    n_transactions = 0;
+    /* When a log gets big, we compact it into a new log that initially has
+     * only a single transaction that represents the entire state of the
+     * database.  Thus, we consider the first transaction in the database to be
+     * the snapshot.  We measure its size to later influence the minimum log
+     * size before compacting again.
+     *
+     * The schema precedes the snapshot in the log; we could compensate for its
+     * size, but it's just not that important. */
+    off_t snapshot_size = 0;
+    unsigned int n_transactions = 0;
     while ((error = ovsdb_log_read(log, &json)) == NULL && json) {
         struct ovsdb_txn *txn;
 
@@ -219,6 +228,10 @@ ovsdb_file_open__(const char *file_name,
             ovsdb_log_unread(log);
             break;
         }
+
+        if (n_transactions == 1) {
+            snapshot_size = ovsdb_log_get_offset(log);
+        }
     }
     if (error) {
         /* Log error but otherwise ignore it.  Probably the database just got
@@ -234,7 +247,8 @@ ovsdb_file_open__(const char *file_name,
     if (!read_only) {
         struct ovsdb_file *file;
 
-        error = ovsdb_file_create(db, log, file_name, n_transactions, &file);
+        error = ovsdb_file_create(db, log, file_name, n_transactions,
+                                  snapshot_size, &file);
         if (error) {
             goto error;
         }
@@ -500,6 +514,7 @@ struct ovsdb_file {
     long long int last_compact;
     long long int next_compact;
     unsigned int n_transactions;
+    off_t snapshot_size;
 };
 
 static const struct ovsdb_replica_class ovsdb_file_class;
@@ -507,7 +522,7 @@ static const struct ovsdb_replica_class ovsdb_file_class;
 static struct ovsdb_error *
 ovsdb_file_create(struct ovsdb *db, struct ovsdb_log *log,
                   const char *file_name,
-                  unsigned int n_transactions,
+                  unsigned int n_transactions, off_t snapshot_size,
                   struct ovsdb_file **filep)
 {
     struct ovsdb_file *file;
@@ -532,6 +547,7 @@ ovsdb_file_create(struct ovsdb *db, struct ovsdb_log *log,
     file->file_name = abs_name;
     file->last_compact = time_msec();
     file->next_compact = file->last_compact + COMPACT_MIN_MSEC;
+    file->snapshot_size = snapshot_size;
     file->n_transactions = n_transactions;
     ovsdb_add_replica(db, &file->replica);
 
@@ -582,10 +598,13 @@ ovsdb_file_commit(struct ovsdb_replica *replica,
     /* If it has been at least COMPACT_MIN_MSEC ms since the last time we
      * compacted (or at least COMPACT_RETRY_MSEC ms since the last time we
      * tried), and if there are at least 100 transactions in the database, and
-     * if the database is at least 10 MB, then compact the database. */
+     * if the database is at least 10 MB, and the database is at least 4x the
+     * size of the previous snapshot, then compact the database. */
+    off_t log_size = ovsdb_log_get_offset(file->log);
     if (time_msec() >= file->next_compact
         && file->n_transactions >= 100
-        && ovsdb_log_get_offset(file->log) >= 10 * 1024 * 1024)
+        && log_size >= 10 * 1024 * 1024
+        && log_size / 4 >= file->snapshot_size)
     {
         error = ovsdb_file_compact(file);
         if (error) {
diff --git a/ovsdb/ovsdb-server.1.in b/ovsdb/ovsdb-server.1.in
index 6c85729..98f2b98 100644
--- a/ovsdb/ovsdb-server.1.in
+++ b/ovsdb/ovsdb-server.1.in
@@ -126,8 +126,11 @@ These commands are specific to \fBovsdb\-server\fR.
 Causes \fBovsdb\-server\fR to gracefully terminate.
 .IP "\fBovsdb\-server/compact\fR [\fIdb\fR]\&..."
 Compacts each database \fIdb\fR in-place.  If no \fIdb\fR is
-specified, compacts every database in-place.  Databases are also
-automatically compacted occasionally.
+specified, compacts every database in-place.  A database is also
+compacted automatically when a transaction is logged if it is at least
+4 times as large as its previous compacted size (and at least 10 MB),
+but not until at least 100 commits have been added and 10 minutes have
+elapsed since the last compaction.
 .
 .IP "\fBovsdb\-server/reconnect\fR"
 Makes \fBovsdb\-server\fR drop all of the JSON\-RPC
-- 
2.1.3




More information about the dev mailing list