[ovs-dev] [PATCH] ovsdb: Use previous snapshot size as an additional factor for compaction.
Ben Pfaff
blp at ovn.org
Thu Mar 31 20:01:31 UTC 2016
Until now, the minimum database size before automatically compacting has
been 10 MB, regardless of the inherent size of the data in the database.
A couple of people have pointed out that this won't scale well to larger
databases. This commit changes this criterion to 4 times the previously
compacted size of the database, with 10 MB as a minimum.
The 4x factor is suggested by Diego Ongaro's thesis, "Consensus: Bridging
Theory and Practice", section 5.1.2 "When to snapshot".
Signed-off-by: Ben Pfaff <blp at ovn.org>
---
ovsdb/file.c | 33 ++++++++++++++++++++++++++-------
ovsdb/ovsdb-server.1.in | 7 +++++--
2 files changed, 31 insertions(+), 9 deletions(-)
diff --git a/ovsdb/file.c b/ovsdb/file.c
index 8c3c31b..2894f30 100644
--- a/ovsdb/file.c
+++ b/ovsdb/file.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2016 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -73,6 +73,7 @@ static struct ovsdb_error *ovsdb_file_create(struct ovsdb *,
struct ovsdb_log *,
const char *file_name,
unsigned int n_transactions,
+ off_t snapshot_size,
struct ovsdb_file **filep);
/* Opens database 'file_name' and stores a pointer to the new database in
@@ -182,7 +183,6 @@ ovsdb_file_open__(const char *file_name,
struct ovsdb_file **filep)
{
enum ovsdb_log_open_mode open_mode;
- unsigned int n_transactions;
struct ovsdb_schema *schema = NULL;
struct ovsdb_error *error;
struct ovsdb_log *log;
@@ -201,7 +201,16 @@ ovsdb_file_open__(const char *file_name,
db = ovsdb_create(schema ? schema : ovsdb_schema_clone(alternate_schema));
- n_transactions = 0;
+ /* When a log gets big, we compact it into a new log that initially has
+ * only a single transaction that represents the entire state of the
+ * database. Thus, we consider the first transaction in the database to be
+ * the snapshot. We measure its size to later influence the minimum log
+ * size before compacting again.
+ *
+ * The schema precedes the snapshot in the log; we could compensate for its
+ * size, but it's just not that important. */
+ off_t snapshot_size = 0;
+ unsigned int n_transactions = 0;
while ((error = ovsdb_log_read(log, &json)) == NULL && json) {
struct ovsdb_txn *txn;
@@ -219,6 +228,10 @@ ovsdb_file_open__(const char *file_name,
ovsdb_log_unread(log);
break;
}
+
+ if (n_transactions == 1) {
+ snapshot_size = ovsdb_log_get_offset(log);
+ }
}
if (error) {
/* Log error but otherwise ignore it. Probably the database just got
@@ -234,7 +247,8 @@ ovsdb_file_open__(const char *file_name,
if (!read_only) {
struct ovsdb_file *file;
- error = ovsdb_file_create(db, log, file_name, n_transactions, &file);
+ error = ovsdb_file_create(db, log, file_name, n_transactions,
+ snapshot_size, &file);
if (error) {
goto error;
}
@@ -500,6 +514,7 @@ struct ovsdb_file {
long long int last_compact;
long long int next_compact;
unsigned int n_transactions;
+ off_t snapshot_size;
};
static const struct ovsdb_replica_class ovsdb_file_class;
@@ -507,7 +522,7 @@ static const struct ovsdb_replica_class ovsdb_file_class;
static struct ovsdb_error *
ovsdb_file_create(struct ovsdb *db, struct ovsdb_log *log,
const char *file_name,
- unsigned int n_transactions,
+ unsigned int n_transactions, off_t snapshot_size,
struct ovsdb_file **filep)
{
struct ovsdb_file *file;
@@ -532,6 +547,7 @@ ovsdb_file_create(struct ovsdb *db, struct ovsdb_log *log,
file->file_name = abs_name;
file->last_compact = time_msec();
file->next_compact = file->last_compact + COMPACT_MIN_MSEC;
+ file->snapshot_size = snapshot_size;
file->n_transactions = n_transactions;
ovsdb_add_replica(db, &file->replica);
@@ -582,10 +598,13 @@ ovsdb_file_commit(struct ovsdb_replica *replica,
/* If it has been at least COMPACT_MIN_MSEC ms since the last time we
* compacted (or at least COMPACT_RETRY_MSEC ms since the last time we
* tried), and if there are at least 100 transactions in the database, and
- * if the database is at least 10 MB, then compact the database. */
+ * if the database is at least 10 MB, and if the database is at least 4x
+ * the size of the previous snapshot, then compact the database. */
+ off_t log_size = ovsdb_log_get_offset(file->log);
if (time_msec() >= file->next_compact
&& file->n_transactions >= 100
- && ovsdb_log_get_offset(file->log) >= 10 * 1024 * 1024)
+ && log_size >= 10 * 1024 * 1024
+ && log_size / 4 >= file->snapshot_size)
{
error = ovsdb_file_compact(file);
if (error) {
diff --git a/ovsdb/ovsdb-server.1.in b/ovsdb/ovsdb-server.1.in
index 6c85729..98f2b98 100644
--- a/ovsdb/ovsdb-server.1.in
+++ b/ovsdb/ovsdb-server.1.in
@@ -126,8 +126,11 @@ These commands are specific to \fBovsdb\-server\fR.
Causes \fBovsdb\-server\fR to gracefully terminate.
.IP "\fBovsdb\-server/compact\fR [\fIdb\fR]\&..."
Compacts each database \fIdb\fR in-place. If no \fIdb\fR is
-specified, compacts every database in-place. Databases are also
-automatically compacted occasionally.
+specified, compacts every database in-place. A database is also
+compacted automatically when a transaction is logged if it is at least
+4 times as large as its previous compacted size (and at least 10 MB),
+but only once at least 100 commits have been added and at least 10
+minutes have elapsed since the last compaction.
.
.IP "\fBovsdb\-server/reconnect\fR"
Makes \fBovsdb\-server\fR drop all of the JSON\-RPC
--
2.1.3
More information about the dev mailing list