[ovs-dev] [PATCH 2/5] ovsdb-server: Reclaim heap memory after compaction.

Ilya Maximets i.maximets at ovn.org
Mon Oct 26 01:42:54 UTC 2020


Compaction happens at most once in 10 minutes.  That is a long time
interval for a heavily loaded ovsdb-server in cluster mode.
In 10 minutes raft logs could grow up to tens of thousands of entries
with tens of gigabytes in total size.
While compaction cleans up raft log entries, the memory in many cases
is not returned to the system, but kept in the heap of running
ovsdb-server process, and it could stay in this condition for a really
long time.  In the end one performance spike could lead to a fast
growth of the raft log and this memory will never (for a really long
time) be released to the system even if the database is empty.

Simple example how to reproduce with OVN sandbox:

1. make sandbox SANDBOXFLAGS='--nbdb-model=clustered --sbdb-model=clustered'

2. Run the following script that creates 1 port group, adds 4000 ACLs
   and removes all of that in the end:

   # cat ../memory-test.sh
   pg_name=my_port_group
   export OVN_NB_DAEMON=$(ovn-nbctl --pidfile --detach --log-file -vsocket_util:off)
   ovn-nbctl pg-add $pg_name
   for i in $(seq 1 4000); do
     echo "Iteration: $i"
     ovn-nbctl --log acl-add $pg_name from-lport $i udp drop
   done
   ovn-nbctl acl-del $pg_name
   ovn-nbctl pg-del $pg_name
   ovs-appctl -t $(pwd)/sandbox/nb1 memory/show
   ovn-appctl -t ovn-nbctl exit
   ---

3. Stop one of the Northbound DB servers:
   ovs-appctl -t $(pwd)/sandbox/nb1 exit

   Make sure that ovsdb-server didn't compact the database before
   it was stopped.  Now we have a db file on disk that contains
   4000 fairly big transactions inside.

4. Try to start the same ovsdb-server with this file.

   # cd sandbox && ovsdb-server <...> nb1.db

   At this point ovsdb-server reads all the transactions from db
   file and performs all of them as fast as it can one by one.
   When it finishes this, raft log contains 4000 entries and
   ovsdb-server consumes (on my system) ~13GB of memory while
   database is empty.  And libc will likely never return this memory
   back to system, or, at least, will hold it for a really long time.

This patch adds a new command 'ovsdb-server/memory-trim-on-compaction'.
It's disabled by default, but once enabled, ovsdb-server will call
'malloc_trim(0)' after every successful compaction to try to return
unused heap memory back to system.  This is glibc-specific, so we
need to detect function availability at build time.
Disabled by default since it adds from 1% to 30% (depending on the
current state) to the snapshot creation time and, also, next memory
allocations will likely require requests to kernel and that might be
slower.  Could be enabled by default later if considered broadly
beneficial.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1888829
Signed-off-by: Ilya Maximets <i.maximets at ovn.org>
---
 NEWS                    |  3 +++
 configure.ac            |  1 +
 ovsdb/ovsdb-server.1.in |  4 ++++
 ovsdb/ovsdb-server.c    | 41 +++++++++++++++++++++++++++++++++++++++--
 ovsdb/ovsdb.c           | 12 +++++++++++-
 ovsdb/ovsdb.h           |  3 ++-
 6 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/NEWS b/NEWS
index 8bb5bdc3f..2860a8e9c 100644
--- a/NEWS
+++ b/NEWS
@@ -3,6 +3,9 @@ Post-v2.14.0
    - OVSDB:
      * New unixctl command 'ovsdb-server/get-db-storage-status' to show the
        status of the storage that's backing a database.
+     * New unixctl command 'ovsdb-server/memory-trim-on-compaction on|off'.
+       If turned on, ovsdb-server will try to return all unused heap memory
+       to the OS after every DB compaction.  Disabled by default.
    - DPDK:
      * Removed support for vhost-user dequeue zero-copy.
    - The environment variable OVS_UNBOUND_CONF, if set, is now used
diff --git a/configure.ac b/configure.ac
index 8d37af9db..126a1d9d1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -100,6 +100,7 @@ OVS_CHECK_IF_DL
 OVS_CHECK_STRTOK_R
 OVS_CHECK_LINUX_AF_XDP
 AC_CHECK_DECLS([sys_siglist], [], [], [[#include <signal.h>]])
+AC_CHECK_DECLS([malloc_trim], [], [], [[#include <malloc.h>]])
 AC_CHECK_MEMBERS([struct stat.st_mtim.tv_nsec, struct stat.st_mtimensec],
   [], [], [[#include <sys/stat.h>]])
 AC_CHECK_MEMBERS([struct ifreq.ifr_flagshigh], [], [], [[#include <net/if.h>]])
diff --git a/ovsdb/ovsdb-server.1.in b/ovsdb/ovsdb-server.1.in
index 6667553df..07a36cc7d 100644
--- a/ovsdb/ovsdb-server.1.in
+++ b/ovsdb/ovsdb-server.1.in
@@ -206,6 +206,10 @@ but not before 100 commits have been added or 10 minutes have elapsed
 since the last compaction. It will also be compacted automatically
 after 24 hours since the last compaction if 100 commits were added
 regardless of its size.
+.IP "\fBovsdb\-server/memory\-trim\-on\-compaction\fR \fIon\fR|\fIoff\fR"
+If this option is \fIon\fR, ovsdb-server will try to reclaim all unused
+heap memory back to the system after each successful database compaction
+to reduce the memory consumption of the process.  \fIoff\fR by default.
 .
 .IP "\fBovsdb\-server/reconnect\fR"
 Makes \fBovsdb\-server\fR drop all of the JSON\-RPC
diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c
index 73a155b3f..6ebe5d720 100644
--- a/ovsdb/ovsdb-server.c
+++ b/ovsdb/ovsdb-server.c
@@ -76,8 +76,12 @@ static char *ssl_protocols;
 static char *ssl_ciphers;
 static bool bootstrap_ca_cert;
 
+/* Try to reclaim heap memory back to system after DB compaction. */
+static bool trim_memory = false;
+
 static unixctl_cb_func ovsdb_server_exit;
 static unixctl_cb_func ovsdb_server_compact;
+static unixctl_cb_func ovsdb_server_memory_trim_on_compaction;
 static unixctl_cb_func ovsdb_server_reconnect;
 static unixctl_cb_func ovsdb_server_perf_counters_clear;
 static unixctl_cb_func ovsdb_server_perf_counters_show;
@@ -243,7 +247,7 @@ main_loop(struct server_config *config,
                           xasprintf("removing database %s because storage "
                                     "disconnected permanently", node->name));
             } else if (ovsdb_storage_should_snapshot(db->db->storage)) {
-                log_and_free_error(ovsdb_snapshot(db->db));
+                log_and_free_error(ovsdb_snapshot(db->db, trim_memory));
             }
         }
         if (run_process) {
@@ -410,6 +414,9 @@ main(int argc, char *argv[])
     unixctl_command_register("exit", "", 0, 0, ovsdb_server_exit, &exiting);
     unixctl_command_register("ovsdb-server/compact", "", 0, 1,
                              ovsdb_server_compact, &all_dbs);
+    unixctl_command_register("ovsdb-server/memory-trim-on-compaction",
+                             "on|off", 1, 1,
+                             ovsdb_server_memory_trim_on_compaction, NULL);
     unixctl_command_register("ovsdb-server/reconnect", "", 0, 0,
                              ovsdb_server_reconnect, jsonrpc);
 
@@ -1492,7 +1499,8 @@ ovsdb_server_compact(struct unixctl_conn *conn, int argc,
                 VLOG_INFO("compacting %s database by user request",
                           node->name);
 
-                struct ovsdb_error *error = ovsdb_snapshot(db->db);
+                struct ovsdb_error *error = ovsdb_snapshot(db->db,
+                                                           trim_memory);
                 if (error) {
                     char *s = ovsdb_error_to_string(error);
                     ds_put_format(&reply, "%s\n", s);
@@ -1515,6 +1523,35 @@ ovsdb_server_compact(struct unixctl_conn *conn, int argc,
     ds_destroy(&reply);
 }
 
+/* "ovsdb-server/memory-trim-on-compaction": controls whether ovsdb-server
+ * tries to reclaim heap memory back to system using malloc_trim() after
+ * compaction.  */
+static void
+ovsdb_server_memory_trim_on_compaction(struct unixctl_conn *conn,
+                                       int argc OVS_UNUSED,
+                                       const char *argv[],
+                                       void *arg OVS_UNUSED)
+{
+    const char *command = argv[1];
+
+    /* AC_CHECK_DECLS always defines the macro (0 or 1): test its value. */
+#if !HAVE_DECL_MALLOC_TRIM
+    unixctl_command_reply_error(conn, "memory trimming is not supported");
+    return;
+#endif
+    if (!strcmp(command, "on")) {
+        trim_memory = true;
+    } else if (!strcmp(command, "off")) {
+        trim_memory = false;
+    } else {
+        unixctl_command_reply_error(conn, "invalid argument");
+        return;
+    }
+    VLOG_INFO("memory trimming after compaction %s.",
+              trim_memory ? "enabled" : "disabled");
+    unixctl_command_reply(conn, NULL);
+}
+
 /* "ovsdb-server/reconnect": makes ovsdb-server drop all of its JSON-RPC
  * connections and reconnect. */
 static void
diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c
index 2da117cb3..cc05d6e2b 100644
--- a/ovsdb/ovsdb.c
+++ b/ovsdb/ovsdb.c
@@ -17,6 +17,10 @@
 
 #include "ovsdb.h"
 
+#if HAVE_DECL_MALLOC_TRIM /* Always defined (0 or 1) by AC_CHECK_DECLS. */
+#include <malloc.h>
+#endif
+
 #include "column.h"
 #include "file.h"
 #include "monitor.h"
@@ -515,7 +519,7 @@ ovsdb_get_table(const struct ovsdb *db, const char *name)
 }
 
 struct ovsdb_error * OVS_WARN_UNUSED_RESULT
-ovsdb_snapshot(struct ovsdb *db)
+ovsdb_snapshot(struct ovsdb *db, bool trim_memory OVS_UNUSED)
 {
     if (!db->storage) {
         return NULL;
@@ -527,6 +531,12 @@ ovsdb_snapshot(struct ovsdb *db)
                                                              schema, data);
     json_destroy(schema);
     json_destroy(data);
+
+#if HAVE_DECL_MALLOC_TRIM /* Always defined (0 or 1) by AC_CHECK_DECLS. */
+    if (!error && trim_memory) {
+        malloc_trim(0);
+    }
+#endif
     return error;
 }
 
diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h
index 5c30a83d9..72e127c84 100644
--- a/ovsdb/ovsdb.h
+++ b/ovsdb/ovsdb.h
@@ -112,7 +112,8 @@ struct json *ovsdb_execute(struct ovsdb *, const struct ovsdb_session *,
                            long long int elapsed_msec,
                            long long int *timeout_msec);
 
-struct ovsdb_error *ovsdb_snapshot(struct ovsdb *) OVS_WARN_UNUSED_RESULT;
+struct ovsdb_error *ovsdb_snapshot(struct ovsdb *, bool trim_memory)
+    OVS_WARN_UNUSED_RESULT;
 
 void ovsdb_replace(struct ovsdb *dst, struct ovsdb *src);
 
-- 
2.25.4



More information about the dev mailing list