[ovs-dev] [PATCH v2 2/2] raft: Make backlog thresholds configurable.

Ilya Maximets i.maximets at ovn.org
Tue Nov 3 15:53:50 UTC 2020


New appctl 'cluster/set-backlog-threshold' to configure thresholds
on backlog of raft jsonrpc connections.  Could be used, for example,
in some extreme conditions where the size of a database is expected to
be very large, i.e. comparable with the default 4GB threshold.

Acked-by: Dumitru Ceara <dceara at redhat.com>
Signed-off-by: Ilya Maximets <i.maximets at ovn.org>
---
 NEWS                    |  1 +
 ovsdb/ovsdb-server.1.in |  5 ++++
 ovsdb/raft.c            | 55 +++++++++++++++++++++++++++++++++++++----
 3 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/NEWS b/NEWS
index ebdf8758b..c0819bf93 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,7 @@ Post-v2.14.0
        after every DB compaction back to OS.  Disabled by default.
      * Maximum backlog on RAFT connections limited to 500 messages or 4GB.
        Once threshold reached, connection is dropped (and re-established).
+       Use the 'cluster/set-backlog-threshold' command to change limits.
    - DPDK:
      * Removed support for vhost-user dequeue zero-copy.
    - The environment variable OVS_UNBOUND_CONF, if set, is now used
diff --git a/ovsdb/ovsdb-server.1.in b/ovsdb/ovsdb-server.1.in
index 07a36cc7d..5a7f3ba13 100644
--- a/ovsdb/ovsdb-server.1.in
+++ b/ovsdb/ovsdb-server.1.in
@@ -381,6 +381,11 @@ This command must be executed on the leader.  It initiates the change to the
 cluster.  To see if the change takes effect (committed), use
 \fBcluster/status\fR to show the current setting.  Once a change is committed,
 it persists at server restarts.
+.IP "\fBcluster/set\-backlog\-threshold \fIdb\fR \fIn_msgs\fR \fIn_bytes\fR"
+Sets the backlog limits for \fIdb\fR's RAFT connections to a maximum of
+\fIn_msgs\fR messages or \fIn_bytes\fR bytes.  If the backlog on one of the
+connections reaches the limit, it will be disconnected (and re-established).
+Values are checked only if the backlog contains more than 50 messages.
 .
 .so lib/vlog-unixctl.man
 .so lib/memory-unixctl.man
diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index 67c714ff4..760dfca6d 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -305,6 +305,12 @@ struct raft {
     bool ever_had_leader;       /* There has been leader elected since the raft
                                    is initialized, meaning it is ever
                                    connected. */
+
+    /* Connection backlog limits. */
+#define DEFAULT_MAX_BACKLOG_N_MSGS    500
+#define DEFAULT_MAX_BACKLOG_N_BYTES   UINT32_MAX
+    size_t conn_backlog_max_n_msgs;   /* Number of messages. */
+    size_t conn_backlog_max_n_bytes;  /* Number of bytes. */
 };
 
 /* All Raft structures. */
@@ -412,6 +418,9 @@ raft_alloc(void)
 
     raft->election_timer = ELECTION_BASE_MSEC;
 
+    raft->conn_backlog_max_n_msgs = DEFAULT_MAX_BACKLOG_N_MSGS;
+    raft->conn_backlog_max_n_bytes = DEFAULT_MAX_BACKLOG_N_BYTES;
+
     return raft;
 }
 
@@ -925,9 +934,6 @@ raft_reset_ping_timer(struct raft *raft)
     raft->ping_timeout = time_msec() + raft->election_timer / 3;
 }
 
-#define RAFT_MAX_BACKLOG_N_MSGS    500
-#define RAFT_MAX_BACKLOG_BYTES     UINT32_MAX
-
 static void
 raft_add_conn(struct raft *raft, struct jsonrpc_session *js,
               const struct uuid *sid, bool incoming)
@@ -943,8 +949,8 @@ raft_add_conn(struct raft *raft, struct jsonrpc_session *js,
     conn->incoming = incoming;
     conn->js_seqno = jsonrpc_session_get_seqno(conn->js);
     jsonrpc_session_set_probe_interval(js, 0);
-    jsonrpc_session_set_backlog_threshold(js, RAFT_MAX_BACKLOG_N_MSGS,
-                                              RAFT_MAX_BACKLOG_BYTES);
+    jsonrpc_session_set_backlog_threshold(js, raft->conn_backlog_max_n_msgs,
+                                              raft->conn_backlog_max_n_bytes);
 }
 
 /* Starts the local server in an existing Raft cluster, using the local copy of
@@ -4717,6 +4723,42 @@ raft_unixctl_change_election_timer(struct unixctl_conn *conn,
     unixctl_command_reply(conn, "change of election timer initiated.");
 }
 
+static void
+raft_unixctl_set_backlog_threshold(struct unixctl_conn *conn,
+                                   int argc OVS_UNUSED, const char *argv[],
+                                   void *aux OVS_UNUSED)
+{
+    const char *cluster_name = argv[1];
+    unsigned long long n_msgs, n_bytes;
+    struct raft_conn *r_conn;
+
+    struct raft *raft = raft_lookup_by_name(cluster_name);
+    if (!raft) {
+        unixctl_command_reply_error(conn, "unknown cluster");
+        return;
+    }
+
+    if (!str_to_ullong(argv[2], 10, &n_msgs)
+        || !str_to_ullong(argv[3], 10, &n_bytes)) {
+        unixctl_command_reply_error(conn, "invalid argument");
+        return;
+    }
+
+    if (n_msgs < 50 || n_msgs > SIZE_MAX || n_bytes > SIZE_MAX) {
+        unixctl_command_reply_error(conn, "values out of range");
+        return;
+    }
+
+    raft->conn_backlog_max_n_msgs = n_msgs;
+    raft->conn_backlog_max_n_bytes = n_bytes;
+
+    LIST_FOR_EACH (r_conn, list_node, &raft->conns) {
+        jsonrpc_session_set_backlog_threshold(r_conn->js, n_msgs, n_bytes);
+    }
+
+    unixctl_command_reply(conn, NULL);
+}
+
 static void
 raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED,
                           int argc OVS_UNUSED, const char *argv[],
@@ -4777,6 +4819,9 @@ raft_init(void)
                              raft_unixctl_kick, NULL);
     unixctl_command_register("cluster/change-election-timer", "DB TIME", 2, 2,
                              raft_unixctl_change_election_timer, NULL);
+    unixctl_command_register("cluster/set-backlog-threshold",
+                             "DB N_MSGS N_BYTES", 3, 3,
+                             raft_unixctl_set_backlog_threshold, NULL);
     unixctl_command_register("cluster/failure-test", "FAILURE SCENARIO", 1, 1,
                              raft_unixctl_failure_test, NULL);
     ovsthread_once_done(&once);
-- 
2.25.4



More information about the dev mailing list