[ovs-dev] [PATCH v4] ovsdb: provide raft and command interfaces with priority
Dumitru Ceara
dceara at redhat.com
Wed Jul 21 18:45:52 UTC 2021
On 7/9/21 7:27 PM, anton.ivanov at cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov at cambridgegreys.com>
>
> Set a soft time limit of "raft election timer"/2 on ovsdb
> processing.
>
> This improves behaviour in large heavily loaded clusters.
> While it cannot fully eliminate spurious raft elections
> under heavy load, it significantly decreases their number.
>
> Processing is (to the extent possible) restarted where it
> stopped on the previous iteration to ensure that sessions
> towards the tail of the session list are not starved.
>
> Signed-off-by: Anton Ivanov <anton.ivanov at cambridgegreys.com>
Hi Anton,
This patch needs a small rebase after the ovsdb-relay patches went in.
Please see some more comments below.
Regards,
Dumitru
> ---
> ovsdb/jsonrpc-server.c | 89 ++++++++++++++++++++++++++++++++++++++++--
> ovsdb/jsonrpc-server.h | 2 +-
> ovsdb/ovsdb-server.c | 23 ++++++++++-
> ovsdb/raft.c | 6 +++
> ovsdb/raft.h | 3 ++
> ovsdb/storage.c | 12 ++++++
> ovsdb/storage.h | 2 +
> 7 files changed, 131 insertions(+), 6 deletions(-)
>
> diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c
> index 4e2dfc3d7..e193776ff 100644
> --- a/ovsdb/jsonrpc-server.c
> +++ b/ovsdb/jsonrpc-server.c
> @@ -60,7 +60,8 @@ static struct ovsdb_jsonrpc_session *ovsdb_jsonrpc_session_create(
> struct ovsdb_jsonrpc_remote *, struct jsonrpc_session *, bool);
> static void ovsdb_jsonrpc_session_preremove_db(struct ovsdb_jsonrpc_remote *,
> struct ovsdb *);
> -static void ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *);
> +static void ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *,
> + uint64_t limit);
> static void ovsdb_jsonrpc_session_wait_all(struct ovsdb_jsonrpc_remote *);
> static void ovsdb_jsonrpc_session_get_memory_usage_all(
> const struct ovsdb_jsonrpc_remote *, struct simap *usage);
> @@ -128,6 +129,8 @@ struct ovsdb_jsonrpc_server {
> bool read_only; /* This server is does not accept any
> transactions that can modify the database. */
> struct shash remotes; /* Contains "struct ovsdb_jsonrpc_remote *"s. */
> + struct ovsdb_jsonrpc_remote *skip_to;
Please add a comment for 'skip_to'.
> + bool must_wake_up;
Please add a comment for 'must_wake_up'.
> };
>
> /* A configured remote. This is either a passive stream listener plus a list
> @@ -137,6 +140,7 @@ struct ovsdb_jsonrpc_remote {
> struct ovsdb_jsonrpc_server *server;
> struct pstream *listener; /* Listener, if passive. */
> struct ovs_list sessions; /* List of "struct ovsdb_jsonrpc_session"s. */
> + struct ovsdb_jsonrpc_session *skip_to;
Please add a comment for 'skip_to'.
> uint8_t dscp;
> bool read_only;
> char *role;
> @@ -279,6 +283,7 @@ ovsdb_jsonrpc_server_add_remote(struct ovsdb_jsonrpc_server *svr,
> remote->dscp = options->dscp;
> remote->read_only = options->read_only;
> remote->role = nullable_xstrdup(options->role);
> + remote->skip_to = NULL;
> shash_add(&svr->remotes, name, remote);
>
> if (!listener) {
> @@ -293,6 +298,11 @@ ovsdb_jsonrpc_server_del_remote(struct shash_node *node)
> {
> struct ovsdb_jsonrpc_remote *remote = node->data;
>
> + /* safest option - rerun all remotes */
Style: Safest option - rerun all remotes.
> + if (remote->server->skip_to) {
> + remote->server->skip_to = NULL;
> + }
No need to check for non-NULL, this can be directly (without the if):
remote-server->skip_to = NULL;
> +
> ovsdb_jsonrpc_session_close_all(remote);
> pstream_close(remote->listener);
> shash_delete(&remote->server->remotes, node);
> @@ -378,12 +388,24 @@ ovsdb_jsonrpc_server_set_read_only(struct ovsdb_jsonrpc_server *svr,
> }
>
> void
> -ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *svr)
> +ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *svr, uint64_t limit)
> {
> struct shash_node *node;
> + uint64_t elapsed = 0;
> + uint64_t start_time = time_msec();
> +
> + svr->must_wake_up = false;
>
> SHASH_FOR_EACH (node, &svr->remotes) {
> struct ovsdb_jsonrpc_remote *remote = node->data;
> + if (svr->skip_to) {
> + if (remote != svr->skip_to) {
> + continue;
> + } else {
> + svr->skip_to = NULL;
> + svr->must_wake_up = true;
> + }
> + }
>
> if (remote->listener) {
> struct stream *stream;
> @@ -403,7 +425,18 @@ ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *svr)
> }
> }
>
> - ovsdb_jsonrpc_session_run_all(remote);
> + /* We assume accept and session creation time to be
> + * neglible for the purposes of computing timeouts.
Typo: negligible
> + */
> +
Please remove this empty line.
> + ovsdb_jsonrpc_session_run_all(remote, limit - elapsed);
> +
> + elapsed = time_msec() - start_time;
> + if (elapsed > limit) {
> + svr->must_wake_up = true;
> + svr->skip_to = remote;
> + break;
> + }
> }
> }
>
> @@ -412,6 +445,16 @@ ovsdb_jsonrpc_server_wait(struct ovsdb_jsonrpc_server *svr)
> {
> struct shash_node *node;
>
> + if (svr->must_wake_up) {
> + /* We have stopped processing due to a time constraint.
> + * In this case there is no point to walk all sessions
> + * and rebuild the poll structure for the poll loop.
> + */
> + poll_immediate_wake();
> + svr->must_wake_up = false;
> + return;
> + }
> +
> SHASH_FOR_EACH (node, &svr->remotes) {
> struct ovsdb_jsonrpc_remote *remote = node->data;
>
> @@ -583,15 +626,53 @@ ovsdb_jsonrpc_session_set_options(struct ovsdb_jsonrpc_session *session,
> }
>
> static void
> -ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *remote)
> +ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *remote,
> + uint64_t limit)
> {
> struct ovsdb_jsonrpc_session *s, *next;
> + uint64_t start_time = time_msec();
> +
Please remove this empty line.
>
> LIST_FOR_EACH_SAFE (s, next, node, &remote->sessions) {
> + if ((remote->skip_to) && (s != remote->skip_to)) {
No need for the additional parenthesis, this can be:
if (remote->skip_to && s != remote->skip_to)
> + /* Processing was interrupted, we skip to the point
> + * where we had to interrupt it.
> + * We cannot use the _CONTINUE macro as it is not safe
> + * if the list has been changed in the meantime.
> + */
> + continue;
> + }
> +
> + /* Set ->next as skip point if we need to restart processing.
> + * This way, even if current is removed, we always have a
> + * valid pointer to continue processing.
> + */
> + remote->skip_to =
> + CONTAINER_OF(s->node.next, struct ovsdb_jsonrpc_session, node);
> +
I think it's better to not use ovs_list internals directly, this can be:
remote->skip_to = CONTAINER_OF(ovs_list_front(&s->node),
struct ovsdb_jsonrpc_session,
node);
> + if (remote->skip_to == s) {
> + /* The list is a singleton. This is a special case, where
> + * deleting the node will invalide the pointer.
Typo: invalidate
> + */
> + remote->skip_to = NULL;
> + }
> +
> int error = ovsdb_jsonrpc_session_run(s);
> +
Please remove this empty line.
> if (error) {
> ovsdb_jsonrpc_session_close(s);
> }
> +
> + if (time_msec() - start_time > limit) {
> + /* We bail leaving skip_to set. Next processing iteration
> + * will skip everything up to skip_to which was set to
> + * ->next earlier on.
> + */
> + break;
> + } else {
> + /* We are within time constraints, clear skip_to. */
> + remote->skip_to = NULL;
> + }
> }
> }
>
> diff --git a/ovsdb/jsonrpc-server.h b/ovsdb/jsonrpc-server.h
> index e0653aa39..218152e9d 100644
> --- a/ovsdb/jsonrpc-server.h
> +++ b/ovsdb/jsonrpc-server.h
> @@ -67,7 +67,7 @@ void ovsdb_jsonrpc_server_free_remote_status(
> void ovsdb_jsonrpc_server_reconnect(struct ovsdb_jsonrpc_server *, bool force,
> char *comment);
>
> -void ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *);
> +void ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *, uint64_t limit);
> void ovsdb_jsonrpc_server_wait(struct ovsdb_jsonrpc_server *);
>
> void ovsdb_jsonrpc_server_set_read_only(struct ovsdb_jsonrpc_server *,
> diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c
> index b09232c65..62f841357 100644
> --- a/ovsdb/ovsdb-server.c
> +++ b/ovsdb/ovsdb-server.c
> @@ -184,6 +184,7 @@ main_loop(struct server_config *config,
> char *remotes_error, *ssl_error;
> struct shash_node *node;
> long long int status_timer = LLONG_MIN;
> + uint64_t limit = UINT64_MAX;
I see no reason to keep the 'limit' outside the loop, this (along with
computing the limit value) can be moved just before
ovsdb_jsonrpc_server_run().
>
> *exiting = false;
> ssl_error = NULL;
> @@ -215,7 +216,7 @@ main_loop(struct server_config *config,
> reconfigure_remotes(jsonrpc, all_dbs, remotes),
> &remotes_error);
> report_error_if_changed(reconfigure_ssl(all_dbs), &ssl_error);
> - ovsdb_jsonrpc_server_run(jsonrpc);
I would move the computation of 'limit' here:
/* Figure out current processing time limit. */
uint64_t limit = UINT64_MAX;
SHASH_FOR_EACH (node, all_dbs) {
struct db *db = node->data;
uint64_t db_limit =
ovsdb_storage_max_processing_time(db->db->storage));
limit = MIN(limit, db_limit);
}
if (ovs_replay_is_active()) {
limit = UINT64_MAX;
}
> + ovsdb_jsonrpc_server_run(jsonrpc, limit);
>
> if (*is_backup) {
> replication_run();
> @@ -225,9 +226,29 @@ main_loop(struct server_config *config,
> }
> }
>
> + /* Figure out current processing time limit. */
> +
> + bool first_db = true;
> + SHASH_FOR_EACH (node, all_dbs) {
> + struct db *db = node->data;
> + uint64_t db_limit;
> +
> + db_limit = max_processing_time(db->db->storage);
> + if (first_db) {
> + /* reset the limit */
> + limit = db_limit;
> + first_db = false;
> + }
> + limit = MIN(db_limit, limit);
> + }
> + if (ovs_replay_is_active()) {
> + limit = UINT64_MAX;
> + }
> +
This whole chunk could be moved above.
> struct shash_node *next;
> SHASH_FOR_EACH_SAFE (node, next, all_dbs) {
> struct db *db = node->data;
> +
Please remove this empty line.
> ovsdb_txn_history_run(db->db);
> ovsdb_storage_run(db->db->storage);
> read_db(config, db);
> diff --git a/ovsdb/raft.c b/ovsdb/raft.c
> index 5bb901fd4..347badb4e 100644
> --- a/ovsdb/raft.c
> +++ b/ovsdb/raft.c
> @@ -407,6 +407,12 @@ raft_make_address_passive(const char *address_)
> }
> }
>
> +uint64_t
> +raft_election_timer_value(const struct raft *raft)
To be aligned with the names of other raft APIs this should be:
uint64_t
raft_get_election_timer(const struct raft *raft)
> +{
> + return raft->election_timer;
> +}
> +
> static struct raft *
> raft_alloc(void)
> {
> diff --git a/ovsdb/raft.h b/ovsdb/raft.h
> index 3545c41c2..1d270ec0c 100644
> --- a/ovsdb/raft.h
> +++ b/ovsdb/raft.h
> @@ -188,4 +188,7 @@ void raft_take_leadership(struct raft *);
> void raft_transfer_leadership(struct raft *, const char *reason);
>
> const struct uuid *raft_current_eid(const struct raft *);
> +
> +uint64_t raft_election_timer_value(const struct raft *);
> +
> #endif /* lib/raft.h */
> diff --git a/ovsdb/storage.c b/ovsdb/storage.c
> index 40415fcf6..1c11cb328 100644
> --- a/ovsdb/storage.c
> +++ b/ovsdb/storage.c
> @@ -640,3 +640,15 @@ ovsdb_storage_peek_last_eid(struct ovsdb_storage *storage)
> }
> return raft_current_eid(storage->raft);
> }
> +
> +uint64_t
> +max_processing_time(struct ovsdb_storage *storage)
To be aligned with other ovsdb-storage APIs this should be:
uint64_t
ovsdb_storage_max_processing_time(struct ovsdb_storage *storage)
> +{
> + if (!storage->raft) {
> + return UINT64_MAX;
> + }
> + if (raft_election_timer_value(storage->raft) > 2) {
> + return raft_election_timer_value(storage->raft) / 2;
> + }
> + return 1;
> +}
> diff --git a/ovsdb/storage.h b/ovsdb/storage.h
> index 02b6e7e6c..9e195bbe8 100644
> --- a/ovsdb/storage.h
> +++ b/ovsdb/storage.h
> @@ -97,4 +97,6 @@ struct ovsdb_schema *ovsdb_storage_read_schema(struct ovsdb_storage *);
>
> const struct uuid *ovsdb_storage_peek_last_eid(struct ovsdb_storage *);
>
> +uint64_t max_processing_time(struct ovsdb_storage *);
> +
> #endif /* ovsdb/storage.h */
>
More information about the dev
mailing list