[ovs-dev] [PATCH v2 1/9] ovs-replay: New library to create and manage replay files.

Ilya Maximets i.maximets at ovn.org
Mon Apr 12 22:00:12 UTC 2021


This library provides interfaces to open replay files and
read/write records.  Will be used later for stream record/replay
functionality, i.e. to record all the incoming connections and
data and replay it later for debugging and performance analysis
purposes.

Signed-off-by: Ilya Maximets <i.maximets at ovn.org>
---
 lib/automake.mk        |   5 +
 lib/ovs-replay-syn.man |   3 +
 lib/ovs-replay.c       | 237 +++++++++++++++++++++++++++++++++++++++++
 lib/ovs-replay.h       | 163 ++++++++++++++++++++++++++++
 lib/ovs-replay.man     |  16 +++
 lib/ovs-replay.xml     |  35 ++++++
 6 files changed, 459 insertions(+)
 create mode 100644 lib/ovs-replay-syn.man
 create mode 100644 lib/ovs-replay.c
 create mode 100644 lib/ovs-replay.h
 create mode 100644 lib/ovs-replay.man
 create mode 100644 lib/ovs-replay.xml

diff --git a/lib/automake.mk b/lib/automake.mk
index 39901bd6d..b558692c6 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -236,6 +236,8 @@ lib_libopenvswitch_la_SOURCES = \
 	lib/ovs-numa.h \
 	lib/ovs-rcu.c \
 	lib/ovs-rcu.h \
+	lib/ovs-replay.c \
+	lib/ovs-replay.h \
 	lib/ovs-router.h \
 	lib/ovs-router.c \
 	lib/ovs-thread.c \
@@ -532,6 +534,7 @@ EXTRA_DIST += \
 	lib/daemon.xml \
 	lib/dirs.c.in \
 	lib/db-ctl-base.xml \
+	lib/ovs-replay.xml \
 	lib/ssl.xml \
 	lib/ssl-bootstrap.xml \
 	lib/ssl-peer-ca-cert.xml \
@@ -554,6 +557,8 @@ MAN_FRAGMENTS += \
 	lib/dpif-netdev-unixctl.man \
 	lib/ofp-version.man \
 	lib/ovs.tmac \
+	lib/ovs-replay.man \
+	lib/ovs-replay-syn.man \
 	lib/service.man \
 	lib/service-syn.man \
 	lib/ssl-bootstrap.man \
diff --git a/lib/ovs-replay-syn.man b/lib/ovs-replay-syn.man
new file mode 100644
index 000000000..f0c78656b
--- /dev/null
+++ b/lib/ovs-replay-syn.man
@@ -0,0 +1,3 @@
+.IP "Replay options:"
+[\fB\-\-record-replay\fR[\fB=\fIdirectory\fR]]
+[\fB\-\-replay\fR[\fB=\fIdirectory\fR]]
diff --git a/lib/ovs-replay.c b/lib/ovs-replay.c
new file mode 100644
index 000000000..fcdb4edbb
--- /dev/null
+++ b/lib/ovs-replay.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2021, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include "dirs.h"
+#include "ovs-atomic.h"
+#include "ovs-replay.h"
+#include "util.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(ovs_replay);
+
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 25);
+
+static struct ovs_mutex replay_mutex = OVS_MUTEX_INITIALIZER;
+static int replay_seqno OVS_GUARDED_BY(replay_mutex) = 0;
+static atomic_int replay_state = ATOMIC_VAR_INIT(OVS_REPLAY_NONE);
+
+static char *dirname = NULL;
+
+void
+ovs_replay_set_state(enum ovs_replay_state state)
+{
+    atomic_store_relaxed(&replay_state, state);
+}
+
+enum ovs_replay_state
+ovs_replay_get_state(void)
+{
+    int state;
+
+    atomic_read_relaxed(&replay_state, &state);
+    return state;
+}
+
+void
+ovs_replay_set_dirname(const char *new_dirname)
+{
+    if (new_dirname) {
+        free(dirname);
+        dirname = xstrdup(new_dirname);
+    }
+}
+
+
+void
+ovs_replay_lock(void)
+    OVS_ACQUIRES(replay_mutex)
+{
+    ovs_mutex_lock(&replay_mutex);
+}
+
+void
+ovs_replay_unlock(void)
+    OVS_RELEASES(replay_mutex)
+{
+    ovs_mutex_unlock(&replay_mutex);
+}
+
+int
+ovs_replay_seqno(void)
+    OVS_REQUIRES(replay_mutex)
+{
+    return replay_seqno;
+}
+
+static char *
+ovs_replay_file_name(const char *name, int seqno)
+{
+    char *local_name = xstrdup(name);
+    char *filename, *p, *c;
+    bool skip = false;
+
+    /* Replace all the numbers and special symbols with single underscore.
+     * Numbers might be PIDs or port numbers that could change between record
+     * and replay phases, special symbols might be not good as a filename.
+     * We have a unique seuqence number as part of the name, so we don't care
+     * keeping too much information. */
+    for (c = p = local_name; *p; p++) {
+         if (!isalpha((unsigned char) *p)) {
+             if (!skip) {
+                *c++ = '_';
+                skip = true;
+             }
+         } else {
+             *c++ = *p;
+             skip = false;
+         }
+    }
+    if (skip) {
+        c--;
+    }
+    *c = '\0';
+    filename = xasprintf("%s/replay_%s_%d", dirname ? dirname : "",
+                                            local_name, seqno);
+    VLOG_DBG("Constructing replay filename: '%s' --> '%s' --> '%s'.",
+             name, local_name, filename);
+    free(local_name);
+
+    return filename;
+}
+
+int
+ovs_replay_file_open(const char *name, replay_file_t *f, int *seqno)
+    OVS_REQUIRES(replay_mutex)
+{
+    char *file_path, *filename;
+    int state = ovs_replay_get_state();
+
+    ovs_assert(state != OVS_REPLAY_NONE);
+
+    filename = ovs_replay_file_name(name, replay_seqno);
+    if (filename[0] != '/') {
+        file_path = abs_file_name(ovs_rundir(), filename);
+        free(filename);
+    } else {
+        file_path = filename;
+    }
+
+    *f = fopen(file_path, state == OVS_REPLAY_WRITE ? "wb" : "rb");
+    if (!*f) {
+        VLOG_ERR("%s: fopen failed: %s", file_path, ovs_strerror(errno));
+        free(file_path);
+        return errno;
+    }
+    free(file_path);
+
+    if (state == OVS_REPLAY_READ
+        && fread(seqno, sizeof *seqno, 1, *f) != 1) {
+        VLOG_INFO("%s: failed to read seqno: replay might be empty.", name);
+        *seqno = INT_MAX;
+    }
+    replay_seqno++;  /* New file opened. */
+    return 0;
+}
+
+void
+ovs_replay_file_close(replay_file_t f)
+{
+    fclose(f);
+}
+
+int
+ovs_replay_write(replay_file_t f, const void *buffer, int n, bool is_read)
+    OVS_EXCLUDED(replay_mutex)
+{
+    int state = ovs_replay_get_state();
+    int seqno_to_write;
+    int retval = 0;
+
+    if (OVS_LIKELY(state != OVS_REPLAY_WRITE)) {
+        return 0;
+    }
+
+    ovs_replay_lock();
+
+    seqno_to_write = is_read ? replay_seqno : -replay_seqno;
+    if (fwrite(&seqno_to_write, sizeof seqno_to_write, 1, f) != 1) {
+        VLOG_ERR_RL(&rl, "Failed to write seqno.");
+        retval = -1;
+        goto out;
+    }
+    if (fwrite(&n, sizeof n, 1, f) != 1) {
+        VLOG_ERR_RL(&rl, "Failed to write length.");
+        retval = -1;
+        goto out;
+    }
+    if (n > 0 && is_read && fwrite(buffer, 1, n, f) != n) {
+        VLOG_ERR_RL(&rl, "Failed to write data.");
+        retval = -1;
+    }
+out:
+    replay_seqno++; /* Write completed. */
+    ovs_replay_unlock();
+    fflush(f);
+    return retval;
+}
+
+int
+ovs_replay_read(replay_file_t f, void *buffer, int buffer_size,
+                int *len, int *seqno, bool is_read)
+    OVS_REQUIRES(replay_mutex)
+{
+    int retval = EINVAL;
+
+    if (fread(len, sizeof *len, 1, f) != 1
+        || (is_read && *len > buffer_size)) {
+        VLOG_ERR("Failed to read replay length.");
+        goto out;
+    }
+
+    if (*len > 0 && is_read && fread(buffer, 1, *len, f) != *len) {
+        VLOG_ERR("Failed to read replay buffer.");
+        goto out;
+    }
+
+    if (fread(seqno, sizeof *seqno, 1, f) != 1) {
+        *seqno = INT_MAX;  /* Most likely EOF. */
+        if (ferror(f)) {
+            VLOG_INFO("Failed to read replay seqno.");
+            goto out;
+        }
+    }
+
+    retval = 0;
+out:
+    replay_seqno++;  /* Read completed. */
+    return retval;
+}
+
+void
+ovs_replay_usage(void)
+{
+    printf("\nReplay options:\n"
+           "  --replay-record[=DIR]     turn on writing replay files\n"
+           "  --replay[=DIR]            run from replay files\n");
+}
diff --git a/lib/ovs-replay.h b/lib/ovs-replay.h
new file mode 100644
index 000000000..03cddeb13
--- /dev/null
+++ b/lib/ovs-replay.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2021, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVS_REPLAY_H
+#define OVS_REPLAY_H 1
+
+/*
+ * Library to work with 'replay' files.
+ *
+ * ovs_replay_file_open() should be used to open a new replay file.
+ * 'replay' file contains records.  If current state is OVS_REPLAY_WRITE,
+ * files are opened in a write mode and new records could be written by
+ * ovs_replay_write().  If current mode is OVS_REPLAY_READ, files are
+ * opened in a read mode and records could be read with ovs_replay_read().
+ *
+ * Each record has several fields:
+ *   <seqno> <len> [<data>]
+ *
+ * Here <seqno> is a global sequence number of the record, it is unique
+ * across all the replay files.  By comparing normalized version of this
+ * number (ovs_replay_normalized_seqno()) with the current global sequence
+ * number (ovs_replay_seqno()) users may detect if this record should be
+ * replayed now.
+ *
+ * Non-normalized versions of seqno are used to distinguish 'read' and 'write'
+ * records.  'read' records are records that corresponds to incoming events.
+ * Only 'read' records contains <data>.  'write' records contains outgoing
+ * events, i.e. stream_write() and contains only the size of outgoing message.
+ *
+ * For 'read' records, <len> is a size of a <data> stored in this record in
+ * bytes.  For 'write' records, it is a size of outgoing message, but there
+ * is no <data>.  If it contains negative value, it means that this record
+ * holds some recorded error and no data available.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+
+typedef FILE *replay_file_t;
+
+/* Replay state. */
+enum ovs_replay_state {
+    OVS_REPLAY_NONE,
+    OVS_REPLAY_WRITE,
+    OVS_REPLAY_READ,
+};
+
+void ovs_replay_set_state(enum ovs_replay_state);
+enum ovs_replay_state ovs_replay_get_state(void);
+
+static inline bool
+ovs_replay_is_active(void)
+{
+    return ovs_replay_get_state() != OVS_REPLAY_NONE;
+}
+
+/* Returns 'true' if provided sequence number belongs to  'read' record. */
+static inline bool
+ovs_replay_seqno_is_read(int seqno)
+{
+    return seqno >= 0;
+}
+
+/* Normalizes sequence number, so it can be used to compare with result of
+ * ovs_replay_seqno(). */
+static inline int
+ovs_replay_normalized_seqno(int seqno)
+{
+    return seqno >= 0 ? seqno : -seqno;
+}
+
+/* Locks the replay module.
+ * Locking required to use ovs_replay_file_open() and ovs_replay_read(). */
+void ovs_replay_lock(void);
+
+/* Unlocks the replay module. */
+void ovs_replay_unlock(void);
+
+/* Returns current global replay sequence number. */
+int ovs_replay_seqno(void);
+
+/* In write mode creates a new replay file to write stream replay.
+ * In read mode opens an existing replay file.
+ *
+ * Requires replay being locked with ovs_replay_lock().
+ *
+ * On success returns 0,  'f' points to the opened file.  If current mode is
+ * OVS_REPLAY_READ, sets 'seqno' to the sequence number of the first record in
+ * the file.
+ *
+ * On failure returns positive errno. */
+int ovs_replay_file_open(const char *name, replay_file_t *f, int *seqno);
+
+/* Closes replay file. */
+void ovs_replay_file_close(replay_file_t f);
+
+/* Writes a new record of 'n' bytes from 'buffer' to a replay file.
+ * 'is_read' should be true if the record belongs to 'read' operation
+ * Depending on 'is_read', creates 'read' or 'write' record.  'write' records
+ * contains only the size of a bufer ('n').
+ * If 'n' is negative, writes 'n' as an error status.
+ *
+ * On success returns 0.  Otherwise, positive errno. */
+int ovs_replay_write(replay_file_t f, const void *buffer, int n, bool is_read);
+
+/* Reads one record from a replay file to 'buffer'.  'buffer_size' should be
+ * equal to the size of a memory available.
+ *
+ * On success, actual size of the read record will be set to 'len', 'seqno'
+ * will be set to the sequence number of the next record in the file.  If it
+ * was the last record, sets 'seqno' to INT_MAX.
+ * Negative 'len' means that record contained an error status.
+ *
+ * Depending on 'is_read', tries to read 'read' or 'write' record.  For the
+ * 'write' record, only 'len' and 'seqno' updated, no data read to 'buffer'.
+ *
+ * On success returns 0.  Otherwise, positive errno. */
+int ovs_replay_read(replay_file_t f, void *buffer, int buffer_size,
+                    int *len, int *seqno, bool is_read);
+
+/* Helpers for cmdline options. */
+#define OVS_REPLAY_OPTION_ENUMS  \
+        OPT_OVS_REPLAY_REC,      \
+        OPT_OVS_REPLAY
+
+#define OVS_REPLAY_LONG_OPTIONS                                          \
+        {"replay-record", optional_argument, NULL, OPT_OVS_REPLAY_REC},  \
+        {"replay",        optional_argument, NULL, OPT_OVS_REPLAY}
+
+#define OVS_REPLAY_OPTION_HANDLERS                                 \
+        case OPT_OVS_REPLAY_REC:                                   \
+            ovs_replay_set_state(OVS_REPLAY_WRITE);                \
+            ovs_replay_set_dirname(optarg);                        \
+            break;                                                 \
+                                                                   \
+        case OPT_OVS_REPLAY:                                       \
+            ovs_replay_set_state(OVS_REPLAY_READ);                 \
+            ovs_replay_set_dirname(optarg);                        \
+            break;
+
+#define OVS_REPLAY_CASES \
+        case OPT_OVS_REPLAY_REC: case OPT_OVS_REPLAY:
+
+/* Prints usage information. */
+void ovs_replay_usage(void);
+
+/* Sets path to the directory where replay files should be stored. */
+void ovs_replay_set_dirname(const char *new_dirname);
+
+#endif /* OVS_REPLAY_H */
diff --git a/lib/ovs-replay.man b/lib/ovs-replay.man
new file mode 100644
index 000000000..f81b9fbed
--- /dev/null
+++ b/lib/ovs-replay.man
@@ -0,0 +1,16 @@
+.IP "\fB\-\-record-replay[=\fIdirectory\fR]"
+Sets the process in "recording" mode, in which it will record all the
+connections, data from streams (Unix domain and network sockets) and some other
+important necessary bits, so they could be replayed later.
+Recorded data is stored in replay files in specified \fIdirectory\fR.
+If \fIdirectory\fR does not begin with \fB/\fR, it is interpreted as relative
+to \fB at RUNDIR@\fR.  If \fIdirectory\fR is not specified, \fB at RUNDIR@\fR will
+be used.
+.
+.IP "\fB\-\-replay[=\fIdirectory\fR]"
+Sets the process in "replay" mode, in which it will read information about
+connections, data from streams (Unix domain and network sockets) and some
+other necessary bits directly from replay files instead of using real sockets.
+Replay files from the \fIdirectory\fR will be used.  If \fIdirectory\fR does
+not begin with \fB/\fR, it is interpreted as relative to \fB at RUNDIR@\fR.
+If \fIdirectory\fR is not specified, \fB at RUNDIR@\fR will be used.
diff --git a/lib/ovs-replay.xml b/lib/ovs-replay.xml
new file mode 100644
index 000000000..7392bd540
--- /dev/null
+++ b/lib/ovs-replay.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="utf-8"?>
+<dl>
+  <dt><code>--record-replay</code>[<code>=</code><var>directory</var>]</dt>
+  <dd>
+    <p>
+      Sets the process in "recording" mode, in which it will record all the
+      connections, data from streams (Unix domain and network sockets) and some
+      other necessary bits, so they could be replayed later.
+    </p>
+    <p>
+      Recorded data is stored in replay files in specified
+      <var>directory</var>.  If <var>directory</var> does not begin with
+      <code>/<code>, it is interpreted as relative to <code>@RUNDIR@</code>.
+      If <var>directory</var> is not specified, <code>@RUNDIR@</code> will
+      be used.
+    </p>
+  </dd>
+
+  <dt><code>--replay</code>[<code>=</code><var>directory</var>]</dt>
+  <dd>
+    <p>
+      Sets the process in "replay" mode, in which it will read information
+      about connections, data from streams (Unix domain and network sockets)
+      and some other necessary bits directly from replay files instead of using
+      real sockets.
+    </p>
+    <p>
+      Replay files from the <var>directory</var> will be used.
+      If <var>directory</var> does not begin with <code>/<code>, it is
+      interpreted as relative to <code>@RUNDIR@</code>.  If
+      <var>directory</var> is not specified, <code>@RUNDIR@</code> will be
+      used.
+    </p>
+  </dd>
+</dl>
-- 
2.26.2



More information about the dev mailing list