[ovs-dev] [PATCH v2 1/9] ovs-replay: New library to create and manage replay files.

Dumitru Ceara dceara at redhat.com
Mon May 10 10:12:16 UTC 2021


On 4/13/21 12:00 AM, Ilya Maximets wrote:
> This library provides interfaces to open replay files and
> read/write records.  Will be used later for stream record/replay
> functionality, i.e. to record all the incoming connections and
> data and replay it later for debugging and performance analysis
> purposes.
> 
> Signed-off-by: Ilya Maximets <i.maximets at ovn.org>
> ---

Hi Ilya,

I only have two nits below and a question regarding partial record reads
(although I'm not sure that's a requirement).

The rest of the code looks good to me:

Acked-by: Dumitru Ceara <dceara at redhat.com>

Thanks,
Dumitru

>  lib/automake.mk        |   5 +
>  lib/ovs-replay-syn.man |   3 +
>  lib/ovs-replay.c       | 237 +++++++++++++++++++++++++++++++++++++++++
>  lib/ovs-replay.h       | 163 ++++++++++++++++++++++++++++
>  lib/ovs-replay.man     |  16 +++
>  lib/ovs-replay.xml     |  35 ++++++
>  6 files changed, 459 insertions(+)
>  create mode 100644 lib/ovs-replay-syn.man
>  create mode 100644 lib/ovs-replay.c
>  create mode 100644 lib/ovs-replay.h
>  create mode 100644 lib/ovs-replay.man
>  create mode 100644 lib/ovs-replay.xml
> 
> diff --git a/lib/automake.mk b/lib/automake.mk
> index 39901bd6d..b558692c6 100644
> --- a/lib/automake.mk
> +++ b/lib/automake.mk
> @@ -236,6 +236,8 @@ lib_libopenvswitch_la_SOURCES = \
>  	lib/ovs-numa.h \
>  	lib/ovs-rcu.c \
>  	lib/ovs-rcu.h \
> +	lib/ovs-replay.c \
> +	lib/ovs-replay.h \
>  	lib/ovs-router.h \
>  	lib/ovs-router.c \
>  	lib/ovs-thread.c \
> @@ -532,6 +534,7 @@ EXTRA_DIST += \
>  	lib/daemon.xml \
>  	lib/dirs.c.in \
>  	lib/db-ctl-base.xml \
> +	lib/ovs-replay.xml \
>  	lib/ssl.xml \
>  	lib/ssl-bootstrap.xml \
>  	lib/ssl-peer-ca-cert.xml \
> @@ -554,6 +557,8 @@ MAN_FRAGMENTS += \
>  	lib/dpif-netdev-unixctl.man \
>  	lib/ofp-version.man \
>  	lib/ovs.tmac \
> +	lib/ovs-replay.man \
> +	lib/ovs-replay-syn.man \
>  	lib/service.man \
>  	lib/service-syn.man \
>  	lib/ssl-bootstrap.man \
> diff --git a/lib/ovs-replay-syn.man b/lib/ovs-replay-syn.man
> new file mode 100644
> index 000000000..f0c78656b
> --- /dev/null
> +++ b/lib/ovs-replay-syn.man
> @@ -0,0 +1,3 @@
> +.IP "Replay options:"
> +[\fB\-\-record-replay\fR[\fB=\fIdirectory\fR]]
> +[\fB\-\-replay\fR[\fB=\fIdirectory\fR]]
> diff --git a/lib/ovs-replay.c b/lib/ovs-replay.c
> new file mode 100644
> index 000000000..fcdb4edbb
> --- /dev/null
> +++ b/lib/ovs-replay.c
> @@ -0,0 +1,237 @@
> +/*
> + * Copyright (c) 2021, Red Hat, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +#include <ctype.h>
> +#include <errno.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sys/socket.h>
> +#include <sys/types.h>
> +#include <unistd.h>
> +#include "dirs.h"
> +#include "ovs-atomic.h"
> +#include "ovs-replay.h"
> +#include "util.h"
> +#include "openvswitch/vlog.h"
> +
> +VLOG_DEFINE_THIS_MODULE(ovs_replay);
> +
> +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 25);
> +
> +static struct ovs_mutex replay_mutex = OVS_MUTEX_INITIALIZER;
> +static int replay_seqno OVS_GUARDED_BY(replay_mutex) = 0;
> +static atomic_int replay_state = ATOMIC_VAR_INIT(OVS_REPLAY_NONE);
> +
> +static char *dirname = NULL;
> +
> +void
> +ovs_replay_set_state(enum ovs_replay_state state)
> +{
> +    atomic_store_relaxed(&replay_state, state);
> +}
> +
> +enum ovs_replay_state
> +ovs_replay_get_state(void)
> +{
> +    int state;
> +
> +    atomic_read_relaxed(&replay_state, &state);
> +    return state;
> +}
> +
> +void
> +ovs_replay_set_dirname(const char *new_dirname)
> +{
> +    if (new_dirname) {
> +        free(dirname);
> +        dirname = xstrdup(new_dirname);
> +    }
> +}
> +
> +

Nit: I'd remove one of the newlines here.

> +void
> +ovs_replay_lock(void)
> +    OVS_ACQUIRES(replay_mutex)
> +{
> +    ovs_mutex_lock(&replay_mutex);
> +}
> +
> +void
> +ovs_replay_unlock(void)
> +    OVS_RELEASES(replay_mutex)
> +{
> +    ovs_mutex_unlock(&replay_mutex);
> +}
> +
> +int
> +ovs_replay_seqno(void)
> +    OVS_REQUIRES(replay_mutex)
> +{
> +    return replay_seqno;
> +}
> +
> +static char *
> +ovs_replay_file_name(const char *name, int seqno)
> +{
> +    char *local_name = xstrdup(name);
> +    char *filename, *p, *c;
> +    bool skip = false;
> +
> +    /* Replace all the numbers and special symbols with single underscore.
> +     * Numbers might be PIDs or port numbers that could change between record
> +     * and replay phases, special symbols might be not good as a filename.
> +     * We have a unique seuqence number as part of the name, so we don't care
> +     * keeping too much information. */
> +    for (c = p = local_name; *p; p++) {
> +         if (!isalpha((unsigned char) *p)) {
> +             if (!skip) {
> +                *c++ = '_';
> +                skip = true;
> +             }
> +         } else {
> +             *c++ = *p;
> +             skip = false;
> +         }
> +    }
> +    if (skip) {
> +        c--;
> +    }
> +    *c = '\0';
> +    filename = xasprintf("%s/replay_%s_%d", dirname ? dirname : "",
> +                                            local_name, seqno);
> +    VLOG_DBG("Constructing replay filename: '%s' --> '%s' --> '%s'.",
> +             name, local_name, filename);
> +    free(local_name);
> +
> +    return filename;
> +}
> +
> +int
> +ovs_replay_file_open(const char *name, replay_file_t *f, int *seqno)
> +    OVS_REQUIRES(replay_mutex)
> +{
> +    char *file_path, *filename;
> +    int state = ovs_replay_get_state();
> +
> +    ovs_assert(state != OVS_REPLAY_NONE);
> +
> +    filename = ovs_replay_file_name(name, replay_seqno);
> +    if (filename[0] != '/') {
> +        file_path = abs_file_name(ovs_rundir(), filename);
> +        free(filename);
> +    } else {
> +        file_path = filename;
> +    }
> +
> +    *f = fopen(file_path, state == OVS_REPLAY_WRITE ? "wb" : "rb");
> +    if (!*f) {
> +        VLOG_ERR("%s: fopen failed: %s", file_path, ovs_strerror(errno));
> +        free(file_path);
> +        return errno;
> +    }
> +    free(file_path);
> +
> +    if (state == OVS_REPLAY_READ
> +        && fread(seqno, sizeof *seqno, 1, *f) != 1) {
> +        VLOG_INFO("%s: failed to read seqno: replay might be empty.", name);
> +        *seqno = INT_MAX;
> +    }
> +    replay_seqno++;  /* New file opened. */
> +    return 0;
> +}
> +
> +void
> +ovs_replay_file_close(replay_file_t f)
> +{
> +    fclose(f);
> +}
> +
> +int
> +ovs_replay_write(replay_file_t f, const void *buffer, int n, bool is_read)
> +    OVS_EXCLUDED(replay_mutex)
> +{
> +    int state = ovs_replay_get_state();
> +    int seqno_to_write;
> +    int retval = 0;
> +
> +    if (OVS_LIKELY(state != OVS_REPLAY_WRITE)) {
> +        return 0;
> +    }
> +
> +    ovs_replay_lock();
> +
> +    seqno_to_write = is_read ? replay_seqno : -replay_seqno;
> +    if (fwrite(&seqno_to_write, sizeof seqno_to_write, 1, f) != 1) {
> +        VLOG_ERR_RL(&rl, "Failed to write seqno.");
> +        retval = -1;
> +        goto out;
> +    }
> +    if (fwrite(&n, sizeof n, 1, f) != 1) {
> +        VLOG_ERR_RL(&rl, "Failed to write length.");
> +        retval = -1;
> +        goto out;
> +    }
> +    if (n > 0 && is_read && fwrite(buffer, 1, n, f) != n) {
> +        VLOG_ERR_RL(&rl, "Failed to write data.");
> +        retval = -1;
> +    }
> +out:
> +    replay_seqno++; /* Write completed. */
> +    ovs_replay_unlock();
> +    fflush(f);
> +    return retval;
> +}
> +
> +int
> +ovs_replay_read(replay_file_t f, void *buffer, int buffer_size,
> +                int *len, int *seqno, bool is_read)
> +    OVS_REQUIRES(replay_mutex)
> +{
> +    int retval = EINVAL;
> +
> +    if (fread(len, sizeof *len, 1, f) != 1
> +        || (is_read && *len > buffer_size)) {
> +        VLOG_ERR("Failed to read replay length.");

Would it make sense to distinguish between the two cases?  That is,
should we allow partial reads into buffers that can't store the whole
record?

> +        goto out;
> +    }
> +
> +    if (*len > 0 && is_read && fread(buffer, 1, *len, f) != *len) {
> +        VLOG_ERR("Failed to read replay buffer.");
> +        goto out;
> +    }
> +
> +    if (fread(seqno, sizeof *seqno, 1, f) != 1) {
> +        *seqno = INT_MAX;  /* Most likely EOF. */
> +        if (ferror(f)) {
> +            VLOG_INFO("Failed to read replay seqno.");
> +            goto out;
> +        }
> +    }
> +
> +    retval = 0;
> +out:
> +    replay_seqno++;  /* Read completed. */
> +    return retval;
> +}
> +
> +void
> +ovs_replay_usage(void)
> +{
> +    printf("\nReplay options:\n"
> +           "  --replay-record[=DIR]     turn on writing replay files\n"

nit: I wonder if it would be more clear if this was renamed to
"--record".  If so, we'd have to also update the man pages.

> +           "  --replay[=DIR]            run from replay files\n");
> +}
> diff --git a/lib/ovs-replay.h b/lib/ovs-replay.h
> new file mode 100644
> index 000000000..03cddeb13
> --- /dev/null
> +++ b/lib/ovs-replay.h
> @@ -0,0 +1,163 @@
> +/*
> + * Copyright (c) 2021, Red Hat, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef OVS_REPLAY_H
> +#define OVS_REPLAY_H 1
> +
> +/*
> + * Library to work with 'replay' files.
> + *
> + * ovs_replay_file_open() should be used to open a new replay file.
> + * 'replay' file contains records.  If current state is OVS_REPLAY_WRITE,
> + * files are opened in a write mode and new records could be written by
> + * ovs_replay_write().  If current mode is OVS_REPLAY_READ, files are
> + * opened in a read mode and records could be read with ovs_replay_read().
> + *
> + * Each record has several fields:
> + *   <seqno> <len> [<data>]
> + *
> + * Here <seqno> is a global sequence number of the record, it is unique
> + * across all the replay files.  By comparing normalized version of this
> + * number (ovs_replay_normalized_seqno()) with the current global sequence
> + * number (ovs_replay_seqno()) users may detect if this record should be
> + * replayed now.
> + *
> + * Non-normalized versions of seqno are used to distinguish 'read' and 'write'
> + * records.  'read' records are records that corresponds to incoming events.
> + * Only 'read' records contains <data>.  'write' records contains outgoing
> + * events, i.e. stream_write() and contains only the size of outgoing message.
> + *
> + * For 'read' records, <len> is a size of a <data> stored in this record in
> + * bytes.  For 'write' records, it is a size of outgoing message, but there
> + * is no <data>.  If it contains negative value, it means that this record
> + * holds some recorded error and no data available.
> + */
> +
> +#include <stdbool.h>
> +#include <stdio.h>
> +
> +typedef FILE *replay_file_t;
> +
> +/* Replay state. */
> +enum ovs_replay_state {
> +    OVS_REPLAY_NONE,
> +    OVS_REPLAY_WRITE,
> +    OVS_REPLAY_READ,
> +};
> +
> +void ovs_replay_set_state(enum ovs_replay_state);
> +enum ovs_replay_state ovs_replay_get_state(void);
> +
> +static inline bool
> +ovs_replay_is_active(void)
> +{
> +    return ovs_replay_get_state() != OVS_REPLAY_NONE;
> +}
> +
> +/* Returns 'true' if provided sequence number belongs to  'read' record. */
> +static inline bool
> +ovs_replay_seqno_is_read(int seqno)
> +{
> +    return seqno >= 0;
> +}
> +
> +/* Normalizes sequence number, so it can be used to compare with result of
> + * ovs_replay_seqno(). */
> +static inline int
> +ovs_replay_normalized_seqno(int seqno)
> +{
> +    return seqno >= 0 ? seqno : -seqno;
> +}
> +
> +/* Locks the replay module.
> + * Locking required to use ovs_replay_file_open() and ovs_replay_read(). */
> +void ovs_replay_lock(void);
> +
> +/* Unlocks the replay module. */
> +void ovs_replay_unlock(void);
> +
> +/* Returns current global replay sequence number. */
> +int ovs_replay_seqno(void);
> +
> +/* In write mode creates a new replay file to write stream replay.
> + * In read mode opens an existing replay file.
> + *
> + * Requires replay being locked with ovs_replay_lock().
> + *
> + * On success returns 0,  'f' points to the opened file.  If current mode is
> + * OVS_REPLAY_READ, sets 'seqno' to the sequence number of the first record in
> + * the file.
> + *
> + * On failure returns positive errno. */
> +int ovs_replay_file_open(const char *name, replay_file_t *f, int *seqno);
> +
> +/* Closes replay file. */
> +void ovs_replay_file_close(replay_file_t f);
> +
> +/* Writes a new record of 'n' bytes from 'buffer' to a replay file.
> + * 'is_read' should be true if the record belongs to 'read' operation
> + * Depending on 'is_read', creates 'read' or 'write' record.  'write' records
> + * contains only the size of a bufer ('n').
> + * If 'n' is negative, writes 'n' as an error status.
> + *
> + * On success returns 0.  Otherwise, positive errno. */
> +int ovs_replay_write(replay_file_t f, const void *buffer, int n, bool is_read);
> +
> +/* Reads one record from a replay file to 'buffer'.  'buffer_size' should be
> + * equal to the size of a memory available.
> + *
> + * On success, actual size of the read record will be set to 'len', 'seqno'
> + * will be set to the sequence number of the next record in the file.  If it
> + * was the last record, sets 'seqno' to INT_MAX.
> + * Negative 'len' means that record contained an error status.
> + *
> + * Depending on 'is_read', tries to read 'read' or 'write' record.  For the
> + * 'write' record, only 'len' and 'seqno' updated, no data read to 'buffer'.
> + *
> + * On success returns 0.  Otherwise, positive errno. */
> +int ovs_replay_read(replay_file_t f, void *buffer, int buffer_size,
> +                    int *len, int *seqno, bool is_read);
> +
> +/* Helpers for cmdline options. */
> +#define OVS_REPLAY_OPTION_ENUMS  \
> +        OPT_OVS_REPLAY_REC,      \
> +        OPT_OVS_REPLAY
> +
> +#define OVS_REPLAY_LONG_OPTIONS                                          \
> +        {"replay-record", optional_argument, NULL, OPT_OVS_REPLAY_REC},  \
> +        {"replay",        optional_argument, NULL, OPT_OVS_REPLAY}
> +
> +#define OVS_REPLAY_OPTION_HANDLERS                                 \
> +        case OPT_OVS_REPLAY_REC:                                   \
> +            ovs_replay_set_state(OVS_REPLAY_WRITE);                \
> +            ovs_replay_set_dirname(optarg);                        \
> +            break;                                                 \
> +                                                                   \
> +        case OPT_OVS_REPLAY:                                       \
> +            ovs_replay_set_state(OVS_REPLAY_READ);                 \
> +            ovs_replay_set_dirname(optarg);                        \
> +            break;
> +
> +#define OVS_REPLAY_CASES \
> +        case OPT_OVS_REPLAY_REC: case OPT_OVS_REPLAY:
> +
> +/* Prints usage information. */
> +void ovs_replay_usage(void);
> +
> +/* Sets path to the directory where replay files should be stored. */
> +void ovs_replay_set_dirname(const char *new_dirname);
> +
> +#endif /* OVS_REPLAY_H */
> diff --git a/lib/ovs-replay.man b/lib/ovs-replay.man
> new file mode 100644
> index 000000000..f81b9fbed
> --- /dev/null
> +++ b/lib/ovs-replay.man
> @@ -0,0 +1,16 @@
> +.IP "\fB\-\-record-replay[=\fIdirectory\fR]"
> +Sets the process in "recording" mode, in which it will record all the
> +connections, data from streams (Unix domain and network sockets) and some other
> +important necessary bits, so they could be replayed later.
> +Recorded data is stored in replay files in specified \fIdirectory\fR.
> +If \fIdirectory\fR does not begin with \fB/\fR, it is interpreted as relative
> +to \fB at RUNDIR@\fR.  If \fIdirectory\fR is not specified, \fB at RUNDIR@\fR will
> +be used.
> +.
> +.IP "\fB\-\-replay[=\fIdirectory\fR]"
> +Sets the process in "replay" mode, in which it will read information about
> +connections, data from streams (Unix domain and network sockets) and some
> +other necessary bits directly from replay files instead of using real sockets.
> +Replay files from the \fIdirectory\fR will be used.  If \fIdirectory\fR does
> +not begin with \fB/\fR, it is interpreted as relative to \fB at RUNDIR@\fR.
> +If \fIdirectory\fR is not specified, \fB at RUNDIR@\fR will be used.
> diff --git a/lib/ovs-replay.xml b/lib/ovs-replay.xml
> new file mode 100644
> index 000000000..7392bd540
> --- /dev/null
> +++ b/lib/ovs-replay.xml
> @@ -0,0 +1,35 @@
> +<?xml version="1.0" encoding="utf-8"?>
> +<dl>
> +  <dt><code>--record-replay</code>[<code>=</code><var>directory</var>]</dt>
> +  <dd>
> +    <p>
> +      Sets the process in "recording" mode, in which it will record all the
> +      connections, data from streams (Unix domain and network sockets) and some
> +      other necessary bits, so they could be replayed later.
> +    </p>
> +    <p>
> +      Recorded data is stored in replay files in specified
> +      <var>directory</var>.  If <var>directory</var> does not begin with
> +      <code>/<code>, it is interpreted as relative to <code>@RUNDIR@</code>.
> +      If <var>directory</var> is not specified, <code>@RUNDIR@</code> will
> +      be used.
> +    </p>
> +  </dd>
> +
> +  <dt><code>--replay</code>[<code>=</code><var>directory</var>]</dt>
> +  <dd>
> +    <p>
> +      Sets the process in "replay" mode, in which it will read information
> +      about connections, data from streams (Unix domain and network sockets)
> +      and some other necessary bits directly from replay files instead of using
> +      real sockets.
> +    </p>
> +    <p>
> +      Replay files from the <var>directory</var> will be used.
> +      If <var>directory</var> does not begin with <code>/<code>, it is
> +      interpreted as relative to <code>@RUNDIR@</code>.  If
> +      <var>directory</var> is not specified, <code>@RUNDIR@</code> will be
> +      used.
> +    </p>
> +  </dd>
> +</dl>
> 



More information about the dev mailing list