[ovs-dev] [threads 21/28] async-append: New library to allow asynchronous appending to a log file.

Ben Pfaff blp at nicira.com
Wed Jul 10 23:04:03 UTC 2013


This will be hooked into the vlog library in an upcoming commit.

Signed-off-by: Ben Pfaff <blp at nicira.com>
---
 configure.ac            |    1 +
 lib/async-append-aio.c  |  178 +++++++++++++++++++++++++++++++++++++++++++++++
 lib/async-append-sync.c |   62 ++++++++++++++++
 lib/async-append.h      |   67 ++++++++++++++++++
 lib/automake.mk         |    7 ++
 m4/openvswitch.m4       |    5 ++
 6 files changed, 320 insertions(+), 0 deletions(-)
 create mode 100644 lib/async-append-aio.c
 create mode 100644 lib/async-append-sync.c
 create mode 100644 lib/async-append.h

diff --git a/configure.ac b/configure.ac
index e4f9991..2f3e474 100644
--- a/configure.ac
+++ b/configure.ac
@@ -86,6 +86,7 @@ OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(1)
 OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(2)
 OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(4)
 OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(8)
+OVS_CHECK_POSIX_AIO
 
 OVS_ENABLE_OPTION([-Wall])
 OVS_ENABLE_OPTION([-Wno-sign-compare])
diff --git a/lib/async-append-aio.c b/lib/async-append-aio.c
new file mode 100644
index 0000000..48edc38
--- /dev/null
+++ b/lib/async-append-aio.c
@@ -0,0 +1,178 @@
+/* Copyright (c) 2013 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+/* This implementation of the async-append.h interface uses the POSIX
+ * asynchronous I/O interface.  */
+
+#include "async-append.h"
+
+#include <aio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "byteq.h"
+#include "ovs-thread.h"
+#include "util.h"
+
+/* Size in bytes of each appender's buffer: the most data it can have queued. */
+enum { BUFFER_SIZE = 65536 };
+
+/* Maximum number of aiocbs (outstanding write requests) per appender.
+ *
+ * aiocbs are big (144 bytes with glibc 2.11 on i386), so the number is based
+ * on the amount of buffer space.  It must be a power of 2 (asserted below)
+ * because ring positions are turned into indexes with '& (MAX_CBS - 1)'. */
+enum { MAX_CBS = ROUND_DOWN_POW2(BUFFER_SIZE / sizeof(struct aiocb)) };
+BUILD_ASSERT_DECL(IS_POW2(MAX_CBS));
+
+struct async_append {
+    int fd;                     /* File descriptor that data is written to. */
+
+    struct aiocb *aiocbs;       /* Array of MAX_CBS request slots (a ring). */
+    unsigned int aiocb_head, aiocb_tail; /* Next to submit / next to reap. */
+
+    uint8_t *buffer;            /* BUFFER_SIZE bytes backing 'byteq'. */
+    struct byteq byteq;         /* Data copied in but not yet reaped. */
+};
+
+static bool async_append_enabled;   /* Set by async_append_enable(). */
+
+void            /* Makes later async_append_write() calls use aio_write(). */
+async_append_enable(void)
+{
+    assert_single_threaded();
+    forbid_forking("async i/o enabled");
+    async_append_enabled = true;    /* Checked by async_append_write(). */
+}
+
+struct async_append *   /* Returns a new appender that writes to 'fd'. */
+async_append_create(int fd)
+{
+    struct async_append *ap;
+
+    ap = xmalloc(sizeof *ap);
+    ap->fd = fd;
+    ap->aiocbs = xmalloc(MAX_CBS * sizeof *ap->aiocbs);
+    ap->aiocb_head = ap->aiocb_tail = 0;    /* No requests outstanding. */
+    ap->buffer = xmalloc(BUFFER_SIZE);
+    byteq_init(&ap->byteq, ap->buffer, BUFFER_SIZE);
+
+    return ap;
+}
+
+void            /* Frees 'ap' once its pending writes finish; keeps fd open. */
+async_append_destroy(struct async_append *ap)
+{
+    if (ap) {                       /* A null 'ap' is a no-op. */
+        async_append_flush(ap);  /* Pending aio still reads ap->buffer. */
+        free(ap->aiocbs);
+        free(ap->buffer);
+        free(ap);
+    }
+}
+
+static bool     /* True if 'ap' is out of aiocb slots or buffer space. */
+async_append_is_full(const struct async_append *ap)
+{
+    return (ap->aiocb_head - ap->aiocb_tail >= MAX_CBS
+            || byteq_is_full(&ap->byteq));
+}
+
+static bool     /* True if no submitted data is waiting to be reaped. */
+async_append_is_empty(const struct async_append *ap)
+{
+    return byteq_is_empty(&ap->byteq);
+}
+
+static void     /* Reaps finished writes; blocks only if none has finished. */
+async_append_wait(struct async_append *ap)
+{
+    int n = 0;                  /* Requests reaped so far in this call. */
+
+    while (!async_append_is_empty(ap)) {
+        struct aiocb *aiocb = &ap->aiocbs[ap->aiocb_tail & (MAX_CBS - 1)];
+        int error = aio_error(aiocb);   /* Status of the oldest request. */
+
+        if (error == EINPROGRESS) {
+            const struct aiocb *p = aiocb;
+            if (n > 0) {
+                return;                 /* Made progress; don't block. */
+            }
+            aio_suspend(&p, 1, NULL);   /* No timeout; loop rechecks status. */
+        } else {
+            ignore(aio_return(aiocb));  /* Result discarded, even on error. */
+            ap->aiocb_tail++;           /* Retire the slot... */
+            byteq_advance_tail(&ap->byteq, aiocb->aio_nbytes);
+            n++;                        /* ...and all of its requested bytes. */
+        }
+    }
+}
+
+void    /* Appends 'size' bytes of 'data' to ap->fd, via aio when enabled. */
+async_append_write(struct async_append *ap, const void *data_, size_t size)
+{
+    const uint8_t *data = data_;
+
+    if (!async_append_enabled) {
+        ignore(write(ap->fd, data, size));  /* Plain synchronous write. */
+        return;
+    }
+
+    while (size > 0) {          /* Each pass submits one chunk. */
+        struct aiocb *aiocb;
+        size_t chunk_size;
+        void *chunk;
+
+        while (async_append_is_full(ap)) {
+            async_append_wait(ap);      /* Block until space frees up. */
+        }
+
+        chunk = byteq_head(&ap->byteq);
+        chunk_size = byteq_headroom(&ap->byteq);
+        if (chunk_size > size) {
+            chunk_size = size;
+        }
+        memcpy(chunk, data, chunk_size);  /* aio writes from this copy. */
+
+        aiocb = &ap->aiocbs[ap->aiocb_head & (MAX_CBS - 1)];
+        memset(aiocb, 0, sizeof *aiocb);
+        aiocb->aio_fildes = ap->fd;
+        aiocb->aio_offset = 0;      /* 'fd' must be O_APPEND (see header). */
+        aiocb->aio_buf = chunk;
+        aiocb->aio_nbytes = chunk_size;
+        aiocb->aio_sigevent.sigev_notify = SIGEV_NONE;  /* No signal. */
+        if (aio_write(aiocb) == -1) {
+            async_append_flush(ap);     /* Drain queued data first... */
+            ignore(write(ap->fd, data, size));  /* ...then write the rest. */
+            return;
+        }
+
+        data += chunk_size;
+        size -= chunk_size;
+        byteq_advance_head(&ap->byteq, chunk_size);
+        ap->aiocb_head++;
+    }
+}
+
+void            /* Blocks until every submitted write has been reaped. */
+async_append_flush(struct async_append *ap)
+{
+    while (!async_append_is_empty(ap)) {
+        async_append_wait(ap);
+    }
+}
diff --git a/lib/async-append-sync.c b/lib/async-append-sync.c
new file mode 100644
index 0000000..d40fdc8
--- /dev/null
+++ b/lib/async-append-sync.c
@@ -0,0 +1,62 @@
+/* Copyright (c) 2013 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+/* This implementation of the async-append.h interface uses ordinary
+ * synchronous I/O, so it should be portable everywhere. */
+
+#include "async-append.h"
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "util.h"
+
+struct async_append {
+    int fd;                     /* File descriptor that data is written to. */
+};
+
+void
+async_append_enable(void)
+{
+    /* Nothing to do: this implementation always writes synchronously. */
+}
+
+struct async_append *   /* Returns a new appender that writes to 'fd'. */
+async_append_create(int fd)
+{
+    struct async_append *ap = xmalloc(sizeof *ap);
+    ap->fd = fd;
+    return ap;
+}
+
+void
+async_append_destroy(struct async_append *ap)
+{
+    free(ap);                   /* Does not close ap->fd. */
+}
+
+void
+async_append_write(struct async_append *ap, const void *data, size_t size)
+{
+    ignore(write(ap->fd, data, size));  /* write() result is discarded. */
+}
+
+void
+async_append_flush(struct async_append *ap OVS_UNUSED)
+{
+    /* Nothing to do: async_append_write() already called write(). */
+}
diff --git a/lib/async-append.h b/lib/async-append.h
new file mode 100644
index 0000000..fb0ce52
--- /dev/null
+++ b/lib/async-append.h
@@ -0,0 +1,67 @@
+/* Copyright (c) 2013 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ASYNC_APPEND_H
+#define ASYNC_APPEND_H 1
+
+#include <stddef.h>
+
+/* This module defines a simple, abstract interface to asynchronous file I/O.
+ * It is currently used only for logging.  Thus, for now the interface only
+ * supports appending to a file.  Multiple implementations are possible
+ * depending on the operating system's degree and form of support for
+ * asynchronous I/O.
+ *
+ * The comments below document the requirements on any implementation.
+ *
+ * Thread-safety
+ * =============
+ *
+ * Only a single thread may use a given 'struct async_append' at one time.
+ */
+
+/* Enables using asynchronous I/O.  Some implementations may treat this as a
+ * no-op.
+ *
+ * Before this function is called, the POSIX aio implementation uses ordinary
+ * synchronous I/O because some POSIX aio libraries actually use threads
+ * internally, which has enough cost and robustness implications that it's
+ * better to use asynchronous I/O only when it has real expected benefits.
+ *
+ * Must be called while the process is still single-threaded.  May forbid the
+ * process from subsequently forking. */
+void async_append_enable(void);
+
+/* Creates and returns a new asynchronous appender for file descriptor 'fd',
+ * which the caller must have opened in append mode (O_APPEND).
+ *
+ * This function must always succeed.  If the system is for some reason unable
+ * to support asynchronous I/O on 'fd' then the library must fall back to
+ * synchronous I/O. */
+struct async_append *async_append_create(int fd);
+
+/* Destroys 'ap', without closing its underlying file descriptor. */
+void async_append_destroy(struct async_append *ap);
+
+/* Appends the 'size' bytes of 'data' to 'ap', asynchronously if possible. */
+void async_append_write(struct async_append *ap,
+                        const void *data, size_t size);
+
+/* Blocks until all data asynchronously written to 'ap' with
+ * async_append_write() has been committed to the point that it will be written
+ * to disk barring an operating system or hardware failure. */
+void async_append_flush(struct async_append *ap);
+
+#endif /* async-append.h */
diff --git a/lib/automake.mk b/lib/automake.mk
index 6b0972b..f18df91 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -10,6 +10,7 @@ noinst_LIBRARIES += lib/libopenvswitch.a
 lib_libopenvswitch_a_SOURCES = \
 	lib/aes128.c \
 	lib/aes128.h \
+	lib/async-append.h \
 	lib/backtrace.c \
 	lib/backtrace.h \
 	lib/bfd.c \
@@ -259,6 +260,12 @@ lib_libopenvswitch_a_SOURCES += \
 	lib/route-table.h
 endif
 
+if HAVE_POSIX_AIO
+lib_libopenvswitch_a_SOURCES += lib/async-append-aio.c
+else
+lib_libopenvswitch_a_SOURCES += lib/async-append-sync.c
+endif
+
 if ESX
 lib_libopenvswitch_a_SOURCES += \
         lib/route-table-stub.c
diff --git a/m4/openvswitch.m4 b/m4/openvswitch.m4
index ca80506..dbfc7c4 100644
--- a/m4/openvswitch.m4
+++ b/m4/openvswitch.m4
@@ -446,3 +446,8 @@ AC_DEFUN([OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE],
         __atomic_always_lock_free($1, 0).  If the C compiler is not GCC or is
         an older version of GCC, the value does not matter.])
    fi])
+
+dnl OVS_CHECK_POSIX_AIO: HAVE_POSIX_AIO is true if aio_write links (maybe -lrt).
+AC_DEFUN([OVS_CHECK_POSIX_AIO],
+  [AC_SEARCH_LIBS([aio_write], [rt])
+   AM_CONDITIONAL([HAVE_POSIX_AIO], [test "$ac_cv_search_aio_write" != no])])
-- 
1.7.2.5




More information about the dev mailing list