[ovs-dev] [threads 21/28] async-append: New library to allow asynchronous appending to a log file.
Ben Pfaff
blp at nicira.com
Wed Jul 10 23:04:03 UTC 2013
This will be hooked into the vlog library in an upcoming commit.
Signed-off-by: Ben Pfaff <blp at nicira.com>
---
configure.ac | 1 +
lib/async-append-aio.c | 178 +++++++++++++++++++++++++++++++++++++++++++++++
lib/async-append-sync.c | 62 ++++++++++++++++
lib/async-append.h | 67 ++++++++++++++++++
lib/automake.mk | 7 ++
m4/openvswitch.m4 | 5 ++
6 files changed, 320 insertions(+), 0 deletions(-)
create mode 100644 lib/async-append-aio.c
create mode 100644 lib/async-append-sync.c
create mode 100644 lib/async-append.h
diff --git a/configure.ac b/configure.ac
index e4f9991..2f3e474 100644
--- a/configure.ac
+++ b/configure.ac
@@ -86,6 +86,7 @@ OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(1)
OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(2)
OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(4)
OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(8)
+OVS_CHECK_POSIX_AIO
OVS_ENABLE_OPTION([-Wall])
OVS_ENABLE_OPTION([-Wno-sign-compare])
diff --git a/lib/async-append-aio.c b/lib/async-append-aio.c
new file mode 100644
index 0000000..48edc38
--- /dev/null
+++ b/lib/async-append-aio.c
@@ -0,0 +1,178 @@
+/* Copyright (c) 2013 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+/* This implementation of the async-append.h interface uses the POSIX
+ * asynchronous I/O interface. */
+
+#include "async-append.h"
+
+#include <aio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "byteq.h"
+#include "ovs-thread.h"
+#include "util.h"
+
+/* Maximum number of bytes of buffered data. */
+enum { BUFFER_SIZE = 65536 };
+
+/* Maximum number of aiocbs to use.
+ *
+ * aiocbs are big (144 bytes with glibc 2.11 on i386) so we try to allow for a
+ * reasonable number by basing the number we allocate on the amount of buffer
+ * space. */
+enum { MAX_CBS = ROUND_DOWN_POW2(BUFFER_SIZE / sizeof(struct aiocb)) };
+BUILD_ASSERT_DECL(IS_POW2(MAX_CBS));
+/* State for one appender as returned by async_append_create(). */
+struct async_append {
+ int fd; /* Descriptor passed to async_append_create(). */
+
+ struct aiocb *aiocbs; /* Array of MAX_CBS control blocks. */
+ unsigned int aiocb_head, aiocb_tail; /* Free-running; masked on use. */
+
+ uint8_t *buffer; /* BUFFER_SIZE bytes of storage for 'byteq'. */
+ struct byteq byteq; /* Bytes submitted with aio_write(), not yet reaped. */
+};
+
+static bool async_append_enabled; /* Set by async_append_enable(). */
+/* Makes async_append_write() use aio_write() instead of a plain write().
+ * Calls assert_single_threaded() and forbid_forking(), matching the
+ * requirements documented in async-append.h. */
+void
+async_append_enable(void)
+{
+ assert_single_threaded();
+ forbid_forking("async i/o enabled");
+ async_append_enabled = true;
+}
+/* Allocates an appender for 'fd' together with its MAX_CBS-entry aiocb array
+ * and its BUFFER_SIZE-byte data buffer. The caller owns the result and
+ * releases it with async_append_destroy(). */
+struct async_append *
+async_append_create(int fd)
+{
+ struct async_append *ap;
+
+ ap = xmalloc(sizeof *ap);
+ ap->fd = fd;
+ ap->aiocbs = xmalloc(MAX_CBS * sizeof *ap->aiocbs);
+ ap->aiocb_head = ap->aiocb_tail = 0; /* No requests outstanding. */
+ ap->buffer = xmalloc(BUFFER_SIZE);
+ byteq_init(&ap->byteq, ap->buffer, BUFFER_SIZE);
+
+ return ap;
+}
+/* Waits for pending writes with async_append_flush(), then frees 'ap' and its
+ * buffers. Does nothing if 'ap' is null. Does not close ap->fd. */
+void
+async_append_destroy(struct async_append *ap)
+{
+ if (ap) {
+ async_append_flush(ap); /* Buffers must outlive in-flight requests. */
+ free(ap->aiocbs);
+ free(ap->buffer);
+ free(ap);
+ }
+}
+/* Returns true if all MAX_CBS aiocbs are in use or the byte queue is full. */
+static bool
+async_append_is_full(const struct async_append *ap)
+{
+ return (ap->aiocb_head - ap->aiocb_tail >= MAX_CBS
+ || byteq_is_full(&ap->byteq));
+}
+/* Returns true if the byte queue is empty. Bytes stay queued until
+ * async_append_wait() reaps the request that carries them. */
+static bool
+async_append_is_empty(const struct async_append *ap)
+{
+ return byteq_is_empty(&ap->byteq);
+}
+/* Reaps finished requests starting at the tail of the aiocb ring. Blocks in
+ * aio_suspend() only while nothing has been reaped yet in this call; once at
+ * least one request has been reaped it returns at the first request that is
+ * still in progress. Returns immediately if the byte queue is empty. */
+static void
+async_append_wait(struct async_append *ap)
+{
+ int n = 0; /* Requests reaped during this call. */
+
+ while (!async_append_is_empty(ap)) {
+ struct aiocb *aiocb = &ap->aiocbs[ap->aiocb_tail & (MAX_CBS - 1)];
+ int error = aio_error(aiocb);
+
+ if (error == EINPROGRESS) {
+ const struct aiocb *p = aiocb;
+ if (n > 0) {
+ return; /* Already made progress; do not block. */
+ }
+ aio_suspend(&p, 1, NULL); /* No timeout; the loop rechecks status. */
+ } else {
+ ignore(aio_return(aiocb)); /* Result, even an error, is discarded. */
+ ap->aiocb_tail++;
+ byteq_advance_tail(&ap->byteq, aiocb->aio_nbytes);
+ n++;
+ }
+ }
+}
+/* Copies 'data' into the byte queue chunk by chunk and submits one
+ * aio_write() per chunk. Uses a plain write() instead when asynchronous I/O
+ * has not been enabled, or for the data not yet submitted if aio_write()
+ * fails. Write errors are ignored in every path. */
+void
+async_append_write(struct async_append *ap, const void *data_, size_t size)
+{
+ const uint8_t *data = data_;
+
+ if (!async_append_enabled) {
+ ignore(write(ap->fd, data, size)); /* Synchronous until enabled. */
+ return;
+ }
+
+ while (size > 0) {
+ struct aiocb *aiocb;
+ size_t chunk_size;
+ void *chunk;
+
+ while (async_append_is_full(ap)) {
+ async_append_wait(ap); /* Need a free aiocb and buffer space. */
+ }
+
+ chunk = byteq_head(&ap->byteq);
+ chunk_size = byteq_headroom(&ap->byteq);
+ if (chunk_size > size) {
+ chunk_size = size;
+ }
+ memcpy(chunk, data, chunk_size); /* Caller's buffer may go away. */
+
+ aiocb = &ap->aiocbs[ap->aiocb_head & (MAX_CBS - 1)];
+ memset(aiocb, 0, sizeof *aiocb);
+ aiocb->aio_fildes = ap->fd;
+ aiocb->aio_offset = 0; /* 'fd' must be O_APPEND; see async-append.h. */
+ aiocb->aio_buf = chunk;
+ aiocb->aio_nbytes = chunk_size;
+ aiocb->aio_sigevent.sigev_notify = SIGEV_NONE;
+ if (aio_write(aiocb) == -1) {
+ async_append_flush(ap); /* Drain pending requests before fallback. */
+ ignore(write(ap->fd, data, size));
+ return;
+ }
+
+ data += chunk_size;
+ size -= chunk_size;
+ byteq_advance_head(&ap->byteq, chunk_size);
+ ap->aiocb_head++;
+ }
+}
+/* Calls async_append_wait() repeatedly until the byte queue is empty, that
+ * is, until every submitted request has been reaped. */
+void
+async_append_flush(struct async_append *ap)
+{
+ while (!async_append_is_empty(ap)) {
+ async_append_wait(ap);
+ }
+}
diff --git a/lib/async-append-sync.c b/lib/async-append-sync.c
new file mode 100644
index 0000000..d40fdc8
--- /dev/null
+++ b/lib/async-append-sync.c
@@ -0,0 +1,62 @@
+/* Copyright (c) 2013 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+/* This implementation of the async-append.h interface uses ordinary
+ * synchronous I/O, so it should be portable everywhere. */
+
+#include "async-append.h"
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "util.h"
+/* Appender state for the synchronous implementation. */
+struct async_append {
+ int fd; /* Descriptor written to by async_append_write(). */
+};
+/* No-op: this implementation always writes synchronously. */
+void
+async_append_enable(void)
+{
+ /* Nothing to do. */
+}
+/* Allocates and returns an appender that records 'fd'. */
+struct async_append *
+async_append_create(int fd)
+{
+ struct async_append *ap = xmalloc(sizeof *ap);
+ ap->fd = fd;
+ return ap;
+}
+/* Frees 'ap'. Does not close its file descriptor. */
+void
+async_append_destroy(struct async_append *ap)
+{
+ free(ap);
+}
+/* Issues a single write() of 'size' bytes of 'data'; the result is ignored. */
+void
+async_append_write(struct async_append *ap, const void *data, size_t size)
+{
+ ignore(write(ap->fd, data, size));
+}
+/* No-op: async_append_write() never leaves data buffered in this module. */
+void
+async_append_flush(struct async_append *ap OVS_UNUSED)
+{
+ /* Nothing to do. */
+}
diff --git a/lib/async-append.h b/lib/async-append.h
new file mode 100644
index 0000000..fb0ce52
--- /dev/null
+++ b/lib/async-append.h
@@ -0,0 +1,67 @@
+/* Copyright (c) 2013 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ASYNC_APPEND_H
+#define ASYNC_APPEND_H 1
+
+#include <stddef.h>
+
+/* This module defines a simple, abstract interface to asynchronous file I/O.
+ * It is currently used only for logging. Thus, for now the interface only
+ * supports appending to a file. Multiple implementations are possible
+ * depending on the operating system's degree and form of support for
+ * asynchronous I/O.
+ *
+ * The comments below document the requirements on any implementation.
+ *
+ * Thread-safety
+ * =============
+ *
+ * Only a single thread may use a given 'struct async_append' at one time.
+ */
+
+/* Enables using asynchronous I/O. Some implementations may treat this as a
+ * no-op.
+ *
+ * Before this function is called, the POSIX aio implementation uses ordinary
+ * synchronous I/O because some POSIX aio libraries actually use threads
+ * internally, which has enough cost and robustness implications that it's
+ * better to use asynchronous I/O only when it has real expected benefits.
+ *
+ * Must be called while the process is still single-threaded. May forbid the
+ * process from subsequently forking. */
+void async_append_enable(void);
+
+/* Creates and returns a new asynchronous appender for file descriptor 'fd',
+ * which the caller must have opened in append mode (O_APPEND).
+ *
+ * This function must always succeed. If the system is for some reason unable
+ * to support asynchronous I/O on 'fd' then the library must fall back to
+ * synchronous I/O. */
+struct async_append *async_append_create(int fd);
+
+/* Destroys 'ap', without closing its underlying file descriptor. */
+void async_append_destroy(struct async_append *ap);
+
+/* Appends the 'size' bytes of 'data' to 'ap', asynchronously if possible. */
+void async_append_write(struct async_append *ap,
+ const void *data, size_t size);
+
+/* Blocks until all data asynchronously written to 'ap' with
+ * async_append_write() has been committed to the point that it will be written
+ * to disk barring an operating system or hardware failure. */
+void async_append_flush(struct async_append *ap);
+
+#endif /* async-append.h */
diff --git a/lib/automake.mk b/lib/automake.mk
index 6b0972b..f18df91 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -10,6 +10,7 @@ noinst_LIBRARIES += lib/libopenvswitch.a
lib_libopenvswitch_a_SOURCES = \
lib/aes128.c \
lib/aes128.h \
+ lib/async-append.h \
lib/backtrace.c \
lib/backtrace.h \
lib/bfd.c \
@@ -259,6 +260,12 @@ lib_libopenvswitch_a_SOURCES += \
lib/route-table.h
endif
+if HAVE_POSIX_AIO
+lib_libopenvswitch_a_SOURCES += lib/async-append-aio.c
+else
+lib_libopenvswitch_a_SOURCES += lib/async-append-sync.c
+endif
+
if ESX
lib_libopenvswitch_a_SOURCES += \
lib/route-table-stub.c
diff --git a/m4/openvswitch.m4 b/m4/openvswitch.m4
index ca80506..dbfc7c4 100644
--- a/m4/openvswitch.m4
+++ b/m4/openvswitch.m4
@@ -446,3 +446,8 @@ AC_DEFUN([OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE],
__atomic_always_lock_free($1, 0). If the C compiler is not GCC or is
an older version of GCC, the value does not matter.])
fi])
+
+dnl OVS_CHECK_POSIX_AIO
+dnl
+dnl Searches for aio_write(), trying -lrt if the function is not already
+dnl available, and sets the HAVE_POSIX_AIO Automake conditional to true
+dnl unless the search result is "no".
+AC_DEFUN([OVS_CHECK_POSIX_AIO],
+ [AC_SEARCH_LIBS([aio_write], [rt])
+ AM_CONDITIONAL([HAVE_POSIX_AIO], [test "$ac_cv_search_aio_write" != no])])
--
1.7.2.5
More information about the dev
mailing list