[ovs-dev] [PATCH 2/3] fat-rwlock: New big but fast synchronization primitive.
Ethan Jackson
ethan at nicira.com
Tue Jan 14 02:10:38 UTC 2014
In free_slot(), is there any reason not to use an ovs_assert() instead of an abort()?
Tried hard to find races but I couldn't, LGTM.
Acked-by: Ethan Jackson <ethan at nicira.com>
On Mon, Jan 13, 2014 at 11:25 AM, Ben Pfaff <blp at nicira.com> wrote:
> This implements a reader-writer lock that uses a lot of memory (128 to 192
> bytes per thread that takes the lock) but avoids cache line bouncing when
> taking the read side. Thus, a fat_rwlock is a good choice for rwlocks
> taken frequently by readers.
>
> Signed-off-by: Ben Pfaff <blp at nicira.com>
> ---
> lib/automake.mk | 4 +-
> lib/fat-rwlock.c | 275 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
> lib/fat-rwlock.h | 49 ++++++++++
> 3 files changed, 327 insertions(+), 1 deletion(-)
> create mode 100644 lib/fat-rwlock.c
> create mode 100644 lib/fat-rwlock.h
>
> diff --git a/lib/automake.mk b/lib/automake.mk
> index ac6ff4a..449d2c5 100644
> --- a/lib/automake.mk
> +++ b/lib/automake.mk
> @@ -1,4 +1,4 @@
> -# Copyright (C) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
> +# Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
> #
> # Copying and distribution of this file, with or without modification,
> # are permitted in any medium without royalty provided the copyright
> @@ -57,6 +57,8 @@ lib_libopenvswitch_la_SOURCES = \
> lib/dynamic-string.h \
> lib/entropy.c \
> lib/entropy.h \
> + lib/fat-rwlock.c \
> + lib/fat-rwlock.h \
> lib/fatal-signal.c \
> lib/fatal-signal.h \
> lib/flow.c \
> diff --git a/lib/fat-rwlock.c b/lib/fat-rwlock.c
> new file mode 100644
> index 0000000..49dbc72
> --- /dev/null
> +++ b/lib/fat-rwlock.c
> @@ -0,0 +1,275 @@
> +/*
> + * Copyright (c) 2013, 2014 Nicira, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +
> +#include "fat-rwlock.h"
> +
> +#include <errno.h>
> +
> +#include "hmap.h"
> +#include "list.h"
> +#include "ovs-thread.h"
> +#include "random.h"
> +
> +/* This system's cache line size, in bytes.
> + * Being wrong hurts performance but not correctness. */
> +#define CACHE_LINE_SIZE 64 /* Correct for most CPUs. */
> +BUILD_ASSERT_DECL(IS_POW2(CACHE_LINE_SIZE));
> +
> +struct fat_rwlock_slot {
> + /* Membership in rwlock's list of "struct fat_rwlock_slot"s.
> + *
> + * fat_rwlock_destroy() sets 'rwlock' to NULL to indicate that this
> + * slot may be destroyed. */
> + struct list list_node; /* In struct rwlock's 'threads' list. */
> + struct fat_rwlock *rwlock; /* Owner. */
> +
> + /* Mutex.
> + *
> + * A thread holding the read-lock holds its own mutex.
> + *
> + * A thread holding the write-lock holds every thread's mutex, plus
> + * 'rwlock->mutex'. */
> + struct ovs_mutex mutex;
> +
> + /* This thread's locking status for 'rwlock':
> + *
> + * - 0: This thread does not have any lock on 'rwlock'. This thread
> + * does not have 'mutex' locked.
> + *
> + * - 1: This thread has a read-lock on 'rwlock' and holds 'mutex'.
> + *
> + * - 2...UINT_MAX-1: This thread has recursively taken the read-lock on
> + * 'rwlock' to the level of 'depth'. This thread holds 'mutex'.
> + *
> + * - UINT_MAX: This thread has the write-lock on 'rwlock' and holds
> + * 'mutex' (plus the 'mutex' of all of 'rwlock''s other slots).
> + *
> + * Accessed only by the slot's own thread, so no synchronization is
> + * needed. */
> + unsigned int depth;
> +
> + /* To prevent two of these structures from accidentally occupying the same
> + * cache line (causing "false sharing"), we cache-align each of these data
> + * structures. That requires malloc()ing extra space and throwing away
> + * some space at the beginning, which means that the pointer to this struct
> + * isn't necessarily the pointer to the beginning of the block, and so we
> + * need to retain the original pointer to free later.
> + *
> + * Accessed only by a single thread, so no synchronization is needed. */
> + void *base; /* Pointer to pass to free() for this block. */
> +};
> +
> +static void
> +free_slot(struct fat_rwlock_slot *slot)
> +{
> + if (slot->depth) {
> + abort();
> + }
> +
> + list_remove(&slot->list_node);
> + free(slot->base);
> +}
> +
> +static void
> +slot_destructor(void *slot_)
> +{
> + struct fat_rwlock_slot *slot = slot_;
> + struct fat_rwlock *rwlock = slot->rwlock;
> +
> + ovs_mutex_lock(&rwlock->mutex);
> + free_slot(slot);
> + ovs_mutex_unlock(&rwlock->mutex);
> +}
> +
> +/* Initialize 'rwlock' as a new fat_rwlock. */
> +void
> +fat_rwlock_init(struct fat_rwlock *rwlock)
> +{
> + ovsthread_key_create(&rwlock->key, slot_destructor);
> + ovs_mutex_init(&rwlock->mutex);
> + ovs_mutex_lock(&rwlock->mutex);
> + list_init(&rwlock->threads);
> + ovs_mutex_unlock(&rwlock->mutex);
> +}
> +
> +/* Destroys 'rwlock', which must not be locked or otherwise in use by any
> + * thread. */
> +void
> +fat_rwlock_destroy(struct fat_rwlock *rwlock)
> +{
> + struct fat_rwlock_slot *slot, *next;
> +
> + /* Order is important here. By destroying the thread-specific data first,
> + * before we destroy the slots, we ensure that the thread-specific
> + * data destructor can't race with our loop below. */
> + ovsthread_key_delete(rwlock->key);
> +
> + LIST_FOR_EACH_SAFE (slot, next, list_node, &rwlock->threads) {
> + free_slot(slot);
> + }
> +}
> +
> +static struct fat_rwlock_slot *
> +fat_rwlock_get_slot__(struct fat_rwlock *rwlock)
> +{
> + struct fat_rwlock_slot *slot;
> + void *base;
> +
> + /* Fast path. */
> + slot = ovsthread_getspecific(rwlock->key);
> + if (slot) {
> + return slot;
> + }
> +
> + /* Slow path: create a new slot for 'rwlock' in this thread. */
> +
> + /* Allocate room for:
> + *
> + * - Up to CACHE_LINE_SIZE - 1 bytes before the per-thread, so that
> + * the start of the slot doesn't potentially share a cache line.
> + *
> + * - The slot itself.
> + *
> + * - Space following the slot up to the end of the cache line, so
> + * that the end of the slot doesn't potentially share a cache
> + * line. */
> + base = xmalloc((CACHE_LINE_SIZE - 1)
> + + ROUND_UP(sizeof *slot, CACHE_LINE_SIZE));
> + slot = (void *) ROUND_UP((uintptr_t) base, CACHE_LINE_SIZE);
> +
> + slot->base = base;
> + slot->rwlock = rwlock;
> + ovs_mutex_init(&slot->mutex);
> + slot->depth = 0;
> +
> + ovs_mutex_lock(&rwlock->mutex);
> + list_push_back(&rwlock->threads, &slot->list_node);
> + ovs_mutex_unlock(&rwlock->mutex);
> +
> + ovsthread_setspecific(rwlock->key, slot);
> +
> + return slot;
> +}
> +
> +/* Locks 'rwlock' for reading. The read-lock is recursive: it may be acquired
> + * any number of times by a single thread (which must then release it the same
> + * number of times for it to truly be released). */
> +void
> +fat_rwlock_rdlock(const struct fat_rwlock *rwlock_)
> + OVS_ACQ_RDLOCK(rwlock_)
> + OVS_NO_THREAD_SAFETY_ANALYSIS
> +{
> + struct fat_rwlock *rwlock = CONST_CAST(struct fat_rwlock *, rwlock_);
> + struct fat_rwlock_slot *this = fat_rwlock_get_slot__(rwlock);
> +
> + switch (this->depth) {
> + case UINT_MAX:
> + /* This thread already holds the write-lock. */
> + abort();
> +
> + case 0:
> + ovs_mutex_lock(&this->mutex);
> + /* fall through */
> + default:
> + this->depth++;
> + break;
> + }
> +}
> +
> +/* Tries to lock 'rwlock' for reading. If successful, returns 0. If taking
> + * the lock would require blocking, returns EBUSY (without blocking). */
> +int
> +fat_rwlock_tryrdlock(const struct fat_rwlock *rwlock_)
> + OVS_TRY_RDLOCK(0, rwlock_)
> + OVS_NO_THREAD_SAFETY_ANALYSIS
> +{
> + struct fat_rwlock *rwlock = CONST_CAST(struct fat_rwlock *, rwlock_);
> + struct fat_rwlock_slot *this = fat_rwlock_get_slot__(rwlock);
> + int error;
> +
> + switch (this->depth) {
> + case UINT_MAX:
> + return EBUSY;
> +
> + case 0:
> + error = ovs_mutex_trylock(&this->mutex);
> + if (error) {
> + return error;
> + }
> + /* fall through */
> + default:
> + this->depth++;
> + break;
> + }
> +
> + return 0;
> +}
> +
> +/* Locks 'rwlock' for writing.
> + *
> + * The write lock is not recursive. */
> +void
> +fat_rwlock_wrlock(const struct fat_rwlock *rwlock_)
> + OVS_ACQ_WRLOCK(rwlock_)
> + OVS_NO_THREAD_SAFETY_ANALYSIS
> +{
> + struct fat_rwlock *rwlock = CONST_CAST(struct fat_rwlock *, rwlock_);
> + struct fat_rwlock_slot *this = fat_rwlock_get_slot__(rwlock);
> + struct fat_rwlock_slot *slot;
> +
> + ovs_assert(!this->depth);
> + this->depth = UINT_MAX;
> +
> + ovs_mutex_lock(&rwlock->mutex);
> + LIST_FOR_EACH (slot, list_node, &rwlock->threads) {
> + ovs_mutex_lock(&slot->mutex);
> + }
> +}
> +
> +/* Unlocks 'rwlock', which the current thread must have locked for reading or
> + * for writing. If the read lock has been taken recursively, it must be
> + * released the same number of times to be truly released. */
> +void
> +fat_rwlock_unlock(const struct fat_rwlock *rwlock_)
> + OVS_RELEASES(rwlock_)
> + OVS_NO_THREAD_SAFETY_ANALYSIS
> +{
> + struct fat_rwlock *rwlock = CONST_CAST(struct fat_rwlock *, rwlock_);
> + struct fat_rwlock_slot *this = fat_rwlock_get_slot__(rwlock);
> + struct fat_rwlock_slot *slot;
> +
> + switch (this->depth) {
> + case UINT_MAX:
> + LIST_FOR_EACH (slot, list_node, &rwlock->threads) {
> + ovs_mutex_unlock(&slot->mutex);
> + }
> + ovs_mutex_unlock(&rwlock->mutex);
> + this->depth = 0;
> + break;
> +
> + case 0:
> + /* This thread doesn't hold any lock. */
> + abort();
> +
> + case 1:
> + ovs_mutex_unlock(&this->mutex);
> + default:
> + this->depth--;
> + break;
> + }
> +}
> diff --git a/lib/fat-rwlock.h b/lib/fat-rwlock.h
> new file mode 100644
> index 0000000..27e3c22
> --- /dev/null
> +++ b/lib/fat-rwlock.h
> @@ -0,0 +1,49 @@
> +/*
> + * Copyright (c) 2013, 2014 Nicira, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef FAT_RWLOCK_H
> +#define FAT_RWLOCK_H 1
> +
> +#include "compiler.h"
> +#include "list.h"
> +#include "ovs-thread.h"
> +
> +/* "Fat rwlock".
> + *
> + * This implements a reader-writer lock that uses a lot of memory (128 to 192
> + * bytes per thread that takes the lock) but avoids cache line bouncing when
> + * taking the read side. Thus, a fat_rwlock is a good choice for rwlocks taken
> + * frequently by readers.
> + */
> +struct OVS_LOCKABLE fat_rwlock {
> + ovsthread_key_t key;
> +
> + /* Contains "struct fat_rwlock_slot"s, one for each thread that has taken
> + * this lock. Guarded by 'mutex'. */
> + struct list threads OVS_GUARDED;
> + struct ovs_mutex mutex;
> +};
> +
> +void fat_rwlock_init(struct fat_rwlock *);
> +void fat_rwlock_destroy(struct fat_rwlock *);
> +
> +void fat_rwlock_rdlock(const struct fat_rwlock *rwlock) OVS_ACQ_RDLOCK(rwlock);
> +int fat_rwlock_tryrdlock(const struct fat_rwlock *rwlock)
> + OVS_TRY_RDLOCK(0, rwlock);
> +void fat_rwlock_wrlock(const struct fat_rwlock *rwlock) OVS_ACQ_WRLOCK(rwlock);
> +void fat_rwlock_unlock(const struct fat_rwlock *rwlock) OVS_RELEASES(rwlock);
> +
> +#endif /* fat-rwlock.h */
> --
> 1.7.10.4
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
More information about the dev
mailing list