[ovs-dev] [PATCH 2/3] fat-rwlock: New big but fast synchronization primitive.

Ethan Jackson ethan at nicira.com
Tue Jan 14 02:10:38 UTC 2014


In free_slot() any reason not to use an ovs_assert() instead of an abort?

Tried hard to find races but I couldn't, LGTM.

Acked-by: Ethan Jackson <ethan at nicira.com>


On Mon, Jan 13, 2014 at 11:25 AM, Ben Pfaff <blp at nicira.com> wrote:
> This implements a reader-writer lock that uses a lot of memory (128 to 192
> bytes per thread that takes the lock) but avoids cache line bouncing when
> taking the read side.  Thus, a fat_rwlock is a good choice for rwlocks
> taken frequently by readers.
>
> Signed-off-by: Ben Pfaff <blp at nicira.com>
> ---
>  lib/automake.mk  |    4 +-
>  lib/fat-rwlock.c |  275 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/fat-rwlock.h |   49 ++++++++++
>  3 files changed, 327 insertions(+), 1 deletion(-)
>  create mode 100644 lib/fat-rwlock.c
>  create mode 100644 lib/fat-rwlock.h
>
> diff --git a/lib/automake.mk b/lib/automake.mk
> index ac6ff4a..449d2c5 100644
> --- a/lib/automake.mk
> +++ b/lib/automake.mk
> @@ -1,4 +1,4 @@
> -# Copyright (C) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
> +# Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
>  #
>  # Copying and distribution of this file, with or without modification,
>  # are permitted in any medium without royalty provided the copyright
> @@ -57,6 +57,8 @@ lib_libopenvswitch_la_SOURCES = \
>         lib/dynamic-string.h \
>         lib/entropy.c \
>         lib/entropy.h \
> +       lib/fat-rwlock.c \
> +       lib/fat-rwlock.h \
>         lib/fatal-signal.c \
>         lib/fatal-signal.h \
>         lib/flow.c \
> diff --git a/lib/fat-rwlock.c b/lib/fat-rwlock.c
> new file mode 100644
> index 0000000..49dbc72
> --- /dev/null
> +++ b/lib/fat-rwlock.c
> @@ -0,0 +1,275 @@
> +/*
> + * Copyright (c) 2013, 2014 Nicira, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +
> +#include "fat-rwlock.h"
> +
> +#include <errno.h>
> +
> +#include "hmap.h"
> +#include "list.h"
> +#include "ovs-thread.h"
> +#include "random.h"
> +
> +/* This system's cache line size, in bytes.
> + * Being wrong hurts performance but not correctness. */
> +#define CACHE_LINE_SIZE 64      /* Correct for most CPUs. */
> +BUILD_ASSERT_DECL(IS_POW2(CACHE_LINE_SIZE));
> +
> +struct fat_rwlock_slot {
> +    /* Membership in rwlock's list of "struct fat_rwlock_slot"s.
> +     *
> +     * fat_rwlock_destroy() sets 'rwlock' to NULL to indicate that this
> +     * slot may be destroyed. */
> +    struct list list_node;      /* In struct rwlock's 'threads' list. */
> +    struct fat_rwlock *rwlock;  /* Owner. */
> +
> +    /* Mutex.
> +     *
> +     * A thread holding the read-lock holds its own mutex.
> +     *
> +     * A thread holding the write-lock holds every thread's mutex, plus
> +     * 'rwlock->mutex'. */
> +    struct ovs_mutex mutex;
> +
> +    /* This thread's locking status for 'rwlock':
> +     *
> +     *     - 0: This thread does not have any lock on 'rwlock'.  This thread
> +     *       does not have 'mutex' locked.
> +     *
> +     *     - 1: This thread has a read-lock on 'rwlock' and holds 'mutex'.
> +     *
> +     *     - 2...UINT_MAX-1: This thread has recursively taken the read-lock on
> +     *       'rwlock' to the level of 'depth'.  This thread holds 'mutex'.
> +     *
> +     *     - UINT_MAX: This thread has the write-lock on 'rwlock' and holds
> +     *       'mutex' (plus the 'mutex' of all of 'rwlock''s other slots).
> +     *
> +     * Accessed only by the slot's own thread, so no synchronization is
> +     * needed. */
> +    unsigned int depth;
> +
> +    /* To prevent two of these structures from accidentally occupying the same
> +     * cache line (causing "false sharing"), we cache-align each of these data
> +     * structures.  That requires malloc()ing extra space and throwing away
> +     * some space at the beginning, which means that the pointer to this struct
> +     * isn't necessarily the pointer to the beginning of the block, and so we
> +     * need to retain the original pointer to free later.
> +     *
> +     * Accessed only by a single thread, so no synchronization is needed. */
> +    void *base;                /* Pointer to pass to free() for this block.  */
> +};
> +
> +static void
> +free_slot(struct fat_rwlock_slot *slot)
> +{
> +    if (slot->depth) {
> +        abort();
> +    }
> +
> +    list_remove(&slot->list_node);
> +    free(slot->base);
> +}
> +
> +static void
> +slot_destructor(void *slot_)
> +{
> +    struct fat_rwlock_slot *slot = slot_;
> +    struct fat_rwlock *rwlock = slot->rwlock;
> +
> +    ovs_mutex_lock(&rwlock->mutex);
> +    free_slot(slot);
> +    ovs_mutex_unlock(&rwlock->mutex);
> +}
> +
> +/* Initialize 'rwlock' as a new fat_rwlock. */
> +void
> +fat_rwlock_init(struct fat_rwlock *rwlock)
> +{
> +    ovsthread_key_create(&rwlock->key, slot_destructor);
> +    ovs_mutex_init(&rwlock->mutex);
> +    ovs_mutex_lock(&rwlock->mutex);
> +    list_init(&rwlock->threads);
> +    ovs_mutex_unlock(&rwlock->mutex);
> +}
> +
> +/* Destroys 'rwlock', which must not be locked or otherwise in use by any
> + * thread. */
> +void
> +fat_rwlock_destroy(struct fat_rwlock *rwlock)
> +{
> +    struct fat_rwlock_slot *slot, *next;
> +
> +    /* Order is important here.  By destroying the thread-specific data first,
> +     * before we destroy the slots, we ensure that the thread-specific
> +     * data destructor can't race with our loop below. */
> +    ovsthread_key_delete(rwlock->key);
> +
> +    LIST_FOR_EACH_SAFE (slot, next, list_node, &rwlock->threads) {
> +        free_slot(slot);
> +    }
> +}
> +
> +static struct fat_rwlock_slot *
> +fat_rwlock_get_slot__(struct fat_rwlock *rwlock)
> +{
> +    struct fat_rwlock_slot *slot;
> +    void *base;
> +
> +    /* Fast path. */
> +    slot = ovsthread_getspecific(rwlock->key);
> +    if (slot) {
> +        return slot;
> +    }
> +
> +    /* Slow path: create a new slot for 'rwlock' in this thread. */
> +
> +    /* Allocate room for:
> +     *
> +     *     - Up to CACHE_LINE_SIZE - 1 bytes before the per-thread slot, so
> +     *       the start of the slot doesn't potentially share a cache line.
> +     *
> +     *     - The slot itself.
> +     *
> +     *     - Space following the slot up to the end of the cache line, so
> +     *       that the end of the slot doesn't potentially share a cache
> +     *       line. */
> +    base = xmalloc((CACHE_LINE_SIZE - 1)
> +                   + ROUND_UP(sizeof *slot, CACHE_LINE_SIZE));
> +    slot = (void *) ROUND_UP((uintptr_t) base, CACHE_LINE_SIZE);
> +
> +    slot->base = base;
> +    slot->rwlock = rwlock;
> +    ovs_mutex_init(&slot->mutex);
> +    slot->depth = 0;
> +
> +    ovs_mutex_lock(&rwlock->mutex);
> +    list_push_back(&rwlock->threads, &slot->list_node);
> +    ovs_mutex_unlock(&rwlock->mutex);
> +
> +    ovsthread_setspecific(rwlock->key, slot);
> +
> +    return slot;
> +}
> +
> +/* Locks 'rwlock' for reading.  The read-lock is recursive: it may be acquired
> + * any number of times by a single thread (which must then release it the same
> + * number of times for it to truly be released). */
> +void
> +fat_rwlock_rdlock(const struct fat_rwlock *rwlock_)
> +    OVS_ACQ_RDLOCK(rwlock_)
> +    OVS_NO_THREAD_SAFETY_ANALYSIS
> +{
> +    struct fat_rwlock *rwlock = CONST_CAST(struct fat_rwlock *, rwlock_);
> +    struct fat_rwlock_slot *this = fat_rwlock_get_slot__(rwlock);
> +
> +    switch (this->depth) {
> +    case UINT_MAX:
> +        /* This thread already holds the write-lock. */
> +        abort();
> +
> +    case 0:
> +        ovs_mutex_lock(&this->mutex);
> +        /* fall through */
> +    default:
> +        this->depth++;
> +        break;
> +    }
> +}
> +
> +/* Tries to lock 'rwlock' for reading.  If successful, returns 0.  If taking
> + * the lock would require blocking, returns EBUSY (without blocking). */
> +int
> +fat_rwlock_tryrdlock(const struct fat_rwlock *rwlock_)
> +    OVS_TRY_RDLOCK(0, rwlock_)
> +    OVS_NO_THREAD_SAFETY_ANALYSIS
> +{
> +    struct fat_rwlock *rwlock = CONST_CAST(struct fat_rwlock *, rwlock_);
> +    struct fat_rwlock_slot *this = fat_rwlock_get_slot__(rwlock);
> +    int error;
> +
> +    switch (this->depth) {
> +    case UINT_MAX:
> +        return EBUSY;
> +
> +    case 0:
> +        error = ovs_mutex_trylock(&this->mutex);
> +        if (error) {
> +            return error;
> +        }
> +        /* fall through */
> +    default:
> +        this->depth++;
> +        break;
> +    }
> +
> +    return 0;
> +}
> +
> +/* Locks 'rwlock' for writing.
> + *
> + * The write lock is not recursive. */
> +void
> +fat_rwlock_wrlock(const struct fat_rwlock *rwlock_)
> +    OVS_ACQ_WRLOCK(rwlock_)
> +    OVS_NO_THREAD_SAFETY_ANALYSIS
> +{
> +    struct fat_rwlock *rwlock = CONST_CAST(struct fat_rwlock *, rwlock_);
> +    struct fat_rwlock_slot *this = fat_rwlock_get_slot__(rwlock);
> +    struct fat_rwlock_slot *slot;
> +
> +    ovs_assert(!this->depth);
> +    this->depth = UINT_MAX;
> +
> +    ovs_mutex_lock(&rwlock->mutex);
> +    LIST_FOR_EACH (slot, list_node, &rwlock->threads) {
> +        ovs_mutex_lock(&slot->mutex);
> +    }
> +}
> +
> +/* Unlocks 'rwlock', which the current thread must have locked for reading or
> + * for writing.  If the read lock has been taken recursively, it must be
> + * released the same number of times to be truly released. */
> +void
> +fat_rwlock_unlock(const struct fat_rwlock *rwlock_)
> +    OVS_RELEASES(rwlock_)
> +    OVS_NO_THREAD_SAFETY_ANALYSIS
> +{
> +    struct fat_rwlock *rwlock = CONST_CAST(struct fat_rwlock *, rwlock_);
> +    struct fat_rwlock_slot *this = fat_rwlock_get_slot__(rwlock);
> +    struct fat_rwlock_slot *slot;
> +
> +    switch (this->depth) {
> +    case UINT_MAX:
> +        LIST_FOR_EACH (slot, list_node, &rwlock->threads) {
> +            ovs_mutex_unlock(&slot->mutex);
> +        }
> +        ovs_mutex_unlock(&rwlock->mutex);
> +        this->depth = 0;
> +        break;
> +
> +    case 0:
> +        /* This thread doesn't hold any lock. */
> +        abort();
> +
> +    case 1:
> +        ovs_mutex_unlock(&this->mutex);
> +    default:
> +        this->depth--;
> +        break;
> +    }
> +}
> diff --git a/lib/fat-rwlock.h b/lib/fat-rwlock.h
> new file mode 100644
> index 0000000..27e3c22
> --- /dev/null
> +++ b/lib/fat-rwlock.h
> @@ -0,0 +1,49 @@
> +/*
> + * Copyright (c) 2013, 2014 Nicira, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef FAT_RWLOCK_H
> +#define FAT_RWLOCK_H 1
> +
> +#include "compiler.h"
> +#include "list.h"
> +#include "ovs-thread.h"
> +
> +/* "Fat rwlock".
> + *
> + * This implements a reader-writer lock that uses a lot of memory (128 to 192
> + * bytes per thread that takes the lock) but avoids cache line bouncing when
> + * taking the read side.  Thus, a fat_rwlock is a good choice for rwlocks taken
> + * frequently by readers.
> + */
> +struct OVS_LOCKABLE fat_rwlock {
> +    ovsthread_key_t key;
> +
> +    /* Contains "struct fat_rwlock_slot"s, one for each thread that has taken
> +     * this lock.  Guarded by 'mutex'. */
> +    struct list threads OVS_GUARDED;
> +    struct ovs_mutex mutex;
> +};
> +
> +void fat_rwlock_init(struct fat_rwlock *);
> +void fat_rwlock_destroy(struct fat_rwlock *);
> +
> +void fat_rwlock_rdlock(const struct fat_rwlock *rwlock) OVS_ACQ_RDLOCK(rwlock);
> +int fat_rwlock_tryrdlock(const struct fat_rwlock *rwlock)
> +    OVS_TRY_RDLOCK(0, rwlock);
> +void fat_rwlock_wrlock(const struct fat_rwlock *rwlock) OVS_ACQ_WRLOCK(rwlock);
> +void fat_rwlock_unlock(const struct fat_rwlock *rwlock) OVS_RELEASES(rwlock);
> +
> +#endif /* fat-rwlock.h */
> --
> 1.7.10.4
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev



More information about the dev mailing list