[ovs-dev] [PATCH RFC 2/8] util: New functions for allocating memory while avoiding false sharing.
Ben Pfaff
blp at nicira.com
Thu Jan 23 00:11:52 UTC 2014
This factors code out of fat-rwlock, making it easily usable by other code.
Signed-off-by: Ben Pfaff <blp at nicira.com>
---
lib/fat-rwlock.c | 29 ++----------------------
lib/util.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
lib/util.h | 6 ++++-
3 files changed, 71 insertions(+), 29 deletions(-)
diff --git a/lib/fat-rwlock.c b/lib/fat-rwlock.c
index 9ffa37b..29cef6a 100644
--- a/lib/fat-rwlock.c
+++ b/lib/fat-rwlock.c
@@ -62,16 +62,6 @@ struct fat_rwlock_slot {
* Accessed only by the slot's own thread, so no synchronization is
* needed. */
unsigned int depth;
-
- /* To prevent two of these structures from accidentally occupying the same
- * cache line (causing "false sharing"), we cache-align each of these data
- * structures. That requires malloc()ing extra space and throwing away
- * some space at the beginning, which means that the pointer to this struct
- * isn't necessarily the pointer to the beginning of the block, and so we
- * need to retain the original pointer to free later.
- *
- * Accessed only by a single thread, so no synchronization is needed. */
- void *base; /* Pointer to pass to free() for this block. */
};
static void
@@ -82,7 +72,7 @@ free_slot(struct fat_rwlock_slot *slot)
}
list_remove(&slot->list_node);
- free(slot->base);
+ free_cacheline(slot);
}
static void
@@ -129,7 +119,6 @@ static struct fat_rwlock_slot *
fat_rwlock_get_slot__(struct fat_rwlock *rwlock)
{
struct fat_rwlock_slot *slot;
- void *base;
/* Fast path. */
slot = ovsthread_getspecific(rwlock->key);
@@ -139,21 +128,7 @@ fat_rwlock_get_slot__(struct fat_rwlock *rwlock)
/* Slow path: create a new slot for 'rwlock' in this thread. */
- /* Allocate room for:
- *
- * - Up to CACHE_LINE_SIZE - 1 bytes before the per-thread, so that
- * the start of the slot doesn't potentially share a cache line.
- *
- * - The slot itself.
- *
- * - Space following the slot up to the end of the cache line, so
- * that the end of the slot doesn't potentially share a cache
- * line. */
- base = xmalloc((CACHE_LINE_SIZE - 1)
- + ROUND_UP(sizeof *slot, CACHE_LINE_SIZE));
- slot = (void *) ROUND_UP((uintptr_t) base, CACHE_LINE_SIZE);
-
- slot->base = base;
+ slot = xmalloc_cacheline(sizeof *slot);
slot->rwlock = rwlock;
ovs_mutex_init(&slot->mutex);
slot->depth = 0;
diff --git a/lib/util.c b/lib/util.c
index 0ebf085..811967c 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -172,6 +172,69 @@ x2nrealloc(void *p, size_t *n, size_t s)
return xrealloc(p, *n * s);
}
+/* This system's cache line size, in bytes.
+ * Being wrong hurts performance but not correctness. */
+#define CACHE_LINE_SIZE 64
+BUILD_ASSERT_DECL(IS_POW2(CACHE_LINE_SIZE));
+
+/* Alignment of user data in a cacheline block; also its header size. */
+#define MEM_ALIGN MAX(sizeof(void *), 8)
+BUILD_ASSERT_DECL(IS_POW2(MEM_ALIGN));
+BUILD_ASSERT_DECL(CACHE_LINE_SIZE >= MEM_ALIGN);
+
+/* Allocates and returns 'size' bytes of memory in dedicated cache lines. That
+ * is, the memory block returned will not share a cache line with other data,
+ * avoiding "false sharing". The returned pointer is MEM_ALIGN bytes past the
+ * start of a cache line, so do not assume cache-line alignment.
+ *
+ * Use free_cacheline(), not free(), to free the returned memory block. */
+void *
+xmalloc_cacheline(size_t size)
+{
+ void **payload;
+ void *base;
+
+ /* Allocate room for:
+ *
+ * - Up to CACHE_LINE_SIZE - 1 bytes before the payload, so that the
+ * start of the payload doesn't potentially share a cache line.
+ *
+ * - A payload consisting of a void *, followed by padding out to
+ * MEM_ALIGN bytes, followed by 'size' bytes of user data.
+ *
+ * - Space following the payload up to the end of the cache line, so
+ * that the end of the payload doesn't potentially share a cache line
+ * with some following block. */
+ base = xmalloc((CACHE_LINE_SIZE - 1)
+ + ROUND_UP(MEM_ALIGN + size, CACHE_LINE_SIZE));
+
+ /* Locate the payload and store a pointer to the base at the beginning. */
+ payload = (void **) ROUND_UP((uintptr_t) base, CACHE_LINE_SIZE);
+ *payload = base;
+
+ return (char *) payload + MEM_ALIGN;
+}
+
+/* Like xmalloc_cacheline(), but zero-fills the 'size' bytes of user data.
+ * Free the returned block with free_cacheline(). */
+void *
+xzalloc_cacheline(size_t size)
+{
+ void *p = xmalloc_cacheline(size);
+ memset(p, 0, size);
+ return p;
+}
+
+/* Frees 'p', a block returned by xmalloc_cacheline() or xzalloc_cacheline(),
+ * via the base pointer saved MEM_ALIGN bytes before it. Null 'p' is OK. */
+void
+free_cacheline(void *p)
+{
+ if (p) {
+ free(*(void **) ((uintptr_t) p - MEM_ALIGN));
+ }
+}
+
char *
xasprintf(const char *format, ...)
{
diff --git a/lib/util.h b/lib/util.h
index 8886a54..fca670f 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -256,6 +256,10 @@ char *xasprintf(const char *format, ...) PRINTF_FORMAT(1, 2) MALLOC_LIKE;
char *xvasprintf(const char *format, va_list) PRINTF_FORMAT(1, 0) MALLOC_LIKE;
void *x2nrealloc(void *p, size_t *n, size_t s);
+void *xmalloc_cacheline(size_t) MALLOC_LIKE;
+void *xzalloc_cacheline(size_t) MALLOC_LIKE;
+void free_cacheline(void *);
+
void ovs_strlcpy(char *dst, const char *src, size_t size);
void ovs_strzcpy(char *dst, const char *src, size_t size);
--
1.7.10.4
More information about the dev
mailing list