[ovs-dev] [PATCH] lib/ovs-thread: Add Transactional Memory (TM) support.

William Tu u9012063 at gmail.com
Tue Feb 23 21:36:16 UTC 2016


v2->v3
- Pass all test cases
- Introduce separate lock elision for rwlock
- Add performance comparison of cmap/hmap search

v1->v2
- Fix a bug at UNLOCK_ELISION
- Add checking if glibc version >= 2.21 (OVS_CHECK_GLIBC_TSX)
- Add checking of whether cpu has TSX support (OVS_CHECK_RTM)
- Enable LOCK_ELISION only when CPU has TSX and glibc doesn't
  (if glibc version >= 2.21, then using pthread_mutex has lock elision)
  - Add 20% mutation test-cmap testcase
  - List failed testcases below

The patch shows the preliminary results of enabling RTM
(Restricted Transactional Memory). A successful transactional execution
elides the lock, i.e., the lock is bypassed, and exposes concurrency.
However, transactions might abort due to several reasons
such as data conflicts, I/O operations, syscall, etc. When transaction aborts,
it falls back to the original locking mechanisms. Thus, the performance
improvement depends on the abort rate, and the overhead of speculative execution
and rollback.

The patch adds ovs_##ELISION_FUNC at LOCK_FUNCTION, TRY_LOCK_FUNCTION, and
UNLOCK_FUNCTION macros, and calls either rwlock_elision or mutex_elision
accordingly. Experiments show that for cmap, the TM does not seem to have
observable improvements below 5% mutations, while hmap is more obvious.
For cmap search over 5% mutation, the search time of TM shows much better
scalability when % of writers increase.

Results are shown by using test-cmap benchmark with different mutation %.
$ tests/ovstest test-cmap benchmark 20000000 4 <mutation> 1

Unit: ms, number presented as baseline / TM
<mutation>  cmap_search     hmap_search
 0.1%           117/117        312/292
   2%           124/120        688/328
   5%           142/125       1093/403
  10%           237/132       1588/432
  20%           512/160       2516/679
  40%          1010/277       2927/1007
  60%          1278/324       3313/1119
  80%          1614/343       3392/1291

Signed-off-by: William Tu <u9012063 at gmail.com>
---
 configure.ac      |   2 +
 lib/ovs-thread.c  | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 m4/openvswitch.m4 |  21 ++++++
 3 files changed, 210 insertions(+), 14 deletions(-)

diff --git a/configure.ac b/configure.ac
index 49aa182..5c3bd15 100644
--- a/configure.ac
+++ b/configure.ac
@@ -113,6 +113,8 @@ OVS_CHECK_PKIDIR
 OVS_CHECK_RUNDIR
 OVS_CHECK_DBDIR
 OVS_CHECK_BACKTRACE
+OVS_CHECK_RTM
+OVS_CHECK_GLIBC_TSX
 OVS_CHECK_PERF_EVENT
 OVS_CHECK_VALGRIND
 OVS_CHECK_SOCKET_LIBS
diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c
index b0e10ee..6226522 100644
--- a/lib/ovs-thread.c
+++ b/lib/ovs-thread.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2014, 2015 Nicira, Inc.
+ * Copyright (c) 2013, 2014, 2015, 2016 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -52,7 +52,168 @@ static const char *must_not_fork;
 /* True if we created any threads beyond the main initial thread. */
 static bool multithreaded;
 
-#define LOCK_FUNCTION(TYPE, FUN) \
+#if defined(HAVE_RTM) && !defined(HAVE_GLIBC_TSX)
+/* Intel Transactional Memory (TSX) support */
+#define _XBEGIN_STARTED         (~0u)
+#define _ABORT_EXPLICIT         (1 << 0)
+#define _ABORT_RETRY            (1 << 1)
+#define _ABORT_CONFLICT         (1 << 2)
+#define _ABORT_CAPACITY         (1 << 3)
+#define _ABORT_DEBUG            (1 << 4)
+#define _ABORT_NESTED           (1 << 5)
+#define _XABORT_CODE(x)         (((x) >> 24) & 0xff)
+#define _ABORT_LOCK_BUSY        0xff
+
+#define MAX_RETRY_XBEGIN        10
+#define MAX_ABORT_RETRY         5
+
+#define __force_inline __attribute__((__always_inline__)) inline
+
+/* See Intel 64 and IA-32 Architectures Software Developer's Manual
+ * Instruction Set Reference.  Assemblers might not support TSX
+ * instructions (xbegin/xend/xabort/xtest). */
+static __force_inline int
+_xbegin(void)
+{
+    int ret = _XBEGIN_STARTED;
+    __asm__ volatile (".byte 0xc7,0xf8 ; .long 0"
+                        : "+a" (ret) :: "memory");
+    return ret;
+}
+
+static __force_inline void
+_xend(void)
+{
+    __asm__ volatile (".byte 0x0f,0x01,0xd5" ::: "memory");
+}
+
+static __force_inline void
+_xabort(const unsigned int status)
+{
+     __asm__ volatile (".byte 0xc6,0xf8,%P0"
+                        :: "i" (status) : "memory");
+}
+
+static __force_inline int
+_xtest(void)
+{
+    unsigned char ret;
+    /* Return 1 if RTM_ACTIVE = 1. */
+    __asm__ volatile (".byte 0x0f,0x01,0xd6 ; setnz %0"
+                        : "=r" (ret) :: "memory");
+    return ret;
+}
+
+/* Returns true if successfully starts the transaction,
+ * otherwise, returns false and fallback to mutex_lock later. */
+static inline bool
+ovs_mutex_elision(pthread_mutex_t *mutex)
+{
+    unsigned status;
+    int i;
+    int abort_retry = 0;
+    for(i = 0; i < MAX_RETRY_XBEGIN; i++) {
+        if ((status = _xbegin()) == _XBEGIN_STARTED) {
+            if ((mutex)->__data.__lock == 0) {
+                return true;
+            }
+            _xabort(_ABORT_LOCK_BUSY);
+        }
+        if (!(status & _ABORT_RETRY)) {
+            if (abort_retry >= MAX_ABORT_RETRY) {
+                break;
+            }
+            abort_retry++;
+        }
+    }
+    return false;
+}
+
+/* If the mutex lock is free and we're in transaction,
+ * commit and return true.  Otherwise, return false and
+ * falls back to mutex_unlock later. */
+static inline bool
+ovs_unmutex_elision(pthread_mutex_t *mutex)
+{
+    if (((mutex)->__data.__lock == 0)) {
+        /* FIXME: we shouldn't need to check _xtest(). */
+        if (_xtest() == 1) {
+            _xend();
+        }
+        return true;
+    }
+    return false;
+}
+
+/* Returns true if successfully starts the transaction,
+ * otherwise, returns false and fallback to rwlock later. */
+static inline bool
+ovs_rwlock_elision(pthread_rwlock_t *rwlock)
+{
+    unsigned status;
+    int i;
+    int abort_retry = 0;
+    for(i = 0; i < MAX_RETRY_XBEGIN; i++) {
+        if ((status = _xbegin()) == _XBEGIN_STARTED) {
+            if ((rwlock)->__data.__writer == 0 &&
+                (rwlock)->__data.__nr_readers == 0) {
+                return true;
+            }
+            _xabort(_ABORT_LOCK_BUSY);
+        }
+        if (!(status & _ABORT_RETRY)) {
+            if (abort_retry >= MAX_ABORT_RETRY) {
+                break;
+            }
+            abort_retry++;
+        }
+    }
+    return false;
+}
+
+/* If the rwlock is free and we're in transaction,
+ * commit and return true.  Otherwise, return false and
+ * falls back to rwlock_unlock later. */
+static inline bool
+ovs_unrwlock_elision(pthread_rwlock_t *rwlock)
+{
+    if ((rwlock)->__data.__writer == 0 &&
+        (rwlock)->__data.__nr_readers == 0) {
+        if (_xtest() == 1) {
+            _xend();
+        }
+        return true;
+    }
+    return false;
+}
+
+#else
+static inline bool
+ovs_mutex_elision(pthread_mutex_t *mutex OVS_UNUSED)
+{
+   return false;
+}
+
+static inline bool
+ovs_unmutex_elision(pthread_mutex_t *mutex OVS_UNUSED)
+{
+   return false;
+}
+
+static inline bool
+ovs_rwlock_elision(pthread_rwlock_t *rwlock OVS_UNUSED)
+{
+    return false;
+}
+
+static inline bool
+ovs_unrwlock_elision(pthread_rwlock_t *rwlock OVS_UNUSED)
+{
+    return false;
+}
+#endif /* defined(HAVE_RTM) && !defined(HAVE_GLIBC_TSX) */
+
+#define LOCK_FUNCTION(TYPE, FUN, ELISION_FUNC) \
     void \
     ovs_##TYPE##_##FUN##_at(const struct ovs_##TYPE *l_, \
                             const char *where) \
@@ -61,6 +222,10 @@ static bool multithreaded;
         struct ovs_##TYPE *l = CONST_CAST(struct ovs_##TYPE *, l_); \
         int error; \
  \
+        if (ovs_##ELISION_FUNC(&l->lock)) { \
+            return;\
+        }\
+ \
         /* Verify that 'l' was initialized. */ \
         if (OVS_UNLIKELY(!l->where)) { \
             ovs_abort(0, "%s: %s() passed uninitialized ovs_"#TYPE, \
@@ -73,11 +238,11 @@ static bool multithreaded;
         } \
         l->where = where; \
  }
-LOCK_FUNCTION(mutex, lock);
-LOCK_FUNCTION(rwlock, rdlock);
-LOCK_FUNCTION(rwlock, wrlock);
+LOCK_FUNCTION(mutex, lock, mutex_elision);
+LOCK_FUNCTION(rwlock, rdlock, rwlock_elision);
+LOCK_FUNCTION(rwlock, wrlock, rwlock_elision);
 
-#define TRY_LOCK_FUNCTION(TYPE, FUN) \
+#define TRY_LOCK_FUNCTION(TYPE, FUN, ELISION_FUNC) \
     int \
     ovs_##TYPE##_##FUN##_at(const struct ovs_##TYPE *l_, \
                             const char *where) \
@@ -86,6 +251,10 @@ LOCK_FUNCTION(rwlock, wrlock);
         struct ovs_##TYPE *l = CONST_CAST(struct ovs_##TYPE *, l_); \
         int error; \
  \
+        if (ovs_##ELISION_FUNC(&l->lock)) { \
+            return 0;\
+        } \
+ \
         /* Verify that 'l' was initialized. */ \
         if (OVS_UNLIKELY(!l->where)) { \
             ovs_abort(0, "%s: %s() passed uninitialized ovs_"#TYPE, \
@@ -101,11 +270,11 @@ LOCK_FUNCTION(rwlock, wrlock);
         } \
         return error; \
     }
-TRY_LOCK_FUNCTION(mutex, trylock);
-TRY_LOCK_FUNCTION(rwlock, tryrdlock);
-TRY_LOCK_FUNCTION(rwlock, trywrlock);
+TRY_LOCK_FUNCTION(mutex, trylock, mutex_elision);
+TRY_LOCK_FUNCTION(rwlock, tryrdlock, rwlock_elision);
+TRY_LOCK_FUNCTION(rwlock, trywrlock, rwlock_elision);
 
-#define UNLOCK_FUNCTION(TYPE, FUN, WHERE) \
+#define UNLOCK_FUNCTION(TYPE, FUN, WHERE, ELISION_FUNC) \
     void \
     ovs_##TYPE##_##FUN(const struct ovs_##TYPE *l_) \
         OVS_NO_THREAD_SAFETY_ANALYSIS \
@@ -113,6 +282,10 @@ TRY_LOCK_FUNCTION(rwlock, trywrlock);
         struct ovs_##TYPE *l = CONST_CAST(struct ovs_##TYPE *, l_); \
         int error; \
  \
+        if (ovs_##ELISION_FUNC(&l->lock)) { \
+            return; \
+        } \
+ \
         /* Verify that 'l' was initialized. */ \
         ovs_assert(l->where); \
  \
@@ -122,10 +295,10 @@ TRY_LOCK_FUNCTION(rwlock, trywrlock);
             ovs_abort(error, "pthread_%s_%s failed", #TYPE, #FUN); \
         } \
     }
-UNLOCK_FUNCTION(mutex, unlock, "<unlocked>");
-UNLOCK_FUNCTION(mutex, destroy, NULL);
-UNLOCK_FUNCTION(rwlock, unlock, "<unlocked>");
-UNLOCK_FUNCTION(rwlock, destroy, NULL);
+UNLOCK_FUNCTION(mutex, unlock, "<unlocked>", unmutex_elision);
+UNLOCK_FUNCTION(mutex, destroy, NULL, unmutex_elision);
+UNLOCK_FUNCTION(rwlock, unlock, "<unlocked>", unrwlock_elision);
+UNLOCK_FUNCTION(rwlock, destroy, NULL, unrwlock_elision);
 
 #define XPTHREAD_FUNC1(FUNCTION, PARAM1)                \
     void                                                \
diff --git a/m4/openvswitch.m4 b/m4/openvswitch.m4
index 0149c30..8245da9 100644
--- a/m4/openvswitch.m4
+++ b/m4/openvswitch.m4
@@ -309,6 +309,27 @@ AC_DEFUN([OVS_CHECK_BACKTRACE],
                   [AC_DEFINE([HAVE_BACKTRACE], [1],
                              [Define to 1 if you have backtrace(3).])])])
 
+dnl Defines HAVE_RTM if CPU supports TSX
+AC_DEFUN([OVS_CHECK_RTM],
+  [ if (test -e /proc/cpuinfo && cat /proc/cpuinfo | grep rtm > /dev/null); then
+        AC_DEFINE([HAVE_RTM], [1], [Define to 1 if CPU has Transactional Memory.])
+    fi])
+
+dnl Defines HAVE_GLIBC_TSX if glibc supports TSX
+AC_DEFUN([OVS_CHECK_GLIBC_TSX],
+  [AC_RUN_IFELSE(
+    [AC_LANG_PROGRAM([],
+    [[
+      #if __GLIBC__ >= 2 && __GLIBC_MINOR__ >= 21
+      return 0;
+      #else
+      return 1;
+      #endif
+    ]])],
+    [AC_DEFINE([HAVE_GLIBC_TSX], [1], [Defines HAVE_GLIBC_TSX if glibc supports TSX])],
+    [], [])
+  ])
+
 dnl Defines HAVE_PERF_EVENT if linux/perf_event.h is found.
 AC_DEFUN([OVS_CHECK_PERF_EVENT],
   [AC_CHECK_HEADERS([linux/perf_event.h])])
-- 
2.5.0




More information about the dev mailing list