[ovs-dev] [classifier-opt] hash: Introduce an implementation of murmurhash.

Ben Pfaff blp at nicira.com
Mon Aug 20 21:27:07 UTC 2012


Murmurhash is generally superior to the Jenkins lookup3 hash according to
the available figures.  Perhaps we should generally replace our current
hashes by murmurhash.

For now, I'm introducing a parallel implementation to allow it to be used
in cases where an incremental one-word-at-a-time hash is desirable.  The
first user will be added in an upcoming commit.

Signed-off-by: Ben Pfaff <blp at nicira.com>
---
This is meant for the "classifier-opt" series, just before the
final patch.  (If you look at that final patch as I sent it out,
you can see a couple of comments
 * XXX murmurhash would be much better for this than hash_int(). */
but I forgot to do that before I sent them out.

 lib/hash.c        |   15 ++++++++++++
 lib/hash.h        |   42 +++++++++++++++++++++++++++++++++++
 tests/test-hash.c |   62 ++++++++++++++++++++++++++++++++--------------------
 3 files changed, 95 insertions(+), 24 deletions(-)

diff --git a/lib/hash.c b/lib/hash.c
index f7aa916..41ad1bf 100644
--- a/lib/hash.c
+++ b/lib/hash.c
@@ -102,3 +102,18 @@ hash_bytes(const void *p_, size_t n, uint32_t basis)
 
     return c;
 }
+
+/* Returns the hash of the 'n' 32-bit words at 'p', starting from 'basis'.
+ * 'p' must be properly aligned. */
+uint32_t
+mhash_words(const uint32_t p[], size_t n_words, uint32_t basis)
+{
+    uint32_t hash;
+    size_t i;
+
+    hash = basis;
+    for (i = 0; i < n_words; i++) {
+        hash = mhash_add(hash, p[i]);
+    }
+    return mhash_finish(hash, n_words);
+}
diff --git a/lib/hash.h b/lib/hash.h
index ac6a63c..701e686 100644
--- a/lib/hash.h
+++ b/lib/hash.h
@@ -118,6 +118,48 @@ static inline uint32_t hash_pointer(const void *p, uint32_t basis)
     return hash_int((uint32_t) (uintptr_t) p, basis);
 }
 
+/* Murmurhash by Austin Appleby,
+ * from http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp.
+ *
+ * The upstream license there says:
+ *
+ * // MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * // domain. The author hereby disclaims copyright to this source code.
+ *
+ * Murmurhash is faster and higher-quality than the Jenkins lookup3 hash.  When
+ * we have a little more familiarity with it, it's probably a good idea to
+ * switch all of OVS to it.
+ *
+ * For now, we have this implementation here for use by code that needs a hash
+ * that is convenient for use one word at a time, since the Jenkins lookup3
+ * hash works three words at a time.
+ *
+ * See mhash_words() for sample usage. */
+
+uint32_t mhash_words(const uint32_t data[], size_t n_words, uint32_t basis);
+
+static inline uint32_t mhash_add(uint32_t hash, uint32_t data)
+{
+    data *= 0xcc9e2d51;
+    data = hash_rot(data, 15);
+    data *= 0x1b873593;
+
+    hash ^= data;
+    hash = hash_rot(hash, 13);
+    return hash * 5 + 0xe6546b64;
+}
+
+static inline uint32_t mhash_finish(uint32_t hash, size_t n)
+{
+    hash ^= n * 4;
+    hash ^= hash_rot(hash, 16);
+    hash *= 0x85ebca6b;
+    hash ^= hash_rot(hash, 13);
+    hash *= 0xc2b2ae35;
+    hash ^= hash_rot(hash, 16);
+    return hash;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/tests/test-hash.c b/tests/test-hash.c
index bdf1435..d53ba2e 100644
--- a/tests/test-hash.c
+++ b/tests/test-hash.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009 Nicira, Inc.
+ * Copyright (c) 2009, 2012 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -41,6 +41,12 @@ hash_words_cb(uint32_t input)
 }
 
 static uint32_t
+mhash_words_cb(uint32_t input)
+{
+    return mhash_words(&input, 1, 0);
+}
+
+static uint32_t
 hash_int_cb(uint32_t input)
 {
     return hash_int(input, 0);
@@ -76,11 +82,37 @@ check_word_hash(uint32_t (*hash)(uint32_t), const char *name,
     }
 }
 
-int
-main(void)
+static void
+check_3word_hash(uint32_t (*hash)(const uint32_t[], size_t, uint32_t),
+                 const char *name)
 {
     int i, j;
 
+    for (i = 0; i <= 96; i++) {
+        for (j = i + 1; j <= 96; j++) {
+            uint32_t in1[3], in2[3];
+            uint32_t out1, out2;
+            const int min_unique = 12;
+            const uint32_t unique_mask = (UINT32_C(1) << min_unique) - 1;
+
+            set_bit(in1, i);
+            set_bit(in2, j);
+            out1 = hash(in1, 3, 0);
+            out2 = hash(in2, 3, 0);
+            if ((out1 & unique_mask) == (out2 & unique_mask)) {
+                printf("%s has a partial collision:\n", name);
+                printf("hash(1 << %d) == %08"PRIx32"\n", i, out1);
+                printf("hash(1 << %d) == %08"PRIx32"\n", j, out2);
+                printf("The low-order %d bits of output are both "
+                       "0x%"PRIx32"\n", min_unique, out1 & unique_mask);
+            }
+        }
+    }
+}
+
+int
+main(void)
+{
     /* Check that all hashes computed with hash_words with one 1-bit (or no
      * 1-bits) set within a single 32-bit word have different values in all
      * 11-bit consecutive runs.
@@ -98,6 +130,7 @@ main(void)
      * independence must be a bad assumption :-)
      */
     check_word_hash(hash_words_cb, "hash_words", 11);
+    check_word_hash(mhash_words_cb, "mhash_words", 11);
 
     /* Check that all hash functions of with one 1-bit (or no 1-bits) set
      * within three 32-bit words have different values in their lowest 12
@@ -112,27 +145,8 @@ main(void)
      *
      * so we are doing pretty well to not have any collisions in 12 bits.
      */
-    for (i = 0; i <= 96; i++) {
-        for (j = i + 1; j <= 96; j++) {
-            uint32_t in1[3], in2[3];
-            uint32_t out1, out2;
-            const int min_unique = 12;
-            const uint32_t unique_mask = (UINT32_C(1) << min_unique) - 1;
-
-            set_bit(in1, i);
-            set_bit(in2, j);
-            out1 = hash_words(in1, 3, 0);
-            out2 = hash_words(in2, 3, 0);
-            if ((out1 & unique_mask) == (out2 & unique_mask)) {
-                printf("Partial collision:\n");
-                printf("hash(1 << %d) == %08"PRIx32"\n", i, out1);
-                printf("hash(1 << %d) == %08"PRIx32"\n", j, out2);
-                printf("The low-order %d bits of output are both "
-                       "0x%"PRIx32"\n", min_unique, out1 & unique_mask);
-                exit(1);
-            }
-        }
-    }
+    check_3word_hash(hash_words, "hash_words");
+    check_3word_hash(mhash_words, "mhash_words");
 
     /* Check that all hashes computed with hash_int with one 1-bit (or no
      * 1-bits) set within a single 32-bit word have different values in all
-- 
1.7.2.5




More information about the dev mailing list