[ovs-dev] [murmurhash 4/4] hash: Replace primary hash functions by murmurhash.

Ben Pfaff blp at nicira.com
Thu Jan 17 00:15:49 UTC 2013


On Wed, Jan 16, 2013 at 02:24:56PM -0800, Ethan Jackson wrote:
> I might consider putting hash_3words() near hash_2words() in the
> header file.  Mostly just for consistency sake.

I put the prototypes nearby at the top of the file, but hash_3words()
generated enough code that it didn't look worthwhile to inline it, so
the definition is in the .c file.

The prototypes did look like they could use some rearrangement,
though.  How about this:

uint32_t hash_words(const uint32_t data[], size_t n_words, uint32_t basis);
uint32_t hash_bytes(const void *, size_t n_bytes, uint32_t basis);

static inline uint32_t hash_int(uint32_t x, uint32_t basis);
static inline uint32_t hash_2words(uint32_t, uint32_t);
uint32_t hash_3words(uint32_t, uint32_t, uint32_t);

static inline uint32_t hash_boolean(bool x, uint32_t basis);
uint32_t hash_double(double, uint32_t basis);

static inline uint32_t hash_pointer(const void *, uint32_t basis);
static inline uint32_t hash_string(const char *, uint32_t basis);

> In hash_3words(), you're finishing with a byte count of 8 instead 12.
> I may be misinterpreting  what that value is for though.

You're right.  I changed it to 12.

> Acked-by: Ethan Jackson <ethan at nicira.com>

Here's the new version in case you want to look.  I only made the
changes above

--8<--------------------------cut here-------------------------->8--

From: Ben Pfaff <blp at nicira.com>
Date: Wed, 16 Jan 2013 16:14:42 -0800
Subject: [PATCH] hash: Replace primary hash functions by murmurhash.

murmurhash is faster than Jenkins and slightly higher quality, so switch to
it for hashing words.

The best timings I got for hashing for data lengths of the following
numbers of 32-bit words, in seconds per 1,000,000,000 hashes, were:

words     murmurhash      Jenkins hash
-----     ----------      ------------
   1           8.4              10.4
   2          10.3              10.3
   3          11.2              10.7
   4          12.6              18.0
   5          13.9              18.3
   6          15.2              18.7

In other words, murmurhash outperforms Jenkins for all input lengths other
than exactly 3 32-bit words (12 bytes).  (It's understandable that Jenkins
would have a best case at 12 bytes, because Jenkins works in 12-byte
chunks.)  Even in the case where Jenkins is faster, it's only by 5%.  On
average within this data set, murmurhash is 15% faster, and for 4-word
input it is 30% faster.

We retain Jenkins for flow_hash_symmetric_l4() and flow_hash_fields(),
which are cases where the hash value is exposed externally.

This commit appears to improve "ovs-benchmark rate" results slightly by
a few hundred connections per second (under 1%), when used with an NVP
controller.

Signed-off-by: Ben Pfaff <blp at nicira.com>
---
 lib/automake.mk   |    2 +
 lib/flow.c        |    5 +-
 lib/hash.c        |   91 +++++++++--------------------------
 lib/hash.h        |  134 ++++++++++++++++++-----------------------------------
 lib/jhash.c       |  125 +++++++++++++++++++++++++++++++++++++++++++++++++
 lib/jhash.h       |   44 +++++++++++++++++
 tests/test-hash.c |   27 ++++++-----
 7 files changed, 259 insertions(+), 169 deletions(-)
 create mode 100644 lib/jhash.c
 create mode 100644 lib/jhash.h

diff --git a/lib/automake.mk b/lib/automake.mk
index 740f33f..4547198 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -61,6 +61,8 @@ lib_libopenvswitch_a_SOURCES = \
 	lib/hmap.h \
 	lib/hmapx.c \
 	lib/hmapx.h \
+	lib/jhash.c \
+	lib/jhash.h \
 	lib/json.c \
 	lib/json.h \
 	lib/jsonrpc.c \
diff --git a/lib/flow.c b/lib/flow.c
index 05bc269..2a3dd3d 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -30,6 +30,7 @@
 #include "csum.h"
 #include "dynamic-string.h"
 #include "hash.h"
+#include "jhash.h"
 #include "match.h"
 #include "ofpbuf.h"
 #include "openflow/openflow.h"
@@ -722,7 +723,7 @@ flow_hash_symmetric_l4(const struct flow *flow, uint32_t basis)
             fields.tp_port = flow->tp_src ^ flow->tp_dst;
         }
     }
-    return hash_bytes(&fields, sizeof fields, basis);
+    return jhash_bytes(&fields, sizeof fields, basis);
 }
 
 /* Hashes the portions of 'flow' designated by 'fields'. */
@@ -733,7 +734,7 @@ flow_hash_fields(const struct flow *flow, enum nx_hash_fields fields,
     switch (fields) {
 
     case NX_HASH_FIELDS_ETH_SRC:
-        return hash_bytes(flow->dl_src, sizeof flow->dl_src, basis);
+        return jhash_bytes(flow->dl_src, sizeof flow->dl_src, basis);
 
     case NX_HASH_FIELDS_SYMMETRIC_L4:
         return flow_hash_symmetric_l4(flow, basis);
diff --git a/lib/hash.c b/lib/hash.c
index 8cee5d0..e954d78 100644
--- a/lib/hash.c
+++ b/lib/hash.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2009, 2010, 2012 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,57 +18,11 @@
 #include <string.h>
 #include "unaligned.h"
 
-/* Returns the hash of the 'n' 32-bit words at 'p', starting from 'basis'.
- * 'p' must be properly aligned. */
-uint32_t
-hash_words(const uint32_t *p, size_t n, uint32_t basis)
-{
-    uint32_t a, b, c;
-
-    a = b = c = 0xdeadbeef + (((uint32_t) n) << 2) + basis;
-
-    while (n > 3) {
-        a += p[0];
-        b += p[1];
-        c += p[2];
-        hash_mix(&a, &b, &c);
-        n -= 3;
-        p += 3;
-    }
-
-    switch (n) {
-    case 3:
-        c += p[2];
-        /* fall through */
-    case 2:
-        b += p[1];
-        /* fall through */
-    case 1:
-        a += p[0];
-        hash_final(&a, &b, &c);
-        /* fall through */
-    case 0:
-        break;
-    }
-    return c;
-}
-
 /* Returns the hash of 'a', 'b', and 'c'. */
 uint32_t
 hash_3words(uint32_t a, uint32_t b, uint32_t c)
 {
-    a += 0xdeadbeef;
-    b += 0xdeadbeef;
-    c += 0xdeadbeef;
-    hash_final(&a, &b, &c);
-    return c;
-}
-
-/* Returns the hash of 'a' and 'b'. */
-uint32_t
-hash_2words(uint32_t a, uint32_t b)
-{
-    return hash_3words(a, b, 0);
+    return mhash_finish(mhash_add(mhash_add(mhash_add(a, 0), b), c), 12);
 }
 
 /* Returns the hash of the 'n' bytes at 'p', starting from 'basis'. */
@@ -76,37 +30,30 @@ uint32_t
 hash_bytes(const void *p_, size_t n, uint32_t basis)
 {
     const uint8_t *p = p_;
-    uint32_t a, b, c;
-
-    a = b = c = 0xdeadbeef + n + basis;
+    size_t orig_n = n;
+    uint32_t hash;
 
-    while (n >= 12) {
-        a += get_unaligned_u32((uint32_t *) p);
-        b += get_unaligned_u32((uint32_t *) (p + 4));
-        c += get_unaligned_u32((uint32_t *) (p + 8));
-        hash_mix(&a, &b, &c);
-        n -= 12;
-        p += 12;
+    hash = basis;
+    while (n >= 4) {
+        hash = mhash_add(hash, get_unaligned_u32((const uint32_t *) p));
+        n -= 4;
+        p += 4;
     }
 
     if (n) {
-        uint32_t tmp[3];
+        uint32_t tmp = 0;
 
-        tmp[0] = tmp[1] = tmp[2] = 0;
-        memcpy(tmp, p, n);
-        a += tmp[0];
-        b += tmp[1];
-        c += tmp[2];
-        hash_final(&a, &b, &c);
+        memcpy(&tmp, p, n);
+        hash = mhash_add__(hash, tmp);
     }
 
-    return c;
+    return mhash_finish(hash, orig_n);
 }
 
 /* Returns the hash of the 'n' 32-bit words at 'p', starting from 'basis'.
  * 'p' must be properly aligned. */
 uint32_t
-mhash_words(const uint32_t p[], size_t n_words, uint32_t basis)
+hash_words(const uint32_t p[], size_t n_words, uint32_t basis)
 {
     uint32_t hash;
     size_t i;
@@ -117,3 +64,13 @@ mhash_words(const uint32_t p[], size_t n_words, uint32_t basis)
     }
     return mhash_finish(hash, n_words * 4);
 }
+
+uint32_t
+hash_double(double x, uint32_t basis)
+{
+    uint32_t value[2];
+    BUILD_ASSERT_DECL(sizeof x == sizeof value);
+
+    memcpy(value, &x, sizeof value);
+    return hash_3words(value[0], value[1], basis);
+}
diff --git a/lib/hash.h b/lib/hash.h
index d33924f..f8a72ed 100644
--- a/lib/hash.h
+++ b/lib/hash.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2009, 2010, 2012 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -26,65 +26,69 @@
 extern "C" {
 #endif
 
-/* This is the public domain lookup3 hash by Bob Jenkins from
- * http://burtleburtle.net/bob/c/lookup3.c, modified for style. */
-
 static inline uint32_t
 hash_rot(uint32_t x, int k)
 {
     return (x << k) | (x >> (32 - k));
 }
 
-static inline void
-hash_mix(uint32_t *a, uint32_t *b, uint32_t *c)
+uint32_t hash_words(const uint32_t data[], size_t n_words, uint32_t basis);
+uint32_t hash_bytes(const void *, size_t n_bytes, uint32_t basis);
+
+static inline uint32_t hash_int(uint32_t x, uint32_t basis);
+static inline uint32_t hash_2words(uint32_t, uint32_t);
+uint32_t hash_3words(uint32_t, uint32_t, uint32_t);
+
+static inline uint32_t hash_boolean(bool x, uint32_t basis);
+uint32_t hash_double(double, uint32_t basis);
+
+static inline uint32_t hash_pointer(const void *, uint32_t basis);
+static inline uint32_t hash_string(const char *, uint32_t basis);
+
+/* Murmurhash by Austin Appleby,
+ * from http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp.
+ *
+ * The upstream license there says:
+ *
+ * // MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * // domain. The author hereby disclaims copyright to this source code.
+ *
+ * See hash_words() for sample usage. */
+
+static inline uint32_t mhash_add__(uint32_t hash, uint32_t data)
 {
-      *a -= *c; *a ^= hash_rot(*c,  4); *c += *b;
-      *b -= *a; *b ^= hash_rot(*a,  6); *a += *c;
-      *c -= *b; *c ^= hash_rot(*b,  8); *b += *a;
-      *a -= *c; *a ^= hash_rot(*c, 16); *c += *b;
-      *b -= *a; *b ^= hash_rot(*a, 19); *a += *c;
-      *c -= *b; *c ^= hash_rot(*b,  4); *b += *a;
+    data *= 0xcc9e2d51;
+    data = hash_rot(data, 15);
+    data *= 0x1b873593;
+    return hash ^ data;
 }
 
-static inline void
-hash_final(uint32_t *a, uint32_t *b, uint32_t *c)
+static inline uint32_t mhash_add(uint32_t hash, uint32_t data)
 {
-      *c ^= *b; *c -= hash_rot(*b, 14);
-      *a ^= *c; *a -= hash_rot(*c, 11);
-      *b ^= *a; *b -= hash_rot(*a, 25);
-      *c ^= *b; *c -= hash_rot(*b, 16);
-      *a ^= *c; *a -= hash_rot(*c,  4);
-      *b ^= *a; *b -= hash_rot(*a, 14);
-      *c ^= *b; *c -= hash_rot(*b, 24);
+    hash = mhash_add__(hash, data);
+    hash = hash_rot(hash, 13);
+    return hash * 5 + 0xe6546b64;
 }
 
-uint32_t hash_words(const uint32_t *, size_t n_word, uint32_t basis);
-uint32_t hash_2words(uint32_t, uint32_t);
-uint32_t hash_3words(uint32_t, uint32_t, uint32_t);
-uint32_t hash_bytes(const void *, size_t n_bytes, uint32_t basis);
+static inline uint32_t mhash_finish(uint32_t hash, size_t n_bytes)
+{
+    hash ^= n_bytes;
+    hash ^= hash >> 16;
+    hash *= 0x85ebca6b;
+    hash ^= hash >> 13;
+    hash *= 0xc2b2ae35;
+    hash ^= hash >> 16;
+    return hash;
+}
 
 static inline uint32_t hash_string(const char *s, uint32_t basis)
 {
     return hash_bytes(s, strlen(s), basis);
 }
 
-/* This is Bob Jenkins' integer hash from
- * http://burtleburtle.net/bob/hash/integer.html, modified for style.
- *
- * This hash is faster than hash_2words(), but it isn't as good when 'basis' is
- * important.  So use this function for speed or hash_2words() for hash
- * quality. */
 static inline uint32_t hash_int(uint32_t x, uint32_t basis)
 {
-    x -= x << 6;
-    x ^= x >> 17;
-    x -= x << 9;
-    x ^= x << 4;
-    x += basis;
-    x -= x << 3;
-    x ^= x << 10;
-    x ^= x >> 15;
-    return x;
+    return hash_2words(x, basis);
 }
 
 /* An attempt at a useful 1-bit hash function.  Has not been analyzed for
@@ -96,15 +100,6 @@ static inline uint32_t hash_boolean(bool x, uint32_t basis)
     return (x ? P0 : P1) ^ hash_rot(basis, 1);
 }
 
-static inline uint32_t hash_double(double x, uint32_t basis)
-{
-    uint32_t value[2];
-    BUILD_ASSERT_DECL(sizeof x == sizeof value);
-
-    memcpy(value, &x, sizeof value);
-    return hash_3words(value[0], value[1], basis);
-}
-
 static inline uint32_t hash_pointer(const void *p, uint32_t basis)
 {
     /* Often pointers are hashed simply by casting to integer type, but that
@@ -118,46 +113,9 @@ static inline uint32_t hash_pointer(const void *p, uint32_t basis)
     return hash_int((uint32_t) (uintptr_t) p, basis);
 }
 
-/* Murmurhash by Austin Appleby,
- * from http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp.
- *
- * The upstream license there says:
- *
- * // MurmurHash3 was written by Austin Appleby, and is placed in the public
- * // domain. The author hereby disclaims copyright to this source code.
- *
- * Murmurhash is faster and higher-quality than the Jenkins lookup3 hash.  When
- * we have a little more familiarity with it, it's probably a good idea to
- * switch all of OVS to it.
- *
- * For now, we have this implementation here for use by code that needs a hash
- * that is convenient for use one word at a time, since the Jenkins lookup3
- * hash works three words at a time.
- *
- * See mhash_words() for sample usage. */
-
-uint32_t mhash_words(const uint32_t data[], size_t n_words, uint32_t basis);
-
-static inline uint32_t mhash_add(uint32_t hash, uint32_t data)
+static inline uint32_t hash_2words(uint32_t x, uint32_t y)
 {
-    data *= 0xcc9e2d51;
-    data = hash_rot(data, 15);
-    data *= 0x1b873593;
-
-    hash ^= data;
-    hash = hash_rot(hash, 13);
-    return hash * 5 + 0xe6546b64;
-}
-
-static inline uint32_t mhash_finish(uint32_t hash, size_t n_bytes)
-{
-    hash ^= n_bytes;
-    hash ^= hash >> 16;
-    hash *= 0x85ebca6b;
-    hash ^= hash >> 13;
-    hash *= 0xc2b2ae35;
-    hash ^= hash >> 16;
-    return hash;
+    return mhash_finish(mhash_add(mhash_add(x, 0), y), 4);
 }
 
 #ifdef __cplusplus
diff --git a/lib/jhash.c b/lib/jhash.c
new file mode 100644
index 0000000..4ec2871
--- /dev/null
+++ b/lib/jhash.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2008, 2009, 2010, 2012 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "jhash.h"
+#include <string.h>
+#include "unaligned.h"
+
+/* This is the public domain lookup3 hash by Bob Jenkins from
+ * http://burtleburtle.net/bob/c/lookup3.c, modified for style. */
+
+static inline uint32_t
+jhash_rot(uint32_t x, int k)
+{
+    return (x << k) | (x >> (32 - k));
+}
+
+static inline void
+jhash_mix(uint32_t *a, uint32_t *b, uint32_t *c)
+{
+      *a -= *c; *a ^= jhash_rot(*c,  4); *c += *b;
+      *b -= *a; *b ^= jhash_rot(*a,  6); *a += *c;
+      *c -= *b; *c ^= jhash_rot(*b,  8); *b += *a;
+      *a -= *c; *a ^= jhash_rot(*c, 16); *c += *b;
+      *b -= *a; *b ^= jhash_rot(*a, 19); *a += *c;
+      *c -= *b; *c ^= jhash_rot(*b,  4); *b += *a;
+}
+
+static inline void
+jhash_final(uint32_t *a, uint32_t *b, uint32_t *c)
+{
+      *c ^= *b; *c -= jhash_rot(*b, 14);
+      *a ^= *c; *a -= jhash_rot(*c, 11);
+      *b ^= *a; *b -= jhash_rot(*a, 25);
+      *c ^= *b; *c -= jhash_rot(*b, 16);
+      *a ^= *c; *a -= jhash_rot(*c,  4);
+      *b ^= *a; *b -= jhash_rot(*a, 14);
+      *c ^= *b; *c -= jhash_rot(*b, 24);
+}
+
+/* Returns the Jenkins hash of the 'n' 32-bit words at 'p', starting from
+ * 'basis'.  'p' must be properly aligned.
+ *
+ * Use hash_words() instead, unless you're computing a hash function whose
+ * value is exposed "on the wire" so we don't want to change it. */
+uint32_t
+jhash_words(const uint32_t *p, size_t n, uint32_t basis)
+{
+    uint32_t a, b, c;
+
+    a = b = c = 0xdeadbeef + (((uint32_t) n) << 2) + basis;
+
+    while (n > 3) {
+        a += p[0];
+        b += p[1];
+        c += p[2];
+        jhash_mix(&a, &b, &c);
+        n -= 3;
+        p += 3;
+    }
+
+    switch (n) {
+    case 3:
+        c += p[2];
+        /* fall through */
+    case 2:
+        b += p[1];
+        /* fall through */
+    case 1:
+        a += p[0];
+        jhash_final(&a, &b, &c);
+        /* fall through */
+    case 0:
+        break;
+    }
+    return c;
+}
+
+/* Returns the Jenkins hash of the 'n' bytes at 'p', starting from 'basis'.
+ *
+ * Use jhash_bytes() instead, unless you're computing a hash function whose
+ * value is exposed "on the wire" so we don't want to change it. */
+uint32_t
+jhash_bytes(const void *p_, size_t n, uint32_t basis)
+{
+    const uint8_t *p = p_;
+    uint32_t a, b, c;
+
+    a = b = c = 0xdeadbeef + n + basis;
+
+    while (n >= 12) {
+        a += get_unaligned_u32((uint32_t *) p);
+        b += get_unaligned_u32((uint32_t *) (p + 4));
+        c += get_unaligned_u32((uint32_t *) (p + 8));
+        jhash_mix(&a, &b, &c);
+        n -= 12;
+        p += 12;
+    }
+
+    if (n) {
+        uint32_t tmp[3];
+
+        tmp[0] = tmp[1] = tmp[2] = 0;
+        memcpy(tmp, p, n);
+        a += tmp[0];
+        b += tmp[1];
+        c += tmp[2];
+        jhash_final(&a, &b, &c);
+    }
+
+    return c;
+}
diff --git a/lib/jhash.h b/lib/jhash.h
new file mode 100644
index 0000000..f83b08f
--- /dev/null
+++ b/lib/jhash.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2008, 2009, 2010, 2012 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef JHASH_H
+#define JHASH_H 1
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include "util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* This is the public domain lookup3 hash by Bob Jenkins from
+ * http://burtleburtle.net/bob/c/lookup3.c, modified for style.
+ *
+ * Use the functions in hash.h instead if you can.  These are here just for
+ * places where we've exposed a hash function "on the wire" and don't want it
+ * to change. */
+
+uint32_t jhash_words(const uint32_t *, size_t n_word, uint32_t basis);
+uint32_t jhash_bytes(const void *, size_t n_bytes, uint32_t basis);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* jhash.h */
diff --git a/tests/test-hash.c b/tests/test-hash.c
index d53ba2e..0b7b87a 100644
--- a/tests/test-hash.c
+++ b/tests/test-hash.c
@@ -20,6 +20,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "hash.h"
+#include "jhash.h"
 
 #undef NDEBUG
 #include <assert.h>
@@ -41,9 +42,9 @@ hash_words_cb(uint32_t input)
 }
 
 static uint32_t
-mhash_words_cb(uint32_t input)
+jhash_words_cb(uint32_t input)
 {
-    return mhash_words(&input, 1, 0);
+    return jhash_words(&input, 1, 0);
 }
 
 static uint32_t
@@ -130,7 +131,7 @@ main(void)
      * independence must be a bad assumption :-)
      */
     check_word_hash(hash_words_cb, "hash_words", 11);
-    check_word_hash(mhash_words_cb, "mhash_words", 11);
+    check_word_hash(jhash_words_cb, "jhash_words", 11);
 
     /* Check that all hash functions of with one 1-bit (or no 1-bits) set
      * within three 32-bit words have different values in their lowest 12
@@ -146,24 +147,26 @@ main(void)
      * so we are doing pretty well to not have any collisions in 12 bits.
      */
     check_3word_hash(hash_words, "hash_words");
-    check_3word_hash(mhash_words, "mhash_words");
+    check_3word_hash(jhash_words, "jhash_words");
 
     /* Check that all hashes computed with hash_int with one 1-bit (or no
      * 1-bits) set within a single 32-bit word have different values in all
-     * 14-bit consecutive runs.
+     * 12-bit consecutive runs.
      *
      * Given a random distribution, the probability of at least one collision
-     * in any set of 14 bits is approximately
+     * in any set of 12 bits is approximately
      *
-     *                      1 - ((2**14 - 1)/2**14)**C(33,2)
-     *                   == 1 - (16,383/16,834)**528
-     *                   =~ 0.031
+     *                      1 - ((2**12 - 1)/2**12)**C(33,2)
+     *                   == 1 - (4,095/4,096)**528
+     *                   =~ 0.12
      *
-     * There are 18 ways to pick 14 consecutive bits in a 32-bit word, so if we
+     * There are 20 ways to pick 12 consecutive bits in a 32-bit word, so if we
      * assumed independence then the chance of having no collisions in any of
-     * those 14-bit runs would be (1-0.03)**18 =~ 0.56.  This seems reasonable.
+     * those 12-bit runs would be (1-0.12)**20 =~ 0.078.  This refutes our
+     * assumption of independence, which makes it seem like a good hash
+     * function.
      */
-    check_word_hash(hash_int_cb, "hash_int", 14);
+    check_word_hash(hash_int_cb, "hash_int", 12);
 
     return 0;
 }
-- 
1.7.2.5




More information about the dev mailing list