[ovs-dev] [PATCH] datapath: optimize flow compare and mask functions

Andy Zhou azhou at nicira.com
Sat Jun 22 12:32:48 UTC 2013


For architectures that can load and store unaligned longs efficiently, use
4- or 8-byte operations. This improves efficiency compared to byte-wise
operations.

This patch uses ideas and code from a patch submitted by Peter Klausler
titled "replace memcmp() with specialized comparator". The flow compare
function is essentially his implementation.  The original patch
mentioned a 7X speedup with this optimization.

Co-authored-by: Peter Klausler <pmk at google.com>
Signed-off-by: Andy Zhou <azhou at nicira.com>
---
 datapath/flow.c |   55 +++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 45 insertions(+), 10 deletions(-)

diff --git a/datapath/flow.c b/datapath/flow.c
index 39de931..273cbea 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -45,6 +45,13 @@
 
 #include "vlan.h"
 
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#define ADDR_IS_ALIGNED(addr)  1	/* any address may use long accesses */
+#else	/* true only when addr is a multiple of sizeof(long) */
+#define ADDR_IS_ALIGNED(addr)  \
+	    ((((unsigned long) (addr)) & (sizeof(long) - 1)) == 0)
+#endif	/* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
+
 static struct kmem_cache *flow_cache;
 
 static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
@@ -343,16 +350,26 @@ static void flow_key_mask(struct sw_flow_key *dst,
 			  const struct sw_flow_key *src,
 			  const struct sw_flow_mask *mask)
 {
-	u8 *m = (u8 *)&mask->key + mask->range.start;
-	u8 *s = (u8 *)src + mask->range.start;
-	u8 *d = (u8 *)dst + mask->range.start;
-	int i;
+	const u8 *m = (u8 *)&mask->key;
+	const u8 *s = (u8 *)src;
+	u8 *d = (u8 *)dst;
+	int len = sizeof(*dst);	/* bytes of the key still to be masked */
 
-	memset(dst, 0, sizeof(*dst));
-	for (i = 0; i < ovs_sw_flow_mask_size_roundup(mask); i++) {
-		*d = *s & *m;
-		d++, s++, m++;
+	if (ADDR_IS_ALIGNED(m) && ADDR_IS_ALIGNED(s) && ADDR_IS_ALIGNED(d)) {
+		const long *ml = (const long *)m;
+		const long *sl = (const long *)s;
+		long *dl = (long *)d;
+
+		for (; len >= (int)sizeof(long); len -= sizeof(long))
+			*dl++ = *sl++ & *ml++;	/* dst = src & mask, one long at a time */
+
+		m = (const u8 *)ml;
+		s = (const u8 *)sl;
+		d = (u8 *)dl;
 	}
+
+	while (len-- > 0)	/* byte-wise tail, or whole key if unaligned */
+		*d++ = *s++ & *m++;
 }
 
 #define TCP_FLAGS_OFFSET 13
@@ -984,8 +1001,26 @@ static int flow_key_start(const struct sw_flow_key *key)
 static bool __cmp_key(const struct sw_flow_key *key1,
 		const struct sw_flow_key *key2,  int key_start, int key_len)
 {
-	return !memcmp((u8 *)key1 + key_start,
-			(u8 *)key2 + key_start, (key_len - key_start));
+	const u8 *cp1 = (u8 *)key1 + key_start;
+	const u8 *cp2 = (u8 *)key2 + key_start;
+	int len = key_len - key_start;	/* bytes left to compare */
+	long diffs = 0;			/* OR of all XORed differences */
+
+	if (ADDR_IS_ALIGNED(cp1) && ADDR_IS_ALIGNED(cp2)) {
+		const long *lp1 = (const long *)cp1;
+		const long *lp2 = (const long *)cp2;
+
+		for (; len >= (int)sizeof(long); len -= sizeof(long))
+			diffs |= *lp1++ ^ *lp2++;
+
+		cp1 = (const u8 *)lp1;
+		cp2 = (const u8 *)lp2;
+	}
+
+	while (len-- > 0)	/* byte-wise tail, or whole range if unaligned */
+		diffs |= *cp1++ ^ *cp2++;
+
+	return diffs == 0;	/* true when the compared ranges are identical */
 }
 
 static bool __flow_cmp_key(const struct sw_flow *flow,
-- 
1.7.9.5




More information about the dev mailing list