[ovs-dev] [PATCH v2 2/5] dpif-netdev: Add AVX2 implementation for CD lookup.
Yipeng Wang
yipeng1.wang at intel.com
Tue Oct 31 23:39:34 UTC 2017
This patch adds an AVX2 implementation of the CD lookup. The 16 signature
entries of a bucket are compared against the signature of the lookup key in
a single vector comparison. This patch depends on the first patch of this
series.
CC: Darrell Ball <dball at vmware.com>
CC: Jan Scheurich <jan.scheurich at ericsson.com>
Signed-off-by: Yipeng Wang <yipeng1.wang at intel.com>
Signed-off-by: Antonio Fischetti <antonio.fischetti at intel.com>
Co-authored-by: Antonio Fischetti <antonio.fischetti at intel.com>
---
evaluation:
We set up the same testing environment as for the previous patch. The
results of the AVX2 CD implementation are shown below.
AVX2 data:
1M flows:
no.subtable: 10 20 30
cd-ovs 3895961 3170530 2968555
orig-ovs 2683455 1646227 1240501
speedup 1.45x 1.92x 2.39x
---
lib/dpif-netdev.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 66 insertions(+), 1 deletion(-)
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ea1d625..78219ba 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -30,6 +30,9 @@
#include <sys/socket.h>
#include <sys/stat.h>
#include <unistd.h>
+#if defined(__AVX2__)
+#include <immintrin.h>
+#endif
#ifdef DPDK_NETDEV
#include <rte_cycles.h>
@@ -2378,7 +2381,37 @@ cd_lookup_bulk_pipe(struct dpcls *cls, const struct netdev_flow_key keys[],
OVS_PREFETCH(prim_bkt1);
OVS_PREFETCH(sec_bkt1);
+#ifdef __AVX2__
+ prim_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+ _mm256_load_si256((__m256i const *)prim_bkt0->sig),
+ _mm256_set1_epi16(temp_sig0)));
+
+
+ sec_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+ _mm256_load_si256((__m256i const *)sec_bkt0->sig),
+ _mm256_set1_epi16(temp_sig0)));
+ if (prim_hitmask) {
+ loc = raw_ctz(prim_hitmask) >> 1;
+ data[i-1] =
+ prim_bkt0->table_index[loc];
+ if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+ hits |= 1 << (i - 1);
+ prim_bkt0 = prim_bkt1;
+ sec_bkt0 = sec_bkt1;
+ temp_sig0 = temp_sig1;
+ continue;
+ }
+ }
+
+ if (sec_hitmask) {
+ loc = raw_ctz(sec_hitmask) >> 1;
+ data[i-1] = sec_bkt0->table_index[loc];
+ if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+ hits |= 1 << (i - 1);
+ }
+ }
+#else
unsigned int j;
prim_hitmask = 0;
sec_hitmask = 0;
@@ -2407,12 +2440,42 @@ cd_lookup_bulk_pipe(struct dpcls *cls, const struct netdev_flow_key keys[],
hits |= 1 << (i - 1);
}
}
-
+#endif
prim_bkt0 = prim_bkt1;
sec_bkt0 = sec_bkt1;
temp_sig0 = temp_sig1;
}
+#ifdef __AVX2__
+ prim_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+ _mm256_load_si256((__m256i const *)prim_bkt0->sig),
+ _mm256_set1_epi16(temp_sig0)));
+
+
+ sec_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+ _mm256_load_si256((__m256i const *)sec_bkt0->sig),
+ _mm256_set1_epi16(temp_sig0)));
+
+ if (prim_hitmask) {
+ loc = raw_ctz(prim_hitmask) >> 1;
+ data[i-1] = prim_bkt0->table_index[loc];
+ if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+ hits |= 1 << (i - 1);
+ if (hit_mask != NULL) {
+ *hit_mask = hits;
+ }
+ return;
+ }
+ }
+
+ if (sec_hitmask) {
+ loc = raw_ctz(sec_hitmask) >> 1;
+ data[i-1] = sec_bkt0->table_index[loc];
+ if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+ hits |= 1 << (i - 1);
+ }
+ }
+#else
unsigned int j;
prim_hitmask = 0;
sec_hitmask = 0;
@@ -2442,9 +2505,11 @@ cd_lookup_bulk_pipe(struct dpcls *cls, const struct netdev_flow_key keys[],
}
}
+#endif
if (hit_mask != NULL) {
*hit_mask = hits;
}
+
}
static int
--
2.7.4
More information about the dev
mailing list