[ovs-dev] [PATCH RFC] dpif-netdev: Add Cuckoo Distributor to Accelerate Megaflow Search

yipeng1.wang at intel.com yipeng1.wang at intel.com
Thu Apr 6 21:48:12 UTC 2017


From: Yipeng Wang <yipeng1.wang at intel.com>

The Datapath Classifier uses tuple space search for flow classification.
The rules are arranged into a set of tuples/subtables (each with a
distinct mask).  Each subtable is implemented as a hash table and lookup
is done with flow keys formed by selecting the bits from the packet header
based on each subtable's mask. Tuple space search will sequentially search
each subtable until a match is found. With a large number of subtables, a
sequential search of the subtables could consume a lot of CPU cycles. In
a testbench with a uniform traffic pattern equally distributed across 20
subtables, we measured that up to 65% of total execution time is attributed
to the megaflow cache lookup.

This patch presents the idea of the two-layer hierarchical lookup, where a
low overhead first level of indirection is accessed first, we call this
level cuckoo distributor (CD). If a flow key has been inserted in the flow
table the first level will indicate with high probability that which
subtable to look into. A lookup is performed on the second level (the
target subtable) to retrieve the result. If the key doesn’t have a match,
then we revert back to the sequential search of subtables.

This patch can improve the already existing Subtable Ranking when traffic
data has high entropy. Subtable Ranking helps minimize the number of
traversed subtables when most of the traffic hit the same subtable.
However, in the case of high entropy traffic such as traffic coming from
a physical port, multiple subtables could be hit with a similar frequency.
In this case the average subtable lookups per hit would be much greater
than 1. In addition, CD can adaptively turn off when it finds the traffic
mostly hit one subtable. Thus, CD will not be an overhead when Subtable
Ranking works well.

Scheme:

     -------
    |  CD   |
     -------
       \
        \
 -----  -----     -----
|sub  ||sub  |...|sub  |
|table||table|   |table|
 -----  -----     -----

Evaluation:

We create set of rules with various src IP. We feed traffic containing 1
million flows with various src IP and dst IP. All the flows hit 10/20/30
rules creating 10/20/30 subtables.

The table below shows the preliminary continuous testing results (full line
speed test) we collected with a uni-directional port-to-port setup. The
machine we tested on is a Xeon E5 server running with 2.2GHz cores. OvS
runs with 1 PMD. We use Spirent as the hardware traffic generator.

no.subtable: 10          20          30
cd-ovs       3895961     3170530     2968555
orig-ovs     2683455     1646227     1240501
speedup      1.45x       1.92x       2.39x

Signed-off-by: Yipeng Wang <yipeng1.wang at intel.com>
Signed-off-by: Charlie Tai <charlie.tai at intel.com>
Co-authored-by: Charlie Tai <charlie.tai at intel.com>
Signed-off-by: Sameh Gobriel <sameh.gobriel at intel.com>
Co-authored-by: Sameh Gobriel <sameh.gobriel at intel.com>
Signed-off-by: Ren Wang <ren.wang at intel.com>
Co-authored-by: Ren Wang <ren.wang at intel.com>
Signed-off-by: Antonio Fischetti <antonio.fischetti at intel.com>
Co-authored-by: Antonio Fischetti <antonio.fischetti at intel.com>
---
 lib/dpif-netdev.c     | 654 ++++++++++++++++++++++++++++++++++++++++++++++++--
 tests/ofproto-dpif.at |   3 +-
 2 files changed, 633 insertions(+), 24 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index a14a2eb..d9a883b 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -79,11 +79,23 @@
 
 VLOG_DEFINE_THIS_MODULE(dpif_netdev);
 
+/* Length of Subtable table for cuckoo distributor to index subtables.
+ * The size of the table is at most 256 entires because the CD's entry only
+ * provides 1 byte for indexing.
+ */
+#define SUBTABLE_TABLE_LENGTH 256
+
 #define FLOW_DUMP_MAX_BATCH 50
 /* Use per thread recirc_depth to prevent recirculation loop. */
 #define MAX_RECIRC_DEPTH 5
 DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 
+
+#define CD_DEBUG 0
+#define debug_print(...) \
+            do { if (CD_DEBUG) fprintf(stderr, __VA_ARGS__); } while (0)
+
+
 /* Configuration parameters. */
 enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
 enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
@@ -163,6 +175,44 @@ struct emc_cache {
     int sweep_idx;                /* For emc_cache_slow_sweep(). */
 };
 
+
+/* Cuckoo distributor (CD) is a 2-hash function hash table.
+ * For now, the design does not allow desplacing items when bucket is full,
+ * which is different from the behavior of a cuckoo hash table.
+ * The advantage is that we do not need to store two sigantures so that
+ * the struct will be more compact. We use 16 entries per bucket for the
+ * usage of AVX.
+ *
+ * Each classifier has its own cuckoo distributor. It is NOT thread-safe
+ */
+#define CD_NUM_BUCKETS (1<<16)
+#define CD_BUCKET_MASK (CD_NUM_BUCKETS-1)
+#define CD_ENTRIES 16
+
+/* These two seeds are used for hashing two bucket locations */
+#define CD_PRIM_BUCKET_SEED 10
+#define CD_SEC_BUCKET_SEED 20
+
+/* This bit is used to choose which bucket to replace CD's entry in cd_insert*/
+#define CD_CHOOSE_SEC_BUCKT_BIT (1 << CD_ENTRIES)
+
+typedef uint16_t simple_sig_store_t;
+
+
+/* The bucket struct for cuckoo distributor*/
+struct cuckoo_distributor_bucket {
+    simple_sig_store_t sig[CD_ENTRIES]; /*2-byte long signature*/
+    uint8_t table_index[CD_ENTRIES];    /*index to subtable table*/
+    uint8_t flag[CD_ENTRIES];           /*FIXME: not yet used*/
+} __attribute__ ((packed));
+
+
+struct cuckoo_distributor {
+    struct cuckoo_distributor_bucket buckets[CD_NUM_BUCKETS]; /*buckets array*/
+    uint32_t sig_store_bitmask;  /*mask to derive signature from hash value*/
+} __attribute__ ((aligned (64)));
+
+
 /* Iterate in the exact match cache through every entry that might contain a
  * miniflow with hash 'HASH'. */
 #define EMC_FOR_EACH_POS_WITH_HASH(EMC, CURRENT_ENTRY, HASH)                 \
@@ -175,12 +225,19 @@ struct emc_cache {
 
 /* Time in ms between successive optimizations of the dpcls subtable vector */
 #define DPCLS_OPTIMIZATION_INTERVAL 1000
+/* Time in ms between the decisions of turning on or off CD */
+#define DPCLS_CD_OPTIMIZATION_INTERVAL 5000
 
 struct dpcls {
     struct cmap_node node;      /* Within dp_netdev_pmd_thread.classifiers */
     odp_port_t in_port;
     struct cmap subtables_map;
     struct pvector subtables;
+    struct cuckoo_distributor *cdtable;
+    uint8_t cd_on;                    /* turn on of off CD during runtime */
+    unsigned long cd_insert_cnt;      /* data collected for test purpose */
+    unsigned long cd_insert_fail_cnt; /* data collected for test purpose */
+    struct dpcls_subtable* subtable_table[SUBTABLE_TABLE_LENGTH];
 };
 
 /* A rule to be inserted to the classifier. */
@@ -197,7 +254,9 @@ static void dpcls_sort_subtable_vector(struct dpcls *);
 static void dpcls_insert(struct dpcls *, struct dpcls_rule *,
                          const struct netdev_flow_key *mask);
 static void dpcls_remove(struct dpcls *, struct dpcls_rule *);
-static bool dpcls_lookup(struct dpcls *cls,
+
+struct dp_netdev_pmd_thread;
+static bool dpcls_lookup(struct dp_netdev_pmd_thread *pmd, struct dpcls *cls,
                          const struct netdev_flow_key keys[],
                          struct dpcls_rule **rules, size_t cnt,
                          int *num_lookups_p);
@@ -322,6 +381,8 @@ enum dp_stat_type {
     DP_STAT_LOST,               /* Packets not passed up to the client. */
     DP_STAT_LOOKUP_HIT,         /* Number of subtable lookups for flow table
                                    hits */
+    DP_CD_STAT_HIT,             /* Packets that hit in cuckoo distributor */
+    DP_CD_STAT_MISS,            /* Packets that miss in cuckoo distributor */
     DP_N_STATS
 };
 
@@ -539,6 +600,7 @@ struct dp_netdev_pmd_thread {
     struct cmap classifiers;
     /* Periodically sort subtable vectors according to hit frequencies */
     long long int next_optimization;
+    long long int next_cd_optimization;
 
     /* Statistics. */
     struct dp_netdev_pmd_stats stats;
@@ -698,6 +760,28 @@ emc_cache_uninit(struct emc_cache *flow_cache)
     }
 }
 
+/* Initialize the cuckoo distributor structure */
+static void
+cd_init(struct cuckoo_distributor *cd)
+{
+    int i, j;
+    for (i = 0; i < CD_NUM_BUCKETS; i++) {
+        for(j = 0; j < CD_ENTRIES; j++){
+            cd->buckets[i].sig[j] = 0;
+            cd->buckets[i].table_index[j] = 0;
+            cd->buckets[i].flag[j] = 0 ;
+        }
+    }
+    cd->sig_store_bitmask = (1 << (8 * sizeof(simple_sig_store_t))) - 1;
+}
+
+/* Delete the cuckoo distributor*/
+static void
+cd_delete(struct cuckoo_distributor *cd)
+{
+    free(cd);
+}
+
 /* Check and clear dead flow references slowly (one entry at each
  * invocation).  */
 static void
@@ -760,7 +844,8 @@ pmd_info_show_stats(struct ds *reply,
             stats[i] = 0;
         }
 
-        if (i != DP_STAT_LOST) {
+        if (i != DP_STAT_LOST && i != DP_STAT_LOOKUP_HIT
+                && i != DP_CD_STAT_HIT && i != DP_CD_STAT_MISS) {
             /* Lost packets are already included in DP_STAT_MISS */
             total_packets += stats[i];
         }
@@ -797,6 +882,11 @@ pmd_info_show_stats(struct ds *reply,
                   : 0,
                   stats[DP_STAT_MISS], stats[DP_STAT_LOST]);
 
+    ds_put_format(reply,
+                  "\tCD hits:%llu\n\tCD miss:%llu\n",
+                  stats[DP_CD_STAT_HIT], stats[DP_CD_STAT_MISS]);
+
+
     if (total_cycles == 0) {
         return;
     }
@@ -2026,6 +2116,274 @@ emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key,
     emc_change_entry(to_be_replaced, flow, key);
 }
 
+
+static inline int
+cd_insert(struct cuckoo_distributor *cd,
+            const struct netdev_flow_key *key, int index)
+{
+    /* FIXME: make return value meaningful. */
+    int i;
+    simple_sig_store_t tmp_sig = (key->hash & cd->sig_store_bitmask);
+
+    /* First entry of subtable_table (index == 0) means an empty entry.
+     * Here we should have a valide entry for cd insertion so it is not 0.
+     */
+    ovs_assert(index != 0);
+    /* Using 2 hash functions (or different seeds) get 2 totally
+     * random bucket places
+     */
+    /* FIXME: use higher/lower bits rather than 2 hashes and test. */
+    uint32_t prim_bucket = hash_int(key->hash, CD_PRIM_BUCKET_SEED)
+                            & CD_BUCKET_MASK;
+    uint32_t sec_bucket = hash_int(key->hash, CD_SEC_BUCKET_SEED)
+                            & CD_BUCKET_MASK;
+
+    /* Check if the signature already in the two buckets */
+    for (i = 0; i < CD_ENTRIES; i++) {
+        if (cd->buckets[prim_bucket].sig[i] == tmp_sig) {
+            cd->buckets[prim_bucket].table_index[i] = index;
+            return 0;
+        }
+        if (cd->buckets[sec_bucket].sig[i] == tmp_sig) {
+            cd->buckets[sec_bucket].table_index[i] = index;
+            return 0;
+        }
+    }
+
+
+    /* If not then insert into one slot (prefer empty slot) */
+    for (i = 0; i < CD_ENTRIES; i++) {
+        if(cd->buckets[prim_bucket].table_index[i] == 0){
+            cd->buckets[prim_bucket].sig[i] = tmp_sig;
+            cd->buckets[prim_bucket].table_index[i] = index;
+            return 0;
+        }
+    }
+
+    /* Primary location full */
+    if (i == CD_ENTRIES) {
+        for(i = 0; i < CD_ENTRIES; i++){
+        if(cd->buckets[sec_bucket].table_index[i] == 0){
+                cd->buckets[sec_bucket].sig[i] = tmp_sig;
+                cd->buckets[sec_bucket].table_index[i] = index;
+                return 0;
+            }
+        }
+    }
+
+    /* Then we should evict someone. */
+
+    /* FIXME: replace pseudo random to aging based replacement policy
+     * otherwise a slowly sweep process (like EMC) to kill infrequently
+     * accessed items could also help
+     */
+
+    uint32_t random = random_uint32();
+    uint32_t evict_idx = random & (CD_ENTRIES-1);
+    uint32_t bucket_choose = prim_bucket;
+
+    if (random & CD_CHOOSE_SEC_BUCKT_BIT) {
+        bucket_choose = sec_bucket;
+    }
+
+    cd->buckets[bucket_choose].sig[evict_idx] = tmp_sig;
+    cd->buckets[bucket_choose].table_index[evict_idx] = index;
+    return 0;
+}
+
+
+static inline void
+cd_compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
+            const struct cuckoo_distributor_bucket *prim_bkt,
+            const struct cuckoo_distributor_bucket *sec_bkt,
+            simple_sig_store_t sig)
+{
+#ifdef __AVX2__
+
+      *prim_hash_matches = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+                _mm256_load_si256((__m256i const *)prim_bkt->sig),
+                _mm256_set1_epi16(sig)));
+
+
+      *sec_hash_matches = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+                _mm256_load_si256((__m256i const *)sec_bkt->sig),
+                _mm256_set1_epi16(sig)));
+
+#else
+        unsigned int i;
+        for (i = 0; i < CD_ENTRIES; i++) {
+            *prim_hash_matches |= ((sig == prim_bkt->sig[i]) << i);
+            *sec_hash_matches |= ((sig == sec_bkt->sig[i]) << i);
+        }
+#endif
+}
+
+/* 2-stage pipelined cd lookup*/
+static inline int
+cd_lookup_bulk_pipe( struct dpcls *cls,  const struct netdev_flow_key keys[],
+                     int32_t num_keys, uint32_t *hit_mask, int data[])
+{
+    int i;
+    uint32_t prim_hitmask = 0;
+    uint32_t sec_hitmask = 0;
+    uint64_t hits = 0;
+    struct cuckoo_distributor* cd = cls->cdtable;
+    simple_sig_store_t temp_sig0 = (keys[0].hash) & cd->sig_store_bitmask;
+
+
+    struct cuckoo_distributor_bucket* prim_bkt0 =
+                    &cd->buckets[hash_int(keys[0].hash, CD_PRIM_BUCKET_SEED)
+                    & CD_BUCKET_MASK];
+    struct cuckoo_distributor_bucket* sec_bkt0 =
+                    &cd->buckets[hash_int(keys[0].hash, CD_SEC_BUCKET_SEED)
+                    & CD_BUCKET_MASK];
+    rte_prefetch0(prim_bkt0);
+    rte_prefetch0(sec_bkt0);
+
+    for (i = 1; i < num_keys; i++) {
+        simple_sig_store_t temp_sig1 = (keys[i].hash) & cd->sig_store_bitmask;
+
+        struct cuckoo_distributor_bucket* prim_bkt1 =
+                    &cd->buckets[hash_int(keys[i].hash, CD_PRIM_BUCKET_SEED)
+                    & CD_BUCKET_MASK];
+        struct cuckoo_distributor_bucket* sec_bkt1 =
+                    &cd->buckets[hash_int(keys[i].hash, CD_SEC_BUCKET_SEED)
+                    & CD_BUCKET_MASK];
+
+        rte_prefetch0(prim_bkt1);
+        rte_prefetch0(sec_bkt1);
+
+#ifdef __AVX2__
+
+        prim_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+                _mm256_load_si256((__m256i const *)prim_bkt0->sig),
+                _mm256_set1_epi16(temp_sig0)));
+
+
+        sec_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+                _mm256_load_si256((__m256i const *)sec_bkt0->sig),
+                _mm256_set1_epi16(temp_sig0)));
+
+        if (prim_hitmask) {
+            data[i-1] =
+                     prim_bkt0->table_index[raw_ctz(prim_hitmask) / 2];
+            if (data[i-1] != 0 && cls->subtable_table[data[i-1]] != 0) {
+                hits |= 1 << (i - 1);
+                prim_bkt0 = prim_bkt1;
+                sec_bkt0 = sec_bkt1;
+                temp_sig0 = temp_sig1;
+                continue;
+            }
+
+        }
+
+        if (sec_hitmask) {
+            data[i-1] = sec_bkt0->table_index[raw_ctz(sec_hitmask) / 2];
+            if (data[i-1] != 0 && cls->subtable_table[data[i-1]] != 0) {
+               hits |= 1 << (i - 1);
+            }
+        }
+
+#else
+        unsigned int j;
+        for (j = 0; j < CD_ENTRIES; j++) {
+            prim_hitmask |= ((temp_sig0 == prim_bkt0->sig[j]) << j);
+            sec_hitmask |= ((temp_sig0 == sec_bkt0->sig[j]) << j);
+        }
+
+        if (prim_hitmask) {
+            data[i-1] = prim_bkt0->table_index[raw_ctz(prim_hitmask)];
+            if (data[i-1] != 0 && cls->subtable_table[data[i-1]] != 0) {
+                hits |= 1 << (i - 1);
+                prim_bkt0 = prim_bkt1;
+                sec_bkt0 = sec_bkt1;
+                temp_sig0 = temp_sig1;
+
+                continue;
+            }
+
+        }
+
+        if (sec_hitmask) {
+            data[i-1] = sec_bkt0->table_index[raw_ctz(sec_hitmask)];
+            if (data[i-1] != 0 && cls->subtable_table[data[i-1]] != 0) {
+                hits |= 1 << (i - 1);
+            }
+        }
+
+#endif
+
+        prim_bkt0 = prim_bkt1;
+        sec_bkt0 = sec_bkt1;
+        temp_sig0 = temp_sig1;
+
+    }
+
+
+#ifdef __AVX2__
+
+      prim_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+                _mm256_load_si256((__m256i const *)prim_bkt0->sig),
+                _mm256_set1_epi16(temp_sig0)));
+
+
+      sec_hitmask= _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+                _mm256_load_si256((__m256i const *)sec_bkt0->sig),
+                _mm256_set1_epi16(temp_sig0)));
+
+     if (prim_hitmask) {
+        data[i-1] = prim_bkt0->table_index[raw_ctz(prim_hitmask) / 2];
+        if (data[i-1] != 0 && cls->subtable_table[data[i-1]] != 0) {
+            hits |= 1 << (i - 1);
+            if (hit_mask != NULL)
+                *hit_mask = hits;
+            return count_1bits(*hit_mask);
+        }
+
+     }
+
+    if (sec_hitmask) {
+        data[i-1] = sec_bkt0->table_index[raw_ctz(sec_hitmask) / 2];
+        if (data[i-1] != 0 && cls->subtable_table[data[i-1]] != 0) {
+           hits |= 1 << (i - 1);
+        }
+    }
+
+
+#else
+    unsigned int j;
+    for (j = 0; j < CD_ENTRIES; j++) {
+        prim_hitmask |= ((temp_sig0 == prim_bkt0->sig[j]) << j);
+        sec_hitmask |= ((temp_sig0 == sec_bkt0->sig[j]) << j);
+    }
+
+    if (prim_hitmask) {
+        data[i-1] = prim_bkt0->table_index[raw_ctz(prim_hitmask)];
+        if (data[i-1] != 0 && cls->subtable_table[data[i-1]] != 0) {
+            hits |= 1 << (i-1);
+            if (hit_mask != NULL)
+                *hit_mask = hits;
+            return count_1bits(*hit_mask);
+        }
+
+    }
+
+    if (sec_hitmask) {
+        data[i-1] = sec_bkt0->table_index[raw_ctz(sec_hitmask)];
+        if (data[i-1] != 0 && cls->subtable_table[data[i-1]] != 0) {
+           hits |= 1 << (i - 1);
+        }
+    }
+#endif
+
+    if (hit_mask != NULL)
+        *hit_mask = hits;
+    return count_1bits(*hit_mask);
+}
+
+
+
+
 static inline void
 emc_probabilistic_insert(struct dp_netdev_pmd_thread *pmd,
                          const struct netdev_flow_key *key,
@@ -2065,6 +2423,24 @@ emc_lookup(struct emc_cache *cache, const struct netdev_flow_key *key)
     return NULL;
 }
 
+
+static inline struct dpcls_subtable *
+dpcls_find_subtable(struct dpcls *cls, const struct netdev_flow_key *mask);
+
+
+/* Insert the subtable pointer to the subtable table. */
+static inline int
+insert_subtable_table(struct dpcls *, struct dpcls_subtable * );
+
+/* Remove a subtable from the subtable table */
+static inline int
+remove_subtable_table(struct dpcls *, struct dpcls_subtable * );
+
+/* Find the index of a certain subtable. */
+static inline int
+find_index_in_subtable_table(struct dpcls *, struct dpcls_subtable * );
+
+
 static struct dp_netdev_flow *
 dp_netdev_pmd_lookup_flow(struct dp_netdev_pmd_thread *pmd,
                           const struct netdev_flow_key *key,
@@ -2077,7 +2453,7 @@ dp_netdev_pmd_lookup_flow(struct dp_netdev_pmd_thread *pmd,
 
     cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
     if (OVS_LIKELY(cls)) {
-        dpcls_lookup(cls, key, &rule, 1, lookup_num_p);
+        dpcls_lookup(pmd, cls, key, &rule, 1, lookup_num_p);
         netdev_flow = dp_netdev_flow_cast(rule);
     }
     return netdev_flow;
@@ -2311,7 +2687,8 @@ out:
 static struct dp_netdev_flow *
 dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
                    struct match *match, const ovs_u128 *ufid,
-                   const struct nlattr *actions, size_t actions_len)
+                   const struct nlattr *actions, size_t actions_len,
+                   const struct netdev_flow_key *key)
     OVS_REQUIRES(pmd->flow_mutex)
 {
     struct dp_netdev_flow *flow;
@@ -2358,6 +2735,17 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
     cmap_insert(&pmd->flow_table, CONST_CAST(struct cmap_node *, &flow->node),
                 dp_netdev_flow_hash(&flow->ufid));
 
+    /* Insert to CD here. */
+    if (cls->cd_on) {
+        if (key) {
+            struct dpcls_subtable *subtable = dpcls_find_subtable(cls, &mask);
+            int index = find_index_in_subtable_table(cls, subtable);
+            if (index != 0) {
+                cd_insert(cls->cdtable, key, index);
+            }
+        }
+    }
+
     if (OVS_UNLIKELY(VLOG_IS_DBG_ENABLED())) {
         struct ds ds = DS_EMPTY_INITIALIZER;
         struct ofpbuf key_buf, mask_buf;
@@ -2414,7 +2802,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
         if (put->flags & DPIF_FP_CREATE) {
             if (cmap_count(&pmd->flow_table) < MAX_FLOWS) {
                 dp_netdev_flow_add(pmd, match, ufid, put->actions,
-                                   put->actions_len);
+                                   put->actions_len, NULL);
                 error = 0;
             } else {
                 error = EFBIG;
@@ -4160,6 +4548,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
     cmap_init(&pmd->flow_table);
     cmap_init(&pmd->classifiers);
     pmd->next_optimization = time_msec() + DPCLS_OPTIMIZATION_INTERVAL;
+    pmd->next_cd_optimization = time_msec() + DPCLS_CD_OPTIMIZATION_INTERVAL;
     hmap_init(&pmd->poll_list);
     hmap_init(&pmd->tx_ports);
     hmap_init(&pmd->tnl_port_cache);
@@ -4629,7 +5018,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet,
         if (OVS_LIKELY(!netdev_flow)) {
             netdev_flow = dp_netdev_flow_add(pmd, &match, &ufid,
                                              add_actions->data,
-                                             add_actions->size);
+                                             add_actions->size, key);
         }
         ovs_mutex_unlock(&pmd->flow_mutex);
         emc_probabilistic_insert(pmd, key, netdev_flow);
@@ -4667,7 +5056,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
     /* Get the classifier for the in_port */
     cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
     if (OVS_LIKELY(cls)) {
-        any_miss = !dpcls_lookup(cls, keys, rules, cnt, &lookup_cnt);
+        any_miss = !dpcls_lookup(pmd, cls, keys, rules, cnt, &lookup_cnt);
     } else {
         any_miss = true;
         memset(rules, 0, sizeof(rules));
@@ -5426,23 +5815,109 @@ struct dpcls_subtable {
     struct cmap rules;           /* Contains "struct dpcls_rule"s. */
     uint32_t hit_cnt;            /* Number of match hits in subtable in current
                                     optimization interval. */
+
+    uint32_t access_cnt;     /* With CD implemented, hit_cnt should be subtable
+                              * hits that miss in CD, so the ranking mechanism
+                              * which is based on hit_cnt still works properly.
+                              * We have the access_cnt as total access count to
+                              * each subtable to consider if we should turn on
+                              * or turn off CD.
+                              */
+
     struct netdev_flow_key mask; /* Wildcards for fields (const). */
     /* 'mask' must be the last field, additional space is allocated here. */
 };
 
+
+
+static int
+insert_subtable_table(struct dpcls *cls, struct dpcls_subtable* subtable)
+{
+    int i;
+    ovs_assert(subtable != NULL );
+    for (i = 1; i < SUBTABLE_TABLE_LENGTH; i++) {
+        if (cls->subtable_table[i] == subtable) {
+            /* When we insert, we should know that the subtable is not inserted
+             * before.
+             */
+            VLOG_ERR("already have the subtable in subtable_table");
+            return -1;
+        }
+    }
+
+    for (i = 1; i < SUBTABLE_TABLE_LENGTH; i++) {
+        if (cls->subtable_table[i] == 0) {
+            cls->subtable_table[i] = subtable;
+            return i;
+        }
+    }
+    /* When the subtable count is larger than subtable_table_length (255 now)*/
+    VLOG_INFO("create subtable in subtable_table failed, overflow");
+    return 0;
+}
+
+static int
+remove_subtable_table(struct dpcls *cls, struct dpcls_subtable* subtable)
+{
+
+    int i;
+    ovs_assert(subtable != NULL );
+    for (i = 1; i < SUBTABLE_TABLE_LENGTH; i++) {
+        if (cls->subtable_table[i] == subtable) {
+            /*reset to subtable index in subtable_table to NULL*/
+            cls->subtable_table[i] = (struct dpcls_subtable*)0;
+            return i;
+        }
+    }
+
+    /* Happens when remove while more subtable than the subtable_table_length*/
+    VLOG_INFO("cannot find the table ptr in subtable_table to remove");
+    return 0;
+}
+
+static int
+find_index_in_subtable_table(struct dpcls *cls,
+                             struct dpcls_subtable* subtable)
+{
+    int i;
+    ovs_assert(subtable != NULL );
+    for (i = 1; i < SUBTABLE_TABLE_LENGTH; i++) {
+        if (cls->subtable_table[i] == subtable) {
+            return i;
+        }
+    }
+    return 0;
+}
+
+
 /* Initializes 'cls' as a classifier that initially contains no classification
  * rules. */
 static void
 dpcls_init(struct dpcls *cls)
 {
+    int i;
     cmap_init(&cls->subtables_map);
     pvector_init(&cls->subtables);
+    int ret = posix_memalign((void**)&cls->cdtable, 64,
+                            sizeof(struct cuckoo_distributor));
+    if(ret != 0) {
+        VLOG_ERR("Create cuckoo distributor failed");
+    }
+    cd_init(cls->cdtable);
+    cls->cd_on = 1;
+    cls->cd_insert_cnt = 0;
+    cls->cd_insert_fail_cnt = 0;
+    for(i = 0; i < SUBTABLE_TABLE_LENGTH; i++){
+       cls->subtable_table[i] = 0;
+    }
+    random_set_seed(100);
 }
 
 static void
 dpcls_destroy_subtable(struct dpcls *cls, struct dpcls_subtable *subtable)
 {
     VLOG_DBG("Destroying subtable %p for in_port %d", subtable, cls->in_port);
+    remove_subtable_table(cls, subtable);
     pvector_remove(&cls->subtables, subtable);
     cmap_remove(&cls->subtables_map, &subtable->cmap_node,
                 subtable->mask.hash);
@@ -5465,6 +5940,7 @@ dpcls_destroy(struct dpcls *cls)
         }
         cmap_destroy(&cls->subtables_map);
         pvector_destroy(&cls->subtables);
+        cd_delete(cls->cdtable);
     }
 }
 
@@ -5478,6 +5954,7 @@ dpcls_create_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
                        - sizeof subtable->mask.mf + mask->len);
     cmap_init(&subtable->rules);
     subtable->hit_cnt = 0;
+    subtable->access_cnt = 0;
     netdev_flow_key_clone(&subtable->mask, mask);
     cmap_insert(&cls->subtables_map, &subtable->cmap_node, mask->hash);
     /* Add the new subtable at the end of the pvector (with no hits yet) */
@@ -5485,6 +5962,9 @@ dpcls_create_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
     VLOG_DBG("Creating %"PRIuSIZE". subtable %p for in_port %d",
              cmap_count(&cls->subtables_map), subtable, cls->in_port);
     pvector_publish(&cls->subtables);
+    int ret = insert_subtable_table(cls, subtable);
+    /* The subtable should not be in subtable-table yet */
+    ovs_assert(ret >= 0);
 
     return subtable;
 }
@@ -5510,7 +5990,6 @@ dpcls_sort_subtable_vector(struct dpcls *cls)
 {
     struct pvector *pvec = &cls->subtables;
     struct dpcls_subtable *subtable;
-
     PVECTOR_FOR_EACH (subtable, pvec) {
         pvector_change_priority(pvec, subtable, subtable->hit_cnt);
         subtable->hit_cnt = 0;
@@ -5537,6 +6016,39 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd)
             pmd->next_optimization = now + DPCLS_OPTIMIZATION_INTERVAL;
         }
     }
+
+    if (now > pmd->next_cd_optimization) {
+
+        CMAP_FOR_EACH (cls, node, &pmd->classifiers) {
+            struct pvector *pvec = &cls->subtables;
+            struct dpcls_subtable *subtable;
+            float avg_table_cnt = 0;
+            int cnt = 0;
+            uint32_t total = 0;
+            uint32_t sum = 0;
+            PVECTOR_FOR_EACH (subtable,pvec) {
+                sum += subtable->access_cnt * cnt;
+                total += subtable->access_cnt;
+                subtable->access_cnt = 0;
+                cnt++;
+            }
+            /* If total access is too small, we keep previous decision */
+            if (total > cnt * 5) {
+                avg_table_cnt = (float)sum / total;
+            }
+            else {
+                avg_table_cnt = -1;
+            }
+
+            if (avg_table_cnt >= 1) {
+                cls->cd_on = 1;
+            }
+            else if (avg_table_cnt != -1) {
+                cls->cd_on = 0;
+            }
+        }
+        pmd->next_cd_optimization = now + DPCLS_CD_OPTIMIZATION_INTERVAL;
+    }
 }
 
 /* Insert 'rule' into 'cls'. */
@@ -5587,6 +6099,10 @@ dpcls_rule_matches_key(const struct dpcls_rule *rule,
     return true;
 }
 
+
+
+
+
 /* For each miniflow in 'keys' performs a classifier lookup writing the result
  * into the corresponding slot in 'rules'.  If a particular entry in 'keys' is
  * NULL it is skipped.
@@ -5597,40 +6113,113 @@ dpcls_rule_matches_key(const struct dpcls_rule *rule,
  * priorities, instead returning any rule which matches the flow.
  *
  * Returns true if all miniflows found a corresponding rule. */
-static bool
-dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key keys[],
-             struct dpcls_rule **rules, const size_t cnt,
-             int *num_lookups_p)
+ static bool
+ dpcls_lookup(struct dp_netdev_pmd_thread *pmd, struct dpcls *cls,
+              const struct netdev_flow_key keys[],
+              struct dpcls_rule **rules, const size_t cnt,
+              int *num_lookups_p)
 {
     /* The received 'cnt' miniflows are the search-keys that will be processed
      * to find a matching entry into the available subtables.
      * The number of bits in map_type is equal to NETDEV_MAX_BURST. */
+
+    if (cnt == 0) {
+        return false;
+    }
+
     typedef uint32_t map_type;
+
 #define MAP_BITS (sizeof(map_type) * CHAR_BIT)
     BUILD_ASSERT_DECL(MAP_BITS >= NETDEV_MAX_BURST);
 
     struct dpcls_subtable *subtable;
-
     map_type keys_map = TYPE_MAXIMUM(map_type); /* Set all bits. */
-    map_type found_map;
+    map_type found_map = keys_map;
     uint32_t hashes[MAP_BITS];
     const struct cmap_node *nodes[MAP_BITS];
+    int cd_match = 0;
 
     if (cnt != MAP_BITS) {
         keys_map >>= MAP_BITS - cnt; /* Clear extra bits. */
     }
+
     memset(rules, 0, cnt * sizeof *rules);
 
     int lookups_match = 0, subtable_pos = 1;
 
-    /* The Datapath classifier - aka dpcls - is composed of subtables.
-     * Subtables are dynamically created as needed when new rules are inserted.
-     * Each subtable collects rules with matches on a specific subset of packet
-     * fields as defined by the subtable's mask.  We proceed to process every
-     * search-key against each subtable, but when a match is found for a
-     * search-key, the search for that key can stop because the rules are
-     * non-overlapping. */
+    if (cls->cd_on) {
+
+        int i;
+        int data[MAP_BITS];
+        int valid_cnt = count_1bits(keys_map);
+        int nfound = cd_lookup_bulk_pipe(cls, keys, valid_cnt, &found_map,
+                                            data);
+
+        debug_print("CD found %d   maps %x\n", nfound, found_map);
+
+        ULLONG_FOR_EACH_1(i, found_map) {
+            hashes[i] = netdev_flow_key_hash_in_mask(&keys[i],
+                                    &(cls->subtable_table[data[i]])->mask);
+            nodes[i] = cmap_find(&((cls->subtable_table[data[i]])->rules),
+                                 hashes[i]);
+            if (nodes[i] != NULL) {
+                struct dpcls_rule *rule;
+                CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) {
+                    if (OVS_LIKELY(dpcls_rule_matches_key(rule, &keys[i]))) {
+                        rules[i] = rule;
+                        cls->subtable_table[data[i]]->access_cnt++;
+                        lookups_match += 1;
+                        cd_match += 1;
+                        goto scnext;
+                    }
+                }
+                ULLONG_SET0(found_map, i);  /* Did not match. */
+                /*
+                 * Here means key same in subtable but not same rule
+                 * since CD find the correct subtable,
+                 * we dont need to insert to CD.
+                 */
+
+                scnext:
+                ;
+            }
+            else if (nodes[i] == NULL) {
+                 ULLONG_SET0(found_map, i);
+                 /* Here means in CD but not in the target subtable.
+                  * meaning that it matches to a same (but wrong) key in CD.
+                  * we should insert it into CD later when we know
+                  * which subtable it hits.
+                  */
+            }
+        }
+
+        keys_map &= ~found_map;
+
+        dp_netdev_count_packet(pmd, DP_CD_STAT_HIT, cd_match);
+        dp_netdev_count_packet(pmd, DP_CD_STAT_MISS, cnt-cd_match);
+
+        if (!keys_map) {
+            if (num_lookups_p) {
+                *num_lookups_p = lookups_match;
+            }
+
+            debug_print( "every key found in CD\n");
+            return true;              /* All found. */
+        }
+
+        debug_print( "Need search subtable (CD miss)\n");
+    }
+
+
+    /*The Datapath classifier - aka dpcls - is composed of subtables.
+    * Subtables are dynamically created as needed when new rules are inserted.
+    * Each subtable collects rules with matches on a specific subset of packet
+    * fields as defined by the subtable's mask.  We proceed to process every
+    * search-key against each subtable, but when a match is found for a
+    * search-key, the search for that key can stop because the rules are
+    * non-overlapping. */
     PVECTOR_FOR_EACH (subtable, &cls->subtables) {
+
         int i;
 
         /* Compute hashes for the remaining keys.  Each search-key is
@@ -5649,13 +6238,13 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key keys[],
          * search-key. */
         ULLONG_FOR_EACH_1(i, found_map) {
             struct dpcls_rule *rule;
-
             CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) {
                 if (OVS_LIKELY(dpcls_rule_matches_key(rule, &keys[i]))) {
                     rules[i] = rule;
                     /* Even at 20 Mpps the 32-bit hit_cnt cannot wrap
                      * within one second optimization interval. */
                     subtable->hit_cnt++;
+                    subtable->access_cnt++;
                     lookups_match += subtable_pos;
                     goto next;
                 }
@@ -5663,10 +6252,25 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key keys[],
             /* None of the found rules was a match.  Reset the i-th bit to
              * keep searching this key in the next subtable. */
             ULLONG_SET0(found_map, i);  /* Did not match. */
+            continue;
         next:
-            ;                     /* Keep Sparse happy. */
+            debug_print("find in subtable\n");
+            /* If we find things here, it means it misses in CD.
+             * we should insert into CD.
+             */
+
+           if (cls->cd_on) {
+                int index = find_index_in_subtable_table(cls, subtable);
+                /* If 0, means not in subtable_table, then no need to insert
+                 * into CD.
+                 */
+                if (index != 0) {
+                    cd_insert(cls->cdtable, &keys[i], index);
+                }
+            }
         }
         keys_map &= ~found_map;             /* Clear the found rules. */
+
         if (!keys_map) {
             if (num_lookups_p) {
                 *num_lookups_p = lookups_match;
@@ -5678,5 +6282,9 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key keys[],
     if (num_lookups_p) {
         *num_lookups_p = lookups_match;
     }
+    debug_print("Miss in both CD and subtable\n");
+    /* Things that miss in both tables should also be inserted into CD.
+     * the upcall function should be able to handle it.
+     */
     return false;                     /* Some misses. */
 }
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index 0c2ea38..937dd3d 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -9383,7 +9383,8 @@ AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
 
 dnl Start a new connection from port 1.
 AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,ttl=64,frag=no),udp(src=1,dst=2)'])
-
+# cuckoo distributor requires time for initilization, add sleep
+sleep 2
 AT_CHECK([cat ovs-vswitchd.log | strip_ufid | filter_flow_install], [0], [dnl
 recirc_id(0),in_port(1),eth_type(0x0800),ipv4(proto=17,frag=no), actions:ct(commit)
 ])
-- 
1.9.1



More information about the dev mailing list