[ovs-dev] [PATCH v2 3/4] dpif-netdev: use flow_table as indirect table

yipeng wang yipeng1.wang at intel.com
Mon Apr 2 17:04:42 UTC 2018


From: Yipeng Wang <yipeng1.wang at intel.com>

It is not memory efficient to store pointers in the distributor.
In this commit, we store a 2-byte index into flow_table instead.
If the flow table is larger than 2^16 entries, the rules stored
at higher indexes will not be cached in the distributor.

In cmap, we add two APIs: one to find a flow by its index, and one
to find the index for a given hash. Since the flow table is a cuckoo
hash, keys may be moved around and may also be rehashed. In such
cases the distributor will see misses and will be refreshed.
However, this should not happen frequently, and since the
distributor is only a cache, stale entries cannot cause functional
errors.

Signed-off-by: Yipeng Wang <yipeng1.wang at intel.com>
---
 lib/cmap.c        |  73 ++++++++++++++++++++++++++++++
 lib/cmap.h        |   5 +++
 lib/dpif-netdev.c | 131 +++++++++++++++++++++---------------------------------
 3 files changed, 128 insertions(+), 81 deletions(-)

diff --git a/lib/cmap.c b/lib/cmap.c
index 07719a8..db1c806 100644
--- a/lib/cmap.c
+++ b/lib/cmap.c
@@ -373,6 +373,79 @@ cmap_find(const struct cmap *cmap, uint32_t hash)
                        hash);
 }
 
+/* Find a node by the index of its entry in the cmap: entry (index % CMAP_K)
+ * of bucket (index / CMAP_K).
+ * Notice that it is not protected by the optimistic lock (versioning) for
+ * performance reasons. Currently it is only used by the datapath DFC cache.
+ *
+ * Returns the node at 'index', or NULL if 'index' is beyond the boundary. */
+const struct cmap_node *
+cmap_find_by_index(const struct cmap *cmap, uint16_t index)
+{
+    const struct cmap_impl *impl = cmap_get_impl(cmap);
+
+    uint32_t b = index / CMAP_K;
+    uint32_t e = index % CMAP_K;
+
+    if (b > impl->mask) {
+        return NULL;
+    }
+
+    const struct cmap_bucket *bucket = &impl->buckets[b];
+
+    return cmap_node_next(&bucket->nodes[e]);
+}
+
+/* Find the index of the entry matching the given hash value. Currently only
+ * used by the datapath DFC cache.
+ *
+ * Returns the index of the entry if found, or UINT32_MAX if not found. */
+
+uint32_t
+cmap_find_index(const struct cmap *cmap, uint32_t hash)
+{
+    const struct cmap_impl *impl = cmap_get_impl(cmap);
+    uint32_t h1 = rehash(impl, hash);
+    uint32_t h2 = other_hash(h1);
+
+    uint32_t b_index1 = h1 & impl->mask;
+    uint32_t b_index2 = h2 & impl->mask;
+
+    uint32_t c1, c2;
+    uint32_t index = UINT32_MAX;
+
+    const struct cmap_bucket *b1 = &impl->buckets[b_index1];
+    const struct cmap_bucket *b2 = &impl->buckets[b_index2];
+
+    do {
+        do {
+            c1 = read_even_counter(b1);
+            for (int i = 0; i < CMAP_K; i++) {
+                if (b1->hashes[i] == hash) {
+                    index = b_index1 * CMAP_K + i;
+                 }
+            }
+        } while (OVS_UNLIKELY(counter_changed(b1, c1)));
+        if (index != UINT32_MAX) {
+            break;
+        }
+        do {
+            c2 = read_even_counter(b2);
+            for (int i = 0; i < CMAP_K; i++) {
+                if (b2->hashes[i] == hash) {
+                    index = b_index2 * CMAP_K + i;
+                }
+            }
+        } while (OVS_UNLIKELY(counter_changed(b2, c2)));
+
+        if (index != UINT32_MAX) {
+            break;
+        }
+    } while (OVS_UNLIKELY(counter_changed(b1, c1)));
+
+    return index;
+}
+
 /* Looks up multiple 'hashes', when the corresponding bit in 'map' is 1,
  * and sets the corresponding pointer in 'nodes', if the hash value was
  * found from the 'cmap'.  In other cases the 'nodes' values are not changed,
diff --git a/lib/cmap.h b/lib/cmap.h
index 8bfb6c0..076afd6 100644
--- a/lib/cmap.h
+++ b/lib/cmap.h
@@ -145,6 +145,11 @@ size_t cmap_replace(struct cmap *, struct cmap_node *old_node,
 const struct cmap_node *cmap_find(const struct cmap *, uint32_t hash);
 struct cmap_node *cmap_find_protected(const struct cmap *, uint32_t hash);
 
+/* Find a node by its entry index, or find the entry index for a hash. */
+const struct cmap_node * cmap_find_by_index(const struct cmap *,
+                                            uint16_t index);
+uint32_t cmap_find_index(const struct cmap *, uint32_t hash);
+
 /* Looks up multiple 'hashes', when the corresponding bit in 'map' is 1,
  * and sets the corresponding pointer in 'nodes', if the hash value was
  * found from the 'cmap'.  In other cases the 'nodes' values are not changed,
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 28d6086..8fe736d 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -175,13 +175,12 @@ struct emc_cache {
 
 struct dfc_bucket {
     uint16_t sig[DFC_ENTRY_PER_BUCKET];
-    struct dp_netdev_flow *flow[DFC_ENTRY_PER_BUCKET];
+    uint16_t flow_idx[DFC_ENTRY_PER_BUCKET];
 };
 
 struct dfc_cache {
     struct emc_cache emc_cache;
     struct dfc_bucket buckets[DFC_BUCKET_CNT];
-    int sweep_idx;
 };
 
 
@@ -722,7 +721,6 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
 static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
                                       struct tx_port *tx);
 
-static inline bool dfc_entry_alive(struct dp_netdev_flow *flow);
 static void emc_clear_entry(struct emc_entry *ce);
 static void dfc_clear_entry(struct dfc_bucket *b, int idx);
 
@@ -752,10 +750,9 @@ dfc_cache_init(struct dfc_cache *flow_cache)
     emc_cache_init(&flow_cache->emc_cache);
     for (i = 0; i < DFC_BUCKET_CNT; i++) {
         for (j = 0; j < DFC_ENTRY_PER_BUCKET; j++) {
-            flow_cache->buckets[i].flow[j] = NULL;
+            flow_cache->buckets[i].flow_idx[j] = UINT16_MAX;
         }
     }
-    flow_cache->sweep_idx = 0;
 }
 
 static void
@@ -2209,84 +2206,72 @@ emc_lookup(struct emc_cache *emc, const struct netdev_flow_key *key)
     return NULL;
 }
 
-static inline struct dp_netdev_flow *
-dfc_entry_get(struct dfc_cache *cache, const uint32_t hash)
+static inline const struct cmap_node *
+dfc_entry_get(struct dp_netdev_pmd_thread *pmd, struct dfc_cache *cache, const uint32_t hash)
 {
     struct dfc_bucket *bucket = &cache->buckets[hash & DFC_MASK];
     uint16_t sig = hash >> 16;
+    uint16_t index = UINT16_MAX;
+
     for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) {
-        if(bucket->sig[i] == sig) {
-            return bucket->flow[i];
+        if (bucket->sig[i] == sig) {
+            index = bucket->flow_idx[i];
+            break;
         }
     }
+    if (index != UINT16_MAX) {
+        return cmap_find_by_index(&pmd->flow_table, index);
+    }
     return NULL;
 }
 
-static inline bool
-dfc_entry_alive(struct dp_netdev_flow *flow)
-{
-    return flow && !flow->dead;
-}
 
 static void
 dfc_clear_entry(struct dfc_bucket *b, int idx)
 {
-    if (b->flow[idx]) {
-        dp_netdev_flow_unref(b->flow[idx]);
-        b->flow[idx] = NULL;
-    }
-}
-
-static inline void
-dfc_change_entry(struct dfc_bucket *b, int idx, struct dp_netdev_flow *flow)
-{
-    if (b->flow[idx] != flow) {
-        if (b->flow[idx]) {
-            dp_netdev_flow_unref(b->flow[idx]);
-        }
-
-        if (dp_netdev_flow_ref(flow)) {
-            b->flow[idx] = flow;
-        } else {
-            b->flow[idx] = NULL;
-        }
-    }
+    b->flow_idx[idx] = UINT16_MAX;
 }
 
 static inline void
 dfc_insert(struct dp_netdev_pmd_thread *pmd,
            const struct netdev_flow_key *key,
-           struct dp_netdev_flow *flow)
+           uint16_t index)
 {
     struct dfc_cache *cache = &pmd->flow_cache;
-
     struct dfc_bucket *bucket = &cache->buckets[key->hash & DFC_MASK];
+
+    if (index == UINT16_MAX) {
+        return;
+    }
+
     uint16_t sig = key->hash >> 16;
     for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) {
         if(bucket->sig[i] == sig) {
-            dfc_change_entry(bucket, i, flow);
+            bucket->flow_idx[i] = index;
             return;
         }
     }
     for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) {
-        if(bucket->flow[i] == NULL) {
+        if(bucket->flow_idx[i] == UINT16_MAX) {
             bucket->sig[i] = sig;
-            dfc_change_entry(bucket, i, flow);
+            bucket->flow_idx[i] = index;
             return;
         }
     }
     int idx = random_uint32() & (DFC_ENTRY_PER_BUCKET - 1);
     bucket->sig[idx] = sig;
-    dfc_change_entry(bucket, idx, flow);
+    bucket->flow_idx[idx] = index;
 }
 
 static inline struct dp_netdev_flow *
-dfc_lookup(struct dp_netdev_pmd_thread *pmd, struct netdev_flow_key *key,
-           bool *exact_match)
+dfc_lookup(struct dp_netdev_pmd_thread *pmd,
+            struct netdev_flow_key *key,
+            bool *exact_match)
 {
     struct dp_netdev_flow *flow;
     uint32_t cur_min;
     struct dfc_cache *cache = &pmd->flow_cache;
+    const struct cmap_node *flow_node;
 
     /* Try an EMC lookup first. */
     atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
@@ -2303,23 +2288,18 @@ dfc_lookup(struct dp_netdev_pmd_thread *pmd, struct netdev_flow_key *key,
     }
 
     /* EMC lookup not successful: try DFC lookup. */
-    flow = dfc_entry_get(cache, key->hash);
-
-    if (flow != NULL && dfc_entry_alive(flow) &&
-        dpcls_rule_matches_key(&flow->cr, key)) {
-
-        /* Found a match in DFC. Insert into EMC for subsequent lookups.
-         * We use probabilistic insertion here so that mainly elephant
-         * flows enter EMC. */
-        key->len = netdev_flow_key_size(miniflow_n_values(&key->mf));
-        emc_probabilistic_insert(pmd, key, flow);
-        *exact_match = false;
-        return flow;
-    } else {
+    flow_node = dfc_entry_get(pmd, cache, key->hash);
 
-        /* No match. Need to go to DPCLS lookup. */
-        return NULL;
+    CMAP_NODE_FOR_EACH (flow, node, flow_node) {
+        if (OVS_LIKELY(dpcls_rule_matches_key(&flow->cr, key))) {
+            key->len = netdev_flow_key_size(miniflow_n_values(&key->mf));
+            emc_probabilistic_insert(pmd, key, flow);
+            *exact_match = false;
+            return flow;
+        }
     }
+
+    return NULL;
 }
 
 /* Check and clear dead flow references slowly (one entry at each
@@ -2335,26 +2315,6 @@ emc_slow_sweep(struct emc_cache *emc)
     emc->sweep_idx = (emc->sweep_idx + 1) & EMC_MASK;
 }
 
-static void
-dfc_slow_sweep(struct dfc_cache *cache)
-{
-    /* Sweep the EMC so that both finish in the same time. */
-    if ((cache->sweep_idx & (DFC_BUCKET_CNT / EMC_ENTRIES - 1)) == 0) {
-        emc_slow_sweep(&cache->emc_cache);
-    }
-
-    if((cache->sweep_idx & (DFC_ENTRY_PER_BUCKET - 1)) == 0) {
-        uint32_t bkt = cache->sweep_idx / DFC_ENTRY_PER_BUCKET;
-        struct dfc_bucket *bucket = &cache->buckets[bkt];
-        for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++){
-            if (!dfc_entry_alive(bucket->flow[i])) {
-                dfc_clear_entry(bucket, i);
-            }
-        }
-    }
-    cache->sweep_idx = (cache->sweep_idx + 1) & (DFC_ENTRIES - 1);
-}
-
 static struct dp_netdev_flow *
 dp_netdev_pmd_lookup_flow(struct dp_netdev_pmd_thread *pmd,
                           const struct netdev_flow_key *key,
@@ -4319,8 +4279,9 @@ reload:
 
             coverage_try_clear();
             dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt);
+
             if (!ovsrcu_try_quiesce()) {
-                dfc_slow_sweep(&pmd->flow_cache);
+                emc_slow_sweep(&((pmd->flow_cache).emc_cache));
             }
 
             atomic_read_relaxed(&pmd->reload, &reload);
@@ -5178,6 +5139,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
         }
         miniflow_extract(packet, &key->mf);
         key->len = 0; /* Not computed yet. */
+
         if (!md_is_valid) {
             key->hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet,
                     &key->mf);
@@ -5273,7 +5235,11 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
                                              add_actions->size);
         }
         ovs_mutex_unlock(&pmd->flow_mutex);
-        dfc_insert(pmd, key, netdev_flow);
+        uint32_t cmap_index = cmap_find_index(&pmd->flow_table,
+                                    dp_netdev_flow_hash(&netdev_flow->ufid));
+        uint16_t index = (cmap_index >= UINT16_MAX) ? UINT16_MAX :
+                                                        (uint16_t)cmap_index;
+        dfc_insert(pmd, key, index);
     }
     return error;
 }
@@ -5368,8 +5334,11 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
         }
 
         flow = dp_netdev_flow_cast(rules[i]);
-
-        dfc_insert(pmd, &keys[i], flow);
+        uint32_t cmap_index = cmap_find_index(&pmd->flow_table,
+                                            dp_netdev_flow_hash(&flow->ufid));
+        uint16_t index = (cmap_index >= UINT16_MAX) ? UINT16_MAX :
+                                                        (uint16_t)cmap_index;
+        dfc_insert(pmd, &keys[i], index);
         dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches);
     }
 
-- 
2.7.4



More information about the dev mailing list