[ovs-dev] [RFC 4/4] dpif-netdev.c: Add indirect table

Yipeng Wang yipeng1.wang at intel.com
Thu Jan 18 18:20:24 UTC 2018


If we store 8-byte pointers in the DFC, its memory requirement is
large. When VMs or multiple PMDs run on the same platform, they compete
for the shared CPU cache, so we want the DFC to be as memory efficient
as possible.

The indirect table is a simple hash table that maps the DFC's lookup
result to a dp_netdev_flow pointer. It reduces the memory size of the
DFC, on the assumption that the megaflow count is much smaller than the
exact-match flow count. With this commit, each DFC entry shrinks from
an 8-byte pointer to a 2-byte index, so the memory/cache requirement is
almost halved. Another approach we plan to try is to reuse the
flow_table as the indirect table.

The indirect table size is a fixed constant for now.
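
To make the two-level scheme concrete, here is a minimal standalone
sketch (hypothetical toy code, not the patch itself; refcounting and
unref of an evicted colliding entry, which the real
indirect_table_insert() handles, are elided):

    #include <stdint.h>

    #define INDIRECT_TABLE_SIZE (1u << 12)
    #define INDIRECT_TABLE_MASK (INDIRECT_TABLE_SIZE - 1)

    struct flow;                         /* stand-in for dp_netdev_flow */
    static struct flow *indirect_table[INDIRECT_TABLE_SIZE];

    /* Insert: hash the flow's ufid into a 12-bit slot; the DFC bucket
     * then stores only this 2-byte index instead of an 8-byte pointer.
     * A flow whose ufid hash collides replaces the previous occupant. */
    static inline uint16_t
    toy_indirect_insert(struct flow *flow, uint32_t ufid_hash)
    {
        uint16_t index = ufid_hash & INDIRECT_TABLE_MASK;
        indirect_table[index] = flow;
        return index;                    /* stored in bucket->index[i] */
    }

    /* Lookup: a DFC signature hit costs one extra dereference. */
    static inline struct flow *
    toy_indirect_lookup(uint16_t index)
    {
        return indirect_table[index];
    }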

Signed-off-by: Yipeng Wang <yipeng1.wang at intel.com>
---
 lib/dpif-netdev.c | 69 +++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 44 insertions(+), 25 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 50a1d25..35197d3 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -151,6 +151,12 @@ struct netdev_flow_key {
 
 #define DFC_MASK_LEN 20
 #define DFC_ENTRY_PER_BUCKET 8
+
+/* For now the indirect table size is a fixed constant.  Ideally it should
+ * be sized according to the maximum megaflow count, but it must be no larger
+ * than 2^16 so that a 2-byte index can address every slot. */
+#define INDIRECT_TABLE_SIZE (1u << 12)
+#define INDIRECT_TABLE_MASK (INDIRECT_TABLE_SIZE - 1)
 #define DFC_ENTRIES (1u << DFC_MASK_LEN)
 #define DFC_BUCKET_CNT (DFC_ENTRIES / DFC_ENTRY_PER_BUCKET)
 #define DFC_MASK (DFC_BUCKET_CNT - 1)
@@ -175,13 +181,14 @@ struct emc_cache {
 
 struct dfc_bucket {
     uint16_t sig[DFC_ENTRY_PER_BUCKET];
-    struct dp_netdev_flow *flow[DFC_ENTRY_PER_BUCKET];
+    uint16_t index[DFC_ENTRY_PER_BUCKET];
 };
 
 struct dfc_cache {
     struct emc_cache emc_cache;
     struct dfc_bucket buckets[DFC_BUCKET_CNT];
     int sweep_idx;
+    struct dp_netdev_flow *indirect_table[INDIRECT_TABLE_SIZE];
 };
 
 
@@ -754,7 +761,7 @@ static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
 
 static inline bool dfc_entry_alive(struct dp_netdev_flow *flow);
 static void emc_clear_entry(struct emc_entry *ce);
-static void dfc_clear_entry(struct dfc_bucket *b, int idx);
+static void dfc_clear_entry(struct dp_netdev_flow **ind_table, struct dfc_bucket *b, int idx);
 
 static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
 
@@ -782,9 +789,12 @@ dfc_cache_init(struct dfc_cache *flow_cache)
     emc_cache_init(&flow_cache->emc_cache);
     for (i = 0; i < DFC_BUCKET_CNT; i++) {
         for (j = 0; j < DFC_ENTRY_PER_BUCKET; j++) {
-            flow_cache->buckets[i].flow[j] = NULL;
+            flow_cache->buckets[i].sig[j] = 0;
         }
     }
+    for (i = 0; i < INDIRECT_TABLE_SIZE; i++) {
+        flow_cache->indirect_table[i] = NULL;
+    }
     flow_cache->sweep_idx = 0;
 }
 
@@ -805,7 +815,7 @@ dfc_cache_uninit(struct dfc_cache *flow_cache)
 
     for (i = 0; i < DFC_BUCKET_CNT; i++) {
         for (j = 0; j < DFC_ENTRY_PER_BUCKET; j++) {
-            dfc_clear_entry(&(flow_cache->buckets[i]), j);
+            dfc_clear_entry(flow_cache->indirect_table, &(flow_cache->buckets[i]), j);
         }
     }
     emc_cache_uninit(&flow_cache->emc_cache);
@@ -2259,7 +2269,7 @@ dfc_entry_get(struct dfc_cache *cache, const uint32_t hash)
     uint16_t sig = hash >> 16;
     for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) {
         if(bucket->sig[i] == sig) {
-            return bucket->flow[i];
+            return cache->indirect_table[bucket->index[i]];
         }
     }
     return NULL;
@@ -2272,28 +2282,33 @@ dfc_entry_alive(struct dp_netdev_flow *flow)
 }
 
 static void
-dfc_clear_entry(struct dfc_bucket *b, int idx)
+dfc_clear_entry(struct dp_netdev_flow **ind_table, struct dfc_bucket *b, int idx)
 {
-    if (b->flow[idx]) {
-        dp_netdev_flow_unref(b->flow[idx]);
-        b->flow[idx] = NULL;
+    if (ind_table[b->index[idx]]) {
+        dp_netdev_flow_unref(ind_table[b->index[idx]]);
+        ind_table[b->index[idx]] = NULL;
     }
 }
 
-static inline void
-dfc_change_entry(struct dfc_bucket *b, int idx, struct dp_netdev_flow *flow)
+
+static inline uint16_t
+indirect_table_insert(struct dp_netdev_flow **indirect_table,
+                      struct dp_netdev_flow *flow)
 {
-    if (b->flow[idx] != flow) {
-        if (b->flow[idx]) {
-            dp_netdev_flow_unref(b->flow[idx]);
+    uint32_t hash = dp_netdev_flow_hash(&flow->ufid);
+    uint16_t index = hash & INDIRECT_TABLE_MASK;
+    if (indirect_table[index] != flow) {
+        if (indirect_table[index]) {
+            dp_netdev_flow_unref(indirect_table[index]);
         }
-
         if (dp_netdev_flow_ref(flow)) {
-            b->flow[idx] = flow;
-        } else {
-            b->flow[idx] = NULL;
+            indirect_table[index] = flow;
+        }
+        else {
+            indirect_table[index] = NULL;
         }
     }
+    return index;
 }
 
 static inline void
@@ -2302,25 +2317,28 @@ dfc_insert(struct dp_netdev_pmd_thread *pmd,
            struct dp_netdev_flow *flow)
 {
     struct dfc_cache *cache = &pmd->flow_cache;
-
+    struct dp_netdev_flow **ind_table = cache->indirect_table;
     struct dfc_bucket *bucket = &cache->buckets[key->hash & DFC_MASK];
+
+    uint16_t index = indirect_table_insert(ind_table, flow);
+
     uint16_t sig = key->hash >> 16;
     for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) {
         if(bucket->sig[i] == sig) {
-            dfc_change_entry(bucket, i, flow);
+            bucket->index[i] = index;
             return;
         }
     }
     for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) {
-        if(bucket->flow[i] == NULL) {
+        if (bucket->sig[i] == 0) {
             bucket->sig[i] = sig;
-            dfc_change_entry(bucket, i, flow);
+            bucket->index[i] = index;
             return;
         }
     }
     int idx = random_uint32() & (DFC_ENTRY_PER_BUCKET - 1);
     bucket->sig[idx] = sig;
-    dfc_change_entry(bucket, idx, flow);
+    bucket->index[idx] = index;
 }
 
 static inline struct dp_netdev_flow *
@@ -2381,8 +2399,9 @@ dfc_slow_sweep(struct dfc_cache *cache)
         uint32_t bkt = cache->sweep_idx / DFC_ENTRY_PER_BUCKET;
         struct dfc_bucket *bucket = &cache->buckets[bkt];
         for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++){
-            if (!dfc_entry_alive(bucket->flow[i])) {
-                dfc_clear_entry(bucket, i);
+            if (!dfc_entry_alive(cache->indirect_table[bucket->index[i]])) {
+                dfc_clear_entry(cache->indirect_table, bucket, i);
+                bucket->sig[i] = 0;
             }
         }
     }
-- 
2.7.4


