[ovs-dev] [RFC 4/4] dpif-netdev.c: Add indirect table

Wang, Yipeng1 yipeng1.wang at intel.com
Mon Feb 26 19:21:11 UTC 2018


Thanks for the comments, replies inline:

>-----Original Message-----
>From: Bodireddy, Bhanuprakash
>Sent: Friday, February 23, 2018 1:06 PM
>To: Wang, Yipeng1 <yipeng1.wang at intel.com>; dev at openvswitch.org; jan.scheurich at ericsson.com
>Cc: Tai, Charlie <charlie.tai at intel.com>
>Subject: RE: [ovs-dev] [RFC 4/4] dpif-netdev.c: Add indirect table
>
>Hi Yipeng,
>
>>If we store pointers in the DFC, the memory requirement is large. When
>>there are VMs or multiple PMDs running on the same platform, they will
>>compete for the shared cache. So we want the DFC to be as
>>memory-efficient as possible.
>
>>
>>The indirect table is a simple hash table that maps the DFC's result to
>>the dp_netdev_flow pointer. This is to reduce the memory size of the
>>DFC cache, assuming that the megaflow count is much smaller than the
>>exact-match flow count. With this commit, we reduce the 8-byte pointer
>>to a 2-byte index in each DFC entry, so that the memory/cache
>>requirement is almost halved. Another approach we plan to try is to use
>>the flow_table as the indirect table.
>
>I assume this patch is only aimed at reducing the DFC cache memory footprint and doesn't introduce any new functionality?
>
[Wang, Yipeng] Yes, by introducing an indirect table. We have another solution that reuses the "flow_table" as the indirect table, which we will post later; each approach has its pros and cons.
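
To make the layout concrete, here is a minimal standalone sketch of the
two-level lookup (simplified names and types, not the exact patch code):

/* A DFC bucket stores a 2-byte signature plus a 2-byte index per entry;
 * the index selects a slot in a small shared table of flow pointers,
 * replacing the 8-byte pointer stored per entry in 3/4. */
#include <stdint.h>

#define ENTRIES_PER_BUCKET 8
#define IND_TABLE_SIZE (1u << 12)       /* 4096 slots, 32 kB of pointers */

struct flow;                            /* stands in for dp_netdev_flow */

struct bucket {
    uint16_t sig[ENTRIES_PER_BUCKET];   /* top 16 bits of packet hash */
    uint16_t index[ENTRIES_PER_BUCKET]; /* slot in the indirect table */
};

static struct flow *
sketch_lookup(const struct bucket *b, struct flow *const *ind_table,
              uint32_t hash)
{
    uint16_t sig = hash >> 16;

    for (int i = 0; i < ENTRIES_PER_BUCKET; i++) {
        if (b->sig[i] == sig) {
            /* One extra dereference compared to storing the flow
             * pointer directly in the bucket. */
            return ind_table[b->index[i]];
        }
    }
    return NULL;
}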

>With this I see the dfc_bucket size is now 32 bytes, down from 80 bytes in 3/4, and the buckets will be aligned to cache lines.
>Also the dfc_cache size is reduced to ~8 MB from ~12 MB in 1/4 and ~14 MB in 3/4.
>
[Wang, Yipeng] With 1M entries, the sizes are as follows:
1/4: 8 MB
3/4: 10 MB
4/4: 4 MB + the indirect table, which would be 512 kB if sized to hold 64k rules.
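
To spell out the arithmetic (assuming 64-bit flow pointers): 1M entries
at 8 per bucket gives 128K buckets. A 3/4 bucket is 8 x 2 B signatures
plus 8 x 8 B pointers = 80 B, so 128K x 80 B = 10 MB; a 4/4 bucket is
8 x 2 B signatures plus 8 x 2 B indices = 32 B, so 128K x 32 B = 4 MB.
The indirect table at its current fixed size is 4096 x 8 B = 32 kB;
sized for the 64k megaflow maximum it would be 64k x 8 B = 512 kB.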


>I am guessing there might be some performance improvement with this patch due to the buckets aligning to cache lines, apart from the
>reduced memory footprint. Do you see any such advantage in your benchmarks?
>
[Wang, Yipeng] Actually, due to the indirect table (one more memory dereference), we saw a 1-2% throughput drop compared to 3/4 in simple tests.
But as you said, those tests do not capture the cache-efficiency effect; with contention for the shared cache we expect a performance
improvement, so we believe this approach is more scalable.

>Regards,
>Bhanuprakash.
>
>>
>>The indirect table size is a fixed constant for now.
>>
>>Signed-off-by: Yipeng Wang <yipeng1.wang at intel.com>
>>---
>> lib/dpif-netdev.c | 69 +++++++++++++++++++++++++++++++++++--------------------
>> 1 file changed, 44 insertions(+), 25 deletions(-)
>>
>>diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>>index 50a1d25..35197d3 100644
>>--- a/lib/dpif-netdev.c
>>+++ b/lib/dpif-netdev.c
>>@@ -151,6 +151,12 @@ struct netdev_flow_key {
>>
>> #define DFC_MASK_LEN 20
>> #define DFC_ENTRY_PER_BUCKET 8
>>+
>>+/* For now we fix the indirect table size; ideally it should be sized
>>+ * according to the max megaflow count, but less than 2^16. */
>>+#define INDIRECT_TABLE_SIZE (1u << 12)
>>+#define INDIRECT_TABLE_MASK (INDIRECT_TABLE_SIZE - 1)
>> #define DFC_ENTRIES (1u << DFC_MASK_LEN)
>> #define DFC_BUCKET_CNT (DFC_ENTRIES / DFC_ENTRY_PER_BUCKET)
>> #define DFC_MASK (DFC_BUCKET_CNT - 1)
>>@@ -175,13 +181,14 @@ struct emc_cache {
>>
>> struct dfc_bucket {
>>     uint16_t sig[DFC_ENTRY_PER_BUCKET];
>>-    struct dp_netdev_flow *flow[DFC_ENTRY_PER_BUCKET];
>>+    uint16_t index[DFC_ENTRY_PER_BUCKET];
>> };
>>
>> struct dfc_cache {
>>     struct emc_cache emc_cache;
>>     struct dfc_bucket buckets[DFC_BUCKET_CNT];
>>     int sweep_idx;
>>+    struct dp_netdev_flow *indirect_table[INDIRECT_TABLE_SIZE];
>> };
>>
>>
>
>>@@ -754,7 +761,7 @@ static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
>>
>> static inline bool dfc_entry_alive(struct dp_netdev_flow *flow);
>> static void emc_clear_entry(struct emc_entry *ce);
>>-static void dfc_clear_entry(struct dfc_bucket *b, int idx);
>>+static void dfc_clear_entry(struct dp_netdev_flow **flow,
>>+                            struct dfc_bucket *b, int idx);
>>
>> static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
>>
>>@@ -782,9 +789,12 @@ dfc_cache_init(struct dfc_cache *flow_cache)
>>     emc_cache_init(&flow_cache->emc_cache);
>>     for (i = 0; i < DFC_BUCKET_CNT; i++) {
>>         for (j = 0; j < DFC_ENTRY_PER_BUCKET; j++) {
>>-            flow_cache->buckets[i].flow[j] = NULL;
>>+            flow_cache->buckets[i].sig[j] = 0;
>>         }
>>     }
>>+    for (i = 0; i < INDIRECT_TABLE_SIZE; i++) {
>>+        flow_cache->indirect_table[i] = NULL;
>>+    }
>>     flow_cache->sweep_idx = 0;
>> }
>>
>>@@ -805,7 +815,7 @@ dfc_cache_uninit(struct dfc_cache *flow_cache)
>>
>>     for (i = 0; i < DFC_BUCKET_CNT; i++) {
>>         for (j = 0; j < DFC_ENTRY_PER_BUCKET; j++) {
>>-            dfc_clear_entry(&(flow_cache->buckets[i]), j);
>>+            dfc_clear_entry(flow_cache->indirect_table,
>>+                            &(flow_cache->buckets[i]), j);
>>         }
>>     }
>>     emc_cache_uninit(&flow_cache->emc_cache);
>>@@ -2259,7 +2269,7 @@ dfc_entry_get(struct dfc_cache *cache, const uint32_t hash)
>>     uint16_t sig = hash >> 16;
>>     for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) {
>>         if(bucket->sig[i] == sig) {
>>-            return bucket->flow[i];
>>+            return cache->indirect_table[bucket->index[i]];
>>         }
>>     }
>>     return NULL;
>>@@ -2272,28 +2282,33 @@ dfc_entry_alive(struct dp_netdev_flow *flow)
>> }
>>
>> static void
>>-dfc_clear_entry(struct dfc_bucket *b, int idx)
>>+dfc_clear_entry(struct dp_netdev_flow **ind_table,
>>+                struct dfc_bucket *b, int idx)
>> {
>>-    if (b->flow[idx]) {
>>-        dp_netdev_flow_unref(b->flow[idx]);
>>-        b->flow[idx] = NULL;
>>+    if (ind_table[b->index[idx]]) {
>>+        dp_netdev_flow_unref(ind_table[b->index[idx]]);
>>+        ind_table[b->index[idx]] = NULL;
>>     }
>> }
>>
>>-static inline void
>>-dfc_change_entry(struct dfc_bucket *b, int idx, struct dp_netdev_flow *flow)
>>+
>>+static inline uint16_t
>>+indirect_table_insert(struct dp_netdev_flow **indirect_table,
>>+                        struct dp_netdev_flow *flow)
>> {
>>-    if (b->flow[idx] != flow) {
>>-        if (b->flow[idx]) {
>>-            dp_netdev_flow_unref(b->flow[idx]);
>>+    uint32_t hash = dp_netdev_flow_hash(&flow->ufid);
>>+    uint16_t index = hash & INDIRECT_TABLE_MASK;
>>+    if (indirect_table[index] != flow) {
>>+        if (indirect_table[index]) {
>>+            dp_netdev_flow_unref(indirect_table[index]);
>>         }
>>-
>>         if (dp_netdev_flow_ref(flow)) {
>>-            b->flow[idx] = flow;
>>-        } else {
>>-            b->flow[idx] = NULL;
>>+            indirect_table[index] = flow;
>>+        } else {
>>+            indirect_table[index] = NULL;
>>         }
>>     }
>>+    return index;
>> }
>>
>> static inline void
>>@@ -2302,25 +2317,28 @@ dfc_insert(struct dp_netdev_pmd_thread *pmd,
>>            struct dp_netdev_flow *flow)
>> {
>>     struct dfc_cache *cache = &pmd->flow_cache;
>>-
>>+    struct dp_netdev_flow **ind_table = cache->indirect_table;
>>     struct dfc_bucket *bucket = &cache->buckets[key->hash & DFC_MASK];
>>+
>>+    uint16_t index = indirect_table_insert(ind_table, flow);
>>+
>>     uint16_t sig = key->hash >> 16;
>>     for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) {
>>         if(bucket->sig[i] == sig) {
>>-            dfc_change_entry(bucket, i, flow);
>>+            bucket->index[i] = index;
>>             return;
>>         }
>>     }
>>     for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) {
>>-        if(bucket->flow[i] == NULL) {
>>+        if (bucket->sig[i] == 0) {
>>             bucket->sig[i] = sig;
>>-            dfc_change_entry(bucket, i, flow);
>>+            bucket->index[i] = index;
>>             return;
>>         }
>>     }
>>     int idx = random_uint32() & (DFC_ENTRY_PER_BUCKET - 1);
>>     bucket->sig[idx] = sig;
>>-    dfc_change_entry(bucket, idx, flow);
>>+    bucket->index[idx] = index;
>> }
>>
>> static inline struct dp_netdev_flow *
>>@@ -2381,8 +2399,9 @@ dfc_slow_sweep(struct dfc_cache *cache)
>>         uint32_t bkt = cache->sweep_idx / DFC_ENTRY_PER_BUCKET;
>>         struct dfc_bucket *bucket = &cache->buckets[bkt];
>>         for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++){
>>-            if (!dfc_entry_alive(bucket->flow[i])) {
>>-                dfc_clear_entry(bucket, i);
>>+            if (!dfc_entry_alive(cache->indirect_table[bucket->index[i]])) {
>>+                dfc_clear_entry(cache->indirect_table, bucket, i);
>>+                bucket->sig[i] = 0;
>>             }
>>         }
>>     }
>>--
>>2.7.4
>>

