[ovs-dev] [mask array v5 2/2] datapath: keep mask array compact when deleting mask

Andy Zhou azhou at nicira.com
Wed Jun 11 21:24:12 UTC 2014


When deleting a mask from the mask array, we always move the last entry
into the vacated slot. An alternative approach would be to write NULL
into the slot and periodically compact the array.

The approach taken by this patch is more efficient at run time: during
lookup, fast-path packets do not have to skip over NULL pointers.
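
As a rough, user-space C sketch of this compact-on-delete scheme
(simplified stand-ins for the flow_table.c structures, with RCU and
locking omitted), deletion and the resulting NULL-free lookup look
roughly like this:

    #define MA_MAX 16

    struct mask;                    /* stand-in for struct sw_flow_mask */

    struct mask_array {
            int count;              /* valid entries occupy [0, count) */
            int max;                /* allocated size of masks[] */
            struct mask *masks[MA_MAX];
    };

    /* Delete 'mask' by moving the last valid entry into its slot, so
     * the valid region stays contiguous and lookups never have to skip
     * NULL pointers.  The moved entry's old slot is deliberately left
     * intact. */
    static void ma_delete(struct mask_array *ma, const struct mask *mask)
    {
            int i;

            for (i = 0; i < ma->count; i++) {
                    if (ma->masks[i] == mask) {
                            ma->masks[i] = ma->masks[ma->count - 1];
                            ma->count--;
                            break;
                    }
            }
    }

The patch itself performs the store with rcu_assign_pointer() and also
clears leftover copies of the deleted mask from the region beyond
'count'; those RCU details are omitted above.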

A more important advantage of this approach is that it tries to keep
mask array indexes stable by avoiding periodic index reshuffles.

This patch implements an optimization to further promote index
stability.  By leaving the last entry's value intact when moving it to
a new location, stale cache indexes can 'fix' themselves: a reader that
notices a cached index outside the valid mask array region can find the
new index by scanning for the mask pointer within the valid region.
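
Continuing the same simplified sketch (hypothetical helper name, not
the patch's exact code), the self-fixing index logic amounts to:

    /* A cached index at or beyond 'count' may still name a mask that
     * was moved down by a delete, because the moved entry's old slot
     * is left intact.  Rescan the valid region for the same pointer
     * to recover the mask's new index. */
    static int ma_fixup_index(const struct mask_array *ma,
                              const struct mask *cached, int idx)
    {
            int i;

            if (idx < ma->count)
                    return idx;             /* cached index still valid */

            for (i = 0; i < ma->count; i++)
                    if (ma->masks[i] == cached)
                            return i;       /* mask was moved down */

            return -1;                      /* mask is gone; treat as miss */
    }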

Signed-off-by: Andy Zhou <azhou at nicira.com>

---
V4 -> v5:
	Further simplify mask_insert() using a "for" loop.

V3 -> v4:
	Split the mask lookup simplification into its own patch.

V2 -> v3:
	* Further simplified mask flow lookup function

v1 -> v2:
	* Added shrink mask array function.
	* Documented the corner case of mask deletion.
	* Simplified mask flow lookup function
		(w.r.t. using and updating the mask cache)
---
 datapath/flow_table.c | 120 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 92 insertions(+), 28 deletions(-)

diff --git a/datapath/flow_table.c b/datapath/flow_table.c
index 408730f..ddf714b 100644
--- a/datapath/flow_table.c
+++ b/datapath/flow_table.c
@@ -247,10 +247,10 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
 	if (old) {
 		int i;
 
-		for (i = 0; i < old->max; i++) {
-			if (old->masks[i])
-				new->masks[new->count++] = old->masks[i];
-		}
+		for (i = 0; i < old->max; i++)
+			new->masks[i] = old->masks[i];
+
+		new->count = old->count;
 	}
 	rcu_assign_pointer(tbl->mask_array, new);
 
@@ -260,6 +260,59 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
 	return 0;
 }
 
+static void tbl_mask_array_delete_mask(struct mask_array *ma,
+				       const struct sw_flow_mask *mask)
+{
+	int i = 0;
+
+	/* Delete a mask pointer from the valid section.
+	 *
+	 * Also move the last entry into its place, so there is no
+	 * hole in the valid section.
+	 *
+	 * Notice the last entry still points to the original mask.
+	 *
+	 * <Note>: there is a small race window that may cause a mask
+	 * to be missed in a search. Imagine a core walking through
+	 * the array that has passed the index of the mask being deleted.
+	 * Before it reaches the last entry, that entry is overwritten
+	 * by another core that is adding a new mask, so the last entry
+	 * now points to the new mask. In this case, the moved-up last
+	 * entry can be missed by the core walking the mask array.
+	 *
+	 * In case the missed mask would have led to a successful
+	 * lookup, hitting the race window could cause a packet to miss
+	 * the kernel flow cache and be sent to user space.
+	 * </Note>
+	 */
+	for (i = 0; i < ma->count; i++)
+		if (mask == ma->masks[i]) {
+			struct sw_flow_mask *last;
+
+			last = ma->masks[ma->count - 1];
+			rcu_assign_pointer(ma->masks[i], last);
+			ma->count--;
+			break;
+		}
+
+	/* Remove the deleted mask pointers from the invalid section. */
+	for (i = ma->count; i < ma->max; i++)
+		if (mask == ma->masks[i])
+			RCU_INIT_POINTER(ma->masks[i], NULL);
+}
+
+static int tbl_mask_array_find_idx(const struct mask_array *ma,
+				   const struct sw_flow_mask *mask)
+{
+	int i;
+
+	for (i = 0; i < ma->count; i++)
+		if (mask == ovsl_dereference(ma->masks[i]))
+			return i;
+
+	return -1;
+}
+
 int ovs_flow_tbl_init(struct flow_table *table)
 {
 	struct table_instance *ti;
@@ -524,7 +577,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
 	struct sw_flow *flow;
 	int i;
 
-	for (i = 0; i < ma->max; i++) {
+	for (i = 0; i < ma->count; i++) {
 		struct sw_flow_mask *mask;
 
 		mask = rcu_dereference_ovsl(ma->masks[i]);
@@ -540,6 +593,18 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
 	return NULL;
 }
 
+/* If the cache index is outside of the valid region, update the index
+ * in case the cache entry was moved up. */
+static void fixup_cache_entry_index(struct mask_cache_entry *e,
+				    const struct mask_array *ma,
+				    const struct sw_flow_mask *cache)
+{
+	int i = tbl_mask_array_find_idx(ma, cache);
+
+	if (i >= 0)
+		e->mask_index = i;
+}
+
 /*
  * mask_cache maps flow to probable mask. This cache is not tightly
  * coupled cache. It means updates to the mask list can result in inconsistent
@@ -578,13 +643,21 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
 
 		e = &entries[index];
 		if (e->skb_hash == skb_hash) {
-			cache = rcu_dereference_ovsl(ma->masks[e->mask_index]);
+			int i = e->mask_index;
+
+			if (i < ma->max)
+				cache = rcu_dereference_ovsl(ma->masks[i]);
+
 			if (cache) {
+				if (unlikely(i >= ma->count))
+					fixup_cache_entry_index(e, ma, cache);
+
 				flow = masked_flow_lookup(ti, key, cache,
 							  n_mask_hit);
 				if (flow) /* Cache hit. */
 					return flow;
-			}
+			} else
+				e->skb_hash = 0; /* Not a valid cache entry. */
 
 			ce = e;  /* The best cache replacement candidate. */
 			break;
@@ -642,18 +715,17 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
 
 		if (!mask->ref_count) {
 			struct mask_array *ma;
-			int i;
 
 			ma = ovsl_dereference(tbl->mask_array);
-			for (i = 0; i < ma->max; i++) {
-				if (mask == ovsl_dereference(ma->masks[i])) {
-					RCU_INIT_POINTER(ma->masks[i], NULL);
-					ma->count--;
-					goto free;
-				}
+			/* Shrink the mask array if necessary. */
+			if (ma->max > MASK_ARRAY_SIZE_MIN * 2 &&
+			    ma->count <= ma->max / 4) {
+
+				tbl_mask_array_realloc(tbl, ma->max / 2);
+				ma = ovsl_dereference(tbl->mask_array);
 			}
-			BUG();
-free:
+
+			tbl_mask_array_delete_mask(ma, mask);
 			call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
 		}
 	}
@@ -702,7 +774,7 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
 	int i;
 
 	ma = ovsl_dereference(tbl->mask_array);
-	for (i = 0; i < ma->max; i++) {
+	for (i = 0; i < ma->count; i++) {
 		struct sw_flow_mask *t;
 
 		t = ovsl_dereference(ma->masks[i]);
@@ -722,7 +794,6 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
 	mask = flow_mask_find(tbl, new);
 	if (!mask) {
 		struct mask_array *ma;
-		int i;
 
 		/* Allocate a new mask if none exists. */
 		mask = mask_alloc();
@@ -745,16 +816,9 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
 			}
 			ma = ovsl_dereference(tbl->mask_array);
 		}
-		for (i = 0; i < ma->max; i++) {
-			const struct sw_flow_mask *t;
-
-			t = ovsl_dereference(ma->masks[i]);
-			if (!t) {
-				rcu_assign_pointer(ma->masks[i], mask);
-				ma->count++;
-				break;
-			}
-		}
+
+		rcu_assign_pointer(ma->masks[ma->count], mask);
+		ma->count++;
 	} else {
 		BUG_ON(!mask->ref_count);
 		mask->ref_count++;
-- 
1.9.1