[ovs-dev] [mask array v6 2/2] datapath: keep mask array compact when deleting mask
Andy Zhou
azhou at nicira.com
Fri Jun 20 01:31:04 UTC 2014
thanks, pushed both with the suggested fixes.
On Wed, Jun 18, 2014 at 2:46 PM, Pravin Shelar <pshelar at nicira.com> wrote:
> On Mon, Jun 16, 2014 at 1:18 PM, Andy Zhou <azhou at nicira.com> wrote:
>> When deleting a mask from the mask array, we always move the last entry
>> into its current location. Another approach can be to leave a NULL in
>> its current place, and periodically compact the array.
>>
>> The approach taken by this patch is more efficient during run
>> time. During lookup, fast path packets don't have to skip over NULL
>> pointers.
>>
>> A more important advantage of this approach is that it tries to
>> keep the mask array index stable by avoiding periodic index
>> reshuffle.
>>
>> This patch implements an optimization to further promote index
>> stability. By leaving the last entry value intact when moving it
>> to a new location, the old cache indexes can 'fix' themselves, by
>> noticing the index in the cache is outside the valid mask array region. The
>> new index can be found by scanning the mask pointer within the valid
>> region.
>>
>> Signed-off-by: Andy Zhou <azhou at nicira.com>
>>
>> ----
>> v5 -> v6:
>> Rewrote fixup_cache_entry_index(),
>> Remove tbl_mask_array_find_idx()
>> In ovs_flow_tbl_lookup_stats(), make i < count the fast path.
>>
>> V4 -> v5:
>> Further simplify mask_insert using "for loop".
>>
>> V3 -> v4:
>> split mask lookup simplification into its own patch.
>>
>> V2 -> v3:
>> Further simplified mask flow lookup function
>>
>> v1 -> v2:
>> * added shrink mask array function.
>> * documented the corner case of mask deletion.
>> * Simplified mask flow lookup function (w.r.t. using and
>> update the mask cache)
>> ---
>> ---
>> datapath/flow_table.c | 124 +++++++++++++++++++++++++++++++++++++-------------
>> 1 file changed, 92 insertions(+), 32 deletions(-)
>>
>> diff --git a/datapath/flow_table.c b/datapath/flow_table.c
>> index c448eec..12679af 100644
>> --- a/datapath/flow_table.c
>> +++ b/datapath/flow_table.c
>> @@ -247,10 +247,10 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>> if (old) {
>> int i;
>>
>> - for (i = 0; i < old->max; i++) {
>> - if (old->masks[i])
>> - new->masks[new->count++] = old->masks[i];
>> - }
>> + for (i = 0; i < old->max; i++)
>> + new->masks[i] = old->masks[i];
>> +
>> + new->count = old->count;
>> }
>> rcu_assign_pointer(tbl->mask_array, new);
>>
>> @@ -260,6 +260,47 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>> return 0;
>> }
>>
>> +static void tbl_mask_array_delete_mask(struct mask_array *ma,
>> + const struct sw_flow_mask *mask)
>> +{
>> + int i = 0;
>> +
>> + /* Delete a mask pointer from the valid section.
>> + *
>> + * Also move the last entry into its place, so there is no
>> + * hole in the valid section.
>> + *
>> + * Notice the last entry still points to the original mask.
>> + *
>> + * <Note>: there is a small race window that may cause a mask
>> + * to be missed in a search. Imagine a core is
>> + * walking through the array, passing the index of deleting mask.
>> + * But before reaching the last entry, it is overwritten,
>> + * by another core that is adding a new mask, now the last entry
>> + * will point to the new mask. In this case, the moved up last
>> + * entry can be missed by the core walking the mask array.
>> + *
>> + * In case this missed mask would have led to successful
>> + * lookup, Hitting the race window could cause a packet to miss
>> + * kernel flow cache, and be sent to the user space.
>> + * </Note>
>> + */
>> + for (i = 0; i < ma->count; i++)
>> + if (mask == ma->masks[i]) {
>> + struct sw_flow_mask *last;
>> +
>> + last = ma->masks[ma->count - 1];
>> + rcu_assign_pointer(ma->masks[i], last);
>> + ma->count--;
>> + break;
>> + }
>> +
>> + /* Remove the deleted mask pointers from the invalid section. */
>> + for (i = ma->count; i < ma->max; i++)
>> + if (mask == ma->masks[i])
>> + RCU_INIT_POINTER(ma->masks[i], NULL);
>> +}
>> +
>> int ovs_flow_tbl_init(struct flow_table *table)
>> {
>> struct table_instance *ti;
>> @@ -513,7 +554,6 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
>> return NULL;
>> }
>>
>> -
>> static struct sw_flow *flow_lookup(struct flow_table *tbl,
>> struct table_instance *ti,
>> struct mask_array *ma,
>> @@ -524,7 +564,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>> struct sw_flow *flow;
>> int i;
>>
>> - for (i = 0; i < ma->max; i++) {
>> + for (i = 0; i < ma->count; i++) {
>> struct sw_flow_mask *mask;
>>
>> mask = rcu_dereference_ovsl(ma->masks[i]);
>> @@ -540,6 +580,21 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>> return NULL;
>> }
>>
>> +/* If the cache index is outside of the valid region, update the index
>> + * in case the cache entry was moved up. */
>> +static void fixup_cache_entry_index(struct mask_cache_entry *e,
>> + const struct mask_array *ma,
>> + const struct sw_flow_mask *cache)
>> +{
>> + int i;
>> +
>> + for (i = 0; i < ma->count; i++)
>> + if (cache == ovsl_dereference(ma->masks[i])) {
>> + e->mask_index = i;
>> + return;
>> + }
>> +}
>> +
>> /*
>> * mask_cache maps flow to probable mask. This cache is not tightly
>> * coupled cache, It means updates to mask list can result in inconsistent
>> @@ -569,6 +624,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>>
>> ce = NULL;
>> cache = NULL;
>> + flow = NULL;
> No need to set NULL to flow here in function block, rather we can do
> it in else part of count condition check. That way it is easier to
> understand why is it done.
> something like:
>
> if (likely(i < ma->count)) {
>
> cache = rcu_dereference_ovsl(ma->masks[i]);
>
> flow = masked_flow_lookup(ti, key, cache,
>
> n_mask_hit);
>
> } else {
>
> flow = NULL;
>
> if (i < ma->max) {
>
> cache =
> rcu_dereference_ovsl(ma->masks[i]);
>
> if (cache) {
>
>
> fixup_cache_entry_index(e, ma, cache);
>
> flow =
> masked_flow_lookup(ti, key, cache, n_mask_hit);
>
> }
>
> }
>
> }
>
>
>> entries = this_cpu_ptr(tbl->mask_cache);
>>
>> /* Find the cache entry 'ce' to operate on. */
>> @@ -578,15 +634,28 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>>
>> e = &entries[index];
>> if (e->skb_hash == skb_hash) {
>> - cache = rcu_dereference_ovsl(ma->masks[e->mask_index]);
>> - if (cache) {
>> + int i = e->mask_index;
>> +
>> + if (likely(i < ma->count)) {
>> + cache = rcu_dereference_ovsl(ma->masks[i]);
>> flow = masked_flow_lookup(ti, key, cache,
>> n_mask_hit);
>> - if (flow) /* Cache hit. */
>> - return flow;
>> + } else if (i < ma->count) {
> I think its ma->max.
>
>> + cache = rcu_dereference_ovsl(ma->masks[i]);
>> + if (cache) {
>> + fixup_cache_entry_index(e, ma, cache);
>> + flow = masked_flow_lookup(ti, key,
>> + cache, n_mask_hit);
>> + }
>> }
>> +
>> + if (flow) /* Cache hit. */
>> + return flow;
>> +
>> + /* Cache miss. This is the best cache
>> + * replacement candidate. */
>> e->skb_hash = 0;
>> - ce = e; /* The best cache replacement candidate. */
>> + ce = e;
>> break;
>> }
>>
>> @@ -642,18 +711,17 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
>>
>> if (!mask->ref_count) {
>> struct mask_array *ma;
>> - int i;
>>
>> ma = ovsl_dereference(tbl->mask_array);
>> - for (i = 0; i < ma->max; i++) {
>> - if (mask == ovsl_dereference(ma->masks[i])) {
>> - RCU_INIT_POINTER(ma->masks[i], NULL);
>> - ma->count--;
>> - goto free;
>> - }
>> + /* Shrink the mask array if necessary. */
>> + if (ma->max > MASK_ARRAY_SIZE_MIN * 2
>> + && ma->count <= ma->max / 4) {
>> +
>> + tbl_mask_array_realloc(tbl, ma->max / 2);
>> + ma = ovsl_dereference(tbl->mask_array);
>> }
>> - BUG();
>> -free:
>> +
>> + tbl_mask_array_delete_mask(ma, mask);
>> call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
>> }
>> }
>> @@ -702,7 +770,7 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
>> int i;
>>
>> ma = ovsl_dereference(tbl->mask_array);
>> - for (i = 0; i < ma->max; i++) {
>> + for (i = 0; i < ma->count; i++) {
>> struct sw_flow_mask *t;
>>
>> t = ovsl_dereference(ma->masks[i]);
>> @@ -722,7 +790,6 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
>> mask = flow_mask_find(tbl, new);
>> if (!mask) {
>> struct mask_array *ma;
>> - int i;
>>
>> /* Allocate a new mask if none exsits. */
>> mask = mask_alloc();
>> @@ -745,16 +812,9 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
>> }
>> ma = ovsl_dereference(tbl->mask_array);
>> }
>> - for (i = 0; i < ma->max; i++) {
>> - const struct sw_flow_mask *t;
>> -
>> - t = ovsl_dereference(ma->masks[i]);
>> - if (!t) {
>> - rcu_assign_pointer(ma->masks[i], mask);
>> - ma->count++;
>> - break;
>> - }
>> - }
>> +
>> + rcu_assign_pointer(ma->masks[ma->count], mask);
>> + ma->count++;
>> } else {
>> BUG_ON(!mask->ref_count);
>> mask->ref_count++;
>> --
> otherwise looks good.
> Acked-by: Pravin B Shelar <pshelar at nicira.com>
>
>> 1.9.1
>>
>> _______________________________________________
>> dev mailing list
>> dev at openvswitch.org
>> http://openvswitch.org/mailman/listinfo/dev
More information about the dev
mailing list