[ovs-dev] [mask array v6 2/2] datapath: keep mask array compact when deleting mask

Andy Zhou azhou at nicira.com
Fri Jun 20 01:31:04 UTC 2014


Thanks, pushed both with the suggested fixes.

On Wed, Jun 18, 2014 at 2:46 PM, Pravin Shelar <pshelar at nicira.com> wrote:
> On Mon, Jun 16, 2014 at 1:18 PM, Andy Zhou <azhou at nicira.com> wrote:
>> When deleting a mask from the mask array, we always move the last entry
>> into the deleted entry's slot. An alternative approach would be to set the
>> slot to NULL and periodically compact the array.
>>
>> The approach taken by this patch is more efficient at run time: during
>> lookup, fast-path packets don't have to skip over NULL pointers.
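>>
>> For illustration only (the helper name below is made up, and the RCU
>> annotations used by the real tbl_mask_array_delete_mask() in the diff are
>> omitted), the compaction-on-delete idea is roughly:
>>
>>     /* 'masks[0..*count)' is the valid region of the array. */
>>     static void sketch_delete_mask(void *masks[], int *count,
>>                                    const void *mask)
>>     {
>>             int i;
>>
>>             for (i = 0; i < *count; i++) {
>>                     if (masks[i] == mask) {
>>                             /* Fill the hole with the last valid entry so
>>                              * the valid region stays contiguous; the last
>>                              * slot still holds its old pointer. */
>>                             masks[i] = masks[*count - 1];
>>                             (*count)--;
>>                             break;
>>                     }
>>             }
>>     }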
>>
>> A more important advantage of this approach is that it tries to keep
>> mask array indexes stable by avoiding periodic index reshuffling.
>>
>> This patch implements an optimization to further promote index
>> stability: the last entry's value is left intact when it is moved to a
>> new location, so a stale cache index can 'fix' itself. A lookup notices
>> that the cached index is outside the valid mask array region and finds
>> the new index by scanning for the cached mask pointer within the valid
>> region.
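>>
>> As a sketch of that self-fix (illustrative only; the function name is
>> made up, and the real logic lives in fixup_cache_entry_index() and
>> ovs_flow_tbl_lookup_stats() in the diff below):
>>
>>     /* A stale cached index (>= count) still addresses a slot holding the
>>      * pointer of the mask that was moved, so rescanning the valid region
>>      * for that pointer yields its new index. */
>>     static int sketch_fixup_index(void *const masks[], int count,
>>                                   const void *cached_mask, int stale_index)
>>     {
>>             int i;
>>
>>             for (i = 0; i < count; i++) {
>>                     if (masks[i] == cached_mask)
>>                             return i;       /* mask moved here */
>>             }
>>             return stale_index;             /* mask was deleted */
>>     }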
>>
>> Signed-off-by: Andy Zhou <azhou at nicira.com>
>>
>> ----
>> v5 -> v6:
>>         Rewrote fixup_cache_entry_index().
>>         Removed tbl_mask_array_find_idx().
>>         In ovs_flow_tbl_lookup_stats(), made i < count the fast path.
>>
>> V4 -> v5:
>>         Further simplified mask_insert using a "for" loop.
>>
>> V3 -> v4:
>>         Split the mask lookup simplification into its own patch.
>>
>> V2 -> v3:
>>         Further simplified the mask flow lookup function.
>>
>> v1 -> v2:
>>        * Added a function to shrink the mask array.
>>        * Documented the corner case of mask deletion.
>>        * Simplified the mask flow lookup function (w.r.t. using and
>>          updating the mask cache).
>> ---
>>  datapath/flow_table.c | 124 +++++++++++++++++++++++++++++++++++++-------------
>>  1 file changed, 92 insertions(+), 32 deletions(-)
>>
>> diff --git a/datapath/flow_table.c b/datapath/flow_table.c
>> index c448eec..12679af 100644
>> --- a/datapath/flow_table.c
>> +++ b/datapath/flow_table.c
>> @@ -247,10 +247,10 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>>         if (old) {
>>                 int i;
>>
>> -               for (i = 0; i < old->max; i++) {
>> -                       if (old->masks[i])
>> -                               new->masks[new->count++] = old->masks[i];
>> -               }
>> +               for (i = 0; i < old->max; i++)
>> +                       new->masks[i] = old->masks[i];
>> +
>> +               new->count = old->count;
>>         }
>>         rcu_assign_pointer(tbl->mask_array, new);
>>
>> @@ -260,6 +260,47 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>>         return 0;
>>  }
>>
>> +static void tbl_mask_array_delete_mask(struct mask_array *ma,
>> +                                      const struct sw_flow_mask *mask)
>> +{
>> +       int i = 0;
>> +
>> +       /* Delete a mask pointer from the valid section.
>> +        *
>> +        * Also move the last entry into its place, so there is no
>> +        * hole in the valid section.
>> +        *
>> +        * Notice the last entry still points to the original mask.
>> +        *
>> +        * <Note>: there is a small race window that may cause a mask
>> +        * to be missed in a search. Imagine a core is
>> +        * walking through the array, past the index of the deleted mask.
>> +        * But before it reaches the last entry, that entry is overwritten
>> +        * by another core that is adding a new mask; now the last entry
>> +        * points to the new mask. In this case, the moved-up last
>> +        * entry can be missed by the core walking the mask array.
>> +        *
>> +        * If the missed mask would have led to a successful
>> +        * lookup, hitting the race window can cause a packet to miss
>> +        * the kernel flow cache and be sent to user space.
>> +        * </Note>
>> +        */
>> +       for (i = 0; i < ma->count; i++)
>> +               if (mask == ma->masks[i]) {
>> +                       struct sw_flow_mask *last;
>> +
>> +                       last = ma->masks[ma->count - 1];
>> +                       rcu_assign_pointer(ma->masks[i], last);
>> +                       ma->count--;
>> +                       break;
>> +               }
>> +
>> +       /* Remove the deleted mask pointers from the invalid section. */
>> +       for (i = ma->count; i < ma->max; i++)
>> +               if (mask == ma->masks[i])
>> +                       RCU_INIT_POINTER(ma->masks[i], NULL);
>> +}
>> +
>>  int ovs_flow_tbl_init(struct flow_table *table)
>>  {
>>         struct table_instance *ti;
>> @@ -513,7 +554,6 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
>>         return NULL;
>>  }
>>
>> -
>>  static struct sw_flow *flow_lookup(struct flow_table *tbl,
>>                                    struct table_instance *ti,
>>                                    struct mask_array *ma,
>> @@ -524,7 +564,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>>         struct sw_flow *flow;
>>         int i;
>>
>> -       for (i = 0; i < ma->max; i++) {
>> +       for (i = 0; i < ma->count; i++) {
>>                 struct sw_flow_mask *mask;
>>
>>                 mask = rcu_dereference_ovsl(ma->masks[i]);
>> @@ -540,6 +580,21 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>>         return NULL;
>>  }
>>
>> +/* If the cache index is outside of the valid region, update the index
>> + * in case the cache entry was moved up. */
>> +static void fixup_cache_entry_index(struct mask_cache_entry *e,
>> +                                   const struct mask_array *ma,
>> +                                   const struct sw_flow_mask *cache)
>> +{
>> +       int i;
>> +
>> +       for (i = 0; i < ma->count; i++)
>> +               if (cache == ovsl_dereference(ma->masks[i])) {
>> +                       e->mask_index = i;
>> +                       return;
>> +               }
>> +}
>> +
>>  /*
>>   * mask_cache maps flow to probable mask. This cache is not tightly
>>   * coupled cache, It means updates to  mask list can result in inconsistent
>> @@ -569,6 +624,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>>
>>         ce = NULL;
>>         cache = NULL;
>> +       flow = NULL;
> No need to set flow to NULL here at the top of the function; instead we
> can do it in the else branch of the count check. That way it is easier
> to understand why it is done.
> something like:
>
>                         if (likely(i < ma->count)) {
>                                 cache = rcu_dereference_ovsl(ma->masks[i]);
>                                 flow = masked_flow_lookup(ti, key, cache,
>                                                           n_mask_hit);
>                         } else {
>                                 flow = NULL;
>                                 if (i < ma->max) {
>                                         cache = rcu_dereference_ovsl(ma->masks[i]);
>                                         if (cache) {
>                                                 fixup_cache_entry_index(e, ma, cache);
>                                                 flow = masked_flow_lookup(ti, key,
>                                                                           cache, n_mask_hit);
>                                         }
>                                 }
>                         }
>
>>         entries = this_cpu_ptr(tbl->mask_cache);
>>
>>         /* Find the cache entry 'ce' to operate on. */
>> @@ -578,15 +634,28 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>>
>>                 e = &entries[index];
>>                 if (e->skb_hash == skb_hash) {
>> -                       cache = rcu_dereference_ovsl(ma->masks[e->mask_index]);
>> -                       if (cache) {
>> +                       int i = e->mask_index;
>> +
>> +                       if (likely(i < ma->count)) {
>> +                               cache = rcu_dereference_ovsl(ma->masks[i]);
>>                                 flow = masked_flow_lookup(ti, key, cache,
>>                                                           n_mask_hit);
>> -                               if (flow) /* Cache hit. */
>> -                                       return flow;
>> +                       } else if (i < ma->count) {
> I think it's ma->max.
>
>> +                               cache = rcu_dereference_ovsl(ma->masks[i]);
>> +                               if (cache) {
>> +                                       fixup_cache_entry_index(e, ma, cache);
>> +                                       flow = masked_flow_lookup(ti, key,
>> +                                                       cache, n_mask_hit);
>> +                               }
>>                         }
>> +
>> +                       if (flow)  /* Cache hit. */
>> +                               return flow;
>> +
>> +                       /* Cache miss. This is the best cache
>> +                        * replacement candidate.  */
>>                         e->skb_hash = 0;
>> -                       ce = e;  /* The best cache replacement candidate. */
>> +                       ce = e;
>>                         break;
>>                 }
>>
>> @@ -642,18 +711,17 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
>>
>>                 if (!mask->ref_count) {
>>                         struct mask_array *ma;
>> -                       int i;
>>
>>                         ma = ovsl_dereference(tbl->mask_array);
>> -                       for (i = 0; i < ma->max; i++) {
>> -                               if (mask == ovsl_dereference(ma->masks[i])) {
>> -                                       RCU_INIT_POINTER(ma->masks[i], NULL);
>> -                                       ma->count--;
>> -                                       goto free;
>> -                               }
>> +                       /* Shrink the mask array if necessary. */
>> +                       if (ma->max > MASK_ARRAY_SIZE_MIN * 2
>> +                               && ma->count <= ma->max / 4) {
>> +
>> +                               tbl_mask_array_realloc(tbl, ma->max / 2);
>> +                               ma = ovsl_dereference(tbl->mask_array);
>>                         }
>> -                       BUG();
>> -free:
>> +
>> +                       tbl_mask_array_delete_mask(ma, mask);
>>                         call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
>>                 }
>>         }
>> @@ -702,7 +770,7 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
>>         int i;
>>
>>         ma = ovsl_dereference(tbl->mask_array);
>> -       for (i = 0; i < ma->max; i++) {
>> +       for (i = 0; i < ma->count; i++) {
>>                 struct sw_flow_mask *t;
>>
>>                 t = ovsl_dereference(ma->masks[i]);
>> @@ -722,7 +790,6 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
>>         mask = flow_mask_find(tbl, new);
>>         if (!mask) {
>>                 struct mask_array *ma;
>> -               int i;
>>
>>                 /* Allocate a new mask if none exsits. */
>>                 mask = mask_alloc();
>> @@ -745,16 +812,9 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
>>                         }
>>                         ma = ovsl_dereference(tbl->mask_array);
>>                 }
>> -               for (i = 0; i < ma->max; i++) {
>> -                       const struct sw_flow_mask *t;
>> -
>> -                       t = ovsl_dereference(ma->masks[i]);
>> -                       if (!t) {
>> -                               rcu_assign_pointer(ma->masks[i], mask);
>> -                               ma->count++;
>> -                               break;
>> -                       }
>> -               }
>> +
>> +               rcu_assign_pointer(ma->masks[ma->count], mask);
>> +               ma->count++;
>>         } else {
>>                 BUG_ON(!mask->ref_count);
>>                 mask->ref_count++;
>> --
> Otherwise looks good.
> Acked-by: Pravin B Shelar <pshelar at nicira.com>
>
>> 1.9.1
>>
>> _______________________________________________
>> dev mailing list
>> dev at openvswitch.org
>> http://openvswitch.org/mailman/listinfo/dev


