[ovs-dev] [PATCH net v2] net: openvswitch: fix to make sure flow_lookup() is not preempted

Eelco Chaudron echaudro at redhat.com
Thu Oct 15 10:54:39 UTC 2020



On 15 Oct 2020, at 12:27, Ilya Maximets wrote:

> On 10/15/20 11:46 AM, Eelco Chaudron wrote:
>> The flow_lookup() function uses per-CPU variables, which must be
>> accessed with preemption disabled. This is fine in the general NAPI
>> use case, where the local BH is disabled. But it's also called in the
>> netlink context, which is preemptible. The patch below makes sure that
>> preemption is disabled even in the netlink path.
>>
>> In addition, the u64_stats_update_begin() sync point was not protected;
>> making the sync point part of the per-CPU structure fixes this.
>>
>> Fixes: eac87c413bf9 ("net: openvswitch: reorder masks array based on usage")
>> Reported-by: Juri Lelli <jlelli at redhat.com>
>> Signed-off-by: Eelco Chaudron <echaudro at redhat.com>
>> ---
>> v2: - Add u64_stats_update_begin() sync point protection
>>     - Moved patch to net from net-next branch
>>
>>  net/openvswitch/flow_table.c |   56 +++++++++++++++++++++++++-----------------
>>  net/openvswitch/flow_table.h |    8 +++++-
>>  2 files changed, 39 insertions(+), 25 deletions(-)
>>
>> diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
>> index e2235849a57e..d90b4af6f539 100644
>> --- a/net/openvswitch/flow_table.c
>> +++ b/net/openvswitch/flow_table.c
>> @@ -172,7 +172,7 @@ static struct table_instance *table_instance_alloc(int new_size)
>>
>>  static void __mask_array_destroy(struct mask_array *ma)
>>  {
>> -	free_percpu(ma->masks_usage_cntr);
>> +	free_percpu(ma->masks_usage_stats);
>>  	kfree(ma);
>>  }
>>
>> @@ -196,15 +196,15 @@ static void tbl_mask_array_reset_counters(struct mask_array *ma)
>>  		ma->masks_usage_zero_cntr[i] = 0;
>>
>>  		for_each_possible_cpu(cpu) {
>> -			u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr,
>> -							  cpu);
>> +			struct mask_array_stats *stats;
>>  			unsigned int start;
>>  			u64 counter;
>>
>> +			stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
>>  			do {
>> -				start = u64_stats_fetch_begin_irq(&ma->syncp);
>> -				counter = usage_counters[i];
>> -			} while (u64_stats_fetch_retry_irq(&ma->syncp, start));
>> +				start = u64_stats_fetch_begin_irq(&stats->syncp);
>> +				counter = stats->usage_cntrs[i];
>> +			} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
>>
>>  			ma->masks_usage_zero_cntr[i] += counter;
>>  		}
>> @@ -227,9 +227,10 @@ static struct mask_array *tbl_mask_array_alloc(int size)
>>  					     sizeof(struct sw_flow_mask *) *
>>  					     size);
>>
>> -	new->masks_usage_cntr = __alloc_percpu(sizeof(u64) * size,
>> -					       __alignof__(u64));
>> -	if (!new->masks_usage_cntr) {
>> +	new->masks_usage_stats = __alloc_percpu(sizeof(struct mask_array_stats) +
>> +						sizeof(u64) * size,
>> +						__alignof__(u64));
>> +	if (!new->masks_usage_stats) {
>>  		kfree(new);
>>  		return NULL;
>>  	}
>> @@ -732,7 +733,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>>  				   u32 *n_cache_hit,
>>  				   u32 *index)
>>  {
>> -	u64 *usage_counters = this_cpu_ptr(ma->masks_usage_cntr);
>> +	struct mask_array_stats *stats = this_cpu_ptr(ma->masks_usage_stats);
>>  	struct sw_flow *flow;
>>  	struct sw_flow_mask *mask;
>>  	int i;
>> @@ -742,9 +743,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>>  		if (mask) {
>>  			flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
>>  			if (flow) {
>> -				u64_stats_update_begin(&ma->syncp);
>> -				usage_counters[*index]++;
>> -				u64_stats_update_end(&ma->syncp);
>> +				u64_stats_update_begin(&stats->syncp);
>> +				stats->usage_cntrs[*index]++;
>> +				u64_stats_update_end(&stats->syncp);
>>  				(*n_cache_hit)++;
>>  				return flow;
>>  			}
>> @@ -763,9 +764,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>>  		flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
>>  		if (flow) { /* Found */
>>  			*index = i;
>> -			u64_stats_update_begin(&ma->syncp);
>> -			usage_counters[*index]++;
>> -			u64_stats_update_end(&ma->syncp);
>> +			u64_stats_update_begin(&stats->syncp);
>> +			stats->usage_cntrs[*index]++;
>> +			u64_stats_update_end(&stats->syncp);
>>  			return flow;
>>  		}
>>  	}
>> @@ -851,9 +852,17 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
>>  	struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
>>  	u32 __always_unused n_mask_hit;
>>  	u32 __always_unused n_cache_hit;
>> +	struct sw_flow *flow;
>>  	u32 index = 0;
>>
>> -	return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index);
>> +	/* This function gets called through the netlink interface and therefore
>> +	 * is preemptible. However, the flow_lookup() function needs to be called
>> +	 * with preemption disabled due to CPU specific variables.
>
> Is it possible to add some kind of assertion inside flow_lookup() to avoid
> this kind of issue in the future?

We could do something like WARN_ON_ONCE(preemptible()), but I do not think
such a check should be added to the fast path.
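For illustration only, such an assertion could be hidden behind a debug-only
compile switch so it never touches the fast path in production builds. The
OVS_FLOW_DEBUG define below is hypothetical, not an existing Kconfig option;
this is just a sketch of the idea:

    /* Hypothetical debug-only check at the top of flow_lookup(). It would
     * fire if a caller reaches the per-CPU mask usage statistics with
     * neither BH nor preemption disabled.
     */
    #ifdef OVS_FLOW_DEBUG
    	WARN_ON_ONCE(preemptible());
    #endif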

> It might also be good to update the comment for the flow_lookup() function
> itself.

Good idea, I will do this in a v3.
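As a rough sketch of what that comment could say (exact wording to be settled
in v3):

    /* flow_lookup() must be called with BH (or preemption) disabled, as it
     * updates per-CPU mask usage statistics that are also written from the
     * NAPI receive path.
     */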

>> +	 */
>> +	local_bh_disable();
>> +	flow = flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index);
>> +	local_bh_enable();
>> +	return flow;
>>  }
>>
>>  struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
>> @@ -1109,7 +1118,6 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
>>
>>  	for (i = 0; i < ma->max; i++)  {
>>  		struct sw_flow_mask *mask;
>> -		unsigned int start;
>>  		int cpu;
>>
>>  		mask = rcu_dereference_ovsl(ma->masks[i]);
>> @@ -1120,14 +1128,16 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
>>  		masks_and_count[i].counter = 0;
>>
>>  		for_each_possible_cpu(cpu) {
>> -			u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr,
>> -							  cpu);
>> +			struct mask_array_stats *stats;
>> +			unsigned int start;
>>  			u64 counter;
>>
>> +			stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
>>  			do {
>> -				start = u64_stats_fetch_begin_irq(&ma->syncp);
>> -				counter = usage_counters[i];
>> -			} while (u64_stats_fetch_retry_irq(&ma->syncp, start));
>> +				start = u64_stats_fetch_begin_irq(&stats->syncp);
>> +				counter = stats->usage_cntrs[i];
>> +			} while (u64_stats_fetch_retry_irq(&stats->syncp,
>> +							   start));
>>
>>  			masks_and_count[i].counter += counter;
>>  		}
>> diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
>> index 6e7d4ac59353..43144396e192 100644
>> --- a/net/openvswitch/flow_table.h
>> +++ b/net/openvswitch/flow_table.h
>> @@ -38,12 +38,16 @@ struct mask_count {
>>  	u64 counter;
>>  };
>>
>> +struct mask_array_stats {
>> +	struct u64_stats_sync syncp;
>> +	u64 usage_cntrs[];
>> +};
>> +
>>  struct mask_array {
>>  	struct rcu_head rcu;
>>  	int count, max;
>> -	u64 __percpu *masks_usage_cntr;
>> +	struct mask_array_stats __percpu *masks_usage_stats;
>>  	u64 *masks_usage_zero_cntr;
>> -	struct u64_stats_sync syncp;
>>  	struct sw_flow_mask __rcu *masks[];
>>  };
>>
>>


