[ovs-dev] [PATCH] dpif-netdev: Avoid deadlock with offloading during PMD thread deletion.
Ilya Maximets
i.maximets at ovn.org
Tue Jul 14 17:52:47 UTC 2020
On 7/14/20 5:33 PM, Kevin Traynor wrote:
> On 09/07/2020 16:19, Ilya Maximets wrote:
>> The main thread will try to pause/stop all revalidators during
>> datapath reconfiguration via the datapath purge callback (dp_purge_cb)
>> while holding 'dp->port_mutex'. A deadlock happens if any of the
>> revalidator threads is already waiting on 'dp->port_mutex' while
>> dumping offloaded flows:
>>
>>  main thread                           revalidator
>> ---------------------------------  ----------------------------------
>>
>>  ovs_mutex_lock(&dp->port_mutex)
>>
>>                                     dpif_netdev_flow_dump_next()
>>                                     -> dp_netdev_flow_to_dpif_flow
>>                                     -> get_dpif_flow_status
>>                                     -> dpif_netdev_get_flow_offload_status()
>>                                     -> ovs_mutex_lock(&dp->port_mutex)
>>                                      <waiting for mutex here>
>>
>>  reconfigure_datapath()
>>  -> reconfigure_pmd_threads()
>>  -> dp_netdev_del_pmd()
>>  -> dp_purge_cb()
>>  -> udpif_pause_revalidators()
>>  -> ovs_barrier_block(&udpif->pause_barrier)
>>  <waiting for revalidators to reach barrier>
>>
>> <DEADLOCK>
>>
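>> The same deadlock shape, stripped of all OVS specifics, can be
>> sketched with plain pthreads (illustrative only, not OVS code; names
>> map to the diagram above; it hangs when 'main_thread' takes the mutex
>> first, which is the interleaving described there):
>>
>>     #include <pthread.h>
>>
>>     static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
>>     static pthread_barrier_t barrier;
>>
>>     static void *main_thread(void *arg)
>>     {
>>         pthread_mutex_lock(&mtx);        /* Holds the 'port mutex'. */
>>         pthread_barrier_wait(&barrier);  /* Waits for 'revalidator',
>>                                           * which can never arrive. */
>>         pthread_mutex_unlock(&mtx);
>>         return arg;
>>     }
>>
>>     static void *revalidator(void *arg)
>>     {
>>         pthread_mutex_lock(&mtx);        /* Blocks forever: 'main_thread'
>>                                           * never releases the mutex. */
>>         pthread_mutex_unlock(&mtx);
>>         pthread_barrier_wait(&barrier);  /* Never reached. */
>>         return arg;
>>     }
>>
>>     int main(void)
>>     {
>>         pthread_t a, b;
>>
>>         pthread_barrier_init(&barrier, NULL, 2);
>>         pthread_create(&a, NULL, main_thread, NULL);
>>         pthread_create(&b, NULL, revalidator, NULL);
>>         pthread_join(a, NULL);           /* Hangs: deadlock. */
>>         pthread_join(b, NULL);
>>         return 0;
>>     }
>>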
>> We're not allowed to call the offloading API from the userspace
>> datapath without holding the global port mutex, due to thread safety
>> restrictions on the netdev-ofload-dpdk module. And it's also not easy
>
> typo s/ofload/offload/
Thanks.
>
>> to rework the datapath reconfiguration process in order to move the
>> actual PMD removal and datapath purge out of the port mutex.
>>
>> So, for now, not sleeping on the mutex if it's not immediately
>> available seems like the easiest workaround. This will have an impact
>> on the flow statistics update rate and on the ability to get the
>> latest statistics before removing the flow (the latest stats will be
>> lost if we were not able to take the mutex). However, this will allow
>> us to operate normally, avoiding the deadlock.
>>
>> The last bit is that, to avoid flapping of flow attributes and
>> statistics, we're not failing the operation, but returning the last
>> statistics and attributes reported by the offload provider. Since
>> those might be updated in different threads, stores and reads are
>> atomic.
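>>
>> The store/load pattern is essentially the following (a simplified
>> sketch with plain C11 atomics; the actual patch uses the OVS atomic
>> wrappers, and 'cached_stats', 'cache_store' and 'cache_load' are
>> hypothetical names, not functions from this patch):
>>
>>     #include <stdatomic.h>
>>     #include <stdint.h>
>>
>>     struct cached_stats {
>>         atomic_uint_least64_t n_packets;
>>         atomic_uint_least64_t n_bytes;
>>     };
>>
>>     /* Writer: remember the last values the offload provider gave us. */
>>     static void
>>     cache_store(struct cached_stats *c, uint64_t packets, uint64_t bytes)
>>     {
>>         atomic_store_explicit(&c->n_packets, packets,
>>                               memory_order_relaxed);
>>         atomic_store_explicit(&c->n_bytes, bytes, memory_order_relaxed);
>>     }
>>
>>     /* Reader: used when the mutex is contended and we can't query HW. */
>>     static void
>>     cache_load(struct cached_stats *c, uint64_t *packets, uint64_t *bytes)
>>     {
>>         *packets = atomic_load_explicit(&c->n_packets,
>>                                         memory_order_relaxed);
>>         *bytes = atomic_load_explicit(&c->n_bytes, memory_order_relaxed);
>>     }
>>
>> Relaxed ordering is enough here because each field is only ever read
>> or written whole; readers may observe a slightly stale value, which
>> is acceptable for statistics.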
>>
>
> Nice explanation, thanks
>
>> Reported-by: Frank Wang (王培辉) <wangpeihui at inspur.com>
>> Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-June/371753.html
>> Fixes: a309e4f52660 ("dpif-netdev: Update offloaded flows statistics.")
>> Signed-off-by: Ilya Maximets <i.maximets at ovn.org>
>> ---
>>
>> Tested with unit tests only. Needs checking with the real setup.
>> Especially stats/attrs update parts.
>>
>> AUTHORS.rst | 1 +
>> lib/dpif-netdev.c | 78 +++++++++++++++++++++++++++++++++++++++++++----
>> 2 files changed, 73 insertions(+), 6 deletions(-)
>>
>> diff --git a/AUTHORS.rst b/AUTHORS.rst
>> index 8e6a0769f..eb36a01d0 100644
>> --- a/AUTHORS.rst
>> +++ b/AUTHORS.rst
>> @@ -504,6 +504,7 @@ Edwin Chiu echiu at vmware.com
>> Eivind Bulie Haanaes
>> Enas Ahmad enas.ahmad at kaust.edu.sa
>> Eric Lopez
>> +Frank Wang (王培辉) wangpeihui at inspur.com
>> Frido Roose fr.roose at gmail.com
>> Gaetano Catalli gaetano.catalli at gmail.com
>> Gavin Remaley gavin_remaley at selinc.com
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index 629a0cb53..2eb7f32ff 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -490,6 +490,12 @@ struct dp_netdev_flow_stats {
>> atomic_uint16_t tcp_flags; /* Bitwise-OR of seen tcp_flags values. */
>> };
>>
>> +/* Contained by struct dp_netdev_flow's 'last_attrs' member. */
>> +struct dp_netdev_flow_attrs {
>> + atomic_bool offloaded; /* True if flow is offloaded to HW. */
>> + ATOMIC(const char *) dp_layer; /* DP layer the flow is handled in. */
>> +};
>> +
>> /* A flow in 'dp_netdev_pmd_thread's 'flow_table'.
>> *
>> *
>> @@ -550,6 +556,10 @@ struct dp_netdev_flow {
>> /* Statistics. */
>> struct dp_netdev_flow_stats stats;
>>
>> + /* Statistics and attributes received from the netdev offload provider. */
>> + struct dp_netdev_flow_stats last_stats;
>> + struct dp_netdev_flow_attrs last_attrs;
>> +
>> /* Actions. */
>> OVSRCU_TYPE(struct dp_netdev_actions *) actions;
>>
>> @@ -3150,9 +3160,43 @@ dp_netdev_pmd_find_flow(const struct dp_netdev_pmd_thread *pmd,
>> return NULL;
>> }
>>
>> +static void
>> +dp_netdev_flow_set_last_stats_attrs(struct dp_netdev_flow *netdev_flow,
>> + const struct dpif_flow_stats *stats,
>> + const struct dpif_flow_attrs *attrs)
>> +{
>> + struct dp_netdev_flow_stats *last_stats = &netdev_flow->last_stats;
>> + struct dp_netdev_flow_attrs *last_attrs = &netdev_flow->last_attrs;
>> +
>> + atomic_store_relaxed(&last_stats->used, stats->used);
>> + atomic_store_relaxed(&last_stats->packet_count, stats->n_packets);
>> + atomic_store_relaxed(&last_stats->byte_count, stats->n_bytes);
>> + atomic_store_relaxed(&last_stats->tcp_flags, stats->tcp_flags);
>> +
>> + atomic_store_relaxed(&last_attrs->offloaded, attrs->offloaded);
>> + atomic_store_relaxed(&last_attrs->dp_layer, attrs->dp_layer);
>> +}
>> +
>> +static void
>> +dp_netdev_flow_get_last_stats_attrs(struct dp_netdev_flow *netdev_flow,
>> + struct dpif_flow_stats *stats,
>> + struct dpif_flow_attrs *attrs)
>> +{
>> + struct dp_netdev_flow_stats *last_stats = &netdev_flow->last_stats;
>> + struct dp_netdev_flow_attrs *last_attrs = &netdev_flow->last_attrs;
>> +
>> + atomic_read_relaxed(&last_stats->used, &stats->used);
>> + atomic_read_relaxed(&last_stats->packet_count, &stats->n_packets);
>> + atomic_read_relaxed(&last_stats->byte_count, &stats->n_bytes);
>> + atomic_read_relaxed(&last_stats->tcp_flags, &stats->tcp_flags);
>> +
>> + atomic_read_relaxed(&last_attrs->offloaded, &attrs->offloaded);
>> + atomic_read_relaxed(&last_attrs->dp_layer, &attrs->dp_layer);
>> +}
>> +
>> static bool
>> dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp,
>> - const struct dp_netdev_flow *netdev_flow,
>> + struct dp_netdev_flow *netdev_flow,
>> struct dpif_flow_stats *stats,
>> struct dpif_flow_attrs *attrs)
>> {
>> @@ -3175,11 +3219,31 @@ dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp,
>> }
>> ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf);
>> /* Taking a global 'port_mutex' to fulfill thread safety
>> - * restrictions for the netdev-offload-dpdk module. */
>> - ovs_mutex_lock(&dp->port_mutex);
>> - ret = netdev_flow_get(netdev, &match, &actions, &netdev_flow->mega_ufid,
>> - stats, attrs, &buf);
>> - ovs_mutex_unlock(&dp->port_mutex);
>> + * restrictions for the netdev-offload-dpdk module.
>> + *
>> + * XXX: Main thread will try to pause/stop all revalidators during datapath
>> + * reconfiguration via datapath purge callback (dp_purge_cb) while
>> + * holding 'dp->port_mutex'. So we're not waiting for the mutex here.
>> + * Otherwise, deadlock is possible, because revalidators might sleep
>> + * waiting for the main thread to release the lock, while the main
>> + * thread will wait for them to stop processing.
>> + * This workaround might make statistics less accurate, especially
>> + * for the flow deletion case, since there will be no other attempt. */
>> + if (!ovs_mutex_trylock(&dp->port_mutex)) {
>> + ret = netdev_flow_get(netdev, &match, &actions,
>> + &netdev_flow->mega_ufid, stats, attrs, &buf);
>> + /* Storing attributes from the last request for later use on mutex
>> + * contention. */
>
> Shouldn't you check 'ret' here before the set
Yes, that makes sense. I'll move the call below under 'if (!ret)'.
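I.e., something like this (an untested sketch of the change I plan for
v2; note that ovs_mutex_trylock(), like pthread_mutex_trylock(),
returns 0 when the lock is acquired, hence the '!'):

    if (!ovs_mutex_trylock(&dp->port_mutex)) {
        ret = netdev_flow_get(netdev, &match, &actions,
                              &netdev_flow->mega_ufid, stats, attrs, &buf);
        /* Storing statistics and attributes from the last request for
         * later use on mutex contention. */
        if (!ret) {
            dp_netdev_flow_set_last_stats_attrs(netdev_flow, stats, attrs);
        }
        ovs_mutex_unlock(&dp->port_mutex);
    }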
>
>> + dp_netdev_flow_set_last_stats_attrs(netdev_flow, stats, attrs);
>> + ovs_mutex_unlock(&dp->port_mutex);
>> + } else {
>> + dp_netdev_flow_get_last_stats_attrs(netdev_flow, stats, attrs);
>> + if (!attrs->dp_layer) {
>> + /* Flow was never reported as 'offloaded' so it's harmless
>> + * to continue to think so. */
>> + ret = EAGAIN;
>> + }
>> + }
>> netdev_close(netdev);
>> if (ret) {
>> return false;
>> @@ -3448,6 +3512,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
>> /* Do not allocate extra space. */
>> flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len);
>> memset(&flow->stats, 0, sizeof flow->stats);
>> + memset(&flow->last_stats, 0, sizeof flow->last_stats);
>> + memset(&flow->last_attrs, 0, sizeof flow->last_attrs);
>> flow->dead = false;
>> flow->batch = NULL;
>> flow->mark = INVALID_FLOW_MARK;
>>
>