[ovs-dev] [PATCH v2] dpif-netdev: Avoid deadlock with offloading during PMD thread deletion.
Kevin Traynor
ktraynor at redhat.com
Wed Jul 15 16:44:36 UTC 2020
On 15/07/2020 17:00, Ilya Maximets wrote:
> Main thread will try to pause/stop all revalidators during datapath
> reconfiguration via datapath purge callback (dp_purge_cb) while
> holding 'dp->port_mutex'. And deadlock happens in case any of
> revalidator threads is already waiting on 'dp->port_mutex' while
> dumping offloaded flows:
>
> main thread revalidator
> --------------------------------- ----------------------------------
>
> ovs_mutex_lock(&dp->port_mutex)
>
> dpif_netdev_flow_dump_next()
> -> dp_netdev_flow_to_dpif_flow
> -> get_dpif_flow_status
> -> dpif_netdev_get_flow_offload_status()
> -> ovs_mutex_lock(&dp->port_mutex)
> <waiting for mutex here>
>
> reconfigure_datapath()
> -> reconfigure_pmd_threads()
> -> dp_netdev_del_pmd()
> -> dp_purge_cb()
> -> udpif_pause_revalidators()
> -> ovs_barrier_block(&udpif->pause_barrier)
> <waiting for revalidators to reach barrier>
>
> <DEADLOCK>
>
> We're not allowed to call offloading API without holding global
> port mutex from the userspace datapath due to thread safety
> restrictions on netdev-offload-dpdk module. And it's also not easy
> to rework datapath reconfiguration process in order to move actual
> PMD removal and datapath purge out of the port mutex.
>
> So, for now, not sleeping on a mutex if it's not immediately available
> seems like the easiest workaround. This will have an impact on flow
> statistics update rate and on ability to get the latest statistics
> before removing the flow (latest stats will be lost in case we were
> not able to take the mutex). However, this will allow us to operate
> normally avoiding the deadlock.
>
> The last bit is that to avoid flapping of flow attributes and
> statistics we're not failing the operation, but returning last
> statistics and attributes returned by offload provider. Since those
> might be updated in different threads, stores and reads are atomic.
>
> Reported-by: Frank Wang (王培辉) <wangpeihui at inspur.com>
> Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-June/371753.html
> Fixes: a309e4f52660 ("dpif-netdev: Update offloaded flows statistics.")
> Signed-off-by: Ilya Maximets <i.maximets at ovn.org>
LGTM
Acked-by: Kevin Traynor <ktraynor at redhat.com>
> ---
>
> Version 2:
> - Storing error code from netdev_flow_get() to mimic the actual result
> of the last call.
>
> AUTHORS.rst | 1 +
> lib/dpif-netdev.c | 93 ++++++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 88 insertions(+), 6 deletions(-)
>
> diff --git a/AUTHORS.rst b/AUTHORS.rst
> index 8e6a0769f..eb36a01d0 100644
> --- a/AUTHORS.rst
> +++ b/AUTHORS.rst
> @@ -504,6 +504,7 @@ Edwin Chiu echiu at vmware.com
> Eivind Bulie Haanaes
> Enas Ahmad enas.ahmad at kaust.edu.sa
> Eric Lopez
> +Frank Wang (王培辉) wangpeihui at inspur.com
> Frido Roose fr.roose at gmail.com
> Gaetano Catalli gaetano.catalli at gmail.com
> Gavin Remaley gavin_remaley at selinc.com
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 5bb392cba..2aad24511 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -492,6 +492,12 @@ struct dp_netdev_flow_stats {
> atomic_uint16_t tcp_flags; /* Bitwise-OR of seen tcp_flags values. */
> };
>
> +/* Contained by struct dp_netdev_flow's 'last_attrs' member. */
> +struct dp_netdev_flow_attrs {
> + atomic_bool offloaded; /* True if flow is offloaded to HW. */
> + ATOMIC(const char *) dp_layer; /* DP layer the flow is handled in. */
> +};
> +
> /* A flow in 'dp_netdev_pmd_thread's 'flow_table'.
> *
> *
> @@ -552,6 +558,11 @@ struct dp_netdev_flow {
> /* Statistics. */
> struct dp_netdev_flow_stats stats;
>
> + /* Statistics and attributes received from the netdev offload provider. */
> + atomic_int netdev_flow_get_result;
> + struct dp_netdev_flow_stats last_stats;
> + struct dp_netdev_flow_attrs last_attrs;
> +
> /* Actions. */
> OVSRCU_TYPE(struct dp_netdev_actions *) actions;
>
> @@ -3277,9 +3288,56 @@ dp_netdev_pmd_find_flow(const struct dp_netdev_pmd_thread *pmd,
> return NULL;
> }
>
> +static void
> +dp_netdev_flow_set_last_stats_attrs(struct dp_netdev_flow *netdev_flow,
> + const struct dpif_flow_stats *stats,
> + const struct dpif_flow_attrs *attrs,
> + int result)
> +{
> + struct dp_netdev_flow_stats *last_stats = &netdev_flow->last_stats;
> + struct dp_netdev_flow_attrs *last_attrs = &netdev_flow->last_attrs;
> +
> + atomic_store_relaxed(&netdev_flow->netdev_flow_get_result, result);
> + if (result) {
> + return;
> + }
> +
> + atomic_store_relaxed(&last_stats->used, stats->used);
> + atomic_store_relaxed(&last_stats->packet_count, stats->n_packets);
> + atomic_store_relaxed(&last_stats->byte_count, stats->n_bytes);
> + atomic_store_relaxed(&last_stats->tcp_flags, stats->tcp_flags);
> +
> + atomic_store_relaxed(&last_attrs->offloaded, attrs->offloaded);
> + atomic_store_relaxed(&last_attrs->dp_layer, attrs->dp_layer);
> +
> +}
> +
> +static void
> +dp_netdev_flow_get_last_stats_attrs(struct dp_netdev_flow *netdev_flow,
> + struct dpif_flow_stats *stats,
> + struct dpif_flow_attrs *attrs,
> + int *result)
> +{
> + struct dp_netdev_flow_stats *last_stats = &netdev_flow->last_stats;
> + struct dp_netdev_flow_attrs *last_attrs = &netdev_flow->last_attrs;
> +
> + atomic_read_relaxed(&netdev_flow->netdev_flow_get_result, result);
> + if (*result) {
> + return;
> + }
> +
> + atomic_read_relaxed(&last_stats->used, &stats->used);
> + atomic_read_relaxed(&last_stats->packet_count, &stats->n_packets);
> + atomic_read_relaxed(&last_stats->byte_count, &stats->n_bytes);
> + atomic_read_relaxed(&last_stats->tcp_flags, &stats->tcp_flags);
> +
> + atomic_read_relaxed(&last_attrs->offloaded, &attrs->offloaded);
> + atomic_read_relaxed(&last_attrs->dp_layer, &attrs->dp_layer);
> +}
> +
> static bool
> dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp,
> - const struct dp_netdev_flow *netdev_flow,
> + struct dp_netdev_flow *netdev_flow,
> struct dpif_flow_stats *stats,
> struct dpif_flow_attrs *attrs)
> {
> @@ -3302,11 +3360,31 @@ dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp,
> }
> ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf);
> /* Taking a global 'port_mutex' to fulfill thread safety
> - * restrictions for the netdev-offload-dpdk module. */
> - ovs_mutex_lock(&dp->port_mutex);
> - ret = netdev_flow_get(netdev, &match, &actions, &netdev_flow->mega_ufid,
> - stats, attrs, &buf);
> - ovs_mutex_unlock(&dp->port_mutex);
> + * restrictions for the netdev-offload-dpdk module.
> + *
> + * XXX: Main thread will try to pause/stop all revalidators during datapath
> + * reconfiguration via datapath purge callback (dp_purge_cb) while
> + * holding 'dp->port_mutex'. So we're not waiting for mutex here.
> > + * Otherwise, deadlock is possible, because revalidators might sleep
> + * waiting for the main thread to release the lock and main thread
> + * will wait for them to stop processing.
> + * This workaround might make statistics less accurate. Especially
> + * for flow deletion case, since there will be no other attempt. */
> + if (!ovs_mutex_trylock(&dp->port_mutex)) {
> + ret = netdev_flow_get(netdev, &match, &actions,
> + &netdev_flow->mega_ufid, stats, attrs, &buf);
> + /* Storing statistics and attributes from the last request for
> + * later use on mutex contention. */
> + dp_netdev_flow_set_last_stats_attrs(netdev_flow, stats, attrs, ret);
> + ovs_mutex_unlock(&dp->port_mutex);
> + } else {
> + dp_netdev_flow_get_last_stats_attrs(netdev_flow, stats, attrs, &ret);
> + if (!ret && !attrs->dp_layer) {
> + /* Flow was never reported as 'offloaded' so it's harmless
> + * to continue to think so. */
> + ret = EAGAIN;
> + }
> + }
> netdev_close(netdev);
> if (ret) {
> return false;
> @@ -3575,6 +3653,9 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
> /* Do not allocate extra space. */
> flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len);
> memset(&flow->stats, 0, sizeof flow->stats);
> + atomic_init(&flow->netdev_flow_get_result, 0);
> + memset(&flow->last_stats, 0, sizeof flow->last_stats);
> + memset(&flow->last_attrs, 0, sizeof flow->last_attrs);
> flow->dead = false;
> flow->batch = NULL;
> flow->mark = INVALID_FLOW_MARK;
>
More information about the dev
mailing list