[ovs-dev] [PATCH v4] dpif-netdev: Remove PMD latency on seq_mutex
Loftus, Ciara
ciara.loftus at intel.com
Fri Jul 15 09:10:46 UTC 2016
>
> I applied this to master with the below incremental.
>
> We _usually_ use positive error numbers in int return value.
>
> I think there was an extra COVERAGE_INC(seq_change)
>
> Thanks for the patch!
Hi,
Could this be backported to the 2.5 branch?
Thanks,
Ciara
>
> diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c
> index 7462579..8aef1f1 100644
> --- a/lib/ovs-rcu.c
> +++ b/lib/ovs-rcu.c
> @@ -157,7 +157,7 @@ int
> ovsrcu_try_quiesce(void)
> {
> struct ovsrcu_perthread *perthread;
> - int ret = -EBUSY;
> + int ret = EBUSY;
>
> ovs_assert(!single_threaded());
> perthread = ovsrcu_perthread_get();
> diff --git a/lib/seq.c b/lib/seq.c
> index 4e99c6c..b8b5b65 100644
> --- a/lib/seq.c
> +++ b/lib/seq.c
> @@ -138,8 +138,6 @@ void
> seq_change(struct seq *seq)
> OVS_EXCLUDED(seq_mutex)
> {
> - COVERAGE_INC(seq_change);
> -
> ovs_mutex_lock(&seq_mutex);
> seq_change_protected(seq);
> ovs_mutex_unlock(&seq_mutex);
>
>
>
> 2016-07-05 6:33 GMT-07:00 Flavio Leitner <fbl at redhat.com>:
>
> > The PMD thread needs to keep processing RX queues in order
> > to achieve maximum throughput. It also needs to sweep emc
> > cache and quiesce which use seq_mutex. That mutex can
> > eventually block the PMD thread causing latency spikes and
> > affecting the throughput.
> >
> > Since there is no requirement for running those tasks at a
> > specific time, this patch extend seq API to allow tentative
> > locking instead.
> >
> > Reported-by: Karl Rister <krister at redhat.com>
> > Co-authored-by: Karl Rister <krister at redhat.com>
> > Signed-off-by: Flavio Leitner <fbl at redhat.com>
> > ---
> > lib/dpif-netdev.c | 5 +++--
> > lib/ovs-rcu.c | 37 +++++++++++++++++++++++++++++++++++--
> > lib/ovs-rcu.h | 1 +
> > lib/seq.c | 49
> ++++++++++++++++++++++++++++++++++++++++++++++---
> > lib/seq.h | 5 +++++
> > 5 files changed, 90 insertions(+), 7 deletions(-)
> >
> > v4:
> > - return EBUSY if lock is busy.
> >
> > v3:
> > - addressed clang annotation feedbacks from Daniele
> > - tested over 4 days without spikes or other issues
> >
> > v2:
> > - expanded SEQ API instead of using recursive lock.
> >
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> > index 37c2631..dc5b02e 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -2869,9 +2869,10 @@ reload:
> >
> > lc = 0;
> >
> > - emc_cache_slow_sweep(&pmd->flow_cache);
> > coverage_try_clear();
> > - ovsrcu_quiesce();
> > + if (!ovsrcu_try_quiesce()) {
> > + emc_cache_slow_sweep(&pmd->flow_cache);
> > + }
> >
> > atomic_read_relaxed(&pmd->change_seq, &seq);
> > if (seq != port_seq) {
> > diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c
> > index 0ff508e..7462579 100644
> > --- a/lib/ovs-rcu.c
> > +++ b/lib/ovs-rcu.c
> > @@ -15,6 +15,7 @@
> > */
> >
> > #include <config.h>
> > +#include <errno.h>
> > #include "ovs-rcu.h"
> > #include "fatal-signal.h"
> > #include "guarded-list.h"
> > @@ -57,6 +58,7 @@ static struct guarded_list flushed_cbsets;
> > static struct seq *flushed_cbsets_seq;
> >
> > static void ovsrcu_init_module(void);
> > +static void ovsrcu_flush_cbset__(struct ovsrcu_perthread *, bool);
> > static void ovsrcu_flush_cbset(struct ovsrcu_perthread *);
> > static void ovsrcu_unregister__(struct ovsrcu_perthread *);
> > static bool ovsrcu_call_postponed(void);
> > @@ -151,6 +153,27 @@ ovsrcu_quiesce(void)
> > ovsrcu_quiesced();
> > }
> >
> > +int
> > +ovsrcu_try_quiesce(void)
> > +{
> > + struct ovsrcu_perthread *perthread;
> > + int ret = -EBUSY;
> > +
> > + ovs_assert(!single_threaded());
> > + perthread = ovsrcu_perthread_get();
> > + if (!seq_try_lock()) {
> > + perthread->seqno = seq_read_protected(global_seqno);
> > + if (perthread->cbset) {
> > + ovsrcu_flush_cbset__(perthread, true);
> > + }
> > + seq_change_protected(global_seqno);
> > + seq_unlock();
> > + ovsrcu_quiesced();
> > + ret = 0;
> > + }
> > + return ret;
> > +}
> > +
> > bool
> > ovsrcu_is_quiescent(void)
> > {
> > @@ -292,7 +315,7 @@ ovsrcu_postpone_thread(void *arg OVS_UNUSED)
> > }
> >
> > static void
> > -ovsrcu_flush_cbset(struct ovsrcu_perthread *perthread)
> > +ovsrcu_flush_cbset__(struct ovsrcu_perthread *perthread, bool
> protected)
> > {
> > struct ovsrcu_cbset *cbset = perthread->cbset;
> >
> > @@ -300,11 +323,21 @@ ovsrcu_flush_cbset(struct ovsrcu_perthread
> > *perthread)
> > guarded_list_push_back(&flushed_cbsets, &cbset->list_node,
> > SIZE_MAX);
> > perthread->cbset = NULL;
> >
> > - seq_change(flushed_cbsets_seq);
> > + if (protected) {
> > + seq_change_protected(flushed_cbsets_seq);
> > + } else {
> > + seq_change(flushed_cbsets_seq);
> > + }
> > }
> > }
> >
> > static void
> > +ovsrcu_flush_cbset(struct ovsrcu_perthread *perthread)
> > +{
> > + ovsrcu_flush_cbset__(perthread, false);
> > +}
> > +
> > +static void
> > ovsrcu_unregister__(struct ovsrcu_perthread *perthread)
> > {
> > if (perthread->cbset) {
> > diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h
> > index 8750ead..dc75749 100644
> > --- a/lib/ovs-rcu.h
> > +++ b/lib/ovs-rcu.h
> > @@ -217,6 +217,7 @@ void ovsrcu_postpone__(void (*function)(void
> *aux),
> > void *aux);
> > void ovsrcu_quiesce_start(void);
> > void ovsrcu_quiesce_end(void);
> > void ovsrcu_quiesce(void);
> > +int ovsrcu_try_quiesce(void);
> > bool ovsrcu_is_quiescent(void);
> >
> > /* Synchronization. Waits for all non-quiescent threads to quiesce at
> > least
> > diff --git a/lib/seq.c b/lib/seq.c
> > index 57e0775..4e99c6c 100644
> > --- a/lib/seq.c
> > +++ b/lib/seq.c
> > @@ -100,6 +100,38 @@ seq_destroy(struct seq *seq)
> > ovs_mutex_unlock(&seq_mutex);
> > }
> >
> > +int
> > +seq_try_lock(void)
> > +{
> > + return ovs_mutex_trylock(&seq_mutex);
> > +}
> > +
> > +void
> > +seq_lock(void)
> > + OVS_ACQUIRES(seq_mutex)
> > +{
> > + ovs_mutex_lock(&seq_mutex);
> > +}
> > +
> > +void
> > +seq_unlock(void)
> > + OVS_RELEASES(seq_mutex)
> > +{
> > + ovs_mutex_unlock(&seq_mutex);
> > +}
> > +
> > +/* Increments 'seq''s sequence number, waking up any threads that are
> > waiting
> > + * on 'seq'. */
> > +void
> > +seq_change_protected(struct seq *seq)
> > + OVS_REQUIRES(seq_mutex)
> > +{
> > + COVERAGE_INC(seq_change);
> > +
> > + seq->value = seq_next++;
> > + seq_wake_waiters(seq);
> > +}
> > +
> > /* Increments 'seq''s sequence number, waking up any threads that are
> > waiting
> > * on 'seq'. */
> > void
> > @@ -109,8 +141,7 @@ seq_change(struct seq *seq)
> > COVERAGE_INC(seq_change);
> >
> > ovs_mutex_lock(&seq_mutex);
> > - seq->value = seq_next++;
> > - seq_wake_waiters(seq);
> > + seq_change_protected(seq);
> > ovs_mutex_unlock(&seq_mutex);
> > }
> >
> > @@ -120,13 +151,25 @@ seq_change(struct seq *seq)
> > * when an object changes, even without an ability to lock the object.
> > See
> > * Usage in seq.h for details. */
> > uint64_t
> > +seq_read_protected(const struct seq *seq)
> > + OVS_REQUIRES(seq_mutex)
> > +{
> > + return seq->value;
> > +}
> > +
> > +/* Returns 'seq''s current sequence number (which could change
> > immediately).
> > + *
> > + * seq_read() and seq_wait() can be used together to yield a race-free
> > wakeup
> > + * when an object changes, even without an ability to lock the object.
> > See
> > + * Usage in seq.h for details. */
> > +uint64_t
> > seq_read(const struct seq *seq)
> > OVS_EXCLUDED(seq_mutex)
> > {
> > uint64_t value;
> >
> > ovs_mutex_lock(&seq_mutex);
> > - value = seq->value;
> > + value = seq_read_protected(seq);
> > ovs_mutex_unlock(&seq_mutex);
> >
> > return value;
> > diff --git a/lib/seq.h b/lib/seq.h
> > index f3f4b53..221ab9a 100644
> > --- a/lib/seq.h
> > +++ b/lib/seq.h
> > @@ -121,9 +121,14 @@
> > struct seq *seq_create(void);
> > void seq_destroy(struct seq *);
> > void seq_change(struct seq *);
> > +void seq_change_protected(struct seq *);
> > +void seq_lock(void);
> > +int seq_try_lock(void);
> > +void seq_unlock(void);
> >
> > /* For observers. */
> > uint64_t seq_read(const struct seq *);
> > +uint64_t seq_read_protected(const struct seq *);
> >
> > void seq_wait_at(const struct seq *, uint64_t value, const char *where);
> > #define seq_wait(seq, value) seq_wait_at(seq, value,
> OVS_SOURCE_LOCATOR)
> > --
> > 2.5.5
> >
> >
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
More information about the dev
mailing list