[ovs-dev] [RFC 2/5] dpif-netdev: Trigger parallel pmd reloads.
Eelco Chaudron
echaudro at redhat.com
Wed May 8 07:17:37 UTC 2019
On 7 May 2019, at 16:00, Ilya Maximets wrote:
> On 07.05.2019 16:46, Eelco Chaudron wrote:
>>
>>
>> On 30 Apr 2019, at 14:17, David Marchand wrote:
>>
>>> pmd reloads are currently serialised in each step calling
>>> reload_affected_pmds.
>>> Any pmd processing packets, waiting on a mutex, etc. will make other
>>> pmd threads wait for a delay that can be nondeterministic when
>>> syscalls add up.
>>>
>>> Switch to a little busy loop on the control thread using an atomic
>>> count.
>>>
>>> Signed-off-by: David Marchand <david.marchand at redhat.com>
>>> ---
>>> lib/dpif-netdev.c | 43 ++++++++++++++++++++++++++++++-------------
>>> 1 file changed, 30 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>>> index 30774ed..b2b21fd 100644
>>> --- a/lib/dpif-netdev.c
>>> +++ b/lib/dpif-netdev.c
>>> @@ -335,6 +335,9 @@ struct dp_netdev {
>>>      /* The time that a packet can wait in output batch for sending. */
>>>      atomic_uint32_t tx_flush_interval;
>>>
>>> + /* Count of pmds currently reloading */
>>> + atomic_count reloading_pmds;
>>> +
>>> /* Meters. */
>>> struct ovs_mutex meter_locks[N_METER_LOCKS];
>>> struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
>>> @@ -647,9 +650,6 @@ struct dp_netdev_pmd_thread {
>>>      struct ovs_refcount ref_cnt;    /* Every reference must be refcount'ed. */
>>>      struct cmap_node node;          /* In 'dp->poll_threads'. */
>>>
>>> -    pthread_cond_t cond;            /* For synchronizing pmd thread reload. */
>>> -    struct ovs_mutex cond_mutex;    /* Mutex for condition variable. */
>>> -
>>>      /* Per thread exact-match cache.  Note, the instance for cpu core
>>>       * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
>>>       * need to be protected by 'non_pmd_mutex'.  Every other instance
>>> @@ -1525,6 +1525,8 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
>>>      atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
>>>      atomic_init(&dp->tx_flush_interval, DEFAULT_TX_FLUSH_INTERVAL);
>>>
>>> + atomic_count_init(&dp->reloading_pmds, 0);
>>> +
>>> cmap_init(&dp->poll_threads);
>>> dp->pmd_rxq_assign_cyc = true;
>>>
>>> @@ -1754,11 +1756,8 @@ dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread *pmd)
>>> return;
>>> }
>>>
>>> - ovs_mutex_lock(&pmd->cond_mutex);
>>> seq_change(pmd->reload_seq);
>>> atomic_store_relaxed(&pmd->reload, true);
>>> - ovs_mutex_cond_wait(&pmd->cond, &pmd->cond_mutex);
>>> - ovs_mutex_unlock(&pmd->cond_mutex);
>>> }
>>>
>>> static uint32_t
>>> @@ -4641,9 +4640,27 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
>>> }
>>>
>>> static void
>>> +wait_reloading_pmds(struct dp_netdev *dp)
>>> +{
>>> + while (atomic_count_get(&dp->reloading_pmds) != 0) {
>>> + }
>>> +}
>>> +
>>> +static void
>>> reload_affected_pmds(struct dp_netdev *dp)
>>> {
>>> struct dp_netdev_pmd_thread *pmd;
>>> + unsigned int pmd_count = 0;
>>> +
>>> + CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>>> + if (pmd->core_id == NON_PMD_CORE_ID) {
>>> + continue;
>>> + }
>>> + if (pmd->need_reload) {
>>> + pmd_count++;
>>> + }
>>> + }
>>> +    atomic_count_set(&dp->reloading_pmds, pmd_count);
>>>
>>>      CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>>> if (pmd->need_reload) {
>>> @@ -4652,6 +4669,10 @@ reload_affected_pmds(struct dp_netdev *dp)
>>
>> The above atomic_count_set() is a relaxed set, and so is the
>> atomic_store_relaxed(&pmd->reload, true) in dp_netdev_reload_pmd__(pmd),
>> which could lead to the PMDs decreasing dp->reloading_pmds before it
>> is set. I guess that if the correct memory_order is selected for
>> pmd->reload, as suggested by Ilya in patch 1/5, it should work.
>
> Relaxed memory ordering does not imply any synchronization, so the
> count will not be synchronized with 'reload' regardless of the
> latter's memory ordering. To guarantee the order, atomic_count_set()
> must be replaced with a non-relaxed version, along with 'reload'.
Thanks for the additional info, as it made me wonder why… But then I
found this, https://lwn.net/Articles/586838/, which tells me that a
memory_order_release is not the same as the Linux kernel
implementation with its full read/write barrier :(
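
To convince myself, I tried to capture the pairing Ilya describes in a
small stand-alone sketch. This is plain C11 atomics rather than the OVS
wrappers, the names (pmd_main, N_PMDS, etc.) are made up for
illustration, and it is of course not the actual patch: the control
thread publishes the count via a release store of the reload flag, and
each pmd pairs an acquire load of that flag with a release decrement of
the count, so a pmd can never decrement a count that has not been set
yet:

/* sketch.c -- illustrative only, not the actual OVS code.
 * Build with: gcc -std=c11 -pthread sketch.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define N_PMDS 4

static atomic_uint reloading_pmds;   /* standing in for dp->reloading_pmds */
static atomic_bool reload[N_PMDS];   /* standing in for pmd->reload */

static void *
pmd_main(void *arg)
{
    int id = *(int *) arg;

    /* Acquire load: once we observe 'reload' == true, we are also
     * guaranteed to observe the count set before the release store. */
    while (!atomic_load_explicit(&reload[id], memory_order_acquire)) {
        /* Busy wait, standing in for the pmd poll loop. */
    }

    /* ... the actual reload work would happen here ... */

    /* Release decrement: makes our work visible to the control
     * thread's acquire loop below before the count reaches zero. */
    atomic_fetch_sub_explicit(&reloading_pmds, 1, memory_order_release);
    return NULL;
}

int
main(void)
{
    pthread_t threads[N_PMDS];
    int ids[N_PMDS];

    /* Control thread: set the count *before* flagging any reload.
     * The store can be relaxed because the release store of the
     * flag below is what publishes it to the pmds. */
    atomic_store_explicit(&reloading_pmds, N_PMDS, memory_order_relaxed);

    for (int i = 0; i < N_PMDS; i++) {
        ids[i] = i;
        pthread_create(&threads[i], NULL, pmd_main, &ids[i]);
        atomic_store_explicit(&reload[i], true, memory_order_release);
    }

    /* Busy wait until every pmd has reported back. */
    while (atomic_load_explicit(&reloading_pmds, memory_order_acquire)) {
    }
    printf("all pmds reloaded\n");

    for (int i = 0; i < N_PMDS; i++) {
        pthread_join(threads[i], NULL);
    }
    return 0;
}

With both stores relaxed, as in the RFC, nothing orders the decrement
against the set, which is exactly the window described above.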