[ovs-dev] [PATCH v3 1/2] dpif-netdev: Rework of rx queue management.

Ilya Maximets i.maximets at samsung.com
Tue Jan 26 05:10:31 UTC 2016


Thanks for review.

I'll prepare new version.

Best regards, Ilya Maximets.

On 26.01.2016 03:21, Daniele Di Proietto wrote:
> Hi,
> 
> Thanks again for the patch,
> 
> As I said in the discussion of v2, I think we should keep the
> condition variable, so that we can return from the datapath
> functions when the port operations are effectively completed.
> 
> I also have a couple of minor coding style suggestions (inline).
> 
> 
> Other than this the patch looks good to me
> 
> Thanks,
> 
> Daniele
> 
> 
> On 25/01/2016 07:00, "Ilya Maximets" <i.maximets at samsung.com> wrote:
> 
>> Current rx queue management model is buggy and will not work properly
>> without additional barriers and other syncronization between PMD
>> threads and main thread.
>>
>> Known BUGS of current model:
>> 	* While reloading, two PMD threads, one already reloaded and
>> 	  one not yet reloaded, can poll same queue of the same port.
>> 	  This behavior may lead to dpdk driver failure, because they
>> 	  are not thread-safe.
>> 	* Same bug as fixed in commit e4e74c3a2b
>> 	  ("dpif-netdev: Purge all ukeys when reconfigure pmd.") but
>> 	  reproduced while only reconfiguring of pmd threads without
>> 	  restarting, because addition may change the sequence of
>> 	  other ports, which is important in time of reconfiguration.
>>
>> Introducing the new model, where distribution of queues made by main
>> thread with minimal synchronizations and without data races between
>> pmd threads. Also, this model should work faster, because only
>> needed threads will be interrupted for reconfiguraition and total
>> computational complexity of reconfiguration is less.
>>
>> Signed-off-by: Ilya Maximets <i.maximets at samsung.com>
>> ---
>> lib/dpif-netdev.c | 245
>> ++++++++++++++++++++++++++++++++----------------------
>> 1 file changed, 147 insertions(+), 98 deletions(-)
>>
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index cd72e62..f055c6e 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -372,6 +372,13 @@ struct dp_netdev_pmd_cycles {
>>     atomic_ullong n[PMD_N_CYCLES];
>> };
>>
>> +/* Contained by struct dp_netdev_pmd_thread's 'poll_list' member. */
>> +struct rxq_poll {
>> +    struct dp_netdev_port *port;
>> +    struct netdev_rxq *rx;
>> +    struct ovs_list node;
>> +};
>> +
>> /* PMD: Poll modes drivers.  PMD accesses devices via polling to
>> eliminate
>>  * the performance overhead of interrupt processing.  Therefore netdev
>> can
>>  * not implement rx-wait for these devices.  dpif-netdev needs to poll
>> @@ -393,9 +400,6 @@ struct dp_netdev_pmd_thread {
>>     struct ovs_refcount ref_cnt;    /* Every reference must be
>> refcount'ed. */
>>     struct cmap_node node;          /* In 'dp->poll_threads'. */
>>
>> -    pthread_cond_t cond;            /* For synchronizing pmd thread
>> reload. */
>> -    struct ovs_mutex cond_mutex;    /* Mutex for condition variable. */
>> -
>>     /* Per thread exact-match cache.  Note, the instance for cpu core
>>      * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
>>      * need to be protected (e.g. by 'dp_netdev_mutex').  All other
>> @@ -430,6 +434,11 @@ struct dp_netdev_pmd_thread {
>>     int tx_qid;                     /* Queue id used by this pmd thread
>> to
>>                                      * send packets on all netdevs */
>>
>> +    struct ovs_mutex poll_mutex;    /* Mutex for poll_list. */
>> +    /* List of rx queues to poll. */
>> +    struct ovs_list poll_list OVS_GUARDED;
>> +    int poll_cnt;                   /* Number of elemints in poll_list.
>> */
>> +
>>     /* Only a pmd thread can write on its own 'cycles' and 'stats'.
>>      * The main thread keeps 'stats_zero' and 'cycles_zero' as base
>>      * values and subtracts them from 'stats' and 'cycles' before
>> @@ -469,7 +478,6 @@ static void dp_netdev_input(struct
>> dp_netdev_pmd_thread *,
>>                             struct dp_packet **, int cnt);
>>
>> static void dp_netdev_disable_upcall(struct dp_netdev *);
>> -void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd);
> 
> Assuming we're keeping this function, I think it should be made static.
> This wasn't introduced by this patch, but it would be nice to fix it
> anyway.
> 
> 
>> static void dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd,
>>                                     struct dp_netdev *dp, int index,
>>                                     unsigned core_id, int numa_id);
>> @@ -482,6 +490,11 @@ dp_netdev_pmd_get_next(struct dp_netdev *dp, struct
>> cmap_position *pos);
>> static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
>> static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int
>> numa_id);
>> static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int
>> numa_id);
>> +static void
>> +dp_netdev_add_rxq_to_pmd(struct dp_netdev_pmd_thread *pmd,
>> +                         struct dp_netdev_port *port, struct netdev_rxq
>> *rx);
>> +static struct dp_netdev_pmd_thread *
>> +dp_netdev_less_loaded_pmd_on_numa(struct dp_netdev *dp, int numa_id);
>> static void dp_netdev_reset_pmd_threads(struct dp_netdev *dp);
>> static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd);
>> static void dp_netdev_pmd_unref(struct dp_netdev_pmd_thread *pmd);
>> @@ -1019,22 +1032,7 @@ dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread
>> *pmd)
>>         return;
>>     }
>>
>> -    ovs_mutex_lock(&pmd->cond_mutex);
>>     atomic_add_relaxed(&pmd->change_seq, 1, &old_seq);
>> -    ovs_mutex_cond_wait(&pmd->cond, &pmd->cond_mutex);
>> -    ovs_mutex_unlock(&pmd->cond_mutex);
>> -}
>> -
>> -/* Causes all pmd threads to reload its tx/rx devices.
>> - * Must be called after adding/removing ports. */
>> -static void
>> -dp_netdev_reload_pmds(struct dp_netdev *dp)
>> -{
>> -    struct dp_netdev_pmd_thread *pmd;
>> -
>> -    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>> -        dp_netdev_reload_pmd__(pmd);
>> -    }
>> }
>>
>> static uint32_t
>> @@ -1128,8 +1126,26 @@ do_add_port(struct dp_netdev *dp, const char
>> *devname, const char *type,
>>     cmap_insert(&dp->ports, &port->node, hash_port_no(port_no));
>>
>>     if (netdev_is_pmd(netdev)) {
>> -        dp_netdev_set_pmds_on_numa(dp, netdev_get_numa_id(netdev));
>> -        dp_netdev_reload_pmds(dp);
>> +        int numa_id = netdev_get_numa_id(netdev);
>> +        struct dp_netdev_pmd_thread *pmd;
>> +
>> +        /* Cannot create pmd threads for invalid numa node. */
>> +        ovs_assert(ovs_numa_numa_id_is_valid(numa_id));
>> +
>> +        for (i = 0; i < netdev_n_rxq(netdev); i++) {
>> +            pmd = dp_netdev_less_loaded_pmd_on_numa(dp, numa_id);
>> +            if (!pmd) {
>> +                /* There is no pmd threads on this numa node. */
>> +                dp_netdev_set_pmds_on_numa(dp, numa_id);
>> +                /* Assigning of rx queues done. */
>> +                break;
>> +            }
>> +
>> +            ovs_mutex_lock(&pmd->poll_mutex);
>> +            dp_netdev_add_rxq_to_pmd(pmd, port, port->rxq[i]);
>> +            ovs_mutex_unlock(&pmd->poll_mutex);
>> +            dp_netdev_reload_pmd__(pmd);
>> +        }
>>     }
>>     seq_change(dp->port_seq);
>>
>> @@ -1226,16 +1242,6 @@ port_ref(struct dp_netdev_port *port)
>>     }
>> }
>>
>> -static bool
>> -port_try_ref(struct dp_netdev_port *port)
>> -{
>> -    if (port) {
>> -        return ovs_refcount_try_ref_rcu(&port->ref_cnt);
>> -    }
>> -
>> -    return false;
>> -}
>> -
>> static void
>> port_unref(struct dp_netdev_port *port)
>> {
>> @@ -1313,12 +1319,38 @@ do_del_port(struct dp_netdev *dp, struct
>> dp_netdev_port *port)
>>     if (netdev_is_pmd(port->netdev)) {
>>         int numa_id = netdev_get_numa_id(port->netdev);
>>
>> +        /* PMD threads can not be on invalid numa node. */
>> +        ovs_assert(ovs_numa_numa_id_is_valid(numa_id));
>>         /* If there is no netdev on the numa node, deletes the pmd
>> threads
>> -         * for that numa.  Else, just reloads the queues.  */
>> +         * for that numa.  Else, deletes the queues from polling lists.
>> */
>>         if (!has_pmd_port_for_numa(dp, numa_id)) {
>>             dp_netdev_del_pmds_on_numa(dp, numa_id);
>>         }
>> -        dp_netdev_reload_pmds(dp);
>> +        else {
> 
> This should be
> 
> } else {
> 
> on the same line
> 
> 
>> +            struct dp_netdev_pmd_thread *pmd;
>> +            struct rxq_poll *poll, *next;
>> +
>> +            CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>> +                if (pmd->numa_id == numa_id) {
>> +                    bool found = false;
>> +
>> +                    ovs_mutex_lock(&pmd->poll_mutex);
>> +                    LIST_FOR_EACH_SAFE (poll, next, node,
>> &pmd->poll_list) {
>> +                        if (poll->port == port) {
>> +                            found = true;
>> +                            port_unref(poll->port);
>> +                            list_remove(&poll->node);
>> +                            pmd->poll_cnt--;
>> +                            free(poll);
>> +                        }
>> +                    }
>> +                    ovs_mutex_unlock(&pmd->poll_mutex);
>> +                    if (found) {
>> +                        dp_netdev_reload_pmd__(pmd);
>> +                    }
>> +                }
>> +            }
>> +        }
>>     }
>>
>>     port_unref(port);
> 
> 
> 



More information about the dev mailing list