[ovs-dev] [PATCH v3 1/2] dpif-netdev: Rework of rx queue management.
Ilya Maximets
i.maximets at samsung.com
Tue Jan 26 05:10:31 UTC 2016
Thanks for review.
I'll prepare new version.
Best regards, Ilya Maximets.
On 26.01.2016 03:21, Daniele Di Proietto wrote:
> Hi,
>
> Thanks again for the patch,
>
> As I said in the discussion of v2, I think we should keep the
> condition variable, so that we can return from the datapath
> functions when the port operations are effectively completed.
>
> I also have a couple of minor coding style suggestions (inline).
>
>
> Other than this the patch looks good to me
>
> Thanks,
>
> Daniele
>
>
> On 25/01/2016 07:00, "Ilya Maximets" <i.maximets at samsung.com> wrote:
>
>> Current rx queue management model is buggy and will not work properly
>> without additional barriers and other syncronization between PMD
>> threads and main thread.
>>
>> Known BUGS of current model:
>> * While reloading, two PMD threads, one already reloaded and
>> one not yet reloaded, can poll same queue of the same port.
>> This behavior may lead to dpdk driver failure, because they
>> are not thread-safe.
>> * Same bug as fixed in commit e4e74c3a2b
>> ("dpif-netdev: Purge all ukeys when reconfigure pmd.") but
>> reproduced while only reconfiguring of pmd threads without
>> restarting, because addition may change the sequence of
>> other ports, which is important in time of reconfiguration.
>>
>> Introducing the new model, where distribution of queues made by main
>> thread with minimal synchronizations and without data races between
>> pmd threads. Also, this model should work faster, because only
>> needed threads will be interrupted for reconfiguraition and total
>> computational complexity of reconfiguration is less.
>>
>> Signed-off-by: Ilya Maximets <i.maximets at samsung.com>
>> ---
>> lib/dpif-netdev.c | 245
>> ++++++++++++++++++++++++++++++++----------------------
>> 1 file changed, 147 insertions(+), 98 deletions(-)
>>
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index cd72e62..f055c6e 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -372,6 +372,13 @@ struct dp_netdev_pmd_cycles {
>> atomic_ullong n[PMD_N_CYCLES];
>> };
>>
>> +/* Contained by struct dp_netdev_pmd_thread's 'poll_list' member. */
>> +struct rxq_poll {
>> + struct dp_netdev_port *port;
>> + struct netdev_rxq *rx;
>> + struct ovs_list node;
>> +};
>> +
>> /* PMD: Poll modes drivers. PMD accesses devices via polling to
>> eliminate
>> * the performance overhead of interrupt processing. Therefore netdev
>> can
>> * not implement rx-wait for these devices. dpif-netdev needs to poll
>> @@ -393,9 +400,6 @@ struct dp_netdev_pmd_thread {
>> struct ovs_refcount ref_cnt; /* Every reference must be
>> refcount'ed. */
>> struct cmap_node node; /* In 'dp->poll_threads'. */
>>
>> - pthread_cond_t cond; /* For synchronizing pmd thread
>> reload. */
>> - struct ovs_mutex cond_mutex; /* Mutex for condition variable. */
>> -
>> /* Per thread exact-match cache. Note, the instance for cpu core
>> * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
>> * need to be protected (e.g. by 'dp_netdev_mutex'). All other
>> @@ -430,6 +434,11 @@ struct dp_netdev_pmd_thread {
>> int tx_qid; /* Queue id used by this pmd thread
>> to
>> * send packets on all netdevs */
>>
>> + struct ovs_mutex poll_mutex; /* Mutex for poll_list. */
>> + /* List of rx queues to poll. */
>> + struct ovs_list poll_list OVS_GUARDED;
>> + int poll_cnt; /* Number of elemints in poll_list.
>> */
>> +
>> /* Only a pmd thread can write on its own 'cycles' and 'stats'.
>> * The main thread keeps 'stats_zero' and 'cycles_zero' as base
>> * values and subtracts them from 'stats' and 'cycles' before
>> @@ -469,7 +478,6 @@ static void dp_netdev_input(struct
>> dp_netdev_pmd_thread *,
>> struct dp_packet **, int cnt);
>>
>> static void dp_netdev_disable_upcall(struct dp_netdev *);
>> -void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd);
>
> Assuming we're keeping this function, I think it should be made static.
> This wasn't introduced by this patch, but it would be nice to fix it
> anyway.
>
>
>> static void dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd,
>> struct dp_netdev *dp, int index,
>> unsigned core_id, int numa_id);
>> @@ -482,6 +490,11 @@ dp_netdev_pmd_get_next(struct dp_netdev *dp, struct
>> cmap_position *pos);
>> static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
>> static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int
>> numa_id);
>> static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int
>> numa_id);
>> +static void
>> +dp_netdev_add_rxq_to_pmd(struct dp_netdev_pmd_thread *pmd,
>> + struct dp_netdev_port *port, struct netdev_rxq
>> *rx);
>> +static struct dp_netdev_pmd_thread *
>> +dp_netdev_less_loaded_pmd_on_numa(struct dp_netdev *dp, int numa_id);
>> static void dp_netdev_reset_pmd_threads(struct dp_netdev *dp);
>> static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd);
>> static void dp_netdev_pmd_unref(struct dp_netdev_pmd_thread *pmd);
>> @@ -1019,22 +1032,7 @@ dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread
>> *pmd)
>> return;
>> }
>>
>> - ovs_mutex_lock(&pmd->cond_mutex);
>> atomic_add_relaxed(&pmd->change_seq, 1, &old_seq);
>> - ovs_mutex_cond_wait(&pmd->cond, &pmd->cond_mutex);
>> - ovs_mutex_unlock(&pmd->cond_mutex);
>> -}
>> -
>> -/* Causes all pmd threads to reload its tx/rx devices.
>> - * Must be called after adding/removing ports. */
>> -static void
>> -dp_netdev_reload_pmds(struct dp_netdev *dp)
>> -{
>> - struct dp_netdev_pmd_thread *pmd;
>> -
>> - CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>> - dp_netdev_reload_pmd__(pmd);
>> - }
>> }
>>
>> static uint32_t
>> @@ -1128,8 +1126,26 @@ do_add_port(struct dp_netdev *dp, const char
>> *devname, const char *type,
>> cmap_insert(&dp->ports, &port->node, hash_port_no(port_no));
>>
>> if (netdev_is_pmd(netdev)) {
>> - dp_netdev_set_pmds_on_numa(dp, netdev_get_numa_id(netdev));
>> - dp_netdev_reload_pmds(dp);
>> + int numa_id = netdev_get_numa_id(netdev);
>> + struct dp_netdev_pmd_thread *pmd;
>> +
>> + /* Cannot create pmd threads for invalid numa node. */
>> + ovs_assert(ovs_numa_numa_id_is_valid(numa_id));
>> +
>> + for (i = 0; i < netdev_n_rxq(netdev); i++) {
>> + pmd = dp_netdev_less_loaded_pmd_on_numa(dp, numa_id);
>> + if (!pmd) {
>> + /* There is no pmd threads on this numa node. */
>> + dp_netdev_set_pmds_on_numa(dp, numa_id);
>> + /* Assigning of rx queues done. */
>> + break;
>> + }
>> +
>> + ovs_mutex_lock(&pmd->poll_mutex);
>> + dp_netdev_add_rxq_to_pmd(pmd, port, port->rxq[i]);
>> + ovs_mutex_unlock(&pmd->poll_mutex);
>> + dp_netdev_reload_pmd__(pmd);
>> + }
>> }
>> seq_change(dp->port_seq);
>>
>> @@ -1226,16 +1242,6 @@ port_ref(struct dp_netdev_port *port)
>> }
>> }
>>
>> -static bool
>> -port_try_ref(struct dp_netdev_port *port)
>> -{
>> - if (port) {
>> - return ovs_refcount_try_ref_rcu(&port->ref_cnt);
>> - }
>> -
>> - return false;
>> -}
>> -
>> static void
>> port_unref(struct dp_netdev_port *port)
>> {
>> @@ -1313,12 +1319,38 @@ do_del_port(struct dp_netdev *dp, struct
>> dp_netdev_port *port)
>> if (netdev_is_pmd(port->netdev)) {
>> int numa_id = netdev_get_numa_id(port->netdev);
>>
>> + /* PMD threads can not be on invalid numa node. */
>> + ovs_assert(ovs_numa_numa_id_is_valid(numa_id));
>> /* If there is no netdev on the numa node, deletes the pmd
>> threads
>> - * for that numa. Else, just reloads the queues. */
>> + * for that numa. Else, deletes the queues from polling lists.
>> */
>> if (!has_pmd_port_for_numa(dp, numa_id)) {
>> dp_netdev_del_pmds_on_numa(dp, numa_id);
>> }
>> - dp_netdev_reload_pmds(dp);
>> + else {
>
> This should be
>
> } else {
>
> on the same line
>
>
>> + struct dp_netdev_pmd_thread *pmd;
>> + struct rxq_poll *poll, *next;
>> +
>> + CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>> + if (pmd->numa_id == numa_id) {
>> + bool found = false;
>> +
>> + ovs_mutex_lock(&pmd->poll_mutex);
>> + LIST_FOR_EACH_SAFE (poll, next, node,
>> &pmd->poll_list) {
>> + if (poll->port == port) {
>> + found = true;
>> + port_unref(poll->port);
>> + list_remove(&poll->node);
>> + pmd->poll_cnt--;
>> + free(poll);
>> + }
>> + }
>> + ovs_mutex_unlock(&pmd->poll_mutex);
>> + if (found) {
>> + dp_netdev_reload_pmd__(pmd);
>> + }
>> + }
>> + }
>> + }
>> }
>>
>> port_unref(port);
>
>
>
More information about the dev
mailing list