[ovs-dev] [PATCH 2/5] dpif-netdev: Make PMD auto load balance use common rxq scheduling.

David Marchand david.marchand at redhat.com
Thu Jun 24 14:30:54 UTC 2021


On Fri, Jun 4, 2021 at 11:19 PM Kevin Traynor <ktraynor at redhat.com> wrote:
>
> PMD auto load balance had its own separate implementation of the
> rxq scheduling that it used for dry runs. This was done because
> previously the rxq scheduling was not reusable for a dry run.
>
> Apart from the code duplication (which is a good enough reason
> to replace it alone), this meant that if any further rxq scheduling
> changes or assignment types were added, they would also have to be
> duplicated in the auto load balance code.
>
> This patch replaces the current PMD auto load balance rxq scheduling
> code to reuse the common rxq scheduling code.
>
> The behaviour does not change from a user perspective, except that
> the logs are updated to be more consistent.
>
> As the dry run will compare the pmd load variances for current and
> estimated assignments, new functions are added to populate the current
> assignments and calculate variance on the rxq scheduling data structs.
>
> Now that the new rxq scheduling data structures are being used in
> PMD auto load balance, the older rr_* data structs and associated
> functions can be removed.
>
> Signed-off-by: Kevin Traynor <ktraynor at redhat.com>
> ---
>  lib/dpif-netdev.c | 511 +++++++++++++++-------------------------------
>  1 file changed, 164 insertions(+), 347 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 57d23e112..eaa4e9733 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -4872,138 +4872,4 @@ port_reconfigure(struct dp_netdev_port *port)
>  }
>
> -struct rr_numa_list {
> -    struct hmap numas;  /* Contains 'struct rr_numa' */
> -};
> -
> -struct rr_numa {
> -    struct hmap_node node;
> -
> -    int numa_id;
> -
> -    /* Non isolated pmds on numa node 'numa_id' */
> -    struct dp_netdev_pmd_thread **pmds;
> -    int n_pmds;
> -
> -    int cur_index;
> -    bool idx_inc;
> -};
> -
> -static size_t
> -rr_numa_list_count(struct rr_numa_list *rr)
> -{
> -    return hmap_count(&rr->numas);
> -}
> -
> -static struct rr_numa *
> -rr_numa_list_lookup(struct rr_numa_list *rr, int numa_id)
> -{
> -    struct rr_numa *numa;
> -
> -    HMAP_FOR_EACH_WITH_HASH (numa, node, hash_int(numa_id, 0), &rr->numas) {
> -        if (numa->numa_id == numa_id) {
> -            return numa;
> -        }
> -    }
> -
> -    return NULL;
> -}
> -
> -/* Returns the next node in numa list following 'numa' in round-robin fashion.
> - * Returns first node if 'numa' is a null pointer or the last node in 'rr'.
> - * Returns NULL if 'rr' numa list is empty. */
> -static struct rr_numa *
> -rr_numa_list_next(struct rr_numa_list *rr, const struct rr_numa *numa)
> -{
> -    struct hmap_node *node = NULL;
> -
> -    if (numa) {
> -        node = hmap_next(&rr->numas, &numa->node);
> -    }
> -    if (!node) {
> -        node = hmap_first(&rr->numas);
> -    }
> -
> -    return (node) ? CONTAINER_OF(node, struct rr_numa, node) : NULL;
> -}
> -
> -static void
> -rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
> -{
> -    struct dp_netdev_pmd_thread *pmd;
> -    struct rr_numa *numa;
> -
> -    hmap_init(&rr->numas);
> -
> -    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
> -        if (pmd->core_id == NON_PMD_CORE_ID || pmd->isolated) {
> -            continue;
> -        }
> -
> -        numa = rr_numa_list_lookup(rr, pmd->numa_id);
> -        if (!numa) {
> -            numa = xzalloc(sizeof *numa);
> -            numa->numa_id = pmd->numa_id;
> -            hmap_insert(&rr->numas, &numa->node, hash_int(pmd->numa_id, 0));
> -        }
> -        numa->n_pmds++;
> -        numa->pmds = xrealloc(numa->pmds, numa->n_pmds * sizeof *numa->pmds);
> -        numa->pmds[numa->n_pmds - 1] = pmd;
> -        /* At least one pmd so initialise curr_idx and idx_inc. */
> -        numa->cur_index = 0;
> -        numa->idx_inc = true;
> -    }
> -}
> -
> -/*
> - * Returns the next pmd from the numa node.
> - *
> - * If 'updown' is 'true' it will alternate between selecting the next pmd in
> - * either an up or down walk, switching between up/down when the first or last
> - * core is reached. e.g. 1,2,3,3,2,1,1,2...
> - *
> - * If 'updown' is 'false' it will select the next pmd wrapping around when last
> - * core reached. e.g. 1,2,3,1,2,3,1,2...
> - */
> -static struct dp_netdev_pmd_thread *
> -rr_numa_get_pmd(struct rr_numa *numa, bool updown)
> -{
> -    int numa_idx = numa->cur_index;
> -
> -    if (numa->idx_inc == true) {
> -        /* Incrementing through list of pmds. */
> -        if (numa->cur_index == numa->n_pmds-1) {
> -            /* Reached the last pmd. */
> -            if (updown) {
> -                numa->idx_inc = false;
> -            } else {
> -                numa->cur_index = 0;
> -            }
> -        } else {
> -            numa->cur_index++;
> -        }
> -    } else {
> -        /* Decrementing through list of pmds. */
> -        if (numa->cur_index == 0) {
> -            /* Reached the first pmd. */
> -            numa->idx_inc = true;
> -        } else {
> -            numa->cur_index--;
> -        }
> -    }
> -    return numa->pmds[numa_idx];
> -}
> -
> -static void
> -rr_numa_list_destroy(struct rr_numa_list *rr)
> -{
> -    struct rr_numa *numa;
> -
> -    HMAP_FOR_EACH_POP (numa, node, &rr->numas) {
> -        free(numa->pmds);
> -        free(numa);
> -    }
> -    hmap_destroy(&rr->numas);
> -}
> -
>  struct sched_numa_list {
>      struct hmap numas;  /* Contains 'struct sched_numa'. */
> @@ -5033,4 +4899,6 @@ struct sched_numa {
>  };
>
> +static uint64_t variance(uint64_t a[], int n);

Nit: this forward declaration can be moved right before
sched_numa_list_variance(), which is the only function that uses it.
Or variance() itself could be moved.
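
If variance() itself is moved, for reference it is essentially a
population variance over the per-pmd load estimates, along these lines
(rough sketch, not necessarily the exact in-tree arithmetic):

static uint64_t
variance(uint64_t a[], int n)
{
    uint64_t sum = 0, sqr_sum = 0, mean;

    if (!n) {
        return 0;
    }
    for (int i = 0; i < n; i++) {
        sum += a[i];
        sqr_sum += a[i] * a[i];
    }
    mean = sum / n;
    /* Population variance: E[x^2] - (E[x])^2, in integer arithmetic. */
    return (sqr_sum - n * mean * mean) / n;
}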


> +
>  static size_t
>  sched_numa_list_count(struct sched_numa_list *numa_list)
> @@ -5181,4 +5049,36 @@ sched_add_rxq_to_sched_pmd(struct sched_pmd *sched_pmd,
>  }
>
> +static void
> +sched_numa_list_assignments(struct sched_numa_list *numa_list,
> +                                     struct dp_netdev *dp)

Indentation of the second line is off; it should align with the
opening parenthesis.

Also, don't we need an OVS_REQUIRES(dp->port_mutex) annotation here?


> +{
> +    struct dp_netdev_port *port;
> +
> +    /* For each port. */
> +    HMAP_FOR_EACH (port, node, &dp->ports) {
> +        if (!netdev_is_pmd(port->netdev)) {
> +            continue;
> +        }

[snip]


-- 
David Marchand


