[ovs-dev] [PATCH 1/5] dpif-netdev: Rework rxq scheduling code.

Pai G, Sunil sunil.pai.g at intel.com
Tue Jun 22 19:02:12 UTC 2021


Hey Kevin,

The patch looks good to me:

It builds fine, and all the test cases at http://patchwork.ozlabs.org/project/openvswitch/patch/20210316154532.127858-1-ktraynor@redhat.com/ pass as well.

Some minor nits inline:

<snipped>
 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 650e67ab3..57d23e112
> 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -5006,4 +5006,211 @@ rr_numa_list_destroy(struct rr_numa_list *rr)
>  }
> 
> +struct sched_numa_list {
> +    struct hmap numas;  /* Contains 'struct sched_numa'. */
> +};
> +
> +/* Meta data for out-of-place pmd rxq assignments. */
> +struct sched_pmd {
> +    /* Associated PMD thread. */
> +    struct dp_netdev_pmd_thread *pmd;
> +    uint64_t pmd_proc_cycles;

Is there a purpose in storing pmd_proc_cycles? Just curious.

> +    struct dp_netdev_rxq **rxqs;
> +    unsigned n_rxq;
> +    bool isolated;
> +};
> +
> +struct sched_numa {
> +    struct hmap_node node;
> +    int numa_id;
> +    /* PMDs on numa node. */
> +    struct sched_pmd *pmds;
> +    /* Num of PMDs on numa node. */
> +    unsigned n_pmds;
> +    /* Num of isolated PMDs on numa node. */
> +    unsigned n_iso;

'iso' is a bit cryptic; would something like 'n_isolated' be better?


> +    int rr_cur_index;
> +    bool rr_idx_inc;
> +};
> +

<snipped>

> +static unsigned
> +sched_get_numa_pmd_noniso(struct sched_numa *numa)
> +{
> +    if (numa->n_pmds > numa->n_iso) {
> +        return numa->n_pmds - numa->n_iso;
> +    }
> +    return 0;
> +}
> +
>  /* Sort Rx Queues by the processing cycles they are consuming. */
>  static int
> @@ -5037,22 +5244,106 @@ compare_rxq_cycles(const void *a, const void *b)
>  }
> 
> -/* Assign pmds to queues.  If 'pinned' is true, assign pmds to pinned
> - * queues and marks the pmds as isolated.  Otherwise, assign non isolated
> - * pmds to unpinned queues.
> +/*
> + * Returns the next pmd from the numa node.
>   *
> - * The function doesn't touch the pmd threads, it just stores the assignment
> - * in the 'pmd' member of each rxq. */
> + * If 'updown' is 'true' it will alternate between selecting the next pmd in
> + * either an up or down walk, switching between up/down when the first or last
> + * core is reached. e.g. 1,2,3,3,2,1,1,2...
> + *
> + * If 'updown' is 'false' it will select the next pmd wrapping around when
> + * the last core is reached. e.g. 1,2,3,1,2,3,1,2...
> + */
> +static struct sched_pmd *
> +get_rr_pmd(struct sched_numa *numa, bool updown)
> +{
> +    int numa_idx = numa->rr_cur_index;
> +
> +    if (numa->rr_idx_inc == true) {
> +        /* Incrementing through list of pmds. */
> +        if (numa->rr_cur_index == numa->n_pmds - 1) {
> +            /* Reached the last pmd. */
> +            if (updown) {
> +                numa->rr_idx_inc = false;
> +            } else {
> +                numa->rr_cur_index = 0;
> +            }
> +        } else {
> +            numa->rr_cur_index++;
> +        }
> +    } else {
> +        /* Decrementing through list of pmds. */
> +        if (numa->rr_cur_index == 0) {
> +            /* Reached the first pmd. */
> +            numa->rr_idx_inc = true;
> +        } else {
> +            numa->rr_cur_index--;
> +        }
> +    }
> +    return &numa->pmds[numa_idx];
> +}
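
Not a blocker, but for anyone else reading along: a minimal standalone
sketch (my own simplified state, not the real struct sched_numa) that
mirrors the index logic of get_rr_pmd() and reproduces the sequences
from the comment above:

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for the rr_cur_index/rr_idx_inc/n_pmds state. */
struct rr_state {
    int cur;
    bool inc;
    int n;
};

/* Mirrors the index logic of get_rr_pmd() above. */
static int
walk(struct rr_state *s, bool updown)
{
    int idx = s->cur;

    if (s->inc) {
        if (s->cur == s->n - 1) {
            if (updown) {
                s->inc = false;   /* Turn around at the last pmd. */
            } else {
                s->cur = 0;       /* Wrap back to the first pmd. */
            }
        } else {
            s->cur++;
        }
    } else {
        if (s->cur == 0) {
            s->inc = true;        /* Turn around at the first pmd. */
        } else {
            s->cur--;
        }
    }
    return idx;
}

int
main(void)
{
    struct rr_state s = { .cur = 0, .inc = true, .n = 3 };

    for (int i = 0; i < 8; i++) {
        printf("%d,", walk(&s, true) + 1);  /* Prints 1,2,3,3,2,1,1,2, */
    }
    printf("\n");
    return 0;
}

The output matches the "1,2,3,3,2,1,1,2..." example; with updown=false
(and the state reset) it gives "1,2,3,1,2,3..." as described.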
> +
> +static struct sched_pmd *
> +get_available_rr_pmd(struct sched_numa *numa, bool updown)
> +{
> +    struct sched_pmd *sched_pmd = NULL;
> +
> +    /* get_rr_pmd() may return duplicate PMDs before all PMDs have been
> +     * returned depending on updown. Extend the number of calls to ensure
> +     * all PMDs can be checked. */
> +    for (unsigned i = 0; i < numa->n_pmds * 2; i++) {
> +        sched_pmd = get_rr_pmd(numa, updown);
> +        if (!sched_pmd->isolated) {
> +            break;
> +        }
> +        sched_pmd = NULL;
> +    }
> +    return sched_pmd;
> +}
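
By the way, the n_pmds * 2 bound makes sense to me: with updown the walk
repeats an index at each turnaround (the 3,3 and 1,1 above), so n_pmds
consecutive calls starting near an end can return a duplicate and miss a
pmd; doubling the number of calls guarantees every pmd is visited.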
> +
> +static struct sched_pmd *
> +get_next_pmd(struct sched_numa *numa, bool algo)
> +{
> +    return get_available_rr_pmd(numa, algo);
> +}

Just checking: if the algo is cycles-based, then updown = true; otherwise round-robin is used?
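i.e., my assumption of the mapping, using the option values from the
existing pmd-rxq-assign knob (the helper name is mine, not from the
patch):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical helper (name is mine, not from the patch): how I read
 * the mapping from the pmd-rxq-assign option values to 'algo'/'updown'. */
static bool
assign_type_to_updown(const char *assign_type)
{
    /* "cycles" -> up/down walk; "roundrobin" -> plain wrap-around. */
    return !strcmp(assign_type, "cycles");
}

int
main(void)
{
    printf("cycles     -> updown=%d\n", assign_type_to_updown("cycles"));
    printf("roundrobin -> updown=%d\n", assign_type_to_updown("roundrobin"));
    return 0;
}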

<snipped>

> +
>  static void
> -rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
> +sched_numa_list_schedule(struct sched_numa_list *numa_list,
> +                         struct dp_netdev *dp,
> +                         bool algo,

Could 'algo' be renamed to something like 'is_rxq_cyc'/'is_round_robin'? A bit verbose, but 'algo' seems a bit generic. It looks like it is used throughout the entire patch.

> +                         enum vlog_level level)
> +    OVS_REQUIRES(dp->port_mutex)

<snipped>

