[ovs-dev] [PATCH v4 4/6] dpif-netdev: Change rxq_scheduling to use rxq processing cycles.
Greg Rose
gvrose8192 at gmail.com
Thu Aug 10 23:01:27 UTC 2017
On 08/09/2017 08:45 AM, Kevin Traynor wrote:
> Previously rxqs were assigned to pmds by round robin in
> port/queue order.
>
> Now that we have the processing cycles used for existing rxqs,
> use that information to try and produce a better balanced
> distribution of rxqs across pmds. i.e. given multiple pmds, the
> rxqs which have consumed the largest amount of processing cycles
> will be placed on different pmds.
>
> The rxqs are sorted by their processing cycles and assigned (in
> sorted order) round robin across pmds.
>
> Signed-off-by: Kevin Traynor <ktraynor at redhat.com>
> ---
> Documentation/howto/dpdk.rst | 7 +++
> lib/dpif-netdev.c | 105 ++++++++++++++++++++++++++++++-------------
> 2 files changed, 81 insertions(+), 31 deletions(-)
>
> diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
> index d7f6610..44737e4 100644
> --- a/Documentation/howto/dpdk.rst
> +++ b/Documentation/howto/dpdk.rst
> @@ -119,4 +119,11 @@ After that PMD threads on cores where RX queues was pinned will become
> thread.
>
> +If pmd-rxq-affinity is not set for rxqs, they will be assigned to pmds (cores)
> +automatically. The processing cycles that have been required for each rxq
> +will be used where known to assign rxqs with the highest consumption of
> +processing cycles to different pmds.
> +
> +Rxq to pmds assignment takes place whenever there are configuration changes.
> +
> QoS
> ---
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index e344063..b4663ab 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -3328,8 +3328,29 @@ rr_numa_list_destroy(struct rr_numa_list *rr)
> }
>
> +/* Sort Rx Queues by the processing cycles they are consuming. */
> +static int
> +rxq_cycle_sort(const void *a, const void *b)
> +{
> + struct dp_netdev_rxq * qa;
> + struct dp_netdev_rxq * qb;
> +
> + qa = *(struct dp_netdev_rxq **) a;
> + qb = *(struct dp_netdev_rxq **) b;
> +
> + if (dp_netdev_rxq_get_cycles(qa, RXQ_CYCLES_PROC_LAST) >=
> + dp_netdev_rxq_get_cycles(qb, RXQ_CYCLES_PROC_LAST)) {
> + return -1;
> + }
> +
> + return 1;
> +}
> +
> /* Assign pmds to queues. If 'pinned' is true, assign pmds to pinned
> * queues and marks the pmds as isolated. Otherwise, assign non isolated
> * pmds to unpinned queues.
> *
> + * If 'pinned' is false queues will be sorted by processing cycles they are
> + * consuming and then assigned to pmds in round robin order.
> + *
> * The function doesn't touch the pmd threads, it just stores the assignment
> * in the 'pmd' member of each rxq. */
> @@ -3340,18 +3361,14 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
> struct rr_numa_list rr;
> struct rr_numa *non_local_numa = NULL;
> -
> - rr_numa_list_populate(dp, &rr);
> + struct dp_netdev_rxq ** rxqs = NULL;
> + int i, n_rxqs = 0;
> + struct rr_numa *numa = NULL;
> + int numa_id;
>
> HMAP_FOR_EACH (port, node, &dp->ports) {
> - struct rr_numa *numa;
> - int numa_id;
> -
> if (!netdev_is_pmd(port->netdev)) {
> continue;
> }
>
> - numa_id = netdev_get_numa_id(port->netdev);
> - numa = rr_numa_list_lookup(&rr, numa_id);
> -
> for (int qid = 0; qid < port->n_rxq; qid++) {
> struct dp_netdev_rxq *q = &port->rxqs[qid];
> @@ -3371,34 +3388,60 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
> }
> } else if (!pinned && q->core_id == OVS_CORE_UNSPEC) {
> - if (!numa) {
> - /* There are no pmds on the queue's local NUMA node.
> - Round-robin on the NUMA nodes that do have pmds. */
> - non_local_numa = rr_numa_list_next(&rr, non_local_numa);
> - if (!non_local_numa) {
> - VLOG_ERR("There is no available (non-isolated) pmd "
> - "thread for port \'%s\' queue %d. This queue "
> - "will not be polled. Is pmd-cpu-mask set to "
> - "zero? Or are all PMDs isolated to other "
> - "queues?", netdev_get_name(port->netdev),
> - qid);
> - continue;
> - }
> - q->pmd = rr_numa_get_pmd(non_local_numa);
> - VLOG_WARN("There's no available (non-isolated) pmd thread "
> - "on numa node %d. Queue %d on port \'%s\' will "
> - "be assigned to the pmd on core %d "
> - "(numa node %d). Expect reduced performance.",
> - numa_id, qid, netdev_get_name(port->netdev),
> - q->pmd->core_id, q->pmd->numa_id);
> + if (n_rxqs == 0) {
> + rxqs = xmalloc(sizeof *rxqs);
> } else {
> - /* Assign queue to the next (round-robin) PMD on it's local
> - NUMA node. */
> - q->pmd = rr_numa_get_pmd(numa);
> + rxqs = xrealloc(rxqs, sizeof *rxqs * (n_rxqs + 1));
> }
> + /* Store the queue. */
> + rxqs[n_rxqs++] = q;
> }
> }
> }
>
> + if (n_rxqs > 1) {
> + /* Sort the queues in order of the processing cycles
> + * they consumed during their last pmd interval. */
> + qsort(rxqs, n_rxqs, sizeof *rxqs, rxq_cycle_sort);
> + }
> +
> + rr_numa_list_populate(dp, &rr);
> + /* Assign the sorted queues to pmds in round robin. */
> + for (i = 0; i < n_rxqs; i++) {
> + numa_id = netdev_get_numa_id(rxqs[i]->port->netdev);
> + numa = rr_numa_list_lookup(&rr, numa_id);
> + if (!numa) {
> + /* There are no pmds on the queue's local NUMA node.
> + Round-robin on the NUMA nodes that do have pmds. */
> + non_local_numa = rr_numa_list_next(&rr, non_local_numa);
> + if (!non_local_numa) {
> + VLOG_ERR("There is no available (non-isolated) pmd "
> + "thread for port \'%s\' queue %d. This queue "
> + "will not be polled. Is pmd-cpu-mask set to "
> + "zero? Or are all PMDs isolated to other "
> + "queues?", netdev_rxq_get_name(rxqs[i]->rx),
> + netdev_rxq_get_queue_id(rxqs[i]->rx));
> + continue;
> + }
> + rxqs[i]->pmd = rr_numa_get_pmd(non_local_numa);
> + VLOG_WARN("There's no available (non-isolated) pmd thread "
> + "on numa node %d. Queue %d on port \'%s\' will "
> + "be assigned to the pmd on core %d "
> + "(numa node %d). Expect reduced performance.",
> + numa_id, netdev_rxq_get_queue_id(rxqs[i]->rx),
> + netdev_rxq_get_name(rxqs[i]->rx),
> + rxqs[i]->pmd->core_id, rxqs[i]->pmd->numa_id);
> + } else {
> + rxqs[i]->pmd = rr_numa_get_pmd(numa);
> + VLOG_INFO("Core %d on numa node %d assigned port \'%s\' "
> + "rx queue %d (measured processing cycles %"PRIu64").",
> + rxqs[i]->pmd->core_id, numa_id,
> + netdev_rxq_get_name(rxqs[i]->rx),
> + netdev_rxq_get_queue_id(rxqs[i]->rx),
> + dp_netdev_rxq_get_cycles(rxqs[i], RXQ_CYCLES_PROC_LAST));
> + }
> + }
> +
> rr_numa_list_destroy(&rr);
> + free(rxqs);
> }
>
>
Tested-by: Greg Rose <gvrose8192 at gmail.com>
Reviewed-by: Greg Rose <gvrose8192 at gmail.com>
More information about the dev
mailing list