[ovs-dev] [PATCH v10 2/5] dpif-netdev: Count cycles on per-rxq basis.
Stokes, Ian
ian.stokes at intel.com
Sat Jan 13 12:16:01 UTC 2018
> -----Original Message-----
> From: Ilya Maximets [mailto:i.maximets at samsung.com]
> Sent: Friday, January 12, 2018 11:17 AM
> To: ovs-dev at openvswitch.org
> Cc: Heetae Ahn <heetae82.ahn at samsung.com>; Bodireddy, Bhanuprakash
> <bhanuprakash.bodireddy at intel.com>; Fischetti, Antonio
> <antonio.fischetti at intel.com>; Eelco Chaudron <echaudro at redhat.com>;
> Loftus, Ciara <ciara.loftus at intel.com>; Kevin Traynor
> <ktraynor at redhat.com>; Jan Scheurich <jan.scheurich at ericsson.com>; O
> Mahony, Billy <billy.o.mahony at intel.com>; Stokes, Ian
> <ian.stokes at intel.com>; Ilya Maximets <i.maximets at samsung.com>
> Subject: [PATCH v10 2/5] dpif-netdev: Count cycles on per-rxq basis.
>
> Upcoming time-based output batching will allow to collect in a single
> output batch packets from different RX queues. Lets keep the list of RX
> queues for each output packet and collect cycles for them on send.
>
> Signed-off-by: Ilya Maximets <i.maximets at samsung.com>
> ---
> lib/dpif-netdev.c | 31 +++++++++++++++++++++++++++++--
> 1 file changed, 29 insertions(+), 2 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index b35700d..6909a03
> 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -501,6 +501,7 @@ struct tx_port {
> long long last_used;
> struct hmap_node node;
> struct dp_packet_batch output_pkts;
> + struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST];
> };
>
> /* A set of properties for the current processing loop that is not
> directly @@ -510,6 +511,8 @@ struct tx_port { struct
> dp_netdev_pmd_thread_ctx {
> /* Latest measured time. See 'pmd_thread_ctx_time_update()'. */
> long long now;
> + /* RX queue from which last packet was received. */
> + struct dp_netdev_rxq *last_rxq;
> };
>
> /* PMD: Poll modes drivers. PMD accesses devices via polling to
> eliminate @@ -3155,9 +3158,14 @@ static void
> dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
> struct tx_port *p) {
> + int i;
> int tx_qid;
> int output_cnt;
> bool dynamic_txqs;
> + struct cycle_timer timer;
> + uint64_t cycles;
> +
> + cycle_timer_start(&pmd->perf_stats, &timer);
>
> dynamic_txqs = p->port->dynamic_txqs;
> if (dynamic_txqs) {
> @@ -3167,12 +3175,22 @@ dp_netdev_pmd_flush_output_on_port(struct
> dp_netdev_pmd_thread *pmd,
> }
>
> output_cnt = dp_packet_batch_size(&p->output_pkts);
> + ovs_assert(output_cnt > 0);
>
> netdev_send(p->port->netdev, tx_qid, &p->output_pkts, dynamic_txqs);
> dp_packet_batch_init(&p->output_pkts);
>
> pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SENT_PKTS,
> output_cnt);
> pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SENT_BATCHES, 1);
> +
> + /* Update send cycles for all the rx queues evenly. */
Just a query, is it right that this is distributed evenly?
If there are more packets from one rx queue than another will it make a difference or will the cycles spent sending that batch be the same as a batch of a small or larger size?
> + cycles = cycle_timer_stop(&pmd->perf_stats, &timer) / output_cnt;
> + for (i = 0; i < output_cnt; i++) {
> + if (p->output_pkts_rxqs[i]) {
> + dp_netdev_rxq_add_cycles(p->output_pkts_rxqs[i],
> + RXQ_CYCLES_PROC_CURR, cycles);
> + }
> + }
> }
>
> static void
> @@ -3196,10 +3214,14 @@ dp_netdev_process_rxq_port(struct
> dp_netdev_pmd_thread *pmd,
> struct cycle_timer timer;
> int error;
> int batch_cnt = 0;
> + uint64_t cycles;
>
> /* Measure duration for polling and processing rx burst. */
> cycle_timer_start(&pmd->perf_stats, &timer);
> +
> + pmd->ctx.last_rxq = rxq;
> dp_packet_batch_init(&batch);
> +
> error = netdev_rxq_recv(rxq->rx, &batch);
> if (!error) {
> /* At least one packet received. */ @@ -3208,12 +3230,12 @@
> dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
>
> batch_cnt = batch.count;
> dp_netdev_input(pmd, &batch, port_no);
> - dp_netdev_pmd_flush_output_packets(pmd);
>
> /* Assign processing cycles to rx queue. */
> - uint64_t cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
> + cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
> dp_netdev_rxq_add_cycles(rxq, RXQ_CYCLES_PROC_CURR, cycles);
>
> + dp_netdev_pmd_flush_output_packets(pmd);
> } else {
> /* Discard cycles. */
> cycle_timer_stop(&pmd->perf_stats, &timer); @@ -3225,6 +3247,8 @@
> dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
> }
> }
>
> + pmd->ctx.last_rxq = NULL;
> +
> return batch_cnt;
> }
>
> @@ -4512,6 +4536,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread
> *pmd, struct dp_netdev *dp,
> ovs_mutex_init(&pmd->port_mutex);
> cmap_init(&pmd->flow_table);
> cmap_init(&pmd->classifiers);
> + pmd->ctx.last_rxq = NULL;
> pmd_thread_ctx_time_update(pmd);
> pmd->next_optimization = pmd->ctx.now + DPCLS_OPTIMIZATION_INTERVAL;
> pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN; @@ -
> 5389,6 +5414,8 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
> *packets_,
> dp_netdev_pmd_flush_output_on_port(pmd, p);
> }
> DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
> + p->output_pkts_rxqs[dp_packet_batch_size(&p-
> >output_pkts)] =
> +
> + pmd->ctx.last_rxq;
> dp_packet_batch_add(&p->output_pkts, packet);
> }
> return;
> --
> 2.7.4
More information about the dev
mailing list