[ovs-dev] [PATCH v3 1/4] dpif-netdev: Output packet batching.
Gao Zhenyu
sysugaozhenyu at gmail.com
Fri Aug 18 02:30:21 UTC 2017
Hi Ilya,
Thanks for working on it.
This patch uses dp_packet_batch_clone, so I have a concern about the
performance. Could you please share some performance numbers with/without your
patch.
Thanks
Zhenyu Gao
2017-08-10 23:38 GMT+08:00 Ilya Maximets <i.maximets at samsung.com>:
> While processing incoming batch of packets they are scattered
> across many per-flow batches and sent separately.
>
> This becomes an issue while using more than a few flows.
>
> For example if we have balanced-tcp OvS bonding with 2 ports
> there will be 256 datapath internal flows for each dp_hash
> pattern. This will lead to scattering of a single received
> batch across all of that 256 per-flow batches and invoking
> send for each packet separately. This behaviour greatly degrades
> overall performance of netdev_send because of inability to use
> advantages of vectorized transmit functions.
> But the half (if 2 ports in bonding) of datapath flows will
> have the same output actions. This means that we can collect
> them in a single place back and send at once using single call
> to netdev_send. This patch introduces per-port packet batch
> for output packets for that purpose.
>
> 'output_pkts' batch is thread local and located in send port cache.
>
> Signed-off-by: Ilya Maximets <i.maximets at samsung.com>
> ---
> lib/dpif-netdev.c | 104 ++++++++++++++++++++++++++++++
> ++++++++++++------------
> 1 file changed, 82 insertions(+), 22 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index e2cd931..a2a25be 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -502,6 +502,7 @@ struct tx_port {
> int qid;
> long long last_used;
> struct hmap_node node;
> + struct dp_packet_batch output_pkts;
> };
>
> /* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate
> @@ -633,9 +634,10 @@ static void dp_netdev_execute_actions(struct
> dp_netdev_pmd_thread *pmd,
> size_t actions_len,
> long long now);
> static void dp_netdev_input(struct dp_netdev_pmd_thread *,
> - struct dp_packet_batch *, odp_port_t port_no);
> + struct dp_packet_batch *, odp_port_t port_no,
> + long long now);
> static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
> - struct dp_packet_batch *);
> + struct dp_packet_batch *, long long
> now);
>
> static void dp_netdev_disable_upcall(struct dp_netdev *);
> static void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd);
> @@ -667,6 +669,9 @@ static void dp_netdev_add_rxq_to_pmd(struct
> dp_netdev_pmd_thread *pmd,
> static void dp_netdev_del_rxq_from_pmd(struct dp_netdev_pmd_thread *pmd,
> struct rxq_poll *poll)
> OVS_REQUIRES(pmd->port_mutex);
> +static void
> +dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd,
> + long long now);
> static void reconfigure_datapath(struct dp_netdev *dp)
> OVS_REQUIRES(dp->port_mutex);
> static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd);
> @@ -2809,6 +2814,7 @@ dpif_netdev_execute(struct dpif *dpif, struct
> dpif_execute *execute)
> struct dp_netdev *dp = get_dp_netdev(dpif);
> struct dp_netdev_pmd_thread *pmd;
> struct dp_packet_batch pp;
> + long long now = time_msec();
>
> if (dp_packet_size(execute->packet) < ETH_HEADER_LEN ||
> dp_packet_size(execute->packet) > UINT16_MAX) {
> @@ -2851,8 +2857,8 @@ dpif_netdev_execute(struct dpif *dpif, struct
> dpif_execute *execute)
>
> dp_packet_batch_init_packet(&pp, execute->packet);
> dp_netdev_execute_actions(pmd, &pp, false, execute->flow,
> - execute->actions, execute->actions_len,
> - time_msec());
> + execute->actions, execute->actions_len,
> now);
> + dp_netdev_pmd_flush_output_packets(pmd, now);
>
> if (pmd->core_id == NON_PMD_CORE_ID) {
> ovs_mutex_unlock(&dp->non_pmd_mutex);
> @@ -3101,6 +3107,37 @@ cycles_count_intermediate(struct
> dp_netdev_pmd_thread *pmd,
> non_atomic_ullong_add(&pmd->cycles.n[type], interval);
> }
>
> +static void
> +dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
> + struct tx_port *p, long long now)
> +{
> + int tx_qid;
> + bool dynamic_txqs;
> +
> + dynamic_txqs = p->port->dynamic_txqs;
> + if (dynamic_txqs) {
> + tx_qid = dpif_netdev_xps_get_tx_qid(pmd, p, now);
> + } else {
> + tx_qid = pmd->static_tx_qid;
> + }
> +
> + netdev_send(p->port->netdev, tx_qid, &p->output_pkts, true,
> dynamic_txqs);
> + dp_packet_batch_init(&p->output_pkts);
> +}
> +
> +static void
> +dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd,
> + long long now)
> +{
> + struct tx_port *p;
> +
> + HMAP_FOR_EACH (p, node, &pmd->send_port_cache) {
> + if (!dp_packet_batch_is_empty(&p->output_pkts)) {
> + dp_netdev_pmd_flush_output_on_port(pmd, p, now);
> + }
> + }
> +}
> +
> static int
> dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
> struct netdev_rxq *rx,
> @@ -3113,10 +3150,13 @@ dp_netdev_process_rxq_port(struct
> dp_netdev_pmd_thread *pmd,
> dp_packet_batch_init(&batch);
> error = netdev_rxq_recv(rx, &batch);
> if (!error) {
> + long long now = time_msec();
> +
> *recirc_depth_get() = 0;
>
> batch_cnt = batch.count;
> - dp_netdev_input(pmd, &batch, port_no);
> + dp_netdev_input(pmd, &batch, port_no, now);
> + dp_netdev_pmd_flush_output_packets(pmd, now);
> } else if (error != EAGAIN && error != EOPNOTSUPP) {
> static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>
> @@ -4481,6 +4521,7 @@ dp_netdev_add_port_tx_to_pmd(struct
> dp_netdev_pmd_thread *pmd,
>
> tx->port = port;
> tx->qid = -1;
> + dp_packet_batch_init(&tx->output_pkts);
>
> hmap_insert(&pmd->tx_ports, &tx->node, hash_port_no(tx->port->port_
> no));
> pmd->need_reload = true;
> @@ -4901,7 +4942,8 @@ fast_path_processing(struct dp_netdev_pmd_thread
> *pmd,
> static void
> dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
> struct dp_packet_batch *packets,
> - bool md_is_valid, odp_port_t port_no)
> + bool md_is_valid, odp_port_t port_no,
> + long long now)
> {
> int cnt = packets->count;
> #if !defined(__CHECKER__) && !defined(_WIN32)
> @@ -4913,7 +4955,6 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
> OVS_ALIGNED_VAR(CACHE_LINE_SIZE)
> struct netdev_flow_key keys[PKT_ARRAY_SIZE];
> struct packet_batch_per_flow batches[PKT_ARRAY_SIZE];
> - long long now = time_msec();
> size_t n_batches;
> odp_port_t in_port;
>
> @@ -4949,16 +4990,16 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
> static void
> dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
> struct dp_packet_batch *packets,
> - odp_port_t port_no)
> + odp_port_t port_no, long long now)
> {
> - dp_netdev_input__(pmd, packets, false, port_no);
> + dp_netdev_input__(pmd, packets, false, port_no, now);
> }
>
> static void
> dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
> - struct dp_packet_batch *packets)
> + struct dp_packet_batch *packets, long long now)
> {
> - dp_netdev_input__(pmd, packets, true, 0);
> + dp_netdev_input__(pmd, packets, true, 0, now);
> }
>
> struct dp_netdev_execute_aux {
> @@ -5136,18 +5177,37 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
> *packets_,
> case OVS_ACTION_ATTR_OUTPUT:
> p = pmd_send_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
> if (OVS_LIKELY(p)) {
> - int tx_qid;
> - bool dynamic_txqs;
> + struct dp_packet *packet;
> + struct dp_packet_batch out;
>
> - dynamic_txqs = p->port->dynamic_txqs;
> - if (dynamic_txqs) {
> - tx_qid = dpif_netdev_xps_get_tx_qid(pmd, p, now);
> - } else {
> - tx_qid = pmd->static_tx_qid;
> + if (!may_steal) {
> + dp_packet_batch_clone(&out, packets_);
> + dp_packet_batch_reset_cutlen(packets_);
> + packets_ = &out;
> + }
> + dp_packet_batch_apply_cutlen(packets_);
> +
> +#ifdef DPDK_NETDEV
> + if (OVS_UNLIKELY(!dp_packet_batch_is_empty(&p->output_pkts)
> + && packets_->packets[0]->source
> + != p->output_pkts.packets[0]->source)) {
> +            /* XXX: netdev-dpdk assumes that all packets in a single
> +             * output batch have the same source. Flush here to
> +             * avoid memory access issues. */
> + dp_netdev_pmd_flush_output_on_port(pmd, p, now);
> + }
> +#endif
> +
> + if (OVS_UNLIKELY(dp_packet_batch_size(&p->output_pkts)
> + + dp_packet_batch_size(packets_) >
> NETDEV_MAX_BURST)) {
> +            /* Some packets were generated while input batch
> processing.
> +             * Flush here to avoid overflow. */
> + dp_netdev_pmd_flush_output_on_port(pmd, p, now);
> }
>
> - netdev_send(p->port->netdev, tx_qid, packets_, may_steal,
> - dynamic_txqs);
> + DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
> + dp_packet_batch_add(&p->output_pkts, packet);
> + }
> return;
> }
> break;
> @@ -5188,7 +5248,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
> *packets_,
> }
>
> (*depth)++;
> - dp_netdev_recirculate(pmd, packets_);
> + dp_netdev_recirculate(pmd, packets_, now);
> (*depth)--;
> return;
> }
> @@ -5253,7 +5313,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
> *packets_,
> }
>
> (*depth)++;
> - dp_netdev_recirculate(pmd, packets_);
> + dp_netdev_recirculate(pmd, packets_, now);
> (*depth)--;
>
> return;
> --
> 2.7.4
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
More information about the dev
mailing list