[ovs-dev] [RFC PATCH] netdev-dpdk: Narrow down txq critical section.
Ilya Maximets
i.maximets at ovn.org
Tue Dec 3 11:17:49 UTC 2019
On 02.12.2019 17:24, David Marchand wrote:
> tx_lock protects the NIC/vhost queue from concurrent access.
> Move it closer to the parts it protects and let packets duplication (when
> source is not DPDK) and the egress policer run out of it.
>
> Signed-off-by: David Marchand <david.marchand at redhat.com>
> ---
> I caught this by code review, but I imagine this could make the
> contention on the tx lock even worse when broadcasting packets.
Idea is OK, I think.  I didn't look at the code, but, IIRC, there was
already a similar patch on the mailing list...  Hard to find.
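
Just to spell out the idea for the record: all the per-packet work (the copy
for non-DPDK sources, MTU filtering, egress policing) stays outside the lock,
and only the actual burst onto the shared queue is serialized.  A rough
standalone sketch of that shape, purely illustrative (plain pthread spinlock
and ints instead of rte_spinlock and mbufs, all names made up):

/* Narrowed critical section: per-packet preparation runs unlocked,
 * only the push onto the shared queue is taken under the lock. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define TXQ_LEN 64

struct txq {
    pthread_spinlock_t lock;     /* shared between sender threads */
    int pkts[TXQ_LEN];
    int count;
};

static int prepare(int pkt)      /* stand-in for copy/MTU/QoS work */
{
    return pkt + 1;
}

static int txq_send(struct txq *q, const int *pkts, int cnt,
                    bool concurrent_txq)
{
    int staged[TXQ_LEN];
    int i, sent = 0;

    for (i = 0; i < cnt; i++) {
        staged[i] = prepare(pkts[i]);        /* lock not held here */
    }
    if (concurrent_txq) {
        pthread_spin_lock(&q->lock);
    }
    for (i = 0; i < cnt && q->count < TXQ_LEN; i++) {
        q->pkts[q->count++] = staged[i];
        sent++;
    }
    if (concurrent_txq) {
        pthread_spin_unlock(&q->lock);
    }
    return cnt - sent;                       /* packets not transmitted */
}

int main(void)
{
    struct txq q = { .count = 0 };
    int pkts[3] = { 1, 2, 3 };

    pthread_spin_init(&q.lock, PTHREAD_PROCESS_PRIVATE);
    printf("dropped: %d\n", txq_send(&q, pkts, 3, true));
    pthread_spin_destroy(&q.lock);
    return 0;
}
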
>
> ---
> lib/netdev-dpdk.c | 43 +++++++++++++++++++++++--------------------
> 1 file changed, 23 insertions(+), 20 deletions(-)
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 4c9f122b0..9dd43f2a9 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -2053,10 +2053,15 @@ netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
> * Returns the number of packets that weren't transmitted. */
> static inline int
> netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
> - struct rte_mbuf **pkts, int cnt)
> + struct rte_mbuf **pkts, int cnt, bool concurrent_txq)
> {
> uint32_t nb_tx = 0;
>
> + if (OVS_UNLIKELY(concurrent_txq)) {
> + qid = qid % dev->up.n_txq;
> + rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> + }
> +
> while (nb_tx != cnt) {
> uint32_t ret;
>
> @@ -2068,6 +2073,10 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
> nb_tx += ret;
> }
>
> + if (OVS_UNLIKELY(concurrent_txq)) {
> + rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
> + }
> +
> if (OVS_UNLIKELY(nb_tx != cnt)) {
> /* Free buffers, which we couldn't transmit, one at a time (each
> * packet could come from a different mempool) */
> @@ -2412,11 +2421,6 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
> goto out;
> }
>
> - if (OVS_UNLIKELY(!rte_spinlock_trylock(&dev->tx_q[qid].tx_lock))) {
> - COVERAGE_INC(vhost_tx_contention);
> - rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> - }
> -
> cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
> sw_stats_add.tx_mtu_exceeded_drops = total_packets - cnt;
>
> @@ -2427,6 +2431,11 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
>
> n_packets_to_free = cnt;
>
> + if (OVS_UNLIKELY(!rte_spinlock_trylock(&dev->tx_q[qid].tx_lock))) {
> + COVERAGE_INC(vhost_tx_contention);
> + rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> + }
> +
> do {
> int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
> unsigned int tx_pkts;
> @@ -2468,7 +2477,8 @@ out:
>
> /* Tx function. Transmit packets indefinitely */
> static void
> -dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
> +dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
> + bool concurrent_txq)
> OVS_NO_THREAD_SAFETY_ANALYSIS
> {
> const size_t batch_cnt = dp_packet_batch_size(batch);
> @@ -2527,7 +2537,8 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
> __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
> txcnt);
> } else {
> - tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt);
> + tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt,
> + concurrent_txq);
> }
> }
>
> @@ -2549,7 +2560,7 @@ netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
> {
>
> if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
> - dpdk_do_tx_copy(netdev, qid, batch);
> + dpdk_do_tx_copy(netdev, qid, batch, true);
> dp_packet_delete_batch(batch, true);
> } else {
> __netdev_dpdk_vhost_send(netdev, qid, batch->packets,
> @@ -2568,15 +2579,10 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
> return;
> }
>
> - if (OVS_UNLIKELY(concurrent_txq)) {
> - qid = qid % dev->up.n_txq;
> - rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> - }
> -
> if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
> struct netdev *netdev = &dev->up;
>
> - dpdk_do_tx_copy(netdev, qid, batch);
> + dpdk_do_tx_copy(netdev, qid, batch, concurrent_txq);
> dp_packet_delete_batch(batch, true);
> } else {
> struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats;
> @@ -2591,7 +2597,8 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
> tx_cnt = netdev_dpdk_qos_run(dev, pkts, tx_cnt, true);
> qos_drops -= tx_cnt;
>
> - tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt);
> + tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt,
> + concurrent_txq);
>
> dropped = tx_failure + mtu_drops + qos_drops;
> if (OVS_UNLIKELY(dropped)) {
> @@ -2603,10 +2610,6 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
> rte_spinlock_unlock(&dev->stats_lock);
> }
> }
> -
> - if (OVS_UNLIKELY(concurrent_txq)) {
> - rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
> - }
> }
>
> static int
>