[ovs-dev] [PATCH v3 1/2] netdev-dpdk: Use instant sending instead of queueing of packets.

Daniele Di Proietto diproiettod at vmware.com
Fri Jul 15 01:58:27 UTC 2016


This commit fixes a problem in branch-2.5.

Commit c293b7c7f43a("dpif-netdev: Unique and sequential tx_qids.") stopped
using the core_id as txq_id for the pmd threads, but the flushing logic
in netdev_dpdk_rxq_recv() still assumes that the txq_id is equal to the
core_id.
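
For reference, the stale check is the one this patch removes from
netdev_dpdk_rxq_recv() (quoted from the hunk in the diff below); once
txq_ids are sequential rather than core-based, a pmd thread whose
core_id differs from its txq_id never flushes its own tx queue:

    /* branch-2.5: the flush is keyed on the lcore id, not the txq_id. */
    if (rxq->queue_id == rte_lcore_id() &&
        OVS_LIKELY(!dev->txq_needs_locking)) {
        dpdk_queue_flush(dev, rxq->queue_id);
    }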

I see two ways to fix this:

1) Revert c293b7c7f43a on branch-2.5.
2) Backport this patch to branch-2.5.

Given that the flushing logic was so contorted, I'd prefer to backport
this patch to branch-2.5, if I don't hear any objections.

Thanks,

Daniele


On 06/07/2016 15:48, "Daniele Di Proietto" <diproiettod at vmware.com> wrote:

>I applied this to master, thanks
>
>
>
>On 27/06/2016 06:28, "Ilya Maximets" <i.maximets at samsung.com> wrote:
>
>>The current implementation of TX packet queueing is broken in several ways:
>>
>>	* TX queue flushing, implemented on receive, assumes that all
>>	  core_ids are sequential and start from zero. This may lead
>>	  to a situation where packets get stuck in a queue forever,
>>	  and it also hurts latency.
>>
>>	* For a long time the flushing logic has depended on the
>>	  uninitialized 'txq_needs_locking', because it is usually
>>	  calculated after 'netdev_dpdk_alloc_txq' but is used inside
>>	  that function to initialize 'flush_tx' (see the sketch
>>	  after this list).
>>
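>>As a minimal sketch of that ordering problem (condensed from the hunk
>>removed from netdev_dpdk_alloc_txq() below), 'flush_tx' is derived
>>from 'txq_needs_locking' before the caller has had a chance to set it:
>>
>>    for (i = 0; i < n_txqs; i++) {
>>        int numa_id = ovs_numa_get_numa_id(i);
>>
>>        if (!dev->txq_needs_locking) {  /* Read while uninitialized. */
>>            dev->tx_q[i].flush_tx = dev->socket_id == numa_id;
>>        } else {
>>            /* Queues are shared among CPUs.  Always flush. */
>>            dev->tx_q[i].flush_tx = true;
>>        }
>>    }
>>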
>>Testing shows no performance difference with or without queueing.
>>Let's remove queueing entirely, because it doesn't work properly now
>>and also does not improve performance.
>>
>>Signed-off-by: Ilya Maximets <i.maximets at samsung.com>
>>Acked-by: Daniele Di Proietto <diproiettod at vmware.com>
>>---
>> lib/netdev-dpdk.c | 101 ++++++++----------------------------------------------
>> 1 file changed, 14 insertions(+), 87 deletions(-)
>>
>>diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>>index 02e2c58..8bb33d6 100644
>>--- a/lib/netdev-dpdk.c
>>+++ b/lib/netdev-dpdk.c
>>@@ -165,7 +165,6 @@ static const struct rte_eth_conf port_conf = {
>>     },
>> };
>> 
>>-enum { MAX_TX_QUEUE_LEN = 384 };
>> enum { DPDK_RING_SIZE = 256 };
>> BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE));
>> enum { DRAIN_TSC = 200000ULL };
>>@@ -282,8 +281,7 @@ static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mutex)
>>     = OVS_LIST_INITIALIZER(&dpdk_mp_list);
>> 
>> /* This mutex must be used by non pmd threads when allocating or freeing
>>- * mbufs through mempools. Since dpdk_queue_pkts() and dpdk_queue_flush() may
>>- * use mempools, a non pmd thread should hold this mutex while calling them */
>>+ * mbufs through mempools. */
>> static struct ovs_mutex nonpmd_mempool_mutex = OVS_MUTEX_INITIALIZER;
>> 
>> struct dpdk_mp {
>>@@ -297,17 +295,12 @@ struct dpdk_mp {
>> /* There should be one 'struct dpdk_tx_queue' created for
>>  * each cpu core. */
>> struct dpdk_tx_queue {
>>-    bool flush_tx;                 /* Set to true to flush queue everytime */
>>-                                   /* pkts are queued. */
>>-    int count;
>>     rte_spinlock_t tx_lock;        /* Protects the members and the NIC queue
>>                                     * from concurrent access.  It is used only
>>                                     * if the queue is shared among different
>>                                     * pmd threads (see 'txq_needs_locking'). */
>>     int map;                       /* Mapping of configured vhost-user queues
>>                                     * to enabled by guest. */
>>-    uint64_t tsc;
>>-    struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN];
>> };
>> 
>> /* dpdk has no way to remove dpdk ring ethernet devices
>>@@ -720,19 +713,6 @@ netdev_dpdk_alloc_txq(struct netdev_dpdk *dev, unsigned int n_txqs)
>> 
>>     dev->tx_q = dpdk_rte_mzalloc(n_txqs * sizeof *dev->tx_q);
>>     for (i = 0; i < n_txqs; i++) {
>>-        int numa_id = ovs_numa_get_numa_id(i);
>>-
>>-        if (!dev->txq_needs_locking) {
>>-            /* Each index is considered as a cpu core id, since there should
>>-             * be one tx queue for each cpu core.  If the corresponding core
>>-             * is not on the same numa node as 'dev', flags the
>>-             * 'flush_tx'. */
>>-            dev->tx_q[i].flush_tx = dev->socket_id == numa_id;
>>-        } else {
>>-            /* Queues are shared among CPUs. Always flush */
>>-            dev->tx_q[i].flush_tx = true;
>>-        }
>>-
>>         /* Initialize map for vhost devices. */
>>         dev->tx_q[i].map = OVS_VHOST_QUEUE_MAP_UNKNOWN;
>>         rte_spinlock_init(&dev->tx_q[i].tx_lock);
>>@@ -1088,16 +1068,15 @@ netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
>> }
>> 
>> static inline void
>>-dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)
>>+netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
>>+                             struct rte_mbuf **pkts, int cnt)
>> {
>>-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];
>>     uint32_t nb_tx = 0;
>> 
>>-    while (nb_tx != txq->count) {
>>+    while (nb_tx != cnt) {
>>         uint32_t ret;
>> 
>>-        ret = rte_eth_tx_burst(dev->port_id, qid, txq->burst_pkts + nb_tx,
>>-                               txq->count - nb_tx);
>>+        ret = rte_eth_tx_burst(dev->port_id, qid, pkts + nb_tx, cnt - nb_tx);
>>         if (!ret) {
>>             break;
>>         }
>>@@ -1105,32 +1084,18 @@ dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)
>>         nb_tx += ret;
>>     }
>> 
>>-    if (OVS_UNLIKELY(nb_tx != txq->count)) {
>>+    if (OVS_UNLIKELY(nb_tx != cnt)) {
>>         /* free buffers, which we couldn't transmit, one at a time (each
>>          * packet could come from a different mempool) */
>>         int i;
>> 
>>-        for (i = nb_tx; i < txq->count; i++) {
>>-            rte_pktmbuf_free(txq->burst_pkts[i]);
>>+        for (i = nb_tx; i < cnt; i++) {
>>+            rte_pktmbuf_free(pkts[i]);
>>         }
>>         rte_spinlock_lock(&dev->stats_lock);
>>-        dev->stats.tx_dropped += txq->count-nb_tx;
>>+        dev->stats.tx_dropped += cnt - nb_tx;
>>         rte_spinlock_unlock(&dev->stats_lock);
>>     }
>>-
>>-    txq->count = 0;
>>-    txq->tsc = rte_get_timer_cycles();
>>-}
>>-
>>-static inline void
>>-dpdk_queue_flush(struct netdev_dpdk *dev, int qid)
>>-{
>>-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];
>>-
>>-    if (txq->count == 0) {
>>-        return;
>>-    }
>>-    dpdk_queue_flush__(dev, qid);
>> }
>> 
>> static inline bool
>>@@ -1303,15 +1268,6 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,
>>     int nb_rx;
>>     int dropped = 0;
>> 
>>-    /* There is only one tx queue for this core.  Do not flush other
>>-     * queues.
>>-     * Do not flush tx queue which is shared among CPUs
>>-     * since it is always flushed */
>>-    if (rxq->queue_id == rte_lcore_id() &&
>>-        OVS_LIKELY(!dev->txq_needs_locking)) {
>>-        dpdk_queue_flush(dev, rxq->queue_id);
>>-    }
>>-
>>     nb_rx = rte_eth_rx_burst(rx->port_id, rxq->queue_id,
>>                              (struct rte_mbuf **) packets,
>>                              NETDEV_MAX_BURST);
>>@@ -1433,35 +1389,6 @@ out:
>>     }
>> }
>> 
>>-inline static void
>>-dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,
>>-               struct rte_mbuf **pkts, int cnt)
>>-{
>>-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];
>>-    uint64_t diff_tsc;
>>-
>>-    int i = 0;
>>-
>>-    while (i < cnt) {
>>-        int freeslots = MAX_TX_QUEUE_LEN - txq->count;
>>-        int tocopy = MIN(freeslots, cnt-i);
>>-
>>-        memcpy(&txq->burst_pkts[txq->count], &pkts[i],
>>-               tocopy * sizeof (struct rte_mbuf *));
>>-
>>-        txq->count += tocopy;
>>-        i += tocopy;
>>-
>>-        if (txq->count == MAX_TX_QUEUE_LEN || txq->flush_tx) {
>>-            dpdk_queue_flush__(dev, qid);
>>-        }
>>-        diff_tsc = rte_get_timer_cycles() - txq->tsc;
>>-        if (diff_tsc >= DRAIN_TSC) {
>>-            dpdk_queue_flush__(dev, qid);
>>-        }
>>-    }
>>-}
>>-
>> /* Tx function. Transmit packets indefinitely */
>> static void
>> dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
>>@@ -1527,8 +1454,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
>>         newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);
>> 
>>         dropped += qos_pkts - newcnt;
>>-        dpdk_queue_pkts(dev, qid, mbufs, newcnt);
>>-        dpdk_queue_flush(dev, qid);
>>+        netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);
>>     }
>> 
>>     if (OVS_UNLIKELY(dropped)) {
>>@@ -1611,7 +1537,7 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>>                     temp_cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts,
>>                                                 temp_cnt);
>>                     dropped += qos_pkts - temp_cnt;
>>-                    dpdk_queue_pkts(dev, qid,
>>+                    netdev_dpdk_eth_tx_burst(dev, qid,
>>                                     (struct rte_mbuf **)&pkts[next_tx_idx],
>>                                     temp_cnt);
>> 
>>@@ -1631,8 +1557,9 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>> 
>>             cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts, cnt);
>>             dropped += qos_pkts - cnt;
>>-            dpdk_queue_pkts(dev, qid, (struct rte_mbuf **)&pkts[next_tx_idx],
>>-                            cnt);
>>+            netdev_dpdk_eth_tx_burst(dev, qid,
>>+                                     (struct rte_mbuf **)&pkts[next_tx_idx],
>>+                                     cnt);
>>         }
>> 
>>         if (OVS_UNLIKELY(dropped)) {
>>-- 
>>2.7.4
>>

