[ovs-dev] [PATCH v3 1/2] netdev-dpdk: Use instant sending instead of queueing of packets.
Daniele Di Proietto
diproiettod at vmware.com
Fri Jul 15 01:58:27 UTC 2016
This commit fixes a problem in branch-2.5.
Commit c293b7c7f43a ("dpif-netdev: Unique and sequential tx_qids.") stopped
using the core_id as the txq_id for pmd threads, but the flushing logic
in netdev_dpdk_rxq_recv() still assumes that the txq_id is equal to the
core_id.
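
To make the failure mode concrete, here is a minimal standalone sketch of
the stale assumption (illustration only, not OVS code; the helper name and
the core numbers are invented). With unique sequential tx_qids, a pmd
thread pinned to, say, core 4 may own txq 0, so a check of the form
"queue_id == lcore_id" can be false on every receive, and that thread's
queued tx packets are never flushed:

#include <stdbool.h>
#include <stdio.h>

/* Shape of the check removed from netdev_dpdk_rxq_recv() below: flush
 * only when the rx queue id matches the lcore id. */
static bool
would_flush(unsigned queue_id, unsigned lcore_id)
{
    return queue_id == lcore_id;
}

int
main(void)
{
    /* Hypothetical setup: pmd threads pinned to cores 4..6 get the
     * sequential tx queue ids 0..2 after commit c293b7c7f43a. */
    unsigned lcores[] = { 4, 5, 6 };
    unsigned i;

    for (i = 0; i < 3; i++) {
        printf("pmd on core %u, txq %u: flush = %s\n",
               lcores[i], i, would_flush(i, lcores[i]) ? "yes" : "no");
    }
    return 0;
}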
I see two ways to fix this:
1) Revert c293b7c7f43a on branch-2.5.
2) Backport this patch to branch-2.5.
Given how contorted the flushing logic was, I'd prefer to backport this
patch to branch-2.5, if I don't hear any objections.
Thanks,
Daniele
On 06/07/2016 15:48, "Daniele Di Proietto" <diproiettod at vmware.com> wrote:
>I applied this to master, thanks
>
>
>
>On 27/06/2016 06:28, "Ilya Maximets" <i.maximets at samsung.com> wrote:
>
>>The current implementation of TX packet queueing is broken in several ways:
>>
>> * TX queue flushing, implemented on receive, assumes that all
>>   core_ids are sequential and start from zero. This may leave
>>   packets stuck in a queue forever, and it also hurts latency.
>>
>> * The flushing logic has long depended on the uninitialized
>>   'txq_needs_locking': it is usually calculated after
>>   'netdev_dpdk_alloc_txq', but is used inside that function to
>>   initialize 'flush_tx'.
>>
>>Testing shows no performance difference with and without queueing.
>>Let's remove queueing entirely, since it is currently broken and does
>>not improve performance anyway.
>>
>>Signed-off-by: Ilya Maximets <i.maximets at samsung.com>
>>Acked-by: Daniele Di Proietto <diproiettod at vmware.com>
>>---
>> lib/netdev-dpdk.c | 101 ++++++++----------------------------------------------
>> 1 file changed, 14 insertions(+), 87 deletions(-)
>>
>>diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>>index 02e2c58..8bb33d6 100644
>>--- a/lib/netdev-dpdk.c
>>+++ b/lib/netdev-dpdk.c
>>@@ -165,7 +165,6 @@ static const struct rte_eth_conf port_conf = {
>> },
>> };
>>
>>-enum { MAX_TX_QUEUE_LEN = 384 };
>> enum { DPDK_RING_SIZE = 256 };
>> BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE));
>> enum { DRAIN_TSC = 200000ULL };
>>@@ -282,8 +281,7 @@ static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mutex)
>> = OVS_LIST_INITIALIZER(&dpdk_mp_list);
>>
>> /* This mutex must be used by non pmd threads when allocating or freeing
>>- * mbufs through mempools. Since dpdk_queue_pkts() and dpdk_queue_flush() may
>>- * use mempools, a non pmd thread should hold this mutex while calling them */
>>+ * mbufs through mempools. */
>> static struct ovs_mutex nonpmd_mempool_mutex = OVS_MUTEX_INITIALIZER;
>>
>> struct dpdk_mp {
>>@@ -297,17 +295,12 @@ struct dpdk_mp {
>> /* There should be one 'struct dpdk_tx_queue' created for
>> * each cpu core. */
>> struct dpdk_tx_queue {
>>- bool flush_tx; /* Set to true to flush queue everytime */
>>- /* pkts are queued. */
>>- int count;
>> rte_spinlock_t tx_lock; /* Protects the members and the NIC queue
>> * from concurrent access. It is used only
>> * if the queue is shared among different
>> * pmd threads (see 'txq_needs_locking'). */
>> int map; /* Mapping of configured vhost-user queues
>> * to enabled by guest. */
>>- uint64_t tsc;
>>- struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN];
>> };
>>
>> /* dpdk has no way to remove dpdk ring ethernet devices
>>@@ -720,19 +713,6 @@ netdev_dpdk_alloc_txq(struct netdev_dpdk *dev, unsigned int n_txqs)
>>
>> dev->tx_q = dpdk_rte_mzalloc(n_txqs * sizeof *dev->tx_q);
>> for (i = 0; i < n_txqs; i++) {
>>- int numa_id = ovs_numa_get_numa_id(i);
>>-
>>- if (!dev->txq_needs_locking) {
>>- /* Each index is considered as a cpu core id, since there should
>>- * be one tx queue for each cpu core. If the corresponding core
>>- * is not on the same numa node as 'dev', flags the
>>- * 'flush_tx'. */
>>- dev->tx_q[i].flush_tx = dev->socket_id == numa_id;
>>- } else {
>>- /* Queues are shared among CPUs. Always flush */
>>- dev->tx_q[i].flush_tx = true;
>>- }
>>-
>> /* Initialize map for vhost devices. */
>> dev->tx_q[i].map = OVS_VHOST_QUEUE_MAP_UNKNOWN;
>> rte_spinlock_init(&dev->tx_q[i].tx_lock);
>>@@ -1088,16 +1068,15 @@ netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
>> }
>>
>> static inline void
>>-dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)
>>+netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
>>+ struct rte_mbuf **pkts, int cnt)
>> {
>>- struct dpdk_tx_queue *txq = &dev->tx_q[qid];
>> uint32_t nb_tx = 0;
>>
>>- while (nb_tx != txq->count) {
>>+ while (nb_tx != cnt) {
>> uint32_t ret;
>>
>>- ret = rte_eth_tx_burst(dev->port_id, qid, txq->burst_pkts + nb_tx,
>>- txq->count - nb_tx);
>>+ ret = rte_eth_tx_burst(dev->port_id, qid, pkts + nb_tx, cnt - nb_tx);
>> if (!ret) {
>> break;
>> }
>>@@ -1105,32 +1084,18 @@ dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)
>> nb_tx += ret;
>> }
>>
>>- if (OVS_UNLIKELY(nb_tx != txq->count)) {
>>+ if (OVS_UNLIKELY(nb_tx != cnt)) {
>> /* free buffers, which we couldn't transmit, one at a time (each
>> * packet could come from a different mempool) */
>> int i;
>>
>>- for (i = nb_tx; i < txq->count; i++) {
>>- rte_pktmbuf_free(txq->burst_pkts[i]);
>>+ for (i = nb_tx; i < cnt; i++) {
>>+ rte_pktmbuf_free(pkts[i]);
>> }
>> rte_spinlock_lock(&dev->stats_lock);
>>- dev->stats.tx_dropped += txq->count-nb_tx;
>>+ dev->stats.tx_dropped += cnt - nb_tx;
>> rte_spinlock_unlock(&dev->stats_lock);
>> }
>>-
>>- txq->count = 0;
>>- txq->tsc = rte_get_timer_cycles();
>>-}
>>-
>>-static inline void
>>-dpdk_queue_flush(struct netdev_dpdk *dev, int qid)
>>-{
>>- struct dpdk_tx_queue *txq = &dev->tx_q[qid];
>>-
>>- if (txq->count == 0) {
>>- return;
>>- }
>>- dpdk_queue_flush__(dev, qid);
>> }
>>
>> static inline bool
>>@@ -1303,15 +1268,6 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,
>> int nb_rx;
>> int dropped = 0;
>>
>>- /* There is only one tx queue for this core. Do not flush other
>>- * queues.
>>- * Do not flush tx queue which is shared among CPUs
>>- * since it is always flushed */
>>- if (rxq->queue_id == rte_lcore_id() &&
>>- OVS_LIKELY(!dev->txq_needs_locking)) {
>>- dpdk_queue_flush(dev, rxq->queue_id);
>>- }
>>-
>> nb_rx = rte_eth_rx_burst(rx->port_id, rxq->queue_id,
>> (struct rte_mbuf **) packets,
>> NETDEV_MAX_BURST);
>>@@ -1433,35 +1389,6 @@ out:
>> }
>> }
>>
>>-inline static void
>>-dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,
>>- struct rte_mbuf **pkts, int cnt)
>>-{
>>- struct dpdk_tx_queue *txq = &dev->tx_q[qid];
>>- uint64_t diff_tsc;
>>-
>>- int i = 0;
>>-
>>- while (i < cnt) {
>>- int freeslots = MAX_TX_QUEUE_LEN - txq->count;
>>- int tocopy = MIN(freeslots, cnt-i);
>>-
>>- memcpy(&txq->burst_pkts[txq->count], &pkts[i],
>>- tocopy * sizeof (struct rte_mbuf *));
>>-
>>- txq->count += tocopy;
>>- i += tocopy;
>>-
>>- if (txq->count == MAX_TX_QUEUE_LEN || txq->flush_tx) {
>>- dpdk_queue_flush__(dev, qid);
>>- }
>>- diff_tsc = rte_get_timer_cycles() - txq->tsc;
>>- if (diff_tsc >= DRAIN_TSC) {
>>- dpdk_queue_flush__(dev, qid);
>>- }
>>- }
>>-}
>>-
>> /* Tx function. Transmit packets indefinitely */
>> static void
>> dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
>>@@ -1527,8 +1454,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
>> newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);
>>
>> dropped += qos_pkts - newcnt;
>>- dpdk_queue_pkts(dev, qid, mbufs, newcnt);
>>- dpdk_queue_flush(dev, qid);
>>+ netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);
>> }
>>
>> if (OVS_UNLIKELY(dropped)) {
>>@@ -1611,7 +1537,7 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>> temp_cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts,
>> temp_cnt);
>> dropped += qos_pkts - temp_cnt;
>>- dpdk_queue_pkts(dev, qid,
>>+ netdev_dpdk_eth_tx_burst(dev, qid,
>> (struct rte_mbuf **)&pkts[next_tx_idx],
>> temp_cnt);
>>
>>@@ -1631,8 +1557,9 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>>
>> cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts, cnt);
>> dropped += qos_pkts - cnt;
>>- dpdk_queue_pkts(dev, qid, (struct rte_mbuf **)&pkts[next_tx_idx],
>>- cnt);
>>+ netdev_dpdk_eth_tx_burst(dev, qid,
>>+ (struct rte_mbuf **)&pkts[next_tx_idx],
>>+ cnt);
>> }
>>
>> if (OVS_UNLIKELY(dropped)) {
>>--
>>2.7.4
>>
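
For readers piecing together the hunks above: the whole of the send path
that remains after the patch condenses to one retry loop. The sketch below
is simplified (the function name, signature, and the 'tx_dropped'
out-parameter are assumptions of this illustration; the real function is
netdev_dpdk_eth_tx_burst() in the diff, which updates
dev->stats.tx_dropped under dev->stats_lock):

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Send 'cnt' packets on tx queue 'qid' of 'port_id' immediately, with
 * no intermediate queueing.  rte_eth_tx_burst() may accept fewer
 * packets than offered, so retry until it makes no progress, then free
 * the leftovers one at a time (each mbuf may come from a different
 * mempool) and count them as drops. */
static void
eth_tx_burst_sketch(uint16_t port_id, uint16_t qid,
                    struct rte_mbuf **pkts, int cnt, uint64_t *tx_dropped)
{
    int nb_tx = 0;

    while (nb_tx != cnt) {
        uint16_t ret = rte_eth_tx_burst(port_id, qid, pkts + nb_tx,
                                        cnt - nb_tx);
        if (!ret) {
            break;              /* NIC tx ring is full: drop the rest. */
        }
        nb_tx += ret;
    }

    if (nb_tx != cnt) {
        int i;

        for (i = nb_tx; i < cnt; i++) {
            rte_pktmbuf_free(pkts[i]);
        }
        *tx_dropped += cnt - nb_tx;
    }
}

The tradeoff is that every send now pays the rte_eth_tx_burst() call even
for small batches, but as the commit message notes, testing showed no
measurable difference, and the change removes both the flush-on-receive
coupling and the per-queue burst buffer.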