[ovs-dev] [dpdk patch 4/8] netdev-dpdk: Create 'number of dpdk ifaces on same cpu socket' rx queues and 'number of cpu cores' tx queues for each dpdk interface.

Alex Wang alexw at nicira.com
Tue Aug 12 04:56:39 UTC 2014


Before this commit, ovs only creates one tx/rx queue for each
dpdk interface and uses only one poll thread for handling the
I/O of all dpdk interfaces.  As one step toward using multiple
poll threads, this commit makes ovs, by default, create the
same number of rx queues as the number of dpdk interfaces on
the same cpu socket.  Also, each dpdk interface will have one
tx queue for each cpu core, even though not all of those
queues will be used.

Signed-off-by: Alex Wang <alexw at nicira.com>
---
 lib/dpif-netdev.h |    1 -
 lib/netdev-dpdk.c |   55 +++++++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h
index 410fcfa..50c1198 100644
--- a/lib/dpif-netdev.h
+++ b/lib/dpif-netdev.h
@@ -40,7 +40,6 @@ static inline void dp_packet_pad(struct ofpbuf *b)
     }
 }
 
-#define NR_QUEUE   1
 #define NR_PMD_THREADS 1
 
 #ifdef  __cplusplus
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 109006f..432524f 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -36,6 +36,7 @@
 #include "odp-util.h"
 #include "ofp-print.h"
 #include "ofpbuf.h"
+#include "ovs-numa.h"
 #include "ovs-thread.h"
 #include "ovs-rcu.h"
 #include "packet-dpif.h"
@@ -179,7 +180,9 @@ struct netdev_dpdk {
     int port_id;
     int max_packet_len;
 
-    struct dpdk_tx_queue tx_q[NR_QUEUE];
+    struct dpdk_tx_queue *tx_q;
+    int n_tx_q;
+    int n_rx_q;
 
     struct ovs_mutex mutex OVS_ACQ_AFTER(dpdk_mutex);
 
@@ -384,6 +387,25 @@ dpdk_watchdog(void *dummy OVS_UNUSED)
     return NULL;
 }
 
+/* Returns the number of dpdk ifaces on the cpu socket. */
+static int
+dpdk_get_n_devs(int socket_id)
+{
+    int count = 0;
+    int i;
+
+    ovs_assert(ovs_numa_cpu_socket_id_is_valid(socket_id));
+
+    for (i = 0; i < rte_eth_dev_count(); i++) {
+        if (rte_eth_dev_socket_id(i) == socket_id) {
+            count++;
+        }
+    }
+    ovs_assert(count);
+
+    return count;
+}
+
 static int
 dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex)
 {
@@ -396,13 +418,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex)
         return ENODEV;
     }
 
-    diag = rte_eth_dev_configure(dev->port_id, NR_QUEUE, NR_QUEUE,  &port_conf);
+    diag = rte_eth_dev_configure(dev->port_id, dev->n_rx_q, dev->n_tx_q,
+                                 &port_conf);
     if (diag) {
         VLOG_ERR("eth dev config error %d",diag);
         return -diag;
     }
 
-    for (i = 0; i < NR_QUEUE; i++) {
+    for (i = 0; i < dev->n_tx_q; i++) {
         diag = rte_eth_tx_queue_setup(dev->port_id, i, NIC_PORT_TX_Q_SIZE,
                                       dev->socket_id, &tx_conf);
         if (diag) {
@@ -411,7 +434,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex)
         }
     }
 
-    for (i = 0; i < NR_QUEUE; i++) {
+    for (i = 0; i < dev->n_rx_q; i++) {
         diag = rte_eth_rx_queue_setup(dev->port_id, i, NIC_PORT_RX_Q_SIZE,
                                       dev->socket_id,
                                       &rx_conf, dev->dpdk_mp->mp);
@@ -463,13 +486,25 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk
 {
     struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
     int err = 0;
-    int i;
+    int n_cores, i;
 
     ovs_mutex_init(&netdev->mutex);
 
     ovs_mutex_lock(&netdev->mutex);
 
-    for (i = 0; i < NR_QUEUE; i++) {
+    netdev->n_rx_q = dpdk_get_n_devs(netdev->socket_id);
+
+    /* There can only be ovs_numa_get_n_cores() pmd threads, so creates a tx_q
+     * for each of them. */
+    n_cores = ovs_numa_get_n_cores();
+    if (n_cores == OVS_CORE_UNSPEC) {
+        VLOG_WARN_RL(&rl, "netdev_dpdk init failed due to no cpu core info");
+        err = ENOENT;
+        goto unlock;
+    }
+    netdev->n_tx_q = n_cores;
+    netdev->tx_q = dpdk_rte_mzalloc(netdev->n_tx_q * sizeof *netdev->tx_q);
+    for (i = 0; i < netdev->n_tx_q; i++) {
         rte_spinlock_init(&netdev->tx_q[i].tx_lock);
     }
 
@@ -492,11 +527,14 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk
     if (err) {
         goto unlock;
     }
-    netdev_->n_rxq = NR_QUEUE;
+    netdev_->n_rxq = netdev->n_rx_q;
 
     list_push_back(&dpdk_list, &netdev->list_node);
 
 unlock:
+    if (err) {
+        rte_free(netdev->tx_q);
+    }
     ovs_mutex_unlock(&netdev->mutex);
     return err;
 }
@@ -548,6 +586,7 @@ netdev_dpdk_destruct(struct netdev *netdev_)
     ovs_mutex_unlock(&dev->mutex);
 
     ovs_mutex_lock(&dpdk_mutex);
+    rte_free(dev->tx_q);
     list_remove(&dev->list_node);
     dpdk_mp_put(dev->dpdk_mp);
     ovs_mutex_unlock(&dpdk_mutex);
@@ -786,7 +825,7 @@ netdev_dpdk_send(struct netdev *netdev, struct dpif_packet **pkts, int cnt,
         int next_tx_idx = 0;
         int dropped = 0;
 
-        qid = rte_lcore_id() % NR_QUEUE;
+        qid = rte_lcore_id();
 
         for (i = 0; i < cnt; i++) {
             int size = ofpbuf_size(&pkts[i]->ofpbuf);
-- 
1.7.9.5




More information about the dev mailing list