[ovs-dev] [PATCH RFC 1/1] netdev-dpdk: NUMA Aware vHost

Ciara Loftus ciara.loftus at intel.com
Fri Mar 4 10:08:39 UTC 2016


This commit allows for vHost memory from QEMU, DPDK and OVS, as well
as the servicing PMD, to all come from the same socket.

DPDK v2.2 introduces a new configuration option:
CONFIG_RTE_LIBRTE_VHOST_NUMA. If enabled, DPDK detects the socket
from which a vhost device's memory has been allocated by QEMU, and
accordingly reallocates device memory managed by DPDK to that same
socket.

OVS by default sets the socket id of a vhost port to that of the
master lcore. This commit introduces the ability to update the
socket id of the port if it is detected (during VM boot) that the
port memory is not on the default NUMA node. If this is the case, the
mempool of the port is also changed to the new node, and a PMD
thread currently servicing the port will no longer, in favour of a
thread from the new node (if enabled in the CPU mask).

Signed-off-by: Ciara Loftus <ciara.loftus at intel.com>
---
 INSTALL.DPDK.md   |  6 +++++-
 acinclude.m4      |  2 +-
 lib/netdev-dpdk.c | 25 +++++++++++++++++++++++--
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
index dca79bd..82e6908 100644
--- a/INSTALL.DPDK.md
+++ b/INSTALL.DPDK.md
@@ -33,6 +33,10 @@ on Debian/Ubuntu)
 
      `CONFIG_RTE_BUILD_COMBINE_LIBS=y`
 
+     Enable NUMA-aware vHost by modifying the following in the same file:
+
+     `CONFIG_RTE_LIBRTE_VHOST_NUMA=y`
+
      Then run `make install` to build and install the library.
      For default install without IVSHMEM:
 
@@ -383,7 +387,7 @@ Performance Tuning:
 
 	It is good practice to ensure that threads that are in the datapath are
 	pinned to cores in the same NUMA area. e.g. pmd threads and QEMU vCPUs
-	responsible for forwarding.
+	responsible for forwarding. This is now default behavior for vHost ports.
 
   9. Rx Mergeable buffers
 
diff --git a/acinclude.m4 b/acinclude.m4
index 11c7787..432bdbd 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -199,7 +199,7 @@ AC_DEFUN([OVS_CHECK_DPDK], [
     found=false
     save_LIBS=$LIBS
     for extras in "" "-ldl"; do
-        LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB"
+        LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB -lnuma"
         AC_LINK_IFELSE(
            [AC_LANG_PROGRAM([#include <rte_config.h>
                              #include <rte_eal.h>],
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 17b8d51..4e1ce53 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -29,6 +29,7 @@
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <numaif.h>
 
 #include "dirs.h"
 #include "dp-packet.h"
@@ -1878,6 +1879,8 @@ new_device(struct virtio_net *dev)
 {
     struct netdev_dpdk *netdev;
     bool exists = false;
+    int newnode = 0;
+    long err = 0;
 
     ovs_mutex_lock(&dpdk_mutex);
     /* Add device to the vhost port with the same name as that passed down. */
@@ -1891,6 +1894,24 @@ new_device(struct virtio_net *dev)
             }
             ovsrcu_set(&netdev->virtio_dev, dev);
             exists = true;
+
+            /* Get NUMA information */
+            err = get_mempolicy(&newnode, NULL, 0, dev, MPOL_F_NODE | MPOL_F_ADDR);
+            if (err) {
+                VLOG_INFO("Error getting NUMA info for vHost Device '%s'",
+                        dev->ifname);
+                newnode = netdev->socket_id;
+            } else if (newnode != netdev->socket_id) {
+                netdev->socket_id = newnode;
+                /* Change mempool to new NUMA Node */
+                dpdk_mp_put(netdev->dpdk_mp);
+                netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, netdev->mtu);
+                /* Request netdev reconfiguration. The port may now be
+                 * serviced by a PMD on the new node if enabled in the cpu
+                 * mask */
+                netdev_request_reconfigure(&netdev->up);
+            }
+
             dev->flags |= VIRTIO_DEV_RUNNING;
             /* Disable notifications. */
             set_irq_status(dev);
@@ -1907,8 +1928,8 @@ new_device(struct virtio_net *dev)
         return -1;
     }
 
-    VLOG_INFO("vHost Device '%s' %"PRIu64" has been added", dev->ifname,
-              dev->device_fh);
+    VLOG_INFO("vHost Device '%s' %"PRIu64" has been added on socket %i",
+              dev->ifname, dev->device_fh, newnode);
     return 0;
 }
 
-- 
2.4.3




More information about the dev mailing list