[ovs-dev] [PATCH v8 1/2] netdev-linux: Detect numa node id.

Yi-Hung Wei yihung.wei at gmail.com
Wed Dec 18 20:31:05 UTC 2019


From: William Tu <u9012063 at gmail.com>

The patch detects the numa node id from the name of the netdev,
by reading the '/sys/class/net/<devname>/device/numa_node'.
If not available, ex: virtual device, or any error happens,
return numa id 0.  Currently only the afxdp netdev type uses it,
other linux netdev types are disabled due to no use case.

Signed-off-by: William Tu <u9012063 at gmail.com>
Acked-by: Eelco Chaudron <echaudro at redhat.com>
---
v8:
  - Addreess review comments from Eelco and Ilya in patch 2.
    * Use OVS_FIND_DEPENDENCY().
    * Avoid the locking issue when calling netdev_get_numa_id().
    * Check NETDEV_NUMA_UNSPEC.
    * Use return value from netdev_get_numa_id() directly, and
      check NETDEV_NUMA_UNSPEC case.
    * Use numa_set_preferred().
  - Travis CI: https://travis-ci.org/YiHungWei/ovs/builds/626865328

v7:
  - Add numa aware memory allocation for afxdp related memory in the following
    patch.
  - Travis CI: https://travis-ci.org/YiHungWei/ovs/builds/626403984

v6:
  Feedbacks from Ilya
  - add thread safety check at netdev_linux_get_numa_id__, and
    pass netdev_linux
  - preserve numa cache on netlink updates
  - Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/605634673

v5:
  Feedbacks from Ilya
  - reafactor the error handling
  - add mutex lock
  - Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/601947245
_UNSPEC
v4:
  Feedbacks from Eelco
  - Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/599308893

v3:
  Feedbacks from Ilya and Eelco
  - update doc, afxdp.rst
  - fix coding style
  - fix limit of numa node max, by using ovs_numa_numa_id_is_valid
  - move the function to netdev-linux
  - cache the result of numa_id
  - add security check for netdev name
  - use fscanf instead of read and convert to int
  - revise some error message content

v2:
  address feedback from Eelco
        fix memory leak of xaspintf
        log using INFO instead of WARN

---
 Documentation/intro/install/afxdp.rst |  1 -
 lib/netdev-afxdp.c                    | 11 -----
 lib/netdev-afxdp.h                    |  1 -
 lib/netdev-linux-private.h            |  2 +
 lib/netdev-linux.c                    | 67 +++++++++++++++++++++++++--
 5 files changed, 66 insertions(+), 16 deletions(-)

diff --git a/Documentation/intro/install/afxdp.rst b/Documentation/intro/install/afxdp.rst
index 15e3c918f942..7b0736c96114 100644
--- a/Documentation/intro/install/afxdp.rst
+++ b/Documentation/intro/install/afxdp.rst
@@ -327,7 +327,6 @@ Below is a script using namespaces and veth peer::
 
 Limitations/Known Issues
 ------------------------
-#. Device's numa ID is always 0, need a way to find numa id from a netdev.
 #. No QoS support because AF_XDP netdev by-pass the Linux TC layer. A possible
    work-around is to use OpenFlow meter action.
 #. Most of the tests are done using i40e single port. Multiple ports and
diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index bdabf97f4fdb..91b70b298e57 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -695,17 +695,6 @@ out:
     return err;
 }
 
-int
-netdev_afxdp_get_numa_id(const struct netdev *netdev)
-{
-    /* FIXME: Get netdev's PCIe device ID, then find
-     * its NUMA node id.
-     */
-    VLOG_INFO("FIXME: Device %s always use numa id 0.",
-              netdev_get_name(netdev));
-    return 0;
-}
-
 static void
 xsk_remove_xdp_program(uint32_t ifindex, enum afxdp_mode mode)
 {
diff --git a/lib/netdev-afxdp.h b/lib/netdev-afxdp.h
index 8188bc669526..246a4b62fb57 100644
--- a/lib/netdev-afxdp.h
+++ b/lib/netdev-afxdp.h
@@ -60,7 +60,6 @@ int netdev_afxdp_batch_send(struct netdev *netdev_, int qid,
 int netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
                             char **errp);
 int netdev_afxdp_get_config(const struct netdev *netdev, struct smap *args);
-int netdev_afxdp_get_numa_id(const struct netdev *netdev);
 int netdev_afxdp_get_stats(const struct netdev *netdev_,
                            struct netdev_stats *stats);
 int netdev_afxdp_get_custom_stats(const struct netdev *netdev,
diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h
index 8873caa9d412..4d66b0858222 100644
--- a/lib/netdev-linux-private.h
+++ b/lib/netdev-linux-private.h
@@ -96,6 +96,8 @@ struct netdev_linux {
     /* LAG information. */
     bool is_lag_master;         /* True if the netdev is a LAG master. */
 
+    int numa_id;                /* NUMA node id. */
+
 #ifdef HAVE_AF_XDP
     /* AF_XDP information. */
     struct xsk_socket_info **xsks;
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index f8e59bacfb13..e32df970fe4b 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -236,6 +236,7 @@ enum {
     VALID_VPORT_STAT_ERROR  = 1 << 5,
     VALID_DRVINFO           = 1 << 6,
     VALID_FEATURES          = 1 << 7,
+    VALID_NUMA_ID           = 1 << 8,
 };
 
 struct linux_lag_slave {
@@ -820,9 +821,9 @@ netdev_linux_update__(struct netdev_linux *dev,
 {
     if (rtnetlink_type_is_rtnlgrp_link(change->nlmsg_type)) {
         if (change->nlmsg_type == RTM_NEWLINK) {
-            /* Keep drv-info, and ip addresses. */
+            /* Keep drv-info, ip addresses, and NUMA id. */
             netdev_linux_changed(dev, change->ifi_flags,
-                                 VALID_DRVINFO | VALID_IN);
+                                 VALID_DRVINFO | VALID_IN | VALID_NUMA_ID);
 
             /* Update netdev from rtnl-change msg. */
             if (change->mtu) {
@@ -1391,6 +1392,66 @@ netdev_linux_tap_batch_send(struct netdev *netdev_,
     return 0;
 }
 
+static int
+netdev_linux_get_numa_id__(struct netdev_linux *netdev)
+    OVS_REQUIRES(netdev->mutex)
+{
+    char *numa_node_path;
+    const char *name;
+    int node_id;
+    FILE *stream;
+
+    if (netdev->cache_valid & VALID_NUMA_ID) {
+        return netdev->numa_id;
+    }
+
+    netdev->numa_id = 0;
+    netdev->cache_valid |= VALID_NUMA_ID;
+
+    name = netdev_get_name(&netdev->up);
+    if (strpbrk(name, "/\\")) {
+        VLOG_ERR_RL(&rl, "\"%s\" is not a valid name for a port. "
+                    "A valid name must not include '/' or '\\'."
+                    "Using numa_id 0", name);
+        return 0;
+    }
+
+    numa_node_path = xasprintf("/sys/class/net/%s/device/numa_node", name);
+
+    stream = fopen(numa_node_path, "r");
+    if (!stream) {
+        /* Virtual device does not have this info. */
+        VLOG_INFO_RL(&rl, "%s: Can't open '%s': %s, using numa_id 0",
+                     name, numa_node_path, ovs_strerror(errno));
+        free(numa_node_path);
+        return 0;
+    }
+
+    if (fscanf(stream, "%d", &node_id) != 1
+        || !ovs_numa_numa_id_is_valid(node_id))  {
+        VLOG_WARN_RL(&rl, "%s: Can't detect NUMA node, using numa_id 0", name);
+        node_id = 0;
+    }
+
+    netdev->numa_id = node_id;
+    fclose(stream);
+    free(numa_node_path);
+    return node_id;
+}
+
+static int OVS_UNUSED
+netdev_linux_get_numa_id(const struct netdev *netdev_)
+{
+    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+    int numa_id;
+
+    ovs_mutex_lock(&netdev->mutex);
+    numa_id = netdev_linux_get_numa_id__(netdev);
+    ovs_mutex_unlock(&netdev->mutex);
+
+    return numa_id;
+}
+
 /* Sends 'batch' on 'netdev'.  Returns 0 if successful, otherwise a positive
  * errno value.  Returns EAGAIN without blocking if the packet cannot be queued
  * immediately.  Returns EMSGSIZE if a partial packet was transmitted or if
@@ -3308,7 +3369,7 @@ const struct netdev_class netdev_afxdp_class = {
     .set_config = netdev_afxdp_set_config,
     .get_config = netdev_afxdp_get_config,
     .reconfigure = netdev_afxdp_reconfigure,
-    .get_numa_id = netdev_afxdp_get_numa_id,
+    .get_numa_id = netdev_linux_get_numa_id,
     .send = netdev_afxdp_batch_send,
     .rxq_construct = netdev_afxdp_rxq_construct,
     .rxq_destruct = netdev_afxdp_rxq_destruct,
-- 
2.17.1



More information about the dev mailing list