[ovs-dev] [PATCH v7 2/2] netdev-afxdp: NUMA-aware memory allocation for XSK related memory

Yi-Hung Wei yihung.wei at gmail.com
Tue Dec 17 22:27:54 UTC 2019


Currently, AF_XDP socket (XSK) related memory is allocated by the main
thread in the main thread's NUMA domain.  With the patch that detects
netdev-linux's NUMA node id, the PMD thread of an AF_XDP port will run on
the AF_XDP netdev's NUMA domain.  If the net device's NUMA domain
is different from the main thread's NUMA domain, we will have two
cross-NUMA memory accesses (netdev <-> memory, memory <-> CPU).

This patch addresses the aforementioned issue by allocating
the memory in the net device's NUMA domain.

Signed-off-by: Yi-Hung Wei <yihung.wei at gmail.com>
---
 Documentation/intro/install/afxdp.rst |  2 +-
 acinclude.m4                          |  5 ++++-
 include/sparse/automake.mk            |  1 +
 include/sparse/numa.h                 | 27 +++++++++++++++++++++++++++
 lib/netdev-afxdp.c                    | 21 ++++++++++++++++++---
 5 files changed, 51 insertions(+), 5 deletions(-)
 create mode 100644 include/sparse/numa.h

diff --git a/Documentation/intro/install/afxdp.rst b/Documentation/intro/install/afxdp.rst
index 7b0736c96114..c4685fa7ebac 100644
--- a/Documentation/intro/install/afxdp.rst
+++ b/Documentation/intro/install/afxdp.rst
@@ -164,7 +164,7 @@ If a test case fails, check the log at::
 
 Setup AF_XDP netdev
 -------------------
-Before running OVS with AF_XDP, make sure the libbpf and libelf are
+Before running OVS with AF_XDP, make sure the libbpf, libelf, and libnuma are
 set-up right::
 
   ldd vswitchd/ovs-vswitchd
diff --git a/acinclude.m4 b/acinclude.m4
index 542637ac8cb8..73ed11d701aa 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -286,9 +286,12 @@ AC_DEFUN([OVS_CHECK_LINUX_AF_XDP], [
     AC_CHECK_FUNCS([pthread_spin_lock], [],
       [AC_MSG_ERROR([unable to find pthread_spin_lock for AF_XDP support])])
 
+    AC_CHECK_LIB(numa, numa_alloc_onnode, [],
+      [AC_MSG_ERROR([unable to find libnuma for AF_XDP support])])
+
     AC_DEFINE([HAVE_AF_XDP], [1],
               [Define to 1 if AF_XDP support is available and enabled.])
-    LIBBPF_LDADD=" -lbpf -lelf"
+    LIBBPF_LDADD=" -lbpf -lelf -lnuma"
     AC_SUBST([LIBBPF_LDADD])
 
     AC_CHECK_DECL([xsk_ring_prod__needs_wakeup], [
diff --git a/include/sparse/automake.mk b/include/sparse/automake.mk
index 073631e8c082..974ad3fe55f7 100644
--- a/include/sparse/automake.mk
+++ b/include/sparse/automake.mk
@@ -5,6 +5,7 @@ noinst_HEADERS += \
         include/sparse/bits/floatn.h \
         include/sparse/assert.h \
         include/sparse/math.h \
+        include/sparse/numa.h \
         include/sparse/netinet/in.h \
         include/sparse/netinet/ip6.h \
         include/sparse/netpacket/packet.h \
diff --git a/include/sparse/numa.h b/include/sparse/numa.h
new file mode 100644
index 000000000000..3691a0eaf729
--- /dev/null
+++ b/include/sparse/numa.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2019 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CHECKER__
+#error "Use this header only with sparse.  It is not a correct implementation."
+#endif
+
+/* Avoid sparse warning: "non-ANSI function declaration of function". */
+#define numa_get_membind_compat() numa_get_membind_compat(void)
+#define numa_get_interleave_mask_compat() numa_get_interleave_mask_compat(void)
+#define numa_get_run_node_mask_compat() numa_get_run_node_mask_compat(void)
+
+/* Get actual <numa.h> definitions for us to annotate and build on. */
+#include_next<numa.h>
diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index 4c1f9c68270a..2d6f739b4b67 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -26,6 +26,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/if_xdp.h>
 #include <net/if.h>
+#include <numa.h>
 #include <poll.h>
 #include <stdlib.h>
 #include <sys/resource.h>
@@ -469,7 +470,7 @@ xsk_configure_all(struct netdev *netdev)
 {
     struct netdev_linux *dev = netdev_linux_cast(netdev);
     int i, ifindex, n_rxq, n_txq;
-    int qid = 0;
+    int qid = 0, err = 0;
 
     ifindex = linux_get_ifindex(netdev_get_name(netdev));
 
@@ -477,6 +478,14 @@ xsk_configure_all(struct netdev *netdev)
     ovs_assert(dev->tx_locks == NULL);
 
     n_rxq = netdev_n_rxq(netdev);
+
+    /* Allocate all the xsk related memory in the netdev's NUMA domain. */
+    struct bitmask *old_bm = numa_get_membind();
+    struct bitmask *new_bm = numa_allocate_nodemask();
+    netdev_get_numa_id(netdev);
+    numa_bitmask_setbit(new_bm, dev->numa_id);
+    numa_set_membind(new_bm);
+
     dev->xsks = xcalloc(n_rxq, sizeof *dev->xsks);
 
     if (dev->xdp_mode == OVS_AF_XDP_MODE_BEST_EFFORT) {
@@ -518,11 +527,17 @@ xsk_configure_all(struct netdev *netdev)
         ovs_spin_init(&dev->tx_locks[i]);
     }
 
-    return 0;
+    goto out;
 
 err:
     xsk_destroy_all(netdev);
-    return EINVAL;
+    err = EINVAL;
+
+out:
+    numa_set_membind(old_bm);
+    numa_bitmask_free(old_bm);
+    numa_bitmask_free(new_bm);
+    return err;
 }
 
 static void
-- 
2.17.1



More information about the dev mailing list