[ovs-dev] [PATCH v7 2/2] netdev-afxdp: NUMA-aware memory allocation for XSK related memory
Yi-Hung Wei
yihung.wei at gmail.com
Tue Dec 17 22:27:54 UTC 2019
Currently, the AF_XDP socket (XSK) related memory is allocated by the main
thread in the main thread's NUMA domain. With the patch that detects
netdev-linux's NUMA node id, the PMD thread of an AF_XDP port will run on
the AF_XDP netdev's NUMA domain. If the net device's NUMA domain
is different from the main thread's NUMA domain, we will have two
cross-NUMA memory accesses (netdev <-> memory, memory <-> CPU).
This patch addresses the aforementioned issue by allocating
the memory in the net device's NUMA domain.
Signed-off-by: Yi-Hung Wei <yihung.wei at gmail.com>
---
Documentation/intro/install/afxdp.rst | 2 +-
acinclude.m4 | 5 ++++-
include/sparse/automake.mk | 1 +
include/sparse/numa.h | 27 +++++++++++++++++++++++++++
lib/netdev-afxdp.c | 21 ++++++++++++++++++---
5 files changed, 51 insertions(+), 5 deletions(-)
create mode 100644 include/sparse/numa.h
diff --git a/Documentation/intro/install/afxdp.rst b/Documentation/intro/install/afxdp.rst
index 7b0736c96114..c4685fa7ebac 100644
--- a/Documentation/intro/install/afxdp.rst
+++ b/Documentation/intro/install/afxdp.rst
@@ -164,7 +164,7 @@ If a test case fails, check the log at::
Setup AF_XDP netdev
-------------------
-Before running OVS with AF_XDP, make sure the libbpf and libelf are
+Before running OVS with AF_XDP, make sure the libbpf, libelf, and libnuma are
set-up right::
ldd vswitchd/ovs-vswitchd
diff --git a/acinclude.m4 b/acinclude.m4
index 542637ac8cb8..73ed11d701aa 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -286,9 +286,12 @@ AC_DEFUN([OVS_CHECK_LINUX_AF_XDP], [
AC_CHECK_FUNCS([pthread_spin_lock], [],
[AC_MSG_ERROR([unable to find pthread_spin_lock for AF_XDP support])])
+ AC_CHECK_LIB(numa, numa_alloc_onnode, [],
+ [AC_MSG_ERROR([unable to find libnuma for AF_XDP support])])
+
AC_DEFINE([HAVE_AF_XDP], [1],
[Define to 1 if AF_XDP support is available and enabled.])
- LIBBPF_LDADD=" -lbpf -lelf"
+ LIBBPF_LDADD=" -lbpf -lelf -lnuma"
AC_SUBST([LIBBPF_LDADD])
AC_CHECK_DECL([xsk_ring_prod__needs_wakeup], [
diff --git a/include/sparse/automake.mk b/include/sparse/automake.mk
index 073631e8c082..974ad3fe55f7 100644
--- a/include/sparse/automake.mk
+++ b/include/sparse/automake.mk
@@ -5,6 +5,7 @@ noinst_HEADERS += \
include/sparse/bits/floatn.h \
include/sparse/assert.h \
include/sparse/math.h \
+ include/sparse/numa.h \
include/sparse/netinet/in.h \
include/sparse/netinet/ip6.h \
include/sparse/netpacket/packet.h \
diff --git a/include/sparse/numa.h b/include/sparse/numa.h
new file mode 100644
index 000000000000..3691a0eaf729
--- /dev/null
+++ b/include/sparse/numa.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2019 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CHECKER__
+#error "Use this header only with sparse. It is not a correct implementation."
+#endif
+
+/* Avoid sparse warning: "non-ANSI function declaration of function". */
+#define numa_get_membind_compat() numa_get_membind_compat(void)
+#define numa_get_interleave_mask_compat() numa_get_interleave_mask_compat(void)
+#define numa_get_run_node_mask_compat() numa_get_run_node_mask_compat(void)
+
+/* Get actual <numa.h> definitions for us to annotate and build on. */
+#include_next<numa.h>
diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index 4c1f9c68270a..2d6f739b4b67 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -26,6 +26,7 @@
#include <linux/rtnetlink.h>
#include <linux/if_xdp.h>
#include <net/if.h>
+#include <numa.h>
#include <poll.h>
#include <stdlib.h>
#include <sys/resource.h>
@@ -469,7 +470,7 @@ xsk_configure_all(struct netdev *netdev)
{
struct netdev_linux *dev = netdev_linux_cast(netdev);
int i, ifindex, n_rxq, n_txq;
- int qid = 0;
+ int qid = 0, err = 0;
ifindex = linux_get_ifindex(netdev_get_name(netdev));
@@ -477,6 +478,14 @@ xsk_configure_all(struct netdev *netdev)
ovs_assert(dev->tx_locks == NULL);
n_rxq = netdev_n_rxq(netdev);
+
+ /* Allocate all the xsk related memory in the netdev's NUMA domain. */
+ struct bitmask *old_bm = numa_get_membind();
+ struct bitmask *new_bm = numa_allocate_nodemask();
+ netdev_get_numa_id(netdev);
+ numa_bitmask_setbit(new_bm, dev->numa_id);
+ numa_set_membind(new_bm);
+
dev->xsks = xcalloc(n_rxq, sizeof *dev->xsks);
if (dev->xdp_mode == OVS_AF_XDP_MODE_BEST_EFFORT) {
@@ -518,11 +527,17 @@ xsk_configure_all(struct netdev *netdev)
ovs_spin_init(&dev->tx_locks[i]);
}
- return 0;
+ goto out;
err:
xsk_destroy_all(netdev);
- return EINVAL;
+ err = EINVAL;
+
+out:
+ numa_set_membind(old_bm);
+ numa_bitmask_free(old_bm);
+ numa_bitmask_free(new_bm);
+ return err;
}
static void
--
2.17.1
More information about the dev
mailing list