[ovs-dev] [PATCH v7 2/2] netdev-afxdp: NUMA-aware memory allocation for XSK related memory
Eelco Chaudron
echaudro at redhat.com
Wed Dec 18 08:24:28 UTC 2019
On 17 Dec 2019, at 23:27, Yi-Hung Wei wrote:
> Currently, the AF_XDP socket (XSK) related memory are allocated by
> main
> thread in the main thread's NUMA domain. With the patch that detects
> netdev-linux's NUMA node id, the PMD thread of AF_XDP port will be run
> on
> the AF_XDP netdev's NUMA domain. If the net device's NUMA domain
> is different from the main thread's NUMA domain, we will have two
> cross-NUMA memory accesses (netdev <-> memory, memory <-> CPU).
>
> This patch addresses the aforementioned issue by allocating
> the memory in the net device's NUMA domain.
Did not try the patch yet, but it looks good. One quick comment below…
> Signed-off-by: Yi-Hung Wei <yihung.wei at gmail.com>
> ---
> Documentation/intro/install/afxdp.rst | 2 +-
> acinclude.m4 | 5 ++++-
> include/sparse/automake.mk | 1 +
> include/sparse/numa.h | 27
> +++++++++++++++++++++++++++
> lib/netdev-afxdp.c | 21 ++++++++++++++++++---
> 5 files changed, 51 insertions(+), 5 deletions(-)
> create mode 100644 include/sparse/numa.h
>
> diff --git a/Documentation/intro/install/afxdp.rst
> b/Documentation/intro/install/afxdp.rst
> index 7b0736c96114..c4685fa7ebac 100644
> --- a/Documentation/intro/install/afxdp.rst
> +++ b/Documentation/intro/install/afxdp.rst
> @@ -164,7 +164,7 @@ If a test case fails, check the log at::
>
> Setup AF_XDP netdev
> -------------------
> -Before running OVS with AF_XDP, make sure the libbpf and libelf are
> +Before running OVS with AF_XDP, make sure the libbpf, libelf, and
> libnuma are
> set-up right::
>
> ldd vswitchd/ovs-vswitchd
> diff --git a/acinclude.m4 b/acinclude.m4
> index 542637ac8cb8..73ed11d701aa 100644
> --- a/acinclude.m4
> +++ b/acinclude.m4
> @@ -286,9 +286,12 @@ AC_DEFUN([OVS_CHECK_LINUX_AF_XDP], [
> AC_CHECK_FUNCS([pthread_spin_lock], [],
> [AC_MSG_ERROR([unable to find pthread_spin_lock for AF_XDP
> support])])
>
> + AC_CHECK_LIB(numa, numa_alloc_onnode, [],
> + [AC_MSG_ERROR([unable to find libnuma for AF_XDP support])])
> +
> AC_DEFINE([HAVE_AF_XDP], [1],
> [Define to 1 if AF_XDP support is available and
> enabled.])
> - LIBBPF_LDADD=" -lbpf -lelf"
> + LIBBPF_LDADD=" -lbpf -lelf -lnuma"
> AC_SUBST([LIBBPF_LDADD])
>
> AC_CHECK_DECL([xsk_ring_prod__needs_wakeup], [
> diff --git a/include/sparse/automake.mk b/include/sparse/automake.mk
> index 073631e8c082..974ad3fe55f7 100644
> --- a/include/sparse/automake.mk
> +++ b/include/sparse/automake.mk
> @@ -5,6 +5,7 @@ noinst_HEADERS += \
> include/sparse/bits/floatn.h \
> include/sparse/assert.h \
> include/sparse/math.h \
> + include/sparse/numa.h \
> include/sparse/netinet/in.h \
> include/sparse/netinet/ip6.h \
> include/sparse/netpacket/packet.h \
> diff --git a/include/sparse/numa.h b/include/sparse/numa.h
> new file mode 100644
> index 000000000000..3691a0eaf729
> --- /dev/null
> +++ b/include/sparse/numa.h
> @@ -0,0 +1,27 @@
> +/*
> + * Copyright (c) 2019 Nicira, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing,
> software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions
> and
> + * limitations under the License.
> + */
> +
> +#ifndef __CHECKER__
> +#error "Use this header only with sparse. It is not a correct
> implementation."
> +#endif
> +
> +/* Avoid sparse warning: non-ANSI function declaration of function"
> */
> +#define numa_get_membind_compat() numa_get_membind_compat(void)
> +#define numa_get_interleave_mask_compat()
> numa_get_interleave_mask_compat(void)
> +#define numa_get_run_node_mask_compat()
> numa_get_run_node_mask_compat(void)
> +
> +/* Get actual <numa.h> definitions for us to annotate and build on.
> */
> +#include_next<numa.h>
> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> index 4c1f9c68270a..2d6f739b4b67 100644
> --- a/lib/netdev-afxdp.c
> +++ b/lib/netdev-afxdp.c
> @@ -26,6 +26,7 @@
> #include <linux/rtnetlink.h>
> #include <linux/if_xdp.h>
> #include <net/if.h>
> +#include <numa.h>
> #include <poll.h>
> #include <stdlib.h>
> #include <sys/resource.h>
> @@ -469,7 +470,7 @@ xsk_configure_all(struct netdev *netdev)
> {
> struct netdev_linux *dev = netdev_linux_cast(netdev);
> int i, ifindex, n_rxq, n_txq;
> - int qid = 0;
> + int qid = 0, err = 0;
>
> ifindex = linux_get_ifindex(netdev_get_name(netdev));
>
> @@ -477,6 +478,14 @@ xsk_configure_all(struct netdev *netdev)
> ovs_assert(dev->tx_locks == NULL);
>
> n_rxq = netdev_n_rxq(netdev);
> +
> + /* Allocate all the xsk related memory in the netdev's NUMA
> domain. */
> + struct bitmask *old_bm = numa_get_membind();
> + struct bitmask *new_bm = numa_allocate_nodemask();
> + netdev_get_numa_id(netdev);
> + numa_bitmask_setbit(new_bm, dev->numa_id);
Rather than access the device specific structure, why not use the return
value?
numa_bitmask_setbit(new_bm, netdev_get_numa_id(netdev));
> + numa_set_membind(new_bm);
> +
> dev->xsks = xcalloc(n_rxq, sizeof *dev->xsks);
>
> if (dev->xdp_mode == OVS_AF_XDP_MODE_BEST_EFFORT) {
> @@ -518,11 +527,17 @@ xsk_configure_all(struct netdev *netdev)
> ovs_spin_init(&dev->tx_locks[i]);
> }
>
> - return 0;
> + goto out;
>
> err:
> xsk_destroy_all(netdev);
> - return EINVAL;
> + err = EINVAL;
> +
> +out:
> + numa_set_membind(old_bm);
> + numa_bitmask_free(old_bm);
> + numa_bitmask_free(new_bm);
> + return err;
> }
>
> static void
> --
> 2.17.1
More information about the dev
mailing list