[ovs-dev] [PATCHv5] netdev-afxdp: Add need_wakeup supprt.
Ilya Maximets
i.maximets at ovn.org
Fri Oct 18 15:14:46 UTC 2019
On 26.09.2019 21:29, William Tu wrote:
> The patch adds support for using need_wakeup flag in AF_XDP rings.
> A new option, use_need_wakeup, is added. When this option is used,
> it means that OVS has to explicitly wake up the kernel RX, using poll()
> syscall and wake up TX, using sendto() syscall. This feature improves
> the performance by avoiding unnecessary sendto syscalls for TX.
> For RX, instead of kernel always busy-spinning on fille queue, OVS wakes
> up the kernel RX processing when fill queue is replenished.
>
> The need_wakeup feature is merged into Linux kernel bpf-next tee with commit
> 77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP rings") and
> OVS enables it by default. Running the feature before this version causes
> xsk bind fails, please use options:use_need_wakeup=false to disable it.
> If users enable it but runs in an older version of libbpf, then the
> need_wakeup feature has no effect, and a warning message is logged.
>
> For virtual interface, it's better set use_need_wakeup=false, since
> the virtual device's AF_XDP xmit is synchronous: the sendto syscall
> enters kernel and process the TX packet on tx queue directly.
>
> On Intel Xeon E5-2620 v3 2.4GHz system, performance of physical port
> to physical port improves from 6.1Mpps to 7.3Mpps.
>
> Suggested-by: Ilya Maximets <i.maximets at ovn.org>
> Signed-off-by: William Tu <u9012063 at gmail.com>
> ---
> v5:
> - address feedback from Ilya
> - update commit msg about kernel version using bpf-next tree
> - remove __func__, and use DBG in log_
> - fix alignment
> - remove the kernel version requirement for need_wakeup, we can
> wait until tag is available
>
> v4:
> - move use_need_wakeup check inside xsk_rx_wakeup_if_needed
>
> v3:
> - add warning when user enables it but libbpf not support it
> - revise documentation
>
> v2:
> - address feedbacks from Ilya and Eelco
> - add options:use_need_wakeup, default to true
> - remove poll timeout=1sec, make poll() return immediately
> - naming change: rename to xsk_rx_wakeup_if_needing
> - fix indents and return value for errno
> ---
> Documentation/intro/install/afxdp.rst | 15 ++++-
> acinclude.m4 | 5 ++
> lib/netdev-afxdp.c | 108 ++++++++++++++++++++++++++++++----
> lib/netdev-linux-private.h | 2 +
> vswitchd/vswitch.xml | 12 ++++
> 5 files changed, 126 insertions(+), 16 deletions(-)
>
> diff --git a/Documentation/intro/install/afxdp.rst b/Documentation/intro/install/afxdp.rst
> index 820e9d993d8f..545516b2bbec 100644
> --- a/Documentation/intro/install/afxdp.rst
> +++ b/Documentation/intro/install/afxdp.rst
> @@ -176,9 +176,18 @@ in :doc:`general`::
> ovs-vswitchd ...
> ovs-vsctl -- add-br br0 -- set Bridge br0 datapath_type=netdev
>
> -Make sure your device driver support AF_XDP, and to use 1 PMD (on core 4)
> -on 1 queue (queue 0) device, configure these options: **pmd-cpu-mask,
> -pmd-rxq-affinity, and n_rxq**. The **xdpmode** can be "drv" or "skb"::
> +Make sure your device driver support AF_XDP, netdev-afxdp supports
> +the following additional options (see man ovs-vswitchd.conf.db for
> +more details):
> +
> + * **xdpmode**: use "drv" for driver mode, or "skb" for skb mode.
> +
> + * **use_need_wakeup**: disable by setting to "false", otherwise default
> + is "true"
> +
> +For example, to use 1 PMD (on core 4) on 1 queue (queue 0) device,
> +configure these options: **pmd-cpu-mask, pmd-rxq-affinity, and n_rxq**.
> +The **xdpmode** can be "drv" or "skb"::
>
> ethtool -L enp2s0 combined 1
> ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0x10
> diff --git a/acinclude.m4 b/acinclude.m4
> index f0e38898b17a..f1ecb86adf7c 100644
> --- a/acinclude.m4
> +++ b/acinclude.m4
> @@ -276,6 +276,11 @@ AC_DEFUN([OVS_CHECK_LINUX_AF_XDP], [
> [Define to 1 if AF_XDP support is available and enabled.])
> LIBBPF_LDADD=" -lbpf -lelf"
> AC_SUBST([LIBBPF_LDADD])
> +
> + AC_CHECK_DECL([xsk_ring_prod__needs_wakeup], [
> + AC_DEFINE([HAVE_XDP_NEED_WAKEUP], [1],
> + [XDP need wakeup support detected in xsk.h.])
> + ], [], [[#include <bpf/xsk.h>]])
> fi
> AM_CONDITIONAL([HAVE_AF_XDP], test "$AF_XDP_ENABLE" = true)
> ])
> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> index 6e01803272aa..fee9413bfd0a 100644
> --- a/lib/netdev-afxdp.c
> +++ b/lib/netdev-afxdp.c
> @@ -26,6 +26,7 @@
> #include <linux/rtnetlink.h>
> #include <linux/if_xdp.h>
> #include <net/if.h>
> +#include <poll.h>
> #include <stdlib.h>
> #include <sys/resource.h>
> #include <sys/socket.h>
> @@ -82,7 +83,7 @@ BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS);
> #define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char *)base))
>
> static struct xsk_socket_info *xsk_configure(int ifindex, int xdp_queue_id,
> - int mode);
> + int mode, bool use_need_wakeup);
> static void xsk_remove_xdp_program(uint32_t ifindex, int xdpmode);
> static void xsk_destroy(struct xsk_socket_info *xsk);
> static int xsk_configure_all(struct netdev *netdev);
> @@ -117,6 +118,54 @@ struct xsk_socket_info {
> atomic_uint64_t tx_dropped;
> };
>
> +#ifdef HAVE_XDP_NEED_WAKEUP
> +static inline void
> +xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
> + struct netdev *netdev, int fd)
> +{
> + struct netdev_linux *dev = netdev_linux_cast(netdev);
> + struct pollfd pfd;
> + int ret;
> +
> + if (!dev->use_need_wakeup) {
> + return;
> + }
> +
> + if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
> + pfd.fd = fd;
> + pfd.events = POLLIN;
> +
> + ret = poll(&pfd, 1, 0);
> + if (OVS_UNLIKELY(ret < 0)) {
> + VLOG_WARN_RL(&rl, "%s: error polling rx fd: %s.",
> + netdev_get_name(netdev),
> + ovs_strerror(errno));
> + }
> + }
> +}
> +
> +static inline bool
> +xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info)
> +{
> + return xsk_ring_prod__needs_wakeup(&xsk_info->tx);
> +}
> +
> +#else /* !HAVE_XDP_NEED_WAKEUP */
> +static inline void
> +xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem OVS_UNUSED,
> + struct netdev *netdev OVS_UNUSED,
> + int fd OVS_UNUSED)
> +{
> + /* Nothing. */
> +}
> +
> +static inline bool
> +xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info OVS_UNUSED)
> +{
> + return true;
> +}
> +#endif /* HAVE_XDP_NEED_WAKEUP */
> +
> static void
> netdev_afxdp_cleanup_unused_pool(struct unused_pool *pool)
> {
> @@ -234,7 +283,7 @@ xsk_configure_umem(void *buffer, uint64_t size, int xdpmode)
>
> static struct xsk_socket_info *
> xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
> - uint32_t queue_id, int xdpmode)
> + uint32_t queue_id, int xdpmode, bool use_need_wakeup)
> {
> struct xsk_socket_config cfg;
> struct xsk_socket_info *xsk;
> @@ -257,6 +306,12 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
> cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_SKB_MODE;
> }
>
> + if (use_need_wakeup) {
> +#ifdef HAVE_XDP_NEED_WAKEUP
> + cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
> +#endif
> + }
> +
> if (if_indextoname(ifindex, devname) == NULL) {
> VLOG_ERR("ifindex %d to devname failed (%s)",
> ifindex, ovs_strerror(errno));
> @@ -267,9 +322,11 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
> ret = xsk_socket__create(&xsk->xsk, devname, queue_id, umem->umem,
> &xsk->rx, &xsk->tx, &cfg);
> if (ret) {
> - VLOG_ERR("xsk_socket__create failed (%s) mode: %s qid: %d",
> + VLOG_ERR("xsk_socket__create failed (%s) mode: %s "
> + "use_need_wakeup: %s qid: %d",
> ovs_strerror(errno),
> xdpmode == XDP_COPY ? "SKB": "DRV",
> + use_need_wakeup ? "true" : "false",
> queue_id);
> free(xsk);
> return NULL;
> @@ -311,7 +368,8 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
> }
>
> static struct xsk_socket_info *
> -xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
> +xsk_configure(int ifindex, int xdp_queue_id, int xdpmode,
> + bool use_need_wakeup)
> {
> struct xsk_socket_info *xsk;
> struct xsk_umem_info *umem;
> @@ -334,7 +392,8 @@ xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
>
> VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
>
> - xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode);
> + xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode,
> + use_need_wakeup);
> if (!xsk) {
> /* Clean up umem and xpacket pool. */
> if (xsk_umem__delete(umem->umem)) {
> @@ -365,9 +424,12 @@ xsk_configure_all(struct netdev *netdev)
>
> /* Configure each queue. */
> for (i = 0; i < n_rxq; i++) {
> - VLOG_INFO("%s: configure queue %d mode %s", __func__, i,
> - dev->xdpmode == XDP_COPY ? "SKB" : "DRV");
> - xsk_info = xsk_configure(ifindex, i, dev->xdpmode);
> + VLOG_DBG("%s: configure queue %d mode %s use_need_wakeup %s.",
> + netdev_get_name(netdev), i,
> + dev->xdpmode == XDP_COPY ? "SKB" : "DRV",
> + dev->use_need_wakeup ? "true" : "false");
> + xsk_info = xsk_configure(ifindex, i, dev->xdpmode,
> + dev->use_need_wakeup);
> if (!xsk_info) {
> VLOG_ERR("Failed to create AF_XDP socket on queue %d.", i);
> dev->xsks[i] = NULL;
> @@ -459,6 +521,7 @@ netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
> struct netdev_linux *dev = netdev_linux_cast(netdev);
> const char *str_xdpmode;
> int xdpmode, new_n_rxq;
> + bool need_wakeup;
>
> ovs_mutex_lock(&dev->mutex);
> new_n_rxq = MAX(smap_get_int(args, "n_rxq", NR_QUEUE), 1);
> @@ -481,10 +544,19 @@ netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
> return EINVAL;
> }
>
> + need_wakeup = smap_get_bool(args, "use_need_wakeup", true);
> + if (need_wakeup) {
> +#ifndef HAVE_XDP_NEED_WAKEUP
> + VLOG_WARN("XDP need_wakeup is not supported in libbpf.");
This warning breaks all the tests on system without need_wakeup support.
It seems better to enable this feature by default only if supported by libbpf.
One more thing that we need to actually drop need_wakeup to false here,
otherwise get_config() and some log messages will report wrong information.
Suggesting following incremental:
diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index a516cc727..60259bdd3 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -68,6 +68,12 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
#define PROD_NUM_DESCS XSK_RING_PROD__DEFAULT_NUM_DESCS
#define CONS_NUM_DESCS XSK_RING_CONS__DEFAULT_NUM_DESCS
+#ifdef HAVE_XDP_NEED_WAKEUP
+#define NEED_WAKEUP_DEFAULT true
+#else
+#define NEED_WAKEUP_DEFAULT false
+#endif
+
/* The worst case is all 4 queues TX/CQ/RX/FILL are full + some packets
* still on processing in threads. Number of packets currently in OVS
* processing is hard to estimate because it depends on number of ports.
@@ -307,11 +313,11 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_SKB_MODE;
}
- if (use_need_wakeup) {
#ifdef HAVE_XDP_NEED_WAKEUP
+ if (use_need_wakeup) {
cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
-#endif
}
+#endif
if (if_indextoname(ifindex, devname) == NULL) {
VLOG_ERR("ifindex %d to devname failed (%s)",
@@ -545,12 +551,13 @@ netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
return EINVAL;
}
- need_wakeup = smap_get_bool(args, "use_need_wakeup", true);
- if (need_wakeup) {
+ need_wakeup = smap_get_bool(args, "use_need_wakeup", NEED_WAKEUP_DEFAULT);
#ifndef HAVE_XDP_NEED_WAKEUP
+ if (need_wakeup) {
VLOG_WARN("XDP need_wakeup is not supported in libbpf.");
-#endif
+ need_wakeup = false;
}
+#endif
if (dev->requested_n_rxq != new_n_rxq
|| dev->requested_xdpmode != xdpmode
@@ -1049,7 +1056,7 @@ netdev_afxdp_construct(struct netdev *netdev)
dev->requested_n_rxq = NR_QUEUE;
dev->requested_xdpmode = XDP_COPY;
- dev->requested_need_wakeup = true;
+ dev->requested_need_wakeup = NEED_WAKEUP_DEFAULT;
dev->xsks = NULL;
dev->tx_locks = NULL;
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 24d061891..decccfe84 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -3130,7 +3130,7 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
syscall and wakes up TX, using sendto() syscall. For physical
devices, this feature improves the performance by avoiding
unnecessary sendto syscalls.
- Defaults is true.
+ Defaults to true if supported by libbpf.
</p>
</column>
---
Some updates about default value also will be needed in commit message and,
probably, in afxdp docs.
Best regards, Ilya Maximets.
More information about the dev
mailing list