[ovs-dev] [PATCHv5] netdev-afxdp: Add need_wakeup supprt.

Ilya Maximets i.maximets at ovn.org
Fri Oct 18 15:14:46 UTC 2019


On 26.09.2019 21:29, William Tu wrote:
> The patch adds support for using need_wakeup flag in AF_XDP rings.
> A new option, use_need_wakeup, is added. When this option is used,
> it means that OVS has to explicitly wake up the kernel RX, using poll()
> syscall and wake up TX, using sendto() syscall. This feature improves
> the performance by avoiding unnecessary sendto syscalls for TX.
> For RX, instead of kernel always busy-spinning on fille queue, OVS wakes
> up the kernel RX processing when fill queue is replenished.
> 
> The need_wakeup feature is merged into Linux kernel bpf-next tee with commit
> 77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP rings") and
> OVS enables it by default. Running the feature before this version causes
> xsk bind fails, please use options:use_need_wakeup=false to disable it.
> If users enable it but runs in an older version of libbpf, then the
> need_wakeup feature has no effect, and a warning message is logged.
> 
> For virtual interface, it's better set use_need_wakeup=false, since
> the virtual device's AF_XDP xmit is synchronous: the sendto syscall
> enters kernel and process the TX packet on tx queue directly.
> 
> On Intel Xeon E5-2620 v3 2.4GHz system, performance of physical port
> to physical port improves from 6.1Mpps to 7.3Mpps.
> 
> Suggested-by: Ilya Maximets <i.maximets at ovn.org>
> Signed-off-by: William Tu <u9012063 at gmail.com>
> ---
> v5:
> - address feedback from Ilya
>    - update commit msg about kernel version using bpf-next tree
>    - remove __func__, and use DBG in log_
>    - fix alignment
>    - remove the kernel version requirement for need_wakeup, we can
>      wait until tag is available
> 
> v4:
> - move use_need_wakeup check inside xsk_rx_wakeup_if_needed
> 
> v3:
> - add warning when user enables it but libbpf not support it
> - revise documentation
> 
> v2:
> - address feedbacks from Ilya and Eelco
> - add options:use_need_wakeup, default to true
> - remove poll timeout=1sec, make poll() return immediately
> - naming change: rename to xsk_rx_wakeup_if_needing
> - fix indents and return value for errno
> ---
>   Documentation/intro/install/afxdp.rst |  15 ++++-
>   acinclude.m4                          |   5 ++
>   lib/netdev-afxdp.c                    | 108 ++++++++++++++++++++++++++++++----
>   lib/netdev-linux-private.h            |   2 +
>   vswitchd/vswitch.xml                  |  12 ++++
>   5 files changed, 126 insertions(+), 16 deletions(-)
> 
> diff --git a/Documentation/intro/install/afxdp.rst b/Documentation/intro/install/afxdp.rst
> index 820e9d993d8f..545516b2bbec 100644
> --- a/Documentation/intro/install/afxdp.rst
> +++ b/Documentation/intro/install/afxdp.rst
> @@ -176,9 +176,18 @@ in :doc:`general`::
>     ovs-vswitchd ...
>     ovs-vsctl -- add-br br0 -- set Bridge br0 datapath_type=netdev
>   
> -Make sure your device driver support AF_XDP, and to use 1 PMD (on core 4)
> -on 1 queue (queue 0) device, configure these options: **pmd-cpu-mask,
> -pmd-rxq-affinity, and n_rxq**. The **xdpmode** can be "drv" or "skb"::
> +Make sure your device driver support AF_XDP, netdev-afxdp supports
> +the following additional options (see man ovs-vswitchd.conf.db for
> +more details):
> +
> + * **xdpmode**: use "drv" for driver mode, or "skb" for skb mode.
> +
> + * **use_need_wakeup**: disable by setting to "false", otherwise default
> +   is "true"
> +
> +For example, to use 1 PMD (on core 4) on 1 queue (queue 0) device,
> +configure these options: **pmd-cpu-mask, pmd-rxq-affinity, and n_rxq**.
> +The **xdpmode** can be "drv" or "skb"::
>   
>     ethtool -L enp2s0 combined 1
>     ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0x10
> diff --git a/acinclude.m4 b/acinclude.m4
> index f0e38898b17a..f1ecb86adf7c 100644
> --- a/acinclude.m4
> +++ b/acinclude.m4
> @@ -276,6 +276,11 @@ AC_DEFUN([OVS_CHECK_LINUX_AF_XDP], [
>                 [Define to 1 if AF_XDP support is available and enabled.])
>       LIBBPF_LDADD=" -lbpf -lelf"
>       AC_SUBST([LIBBPF_LDADD])
> +
> +    AC_CHECK_DECL([xsk_ring_prod__needs_wakeup], [
> +      AC_DEFINE([HAVE_XDP_NEED_WAKEUP], [1],
> +        [XDP need wakeup support detected in xsk.h.])
> +    ], [], [[#include <bpf/xsk.h>]])
>     fi
>     AM_CONDITIONAL([HAVE_AF_XDP], test "$AF_XDP_ENABLE" = true)
>   ])
> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> index 6e01803272aa..fee9413bfd0a 100644
> --- a/lib/netdev-afxdp.c
> +++ b/lib/netdev-afxdp.c
> @@ -26,6 +26,7 @@
>   #include <linux/rtnetlink.h>
>   #include <linux/if_xdp.h>
>   #include <net/if.h>
> +#include <poll.h>
>   #include <stdlib.h>
>   #include <sys/resource.h>
>   #include <sys/socket.h>
> @@ -82,7 +83,7 @@ BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS);
>   #define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char *)base))
>   
>   static struct xsk_socket_info *xsk_configure(int ifindex, int xdp_queue_id,
> -                                             int mode);
> +                                             int mode, bool use_need_wakeup);
>   static void xsk_remove_xdp_program(uint32_t ifindex, int xdpmode);
>   static void xsk_destroy(struct xsk_socket_info *xsk);
>   static int xsk_configure_all(struct netdev *netdev);
> @@ -117,6 +118,54 @@ struct xsk_socket_info {
>       atomic_uint64_t tx_dropped;
>   };
>   
> +#ifdef HAVE_XDP_NEED_WAKEUP
> +static inline void
> +xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
> +                        struct netdev *netdev, int fd)
> +{
> +    struct netdev_linux *dev = netdev_linux_cast(netdev);
> +    struct pollfd pfd;
> +    int ret;
> +
> +    if (!dev->use_need_wakeup) {
> +        return;
> +    }
> +
> +    if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
> +        pfd.fd = fd;
> +        pfd.events = POLLIN;
> +
> +        ret = poll(&pfd, 1, 0);
> +        if (OVS_UNLIKELY(ret < 0)) {
> +            VLOG_WARN_RL(&rl, "%s: error polling rx fd: %s.",
> +                         netdev_get_name(netdev),
> +                         ovs_strerror(errno));
> +        }
> +    }
> +}
> +
> +static inline bool
> +xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info)
> +{
> +    return xsk_ring_prod__needs_wakeup(&xsk_info->tx);
> +}
> +
> +#else /* !HAVE_XDP_NEED_WAKEUP */
> +static inline void
> +xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem OVS_UNUSED,
> +                        struct netdev *netdev OVS_UNUSED,
> +                        int fd OVS_UNUSED)
> +{
> +    /* Nothing. */
> +}
> +
> +static inline bool
> +xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info OVS_UNUSED)
> +{
> +    return true;
> +}
> +#endif /* HAVE_XDP_NEED_WAKEUP */
> +
>   static void
>   netdev_afxdp_cleanup_unused_pool(struct unused_pool *pool)
>   {
> @@ -234,7 +283,7 @@ xsk_configure_umem(void *buffer, uint64_t size, int xdpmode)
>   
>   static struct xsk_socket_info *
>   xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
> -                     uint32_t queue_id, int xdpmode)
> +                     uint32_t queue_id, int xdpmode, bool use_need_wakeup)
>   {
>       struct xsk_socket_config cfg;
>       struct xsk_socket_info *xsk;
> @@ -257,6 +306,12 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
>           cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_SKB_MODE;
>       }
>   
> +    if (use_need_wakeup) {
> +#ifdef HAVE_XDP_NEED_WAKEUP
> +        cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
> +#endif
> +    }
> +
>       if (if_indextoname(ifindex, devname) == NULL) {
>           VLOG_ERR("ifindex %d to devname failed (%s)",
>                    ifindex, ovs_strerror(errno));
> @@ -267,9 +322,11 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
>       ret = xsk_socket__create(&xsk->xsk, devname, queue_id, umem->umem,
>                                &xsk->rx, &xsk->tx, &cfg);
>       if (ret) {
> -        VLOG_ERR("xsk_socket__create failed (%s) mode: %s qid: %d",
> +        VLOG_ERR("xsk_socket__create failed (%s) mode: %s "
> +                 "use_need_wakeup: %s qid: %d",
>                    ovs_strerror(errno),
>                    xdpmode == XDP_COPY ? "SKB": "DRV",
> +                 use_need_wakeup ? "true" : "false",
>                    queue_id);
>           free(xsk);
>           return NULL;
> @@ -311,7 +368,8 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
>   }
>   
>   static struct xsk_socket_info *
> -xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
> +xsk_configure(int ifindex, int xdp_queue_id, int xdpmode,
> +              bool use_need_wakeup)
>   {
>       struct xsk_socket_info *xsk;
>       struct xsk_umem_info *umem;
> @@ -334,7 +392,8 @@ xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
>   
>       VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
>   
> -    xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode);
> +    xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode,
> +                               use_need_wakeup);
>       if (!xsk) {
>           /* Clean up umem and xpacket pool. */
>           if (xsk_umem__delete(umem->umem)) {
> @@ -365,9 +424,12 @@ xsk_configure_all(struct netdev *netdev)
>   
>       /* Configure each queue. */
>       for (i = 0; i < n_rxq; i++) {
> -        VLOG_INFO("%s: configure queue %d mode %s", __func__, i,
> -                  dev->xdpmode == XDP_COPY ? "SKB" : "DRV");
> -        xsk_info = xsk_configure(ifindex, i, dev->xdpmode);
> +        VLOG_DBG("%s: configure queue %d mode %s use_need_wakeup %s.",
> +                 netdev_get_name(netdev), i,
> +                 dev->xdpmode == XDP_COPY ? "SKB" : "DRV",
> +                 dev->use_need_wakeup ? "true" : "false");
> +        xsk_info = xsk_configure(ifindex, i, dev->xdpmode,
> +                                 dev->use_need_wakeup);
>           if (!xsk_info) {
>               VLOG_ERR("Failed to create AF_XDP socket on queue %d.", i);
>               dev->xsks[i] = NULL;
> @@ -459,6 +521,7 @@ netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
>       struct netdev_linux *dev = netdev_linux_cast(netdev);
>       const char *str_xdpmode;
>       int xdpmode, new_n_rxq;
> +    bool need_wakeup;
>   
>       ovs_mutex_lock(&dev->mutex);
>       new_n_rxq = MAX(smap_get_int(args, "n_rxq", NR_QUEUE), 1);
> @@ -481,10 +544,19 @@ netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
>           return EINVAL;
>       }
>   
> +    need_wakeup = smap_get_bool(args, "use_need_wakeup", true);
> +    if (need_wakeup) {
> +#ifndef HAVE_XDP_NEED_WAKEUP
> +        VLOG_WARN("XDP need_wakeup is not supported in libbpf.");

This warning breaks all the tests on system without need_wakeup support.
It seems better to enable this feature by default only if supported by libbpf.

One more thing that we need to actually drop need_wakeup to false here,
otherwise get_config() and some log messages will report wrong information.

Suggesting following incremental:

diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index a516cc727..60259bdd3 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -68,6 +68,12 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
  #define PROD_NUM_DESCS      XSK_RING_PROD__DEFAULT_NUM_DESCS
  #define CONS_NUM_DESCS      XSK_RING_CONS__DEFAULT_NUM_DESCS
  
+#ifdef HAVE_XDP_NEED_WAKEUP
+#define NEED_WAKEUP_DEFAULT true
+#else
+#define NEED_WAKEUP_DEFAULT false
+#endif
+
  /* The worst case is all 4 queues TX/CQ/RX/FILL are full + some packets
   * still on processing in threads. Number of packets currently in OVS
   * processing is hard to estimate because it depends on number of ports.
@@ -307,11 +313,11 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
          cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_SKB_MODE;
      }
  
-    if (use_need_wakeup) {
  #ifdef HAVE_XDP_NEED_WAKEUP
+    if (use_need_wakeup) {
          cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
-#endif
      }
+#endif
  
      if (if_indextoname(ifindex, devname) == NULL) {
          VLOG_ERR("ifindex %d to devname failed (%s)",
@@ -545,12 +551,13 @@ netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
          return EINVAL;
      }
  
-    need_wakeup = smap_get_bool(args, "use_need_wakeup", true);
-    if (need_wakeup) {
+    need_wakeup = smap_get_bool(args, "use_need_wakeup", NEED_WAKEUP_DEFAULT);
  #ifndef HAVE_XDP_NEED_WAKEUP
+    if (need_wakeup) {
          VLOG_WARN("XDP need_wakeup is not supported in libbpf.");
-#endif
+        need_wakeup = false;
      }
+#endif
  
      if (dev->requested_n_rxq != new_n_rxq
          || dev->requested_xdpmode != xdpmode
@@ -1049,7 +1056,7 @@ netdev_afxdp_construct(struct netdev *netdev)
  
      dev->requested_n_rxq = NR_QUEUE;
      dev->requested_xdpmode = XDP_COPY;
-    dev->requested_need_wakeup = true;
+    dev->requested_need_wakeup = NEED_WAKEUP_DEFAULT;
  
      dev->xsks = NULL;
      dev->tx_locks = NULL;
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 24d061891..decccfe84 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -3130,7 +3130,7 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
            syscall and wakes up TX, using sendto() syscall. For physical
            devices, this feature improves the performance by avoiding
            unnecessary sendto syscalls.
-          Defaults is true.
+          Defaults to true if supported by libbpf.
          </p>
        </column>
  
---

Some updates about default value also will be needed in commit message and,
probably, in afxdp docs.

Best regards, Ilya Maximets.


More information about the dev mailing list