[ovs-dev] [PATCH 2/2] netdev-afxdp: Add need_wakeup supprt.

Ilya Maximets i.maximets at samsung.com
Wed Sep 4 12:19:29 UTC 2019


Hi William,

Thanks for the patch!
One general comment is that we, probably, should make this feature
configurable.  There are 2 reasons:

1. We might want to run OVS on older kernel while building with more
   recent libbpf. In this case socket creation will fail due to
   unsupported flag and we'll not be able to make it work.
2. Performance impact is not always positive. It depends on the
   number of available CPU cores, port types (phisical or virtual),
   interrupts affinity.  And this was proved by test results from
   Eelco.  So, it might be good to have control over the enabling/
   disabling the feature.

However, I think that it's better to keep it enabled by default.

Some comments inline.

Best regards, Ilya Maximets.

On 27.08.2019 2:02, William Tu wrote:
> The patch adds support for using need_wakeup flag in AF_XDP rings.
> When this flag is used, it means that OVS has to explicitly wake
> up the kernel RX, using poll() syscall and wake up TX, using sendto()
> syscall. This feature improves the performance by avoiding unnecessary
> syscalls, so keeping more CPU time in userspace to process packets.
> 
> On Intel Xeon E5-2620 v3 2.4GHz system, performance of physical port
> to physical port improves from 6.1Mpps to 7.3Mpps.
> 
> Suggested-by: Eelco Chaudron <echaudro at redhat.com>

Wasn't it me? :) Just curious.

> Signed-off-by: William Tu <u9012063 at gmail.com>
> ---
>  acinclude.m4       |  5 +++++
>  lib/netdev-afxdp.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 56 insertions(+)
> 
> diff --git a/acinclude.m4 b/acinclude.m4
> index 116ffcf9096d..8821b821ec3c 100644
> --- a/acinclude.m4
> +++ b/acinclude.m4
> @@ -276,6 +276,11 @@ AC_DEFUN([OVS_CHECK_LINUX_AF_XDP], [
>                [Define to 1 if AF_XDP support is available and enabled.])
>      LIBBPF_LDADD=" -lbpf -lelf"
>      AC_SUBST([LIBBPF_LDADD])
> +
> +    AC_CHECK_DECL([xsk_ring_prod__needs_wakeup], [
> +      AC_DEFINE([HAVE_XDP_NEED_WAKEUP], [1],
> +        [XDP need wakeup support detected in xsk.h.])
> +    ], [], [#include <bpf/xsk.h>])
>    fi
>    AM_CONDITIONAL([HAVE_AF_XDP], test "$AF_XDP_ENABLE" = true)
>  ])
> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> index e5b058d08a09..d14d100e8fa3 100644
> --- a/lib/netdev-afxdp.c
> +++ b/lib/netdev-afxdp.c
> @@ -26,6 +26,7 @@
>  #include <linux/rtnetlink.h>
>  #include <linux/if_xdp.h>
>  #include <net/if.h>
> +#include <poll.h>
>  #include <stdlib.h>
>  #include <sys/resource.h>
>  #include <sys/socket.h>
> @@ -117,6 +118,48 @@ struct xsk_socket_info {
>      atomic_uint64_t tx_dropped;
>  };
>  
> +#ifdef HAVE_XDP_NEED_WAKEUP
> +static inline void
> +xsk_rx_need_wakeup(struct xsk_umem_info *umem,

Function name is misleading, because it actually tries to wake
rx up, not just checking. Something like 'xsk_rx_wakeup' or
'xsk_rx_wakeup_if_needed' might be more suitable.
Naming suggestions are welcome.

> +                   struct netdev *netdev, int fd) {
> +    struct pollfd pfd;
> +    int ret;
> +
> +    if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
> +        pfd.fd = fd;
> +        pfd.events = POLLIN;
> +
> +        ret = poll(&pfd, 1, 1000);

Agree with Eelco that we shouldn't pass 1000 there. Looks dangerous.

> +        if (OVS_UNLIKELY(ret == 0)) {
> +            VLOG_WARN_RL(&rl, "%s: poll rx fd timeout.",
> +                    netdev_get_name(netdev));

Indents are off here and below. Please, align arguments to the next
symbol after '('.

> +        } else if (OVS_UNLIKELY(ret < 0)) {
> +            VLOG_WARN_RL(&rl, "%s: error polling rx fd: %s.",
> +                    netdev_get_name(netdev),
> +                    ovs_strerror(ret));

'ret' is always -1 on failure. errno should be there.

> +        }
> +    }
> +}
> +
> +static inline bool
> +xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info)
> +{
> +    return xsk_ring_prod__needs_wakeup(&xsk_info->tx);
> +}
> +#else
> +static inline void
> +xsk_rx_need_wakeup(struct xsk_umem_info *umem OVS_UNUSED,
> +                   struct netdev *netdev OVS_UNUSED, int fd OVS_UNUSED) {
> +    /* Nothing. */
> +}
> +
> +static inline bool
> +xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info OVS_UNUSED)
> +{
> +    return true;
> +}
> +#endif /* HAVE_XDP_NEED_WAKEUP */
> +
>  static void
>  netdev_afxdp_cleanup_unused_pool(struct unused_pool *pool)
>  {
> @@ -257,6 +300,10 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
>          cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_SKB_MODE;
>      }
>  
> +#ifdef HAVE_XDP_NEED_WAKEUP
> +    cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
> +#endif
> +
>      if (if_indextoname(ifindex, devname) == NULL) {
>          VLOG_ERR("ifindex %d to devname failed (%s)",
>                   ifindex, ovs_strerror(errno));
> @@ -660,6 +707,7 @@ netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
>  
>      rcvd = xsk_ring_cons__peek(&xsk_info->rx, BATCH_SIZE, &idx_rx);
>      if (!rcvd) {
> +        xsk_rx_need_wakeup(umem, netdev, rx->fd);
>          return EAGAIN;
>      }
>  
> @@ -709,6 +757,9 @@ kick_tx(struct xsk_socket_info *xsk_info, int xdpmode)
>      int ret, retries;
>      static const int KERNEL_TX_BATCH_SIZE = 16;
>  
> +    if (!xsk_tx_need_wakeup(xsk_info)) {
> +        return 0;
> +    }
>      /* In SKB_MODE packet transmission is synchronous, and the kernel xmits
>       * only TX_BATCH_SIZE(16) packets for a single sendmsg syscall.
>       * So, we have to kick the kernel (n_packets / 16) times to be sure that
> 


More information about the dev mailing list