[ovs-dev] [PATCHv2] netdev-afxdp: Enable loading XDP program.

William Tu u9012063 at gmail.com
Wed Oct 30 03:29:46 UTC 2019


Hi Eelco,

Thanks for your review.

On Tue, Oct 29, 2019 at 8:28 AM Eelco Chaudron <echaudro at redhat.com> wrote:
>
> See comments inline…
>
> On 5 Oct 2019, at 2:12, William Tu wrote:
>
> > Now netdev-afxdp always forwards all packets to userspace because
> > it is using libbpf's default XDP program, see 'xsk_load_xdp_prog'.
> > There are some cases when users want to keep packets in kernel instead
> > of sending to userspace, for example, management traffic such as SSH
> > should be processed in kernel.
> >
> > The patch enables loading the user-provide XDP program by doing
> >   $ovs-vsctl -- set int afxdp-p0 options:xdpobj=<path/to/xdp/obj>
>
> Should we maybe change the name to xdp-obj?

Yes, will fix it next version.

>
> > So users can implement their filtering logic or traffic steering idea
> > in their XDP program, and rest of the traffic passes to AF_XDP socket
> > handled by OVS.
> >
> > Signed-off-by: William Tu <u9012063 at gmail.com>
> > ---
> > v2:
> >     A couple fixes and remove RFC
> > ---
> >  Documentation/intro/install/afxdp.rst |  46 ++++++++++++
> >  lib/netdev-afxdp.c                    | 137
> > ++++++++++++++++++++++++++++++----
> >  lib/netdev-linux-private.h            |   2 +
> >  3 files changed, 169 insertions(+), 16 deletions(-)
> >
> > diff --git a/Documentation/intro/install/afxdp.rst
> > b/Documentation/intro/install/afxdp.rst
> > index 820e9d993d8f..f484bdb8249b 100644
> > --- a/Documentation/intro/install/afxdp.rst
> > +++ b/Documentation/intro/install/afxdp.rst
> > @@ -265,6 +265,52 @@ Or, use OVS pmd tool::
> >    ovs-appctl dpif-netdev/pmd-stats-show
> >
> >
> > +Loading Custom XDP Program
> > +--------------------------
> > +By defailt, netdev-afxdp always forwards all packets to userspace
> > because
> > +it is using libbpf's default XDP program. There are some cases when
> > users
> > +want to keep packets in kernel instead of sending to userspace, for
> > example,
> > +management traffic such as SSH should be processed in kernel. This
> > can be
> > +done by loading the user-provided XDP program::
> > +
> > +  ovs-vsctl -- set int afxdp-p0 options:xdpobj=<path/to/xdp/obj>
> > +
> > +So users can implement their filtering logic or traffic steering idea
> > +in their XDP program, and rest of the traffic passes to AF_XDP socket
> > +handled by OVS. Below is a sample C program compiled under kernel's
> > +samples/bpf/.
> > +
> > +.. code-block:: c
> > +
> > +  #include <uapi/linux/bpf.h>
> > +  #include "bpf_helpers.h"
> > +
> > +  /* OVS will associate map 'xsks_map' to xsk socket. */
> > +  struct bpf_map_def SEC("maps") xsks_map = {
> > +      .type = BPF_MAP_TYPE_XSKMAP,
> > +      .key_size = sizeof(int),
> > +      .value_size = sizeof(int),
> > +      .max_entries = 32,
> > +  };
> > +
>
> Maybe something like this could be added to make sure it compiles with
> older kernels:
>
> #if LINUX_VERSION_CODE < KERNEL_VERSION(5,3,0)
>

Good point, I will add it.

> /* Kernel version before 5.3 needed an additional map */
> struct bpf_map_def SEC("maps") qidconf_map = {
>         .type = BPF_MAP_TYPE_ARRAY,
>         .key_size = sizeof(int),
>         .value_size = sizeof(int),
>         .max_entries = 64,
> };
> #endi
>
> See also here:
> https://github.com/chaudron/xdp-tutorial/blob/master/advanced03-AF_XDP/af_xdp_kern.c
>
> > +  SEC("xdp_sock")
> > +  int xdp_sock_prog(struct xdp_md *ctx)
> > +  {
> > +      int index = ctx->rx_queue_index;
> > +
> > +      /* Customized by user.
> > +       * For example
> > +       * 1) filter out all SSH traffic and return XDP_PASS
> > +       *    for kernel to process.
> > +       * 2) Drop unwanted packet by returning XDP_DROP.
> > +       */
> > +
> > +      /* Rest of packets goes to AF_XDP. */
> > +      return bpf_redirect_map(&xsks_map, index, 0);
> > +  }
> > +  char _license[] SEC("license") = "GPL";
> > +
> > +
> >  Example Script
> >  --------------
> >
> > diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> > index 6e01803272aa..231d6473fb67 100644
> > --- a/lib/netdev-afxdp.c
> > +++ b/lib/netdev-afxdp.c
> > @@ -21,6 +21,7 @@
> >  #include "netdev-afxdp.h"
> >  #include "netdev-afxdp-pool.h"
> >
> > +#include <bpf/bpf.h>
> >  #include <errno.h>
> >  #include <inttypes.h>
> >  #include <linux/rtnetlink.h>
> > @@ -29,6 +30,7 @@
> >  #include <stdlib.h>
> >  #include <sys/resource.h>
> >  #include <sys/socket.h>
> > +#include <sys/stat.h>
> >  #include <sys/types.h>
> >  #include <unistd.h>
> >
> > @@ -82,7 +84,7 @@ BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS);
> >  #define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char
> > *)base))
> >
> >  static struct xsk_socket_info *xsk_configure(int ifindex, int
> > xdp_queue_id,
> > -                                             int mode);
> > +                                             int mode, bool
> > use_default_xdp);
> >  static void xsk_remove_xdp_program(uint32_t ifindex, int xdpmode);
> >  static void xsk_destroy(struct xsk_socket_info *xsk);
> >  static int xsk_configure_all(struct netdev *netdev);
> > @@ -158,6 +160,51 @@ netdev_afxdp_sweep_unused_pools(void *aux
> > OVS_UNUSED)
> >      ovs_mutex_unlock(&unused_pools_mutex);
> >  }
> >
> > +static int
> > +xsk_load_prog(const char *path, struct bpf_object **obj,
> > +              int *prog_fd)
> > +{
> > +    struct bpf_prog_load_attr attr = {
> > +        .prog_type = BPF_PROG_TYPE_XDP,
> > +        .file = path,
> > +    };
> > +
> > +    if (bpf_prog_load_xattr(&attr, obj, prog_fd)) {
> > +        VLOG_WARN("Can't load XDP program at '%s'", path);
>
> I think it should be a VLOG_ERR()

OK

>
> > +        return EINVAL;
> > +    }
> > +
> > +    VLOG_INFO("Loaded XDP program at '%s'", path);
> > +    return 0;
> > +}
> > +
> > +static int
> > +xsk_configure_map(struct bpf_object *obj, struct xsk_socket_info
> > *xsk,
> > +                  int queue_id)
> > +{
> > +    struct bpf_map *map;
> > +    int xsks_map_fd;
> > +    int xsk_fd;
> > +    int ret;
> > +
> > +    map = bpf_object__find_map_by_name(obj, "xsks_map");
> > +    if (!map) {
> > +        VLOG_WARN("XDP map 'xsks_map' not found");
> > +        return EINVAL;
> > +    }
> > +
> > +    xsks_map_fd = bpf_map__fd(map);
> > +    xsk_fd = xsk_socket__fd(xsk->xsk);
> > +
> > +    ret = bpf_map_update_elem(xsks_map_fd, &queue_id, &xsk_fd, 0);
>
> Don’t think this is needed, as
> xsk_socket__create()->xsk_setup_xdp_prog()->xsk_lookup_bpf_maps() will
> take care of it.

I think this is needed, because when enable custom xdp prog
 XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD = true;
the xsk_setup_xdp_prog is skipped.
see:
    if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
        err = xsk_setup_xdp_prog(xsk);

>
> > +    if (ret) {
> > +        VLOG_WARN("Can't set XSK map, map fd: %d", xsks_map_fd);
> > +        return EINVAL;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >  static struct xsk_umem_info *
> >  xsk_configure_umem(void *buffer, uint64_t size, int xdpmode)
> >  {
> > @@ -234,7 +281,7 @@ xsk_configure_umem(void *buffer, uint64_t size,
> > int xdpmode)
> >
> >  static struct xsk_socket_info *
> >  xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
> > -                     uint32_t queue_id, int xdpmode)
> > +                     uint32_t queue_id, int xdpmode, bool
> > use_default_xdp)
> >  {
> >      struct xsk_socket_config cfg;
> >      struct xsk_socket_info *xsk;
> > @@ -257,6 +304,10 @@ xsk_configure_socket(struct xsk_umem_info *umem,
> > uint32_t ifindex,
> >          cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST |
> > XDP_FLAGS_SKB_MODE;
> >      }
> >
> > +    if (!use_default_xdp) {
> > +        cfg.libbpf_flags |= XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
> > +    }
> > +
> >      if (if_indextoname(ifindex, devname) == NULL) {
> >          VLOG_ERR("ifindex %d to devname failed (%s)",
> >                   ifindex, ovs_strerror(errno));
> > @@ -275,17 +326,19 @@ xsk_configure_socket(struct xsk_umem_info *umem,
> > uint32_t ifindex,
> >          return NULL;
> >      }
> >
> > -    /* Make sure the built-in AF_XDP program is loaded. */
> > -    ret = bpf_get_link_xdp_id(ifindex, &prog_id, cfg.xdp_flags);
> > -    if (ret || !prog_id) {
> > -        if (ret) {
> > -            VLOG_ERR("Get XDP prog ID failed (%s)",
> > ovs_strerror(errno));
> > -        } else {
> > -            VLOG_ERR("No XDP program is loaded at ifindex %d",
> > ifindex);
> > +    if (use_default_xdp) {
> > +        /* Make sure the built-in AF_XDP program is loaded. */
> > +        ret = bpf_get_link_xdp_id(ifindex, &prog_id, cfg.xdp_flags);
> > +        if (ret || !prog_id) {
> > +            if (ret) {
> > +                VLOG_ERR("Get XDP prog ID failed (%s)",
> > ovs_strerror(errno));
> > +            } else {
> > +                VLOG_ERR("No XDP program is loaded at ifindex %d",
> > ifindex);
> > +            }
> > +            xsk_socket__delete(xsk->xsk);
> > +            free(xsk);
> > +            return NULL;
> >          }
>
> I think a WARNING message should be logged when an external program is
> loaded so operators know when debugging something might be out of the
> ordinary. I would suggest to include the interface name and program
> path/name loaded.

OK I will add it.

>
> > -        xsk_socket__delete(xsk->xsk);
> > -        free(xsk);
> > -        return NULL;
> >      }
> >
> >      while (!xsk_ring_prod__reserve(&xsk->umem->fq,
> > @@ -311,7 +364,8 @@ xsk_configure_socket(struct xsk_umem_info *umem,
> > uint32_t ifindex,
> >  }
> >
> >  static struct xsk_socket_info *
> > -xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
> > +xsk_configure(int ifindex, int xdp_queue_id, int xdpmode,
> > +              bool use_default_xdp)
> >  {
> >      struct xsk_socket_info *xsk;
> >      struct xsk_umem_info *umem;
> > @@ -334,7 +388,8 @@ xsk_configure(int ifindex, int xdp_queue_id, int
> > xdpmode)
> >
> >      VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
> >
> > -    xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode);
> > +    xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode,
> > +                               use_default_xdp);
> >      if (!xsk) {
> >          /* Clean up umem and xpacket pool. */
> >          if (xsk_umem__delete(umem->umem)) {
> > @@ -354,6 +409,10 @@ xsk_configure_all(struct netdev *netdev)
> >      struct netdev_linux *dev = netdev_linux_cast(netdev);
> >      struct xsk_socket_info *xsk_info;
> >      int i, ifindex, n_rxq, n_txq;
> > +    bool use_default_xdp;
> > +    struct bpf_object *obj;
> > +    int prog_fd = 0;
> > +    int ret;
> >
> >      ifindex = linux_get_ifindex(netdev_get_name(netdev));
> >
> > @@ -362,12 +421,13 @@ xsk_configure_all(struct netdev *netdev)
> >
> >      n_rxq = netdev_n_rxq(netdev);
> >      dev->xsks = xcalloc(n_rxq, sizeof *dev->xsks);
> > +    use_default_xdp = dev->xdpobj ? false : true;
> >
> >      /* Configure each queue. */
> >      for (i = 0; i < n_rxq; i++) {
> >          VLOG_INFO("%s: configure queue %d mode %s", __func__, i,
> >                    dev->xdpmode == XDP_COPY ? "SKB" : "DRV");
> > -        xsk_info = xsk_configure(ifindex, i, dev->xdpmode);
> > +        xsk_info = xsk_configure(ifindex, i, dev->xdpmode,
> > use_default_xdp);
> >          if (!xsk_info) {
> >              VLOG_ERR("Failed to create AF_XDP socket on queue %d.",
> > i);
> >              dev->xsks[i] = NULL;
> > @@ -377,6 +437,27 @@ xsk_configure_all(struct netdev *netdev)
> >          atomic_init(&xsk_info->tx_dropped, 0);
> >          xsk_info->outstanding_tx = 0;
> >          xsk_info->available_rx = PROD_NUM_DESCS;
> > +
> > +        if (dev->xdpobj) {
> > +            if (prog_fd == 0) {
> > +                /* XDP program is per-netdev, so all queues share
> > +                   the same XDP program.
> > +                 */
> Format comment to be inline with other multiline comments

Yes, thank you.

>
> > +                ret = xsk_load_prog(dev->xdpobj, &obj, &prog_fd);
> > +                if (ret) {
> > +                    VLOG_ERR("%s: Can't load XDP object file",
> > +                             netdev_get_name(netdev));
>
> Maybe combine this failure with the one in xsk_load_prog() and report
> both the device and filename

OK, it is a little redundant.

>
> > +                    goto err;
> > +                }
> > +            }
> > +            ret = xsk_configure_map(obj, xsk_info, i);
> > +            if (ret) {
> > +                VLOG_ERR("%s: Can't configure XDP map",
> > +                         netdev_get_name(netdev));
> > +                goto err;
> > +            }
> > +            bpf_set_link_xdp_fd(ifindex, prog_fd, dev->xdpmode);
> > +        }
> >      }
> >
> >      n_txq = netdev_n_txq(netdev);
> > @@ -450,6 +531,8 @@ xsk_destroy_all(struct netdev *netdev)
> >          free(dev->tx_locks);
> >          dev->tx_locks = NULL;
> >      }
> > +    free(CONST_CAST(char *, dev->xdpobj));
> > +    dev->xdpobj = NULL;
> >  }
>
> Do we need to adjust xsk_remove_xdp_program() to free the loaded
> program/maps if it gets reconfigured with another one?

maybe, let me double check.
at xsk_load_prog, I can check whether there exists a load program and
remove it.

>
> >  int
> > @@ -458,7 +541,9 @@ netdev_afxdp_set_config(struct netdev *netdev,
> > const struct smap *args,
> >  {
> >      struct netdev_linux *dev = netdev_linux_cast(netdev);
> >      const char *str_xdpmode;
> > +    const char *str_xdpobj;
> >      int xdpmode, new_n_rxq;
> > +    struct stat s;
> >
> >      ovs_mutex_lock(&dev->mutex);
> >      new_n_rxq = MAX(smap_get_int(args, "n_rxq", NR_QUEUE), 1);
> > @@ -481,10 +566,25 @@ netdev_afxdp_set_config(struct netdev *netdev,
> > const struct smap *args,
> >          return EINVAL;
> >      }
> >
> > +    str_xdpobj = smap_get_def(args, "xdpobj", NULL);
> > +    if (str_xdpobj && stat(str_xdpobj, &s)) {
> > +        VLOG_ERR("Invalid xdpobj '%s': %s", str_xdpobj,
> > +                 ovs_strerror(errno));
> > +        ovs_mutex_unlock(&dev->mutex);
> Maybe move unlock above log?

OK!

>
> > +        return EINVAL;
> > +    } else if (str_xdpobj && !S_ISREG(s.st_mode)) {
> > +        VLOG_ERR("xdpobj '%s' is not a regular file", str_xdpobj);
> > +        ovs_mutex_unlock(&dev->mutex);
> Maybe move unlock above log?

OK!

>
> > +        return EINVAL;
> > +    }
> > +
> >      if (dev->requested_n_rxq != new_n_rxq
> > -        || dev->requested_xdpmode != xdpmode) {
> > +        || dev->requested_xdpmode != xdpmode
> > +        || !nullable_string_is_equal(dev->requested_xdpobj,
> > str_xdpobj)
> > +        || str_xdpobj) {
> >          dev->requested_n_rxq = new_n_rxq;
> >          dev->requested_xdpmode = xdpmode;
> > +        dev->requested_xdpobj = nullable_xstrdup(str_xdpobj);
> >          netdev_request_reconfigure(netdev);
> >      }
> >      ovs_mutex_unlock(&dev->mutex);
> > @@ -500,6 +600,7 @@ netdev_afxdp_get_config(const struct netdev
> > *netdev, struct smap *args)
> >      smap_add_format(args, "n_rxq", "%d", netdev->n_rxq);
> >      smap_add_format(args, "xdpmode", "%s",
> >          dev->xdpmode == XDP_ZEROCOPY ? "drv" : "skb");
> > +    smap_add_format(args, "xdpobj", "%s", dev->xdpobj);
> >      ovs_mutex_unlock(&dev->mutex);
> >      return 0;
> >  }
> > @@ -539,6 +640,8 @@ netdev_afxdp_reconfigure(struct netdev *netdev)
> >           */
> >      }
> >
> > +    dev->xdpobj = dev->requested_xdpobj;
>
> Don’t we need to free the current dev->xdpobj before we override it?

I think it already been free at xsk_destroy_all().
Thanks, will work on next version.

Regards,
William


More information about the dev mailing list