[ovs-dev] [PATCH RFC] netdev-afxdp: Enable shared umem support.

Eelco Chaudron echaudro at redhat.com
Wed Nov 6 07:49:53 UTC 2019



On 5 Nov 2019, at 23:16, William Tu wrote:

> This RFC patch enables shared umem support. It requires a kernel
> change and a libbpf change, which I will post in another thread. I
> tested with multiple afxdp ports using skb mode. For example:
>   ovs-vsctl -- set interface afxdp-p0 options:n_rxq=1 type="afxdp" options:xdpmode=skb
>   ovs-vsctl -- set interface afxdp-p1 options:n_rxq=1 type="afxdp" options:xdpmode=skb
> The two ports will share one umem instead of using two.
>
> Note that once a shared umem is created with a specific mode (e.g.
> XDP_COPY), a netdev that shares this umem cannot change its mode. So
> I'm thinking about using just one shared umem for all skb-mode
> netdevs, and keeping dedicated umems for the drv-mode netdevs. Or
> should we create one umem per mode, so that the drv-mode netdevs also
> share one umem?


Hi William,

Did not go through the entire patch, but wasn’t Magnus hinting at not
sharing the umem rings, but only the buffers they point to?

This way you do not need any locking for the ring operations, only
when getting buffers. I guess this is more like the mbuf
implementation in DPDK.
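
To make it concrete, here is a rough sketch of the direction I mean
(untested; shared_buf_pool, buf_pool_get() and buf_pool_put() are
made-up names, while NUM_FRAMES and ovs_mutex are the ones
netdev-afxdp.c already uses). Each socket keeps its own
rx/tx/fill/completion rings; only the pool of free frames is shared
and needs the mutex:

struct shared_buf_pool {
    struct ovs_mutex mutex;       /* Serializes free_list access only. */
    void *free_list[NUM_FRAMES];  /* Free frames in the shared umem area. */
    int n_free;
};

/* Pop one free frame, or return NULL if the pool is empty. */
static void *
buf_pool_get(struct shared_buf_pool *pool)
{
    void *buf = NULL;

    ovs_mutex_lock(&pool->mutex);
    if (pool->n_free > 0) {
        buf = pool->free_list[--pool->n_free];
    }
    ovs_mutex_unlock(&pool->mutex);
    return buf;
}

/* Push a frame back once rx/tx is done with it. */
static void
buf_pool_put(struct shared_buf_pool *pool, void *buf)
{
    ovs_mutex_lock(&pool->mutex);
    pool->free_list[pool->n_free++] = buf;
    ovs_mutex_unlock(&pool->mutex);
}

This way the receive and send paths never contend on a lock for the
ring operations themselves; only frame allocation and release are
serialized, much like ports drawing mbufs from a shared DPDK mempool.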

//Eelco
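
P.S. On your "one umem per mode" question: if you do keep sharing
whole umems, maybe a small table indexed by the bind mode would be
enough instead of the single global pointer. Again just a sketch with
made-up names:

/* One shared umem per bind mode; an entry stays NULL until first use. */
enum { OVS_XDP_MODE_SKB, OVS_XDP_MODE_DRV, OVS_XDP_N_MODES };

static struct xsk_umem_info *shared_umem_by_mode[OVS_XDP_N_MODES];

That way the skb-mode netdevs share one umem, the drv-mode netdevs
share another, and an existing umem never has to change its mode.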

> Any comments are welcome.
>
> Suggested-by: Eelco Chaudron <echaudro at redhat.com>
> Signed-off-by: William Tu <u9012063 at gmail.com>
> ---
>  lib/netdev-afxdp.c | 97 +++++++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 70 insertions(+), 27 deletions(-)
>
> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> index 3037770b27cb..42767e1e27f3 100644
> --- a/lib/netdev-afxdp.c
> +++ b/lib/netdev-afxdp.c
> @@ -95,6 +95,8 @@ static void xsk_destroy(struct xsk_socket_info *xsk);
>  static int xsk_configure_all(struct netdev *netdev);
>  static void xsk_destroy_all(struct netdev *netdev);
>
> +static struct xsk_umem_info *shared_umem;
> +
>  struct unused_pool {
>      struct xsk_umem_info *umem_info;
>      int lost_in_rings; /* Number of packets left in tx, rx, cq and fq. */
> @@ -112,6 +114,8 @@ struct xsk_umem_info {
>      struct xsk_ring_cons cq;
>      struct xsk_umem *umem;
>      void *buffer;
> +    int refcount;
> +    struct ovs_mutex mutex;
>  };
>
>  struct xsk_socket_info {
> @@ -228,6 +232,7 @@ xsk_configure_umem(void *buffer, uint64_t size, int xdpmode)
>      uconfig.comp_size = CONS_NUM_DESCS;
>      uconfig.frame_size = FRAME_SIZE;
>      uconfig.frame_headroom = OVS_XDP_HEADROOM;
> +    ovs_mutex_init(&umem->mutex);
>
>      ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
>                             &uconfig);
> @@ -296,6 +301,7 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
>      struct xsk_socket_info *xsk;
>      char devname[IF_NAMESIZE];
>      uint32_t idx = 0, prog_id;
> +    bool shared;
>      int ret;
>      int i;
>
> @@ -304,6 +310,7 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
>      cfg.rx_size = CONS_NUM_DESCS;
>      cfg.tx_size = PROD_NUM_DESCS;
>      cfg.libbpf_flags = 0;
> +    shared = umem->refcount > 1;
>
>      if (xdpmode == XDP_ZEROCOPY) {
>          cfg.bind_flags = XDP_ZEROCOPY;
> @@ -319,6 +326,10 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
>      }
>  #endif
>
> +    if (shared) {
> +        cfg.bind_flags = XDP_SHARED_UMEM;
> +    }
> +
>      if (if_indextoname(ifindex, devname) == NULL) {
>          VLOG_ERR("ifindex %d to devname failed (%s)",
>                   ifindex, ovs_strerror(errno));
> @@ -352,6 +363,11 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
>          return NULL;
>      }
>
> +    if (shared) {
> +        return xsk;
> +    }
> +
> +    /* Only the first socket on this umem needs the setup below. */
>      while (!xsk_ring_prod__reserve(&xsk->umem->fq,
>                                     PROD_NUM_DESCS, &idx)) {
>          VLOG_WARN_RL(&rl, "Retry xsk_ring_prod__reserve to FILL queue");
> @@ -380,33 +396,43 @@ xsk_configure(int ifindex, int xdp_queue_id, int xdpmode,
>  {
>      struct xsk_socket_info *xsk;
>      struct xsk_umem_info *umem;
> -    void *bufs;
> +    void *bufs = NULL;
>
>      netdev_afxdp_sweep_unused_pools(NULL);
>
> -    /* Umem memory region. */
> -    bufs = xmalloc_pagealign(NUM_FRAMES * FRAME_SIZE);
> -    memset(bufs, 0, NUM_FRAMES * FRAME_SIZE);
> +    if (!shared_umem) {
> +        /* Umem memory region. */
> +        bufs = xmalloc_pagealign(NUM_FRAMES * FRAME_SIZE);
> +        memset(bufs, 0, NUM_FRAMES * FRAME_SIZE);
> +
> +        /* Create AF_XDP socket. */
> +        umem = xsk_configure_umem(bufs,
> +                                  NUM_FRAMES * FRAME_SIZE,
> +                                  xdpmode);
> +        if (!umem) {
> +            free_pagealign(bufs);
> +            return NULL;
> +        }
>
> -    /* Create AF_XDP socket. */
> -    umem = xsk_configure_umem(bufs,
> -                              NUM_FRAMES * FRAME_SIZE,
> -                              xdpmode);
> -    if (!umem) {
> -        free_pagealign(bufs);
> -        return NULL;
> +        shared_umem = umem;
> +        umem->refcount++;
> +        VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) 
> umem);
> +    } else {
> +        umem = shared_umem;
> +        umem->refcount++;
>      }
>
> -    VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
> -
>      xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode,
>                                 use_need_wakeup);
> -    if (!xsk) {
> +    if (!xsk && !--umem->refcount) {
>          /* Clean up umem and xpacket pool. */
> +        shared_umem = NULL;
>          if (xsk_umem__delete(umem->umem)) {
>              VLOG_ERR("xsk_umem__delete failed.");
>          }
> -        free_pagealign(bufs);
> +        if (bufs) {
> +            free_pagealign(bufs);
> +        }
>          umem_pool_cleanup(&umem->mpool);
>          xpacket_pool_cleanup(&umem->xpool);
>          free(umem);
> @@ -472,21 +498,29 @@ xsk_destroy(struct xsk_socket_info *xsk_info)
>      xsk_info->xsk = NULL;
>
>      umem = xsk_info->umem->umem;
> -    if (xsk_umem__delete(umem)) {
> -        VLOG_ERR("xsk_umem__delete failed.");
> -    }
> +    xsk_info->umem->refcount--;
>
> -    pool = xzalloc(sizeof *pool);
> -    pool->umem_info = xsk_info->umem;
> -    pool->lost_in_rings = xsk_info->outstanding_tx + xsk_info->available_rx;
> +    if (!xsk_info->umem->refcount) {
> +        VLOG_WARN("destroy umem");
> +        shared_umem = NULL;
> +        if (xsk_umem__delete(umem)) {
> +            VLOG_ERR("xsk_umem__delete failed.");
> +        }
> +        ovs_mutex_destroy(&xsk_info->umem->mutex);
>
> -    ovs_mutex_lock(&unused_pools_mutex);
> -    ovs_list_push_back(&unused_pools, &pool->list_node);
> -    ovs_mutex_unlock(&unused_pools_mutex);
> +        pool = xzalloc(sizeof *pool);
> +        pool->umem_info = xsk_info->umem;
> +        pool->lost_in_rings = xsk_info->outstanding_tx +
> +                              xsk_info->available_rx;
>
> -    free(xsk_info);
> +        ovs_mutex_lock(&unused_pools_mutex);
> +        ovs_list_push_back(&unused_pools, &pool->list_node);
> +        ovs_mutex_unlock(&unused_pools_mutex);
>
> -    netdev_afxdp_sweep_unused_pools(NULL);
> +        free(xsk_info);
> +
> +        netdev_afxdp_sweep_unused_pools(NULL);
> +    }
>  }
>
>  static void
> @@ -733,11 +767,14 @@ netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
>      prepare_fill_queue(xsk_info);
>
>      umem = xsk_info->umem;
> +    ovs_mutex_lock(&umem->mutex);
> +
>      rx->fd = xsk_socket__fd(xsk_info->xsk);
>
>      rcvd = xsk_ring_cons__peek(&xsk_info->rx, BATCH_SIZE, &idx_rx);
>      if (!rcvd) {
>          xsk_rx_wakeup_if_needed(umem, netdev, rx->fd);
> +        ovs_mutex_unlock(&umem->mutex);
>          return EAGAIN;
>      }
>
> @@ -778,6 +815,7 @@ netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
>          /* TODO: return the number of remaining packets in the queue. */
>          *qfill = 0;
>      }
> +    ovs_mutex_unlock(&umem->mutex);
>      return 0;
>  }
>
> @@ -924,7 +962,7 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid,
>      struct netdev_linux *dev = netdev_linux_cast(netdev);
>      struct xsk_socket_info *xsk_info;
>      void *elems_pop[BATCH_SIZE];
> -    struct xsk_umem_info *umem;
> +    struct xsk_umem_info *umem = NULL;
>      struct dp_packet *packet;
>      bool free_batch = false;
>      unsigned long orig;
> @@ -942,6 +980,8 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid,
>      free_batch = check_free_batch(batch);
>
>      umem = xsk_info->umem;
> +    ovs_mutex_lock(&umem->mutex);
> +
>      ret = umem_elem_pop_n(&umem->mpool, dp_packet_batch_size(batch),
>                            elems_pop);
>      if (OVS_UNLIKELY(ret)) {
> @@ -993,6 +1033,9 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid,
>      }
>
>  out:
> +    if (umem) {
> +        ovs_mutex_unlock(&umem->mutex);
> +    }
>      if (free_batch) {
>          free_afxdp_buf_batch(batch);
>      } else {
> -- 
> 2.7.4


