[ovs-dev] [PATCH RFC] netdev-afxdp: Enable shared umem support.

William Tu u9012063 at gmail.com
Tue Nov 5 22:16:40 UTC 2019


This RFC patch enables shared umem support. It requires a kernel change and
a libbpf change, which I will post in a separate thread. I tested with
multiple afxdp ports in skb mode.  For example:
  ovs-vsctl -- set interface afxdp-p0 options:n_rxq=1 type="afxdp" options:xdpmode=skb
  ovs-vsctl -- set interface afxdp-p1 options:n_rxq=1 type="afxdp" options:xdpmode=skb
The two ports above will share one umem instead of two.
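
For reference, here is a minimal sketch of the libbpf calls behind the
shared case (assuming the patched kernel/libbpf mentioned above; the
helper name and ring sizes are illustrative, not part of this patch):

    /* Illustrative only: attach an AF_XDP socket to an existing umem.
     * The first socket binds with the mode flag (XDP_COPY for skb
     * mode); later sockets bind with XDP_SHARED_UMEM and reuse the
     * same umem. */
    static struct xsk_socket *
    attach_to_umem(struct xsk_umem *umem, const char *ifname,
                   uint32_t queue_id, struct xsk_ring_cons *rx,
                   struct xsk_ring_prod *tx, bool first)
    {
        struct xsk_socket_config cfg = {
            .rx_size = 2048,
            .tx_size = 2048,
            .bind_flags = first ? XDP_COPY : XDP_SHARED_UMEM,
        };
        struct xsk_socket *xsk;

        if (xsk_socket__create(&xsk, ifname, queue_id, umem,
                               rx, tx, &cfg)) {
            return NULL;
        }
        return xsk;
    }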

Note that once a shared umem is created with a specific mode (e.g.,
XDP_COPY), a netdev that shares this umem cannot change its mode.  So I'm
thinking about using just one shared umem for all skb-mode netdevs, while
keeping a dedicated umem for each drv-mode netdev.  Or should we create one
umem per mode, so that the drv-mode netdevs also share a single umem?
A rough sketch of that idea is below.

Any comments are welcome.

Suggested-by: Eelco Chaudron <echaudro at redhat.com>
Signed-off-by: William Tu <u9012063 at gmail.com>
---
 lib/netdev-afxdp.c | 97 +++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 70 insertions(+), 27 deletions(-)

diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index 3037770b27cb..42767e1e27f3 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -95,6 +95,8 @@ static void xsk_destroy(struct xsk_socket_info *xsk);
 static int xsk_configure_all(struct netdev *netdev);
 static void xsk_destroy_all(struct netdev *netdev);
 
+static struct xsk_umem_info *shared_umem;
+
 struct unused_pool {
     struct xsk_umem_info *umem_info;
     int lost_in_rings; /* Number of packets left in tx, rx, cq and fq. */
@@ -112,6 +114,8 @@ struct xsk_umem_info {
     struct xsk_ring_cons cq;
     struct xsk_umem *umem;
     void *buffer;
+    int refcount;
+    struct ovs_mutex mutex;
 };
 
 struct xsk_socket_info {
@@ -228,6 +232,7 @@ xsk_configure_umem(void *buffer, uint64_t size, int xdpmode)
     uconfig.comp_size = CONS_NUM_DESCS;
     uconfig.frame_size = FRAME_SIZE;
     uconfig.frame_headroom = OVS_XDP_HEADROOM;
+    ovs_mutex_init(&umem->mutex);
 
     ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
                            &uconfig);
@@ -296,6 +301,7 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
     struct xsk_socket_info *xsk;
     char devname[IF_NAMESIZE];
     uint32_t idx = 0, prog_id;
+    bool shared;
     int ret;
     int i;
 
@@ -304,6 +310,7 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
     cfg.rx_size = CONS_NUM_DESCS;
     cfg.tx_size = PROD_NUM_DESCS;
     cfg.libbpf_flags = 0;
+    shared = umem->refcount > 1;
 
     if (xdpmode == XDP_ZEROCOPY) {
         cfg.bind_flags = XDP_ZEROCOPY;
@@ -319,6 +326,10 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
     }
 #endif
 
+    if (shared) {
+        cfg.bind_flags = XDP_SHARED_UMEM;
+    }
+
     if (if_indextoname(ifindex, devname) == NULL) {
         VLOG_ERR("ifindex %d to devname failed (%s)",
                  ifindex, ovs_strerror(errno));
@@ -352,6 +363,11 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
         return NULL;
     }
 
+    if (shared) {
+        return xsk;
+    }
+
+    /* Only the first socket on the umem populates the FILL queue below. */
     while (!xsk_ring_prod__reserve(&xsk->umem->fq,
                                    PROD_NUM_DESCS, &idx)) {
         VLOG_WARN_RL(&rl, "Retry xsk_ring_prod__reserve to FILL queue");
@@ -380,33 +396,43 @@ xsk_configure(int ifindex, int xdp_queue_id, int xdpmode,
 {
     struct xsk_socket_info *xsk;
     struct xsk_umem_info *umem;
-    void *bufs;
+    void *bufs = NULL;
 
     netdev_afxdp_sweep_unused_pools(NULL);
 
-    /* Umem memory region. */
-    bufs = xmalloc_pagealign(NUM_FRAMES * FRAME_SIZE);
-    memset(bufs, 0, NUM_FRAMES * FRAME_SIZE);
+    if (!shared_umem) {
+        /* Umem memory region. */
+        bufs = xmalloc_pagealign(NUM_FRAMES * FRAME_SIZE);
+        memset(bufs, 0, NUM_FRAMES * FRAME_SIZE);
+
+        /* Create AF_XDP socket. */
+        umem = xsk_configure_umem(bufs,
+                                  NUM_FRAMES * FRAME_SIZE,
+                                  xdpmode);
+        if (!umem) {
+            free_pagealign(bufs);
+            return NULL;
+        }
 
-    /* Create AF_XDP socket. */
-    umem = xsk_configure_umem(bufs,
-                              NUM_FRAMES * FRAME_SIZE,
-                              xdpmode);
-    if (!umem) {
-        free_pagealign(bufs);
-        return NULL;
+        shared_umem = umem;
+        umem->refcount++;
+        VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
+    } else {
+        umem = shared_umem;
+        umem->refcount++;
     }
 
-    VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
-
     xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode,
                                use_need_wakeup);
-    if (!xsk) {
+    if (!xsk && !--umem->refcount) {
         /* Clean up umem and xpacket pool. */
+        shared_umem = NULL;
         if (xsk_umem__delete(umem->umem)) {
             VLOG_ERR("xsk_umem__delete failed.");
         }
-        free_pagealign(bufs);
+        if (bufs) {
+            free_pagealign(bufs);
+        }
         umem_pool_cleanup(&umem->mpool);
         xpacket_pool_cleanup(&umem->xpool);
         free(umem);
@@ -472,21 +498,29 @@ xsk_destroy(struct xsk_socket_info *xsk_info)
     xsk_info->xsk = NULL;
 
     umem = xsk_info->umem->umem;
-    if (xsk_umem__delete(umem)) {
-        VLOG_ERR("xsk_umem__delete failed.");
-    }
+    xsk_info->umem->refcount--;
 
-    pool = xzalloc(sizeof *pool);
-    pool->umem_info = xsk_info->umem;
-    pool->lost_in_rings = xsk_info->outstanding_tx + xsk_info->available_rx;
+    if (!xsk_info->umem->refcount) {
+        VLOG_DBG("Destroying umem.");
+        shared_umem = NULL;
+        if (xsk_umem__delete(umem)) {
+            VLOG_ERR("xsk_umem__delete failed.");
+        }
+        ovs_mutex_destroy(&xsk_info->umem->mutex);
 
-    ovs_mutex_lock(&unused_pools_mutex);
-    ovs_list_push_back(&unused_pools, &pool->list_node);
-    ovs_mutex_unlock(&unused_pools_mutex);
+        pool = xzalloc(sizeof *pool);
+        pool->umem_info = xsk_info->umem;
+        pool->lost_in_rings = xsk_info->outstanding_tx +
+                              xsk_info->available_rx;
 
-    free(xsk_info);
+        ovs_mutex_lock(&unused_pools_mutex);
+        ovs_list_push_back(&unused_pools, &pool->list_node);
+        ovs_mutex_unlock(&unused_pools_mutex);
 
-    netdev_afxdp_sweep_unused_pools(NULL);
+        free(xsk_info);
+
+        netdev_afxdp_sweep_unused_pools(NULL);
+    }
 }
 
 static void
@@ -733,11 +767,14 @@ netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
     prepare_fill_queue(xsk_info);
 
     umem = xsk_info->umem;
+    ovs_mutex_lock(&umem->mutex);
+
     rx->fd = xsk_socket__fd(xsk_info->xsk);
 
     rcvd = xsk_ring_cons__peek(&xsk_info->rx, BATCH_SIZE, &idx_rx);
     if (!rcvd) {
         xsk_rx_wakeup_if_needed(umem, netdev, rx->fd);
+        ovs_mutex_unlock(&umem->mutex);
         return EAGAIN;
     }
 
@@ -778,6 +815,7 @@ netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
         /* TODO: return the number of remaining packets in the queue. */
         *qfill = 0;
     }
+    ovs_mutex_unlock(&umem->mutex);
     return 0;
 }
 
@@ -924,7 +962,7 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid,
     struct netdev_linux *dev = netdev_linux_cast(netdev);
     struct xsk_socket_info *xsk_info;
     void *elems_pop[BATCH_SIZE];
-    struct xsk_umem_info *umem;
+    struct xsk_umem_info *umem = NULL;
     struct dp_packet *packet;
     bool free_batch = false;
     unsigned long orig;
@@ -942,6 +980,8 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid,
     free_batch = check_free_batch(batch);
 
     umem = xsk_info->umem;
+    ovs_mutex_lock(&umem->mutex);
+
     ret = umem_elem_pop_n(&umem->mpool, dp_packet_batch_size(batch),
                           elems_pop);
     if (OVS_UNLIKELY(ret)) {
@@ -993,6 +1033,9 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid,
     }
 
 out:
+    if (umem) {
+        ovs_mutex_unlock(&umem->mutex);
+    }
     if (free_batch) {
         free_afxdp_buf_batch(batch);
     } else {
-- 
2.7.4


