[ovs-dev] [PATCH RFC] netdev-afxdp: Enable shared umem support.
William Tu
u9012063 at gmail.com
Tue Nov 5 22:16:40 UTC 2019
This RFC patch enables shared umem support. It requires a kernel change and
a libbpf change, which I will post in another thread. I tested with multiple
afxdp ports using skb mode. For example:
ovs-vsctl -- set interface afxdp-p0 options:n_rxq=1 type="afxdp" options:xdpmode=skb
ovs-vsctl -- set interface afxdp-p1 options:n_rxq=1 type="afxdp" options:xdpmode=skb
With this, afxdp-p0 and afxdp-p1 will share one umem instead of using two.
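At the AF_XDP level, "sharing a umem" means that only the first socket creates
the umem and its fill/completion rings; every later socket passes the existing
umem to xsk_socket__create() and binds with the XDP_SHARED_UMEM flag (the
copy/zerocopy flags must not be combined with it). A minimal sketch of that
second bind, outside of OVS; the helper name is made up, only the libbpf call
and the flags are real:

#include <stdint.h>
#include <bpf/xsk.h>
#include <linux/if_link.h>
#include <linux/if_xdp.h>

static struct xsk_socket *
bind_to_shared_umem(const char *ifname, uint32_t queue_id,
                    struct xsk_umem *umem,
                    struct xsk_ring_cons *rx, struct xsk_ring_prod *tx)
{
    struct xsk_socket_config cfg = {
        .rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
        .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
        .xdp_flags = XDP_FLAGS_SKB_MODE,   /* Same mode the umem was created for. */
        .bind_flags = XDP_SHARED_UMEM,     /* Reuse the first socket's umem. */
    };
    struct xsk_socket *xsk = NULL;

    /* Each socket still gets its own rx/tx rings; only the umem and its
     * fill/completion rings are shared. */
    if (xsk_socket__create(&xsk, ifname, queue_id, umem, rx, tx, &cfg)) {
        return NULL;
    }
    return xsk;
}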
Note that once the shared umem is created with a specific mode (e.g., XDP_COPY),
a netdev that shares this umem cannot change its mode. So I'm thinking about
using just one shared umem for all skb-mode netdevs and keeping dedicated umems
for the drv-mode netdevs. Or should we create one umem per mode, so that the
drv-mode netdevs also share one umem?
Any comments are welcome.
Suggested-by: Eelco Chaudron <echaudro at redhat.com>
Signed-off-by: William Tu <u9012063 at gmail.com>
---
lib/netdev-afxdp.c | 97 +++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 70 insertions(+), 27 deletions(-)
diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index 3037770b27cb..42767e1e27f3 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -95,6 +95,8 @@ static void xsk_destroy(struct xsk_socket_info *xsk);
static int xsk_configure_all(struct netdev *netdev);
static void xsk_destroy_all(struct netdev *netdev);
+static struct xsk_umem_info *shared_umem;
+
struct unused_pool {
struct xsk_umem_info *umem_info;
int lost_in_rings; /* Number of packets left in tx, rx, cq and fq. */
@@ -112,6 +114,8 @@ struct xsk_umem_info {
struct xsk_ring_cons cq;
struct xsk_umem *umem;
void *buffer;
+ int refcount;
+ struct ovs_mutex mutex;
};
struct xsk_socket_info {
@@ -228,6 +232,7 @@ xsk_configure_umem(void *buffer, uint64_t size, int xdpmode)
uconfig.comp_size = CONS_NUM_DESCS;
uconfig.frame_size = FRAME_SIZE;
uconfig.frame_headroom = OVS_XDP_HEADROOM;
+ ovs_mutex_init(&umem->mutex);
ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
&uconfig);
@@ -296,6 +301,7 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
struct xsk_socket_info *xsk;
char devname[IF_NAMESIZE];
uint32_t idx = 0, prog_id;
+ bool shared;
int ret;
int i;
@@ -304,6 +310,7 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
cfg.rx_size = CONS_NUM_DESCS;
cfg.tx_size = PROD_NUM_DESCS;
cfg.libbpf_flags = 0;
+ shared = umem->refcount > 1;
if (xdpmode == XDP_ZEROCOPY) {
cfg.bind_flags = XDP_ZEROCOPY;
@@ -319,6 +326,10 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
}
#endif
+ if (shared) {
+ cfg.bind_flags = XDP_SHARED_UMEM;
+ }
+
if (if_indextoname(ifindex, devname) == NULL) {
VLOG_ERR("ifindex %d to devname failed (%s)",
ifindex, ovs_strerror(errno));
@@ -352,6 +363,11 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
return NULL;
}
+ if (shared) {
+ return xsk;
+ }
+
+ /* Only the first umem needs to go below. */
while (!xsk_ring_prod__reserve(&xsk->umem->fq,
PROD_NUM_DESCS, &idx)) {
VLOG_WARN_RL(&rl, "Retry xsk_ring_prod__reserve to FILL queue");
@@ -380,33 +396,43 @@ xsk_configure(int ifindex, int xdp_queue_id, int xdpmode,
{
struct xsk_socket_info *xsk;
struct xsk_umem_info *umem;
- void *bufs;
+ void *bufs = NULL;
netdev_afxdp_sweep_unused_pools(NULL);
- /* Umem memory region. */
- bufs = xmalloc_pagealign(NUM_FRAMES * FRAME_SIZE);
- memset(bufs, 0, NUM_FRAMES * FRAME_SIZE);
+ if (!shared_umem) {
+ /* Umem memory region. */
+ bufs = xmalloc_pagealign(NUM_FRAMES * FRAME_SIZE);
+ memset(bufs, 0, NUM_FRAMES * FRAME_SIZE);
+
+ /* Create AF_XDP socket. */
+ umem = xsk_configure_umem(bufs,
+ NUM_FRAMES * FRAME_SIZE,
+ xdpmode);
+ if (!umem) {
+ free_pagealign(bufs);
+ return NULL;
+ }
- /* Create AF_XDP socket. */
- umem = xsk_configure_umem(bufs,
- NUM_FRAMES * FRAME_SIZE,
- xdpmode);
- if (!umem) {
- free_pagealign(bufs);
- return NULL;
+ shared_umem = umem;
+ umem->refcount++;
+ VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
+ } else {
+ umem = shared_umem;
+ umem->refcount++;
}
- VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
-
xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode,
use_need_wakeup);
- if (!xsk) {
+ if (!xsk && !umem->refcount--) {
/* Clean up umem and xpacket pool. */
+ shared_umem = NULL;
if (xsk_umem__delete(umem->umem)) {
VLOG_ERR("xsk_umem__delete failed.");
}
- free_pagealign(bufs);
+ if (bufs) {
+ free_pagealign(bufs);
+ }
umem_pool_cleanup(&umem->mpool);
xpacket_pool_cleanup(&umem->xpool);
free(umem);
@@ -472,21 +498,29 @@ xsk_destroy(struct xsk_socket_info *xsk_info)
xsk_info->xsk = NULL;
umem = xsk_info->umem->umem;
- if (xsk_umem__delete(umem)) {
- VLOG_ERR("xsk_umem__delete failed.");
- }
+ xsk_info->umem->refcount--;
- pool = xzalloc(sizeof *pool);
- pool->umem_info = xsk_info->umem;
- pool->lost_in_rings = xsk_info->outstanding_tx + xsk_info->available_rx;
+ if (!xsk_info->umem->refcount) {
+ VLOG_WARN("destroy umem");
+ shared_umem = NULL;
+ if (xsk_umem__delete(umem)) {
+ VLOG_ERR("xsk_umem__delete failed.");
+ }
+ ovs_mutex_destroy(&xsk_info->umem->mutex);
- ovs_mutex_lock(&unused_pools_mutex);
- ovs_list_push_back(&unused_pools, &pool->list_node);
- ovs_mutex_unlock(&unused_pools_mutex);
+ pool = xzalloc(sizeof *pool);
+ pool->umem_info = xsk_info->umem;
+ pool->lost_in_rings = xsk_info->outstanding_tx +
+ xsk_info->available_rx;
- free(xsk_info);
+ ovs_mutex_lock(&unused_pools_mutex);
+ ovs_list_push_back(&unused_pools, &pool->list_node);
+ ovs_mutex_unlock(&unused_pools_mutex);
- netdev_afxdp_sweep_unused_pools(NULL);
+ free(xsk_info);
+
+ netdev_afxdp_sweep_unused_pools(NULL);
+ }
}
static void
@@ -733,11 +767,14 @@ netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
prepare_fill_queue(xsk_info);
umem = xsk_info->umem;
+ ovs_mutex_lock(&umem->mutex);
+
rx->fd = xsk_socket__fd(xsk_info->xsk);
rcvd = xsk_ring_cons__peek(&xsk_info->rx, BATCH_SIZE, &idx_rx);
if (!rcvd) {
xsk_rx_wakeup_if_needed(umem, netdev, rx->fd);
+ ovs_mutex_unlock(&umem->mutex);
return EAGAIN;
}
@@ -778,6 +815,7 @@ netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
/* TODO: return the number of remaining packets in the queue. */
*qfill = 0;
}
+ ovs_mutex_unlock(&umem->mutex);
return 0;
}
@@ -924,7 +962,7 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid,
struct netdev_linux *dev = netdev_linux_cast(netdev);
struct xsk_socket_info *xsk_info;
void *elems_pop[BATCH_SIZE];
- struct xsk_umem_info *umem;
+ struct xsk_umem_info *umem = NULL;
struct dp_packet *packet;
bool free_batch = false;
unsigned long orig;
@@ -942,6 +980,8 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid,
free_batch = check_free_batch(batch);
umem = xsk_info->umem;
+ ovs_mutex_lock(&umem->mutex);
+
ret = umem_elem_pop_n(&umem->mpool, dp_packet_batch_size(batch),
elems_pop);
if (OVS_UNLIKELY(ret)) {
@@ -993,6 +1033,9 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid,
}
out:
+ if (umem) {
+ ovs_mutex_unlock(&umem->mutex);
+ }
if (free_batch) {
free_afxdp_buf_batch(batch);
} else {
--
2.7.4