[ovs-dev] [RFC v7 01/13] netdev-dpdk: fix mbuf sizing
Loftus, Ciara
ciara.loftus at intel.com
Mon May 28 15:41:29 UTC 2018
>
> From: Mark Kavanagh <mark.b.kavanagh at intel.com>
>
> There are numerous factors that must be considered when calculating
> the size of an mbuf:
> - the data portion of the mbuf must be sized in accordance With Rx
> buffer alignment (typically 1024B). So, for example, in order to
> successfully receive and capture a 1500B packet, mbufs with a
> data portion of size 2048B must be used.
> - in OvS, the elements that comprise an mbuf are:
> * the dp packet, which includes a struct rte mbuf (704B)
> * RTE_PKTMBUF_HEADROOM (128B)
> * packet data (aligned to 1k, as previously described)
> * RTE_PKTMBUF_TAILROOM (typically 0)
>
> Some PMDs require that the total mbuf size (i.e. the total sum of all
> of the above-listed components' lengths) is cache-aligned. To satisfy
> this requirement, it may be necessary to round up the total mbuf size
> with respect to cacheline size. In doing so, it's possible that the
> dp_packet's data portion is inadvertently increased in size, such that
> it no longer adheres to Rx buffer alignment. Consequently, the
> following property of the mbuf no longer holds true:
>
> mbuf.data_len == mbuf.buf_len - mbuf.data_off
>
> This creates a problem in the case of multi-segment mbufs, where that
> assumption is assumed to be true for all but the final segment in an
> mbuf chain. Resolve this issue by adjusting the size of the mbuf's
> private data portion, as opposed to the packet data portion when
> aligning mbuf size to cachelines.
>
> Fixes: 4be4d22 ("netdev-dpdk: clean up mbuf initialization")
> Fixes: 31b88c9 ("netdev-dpdk: round up mbuf_size to cache_line_size")
> CC: Santosh Shukla <santosh.shukla at caviumnetworks.com>
> Signed-off-by: Mark Kavanagh <mark.b.kavanagh at intel.com>
> ---
> lib/netdev-dpdk.c | 46 ++++++++++++++++++++++++++++++----------------
> 1 file changed, 30 insertions(+), 16 deletions(-)
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 87152a7..356a967 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -82,12 +82,6 @@ static struct vlog_rate_limit rl =
> VLOG_RATE_LIMIT_INIT(5, 20);
> + (2 * VLAN_HEADER_LEN))
> #define MTU_TO_FRAME_LEN(mtu) ((mtu) + ETHER_HDR_LEN +
> ETHER_CRC_LEN)
> #define MTU_TO_MAX_FRAME_LEN(mtu) ((mtu) +
> ETHER_HDR_MAX_LEN)
> -#define FRAME_LEN_TO_MTU(frame_len) ((frame_len) \
> - - ETHER_HDR_LEN - ETHER_CRC_LEN)
> -#define MBUF_SIZE(mtu)
> ROUND_UP((MTU_TO_MAX_FRAME_LEN(mtu) \
> - + sizeof(struct dp_packet) \
> - + RTE_PKTMBUF_HEADROOM), \
> - RTE_CACHE_LINE_SIZE)
> #define NETDEV_DPDK_MBUF_ALIGN 1024
> #define NETDEV_DPDK_MAX_PKT_LEN 9728
>
> @@ -491,7 +485,7 @@ is_dpdk_class(const struct netdev_class *class)
> * behaviour, which reduces performance. To prevent this, use a buffer size
> * that is closest to 'mtu', but which satisfies the aforementioned criteria.
> */
> -static uint32_t
> +static uint16_t
> dpdk_buf_size(int mtu)
> {
> return ROUND_UP((MTU_TO_MAX_FRAME_LEN(mtu) +
> RTE_PKTMBUF_HEADROOM),
> @@ -582,7 +576,7 @@ dpdk_mp_do_not_free(struct rte_mempool *mp)
> OVS_REQUIRES(dpdk_mp_mutex)
> * - a new mempool was just created;
> * - a matching mempool already exists. */
> static struct rte_mempool *
> -dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
> +dpdk_mp_create(struct netdev_dpdk *dev, uint16_t mbuf_pkt_data_len)
> {
> char mp_name[RTE_MEMPOOL_NAMESIZE];
> const char *netdev_name = netdev_get_name(&dev->up);
> @@ -590,6 +584,7 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
> uint32_t n_mbufs;
> uint32_t hash = hash_string(netdev_name, 0);
> struct rte_mempool *mp = NULL;
> + uint16_t mbuf_size, aligned_mbuf_size, mbuf_priv_data_len;
>
> /*
> * XXX: rough estimation of number of mbufs required for this port:
> @@ -609,12 +604,13 @@ dpdk_mp_create(struct netdev_dpdk *dev, int
> mtu)
> * longer than RTE_MEMPOOL_NAMESIZE. */
> int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE,
> "ovs%08x%02d%05d%07u",
> - hash, socket_id, mtu, n_mbufs);
> + hash, socket_id, mbuf_pkt_data_len, n_mbufs);
%u for printing the unsigned mbuf_pkt_data_len
> if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) {
> VLOG_DBG("snprintf returned %d. "
> "Failed to generate a mempool name for \"%s\". "
> - "Hash:0x%x, socket_id: %d, mtu:%d, mbufs:%u.",
> - ret, netdev_name, hash, socket_id, mtu, n_mbufs);
> + "Hash:0x%x, socket_id: %d, pkt data room:%d, mbufs:%u.",
> + ret, netdev_name, hash, socket_id, mbuf_pkt_data_len,
> + n_mbufs);
Same here
> break;
> }
>
> @@ -623,13 +619,31 @@ dpdk_mp_create(struct netdev_dpdk *dev, int
> mtu)
> netdev_name, n_mbufs, socket_id,
> dev->requested_n_rxq, dev->requested_n_txq);
>
> + mbuf_priv_data_len = sizeof(struct dp_packet) -
> + sizeof(struct rte_mbuf);
> + /* The size of the entire mbuf. */
> + mbuf_size = sizeof (struct dp_packet) +
> + mbuf_pkt_data_len + RTE_PKTMBUF_HEADROOM;
> + /* mbuf size, rounded up to cacheline size. */
> + aligned_mbuf_size = ROUND_UP(mbuf_size, RTE_CACHE_LINE_SIZE);
> + /* If there is a size discrepancy, add padding to mbuf_priv_data_len.
> + * This maintains mbuf size cache alignment, while also honoring RX
> + * buffer alignment in the data portion of the mbuf. If this adjustment
> + * is not made, there is a possiblity later on that for an element of
> + * the mempool, buf, buf->data_len < (buf->buf_len - buf->data_off).
> + * This is problematic in the case of multi-segment mbufs, particularly
> + * when an mbuf segment needs to be resized (when [push|popp]ing a
> VLAN
> + * header, for example.
> + */
> + mbuf_priv_data_len += (aligned_mbuf_size - mbuf_size);
> +
> mp = rte_pktmbuf_pool_create(mp_name, n_mbufs, MP_CACHE_SZ,
> - sizeof (struct dp_packet) - sizeof (struct rte_mbuf),
> - MBUF_SIZE(mtu) - sizeof(struct dp_packet), socket_id);
> + mbuf_priv_data_len,
> + mbuf_pkt_data_len + RTE_PKTMBUF_HEADROOM, socket_id);
>
> if (mp) {
> VLOG_DBG("Allocated \"%s\" mempool with %u mbufs",
> - mp_name, n_mbufs);
> + mp_name, n_mbufs);
> /* rte_pktmbuf_pool_create has done some initialization of the
> * rte_mbuf part of each dp_packet. Some OvS specific fields
> * of the packet still need to be initialized by
> @@ -690,13 +704,13 @@ static int
> netdev_dpdk_mempool_configure(struct netdev_dpdk *dev)
> OVS_REQUIRES(dev->mutex)
> {
> - uint32_t buf_size = dpdk_buf_size(dev->requested_mtu);
> + uint16_t buf_size = dpdk_buf_size(dev->requested_mtu);
> struct rte_mempool *mp;
> int ret = 0;
>
> dpdk_mp_sweep();
>
> - mp = dpdk_mp_create(dev, FRAME_LEN_TO_MTU(buf_size));
> + mp = dpdk_mp_create(dev, buf_size);
> if (!mp) {
> VLOG_ERR("Failed to create memory pool for netdev "
> "%s, with MTU %d on socket %d: %s\n",
> --
> 2.7.4
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
More information about the dev
mailing list