[ovs-dev] [PATCH 5/5] netdev-dpdk: Remove alloc from packet recv.
Jarno Rajahalme
jrajahalme at nicira.com
Wed Apr 2 20:00:38 UTC 2014
On Mar 31, 2014, at 9:43 PM, Pravin <pshelar at nicira.com> wrote:
> From: Pravin Shelar <pshelar at nicira.com>
>
> On DPDK packet recv, ovs is given pointer to mbuf which has
> information about a packet, for example pointer to data and size.
> By moving mbuf to ofpbuf we can let dpdk allocate ofpbuf and
> pass that to ovs for processing the packet.
>
> Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
> ---
> lib/netdev-dpdk.c | 99 +++++++++++++++++++++++++++--------------------------
> lib/ofpbuf.c | 4 +--
> lib/ofpbuf.h | 6 +++-
> 3 files changed, 57 insertions(+), 52 deletions(-)
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 5db4b49..facf220 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -209,16 +209,53 @@ dpdk_rte_mzalloc(size_t sz)
> void
> free_dpdk_buf(struct ofpbuf *b)
> {
> - struct rte_mbuf *pkt;
> -
> - pkt = b->private_p;
> - if (!pkt) {
> - return;
> - }
> + struct rte_mbuf *pkt = (struct rte_mbuf *) b;
>
> rte_mempool_put(pkt->pool, pkt);
> }
>
> +static void
> +__rte_pktmbuf_init(struct rte_mempool *mp,
> + __attribute__((unused)) void *opaque_arg,
> + void *_m,
> + __attribute__((unused)) unsigned i)
Can use OVS_UNUSED here.
> +{
> + struct rte_mbuf *m = _m;
> + uint32_t buf_len = mp->elt_size - sizeof(struct ofpbuf);
> +
> + RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct ofpbuf));
> +
> + memset(m, 0, mp->elt_size);
> +
> + /* start of buffer is just after mbuf structure */
> + m->buf_addr = (char *)m + sizeof(struct ofpbuf);
> + m->buf_physaddr = rte_mempool_virt2phy(mp, m) +
> + sizeof(struct ofpbuf);
> + m->buf_len = (uint16_t)buf_len;
> +
> + /* keep some headroom between start of buffer and data */
> + m->pkt.data = (char*) m->buf_addr + RTE_MIN(RTE_PKTMBUF_HEADROOM, m->buf_len);
> +
> + /* init some constant fields */
Are these fields constant though the lifetime of ‘m’?
> + m->type = RTE_MBUF_PKT;
> + m->pool = mp;
> + m->pkt.nb_segs = 1;
> + m->pkt.in_port = 0xff;
> +}
> +
> +static void
> +ovs_rte_pktmbuf_init(struct rte_mempool *mp,
> + __attribute__((unused)) void *opaque_arg,
> + void *_m,
> + __attribute__((unused)) unsigned i)
> +{
> + struct rte_mbuf *m = _m;
> +
> + __rte_pktmbuf_init(mp, opaque_arg, _m, i);
> +
> + ofpbuf_init_dpdk((struct ofpbuf *) m, m->buf_len);
> +}
> +
> static struct dpdk_mp *
> dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex)
> {
> @@ -242,7 +279,7 @@ dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex)
> MP_CACHE_SZ,
> sizeof(struct rte_pktmbuf_pool_private),
> rte_pktmbuf_pool_init, NULL,
> - rte_pktmbuf_init, NULL,
> + ovs_rte_pktmbuf_init, NULL,
> socket_id, 0);
>
> if (dmp->mp == NULL) {
> @@ -550,47 +587,22 @@ dpdk_queue_flush(struct netdev_dpdk *dev, int qid)
> rte_spinlock_unlock(&txq->tx_lock);
> }
>
> -inline static struct ofpbuf *
> -build_ofpbuf(struct rte_mbuf *pkt)
> -{
> - struct ofpbuf *b;
> -
> - b = ofpbuf_new(0);
> - b->private_p = pkt;
> -
> - ofpbuf_set_data(b, pkt->pkt.data);
> - ofpbuf_set_base(b, (char *)ofpbuf_get_data(b) - DP_NETDEV_HEADROOM - VLAN_ETH_HEADER_LEN);
> - b->allocated = pkt->buf_len;
> - ofpbuf_set_size(b, rte_pktmbuf_data_len(pkt));
> - b->source = OFPBUF_DPDK;
> -
> - dp_packet_pad(b);
> -
> - return b;
> -}
> -
> static int
> netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct ofpbuf **packet, int *c)
> {
> struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq_);
> struct netdev *netdev = rx->up.netdev;
> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> - struct rte_mbuf *burst_pkts[MAX_RX_QUEUE_LEN];
> int nb_rx;
> - int i;
>
> dpdk_queue_flush(dev, rxq_->queue_id);
>
> nb_rx = rte_eth_rx_burst(rx->port_id, rxq_->queue_id,
> - burst_pkts, MAX_RX_QUEUE_LEN);
> + (struct rte_mbuf **) packet, MAX_RX_QUEUE_LEN);
The parameter ‘packet’ should be called ‘packets’ instead.
> if (!nb_rx) {
> return EAGAIN;
> }
>
> - for (i = 0; i < nb_rx; i++) {
> - packet[i] = build_ofpbuf(burst_pkts[i]);
> - }
> -
> *c = nb_rx;
>
> return 0;
> @@ -677,32 +689,23 @@ netdev_dpdk_send(struct netdev *netdev,
> goto out;
> }
>
> - rte_prefetch0(&ofpbuf->private_p);
> - if (!may_steal ||
> - !ofpbuf->private_p || ofpbuf->source != OFPBUF_DPDK) {
> + if (!may_steal || ofpbuf->source != OFPBUF_DPDK) {
> dpdk_do_tx_copy(netdev, (char *) ofpbuf_get_data(ofpbuf), ofpbuf_get_size(ofpbuf));
> +
> + if (may_steal) {
> + ofpbuf_delete(ofpbuf);
> + }
> } else {
> - struct rte_mbuf *pkt;
> int qid;
>
> - pkt = ofpbuf->private_p;
> - ofpbuf->private_p = NULL;
> - rte_pktmbuf_data_len(pkt) = ofpbuf_get_size(ofpbuf);
> - rte_pktmbuf_pkt_len(pkt) = ofpbuf_get_size(ofpbuf);
> -
> qid = rte_lcore_id() % NR_QUEUE;
>
> - dpdk_queue_pkt(dev, qid, pkt);
> + dpdk_queue_pkt(dev, qid, (struct rte_mbuf *)ofpbuf);
>
> - ofpbuf->private_p = NULL;
> }
> ret = 0;
>
> out:
> - if (may_steal) {
> - ofpbuf_delete(ofpbuf);
> - }
> -
> return ret;
> }
>
> diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c
> index f616b1c..749f7ff 100644
> --- a/lib/ofpbuf.c
> +++ b/lib/ofpbuf.c
> @@ -134,9 +134,7 @@ ofpbuf_uninit(struct ofpbuf *b)
> if (b->source == OFPBUF_MALLOC) {
> free(ofpbuf_get_base(b));
> }
> - if (b->source == OFPBUF_DPDK) {
> - free_dpdk_buf(b);
> - }
> + ovs_assert(b->source != OFPBUF_DPDK);
> }
> }
>
> diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h
> index 94651e4..ec3429a 100644
> --- a/lib/ofpbuf.h
> +++ b/lib/ofpbuf.h
> @@ -41,7 +41,6 @@ enum OVS_PACKED_ENUM ofpbuf_source {
> struct ofpbuf {
> #ifdef DPDK_NETDEV
> struct rte_mbuf mbuf; /* DPDK mbuf */
> - void *private_p; /* private pointer for use by dpdk */
> #else
> void *base; /* First byte of allocated space. */
> void *data; /* First byte actually in use. */
> @@ -155,6 +154,11 @@ static inline void *ofpbuf_get_uninit_pointer(struct ofpbuf *b)
> static inline void ofpbuf_delete(struct ofpbuf *b)
> {
> if (b) {
> + if (b->source == OFPBUF_DPDK) {
> + free_dpdk_buf(b);
> + return;
> + }
> +
There is a comment in ofpbuf_get_uninit_pointer() that you may want to address in context of this patch.
Otherwise:
Acked-by: Jarno Rajahalme <jrajahalme at nicira.com>
More information about the dev
mailing list