[ovs-dev] [PATCH] netdev-dpdk: Create separate memory pool for each port

Stokes, Ian ian.stokes at intel.com
Sun Jan 15 18:14:47 UTC 2017


> Since it's possible to delete memory pool in DPDK we can try to estimate
> better required memory size when port is reconfigured, e.g. with different
> number of rx queues.

Thanks for the patch.

It will need to be rebased for it to apply cleanly to master.
I've applied the changes manually just to test, and from what I can see there are no issues.
It also resolves an issue reported with having 64 queues for DPDK physical ports, which caused OVS to crash.

A few other comments below.
> 
> Signed-off-by: Robert Wojciechowicz <robertx.wojciechowicz at intel.com>
> ---
>  lib/netdev-dpdk.c | 85 +++++++++++++++++++++++++++-----------------------
> -----
>  1 file changed, 41 insertions(+), 44 deletions(-)
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 7c1523e..4c9e1e8
> 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -282,6 +282,8 @@ struct dpdk_mp {
>      struct rte_mempool *mp;
>      int mtu;
>      int socket_id;
> +    char if_name[IFNAMSIZ];
> +    unsigned mp_size;
>      int refcount;
>      struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex);  }; @@ -
> 451,75 +453,66 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp,  }
> 
>  static struct dpdk_mp *
> -dpdk_mp_create(int socket_id, int mtu)
> +dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
>  {
>      struct rte_pktmbuf_pool_private mbp_priv;
>      struct dpdk_mp *dmp;
> -    unsigned mp_size;
>      char *mp_name;
> 
>      dmp = dpdk_rte_mzalloc(sizeof *dmp);
>      if (!dmp) {
>          return NULL;
>      }
> -    dmp->socket_id = socket_id;
> +    dmp->socket_id = dev->requested_socket_id;
>      dmp->mtu = mtu;
> +    strncpy(dmp->if_name, dev->up.name, IFNAMSIZ);
>      dmp->refcount = 1;
>      mbp_priv.mbuf_data_room_size = MBUF_SIZE(mtu) - sizeof(struct
> dp_packet);
>      mbp_priv.mbuf_priv_size = sizeof(struct dp_packet)
> -                              - sizeof(struct rte_mbuf);
> -    /* XXX: this is a really rough method of provisioning memory.
> -     * It's impossible to determine what the exact memory requirements
> are
> -     * when the number of ports and rxqs that utilize a particular
> mempool can
> -     * change dynamically at runtime. For now, use this rough heurisitic.
> +        - sizeof(struct rte_mbuf);
> +    /*
> +     * XXX: rough estimation of memory required for port:
> +     * <packets required to fill the device rxqs>
> +     * + <packets that could be stuck on other ports txqs>
> +     * + <packets in the pmd threads>
>       */
> -    if (mtu >= ETHER_MTU) {
> -        mp_size = MAX_NB_MBUF;
> -    } else {
> -        mp_size = MIN_NB_MBUF;
> -    }
> +
> +    dmp->mp_size = dev->requested_n_rxq * dev->requested_rxq_size
> +        + dev->requested_n_txq * dev->requested_txq_size
> +        + MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST;
> 
>      do {
> -        mp_name = xasprintf("ovs_mp_%d_%d_%u", dmp->mtu, dmp->socket_id,
> -                            mp_size);
> +        mp_name = xasprintf("%s_%d_%d_%u", dmp->if_name, dmp->mtu,
> +                            dmp->socket_id, dmp->mp_size);
> 
> -        dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
> +        dmp->mp = rte_mempool_create(mp_name, dmp->mp_size,
> + MBUF_SIZE(mtu),
>                                       MP_CACHE_SZ,
>                                       sizeof(struct
> rte_pktmbuf_pool_private),
>                                       rte_pktmbuf_pool_init, &mbp_priv,
>                                       ovs_rte_pktmbuf_init, NULL,
> -                                     socket_id, 0);
> +                                     dmp->socket_id, 0);
>          if (dmp->mp) {
>              VLOG_DBG("Allocated \"%s\" mempool with %u mbufs",
> -                     mp_name, mp_size);
> +                     mp_name, dmp->mp_size);
>          }
>          free(mp_name);
>          if (dmp->mp) {
>              return dmp;
>          }
> -    } while (rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);
> +    } while (rte_errno == ENOMEM && (dmp->mp_size /= 2) >=
> + MIN_NB_MBUF);
> 
>      rte_free(dmp);
>      return NULL;
>  }
> 
>  static struct dpdk_mp *
> -dpdk_mp_get(int socket_id, int mtu)
> +dpdk_mp_get(struct netdev_dpdk *dev, int mtu)
>  {
>      struct dpdk_mp *dmp;
> 
>      ovs_mutex_lock(&dpdk_mp_mutex);
> -    LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
> -        if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
> -            dmp->refcount++;
> -            goto out;
> -        }
> -    }
> -
Since you remove the use of dpdk_mp_list above, I wonder whether there is still a need to maintain the global list.
With mempools now set on a per-netdev basis, perhaps the global list could be removed as well. Thoughts?

> -    dmp = dpdk_mp_create(socket_id, mtu);
> +    dmp = dpdk_mp_create(dev, mtu);
>      ovs_list_push_back(&dpdk_mp_list, &dmp->list_node);
> -
> -out:
>      ovs_mutex_unlock(&dpdk_mp_mutex);
> 
>      return dmp;
> @@ -528,6 +521,8 @@ out:
>  static void
>  dpdk_mp_put(struct dpdk_mp *dmp)
>  {
> +    char *mp_name;
> +
>      if (!dmp) {
>          return;
>      }
> @@ -536,6 +531,10 @@ dpdk_mp_put(struct dpdk_mp *dmp)
>      ovs_assert(dmp->refcount);
> 
>      if (!--dmp->refcount) {
> +        mp_name = xasprintf("%s_%d_%d_%u", dmp->if_name, dmp->mtu,
> +                            dmp->socket_id, dmp->mp_size);
> +        VLOG_DBG("Releasing \"%s\" mempool", mp_name);
> +        free(mp_name);
>          ovs_list_remove(&dmp->list_node);
>          rte_mempool_free(dmp->mp);
>          rte_free(dmp);
> @@ -554,7 +553,7 @@ netdev_dpdk_mempool_configure(struct netdev_dpdk *dev)
>      uint32_t buf_size = dpdk_buf_size(dev->requested_mtu);
>      struct dpdk_mp *mp;
> 
> -    mp = dpdk_mp_get(dev->requested_socket_id,
> FRAME_LEN_TO_MTU(buf_size));
> +    mp = dpdk_mp_get(dev, FRAME_LEN_TO_MTU(buf_size));
>      if (!mp) {
>          VLOG_ERR("Insufficient memory to create memory pool for netdev "
>                   "%s, with MTU %d on socket %d\n", @@ -827,6 +826,15 @@
> netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
>      ovsrcu_index_init(&dev->vid, -1);
>      dev->vhost_reconfigured = false;
> 
> +    netdev->n_rxq = NR_QUEUE;
> +    netdev->n_txq = NR_QUEUE;
> +    dev->requested_n_rxq = netdev->n_rxq;
> +    dev->requested_n_txq = netdev->n_txq;
> +    dev->rxq_size = NIC_PORT_DEFAULT_RXQ_SIZE;
> +    dev->txq_size = NIC_PORT_DEFAULT_TXQ_SIZE;
> +    dev->requested_rxq_size = dev->rxq_size;
> +    dev->requested_txq_size = dev->txq_size;
> +
>      err = netdev_dpdk_mempool_configure(dev);
>      if (err) {
>          goto unlock;
> @@ -838,15 +846,6 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int
> port_no,
>      dev->policer_rate = 0;
>      dev->policer_burst = 0;
> 
> -    netdev->n_rxq = NR_QUEUE;
> -    netdev->n_txq = NR_QUEUE;
> -    dev->requested_n_rxq = netdev->n_rxq;
> -    dev->requested_n_txq = netdev->n_txq;
> -    dev->rxq_size = NIC_PORT_DEFAULT_RXQ_SIZE;
> -    dev->txq_size = NIC_PORT_DEFAULT_TXQ_SIZE;
> -    dev->requested_rxq_size = dev->rxq_size;
> -    dev->requested_txq_size = dev->txq_size;
> -
>      /* Initialize the flow control to NULL */
>      memset(&dev->fc_conf, 0, sizeof dev->fc_conf);
>      if (type == DPDK_DEV_ETH) {
> @@ -2941,16 +2940,14 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
> 
>      rte_eth_dev_stop(dev->port_id);
> 
> -    if (dev->mtu != dev->requested_mtu) {
> -        netdev_dpdk_mempool_configure(dev);
> -    }
> -
>      netdev->n_txq = dev->requested_n_txq;
>      netdev->n_rxq = dev->requested_n_rxq;
> 
>      dev->rxq_size = dev->requested_rxq_size;
>      dev->txq_size = dev->requested_txq_size;
> 
> +    netdev_dpdk_mempool_configure(dev);
> +
>      rte_free(dev->tx_q);
>      err = dpdk_eth_dev_init(dev);
>      dev->tx_q = netdev_dpdk_alloc_txq(netdev->n_txq);
> --
> 1.8.3.1
> 
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev


More information about the dev mailing list