[ovs-dev] [PATCH V3] netdev-dpdk: Add Jumbo Frame Support.

Flavio Leitner fbl at sysclose.org
Thu Feb 4 20:51:04 UTC 2016


Some minor comments just in case you decide to spin another version:

On Mon,  1 Feb 2016 10:18:54 +0000
Mark Kavanagh <mark.b.kavanagh at intel.com> wrote:


[...]
> +static uint32_t
> +dpdk_buf_size(struct netdev_dpdk *netdev, int frame_len)
> +{
> +    struct rte_eth_dev_info info;
> +    uint32_t buf_size;
> +    /* XXX: This is a workaround for DPDK v2.2, and needs to be
> refactored with a
> +     * future DPDK release. */
> +    uint32_t len = frame_len + (2 * DPDK_VLAN_TAG_LEN);
> +
> +    if(netdev->type == DPDK_DEV_ETH) {
         ^ space here


> +        rte_eth_dev_info_get(netdev->port_id, &info);
> +        buf_size = (info.min_rx_bufsize == 0) ?
> +                   NETDEV_DPDK_DEFAULT_RX_BUFSIZE :
> +                   info.min_rx_bufsize;
> +    } else {
> +        buf_size = NETDEV_DPDK_DEFAULT_RX_BUFSIZE;
> +    }
> +
> +    if(len % buf_size) {

also here

> +        len = buf_size * ((len/buf_size) + 1);
> +    }
> +
> +    return len;
> +}
> +
>  /* XXX: use dpdk malloc for entire OVS. in fact huge page should be
> used
>   * for all other segments data, bss and text. */
>  
> @@ -280,26 +321,65 @@ free_dpdk_buf(struct dp_packet *p)
>  }
>  
>  static void
> -__rte_pktmbuf_init(struct rte_mempool *mp,
> -                   void *opaque_arg OVS_UNUSED,
> -                   void *_m,
> -                   unsigned i OVS_UNUSED)
> +ovs_rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg)
>  {
> -    struct rte_mbuf *m = _m;
> -    uint32_t buf_len = mp->elt_size - sizeof(struct dp_packet);
> +    struct rte_pktmbuf_pool_private *user_mbp_priv, *mbp_priv;
> +    struct rte_pktmbuf_pool_private default_mbp_priv;
> +    uint16_t roomsz;
>  
>      RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct dp_packet));
>  
> +    /* if no structure is provided, assume no mbuf private area */
> +
> +    user_mbp_priv = opaque_arg;
> +    if (user_mbp_priv == NULL) {
> +        default_mbp_priv.mbuf_priv_size = 0;
> +        if (mp->elt_size > sizeof(struct dp_packet)) {
> +            roomsz = mp->elt_size - sizeof(struct dp_packet);
> +        } else {
> +            roomsz = 0;
> +        }
> +        default_mbp_priv.mbuf_data_room_size = roomsz;
> +        user_mbp_priv = &default_mbp_priv;
> +    }
> +
> +    RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct dp_packet) +
> +        user_mbp_priv->mbuf_data_room_size +
> +        user_mbp_priv->mbuf_priv_size);
> +
> +    mbp_priv = rte_mempool_get_priv(mp);
> +    memcpy(mbp_priv, user_mbp_priv, sizeof(*mbp_priv));
> +}
> +
> +/* Initialise some fields in the mbuf structure that are not
> modified by the
> + * user once created (origin pool, buffer start address, etc.*/
> +static void
> +__ovs_rte_pktmbuf_init(struct rte_mempool *mp,
> +                       void *opaque_arg OVS_UNUSED,
> +                       void *_m,
> +                       unsigned i OVS_UNUSED)
> +{
> +    struct rte_mbuf *m = _m;
> +    uint32_t buf_size, buf_len, priv_size;
> +
> +    priv_size = rte_pktmbuf_priv_size(mp);
> +    buf_size = sizeof(struct dp_packet) + priv_size;
> +    buf_len = rte_pktmbuf_data_room_size(mp);
> +
> +    RTE_MBUF_ASSERT(RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) ==
> priv_size);
> +    RTE_MBUF_ASSERT(mp->elt_size >= buf_size);
> +    RTE_MBUF_ASSERT(buf_len <= UINT16_MAX);
> +
>      memset(m, 0, mp->elt_size);
>  
> -    /* start of buffer is just after mbuf structure */
> -    m->buf_addr = (char *)m + sizeof(struct dp_packet);
> -    m->buf_physaddr = rte_mempool_virt2phy(mp, m) +
> -                    sizeof(struct dp_packet);
> +    /* start of buffer is after dp_packet structure and priv data */
> +    m->priv_size = priv_size;
> +    m->buf_addr = (char *)m + buf_size;
> +    m->buf_physaddr = rte_mempool_virt2phy(mp, m) + buf_size;
>      m->buf_len = (uint16_t)buf_len;
>  
>      /* keep some headroom between start of buffer and data */
> -    m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, m->buf_len);
> +    m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM,
> (uint16_t)m->buf_len); 
>      /* init some constant fields */
>      m->pool = mp;
> @@ -315,7 +395,7 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp,
>  {
>      struct rte_mbuf *m = _m;
>  
> -    __rte_pktmbuf_init(mp, opaque_arg, _m, i);
> +    __ovs_rte_pktmbuf_init(mp, opaque_arg, m, i);
>  
>      dp_packet_init_dpdk((struct dp_packet *) m, m->buf_len);
>  }
> @@ -326,6 +406,7 @@ dpdk_mp_get(int socket_id, int mtu)
> OVS_REQUIRES(dpdk_mutex) struct dpdk_mp *dmp = NULL;
>      char mp_name[RTE_MEMPOOL_NAMESIZE];
>      unsigned mp_size;
> +    struct rte_pktmbuf_pool_private mbp_priv;
>  
>      LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
>          if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
> @@ -338,6 +419,8 @@ dpdk_mp_get(int socket_id, int mtu)
> OVS_REQUIRES(dpdk_mutex) dmp->socket_id = socket_id;
>      dmp->mtu = mtu;
>      dmp->refcount = 1;
> +    mbp_priv.mbuf_data_room_size = MTU_TO_FRAME_LEN(mtu) +
> RTE_PKTMBUF_HEADROOM;
> +    mbp_priv.mbuf_priv_size = 0;
>  
>      mp_size = MAX_NB_MBUF;
>      do {
> @@ -346,10 +429,10 @@ dpdk_mp_get(int socket_id, int mtu)
> OVS_REQUIRES(dpdk_mutex) return NULL;
>          }
>  
> -        dmp->mp = rte_mempool_create(mp_name, mp_size,
> MBUF_SIZE(mtu),
> +        dmp->mp = rte_mempool_create(mp_name, mp_size,
> MBUF_SEGMENT_SIZE(mtu), MP_CACHE_SZ,
>                                       sizeof(struct
> rte_pktmbuf_pool_private),
> -                                     rte_pktmbuf_pool_init, NULL,
> +                                     ovs_rte_pktmbuf_pool_init,
> &mbp_priv, ovs_rte_pktmbuf_init, NULL,
>                                       socket_id, 0);
>      } while (!dmp->mp && rte_errno == ENOMEM && (mp_size /= 2) >=
> MIN_NB_MBUF); @@ -433,6 +516,7 @@ dpdk_eth_dev_queue_setup(struct
> netdev_dpdk *dev, int n_rxq, int n_txq) {
>      int diag = 0;
>      int i;
> +    struct rte_eth_conf conf = port_conf;
>  
>      /* A device may report more queues than it makes available (this
> has
>       * been observed for Intel xl710, which reserves some of them for
> @@ -444,7 +528,12 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk
> *dev, int n_rxq, int n_txq) VLOG_INFO("Retrying setup with (rxq:%d
> txq:%d)", n_rxq, n_txq); }
>  
> -        diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq,
> &port_conf);
> +        if(OVS_UNLIKELY(dev->mtu > ETHER_MTU)) {

space needed

> +            conf.rxmode.jumbo_frame =
> NETDEV_DPDK_JUMBO_FRAME_ENABLED;
> +            conf.rxmode.max_rx_pkt_len = MTU_TO_FRAME_LEN(dev->mtu);
> +        }
> +
> +        diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq,
> &conf); if (diag) {
>              break;
>          }
> @@ -586,6 +675,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned
> int port_no, struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
>      int sid;
>      int err = 0;
> +    uint32_t buf_size;
>  
>      ovs_mutex_init(&netdev->mutex);
>      ovs_mutex_lock(&netdev->mutex);
> @@ -605,10 +695,16 @@ netdev_dpdk_init(struct netdev *netdev_,
> unsigned int port_no, netdev->port_id = port_no;
>      netdev->type = type;
>      netdev->flags = 0;
> +
> +    /* Initialize port's MTU and frame len to the default Ethernet
> values.
> +     * Larger, user-specified (jumbo) frame buffers are accommodated
> in
> +     * netdev_dpdk_set_config.
> +     */
> +    netdev->max_packet_len = ETHER_MAX_LEN;
>      netdev->mtu = ETHER_MTU;
> -    netdev->max_packet_len = MTU_TO_MAX_LEN(netdev->mtu);
>  
> -    netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, netdev->mtu);
> +    buf_size = dpdk_buf_size(netdev, ETHER_MAX_LEN);
> +    netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id,
> FRAME_LEN_TO_MTU(buf_size)); if (!netdev->dpdk_mp) {
>          err = ENOMEM;
>          goto unlock;
> @@ -651,6 +747,27 @@ dpdk_dev_parse_name(const char dev_name[], const
> char prefix[], return 0;
>  }
>  
> +static void
> +dpdk_dev_parse_mtu(const struct smap *args, int *mtu)
> +{
> +    const char *mtu_str = smap_get(args, "dpdk-mtu");
> +    char *end_ptr = NULL;
> +    int local_mtu;
> +
> +    if(mtu_str) {
> +        local_mtu = strtoul(mtu_str, &end_ptr, 0);
> +    }
> +    if(!mtu_str || local_mtu < ETHER_MTU ||
> +                   local_mtu >
> FRAME_LEN_TO_MTU(NETDEV_DPDK_MAX_FRAME_LEN) ||
> +                   *end_ptr != '\0') {

This needs to align with !mtu_str

Thanks,
-- 
fbl




More information about the dev mailing list