[ovs-dev] [PATCH V4 2/4] netdev-dpdk: Add Jumbo Frame Support

Mark Kavanagh mark.b.kavanagh at intel.com
Tue Feb 16 11:22:37 UTC 2016


Add support for Jumbo Frames to DPDK-enabled port types,
using single-segment mbufs.

Using this approach, the amount of memory allocated for each mbuf
to store frame data is increased to a value greater than 1518B
(typical Ethernet maximum frame length). The increased space
available in the mbuf means that an entire Jumbo Frame can be carried
in a single mbuf, as opposed to partitioning it across multiple mbuf
segments.

The amount of space allocated to each mbuf to hold frame data is
defined dynamically by the user, via the 'mtu_request' option, when
adding a DPDK port to a bridge. If an MTU value is not supplied, or the
user-supplied value is invalid, the MTU for the port defaults to the
standard Ethernet MTU (i.e. 1500B).

Signed-off-by: Mark Kavanagh <mark.b.kavanagh at intel.com>
---
 INSTALL.DPDK.md   |   1 -
 lib/netdev-dpdk.c | 259 +++++++++++++++++++++++++++++++++---------------------
 2 files changed, 159 insertions(+), 101 deletions(-)

diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
index d892788..722fb9e 100644
--- a/INSTALL.DPDK.md
+++ b/INSTALL.DPDK.md
@@ -881,7 +881,6 @@ determines how many queues can be used by the guest.
 Restrictions:
 -------------
 
-  - Work with 1500 MTU, needs few changes in DPDK lib to fix this issue.
   - Currently DPDK port does not make use any offload functionality.
   - DPDK-vHost support works with 1G huge pages.
 
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 5bcd36d..93a0930 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -77,6 +77,7 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
                                     + sizeof(struct dp_packet)    \
                                     + RTE_PKTMBUF_HEADROOM)
 #define NETDEV_DPDK_MBUF_ALIGN      1024
+#define NETDEV_DPDK_MAX_FRAME_LEN   13312
 
 /* Max and min number of packets in the mempool.  OVS tries to allocate a
  * mempool with MAX_NB_MBUF: if this fails (because the system doesn't have
@@ -294,34 +295,6 @@ free_dpdk_buf(struct dp_packet *p)
 }
 
 static void
-__rte_pktmbuf_init(struct rte_mempool *mp,
-                   void *opaque_arg OVS_UNUSED,
-                   void *_m,
-                   unsigned i OVS_UNUSED)
-{
-    struct rte_mbuf *m = _m;
-    uint32_t buf_len = mp->elt_size - sizeof(struct dp_packet);
-
-    RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct dp_packet));
-
-    memset(m, 0, mp->elt_size);
-
-    /* start of buffer is just after mbuf structure */
-    m->buf_addr = (char *)m + sizeof(struct dp_packet);
-    m->buf_physaddr = rte_mempool_virt2phy(mp, m) +
-                    sizeof(struct dp_packet);
-    m->buf_len = (uint16_t)buf_len;
-
-    /* keep some headroom between start of buffer and data */
-    m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, m->buf_len);
-
-    /* init some constant fields */
-    m->pool = mp;
-    m->nb_segs = 1;
-    m->port = 0xff;
-}
-
-static void
 ovs_rte_pktmbuf_init(struct rte_mempool *mp,
                      void *opaque_arg OVS_UNUSED,
                      void *_m,
@@ -450,6 +423,7 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
 {
     int diag = 0;
     int i;
+    struct rte_eth_conf conf = port_conf;
 
     /* A device may report more queues than it makes available (this has
      * been observed for Intel xl710, which reserves some of them for
@@ -461,7 +435,15 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
             VLOG_INFO("Retrying setup with (rxq:%d txq:%d)", n_rxq, n_txq);
         }
 
-        diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq, &port_conf);
+        if (dev->mtu > ETHER_MTU) {
+            conf.rxmode.jumbo_frame = 1;
+            conf.rxmode.max_rx_pkt_len = dev->max_packet_len;
+        } else {
+            conf.rxmode.jumbo_frame = 0;
+            conf.rxmode.max_rx_pkt_len = 0;
+        }
+
+        diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq, &conf);
         if (diag) {
             break;
         }
@@ -602,8 +584,6 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no,
 {
     struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
     int sid;
-    int err = 0;
-    uint32_t buf_size;
 
     ovs_mutex_init(&netdev->mutex);
     ovs_mutex_lock(&netdev->mutex);
@@ -623,15 +603,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no,
     netdev->port_id = port_no;
     netdev->type = type;
     netdev->flags = 0;
-    netdev->mtu = ETHER_MTU;
-    netdev->max_packet_len = ETHER_MAX_LEN;
-
-    buf_size = dpdk_buf_size(netdev->mtu);
-    netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, FRAME_LEN_TO_MTU(buf_size));
-    if (!netdev->dpdk_mp) {
-        err = ENOMEM;
-        goto unlock;
-    }
+    netdev->mtu = 0;
 
     netdev_->n_txq = NR_QUEUE;
     netdev_->n_rxq = NR_QUEUE;
@@ -640,20 +612,12 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no,
 
     if (type == DPDK_DEV_ETH) {
         netdev_dpdk_alloc_txq(netdev, NR_QUEUE);
-        err = dpdk_eth_dev_init(netdev);
-        if (err) {
-            goto unlock;
-        }
     }
 
     list_push_back(&dpdk_list, &netdev->list_node);
 
-unlock:
-    if (err) {
-        rte_free(netdev->tx_q);
-    }
     ovs_mutex_unlock(&netdev->mutex);
-    return err;
+    return 0;
 }
 
 static int
@@ -671,6 +635,43 @@ dpdk_dev_parse_name(const char dev_name[], const char prefix[],
     return 0;
 }
 
+/* Parses the "mtu_request" key of 'args' and stores the resulting MTU in
+ * '*mtu'.
+ *
+ * The value is accepted only if it is a complete integer literal (base
+ * auto-detected, as by strtol() with base 0) within
+ * [ETHER_MTU, FRAME_LEN_TO_MTU(NETDEV_DPDK_MAX_FRAME_LEN)].  If the key is
+ * absent or the value is rejected, a warning is logged and '*mtu' is set to
+ * ETHER_MTU. */
+static void
+dpdk_dev_parse_mtu(const struct smap *args, int *mtu)
+{
+    const char *mtu_str = smap_get(args, "mtu_request");
+    int local_mtu = ETHER_MTU;
+    bool valid = false;
+
+    if (mtu_str) {
+        char *end_ptr = NULL;
+        /* Range-check at full width: narrowing to 'int' first could wrap
+         * an out-of-range request into the accepted range. */
+        long parsed = strtol(mtu_str, &end_ptr, 0);
+
+        if (end_ptr != mtu_str && *end_ptr == '\0'
+            && parsed >= ETHER_MTU
+            && parsed <= FRAME_LEN_TO_MTU(NETDEV_DPDK_MAX_FRAME_LEN)) {
+            local_mtu = parsed;
+            valid = true;
+        }
+    }
+
+    if (!valid) {
+        VLOG_WARN("Invalid or missing mtu_request parameter - defaulting to %d.\n",
+                   local_mtu);
+    }
+
+    *mtu = local_mtu;
+}
+
 static int
 vhost_construct_helper(struct netdev *netdev_) OVS_REQUIRES(dpdk_mutex)
 {
@@ -801,15 +786,93 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
     smap_add_format(args, "configured_rx_queues", "%d", netdev->n_rxq);
     smap_add_format(args, "requested_tx_queues", "%d", netdev->n_txq);
     smap_add_format(args, "configured_tx_queues", "%d", dev->real_n_txq);
+    smap_add_format(args, "mtu", "%d", dev->mtu);
     ovs_mutex_unlock(&dev->mutex);
 
     return 0;
 }
 
+/* Sets the MTU of DPDK_DEV_ETH ports.
+ *
+ * Obtains a mempool sized for 'mtu', stops the port and re-initializes it
+ * with the new MTU and maximum frame length.  If that re-initialization
+ * fails, the port is re-initialized with ETHER_MTU instead; the error from
+ * the first attempt is still returned.
+ *
+ * Returns 0 on success (also when 'mtu' is already in effect), ENOMEM if a
+ * mempool cannot be obtained, otherwise the dpdk_eth_dev_init() error. */
+static int
+netdev_dpdk_set_mtu(const struct netdev *netdev, int mtu)
+{
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    int err, dpdk_mtu;
+    uint32_t buf_size;
+    struct dpdk_mp *old_mp;
+    struct dpdk_mp *mp;
+
+    ovs_mutex_lock(&dpdk_mutex);
+    ovs_mutex_lock(&dev->mutex);
+    if (dev->mtu == mtu) {
+        err = 0;
+        goto out;
+    }
+
+    buf_size = dpdk_buf_size(mtu);
+    dpdk_mtu = FRAME_LEN_TO_MTU(buf_size);
+
+    mp = dpdk_mp_get(dev->socket_id, dpdk_mtu);
+    if (!mp) {
+        err = ENOMEM;
+        goto out;
+    }
+
+    rte_eth_dev_stop(dev->port_id);
+
+    /* Remember the pool being replaced so its reference can be dropped once
+     * the port no longer uses it.
+     * NOTE(review): assumes dev->dpdk_mp is NULL before the first call --
+     * confirm that the netdev is zero-allocated. */
+    old_mp = dev->dpdk_mp;
+    dev->dpdk_mp = mp;
+    dev->mtu = mtu;
+    dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
+
+    err = dpdk_eth_dev_init(dev);
+    if (err) {
+        VLOG_WARN("Unable to set MTU '%d' for '%s'; falling back to default "
+                  "MTU '%d'\n", mtu, dev->up.name, ETHER_MTU);
+        dpdk_mp_put(mp);
+        dev->mtu = ETHER_MTU;
+        mp = dpdk_mp_get(dev->socket_id, dev->mtu);
+        if (!mp) {
+            /* Do not leave the port pointing at the pool just released. */
+            dev->dpdk_mp = old_mp;
+            err = ENOMEM;
+            goto out;
+        }
+        dev->dpdk_mp = mp;
+        dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
+        /* Best effort: 'err' keeps the failure of the requested MTU. */
+        dpdk_eth_dev_init(dev);
+    } else {
+        netdev_change_seq_changed(netdev);
+    }
+
+    /* The port now uses a different pool; release the previous reference. */
+    if (old_mp) {
+        dpdk_mp_put(old_mp);
+    }
+out:
+    ovs_mutex_unlock(&dev->mutex);
+    ovs_mutex_unlock(&dpdk_mutex);
+    return err;
+}
+
 static int
 netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    int mtu;
 
     ovs_mutex_lock(&dev->mutex);
     netdev->requested_n_rxq = MAX(smap_get_int(args, "n_rxq",
@@ -817,6 +859,10 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)
     netdev_change_seq_changed(netdev);
     ovs_mutex_unlock(&dev->mutex);
 
+    if (!dev->mtu) {
+        dpdk_dev_parse_mtu(args, &mtu);
+        return netdev_dpdk_set_mtu(netdev, mtu);
+    }
     return 0;
 }
 
@@ -1435,53 +1481,6 @@ netdev_dpdk_get_mtu(const struct netdev *netdev, int *mtup)
 }
 
 static int
-netdev_dpdk_set_mtu(const struct netdev *netdev, int mtu)
-{
-    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-    int old_mtu, err;
-    struct dpdk_mp *old_mp;
-    struct dpdk_mp *mp;
-
-    ovs_mutex_lock(&dpdk_mutex);
-    ovs_mutex_lock(&dev->mutex);
-    if (dev->mtu == mtu) {
-        err = 0;
-        goto out;
-    }
-
-    mp = dpdk_mp_get(dev->socket_id, dev->mtu);
-    if (!mp) {
-        err = ENOMEM;
-        goto out;
-    }
-
-    rte_eth_dev_stop(dev->port_id);
-
-    old_mtu = dev->mtu;
-    old_mp = dev->dpdk_mp;
-    dev->dpdk_mp = mp;
-    dev->mtu = mtu;
-    dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
-
-    err = dpdk_eth_dev_init(dev);
-    if (err) {
-        dpdk_mp_put(mp);
-        dev->mtu = old_mtu;
-        dev->dpdk_mp = old_mp;
-        dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
-        dpdk_eth_dev_init(dev);
-        goto out;
-    }
-
-    dpdk_mp_put(old_mp);
-    netdev_change_seq_changed(netdev);
-out:
-    ovs_mutex_unlock(&dev->mutex);
-    ovs_mutex_unlock(&dpdk_mutex);
-    return err;
-}
-
-static int
 netdev_dpdk_get_carrier(const struct netdev *netdev_, bool *carrier);
 
 static int
@@ -2024,6 +2023,77 @@ dpdk_vhost_user_class_init(void)
     return 0;
 }
 
+/* Sets the MTU of DPDK_DEV_VHOST ports.
+ *
+ * Obtains a mempool for 'mtu' and records the new MTU and maximum frame
+ * length on the device; unlike the DPDK_DEV_ETH variant, no port is stopped
+ * or re-initialized.
+ *
+ * Returns 0 on success (also when 'mtu' is already in effect) or ENOMEM if
+ * the mempool cannot be obtained. */
+static int
+netdev_dpdk_vhost_set_mtu(const struct netdev *netdev, int mtu)
+{
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    int err = 0;
+    struct dpdk_mp *old_mp;
+    struct dpdk_mp *mp;
+
+    ovs_mutex_lock(&dpdk_mutex);
+    ovs_mutex_lock(&dev->mutex);
+    if (dev->mtu == mtu) {
+        goto out;
+    }
+
+    mp = dpdk_mp_get(dev->socket_id, mtu);
+    if (!mp) {
+        err = ENOMEM;
+        goto out;
+    }
+
+    /* Swap in the new pool and drop the reference on the one it replaces.
+     * NOTE(review): assumes dev->dpdk_mp is NULL before the first call --
+     * confirm that the netdev is zero-allocated. */
+    old_mp = dev->dpdk_mp;
+    dev->dpdk_mp = mp;
+    dev->mtu = mtu;
+    dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
+    if (old_mp) {
+        dpdk_mp_put(old_mp);
+    }
+
+    netdev_change_seq_changed(netdev);
+out:
+    ovs_mutex_unlock(&dev->mutex);
+    ovs_mutex_unlock(&dpdk_mutex);
+    return err;
+}
+
+/* Applies 'args' to a DPDK_DEV_VHOST port: updates the requested number of
+ * rx queues from "n_rxq" (minimum 1) and, if no MTU has been set on the
+ * device yet, sets the MTU parsed from 'args'.
+ *
+ * Returns 0, or the error from netdev_dpdk_vhost_set_mtu(). */
+static int
+netdev_dpdk_vhost_set_config(struct netdev *netdev, const struct smap *args)
+{
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    int mtu;
+
+    ovs_mutex_lock(&dev->mutex);
+    netdev->requested_n_rxq = MAX(smap_get_int(args, "n_rxq",
+                                               netdev->requested_n_rxq), 1);
+    netdev_change_seq_changed(netdev);
+    ovs_mutex_unlock(&dev->mutex);
+
+    /* The MTU is configured from 'args' only while dev->mtu is still 0. */
+    if (!dev->mtu) {
+        dpdk_dev_parse_mtu(args, &mtu);
+        return netdev_dpdk_vhost_set_mtu(netdev, mtu);
+    }
+    return 0;
+}
+
 static void
 dpdk_common_init(void)
 {
@@ -2160,8 +2210,9 @@ unlock_dpdk:
     return err;
 }
 
-#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, MULTIQ, SEND, \
-    GET_CARRIER, GET_STATS, GET_FEATURES, GET_STATUS, RXQ_RECV)          \
+#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, SET_CONFIG, \
+        MULTIQ, SEND, SET_MTU, GET_CARRIER, GET_STATS, GET_FEATURES,   \
+        GET_STATUS, RXQ_RECV)                                          \
 {                                                             \
     NAME,                                                     \
     INIT,                       /* init */                    \
@@ -2173,7 +2224,7 @@ unlock_dpdk:
     DESTRUCT,                                                 \
     netdev_dpdk_dealloc,                                      \
     netdev_dpdk_get_config,                                   \
-    netdev_dpdk_set_config,                                   \
+    SET_CONFIG            ,                                   \
     NULL,                       /* get_tunnel_config */       \
     NULL,                       /* build header */            \
     NULL,                       /* push header */             \
@@ -2187,7 +2238,7 @@ unlock_dpdk:
     netdev_dpdk_set_etheraddr,                                \
     netdev_dpdk_get_etheraddr,                                \
     netdev_dpdk_get_mtu,                                      \
-    netdev_dpdk_set_mtu,                                      \
+    SET_MTU,                                                  \
     netdev_dpdk_get_ifindex,                                  \
     GET_CARRIER,                                              \
     netdev_dpdk_get_carrier_resets,                           \
@@ -2333,8 +2384,10 @@ static const struct netdev_class dpdk_class =
         NULL,
         netdev_dpdk_construct,
         netdev_dpdk_destruct,
+        netdev_dpdk_set_config,
         netdev_dpdk_set_multiq,
         netdev_dpdk_eth_send,
+        netdev_dpdk_set_mtu,
         netdev_dpdk_get_carrier,
         netdev_dpdk_get_stats,
         netdev_dpdk_get_features,
@@ -2347,8 +2400,10 @@ static const struct netdev_class dpdk_ring_class =
         NULL,
         netdev_dpdk_ring_construct,
         netdev_dpdk_destruct,
+        netdev_dpdk_set_config,
         netdev_dpdk_set_multiq,
         netdev_dpdk_ring_send,
+        netdev_dpdk_set_mtu,
         netdev_dpdk_get_carrier,
         netdev_dpdk_get_stats,
         netdev_dpdk_get_features,
@@ -2361,8 +2416,10 @@ static const struct netdev_class OVS_UNUSED dpdk_vhost_cuse_class =
         dpdk_vhost_cuse_class_init,
         netdev_dpdk_vhost_cuse_construct,
         netdev_dpdk_vhost_destruct,
+        netdev_dpdk_set_config,
         netdev_dpdk_vhost_cuse_set_multiq,
         netdev_dpdk_vhost_send,
+        NULL,
         netdev_dpdk_vhost_get_carrier,
         netdev_dpdk_vhost_get_stats,
         NULL,
@@ -2375,8 +2432,10 @@ static const struct netdev_class OVS_UNUSED dpdk_vhost_user_class =
         dpdk_vhost_user_class_init,
         netdev_dpdk_vhost_user_construct,
         netdev_dpdk_vhost_destruct,
+        netdev_dpdk_vhost_set_config,
         netdev_dpdk_vhost_set_multiq,
         netdev_dpdk_vhost_send,
+        netdev_dpdk_vhost_set_mtu,
         netdev_dpdk_vhost_get_carrier,
         netdev_dpdk_vhost_get_stats,
         NULL,
-- 
1.9.3




More information about the dev mailing list