[ovs-dev] [PATCH 1/1] netdev-dpdk: enable multi-segment jumbo frames

Mark Kavanagh mark.b.kavanagh at intel.com
Mon May 8 09:59:30 UTC 2017


Currently, jumbo frame support for OvS-DPDK is implemented by increasing
the size of mbufs within a mempool, such that each mbuf within the pool
is large enough to contain an entire jumbo frame of a user-defined size.
Typically, for each user-defined MTU 'requested_mtu', a new mempool is
created, containing mbufs of size ~requested_mtu.

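As a rough sketch of that sizing (illustrative only: the constants are
simplified, and 'jumbo_buf_size' is a stand-in for the actual
dpdk_buf_size() helper in lib/netdev-dpdk.c):

    #include <stdint.h>

    #define ETHER_HDR_LEN     14    /* Ethernet header */
    #define ETHER_CRC_LEN     4     /* frame check sequence */
    #define PKTMBUF_HEADROOM  128   /* cf. RTE_PKTMBUF_HEADROOM */
    #define MBUF_SIZE_ALIGN   1024  /* sizing granularity (illustrative) */

    /* Size of each mbuf in a mempool built for 'requested_mtu': the
     * entire frame, plus headroom, must fit in a single buffer. */
    static uint32_t
    jumbo_buf_size(int requested_mtu)
    {
        uint32_t len = requested_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN
                       + PKTMBUF_HEADROOM;

        /* Round up to the sizing granularity. */
        return (len + MBUF_SIZE_ALIGN - 1) & ~(MBUF_SIZE_ALIGN - 1);
    }
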
With the multi-segment approach, all ports share the same mempool, in
which each mbuf is of standard/default size (~2 KB). To accommodate
jumbo frames, mbufs may be chained together, each mbuf storing a portion
of the jumbo frame; each mbuf in the chain is termed a segment, hence
the name.
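
By way of illustration, a consumer walks such a chain via each
segment's 'next' pointer; 'data_len' covers a single segment, while
'pkt_len' covers the chain as a whole. A minimal sketch against DPDK's
public rte_mbuf API (not part of this patch; 'copy_out_jumbo' is an
illustrative name):

    #include <rte_mbuf.h>
    #include <rte_memcpy.h>

    /* Flatten a (possibly multi-segment) mbuf chain into 'dst', which
     * must be at least 'm->pkt_len' bytes. */
    static void
    copy_out_jumbo(struct rte_mbuf *m, char *dst)
    {
        struct rte_mbuf *seg;

        for (seg = m; seg; seg = seg->next) {
            rte_memcpy(dst, rte_pktmbuf_mtod(seg, char *), seg->data_len);
            dst += seg->data_len;
        }
    }
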
---
 lib/dpdk.c           |  7 +++++++
 lib/netdev-dpdk.c    | 54 ++++++++++++++++++++++++++++++++++++++++++++++------
 lib/netdev-dpdk.h    |  1 +
 vswitchd/vswitch.xml | 19 ++++++++++++++++++
 4 files changed, 75 insertions(+), 6 deletions(-)

diff --git a/lib/dpdk.c b/lib/dpdk.c
index 8da6c32..7d08e34 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -450,6 +450,13 @@ dpdk_init__(const struct smap *ovs_other_config)
 
     /* Finally, register the dpdk classes */
     netdev_dpdk_register();
+
+    bool multi_seg_mbufs_enable = smap_get_bool(ovs_other_config,
+                                                "dpdk-multi-seg-mbufs", false);
+    if (multi_seg_mbufs_enable) {
+        VLOG_INFO("DPDK multi-segment mbufs enabled");
+        netdev_dpdk_multi_segment_mbufs_enable();
+    }
 }
 
 void
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 34fc54b..82bc0e2 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -58,6 +58,7 @@
 
 VLOG_DEFINE_THIS_MODULE(netdev_dpdk);
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+static bool dpdk_multi_segment_mbufs = false;
 
 #define DPDK_PORT_WATCHDOG_INTERVAL 5
 
@@ -480,7 +481,7 @@ dpdk_mp_create(int socket_id, int mtu)
      * when the number of ports and rxqs that utilize a particular mempool can
      * change dynamically at runtime. For now, use this rough heurisitic.
      */
-    if (mtu >= ETHER_MTU) {
+    if (mtu >= ETHER_MTU || dpdk_multi_segment_mbufs) {
         mp_size = MAX_NB_MBUF;
     } else {
         mp_size = MIN_NB_MBUF;
@@ -558,17 +559,33 @@ dpdk_mp_put(struct dpdk_mp *dmp)
     ovs_mutex_unlock(&dpdk_mp_mutex);
 }
 
-/* Tries to allocate new mempool on requested_socket_id with
- * mbuf size corresponding to requested_mtu.
+/* Tries to configure a mempool for 'dev' on the requested socket_id, to
+ * accommodate packets of size 'requested_mtu'. The properties of the
+ * mempool's elements depend on the value of 'dpdk_multi_segment_mbufs':
+ * - if 'true', then the mempool contains standard-sized mbufs that are chained
+ *   together to accommodate packets of size 'requested_mtu'. All ports on the
+ *   same socket will share this mempool, irrespective of their MTU.
+ * - if 'false', then a mempool is allocated, the members of which are non-
+ *   standard-sized mbufs. Each mbuf in the mempool is large enough to fully
+ *   accommodate packets of size 'requested_mtu'.
+ *
  * On success new configuration will be applied.
  * On error, device will be left unchanged. */
 static int
 netdev_dpdk_mempool_configure(struct netdev_dpdk *dev)
     OVS_REQUIRES(dev->mutex)
 {
-    uint32_t buf_size = dpdk_buf_size(dev->requested_mtu);
+    uint32_t buf_size = 0;
     struct dpdk_mp *mp;
 
+    /* Contiguous mbufs in use - permit oversized mbufs */
+    if (!dpdk_multi_segment_mbufs) {
+        buf_size = dpdk_buf_size(dev->requested_mtu);
+    } else {
+        /* multi-segment mbufs - use standard mbuf size */
+        buf_size = dpdk_buf_size(ETHER_MTU);
+    }
+
     mp = dpdk_mp_get(dev->requested_socket_id, FRAME_LEN_TO_MTU(buf_size));
     if (!mp) {
         VLOG_ERR("Failed to create memory pool for netdev "
@@ -577,7 +594,13 @@ netdev_dpdk_mempool_configure(struct netdev_dpdk *dev)
                  rte_strerror(rte_errno));
         return rte_errno;
     } else {
-        dpdk_mp_put(dev->dpdk_mp);
+        /* When single-segment mbufs are in use, a new mempool is allocated,
+         * so release the old one. In the case of multi-segment mbufs, the
+         * same mempool is used for all MTUs.
+         */
+        if (!dpdk_multi_segment_mbufs) {
+            dpdk_mp_put(dev->dpdk_mp);
+        }
         dev->dpdk_mp = mp;
         dev->mtu = dev->requested_mtu;
         dev->socket_id = dev->requested_socket_id;
@@ -639,6 +662,7 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
     int diag = 0;
     int i;
     struct rte_eth_conf conf = port_conf;
+    struct rte_eth_txconf txconf;
 
     if (dev->mtu > ETHER_MTU) {
         conf.rxmode.jumbo_frame = 1;
@@ -666,9 +690,21 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
             break;
         }
 
+        /* DPDK PMDs typically attempt to use simple or vectorized
+         * transmit functions, neither of which is compatible with
+         * multi-segment mbufs. Ensure that these are disabled in the
+         * multi-segment mbuf case.
+         */
+        if (dpdk_multi_segment_mbufs) {
+            memset(&txconf, 0, sizeof(txconf));
+            txconf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS;
+        }
+
         for (i = 0; i < n_txq; i++) {
             diag = rte_eth_tx_queue_setup(dev->port_id, i, dev->txq_size,
-                                          dev->socket_id, NULL);
+                                          dev->socket_id,
+                                          dpdk_multi_segment_mbufs ? &txconf
+                                                                   : NULL);
             if (diag) {
                 VLOG_INFO("Interface %s txq(%d) setup error: %s",
                           dev->up.name, i, rte_strerror(-diag));
@@ -3287,6 +3323,12 @@ unlock:
     return err;
 }
 
+void
+netdev_dpdk_multi_segment_mbufs_enable(void)
+{
+    dpdk_multi_segment_mbufs = true;
+}
+
 #define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT,    \
                           SET_CONFIG, SET_TX_MULTIQ, SEND,    \
                           GET_CARRIER, GET_STATS,             \
diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
index b7d02a7..5b14c96 100644
--- a/lib/netdev-dpdk.h
+++ b/lib/netdev-dpdk.h
@@ -26,6 +26,7 @@ struct dp_packet;
 #ifdef DPDK_NETDEV
 
 void netdev_dpdk_register(void);
+void netdev_dpdk_multi_segment_mbufs_enable(void);
 void free_dpdk_buf(struct dp_packet *);
 
 #else
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index bb66cb5..fd8551e 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -283,6 +283,25 @@
         </p>
       </column>
 
+      <column name="other_config" key="dpdk-multi-seg-mbufs"
+              type='{"type": "boolean"}'>
+        <p>
+          Specifies whether DPDK uses multi-segment mbufs to handle jumbo frames.
+        </p>
+        <p>
+          If true, DPDK allocates a single mempool for all ports, irrespective
+          of the ports' requested MTU sizes. The elements of this mempool are
+          standard-sized mbufs (typically ~2 KB), which may be chained together
+          to accommodate jumbo frames. In this approach, each mbuf typically
+          stores a fragment of the overall jumbo frame.
+        </p>
+        <p>
+          If not specified, defaults to <code>false</code>, in which case
+          the size of each mbuf within a DPDK port's mempool is increased
+          to accommodate jumbo frames within a single mbuf.
+        </p>
+      </column>
+
       <column name="other_config" key="dpdk-extra"
               type='{"type": "string"}'>
         <p>
-- 
1.9.3
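
Usage sketch, once the patch is applied ('dpdk0' is an illustrative
port name; the new knob is read during dpdk_init__(), so it must be set
before OvS-DPDK initialization, and mtu_request is the existing
per-Interface MTU knob):

    ovs-vsctl set Open_vSwitch . other_config:dpdk-multi-seg-mbufs=true
    ovs-vsctl set Interface dpdk0 mtu_request=9000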


