[ovs-dev] [PATCHv2] lib: upgrade to DPDK 1.8

Mark Kavanagh mark.b.kavanagh at intel.com
Fri Feb 6 16:43:26 UTC 2015


DPDK v1.8.0 makes significant changes to struct rte_mbuf, including
removal of the 'pkt' and 'data' fields. The latter, formerly a
pointer, is now calculated via an offset from the start of the
segment buffer. These fields are referenced by OVS when accessing
the data section of an ofpbuf.

The following changes are required to add support for DPDK 1.8:
- update affected functions to use the correct rte_mbuf fields
- remove init function from netdev-dpdk (no longer required as
  rte_eal_pci_probe() is now invoked from rte_eal_init())
- split large amounts of data across multiple ofpbufs; with the
  removal of the mbuf's 'data' pointer, and replacement with a
  'data_off' field, it is necessary to limit the size of data
  contained in an ofpbuf to UINT16_MAX when mbufs are used
  (data_off and data_len are both of type uint16_t).
  Were data not split across multiple ofpbufs, values larger
  than UINT16_MAX for 'data_len' and 'data_off' would result
  in wrap-around, and consequently, data corruption. Changes
  introduced in this patch prevent this from occurring.

Signed-off-by: Mark Kavanagh <mark.b.kavanagh at intel.com>
Signed-off-by: Mark Gray <mark.d.gray at intel.com>
Signed-off-by: Rory Sexton <rory.sexton at intel.com>
---
 lib/jsonrpc.c     |   27 +++++++++++++++++++--------
 lib/netdev-dpdk.c |   31 +++++++++----------------------
 lib/ofpbuf.c      |    4 +++-
 lib/ofpbuf.h      |   35 +++++++++++++++++++++++++++++------
 lib/packet-dpif.h |    4 ++--
 5 files changed, 62 insertions(+), 39 deletions(-)

diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c
index f15adca..7bbdc22 100644
--- a/lib/jsonrpc.c
+++ b/lib/jsonrpc.c
@@ -238,10 +238,10 @@ jsonrpc_log_msg(const struct jsonrpc *rpc, const char *title,
 int
 jsonrpc_send(struct jsonrpc *rpc, struct jsonrpc_msg *msg)
 {
-    struct ofpbuf *buf;
     struct json *json;
     size_t length;
     char *s;
+    size_t remaining;
 
     if (rpc->status) {
         jsonrpc_msg_destroy(msg);
@@ -252,15 +252,26 @@ jsonrpc_send(struct jsonrpc *rpc, struct jsonrpc_msg *msg)
 
     json = jsonrpc_msg_to_json(msg);
     s = json_to_string(json, 0);
-    length = strlen(s);
+    remaining = length = strlen(s);
     json_destroy(json);
 
-    buf = xmalloc(sizeof *buf);
-    ofpbuf_use(buf, s, length);
-    ofpbuf_set_size(buf, length);
-    list_push_back(&rpc->output, &buf->list_node);
-    rpc->output_count++;
-    rpc->backlog += length;
+    /* Large (i.e. > OFPBUF_DATA_MAX) strings must be split across multiple
+     * ofpbufs to prevent data corruption. This is largely applicable when DPDK
+     * mbufs are used, since the 'data_off' and 'data_len' fields are of type
+     * uint16_t, and subject to wrap-around if the amount of data destined for
+     * the containing ofpbuf exceeds UINT16_MAX.
+     */
+    while (remaining) {
+        size_t segment_size;
+        segment_size = remaining > OFPBUF_DATA_MAX ? OFPBUF_DATA_MAX : remaining;
+
+        struct ofpbuf *new_buf = ofpbuf_clone_data((char *)s + length - remaining, segment_size);
+        list_push_back(&rpc->output, &new_buf->list_node);
+
+        rpc->output_count++;
+        rpc->backlog += segment_size;
+        remaining -= segment_size;
+    }
 
     if (rpc->output_count >= 50) {
         VLOG_INFO_RL(&rl, "excessive sending backlog, jsonrpc: %s, num of"
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 0ede200..d4f859a 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -28,6 +28,9 @@
 #include <unistd.h>
 #include <stdio.h>
 
+#include <rte_config.h>
+#include <rte_mbuf.h>
+
 #include "dpif-netdev.h"
 #include "list.h"
 #include "netdev-dpdk.h"
@@ -265,13 +268,12 @@ __rte_pktmbuf_init(struct rte_mempool *mp,
     m->buf_len = (uint16_t)buf_len;
 
     /* keep some headroom between start of buffer and data */
-    m->pkt.data = (char*) m->buf_addr + RTE_MIN(RTE_PKTMBUF_HEADROOM, m->buf_len);
+    m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, m->buf_len);
 
     /* init some constant fields */
-    m->type = RTE_MBUF_PKT;
     m->pool = mp;
-    m->pkt.nb_segs = 1;
-    m->pkt.in_port = 0xff;
+    m->nb_segs = 1;
+    m->port = 0xff;
 }
 
 static void
@@ -825,7 +827,8 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dpif_packet ** pkts,
         }
 
         /* We have to do a copy for now */
-        memcpy(mbufs[newcnt]->pkt.data, ofpbuf_data(&pkts[i]->ofpbuf), size);
+        memcpy(rte_pktmbuf_mtod(mbufs[newcnt], char *),
+                ofpbuf_data(&pkts[i]->ofpbuf), size);
 
         rte_pktmbuf_data_len(mbufs[newcnt]) = size;
         rte_pktmbuf_pkt_len(mbufs[newcnt]) = size;
@@ -1270,22 +1273,6 @@ dpdk_common_init(void)
     ovs_thread_create("dpdk_watchdog", dpdk_watchdog, NULL);
 }
 
-static int
-dpdk_class_init(void)
-{
-    int result;
-
-    result = rte_eal_pci_probe();
-    if (result) {
-        VLOG_ERR("Cannot probe PCI");
-        return -result;
-    }
-
-    VLOG_INFO("Ethernet Device Count: %d", (int)rte_eth_dev_count());
-
-    return 0;
-}
-
 /* Client Rings */
 
 static int
@@ -1510,7 +1497,7 @@ dpdk_init(int argc, char **argv)
 const struct netdev_class dpdk_class =
     NETDEV_DPDK_CLASS(
         "dpdk",
-        dpdk_class_init,
+        NULL,
         netdev_dpdk_construct,
         netdev_dpdk_set_multiq,
         netdev_dpdk_eth_send);
diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c
index 4946e6f..6e9e17f 100644
--- a/lib/ofpbuf.c
+++ b/lib/ofpbuf.c
@@ -280,7 +280,6 @@ ofpbuf_resize__(struct ofpbuf *b, size_t new_headroom, size_t new_tailroom)
     }
 
     b->allocated = new_allocated;
-    ofpbuf_set_base(b, new_base);
 
     new_data = (char *) new_base + new_headroom;
     if (ofpbuf_data(b) != new_data) {
@@ -289,7 +288,10 @@ ofpbuf_resize__(struct ofpbuf *b, size_t new_headroom, size_t new_tailroom)
 
             b->frame = (char *) b->frame + data_delta;
         }
+        ofpbuf_set_base(b, new_base);
         ofpbuf_set_data(b, new_data);
+    } else {
+        ofpbuf_set_base(b, new_base);
     }
 }
 
diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h
index 4e7038d0..ef0c319 100644
--- a/lib/ofpbuf.h
+++ b/lib/ofpbuf.h
@@ -19,6 +19,11 @@
 
 #include <stddef.h>
 #include <stdint.h>
+
+#ifdef DPDK_NETDEV
+#include <rte_common.h>
+#endif
+
 #include "list.h"
 #include "packets.h"
 #include "util.h"
@@ -28,6 +33,12 @@
 extern "C" {
 #endif
 
+#ifdef DPDK_NETDEV
+    #define OFPBUF_DATA_MAX UINT16_MAX
+#else
+    #define OFPBUF_DATA_MAX UINT32_MAX
+#endif
+
 enum OVS_PACKED_ENUM ofpbuf_source {
     OFPBUF_MALLOC,              /* Obtained via malloc(). */
     OFPBUF_STACK,               /* Un-movable stack space or static buffer. */
@@ -386,12 +397,23 @@ BUILD_ASSERT_DECL(offsetof(struct ofpbuf, mbuf) == 0);
 
 static inline void * ofpbuf_data(const struct ofpbuf *b)
 {
-    return b->mbuf.pkt.data;
+    return rte_pktmbuf_mtod(&(b->mbuf), void *);
 }
 
 static inline void ofpbuf_set_data(struct ofpbuf *b, void *d)
 {
-    b->mbuf.pkt.data = d;
+    uintptr_t data_delta;
+
+    /* NULL 'd' value is valid */
+    if (unlikely(d == NULL)) {
+        b->mbuf.data_off = 0;
+    } else {
+        ovs_assert(d >= b->mbuf.buf_addr);
+        /* Work out the offset between the start of segment buffer and 'd' */
+        data_delta = RTE_PTR_DIFF(d, b->mbuf.buf_addr);
+        ovs_assert(data_delta <= OFPBUF_DATA_MAX);
+        b->mbuf.data_off = data_delta;
+    }
 }
 
 static inline void * ofpbuf_base(const struct ofpbuf *b)
@@ -406,14 +428,15 @@ static inline void ofpbuf_set_base(struct ofpbuf *b, void *d)
 
 static inline uint32_t ofpbuf_size(const struct ofpbuf *b)
 {
-    return b->mbuf.pkt.pkt_len;
+    return b->mbuf.pkt_len;
 }
 
 static inline void ofpbuf_set_size(struct ofpbuf *b, uint32_t v)
 {
-    b->mbuf.pkt.data_len = v;    /* Current seg length. */
-    b->mbuf.pkt.pkt_len = v;     /* Total length of all segments linked to
-                                  * this segment. */
+    ovs_assert(v <= OFPBUF_DATA_MAX);
+    b->mbuf.data_len = v;    /* Current seg length. */
+    b->mbuf.pkt_len = v;     /* Total length of all segments linked to
+                              * this segment. */
 }
 
 #else
diff --git a/lib/packet-dpif.h b/lib/packet-dpif.h
index 1a5efb6..692a81a 100644
--- a/lib/packet-dpif.h
+++ b/lib/packet-dpif.h
@@ -50,7 +50,7 @@ static inline void dpif_packet_delete(struct dpif_packet *p)
 static inline uint32_t dpif_packet_get_dp_hash(struct dpif_packet *p)
 {
 #ifdef DPDK_NETDEV
-    return p->ofpbuf.mbuf.pkt.hash.rss;
+    return p->ofpbuf.mbuf.hash.rss;
 #else
     return p->dp_hash;
 #endif
@@ -60,7 +60,7 @@ static inline void dpif_packet_set_dp_hash(struct dpif_packet *p,
                                            uint32_t hash)
 {
 #ifdef DPDK_NETDEV
-    p->ofpbuf.mbuf.pkt.hash.rss = hash;
+    p->ofpbuf.mbuf.hash.rss = hash;
 #else
     p->dp_hash = hash;
 #endif
-- 
1.7.4.1




More information about the dev mailing list