[ovs-dev] [RFC v5 3/8] dp-packet: Add support for multi-seg mbufs

Tiago Lam tiago.lam at intel.com
Tue May 1 17:02:09 UTC 2018


From: Mark Kavanagh <mark.b.kavanagh at intel.com>

Some functions in dp-packet assume that the data held by a dp_packet is
contiguous, and perform operations such as pointer arithmetic under that
assumption. However, with the introduction of multi-segment mbufs, where
data is non-contiguous, such assumptions are no longer possible. Thus,
dp_packet_put_init(), dp_packet_shift(), dp_packet_tail(),
dp_packet_end() and dp_packet_at() were modified to take multi-segment
mbufs into account.

Both dp_packet_put_uninit() and dp_packet_shift() are, in their current
implementation, operating on the data buffer of a dp_packet as if it
were contiguous, which in the case of multi-segment mbufs means they
operate on the first mbuf in the chain. However, in the case of
dp_packet_put_uninit(), for example, it is the data length of the last
mbuf in the mbuf chain that should be adjusted. Both functions have thus
been modified to support multi-segment mbufs.

Finally, dp_packet_tail(), dp_packet_end() and dp_packet_at() were also
modified to operate differently when dealing with multi-segment mbufs,
and now iterate over the non-contiguous data buffers for their
calculations.

Co-authored-by: Tiago Lam <tiago.lam at intel.com>

Signed-off-by: Mark Kavanagh <mark.b.kavanagh at intel.com>
Signed-off-by: Tiago Lam <tiago.lam at intel.com>
---
 lib/dp-packet.c   |  44 ++++++++++++++++-
 lib/dp-packet.h   | 142 ++++++++++++++++++++++++++++++++++++++----------------
 lib/netdev-dpdk.c |   8 +--
 3 files changed, 147 insertions(+), 47 deletions(-)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 443c225..fd9fad0 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -298,10 +298,33 @@ dp_packet_prealloc_headroom(struct dp_packet *b, size_t size)
 /* Shifts all of the data within the allocated space in 'b' by 'delta' bytes.
  * For example, a 'delta' of 1 would cause each byte of data to move one byte
  * forward (from address 'p' to 'p+1'), and a 'delta' of -1 would cause each
- * byte to move one byte backward (from 'p' to 'p-1'). */
+ * byte to move one byte backward (from 'p' to 'p-1').
+ * Note for DPBUF_DPDK(XXX): The shift can only move within a size of RTE_
+ * PKTMBUF_HEADROOM, to either left or right, which is usually defined as 128
+ * bytes.
+ */
 void
 dp_packet_shift(struct dp_packet *b, int delta)
 {
+#ifdef DPDK_NETDEV
+     if (b->source == DPBUF_DPDK) {
+        ovs_assert(delta > 0 ? delta <= dp_packet_headroom(b)
+                   : delta < 0 ? -delta <= dp_packet_headroom(b)
+                   : true);
+
+        if (delta != 0) {
+            struct rte_mbuf *mbuf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
+
+            if (delta > 0) {
+                rte_pktmbuf_prepend(mbuf, delta);
+            } else {
+                rte_pktmbuf_prepend(mbuf, delta);
+            }
+        }
+
+        return;
+    }
+#endif
     ovs_assert(delta > 0 ? delta <= dp_packet_tailroom(b)
                : delta < 0 ? -delta <= dp_packet_headroom(b)
                : true);
@@ -315,14 +338,31 @@ dp_packet_shift(struct dp_packet *b, int delta)
 
 /* Appends 'size' bytes of data to the tail end of 'b', reallocating and
  * copying its data if necessary.  Returns a pointer to the first byte of the
- * new data, which is left uninitialized. */
+ * new data, which is left uninitialized.
+ * Note for DPBUF_DPDK(XXX): In this case there must be enough tailroom to put
+ * the data in, otherwise this will result in a call to ovs_abort(). */
 void *
 dp_packet_put_uninit(struct dp_packet *b, size_t size)
 {
     void *p;
     dp_packet_prealloc_tailroom(b, size);
     p = dp_packet_tail(b);
+#ifdef DPDK_NETDEV
+    if (b->source == DPBUF_DPDK) {
+        /* In the case of multi-segment mbufs, the data length of the last mbuf
+         * should be adjusted by 'size' bytes. The packet length of the entire
+         * mbuf chain (stored in the first mbuf of said chain) is adjusted in
+         * the normal execution path below.
+         */
+        struct rte_mbuf *buf = &(b->mbuf);
+        buf = rte_pktmbuf_lastseg(buf);
+
+        buf->data_len += size;
+    }
+#endif
+
     dp_packet_set_size(b, dp_packet_size(b) + size);
+
     return p;
 }
 
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 9bfb7b7..d6512cf 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -55,12 +55,12 @@ struct dp_packet {
     struct rte_mbuf mbuf;       /* DPDK mbuf */
 #else
     void *base_;                /* First byte of allocated space. */
-    uint16_t allocated_;        /* Number of bytes allocated. */
     uint16_t data_ofs;          /* First byte actually in use. */
     uint32_t size_;             /* Number of bytes in use. */
     uint32_t rss_hash;          /* Packet hash. */
     bool rss_hash_valid;        /* Is the 'rss_hash' valid? */
 #endif
+    uint16_t allocated_;        /* Number of bytes allocated. */
     enum dp_packet_source source;  /* Source of memory allocated as 'base'. */
 
     /* All the following elements of this struct are copied in a single call
@@ -133,6 +133,8 @@ static inline void *dp_packet_at(const struct dp_packet *, size_t offset,
                                  size_t size);
 static inline void *dp_packet_at_assert(const struct dp_packet *,
                                         size_t offset, size_t size);
+static inline void * dp_packet_at_offset(const struct dp_packet *b,
+                                         size_t offset);
 static inline void *dp_packet_tail(const struct dp_packet *);
 static inline void *dp_packet_end(const struct dp_packet *);
 
@@ -180,40 +182,6 @@ dp_packet_delete(struct dp_packet *b)
     }
 }
 
-/* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to
- * byte 'offset'.  Otherwise, returns a null pointer. */
-static inline void *
-dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
-{
-    return offset + size <= dp_packet_size(b)
-           ? (char *) dp_packet_data(b) + offset
-           : NULL;
-}
-
-/* Returns a pointer to byte 'offset' in 'b', which must contain at least
- * 'offset + size' bytes of data. */
-static inline void *
-dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t size)
-{
-    ovs_assert(offset + size <= dp_packet_size(b));
-    return ((char *) dp_packet_data(b)) + offset;
-}
-
-/* Returns a pointer to byte following the last byte of data in use in 'b'. */
-static inline void *
-dp_packet_tail(const struct dp_packet *b)
-{
-    return (char *) dp_packet_data(b) + dp_packet_size(b);
-}
-
-/* Returns a pointer to byte following the last byte allocated for use (but
- * not necessarily in use) in 'b'. */
-static inline void *
-dp_packet_end(const struct dp_packet *b)
-{
-    return (char *) dp_packet_base(b) + dp_packet_get_allocated(b);
-}
-
 /* Returns the number of bytes of headroom in 'b', that is, the number of bytes
  * of unused space in dp_packet 'b' before the data that is in use.  (Most
  * commonly, the data in a dp_packet is at its beginning, and thus the
@@ -454,18 +422,107 @@ __packet_set_data(struct dp_packet *b, uint16_t v)
     b->mbuf.data_off = v;
 }
 
-static inline uint16_t
-dp_packet_get_allocated(const struct dp_packet *b)
+static inline void *
+dp_packet_at_offset(const struct dp_packet *b, size_t offset)
 {
-    return b->mbuf.buf_len;
+    if (b->source == DPBUF_DPDK) {
+        struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
+
+        while (buf && offset > buf->data_len) {
+            offset -= buf->data_len;
+
+            buf = buf->next;
+        }
+        return buf ? rte_pktmbuf_mtod_offset(buf, char *, offset) : NULL;
+    } else {
+        return (char *) dp_packet_data(b) + offset;
+    }
 }
 
-static inline void
-dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
+/* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to
+ * byte 'offset'.  Otherwise, returns a null pointer. */
+static inline void *
+dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
+{
+    return offset + size <= dp_packet_size(b)
+        ? dp_packet_at_offset(b, offset)
+        : NULL;
+}
+
+/* Returns a pointer to byte 'offset' in 'b', which must contain at least
+ * 'offset + size' bytes of data. */
+static inline void *
+dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t size)
 {
-    b->mbuf.buf_len = s;
+    ovs_assert(offset + size <= dp_packet_size(b));
+    return dp_packet_at_offset(b, offset);
+}
+
+/* Returns a pointer to byte following the last byte of data in use in 'b'. */
+static inline void *
+dp_packet_tail(const struct dp_packet *b)
+{
+    if (b->source == DPBUF_DPDK) {
+        struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
+
+        buf = rte_pktmbuf_lastseg(buf);
+
+        return rte_pktmbuf_mtod_offset(buf, char *, buf->data_len);
+    } else {
+        return (char *) dp_packet_data(b) + dp_packet_size(b);
+    }
+}
+
+/* Returns a pointer to byte following the last byte allocated for use (but
+ * not necessarily in use) in 'b'. */
+static inline void *
+dp_packet_end(const struct dp_packet *b)
+{
+    if (b->source == DPBUF_DPDK) {
+        struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &(b->mbuf));
+
+        buf = rte_pktmbuf_lastseg(buf);
+
+        return rte_pktmbuf_mtod(buf, char *) + buf->buf_len;
+    } else {
+        return (char *) dp_packet_base(b) + dp_packet_get_allocated(b);
+    }
 }
 #else
+/* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to
+ * byte 'offset'.  Otherwise, returns a null pointer. */
+static inline void *
+dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
+{
+    return offset + size <= dp_packet_size(b)
+           ? (char *) dp_packet_data(b) + offset
+           : NULL;
+}
+
+/* Returns a pointer to byte 'offset' in 'b', which must contain at least
+ * 'offset + size' bytes of data. */
+static inline void *
+dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t size)
+{
+    ovs_assert(offset + size <= dp_packet_size(b));
+    return ((char *) dp_packet_data(b)) + offset;
+}
+
+/* Returns a pointer to byte following the last byte of data in use in 'b'. */
+static inline void *
+dp_packet_tail(const struct dp_packet *b)
+{
+    return (char *) dp_packet_data(b) + dp_packet_size(b);
+}
+
+/* Returns a pointer to byte following the last byte allocated for use (but
+ * not necessarily in use) in 'b'. */
+static inline void *
+dp_packet_end(const struct dp_packet *b)
+{
+    return (char *) dp_packet_base(b) + dp_packet_get_allocated(b);
+}
+
 static inline void *
 dp_packet_base(const struct dp_packet *b)
 {
@@ -502,6 +559,8 @@ __packet_set_data(struct dp_packet *b, uint16_t v)
     b->data_ofs = v;
 }
 
+#endif
+
 static inline uint16_t
 dp_packet_get_allocated(const struct dp_packet *b)
 {
@@ -513,7 +572,6 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
 {
     b->allocated_ = s;
 }
-#endif
 
 static inline void
 dp_packet_reset_cutlen(struct dp_packet *b)
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 648a1de..7008492 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -506,13 +506,14 @@ free_dpdk_buf(struct dp_packet *p)
 
 static void
 ovs_rte_pktmbuf_init(struct rte_mempool *mp OVS_UNUSED,
-                     void *opaque_arg OVS_UNUSED,
+                     void *opaque_arg,
                      void *_p,
                      unsigned i OVS_UNUSED)
 {
     struct rte_mbuf *pkt = _p;
+    uint16_t allocated = *(uint16_t *) opaque_arg;
 
-    dp_packet_init_dpdk((struct dp_packet *) pkt, pkt->buf_len);
+    dp_packet_init_dpdk((struct dp_packet *) pkt, allocated);
 }
 
 static int
@@ -643,7 +644,8 @@ dpdk_mp_create(struct netdev_dpdk *dev, uint16_t mbuf_pkt_data_len)
              * rte_mbuf part of each dp_packet. Some OvS specific fields
              * of the packet still need to be initialized by
              * ovs_rte_pktmbuf_init. */
-            rte_mempool_obj_iter(mp, ovs_rte_pktmbuf_init, NULL);
+            uint16_t allocated_bytes = dpdk_buf_size(dev->requested_mtu);
+            rte_mempool_obj_iter(mp, ovs_rte_pktmbuf_init, &allocated_bytes);
         } else if (rte_errno == EEXIST) {
             /* A mempool with the same name already exists.  We just
              * retrieve its pointer to be returned to the caller. */
-- 
2.7.4



More information about the dev mailing list