[ovs-dev] [PATCH v15 06/15] dp-packet: Add support for data "linearization".

Obrembski michalx.obrembski at intel.com
Wed Sep 11 08:08:19 UTC 2019


From: Tiago Lam <tiago.lam at intel.com>

Previous commits have added support to the dp_packet API to handle
multi-segmented packets, where data is not stored contiguously in
memory. However, in some cases, it is inevitable and data must be
provided contiguously. Examples of such cases are when performing csums
over the entire packet data, or when write()'ing to a file descriptor
(for a tap interface, for example). For such cases, the dp_packet API
has been extended to provide a way to transform a multi-segmented
DPBUF_DPDK packet into a DPBUF_MALLOC system packet (at the expense of a
copy of memory). If the packet's data is already stored in memory
contiguously then there's no need to convert the packet.

Thus, the main use cases that were assuming that a dp_packet's data is
always held contiguously in memory were changed to make use of the new
"linear functions" in the dp_packet API when there's a need to traverse
the entire packet's data. Per the examples above, when the packet's data
must be written to the tap's file descriptor, or when the conntrack module
needs to verify a packet's checksum, the data is now linearized.

Additionally, the miniflow_extract() function has been modified to check
that the respective packet headers don't span across multiple mbufs. This
requirement is needed to guarantee that callers can assume headers are
always in contiguous memory.

Signed-off-by: Tiago Lam <tiago.lam at intel.com>
Signed-off-by: Michal Obrembski <michalx.obrembski at intel.com>
---
 lib/conntrack.c               |   5 ++
 lib/crc32c.c                  |  17 +++-
 lib/crc32c.h                  |   2 +
 lib/dp-packet.c               |  18 ++++
 lib/dp-packet.h               | 197 +++++++++++++++++++++++++++++++++++++-----
 lib/dpif-netdev.c             |  18 +++-
 lib/dpif-netlink.c            |   3 +
 lib/dpif.c                    |   6 ++
 lib/flow.c                    | 111 ++++++++++++++++++++----
 lib/flow.h                    |   4 +-
 lib/mcast-snooping.c          |   2 +
 lib/netdev-bsd.c              |   3 +
 lib/netdev-dummy.c            |   6 ++
 lib/netdev-linux.c            |   6 ++
 lib/netdev-native-tnl.c       |  26 +++---
 lib/odp-execute.c             |  24 ++++-
 lib/packets.c                 |  96 +++++++++++++++++---
 lib/packets.h                 |   7 ++
 ofproto/ofproto-dpif-upcall.c |  21 +++--
 ofproto/ofproto-dpif-xlate.c  |  27 +++++-
 tests/test-rstp.c             |   9 +-
 tests/test-stp.c              |   9 +-
 22 files changed, 529 insertions(+), 88 deletions(-)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index e5266e5..9976546 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -1211,6 +1211,11 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
     struct conn_lookup_ctx ctx;
 
     DP_PACKET_BATCH_FOR_EACH (i, packet, pkt_batch) {
+        /* Linearize the packet to ensure conntrack has the whole data */
+        if (!dp_packet_is_linear(packet)) {
+            dp_packet_linearize(packet);
+        }
+
         if (packet->md.ct_state == CS_INVALID
             || !conn_key_extract(ct, packet, dl_type, &ctx, zone)) {
             packet->md.ct_state = CS_INVALID;
diff --git a/lib/crc32c.c b/lib/crc32c.c
index e8dd6ee..83beec7 100644
--- a/lib/crc32c.c
+++ b/lib/crc32c.c
@@ -141,19 +141,30 @@ ovs_be32
 crc32c(const uint8_t *data, size_t size)
 {
     uint32_t crc = 0xffffffffL;
+    return crc32c_finish(crc32c_continue(crc, data, size));
+}
 
+uint32_t
+crc32c_continue(uint32_t partial, const uint8_t *data, size_t size)
+{
     while (size--) {
-        crc = crc32Table[(crc ^ *data++) & 0xff] ^ (crc >> 8);
+        partial = crc32Table[(partial ^ *data++) & 0xff] ^ (partial >> 8);
     }
 
+    return partial;
+}
+
+ovs_be32
+crc32c_finish(uint32_t partial)
+{
     /* The result of this CRC calculation provides us a value in the reverse
      * byte-order as compared with our architecture. On big-endian systems,
      * this is opposite to our return type. So, to return a big-endian
      * value, we must swap the byte-order. */
 #if defined(WORDS_BIGENDIAN)
-    crc = uint32_byteswap(crc);
+    partial = uint32_byteswap(partial);
 #endif
 
     /* Our value is in network byte-order. OVS_FORCE keeps sparse happy. */
-    return (OVS_FORCE ovs_be32) ~crc;
+    return (OVS_FORCE ovs_be32) ~partial;
 }
diff --git a/lib/crc32c.h b/lib/crc32c.h
index 92c7d7f..17c8190 100644
--- a/lib/crc32c.h
+++ b/lib/crc32c.h
@@ -20,6 +20,8 @@
 
 #include "openvswitch/types.h"
 
+uint32_t crc32c_continue(uint32_t partial, const uint8_t *data, size_t size);
+ovs_be32 crc32c_finish(uint32_t partial);
 ovs_be32 crc32c(const uint8_t *data, size_t);
 
 #endif /* crc32c.h */
diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index bc31a04..ce78b0a 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -121,6 +121,9 @@ void
 dp_packet_init_dpdk(struct dp_packet *b)
 {
     b->source = DPBUF_DPDK;
+#ifdef DPDK_NETDEV
+    b->mstate = NULL;
+#endif
 }
 
 /* Initializes 'b' as an empty dp_packet with an initial capacity of 'size'
@@ -138,6 +141,21 @@ dp_packet_uninit(struct dp_packet *b)
     if (b) {
         if (b->source == DPBUF_MALLOC) {
             free(dp_packet_base(b));
+
+#ifdef DPDK_NETDEV
+            /* Packet has been "linearized" */
+            if (b->mstate) {
+                b->source = DPBUF_DPDK;
+                b->mbuf.buf_addr = b->mstate->addr;
+                b->mbuf.buf_len = b->mstate->len;
+                b->mbuf.data_off = b->mstate->off;
+
+                free(b->mstate);
+                b->mstate = NULL;
+
+                free_dpdk_buf((struct dp_packet *) b);
+            }
+#endif
         } else if (b->source == DPBUF_DPDK) {
 #ifdef DPDK_NETDEV
             /* If this dp_packet was allocated by DPDK it must have been
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 3efa9d6..f091265 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -28,7 +28,6 @@
 #include "netdev-afxdp.h"
 #include "netdev-dpdk.h"
 #include "openvswitch/list.h"
-#include "packets.h"
 #include "util.h"
 #include "flow.h"
 
@@ -56,6 +55,16 @@ enum dp_packet_offload_mask {
 };
 #endif
 
+#ifdef DPDK_NETDEV
+/* Struct to save data for when a DPBUF_DPDK packet is converted to
+ * DPBUF_MALLOC. */
+struct mbuf_state {
+    void *addr;                 /* Saved 'mbuf.buf_addr'. */
+    uint16_t len;               /* Saved 'mbuf.buf_len'. */
+    uint16_t off;               /* Saved 'mbuf.data_off'. */
+};
+#endif
+
 /* Buffer for holding packet data.  A dp_packet is automatically reallocated
  * as necessary if it grows too large for the available memory.
  * By default the packet type is set to Ethernet (PT_ETH).
@@ -63,6 +72,7 @@ enum dp_packet_offload_mask {
 struct dp_packet {
 #ifdef DPDK_NETDEV
     struct rte_mbuf mbuf;       /* DPDK mbuf */
+    struct mbuf_state *mstate;  /* Used when packet has been "linearized" */
 #else
     void *base_;                /* First byte of allocated space. */
     uint16_t allocated_;        /* Number of bytes allocated. */
@@ -103,6 +113,9 @@ static inline void dp_packet_set_data(struct dp_packet *, void *);
 static inline void *dp_packet_base(const struct dp_packet *);
 static inline void dp_packet_set_base(struct dp_packet *, void *);
 
+static inline bool dp_packet_is_linear(const struct dp_packet *);
+static inline void dp_packet_linearize(struct dp_packet *);
+
 static inline uint32_t dp_packet_size(const struct dp_packet *);
 static inline void dp_packet_set_size(struct dp_packet *, uint32_t);
 
@@ -119,6 +132,7 @@ static inline void *dp_packet_l2_5(const struct dp_packet *);
 static inline void dp_packet_set_l2_5(struct dp_packet *, void *);
 static inline void *dp_packet_l3(const struct dp_packet *);
 static inline void dp_packet_set_l3(struct dp_packet *, void *);
+static inline size_t dp_packet_l3_size(const struct dp_packet *);
 static inline void *dp_packet_l4(const struct dp_packet *);
 static inline void dp_packet_set_l4(struct dp_packet *, void *);
 static inline size_t dp_packet_l4_size(const struct dp_packet *);
@@ -157,6 +171,11 @@ static inline void *dp_packet_at(const struct dp_packet *, size_t offset,
                                  size_t size);
 static inline void *dp_packet_at_assert(const struct dp_packet *,
                                         size_t offset, size_t size);
+
+static inline void
+dp_packet_copy_from_offset(const struct dp_packet *b, size_t offset,
+                           size_t size, void *buf);
+
 #ifdef DPDK_NETDEV
 static inline const struct rte_mbuf *
 dp_packet_mbuf_from_offset(const struct dp_packet *b, size_t *offset);
@@ -195,26 +214,27 @@ void *dp_packet_steal_data(struct dp_packet *);
 static inline bool dp_packet_equal(const struct dp_packet *,
                                    const struct dp_packet *);
 
+static inline ssize_t
+dp_packet_read_data(const struct dp_packet *b, size_t offset, size_t size,
+                    void **ptr, void *buf);
+
+
 
 /* Frees memory that 'b' points to, as well as 'b' itself. */
 static inline void
 dp_packet_delete(struct dp_packet *b)
 {
     if (b) {
-        if (b->source == DPBUF_DPDK) {
-            /* If this dp_packet was allocated by DPDK it must have been
-             * created as a dp_packet */
-            free_dpdk_buf((struct dp_packet*) b);
-            return;
-        }
-
         if (b->source == DPBUF_AFXDP) {
             free_afxdp_buf(b);
             return;
         }
 
         dp_packet_uninit(b);
-        free(b);
+
+        if (b->source != DPBUF_DPDK) {
+            free(b);
+        }
     }
 }
 
@@ -384,6 +404,39 @@ dp_packet_try_pull(struct dp_packet *b, size_t size)
         ? dp_packet_pull(b, size) : NULL;
 }
 
+/* Gives linear access to 'size' bytes at 'offset' in 'b' through '*ptr'.
+ * Data that dp_packet_at() can return in place is not copied.  Otherwise,
+ * with a NULL 'buf', '*ptr' covers only the part within the mbuf found for
+ * 'offset' and the number of bytes left out is returned; with a non-NULL
+ * 'buf' of 'size' bytes, the data is copied there.  Returns 0 if complete. */
+static inline ssize_t
+dp_packet_read_data(const struct dp_packet *b, size_t offset, size_t size,
+                    void **ptr, void *buf)
+{
+    /* Zero copy */
+    if ((*ptr = dp_packet_at(b, offset, size)) != NULL) {
+        return 0;
+    }
+
+    /* No copy buffer: hand out what is linearly available */
+    if (buf == NULL) {
+#ifdef DPDK_NETDEV
+        size_t mofs = offset;
+        const struct rte_mbuf *mbuf = dp_packet_mbuf_from_offset(b, &mofs);
+        *ptr = dp_packet_at(b, offset, mbuf->data_len - mofs);
+        return size - (mbuf->data_len - mofs);
+#else
+        /* Non-DPDK dp_packets should always hit the above condition */
+        OVS_NOT_REACHED();
+#endif
+    }
+
+    /* Copy all data */
+    *ptr = buf;
+    dp_packet_copy_from_offset(b, offset, size, buf);
+    return 0;
+}
+
 static inline bool
 dp_packet_is_eth(const struct dp_packet *b)
 {
@@ -453,6 +506,28 @@ dp_packet_set_l3(struct dp_packet *b, void *l3)
     b->l3_ofs = l3 ? (char *) l3 - (char *) dp_packet_data(b) : UINT16_MAX;
 }
 
+/* Returns the L3 header size, i.e. 'l4_ofs' - 'l3_ofs'.  The caller must make
+ * sure that both offsets are set. */
+static inline size_t
+dp_packet_l3h_size(const struct dp_packet *b)
+{
+    return b->l4_ofs - b->l3_ofs;
+}
+
+/* Returns the number of bytes from the start of the L3 header to the end of
+ * the L3 payload (L2 padding excluded), or 0 if 'l3_ofs' cannot be pulled. */
+static inline size_t
+dp_packet_l3_size(const struct dp_packet *b)
+{
+    if (!dp_packet_may_pull(b, b->l3_ofs, 0)) {
+        return 0;
+    }
+
+    size_t l3_size = dp_packet_size(b) - b->l3_ofs;
+
+    return l3_size - dp_packet_l2_pad_size(b);
+}
+
 static inline void *
 dp_packet_l4(const struct dp_packet *b)
 {
@@ -467,17 +542,6 @@ dp_packet_set_l4(struct dp_packet *b, void *l4)
     b->l4_ofs = l4 ? (char *) l4 - (char *) dp_packet_data(b) : UINT16_MAX;
 }
 
-/* Returns the size of the packet from the beginning of the L3 header to the
- * end of the L3 payload.  Hence L2 padding is not included. */
-static inline size_t
-dp_packet_l3_size(const struct dp_packet *b)
-{
-    return OVS_LIKELY(b->l3_ofs != UINT16_MAX)
-        ? (const char *)dp_packet_tail(b) - (const char *)dp_packet_l3(b)
-        - dp_packet_l2_pad_size(b)
-        : 0;
-}
-
 /* Returns the size of the packet from the beginning of the L4 header to the
  * end of the L4 payload.  Hence L2 padding is not included. */
 static inline size_t
@@ -512,21 +576,21 @@ static inline const void *
 dp_packet_get_udp_payload(const struct dp_packet *b)
 {
     return OVS_LIKELY(dp_packet_l4_size(b) >= UDP_HEADER_LEN)
-        ? (const char *)dp_packet_l4(b) + UDP_HEADER_LEN : NULL;
+        ? (const char *) dp_packet_l4(b) + UDP_HEADER_LEN : NULL;
 }
 
 static inline const void *
 dp_packet_get_sctp_payload(const struct dp_packet *b)
 {
     return OVS_LIKELY(dp_packet_l4_size(b) >= SCTP_HEADER_LEN)
-        ? (const char *)dp_packet_l4(b) + SCTP_HEADER_LEN : NULL;
+        ? (const char *) dp_packet_l4(b) + SCTP_HEADER_LEN : NULL;
 }
 
 static inline const void *
 dp_packet_get_icmp_payload(const struct dp_packet *b)
 {
     return OVS_LIKELY(dp_packet_l4_size(b) >= ICMP_HEADER_LEN)
-        ? (const char *)dp_packet_l4(b) + ICMP_HEADER_LEN : NULL;
+        ? (const char *) dp_packet_l4(b) + ICMP_HEADER_LEN : NULL;
 }
 
 static inline const void *
@@ -547,6 +611,7 @@ dp_packet_init_specific(struct dp_packet *p)
     p->mbuf.tx_offload = p->mbuf.packet_type = 0;
     p->mbuf.nb_segs = 1;
     p->mbuf.next = NULL;
+    p->mstate = NULL;
 }
 
 static inline const struct rte_mbuf *
@@ -817,6 +882,74 @@ dp_packet_set_flow_mark(struct dp_packet *p, uint32_t mark)
     p->mbuf.ol_flags |= PKT_RX_FDIR_ID;
 }
 
+/* Copies 'size' bytes starting at 'offset' in 'b' to 'buf'. */
+static inline void
+dp_packet_copy_from_offset(const struct dp_packet *b, size_t offset,
+                           size_t size, void *buf)
+{
+    if (dp_packet_is_linear(b)) {
+        memcpy(buf, (char *) dp_packet_data(b) + offset, size);
+    } else {
+        const struct rte_mbuf *mbuf = dp_packet_mbuf_from_offset(b, &offset);
+        const void *src = rte_pktmbuf_read(mbuf, offset, size, buf);
+        /* rte_pktmbuf_read() only fills 'buf' when the range spans several
+         * segments; otherwise it returns a pointer into the mbuf itself. */
+        if (src && src != buf) {
+            memcpy(buf, src, size);
+        }
+    }
+}
+
+static inline bool
+dp_packet_is_linear(const struct dp_packet *b)
+{
+    if (b->source == DPBUF_DPDK) {
+        return rte_pktmbuf_is_contiguous(&b->mbuf);
+    }
+
+    return true;
+}
+
+/* Linearizes the data on packet 'b', by copying the data into system's memory.
+ * After this the packet is effectively a DPBUF_MALLOC packet. If 'b' is
+ * already linear, no operations are performed on the packet.
+ *
+ * This is an expensive operation which should only be performed as a last
+ * resort, when multi-segments are under use but data must be accessed
+ * linearly. */
+static inline void
+dp_packet_linearize(struct dp_packet *b)
+{
+    /* If already linear, bail out early. */
+    if (OVS_LIKELY(dp_packet_is_linear(b))) {
+        return;
+    }
+
+    /* Copy packet's data to system's memory */
+    uint32_t pkt_len = dp_packet_size(b);
+    void *dst = xmalloc(pkt_len);
+    if (!rte_pktmbuf_read(&b->mbuf, 0, pkt_len, dst)) {
+        free(dst);
+        return;
+    }
+
+    /* Free all mbufs except for the first */
+    dp_packet_clear(b);
+
+    /* Save mbuf's buf_addr to restore later */
+    struct mbuf_state *mstate = xmalloc(sizeof *mstate);
+    mstate->addr = b->mbuf.buf_addr;
+    mstate->len = b->mbuf.buf_len;
+    mstate->off = b->mbuf.data_off;
+    b->mstate = mstate;
+
+    /* Transform DPBUF_DPDK packet into a DPBUF_MALLOC packet */
+    b->source = DPBUF_MALLOC;
+    b->mbuf.buf_addr = dst;
+    b->mbuf.buf_len = pkt_len;
+    b->mbuf.data_off = 0;
+    dp_packet_set_size(b, pkt_len);
+}
 #else /* DPDK_NETDEV */
 
 static inline bool
@@ -947,6 +1080,24 @@ dp_packet_set_flow_mark(struct dp_packet *p, uint32_t mark)
     p->flow_mark = mark;
     p->ol_flags |= DP_PACKET_OL_FLOW_MARK_MASK;
 }
+
+static inline void
+dp_packet_copy_from_offset(const struct dp_packet *b, size_t offset,
+                           size_t size, void *buf)
+{
+    memcpy(buf, (char *)dp_packet_data(b) + offset, size);
+}
+
+static inline bool
+dp_packet_is_linear(const struct dp_packet *b OVS_UNUSED)
+{
+    return true;
+}
+
+static inline void
+dp_packet_linearize(struct dp_packet *b OVS_UNUSED)
+{
+}
 #endif /* DPDK_NETDEV */
 
 static inline void
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 75d85b2..29278e5 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -6219,6 +6219,9 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_,
             .support = dp_netdev_support,
         };
 
+        /* Gather the whole data for printing the packet (if debug enabled) */
+        dp_packet_linearize(packet_);
+
         ofpbuf_init(&key, 0);
         odp_flow_key_from_flow(&odp_parms, &key);
         packet_str = ofp_dp_packet_to_string(packet_);
@@ -6463,6 +6466,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
     bool smc_enable_db;
     size_t map_cnt = 0;
     bool batch_enable = true;
+    int error;
 
     atomic_read_relaxed(&pmd->dp->smc_enable_db, &smc_enable_db);
     pmd_perf_update_counter(&pmd->perf_stats,
@@ -6509,7 +6513,12 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
             }
         }
 
-        miniflow_extract(packet, &key->mf);
+        error = miniflow_extract(packet, &key->mf);
+        if (OVS_UNLIKELY(error)) {
+            dp_packet_delete(packet);
+            continue;
+        }
+
         key->len = 0; /* Not computed yet. */
         key->hash =
                 (md_is_valid == false)
@@ -7123,8 +7132,13 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
             }
 
             struct dp_packet *packet;
+            int error;
             DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) {
-                flow_extract(packet, &flow);
+                error = flow_extract(packet, &flow);
+                if (error) {
+                    dp_packet_delete(packet);
+                    continue;
+                }
                 dpif_flow_hash(dp->dpif, &flow, sizeof flow, &ufid);
                 dp_execute_userspace_action(pmd, packet, should_steal, &flow,
                                             &ufid, &actions, userdata);
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 7bc71d6..e9ce12a 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -1850,6 +1850,9 @@ dpif_netlink_operate__(struct dpif_netlink *dpif,
                 }
                 n_ops = i;
             } else {
+                /* Linearize the packet to encode the whole message */
+                dp_packet_linearize(op->execute.packet);
+
                 dpif_netlink_encode_execute(dpif->dp_ifindex, &op->execute,
                                             &aux->request);
             }
diff --git a/lib/dpif.c b/lib/dpif.c
index c88b210..2315a63 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1407,6 +1407,7 @@ dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops,
 
                 case DPIF_OP_EXECUTE:
                     COVERAGE_INC(dpif_execute);
+
                     log_execute_message(dpif, &this_module, &op->execute,
                                         false, error);
                     break;
@@ -1834,6 +1835,11 @@ log_execute_message(const struct dpif *dpif,
         uint64_t stub[1024 / 8];
         struct ofpbuf md = OFPBUF_STUB_INITIALIZER(stub);
 
+        /* We will need the whole data for logging */
+        struct dp_packet *p = CONST_CAST(struct dp_packet *,
+                                         execute->packet);
+        dp_packet_linearize(p);
+
         packet = ofp_packet_to_string(dp_packet_data(execute->packet),
                                       dp_packet_size(execute->packet),
                                       execute->packet->packet_type);
diff --git a/lib/flow.c b/lib/flow.c
index ac6a4e1..94cfd62 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -628,18 +628,23 @@ parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key)
 
 /* This does the same thing as miniflow_extract() with a full-size 'flow' as
  * the destination. */
-void
+int
 flow_extract(struct dp_packet *packet, struct flow *flow)
 {
     struct {
         struct miniflow mf;
         uint64_t buf[FLOW_U64S];
     } m;
+    int error;
 
     COVERAGE_INC(flow_extract);
 
-    miniflow_extract(packet, &m.mf);
+    error = miniflow_extract(packet, &m.mf);
+    if (error) {
+        return error;
+    }
     miniflow_expand(&m.mf, flow);
+    return 0;
 }
 
 static inline bool
@@ -731,8 +736,11 @@ ipv6_sanity_check(const struct ovs_16aligned_ip6_hdr *nh, size_t size)
  *    - packet->l4_ofs is set to just past the IPv4 or IPv6 header, if one is
  *      present and the packet has at least the content used for the fields
  *      of interest for the flow, otherwise UINT16_MAX.
+ *
+ * If multi-segment mbufs are under use, this function verifies if the packet
+ * headers are within the first mbuf of the chain, otherwise returns -EINVAL.
  */
-void
+int
 miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
 {
     /* Add code to this function (or its callees) to extract new fields. */
@@ -854,6 +862,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
         int ip_len;
         uint16_t tot_len;
 
+        /* Check if header is in first mbuf, otherwise return error */
+        if (!dp_packet_is_linear(packet)) {
+            if (!dp_packet_may_pull(packet, packet->l3_ofs, sizeof *nh)) {
+                return -EINVAL;
+            }
+        }
+
         if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, &tot_len))) {
             goto out;
         }
@@ -884,6 +899,12 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
         ovs_be32 tc_flow;
         uint16_t plen;
 
+        if (!dp_packet_is_linear(packet)) {
+            if (!dp_packet_may_pull(packet, packet->l3_ofs, sizeof *nh)) {
+                return -EINVAL;
+            }
+        }
+
         if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) {
             goto out;
         }
@@ -929,6 +950,14 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
         if (dl_type == htons(ETH_TYPE_ARP) ||
             dl_type == htons(ETH_TYPE_RARP)) {
             struct eth_addr arp_buf[2];
+
+            if (!dp_packet_is_linear(packet)) {
+                if (!dp_packet_may_pull(packet, packet->l3_ofs,
+                                        ARP_ETH_HEADER_LEN)) {
+                    return -EINVAL;
+                }
+            }
+
             const struct arp_eth_header *arp = (const struct arp_eth_header *)
                 data_try_pull(&data, &size, ARP_ETH_HEADER_LEN);
 
@@ -976,6 +1005,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
             if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {
                 const struct tcp_header *tcp = data;
 
+                if (!dp_packet_is_linear(packet)) {
+                    if (!dp_packet_may_pull(packet, packet->l4_ofs,
+                                            TCP_HEADER_LEN)) {
+                        return -EINVAL;
+                    }
+                }
+
                 miniflow_push_be32(mf, arp_tha.ea[2], 0);
                 miniflow_push_be32(mf, tcp_flags,
                                    TCP_FLAGS_BE32(tcp->tcp_ctl));
@@ -988,6 +1024,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
             if (OVS_LIKELY(size >= UDP_HEADER_LEN)) {
                 const struct udp_header *udp = data;
 
+                if (!dp_packet_is_linear(packet)) {
+                    if (!dp_packet_may_pull(packet, packet->l4_ofs,
+                                            UDP_HEADER_LEN)) {
+                        return -EINVAL;
+                    }
+                }
+
                 miniflow_push_be16(mf, tp_src, udp->udp_src);
                 miniflow_push_be16(mf, tp_dst, udp->udp_dst);
                 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
@@ -997,6 +1040,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
             if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) {
                 const struct sctp_header *sctp = data;
 
+                if (!dp_packet_is_linear(packet)) {
+                    if (!dp_packet_may_pull(packet, packet->l4_ofs,
+                                            SCTP_HEADER_LEN)) {
+                        return -EINVAL;
+                    }
+                }
+
                 miniflow_push_be16(mf, tp_src, sctp->sctp_src);
                 miniflow_push_be16(mf, tp_dst, sctp->sctp_dst);
                 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
@@ -1006,6 +1056,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
             if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) {
                 const struct icmp_header *icmp = data;
 
+                if (!dp_packet_is_linear(packet)) {
+                    if (!dp_packet_may_pull(packet, packet->l4_ofs,
+                                            ICMP_HEADER_LEN)) {
+                        return -EINVAL;
+                    }
+                }
+
                 miniflow_push_be16(mf, tp_src, htons(icmp->icmp_type));
                 miniflow_push_be16(mf, tp_dst, htons(icmp->icmp_code));
                 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
@@ -1015,6 +1072,13 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
             if (OVS_LIKELY(size >= IGMP_HEADER_LEN)) {
                 const struct igmp_header *igmp = data;
 
+                if (!dp_packet_is_linear(packet)) {
+                    if (!dp_packet_may_pull(packet, packet->l4_ofs,
+                                            IGMP_HEADER_LEN)) {
+                        return -EINVAL;
+                    }
+                }
+
                 miniflow_push_be16(mf, tp_src, htons(igmp->igmp_type));
                 miniflow_push_be16(mf, tp_dst, htons(igmp->igmp_code));
                 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
@@ -1032,8 +1096,18 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
                 uint8_t opt_type;
                 /* This holds the ND Reserved field. */
                 uint32_t rso_flags;
-                const struct icmp6_hdr *icmp = data_pull(&data,
-                                               &size,ICMP6_HEADER_LEN);
+                const struct icmp6_hdr *icmp;
+
+                if (!dp_packet_is_linear(packet)) {
+                    if (!dp_packet_may_pull(packet, packet->l4_ofs,
+                                            sizeof *icmp)) {
+                        return -EINVAL;
+                    }
+                }
+
+                icmp = data_pull(&data, &size, sizeof *icmp);
+
+
                 if (parse_icmpv6(&data, &size, icmp,
                                  &rso_flags, &nd_target, arp_buf, &opt_type)) {
                     if (nd_target) {
@@ -1071,6 +1145,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
     }
  out:
     dst->map = mf.map;
+    return 0;
 }
 
 ovs_be16
@@ -3079,7 +3154,7 @@ static void
 flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
                      uint32_t pseudo_hdr_csum)
 {
-    size_t l4_len = (char *) dp_packet_tail(p) - (char *) dp_packet_l4(p);
+    size_t l4_len = dp_packet_l4_size(p);
 
     if (!(flow->nw_frag & FLOW_NW_FRAG_ANY)
         || !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
@@ -3087,14 +3162,16 @@ flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
             struct tcp_header *tcp = dp_packet_l4(p);
 
             tcp->tcp_csum = 0;
-            tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
-                                                      tcp, l4_len));
+            tcp->tcp_csum = csum_finish(
+                packet_csum_continue(p, pseudo_hdr_csum, p->l4_ofs, l4_len));
+
         } else if (flow->nw_proto == IPPROTO_UDP) {
             struct udp_header *udp = dp_packet_l4(p);
 
             udp->udp_csum = 0;
-            udp->udp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
-                                                      udp, l4_len));
+            udp->udp_csum = csum_finish(
+                packet_csum_continue(p, pseudo_hdr_csum, p->l4_ofs, l4_len));
+
             if (!udp->udp_csum) {
                 udp->udp_csum = htons(0xffff);
             }
@@ -3102,18 +3179,20 @@ flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
             struct icmp_header *icmp = dp_packet_l4(p);
 
             icmp->icmp_csum = 0;
-            icmp->icmp_csum = csum(icmp, l4_len);
+            icmp->icmp_csum = packet_csum(p, p->l4_ofs, l4_len);
         } else if (flow->nw_proto == IPPROTO_IGMP) {
             struct igmp_header *igmp = dp_packet_l4(p);
 
             igmp->igmp_csum = 0;
-            igmp->igmp_csum = csum(igmp, l4_len);
+            igmp->igmp_csum = packet_csum(p, p->l4_ofs, l4_len);
         } else if (flow->nw_proto == IPPROTO_ICMPV6) {
             struct icmp6_hdr *icmp = dp_packet_l4(p);
 
             icmp->icmp6_cksum = 0;
             icmp->icmp6_cksum = (OVS_FORCE uint16_t)
-                csum_finish(csum_continue(pseudo_hdr_csum, icmp, l4_len));
+                csum_finish(packet_csum_continue(p, pseudo_hdr_csum, p->l4_ofs,
+                            l4_len));
+
         }
     }
 }
@@ -3139,12 +3218,12 @@ packet_expand(struct dp_packet *p, const struct flow *flow, size_t size)
         eth->eth_type = htons(dp_packet_size(p));
     } else if (dl_type_is_ip_any(flow->dl_type)) {
         uint32_t pseudo_hdr_csum;
-        size_t l4_len = (char *) dp_packet_tail(p) - (char *) dp_packet_l4(p);
+        size_t l4_len = dp_packet_l4_size(p);
 
         if (flow->dl_type == htons(ETH_TYPE_IP)) {
             struct ip_header *ip = dp_packet_l3(p);
 
-            ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
+            ip->ip_tot_len = htons(dp_packet_l3_size(p));
             ip->ip_csum = 0;
             ip->ip_csum = csum(ip, sizeof *ip);
 
@@ -3233,7 +3312,7 @@ flow_compose(struct dp_packet *p, const struct flow *flow,
         l4_len = flow_compose_l4(p, flow, l7, l7_len);
 
         ip = dp_packet_l3(p);
-        ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
+        ip->ip_tot_len = htons(dp_packet_l3_size(p));
         /* Checksum has already been zeroed by put_zeros call. */
         ip->ip_csum = csum(ip, sizeof *ip);
 
diff --git a/lib/flow.h b/lib/flow.h
index 7298c71..11c9566 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -68,7 +68,7 @@ extern int flow_vlan_limit;
     DIV_ROUND_UP(FLOW_U64_OFFREM(FIELD) + MEMBER_SIZEOF(struct flow, FIELD), \
                  sizeof(uint64_t))
 
-void flow_extract(struct dp_packet *, struct flow *);
+int flow_extract(struct dp_packet *, struct flow *);
 
 void flow_zero_wildcards(struct flow *, const struct flow_wildcards *);
 void flow_unwildcard_tp_ports(const struct flow *, struct flow_wildcards *);
@@ -540,7 +540,7 @@ struct pkt_metadata;
 /* The 'dst' must follow with buffer space for FLOW_U64S 64-bit units.
  * 'dst->map' is ignored on input and set on output to indicate which fields
  * were extracted. */
-void miniflow_extract(struct dp_packet *packet, struct miniflow *dst);
+int miniflow_extract(struct dp_packet *packet, struct miniflow *dst);
 void miniflow_map_init(struct miniflow *, const struct flow *);
 void flow_wc_map(const struct flow *, struct flowmap *);
 size_t miniflow_alloc(struct miniflow *dsts[], size_t n,
diff --git a/lib/mcast-snooping.c b/lib/mcast-snooping.c
index 6730301..875b7a1 100644
--- a/lib/mcast-snooping.c
+++ b/lib/mcast-snooping.c
@@ -455,6 +455,7 @@ mcast_snooping_add_report(struct mcast_snooping *ms,
     if (!igmpv3) {
         return 0;
     }
+    offset = (char *) igmpv3 - (char *) dp_packet_data(p);
     ngrp = ntohs(igmpv3->ngrp);
     offset += IGMPV3_HEADER_LEN;
     while (ngrp--) {
@@ -507,6 +508,7 @@ mcast_snooping_add_mld(struct mcast_snooping *ms,
     if (!mld) {
         return 0;
     }
+    offset = (char *) mld - (char *) dp_packet_data(p);
     ngrp = ntohs(mld->ngrp);
     offset += MLD_HEADER_LEN;
     addr = dp_packet_at(p, offset, sizeof(struct in6_addr));
diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index 7875636..6224dab 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -700,6 +700,9 @@ netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
     }
 
     DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+        /* We need the whole data to send the packet on the device */
+        dp_packet_linearize(packet);
+
         const void *data = dp_packet_data(packet);
         size_t size = dp_packet_size(packet);
 
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index f0c0fba..048725a 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -244,6 +244,9 @@ dummy_packet_stream_run(struct netdev_dummy *dev, struct dummy_packet_stream *s)
 
         ASSIGN_CONTAINER(txbuf_node, ovs_list_front(&s->txq), list_node);
         txbuf = txbuf_node->pkt;
+
+        dp_packet_linearize(txbuf);
+
         retval = stream_send(s->stream, dp_packet_data(txbuf), dp_packet_size(txbuf));
 
         if (retval > 0) {
@@ -1105,6 +1108,9 @@ netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
 
     struct dp_packet *packet;
     DP_PACKET_BATCH_FOR_EACH(i, packet, batch) {
+        /* We need the whole data to send the packet on the device */
+        dp_packet_linearize(packet);
+
         const void *buffer = dp_packet_data(packet);
         size_t size = dp_packet_size(packet);
 
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index f481923..6439d7c 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1316,6 +1316,9 @@ netdev_linux_sock_batch_send(int sock, int ifindex,
 
     struct dp_packet *packet;
     DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+        /* We need the whole data to send the packet on the device */
+        dp_packet_linearize(packet);
+
         iov[i].iov_base = dp_packet_data(packet);
         iov[i].iov_len = dp_packet_size(packet);
         mmsg[i].msg_hdr = (struct msghdr) { .msg_name = &sll,
@@ -1369,6 +1372,9 @@ netdev_linux_tap_batch_send(struct netdev *netdev_,
         ssize_t retval;
         int error;
 
+        /* We need the whole data to send the packet on the device */
+        dp_packet_linearize(packet);
+
         do {
             retval = write(netdev->tap_fd, dp_packet_data(packet), size);
             error = retval < 0 ? errno : 0;
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index 56baaa2..285b927 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -65,7 +65,7 @@ netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
     void *nh;
     struct ip_header *ip;
     struct ovs_16aligned_ip6_hdr *ip6;
-    void *l4;
+    char *l4;
     int l3_size;
 
     nh = dp_packet_l3(packet);
@@ -79,15 +79,15 @@ netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
 
     *hlen = sizeof(struct eth_header);
 
-    l3_size = dp_packet_size(packet) -
-              ((char *)nh - (char *)dp_packet_data(packet));
+    l3_size = dp_packet_l3_size(packet);
 
     if (IP_VER(ip->ip_ihl_ver) == 4) {
 
         ovs_be32 ip_src, ip_dst;
 
         if (OVS_UNLIKELY(!dp_packet_ip_checksum_valid(packet))) {
-            if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
+            if (packet_csum(packet, packet->l3_ofs,
+                            IP_IHL(ip->ip_ihl_ver) * 4)) {
                 VLOG_WARN_RL(&err_rl, "ip packet has invalid checksum");
                 return NULL;
             }
@@ -196,10 +196,8 @@ udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
                 csum = packet_csum_pseudoheader(dp_packet_l3(packet));
             }
 
-            csum = csum_continue(csum, udp, dp_packet_size(packet) -
-                                 ((const unsigned char *)udp -
-                                  (const unsigned char *)dp_packet_eth(packet)
-                                 ));
+            csum = packet_csum_continue(packet, csum, packet->l4_ofs,
+                                        dp_packet_l4_size(packet));
             if (csum_finish(csum)) {
                 return NULL;
             }
@@ -236,7 +234,7 @@ netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
             csum = packet_csum_pseudoheader(netdev_tnl_ip_hdr(dp_packet_data(packet)));
         }
 
-        csum = csum_continue(csum, udp, ip_tot_size);
+        csum = packet_csum_continue(packet, csum, packet->l4_ofs, ip_tot_size);
         udp->udp_csum = csum_finish(csum);
 
         if (!udp->udp_csum) {
@@ -373,9 +371,8 @@ parse_gre_header(struct dp_packet *packet,
     if (greh->flags & htons(GRE_CSUM)) {
         ovs_be16 pkt_csum;
 
-        pkt_csum = csum(greh, dp_packet_size(packet) -
-                              ((const unsigned char *)greh -
-                               (const unsigned char *)dp_packet_eth(packet)));
+        pkt_csum = packet_csum(packet, packet->l4_ofs,
+                               dp_packet_l4_size(packet));
         if (pkt_csum) {
             return -EINVAL;
         }
@@ -448,8 +445,9 @@ netdev_gre_push_header(const struct netdev *netdev,
     greh = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
 
     if (greh->flags & htons(GRE_CSUM)) {
-        ovs_be16 *csum_opt = (ovs_be16 *) (greh + 1);
-        *csum_opt = csum(greh, ip_tot_size);
+        greh = dp_packet_l4(packet);
+        ovs_be16 *csum_opt = (ovs_be16 *) (greh + 1); /* Skip base header. */
+        *csum_opt = packet_csum(packet, packet->l4_ofs, ip_tot_size);
     }
 
     if (greh->flags & htons(GRE_SEQ)) {
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 563ad1d..389a1fb 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -248,8 +248,14 @@ static void
 odp_set_nd(struct dp_packet *packet, const struct ovs_key_nd *key,
            const struct ovs_key_nd *mask)
 {
-    const struct ovs_nd_msg *ns = dp_packet_l4(packet);
-    const struct ovs_nd_lla_opt *lla_opt = dp_packet_get_nd_payload(packet);
+    const struct ovs_nd_msg *ns;
+    const struct ovs_nd_lla_opt *lla_opt;
+
+    /* To process neighbor discovery options, we need the whole packet */
+    dp_packet_linearize(packet);
+
+    ns = dp_packet_l4(packet);
+    lla_opt = dp_packet_get_nd_payload(packet);
 
     if (OVS_LIKELY(ns && lla_opt)) {
         int bytes_remain = dp_packet_l4_size(packet) - sizeof(*ns);
@@ -818,6 +824,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
             case OVS_HASH_ALG_L4: {
                 struct flow flow;
                 uint32_t hash;
+                int error;
 
                 DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
                     /* RSS hash can be used here instead of 5tuple for
@@ -826,7 +833,11 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
                         hash = dp_packet_get_rss_hash(packet);
                         hash = hash_int(hash, hash_act->hash_basis);
                     } else {
-                        flow_extract(packet, &flow);
+                        error = flow_extract(packet, &flow);
+                        if (error) {
+                            dp_packet_delete(packet);
+                            continue;
+                        }
                         hash = flow_hash_5tuple(&flow, hash_act->hash_basis);
                     }
                     packet->md.dp_hash = hash;
@@ -836,9 +847,14 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
             case OVS_HASH_ALG_SYM_L4: {
                 struct flow flow;
                 uint32_t hash;
+                int error;
 
                 DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
-                    flow_extract(packet, &flow);
+                    error = flow_extract(packet, &flow);
+                    if (error) {
+                        dp_packet_delete(packet);
+                        continue;
+                    }
                     hash = flow_hash_symmetric_l3l4(&flow,
                                                     hash_act->hash_basis,
                                                     false);
diff --git a/lib/packets.c b/lib/packets.c
index 12053df..4ff170b 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -1011,12 +1011,18 @@ packet_rh_present(struct dp_packet *packet, uint8_t *nexthdr)
     const struct ovs_16aligned_ip6_hdr *nh;
     size_t len;
     size_t remaining;
-    uint8_t *data = dp_packet_l3(packet);
+    uint8_t *data;
 
-    remaining = packet->l4_ofs - packet->l3_ofs;
+    remaining = dp_packet_l3h_size(packet);
     if (remaining < sizeof *nh) {
         return false;
     }
+
+    /* We will need the whole data for processing the headers below */
+    dp_packet_linearize(packet);
+
+    data = dp_packet_l3(packet);
+
     nh = ALIGNED_CAST(struct ovs_16aligned_ip6_hdr *, data);
     data += sizeof *nh;
     remaining -= sizeof *nh;
@@ -1258,12 +1264,12 @@ packet_set_sctp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
 
     old_csum = get_16aligned_be32(&sh->sctp_csum);
     put_16aligned_be32(&sh->sctp_csum, 0);
-    old_correct_csum = crc32c((void *)sh, tp_len);
+    old_correct_csum = packet_crc32c(packet, packet->l4_ofs, tp_len);
 
     sh->sctp_src = src;
     sh->sctp_dst = dst;
 
-    new_csum = crc32c((void *)sh, tp_len);
+    new_csum = packet_crc32c(packet, packet->l4_ofs, tp_len);
     put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum ^ new_csum);
 }
 
@@ -1374,6 +1380,9 @@ packet_set_nd(struct dp_packet *packet, const struct in6_addr *target,
         return;
     }
 
+    /* To process neighbor discovery options, we need the whole packet */
+    dp_packet_linearize(packet);
+
     ns = dp_packet_l4(packet);
     opt = &ns->options[0];
     bytes_remain -= sizeof(*ns);
@@ -1596,8 +1605,8 @@ compose_nd_ns(struct dp_packet *b, const struct eth_addr eth_src,
 
     ns->icmph.icmp6_cksum = 0;
     icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
-    ns->icmph.icmp6_cksum = csum_finish(
-        csum_continue(icmp_csum, ns, ND_MSG_LEN + ND_LLA_OPT_LEN));
+    ns->icmph.icmp6_cksum = csum_finish(packet_csum_continue(
+        b, icmp_csum, b->l4_ofs, ND_MSG_LEN + ND_LLA_OPT_LEN));
 }
 
 /* Compose an IPv6 Neighbor Discovery Neighbor Advertisement message. */
@@ -1627,8 +1636,8 @@ compose_nd_na(struct dp_packet *b,
 
     na->icmph.icmp6_cksum = 0;
     icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
-    na->icmph.icmp6_cksum = csum_finish(csum_continue(
-        icmp_csum, na, ND_MSG_LEN + ND_LLA_OPT_LEN));
+    na->icmph.icmp6_cksum = csum_finish(packet_csum_continue(
+        b, icmp_csum, b->l4_ofs, ND_MSG_LEN + ND_LLA_OPT_LEN));
 }
 
 /* Compose an IPv6 Neighbor Discovery Router Advertisement message with
@@ -1678,8 +1687,8 @@ compose_nd_ra(struct dp_packet *b,
 
     ra->icmph.icmp6_cksum = 0;
     uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
-    ra->icmph.icmp6_cksum = csum_finish(csum_continue(
-        icmp_csum, ra, RA_MSG_LEN + ND_LLA_OPT_LEN + mtu_opt_len));
+    ra->icmph.icmp6_cksum = csum_finish(packet_csum_continue(
+        b, icmp_csum, b->l4_ofs, RA_MSG_LEN + ND_LLA_OPT_LEN + mtu_opt_len));
 }
 
 /* Append an IPv6 Neighbor Discovery Prefix Information option to a
@@ -1708,8 +1717,8 @@ packet_put_ra_prefix_opt(struct dp_packet *b,
     struct ovs_ra_msg *ra = dp_packet_l4(b);
     ra->icmph.icmp6_cksum = 0;
     uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
-    ra->icmph.icmp6_cksum = csum_finish(csum_continue(
-        icmp_csum, ra, prev_l4_size + ND_PREFIX_OPT_LEN));
+    ra->icmph.icmp6_cksum = csum_finish(packet_csum_continue(
+        b, icmp_csum, b->l4_ofs, prev_l4_size + ND_PREFIX_OPT_LEN));
 }
 
 uint32_t
@@ -1761,6 +1770,69 @@ packet_csum_upperlayer6(const struct ovs_16aligned_ip6_hdr *ip6,
 }
 #endif
 
+/* Wrapper around csum_continue() that takes segmented packets into account.
+ *
+ * Adds the 'n' bytes of packet 'b' starting at 'offset' to the partial IP
+ * checksum 'partial' and returns the updated value.  A lone trailing byte is
+ * summed too; the walk ends early only if no further data can be read. */
+uint32_t
+packet_csum_continue(const struct dp_packet *b, uint32_t partial,
+                     uint16_t offset, size_t n)
+{
+    char *ptr = NULL;
+
+    while (n > 0) {
+        size_t rem = dp_packet_read_data(b, offset, n, (void *)&ptr, NULL);
+        size_t size = n - rem; /* Bytes obtained in this step. */
+
+        if (!size) {
+            break; /* No progress; avoid spinning forever. */
+        }
+        partial = csum_continue(partial, ptr, size);
+        offset += size;
+        n = rem;
+    }
+
+    return partial;
+}
+
+/* Segment-aware counterpart of csum(): returns the IP checksum of the 'n'
+ * bytes of packet 'b' that start at 'offset', obtained by finishing a
+ * packet_csum_continue() pass whose initial partial sum is 0. */
+ovs_be16
+packet_csum(const struct dp_packet *b, uint16_t offset, size_t n)
+{
+    uint32_t sum = packet_csum_continue(b, 0, offset, n);
+
+    return csum_finish(sum);
+}
+
+/* Wrapper around crc32c() that takes segmented packets into account.
+ *
+ * Returns the CRC32c checksum, as per RFC4960, of the 'n' bytes of packet
+ * 'b' starting at 'offset'.  A lone trailing byte is included too; the walk
+ * ends early only if no further data can be read. */
+ovs_be32
+packet_crc32c(const struct dp_packet *b, uint16_t offset, size_t n)
+{
+    uint32_t partial = 0xffffffffL;
+    char *ptr = NULL;
+
+    while (n > 0) {
+        size_t rem = dp_packet_read_data(b, offset, n, (void *)&ptr, NULL);
+        size_t size = n - rem; /* Bytes obtained in this step. */
+
+        if (!size) {
+            break; /* No progress; avoid spinning forever. */
+        }
+        partial = crc32c_continue(partial, (uint8_t *) ptr, size);
+        offset += size;
+        n = rem;
+    }
+
+    return crc32c_finish(partial);
+}
+
 void
 IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
 {
diff --git a/lib/packets.h b/lib/packets.h
index c440098..65fa8aa 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -1617,6 +1617,13 @@ void packet_put_ra_prefix_opt(struct dp_packet *,
                               ovs_be32 preferred_lifetime,
                               const ovs_be128 router_prefix);
 uint32_t packet_csum_pseudoheader(const struct ip_header *);
+uint32_t
+packet_csum_continue(const struct dp_packet *b, uint32_t partial,
+                     uint16_t offset, size_t n);
+ovs_be16
+packet_csum(const struct dp_packet *b, uint16_t offset, size_t n);
+ovs_be32
+packet_crc32c(const struct dp_packet *b, uint16_t offset, size_t n);
 void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6);
 
 #define DNS_HEADER_LEN 12
diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 657aa7f..97cf20e 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -832,7 +832,10 @@ recv_upcalls(struct handler *handler)
         upcall->actions = dupcall->actions;
 
         pkt_metadata_from_flow(&dupcall->packet.md, flow);
-        flow_extract(&dupcall->packet, flow);
+        error = flow_extract(&dupcall->packet, flow);
+        if (error) {
+            goto cleanup;
+        }
 
         error = process_upcall(udpif, upcall,
                                &upcall->odp_actions, &upcall->wc);
@@ -1418,12 +1421,16 @@ process_upcall(struct udpif *udpif, struct upcall *upcall,
     case SFLOW_UPCALL:
         if (upcall->sflow) {
             struct dpif_sflow_actions sflow_actions;
+            struct dp_packet *p = CONST_CAST(struct dp_packet *, packet);
 
             memset(&sflow_actions, 0, sizeof sflow_actions);
 
             actions_len = dpif_read_actions(udpif, upcall, flow,
                                             upcall->type, &sflow_actions);
-            dpif_sflow_received(upcall->sflow, packet, flow,
+            /* Gather the whole data */
+            dp_packet_linearize(p);
+
+            dpif_sflow_received(upcall->sflow, p, flow,
                                 flow->in_port.odp_port, &upcall->cookie,
                                 actions_len > 0 ? &sflow_actions : NULL);
         }
@@ -1485,6 +1492,10 @@ process_upcall(struct udpif *udpif, struct upcall *upcall,
 
             const struct frozen_state *state = &recirc_node->state;
 
+            /* Gather the whole data */
+            struct dp_packet *p = CONST_CAST(struct dp_packet *, packet);
+            dp_packet_linearize(p);
+
             struct ofproto_async_msg *am = xmalloc(sizeof *am);
             *am = (struct ofproto_async_msg) {
                 .controller_id = cookie->controller.controller_id,
@@ -1492,9 +1503,9 @@ process_upcall(struct udpif *udpif, struct upcall *upcall,
                 .pin = {
                     .up = {
                         .base = {
-                            .packet = xmemdup(dp_packet_data(packet),
-                                              dp_packet_size(packet)),
-                            .packet_len = dp_packet_size(packet),
+                            .packet = xmemdup(dp_packet_data(p),
+                                              dp_packet_size(p)),
+                            .packet_len = dp_packet_size(p),
                             .reason = cookie->controller.reason,
                             .table_id = state->table_id,
                             .cookie = get_32aligned_be64(
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 17800f3..ada23a2 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -3009,6 +3009,13 @@ xlate_normal(struct xlate_ctx *ctx)
         && is_ip_any(flow)) {
         struct mcast_snooping *ms = ctx->xbridge->ms;
         struct mcast_group *grp = NULL;
+        struct dp_packet *p = CONST_CAST(struct dp_packet *,
+                                         ctx->xin->packet);
+
+        /* We will need the whole data for processing the packet below */
+        if (p) {
+            dp_packet_linearize(p);
+        }
 
         if (is_igmp(flow, wc)) {
             /*
@@ -3317,10 +3324,16 @@ process_special(struct xlate_ctx *ctx, const struct xport *xport)
     const struct flow *flow = &ctx->xin->flow;
     struct flow_wildcards *wc = ctx->wc;
     const struct xbridge *xbridge = ctx->xbridge;
-    const struct dp_packet *packet = ctx->xin->packet;
+    struct dp_packet *packet = CONST_CAST(struct dp_packet *,
+                                          ctx->xin->packet);
     enum slow_path_reason slow;
     bool lacp_may_enable;
 
+    if (packet) {
+        /* Gather the whole data for further processing */
+        dp_packet_linearize(packet);
+    }
+
     if (!xport) {
         slow = 0;
     } else if (xport->cfm && cfm_should_process_flow(xport->cfm, flow, wc)) {
@@ -3421,9 +3434,13 @@ compose_table_xlate(struct xlate_ctx *ctx, const struct xport *out_dev,
     ovs_version_t version = ofproto_dpif_get_tables_version(xbridge->ofproto);
     struct ofpact_output output;
     struct flow flow;
+    int error;
 
     ofpact_init(&output.ofpact, OFPACT_OUTPUT, sizeof output);
-    flow_extract(packet, &flow);
+    error = flow_extract(packet, &flow);
+    if (error) {
+        return error;
+    }
     flow.in_port.ofp_port = out_dev->ofp_port;
     output.port = OFPP_TABLE;
     output.max_len = 0;
@@ -7781,10 +7798,14 @@ xlate_send_packet(const struct ofport_dpif *ofport, bool oam,
     uint64_t ofpacts_stub[1024 / 8];
     struct ofpbuf ofpacts;
     struct flow flow;
+    int error;
 
     ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
+    error = flow_extract(packet, &flow);
+    if (error) {
+        return error;
+    }
     /* Use OFPP_NONE as the in_port to avoid special packet processing. */
-    flow_extract(packet, &flow);
     flow.in_port.ofp_port = OFPP_NONE;
 
     xport = xport_lookup(xcfg, ofport);
diff --git a/tests/test-rstp.c b/tests/test-rstp.c
index 01aeaf8..2b886a0 100644
--- a/tests/test-rstp.c
+++ b/tests/test-rstp.c
@@ -86,8 +86,13 @@ send_bpdu(struct dp_packet *pkt, void *port_, void *b_)
     assert(port_no < b->n_ports);
     lan = b->ports[port_no];
     if (lan) {
-        const void *data = dp_packet_l3(pkt);
-        size_t size = (char *) dp_packet_tail(pkt) - (char *) data;
+        const char *data;
+        size_t size;
+
+        dp_packet_linearize(pkt);
+
+        data = dp_packet_l3(pkt);
+        size = dp_packet_size(pkt) - pkt->l3_ofs;
         int i;
 
         for (i = 0; i < lan->n_conns; i++) {
diff --git a/tests/test-stp.c b/tests/test-stp.c
index c85c99d..71265d5 100644
--- a/tests/test-stp.c
+++ b/tests/test-stp.c
@@ -94,8 +94,13 @@ send_bpdu(struct dp_packet *pkt, int port_no, void *b_)
     assert(port_no < b->n_ports);
     lan = b->ports[port_no];
     if (lan) {
-        const void *data = dp_packet_l3(pkt);
-        size_t size = (char *) dp_packet_tail(pkt) - (char *) data;
+        const char *data;
+        size_t size;
+
+        dp_packet_linearize(pkt);
+
+        data = dp_packet_l3(pkt);
+        size = dp_packet_size(pkt) - pkt->l3_ofs;
         int i;
 
         for (i = 0; i < lan->n_conns; i++) {
-- 
2.7.4



More information about the dev mailing list