[ovs-dev] [PATCH v1] Fix crash due to multiple tnl push action

Lam, Tiago tiago.lam at intel.com
Mon Jul 2 17:56:51 UTC 2018


On 25/06/2018 13:15, Anju Thomas wrote:
> Hi Ben,
> 
> We are facing multiple such crashes on different computes in our deployments. Seems to be a pretty common problem in our setup.  As you suggested, it would be good if we can make the below changes as well.    How do you suggest we move forward regarding the approaches?
> 
> Regards & thanks
> Anju
> 

Hi Anju,

I agree that this patch should go in irrespective of the result of this
discussion. It is a bug nonetheless (which in this case, as you
explained, is triggering a crash because of dp_packet_resize__() calls
on DPBUF_DPDK packets).

The approach of completely refactoring dp_packet to properly deal with
errors, even though it is the safest option, seems like it would require
a fair amount of change compared to what we actually need to cover,
which is "resize operations on DPBUF_DPDK packets". Resize operations on
any other packets (!= DPBUF_DPDK) wouldn't require any type of change:
if memory is not available, xmalloc() will deal with that by aborting.
Thus, any packet created with dp_packet_new() / dp_packet_init() /
dp_packet_use_stub(), or even cloned (a DPBUF_DPDK is cloned into a
DPBUF_MALLOC, for example), won't have these limitations.

So, going back to the approaches you described before, Anju, my
preference would be to go towards approach 2 (or better yet, an
alternate version of 2). In this version, dp_packet_put_uninit() and
dp_packet_push_uninit() wouldn't call neither
dp_packet_prealloc_tailroom() nor dp_packet_prealloc_headroom() for
DPBUF_DPDK packets. Instead, they would check if there's enough space
beforehand, and if not return NULL, instead of the normal pointer to the
data. Otherwise the operations would continue as expected.

This means we would need to deal with the NULL case only where we may be
using DPBUF_DPDK packets (all the other cases remain unaffected as NULL
won't be possible there). The cases I've identified it happens is where
headers are being pushed (which seems to be what's causing your crashes
as well):
- In eth_push_vlan(), push_eth(), push_mpls() and push_nsh(), which get
  called from odp_execute_actions();
- In netdev_tnl_push_ip_header(), which gets called by the native
  tunnels implementations, such as netdev_gre_push_header() for GRE, and
is ultimately called by push_tnl_action().

I haven't found a case where dp_packet_put_uninit() is actually used in
a DPBUF_DPDK packet. In all cases it seems to be a result of a packet
created locally by using dp_packet_new() or some variant.

What are your thoughts? Would be cool to hear other people's thoughts on
this as well.

Here's the diff of how this would look like. Mind you, this hasn't been
run whatsoever, only compiled.

Regards,
Tiago.

-=-=-=-=-=-=-=-=-=-=-=-

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 443c225..ab01ca4 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -273,6 +273,21 @@ dp_packet_resize__(struct dp_packet *b, size_t
new_headroom, size_t new_tailroom
     }
 }

+static bool
+dp_packet_tailroom_avail(struct dp_packet *b, size_t size)
+{
+#ifdef DPDK_NETDEV
+    if (b->source == DPBUF_DPDK) {
+        if (size > dp_packet_tailroom(b)) {
+            return false;
+        }
+
+        return true;
+    }
+#endif
+    return true;
+}
+
 /* Ensures that 'b' has room for at least 'size' bytes at its tail end,
  * reallocating and copying its data if necessary.  Its headroom, if
any, is
  * preserved. */
@@ -284,6 +299,21 @@ dp_packet_prealloc_tailroom(struct dp_packet *b,
size_t size)
     }
 }

+static bool
+dp_packet_headroom_avail(struct dp_packet *b, size_t size)
+{
+#ifdef DPDK_NETDEV
+    if (b->source == DPBUF_DPDK) {
+        if (size > dp_packet_headroom(b)) {
+            return false;
+        }
+
+        return true;
+    }
+#endif
+    return true;
+}
+
 /* Ensures that 'b' has room for at least 'size' bytes at its head,
  * reallocating and copying its data if necessary.  Its tailroom, if
any, is
  * preserved. */
@@ -320,6 +350,11 @@ void *
 dp_packet_put_uninit(struct dp_packet *b, size_t size)
 {
     void *p;
+
+    if (!dp_packet_tailroom_avail(b, size)) {
+        return NULL;
+    }
+
     dp_packet_prealloc_tailroom(b, size);
     p = dp_packet_tail(b);
     dp_packet_set_size(b, dp_packet_size(b) + size);
@@ -333,6 +368,9 @@ void *
 dp_packet_put_zeros(struct dp_packet *b, size_t size)
 {
     void *dst = dp_packet_put_uninit(b, size);
+    if (!dst) {
+        return NULL;
+    }
     memset(dst, 0, size);
     return dst;
 }
@@ -344,6 +382,9 @@ void *
 dp_packet_put(struct dp_packet *b, const void *p, size_t size)
 {
     void *dst = dp_packet_put_uninit(b, size);
+    if (!dst) {
+        return NULL;
+    }
     memcpy(dst, p, size);
     return dst;
 }
@@ -370,7 +411,9 @@ dp_packet_put_hex(struct dp_packet *b, const char
*s, size_t *n)
             return CONST_CAST(char *, s);
         }

-        dp_packet_put(b, &byte, 1);
+        if (!dp_packet_put(b, &byte, 1)) {
+            return NULL;
+        }
         s += 2;
     }
 }
@@ -380,7 +423,7 @@ dp_packet_put_hex(struct dp_packet *b, const char
*s, size_t *n)
 void
 dp_packet_reserve(struct dp_packet *b, size_t size)
 {
-    ovs_assert(!dp_packet_size(b));
+    ovs_assert(b->source != DPBUF_DPDK && !dp_packet_size(b));
     dp_packet_prealloc_tailroom(b, size);
     dp_packet_set_data(b, (char*)dp_packet_data(b) + size);
 }
@@ -392,7 +435,7 @@ void
 dp_packet_reserve_with_tailroom(struct dp_packet *b, size_t headroom,
                              size_t tailroom)
 {
-    ovs_assert(!dp_packet_size(b));
+    ovs_assert(b->source != DPBUF_DPDK && !dp_packet_size(b));
     dp_packet_prealloc_tailroom(b, headroom + tailroom);
     dp_packet_set_data(b, (char*)dp_packet_data(b) + headroom);
 }
@@ -403,6 +446,10 @@ dp_packet_reserve_with_tailroom(struct dp_packet
*b, size_t headroom,
 void *
 dp_packet_push_uninit(struct dp_packet *b, size_t size)
 {
+    if (!dp_packet_headroom_avail(b, size)) {
+        return NULL;
+    }
+
     dp_packet_prealloc_headroom(b, size);
     dp_packet_set_data(b, (char*)dp_packet_data(b) - size);
     dp_packet_set_size(b, dp_packet_size(b) + size);
@@ -416,6 +463,9 @@ void *
 dp_packet_push_zeros(struct dp_packet *b, size_t size)
 {
     void *dst = dp_packet_push_uninit(b, size);
+    if (!dst) {
+        return NULL;
+    }
     memset(dst, 0, size);
     return dst;
 }
@@ -427,6 +477,9 @@ void *
 dp_packet_push(struct dp_packet *b, const void *p, size_t size)
 {
     void *dst = dp_packet_push_uninit(b, size);
+    if (!dst) {
+        return NULL;
+    }
     memcpy(dst, p, size);
     return dst;
 }
@@ -468,7 +521,9 @@ void *
 dp_packet_resize_l2_5(struct dp_packet *b, int increment)
 {
     if (increment >= 0) {
-        dp_packet_push_uninit(b, increment);
+        if (!dp_packet_push_uninit(b, increment)) {
+            return NULL;
+        }
     } else {
         dp_packet_pull(b, -increment);
     }
@@ -486,7 +541,9 @@ dp_packet_resize_l2_5(struct dp_packet *b, int
increment)
 void *
 dp_packet_resize_l2(struct dp_packet *b, int increment)
 {
-    dp_packet_resize_l2_5(b, increment);
+    if (!dp_packet_resize_l2_5(b, increment)) {
+        return NULL;
+    }
     dp_packet_adjust_layer_offset(&b->l2_5_ofs, increment);
     return dp_packet_data(b);
 }
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index a63fe24..9aee6a1 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -152,6 +152,9 @@ netdev_tnl_push_ip_header(struct dp_packet *packet,
     struct ovs_16aligned_ip6_hdr *ip6;

     eth = dp_packet_push_uninit(packet, size);
+    if (!eth) {
+        return NULL;
+    }
     *ip_tot_size = dp_packet_size(packet) - sizeof (struct eth_header);

     memcpy(eth, header, size);
@@ -214,7 +217,7 @@ udp_extract_tnl_md(struct dp_packet *packet, struct
flow_tnl *tnl,
 }


-void
+int
 netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
                            struct dp_packet *packet,
                            const struct ovs_action_push_tnl *data)
@@ -223,6 +226,9 @@ netdev_tnl_push_udp_header(const struct netdev
*netdev OVS_UNUSED,
     int ip_tot_size;

     udp = netdev_tnl_push_ip_header(packet, data->header,
data->header_len, &ip_tot_size);
+    if (!udp) {
+        return 1;
+    }

     /* set udp src port */
     udp->udp_src = netdev_tnl_get_src_port(packet);
@@ -243,6 +249,8 @@ netdev_tnl_push_udp_header(const struct netdev
*netdev OVS_UNUSED,
             udp->udp_csum = htons(0xffff);
         }
     }
+
+    return 0;
 }

 static void *
@@ -435,7 +443,7 @@ err:
     return NULL;
 }

-void
+int
 netdev_gre_push_header(const struct netdev *netdev,
                        struct dp_packet *packet,
                        const struct ovs_action_push_tnl *data)
@@ -446,6 +454,9 @@ netdev_gre_push_header(const struct netdev *netdev,
     int ip_tot_size;

     greh = netdev_tnl_push_ip_header(packet, data->header,
data->header_len, &ip_tot_size);
+    if (!greh) {
+        return 1;
+    }

     if (greh->flags & htons(GRE_CSUM)) {
         ovs_be16 *csum_opt = (ovs_be16 *) (greh + 1);
@@ -460,6 +471,8 @@ netdev_gre_push_header(const struct netdev *netdev,
         tnl_cfg = &dev->tnl_cfg;
         put_16aligned_be32(seq_opt, htonl(tnl_cfg->seqno++));
     }
+
+    return 0;
 }

 int
@@ -588,7 +601,7 @@ err:
     return NULL;
 }

-void
+int
 netdev_erspan_push_header(const struct netdev *netdev,
                           struct dp_packet *packet,
                           const struct ovs_action_push_tnl *data)
@@ -602,6 +615,9 @@ netdev_erspan_push_header(const struct netdev *netdev,

     greh = netdev_tnl_push_ip_header(packet, data->header,
                                      data->header_len, &ip_tot_size);
+    if (!greh) {
+        return 1;
+    }

     /* update GRE seqno */
     tnl_cfg = &dev->tnl_cfg;
@@ -614,7 +630,7 @@ netdev_erspan_push_header(const struct netdev *netdev,
         md2 = ALIGNED_CAST(struct erspan_md2 *, ersh + 1);
         put_16aligned_be32(&md2->timestamp, get_erspan_ts(ERSPAN_100US));
     }
-    return;
+    return 0;
 }

 int
diff --git a/lib/netdev-native-tnl.h b/lib/netdev-native-tnl.h
index 5dc0012..7c36229 100644
--- a/lib/netdev-native-tnl.h
+++ b/lib/netdev-native-tnl.h
@@ -33,7 +33,7 @@ netdev_gre_build_header(const struct netdev *netdev,
                         struct ovs_action_push_tnl *data,
                         const struct netdev_tnl_build_header_params
*params);

-void
+int
 netdev_gre_push_header(const struct netdev *netdev,
                        struct dp_packet *packet,
                        const struct ovs_action_push_tnl *data);
@@ -45,14 +45,14 @@ netdev_erspan_build_header(const struct netdev *netdev,
                            struct ovs_action_push_tnl *data,
                            const struct netdev_tnl_build_header_params *p);

-void
+int
 netdev_erspan_push_header(const struct netdev *netdev,
                           struct dp_packet *packet,
                           const struct ovs_action_push_tnl *data);
 struct dp_packet *
 netdev_erspan_pop_header(struct dp_packet *packet);

-void
+int
 netdev_tnl_push_udp_header(const struct netdev *netdev,
                            struct dp_packet *packet,
                            const struct ovs_action_push_tnl *data);
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 4da579c..fb794e8 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -314,7 +314,7 @@ struct netdev_class {
      * flow.  Push header is called for packet to build header specific to
      * a packet on actual transmit.  It uses partial header build by
      * build_header() which is passed as data. */
-    void (*push_header)(const struct netdev *,
+    int (*push_header)(const struct netdev *,
                         struct dp_packet *packet,
                         const struct ovs_action_push_tnl *data);

diff --git a/lib/netdev.c b/lib/netdev.c
index 83614f5..5e239ae 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -857,9 +857,20 @@ netdev_push_header(const struct netdev *netdev,
                    const struct ovs_action_push_tnl *data)
 {
     struct dp_packet *packet;
-    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
-        netdev->netdev_class->push_header(netdev, packet, data);
-        pkt_metadata_init(&packet->md, data->out_port);
+    bool drop = false;
+    const size_t cnt = dp_packet_batch_size(batch);
+
+    size_t i;
+    DP_PACKET_BATCH_REFILL_FOR_EACH (i, cnt, packet, batch) {
+        drop = netdev->netdev_class->push_header(netdev, packet, data);
+        if (drop) {
+            /* XXX(tlam): Put WARN here */
+            dp_packet_delete(packet);
+        } else {
+            pkt_metadata_init(&packet->md, data->out_port);
+            /* Re-inject packet */
+            dp_packet_batch_refill(batch, packet, i);
+        }
     }

     return 0;
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 5831d1f..e004163 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -780,9 +780,17 @@ odp_execute_actions(void *dp, struct
dp_packet_batch *batch, bool steal,
         case OVS_ACTION_ATTR_PUSH_VLAN: {
             const struct ovs_action_push_vlan *vlan = nl_attr_get(a);

-            DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
-                eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
+            size_t i;
+            const size_t num = dp_packet_batch_size(batch);
+
+            DP_PACKET_BATCH_REFILL_FOR_EACH (i, num, packet, batch) {
+                if (!eth_push_vlan(packet, vlan->vlan_tpid,
vlan->vlan_tci)) {
+                    dp_packet_batch_refill(batch, packet, i);
+                } else {
+                    dp_packet_delete(packet);
+                }
             }
+
             break;
         }

@@ -795,9 +803,17 @@ odp_execute_actions(void *dp, struct
dp_packet_batch *batch, bool steal,
         case OVS_ACTION_ATTR_PUSH_MPLS: {
             const struct ovs_action_push_mpls *mpls = nl_attr_get(a);

-            DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
-                push_mpls(packet, mpls->mpls_ethertype, mpls->mpls_lse);
+            size_t i;
+            const size_t num = dp_packet_batch_size(batch);
+
+            DP_PACKET_BATCH_REFILL_FOR_EACH (i, num, packet, batch) {
+                if (!push_mpls(packet, mpls->mpls_ethertype,
mpls->mpls_lse)) {
+                    dp_packet_batch_refill(batch, packet, i);
+                } else {
+                    dp_packet_delete(packet);
+                }
             }
+
             break;
          }

@@ -858,10 +874,18 @@ odp_execute_actions(void *dp, struct
dp_packet_batch *batch, bool steal,
         case OVS_ACTION_ATTR_PUSH_ETH: {
             const struct ovs_action_push_eth *eth = nl_attr_get(a);

-            DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
-                push_eth(packet, &eth->addresses.eth_dst,
-                         &eth->addresses.eth_src);
+            size_t i;
+            const size_t num = dp_packet_batch_size(batch);
+
+            DP_PACKET_BATCH_REFILL_FOR_EACH (i, num, packet, batch) {
+                if (!push_eth(packet, &eth->addresses.eth_dst,
+                         &eth->addresses.eth_src)) {
+                    dp_packet_batch_refill(batch, packet, i);
+                } else {
+                    dp_packet_delete(packet);
+                }
             }
+
             break;
         }

@@ -876,8 +900,15 @@ odp_execute_actions(void *dp, struct
dp_packet_batch *batch, bool steal,
             struct nsh_hdr *nsh_hdr = ALIGNED_CAST(struct nsh_hdr *,
buffer);
             nsh_reset_ver_flags_ttl_len(nsh_hdr);
             odp_nsh_hdr_from_attr(nl_attr_get(a), nsh_hdr,
NSH_HDR_MAX_LEN);
-            DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
-                push_nsh(packet, nsh_hdr);
+            size_t i;
+            const size_t num = dp_packet_batch_size(batch);
+
+            DP_PACKET_BATCH_REFILL_FOR_EACH (i, num, packet, batch) {
+                if (!push_nsh(packet, nsh_hdr)) {
+                    dp_packet_batch_refill(batch, packet, i);
+                } else {
+                    dp_packet_delete(packet);
+                }
             }
             break;
         }
diff --git a/lib/packets.c b/lib/packets.c
index 38bfb60..7119a55 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -210,16 +210,21 @@ compose_rarp(struct dp_packet *b, const struct
eth_addr eth_src)
  * packet.  Ignores the CFI bit of 'tci' using 0 instead.
  *
  * Also adjusts the layer offsets accordingly. */
-void
+int
 eth_push_vlan(struct dp_packet *packet, ovs_be16 tpid, ovs_be16 tci)
 {
     struct vlan_eth_header *veh;

     /* Insert new 802.1Q header. */
     veh = dp_packet_resize_l2(packet, VLAN_HEADER_LEN);
+    if (!veh) {
+        return 1;
+    }
     memmove(veh, (char *)veh + VLAN_HEADER_LEN, 2 * ETH_ADDR_LEN);
     veh->veth_type = tpid;
     veh->veth_tci = tci & htons(~VLAN_CFI);
+
+    return 0;
 }

 /* Removes outermost VLAN header (if any is present) from 'packet'.
@@ -240,7 +245,7 @@ eth_pop_vlan(struct dp_packet *packet)
 }

 /* Push Ethernet header onto 'packet' assuming it is layer 3 */
-void
+int
 push_eth(struct dp_packet *packet, const struct eth_addr *dst,
          const struct eth_addr *src)
 {
@@ -248,10 +253,15 @@ push_eth(struct dp_packet *packet, const struct
eth_addr *dst,

     ovs_assert(packet->packet_type != htonl(PT_ETH));
     eh = dp_packet_resize_l2(packet, ETH_HEADER_LEN);
+    if (!eh) {
+        return 1;
+    }
     eh->eth_dst = *dst;
     eh->eth_src = *src;
     eh->eth_type = pt_ns_type_be(packet->packet_type);
     packet->packet_type = htonl(PT_ETH);
+
+    return 0;
 }

 /* Removes Ethernet header, including VLAN header, from 'packet'.
@@ -369,14 +379,14 @@ set_mpls_lse(struct dp_packet *packet, ovs_be32
mpls_lse)
 /* Push MPLS label stack entry 'lse' onto 'packet' as the outermost MPLS
  * header.  If 'packet' does not already have any MPLS labels, then its
  * Ethertype is changed to 'ethtype' (which must be an MPLS Ethertype). */
-void
+int
 push_mpls(struct dp_packet *packet, ovs_be16 ethtype, ovs_be32 lse)
 {
     char * header;
     size_t len;

     if (!eth_type_mpls(ethtype)) {
-        return;
+        return 1;
     }

     if (!is_mpls(packet)) {
@@ -389,8 +399,13 @@ push_mpls(struct dp_packet *packet, ovs_be16
ethtype, ovs_be32 lse)
     /* Push new MPLS shim header onto packet. */
     len = packet->l2_5_ofs;
     header = dp_packet_resize_l2_5(packet, MPLS_HLEN);
+    if (!header) {
+        return 1;
+    }
     memmove(header, header + MPLS_HLEN, len);
     memcpy(header + len, &lse, sizeof lse);
+
+    return 0;
 }

 /* If 'packet' is an MPLS packet, removes its outermost MPLS label
stack entry.
@@ -414,7 +429,7 @@ pop_mpls(struct dp_packet *packet, ovs_be16 ethtype)
     }
 }

-void
+int
 push_nsh(struct dp_packet *packet, const struct nsh_hdr *nsh_hdr_src)
 {
     struct nsh_hdr *nsh;
@@ -439,11 +454,16 @@ push_nsh(struct dp_packet *packet, const struct
nsh_hdr *nsh_hdr_src)
     }

     nsh = (struct nsh_hdr *) dp_packet_push_uninit(packet, length);
+    if (!nsh) {
+        return 1;
+    }
     memcpy(nsh, nsh_hdr_src, length);
     nsh->next_proto = next_proto;
     packet->packet_type = htonl(PT_NSH);
     dp_packet_reset_offsets(packet);
     packet->l3_ofs = 0;
+
+    return 0;
 }

 bool
diff --git a/lib/packets.h b/lib/packets.h
index 7645a9d..d8c0795 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -330,7 +330,7 @@ bool eth_addr_from_string(const char *, struct
eth_addr *);

 void compose_rarp(struct dp_packet *, const struct eth_addr);

-void eth_push_vlan(struct dp_packet *, ovs_be16 tpid, ovs_be16 tci);
+int eth_push_vlan(struct dp_packet *, ovs_be16 tpid, ovs_be16 tci);
 void eth_pop_vlan(struct dp_packet *);

 const char *eth_from_hex(const char *hex, struct dp_packet **packetp);
@@ -338,7 +338,7 @@ void eth_format_masked(const struct eth_addr ea,
                        const struct eth_addr *mask, struct ds *s);

 void set_mpls_lse(struct dp_packet *, ovs_be32 label);
-void push_mpls(struct dp_packet *packet, ovs_be16 ethtype, ovs_be32 lse);
+int push_mpls(struct dp_packet *packet, ovs_be16 ethtype, ovs_be32 lse);
 void pop_mpls(struct dp_packet *, ovs_be16 ethtype);

 void set_mpls_lse_ttl(ovs_be32 *lse, uint8_t ttl);
@@ -438,11 +438,11 @@ struct eth_header {
 };
 BUILD_ASSERT_DECL(ETH_HEADER_LEN == sizeof(struct eth_header));

-void push_eth(struct dp_packet *packet, const struct eth_addr *dst,
+int push_eth(struct dp_packet *packet, const struct eth_addr *dst,
               const struct eth_addr *src);
 void pop_eth(struct dp_packet *packet);

-void push_nsh(struct dp_packet *packet, const struct nsh_hdr *nsh_hdr_src);
+int push_nsh(struct dp_packet *packet, const struct nsh_hdr *nsh_hdr_src);
 bool pop_nsh(struct dp_packet *packet);

 #define LLC_DSAP_SNAP 0xaa


More information about the dev mailing list