[ovs-dev] [PATCH 5/5] compat: iptunnel: NULL pointer deref for ip_md_tunnel_xmit

Greg Rose gvrose8192 at gmail.com
Wed Mar 27 15:32:19 UTC 2019


From: Alan Maguire <alan.maguire at oracle.com>

Upstream commit:
    commit f4b3ec4e6aa1a2ca437905a519ae08e8cf6af754
    Author: Alan Maguire <alan.maguire at oracle.com>
    Date:   Wed Mar 6 10:25:42 2019 +0000

    iptunnel: NULL pointer deref for ip_md_tunnel_xmit

    Naresh Kamboju noted the following oops during execution of selftest
    tools/testing/selftests/bpf/test_tunnel.sh on x86_64:

    [  274.120445] BUG: unable to handle kernel NULL pointer dereference
    at 0000000000000000
    [  274.128285] #PF error: [INSTR]
    [  274.131351] PGD 8000000414a0e067 P4D 8000000414a0e067 PUD 3b6334067 PMD 0
    [  274.138241] Oops: 0010 [#1] SMP PTI
    [  274.141734] CPU: 1 PID: 11464 Comm: ping Not tainted
    5.0.0-rc4-next-20190129 #1
    [  274.149046] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS
    2.0b 07/27/2017
    [  274.156526] RIP: 0010:          (null)
    [  274.160280] Code: Bad RIP value.
    [  274.163509] RSP: 0018:ffffbc9681f83540 EFLAGS: 00010286
    [  274.168726] RAX: 0000000000000000 RBX: ffffdc967fa80a18 RCX: 0000000000000000
    [  274.175851] RDX: ffff9db2ee08b540 RSI: 000000000000000e RDI: ffffdc967fa809a0
    [  274.182974] RBP: ffffbc9681f83580 R08: ffff9db2c4d62690 R09: 000000000000000c
    [  274.190098] R10: 0000000000000000 R11: ffff9db2ee08b540 R12: ffff9db31ce7c000
    [  274.197222] R13: 0000000000000001 R14: 000000000000000c R15: ffff9db3179cf400
    [  274.204346] FS:  00007ff4ae7c5740(0000) GS:ffff9db31fa80000(0000)
    knlGS:0000000000000000
    [  274.212424] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    [  274.218162] CR2: ffffffffffffffd6 CR3: 00000004574da004 CR4: 00000000003606e0
    [  274.225292] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    [  274.232416] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
    [  274.239541] Call Trace:
    [  274.241988]  ? tnl_update_pmtu+0x296/0x3b0
    [  274.246085]  ip_md_tunnel_xmit+0x1bc/0x520
    [  274.250176]  gre_fb_xmit+0x330/0x390
    [  274.253754]  gre_tap_xmit+0x128/0x180
    [  274.257414]  dev_hard_start_xmit+0xb7/0x300
    [  274.261598]  sch_direct_xmit+0xf6/0x290
    [  274.265430]  __qdisc_run+0x15d/0x5e0
    [  274.269007]  __dev_queue_xmit+0x2c5/0xc00
    [  274.273011]  ? dev_queue_xmit+0x10/0x20
    [  274.276842]  ? eth_header+0x2b/0xc0
    [  274.280326]  dev_queue_xmit+0x10/0x20
    [  274.283984]  ? dev_queue_xmit+0x10/0x20
    [  274.287813]  arp_xmit+0x1a/0xf0
    [  274.290952]  arp_send_dst.part.19+0x46/0x60
    [  274.295138]  arp_solicit+0x177/0x6b0
    [  274.298708]  ? mod_timer+0x18e/0x440
    [  274.302281]  neigh_probe+0x57/0x70
    [  274.305684]  __neigh_event_send+0x197/0x2d0
    [  274.309862]  neigh_resolve_output+0x18c/0x210
    [  274.314212]  ip_finish_output2+0x257/0x690
    [  274.318304]  ip_finish_output+0x219/0x340
    [  274.322314]  ? ip_finish_output+0x219/0x340
    [  274.326493]  ip_output+0x76/0x240
    [  274.329805]  ? ip_fragment.constprop.53+0x80/0x80
    [  274.334510]  ip_local_out+0x3f/0x70
    [  274.337992]  ip_send_skb+0x19/0x40
    [  274.341391]  ip_push_pending_frames+0x33/0x40
    [  274.345740]  raw_sendmsg+0xc15/0x11d0
    [  274.349403]  ? __might_fault+0x85/0x90
    [  274.353151]  ? _copy_from_user+0x6b/0xa0
    [  274.357070]  ? rw_copy_check_uvector+0x54/0x130
    [  274.361604]  inet_sendmsg+0x42/0x1c0
    [  274.365179]  ? inet_sendmsg+0x42/0x1c0
    [  274.368937]  sock_sendmsg+0x3e/0x50
    [  274.372460]  ___sys_sendmsg+0x26f/0x2d0
    [  274.376293]  ? lock_acquire+0x95/0x190
    [  274.380043]  ? __handle_mm_fault+0x7ce/0xb70
    [  274.384307]  ? lock_acquire+0x95/0x190
    [  274.388053]  ? __audit_syscall_entry+0xdd/0x130
    [  274.392586]  ? ktime_get_coarse_real_ts64+0x64/0xc0
    [  274.397461]  ? __audit_syscall_entry+0xdd/0x130
    [  274.401989]  ? trace_hardirqs_on+0x4c/0x100
    [  274.406173]  __sys_sendmsg+0x63/0xa0
    [  274.409744]  ? __sys_sendmsg+0x63/0xa0
    [  274.413488]  __x64_sys_sendmsg+0x1f/0x30
    [  274.417405]  do_syscall_64+0x55/0x190
    [  274.421064]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
    [  274.426113] RIP: 0033:0x7ff4ae0e6e87
    [  274.429686] Code: 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 80 00
    00 00 00 8b 05 ca d9 2b 00 48 63 d2 48 63 ff 85 c0 75 10 b8 2e 00 00
    00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 53 48 89 f3 48 83 ec 10 48 89 7c
    24 08
    [  274.448422] RSP: 002b:00007ffcd9b76db8 EFLAGS: 00000246 ORIG_RAX:
    000000000000002e
    [  274.455978] RAX: ffffffffffffffda RBX: 0000000000000040 RCX: 00007ff4ae0e6e87
    [  274.463104] RDX: 0000000000000000 RSI: 00000000006092e0 RDI: 0000000000000003
    [  274.470228] RBP: 0000000000000000 R08: 00007ffcd9bc40a0 R09: 00007ffcd9bc4080
    [  274.477349] R10: 000000000000060a R11: 0000000000000246 R12: 0000000000000003
    [  274.484475] R13: 0000000000000016 R14: 00007ffcd9b77fa0 R15: 00007ffcd9b78da4
    [  274.491602] Modules linked in: cls_bpf sch_ingress iptable_filter
    ip_tables algif_hash af_alg x86_pkg_temp_thermal fuse [last unloaded:
    test_bpf]
    [  274.504634] CR2: 0000000000000000
    [  274.507976] ---[ end trace 196d18386545eae1 ]---
    [  274.512588] RIP: 0010:          (null)
    [  274.516334] Code: Bad RIP value.
    [  274.519557] RSP: 0018:ffffbc9681f83540 EFLAGS: 00010286
    [  274.524775] RAX: 0000000000000000 RBX: ffffdc967fa80a18 RCX: 0000000000000000
    [  274.531921] RDX: ffff9db2ee08b540 RSI: 000000000000000e RDI: ffffdc967fa809a0
    [  274.539082] RBP: ffffbc9681f83580 R08: ffff9db2c4d62690 R09: 000000000000000c
    [  274.546205] R10: 0000000000000000 R11: ffff9db2ee08b540 R12: ffff9db31ce7c000
    [  274.553329] R13: 0000000000000001 R14: 000000000000000c R15: ffff9db3179cf400
    [  274.560456] FS:  00007ff4ae7c5740(0000) GS:ffff9db31fa80000(0000)
    knlGS:0000000000000000
    [  274.568541] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    [  274.574277] CR2: ffffffffffffffd6 CR3: 00000004574da004 CR4: 00000000003606e0
    [  274.581403] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    [  274.588535] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
    [  274.595658] Kernel panic - not syncing: Fatal exception in interrupt
    [  274.602046] Kernel Offset: 0x14400000 from 0xffffffff81000000
    (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
    [  274.612827] ---[ end Kernel panic - not syncing: Fatal exception in
    interrupt ]---
    [  274.620387] ------------[ cut here ]------------

    I'm also seeing the same failure on x86_64, and it reproduces
    consistently.

    >From poking around it looks like the skb's dst entry is being used
    to calculate the mtu in:

    mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

    ...but because that dst_entry  has an "ops" value set to md_dst_ops,
    the various ops (including mtu) are not set:

    crash> struct sk_buff._skb_refdst ffff928f87447700 -x
          _skb_refdst = 0xffffcd6fbf5ea590
    crash> struct dst_entry.ops 0xffffcd6fbf5ea590
      ops = 0xffffffffa0193800
    crash> struct dst_ops.mtu 0xffffffffa0193800
      mtu = 0x0
    crash>

    I confirmed that the dst entry also has dst->input set to
    dst_md_discard, so it looks like it's an entry that's been
    initialized via __metadata_dst_init alright.

    I think the fix here is to use skb_valid_dst(skb) - it checks
    for  DST_METADATA also, and with that fix in place, the
    problem - which was previously 100% reproducible - disappears.

    The below patch resolves the panic and all bpf tunnel tests pass
    without incident.

    Fixes: c8b34e680a09 ("ip_tunnel: Add tnl_update_pmtu in ip_md_tunnel_xmit")
    Reported-by: Naresh Kamboju <naresh.kamboju at linaro.org>
    Signed-off-by: Alan Maguire <alan.maguire at oracle.com>
    Acked-by: Alexei Starovoitov <ast at kernel.org>
    Tested-by: Anders Roxell <anders.roxell at linaro.org>
    Reported-by: Nicolas Dichtel <nicolas.dichtel at 6wind.com>
    Tested-by: Nicolas Dichtel <nicolas.dichtel at 6wind.com>
    Acked-by: Nicolas Dichtel <nicolas.dichtel at 6wind.com>
    Signed-off-by: David S. Miller <davem at davemloft.net>

Fixed up for backward compatibility to our own compat layer ip_tunnel.c
module.

Cc: Alan Maguire <alan.maguire at oracle.com>
Signed-off-by: Greg Rose <gvrose8192 at gmail.com>
---
 datapath/linux/compat/include/net/dst_metadata.h | 12 ++++++++++++
 datapath/linux/compat/ip_tunnel.c                | 10 +++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/datapath/linux/compat/include/net/dst_metadata.h b/datapath/linux/compat/include/net/dst_metadata.h
index 36f3f39..9a82b3c 100644
--- a/datapath/linux/compat/include/net/dst_metadata.h
+++ b/datapath/linux/compat/include/net/dst_metadata.h
@@ -127,4 +127,16 @@ rpl_metadata_dst_alloc(u8 optslen, enum metadata_type type, gfp_t flags)
 }
 #define metadata_dst_alloc rpl_metadata_dst_alloc
 
+#ifndef DST_METADATA 
+#define DST_METADATA		0x0200
+#endif
+
+static inline bool rpl_skb_valid_dst(const struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+
+	return dst && !(dst->flags & DST_METADATA);
+}
+#define skb_valid_dst rpl_skb_valid_dst
+
 #endif /* __NET_DST_METADATA_WRAPPER_H */
diff --git a/datapath/linux/compat/ip_tunnel.c b/datapath/linux/compat/ip_tunnel.c
index d16e60f..7dd57fe 100644
--- a/datapath/linux/compat/ip_tunnel.c
+++ b/datapath/linux/compat/ip_tunnel.c
@@ -51,6 +51,7 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
+#include <net/dst_metadata.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
@@ -263,9 +264,9 @@ static int rpl_tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
 					- sizeof(struct iphdr) - tunnel->hlen;
 	else
-		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
-	if (skb_dst(skb))
+	if (skb_valid_dst(skb))
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 
 	if (skb->protocol == htons(ETH_P_IP)) {
@@ -279,7 +280,10 @@ static int rpl_tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 	}
 #if IS_ENABLED(CONFIG_IPV6)
 	else if (skb->protocol == htons(ETH_P_IPV6)) {
-		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
+		struct rt6_info *rt6;
+
+		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
+					   NULL;
 
 		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
 			   mtu >= IPV6_MIN_MTU) {
-- 
1.8.3.1



More information about the dev mailing list