[ovs-dev] [help] Linux kernel panic happened, and skb->protocol is found to be 8. Has anybody ever had the same situation before?

Qianhuibin qianhuibin at huawei.com
Tue Feb 17 07:44:28 UTC 2015


Here is the problem.
I have a network system as follows:
It has VLAN & VXLAN on it, and every pair of VMs sends TCP & UDP traffic to each other. After this Linux system had been running for 14 days, a kernel panic happened. I then analyzed the Linux kernel crash dump with the crash utility. Here is the result:
crash> bt
PID: 3323   TASK: ffff8801ecdde580  CPU: 1   COMMAND: "xxxxxxx"
#0 [ffff8801ed0117f0] crash_kexec at ffffffff8008d6fa
#1 [ffff8801ed0118c0] oops_end at ffffffff8040ec28
#2 [ffff8801ed0118e0] general_protection at ffffffff8040df08
    [exception RIP: nf_reinject+42]
    RIP: ffffffff803804ca  RSP: ffff8801ed011998  RFLAGS: 00010286
   RAX: 3938373635343332  RBX: ffff8801e854e4c0  RCX: ffff88001621d300
    RDX: 0000000000000010  RSI: 0000000000000001  RDI: ffff8801e854e4c0
    RBP: 0000000000000001   R8: 111a2a2928060f77   R9: ff2f2e2d2c070978
    R10: 111a2a2928060f77  R11: ff2f2e2d2c070978  R12: ffff880081b0ee80
    R13: ffff8801ed011a20  R14: 0000000000000001  R15: ffff8801f451657c
    ORIG_RAX: ffffffffffffffff  CS: e030  SS: e02b
#3 [ffff8801ed0119d0] nfqnl_recv_verdict at ffffffffa03d7c56 [nfnetlink_queue]
#4 [ffff8801ed011a10] nfnetlink_rcv_msg at ffffffffa03672d1 [nfnetlink]
#5 [ffff8801ed011a80] nfnetlink_rcv_msg at ffffffffa036719d [nfnetlink]
#6 [ffff8801ed011af0] netlink_rcv_skb at ffffffff8037ce29
#7 [ffff8801ed011b10] nfnetlink_rcv at ffffffffa036715f [nfnetlink]
#8 [ffff8801ed011b20] netlink_unicast at ffffffff8037ca9f
#9 [ffff8801ed011b70] netlink_sendmsg at ffffffff8037d7a5
#10 [ffff8801ed011c00] sock_sendmsg at ffffffff8033eb4b
#11 [ffff8801ed011d80] ___sys_sendmsg at ffffffff8033fd4b
#12 [ffff8801ed011f20] __sys_sendmsg at ffffffff8033ff42
#13 [ffff8801ed011f80] system_call_fastpath at ffffffff80415b23
    RIP: 00007fb12bec4e0d  RSP: 00007fb12ae52810  RFLAGS: 00000293
    RAX: 000000000000002e  RBX: ffffffff80415b23  RCX: ffffffff800032eb
    RDX: 0000000000000000  RSI: 00007fb12ae52830  RDI: 0000000000000009
    RBP: 00000000006c1100   R8: 00007fb12ae52c18   R9: 0000000000000001
    R10: 00007fb12b378530  R11: 0000000000000293  R12: 00007fb12ae528f0
    R13: 0000000000000000  R14: 00000001fffffff7  R15: 0000000000000000
    ORIG_RAX: 000000000000002e  CS: e033  SS: e02b

I also analyzed the first parameter of nf_reinject, and found that the indev member had been freed somewhere, and that skb->protocol is 8. The contents of the skb are:
crash> struct sk_buff 0xffff880081b0ee80
struct sk_buff {
  next = 0x0,
  prev = 0x0,
  tstamp = {
    tv64 = 1422279902816506863
  },
  sk = 0x0,
  dev = 0xffff88008b379000,
  cb = "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000",
  _skb_refdst = 18446612135790802496,
  sp = 0x0,
  len = 336,
  data_len = 0,
  mac_len = 14,
  hdr_len = 0,
  {
    csum = 0,
    {
      csum_start = 0,
      csum_offset = 0
    }
  },
  priority = 0,
  local_df = 0 '\000',
  cloned = 0 '\000',
  ip_summed = 0 '\000',
  nohdr = 0 '\000',
  nfctinfo = 0 '\000',
  pkt_type = 0 '\000',
  fclone = 0 '\000',
  ipvs_property = 0 '\000',
  peeked = 0 '\000',
  nf_trace = 0 '\000',
  protocol = 8,
  destructor = 0,
  nfct = 0xffff88008b2c1198,
  nfct_reasm = 0x0,
  nf_bridge = 0xffff8800064d8180,
  skb_iif = 2,
  tc_index = 0,
  tc_verd = 0,
  rxhash = 3531363220,
  queue_mapping = 4,
  ndisc_nodetype = 0 '\000',
  pfmemalloc = 0 '\000',
  ooo_okay = 0 '\000',
  no_fcs = 0 '\000',
  dma_cookie = 0,
  secmark = 0,
  {
    mark = 0,
    dropcount = 0
  },
  vlan_tci = 0,
  transport_header = 212,
  network_header = 192,
  mac_header = 178,
  tail = 528,
  end = 768,
  head = 0xffff88001621d000 "@\336\071\a\002\210\377\377{\002",
  data = 0xffff88001621d0c0 "E",
  truesize = 2672,
  users = {
    counter = 1
  }
}

Here is the question: How does this happen? Why is the protocol 8? Has anybody ever had the same situation before? Thank you!





More information about the dev mailing list