[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#588509: linux-image-2.6.32-5-amd64: hangs under Xen after Write protecting the kernel read-only data: 4220k



Here's some debugging output. I booted domU with an older kernel to be able to install 

On opensuse dom0 I ran

sudo gdbserver-xen 127.0.0.1:9999 --attach 37

where 37 is domid printed by xm list and then used

sudo gdb /local/lindi/vmlinux-2.6.32-5-amd64
(gdb) target remote 127.0.0.1:9999

to start the debugger. It seems that the kernel is stuck trying to
print information about a kernel OOPS:

(gdb) info threads
* 1 Remote target  0xffffffff812f8c78 in __ticket_spin_lock (
    lock=<value optimized out>)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/include/asm/spinlock.h:114

(gdb) bt full
#0  0xffffffff812f8c78 in __ticket_spin_lock (lock=<value optimized out>)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/include/asm/spinlock.h:114
        inc = 1
        tmp = 0
#1  __raw_spin_lock (lock=<value optimized out>)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/include/asm/spinlock.h:190
No locals.
#2  __spin_lock (lock=<value optimized out>)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/include/linux/spinlock_api_smp.h:337
No locals.
#3  _spin_lock (lock=<value optimized out>)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/kernel/spinlock.c:138
No locals.
#4  0xffffffff8104d344 in vprintk (
    fmt=0xffffffff813a8798 "<1>BUG: unable to handle kernel ", 
    args=0xffff88003fdb93a8)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/kernel/printk.c:705
        printed_len = 0
        current_log_level = 4
        flags = 0
        p = <value optimized out>
#5  0xffffffff812f6eec in printk (fmt=0xffffffff815d13c0 "")
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/kernel/printk.c:595
---Type <return> to continue, or q <return> to quit---
        args = {{gp_offset = 8, fp_offset = 48, 
            overflow_arg_area = 0xffff88003fdb9488, 
            reg_save_area = 0xffff88003fdb93c8}}
        r = 1
#6  0xffffffff810311b0 in show_fault_oops (address=<value optimized out>, 
    error_code=<value optimized out>, regs=<value optimized out>)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/mm/fault.c:583
No locals.
#7  no_context (address=<value optimized out>, 
    error_code=<value optimized out>, regs=<value optimized out>)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/mm/fault.c:658
        tsk = 0xffff88003fdb4000
        flags = 0
        sig = <value optimized out>
#8  0xffffffff81031429 in __bad_area_nosemaphore (regs=0xffff88003fdb9598, 
    error_code=<value optimized out>, address=18446744071582043416, 
    si_code=196609)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/mm/fault.c:741
        tsk = 0xffff88003fdb4000
#9  0xffffffff812f9015 in ?? ()
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/kernel/entry_64.S:1383
No locals.
#10 0x0005398d646e415d in ?? ()
No symbol table info available.
#11 0x00000000d5de35ab in ?? ()
---Type <return> to continue, or q <return> to quit---
No symbol table info available.
#12 0x0000000000000002 in ?? ()
No symbol table info available.
#13 0x000c82198774ef00 in ?? ()
No symbol table info available.
#14 0x0000000000000001 in ?? ()
No symbol table info available.
#15 0xffff8800032e6040 in ?? ()
No symbol table info available.
#16 0x0000000000000202 in ?? ()
No symbol table info available.
#17 0xffff880000000000 in ?? ()
No symbol table info available.
#18 0xffffffff813a87b8 in ?? ()
        __kcrctab_unregister_net_sysctl_table = 2623496682
        __kstrtab_unregister_net_sysctl_table = "unregister_net_sysctl_table"
        net_sysctl_ro_root = {root_list = {next = 0xffffffff814649b0, 
            prev = 0xffffffff814a6350}, default_set = {list = {
              next = 0xffff88003fe34c08, prev = 0xffff88003d8dd808}, 
            parent = 0xffffffff814649c0, is_seen = 0}, lookup = 0, 
          permissions = 0xffffffff812e4313 <net_ctl_ro_header_perms>}
        sysctl_pernet_ops = {list = {next = 0xffffffff8149bcc0, 
            prev = 0xffffffff814a6140}, 
          init = 0xffffffff812e4323 <sysctl_net_init>, 
          exit = 0xffffffff812e4375 <sysctl_net_exit>}
        __ksymtab_register_net_sysctl_table = {value = 18446744071581877167, 
          name = 0xffffffff81422f46 "register_net_sysctl_table"}
        __kstrtab_register_net_sysctl_table = "register_net_sysctl_table"
        __initcall_sysctl_init4 = 0
---Type <return> to continue, or q <return> to quit---
        __kcrctab_register_net_sysctl_table = 829491708
        __ksymtab_register_net_sysctl_rotable = {value = 18446744071581877142, 
          name = 0xffffffff81422f2a "register_net_sysctl_rotable"}
        __ksymtab_unregister_net_sysctl_table = {value = 18446744071581877137, 
          name = 0xffffffff81422f0e "unregister_net_sysctl_table"}
        net_sysctl_root = {root_list = {next = 0xffffffff814a6390, 
            prev = 0xffffffff814649b0}, default_set = {list = {next = 0x0, 
              prev = 0x0}, parent = 0x0, is_seen = 0}, 
          lookup = 0xffffffff812e42e8 <net_ctl_header_lookup>, 
          permissions = 0xffffffff812e4342 <net_ctl_permissions>}
        __kcrctab_register_net_sysctl_rotable = 2528268687
        __kstrtab_register_net_sysctl_rotable = "register_net_sysctl_rotable"
#19 0x0000000000000020 in ?? ()
No symbol table info available.
#20 0x0005398d90ee1210 in ?? ()
No symbol table info available.
#21 0x00000000f2572b01 in ?? ()
No symbol table info available.
#22 0x0005398d9113c2e5 in ?? ()
No symbol table info available.
#23 0xffffffff813a87b8 in ?? ()
        __kcrctab_unregister_net_sysctl_table = 2623496682
        __kstrtab_unregister_net_sysctl_table = "unregister_net_sysctl_table"
        net_sysctl_ro_root = {root_list = {next = 0xffffffff814649b0, 
            prev = 0xffffffff814a6350}, default_set = {list = {
              next = 0xffff88003fe34c08, prev = 0xffff88003d8dd808}, 
            parent = 0xffffffff814649c0, is_seen = 0}, lookup = 0, 
          permissions = 0xffffffff812e4313 <net_ctl_ro_header_perms>}
        sysctl_pernet_ops = {list = {next = 0xffffffff8149bcc0, 
---Type <return> to continue, or q <return> to quit---
            prev = 0xffffffff814a6140}, 
          init = 0xffffffff812e4323 <sysctl_net_init>, 
          exit = 0xffffffff812e4375 <sysctl_net_exit>}
        __ksymtab_register_net_sysctl_table = {value = 18446744071581877167, 
          name = 0xffffffff81422f46 "register_net_sysctl_table"}
        __kstrtab_register_net_sysctl_table = "register_net_sysctl_table"
        __initcall_sysctl_init4 = 0
        __kcrctab_register_net_sysctl_table = 829491708
        __ksymtab_register_net_sysctl_rotable = {value = 18446744071581877142, 
          name = 0xffffffff81422f2a "register_net_sysctl_rotable"}
        __ksymtab_unregister_net_sysctl_table = {value = 18446744071581877137, 
          name = 0xffffffff81422f0e "unregister_net_sysctl_table"}
        net_sysctl_root = {root_list = {next = 0xffffffff814a6390, 
            prev = 0xffffffff814649b0}, default_set = {list = {next = 0x0, 
              prev = 0x0}, parent = 0x0, is_seen = 0}, 
          lookup = 0xffffffff812e42e8 <net_ctl_header_lookup>, 
          permissions = 0xffffffff812e4342 <net_ctl_permissions>}
        __kcrctab_register_net_sysctl_rotable = 2528268687
        __kstrtab_register_net_sysctl_rotable = "register_net_sysctl_rotable"
#24 0xffff8800032e6040 in ?? ()
No symbol table info available.
#25 0xffffffffffffffff in ?? ()
No symbol table info available.
#26 0xffffffff8102bddf in pvclock_clocksource_read (src=0x1)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/kernel/pvclock.c:145
        ret = 0
        offset = <value optimized out>
        last = 1
---Type <return> to continue, or q <return> to quit---
#27 0x0000000000000000 in ?? ()
No symbol table info available.


I think we should figure out what caused the OOPS so the following
information about the printk() being stuck is not important:

(gdb) x/10i $rip
0xffffffff812f8c78 <__ticket_spin_lock+21>:	movzwl (%rdi),%edx
0xffffffff812f8c7b <__ticket_spin_lock+24>:	
    jmp    0xffffffff812f8c72 <__ticket_spin_lock+15>
0xffffffff812f8c7d <_spin_lock+26>:	retq   
0xffffffff812f8c7e <__raw_write_lock>:	subl   $0x1000000,%ds:(%rdi)
0xffffffff812f8c85 <__raw_write_lock+7>:	
    je     0xffffffff812f8c8c <_write_lock+14>
0xffffffff812f8c87 <__raw_write_lock+9>:	callq  0xffffffff81193b80
0xffffffff812f8c8c <_write_lock+14>:	retq   
0xffffffff812f8c8d <__ticket_spin_unlock>:	incw   (%rdi)
0xffffffff812f8c90 <raw_local_irq_restore>:	mov    %rsi,%rdi
0xffffffff812f8c93 <raw_local_irq_restore+3>:	callq  *0xffffffff814629b8
(gdb) si
0xffffffff812f8c7b	114		asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
(gdb) si
0xffffffff812f8c72	114		asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
(gdb) x/10i $rip
0xffffffff812f8c72 <__ticket_spin_lock+15>:	cmp    %eax,%edx
0xffffffff812f8c74 <__ticket_spin_lock+17>:	
    je     0xffffffff812f8c7d <_spin_lock+26>
0xffffffff812f8c76 <__ticket_spin_lock+19>:	pause  
0xffffffff812f8c78 <__ticket_spin_lock+21>:	movzwl (%rdi),%edx
0xffffffff812f8c7b <__ticket_spin_lock+24>:	
    jmp    0xffffffff812f8c72 <__ticket_spin_lock+15>
0xffffffff812f8c7d <_spin_lock+26>:	retq   
0xffffffff812f8c7e <__raw_write_lock>:	subl   $0x1000000,%ds:(%rdi)
0xffffffff812f8c85 <__raw_write_lock+7>:	
    je     0xffffffff812f8c8c <_write_lock+14>
0xffffffff812f8c87 <__raw_write_lock+9>:	callq  0xffffffff81193b80
0xffffffff812f8c8c <_write_lock+14>:	retq   
(gdb) info register
rax            0x1	1
rbx            0x0	0
rcx            0xfffedb34	4294892340
rdx            0x0	0
rsi            0xffff88003fdb93a8	-131940323978328
rdi            0xffffffff815d13c0	-2124606528
rbp            0xffff88003fdb93a8	0xffff88003fdb93a8
rsp            0xffff88003fdb92f0	0xffff88003fdb92f0
r8             0x2	2
r9             0xffff88003fdb4000	-131940323999744
r10            0xffff880000000000	-131941395333120
r11            0x202	514
r12            0xffffffff813a8798	-2126870632
r13            0x4	4
r14            0x3	3
r15            0x0	0
rip            0xffffffff812f8c72	0xffffffff812f8c72 <__ticket_spin_lock+15>
eflags         0x1397	[ CF PF AF SF TF IF #12 ]
cs             0xe033	57395
ss             0xe02b	57387
ds             0x0	0
es             0x0	0
fs             0x0	0
gs             0x0	0
fctrl          0x37f	895
fstat          0x0	0
ftag           0xffff	65535
fiseg          0x0	0
---Type <return> to continue, or q <return> to quit---
fioff          0x0	0
foseg          0x0	0
fooff          0x0	0
fop            0x0	0
mxcsr          0x1f80	[ IM DM ZM OM UM PM ]
(gdb) 

However, why did the kernel OOPS in the first place?


(gdb) frame 7
#7  no_context (address=<value optimized out>,
    error_code=<value optimized out>, regs=<value optimized out>)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/mm/fault.c:658
658             show_fault_oops(regs, error_code, address);
(gdb) p/x *regs
$10 = {r15 = 0x5398d646e415d, r14 = 0xd5de35ab, r13 = 0x2,
  r12 = 0xc82198774ef00, bp = 0x1, bx = 0xffff8800032e6040, r11 = 0x202,
  r10 = 0xffff880000000000, r9 = 0xffffffff813a87b8, r8 = 0x20,
  ax = 0x5398d90ee1210, cx = 0xf2572b01, dx = 0x5398d9113c2e5,
  si = 0xffffffff813a87b8, di = 0xffff8800032e6040,
  orig_ax = 0xffffffffffffffff, ip = 0xffffffff8102bddf, cs = 0x10000e030,
  flags = 0x10002, sp = 0xffff88003fdb9640, ss = 0xe02b}
(gdb) p error_code
$11 = 3
(gdb) p/x address
$22 = 0xffffffff8130cd18
(gdb) x/6i regs->ip
0xffffffff8102bddf <__cmpxchg>:
    cmpxchg %rdx,%ds:0x2e0f30(%rip)        # 0xffffffff8130cd18
0xffffffff8102bde8 <pvclock_clocksource_read+122>:      cmp    %rdx,%rax
0xffffffff8102bdeb <pvclock_clocksource_read+125>:
    jne    0xffffffff8102bdda <pvclock_clocksource_read+108>
0xffffffff8102bded <pvclock_clocksource_read+127>:      pop    %rcx
0xffffffff8102bdee <pvclock_clocksource_read+128>:      pop    %rbx
0xffffffff8102bdef <pvclock_clocksource_read+129>:      pop    %rbp


(gdb) shell objdump -axdt /local/lindi/vmlinux-2.6.32-5-amd64 | grep ffffffff8130cd18
ffffffff8130cd18 l     O .rodata        0000000000000008 last_value
ffffffff8102bdd3:       48 8b 05 3e 0f 2e 00    mov    0x2e0f3e(%rip),%rax        # ffffffff8130cd18 <last_value>
ffffffff8102bddf:       f0 48 0f b1 15 30 0f    lock cmpxchg %rdx,0x2e0f30(%rip)        # ffffffff8130cd18 <last_value>




Reply to: