Bug#588509: linux-image-2.6.32-5-amd64: hangs under Xen after Write protecting the kernel read-only data: 4220k
Here's some debugging output. I booted domU with an older kernel to be able to install
On opensuse dom0 I ran
sudo gdbserver-xen 127.0.0.1:9999 --attach 37
where 37 is domid printed by xm list and then used
sudo gdb /local/lindi/vmlinux-2.6.32-5-amd64
(gdb) target remote 127.0.0.1:9999
to start the debugger. It seems that the kernel is stuck trying to
print information about a kernel OOPS:
(gdb) info threads
* 1 Remote target 0xffffffff812f8c78 in __ticket_spin_lock (
lock=<value optimized out>)
at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/include/asm/spinlock.h:114
(gdb) bt full
#0 0xffffffff812f8c78 in __ticket_spin_lock (lock=<value optimized out>)
at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/include/asm/spinlock.h:114
inc = 1
tmp = 0
#1 __raw_spin_lock (lock=<value optimized out>)
at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/include/asm/spinlock.h:190
No locals.
#2 __spin_lock (lock=<value optimized out>)
at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/include/linux/spinlock_api_smp.h:337
No locals.
#3 _spin_lock (lock=<value optimized out>)
at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/kernel/spinlock.c:138
No locals.
#4 0xffffffff8104d344 in vprintk (
fmt=0xffffffff813a8798 "<1>BUG: unable to handle kernel ",
args=0xffff88003fdb93a8)
at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/kernel/printk.c:705
printed_len = 0
current_log_level = 4
flags = 0
p = <value optimized out>
#5 0xffffffff812f6eec in printk (fmt=0xffffffff815d13c0 "")
at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/kernel/printk.c:595
---Type <return> to continue, or q <return> to quit---
args = {{gp_offset = 8, fp_offset = 48,
overflow_arg_area = 0xffff88003fdb9488,
reg_save_area = 0xffff88003fdb93c8}}
r = 1
#6 0xffffffff810311b0 in show_fault_oops (address=<value optimized out>,
error_code=<value optimized out>, regs=<value optimized out>)
at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/mm/fault.c:583
No locals.
#7 no_context (address=<value optimized out>,
error_code=<value optimized out>, regs=<value optimized out>)
at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/mm/fault.c:658
tsk = 0xffff88003fdb4000
flags = 0
sig = <value optimized out>
#8 0xffffffff81031429 in __bad_area_nosemaphore (regs=0xffff88003fdb9598,
error_code=<value optimized out>, address=18446744071582043416,
si_code=196609)
at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/mm/fault.c:741
tsk = 0xffff88003fdb4000
#9 0xffffffff812f9015 in ?? ()
at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/kernel/entry_64.S:1383
No locals.
#10 0x0005398d646e415d in ?? ()
No symbol table info available.
#11 0x00000000d5de35ab in ?? ()
---Type <return> to continue, or q <return> to quit---
No symbol table info available.
#12 0x0000000000000002 in ?? ()
No symbol table info available.
#13 0x000c82198774ef00 in ?? ()
No symbol table info available.
#14 0x0000000000000001 in ?? ()
No symbol table info available.
#15 0xffff8800032e6040 in ?? ()
No symbol table info available.
#16 0x0000000000000202 in ?? ()
No symbol table info available.
#17 0xffff880000000000 in ?? ()
No symbol table info available.
#18 0xffffffff813a87b8 in ?? ()
__kcrctab_unregister_net_sysctl_table = 2623496682
__kstrtab_unregister_net_sysctl_table = "unregister_net_sysctl_table"
net_sysctl_ro_root = {root_list = {next = 0xffffffff814649b0,
prev = 0xffffffff814a6350}, default_set = {list = {
next = 0xffff88003fe34c08, prev = 0xffff88003d8dd808},
parent = 0xffffffff814649c0, is_seen = 0}, lookup = 0,
permissions = 0xffffffff812e4313 <net_ctl_ro_header_perms>}
sysctl_pernet_ops = {list = {next = 0xffffffff8149bcc0,
prev = 0xffffffff814a6140},
init = 0xffffffff812e4323 <sysctl_net_init>,
exit = 0xffffffff812e4375 <sysctl_net_exit>}
__ksymtab_register_net_sysctl_table = {value = 18446744071581877167,
name = 0xffffffff81422f46 "register_net_sysctl_table"}
__kstrtab_register_net_sysctl_table = "register_net_sysctl_table"
__initcall_sysctl_init4 = 0
---Type <return> to continue, or q <return> to quit---
__kcrctab_register_net_sysctl_table = 829491708
__ksymtab_register_net_sysctl_rotable = {value = 18446744071581877142,
name = 0xffffffff81422f2a "register_net_sysctl_rotable"}
__ksymtab_unregister_net_sysctl_table = {value = 18446744071581877137,
name = 0xffffffff81422f0e "unregister_net_sysctl_table"}
net_sysctl_root = {root_list = {next = 0xffffffff814a6390,
prev = 0xffffffff814649b0}, default_set = {list = {next = 0x0,
prev = 0x0}, parent = 0x0, is_seen = 0},
lookup = 0xffffffff812e42e8 <net_ctl_header_lookup>,
permissions = 0xffffffff812e4342 <net_ctl_permissions>}
__kcrctab_register_net_sysctl_rotable = 2528268687
__kstrtab_register_net_sysctl_rotable = "register_net_sysctl_rotable"
#19 0x0000000000000020 in ?? ()
No symbol table info available.
#20 0x0005398d90ee1210 in ?? ()
No symbol table info available.
#21 0x00000000f2572b01 in ?? ()
No symbol table info available.
#22 0x0005398d9113c2e5 in ?? ()
No symbol table info available.
#23 0xffffffff813a87b8 in ?? ()
__kcrctab_unregister_net_sysctl_table = 2623496682
__kstrtab_unregister_net_sysctl_table = "unregister_net_sysctl_table"
net_sysctl_ro_root = {root_list = {next = 0xffffffff814649b0,
prev = 0xffffffff814a6350}, default_set = {list = {
next = 0xffff88003fe34c08, prev = 0xffff88003d8dd808},
parent = 0xffffffff814649c0, is_seen = 0}, lookup = 0,
permissions = 0xffffffff812e4313 <net_ctl_ro_header_perms>}
sysctl_pernet_ops = {list = {next = 0xffffffff8149bcc0,
---Type <return> to continue, or q <return> to quit---
prev = 0xffffffff814a6140},
init = 0xffffffff812e4323 <sysctl_net_init>,
exit = 0xffffffff812e4375 <sysctl_net_exit>}
__ksymtab_register_net_sysctl_table = {value = 18446744071581877167,
name = 0xffffffff81422f46 "register_net_sysctl_table"}
__kstrtab_register_net_sysctl_table = "register_net_sysctl_table"
__initcall_sysctl_init4 = 0
__kcrctab_register_net_sysctl_table = 829491708
__ksymtab_register_net_sysctl_rotable = {value = 18446744071581877142,
name = 0xffffffff81422f2a "register_net_sysctl_rotable"}
__ksymtab_unregister_net_sysctl_table = {value = 18446744071581877137,
name = 0xffffffff81422f0e "unregister_net_sysctl_table"}
net_sysctl_root = {root_list = {next = 0xffffffff814a6390,
prev = 0xffffffff814649b0}, default_set = {list = {next = 0x0,
prev = 0x0}, parent = 0x0, is_seen = 0},
lookup = 0xffffffff812e42e8 <net_ctl_header_lookup>,
permissions = 0xffffffff812e4342 <net_ctl_permissions>}
__kcrctab_register_net_sysctl_rotable = 2528268687
__kstrtab_register_net_sysctl_rotable = "register_net_sysctl_rotable"
#24 0xffff8800032e6040 in ?? ()
No symbol table info available.
#25 0xffffffffffffffff in ?? ()
No symbol table info available.
#26 0xffffffff8102bddf in pvclock_clocksource_read (src=0x1)
at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/kernel/pvclock.c:145
ret = 0
offset = <value optimized out>
last = 1
---Type <return> to continue, or q <return> to quit---
#27 0x0000000000000000 in ?? ()
No symbol table info available.
I think we should figure out what caused the OOPS so the following
information about the printk() being stuck is not important:
(gdb) x/10i $rip
0xffffffff812f8c78 <__ticket_spin_lock+21>: movzwl (%rdi),%edx
0xffffffff812f8c7b <__ticket_spin_lock+24>:
jmp 0xffffffff812f8c72 <__ticket_spin_lock+15>
0xffffffff812f8c7d <_spin_lock+26>: retq
0xffffffff812f8c7e <__raw_write_lock>: subl $0x1000000,%ds:(%rdi)
0xffffffff812f8c85 <__raw_write_lock+7>:
je 0xffffffff812f8c8c <_write_lock+14>
0xffffffff812f8c87 <__raw_write_lock+9>: callq 0xffffffff81193b80
0xffffffff812f8c8c <_write_lock+14>: retq
0xffffffff812f8c8d <__ticket_spin_unlock>: incw (%rdi)
0xffffffff812f8c90 <raw_local_irq_restore>: mov %rsi,%rdi
0xffffffff812f8c93 <raw_local_irq_restore+3>: callq *0xffffffff814629b8
(gdb) si
0xffffffff812f8c7b 114 asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
(gdb) si
0xffffffff812f8c72 114 asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
(gdb) x/10i $rip
0xffffffff812f8c72 <__ticket_spin_lock+15>: cmp %eax,%edx
0xffffffff812f8c74 <__ticket_spin_lock+17>:
je 0xffffffff812f8c7d <_spin_lock+26>
0xffffffff812f8c76 <__ticket_spin_lock+19>: pause
0xffffffff812f8c78 <__ticket_spin_lock+21>: movzwl (%rdi),%edx
0xffffffff812f8c7b <__ticket_spin_lock+24>:
jmp 0xffffffff812f8c72 <__ticket_spin_lock+15>
0xffffffff812f8c7d <_spin_lock+26>: retq
0xffffffff812f8c7e <__raw_write_lock>: subl $0x1000000,%ds:(%rdi)
0xffffffff812f8c85 <__raw_write_lock+7>:
je 0xffffffff812f8c8c <_write_lock+14>
0xffffffff812f8c87 <__raw_write_lock+9>: callq 0xffffffff81193b80
0xffffffff812f8c8c <_write_lock+14>: retq
(gdb) info register
rax 0x1 1
rbx 0x0 0
rcx 0xfffedb34 4294892340
rdx 0x0 0
rsi 0xffff88003fdb93a8 -131940323978328
rdi 0xffffffff815d13c0 -2124606528
rbp 0xffff88003fdb93a8 0xffff88003fdb93a8
rsp 0xffff88003fdb92f0 0xffff88003fdb92f0
r8 0x2 2
r9 0xffff88003fdb4000 -131940323999744
r10 0xffff880000000000 -131941395333120
r11 0x202 514
r12 0xffffffff813a8798 -2126870632
r13 0x4 4
r14 0x3 3
r15 0x0 0
rip 0xffffffff812f8c72 0xffffffff812f8c72 <__ticket_spin_lock+15>
eflags 0x1397 [ CF PF AF SF TF IF #12 ]
cs 0xe033 57395
ss 0xe02b 57387
ds 0x0 0
es 0x0 0
fs 0x0 0
gs 0x0 0
fctrl 0x37f 895
fstat 0x0 0
ftag 0xffff 65535
fiseg 0x0 0
---Type <return> to continue, or q <return> to quit---
fioff 0x0 0
foseg 0x0 0
fooff 0x0 0
fop 0x0 0
mxcsr 0x1f80 [ IM DM ZM OM UM PM ]
(gdb)
However, why did the kernel OOPS in the first place?
(gdb) frame 7
#7 no_context (address=<value optimized out>,
error_code=<value optimized out>, regs=<value optimized out>)
at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/mm/fault.c:658
658 show_fault_oops(regs, error_code, address);
(gdb) p/x *regs
$10 = {r15 = 0x5398d646e415d, r14 = 0xd5de35ab, r13 = 0x2,
r12 = 0xc82198774ef00, bp = 0x1, bx = 0xffff8800032e6040, r11 = 0x202,
r10 = 0xffff880000000000, r9 = 0xffffffff813a87b8, r8 = 0x20,
ax = 0x5398d90ee1210, cx = 0xf2572b01, dx = 0x5398d9113c2e5,
si = 0xffffffff813a87b8, di = 0xffff8800032e6040,
orig_ax = 0xffffffffffffffff, ip = 0xffffffff8102bddf, cs = 0x10000e030,
flags = 0x10002, sp = 0xffff88003fdb9640, ss = 0xe02b}
(gdb) p error_code
$11 = 3
(gdb) p/x address
$22 = 0xffffffff8130cd18
(gdb) x/6i regs->ip
0xffffffff8102bddf <__cmpxchg>:
cmpxchg %rdx,%ds:0x2e0f30(%rip) # 0xffffffff8130cd18
0xffffffff8102bde8 <pvclock_clocksource_read+122>: cmp %rdx,%rax
0xffffffff8102bdeb <pvclock_clocksource_read+125>:
jne 0xffffffff8102bdda <pvclock_clocksource_read+108>
0xffffffff8102bded <pvclock_clocksource_read+127>: pop %rcx
0xffffffff8102bdee <pvclock_clocksource_read+128>: pop %rbx
0xffffffff8102bdef <pvclock_clocksource_read+129>: pop %rbp
(gdb) shell objdump -axdt /local/lindi/vmlinux-2.6.32-5-amd64 | grep ffffffff8130cd18
ffffffff8130cd18 l O .rodata 0000000000000008 last_value
ffffffff8102bdd3: 48 8b 05 3e 0f 2e 00 mov 0x2e0f3e(%rip),%rax # ffffffff8130cd18 <last_value>
ffffffff8102bddf: f0 48 0f b1 15 30 0f lock cmpxchg %rdx,0x2e0f30(%rip) # ffffffff8130cd18 <last_value>
Reply to: