[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#588509: linux-image-2.6.32-5-amd64: hangs under Xen after Write protecting the kernel read-only data: 4220k



Here's some debugging output. I installed
linux-image-2.6.32-5-amd64-dbg and linux-source-2.6.32 and then used

sudo gdbserver-xen 127.0.0.1:9999 --attach 37

where 37 is the domid printed by xm list and

sudo gdb /local/lindi/vmlinux-2.6.32-5-amd64
(gdb) target remote 127.0.0.1:9999

on the opensuse dom0 to attach gdb to linux. It seems that Linux (-17)
is stuck trying to print an OOPS:

(gdb) info threads
* 1 Remote target  0xffffffff812f8c78 in __ticket_spin_lock (
    lock=<value optimized out>)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/include/asm/spinlock.h:114

(gdb) bt full
#0  0xffffffff812f8c78 in __ticket_spin_lock (lock=<value optimized out>)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/include/asm/spinlock.h:114
        inc = 1
        tmp = 0
#1  __raw_spin_lock (lock=<value optimized out>)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/include/asm/spinlock.h:190
No locals.
#2  __spin_lock (lock=<value optimized out>)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/include/linux/spinlock_api_smp.h:337
No locals.
#3  _spin_lock (lock=<value optimized out>)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/kernel/spinlock.c:138
No locals.
#4  0xffffffff8104d344 in vprintk (
    fmt=0xffffffff813a8798 "<1>BUG: unable to handle kernel ", 
    args=0xffff88003fdb93a8)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/kernel/printk.c:705
        printed_len = 0
        current_log_level = 4
        flags = 0
        p = <value optimized out>
#5  0xffffffff812f6eec in printk (fmt=0xffffffff815d13c0 "")
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/kernel/printk.c:595
---Type <return> to continue, or q <return> to quit---
        args = {{gp_offset = 8, fp_offset = 48, 
            overflow_arg_area = 0xffff88003fdb9488, 
            reg_save_area = 0xffff88003fdb93c8}}
        r = 1
#6  0xffffffff810311b0 in show_fault_oops (address=<value optimized out>, 
    error_code=<value optimized out>, regs=<value optimized out>)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/mm/fault.c:583
No locals.
#7  no_context (address=<value optimized out>, 
    error_code=<value optimized out>, regs=<value optimized out>)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/mm/fault.c:658
        tsk = 0xffff88003fdb4000
        flags = 0
        sig = <value optimized out>
#8  0xffffffff81031429 in __bad_area_nosemaphore (regs=0xffff88003fdb9598, 
    error_code=<value optimized out>, address=18446744071582043416, 
    si_code=196609)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/mm/fault.c:741
        tsk = 0xffff88003fdb4000
#9  0xffffffff812f9015 in ?? ()
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/kernel/entry_64.S:1383
No locals.
#10 0x0005398d646e415d in ?? ()
No symbol table info available.
#11 0x00000000d5de35ab in ?? ()
---Type <return> to continue, or q <return> to quit---
No symbol table info available.
#12 0x0000000000000002 in ?? ()
No symbol table info available.
#13 0x000c82198774ef00 in ?? ()
No symbol table info available.
#14 0x0000000000000001 in ?? ()
No symbol table info available.
#15 0xffff8800032e6040 in ?? ()
No symbol table info available.
#16 0x0000000000000202 in ?? ()
No symbol table info available.
#17 0xffff880000000000 in ?? ()
No symbol table info available.
#18 0xffffffff813a87b8 in ?? ()
        __kcrctab_unregister_net_sysctl_table = 2623496682
        __kstrtab_unregister_net_sysctl_table = "unregister_net_sysctl_table"
        net_sysctl_ro_root = {root_list = {next = 0xffffffff814649b0, 
            prev = 0xffffffff814a6350}, default_set = {list = {
              next = 0xffff88003fe34c08, prev = 0xffff88003d8dd808}, 
            parent = 0xffffffff814649c0, is_seen = 0}, lookup = 0, 
          permissions = 0xffffffff812e4313 <net_ctl_ro_header_perms>}
        sysctl_pernet_ops = {list = {next = 0xffffffff8149bcc0, 
            prev = 0xffffffff814a6140}, 
          init = 0xffffffff812e4323 <sysctl_net_init>, 
          exit = 0xffffffff812e4375 <sysctl_net_exit>}
        __ksymtab_register_net_sysctl_table = {value = 18446744071581877167, 
          name = 0xffffffff81422f46 "register_net_sysctl_table"}
        __kstrtab_register_net_sysctl_table = "register_net_sysctl_table"
        __initcall_sysctl_init4 = 0
---Type <return> to continue, or q <return> to quit---
        __kcrctab_register_net_sysctl_table = 829491708
        __ksymtab_register_net_sysctl_rotable = {value = 18446744071581877142, 
          name = 0xffffffff81422f2a "register_net_sysctl_rotable"}
        __ksymtab_unregister_net_sysctl_table = {value = 18446744071581877137, 
          name = 0xffffffff81422f0e "unregister_net_sysctl_table"}
        net_sysctl_root = {root_list = {next = 0xffffffff814a6390, 
            prev = 0xffffffff814649b0}, default_set = {list = {next = 0x0, 
              prev = 0x0}, parent = 0x0, is_seen = 0}, 
          lookup = 0xffffffff812e42e8 <net_ctl_header_lookup>, 
          permissions = 0xffffffff812e4342 <net_ctl_permissions>}
        __kcrctab_register_net_sysctl_rotable = 2528268687
        __kstrtab_register_net_sysctl_rotable = "register_net_sysctl_rotable"
#19 0x0000000000000020 in ?? ()
No symbol table info available.
#20 0x0005398d90ee1210 in ?? ()
No symbol table info available.
#21 0x00000000f2572b01 in ?? ()
No symbol table info available.
#22 0x0005398d9113c2e5 in ?? ()
No symbol table info available.
#23 0xffffffff813a87b8 in ?? ()
        __kcrctab_unregister_net_sysctl_table = 2623496682
        __kstrtab_unregister_net_sysctl_table = "unregister_net_sysctl_table"
        net_sysctl_ro_root = {root_list = {next = 0xffffffff814649b0, 
            prev = 0xffffffff814a6350}, default_set = {list = {
              next = 0xffff88003fe34c08, prev = 0xffff88003d8dd808}, 
            parent = 0xffffffff814649c0, is_seen = 0}, lookup = 0, 
          permissions = 0xffffffff812e4313 <net_ctl_ro_header_perms>}
        sysctl_pernet_ops = {list = {next = 0xffffffff8149bcc0, 
---Type <return> to continue, or q <return> to quit---
            prev = 0xffffffff814a6140}, 
          init = 0xffffffff812e4323 <sysctl_net_init>, 
          exit = 0xffffffff812e4375 <sysctl_net_exit>}
        __ksymtab_register_net_sysctl_table = {value = 18446744071581877167, 
          name = 0xffffffff81422f46 "register_net_sysctl_table"}
        __kstrtab_register_net_sysctl_table = "register_net_sysctl_table"
        __initcall_sysctl_init4 = 0
        __kcrctab_register_net_sysctl_table = 829491708
        __ksymtab_register_net_sysctl_rotable = {value = 18446744071581877142, 
          name = 0xffffffff81422f2a "register_net_sysctl_rotable"}
        __ksymtab_unregister_net_sysctl_table = {value = 18446744071581877137, 
          name = 0xffffffff81422f0e "unregister_net_sysctl_table"}
        net_sysctl_root = {root_list = {next = 0xffffffff814a6390, 
            prev = 0xffffffff814649b0}, default_set = {list = {next = 0x0, 
              prev = 0x0}, parent = 0x0, is_seen = 0}, 
          lookup = 0xffffffff812e42e8 <net_ctl_header_lookup>, 
          permissions = 0xffffffff812e4342 <net_ctl_permissions>}
        __kcrctab_register_net_sysctl_rotable = 2528268687
        __kstrtab_register_net_sysctl_rotable = "register_net_sysctl_rotable"
#24 0xffff8800032e6040 in ?? ()
No symbol table info available.
#25 0xffffffffffffffff in ?? ()
No symbol table info available.
#26 0xffffffff8102bddf in pvclock_clocksource_read (src=0x1)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/kernel/pvclock.c:145
        ret = 0
        offset = <value optimized out>
        last = 1
---Type <return> to continue, or q <return> to quit---
#27 0x0000000000000000 in ?? ()
No symbol table info available.

We probably should figure out what caused the OOPS so the following
information about printk being stuck is probably not very relevant:

(gdb) x/10i $rip
0xffffffff812f8c78 <__ticket_spin_lock+21>:	movzwl (%rdi),%edx
0xffffffff812f8c7b <__ticket_spin_lock+24>:	
    jmp    0xffffffff812f8c72 <__ticket_spin_lock+15>
0xffffffff812f8c7d <_spin_lock+26>:	retq   
0xffffffff812f8c7e <__raw_write_lock>:	subl   $0x1000000,%ds:(%rdi)
0xffffffff812f8c85 <__raw_write_lock+7>:	
    je     0xffffffff812f8c8c <_write_lock+14>
0xffffffff812f8c87 <__raw_write_lock+9>:	callq  0xffffffff81193b80
0xffffffff812f8c8c <_write_lock+14>:	retq   
0xffffffff812f8c8d <__ticket_spin_unlock>:	incw   (%rdi)
0xffffffff812f8c90 <raw_local_irq_restore>:	mov    %rsi,%rdi
0xffffffff812f8c93 <raw_local_irq_restore+3>:	callq  *0xffffffff814629b8
(gdb) si
0xffffffff812f8c7b	114		asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
(gdb) si
0xffffffff812f8c72	114		asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
(gdb) x/10i $rip
0xffffffff812f8c72 <__ticket_spin_lock+15>:	cmp    %eax,%edx
0xffffffff812f8c74 <__ticket_spin_lock+17>:	
    je     0xffffffff812f8c7d <_spin_lock+26>
0xffffffff812f8c76 <__ticket_spin_lock+19>:	pause  
0xffffffff812f8c78 <__ticket_spin_lock+21>:	movzwl (%rdi),%edx
0xffffffff812f8c7b <__ticket_spin_lock+24>:	
    jmp    0xffffffff812f8c72 <__ticket_spin_lock+15>
0xffffffff812f8c7d <_spin_lock+26>:	retq   
0xffffffff812f8c7e <__raw_write_lock>:	subl   $0x1000000,%ds:(%rdi)
0xffffffff812f8c85 <__raw_write_lock+7>:	
    je     0xffffffff812f8c8c <_write_lock+14>
0xffffffff812f8c87 <__raw_write_lock+9>:	callq  0xffffffff81193b80
0xffffffff812f8c8c <_write_lock+14>:	retq   
(gdb) info register
rax            0x1	1
rbx            0x0	0
rcx            0xfffedb34	4294892340
rdx            0x0	0
rsi            0xffff88003fdb93a8	-131940323978328
rdi            0xffffffff815d13c0	-2124606528
rbp            0xffff88003fdb93a8	0xffff88003fdb93a8
rsp            0xffff88003fdb92f0	0xffff88003fdb92f0
r8             0x2	2
r9             0xffff88003fdb4000	-131940323999744
r10            0xffff880000000000	-131941395333120
r11            0x202	514
r12            0xffffffff813a8798	-2126870632
r13            0x4	4
r14            0x3	3
r15            0x0	0
rip            0xffffffff812f8c72	0xffffffff812f8c72 <__ticket_spin_lock+15>
eflags         0x1397	[ CF PF AF SF TF IF #12 ]
cs             0xe033	57395
ss             0xe02b	57387
ds             0x0	0
es             0x0	0
fs             0x0	0
gs             0x0	0
fctrl          0x37f	895
fstat          0x0	0
ftag           0xffff	65535
fiseg          0x0	0
---Type <return> to continue, or q <return> to quit---
fioff          0x0	0
foseg          0x0	0
fooff          0x0	0
fop            0x0	0
mxcsr          0x1f80	[ IM DM ZM OM UM PM ]
(gdb) 

However, it seems that the OOPS is caused by a write (error_code==3)
to variable called "last_value" that is in read-only memory (.rodata):

(gdb) frame 7
#7  no_context (address=<value optimized out>,
    error_code=<value optimized out>, regs=<value optimized out>)
    at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/mm/fault.c:658
658             show_fault_oops(regs, error_code, address);
(gdb) p/x *regs
$10 = {r15 = 0x5398d646e415d, r14 = 0xd5de35ab, r13 = 0x2,
  r12 = 0xc82198774ef00, bp = 0x1, bx = 0xffff8800032e6040, r11 = 0x202,
  r10 = 0xffff880000000000, r9 = 0xffffffff813a87b8, r8 = 0x20,
  ax = 0x5398d90ee1210, cx = 0xf2572b01, dx = 0x5398d9113c2e5,
  si = 0xffffffff813a87b8, di = 0xffff8800032e6040,
  orig_ax = 0xffffffffffffffff, ip = 0xffffffff8102bddf, cs = 0x10000e030,
  flags = 0x10002, sp = 0xffff88003fdb9640, ss = 0xe02b}
(gdb) p error_code
$11 = 3
(gdb) p/x address
$22 = 0xffffffff8130cd18
(gdb) x/6i regs->ip
0xffffffff8102bddf <__cmpxchg>:
    cmpxchg %rdx,%ds:0x2e0f30(%rip)        # 0xffffffff8130cd18
0xffffffff8102bde8 <pvclock_clocksource_read+122>:      cmp    %rdx,%rax
0xffffffff8102bdeb <pvclock_clocksource_read+125>:
    jne    0xffffffff8102bdda <pvclock_clocksource_read+108>
0xffffffff8102bded <pvclock_clocksource_read+127>:      pop    %rcx
0xffffffff8102bdee <pvclock_clocksource_read+128>:      pop    %rbx
0xffffffff8102bdef <pvclock_clocksource_read+129>:      pop    %rbp

(gdb) shell objdump -axdt /local/lindi/vmlinux-2.6.32-5-amd64 | grep ffffffff8130cd18
ffffffff8130cd18 l     O .rodata        0000000000000008 last_value
ffffffff8102bdd3:       48 8b 05 3e 0f 2e 00    mov    0x2e0f3e(%rip),%rax        # ffffffff8130cd18 <last_value>
ffffffff8102bddf:       f0 48 0f b1 15 30 0f    lock cmpxchg %rdx,0x2e0f30(%rip)        # ffffffff8130cd18 <last_value>

What is interesting is that -15 does not contain such a variable:

$ objdump -axdt 15/usr/lib/debug/boot/vmlinux-2.6.32-5-amd64|grep last_value               ffffffff81671780 l     O .bss   0000000000000001 last_value.26980
ffffffff811f5947:       0f b6 05 32 be 47 00    movzbl 0x47be32(%rip),%eax        # ffffffff81671780 <last_value.26980>
ffffffff811f5955:       88 15 25 be 47 00       mov    %dl,0x47be25(%rip)        # ffffffff81671780 <last_value.26980>

$ objdump -axdt 17/usr/lib/debug/boot/vmlinux-2.6.32-5-amd64|grep last_value
ffffffff8130cd18 l     O .rodata        0000000000000008 last_value
ffffffff81675bc0 l     O .bss   0000000000000001 last_value.26980
ffffffff8102bdd3:       48 8b 05 3e 0f 2e 00    mov    0x2e0f3e(%rip),%rax        # ffffffff8130cd18 <last_value>
ffffffff8102bddf:       f0 48 0f b1 15 30 0f    lock cmpxchg %rdx,0x2e0f30(%rip)        # ffffffff8130cd18 <last_value>
ffffffff811f49f7:       0f b6 05 c2 11 48 00    movzbl 0x4811c2(%rip),%eax        # ffffffff81675bc0 <last_value.26980>
ffffffff811f4a05:       88 15 b5 11 48 00       mov    %dl,0x4811b5(%rip)        # ffffffff81675bc0 <last_value.26980>

interdiff -z linux-2.6_2.6.32-15.diff.gz linux-2.6_2.6.32-17.diff.gz

shows

++static atomic64_t last_value = ATOMIC64_INIT(0);
++
+ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+ {
+       struct pvclock_shadow_time shadow;
+       unsigned version;
+       cycle_t ret, offset;
++      u64 last;
+
+       do {
+               version = pvclock_get_time_values(&shadow, src);
+@@ -123,6 +126,27 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+               barrier();
+       } while (version != src->version);
+
++      /*
++       * Assumption here is that last_value, a global accumulator, always goes
++       * forward. If we are less than that, we should not be much smaller.
++       * We assume there is an error marging we're inside, and then the correction
++       * does not sacrifice accuracy.
++       *
++       * For reads: global may have changed between test and return,
++       * but this means someone else updated poked the clock at a later time.
++       * We just need to make sure we are not seeing a backwards event.
++       *
++       * For updates: last_value = ret is not enough, since two vcpus could be
++       * updating at the same time, and one of them could be slightly behind,
++       * making the assumption that last_value always go forward fail to hold.
++       */
++      last = atomic64_read(&last_value);
++      do {
++              if (ret < last)
++                      return last;
++              last = atomic64_cmpxchg(&last_value, last, ret);
++      } while (unlikely(last != ret));
++
+       return ret;
+ }
+

-Timo



Reply to: