[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#534880: linux-image-2.6.26-2-xen-686: domU hang and are unresponsive



Got some more info by dumping the hung machine's registers with
'xm debug-keys d' and 'xm dmesg':

(XEN) *** Dumping CPU3 host state: ***
(XEN) ----[ Xen-3.2-1  x86_64  debug=n  Not tainted ]----
(XEN) CPU:    3
(XEN) RIP:    e008:[<ffff828c8010cd51>] __dump_execstate+0x1/0x60
(XEN) RFLAGS: 0000000000010002   CONTEXT: hypervisor
(XEN) rax: 0000000000000001   rbx: ffff828c80224780   rcx: 0000000000000003
(XEN) rdx: ffff8300bee1fd28   rsi: ffff828c8010cd50   rdi: 0000000000000000
(XEN) rbp: 000000000000011b   rsp: ffff8300bee0ff08   r8:  0000000000000000
(XEN) r9:  000000000107bb33   r10: 0000000000000006   r11: 0000000000000246
(XEN) r12: 00000000000000d2   r13: ffffffff8053b404   r14: 0000000000000006
(XEN) r15: ffffffffff5f7000   cr0: 000000008005003b   cr4: 00000000000026b0
(XEN) cr3: 00000004d3f75000   cr2: ffff8803ebb1feb0
(XEN) ds: 002b   es: 002b   fs: 0000   gs: 0000   ss: 0000   cs: e008
(XEN) Xen stack trace from rsp=ffff8300bee0ff08:
(XEN)    ffff828c80224780 ffff828c8013c228 ffff880001c45a20 ffff828c8012deaa
(XEN)    ffffffffff5f7000 0000000000000006 ffffffff8053b404 00000000000000d2
(XEN)    000000000000011b ffff880001c45a20 0000000000000246 0000000000000006
(XEN)    000000000107bb33 0000000000000000 8000000000000000 000000000666399d
(XEN)    0000000000000000 8000000107bb3281 8000000107bb3281 000000fb00000000
(XEN)    ffffffff8020e3d0 000000000000e033 0000000000000202 ffff88003fdc7dc8
(XEN)    000000000000e02b 5555555555555555 5555555555555555 5555555555555555
(XEN)    5555555555555555 5555555500000003 ffff8300bee30080
(XEN) Xen call trace:
(XEN)    [<ffff828c8010cd51>] __dump_execstate+0x1/0x60
(XEN)    [<ffff828c8013c228>] smp_call_function_interrupt+0x58/0xb0
(XEN)    [<ffff828c8012deaa>] call_function_interrupt+0x2a/0x30
(XEN)
(XEN) *** Dumping CPU3 guest state: ***
(XEN) ----[ Xen-3.2-1  x86_64  debug=n  Not tainted ]----
(XEN) CPU:    3
(XEN) RIP:    e033:[<ffffffff8020e3d0>]
(XEN) RFLAGS: 0000000000000202   CONTEXT: guest
(XEN) rax: 8000000000000000   rbx: ffff880001c45a20   rcx: 000000000666399d
(XEN) rdx: 0000000000000000   rsi: 8000000107bb3281   rdi: 8000000107bb3281
(XEN) rbp: 000000000000011b   rsp: ffff88003fdc7dc8   r8:  0000000000000000
(XEN) r9:  000000000107bb33   r10: 0000000000000006   r11: 0000000000000246
(XEN) r12: 00000000000000d2   r13: ffffffff8053b404   r14: 0000000000000006
(XEN) r15: ffffffffff5f7000   cr0: 000000008005003b   cr4: 00000000000026b0
(XEN) cr3: 00000004d3f75000   cr2: 00000000006c4e5c
(XEN) ds: 002b   es: 002b   fs: 0000   gs: 0000   ss: e02b   cs: e033
(XEN) Guest stack trace from rsp=ffff88003fdc7dc8:
(XEN)    ffff880001c45a20 ffffffff8037d2ce 0000000000000000 0000000000000001
(XEN)    0000000000000000 0000001d01c44540 0000000000000006 ffffffff80380202
(XEN)    ffffffffff5f7000 ffffffff8053b404 000000000000d2ce 0000000000000000
(XEN)    ffff880001c43000 ffffffff804364ee 0000000000000006 ffff88003fd43300
(XEN)    0000000000000000 ffffffff8020ea9b 0000000000000000 0000000000000006
(XEN)    0000000000000001 0000000601c44ea0 0000000000000000 00000000ffffffff
(XEN)    0000000000000061 000000003fdb8000 ffff880001c44f60 ffffffff80508980
(XEN)    ffffffff8026150a ffff88003fd43300 0000000000000000 0000000000000000
(XEN)    000000000000011c 0000000000000006 ffffffffff5f7000 ffffffff8025f11a
(XEN)    0000000000000001 ffffffff80544380 000000000000011c ffff88003fd43300
(XEN)    ffffffff805443d0 ffffffff802605d3 000000000000011c 000000000000011c
(XEN)    0000000000000000 000000000000001e 0000000000000001 ffffffff8020e146
(XEN)    0000000107bacbb4 000000000000001e 0000000000000000 ffffffff8037dbbc
(XEN)    ffff88003fc75e58 0000000000000000 ffffffffff5f7180 0000000000000006
(XEN)    ffff88003fc75e58 0000000000000000 0000000000000000 0000000000000000
(XEN)    0000000000000000 ffffffff8020bbde ffff88003fc75e58 0000000000000000
(XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN)    0000000000000000 0000000000000000 0000000000000000

It turns out the guest is looping on the following instructions:

      9 (XEN) RIP:    e033:[<ffffffff8020e3c7>]
     21 (XEN) RIP:    e033:[<ffffffff8020e3cd>]
     13 (XEN) RIP:    e033:[<ffffffff8020e3d0>]
     11 (XEN) RIP:    e033:[<ffffffff8020e3d2>]
     52 (XEN) RIP:    e033:[<ffffffff8020e3d4>]

These addresses belong to the beginning of the jiffies_to_st function
where an inline representation of read_seqbegin is placed:

/*
 * Start of read calculation -- fetch last complete writer token
 *
 * Reads sl->sequence and returns it once its low bit is clear. While the
 * low bit is set the function does not return: it calls cpu_relax() and
 * re-reads the counter, so it spins for as long as the value stays odd.
 * NOTE(review): an odd value presumably means a writer is in the middle of
 * an update -- confirm against the seqlock write side, which is not shown
 * here.
 */
static __always_inline unsigned read_seqbegin(const seqlock_t *sl)
{
        unsigned ret;

repeat:
        /* Sample the current sequence counter. */
        ret = sl->sequence;
        /*
         * Read barrier after the counter load; presumably keeps the
         * caller's subsequent data reads from being ordered before it
         * (confirm against the smp_rmb() definition for this arch).
         */
        smp_rmb();
        if (unlikely(ret & 1)) {
                /* Low bit set: back off briefly and sample again. */
                cpu_relax();
                goto repeat;
        }

        /* Low bit clear: hand the sampled value back to the caller. */
        return ret;
}

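So it seems that something is holding the xtime_lock and all the CPUs are
looping while trying to read it (note that the sequence value below is odd,
which is exactly the condition that keeps read_seqbegin retrying):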

xtime_lock = $3 = {
  sequence = 107362717,
  lock = {
    raw_lock = {
      slock = 0xD6CE
    }
  }
}

-- 
Valentin



Reply to: