Your message dated Wed, 11 Jan 2012 06:14:08 -0600 with message-id <20120111121408.GD32173@burratino> and subject line Re: XFS lockups [task xfssyncd blocked for more than 120 seconds] has caused the Debian Bug report #655353, regarding XFS lockups on 2.6.32 [task xfssyncd blocked for more than 120 seconds] to be marked as done. This means that you claim that the problem has been dealt with. If this is not the case it is now your responsibility to reopen the Bug report if necessary, and/or fix the problem forthwith. (NB: If you are a system administrator and have no idea what this message is talking about, this may indicate a serious mail system misconfiguration somewhere. Please contact owner@bugs.debian.org immediately.) -- 655353: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=655353 Debian Bug Tracking System Contact owner@bugs.debian.org with problems
--- Begin Message ---
- To: Debian Bug Tracking System <submit@bugs.debian.org>
- Subject: XFS lockups on 2.6.32 [task xfssyncd blocked for more than 120 seconds]
- From: Carlos Alberto Lopez Perez <clopez@igalia.com>
- Date: Tue, 10 Jan 2012 16:36:52 +0100
- Message-id: <[🔎] 4F0C5B14.7070701@igalia.com>
Source: linux-2.6 Version: 2.6.32-39 Severity: normal Hello, Since the upgrade from lenny to squeeze I am getting in several servers random kernel lockups related to the XFS filesystem. The errors looks the same on all servers. All servers are running Debian kernel 2.6.32-39 Server 1 running Debian/Squeeze with linux-image-2.6.32-5-686/2.6.32-39 ----------------------------------------------------------------------------------------- [539160.520045] INFO: task xfssyncd:985 blocked for more than 120 seconds. [539160.520077] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [539160.520123] xfssyncd D f9197020 0 985 2 0x00000000 [539160.520129] c340fb40 00000046 f6c32200 f9197020 00000000 c141d100 c141d100 c14186ac [539160.520137] c340fcfc c2108100 00000001 0000a0bf 00000000 00000000 00000010 f6a7f280 [539160.520145] c21036ac c340fcfc 0806c251 f6a38b90 00000082 00000000 00000000 00000000 [539160.520154] Call Trace: [539160.520179] [<c126d9b9>] ? schedule_timeout+0x20/0xb0 [539160.520184] [<c1138433>] ? __lookup_tag+0x8e/0xee [539160.520189] [<c11385b1>] ? radix_tree_gang_lookup_tag+0x8a/0xd5 [539160.520193] [<c126d8c2>] ? wait_for_common+0xa4/0x100 [539160.520199] [<c102d500>] ? default_wake_function+0x0/0x8 [539160.520225] [<f8324433>] ? xfs_reclaim_inode+0x85/0xc8 [xfs] [539160.520259] [<f8324c26>] ? xfs_inode_ag_walk+0x73/0xbd [xfs] [539160.520280] [<f83243ae>] ? xfs_reclaim_inode+0x0/0xc8 [xfs] [539160.520302] [<f8324cc8>] ? xfs_inode_ag_iterator+0x58/0x8f [xfs] [539160.520324] [<f83243ae>] ? xfs_reclaim_inode+0x0/0xc8 [xfs] [539160.520345] [<f8324de4>] ? xfs_reclaim_inodes+0x12/0x16 [xfs] [539160.520367] [<f8324e0b>] ? xfs_sync_worker+0x23/0x5c [xfs] [539160.520388] [<f83246d7>] ? xfssyncd+0x134/0x17d [xfs] [539160.520410] [<f83245a3>] ? xfssyncd+0x0/0x17d [xfs] [539160.520415] [<c1043dfc>] ? kthread+0x61/0x66 [539160.520418] [<c1043d9b>] ? kthread+0x0/0x66 [539160.520423] [<c1003d47>] ? kernel_thread_helper+0x7/0x10 Server 2 running Debian/Squeeze with linux-image-2.6.32-5-vserver-amd64/2.6.32-39 ----------------------------------------------------------------------------------------- [864600.444518] INFO: task xfssyncd:933 blocked for more than 120 seconds. [864600.444563] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [864600.444608] xfssyncd D 0000000000000000 0 933 2 0x00000000 [864600.444612] ffff88022f06b980 0000000000000046 0000000000000000 ffff880050925e40 [864600.444617] 0000000000002000 ffffffff8118af2b 000000000000f9e0 ffff88022c607fd8 [864600.444620] 0000000000015780 0000000000015780 ffff88022f195640 ffff88022f195938 [864600.444625] Call Trace: [864600.444632] [<ffffffff8118af2b>] ? generic_make_request+0x299/0x2f9 [864600.444637] [<ffffffff8130a2fd>] ? schedule_timeout+0x2e/0xdd [864600.444659] [<ffffffffa01cea3d>] ? _xfs_buf_ioapply+0x204/0x22f [xfs] [864600.444663] [<ffffffff8119e9c8>] ? __lookup_tag+0xad/0x11b [864600.444666] [<ffffffff8130a1b6>] ? wait_for_common+0xde/0x15b [864600.444671] [<ffffffff8104a705>] ? default_wake_function+0x0/0x9 [864600.444685] [<ffffffffa01d6058>] ? xfs_reclaim_inode+0x95/0xe0 [xfs] [864600.444698] [<ffffffffa01d6975>] ? xfs_inode_ag_walk+0x92/0xef [xfs] [864600.444711] [<ffffffffa01d5fc3>] ? xfs_reclaim_inode+0x0/0xe0 [xfs] [864600.444724] [<ffffffffa01d6a43>] ? xfs_inode_ag_iterator+0x71/0xb2 [xfs] [864600.444737] [<ffffffffa01d5fc3>] ? xfs_reclaim_inode+0x0/0xe0 [xfs] [864600.444751] [<ffffffffa01d6bd0>] ? xfs_sync_worker+0x26/0x5f [xfs] [864600.444764] [<ffffffffa01d6338>] ? xfssyncd+0x150/0x1bb [xfs] [864600.444777] [<ffffffffa01d61e8>] ? xfssyncd+0x0/0x1bb [xfs] [864600.444780] [<ffffffff81065bf5>] ? kthread+0x79/0x81 [864600.444784] [<ffffffff81011baa>] ? child_rip+0xa/0x20 [864600.444788] [<ffffffff8118bd3d>] ? generic_unplug_device+0x0/0x34 [864600.444791] [<ffffffff81065b7c>] ? kthread+0x0/0x81 [864600.444794] [<ffffffff81011ba0>] ? child_rip+0x0/0x20 Server 3 running Debian/Squeeze with linux-image-2.6.32-5-vserver-amd64/2.6.32-39 ----------------------------------------------------------------------------------------- [ 5520.764046] INFO: task xfssyncd:1376 blocked for more than 120 seconds. [ 5520.764080] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 5520.764126] xfssyncd D 0000000000000000 0 1376 2 0x00000000 [ 5520.764132] ffff88007fb5d640 0000000000000046 0000000000000000 ffff88001f342540 [ 5520.764138] 00000000000000ff ffffffff8118af2b 000000000000f9e0 ffff88007e291fd8 [ 5520.764143] 0000000000015780 0000000000015780 ffff880037baab20 ffff880037baae18 [ 5520.764148] Call Trace: [ 5520.764159] [<ffffffff8118af2b>] ? generic_make_request+0x299/0x2f9 [ 5520.764166] [<ffffffff8130a2fd>] ? schedule_timeout+0x2e/0xdd [ 5520.764200] [<ffffffffa0268a3d>] ? _xfs_buf_ioapply+0x204/0x22f [xfs] [ 5520.764205] [<ffffffff8119e9c8>] ? __lookup_tag+0xad/0x11b [ 5520.764210] [<ffffffff8130a1b6>] ? wait_for_common+0xde/0x15b [ 5520.764216] [<ffffffff8104a705>] ? default_wake_function+0x0/0x9 [ 5520.764234] [<ffffffffa0270058>] ? xfs_reclaim_inode+0x95/0xe0 [xfs] [ 5520.764251] [<ffffffffa0270975>] ? xfs_inode_ag_walk+0x92/0xef [xfs] [ 5520.764268] [<ffffffffa026ffc3>] ? xfs_reclaim_inode+0x0/0xe0 [xfs] [ 5520.764285] [<ffffffffa0270a43>] ? xfs_inode_ag_iterator+0x71/0xb2 [xfs] [ 5520.764301] [<ffffffffa026ffc3>] ? xfs_reclaim_inode+0x0/0xe0 [xfs] [ 5520.764318] [<ffffffffa0270bd0>] ? xfs_sync_worker+0x26/0x5f [xfs] [ 5520.764335] [<ffffffffa0270338>] ? xfssyncd+0x150/0x1bb [xfs] [ 5520.764351] [<ffffffffa02701e8>] ? xfssyncd+0x0/0x1bb [xfs] [ 5520.764356] [<ffffffff81065bf5>] ? kthread+0x79/0x81 [ 5520.764362] [<ffffffff81011baa>] ? child_rip+0xa/0x20 [ 5520.764366] [<ffffffff81065b7c>] ? kthread+0x0/0x81 [ 5520.764369] [<ffffffff81011ba0>] ? child_rip+0x0/0x20 Server 4 running Debian/Squeeze with linux-image-2.6.32-5-vserver-amd64/2.6.32-39 ----------------------------------------------------------------------------------------- [279117.747342] INFO: task xfssyncd:1985 blocked for more than 120 seconds. [279117.747401] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [279117.747463] xfssyncd D 0000000000000000 0 1985 2 0x00000000 [279117.747466] ffff88023f06c0b0 0000000000000046 0000000000800012 ffffffff8118a673 [279117.747468] ffff88023f0a6c00 000000000000f9e0 ffff88023cd93fd8 0000000000015780 [279117.747470] 0000000000015780 ffff88023c1547e0 ffff88023c154ad8 0000000298bc2318 [279117.747473] Call Trace: [279117.747479] [<ffffffff8118a673>] ? generic_make_request+0x299/0x2f9 [279117.747483] [<ffffffff810114ce>] ? common_interrupt+0xe/0x13 [279117.747501] [<ffffffffa01b45f4>] ? xfs_ioend_wait+0x84/0x9c [xfs] [279117.747503] [<ffffffff8106599e>] ? autoremove_wake_function+0x0/0x2e [279117.747513] [<ffffffffa0199460>] ? xfs_ilock_nowait+0x32/0x92 [xfs] [279117.747520] [<ffffffffa01bce36>] ? xfs_sync_inode_data+0x91/0xa8 [xfs] [279117.747528] [<ffffffffa01bcf61>] ? xfs_inode_ag_walk+0x92/0xef [xfs] [279117.747535] [<ffffffffa01bcda5>] ? xfs_sync_inode_data+0x0/0xa8 [xfs] [279117.747542] [<ffffffffa01bd02f>] ? xfs_inode_ag_iterator+0x71/0xb2 [xfs] [279117.747549] [<ffffffffa01bcda5>] ? xfs_sync_inode_data+0x0/0xa8 [xfs] [279117.747557] [<ffffffffa01bd2fe>] ? xfs_sync_data+0x20/0x42 [xfs] [279117.747564] [<ffffffffa01bd344>] ? xfs_flush_inodes_work+0x24/0x31 [xfs] [279117.747571] [<ffffffffa01bc924>] ? xfssyncd+0x150/0x1bb [xfs] [279117.747578] [<ffffffffa01bc7d4>] ? xfssyncd+0x0/0x1bb [xfs] [279117.747580] [<ffffffff810656d1>] ? kthread+0x79/0x81 [279117.747582] [<ffffffff81011baa>] ? child_rip+0xa/0x20 [279117.747584] [<ffffffff81065658>] ? kthread+0x0/0x81 [279117.747585] [<ffffffff81011ba0>] ? child_rip+0x0/0x20 A bit of googling [1] suggests that perhaps the commit 17b3847 [2] could have fixed this Also I can say that I have switched some weeks ago one of this servers to a vainilla/vserver 3.1 Kernel and since then I didn't saw this lockup anymore. ----------------------------------------------------------------------------------------- [1] http://comments.gmane.org/gmane.comp.file-systems.xfs.general/41907 [2] https://git.kernel.org/linus/17b3847 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Carlos Alberto Lopez Perez http://neutrino.es Igalia - Free Software Engineering http://www.igalia.com ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~Attachment: signature.asc
Description: OpenPGP digital signature
--- End Message ---
--- Begin Message ---
- To: Carlos Alberto Lopez Perez <clopez@igalia.com>
- Cc: 655353-done@bugs.debian.org
- Subject: Re: XFS lockups [task xfssyncd blocked for more than 120 seconds]
- From: Jonathan Nieder <jrnieder@gmail.com>
- Date: Wed, 11 Jan 2012 06:14:08 -0600
- Message-id: <20120111121408.GD32173@burratino>
- In-reply-to: <[🔎] 4F0C5B14.7070701@igalia.com>
- References: <[🔎] 4F0C5B14.7070701@igalia.com>
Version: 3.1.8-2 Hi Carlos, Carlos Alberto Lopez Perez wrote: > Since the upgrade from lenny to squeeze I am getting in several > servers random kernel lockups related to the XFS filesystem. The > errors looks the same on all servers. All servers are running Debian > kernel 2.6.32-39 Thanks for reporting this. Are the hangs noticeable when using the system? What is happening in other tasks? (It should be possible to tell by using sysrq + w when in that state; you might have to enable it first using echo 1 >/proc/sys/kernel/sysrq # or echo 446 >/proc/sys/kernel/sysrq or you can trigger it directly with "echo w >/proc/sysrq-trigger" if able to run commands. See Documentation/sysrq.txt in the kernel source or the documentation directory of a linux-doc-<version> package for details.) [...] > A bit of googling [1] suggests that perhaps the commit 17b3847 [2] > could have fixed this Hm. Mike's example you pointed to is stuck in xfs_log_reserve while most of your examples are stuck in xfs_reclaim_inode. But if you'd like to try it, you can test the patch below[*] against linux 2.6.32.y (either from kernel.org or a squeeze kernel). [3] explains how to patch and build a custom kernel. [...] > Also I can say that I have switched some weeks ago one of this > servers to a vainilla/vserver 3.1 Kernel and since then I didn't saw > this lockup anymore. Marking accordingly. Please try 3.0.0-6 from http://snapshot.debian.org/ so we can see if the fix was introduced in 3.1. If that one does not produce the lockup and you have time for it, it would also be useful to try some kernels halfway between 2.6.32 and 3.0 to see when the fix was introduced. Thanks for a pleasant report, and hope that helps. Sincerely, Jonathan [3] http://kernel-handbook.alioth.debian.org/ch-common-tasks.html [*] From: Christoph Hellwig <hch@infradead.org> Date: Tue, 11 Oct 2011 15:14:09 +0000 Subject: xfs: force the log if we encounter pinned buffers in .iop_pushbuf commit 17b38471c3c07a49f0bbc2ecc2e92050c164e226 upstream. We need to check for pinned buffers even in .iop_pushbuf given that inode items flush into the same buffers that may be pinned directly due operations on the unlinked inode list operating directly on buffers. To do this add a return value to .iop_pushbuf that tells the AIL push about this and use the existing log force mechanisms to unpin it. Signed-off-by: Christoph Hellwig <hch@lst.de> Reported-by: Stefan Priebe <s.priebe@profihost.ag> Tested-by: Stefan Priebe <s.priebe@profihost.ag> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Alex Elder <aelder@sgi.com> --- Only compile-tested. fs/xfs/quota/xfs_dquot_item.c | 11 +++++++---- fs/xfs/xfs_inode_item.c | 14 +++++++++----- fs/xfs/xfs_trans.h | 2 +- fs/xfs/xfs_trans_ail.c | 9 +++++++-- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index d0d4a9a0bbd7..38cc06f94d47 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -205,7 +205,7 @@ xfs_qm_dqunpin_wait( * search the buffer cache can be a time consuming thing, and AIL lock is a * spinlock. */ -STATIC void +STATIC bool xfs_qm_dquot_logitem_pushbuf( xfs_dq_logitem_t *qip) { @@ -213,6 +213,7 @@ xfs_qm_dquot_logitem_pushbuf( xfs_mount_t *mp; xfs_buf_t *bp; uint dopush; + bool ret = true; dqp = qip->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); @@ -233,7 +234,7 @@ xfs_qm_dquot_logitem_pushbuf( ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { qip->qli_pushbuf_flag = 0; xfs_dqunlock(dqp); - return; + return true; } mp = dqp->q_mount; bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, @@ -249,6 +250,7 @@ xfs_qm_dquot_logitem_pushbuf( if (XFS_BUF_ISPINNED(bp)) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); + ret = false; } if (dopush) { int error; @@ -269,11 +271,12 @@ xfs_qm_dquot_logitem_pushbuf( xfs_dqunlock(dqp); xfs_buf_relse(bp); } - return; + return ret; } qip->qli_pushbuf_flag = 0; xfs_dqunlock(dqp); + return true; } /* @@ -402,7 +405,7 @@ static struct xfs_item_ops xfs_dquot_item_ops = { .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_qm_dquot_logitem_committed, .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_push, - .iop_pushbuf = (void(*)(xfs_log_item_t*)) + .iop_pushbuf = (bool(*)(xfs_log_item_t*)) xfs_qm_dquot_logitem_pushbuf, .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_qm_dquot_logitem_committing diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 9794b876d6ff..dd0c2db25a00 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -751,7 +751,7 @@ xfs_inode_item_committed( * We aren't holding the AIL lock (or the flush lock) when this gets called, * so it is inherently race-y. */ -STATIC void +STATIC bool xfs_inode_item_pushbuf( xfs_inode_log_item_t *iip) { @@ -759,6 +759,7 @@ xfs_inode_item_pushbuf( xfs_mount_t *mp; xfs_buf_t *bp; uint dopush; + bool ret = true; ip = iip->ili_inode; @@ -779,7 +780,7 @@ xfs_inode_item_pushbuf( ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); - return; + return true; } mp = ip->i_mount; @@ -804,6 +805,7 @@ xfs_inode_item_pushbuf( if (XFS_BUF_ISPINNED(bp)) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); + ret = false; } if (dopush) { int error; @@ -818,9 +820,11 @@ xfs_inode_item_pushbuf( } else { iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); + if (XFS_BUF_ISPINNED(bp)) + ret = false; xfs_buf_relse(bp); } - return; + return ret; } /* * We have to be careful about resetting pushbuf flag too early (above). @@ -832,7 +836,7 @@ xfs_inode_item_pushbuf( */ iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); - return; + return true; } @@ -903,7 +907,7 @@ static struct xfs_item_ops xfs_inode_item_ops = { .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_inode_item_committed, .iop_push = (void(*)(xfs_log_item_t*))xfs_inode_item_push, - .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_inode_item_pushbuf, + .iop_pushbuf = (bool(*)(xfs_log_item_t*))xfs_inode_item_pushbuf, .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_inode_item_committing }; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index a0574f593f52..e5282706fbd9 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -792,7 +792,7 @@ typedef struct xfs_item_ops { void (*iop_unlock)(xfs_log_item_t *); xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); void (*iop_push)(xfs_log_item_t *); - void (*iop_pushbuf)(xfs_log_item_t *); + bool (*iop_pushbuf)(xfs_log_item_t *); void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); } xfs_item_ops_t; diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 2ffc570679be..43d9b8adaf31 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -306,8 +306,13 @@ xfsaild_push( case XFS_ITEM_PUSHBUF: XFS_STATS_INC(xs_push_ail_pushbuf); - IOP_PUSHBUF(lip); - last_pushed_lsn = lsn; + + if (!IOP_PUSHBUF(lip)) { + stuck++; + flush_log = 1; + } else { + last_pushed_lsn = lsn; + } break; case XFS_ITEM_PINNED: -- 1.7.8.3
--- End Message ---