nfs hang on 2.6.24
I recently updated to 2.6.24, as recommended on bug 463508, but the new
kernel now has NFS hanging problems.
A few times now it has got into a situation such that any process that
touches an nfs mount hangs.
The machine has both nfs3 and nfs4 mounts. My suspicion is that it relates
to putting a heavy load on both nfs3 and nfs4 mounts at the same time.
The machine is still hung for the moment, so let me know if there are
further debugging steps I can perform.
There are many other processes, but these are the unique call stacks that
involve NFS-related functions.
=======================
nfsv4-delegre S 00000282 0 17156 2
df8dcd30 00000046 f8c5f813 00000282 f35d4a60 00000000 93c19d62
000008a4
df8dce98 c180a940 00000000 f35d4b10 0000a296 00000000 f669be00
f8ce9b74
f8c5f08b 00000246 e6a59f34 00000000 e6a59f3c c18003b0 f8c62e85
c02bc97f
Call Trace:
[<f8c5f813>] xprt_timer+0x0/0x6f [sunrpc]
[<f8ce9b74>] nfs4_xdr_enc_delegreturn+0x0/0x84 [nfs]
[<f8c5f08b>] xprt_release_xprt+0x39/0x66 [sunrpc]
[<f8c62e85>] rpc_wait_bit_interruptible+0x1a/0x1f [sunrpc]
[<c02bc97f>] __wait_on_bit+0x33/0x58
[<f8c62e6b>] rpc_wait_bit_interruptible+0x0/0x1f [sunrpc]
[<f8c62e6b>] rpc_wait_bit_interruptible+0x0/0x1f [sunrpc]
[<c02bca07>] out_of_line_wait_on_bit+0x63/0x6b
[<c013545e>] wake_bit_function+0x0/0x3c
[<f8c62e19>] __rpc_wait_for_completion_task+0x32/0x39 [sunrpc]
[<f8ce1352>] nfs4_wait_for_completion_rpc_task+0x1b/0x2f [nfs]
[<f8ce2336>] nfs4_proc_delegreturn+0x116/0x172 [nfs]
[<f8ced370>] nfs_do_return_delegation+0xf/0x1d [nfs]
[<f8ced88a>] recall_thread+0xad/0xc0 [nfs]
[<f8ced7dd>] recall_thread+0x0/0xc0 [nfs]
[<c0104b0f>] kernel_thread_helper+0x7/0x10
=======================
mysqld S f704db38 0 3156 3110
df848130 00000082 00000002 f704db38 f704db30 00000000 6369898c
0000052b
df848298 c1822940 00000003 0163574b f704db5c f7c86000 000000ff
00000000
00000000 00000000 f704db5c 016359d3 f7459140 000001f4 c02bc84e
00000001
Call Trace:
[<c02bc84e>] schedule_timeout+0x70/0x8d
[<c01048c3>] common_interrupt+0x23/0x28
[<c012c3e2>] process_timeout+0x0/0x5
[<c02bc849>] schedule_timeout+0x6b/0x8d
[<c0183476>] do_select+0x365/0x3bc
[<c0183a60>] __pollwait+0x0/0xac
[<c011e069>] enqueue_entity+0x2b/0x3d
[<c0115343>] apic_wait_icr_idle+0xe/0x15
[<c011e091>] enqueue_task_fair+0x16/0x24
[<c011d647>] enqueue_task+0x52/0x5d
[<c011de8a>] resched_task+0x52/0x54
[<c011f445>] try_to_wake_up+0x2b8/0x2c2
[<c013543e>] autoremove_wake_function+0x15/0x35
[<c011d482>] __wake_up_common+0x32/0x5c
[<c011eeb8>] __wake_up+0x32/0x42
[<c013540e>] __wake_up_bit+0x2e/0x33
[<f8c630f5>] __rpc_do_wake_up_task+0x1fb/0x218 [sunrpc]
[<f8c6003a>] xdr_partial_copy_from_skb+0x32/0x171 [sunrpc]
[<f8c63148>] rpc_wake_up_task+0x36/0x4a [sunrpc]
[<f8c61cfd>] xs_tcp_data_recv+0x3d3/0x407 [sunrpc]
[<f8c6028f>] xdr_skb_read_bits+0x0/0x35 [sunrpc]
[<c0259bb5>] skb_release_all+0xa3/0xfa
[<c028484c>] tcp_read_sock+0x15e/0x16a
[<f8c6192a>] xs_tcp_data_recv+0x0/0x407 [sunrpc]
[<f8c624a8>] xs_tcp_data_ready+0x55/0x61 [sunrpc]
[<c012c8ec>] mod_timer+0x19/0x36
[<c02564e7>] sk_reset_timer+0xc/0x16
[<c028ba16>] tcp_rcv_established+0x3ae/0x63c
[<c0290ceb>] tcp_v4_do_rcv+0x2b/0x343
[<f8a593a5>] ip_vs_in+0xa5/0x220 [ip_vs]
[<f8a59300>] ip_vs_in+0x0/0x220 [ip_vs]
[<c0293000>] tcp_v4_rcv+0x80e/0x882
[<c0183750>] core_sys_select+0x283/0x2a0
[<c027a934>] ip_local_deliver_finish+0x114/0x1b7
[<c0259c6b>] __alloc_skb+0x49/0xf7
[<c0259bb5>] skb_release_all+0xa3/0xfa
[<f8833e62>] e1000_alloc_rx_buffers+0x1cb/0x295 [e1000]
[<c02594b1>] __kfree_skb+0x8/0x61
[<f88301f9>] e1000_unmap_and_free_tx_resource+0x1b/0x23 [e1000]
[<f8831c09>] e1000_clean_tx_irq+0xbe/0x2c3 [e1000]
[<f883480e>] e1000_clean_rx_irq+0x419/0x449 [e1000]
[<f88343f5>] e1000_clean_rx_irq+0x0/0x449 [e1000]
[<f8832045>] e1000_clean+0x1e9/0x213 [e1000]
[<c025fe81>] net_rx_action+0x9f/0x198
[<c0183bb0>] sys_select+0xa4/0x187
[<c0103e5e>] sysenter_past_esp+0x6b/0xa1
=======================
sshd S f74fdc80 0 17157 3065
df813830 00000086 00000002 f74fdc80 f74fdc78 00000000 f52ac680
f8c634bf
df813998 c1822940 00000003 015ba1ef 00000000 f7d6fcd0 000000ff
00000000
00000000 00000000 f74fdcc0 00000000 f74fdcc8 c1800ce0 f8c62e85
c02bc97f
Call Trace:
[<f8c634bf>] rpc_sleep_on+0x21/0x221 [sunrpc]
[<f8c62e85>] rpc_wait_bit_interruptible+0x1a/0x1f [sunrpc]
[<c02bc97f>] __wait_on_bit+0x33/0x58
[<f8c62e6b>] rpc_wait_bit_interruptible+0x0/0x1f [sunrpc]
[<f8c62e6b>] rpc_wait_bit_interruptible+0x0/0x1f [sunrpc]
[<c02bca07>] out_of_line_wait_on_bit+0x63/0x6b
[<c013545e>] wake_bit_function+0x0/0x3c
[<f8c62e19>] __rpc_wait_for_completion_task+0x32/0x39 [sunrpc]
[<f8ce1352>] nfs4_wait_for_completion_rpc_task+0x1b/0x2f [nfs]
[<f8ce3812>] _nfs4_proc_open+0x75/0x1b4 [nfs]
[<f8ce3e0c>] nfs4_do_open+0x112/0x23b [nfs]
[<f8ce50d3>] nfs4_open_revalidate+0x5e/0x11e [nfs]
[<f8cd25c1>] nfs_open_revalidate+0xc7/0x187 [nfs]
[<c017e8c4>] do_lookup+0x101/0x140
[<c01803f0>] __link_path_walk+0x744/0xb4b
[<c018083b>] link_path_walk+0x44/0xb3
[<c0176b59>] get_unused_fd_flags+0x4d/0xba
[<c0180b23>] do_path_lookup+0x162/0x1c4
[<c01793cc>] get_empty_filp+0x95/0x152
[<c0181488>] __path_lookup_intent_open+0x45/0x75
[<c0181527>] path_lookup_open+0x20/0x25
[<c0181606>] open_namei+0x72/0x558
[<c0176e00>] do_filp_open+0x25/0x39
[<c0176b59>] get_unused_fd_flags+0x4d/0xba
[<c0176e58>] do_sys_open+0x44/0xc0
[<c0176f0d>] sys_open+0x1c/0x1e
[<c0103e5e>] sysenter_past_esp+0x6b/0xa1
=======================
sshd S f52e4800 0 17199 3065
df8dd8f0 00000082 f8c63859 f52e4800 f669be00 c02bd82e 8660c63b
000008ce
df8dda58 c180a940 00000000 c01868b0 0000a690 00000000 c7387ce4
f7699b80
f7699b80 00000246 e7379cc0 00000000 e7379cc8 c1802c60 f8c62e85
c02bc97f
Call Trace:
[<f8c63859>] rpc_wake_up_next+0x12c/0x136 [sunrpc]
[<c02bd82e>] _spin_lock_bh+0x8/0x18
[<c01868b0>] dput+0x15/0xdc
[<f8c62e85>] rpc_wait_bit_interruptible+0x1a/0x1f [sunrpc]
[<c02bc97f>] __wait_on_bit+0x33/0x58
[<f8c62e6b>] rpc_wait_bit_interruptible+0x0/0x1f [sunrpc]
[<f8c62e6b>] rpc_wait_bit_interruptible+0x0/0x1f [sunrpc]
[<c02bca07>] out_of_line_wait_on_bit+0x63/0x6b
[<c013545e>] wake_bit_function+0x0/0x3c
[<f8c62e19>] __rpc_wait_for_completion_task+0x32/0x39 [sunrpc]
[<f8ce1352>] nfs4_wait_for_completion_rpc_task+0x1b/0x2f [nfs]
[<f8ce3812>] _nfs4_proc_open+0x75/0x1b4 [nfs]
[<f8ce3e0c>] nfs4_do_open+0x112/0x23b [nfs]
[<f8ce50d3>] nfs4_open_revalidate+0x5e/0x11e [nfs]
[<f8cd25c1>] nfs_open_revalidate+0xc7/0x187 [nfs]
[<c017e8c4>] do_lookup+0x101/0x140
[<c01803f0>] __link_path_walk+0x744/0xb4b
[<f8ce2021>] nfs4_proc_getattr+0x31/0x3e [nfs]
[<f8cd459b>] __nfs_revalidate_inode+0x269/0x276 [nfs]
[<c018083b>] link_path_walk+0x44/0xb3
[<c0176b59>] get_unused_fd_flags+0x4d/0xba
[<c0180b23>] do_path_lookup+0x162/0x1c4
[<c01793cc>] get_empty_filp+0x95/0x152
[<c0181488>] __path_lookup_intent_open+0x45/0x75
[<c0181527>] path_lookup_open+0x20/0x25
[<c0181606>] open_namei+0x72/0x558
[<c0176e00>] do_filp_open+0x25/0x39
[<c0176b59>] get_unused_fd_flags+0x4d/0xba
[<c0176e58>] do_sys_open+0x44/0xc0
[<c0176f0d>] sys_open+0x1c/0x1e
[<c0103e5e>] sysenter_past_esp+0x6b/0xa1
=======================
sshd S f5203bf4 0 17252 3065
f7cd2db0 00000086 00000002 f5203bf4 f5203bec 00000000 ffffffff
f76742c0
f7cd2f18 c1822940 00000003 015e25d4 015e2abf c017554d 000000ff
00000000
00000000 00000000 f5203c34 00000000 f5203c3c c1802f30 f8c62e85
c02bc97f
Call Trace:
[<c017554d>] cache_alloc_refill+0x58/0x477
[<f8c62e85>] rpc_wait_bit_interruptible+0x1a/0x1f [sunrpc]
[<c02bc97f>] __wait_on_bit+0x33/0x58
[<f8c62e6b>] rpc_wait_bit_interruptible+0x0/0x1f [sunrpc]
[<f8c62e6b>] rpc_wait_bit_interruptible+0x0/0x1f [sunrpc]
[<c02bca07>] out_of_line_wait_on_bit+0x63/0x6b
[<c013545e>] wake_bit_function+0x0/0x3c
[<f8c632cb>] __rpc_execute+0xeb/0x231 [sunrpc]
[<f8c62ac7>] rpc_set_active+0x3a/0x56 [sunrpc]
[<f8c5dc5e>] rpc_do_run_task+0x76/0x8f [sunrpc]
[<f8c5dcf8>] rpc_call_sync+0x21/0x39 [sunrpc]
[<f8ce2501>] nfs4_proc_access+0x129/0x19e [nfs]
[<c0176b59>] get_unused_fd_flags+0x4d/0xba
[<c0176a8e>] fd_install+0x1b/0x41
[<c025c25c>] scm_detach_fds+0xf4/0x12c
[<c02b0fe5>] unix_stream_recvmsg+0x48d/0x4bd
[<c0259c6b>] __alloc_skb+0x49/0xf7
[<f8cd0d83>] nfs_do_access+0x133/0x2ab [nfs]
[<f8c63fb0>] rpcauth_lookupcred+0x65/0x8a [sunrpc]
[<f8cd0fc0>] nfs_permission+0xc5/0x134 [nfs]
[<c01868b0>] dput+0x15/0xdc
[<c017e6c1>] __follow_mount+0x1e/0x60
[<f8cd0efb>] nfs_permission+0x0/0x134 [nfs]
[<c017e5bc>] permission+0xa3/0xef
[<c017fdc6>] __link_path_walk+0x11a/0xb4b
[<c015a9cd>] find_lock_page+0x19/0x7f
[<c018083b>] link_path_walk+0x44/0xb3
[<c0259157>] skb_dequeue+0x39/0x3f
[<c0180b23>] do_path_lookup+0x162/0x1c4
[<c017fae8>] getname+0x59/0xad
[<c01812f4>] __user_walk_fd+0x2f/0x40
[<c017b33f>] vfs_stat_fd+0x19/0x40
[<c0259157>] skb_dequeue+0x39/0x3f
[<c017b41b>] sys_stat64+0xf/0x23
[<c012fe3f>] set_current_groups+0x14d/0x159
[<c01bd9cf>] security_capable+0x9/0xa
[<c012aff8>] __capable+0x8/0x1b
[<c0131894>] sys_setresuid+0x163/0x18e
[<c01bdc8f>] security_task_post_setuid+0x17/0x1a
[<c0103e5e>] sysenter_past_esp+0x6b/0xa1
Reply to: