Bug#648811: DRBD+OCFS2: general protection fault: 0000 [#1] SMP
Hi,
I can confirm this bug.
I also tried DRBD (two primaries) with OCFS2. It seems I hit a very
similar bug after I rebooted, renamed the nodes, and mounted OCFS2 by hand. The
setup is two identical nodes, boot1 and boot2, running as guests on libvirt/KVM (on different physical hosts):
Linux boot1 2.6.32-5-amd64 #1 SMP Sun May 6 04:00:17 UTC 2012 x86_64 GNU/Linux
i linux-image-2.6.32-5-amd64 2.6.32-45 Linux 2.6.32 for 64-bit PCs
Both virtual machines have a ttyS0 serial console configured (both attached), so I copied and pasted
what was on the consoles:
========================== boot1 ==============================
[ 9985.562785] general protection fault: 0000 [#1] SMP
[ 9985.563235] last sysfs file: /sys/fs/o2cb/interface_revision
[ 9985.563707] CPU 0
[ 9985.563893] Modules linked in: ocfs2 quota_tree drbd lru_cache cn nfsd exportfs nfs lockd fscache nfs_acl auth_rpcgss sunrpc ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs loop snd_pcm snd_timer snd soundcore snd_page_alloc psmouse virtio_balloon button serio_raw pcspkr processor evdev i2c_piix4 i2c_core ext4 mbcache jbd2 crc16 dm_mod ata_generic virtio_blk ata_piix uhci_hcd ehci_hcd 8139too libata floppy thermal thermal_sys virtio_pci virtio_ring virtio 8139cp mii usbcore nls_base scsi_mod [last unloaded: scsi_wait_scan]
[ 9985.566703] Pid: 0, comm: swapper Not tainted 2.6.32-5-amd64 #1 Bochs
[ 9985.566703] RIP: 0010:[<ffffffff8129eb2e>] [<ffffffff8129eb2e>] fib_get_table+0x2b/0x3a
[ 9985.566703] RSP: 0018:ffff880001803bb8 EFLAGS: 00010286
[ 9985.566703] RAX: ffff880000000001 RBX: ffff880001803c20 RCX: ffff880001803c20
[ 9985.566703] RDX: c3f000ff53f000ff RSI: 00000000000000fe RDI: ffffffff816d35b0
[ 9985.566703] RBP: ffff880001803cd0 R08: ffffffff816d35b0 R09: 00000000011fa8c0
[ 9985.566703] R10: 0000000000000002 R11: 0000000000000000 R12: ffff88001f9d0480
[ 9985.566703] R13: 0000000000000000 R14: ffff880001803c20 R15: ffff88001f9d04f8
[ 9985.566703] FS: 0000000000000000(0000) GS:ffff880001800000(0000) knlGS:0000000000000000
[ 9985.566703] CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b
[ 9985.566703] CR2: 00000000018c9808 CR3: 000000001dd98000 CR4: 00000000000006f0
[ 9985.566703] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 9985.566703] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 9985.566703] Process swapper (pid: 0, threadinfo ffffffff8142e000, task ffffffff814891f0)
[ 9985.566703] Stack:
[ 9985.566703] ffffffff812a427e 0000000000000000 ffff88001f9db000 ffff880001803cd0
[ 9985.566703] <0> ffffffff8125e1b1 ffff8800182371a8 ffff880001803c90 0000000000000002
[ 9985.566703] <0> 00000000ffffffea 00000000011fa8c0 ffff8800181b1000 ffff880018057800
[ 9985.566703] Call Trace:
[ 9985.566703] <IRQ>
[ 9985.566703] [<ffffffff812a427e>] ? fib4_rule_action+0x35/0x54
[ 9985.566703] [<ffffffff8125e1b1>] ? fib_rules_lookup+0x89/0xc3
[ 9985.566703] [<ffffffff812a4474>] ? fib_lookup+0x2d/0x3d
[ 9985.566703] [<ffffffff8103a311>] ? enqueue_task+0x5f/0x68
[ 9985.566703] [<ffffffff81271d9f>] ? ip_route_input+0x59e/0xcbf
[ 9985.566703] [<ffffffff8104a45a>] ? try_to_wake_up+0x289/0x29b
[ 9985.566703] [<ffffffff8106504b>] ? autoremove_wake_function+0x9/0x2e
[ 9985.566703] [<ffffffff8103aa06>] ? __wake_up_common+0x44/0x72
[ 9985.566703] [<ffffffff810ad4e8>] ? cpupri_set+0x10c/0x135
[ 9985.566703] [<ffffffff81295e3e>] ? arp_process+0x20f/0x60b
[ 9985.566703] [<ffffffffa00737fb>] ? cp_rx_poll+0x2d8/0x3bd [8139cp]
[ 9985.566703] [<ffffffff8125039f>] ? net_rx_action+0xae/0x1c9
[ 9985.566703] [<ffffffff81053d6f>] ? __do_softirq+0xdd/0x1a6
[ 9985.566703] [<ffffffff81011cac>] ? call_softirq+0x1c/0x30
[ 9985.566703] [<ffffffff8101322b>] ? do_softirq+0x3f/0x7c
[ 9985.566703] [<ffffffff81053bdf>] ? irq_exit+0x36/0x76
[ 9985.566703] [<ffffffff81012922>] ? do_IRQ+0xa0/0xb6
[ 9985.566703] [<ffffffff810114d3>] ? ret_from_intr+0x0/0x11
[ 9985.566703] <EOI>
[ 9985.566703] [<ffffffff8102c584>] ? native_safe_halt+0x2/0x3
[ 9985.566703] [<ffffffff8101758d>] ? default_idle+0x34/0x51
[ 9985.566703] [<ffffffff8100fe97>] ? cpu_idle+0xa2/0xda
[ 9985.566703] [<ffffffff8151c140>] ? early_idt_handler+0x0/0x71
[ 9985.566703] [<ffffffff8151ccdd>] ? start_kernel+0x3dc/0x3e8
[ 9985.566703] [<ffffffff8151c3b7>] ? x86_64_start_kernel+0xf9/0x106
[ 9985.566703] Code: 85 f6 b8 fe 00 00 00 0f 44 f0 48 89 f0 83 e0 01 48 c1 e0 03 48 03 87 f0 01 00 00 48 8b 10 eb 03 48 8b 12 48 85 d2 75 03 31 c0 c3 <39> 72 10 48 8b 02 0f 18 08 48 89 d0 75 e7 c3 48 8b 42 10 48 85
[ 9985.566703] RIP [<ffffffff8129eb2e>] fib_get_table+0x2b/0x3a
[ 9985.566703] RSP <ffff880001803bb8>
[ 9985.594195] ---[ end trace 49c80a52371bee10 ]---
[ 9985.594581] Kernel panic - not syncing: Fatal exception in interrupt
[ 9985.595102] Pid: 0, comm: swapper Tainted: G D 2.6.32-5-amd64 #1
[ 9985.595650] Call Trace:
[ 9985.595857] <IRQ> [<ffffffff812faf69>] ? panic+0x86/0x143
[ 9985.596356] [<ffffffff81011673>] ? apic_timer_interrupt+0x13/0x20
[ 9985.596869] [<ffffffff811b89f1>] ? vgacon_cursor+0x0/0x140
[ 9985.597342] [<ffffffff812fdc62>] ? oops_end+0x64/0xb4
[ 9985.597764] [<ffffffff812fdca5>] ? oops_end+0xa7/0xb4
[ 9985.598188] [<ffffffff812fd155>] ? general_protection+0x25/0x30
[ 9985.598682] [<ffffffff8129eb2e>] ? fib_get_table+0x2b/0x3a
[ 9985.599141] [<ffffffff812a427e>] ? fib4_rule_action+0x35/0x54
[ 9985.599622] [<ffffffff8125e1b1>] ? fib_rules_lookup+0x89/0xc3
[ 9985.600112] [<ffffffff812a4474>] ? fib_lookup+0x2d/0x3d
[ 9985.600568] [<ffffffff8103a311>] ? enqueue_task+0x5f/0x68
[ 9985.601017] [<ffffffff81271d9f>] ? ip_route_input+0x59e/0xcbf
[ 9985.601498] [<ffffffff8104a45a>] ? try_to_wake_up+0x289/0x29b
[ 9985.601980] [<ffffffff8106504b>] ? autoremove_wake_function+0x9/0x2e
[ 9985.602522] [<ffffffff8103aa06>] ? __wake_up_common+0x44/0x72
[ 9985.603011] [<ffffffff810ad4e8>] ? cpupri_set+0x10c/0x135
[ 9985.603463] [<ffffffff81295e3e>] ? arp_process+0x20f/0x60b
[ 9985.603925] [<ffffffffa00737fb>] ? cp_rx_poll+0x2d8/0x3bd [8139cp]
[ 9985.604472] [<ffffffff8125039f>] ? net_rx_action+0xae/0x1c9
[ 9985.604936] [<ffffffff81053d6f>] ? __do_softirq+0xdd/0x1a6
[ 9985.605384] [<ffffffff81011cac>] ? call_softirq+0x1c/0x30
[ 9985.605851] [<ffffffff8101322b>] ? do_softirq+0x3f/0x7c
[ 9985.606290] [<ffffffff81053bdf>] ? irq_exit+0x36/0x76
[ 9985.606709] [<ffffffff81012922>] ? do_IRQ+0xa0/0xb6
[ 9985.607114] [<ffffffff810114d3>] ? ret_from_intr+0x0/0x11
[ 9985.607559] <EOI> [<ffffffff8102c584>] ? native_safe_halt+0x2/0x3
[ 9985.608094] [<ffffffff8101758d>] ? default_idle+0x34/0x51
[ 9985.608561] [<ffffffff8100fe97>] ? cpu_idle+0xa2/0xda
[ 9985.608986] [<ffffffff8151c140>] ? early_idt_handler+0x0/0x71
[ 9985.609465] [<ffffffff8151ccdd>] ? start_kernel+0x3dc/0x3e8
[ 9985.609929] [<ffffffff8151c3b7>] ? x86_64_start_kernel+0xf9/0x106
----- last messages in syslog -----
Sep 17 11:52:15 boot1 kernel: [ 9944.833721] OCFS2 1.5.0
Sep 17 11:52:15 boot1 kernel: [ 9944.835069] ocfs2_dlm: Nodes in domain ("612BC4D1190E45C9988476ECE94A89D3"): 0
Sep 17 11:52:15 boot1 kernel: [ 9944.840480] ocfs2: Mounting device (147,0) on (node 0, slot 0) with ordered data mode.
Sep 17 11:52:15 boot1 kernel: [ 9944.840759] (4855,0):ocfs2_replay_journal:1607 Recovering node 1 from slot 1 on device (147,0)
Sep 17 11:52:16 boot1 kernel: [ 9946.073028] (4855,0):ocfs2_begin_quota_recovery:376 Beginning quota recovery in slot 1
Sep 17 11:52:16 boot1 kernel: [ 9946.078288] (4847,0):ocfs2_finish_quota_recovery:569 Finishing quota recovery in slot 1
Sep 17 11:52:31 boot1 kernel: [ 9961.377383] o2net: accepted connection from node boot2 (num 1) at 192.168.31.52:7777
Sep 17 11:52:35 boot1 kernel: [ 9965.413269] ocfs2_dlm: Node 1 joins domain 612BC4D1190E45C9988476ECE94A89D3
Sep 17 11:52:35 boot1 kernel: [ 9965.413272] ocfs2_dlm: Nodes in domain ("612BC4D1190E45C9988476ECE94A89D3"): 0 1
========================== boot2 ==============================
[ 9529.404113] block drbd0: PingAck did not arrive in time.
[ 9529.404637] block drbd0: short read expecting header on sock: r=-512
[ 9577.212104] (833,0):o2net_connect_expired:1656 ERROR: no connection established with node 0 after 30.0 seconds, giving up and returning errors.
[ 9591.380104] general protection fault: 0000 [#1] SMP
[ 9591.380576] last sysfs file: /sys/fs/o2cb/interface_revision
[ 9591.381047] CPU 0
[ 9591.381226] Modules linked in: ocfs2 quota_tree drbd lru_cache cn nfsd exportfs nfs lockd fscache nfs_acl auth_rpcgss sunrpc ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs loop psmouse snd_pcm snd_timer snd soundcore snd_page_alloc i2c_piix4 i2c_core pcspkr serio_raw virtio_balloon processor evdev button ext4 mbcache jbd2 crc16 dm_mod ata_generic virtio_blk ata_piix uhci_hcd ehci_hcd 8139too libata floppy thermal thermal_sys virtio_pci virtio_ring virtio 8139cp mii usbcore nls_base scsi_mod [last unloaded: scsi_wait_scan]
[ 9591.384017] Pid: 0, comm: swapper Not tainted 2.6.32-5-amd64 #1 Bochs
[ 9591.384017] RIP: 0010:[<ffffffff8129eb2e>] [<ffffffff8129eb2e>] fib_get_table+0x2b/0x3a
[ 9591.384017] RSP: 0018:ffff880001803a28 EFLAGS: 00010286
[ 9591.384017] RAX: ffff880000000000 RBX: ffff880001803a90 RCX: ffff880001803a90
[ 9591.384017] RDX: f000ff53f000ff53 RSI: 00000000000000fe RDI: ffffffff816d35b0
[ 9591.384017] RBP: ffff880001803b00 R08: ffffffff816d35b0 R09: ffff88001f967a48
[ 9591.384017] R10: ffff880017a38740 R11: 0000000000000000 R12: ffff88001f9d0480
[ 9591.384017] R13: 0000000000000000 R14: ffff880001803a90 R15: ffff88001f9d04f8
[ 9591.384017] FS: 0000000000000000(0000) GS:ffff880001800000(0000) knlGS:0000000000000000
[ 9591.384017] CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b
[ 9591.384017] CR2: 0000000001827ff0 CR3: 000000001fb7c000 CR4: 00000000000006f0
[ 9591.384017] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 9591.384017] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 9591.384017] Process swapper (pid: 0, threadinfo ffffffff8142e000, task ffffffff814891f0)
[ 9591.384017] Stack:
[ 9591.384017] ffffffff812a427e ffff880017901000 ffff88001f9db000 ffff880001803b00
[ 9591.384017] <0> ffffffff8125e1b1 0000000000000000 ffff880001803ae0 ffff880001803b00
[ 9591.384017] <0> 0000000000000000 ffffffff816d35b0 ffff880001803c30 ffff880017878000
[ 9591.384017] Call Trace:
[ 9591.384017] <IRQ>
[ 9591.384017] [<ffffffff812a427e>] ? fib4_rule_action+0x35/0x54
[ 9591.384017] [<ffffffff8125e1b1>] ? fib_rules_lookup+0x89/0xc3
[ 9591.384017] [<ffffffff812a4474>] ? fib_lookup+0x2d/0x3d
[ 9591.384017] [<ffffffff8127108c>] ? __ip_route_output_key+0x38b/0x8a6
[ 9591.384017] [<ffffffff812715c9>] ? ip_route_output_flow+0x22/0x1cb
[ 9591.384017] [<ffffffff8129a56f>] ? inet_sk_rebuild_header+0x126/0x358
[ 9591.384017] [<ffffffff810e60a1>] ? virt_to_head_page+0x9/0x2a
[ 9591.384017] [<ffffffff81275ddc>] ? ip_cork_release+0x2e/0x3b
[ 9591.384017] [<ffffffff81289e95>] ? tcp_retransmit_skb+0x91/0x5aa
[ 9591.384017] [<ffffffff8126e9e0>] ? rt_del+0x7d/0xa4
[ 9591.384017] [<ffffffff8128b7db>] ? tcp_retransmit_timer+0x46f/0x56b
[ 9591.384017] [<ffffffff8128f159>] ? tcp_v4_err+0x352/0x4cb
[ 9591.384017] [<ffffffff812973fd>] ? icmp_rcv+0x1ea/0x220
[ 9591.384017] [<ffffffff81273c40>] ? ip_local_deliver_finish+0x146/0x1e9
[ 9591.384017] [<ffffffff8127378f>] ? ip_rcv_finish+0x373/0x38d
[ 9591.384017] [<ffffffff8124fdb9>] ? process_backlog+0x81/0xb4
[ 9591.384017] [<ffffffff8106938f>] ? sched_clock_local+0x13/0x74
[ 9591.384017] [<ffffffff8125039f>] ? net_rx_action+0xae/0x1c9
[ 9591.384017] [<ffffffff81053d6f>] ? __do_softirq+0xdd/0x1a6
[ 9591.384017] [<ffffffff8102462a>] ? lapic_next_event+0x18/0x1d
[ 9591.384017] [<ffffffff81011cac>] ? call_softirq+0x1c/0x30
[ 9591.384017] [<ffffffff8101322b>] ? do_softirq+0x3f/0x7c
[ 9591.384017] [<ffffffff81053bdf>] ? irq_exit+0x36/0x76
[ 9591.384017] [<ffffffff810250f8>] ? smp_apic_timer_interrupt+0x87/0x95
[ 9591.384017] [<ffffffff81011673>] ? apic_timer_interrupt+0x13/0x20
[ 9591.384017] <EOI>
[ 9591.384017] [<ffffffff8102c584>] ? native_safe_halt+0x2/0x3
[ 9591.384017] [<ffffffff8101758d>] ? default_idle+0x34/0x51
[ 9591.384017] [<ffffffff8100fe97>] ? cpu_idle+0xa2/0xda
[ 9591.384017] [<ffffffff8151c140>] ? early_idt_handler+0x0/0x71
[ 9591.384017] [<ffffffff8151ccdd>] ? start_kernel+0x3dc/0x3e8
[ 9591.384017] [<ffffffff8151c3b7>] ? x86_64_start_kernel+0xf9/0x106
[ 9591.384017] Code: 85 f6 b8 fe 00 00 00 0f 44 f0 48 89 f0 83 e0 01 48 c1 e0 03 48 03 87 f0 01 00 00 48 8b 10 eb 03 48 8b 12 48 85 d2 75 03 31 c0 c3 <39> 72 10 48 8b 02 0f 18 08 48 89 d0 75 e7 c3 48 8b 42 10 48 85
[ 9591.384017] RIP [<ffffffff8129eb2e>] fib_get_table+0x2b/0x3a
[ 9591.384017] RSP <ffff880001803a28>
[ 9591.415967] ---[ end trace 392e24ab78cc83a4 ]---
[ 9591.416395] Kernel panic - not syncing: Fatal exception in interrupt
[ 9591.416969] Pid: 0, comm: swapper Tainted: G D 2.6.32-5-amd64 #1
[ 9591.417546] Call Trace:
[ 9591.417755] <IRQ> [<ffffffff812faf69>] ? panic+0x86/0x143
[ 9591.418260] [<ffffffff81011673>] ? apic_timer_interrupt+0x13/0x20
[ 9591.418807] [<ffffffff811b89f1>] ? vgacon_cursor+0x0/0x140
[ 9591.419286] [<ffffffff812fdc62>] ? oops_end+0x64/0xb4
[ 9591.419729] [<ffffffff812fdca5>] ? oops_end+0xa7/0xb4
[ 9591.420190] [<ffffffff812fd155>] ? general_protection+0x25/0x30
[ 9591.420738] [<ffffffff8129eb2e>] ? fib_get_table+0x2b/0x3a
[ 9591.421210] [<ffffffff812a427e>] ? fib4_rule_action+0x35/0x54
[ 9591.421713] [<ffffffff8125e1b1>] ? fib_rules_lookup+0x89/0xc3
[ 9591.422212] [<ffffffff812a4474>] ? fib_lookup+0x2d/0x3d
[ 9591.422689] [<ffffffff8127108c>] ? __ip_route_output_key+0x38b/0x8a6
[ 9591.423220] [<ffffffff812715c9>] ? ip_route_output_flow+0x22/0x1cb
[ 9591.423760] [<ffffffff8129a56f>] ? inet_sk_rebuild_header+0x126/0x358
[ 9591.424339] [<ffffffff810e60a1>] ? virt_to_head_page+0x9/0x2a
[ 9591.424875] [<ffffffff81275ddc>] ? ip_cork_release+0x2e/0x3b
[ 9591.425350] [<ffffffff81289e95>] ? tcp_retransmit_skb+0x91/0x5aa
[ 9591.425848] [<ffffffff8126e9e0>] ? rt_del+0x7d/0xa4
[ 9591.426261] [<ffffffff8128b7db>] ? tcp_retransmit_timer+0x46f/0x56b
[ 9591.426807] [<ffffffff8128f159>] ? tcp_v4_err+0x352/0x4cb
[ 9591.427246] [<ffffffff812973fd>] ? icmp_rcv+0x1ea/0x220
[ 9591.427694] [<ffffffff81273c40>] ? ip_local_deliver_finish+0x146/0x1e9
[ 9591.428246] [<ffffffff8127378f>] ? ip_rcv_finish+0x373/0x38d
[ 9591.428770] [<ffffffff8124fdb9>] ? process_backlog+0x81/0xb4
[ 9591.429250] [<ffffffff8106938f>] ? sched_clock_local+0x13/0x74
[ 9591.429763] [<ffffffff8125039f>] ? net_rx_action+0xae/0x1c9
[ 9591.430344] [<ffffffff81053d6f>] ? __do_softirq+0xdd/0x1a6
[ 9591.430826] [<ffffffff8102462a>] ? lapic_next_event+0x18/0x1d
[ 9591.431310] [<ffffffff81011cac>] ? call_softirq+0x1c/0x30
[ 9591.431790] [<ffffffff8101322b>] ? do_softirq+0x3f/0x7c
[ 9591.432261] [<ffffffff81053bdf>] ? irq_exit+0x36/0x76
[ 9591.432747] [<ffffffff810250f8>] ? smp_apic_timer_interrupt+0x87/0x95
[ 9591.433280] [<ffffffff81011673>] ? apic_timer_interrupt+0x13/0x20
[ 9591.433797] <EOI> [<ffffffff8102c584>] ? native_safe_halt+0x2/0x3
[ 9591.434336] [<ffffffff8101758d>] ? default_idle+0x34/0x51
[ 9591.434805] [<ffffffff8100fe97>] ? cpu_idle+0xa2/0xda
[ 9591.435221] [<ffffffff8151c140>] ? early_idt_handler+0x0/0x71
[ 9591.435712] [<ffffffff8151ccdd>] ? start_kernel+0x3dc/0x3e8
[ 9591.436191] [<ffffffff8151c3b7>] ? x86_64_start_kernel+0xf9/0x106
----- last messages in syslog -----
Sep 17 11:52:32 boot2 kernel: [ 9494.857236] o2net: connected to node boot1 (num 0) at 192.168.31.51:7777
Sep 17 11:52:36 boot2 kernel: [ 9498.890803] OCFS2 1.5.0
Sep 17 11:52:36 boot2 kernel: [ 9498.893072] ocfs2_dlm: Nodes in domain ("612BC4D1190E45C9988476ECE94A89D3"): 0 1
Sep 17 11:52:36 boot2 kernel: [ 9498.903794] ocfs2: Mounting device (147,0) on (node 1, slot 1) with ordered data mode.
Sep 17 11:53:06 boot2 kernel: [ 9529.404113] block drbd0: PingAck did not arrive in time.
Sep 17 11:53:06 boot2 kernel: [ 9529.404614] block drbd0: peer( Primary -> Unknown ) conn( Connected -> NetworkFailure ) pdsk( UpToDate -> DUnknown )
Sep 17 11:53:06 boot2 kernel: [ 9529.404621] block drbd0: asender terminated
Sep 17 11:53:06 boot2 kernel: [ 9529.404622] block drbd0: Terminating drbd0_asender
Sep 17 11:53:06 boot2 kernel: [ 9529.404637] block drbd0: short read expecting header on sock: r=-512
Sep 17 11:53:06 boot2 kernel: [ 9529.405213] block drbd0: Creating new current UUID
Sep 17 11:53:06 boot2 kernel: [ 9529.405625] block drbd0: Connection closed
Sep 17 11:53:06 boot2 kernel: [ 9529.405629] block drbd0: conn( NetworkFailure -> Unconnected )
Sep 17 11:53:06 boot2 kernel: [ 9529.405632] block drbd0: receiver terminated
Sep 17 11:53:06 boot2 kernel: [ 9529.405633] block drbd0: Restarting drbd0_receiver
Sep 17 11:53:06 boot2 kernel: [ 9529.405634] block drbd0: receiver (re)started
Sep 17 11:53:06 boot2 kernel: [ 9529.405645] block drbd0: conn( Unconnected -> WFConnection )
Sep 17 11:53:24 boot2 kernel: [ 9547.212045] o2net: connection to node boot1 (num 0) at 192.168.31.51:7777 has been idle for 30.0 seconds, shutting it down.
Sep 17 11:53:24 boot2 kernel: [ 9547.212055] (0,0):o2net_idle_timer:1495 here are some times that might help debug the situation: (tmr 1347875574.463110 now 1347875604.462549 dr 1347875574.463065 adv 1347875574.463110:1347875574.463111 func (343e2184:505) 1347875556.162729:1347875556.162730)
Sep 17 11:53:24 boot2 kernel: [ 9547.212158] o2net: no longer connected to node boot1 (num 0) at 192.168.31.51:7777
Sep 17 11:53:54 boot2 kernel: [ 9577.212104] (833,0):o2net_connect_expired:1656 ERROR: no connection established with node 0 after 30.0 seconds, giving up and returning errors.
Kind regards
--
Zito
global {
usage-count yes;
# minor-count dialog-refresh disable-ip-verification
}
common {
protocol C;
handlers {
pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";
# fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
# split-brain "/usr/lib/drbd/notify-split-brain.sh root";
# out-of-sync "/usr/lib/drbd/notify-out-of-sync.sh root";
# before-resync-target "/usr/lib/drbd/snapshot-resync-target-lvm.sh -p 15 -- -c 16k";
# after-resync-target /usr/lib/drbd/unsnapshot-resync-target-lvm.sh;
}
startup {
# wfc-timeout degr-wfc-timeout outdated-wfc-timeout wait-after-sb;
wfc-timeout 180;
degr-wfc-timeout 180;
}
disk {
# on-io-error fencing use-bmbv no-disk-barrier no-disk-flushes
# no-disk-drain no-md-flushes max-bio-bvecs
on-io-error call-local-io-error;
}
net {
# sndbuf-size rcvbuf-size timeout connect-int ping-int ping-timeout max-buffers
# max-epoch-size ko-count allow-two-primaries cram-hmac-alg shared-secret
# after-sb-0pri after-sb-1pri after-sb-2pri data-integrity-alg no-tcp-cork
}
syncer {
# rate after al-extents use-rle cpu-mask verify-alg csums-alg
rate 20M;
}
}
resource srv {
meta-disk internal;
device minor 0;
on boot1 {
disk /dev/boot1/srv;
address 192.168.31.51:7788;
}
on boot2 {
disk /dev/boot2/srv;
address 192.168.31.52:7788;
}
net {
allow-two-primaries;
after-sb-0pri discard-zero-changes;
after-sb-1pri discard-secondary;
after-sb-2pri disconnect;
}
startup {
become-primary-on both;
}
}
node:
ip_port = 7777
ip_address = 192.168.31.51
number = 0
name = boot1
cluster = ocfs2
node:
ip_port = 7777
ip_address = 192.168.31.52
number = 1
name = boot2
cluster = ocfs2
cluster:
node_count = 2
name = ocfs2
Reply to: