grep & tar segfault - broken system
Dear all
Happy new year!
A series of events (a recent apt-get upgrade and a power failure) has brought
one of my servers (software RAID-1 with a SiI3112 SATA controller)
into the following state:
[0] GREP segfaults:
helios:/# grep
Segmentation fault
helios:/#
An strace shows the following:
execve("/bin/grep", ["grep"], [/* 16 vars */]) = 0
uname({sys="Linux", node="helios", ...}) = 0
brk(0) = 0x805ac7c
old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x40017000
access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.preload", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY) = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=13447, ...}) = 0
old_mmap(NULL, 13447, PROT_READ, MAP_PRIVATE, 3, 0) = 0x40018000
close(3) = 0
access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or directory)
open("/lib/libc.so.6", O_RDONLY) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\200^\1"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0644, st_size=1244080, ...}) = 0
old_mmap(NULL, 1254244, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x4001c000
old_mmap(0x40144000, 32768, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0x127000) = 0x40144000
old_mmap(0x4014c000, 9060, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x4014c000
close(3) = 0
munmap(0x40018000, 13447) = 0
--- SIGSEGV (Segmentation fault) @ 0 (0) ---
+++ killed by SIGSEGV +++
[1] TAR segfaults too:
helios:/# tar -cf boot.tar boot/
Segmentation fault
helios:/#
Strace shows the following:
execve("/bin/tar", ["tar", "-cf", "boot.tar", "boot/"], [/* 15 vars */]) = 0
uname({sys="Linux", node="helios", ...}) = 0
brk(0) = 0x8070d58
old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x40017000
access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.preload", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY) = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=13447, ...}) = 0
old_mmap(NULL, 13447, PROT_READ, MAP_PRIVATE, 3, 0) = 0x40018000
close(3) = 0
access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or directory)
open("/lib/librt.so.1", O_RDONLY) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0@\33\0\000"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0644, st_size=26884, ...}) = 0
old_mmap(NULL, 69912, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x4001c000
old_mmap(0x40022000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0x6000) = 0x40022000
old_mmap(0x40023000, 41240, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x40023000
close(3) = 0
access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or directory)
open("/lib/libc.so.6", O_RDONLY) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\200^\1"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0644, st_size=1244080, ...}) = 0
old_mmap(NULL, 1254244, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x4002e000
old_mmap(0x40156000, 32768, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0x127000) = 0x40156000
old_mmap(0x4015e000, 9060, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x4015e000
close(3) = 0
access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or directory)
open("/lib/libpthread.so.0", O_RDONLY) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\340A\0"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0644, st_size=81127, ...}) = 0
old_mmap(NULL, 331716, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x40161000
old_mmap(0x4016e000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0xc000) = 0x4016e000
old_mmap(0x40170000, 270276, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x40170000
close(3) = 0
old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x401b2000
munmap(0x40018000, 13447) = 0
getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM_INFINITY}) = 0
setrlimit(RLIMIT_STACK, {rlim_cur=2044*1024, rlim_max=RLIM_INFINITY}) = 0
getpid() = 1478
rt_sigaction(SIGRTMIN, {0x40169030, [], SA_RESTORER, 0x400575e8}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {0x40169080, [], SA_RESTORER, 0x400575e8}, NULL, 8) = 0
rt_sigaction(SIGRT_2, {0x40169150, [], SA_RESTORER, 0x400575e8}, NULL, 8) = 0
rt_sigprocmask(SIG_BLOCK, [RTMIN], NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RT_1], NULL, 8) = 0
_sysctl({{CTL_KERN, KERN_VERSION}, 2, 0xbffff804, 30, (nil), 0}) = 0
--- SIGSEGV (Segmentation fault) @ 0 (0) ---
+++ killed by SIGSEGV +++
I can no longer run apt-get upgrade (although I see there is a new kernel package
available), since grep is heavily used in the package configuration scripts and dpkg-deb itself relies on tar:
helios:/# apt-get upgrade
Reading Package Lists... Done
Building Dependency Tree... Done
The following packages will be upgraded:
base-passwd debconf debconf-i18n discover1 kernel-image-2.4.27-1-386 libapache2-mod-php4 libdiscover1 libgcc1
libmysqlclient12 libtiff4 mailx modutils mysql-client mysql-common mysql-server php4 php4-cli php4-common php4-domxml
php4-gd php4-mysql phpmyadmin samba-common squid squid-common
25 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.
Need to get 0B/24.9MB of archives.
After unpacking 1232kB of additional disk space will be used.
Do you want to continue? [Y/n]
Preconfiguring packages ...
/tmp/squid.config.17333: line 72: 1761 Segmentation fault grep -q "^cache_dir[$w]*[^/$w]" /etc/squid/squid.conf
/tmp/squid.config.17333: line 72: 1762 Segmentation fault grep -q '^dns_children\|^dns_defnames\|^cache_dns_program' $sq
/tmp/squid.config.17333: line 72: 1764 Segmentation fault grep -q '^cache_dir' $sq
/tmp/php4-mysql.config.17337: line 30: 1779 Segmentation fault grep -q "^[[:space:]]*extension[[:space:]]*=[[:space:]]*mysql.so" "/etc/php4/$SAPI/php.ini"
/tmp/php4-mysql.config.17337: line 30: 1781 Segmentation fault grep -q "^[[:space:]]*extension[[:space:]]*=[[:space:]]*mysql.so" "/etc/php4/$SAPI/php.ini"
/tmp/php4-domxml.config.17339: line 30: 1784 Segmentation fault grep -q "^[[:space:]]*extension[[:space:]]*=[[:space:]]*domxml.so" "/etc/php4/$SAPI/php.ini"
/tmp/php4-domxml.config.17339: line 30: 1786 Segmentation fault grep -q "^[[:space:]]*extension[[:space:]]*=[[:space:]]*domxml.so" "/etc/php4/$SAPI/php.ini"
/tmp/php4-gd.config.173311: line 30: 1789 Segmentation fault grep -q "^[[:space:]]*extension[[:space:]]*=[[:space:]]*gd.so" "/etc/php4/$SAPI/php.ini"
/tmp/php4-gd.config.173311: line 30: 1791 Segmentation fault grep -q "^[[:space:]]*extension[[:space:]]*=[[:space:]]*gd.so" "/etc/php4/$SAPI/php.ini"
/tmp/samba-common.config.173315: line 41: 1797 Segmentation fault grep -v dhcp.conf $FILE
1798 | grep -qEi '\\$|^[[:space:]]*include[[:space:]]*='
/tmp/samba-common.config.173315: line 154: 1821 Segmentation fault grep -q -i 'wins server' $FILE
dpkg-deb: subprocess tar killed by signal (Segmentation fault)
dpkg: error processing /var/cache/apt/archives/base-passwd_3.5.9_i386.deb (--unpack):
subprocess dpkg-deb --control returned error exit status 2
Errors were encountered while processing:
/var/cache/apt/archives/base-passwd_3.5.9_i386.deb
E: Sub-process /usr/bin/dpkg returned an error code (1)
helios:/#
Some information about the machine:
helios:~# cat /proc/cpuinfo
processor : 0
vendor_id : GenuineIntel
cpu family : 6
model : 7
model name : Pentium III (Katmai)
stepping : 3
cpu MHz : 400.913
cache size : 512 KB
fdiv_bug : no
hlt_bug : no
f00f_bug : no
coma_bug : no
fpu : yes
fpu_exception : yes
cpuid level : 2
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 mmx fxsr sse
bogomips : 799.53
helios:~# free -m
total used free shared buffers cached
Mem: 504 210 293 0 15 147
-/+ buffers/cache: 48 456
Swap: 525 0 525
helios:~# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/md1 4.6G 404M 4.0G 10% /
/dev/md3 71G 2.9G 68G 5% /srv
helios:~# cat /proc/mdstat
Personalities : [raid1]
read_ahead 1024 sectors
md2 : active raid1 ide/host2/bus1/target0/lun0/part2[0] ide/host2/bus0/target0/lun0/part2[1]
538112 blocks [2/2] [UU]
md3 : active raid1 ide/host2/bus1/target0/lun0/part3[0] ide/host2/bus0/target0/lun0/part3[1]
74613824 blocks [2/2] [UU]
md1 : active raid1 ide/host2/bus1/target0/lun0/part1[0] ide/host2/bus0/target0/lun0/part1[1]
4883648 blocks [2/2] [UU]
unused devices: <none>
helios:~# lsmod
Module Size Used by Not tainted
nls_cp437 4284 0 (autoclean)
sd_mod 10764 0 (autoclean) (unused)
scsi_mod 86020 1 (autoclean) [sd_mod]
usb-uhci 19504 0 (unused)
usbcore 52268 1 [usb-uhci]
rivafb 36144 63
fbcon-cfb16 3752 0 [rivafb]
fbcon-cfb32 3656 0 [rivafb]
fbcon-cfb8 3176 0 [rivafb]
cmpci 25164 1
soundcore 3268 2 [cmpci]
gameport 1388 0 [cmpci]
8139too 12328 1
mii 1952 0 [8139too]
crc32 2848 0 [8139too]
agpgart 39108 0 (unused)
ide-cd 27040 0
cdrom 26212 0 [ide-cd]
rtc 5768 0 (autoclean)
ide-detect 288 0 (autoclean) (unused)
siimage 6916 1 (autoclean)
piix 7784 1 (autoclean)
ide-disk 12416 6 (autoclean)
ide-core 91800 6 (autoclean) [ide-cd ide-detect siimage piix ide-disk]
unix 12720 13 (autoclean)
raid1 11216 3 (autoclean)
md 54880 6 (autoclean) [raid1]
ext3 65388 2 (autoclean)
jbd 34628 2 (autoclean) [ext3]
helios:~# uname -a
Linux helios 2.4.27-1-386 #1 Fri Sep 3 06:24:46 UTC 2004 i686 GNU/Linux
What can I do to fix this?
-A
Reply to: