optimiser could be improved
>Submitter-Id: net
>Originator: Herbert Xu <herbert@gondor.apana.org.au>
>Organization: The Debian Project
>Confidential: no
>Synopsis:
>Severity: non-critical
>Priority: low
>Category: optimization
>Class: sw-bug
>Release: 3.1 (Debian) (Debian unstable)
>Environment:
System: Debian GNU/Linux (unstable)
Architecture: i686
Desired=Unknown/Install/Remove/Purge/Hold
| Status=Not/Installed/Config-files/Unpacked/Failed-config/Half-installed
|/ Err?=(none)/Hold/Reinst-required/X=both-problems (Status,Err: uppercase=bad)
||/ Name Version Description
+++-==============-==============-============================================
ii gcc-3.1 3.1-2 The GNU C compiler.
ii binutils 2.12.90.0.7-1 The GNU assembler, linker and binary utiliti
ii libc6 2.2.5-6 GNU C Library: Shared libraries and Timezone
host: i386-linux
configured with: /mnt/data/gcc-3.1/gcc-3.1-3.1ds2/src/configure -v --enable-languages=c,c++,java,f77,proto,objc,ada --prefix=/usr --mandir=$\(prefix\)/share/man --infodir=$\(prefix\)/share/info --with-gxx-include-dir=$\(prefix\)/include/g++-v3-3.1 --enable-shared --with-system-zlib --enable-long-long --enable-nls --without-included-gettext --enable-clocale=gnu --enable-threads=posix --enable-java-gc=boehm --enable-objc-gc i386-linux
>Description:
[ Reported to the Debian BTS as report #67206.
Please CC 67206@bugs.debian.org on replies.
Log of report can be found at http://bugs.debian.org/67206 ]
The bug submitter writes (assembly updated for gcc-3.1):
Here are a couple of things I noticed for i386 while trying to fix alpha
unaligned traps.
1. Combining neighbouring instructions:
-- source --
#include <time.h>
#include <sys/time.h>
char *p;
void b(struct timeval);
void a() {
struct timeval k;
#if 0
memcpy(&k, p, sizeof(k));
b(k);
#else
b(*(struct timeval *)p);
#endif
}
-- assembly (gcc -O2 -S) --
.file "bug-67206-1.c"
.text
.align 2
.p2align 2,,3
.globl a
.type a,@function
a:
pushl %ebp
movl %esp, %ebp
subl $16, %esp
movl p, %eax
pushl 4(%eax)
pushl (%eax)
call b
leave
ret
.Lfe1:
.size a,.Lfe1-a
.comm p,4,4
.ident "GCC: (GNU) 3.1.1 20020531 (Debian prerelease)"
-- end --
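The subl and the addl can be combined.
[ Note: the gcc-3.1 assembly shown above contains no addl (the epilogue uses
leave), so this remark appears to refer to the assembly in the original
report; only the subl $16, %esp remains in the updated output. ]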
2. Removing unused local variables:
-- source --
#include <time.h>
#include <sys/time.h>
char *p;
void b(struct timeval);
void a() {
struct timeval k;
#if 1
memcpy(&k, p, sizeof(k));
b(k);
#else
b(*(struct timeval *)p);
#endif
}
-- assembly (gcc -O2 -S) --
.file "bug-67206-2.c"
.text
.align 2
.p2align 2,,3
.globl a
.type a,@function
a:
pushl %ebp
movl %esp, %ebp
subl $16, %esp
movl p, %eax
movl (%eax), %edx
movl %edx, -8(%ebp)
movl 4(%eax), %eax
movl %eax, -4(%ebp)
pushl -4(%ebp)
pushl -8(%ebp)
call b
leave
ret
.Lfe1:
.size a,.Lfe1-a
.comm p,4,4
.ident "GCC: (GNU) 3.1.1 20020531 (Debian prerelease)"
-- end --
I'm not sure how hard this is to do, but storing k and retrieving it again
seems to be a waste.
3. Register pressure causes gcc to overlook some simple optimisations:
-- source --
#define _GNU_SOURCE
#include <string.h>
extern char *n;
extern int a, b;
char *foo(const char *s) {
char *p;
p = n;
a = b;
while (*s) {
char *q = p;
size_t len1, len1p, len2, len2p;
len1 = strcspn(s, "'");
len2 = strspn(s + len1, "'");
len1p = len1 ? len1 + 2 : len1;
switch (len2) {
case 0:
len2p = 0;
break;
case 1:
len2p = 2;
break;
default:
len2p = len2 + 2;
}
if (len1) {
*p = '\'';
q = mempcpy(p + 1, s, len1);
*q++ = '\'';
s += len1;
}
switch (len2) {
case 0:
break;
case 1:
*q++ = '\\';
*q = '\'';
s++;
break;
default:
*q = '"';
*(char *) mempcpy(q + 1, s, len2) = '"';
s += len2;
}
}
return p;
}
-- assembly (gcc -O2 -S) --
.file "bug-67206-3.c"
.text
.align 2
.p2align 2,,3
.globl foo
.type foo,@function
foo:
pushl %ebp
movl %esp, %ebp
pushl %edi
pushl %esi
pushl %ebx
subl $12, %esp
movl n, %eax
movl %eax, -16(%ebp)
movl b, %eax
movl 8(%ebp), %edi
movl %eax, a
movb (%edi), %al
testb %al, %al
je .L68
.p2align 2,,3
.L66:
xorl %ebx, %ebx
testb %al, %al
movl -16(%ebp), %ecx
je .L10
cmpb $39, %al
je .L10
.p2align 2,,3
.L13:
incl %ebx
movb (%ebx,%edi), %dl
testb %dl, %dl
je .L10
cmpb $39, %dl
jne .L13
.L10:
leal (%ebx,%edi), %edx
xorl %esi, %esi
cmpb $39, (%edx)
je .L38
.L73:
testl %ebx, %ebx
jne .L78
.L60:
testl %esi, %esi
je .L2
cmpl $1, %esi
je .L79
movb $34, (%ecx)
pushl %eax
pushl %esi
pushl %edi
leal 1(%ecx), %eax
pushl %eax
call __mempcpy
movb $34, (%eax)
addl %esi, %edi
addl $16, %esp
.L2:
movb (%edi), %al
testb %al, %al
jne .L66
.L68:
movl -16(%ebp), %eax
leal -12(%ebp), %esp
popl %ebx
popl %esi
popl %edi
leave
ret
.L79:
incl %edi
movb $92, (%ecx)
movb $39, 1(%ecx)
jmp .L2
.L78:
movl -16(%ebp), %eax
movb $39, (%eax)
pushl %edx
pushl %ebx
movl -16(%ebp), %eax
pushl %edi
incl %eax
pushl %eax
call __mempcpy
movl %eax, %ecx
movb $39, (%eax)
incl %ecx
addl %ebx, %edi
addl $16, %esp
jmp .L60
.p2align 2,,3
.L38:
incl %esi
cmpb $39, (%esi,%edx)
je .L38
jmp .L73
.Lfe1:
.size foo,.Lfe1-foo
.ident "GCC: (GNU) 3.1.1 20020531 (Debian prerelease)"
-- end --
Now n could have been moved to %edi in the first place, eliminating the
second read from -16(%ebp). This is what happens when some of the code
at the end of the loop is removed, which reduces register pressure.
>How-To-Repeat:
>Fix:
--
To UNSUBSCRIBE, email to debian-gcc-request@lists.debian.org
with a subject of "unsubscribe". Trouble? Contact listmaster@lists.debian.org
Reply to: