[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

optimiser could be improved



>Submitter-Id:	net
>Originator:	Herbert Xu <herbert@gondor.apana.org.au>
>Organization:	The Debian Project
>Confidential:	no
>Synopsis:	
>Severity:	non-critical
>Priority:	low
>Category:	optimization
>Class:		sw-bug
>Release:	3.1 (Debian) (Debian unstable)
>Environment:
System: Debian GNU/Linux (unstable)
Architecture: i686
	
Desired=Unknown/Install/Remove/Purge/Hold
| Status=Not/Installed/Config-files/Unpacked/Failed-config/Half-installed
|/ Err?=(none)/Hold/Reinst-required/X=both-problems (Status,Err: uppercase=bad)
||/ Name           Version        Description
+++-==============-==============-============================================
ii  gcc-3.1        3.1-2          The GNU C compiler.
ii  binutils       2.12.90.0.7-1  The GNU assembler, linker and binary utiliti
ii  libc6          2.2.5-6        GNU C Library: Shared libraries and Timezone
host: i386-linux
configured with: /mnt/data/gcc-3.1/gcc-3.1-3.1ds2/src/configure -v --enable-languages=c,c++,java,f77,proto,objc,ada --prefix=/usr --mandir=$\(prefix\)/share/man --infodir=$\(prefix\)/share/info --with-gxx-include-dir=$\(prefix\)/include/g++-v3-3.1 --enable-shared --with-system-zlib --enable-long-long --enable-nls --without-included-gettext --enable-clocale=gnu --enable-threads=posix --enable-java-gc=boehm --enable-objc-gc i386-linux
>Description:
[ Reported to the Debian BTS as report #67206.
  Please CC 67206@bugs.debian.org on replies.
  Log of report can be found at http://bugs.debian.org/67206 ]

The bug submitter writes (assembly updated for gcc-3.1):

Here are a couple of things I noticed for i386 while trying to fix alpha
unaligned traps.

1. Combining neighbouring instructions:

-- source --
#include <time.h>
#include <sys/time.h>

char *p;

void b(struct timeval);

/*
 * Test case, variant 1: pass the struct timeval that p points to
 * directly to b() by value, through a pointer cast.  The "#if 0"
 * disables the memcpy path, so only the #else branch is compiled and
 * the local k is declared but never used in this variant.
 */
void a() {
	struct timeval k;
#if 0
	memcpy(&k, p, sizeof(k));
	b(k);
#else
	b(*(struct timeval *)p);
#endif
}
-- assembly (gcc -O2 -S) --
        # Compiler output for variant 1 (pointer cast, no local copy).
        # The "#" comments are review annotations, not part of gcc's output.
        .file   "bug-67206-1.c"
        .text
        .align 2
        .p2align 2,,3
.globl a
        .type   a,@function
a:
        # Frame setup.
        pushl   %ebp
        movl    %esp, %ebp
        # Reserves 16 bytes below %ebp; no instruction below addresses them.
        subl    $16, %esp
        # %eax = p, then push the two 32-bit words at 4(%eax) and (%eax)
        # as the by-value argument of b.
        movl    p, %eax
        pushl   4(%eax)
        pushl   (%eax)
        call    b
        # leave reloads %esp from %ebp, so no explicit addl follows the call.
        leave
        ret
.Lfe1:
        .size   a,.Lfe1-a
        .comm   p,4,4
        .ident  "GCC: (GNU) 3.1.1 20020531 (Debian prerelease)"
-- end --

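The subl and the addl can be combined.
[ Note: the listing above was regenerated with gcc-3.1 and contains no
  addl (the stack is restored by "leave"), so this remark presumably
  refers to the assembly in the original report. ]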

2. Removing unused local variables.

-- source --
#include <time.h>
#include <sys/time.h>

char *p;

void b(struct timeval);

/*
 * Test case, variant 2: copy the struct timeval that p points to into
 * the local k with memcpy, then pass k to b() by value.  The "#if 1"
 * selects the memcpy path; the cast-based #else branch is not compiled.
 *
 * NOTE(review): memcpy is called although <string.h> is not among the
 * includes shown for this test case; presumably it relies on an
 * implicit declaration or the compiler builtin -- confirm.
 */
void a() {
	struct timeval k;
#if 1
	memcpy(&k, p, sizeof(k));
	b(k);
#else
	b(*(struct timeval *)p);
#endif
}
-- assembly (gcc -O2 -S) --
        # Compiler output for variant 2 (memcpy into a local, then call).
        # The "#" comments are review annotations, not part of gcc's output.
        .file   "bug-67206-2.c"
        .text
        .align 2
        .p2align 2,,3
.globl a
        .type   a,@function
a:
        # Frame setup and 16 bytes of local stack space.
        pushl   %ebp
        movl    %esp, %ebp
        subl    $16, %esp
        # %eax = p; copy the two 32-bit words of *p into the stack slots
        # -8(%ebp) and -4(%ebp) (presumably the local k).
        movl    p, %eax
        movl    (%eax), %edx
        movl    %edx, -8(%ebp)
        movl    4(%eax), %eax
        movl    %eax, -4(%ebp)
        # The argument is pushed from the stack copy just written, although
        # the same values are still in %eax and %edx: this is the
        # store-then-reload the report calls wasteful.
        pushl   -4(%ebp)
        pushl   -8(%ebp)
        call    b
        # leave reloads %esp from %ebp, undoing the subl and both pushes.
        leave
        ret
.Lfe1:
        .size   a,.Lfe1-a
        .comm   p,4,4
        .ident  "GCC: (GNU) 3.1.1 20020531 (Debian prerelease)"
-- end --

I'm not sure how hard this is to do, but storing k and retrieving it again
seems to be a waste.

3. Register pressure causes gcc to overlook some simple optimisations:

-- source --
#define _GNU_SOURCE
#include <string.h>

extern char *n;
extern int a, b;

/*
 * Test case for the register-pressure report.  Walks s in alternating
 * runs of non-quote characters and single quotes, and writes a quoted
 * form of each run through q.
 *
 * As written, p is assigned from n once and never advanced, so q
 * restarts from the same address on every iteration and the function
 * returns the initial value of n.  len1p and len2p are computed but
 * never read.  Both are left untouched here because this is the exact
 * source the assembly listing was generated from.
 */
char *foo(const char *s) {
	char *p;

	p = n;
	a = b;

	while (*s) {
		char *q = p;
		size_t len1, len1p, len2, len2p;

		/* len1: length of the leading run containing no single quote. */
		len1 = strcspn(s, "'");
		/* len2: length of the run of single quotes that follows it. */
		len2 = strspn(s + len1, "'");

		/* Values derived from the two run lengths; not used below. */
		len1p = len1 ? len1 + 2 : len1;
		switch (len2) {
		case 0:
			len2p = 0;
			break;
		case 1:
			len2p = 2;
			break;
		default:
			len2p = len2 + 2;
		}

		/* Non-empty plain run: emit it wrapped in single quotes. */
		if (len1) {
			*p = '\'';
			/* mempcpy returns the address just past the copied bytes. */
			q = mempcpy(p + 1, s, len1);
			*q++ = '\'';
			s += len1;
		}

		/* Quote run: nothing, a backslash-escaped quote, or the whole
		 * run wrapped in double quotes. */
		switch (len2) {
		case 0:
			break;
		case 1:
			*q++ = '\\';
			*q = '\'';
			s++;
			break;
		default:
			*q = '"';
			*(char *) mempcpy(q + 1, s, len2) = '"';
			s += len2;
		}

	}

	return p;
}

-- assembly (gcc -O2 -S) --

        # Compiler output for foo.  The "#" comments are review annotations,
        # not part of gcc's output.  Register roles below are read off the
        # instructions and matched against the C source; treat them as the
        # reviewer's interpretation.
        #   %edi = s, %ebx = len1, %esi = len2, %ecx = q, -16(%ebp) = p
        # Constants: 39 is '\'', 34 is '"', 92 is '\\' (ASCII).
        .file   "bug-67206-3.c"
        .text
        .align 2
        .p2align 2,,3
.globl foo
        .type   foo,@function
foo:
        # Frame setup; save the callee-saved registers used below.
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        subl    $12, %esp
        # p = n, kept in the stack slot -16(%ebp) rather than in a register.
        movl    n, %eax
        movl    %eax, -16(%ebp)
        # a = b, interleaved with loading the argument s into %edi.
        movl    b, %eax
        movl    8(%ebp), %edi
        movl    %eax, a
        # while (*s): skip the loop entirely for an empty string.
        movb    (%edi), %al
        testb   %al, %al
        je      .L68
        .p2align 2,,3
.L66:
        # Loop top.  The strcspn call appears to be expanded inline:
        # %ebx counts characters until NUL or a single quote.
        xorl    %ebx, %ebx
        testb   %al, %al
        # q = p, reloaded from the stack slot.
        movl    -16(%ebp), %ecx
        je      .L10
        cmpb    $39, %al
        je      .L10
        .p2align 2,,3
.L13:
        incl    %ebx
        movb    (%ebx,%edi), %dl
        testb   %dl, %dl
        je      .L10
        cmpb    $39, %dl
        jne     .L13
.L10:
        # %edx = s + len1; the strspn call appears to be expanded inline
        # as well: %esi counts consecutive single quotes (loop at .L38).
        leal    (%ebx,%edi), %edx
        xorl    %esi, %esi
        cmpb    $39, (%edx)
        je      .L38
.L73:
        # if (len1): the body is placed out of line at .L78.
        testl   %ebx, %ebx
        jne     .L78
.L60:
        # Second switch (len2): 0 -> .L2, 1 -> .L79, otherwise fall through.
        testl   %esi, %esi
        je      .L2
        cmpl    $1, %esi
        je      .L79
        # default: *q = '"', then mempcpy(q + 1, s, len2).
        movb    $34, (%ecx)
        # Four pushes for a three-argument call, undone by addl $16 below;
        # the first push is presumably padding.
        pushl   %eax
        pushl   %esi
        pushl   %edi
        leal    1(%ecx), %eax
        pushl   %eax
        call    __mempcpy
        # Store the closing '"' at the returned address; s += len2.
        movb    $34, (%eax)
        addl    %esi, %edi
        addl    $16, %esp
.L2:
        # Loop condition: continue while *s is non-zero.
        movb    (%edi), %al
        testb   %al, %al
        jne     .L66
.L68:
        # return p: the value is read back from the stack slot.
        movl    -16(%ebp), %eax
        leal    -12(%ebp), %esp
        popl    %ebx
        popl    %esi
        popl    %edi
        leave
        ret
.L79:
        # case 1: s++, then write '\\' and '\'' at q[0] and q[1].
        incl    %edi
        movb    $92, (%ecx)
        movb    $39, 1(%ecx)
        jmp     .L2
.L78:
        # if (len1) body: *p = '\''.  p is loaded from -16(%ebp) here ...
        movl    -16(%ebp), %eax
        movb    $39, (%eax)
        pushl   %edx
        pushl   %ebx
        # ... and loaded again two instructions later, with no intervening
        # write to %eax or to the slot: the redundant read the report is about.
        movl    -16(%ebp), %eax
        pushl   %edi
        incl    %eax
        pushl   %eax
        call    __mempcpy
        # q = returned pointer; store '\'' there and step q past it; s += len1.
        movl    %eax, %ecx
        movb    $39, (%eax)
        incl    %ecx
        addl    %ebx, %edi
        addl    $16, %esp
        jmp     .L60
        .p2align 2,,3
.L38:
        # Inline quote-counting loop: advance %esi while the next byte is '\''.
        incl    %esi
        cmpb    $39, (%esi,%edx)
        je      .L38
        jmp     .L73
.Lfe1:
        .size   foo,.Lfe1-foo
        .ident  "GCC: (GNU) 3.1.1 20020531 (Debian prerelease)"
 -- end --

Now n could have been moved to %edi in the first place to eliminate the
second read from -16(%ebp).  This is what happens when some of the code
at the end of the loop is removed, which reduces register pressure.

	
>How-To-Repeat:
	
>Fix:
	


-- 
To UNSUBSCRIBE, email to debian-gcc-request@lists.debian.org
with a subject of "unsubscribe". Trouble? Contact listmaster@lists.debian.org



Reply to: