Bug#820831: gcc miscompiling memcpy
Package: gcc
Version: 4:4.9.2-2
I am observing what appears to be gcc miscompiling a memcpy call. In
the following transcript, note that the memcpy call should be writing
to the memory range [0x9ca17df, 0x9ca17e7), and yet the bytes in range
[0x9ca17e7, 0x9ca17e9) also seem to get modified.
-----
$ uname -a
Linux packer-debian-8-amd64 3.16.0-4-amd64 #1 SMP Debian
3.16.7-ckt25-1 (2016-03-06) x86_64 GNU/Linux
$ cat memcpytestcase.c
#include <stdint.h>
#include <stdio.h>
#include <string.h>
/*
 * Reproducer: copies 8 bytes from `renames` into the 8 bytes that end
 * immediately before `mcode`, printing mcode[0] and mcode[1] before and
 * after the copy. Those two bytes lie outside the destination range
 * [mcode - 8, mcode), so both printfs are expected to show "00 00".
 *
 * The code is intentionally left exactly as reported; changing it may
 * change what the compiler generates.
 */
int main() {
/* NOTE(review): malloc is used although the includes shown above do not
 * pull in <stdlib.h>, and the result is not checked for NULL. */
uint8_t *mem = malloc(4096);
uint8_t *nrenames = (uint8_t*)mem, *mcp, *mcode;
/* Source data; the third word (bytes 8..11) is the only nonzero part. */
uint32_t renames[4] = {0, 0, 0x1234, 0};
/* Both pointers start 2015 bytes into the buffer; mcp is moved back below. */
mcode = mcp = mem + 2015;
/* Sentinel bytes just past the intended destination range. */
mcode[0] = mcode[1] = 0;
/* First byte of the buffer holds the rename count: 2 entries. */
*nrenames = 2;
printf("Before %p: %02x %02x\n", mcode, mcode[0], mcode[1]);
if (*nrenames) {
/* 2 entries * 4 bytes = 8 bytes to copy. */
unsigned sz = *nrenames * 4;
/* Destination becomes [mcode - sz, mcode): it ends right before mcode[0]. */
mcp -= sz;
printf("memcpy(%p, %p, %u);\n", mcp, renames, sz);
/* Only the first sz bytes of renames (all zero here) should be copied. */
memcpy(mcp, renames, sz);
}
/* Any value other than "00 00" here means bytes past the range were written. */
printf("After %p: %02x %02x\n", mcode, mcode[0], mcode[1]);
return 0;
}
$ gcc -m32 -O2 -march=i686 memcpytestcase.c && ./a.out
Before 0x9ca17e7: 00 00
memcpy(0x9ca17df, 0xffec6a20, 8);
After 0x9ca17e7: 34 12
-----
For reference, the asm output (via the -S flag to gcc) appears to be
the following. If there are at least 8 bytes to copy (in this case,
there are exactly 8 bytes to copy), then the code at .L33 comes into
play. Said code aligns the destination pointer (%edx) via .L36 and
.L37, and then copies 8 bytes at a time via the loop at .L6. The copy
loop always executes for at least one iteration, which leads to a
problem if there were initially exactly 8 bytes to copy and the
alignment logic already handled some of those 8: the loop then copies
a full 8 bytes even though fewer than 8 remain, running past the end
of the destination range.
-----
...
call printf // The "Before" printf
// Load the byte at (%ebx) -- presumably *nrenames, with %ebx holding mem --
// and take the copy path at .L32 only when it is nonzero.
movzbl (%ebx), %eax
testb %al, %al
jne .L32
// .L2: common exit of the copy. Loads the bytes at offsets 2016 and 2015
// from %ebx (mcode[1] and mcode[0]) and passes them, together with %esi
// (presumably mcode), to printf.
.L2:
movzbl 2016(%ebx), %eax
movl %esi, 4(%esp)
movl $.LC2, (%esp)
movl %eax, 12(%esp)
movzbl 2015(%ebx), %eax
movl %eax, 8(%esp)
call printf // The "After" printf
...
// .L32: body of the if. %ecx = count * 4 (sz), %edx = %esi - sz (destination),
// %edi = address 32(%esp) (source, presumably the renames array).
.L32:
.cfi_restore_state
leal 0(,%eax,4), %ecx
movl %esi, %edx
subl %ecx, %edx
leal 32(%esp), %edi
// Outgoing printf arguments: format, destination, source, size.
movl %ecx, 12(%esp)
movl %edx, 4(%esp)
movl %edi, 8(%esp)
movl $.LC1, (%esp)
// Spill size to 28(%esp) and destination to 24(%esp) across the call.
movl %ecx, 28(%esp)
movl %edx, 24(%esp)
call printf
// Start of the inline copy: %eax = size, %ecx = source, %edx = destination.
movl 28(%esp), %ecx
movl 24(%esp), %edx
movl %ecx, %eax
movl %edi, %ecx
// Sizes of 8 or more go through the aligning path at .L33.
cmpl $8, %eax
jnb .L33
// .L3: tail copy. Copies 4, 2 and 1 bytes according to bits 2, 1 and 0 of
// the count stored at 28(%esp); %edi is the running offset.
.L3:
xorl %edi, %edi
testb $4, 28(%esp)
jne .L34
.L8:
testb $2, 28(%esp)
jne .L35
.L9:
testb $1, 28(%esp)
je .L2
movzbl (%ecx,%edi), %eax
movb %al, (%edx,%edi)
jmp .L2
// .L35: 2-byte tail step at the current offset.
.L35:
movzwl (%ecx,%edi), %eax
movw %ax, (%edx,%edi)
addl $2, %edi
jmp .L9
// .L34: 4-byte tail step at offset 0; leaves the offset at 4.
.L34:
movl (%ecx), %edi
movl %edi, (%edx)
movl $4, %edi
jmp .L8
// .L33: destination alignment. An odd destination copies one byte (.L36);
// a destination with bit 1 set copies two bytes (.L37).
.L33:
testb $1, %dl
jne .L36
.L4:
testb $2, %dl
jne .L37
// .L5: loop bound = (count at 28(%esp)) & -8, kept in 24(%esp); offset = 0.
// The bound is 0 when fewer than 8 bytes remain after alignment.
.L5:
movl 28(%esp), %edi
andl $-8, %edi
movl %edi, 24(%esp)
xorl %edi, %edi
// .L6: copies 8 bytes per iteration. The bound is compared only after the
// body has run, so 8 bytes are copied even when the bound is 0.
.L6:
movl (%ecx,%edi), %eax
movl %eax, (%edx,%edi)
movl 4(%ecx,%edi), %eax
movl %eax, 4(%edx,%edi)
addl $8, %edi
cmpl 24(%esp), %edi
jb .L6
// Advance both pointers past the bytes copied by the loop, then run the
// tail at .L3, which still tests the count stored at 28(%esp).
addl %edi, %edx
addl %edi, %ecx
jmp .L3
// .L37: copy 2 bytes, advance source and destination by 2, and subtract 2
// from the count at 28(%esp).
.L37:
movzwl (%ecx), %edi
addl $2, %edx
addl $2, %ecx
movw %di, -2(%edx)
subl $2, 28(%esp)
jmp .L5
// .L36: copy 1 byte from (%edi), which was set to 32(%esp) in .L32, advance
// the destination by 1, set the source to 33(%esp), and decrement the count
// at 28(%esp).
.L36:
movzbl (%edi), %ecx
incl %edx
movb %cl, -1(%edx)
leal 33(%esp), %ecx
decl 28(%esp)
jmp .L4
-----
Reply to: