Bug#800318: gcc-5: [mips,mipsel] regression: miscompilation caused by -fexpensive-optimizations
Package: gcc-5
Version: 5.2.1-17
Severity: serious
Justification: causes ffmpeg to FTBFS
Control: affects -1 ffmpeg
X-Debbugs-Cc: Aurelien Jarno <aurel32@debian.org>, debian-mips@lists.debian.org
Dear Maintainer,
ffmpeg 7:2.8-1 failed to build on mips/mipsel due to test failures.
I investigated the problem and it turns out to be caused by a compiler bug
that can be avoided by using '-fno-expensive-optimizations'.
Attached is a reduced test case:
$ ls
Makefile main.c test.c
$ make
cc -fPIC -O2 -fno-expensive-optimizations -c -o working.o test.c
cc -shared -o libworking.so working.o
cc -o working main.c -L. -lworking
cc -fPIC -O2 -c -o broken.o test.c
cc -shared -o libbroken.so broken.o
cc -o broken main.c -L. -lbroken
LD_LIBRARY_PATH=. ./working || true
TEST: 0x12A15
working
LD_LIBRARY_PATH=. ./broken || true
TEST: 0x1FFFF
broken
This works correctly with gcc-5 5.2.1-16, so it is a regression in 5.2.1-17.
The changelog reveals:
[ Aurelien Jarno ]
* Use --with-mips-plt on mips*.
* Build for R2 ISA on mips, mips64 and mips64el.
* Optimize for R2 ISA on mipsel.
* Only apply mips-fix-loongson2f-nop on mipsel.
I assume one of these changes is responsible for this regression.
Best regards,
Andreas
-- System Information:
Debian Release: stretch/sid
APT prefers unstable
APT policy: (500, 'unstable')
Architecture: mipsel (mips)
Kernel: Linux 4.2.0-trunk-amd64 (SMP w/4 CPU cores)
Locale: LANG=C, LC_CTYPE=C (charmap=ANSI_X3.4-1968) (ignored: LC_ALL set to C)
Shell: /bin/sh linked to /bin/dash
Init: unable to detect
Versions of packages gcc-5 depends on:
ii binutils 2.25.1-3
ii cpp-5 5.2.1-17
ii gcc-5-base 5.2.1-17
ii libc6 2.19-22
ii libcc1-0 5.2.1-17
ii libgcc-5-dev 5.2.1-17
ii libgcc1 1:5.2.1-17
ii libgmp10 2:6.0.0+dfsg-7
ii libisl13 0.14-2
ii libmpc3 1.0.3-1
ii libmpfr4 3.1.3-1
ii libstdc++6 5.2.1-17
ii zlib1g 1:1.2.8.dfsg-2+b1
Versions of packages gcc-5 recommends:
ii libc6-dev 2.19-22
Versions of packages gcc-5 suggests:
pn gcc-5-doc <none>
pn gcc-5-locales <none>
pn gcc-5-multilib <none>
pn libasan2-dbg <none>
pn libatomic1-dbg <none>
pn libcilkrts5-dbg <none>
pn libgcc1-dbg <none>
pn libgomp1-dbg <none>
pn libitm1-dbg <none>
pn liblsan0-dbg <none>
pn libmpx0-dbg <none>
pn libquadmath-dbg <none>
pn libtsan0-dbg <none>
pn libubsan0-dbg <none>
-- no debconf information
#include <inttypes.h>
#include <stdio.h>
/*
 * Structure handed to init_tables(). main() zero-initialises it and
 * afterwards prints and checks only the `out` member.
 * The same typedef is repeated in test.c, which is compiled separately
 * into the shared library, so both copies must keep the same layout.
 */
typedef struct Context {
int64_t out;
int64_t A;
int64_t B;
int64_t C;
int D;
} Context;
/* Defined in test.c; reaches this program through libworking.so or
 * libbroken.so (see the Makefile). */
void init_tables(Context *c, int *table, int brightness, int contrast, int saturation);
/*
 * Test driver: calls init_tables() from the shared library under test and
 * checks the value it stores in c.out.
 *
 * The value is printed to stderr. The program then prints "working" and
 * exits with status 0 when the value equals 0x12A15, or prints "broken"
 * and exits with status 1 otherwise.
 */
int main(void)
{
    Context c = { 0 };
    int table[3] = {104597, 132201, 25675};

    /* brightness = 0; contrast and saturation = 65536. */
    init_tables(&c, table, 0, 65536, 65536);
    fprintf(stderr, "TEST: 0x%"PRIX64"\n", c.out);
    if (c.out == 0x12A15) {
        printf("working\n");
        return 0;
    }
    printf("broken\n");
    return 1;
}
# Builds test.c twice as a shared library -- once with
# -fno-expensive-optimizations ("working") and once without ("broken") --
# links main.c against each library and runs both resulting programs.
CFLAGS += -fPIC -O2
LDFLAGS += -shared

# These targets do not name files, so never treat them as up to date.
.PHONY: all clean

# "|| true" keeps make going when a test program exits with a non-zero status.
all: working broken
	LD_LIBRARY_PATH=. ./working || true
	LD_LIBRARY_PATH=. ./broken || true

# Listing the sources as prerequisites makes edits trigger a rebuild.
working.o: test.c
	$(CC) $(CFLAGS) -fno-expensive-optimizations -c -o working.o test.c

broken.o: test.c
	$(CC) $(CFLAGS) -c -o broken.o test.c

libworking.so: working.o
	$(CC) $(LDFLAGS) -o libworking.so working.o

libbroken.so: broken.o
	$(CC) $(LDFLAGS) -o libbroken.so broken.o

working: libworking.so main.c
	$(CC) -o working main.c -L. -lworking

broken: libbroken.so main.c
	$(CC) -o broken main.c -L. -lbroken

clean:
	rm -f working.o libworking.so working broken.o libbroken.so broken
#include <inttypes.h>
/*
 * Structure read and written by init_tables().
 * The same typedef is repeated in main.c, which is compiled separately,
 * so both copies must keep the same layout.
 */
typedef struct Context {
/* Receives the TEST value computed by init_tables(). */
int64_t out;
/* Receives roundToInt16(cy * 2) + roundToInt16(crv). */
int64_t A;
/* Receives roundToInt16(cgv) + roundToInt16(cgu). */
int64_t B;
/* Receives roundToInt16(oy). */
int64_t C;
/* Read on entry to select the switch case in init_tables(); case 1 resets it to 0. */
int D;
} Context;
/*
 * Clamp a signed integer to the range 0..255.
 *
 * a: value to clamp.
 * Returns 0 when a is negative, 255 when a is greater than 255, and a
 * itself otherwise.
 *
 * The out-of-range case is decided with an explicit sign test rather than
 * by negating and shifting: negating INT_MIN overflows (undefined
 * behavior), and right-shifting a negative int is implementation-defined.
 */
static uint8_t clip_uint8_c(int a)
{
    /* Any bit set outside the low eight means a is not in 0..255. */
    if (a & ~0xFF)
        return a < 0 ? 0 : 0xFF;
    return (uint8_t)a;
}
/*
 * Fill all 256 + 2*256 entries of a pointer lookup table.
 *
 * table:    array of 256 + 2*256 pointers to fill.
 * elemsize: multiplier applied to every offset.
 * inc:      increment; (inc >> 9) scaled by elemsize is subtracted once
 *           from the base pointer.
 * y_tab:    base pointer the entries are computed from.
 *
 * Entry i is set to
 *   y_tab - elemsize * (inc >> 9)
 *         + elemsize * ((clip_uint8_c(i - 256) * inc) >> 16).
 *
 * NOTE(review): nothing here checks that the computed pointers stay inside
 * the object behind y_tab; that is left to the caller.
 */
static void fill_table(uint8_t* table[256 + 2*256], const int elemsize,
                       const int64_t inc, void *y_tab)
{
    const int64_t bias = elemsize * (inc >> 9);
    uint8_t *origin = y_tab;
    int idx = 0;

    origin -= bias;
    while (idx < 256 + 2*256) {
        const int64_t scaled = clip_uint8_c(idx - 256) * inc;
        const int64_t offset = elemsize * (scaled >> 16);

        table[idx] = origin + offset;
        idx++;
    }
}
/*
 * Fill all 256 + 2*256 entries of an integer offset table.
 *
 * table:    array of 256 + 2*256 ints to fill.
 * elemsize: multiplier applied to every entry.
 * inc:      increment; -(inc >> 9) is the starting offset.
 *
 * Entry i is set to
 *   elemsize * (-(inc >> 9) + ((clip_uint8_c(i - 256) * inc) >> 16)),
 * evaluated in 64-bit arithmetic and then converted to int on assignment.
 */
static void fill_gv_table(int table[256 + 2*256], const int elemsize, const int64_t inc)
{
    const int start = -(inc >> 9);
    int idx = 0;

    while (idx < 256 + 2*256) {
        const int64_t scaled = clip_uint8_c(idx - 256) * inc;
        const int64_t steps = start + (scaled >> 16);

        table[idx] = elemsize * steps;
        idx++;
    }
}
/*
 * Drop the low 16 bits of f with rounding, then saturate.
 *
 * f: input value; 1 << 15 is added before the shift right by 16.
 * Returns 0x7FFF when the shifted value exceeds 0x7FFF, 0x8000 when it is
 * below -0x7FFF, and the shifted value converted to uint16_t otherwise.
 */
static uint16_t roundToInt16(int64_t f)
{
    const int rounded = (f + (1 << 15)) >> 16;

    if (rounded > 0x7FFF)
        return 0x7FFF;
    if (rounded < -0x7FFF)
        return 0x8000;
    return rounded;
}
/*
 * Global lookup tables populated by init_tables().
 * yuvTable holds 1024 bytes, matching the 1024-iteration fill loops there.
 * The other arrays have 256 + 2*256 entries, matching the loop bound in
 * fill_table() and fill_gv_table().
 */
uint8_t yuvTable[1024];
/* Integer offsets written by fill_gv_table(). */
int table_gV[256 + 2*256];
/* Pointer tables written by fill_table(). */
uint8_t *table_rV[256 + 2*256];
uint8_t *table_gU[256 + 2*256];
uint8_t *table_bU[256 + 2*256];
/*
 * Reduced reproducer for the reported miscompilation.
 *
 * Computes a set of fixed-point coefficients from `table`, `brightness`,
 * `contrast` and `saturation`, stores results in *c and, depending on c->D,
 * fills the global lookup tables.
 *
 * c:          c->D is read on entry; c->out, c->A, c->B and c->C are
 *             written, and c->D is reset to 0 in switch case 1.
 * table:      entries [0], [1] and [2] are read.
 * brightness: zero selects the (x * 255) / 219 scaling branch; non-zero
 *             selects the (x * 224) / 255 branch. Also subtracted from oy.
 * contrast:   multiplier applied as (x * contrast) >> 16, so the value
 *             65536 leaves x unchanged.
 * saturation: multiplied together with contrast into cgu and cgv before
 *             a shift right by 32.
 *
 * The value of interest is c->out. With brightness == 0 and
 * contrast == 65536 it should be (65536 * 255) / 219 == 0x12A15.
 *
 * Do not reorder or simplify the statements: the note further down records
 * that the bug disappears without the seemingly unrelated code.
 * The function is declared with GCC's `cold` function attribute.
 */
__attribute__((cold)) void init_tables(Context *c, int *table, int brightness, int contrast, int saturation)
{
int i, dst, base, rbase;
// Selects the switch case at the end of the function.
const int bpp = c->D;
// Offset into yuvTable; used only in switch case 1.
const int yoffs = brightness ? 384 : 326;
int64_t crv = table[0];
int64_t cbu = table[1];
int64_t cgu = -table[0];
int64_t cgv = -table[2];
int64_t cy = 1 << 16;
// TEST goes through the same operations as cy up to the store into c->out.
int64_t TEST = 1 << 16; // 0x10000
int64_t oy = 0;
int64_t yb = -26214400;
if (!brightness) {
cy = (cy * 255) / 219;
TEST = (TEST * 255) / 219; // 0x12A15
oy = 16 << 16;
} else {
crv = (crv * 224) / 255;
cbu = (cbu * 224) / 255;
cgu = (cgu * 224) / 255;
cgv = (cgv * 224) / 255;
}
// c->out = TEST; // 0x12A15
TEST = (TEST * contrast) >> 16;
c->out = TEST; // working: 0x12A15 / broken: 0x1FFFF
// The code below seems unrelated, but without it the bug does not occur.
cy = (cy * contrast) >> 16;
crv = (crv * contrast * contrast) >> 32;
cbu = (cbu * contrast * contrast) >> 32;
cgu = (cgu * contrast * saturation) >> 32;
cgv = (cgv * contrast * saturation) >> 32;
oy -= brightness;
c->A = roundToInt16(cy * 2) + roundToInt16(crv);
c->B = roundToInt16(cgv) + roundToInt16(cgu);
c->C = roundToInt16(oy);
dst = roundToInt16(cbu);
// Rescale each coefficient by 65536 / cy; the divisor is clamped to at
// least 1 so the division never sees zero or a negative cy.
crv = (crv * (1 << 16)) / ((cy) > (1) ? (cy) : (1));
cbu = (cbu * (1 << 16)) / ((cy) > (1) ? (cy) : (1));
cgu = (cgu * (1 << 16)) / ((cy) > (1) ? (cy) : (1));
cgv = (cgv * (1 << 16)) / ((cy) > (1) ? (cy) : (1));
// Only bpp values 0 and 1 fill tables; any other value falls through
// the switch without doing anything.
switch (bpp) {
case 0:
// Byte ramp: yb starts at its initial value and advances by cy per entry.
for (i = 0; i < 1024; i++) {
yuvTable[i] = clip_uint8_c((yb + 0x8000) >> 16);
yb += cy;
}
fill_table(table_bU, 1, cbu, yuvTable + 326);
fill_gv_table(table_gV, 1, cgv);
break;
case 1:
// Bit positions used to combine the clipped value below; they depend
// on dst (derived from cbu) and on brightness.
base = (dst == 0) ? 8 : 0;
rbase = base + (brightness ? 16 : 0);
yb = -(384 << 16) - oy;
for (i = 0; i < 1024; i++) {
unsigned yval = clip_uint8_c(yb >> 16);
// The unsigned sum is converted to uint8_t on assignment.
yuvTable[i] = (yval << rbase) + (255u << base) + (yval << base);
yb += cy;
}
fill_table(table_rV, 4, crv, yuvTable);
fill_table(table_gU, 4, cgu, yuvTable);
fill_table(table_bU, 4, cbu, yuvTable + yoffs);
c->D = 0;
fill_gv_table(table_gV, 4, cgv);
break;
}
}
Reply to: