Bug#800318: gcc-5: [mips,mipsel] regression: miscompilation caused by -fexpensive-optimizations
Hi Aurelien,
On 28.09.2015 14:48, Aurelien Jarno wrote:
> On 2015-09-27 23:43, Andreas Cadhalpun wrote:
>> A slightly larger test case for mips is compiling ffmpeg...
>
> That is what I did to test whether the failure is due to the above changes.
> ffmpeg builds fine with gcc version 5.2.1-17 and -march=mips2
> -mtune=mips32 (instead of the new default -march=mips32r2), and passes
> the testsuite. I'll see if I can isolate a smaller testcase so that we
> can understand the problem.
Attached is a testcase that reproduces the problem on mips; on mipsel the same testcase works fine.
One more peculiarity I noticed is that adding 'c->A = 1;' to
an unused code path inverts the behavior: It works with
-fexpensive-optimizations, but fails with -fno-expensive-optimizations.
Best regards,
Andreas
#include <inttypes.h>
#include <stdio.h>
/*
 * State shared between main() and init_tables().  The file that defines
 * init_tables() declares an identical struct; the two definitions have to
 * stay in sync because the object is passed across the library boundary
 * by pointer.
 */
typedef struct Context {
/* Result slot: written by init_tables(), then printed and compared
 * against 0x12a15 by main(). */
int64_t out;
/* Written by init_tables(); not read by main(). */
int64_t A;
/* Written by init_tables(); not read by main(). */
int64_t B;
/* Read by init_tables() to choose which table-filling branch runs;
 * main() leaves it zero-initialised. */
int C;
} Context;
/* Implemented in the shared library built from test.c. */
void init_tables(Context *c, int *table, int brightness, int contrast, int saturation);

/*
 * Test driver: runs init_tables() once on a zeroed Context with fixed
 * inputs, dumps the resulting c.out to stderr and reports on stdout
 * whether it matches the expected value 0x12a15.
 *
 * Exit status is 0 when the value matches ("working") and 1 otherwise
 * ("broken").
 */
int main()
{
    int table[3] = {104597, 132201, 25675};
    Context c = { 0 };

    init_tables(&c, table, 0, 65536, 65536);
    fprintf(stderr, "TEST: 0x%"PRIX64"\n", c.out);

    /* Guard clause: anything other than the expected constant is a failure. */
    if (c.out != 0x12a15) {
        printf("broken\n");
        return 1;
    }

    printf("working\n");
    return 0;
}
# Builds test.c twice into two shared libraries -- once with
# -fno-expensive-optimizations (libworking.so) and once with the plain
# -O2 flags (libbroken.so) -- links main.c against each of them, and runs
# both resulting programs so their output can be compared.
CFLAGS += -fPIC -O2
LDFLAGS += -shared

# 'all' and 'clean' do not produce files of those names.
.PHONY: all clean

# '|| true' keeps make going when a test program exits non-zero.
all: working broken
	LD_LIBRARY_PATH=. ./working || true
	LD_LIBRARY_PATH=. ./broken || true

# Source files are listed as prerequisites so that editing test.c or
# main.c actually triggers a rebuild.
working.o: test.c
	$(CC) $(CFLAGS) -fno-expensive-optimizations -c -o working.o test.c

broken.o: test.c
	$(CC) $(CFLAGS) -c -o broken.o test.c

libworking.so: working.o
	$(CC) $(LDFLAGS) -o libworking.so working.o

libbroken.so: broken.o
	$(CC) $(LDFLAGS) -o libbroken.so broken.o

working: main.c libworking.so
	$(CC) -o working main.c -L. -lworking

broken: main.c libbroken.so
	$(CC) -o broken main.c -L. -lbroken

clean:
	rm -f working.o libworking.so working broken.o libbroken.so broken
#include <inttypes.h>
/*
 * Output/input state for init_tables().  The caller passes a pointer to
 * one of these; init_tables() reads C and writes out, A and B.
 */
typedef struct Context {
/* Receives the final value of the local TEST computed in init_tables(). */
int64_t out;
/* Set to 1 by init_tables(). */
int64_t A;
/* Receives roundToInt16(cy * 8192) + roundToInt16(crv * 8192). */
int64_t B;
/* Read once at the start of init_tables() as 'bpp'; selects the switch
 * case (0 or 1) that fills the lookup tables. */
int C;
} Context;
/*
 * Clamp a signed integer to the range 0..255.
 *
 * Returns 0 for any negative input, 255 for any input above 255, and the
 * input itself otherwise.
 *
 * Written with explicit comparisons rather than the (-a) >> 31 trick:
 * that form depends on the implementation-defined result of right-shifting
 * a negative value and on int being 32 bits wide, and negating INT_MIN
 * overflows.
 */
static uint8_t clip_uint8_c(int a)
{
    if (a < 0)
        return 0;
    if (a > 0xFF)
        return 0xFF;
    return (uint8_t)a;
}
/*
 * Fill a 768-entry table of pointers into the byte buffer y_tab.
 *
 * table    - receives 256 + 2*256 pointers
 * elemsize - multiplier applied to every offset
 * inc      - per-step increment; each offset is clip(i - 256) * inc,
 *            shifted right by 16
 * y_tab    - base buffer the resulting pointers are relative to
 *
 * The base is first moved back by elemsize * (inc >> 9); entry i then
 * points elemsize * ((clip_uint8_c(i - 256) * inc) >> 16) bytes past that
 * adjusted base.
 */
static void fill_table(uint8_t* table[256 + 2*256], const int elemsize,
                       const int64_t inc, void *y_tab)
{
    uint8_t *base = y_tab;
    int idx = 0;

    base -= elemsize * (inc >> 9);

    while (idx < 256 + 2*256) {
        /* The clipped index is widened to 64 bits by the multiplication. */
        const int64_t scaled = clip_uint8_c(idx - 256) * inc;

        table[idx] = base + elemsize * (scaled >> 16);
        idx++;
    }
}
/*
 * Fill a 768-entry table of integer offsets.
 *
 * table    - receives 256 + 2*256 values
 * elemsize - multiplier applied to every entry
 * inc      - per-step increment
 *
 * Entry i is elemsize * (bias + ((clip_uint8_c(i - 256) * inc) >> 16)),
 * where bias is -(inc >> 9) narrowed to int.
 */
static void fill_gv_table(int table[256 + 2*256], const int elemsize, const int64_t inc)
{
    int bias = -(inc >> 9);
    int idx = 0;

    while (idx < 256 + 2*256) {
        const int64_t scaled = clip_uint8_c(idx - 256) * inc;
        const int64_t shifted = scaled >> 16;

        /* Computed in 64 bits, then narrowed on the store. */
        table[idx] = elemsize * (bias + shifted);
        idx++;
    }
}
/*
 * Convert a 16.16 fixed-point value to a rounded, saturated 16-bit
 * result.
 *
 * Adds 1 << 15 (half a unit) and shifts right by 16.  Results
 * below -0x7FFF saturate to 0x8000, results above 0x7FFF saturate to
 * 0x7FFF, and anything in between is returned converted to uint16_t
 * (so negative in-range results wrap modulo 65536).
 *
 * The intermediate is kept in 64 bits: narrowing it to int before the
 * range checks would let large inputs wrap and slip past the saturation.
 * Inputs so close to INT64_MAX that adding the rounding constant would
 * overflow are saturated up front.
 */
static uint16_t roundToInt16(int64_t f)
{
    if (f > INT64_MAX - (1 << 15))
        return 0x7FFF;

    const int64_t r = (f + (1 << 15)) >> 16;

    if (r < -0x7FFF)
        return 0x8000;
    else if (r > 0x7FFF)
        return 0x7FFF;
    else
        return (uint16_t)r;
}
/*
 * File-scope outputs of init_tables().  They have external linkage
 * (presumably so the compiler cannot discard the stores -- confirm before
 * making any of them static).
 */
/* 1024 clipped byte values written by the bpp 0 / bpp 1 branches. */
uint8_t yuvTable[1024];
/* Integer offsets produced by fill_gv_table(). */
int table_gV[256 + 2*256];
/* Pointer tables produced by fill_table(); the entries are derived from
 * yuvTable (or yuvTable + yoffs). */
uint8_t *table_rV[256 + 2*256];
uint8_t *table_gU[256 + 2*256];
uint8_t *table_bU[256 + 2*256];
/* Scalar results stored by init_tables() (see the roundToInt16() calls there). */
int64_t loc1;
int64_t loc2;
int64_t loc3;
int loc4;
/*
 * Derive fixed-point coefficients from the inputs, store several of them
 * in *c and in the file-scope loc1..loc4 variables, and, when c->C is 0 or
 * 1, fill yuvTable and the table_rV / table_gU / table_bU / table_gV
 * lookup tables.
 *
 * c          - in: c->C selects the table-filling branch;
 *              out: c->out, c->A and c->B are written
 * table      - coefficient array; elements 0, 1 and 2 are read
 * brightness - zero selects the 255/219 scaling of cy/TEST and oy = 16 << 16,
 *              non-zero selects the 224/255 scaling of the four chroma
 *              coefficients; also subtracted from oy (times 256) and used
 *              to pick yoffs
 * contrast   - 16.16 multiplier applied to cy, TEST and the chroma coefficients
 * saturation - 16.16 multiplier applied to the chroma coefficients
 *
 * This function is the body of a compiler-bug reproducer: c->out is the
 * value the test driver checks, and the statement order is deliberately
 * left untouched.
 */
__attribute__((cold)) void init_tables(Context *c, int *table, int brightness, int contrast, int saturation)
{
int i;
const int bpp = c->C;
// Only used by the bpp == 1 branch, as an offset into yuvTable.
const int yoffs = brightness ? 384 : 326;
int64_t crv = table[0];
int64_t cbu = table[1];
int64_t cgu = -table[0];
int64_t cgv = -table[2];
int64_t cy = 1 << 16;
// TEST goes through the same arithmetic as cy; its final value is what
// gets exported through c->out.
int64_t TEST = 1 << 16;
int64_t oy = 0;
int64_t yb = 0;
if (!brightness) {
cy = (cy * 255) / 219;
TEST = (TEST * 255) / 219;
oy = 16 << 16;
} else {
crv = (crv * 224) / 255;
cbu = (cbu * 224) / 255;
cgu = (cgu * 224) / 255;
cgv = (cgv * 224) / 255;
}
cy = (cy * contrast) >> 16;
// c->out = TEST;
TEST = (TEST * contrast) >> 16;
// With brightness == 0 and contrast == 65536 (the values the test driver
// passes) this is (65536 * 255) / 219 == 0x12a15.
c->out = TEST;
// Two 16.16 multipliers are applied, hence the shift by 32.
crv = (crv * contrast * saturation) >> 32;
cbu = (cbu * contrast * saturation) >> 32;
cgu = (cgu * contrast * saturation) >> 32;
cgv = (cgv * contrast * saturation) >> 32;
oy -= 256 * brightness;
c->A = 1;
c->B = roundToInt16(cy * 8192) + roundToInt16(crv * 8192);
loc1 = roundToInt16(cgv * 8192);
loc2 = roundToInt16(cgu * 8192);
loc3 = roundToInt16(oy * 8);
loc4 = roundToInt16(cy << 13) + roundToInt16(oy) + roundToInt16(crv << 13)
+ roundToInt16(cgv << 13) + roundToInt16(cgu << 13) + roundToInt16(cbu << 13);
// Divide each chroma coefficient (scaled up by 16 bits, plus 0x8000) by cy;
// the max(cy, 1) form keeps the divisor from being zero or negative.
crv = ((crv << 16) + 0x8000) / ((cy) > (1) ? (cy) : (1));
cbu = ((cbu << 16) + 0x8000) / ((cy) > (1) ? (cy) : (1));
cgu = ((cgu << 16) + 0x8000) / ((cy) > (1) ? (cy) : (1));
cgv = ((cgv << 16) + 0x8000) / ((cy) > (1) ? (cy) : (1));
// Only bpp values 0 and 1 are handled; any other value leaves the tables
// untouched.
switch (bpp) {
case 0:
// Start 384 steps below -oy and advance by cy per entry, storing the
// clipped integer part of the running value.
yb = -(384 << 16) - oy;
for (i = 0; i < 1024; i++) {
yuvTable[i] = clip_uint8_c(yb >> 16);
yb += cy;
}
fill_table(table_rV, 1, crv, yuvTable);
fill_table(table_gU, 1, cgu, yuvTable);
fill_table(table_bU, 1, cbu, yuvTable);
fill_gv_table(table_gV, 1, cgv);
break;
case 1:
// Same as case 0 except that table_bU is based at yuvTable + yoffs.
yb = -(384 << 16) - oy;
for (i = 0; i < 1024; i++) {
yuvTable[i] = clip_uint8_c(yb >> 16);
yb += cy;
}
// c->A = 1; // uncomment to change broken/working
fill_table(table_rV, 1, crv, yuvTable);
fill_table(table_gU, 1, cgu, yuvTable);
fill_table(table_bU, 1, cbu, yuvTable + yoffs);
fill_gv_table(table_gV, 1, cgv);
break;
}
}
Reply to: