Bug#800318: gcc-5: [mips,mipsel] regression: miscompilation caused by -fexpensive-optimizations

To: 800318@bugs.debian.org, debian-mips@lists.debian.org
Subject: Bug#800318: gcc-5: [mips,mipsel] regression: miscompilation caused by -fexpensive-optimizations
From: Andreas Cadhalpun <andreas.cadhalpun@googlemail.com>
Date: Mon, 28 Sep 2015 18:17:15 +0200
Message-id: <[🔎] 5609680B.8020802@googlemail.com>
Reply-to: Andreas Cadhalpun <andreas.cadhalpun@googlemail.com>, 800318@bugs.debian.org
In-reply-to: <[🔎] 20150928124814.GA28915@aurel32.net>
References: <[🔎] 56083756.2010706@googlemail.com> <[🔎] 20150927211037.GA2689@aurel32.net> <[🔎] 560862E7.3050101@googlemail.com> <[🔎] 20150928124814.GA28915@aurel32.net>

Hi Aurelien,

On 28.09.2015 14:48, Aurelien Jarno wrote:
> On 2015-09-27 23:43, Andreas Cadhalpun wrote:
>> A slightly larger test case for mips is compiling ffmpeg...
> 
> It's what I did to test if the failure is due to the above changes.
> ffmpeg builds fine with gcc version 5.2.1-17 and -march=mips2
> -mtune=mips32 (instead of the new default -march=mips32r2), and passes
> the testsuite. I'll see if I can isolate a smaller testcase so that we
> can understand the problem.

Attached is a testcase for mips, but it works fine for mipsel.

One more peculiarity I noticed is that adding 'c->A = 1;' to
an unused code path inverts the behavior: It works with
-fexpensive-optimizations, but fails with -fno-expensive-optimizations.

Best regards,
Andreas

#include <inttypes.h>
#include <stdio.h>

// State block handed to init_tables(). The library source (test.c) carries its
// own identical definition of this struct; the two must stay in sync because
// the object is passed across the shared-library boundary by pointer.
typedef struct Context {
    int64_t out;  // written by init_tables(); this is the value main() checks
    int64_t A;    // written by init_tables() (set to 1)
    int64_t B;    // written by init_tables()
    int C;        // read by init_tables() to select which switch case runs
} Context;

// Defined in test.c and linked in via libworking.so / libbroken.so.
// Reads c->C and table[0..2]; writes c->out, c->A and c->B.
void init_tables(Context *c, int *table, int brightness, int contrast, int saturation);

// Test driver: runs init_tables() once with fixed inputs and reports whether
// c.out holds the expected value.
//
// With brightness == 0 and contrast == 65536, init_tables() computes
// ((1 << 16) * 255) / 219 == 76309 == 0x12a15 for c.out, so any other value
// means the library was miscompiled.
//
// Prints the raw value to stderr, then "working" or "broken" to stdout.
// Returns 0 when the value matches, 1 otherwise.
int main()
{
    int table[3] = {104597, 132201, 25675};
    Context c = { 0 };   // c.C == 0, so init_tables() takes its "case 0" path

    init_tables(&c, table, 0, 65536, 65536);
    fprintf(stderr, "TEST: 0x%"PRIX64"\n", c.out);

    if (c.out != 0x12a15) {
        printf("broken\n");
        return 1;
    }

    printf("working\n");
    return 0;
}

# Builds the same library source (test.c) twice -- once with
# -fno-expensive-optimizations ("working") and once without ("broken") --
# links the driver (main.c) against each shared library, and runs both.
CFLAGS += -fPIC -O2
LDFLAGS += -shared

# "all" and "clean" name actions, not files.
.PHONY: all clean

# The "|| true" keeps make going when a binary reports failure (exit status 1),
# so both results are always shown.
all: working broken
	LD_LIBRARY_PATH=. ./working || true
	LD_LIBRARY_PATH=. ./broken || true

# The objects depend on test.c so that editing the testcase triggers a rebuild.
working.o: test.c
	$(CC) $(CFLAGS) -fno-expensive-optimizations -c -o working.o test.c

broken.o: test.c
	$(CC) $(CFLAGS) -c -o broken.o test.c

libworking.so: working.o
	$(CC) $(LDFLAGS) -o libworking.so working.o

libbroken.so: broken.o
	$(CC) $(LDFLAGS) -o libbroken.so broken.o

# The drivers depend on main.c as well as on the library they link against.
working: main.c libworking.so
	$(CC) -o working main.c -L. -lworking

broken: main.c libbroken.so
	$(CC) -o broken main.c -L. -lbroken

clean:
	rm -f working.o libworking.so working broken.o libbroken.so broken

#include <inttypes.h>

// State block filled in by init_tables(). The driver (main.c) carries its own
// identical definition of this struct; the two must stay in sync.
typedef struct Context {
    int64_t out;  // receives the TEST value computed in init_tables()
    int64_t A;    // set to 1 by init_tables()
    int64_t B;    // receives a sum of two roundToInt16() results
    int C;        // read as "bpp"; selects the switch case in init_tables()
} Context;

// Clamp an int into the uint8_t range.
//
// Values already in 0..255 are returned as-is. For anything else the result is
// (-a) >> 31 truncated to uint8_t: 0 for negative a, and 255 for a > 255 when
// right-shifting a negative int is an arithmetic shift (implementation-defined
// in ISO C, true for GCC).
static uint8_t clip_uint8_c(int a)
{
    const int high_bits = a & (~0xFF);  // non-zero iff a is outside 0..255

    if (!high_bits)
        return a;
    return (-a) >> 31;
}
// Fill a 768-entry table of pointers into the buffer y_tab.
//
// Entry i is set to
//     y_tab - elemsize * (inc >> 9) + elemsize * ((clip_uint8_c(i - 256) * inc) >> 16)
// where clip_uint8_c() clamps (i - 256) to 0..255.
//
//   table    - output array of 256 + 2*256 pointers
//   elemsize - size in bytes of one element of y_tab
//   inc      - fixed-point step applied per clamped index
//   y_tab    - base address the stored pointers are offset from
static void fill_table(uint8_t* table[256 + 2*256], const int elemsize,
                       const int64_t inc, void *y_tab)
{
    uint8_t *base = y_tab;
    int idx;

    // Bias the base once so the per-entry offset below needs no correction.
    base = base - elemsize * (inc >> 9);

    for (idx = 0; idx < 256 + 2*256; idx++) {
        const int64_t scaled = clip_uint8_c(idx - 256) * inc;

        table[idx] = base + elemsize * (scaled >> 16);
    }
}
// Fill a 768-entry table of integer offsets.
//
// Entry i is set to
//     elemsize * (-(inc >> 9) + ((clip_uint8_c(i - 256) * inc) >> 16))
// where clip_uint8_c() clamps (i - 256) to 0..255. The -(inc >> 9) bias is
// truncated to int before it is used.
//
//   table    - output array of 256 + 2*256 ints
//   elemsize - multiplier applied to every entry
//   inc      - fixed-point step applied per clamped index
static void fill_gv_table(int table[256 + 2*256], const int elemsize, const int64_t inc)
{
    const int bias = -(inc >> 9);
    int idx;

    for (idx = 0; idx < 256 + 2*256; idx++) {
        const int64_t scaled = clip_uint8_c(idx - 256) * inc;

        table[idx] = elemsize * (bias + (scaled >> 16));
    }
}
// Convert a 16.16 fixed-point value to a rounded, saturated 16-bit result.
//
// Adds 0x8000, shifts right by 16 and truncates to int; the result is then
// limited: anything above 0x7FFF yields 0x7FFF, anything below -0x7FFF yields
// 0x8000, and everything in between is returned converted to uint16_t (so
// negative values wrap, e.g. -1 becomes 0xFFFF).
static uint16_t roundToInt16(int64_t f)
{
    const int rounded = (f + (1 << 15)) >> 16;

    if (rounded > 0x7FFF)
        return 0x7FFF;
    if (rounded < -0x7FFF)
        return 0x8000;
    return rounded;
}

// File-scope outputs of init_tables(). They have external linkage.
// NOTE(review): presumably globals so the compiler cannot discard the
// computations feeding them -- confirm before changing storage class.

// Clamped ramp written by init_tables(); base buffer for the pointer tables.
uint8_t yuvTable[1024];
// Filled by fill_gv_table().
int table_gV[256 + 2*256];
// Filled by fill_table(); entries are pointers derived from yuvTable.
uint8_t *table_rV[256 + 2*256];
uint8_t *table_gU[256 + 2*256];
uint8_t *table_bU[256 + 2*256];
// Receive roundToInt16() results (loc4: a sum of six of them).
int64_t loc1;
int64_t loc2;
int64_t loc3;
int loc4;

// Reproducer body for the reported miscompilation.
//
// Derives fixed-point coefficients from table[0..2], brightness, contrast and
// saturation, then:
//   - stores TEST (computed exactly like cy) in c->out, which main() checks;
//   - stores 1 in c->A and a sum of two roundToInt16() results in c->B;
//   - stores further roundToInt16() results in loc1..loc4;
//   - when c->C is 0 or 1, fills yuvTable and the four lookup tables.
//
// With the driver's arguments (brightness 0, contrast 65536) the expected
// c->out is ((1 << 16) * 255) / 219 == 0x12a15.
//
// NOTE(review): the accompanying report says that adding a single store on an
// unused path flips which build misbehaves, so statement order and placement
// here are deliberately left untouched.
__attribute__((cold)) void init_tables(Context *c, int *table, int brightness, int contrast, int saturation)
{
    int i;
    const int bpp = c->C;
    // Only used by "case 1" below, as an offset into yuvTable.
    const int yoffs = brightness ? 384 : 326;
    int64_t crv = table[0];
    int64_t cbu = table[1];
    int64_t cgu = -table[0];
    int64_t cgv = -table[2];
    int64_t cy = 1 << 16;
    // TEST shadows cy step for step; it is the value reported through c->out.
    int64_t TEST = 1 << 16;
    int64_t oy = 0;
    int64_t yb = 0;
    if (!brightness) {
        // Zero brightness: scale cy (and TEST) by 255/219 and set oy to 16 in 16.16.
        cy = (cy * 255) / 219;
        TEST = (TEST * 255) / 219;
        oy = 16 << 16;
    } else {
        // Non-zero brightness: scale the four chroma coefficients by 224/255 instead.
        crv = (crv * 224) / 255;
        cbu = (cbu * 224) / 255;
        cgu = (cgu * 224) / 255;
        cgv = (cgv * 224) / 255;
    }
    // contrast is a 16.16 factor; 65536 leaves the value unchanged.
    cy = (cy * contrast) >> 16;
//    c->out = TEST;
    TEST = (TEST * contrast) >> 16;
    c->out = TEST;

    // contrast * saturation is a 32.32 factor, hence the shift by 32.
    crv = (crv * contrast * saturation) >> 32;
    cbu = (cbu * contrast * saturation) >> 32;
    cgu = (cgu * contrast * saturation) >> 32;
    cgv = (cgv * contrast * saturation) >> 32;
    oy -= 256 * brightness;
    c->A = 1;
    c->B = roundToInt16(cy * 8192) + roundToInt16(crv * 8192);
    loc1 = roundToInt16(cgv * 8192);
    loc2 = roundToInt16(cgu * 8192);
    loc3 = roundToInt16(oy * 8);
    // NOTE(review): cgu and cgv start out as negated table entries, so with the
    // driver's positive inputs the "<< 13" here and "<< 16" below shift negative
    // values, which ISO C leaves undefined. Left as-is because this is a
    // reproducer -- confirm whether that matters for the reported behaviour.
    loc4 = roundToInt16(cy << 13) + roundToInt16(oy) + roundToInt16(crv << 13) 
           + roundToInt16(cgv << 13) + roundToInt16(cgu << 13) + roundToInt16(cbu << 13);
    // Divide each coefficient by cy with rounding; the divisor is clamped to at
    // least 1 so the division can never be by zero.
    crv = ((crv << 16) + 0x8000) / ((cy) > (1) ? (cy) : (1));
    cbu = ((cbu << 16) + 0x8000) / ((cy) > (1) ? (cy) : (1));
    cgu = ((cgu << 16) + 0x8000) / ((cy) > (1) ? (cy) : (1));
    cgv = ((cgv << 16) + 0x8000) / ((cy) > (1) ? (cy) : (1));
    // No default case: any bpp other than 0 or 1 leaves the tables untouched.
    switch (bpp) {
    case 0:
        // Path taken by the driver, which zero-initialises c->C.
        yb = -(384 << 16) - oy;
        for (i = 0; i < 1024; i++) {
            yuvTable[i] = clip_uint8_c(yb >> 16);
            yb += cy;
        }
        fill_table(table_rV, 1, crv, yuvTable);
        fill_table(table_gU, 1, cgu, yuvTable);
        fill_table(table_bU, 1, cbu, yuvTable);
        fill_gv_table(table_gV, 1, cgv);
        break;
    case 1:
        // Same as case 0 except that table_bU is based at yuvTable + yoffs.
        yb = -(384 << 16) - oy;
        for (i = 0; i < 1024; i++) {
            yuvTable[i] = clip_uint8_c(yb >> 16);
            yb += cy;
        }
//        c->A = 1; // uncomment to change broken/working
        fill_table(table_rV, 1, crv, yuvTable);
        fill_table(table_gU, 1, cgu, yuvTable);
        fill_table(table_bU, 1, cbu, yuvTable + yoffs);
        fill_gv_table(table_gV, 1, cgv);
        break;
    }
}

Reply to:

Follow-Ups:
- Bug#800318: gcc-5: [mips,mipsel] regression: miscompilation caused by -fexpensive-optimizations
  - From: Aurelien Jarno <aurelien@aurel32.net>

References:
- Bug#800318: gcc-5: [mips,mipsel] regression: miscompilation caused by -fexpensive-optimizations
  - From: Andreas Cadhalpun <andreas.cadhalpun@googlemail.com>
- Bug#800318: gcc-5: [mips,mipsel] regression: miscompilation caused by -fexpensive-optimizations
  - From: Aurelien Jarno <aurelien@aurel32.net>
- Bug#800318: gcc-5: [mips,mipsel] regression: miscompilation caused by -fexpensive-optimizations
  - From: Andreas Cadhalpun <andreas.cadhalpun@googlemail.com>
- Bug#800318: gcc-5: [mips,mipsel] regression: miscompilation caused by -fexpensive-optimizations
  - From: Aurelien Jarno <aurelien@aurel32.net>

Prev by Date: Help compiling code to 2.6.23 Kernel
Next by Date: Bug#800318: gcc-5: [mips,mipsel] regression: miscompilation caused by -fexpensive-optimizations
Previous by thread: Bug#800318: gcc-5: [mips,mipsel] regression: miscompilation caused by -fexpensive-optimizations
Next by thread: Bug#800318: gcc-5: [mips,mipsel] regression: miscompilation caused by -fexpensive-optimizations
Index(es):
- Date
- Thread