[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#800318: gcc-5: [mips,mipsel] regression: miscompilation caused by -fexpensive-optimizations



Package: gcc-5
Version: 5.2.1-17
Severity: serious
Justification: causes ffmpeg to FTBFS
Control: affects -1 ffmpeg
X-Debbugs-Cc: Aurelien Jarno <aurel32@debian.org>, debian-mips@lists.debian.org


Dear Maintainer,

ffmpeg 7:2.8-1 failed to build on mips/mipsel due to test failures.

I investigated the problem and it turns out to be caused by a compiler bug
that can be avoided by using '-fno-expensive-optimizations'.

Attached is a reduced test case:
$ ls
Makefile  main.c  test.c
$ make
cc -fPIC -O2 -fno-expensive-optimizations -c -o working.o test.c
cc -shared -o libworking.so working.o
cc -o working main.c -L. -lworking
cc -fPIC -O2 -c -o broken.o test.c
cc -shared -o libbroken.so broken.o
cc -o broken main.c -L. -lbroken
LD_LIBRARY_PATH=. ./working || true
TEST: 0x12A15
working
LD_LIBRARY_PATH=. ./broken || true
TEST: 0x1FFFF
broken

This works correctly with gcc-5 5.2.1-16, so it is a regression in 5.2.1-17.
The changelog reveals:
  [ Aurelien Jarno ]
  * Use --with-mips-plt on mips*.
  * Build for R2 ISA on mips, mips64 and mips64el.
  * Optimize for R2 ISA on mipsel.
  * Only apply mips-fix-loongson2f-nop on mipsel.

I assume one of these changes is responsible for this regression.

Best regards,
Andreas


-- System Information:
Debian Release: stretch/sid
  APT prefers unstable
  APT policy: (500, 'unstable')
Architecture: mipsel (mips)

Kernel: Linux 4.2.0-trunk-amd64 (SMP w/4 CPU cores)
Locale: LANG=C, LC_CTYPE=C (charmap=ANSI_X3.4-1968) (ignored: LC_ALL set to C)
Shell: /bin/sh linked to /bin/dash
Init: unable to detect

Versions of packages gcc-5 depends on:
ii  binutils      2.25.1-3
ii  cpp-5         5.2.1-17
ii  gcc-5-base    5.2.1-17
ii  libc6         2.19-22
ii  libcc1-0      5.2.1-17
ii  libgcc-5-dev  5.2.1-17
ii  libgcc1       1:5.2.1-17
ii  libgmp10      2:6.0.0+dfsg-7
ii  libisl13      0.14-2
ii  libmpc3       1.0.3-1
ii  libmpfr4      3.1.3-1
ii  libstdc++6    5.2.1-17
ii  zlib1g        1:1.2.8.dfsg-2+b1

Versions of packages gcc-5 recommends:
ii  libc6-dev  2.19-22

Versions of packages gcc-5 suggests:
pn  gcc-5-doc        <none>
pn  gcc-5-locales    <none>
pn  gcc-5-multilib   <none>
pn  libasan2-dbg     <none>
pn  libatomic1-dbg   <none>
pn  libcilkrts5-dbg  <none>
pn  libgcc1-dbg      <none>
pn  libgomp1-dbg     <none>
pn  libitm1-dbg      <none>
pn  liblsan0-dbg     <none>
pn  libmpx0-dbg      <none>
pn  libquadmath-dbg  <none>
pn  libtsan0-dbg     <none>
pn  libubsan0-dbg    <none>

-- no debconf information
#include <inttypes.h>
#include <stdio.h>

/*
 * State block handed to init_tables().  test.c carries its own copy of this
 * definition, so the member order and types here must stay in sync with it.
 */
struct Context {
    int64_t out;    /* value main() compares against 0x12A15 */
    int64_t A;      /* not read by main() */
    int64_t B;      /* not read by main() */
    int64_t C;      /* not read by main() */
    int D;          /* zero-initialised by main() before the call */
};

typedef struct Context Context;

/*
 * Defined in test.c, which the Makefile builds into libworking.so and
 * libbroken.so; this program is linked against one of those libraries.
 */
void init_tables(Context *c, int *table, int brightness, int contrast, int saturation);

/*
 * Driver for the reproducer.
 *
 * Calls init_tables() on a zero-initialised Context with brightness 0 and
 * contrast/saturation 65536, prints the resulting c.out to stderr in hex and
 * then reports the verdict on stdout.
 *
 * Returns 0 after printing "working" when c.out is 0x12A15, otherwise prints
 * "broken" and returns 1.
 */
int main(void)
{
    Context c = { 0 };
    int table[3] = {104597, 132201, 25675};

    init_tables(&c, table, 0, 65536, 65536);

    /* PRIX64 is an unsigned conversion, so hand it an unsigned 64-bit value. */
    fprintf(stderr, "TEST: 0x%"PRIX64"\n", (uint64_t)c.out);
    if (c.out == 0x12A15) {
        printf("working\n");
        return 0;
    }
    printf("broken\n");
    return 1;
}
# Builds test.c twice -- once with -fno-expensive-optimizations ("working")
# and once with plain -O2 ("broken") -- wraps each object in a shared library,
# links main.c against each library and runs both programs.
CFLAGS += -fPIC -O2
LDFLAGS += -shared

# Neither target produces a file of its own name.
.PHONY: all clean

# "|| true" keeps make going: the test program exits with status 1 when it
# detects the wrong result.
all: working broken
	LD_LIBRARY_PATH=. ./working || true
	LD_LIBRARY_PATH=. ./broken || true

# Both objects come from the same source; list it so edits trigger a rebuild.
working.o: test.c
	$(CC) $(CFLAGS) -fno-expensive-optimizations -c -o working.o test.c

broken.o: test.c
	$(CC) $(CFLAGS) -c -o broken.o test.c

libworking.so: working.o
	$(CC) $(LDFLAGS) -o libworking.so working.o

libbroken.so: broken.o
	$(CC) $(LDFLAGS) -o libbroken.so broken.o

# main.c is compiled without $(CFLAGS), exactly as before.
working: main.c libworking.so
	$(CC) -o working main.c -L. -lworking

broken: main.c libbroken.so
	$(CC) -o broken main.c -L. -lbroken

clean:
	rm -f working.o libworking.so working broken.o libbroken.so broken
#include <inttypes.h>

/*
 * State block filled in by init_tables().  main.c carries its own copy of
 * this definition, so the member order and types here must stay in sync
 * with it.
 */
struct Context {
    int64_t out;    /* receives the TEST value */
    int64_t A;      /* roundToInt16(cy * 2) + roundToInt16(crv) */
    int64_t B;      /* roundToInt16(cgv) + roundToInt16(cgu) */
    int64_t C;      /* roundToInt16(oy) */
    int D;          /* read as the switch selector; reset to 0 by case 1 */
};

typedef struct Context Context;

/*
 * Clamp a to the range 0..255 and return it as a byte.
 *
 * Values that already fit in eight bits pass through unchanged.  For anything
 * else the sign of -a, shifted down by 31, supplies the result: 0 for negative
 * a and all-ones (truncated to 0xFF) for a > 255.  This relies on >> of a
 * negative int being an arithmetic shift, which C leaves
 * implementation-defined.
 */
static uint8_t clip_uint8_c(int a)
{
    const int high_bits = a & ~0xFF;

    return high_bits ? (-a) >> 31 : a;
}
/*
 * Fill all 768 (256 + 2*256) slots of table with pointers derived from y_tab.
 *
 * The starting pointer is y_tab moved back by elemsize * (inc >> 9) bytes.
 * Slot k then points elemsize * ((clip_uint8_c(k - 256) * inc) >> 16) bytes
 * past that starting pointer.  No bounds checking is done on the resulting
 * pointers.
 */
static void fill_table(uint8_t* table[256 + 2*256], const int elemsize,
                       const int64_t inc, void *y_tab)
{
    uint8_t *origin = y_tab;

    origin -= elemsize * (inc >> 9);

    for (int slot = 0; slot < 256 + 2*256; slot++) {
        const int64_t scaled = clip_uint8_c(slot - 256) * inc;

        table[slot] = origin + elemsize * (scaled >> 16);
    }
}
/*
 * Fill all 768 (256 + 2*256) slots of table with integer offsets.
 *
 * Slot k receives elemsize * (bias + ((clip_uint8_c(k - 256) * inc) >> 16)),
 * where bias is -(inc >> 9) narrowed to int.
 */
static void fill_gv_table(int table[256 + 2*256], const int elemsize, const int64_t inc)
{
    const int bias = -(inc >> 9);

    for (int slot = 0; slot < 256 + 2*256; slot++) {
        const int64_t scaled = clip_uint8_c(slot - 256) * inc;

        table[slot] = elemsize * (bias + (scaled >> 16));
    }
}
/*
 * Convert a 16.16 fixed-point value to an integer, rounding by adding one half
 * (1 << 15) before dropping the low 16 bits, and return it as uint16_t.
 *
 * Results below -0x7FFF yield 0x8000, results above 0x7FFF yield 0x7FFF, and
 * everything in between is returned as-is (negative values wrap into the
 * unsigned return type).
 */
static uint16_t roundToInt16(int64_t f)
{
    const int rounded = (f + (1 << 15)) >> 16;

    if (rounded < -0x7FFF)
        return 0x8000;
    if (rounded > 0x7FFF)
        return 0x7FFF;
    return rounded;
}

/*
 * Global lookup tables written by init_tables().  Each of the four index
 * tables has 256 + 2*256 = 768 entries.
 */
uint8_t yuvTable[1024];            /* byte table filled directly by init_tables() */
int table_gV[256 + 2*256];         /* integer offsets, filled by fill_gv_table() */
uint8_t *table_rV[256 + 2*256];    /* pointers, filled by fill_table() (case 1 only) */
uint8_t *table_gU[256 + 2*256];    /* pointers, filled by fill_table() (case 1 only) */
uint8_t *table_bU[256 + 2*256];    /* pointers, filled by fill_table() */

/*
 * Reduced reproducer for the miscompilation described in this report.
 *
 * Computes a set of 64-bit fixed-point coefficients from table[0..2],
 * brightness, contrast and saturation, stores results in c->out, c->A, c->B
 * and c->C, and, depending on c->D, fills the global lookup tables.
 *
 * c          - context; c->D is read as the switch selector, c->out/A/B/C
 *              are written (and c->D is reset to 0 in case 1).
 * table      - three input coefficients; table[0], table[1], table[2] are read.
 * brightness - zero selects the 255/219 scaling path, non-zero the 224/255 one.
 * contrast, saturation - 16.16-style multipliers (65536 acts as 1.0 here).
 *
 * With brightness == 0 and contrast == 65536, c->out must end up as 0x12A15:
 * (65536 * 255) / 219 = 0x12A15, and multiplying by 65536 then shifting right
 * by 16 leaves it unchanged.  The broken build yields 0x1FFFF instead.
 *
 * NOTE: the statements are deliberately left exactly as they are; per the
 * comment further down, removing the seemingly unrelated tail makes the bug
 * disappear.
 */
__attribute__((cold)) void init_tables(Context *c, int *table, int brightness, int contrast, int saturation)
{
    int i, dst, base, rbase;
    /* Selects the table-filling branch below; values other than 0 or 1 fill nothing. */
    const int bpp = c->D;
    /* Only used by case 1, as the offset into yuvTable for table_bU. */
    const int yoffs = brightness ? 384 : 326;
    int64_t crv = table[0];
    int64_t cbu = table[1];
    int64_t cgu = -table[0];
    int64_t cgv = -table[2];
    int64_t cy = 1 << 16;
    int64_t TEST = 1 << 16; // 0x10000
    int64_t oy = 0;
    int64_t yb = -26214400;
    if (!brightness) {
        /* Scale cy (and its shadow TEST) by 255/219 and set the offset to 16.0. */
        cy = (cy * 255) / 219;
        TEST = (TEST * 255) / 219; // 0x12A15
        oy = 16 << 16;
    } else {
        /* Scale the four chroma coefficients by 224/255 instead. */
        crv = (crv * 224) / 255;
        cbu = (cbu * 224) / 255;
        cgu = (cgu * 224) / 255;
        cgv = (cgv * 224) / 255;
    }

//    c->out = TEST; // 0x12A15
    /* This 64-bit multiply-and-shift is where the wrong value shows up. */
    TEST = (TEST * contrast) >> 16;
    c->out = TEST; // working: 0x12A15 / broken: 0x1FFFF

//  The code below seems unrelated, but without it the bug does not occur.

    cy = (cy * contrast) >> 16;
    crv = (crv * contrast * contrast) >> 32;
    cbu = (cbu * contrast * contrast) >> 32;
    cgu = (cgu * contrast * saturation) >> 32;
    cgv = (cgv * contrast * saturation) >> 32;
    oy -= brightness;
    c->A = roundToInt16(cy * 2) + roundToInt16(crv);
    c->B = roundToInt16(cgv) + roundToInt16(cgu);
    c->C = roundToInt16(oy);
    dst = roundToInt16(cbu);
    /* Re-express each coefficient relative to cy; max(cy, 1) avoids dividing by zero. */
    crv = (crv * (1 << 16)) / ((cy) > (1) ? (cy) : (1));
    cbu = (cbu * (1 << 16)) / ((cy) > (1) ? (cy) : (1));
    cgu = (cgu * (1 << 16)) / ((cy) > (1) ? (cy) : (1));
    cgv = (cgv * (1 << 16)) / ((cy) > (1) ? (cy) : (1));
    switch (bpp) {
    case 0:
        /* Fill yuvTable with a clipped ramp that starts at yb and steps by cy. */
        for (i = 0; i < 1024; i++) {
            yuvTable[i] = clip_uint8_c((yb + 0x8000) >> 16);
            yb += cy;
        }
        fill_table(table_bU, 1, cbu, yuvTable + 326);
        fill_gv_table(table_gV, 1, cgv);
        break;
    case 1:
        /* Shift amounts depend on whether dst (rounded cbu) is zero and on brightness. */
        base = (dst == 0) ? 8 : 0;
        rbase = base + (brightness ? 16 : 0);
        yb = -(384 << 16) - oy;
        for (i = 0; i < 1024; i++) {
            unsigned yval = clip_uint8_c(yb >> 16);
            /* The sum is narrowed to uint8_t when stored. */
            yuvTable[i] = (yval << rbase) + (255u << base) + (yval << base);
            yb += cy;
        }
        fill_table(table_rV, 4, crv, yuvTable);
        fill_table(table_gU, 4, cgu, yuvTable);
        fill_table(table_bU, 4, cbu, yuvTable + yoffs);
        c->D = 0;
        fill_gv_table(table_gV, 4, cgv);
        break;
    }
}

Reply to: