[Bug target/35659] [4.3/4.4 Regression] Miscompiled code with -O2 (but not with -O2 -funroll-loops) on ia64
------- Comment #13 from jakub at gcc dot gnu dot org 2008-06-25 10:20 -------
And the miscompiled tlsc.f inline (compile with just -O2):
SUBROUTINE TLSC (A,B,AUX,IPIV,EPS,X)
! TLSC -- test-case subroutine quoted in this bug report.
!
! Arguments (all assumed-size arrays except EPS):
!   A, B   read and updated in place
!   AUX    work/output array; written at indices J, K1+K and 1..L
!   IPIV   integer array; initialised to 1..N, then updated per step K
!   EPS    scalar that multiplies each pivot candidate PIVT before it
!          is compared against the current best PIV
!   X      output array filled in the section after label 50
!
! Sizes M1, M, N, L and the status IER come from COMMON /TLSDIM/.
! Every scratch scalar, including the DO index K, lives in
! COMMON /SLATE/, so each assignment to one is a real memory store.
!
! IER is -1001 on the error exit (label 90); on the normal path it is
! set to 1 and later multiplied by N.
!
! NOTE(review): TLSMSQ, TLSWOP, TLUK and TLSTEP are not shown here;
! remarks about them below are assumptions to be confirmed.
COMMON /TLSDIM/ M1,M,N,L,IER
COMMON /SLATE/ BETA,H,I,IB,IB1,ID,ID1,IEND,II,IST,J,JA,JB,JK
+ ,JST,K,KPIV,KR,KST,KT,K1,LV,MR,M11,NK,NR,PIV,PIVT
+ ,SIG,DUM(11)
DIMENSION A(*), AUX(*), B(*), IPIV(*), X(*)
! Reject inconsistent sizes: N > M or M1 > N takes the error exit.
IF (N.GT.M.OR.M1.GT.N) GO TO 90
K1 = MAX (N,L)
IER = 1
! Start with IPIV(K) = K for K = 1..N.
DO 5 K=1,N
5 IPIV(K) = K
! IST and JB are pre-decremented so that the increments at the top of
! the first iteration of loop 50 yield IST = 1 and JB = 1.
IST = - N
JB = 1 - L
M11 = M1 + 1
MR = M1
! Main loop over K = 1..N.  K is a member of COMMON /SLATE/.
! NOTE(review): if the SUBROUTINE statement is line 1 of tlsc.f, this
! DO (store K = 1) and the IF below (read K) are tlsc.f:16 and
! tlsc.f:17, the store/load pair the report shows being reordered.
DO 50 K=1,N
! MR switches from M1 to M once K exceeds M1.
IF (K.GT.M1) MR = M
IST = IST + N + 1
JB = JB + L
LV = MR - K + 1
PIV = 0.
ID = IST - N
! Pivot search over J = K..N.  PIVT is recomputed from A when K is 1
! or M11, otherwise AUX(J) is reduced by A(ID)**2.
DO 20 J=K,N
IF (K.EQ.1 .OR. K.EQ.M11) GO TO 10
PIVT = AUX(J) - A(ID)*A(ID)
GO TO 15
10 I = ID + N
! With LV = 1 the value is just A(I)**2; otherwise TLSMSQ supplies
! PIVT (presumably a sum of squares -- confirm).
IF (LV .EQ. 1) GO TO 12
CALL TLSMSQ (A(I),N,LV,PIVT)
GO TO 15
12 PIVT= A(I)*A(I)
15 AUX(J) = PIVT
ID = ID + 1
! Keep candidate J only if PIVT*EPS exceeds the best so far.  KPIV is
! assigned only on that path.
IF (PIVT*EPS.LE.PIV) GO TO 20
PIV = PIVT
KPIV = J
20 CONTINUE
! If the chosen pivot is not already at position K, swap the AUX
! entries and call TLSWOP on A(IST) and A(IST+I) (presumably an
! exchange within A -- confirm).
I = KPIV - K
IF (I.LE.0) GO TO 25
H = AUX(K)
AUX(K) = AUX(KPIV)
AUX(KPIV) = H
ID = IST + I
NR = M - K + 1
CALL TLSWOP (A(IST),A(ID),N,NR)
! TLUK returns SIG and BETA.  LV is tested right after the call, so
! TLUK presumably can reset it to 0 to signal failure -- confirm.
25 CALL TLUK (A(IST),N,LV,SIG,BETA)
IF (LV.EQ.0) GO TO 90
J = K1 + K
AUX(J)=-SIG
! Update the remaining NK = N-K entries following A(IST); skipped when
! K = N.  With LV = 1 the update is an inline scaling, otherwise TLSTEP
! is called.
IF (K.GE.N) GO TO 30
NK = N - K
IF (LV.EQ.1) GO TO 27
CALL TLSTEP (A(IST),A(IST+1),N,N,LV,NK,BETA)
GO TO 30
27 DO 28 J=1,NK
JST = IST + J
28 A(JST) = A(JST)*(1.-BETA*A(IST)**2)
! Same update applied to the L entries of B starting at IB.
30 IB = (K-1) * L + 1
IF (LV.EQ.1) GO TO 32
CALL TLSTEP (A(IST),B(IB),N,L,LV,L,BETA)
GO TO 34
32 DO 33 J=1,L
JST = IB + J - 1
33 B(JST) = B(JST)*(1.-BETA*A(IST)**2)
! Record the pivot choice for step K in IPIV.
34 IPIV(KPIV) = IPIV(K)
IPIV(K) = KPIV
! The loop over I = M11..M below runs only while K <= M1.
IF (K.GT.M1) GO TO 50
DO 45 I=M11,M
ID1 = IST + (I-K)*N
IF (A(ID1).EQ.0) GO TO 45
! H = -A(ID1)/SIG is stored back into A(ID1) and then used as the
! multiplier for the NK following entries of A and for L entries of B.
H = - A(ID1)/SIG
A(ID1) = H
ID1 = ID1 + 1
ID = IST + 1
DO 35 J=1,NK
A(ID1) = A(ID1) - H*A(ID)
ID1 = ID1 + 1
35 ID = ID + 1
IB1 = 1 + (I-1)*L
IB = JB
DO 40 J=1,L
B(IB1) = B(IB1) - H*B(IB)
IB1 = IB1 + 1
40 IB = IB + 1
45 CONTINUE
50 CONTINUE
! Main loop finished.  IER was 1, so it becomes N here.
IER = N * IER
KT = N
! Fill X((N-1)*L+1 .. N*L) with B scaled by 1/AUX(K1+N).
JK = (N-1) * L
K = K1 + N
PIV = 1./AUX(K)
DO 55 K=1,L
JK = JK + 1
55 X(JK) = PIV * B(JK)
! Remaining entries of X: outer loop J = 1..KR (KR = N-1), inner loop
! K = 1..L.  Both DO loops share terminal label 65.
KR = N - 1
IF (KR.LE.0) GO TO 70
JST = KR * (N+1) + 2
DO 65 J=1,KR
JST = JST - N - 1
IEND= (KR-J+1) * N
K = K1 + KR - J + 1
PIV = 1./AUX(K)
KST = K-K1
! ID is the offset between the recorded pivot IPIV(KST) and KST.
ID = IPIV(KST)-KST
KST = (KR-J) * L
DO 65 K=1,L
KST = KST + 1
H=B(KST)
II = KST
! Subtract A(I)*X(II) for I = JST..IEND, with II stepping by L.
DO 60 I=JST,IEND
II = II + L
60 H = H - A(I) * X(II)
! Move X(II) to X(KST) and store the new value PIV*H at X(II), where
! II = KST + ID*L.
II = KST + ID *L
X(KST) = X(II)
X(II) = PIV * H
65 CONTINUE
! Final pass: AUX(J) for J = 1..L.  H stays 0 when M <= KT; otherwise
! it is B(IST)**2 when NR = 1, or whatever TLSMSQ returns in H.
70 IST = KT*L
DO 80 J=1,L
IST = IST + 1
H = 0.
JA = IST
IF (M.LE.KT) GO TO 80
NR = M - KT
IF (NR.EQ.1) GO TO 75
CALL TLSMSQ (B(IST),L,NR,H)
GO TO 80
75 H = B(IST)*B(IST)
80 AUX(J) = H
RETURN
! Error exit, reached from the size check and from the LV = 0 test
! after TLUK.
90 IER = -1001
RETURN
END
The problem is that slate.k (aka prephitmp.78) is read before it is stored,
so it has the 0x20202020 value instead of 1.
At tlsc.f.198r.compgotos the code still looks correct:
(insn 1857 1434 1443 8 tlsc.f:16 (set (reg:SI 14 r14 [1392])
(const_int 1 [0x1])) 4 {*movsi_internal} (expr_list:REG_EQUIV
(const_int 1 [0x1])
(nil)))
(insn 1443 1857 242 8 tlsc.f:16 (set (mem/s/c:SI (post_modify:DI (reg/f:DI 53
r59 [1516])
(plus:DI (reg/f:DI 53 r59 [1516])
(const_int -60 [0xffffffffffffffc4]))) [2 slate.k+0 S4
A32])
(reg:SI 14 r14 [1392])) 4 {*movsi_internal} (expr_list:REG_INC
(reg/f:DI 53 r59 [1516])
(expr_list:REG_EQUAL (const_int 1 [0x1])
(nil))))
...
(insn 197 228 247 8 tlsc.f:17 (set (reg:DI 18 r18)
(zero_extend:DI (mem/s/c:SI (reg/f:DI 38 r44 [1517]) [2 slate.k+0 S4
A32]))) 103 {zero_extendsidi2} (expr_list:REG_EQUAL
(mem/s/c:SI (const:DI (plus:DI (symbol_ref:DI ("slate_") <var_decl
0x2000000003c54c80 slate>)
(const_int 60 [0x3c]))) [2 slate.k+0 S4 A32])
(nil)))
(insn 247 197 198 8 tlsc.f:22 (set (reg:SI 19 r19 [665])
(minus:SI (reg:SI 20 r20 [orig:560 D.775 ] [560])
(reg:SI 32 r38 [orig:529 prephitmp.68 ] [529]))) 165 {subsi3}
(nil))
(insn 198 247 253 8 tlsc.f:17 (set (reg:BI 262 p6 [626])
(le:BI (reg:SI 18 r18 [orig:522 prephitmp.78 ] [522])
(reg:SI 22 r22 [orig:459 prephitmp.258 ] [459]))) 298
{*cmpsi_normal} (nil))
but tlsc.f.200r.mach is already wrong:
(insn:TI 197 2150 1443 8 tlsc.f:17 (set (reg:DI 18 r18)
(zero_extend:DI (unspec:SI [
(mem/s/c:SI (reg/f:DI 38 r44 [1517]) [2 slate.k+0 S4 A32])
] 40))) 17 {zero_extendsidi2_advanced} (expr_list:REG_EQUAL
(mem/s/c:SI (const:DI (plus:DI (symbol_ref:DI ("slate_") <var_decl
0x2000000003c54c80 slate>)
(const_int 60 [0x3c]))) [2 slate.k+0 S4 A32])
(nil)))
(insn 1443 197 1427 8 tlsc.f:16 (set (mem/s/c:SI (post_modify:DI (reg/f:DI 53
r59 [1516])
(plus:DI (reg/f:DI 53 r59 [1516])
(const_int -60 [0xffffffffffffffc4]))) [2 slate.k+0 S4
A32])
(reg:SI 14 r14 [1392])) 4 {*movsi_internal} (expr_list:REG_DEAD (reg:SI
14 r14 [1392])
(expr_list:REG_INC (reg/f:DI 53 r59 [1516])
(expr_list:REG_EQUAL (const_int 1 [0x1])
(nil)))))
...
(insn:TI 198 2148 223 8 tlsc.f:17 (set (reg:BI 262 p6 [626])
(le:BI (reg:SI 18 r18 [orig:522 prephitmp.78 ] [522])
(reg:SI 22 r22 [orig:459 prephitmp.258 ] [459]))) 298
{*cmpsi_normal} (nil))
Note that the scheduler swapped the slate_.k = 1 store with the prephitmp.78 =
slate_.k read. The same can be seen in the assembly:
ld4.a r18 = [r44] //, slate.k
[.LBE19:]
.loc 1 16 0
st4 [r59] = r14, -60 // slate.k, tmp1392
where r44 == r59.
(gdb) disas $pc $pc+16
Dump of assembler code from 0x4000000000000ec0 to 0x4000000000000ed0:
0x4000000000000ec0 <tlsc+512>: [MMI] ld4.a r18=[r44]
0x4000000000000ec1 <tlsc+513>: st4 [r59]=r14,-60
0x4000000000000ec2 <tlsc+514>: adds r16=48,r41
End of assembler dump.
(gdb) p/x $r44
$3 = 0x6000000000004e4c
(gdb) p/x $r59
$4 = 0x6000000000004e4c
insn 197 is the only ld4.a in the tlsc_ routine.
--
jakub at gcc dot gnu dot org changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |vmakarov at gcc dot gnu dot
| |org, wilson at gcc dot gnu
| |dot org
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35659
------- You are receiving this mail because: -------
You are on the CC list for the bug, or are watching someone who is.
Reply to: