Buggy ssereg float function return on amd64
Greetings! I am having the following problem building blas on amd64
with gcc/gfortran 4.2. I'd appreciate a workaround in the short term
if there is one. Please cc me directly if possible. Thanks!
=============================================================================
Greetings! OK, the amd64 blas test failures appear to be derived from
a problem returning floating point values in registers between C and
fortran functions. My comments after ##:
(gdb) up
#2 0x00000000004016a2 in check2_ (sfac=@0x603a20) at c_sblat1.f:360
360 + SSIZE1(KN),SFAC)
(gdb) l
355 20 CONTINUE
356 *
357 IF (ICASE.EQ.1) THEN
358 * .. SDOTTEST ..
359 CALL STEST1(SDOTTEST(N,SX,INCX,SY,INCY),DT7(KN,KI),
360 + SSIZE1(KN),SFAC)
361 ELSE IF (ICASE.EQ.2) THEN
362 * .. SAXPYTEST ..
363 CALL SAXPYTEST(N,SA,SX,INCX,SY,INCY)
364 DO 40 J = 1, LENY
...
Breakpoint 8, check2_ (sfac=@0x603a20) at c_sblat1.f:360
360 + SSIZE1(KN),SFAC)
2: sx = (0.600000024, 0.100000001, -0.5, 0.800000012, 0.899999976, -0.300000012, -0.400000006)
1: incx = 1
(gdb) c
Continuing.
Breakpoint 8, check2_ (sfac=@0x603a20) at c_sblat1.f:360
360 + SSIZE1(KN),SFAC)
2: sx = (0.600000024, 0.100000001, -0.5, 0.800000012, 0.899999976, -0.300000012, -0.400000006)
1: incx = 1
(gdb) s
sdottest_ (N=0x604a14, X=0x7ffff45b0e50, incX=0x604a18, Y=0x7ffff45b0e70,
incY=0x604a1c) at c_sblas1.c:43
43 return cblas_sdot(*N, X, *incX, Y, *incY);
1: incx = 1
Current language: auto; currently c
(gdb) p cblas_sdot
$43 = {float (const int, const float *, const int, const float *,
const int)} 0x2b88b6735eac <cblas_sdot>
(gdb) p cblas_sdot(*N, X, *incX, Y, *incY)
$44 = 0.300000012 ### correct value
(gdb) finish
Run till exit from #0 sdottest_ (N=0x604a14, X=0x7ffff45b0e50,
incX=0x604a18, Y=0x7ffff45b0e70, incY=0x604a1c) at c_sblas1.c:43
0x0000000000401649 in check2_ (sfac=@0x603a20) at c_sblat1.f:360
360 + SSIZE1(KN),SFAC)
2: sx = (0.600000024, 0.100000001, -0.5, 0.800000012, 0.899999976, -0.300000012, -0.400000006)
1: incx = 1
Value returned is $45 = 0.30000001192092896 ### correct value
Current language: auto; currently fortran
(gdb) c
Continuing.
Breakpoint 5, stest_ (len=@0x403230, scomp=0x7ffff45b0c80,
strue=0x7ffff45b0c70, ssize=0x603f54, sfac=@0x603a20) at c_sblat1.f:639
639 PASS = .FALSE.
1: incx = 1
(gdb) p scomp
$46 = (PTR TO -> ( real*4 (0:-1))) 0x7ffff45b0c80
(gdb) up
#1 0x00000000004029c7 in stest1_ (scomp1=@0x7ffff45b0e4c, strue1=@0x604364,
ssize=0x603f54, sfac=@0x603a20) at c_sblat1.f:674
674 CALL STEST(1,SCOMP,STRUE,SSIZE,SFAC)
(gdb) p scomp1
$47 = (REF TO -> ( real*4 )) @0x7ffff45b0e4c: 2 ### wrong value
(gdb) p strue1
$48 = (REF TO -> ( real*4 )) @0x604364: 0.300000012
(gdb) up
#2 0x00000000004016a2 in check2_ (sfac=@0x603a20) at c_sblat1.f:360
360 + SSIZE1(KN),SFAC)
(gdb) disassemble
Dump of assembler code for function check2_:
0x00000000004014b5 <check2_+0>: push %rbp
0x00000000004014b6 <check2_+1>: mov %rsp,%rbp
0x00000000004014b9 <check2_+4>: sub $0x250,%rsp
0x00000000004014c0 <check2_+11>: mov %rdi,-0x248(%rbp)
0x00000000004014c7 <check2_+18>: movl $0x1,-0x1c(%rbp)
0x00000000004014ce <check2_+25>: cmpl $0x4,-0x1c(%rbp)
0x00000000004014d2 <check2_+29>: jg 0x401a50 <check2_+1435>
0x00000000004014d8 <check2_+35>: mov -0x1c(%rbp),%eax
0x00000000004014db <check2_+38>: cltq
0x00000000004014dd <check2_+40>: sub $0x1,%rax
0x00000000004014e1 <check2_+44>: mov 0x603f90(,%rax,4),%eax
0x00000000004014e8 <check2_+51>: mov %eax,0x20352a(%rip) # 0x604a18 <combla_+8>
0x00000000004014ee <check2_+57>: mov -0x1c(%rbp),%eax
0x00000000004014f1 <check2_+60>: cltq
0x00000000004014f3 <check2_+62>: sub $0x1,%rax
0x00000000004014f7 <check2_+66>: mov 0x603f80(,%rax,4),%eax
0x00000000004014fe <check2_+73>: mov %eax,0x203518(%rip) # 0x604a1c <combla_+12>
0x0000000000401504 <check2_+79>: mov 0x20350e(%rip),%eax # 0x604a18 <combla_+8>
0x000000000040150a <check2_+85>: mov %eax,%edx
---Type <return> to continue, or q <return> to quit---
0x000000000040150c <check2_+87>: sar $0x1f,%edx
0x000000000040150f <check2_+90>: xor %edx,%eax
0x0000000000401511 <check2_+92>: mov %eax,-0x4(%rbp)
0x0000000000401514 <check2_+95>: sub %edx,-0x4(%rbp)
0x0000000000401517 <check2_+98>: mov 0x2034ff(%rip),%eax # 0x604a1c <combla_+12>
0x000000000040151d <check2_+104>: mov %eax,%edx
0x000000000040151f <check2_+106>: sar $0x1f,%edx
0x0000000000401522 <check2_+109>: xor %edx,%eax
0x0000000000401524 <check2_+111>: mov %eax,-0x8(%rbp)
0x0000000000401527 <check2_+114>: sub %edx,-0x8(%rbp)
0x000000000040152a <check2_+117>: movl $0x1,-0xc(%rbp)
0x0000000000401531 <check2_+124>: cmpl $0x4,-0xc(%rbp)
0x0000000000401535 <check2_+128>: jg 0x401a39 <check2_+1412>
0x000000000040153b <check2_+134>: mov -0xc(%rbp),%eax
0x000000000040153e <check2_+137>: cltq
0x0000000000401540 <check2_+139>: sub $0x1,%rax
0x0000000000401544 <check2_+143>: mov 0x603f30(,%rax,4),%eax
0x000000000040154b <check2_+150>: mov %eax,0x2034c3(%rip) # 0x604a14 <combla_+4>
0x0000000000401551 <check2_+156>: cmpl $0x2,-0xc(%rbp)
0x0000000000401555 <check2_+160>: jge 0x401562 <check2_+173>
0x0000000000401557 <check2_+162>: mov -0xc(%rbp),%eax
---Type <return> to continue, or q <return> to quit---
0x000000000040155a <check2_+165>: mov %eax,-0x250(%rbp)
0x0000000000401560 <check2_+171>: jmp 0x40156c <check2_+183>
0x0000000000401562 <check2_+173>: movl $0x2,-0x250(%rbp)
0x000000000040156c <check2_+183>: mov -0x250(%rbp),%eax
0x0000000000401572 <check2_+189>: mov %eax,-0x10(%rbp)
0x0000000000401575 <check2_+192>: mov -0xc(%rbp),%eax
0x0000000000401578 <check2_+195>: movslq %eax,%rdx
0x000000000040157b <check2_+198>: mov -0x4(%rbp),%eax
0x000000000040157e <check2_+201>: cltq
0x0000000000401580 <check2_+203>: shl $0x2,%rax
0x0000000000401584 <check2_+207>: lea (%rdx,%rax,1),%rax
0x0000000000401588 <check2_+211>: sub $0x5,%rax
0x000000000040158c <check2_+215>: mov 0x603f60(,%rax,4),%eax
0x0000000000401593 <check2_+222>: mov %eax,-0x24(%rbp)
0x0000000000401596 <check2_+225>: mov -0xc(%rbp),%eax
0x0000000000401599 <check2_+228>: movslq %eax,%rdx
0x000000000040159c <check2_+231>: mov -0x8(%rbp),%eax
0x000000000040159f <check2_+234>: cltq
0x00000000004015a1 <check2_+236>: shl $0x2,%rax
0x00000000004015a5 <check2_+240>: lea (%rdx,%rax,1),%rax
0x00000000004015a9 <check2_+244>: sub $0x5,%rax
0x00000000004015ad <check2_+248>: mov 0x603f60(,%rax,4),%eax
0x00000000004015b4 <check2_+255>: mov %eax,-0x20(%rbp)
---Type <return> to continue, or q <return> to quit---
0x00000000004015b7 <check2_+258>: movl $0x1,-0x18(%rbp)
0x00000000004015be <check2_+265>: cmpl $0x7,-0x18(%rbp)
0x00000000004015c2 <check2_+269>: jg 0x401615 <check2_+352>
0x00000000004015c4 <check2_+271>: mov -0x18(%rbp),%eax
0x00000000004015c7 <check2_+274>: cltq
0x00000000004015c9 <check2_+276>: lea -0x1(%rax),%rdx
0x00000000004015cd <check2_+280>: mov -0x18(%rbp),%eax
0x00000000004015d0 <check2_+283>: cltq
0x00000000004015d2 <check2_+285>: sub $0x1,%rax
0x00000000004015d6 <check2_+289>: mov 0x603fa0(,%rax,4),%eax
0x00000000004015dd <check2_+296>: mov %eax,-0xa0(%rbp,%rdx,4)
0x00000000004015e4 <check2_+303>: mov -0x18(%rbp),%eax
0x00000000004015e7 <check2_+306>: cltq
0x00000000004015e9 <check2_+308>: lea -0x1(%rax),%rdx
0x00000000004015ed <check2_+312>: mov -0x18(%rbp),%eax
0x00000000004015f0 <check2_+315>: cltq
0x00000000004015f2 <check2_+317>: sub $0x1,%rax
0x00000000004015f6 <check2_+321>: mov 0x603fc0(,%rax,4),%eax
0x00000000004015fd <check2_+328>: mov %eax,-0x80(%rbp,%rdx,4)
0x0000000000401601 <check2_+332>: cmpl $0x7,-0x18(%rbp)
0x0000000000401605 <check2_+336>: sete %al
0x0000000000401608 <check2_+339>: movzbl %al,%eax
0x000000000040160b <check2_+342>: addl $0x1,-0x18(%rbp)
---Type <return> to continue, or q <return> to quit---
0x000000000040160f <check2_+346>: test %eax,%eax
0x0000000000401611 <check2_+348>: jne 0x401615 <check2_+352>
0x0000000000401613 <check2_+350>: jmp 0x4015c4 <check2_+271>
0x0000000000401615 <check2_+352>: mov 0x2033f5(%rip),%eax # 0x604a10 <combla_>
0x000000000040161b <check2_+358>: cmp $0x1,%eax
0x000000000040161e <check2_+361>: jne 0x4016a7 <check2_+498>
0x0000000000401624 <check2_+367>: lea -0x80(%rbp),%rcx
0x0000000000401628 <check2_+371>: lea -0xa0(%rbp),%rsi
0x000000000040162f <check2_+378>: mov $0x604a1c,%r8d
0x0000000000401635 <check2_+384>: mov $0x604a18,%edx
0x000000000040163a <check2_+389>: mov $0x604a14,%edi
0x000000000040163f <check2_+394>: mov $0x0,%eax
0x0000000000401644 <check2_+399>: callq 0x402d41 <sdottest_>
0x0000000000401649 <check2_+404>: movss %xmm0,-0xa4(%rbp) ### I suspect the return is not set here
0x0000000000401651 <check2_+412>: mov -0xc(%rbp),%eax
0x0000000000401654 <check2_+415>: cltq
0x0000000000401656 <check2_+417>: sub $0x1,%rax
0x000000000040165a <check2_+421>: shl $0x2,%rax
0x000000000040165e <check2_+425>: lea 0x603f50(%rax),%r8
0x0000000000401665 <check2_+432>: mov -0xc(%rbp),%eax
0x0000000000401668 <check2_+435>: movslq %eax,%rdx
0x000000000040166b <check2_+438>: mov -0x1c(%rbp),%eax
---Type <return> to continue, or q <return> to quit---
0x000000000040166e <check2_+441>: cltq
0x0000000000401670 <check2_+443>: shl $0x2,%rax
0x0000000000401674 <check2_+447>: lea (%rdx,%rax,1),%rax
0x0000000000401678 <check2_+451>: sub $0x5,%rax
0x000000000040167c <check2_+455>: shl $0x2,%rax
0x0000000000401680 <check2_+459>: lea 0x604360(%rax),%rsi
0x0000000000401687 <check2_+466>: mov -0x248(%rbp),%rcx
0x000000000040168e <check2_+473>: lea -0xa4(%rbp),%rdi
0x0000000000401695 <check2_+480>: mov %r8,%rdx
0x0000000000401698 <check2_+483>: mov $0x0,%eax
0x000000000040169d <check2_+488>: callq 0x40297b <stest1_>
0x00000000004016a2 <check2_+493>: jmpq 0x401a22 <check2_+1389>
...
(gdb) i reg
rax 0x1 1
rbx 0x2b88b6712c00 47866176416768
rcx 0x603f54 6307668
rdx 0x0 0
rsi 0x7ffff45b0c34 140737292995636
rdi 0x7ffff45b0c30 140737292995632
rbp 0x7ffff45b0ef0 0x7ffff45b0ef0
rsp 0x7ffff45b0ca0 0x7ffff45b0ca0
r8 0x603a20 6306336
r9 0x7ffff45b0c4c 140737292995660
r10 0x7ffff45b0c3c 140737292995644
r11 0x2b88b679b450 47866176975952
r12 0x0 0
r13 0x7ffff45b1190 140737292997008
r14 0x0 0
r15 0x0 0
rip 0x4016a2 0x4016a2 <check2_+493>
eflags 0x202 [ IF ]
cs 0x33 51
ss 0x2b 43
ds 0x0 0
es 0x0 0
fs 0x0 0
---Type <return> to continue, or q <return> to quit---
gs 0x0 0
fctrl 0x37f 895
fstat 0x0 0
ftag 0xffff 65535
fiseg 0x0 0
fioff 0x0 0
foseg 0x0 0
fooff 0x0 0
fop 0x0 0
mxcsr 0x1fa0 [ PE IM DM ZM OM UM PM ]
(gdb) i reg rbp
rbp 0x7ffff45b0ef0 0x7ffff45b0ef0
(gdb) down
#1 0x00000000004029c7 in stest1_ (scomp1=@0x7ffff45b0e4c, strue1=@0x604364,
ssize=0x603f54, sfac=@0x603a20) at c_sblat1.f:674
674 CALL STEST(1,SCOMP,STRUE,SSIZE,SFAC)
(gdb) p &scomp1
$50 = (PTR TO -> ( real*4 )) 0x7ffff45b0e4c
(gdb) p *(float *)0x7ffff45b0e4c@10
$51 = (2, 0.600000024, 0.100000001, -0.5, 0.800000012, 0.899999976, -0.300000012, -0.400000006, 0, 0.5)
(gdb) i reg xmm0 ### But I cannot check in gdb
xmm0 <error reading variable>
(gdb) shell
### and I can't seem to work around by instructing the compiler to
avoid this:
gfortran -pipe -g -c c_sblat1.f
camm@eisluft:~/blas-1.2/cblas/testing$ gfortran -mnosseregparam -pipe -g -c c_sblat1.f
f951: error: unrecognized command line option "-mnosseregparam"
camm@eisluft:~/blas-1.2/cblas/testing$ gfortran -mnosseregparm -pipe -g -c c_sblat1.f
f951: error: unrecognized command line option "-mnosseregparm"
camm@eisluft:~/blas-1.2/cblas/testing$ gfortran -mno-sseregparm -pipe -g -c c_sblat1.f
f951: error: unrecognized command line option "-mno-sseregparm"
camm@eisluft:~/blas-1.2/cblas/testing$ gfortran -mno-sse -pipe -g -c c_sblat1.f
c_sblat1.f: In function ‘check1’:
c_sblat1.f:214: error: SSE register return with SSE disabled
camm@eisluft:~/blas-1.2/cblas/testing$
Advice as to the next step most appreciated.
Take care,
Riku Voipio <riku.voipio@iki.fi> writes:
> Hi wookey,
>
> can you provide an amd64 machine with refblas/lapack/atlas build-deps
> installed for camm?
>
> On Sat, Nov 10, 2007 at 11:32:41AM -0500, Camm Maguire wrote:
> > Greetings, again!
> >
> > Just spent a few minutes reviewing our project machines availability
> > for the various platforms. Many are still locked down from the
> > breakin many many months ago. Requests to debian-admin go
> > unanswered. I have several previously portable packages which are now
> > held up for months due in part to lack of access to a proper debugging
> > environment. It appears as if this situation is not likely to be
> > remedied soon. (In particular, alpha, arm, and sparc appear to be the
> > most egregious offenders.)
> >
> > I, like everyone else, have limited time to devote to Debian, but of
> > course value the project and especially its portability dearly.
> > Nevertheless, it appears that it might be wise for me, and perhaps for
> > others, to adopt one of the following procedures for their packages:
> >
> > 1) mark for i386 only, and let others be responsible for porting
> > 2) upload binaries built under a cross compiling environment, assuming
> > an acceptable one exists.
> >
> > I think that all bugs should be marked as minor if they are arch
> > specific and if no machine is provided for at least a contiguous three
> > months to address the issue.
> >
> > Comments?
> >
> > Take care,
> >
> > Riku Voipio <riku.voipio@iki.fi> writes:
> >
> > > Hi,
> > >
> > > Firstly, I've collected all (well, most) information bits to:
> > >
> > > http://wiki.debian.org/GfortranTransition
> > >
> > > Feel free to update/edit the page. Most importantly, we need to get
> > > the packages that need a library rename done uploaded to experimental
> > > soon.
> > >
> > > Second, the refblas3 testsuite on gfortran failed again on many architectures:
> > >
> > > http://experimental.debian.net/build.php?pkg=refblas3
> > >
> > > Amd64 and mips fail with the following tests:
> > >
> > > Test of subprogram number 1 CBLAS_SDOT
> > > FAIL
> > >
> > > CASE N INCX INCY MODE I COMP(I) TRUE(I) DIFFERENCE SIZE(I)
> > >
> > > 1 1 1 1 9999 1 0.20000000E+01 0.30000001E+00 0.1700E+01 0.3000E+00
> > > 1 2 1 1 9999 1 -0.20000000E+01 0.20999999E+00 -0.2210E+01 0.1600E+01
> > > 1 4 1 1 9999 1 0.20000000E+01 0.62000000E+00 0.1380E+01 0.3200E+01
> > > 1 1 2 -2 9999 1 0.20000000E+01 0.30000001E+00 0.1700E+01 0.3000E+00
> > > 1 2 2 -2 9999 1 0.00000000E+00 -0.70000000E-01 0.7000E-01 0.1600E+01
> > > 1 4 2 -2 9999 1 0.36893488E+20 0.85000002E+00 0.3689E+20 0.3200E+01
> > > 1 1 -2 1 9999 1 0.20000000E+01 0.30000001E+00 0.1700E+01 0.3000E+00
> > > 1 2 -2 1 9999 1 0.10842022E-18 -0.79000002E+00 0.7900E+00 0.1600E+01
> > > 1 4 -2 1 9999 1 0.36893488E+20 -0.74000001E+00 0.3689E+20 0.3200E+01
> > > 1 1 -1 -2 9999 1 0.20000000E+01 0.30000001E+00 0.1700E+01 0.3000E+00
> > > 1 2 -1 -2 9999 1 0.36893488E+20 0.33000001E+00 0.3689E+20 0.1600E+01
> > > 1 4 -1 -2 9999 1 0.00000000E+00 0.12700000E+01 -0.1270E+01 0.3200E+01
> > >
> > > The E+20 are worrying.
> > >
> > > mipsel, sparc, s390 and hppa fail with slightly different results:
> > >
> > > Test of subprogram number 1 CBLAS_SDOT
> > > FAIL
> > >
> > > CASE N INCX INCY MODE I COMP(I) TRUE(I) DIFFERENCE SIZE(I)
> > >
> > > 1 1 1 1 9999 1 0.16500000E+01 0.30000001E+00 0.1350E+01 0.3000E+00
> > > 1 2 1 1 9999 1 0.15849999E+01 0.20999999E+00 0.1375E+01 0.1600E+01
> > > 1 4 1 1 9999 1 0.17800000E+01 0.62000000E+00 0.1160E+01 0.3200E+01
> > > 1 1 2 -2 9999 1 0.16500000E+01 0.30000001E+00 0.1350E+01 0.3000E+00
> > > 1 2 2 -2 9999 1 -0.13900000E+01 -0.70000000E-01 -0.1320E+01 0.1600E+01
> > > 1 4 2 -2 9999 1 0.18375000E+01 0.85000002E+00 0.9875E+00 0.3200E+01
> > > 1 1 -2 1 9999 1 0.16500000E+01 0.30000001E+00 0.1350E+01 0.3000E+00
> > > 1 2 -2 1 9999 1 -0.18225000E+01 -0.79000002E+00 -0.1033E+01 0.1600E+01
> > > 1 4 -2 1 9999 1 -0.18099999E+01 -0.74000001E+00 -0.1070E+01 0.3200E+01
> > > 1 1 -1 -2 9999 1 0.16500000E+01 0.30000001E+00 0.1350E+01 0.3000E+00
> > > 1 2 -1 -2 9999 1 0.16650000E+01 0.33000001E+00 0.1335E+01 0.1600E+01
> > > 1 4 -1 -2 9999 1 0.19087499E+01 0.12700000E+01 0.6387E+00 0.3200E+01
> > >
> > > These could be some kind of rounding disparity.
> > >
> > >
> > > --
> > > "rm -rf" only sounds scary if you don't have backups
> > >
> > >
> > >
> >
> > --
> > Camm Maguire camm@enhanced.com
> > ==========================================================================
> > "The earth is but one country, and mankind its citizens." -- Baha'u'llah
>
> --
> "rm -rf" only sounds scary if you don't have backups
>
>
>
--
Camm Maguire camm@enhanced.com
==========================================================================
"The earth is but one country, and mankind its citizens." -- Baha'u'llah
Matthias Klose <doko@cs.tu-berlin.de> writes:
> Camm, you might want to subscribe to fortran@gcc.gnu.org as well and
> maybe ask questions there.
>
> Matthias
>
>
>
--
Camm Maguire camm@enhanced.com
==========================================================================
"The earth is but one country, and mankind its citizens." -- Baha'u'llah
Reply to: