[patch] glibc/ia64 memccpy segfault
Hi,
Please find attached a patch which I have been sitting on for way too
long. It should apply cleanly to both glibc-2.2 and glibc-2.3.
It basically solve a bug in the ia64 memccpy implementation where the
readahead will cause a segfault if you try to memccpy from across a page
boundary, with the second page not being mapped and the end-of-stream
character is found in the original page.
This is a segfault bugfix and I recommend distributions to include it in
their next updates.
Cheers,
Jes
2003-04-11 Jes Sorensen <jes@wildopensource.com>
* sysdeps/ia64/memccpy.S: When recovering for src_aligned and the
character is found during recovery, use correct register when
determining the position of the found character.
2003-04-01 Jes Sorensen <jes@wildopensource.com>
* sysdeps/ia64/memccpy.S: Use speculatively loads for readahead to
avoid segfaults when reading from unmapped pages. For aligned
reload and continue, for misaligned, roll back and use byte copy.
Save ar.ec on entry and restore on exit.
Index: sysdeps/ia64/memccpy.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/ia64/memccpy.S,v
retrieving revision 1.5
diff -u -r1.5 memccpy.S
--- sysdeps/ia64/memccpy.S 6 Jul 2001 04:55:54 -0000 1.5
+++ sysdeps/ia64/memccpy.S 9 Sep 2003 14:15:32 -0000
@@ -1,6 +1,6 @@
/* Optimized version of the memccpy() function.
This file is part of the GNU C Library.
- Copyright (C) 2000, 2001 Free Software Foundation, Inc.
+ Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
Contributed by Dan Pop <Dan.Pop@cern.ch>.
The GNU C Library is free software; you can redistribute it and/or
@@ -46,6 +46,7 @@
#define tmp r23
#define char r24
#define charx8 r25
+#define saved_ec r26
#define sh2 r28
#define sh1 r29
#define loopcnt r30
@@ -56,25 +57,27 @@
alloc r2 = ar.pfs, 4, 40 - 4, 0, 40
#include "softpipe.h"
- .rotr r[MEMLAT + 3], tmp1[4], tmp2[4], val[4], tmp3[2], pos0[2]
+ .rotr r[MEMLAT + 7], tmp1[4], tmp2[4], val[4], tmp3[2], pos0[2]
.rotp p[MEMLAT + 6 + 1]
mov ret0 = r0 // return NULL if no match
.save pr, saved_pr
mov saved_pr = pr // save the predicate registers
+ mov dest = in0 // dest
.save ar.lc, saved_lc
mov saved_lc = ar.lc // save the loop counter
+ mov saved_ec = ar.ec // save the loop counter
.body
- mov dest = in0 // dest
mov src = in1 // src
extr.u char = in2, 0, 8 // char
mov len = in3 // len
sub tmp = r0, in0 // tmp = -dest
cmp.ne p7, p0 = r0, r0 // clear p7
;;
- and loopcnt = 7, tmp // loopcnt = -dest % 8
+ and loopcnt = 7, tmp // loopcnt = -dest % 8
cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES
-(p6) br.cond.spnt .cpyfew // copy byte by byte
+ mov ar.ec = 0 // ec not guaranteed zero on entry
+(p6) br.cond.spnt .cpyfew // copy byte by byte
;;
cmp.eq p6, p0 = loopcnt, r0
mux1 charx8 = char, @brcst
@@ -109,26 +112,31 @@
cmp.ne p6, p0 = r0, r0 ;; // clear p6
.align 32
.l2:
-(p[0]) ld8 r[0] = [asrc], 8 // r[0] = w1
+(p[0]) ld8.s r[0] = [asrc], 8 // r[0] = w1
(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 >> sh1
(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 << sh2
(p[MEMLAT+4]) xor tmp3[0] = val[1], charx8
(p[MEMLAT+5]) czx1.r pos0[0] = tmp3[1]
+(p[MEMLAT+6]) chk.s r[6 + MEMLAT], .recovery1 // our data isn't
+ // valid - rollback!
(p[MEMLAT+6]) cmp.ne p6, p0 = 8, pos0[1]
(p6) br.cond.spnt .gotit
(p[MEMLAT+6]) st8 [dest] = val[3], 8 // store val to dest
(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] // val = tmp1 | tmp2
br.ctop.sptk .l2
br.cond.sptk .cpyfew
+
.src_aligned:
cmp.ne p6, p0 = r0, r0 // clear p6
mov ar.ec = MEMLAT + 2 + 1 ;; // set EC
.l3:
-(p[0]) ld8 r[0] = [src], 8
+(p[0]) ld8.s r[0] = [src], 8
(p[MEMLAT]) xor tmp3[0] = r[MEMLAT], charx8
(p[MEMLAT+1]) czx1.r pos0[0] = tmp3[1]
(p[MEMLAT+2]) cmp.ne p7, p0 = 8, pos0[1]
+(p[MEMLAT+2]) chk.s r[MEMLAT+2], .recovery2
(p7) br.cond.spnt .gotit
+.back2:
(p[MEMLAT+2]) st8 [dest] = r[MEMLAT+2], 8
br.ctop.dptk .l3
.cpyfew:
@@ -148,6 +156,7 @@
.restore_and_exit:
mov pr = saved_pr, -1 // restore the predicate registers
mov ar.lc = saved_lc // restore the loop counter
+ mov ar.ec = saved_ec ;; // restore the epilog counter
br.ret.sptk.many b0
.gotit:
.pred.rel "mutex" p6, p7
@@ -163,4 +172,33 @@
mov pr = saved_pr, -1
mov ar.lc = saved_lc
br.ret.sptk.many b0
+
+.recovery1:
+ adds src = -(MEMLAT + 6 + 1) * 8, asrc
+ mov loopcnt = ar.lc
+ mov tmp = ar.ec ;;
+ sub sh1 = (MEMLAT + 6 + 1), tmp
+ shr.u sh2 = sh2, 3
+ ;;
+ shl loopcnt = loopcnt, 3
+ sub src = src, sh2
+ shl sh1 = sh1, 3
+ shl tmp = tmp, 3
+ ;;
+ add len = len, loopcnt
+ add src = sh1, src ;;
+ add len = tmp, len
+.back1:
+ br.cond.sptk .cpyfew
+
+.recovery2:
+ add tmp = -(MEMLAT + 3) * 8, src
+(p7) br.cond.spnt .gotit
+ ;;
+ ld8 r[MEMLAT+2] = [tmp] ;;
+ xor pos0[1] = r[MEMLAT+2], charx8 ;;
+ czx1.r pos0[1] = pos0[1] ;;
+ cmp.ne p7, p6 = 8, pos0[1]
+(p7) br.cond.spnt .gotit
+ br.cond.sptk .back2
END(memccpy)
Reply to: