r3704 - in glibc-package/trunk/debian: . patches patches/alpha
Author: aurel32
Date: 2009-07-27 09:19:53 +0000 (Mon, 27 Jul 2009)
New Revision: 3704
Added:
glibc-package/trunk/debian/patches/alpha/submitted-asm-memchr.diff
Modified:
glibc-package/trunk/debian/changelog
glibc-package/trunk/debian/patches/series
Log:
* alpha/submitted-asm-memchr.diff: new patch to fix broken prefetching
in memchr() on alpha. Closes: #521737.
Modified: glibc-package/trunk/debian/changelog
===================================================================
--- glibc-package/trunk/debian/changelog 2009-07-26 23:33:24 UTC (rev 3703)
+++ glibc-package/trunk/debian/changelog 2009-07-27 09:19:53 UTC (rev 3704)
@@ -19,8 +19,10 @@
a memory ordering problem in pthread_mutex_{,timed}lock.
* Replace any/submitted-signalfd-eventfd.diff by upstream version
any/cvs-signalfd-eventfd.diff.
+ * alpha/submitted-asm-memchr.diff: new patch to fix broken prefetching
+ in memchr() on alpha. Closes: #521737.
- -- Aurelien Jarno <aurel32@debian.org> Mon, 27 Jul 2009 00:01:57 +0200
+ -- Aurelien Jarno <aurel32@debian.org> Mon, 27 Jul 2009 11:18:42 +0200
eglibc (2.9-22) unstable; urgency=low
Added: glibc-package/trunk/debian/patches/alpha/submitted-asm-memchr.diff
===================================================================
--- glibc-package/trunk/debian/patches/alpha/submitted-asm-memchr.diff (rev 0)
+++ glibc-package/trunk/debian/patches/alpha/submitted-asm-memchr.diff 2009-07-27 09:19:53 UTC (rev 3704)
@@ -0,0 +1,117 @@
+2009-07-27 Aurelien Jarno <aurelien@aurel32.net>
+
+ * sysdeps/alpha/memchr.S: Use prefetch load.
+ * sysdeps/alpha/alphaev6/memchr.S: Likewise.
+
+--- a/sysdeps/alpha/alphaev6/memchr.S
++++ b/sysdeps/alpha/alphaev6/memchr.S
+@@ -127,7 +127,7 @@ $first_quad:
+ cmpbge $31, $1, $2 # E :
+ bne $2, $found_it # U :
+ # At least one byte left to process.
+- ldq $1, 8($0) # L :
++ ldq $31, 8($0) # L :
+ subq $5, 1, $18 # E : U L U L
+
+ addq $0, 8, $0 # E :
+@@ -143,38 +143,38 @@ $first_quad:
+ and $4, 8, $4 # E : odd number of quads?
+ bne $4, $odd_quad_count # U :
+ # At least three quads remain to be accessed
+- mov $1, $4 # E : L U L U : move prefetched value to correct reg
++ nop # E : L U L U : padding only; the quad is now loaded inside the loop
+
+ .align 4
+ $unrolled_loop:
+- ldq $1, 8($0) # L : prefetch $1
+- xor $17, $4, $2 # E :
+- cmpbge $31, $2, $2 # E :
+- bne $2, $found_it # U : U L U L
++ ldq $1, 0($0) # L : load quad
++ xor $17, $1, $2 # E :
++ ldq $31, 8($0) # L : prefetch next quad
++ cmpbge $31, $2, $2 # E : U L U L
+
++ bne $2, $found_it # U :
+ addq $0, 8, $0 # E :
+ nop # E :
+ nop # E :
+- nop # E :
+
+ $odd_quad_count:
++ ldq $1, 0($0) # L : load quad
+ xor $17, $1, $2 # E :
+- ldq $4, 8($0) # L : prefetch $4
++ ldq $31, 8($0) # L : prefetch next quad
+ cmpbge $31, $2, $2 # E :
+- addq $0, 8, $6 # E :
+
++ addq $0, 8, $6 # E :
+ bne $2, $found_it # U :
+ cmpult $6, $18, $6 # E :
+ addq $0, 8, $0 # E :
+- nop # E :
+
+ bne $6, $unrolled_loop # U :
+- mov $4, $1 # E : move prefetched value into $1
+ nop # E :
+ nop # E :
+-
+-$final: subq $5, $0, $18 # E : $18 <- number of bytes left to do
+ nop # E :
++
++$final: ldq $1, 0($0) # L : load last quad
++ subq $5, $0, $18 # E : $18 <- number of bytes left to do
+ nop # E :
+ bne $18, $last_quad # U :
+
+--- a/sysdeps/alpha/memchr.S
++++ b/sysdeps/alpha/memchr.S
+@@ -119,7 +119,7 @@ $first_quad:
+
+ # At least one byte left to process.
+
+- ldq t0, 8(v0) # e0 :
++ ldq zero, 8(v0) # e0 : prefetch next quad
+ subq t4, 1, a2 # .. e1 :
+ addq v0, 8, v0 #-e0 :
+
+@@ -138,19 +138,19 @@ $first_quad:
+
+ # At least three quads remain to be accessed
+
+- mov t0, t3 # e0 : move prefetched value to correct reg
+-
+ .align 4
+ $unrolled_loop:
+- ldq t0, 8(v0) #-e0 : prefetch t0
+- xor a1, t3, t1 # .. e1 :
+- cmpbge zero, t1, t1 # e0 :
+- bne t1, $found_it # .. e1 :
++ ldq t0, 0(v0) # e0 : load quad
++ xor a1, t0, t1 # .. e1 :
++ ldq zero, 8(v0) # e0 : prefetch next quad
+ cmpbge zero, t1, t1 # .. e1 :
++ bne t1, $found_it # e0 :
+
+- addq v0, 8, v0 #-e0 :
++ addq v0, 8, v0 # e1 :
+ $odd_quad_count:
++ ldq t0, 0(v0) # e0 : load quad
+ xor a1, t0, t1 # .. e1 :
+- ldq t3, 8(v0) # e0 : prefetch t3
++ ldq zero, 8(v0) # e0 : prefetch next quad
+ cmpbge zero, t1, t1 # .. e1 :
+ addq v0, 8, t5 #-e0 :
+ bne t1, $found_it # .. e1 :
+@@ -159,8 +159,8 @@ $odd_quad_count:
+ addq v0, 8, v0 # .. e1 :
+ bne t5, $unrolled_loop #-e1 :
+
+- mov t3, t0 # e0 : move prefetched value into t0
+-$final: subq t4, v0, a2 # .. e1 : a2 <- number of bytes left to do
++$final: ldq t0, 0(v0) # e0 : load last quad
++ subq t4, v0, a2 # .. e1 : a2 <- number of bytes left to do
+ bne a2, $last_quad # e1 :
+
+ $not_found:
Modified: glibc-package/trunk/debian/patches/series
===================================================================
--- glibc-package/trunk/debian/patches/series 2009-07-26 23:33:24 UTC (rev 3703)
+++ glibc-package/trunk/debian/patches/series 2009-07-27 09:19:53 UTC (rev 3704)
@@ -52,6 +52,7 @@
alpha/submitted-fpu-round.diff
alpha/submitted-PTR_MANGLE.diff
alpha/submitted-procfs_h.diff
+alpha/submitted-asm-memchr.diff
amd64/local-biarch.diff
amd64/local-clone.diff
Reply to: