about summary refs log tree commit diff
diff options
context:
space:
mode:
author Richard Henderson <rth@twiddle.net> 2010-05-04 09:06:15 -0700
committer Richard Henderson <rth@twiddle.net> 2010-05-04 09:06:15 -0700
commit 200b5faee1cfac10d831e9b278ef294ca3119f53 (patch)
tree 3ba28a6440d78a1a265fca0681d6ede0ca188099
parent 2ec03f509faf40847d3b18b3dd51d0b003e935c8 (diff)
download glibc-200b5faee1cfac10d831e9b278ef294ca3119f53.tar.xz
glibc-200b5faee1cfac10d831e9b278ef294ca3119f53.zip
alpha: fix memchr to not cause memory faults.
http://www.mail-archive.com/debian-alpha@lists.debian.org/msg25088.html
Signed-off-by: Matt Turner <mattst88@gmail.com>
-rw-r--r-- ChangeLog.alpha 5
-rw-r--r-- sysdeps/alpha/alphaev6/memchr.S 26
-rw-r--r-- sysdeps/alpha/memchr.S 22
3 files changed, 29 insertions, 24 deletions
diff --git a/ChangeLog.alpha b/ChangeLog.alpha
index c3bbe12582..39d58166bc 100644
--- a/ChangeLog.alpha
+++ b/ChangeLog.alpha
@@ -1,4 +1,9 @@
2010-05-03 Aurelien Jarno <aurelien@aurel32.net>
+
+ * sysdeps/alpha/memchr.S: Use prefetch load.
+ * sysdeps/alpha/alphaev6/memchr.S: Likewise.
+
+2010-05-03 Aurelien Jarno <aurelien@aurel32.net>
* sysdeps/alpha/dl-machine.h: Add dl-procinfo support.
* sysdeps/alpha/dl-procinfo.c: New.
diff --git a/sysdeps/alpha/alphaev6/memchr.S b/sysdeps/alpha/alphaev6/memchr.S
index 88e91fa988..fe77cd897b 100644
--- a/sysdeps/alpha/alphaev6/memchr.S
+++ b/sysdeps/alpha/alphaev6/memchr.S
@@ -127,7 +127,7 @@ $first_quad:
cmpbge $31, $1, $2 # E :
bne $2, $found_it # U :
# At least one byte left to process.
- ldq $1, 8($0) # L :
+ ldq $31, 8($0) # L :
subq $5, 1, $18 # E : U L U L
addq $0, 8, $0 # E :
@@ -143,38 +143,38 @@ $first_quad:
and $4, 8, $4 # E : odd number of quads?
bne $4, $odd_quad_count # U :
# At least three quads remain to be accessed
- mov $1, $4 # E : L U L U : move prefetched value to correct reg
+ nop # E : L U L U : filler only; the loop now loads its own quad
.align 4
$unrolled_loop:
- ldq $1, 8($0) # L : prefetch $1
- xor $17, $4, $2 # E :
- cmpbge $31, $2, $2 # E :
- bne $2, $found_it # U : U L U L
+ ldq $1, 0($0) # L : load quad
+ xor $17, $1, $2 # E :
+ ldq $31, 8($0) # L : prefetch next quad
+ cmpbge $31, $2, $2 # E : U L U L
+ bne $2, $found_it # U :
addq $0, 8, $0 # E :
nop # E :
nop # E :
- nop # E :
$odd_quad_count:
+ ldq $1, 0($0) # L : load quad
xor $17, $1, $2 # E :
- ldq $4, 8($0) # L : prefetch $4
+ ldq $31, 8($0) # L : prefetch next quad
cmpbge $31, $2, $2 # E :
- addq $0, 8, $6 # E :
+ addq $0, 8, $6 # E :
bne $2, $found_it # U :
cmpult $6, $18, $6 # E :
addq $0, 8, $0 # E :
- nop # E :
bne $6, $unrolled_loop # U :
- mov $4, $1 # E : move prefetched value into $1
nop # E :
nop # E :
-
-$final: subq $5, $0, $18 # E : $18 <- number of bytes left to do
nop # E :
+
+$final: ldq $1, 0($0) # L : load last quad
+ subq $5, $0, $18 # E : $18 <- number of bytes left to do
nop # E :
bne $18, $last_quad # U :
diff --git a/sysdeps/alpha/memchr.S b/sysdeps/alpha/memchr.S
index 5d713d53ba..87c7fb10c0 100644
--- a/sysdeps/alpha/memchr.S
+++ b/sysdeps/alpha/memchr.S
@@ -119,7 +119,7 @@ $first_quad:
# At least one byte left to process.
- ldq t0, 8(v0) # e0 :
+ ldq zero, 8(v0) # e0 : prefetch next quad
subq t4, 1, a2 # .. e1 :
addq v0, 8, v0 #-e0 :
@@ -138,19 +138,19 @@ $first_quad:
# At least three quads remain to be accessed
- mov t0, t3 # e0 : move prefetched value to correct reg
-
.align 4
$unrolled_loop:
- ldq t0, 8(v0) #-e0 : prefetch t0
- xor a1, t3, t1 # .. e1 :
- cmpbge zero, t1, t1 # e0 :
- bne t1, $found_it # .. e1 :
+ ldq t0, 0(v0) # e0 : load quad
+ xor a1, t0, t1 # .. e1 :
+ ldq zero, 8(v0) # e0 : prefetch next quad
+ cmpbge zero, t1, t1 # .. e1:
+ bne t1, $found_it # e0 :
- addq v0, 8, v0 #-e0 :
+ addq v0, 8, v0 # e1 :
$odd_quad_count:
+ ldq t0, 0(v0) # e0 : load quad
xor a1, t0, t1 # .. e1 :
- ldq t3, 8(v0) # e0 : prefetch t3
+ ldq zero, 8(v0) # e0 : prefetch next quad
cmpbge zero, t1, t1 # .. e1 :
addq v0, 8, t5 #-e0 :
bne t1, $found_it # .. e1 :
@@ -159,8 +159,8 @@ $odd_quad_count:
addq v0, 8, v0 # .. e1 :
bne t5, $unrolled_loop #-e1 :
- mov t3, t0 # e0 : move prefetched value into t0
-$final: subq t4, v0, a2 # .. e1 : a2 <- number of bytes left to do
+$final: ldq t0, 0(v0) # e0 : load last quad
+ subq t4, v0, a2 # .. e1 : a2 <- number of bytes left to do
bne a2, $last_quad # e1 :
$not_found: