This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] x86_64: Remove 9 REX bytes from memchr.S
- From: "H.J. Lu" <hongjiu dot lu at intel dot com>
- To: GNU C Library <libc-alpha at sourceware dot org>
- Date: Sat, 20 May 2017 07:50:06 -0700
- Subject: [PATCH] x86_64: Remove 9 REX bytes from memchr.S
- Authentication-results: sourceware.org; auth=none
- Reply-to: "H.J. Lu" <hjl dot tools at gmail dot com>
There is no need to use 64-bit registers when only the lower 32 bits
are non-zero.
Tested on x86-64. OK for master?
H.J.
---
* sysdeps/x86_64/memchr.S (MEMCHR): Use 32-bit registers for
the lower 32 bits.
---
sysdeps/x86_64/memchr.S | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index 8242f2d..f1dad9e 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -44,10 +44,10 @@ ENTRY(MEMCHR)
punpcklbw %xmm1, %xmm1
#endif
- and $63, %rcx
+ and $63, %ecx
pshufd $0, %xmm1, %xmm1
- cmp $48, %rcx
+ cmp $48, %ecx
ja L(crosscache)
movdqu (%rdi), %xmm0
@@ -59,7 +59,7 @@ ENTRY(MEMCHR)
sub $16, %rdx
jbe L(return_null)
add $16, %rdi
- and $15, %rcx
+ and $15, %ecx
and $-16, %rdi
add %rcx, %rdx
sub $64, %rdx
@@ -68,7 +68,7 @@ ENTRY(MEMCHR)
.p2align 4
L(crosscache):
- and $15, %rcx
+ and $15, %ecx
and $-16, %rdi
movdqa (%rdi), %xmm0
@@ -162,7 +162,7 @@ L(loop_prolog):
mov %rdi, %rcx
and $-64, %rdi
- and $63, %rcx
+ and $63, %ecx
add %rcx, %rdx
.p2align 4
@@ -214,7 +214,7 @@ L(align64_loop):
.p2align 4
L(exit_loop):
- add $32, %rdx
+ add $32, %edx
jle L(exit_loop_32)
movdqa (%rdi), %xmm0
@@ -234,7 +234,7 @@ L(exit_loop):
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32_1)
- sub $16, %rdx
+ sub $16, %edx
jle L(return_null)
PCMPEQ 48(%rdi), %xmm1
@@ -246,13 +246,13 @@ L(exit_loop):
.p2align 4
L(exit_loop_32):
- add $32, %rdx
+ add $32, %edx
movdqa (%rdi), %xmm0
PCMPEQ %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches_1)
- sub $16, %rdx
+ sub $16, %edx
jbe L(return_null)
PCMPEQ 16(%rdi), %xmm1
--
2.9.4