This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH v3] faster strlen on x64
- From: Ondřej Bílka <neleai at seznam dot cz>
- To: libc-alpha at sourceware dot org
- Date: Tue, 5 Feb 2013 23:20:14 +0100
- Subject: Re: [PATCH v3] faster strlen on x64
- References: <20130131095215.GA31998@domone.kolej.mff.cuni.cz>
On Thu, Jan 31, 2013 at 10:52:15AM +0100, Ondřej Bílka wrote:
> Hi,
>
> After testing by Liuba I prepared final version of my patch
> (attached and on neleai/strlen branch.).
>
I realized that it was not entirely complete. I forgot to add
padding (its absence made the fx10 loop slower), which is done by
additionally applying this patch.
diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
index 72584fb..1e8f9be 100644
--- a/sysdeps/x86_64/strlen.S
+++ b/sysdeps/x86_64/strlen.S
@@ -101,6 +101,7 @@ L(n_nonzero):
andq $-16, %rax
PROLOG(loop)
+ .p2align 4
L(next):
andq $-64, %rax
PROLOG(loop_init)
@@ -115,12 +116,13 @@ L(strnlen_ret):
bsfq %rdx, %rax
ret
#endif
-
+ .p2align 4
L(loop_init):
pxor %xmm9, %xmm9
pxor %xmm10, %xmm10
pxor %xmm11, %xmm11
#ifdef AS_STRNLEN
+ .p2align 4
L(loop):
addq $64, %rax
@@ -137,6 +139,7 @@ L(loop):
jne L(exit)
jmp L(loop)
+ .p2align 4
L(exit_end):
cmp %rax, %r11
je L(first)
@@ -150,6 +153,7 @@ L(first):
subq %rdi, %rax
ret
+ .p2align 4
L(exit):
pxor %xmm8, %xmm8
FIND_ZERO
@@ -160,6 +164,7 @@ L(exit):
ret
#else
+ .p2align 4
L(loop):
movdqa 64(%rax), %xmm8
@@ -183,6 +188,7 @@ L(loop):
jne L(exit0)
jmp L(loop)
+ .p2align 4
L(exit64):
addq $64, %rax
L(exit0):