This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH v3] faster strlen on x64


On Thu, Jan 31, 2013 at 10:52:15AM +0100, Ondřej Bílka wrote:
> Hi, 
> 
> After testing by Liuba I prepared the final version of my patch
> (attached and on the neleai/strlen branch).
> 
I realized that it was not entirely complete. I forgot to add
padding (omitting it made the fx10 loop slower), which is done by
additionally applying this patch.

diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
index 72584fb..1e8f9be 100644
--- a/sysdeps/x86_64/strlen.S
+++ b/sysdeps/x86_64/strlen.S
@@ -101,6 +101,7 @@ L(n_nonzero):
 	andq	$-16, %rax
 	PROLOG(loop)
 
+	.p2align 4
 L(next):
 	andq	$-64, %rax
 	PROLOG(loop_init)
@@ -115,12 +116,13 @@ L(strnlen_ret):
 	bsfq	%rdx, %rax
 	ret
 #endif
-
+	.p2align 4
 L(loop_init):
 	pxor	%xmm9, %xmm9
 	pxor	%xmm10, %xmm10
 	pxor	%xmm11, %xmm11
 #ifdef AS_STRNLEN
+	.p2align 4
 L(loop):
 
 	addq	$64, %rax
@@ -137,6 +139,7 @@ L(loop):
 	jne	L(exit)
 	jmp	L(loop)
 
+	.p2align 4
 L(exit_end):
 	cmp	%rax, %r11
 	je	L(first)
@@ -150,6 +153,7 @@ L(first):
 	subq	%rdi, %rax
 	ret
 
+	.p2align 4
 L(exit):
 	pxor	%xmm8, %xmm8
 	FIND_ZERO
@@ -160,6 +164,7 @@ L(exit):
 	ret
 
 #else
+	.p2align 4
 L(loop):
 
 	movdqa	64(%rax), %xmm8
@@ -183,6 +188,7 @@ L(loop):
 	jne	L(exit0)
 	jmp	L(loop)
 
+	.p2align 4
 L(exit64):
 	addq	$64, %rax
 L(exit0):


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]