This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

GNU C Library master sources branch hjl/benchtests/master updated. glibc-2.23-158-g66e8db6

From: hjl at sourceware dot org
To: glibc-cvs at sourceware dot org
Date: 5 Apr 2016 01:42:19 -0000
Subject: GNU C Library master sources branch hjl/benchtests/master updated. glibc-2.23-158-g66e8db6

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, hjl/benchtests/master has been updated
       via  66e8db6b43e1b61bcc9a29a22ca12502a8290ccc (commit)
      from  21069d2b1c7c21a08b87790df2db3106a5af0827 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=66e8db6b43e1b61bcc9a29a22ca12502a8290ccc

commit 66e8db6b43e1b61bcc9a29a22ca12502a8290ccc
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Mon Apr 4 18:41:53 2016 -0700

    Remove L(loop)

diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index 100d7b6..0eba85d 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -186,93 +186,6 @@ L(movsb):
 L(nop):
 	ret
 
-	.p2align 4
-L(more_2x_vec):
-	cmpq	__x86_shared_non_temporal_threshold(%rip), %rdx
-	jae	L(large_data)
-	jmp	L(start_more_2x_vec)
-
-	.p2align 4
-L(movsb_more_2x_vec):
-	cmpq	__x86_shared_non_temporal_threshold(%rip), %rdx
-	jae	L(large_data)
-	cmpq	$REP_MOVSB_THRESHOLD, %rdx
-	ja	L(movsb)
-L(start_more_2x_vec):
-	/* More than 2 * VEC.  */
-	cmpq	%rsi, %rdi
-	jb	L(copy_forward)
-	/* Source == destination is less common.  */
-	je	L(nop)
-	leaq	(%rsi,%rdx), %rcx
-	cmpq	%rcx, %rdi
-	jb	L(more_2x_vec_overlap)
-L(copy_forward):
-	leaq	(%rdi,%rdx), %rcx
-	cmpq	%rcx, %rsi
-	jb	L(more_2x_vec_overlap)
-# if  VEC_SIZE == 64
-	/* Force 32-bit displacement to avoid long nop between
-	   instructions.  */
-	VMOVU.d32 (%rsi), %VEC(0)
-# else
-	VMOVU	(%rsi), %VEC(0)
-# endif
-	VMOVU	VEC_SIZE(%rsi), %VEC(1)
-	VMOVU	-VEC_SIZE(%rsi,%rdx), %VEC(2)
-	VMOVU	-(VEC_SIZE * 2)(%rsi,%rdx), %VEC(3)
-	VMOVU	%VEC(0), (%rdi)
-	VMOVU	%VEC(1), VEC_SIZE(%rdi)
-	VMOVU	%VEC(2), -VEC_SIZE(%rdi,%rdx)
-	VMOVU	%VEC(3), -(VEC_SIZE * 2)(%rdi,%rdx)
-	cmpq	$(VEC_SIZE * 4), %rdx
-	jbe	L(return)
-	VMOVU	(VEC_SIZE * 2)(%rsi), %VEC(0)
-	VMOVU	(VEC_SIZE * 3)(%rsi), %VEC(1)
-	VMOVU	-(VEC_SIZE * 3)(%rsi,%rdx), %VEC(2)
-	VMOVU	-(VEC_SIZE * 4)(%rsi,%rdx), %VEC(3)
-	VMOVU	%VEC(0), (VEC_SIZE * 2)(%rdi)
-	VMOVU	%VEC(1), (VEC_SIZE * 3)(%rdi)
-	VMOVU	%VEC(2), -(VEC_SIZE * 3)(%rdi,%rdx)
-	VMOVU	%VEC(3), -(VEC_SIZE * 4)(%rdi,%rdx)
-	cmpq	$(VEC_SIZE * 8), %rdx
-	jbe	L(return)
-	leaq	(VEC_SIZE * 4)(%rdi), %rcx
-	addq	%rdi, %rdx
-	andq	$-(VEC_SIZE * 4), %rdx
-	andq	$-(VEC_SIZE * 4), %rcx
-	movq	%rcx, %r11
-	subq	%rdi, %r11
-	addq	%r11, %rsi
-	cmpq	%rdx, %rcx
-	je	L(return)
-	movq	%rsi, %r10
-	subq	%rcx, %r10
-	/* Force 32-bit displacement to avoid long nop between
-	   instructions.  */
-	leaq.d32 VEC_SIZE(%r10), %r9
-# if  VEC_SIZE == 16
-	leaq.d32 (VEC_SIZE * 2)(%r10), %r8
-	leaq.d32 (VEC_SIZE * 3)(%r10), %r11
-# else
-	leaq	(VEC_SIZE * 2)(%r10), %r8
-	leaq	(VEC_SIZE * 3)(%r10), %r11
-# endif
-	.p2align 4
-L(loop):
-	VMOVU	(%rcx,%r10), %VEC(0)
-	VMOVU	(%rcx,%r9), %VEC(1)
-	VMOVU	(%rcx,%r8), %VEC(2)
-	VMOVU	(%rcx,%r11), %VEC(3)
-	VMOVA	%VEC(0), (%rcx)
-	VMOVA	%VEC(1), VEC_SIZE(%rcx)
-	VMOVA	%VEC(2), (VEC_SIZE * 2)(%rcx)
-	VMOVA	%VEC(3), (VEC_SIZE * 3)(%rcx)
-	addq	$(VEC_SIZE * 4), %rcx
-	cmpq	%rcx, %rdx
-	jne	L(loop)
-	VZEROUPPER
-	ret
 L(less_vec):
 	/* Less than 1 VEC.  */
 # if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
@@ -338,11 +251,19 @@ L(between_2_3):
 	movw	%si, (%rdi)
 	ret
 
-# if VEC_SIZE > 16
-	/* Align to 16 bytes to avoid long nop between instructions.  */
 	.p2align 4
-# endif
-L(more_2x_vec_overlap):
+L(more_2x_vec):
+	cmpq	__x86_shared_non_temporal_threshold(%rip), %rdx
+	jae	L(large_data)
+	jmp	L(start_more_2x_vec)
+
+	.p2align 4
+L(movsb_more_2x_vec):
+	cmpq	__x86_shared_non_temporal_threshold(%rip), %rdx
+	jae	L(large_data)
+	cmpq	$REP_MOVSB_THRESHOLD, %rdx
+	ja	L(movsb)
+L(start_more_2x_vec):
 	/* More than 2 * VEC and there is overlap bewteen destination
 	   and source.  */
 	cmpq	$(VEC_SIZE * 8), %rdx
@@ -416,14 +337,15 @@ L(more_8x_vec):
 	/* Force 32-bit displacement to avoid long nop between
 	   instructions.  */
 	VMOVU.d32 (%rsi), %VEC(4)
-# if VEC_SIZE == 16
 	VMOVU.d32 -VEC_SIZE(%rsi, %rdx), %VEC(5)
+	VMOVU.d32 -(VEC_SIZE * 2)(%rsi, %rdx), %VEC(6)
+# if VEC_SIZE == 16
+	VMOVU.d32 -(VEC_SIZE * 3)(%rsi, %rdx), %VEC(7)
+	VMOVU.d32 -(VEC_SIZE * 4)(%rsi, %rdx), %VEC(8)
 # else
-	VMOVU	-VEC_SIZE(%rsi, %rdx), %VEC(5)
-# endif
-	VMOVU	-(VEC_SIZE * 2)(%rsi, %rdx), %VEC(6)
 	VMOVU	-(VEC_SIZE * 3)(%rsi, %rdx), %VEC(7)
 	VMOVU	-(VEC_SIZE * 4)(%rsi, %rdx), %VEC(8)
+# endif
 	/* Adjust source.  */
 	subq	%r8, %rsi
 	/* Adjust destination which should be aligned now.  */

-----------------------------------------------------------------------

Summary of changes:
 .../x86_64/multiarch/memmove-vec-unaligned-erms.S  |  112 +++-----------------
 1 files changed, 17 insertions(+), 95 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]