This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch hjl/benchtests/master updated. glibc-2.23-158-g66e8db6
- From: hjl at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 5 Apr 2016 01:42:19 -0000
- Subject: GNU C Library master sources branch hjl/benchtests/master updated. glibc-2.23-158-g66e8db6
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, hjl/benchtests/master has been updated
via 66e8db6b43e1b61bcc9a29a22ca12502a8290ccc (commit)
from 21069d2b1c7c21a08b87790df2db3106a5af0827 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=66e8db6b43e1b61bcc9a29a22ca12502a8290ccc
commit 66e8db6b43e1b61bcc9a29a22ca12502a8290ccc
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Mon Apr 4 18:41:53 2016 -0700
Remove L(loop)
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index 100d7b6..0eba85d 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -186,93 +186,6 @@ L(movsb):
L(nop):
ret
- .p2align 4
-L(more_2x_vec):
- cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
- jae L(large_data)
- jmp L(start_more_2x_vec)
-
- .p2align 4
-L(movsb_more_2x_vec):
- cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
- jae L(large_data)
- cmpq $REP_MOVSB_THRESHOLD, %rdx
- ja L(movsb)
-L(start_more_2x_vec):
- /* More than 2 * VEC. */
- cmpq %rsi, %rdi
- jb L(copy_forward)
- /* Source == destination is less common. */
- je L(nop)
- leaq (%rsi,%rdx), %rcx
- cmpq %rcx, %rdi
- jb L(more_2x_vec_overlap)
-L(copy_forward):
- leaq (%rdi,%rdx), %rcx
- cmpq %rcx, %rsi
- jb L(more_2x_vec_overlap)
-# if VEC_SIZE == 64
- /* Force 32-bit displacement to avoid long nop between
- instructions. */
- VMOVU.d32 (%rsi), %VEC(0)
-# else
- VMOVU (%rsi), %VEC(0)
-# endif
- VMOVU VEC_SIZE(%rsi), %VEC(1)
- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(2)
- VMOVU -(VEC_SIZE * 2)(%rsi,%rdx), %VEC(3)
- VMOVU %VEC(0), (%rdi)
- VMOVU %VEC(1), VEC_SIZE(%rdi)
- VMOVU %VEC(2), -VEC_SIZE(%rdi,%rdx)
- VMOVU %VEC(3), -(VEC_SIZE * 2)(%rdi,%rdx)
- cmpq $(VEC_SIZE * 4), %rdx
- jbe L(return)
- VMOVU (VEC_SIZE * 2)(%rsi), %VEC(0)
- VMOVU (VEC_SIZE * 3)(%rsi), %VEC(1)
- VMOVU -(VEC_SIZE * 3)(%rsi,%rdx), %VEC(2)
- VMOVU -(VEC_SIZE * 4)(%rsi,%rdx), %VEC(3)
- VMOVU %VEC(0), (VEC_SIZE * 2)(%rdi)
- VMOVU %VEC(1), (VEC_SIZE * 3)(%rdi)
- VMOVU %VEC(2), -(VEC_SIZE * 3)(%rdi,%rdx)
- VMOVU %VEC(3), -(VEC_SIZE * 4)(%rdi,%rdx)
- cmpq $(VEC_SIZE * 8), %rdx
- jbe L(return)
- leaq (VEC_SIZE * 4)(%rdi), %rcx
- addq %rdi, %rdx
- andq $-(VEC_SIZE * 4), %rdx
- andq $-(VEC_SIZE * 4), %rcx
- movq %rcx, %r11
- subq %rdi, %r11
- addq %r11, %rsi
- cmpq %rdx, %rcx
- je L(return)
- movq %rsi, %r10
- subq %rcx, %r10
- /* Force 32-bit displacement to avoid long nop between
- instructions. */
- leaq.d32 VEC_SIZE(%r10), %r9
-# if VEC_SIZE == 16
- leaq.d32 (VEC_SIZE * 2)(%r10), %r8
- leaq.d32 (VEC_SIZE * 3)(%r10), %r11
-# else
- leaq (VEC_SIZE * 2)(%r10), %r8
- leaq (VEC_SIZE * 3)(%r10), %r11
-# endif
- .p2align 4
-L(loop):
- VMOVU (%rcx,%r10), %VEC(0)
- VMOVU (%rcx,%r9), %VEC(1)
- VMOVU (%rcx,%r8), %VEC(2)
- VMOVU (%rcx,%r11), %VEC(3)
- VMOVA %VEC(0), (%rcx)
- VMOVA %VEC(1), VEC_SIZE(%rcx)
- VMOVA %VEC(2), (VEC_SIZE * 2)(%rcx)
- VMOVA %VEC(3), (VEC_SIZE * 3)(%rcx)
- addq $(VEC_SIZE * 4), %rcx
- cmpq %rcx, %rdx
- jne L(loop)
- VZEROUPPER
- ret
L(less_vec):
/* Less than 1 VEC. */
# if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
@@ -338,11 +251,19 @@ L(between_2_3):
movw %si, (%rdi)
ret
-# if VEC_SIZE > 16
- /* Align to 16 bytes to avoid long nop between instructions. */
.p2align 4
-# endif
-L(more_2x_vec_overlap):
+L(more_2x_vec):
+ cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
+ jae L(large_data)
+ jmp L(start_more_2x_vec)
+
+ .p2align 4
+L(movsb_more_2x_vec):
+ cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
+ jae L(large_data)
+ cmpq $REP_MOVSB_THRESHOLD, %rdx
+ ja L(movsb)
+L(start_more_2x_vec):
/* More than 2 * VEC and there is overlap between destination
and source. */
cmpq $(VEC_SIZE * 8), %rdx
@@ -416,14 +337,15 @@ L(more_8x_vec):
/* Force 32-bit displacement to avoid long nop between
instructions. */
VMOVU.d32 (%rsi), %VEC(4)
-# if VEC_SIZE == 16
VMOVU.d32 -VEC_SIZE(%rsi, %rdx), %VEC(5)
+ VMOVU.d32 -(VEC_SIZE * 2)(%rsi, %rdx), %VEC(6)
+# if VEC_SIZE == 16
+ VMOVU.d32 -(VEC_SIZE * 3)(%rsi, %rdx), %VEC(7)
+ VMOVU.d32 -(VEC_SIZE * 4)(%rsi, %rdx), %VEC(8)
# else
- VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(5)
-# endif
- VMOVU -(VEC_SIZE * 2)(%rsi, %rdx), %VEC(6)
VMOVU -(VEC_SIZE * 3)(%rsi, %rdx), %VEC(7)
VMOVU -(VEC_SIZE * 4)(%rsi, %rdx), %VEC(8)
+# endif
/* Adjust source. */
subq %r8, %rsi
/* Adjust destination which should be aligned now. */
-----------------------------------------------------------------------
Summary of changes:
.../x86_64/multiarch/memmove-vec-unaligned-erms.S | 112 +++-----------------
1 files changed, 17 insertions(+), 95 deletions(-)
hooks/post-receive
--
GNU C Library master sources