This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[committed, PATCH] Fix memmove-vec-unaligned-erms.S
- From: "H.J. Lu" <hongjiu dot lu at intel dot com>
- To: GNU C Library <libc-alpha at sourceware dot org>
- Date: Sun, 3 Apr 2016 12:44:06 -0700
- Subject: [committed, PATCH] Fix memmove-vec-unaligned-erms.S
- Authentication-results: sourceware.org; auth=none
- Reply-to: "H.J. Lu" <hjl dot tools at gmail dot com>
__mempcpy_erms and __memmove_erms can't be placed between __memmove_chk
and __memmove since doing so breaks __memmove_chk.
Don't check source == destination first since it is less common.
Tested on x86-64.
H.J.
---
* sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:
(__mempcpy_erms, __memmove_erms): Moved before __mempcpy_chk
with unaligned_erms.
(__memmove_erms): Skip if source == destination.
(__memmove_unaligned_erms): Don't check source == destination
first.
---
ChangeLog | 9 ++++
.../x86_64/multiarch/memmove-vec-unaligned-erms.S | 54 ++++++++++++----------
2 files changed, 39 insertions(+), 24 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index d22231b..e93b7bf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2016-04-03 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:
+ (__mempcpy_erms, __memmove_erms): Moved before __mempcpy_chk
+ with unaligned_erms.
+ (__memmove_erms): Skip if source == destination.
+ (__memmove_unaligned_erms): Don't check source == destination
+ first.
+
2016-04-01 H.J. Lu <hongjiu.lu@intel.com>
* sysdeps/x86/cpu-features.c (init_cpu_features): Don't set
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index cf645dd..66779a3 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -95,46 +95,30 @@ L(start):
ret
END (MEMMOVE_SYMBOL (__memmove, unaligned_2))
-# ifdef SHARED
-ENTRY (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
- cmpq %rdx, %rcx
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
-# endif
-
-ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
- movq %rdi, %rax
- addq %rdx, %rax
- jmp L(start_erms)
-END (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
-
-# ifdef SHARED
-ENTRY (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
- cmpq %rdx, %rcx
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
-# endif
-
# if VEC_SIZE == 16
/* Only used to measure performance of REP MOVSB. */
# ifdef SHARED
ENTRY (__mempcpy_erms)
movq %rdi, %rax
addq %rdx, %rax
- jmp L(movsb)
+ jmp L(start_movsb)
END (__mempcpy_erms)
# endif
ENTRY (__memmove_erms)
movq %rdi, %rax
+L(start_movsb):
movq %rdx, %rcx
cmpq %rsi, %rdi
- jbe 1f
+ jb 1f
+ /* Source == destination is less common. */
+ je 2f
leaq (%rsi,%rcx), %rdx
cmpq %rdx, %rdi
jb L(movsb_backward)
1:
rep movsb
+2:
ret
L(movsb_backward):
leaq -1(%rdi,%rcx), %rdi
@@ -147,6 +131,26 @@ END (__memmove_erms)
strong_alias (__memmove_erms, __memcpy_erms)
# endif
+# ifdef SHARED
+ENTRY (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
+# endif
+
+ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
+ movq %rdi, %rax
+ addq %rdx, %rax
+ jmp L(start_erms)
+END (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
+
+# ifdef SHARED
+ENTRY (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
+# endif
+
ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
movq %rdi, %rax
L(start_erms):
@@ -166,8 +170,9 @@ L(return):
L(movsb):
cmpq %rsi, %rdi
- je L(nop)
jb 1f
+ /* Source == destination is less common. */
+ je L(nop)
leaq (%rsi,%rdx), %r9
cmpq %r9, %rdi
/* Avoid slow backward REP MOVSB. */
@@ -191,8 +196,9 @@ L(movsb_more_2x_vec):
L(more_2x_vec):
/* More than 2 * VEC. */
cmpq %rsi, %rdi
- je L(nop)
jb L(copy_forward)
+ /* Source == destination is less common. */
+ je L(nop)
leaq (%rsi,%rdx), %rcx
cmpq %rcx, %rdi
jb L(more_2x_vec_overlap)
--
2.5.5