This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH 3/7] Remove L(overlapping) from memcpy-sse2-unaligned.S
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: libc-alpha at sourceware dot org
- Cc: Ondrej Bilka <neleai at seznam dot cz>
- Date: Mon, 7 Mar 2016 09:36:26 -0800
- Subject: [PATCH 3/7] Remove L(overlapping) from memcpy-sse2-unaligned.S
- Authentication-results: sourceware.org; auth=none
- References: <1457372190-12196-1-git-send-email-hjl dot tools at gmail dot com>
Since memcpy doesn't need to check overlapping source and destination,
we can remove L(overlapping).
[BZ #19776]
* sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
(L(overlapping)): Removed.
---
sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S | 47 +-----------------------
1 file changed, 2 insertions(+), 45 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
index 19d8aa6..335a498 100644
--- a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
@@ -25,12 +25,8 @@
ENTRY(__memcpy_sse2_unaligned)
movq %rdi, %rax
- movq %rsi, %r11
- leaq (%rdx,%rdx), %rcx
- subq %rdi, %r11
- subq %rdx, %r11
- cmpq %rcx, %r11
- jb L(overlapping)
+ testq %rdx, %rdx
+ je L(return)
cmpq $16, %rdx
jbe L(less_16)
movdqu (%rsi), %xmm8
@@ -89,45 +85,6 @@ L(loop):
cmpq %rcx, %rdx
jne L(loop)
ret
-L(overlapping):
- testq %rdx, %rdx
- .p2align 4,,5
- je L(return)
- movq %rdx, %r9
- leaq 16(%rsi), %rcx
- leaq 16(%rdi), %r8
- shrq $4, %r9
- movq %r9, %r11
- salq $4, %r11
- cmpq %rcx, %rdi
- setae %cl
- cmpq %r8, %rsi
- setae %r8b
- orl %r8d, %ecx
- cmpq $15, %rdx
- seta %r8b
- testb %r8b, %cl
- je .L21
- testq %r11, %r11
- je .L21
- xorl %ecx, %ecx
- xorl %r8d, %r8d
-.L7:
- movdqu (%rsi,%rcx), %xmm8
- addq $1, %r8
- movdqu %xmm8, (%rdi,%rcx)
- addq $16, %rcx
- cmpq %r8, %r9
- ja .L7
- cmpq %r11, %rdx
- je L(return)
-.L21:
- movzbl (%rsi,%r11), %ecx
- movb %cl, (%rdi,%r11)
- addq $1, %r11
- cmpq %r11, %rdx
- ja .L21
- ret
L(less_16):
testb $24, %dl
jne L(between_9_16)
--
2.5.0