This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 5/7] Enable __mempcpy_sse2_unaligned


Check Fast_Unaligned_Load for __mempcpy_sse2_unaligned.  The new
selection order is:

1. __mempcpy_avx_unaligned if AVX_Fast_Unaligned_Load bit is set.
2. __mempcpy_sse2_unaligned if Fast_Unaligned_Load bit is set.
3. __mempcpy_sse2 if SSSE3 isn't available.
4. __mempcpy_ssse3_back if Fast_Copy_Backward bit it set.
5. __mempcpy_ssse3

	[BZ #19776]
	* sysdeps/x86_64/multiarch/mempcpy.S (__mempcpy): Check
	Fast_Unaligned_Load to enable __mempcpy_sse2_unaligned.
---
 sysdeps/x86_64/multiarch/mempcpy.S | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index ed78623..1314d76 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -33,19 +33,22 @@ ENTRY(__mempcpy)
 	jz	1f
 	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
 	jz	1f
-	leaq    __mempcpy_avx512_no_vzeroupper(%rip), %rax
+	lea	__mempcpy_avx512_no_vzeroupper(%rip), %RAX_LP
 	ret
 #endif
-1:	leaq	__mempcpy_sse2(%rip), %rax
+1:	lea	__mempcpy_avx_unaligned(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
+	jnz	2f
+	lea	__mempcpy_sse2_unaligned(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+	jnz	2f
+	lea	__mempcpy_sse2(%rip), %RAX_LP
 	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leaq	__mempcpy_ssse3(%rip), %rax
+	lea	__mempcpy_ssse3_back(%rip), %RAX_LP
 	HAS_ARCH_FEATURE (Fast_Copy_Backward)
-	jz	2f
-	leaq	__mempcpy_ssse3_back(%rip), %rax
-	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz	2f
-	leaq	__mempcpy_avx_unaligned(%rip), %rax
+	jnz	2f
+	lea	__mempcpy_ssse3(%rip), %RAX_LP
 2:	ret
 END(__mempcpy)
 
-- 
2.5.0


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]