From 14a1d7cc4c4fd5ee8e4e66b777221dd32a84efe8 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 4 Mar 2016 08:37:40 -0800 Subject: [PATCH] x86-64: Fix memcpy IFUNC selection Check Fast_Unaligned_Load, instead of Slow_BSF, and also check for Fast_Copy_Backward to enable __memcpy_ssse3_back. The existing selection order is replaced with the following selection order: 1. __memcpy_avx_unaligned if AVX_Fast_Unaligned_Load bit is set. 2. __memcpy_sse2_unaligned if Fast_Unaligned_Load bit is set. 3. __memcpy_sse2 if SSSE3 isn't available. 4. __memcpy_ssse3_back if Fast_Copy_Backward bit is set. 5. __memcpy_ssse3 [BZ #18880] * sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load, instead of Slow_BSF, and also check for Fast_Copy_Backward to enable __memcpy_ssse3_back. --- ChangeLog | 8 ++++++++ sysdeps/x86_64/multiarch/memcpy.S | 27 ++++++++++++++------------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7c5ee2dd19..7b36bd732d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2016-03-04 Amit Pawar + H.J. Lu + + [BZ #18880] + * sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load, + instead of Slow_BSF, and also check for Fast_Copy_Backward to + enable __memcpy_ssse3_back. + 2016-03-03 H.J. 
Lu [BZ #19758] diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S index 64a1bcd137..8882590e51 100644 --- a/sysdeps/x86_64/multiarch/memcpy.S +++ b/sysdeps/x86_64/multiarch/memcpy.S @@ -35,22 +35,23 @@ ENTRY(__new_memcpy) jz 1f HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) jz 1f - leaq __memcpy_avx512_no_vzeroupper(%rip), %rax + lea __memcpy_avx512_no_vzeroupper(%rip), %RAX_LP ret #endif -1: leaq __memcpy_avx_unaligned(%rip), %rax +1: lea __memcpy_avx_unaligned(%rip), %RAX_LP HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) - jz 2f - ret -2: leaq __memcpy_sse2(%rip), %rax - HAS_ARCH_FEATURE (Slow_BSF) - jnz 3f - leaq __memcpy_sse2_unaligned(%rip), %rax - ret -3: HAS_CPU_FEATURE (SSSE3) - jz 4f - leaq __memcpy_ssse3(%rip), %rax -4: ret + jnz 2f + lea __memcpy_sse2_unaligned(%rip), %RAX_LP + HAS_ARCH_FEATURE (Fast_Unaligned_Load) + jnz 2f + lea __memcpy_sse2(%rip), %RAX_LP + HAS_CPU_FEATURE (SSSE3) + jz 2f + lea __memcpy_ssse3_back(%rip), %RAX_LP + HAS_ARCH_FEATURE (Fast_Copy_Backward) + jnz 2f + lea __memcpy_ssse3(%rip), %RAX_LP +2: ret END(__new_memcpy) # undef ENTRY -- 2.43.5