Re: [COMMITED] faster memcpy on x64.

It looks like there is a mix-up in the merged patch. I think it is
supposed to test a different flag (at least that looks more logical):
you only need to turn it on for Bulldozer or whatever other AMD
machines the version is also good for.

diff --git a/sysdeps/x86_64/multiarch/memcpy.S
index a1e5031..f6a44d2 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -33,8 +33,8 @@ ENTRY(__new_memcpy)
        jne     1f
        call    __init_cpu_features
 1:     leaq    __memcpy_sse2(%rip), %rax
-       testl   $bit_Slow_BSF,
-       jnz     2f
+       testl   $bit_Fast_Unaligned_Load,
+       jz      2f
        leaq    __memcpy_sse2_unaligned(%rip), %rax
 2:     testl   $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)

You also forgot to remove from the ifunc-impl-list the version that is
now never used as memcpy:

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 28d3579..d6a7f4f 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -224,8 +224,6 @@ __libc_ifunc_impl_list (const char *name, struct
libc_ifunc_impl *array,

   /* Support sysdeps/x86_64/multiarch/memcpy.S.  */
   IFUNC_IMPL (i, name, memcpy,
-             IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3,
-                             __memcpy_ssse3_back)
              IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3)
              IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
              IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2))


On Mon, May 20, 2013 at 12:14 PM, Ondřej Bílka <> wrote:
> Commited.

