This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


RE: [PATCH x86_64] Update memcpy, mempcpy and memmove selection order for Excavator CPU BZ #19583


>It was done based on the assumption that an AVX-enabled machine has fast AVX unaligned load.  If it isn't true for AMD CPUs, we can enable it for all Intel AVX CPUs and you can set it for AMD CPUs properly.

The memcpy selection still needs to be fixed; otherwise the SSE2_Unaligned version is selected. Is it OK to fix it in the following way? If not, please suggest an alternative.
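
For context, here is the existing __new_memcpy selection order rendered as a rough, standalone C sketch (not glibc code: the enum, the has_feature() helper and the simulated feature set are made up for illustration). Even once the cpu-features.c hunk below sets Fast_Copy_Backward and Prefer_Fast_Copy_Backward for Excavator, the current dispatch stops at the Fast_Unaligned_Load check and returns __memcpy_sse2_unaligned:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for glibc's feature bits and the
   HAS_ARCH_FEATURE/HAS_CPU_FEATURE macros.  */
enum feature
{
  AVX_Fast_Unaligned_Load,
  Fast_Unaligned_Load,
  SSSE3,
  Fast_Copy_Backward,
  Prefer_Fast_Copy_Backward,
  FEATURE_MAX
};

/* Assumed Excavator feature set after the cpu-features.c change below:
   AVX_Fast_Unaligned_Load stays clear, the rest are set.  */
static const bool excavator[FEATURE_MAX] =
{
  [Fast_Unaligned_Load] = true,
  [SSSE3] = true,
  [Fast_Copy_Backward] = true,
  [Prefer_Fast_Copy_Backward] = true,
};

static bool
has_feature (enum feature f)
{
  return excavator[f];
}

/* Current selection order in sysdeps/x86_64/multiarch/memcpy.S.  */
static const char *
current_memcpy_ifunc (void)
{
  if (has_feature (AVX_Fast_Unaligned_Load))
    return "__memcpy_avx_unaligned";
  if (has_feature (Fast_Unaligned_Load))
    return "__memcpy_sse2_unaligned";      /* Excavator stops here.  */
  if (!has_feature (SSSE3))
    return "__memcpy_sse2";
  if (has_feature (Fast_Copy_Backward))
    return "__memcpy_ssse3_back";
  return "__memcpy_ssse3";
}

int
main (void)
{
  printf ("%s\n", current_memcpy_ifunc ());  /* __memcpy_sse2_unaligned  */
  return 0;
}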

diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 1787716..e5c7184 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -159,9 +159,17 @@ init_cpu_features (struct cpu_features *cpu_features)
       if (family == 0x15)
        {
          /* "Excavator"   */
+#if index_arch_Fast_Unaligned_Load != index_arch_Prefer_Fast_Copy_Backward
+# error index_arch_Fast_Unaligned_Load != index_arch_Prefer_Fast_Copy_Backward
+#endif
+#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward
+# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward
+#endif
          if (model >= 0x60 && model <= 0x7f)
            cpu_features->feature[index_arch_Fast_Unaligned_Load]
-             |= bit_arch_Fast_Unaligned_Load;
+             |= (bit_arch_Fast_Unaligned_Load
+                 | bit_arch_Fast_Copy_Backward
+                 | bit_arch_Prefer_Fast_Copy_Backward);
        }
     }
   else
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 0624a92..9750f2f 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -35,6 +35,7 @@
 #define bit_arch_I686                          (1 << 15)
 #define bit_arch_Prefer_MAP_32BIT_EXEC         (1 << 16)
 #define bit_arch_Prefer_No_VZEROUPPER          (1 << 17)
+#define bit_arch_Prefer_Fast_Copy_Backward     (1 << 18)

 /* CPUID Feature flags.  */

@@ -101,6 +102,7 @@
 # define index_arch_I686               FEATURE_INDEX_1*FEATURE_SIZE
 # define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1*FEATURE_SIZE
 # define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1*FEATURE_SIZE
+# define index_arch_Prefer_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE


 # if defined (_LIBC) && !IS_IN (nonlib)
@@ -259,6 +261,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_arch_I686               FEATURE_INDEX_1
 # define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1
 # define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1
+# define index_arch_Prefer_Fast_Copy_Backward FEATURE_INDEX_1

 #endif /* !__ASSEMBLER__ */

diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index 8882590..6fad5cb 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -40,18 +40,20 @@ ENTRY(__new_memcpy)
 #endif
 1:     lea     __memcpy_avx_unaligned(%rip), %RAX_LP
        HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
+       jnz     3f
+       HAS_ARCH_FEATURE (Prefer_Fast_Copy_Backward)
        jnz     2f
        lea     __memcpy_sse2_unaligned(%rip), %RAX_LP
        HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-       jnz     2f
-       lea     __memcpy_sse2(%rip), %RAX_LP
+       jnz     3f
+2:     lea     __memcpy_sse2(%rip), %RAX_LP
        HAS_CPU_FEATURE (SSSE3)
-       jz      2f
+       jz      3f
        lea    __memcpy_ssse3_back(%rip), %RAX_LP
        HAS_ARCH_FEATURE (Fast_Copy_Backward)
-       jnz     2f
+       jnz     3f
        lea     __memcpy_ssse3(%rip), %RAX_LP
-2:     ret
+3:     ret
 END(__new_memcpy)

 # undef ENTRY
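
For comparison, the order implemented by the patched memcpy.S above, again as a rough C sketch reusing the made-up has_feature() helper and feature set from the earlier sketch; with Prefer_Fast_Copy_Backward set, Excavator now falls through to the SSSE3 block and gets __memcpy_ssse3_back:

/* Patched selection order: label 3 is the shared return, label 2 is the
   entry into the SSSE3 block for CPUs that prefer copying backward.  */
static const char *
patched_memcpy_ifunc (void)
{
  if (has_feature (AVX_Fast_Unaligned_Load))
    return "__memcpy_avx_unaligned";
  if (!has_feature (Prefer_Fast_Copy_Backward)
      && has_feature (Fast_Unaligned_Load))
    return "__memcpy_sse2_unaligned";
  if (!has_feature (SSSE3))
    return "__memcpy_sse2";
  if (has_feature (Fast_Copy_Backward))
    return "__memcpy_ssse3_back";          /* Excavator now lands here.  */
  return "__memcpy_ssse3";
}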


--Amit
