This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
RE: [PATCH x86_64] Update memcpy, mempcpy and memmove selection order for Excavator CPU BZ #19583
- From: "Pawar, Amit" <Amit dot Pawar at amd dot com>
- To: "H.J. Lu" <hjl dot tools at gmail dot com>
- Cc: "libc-alpha at sourceware dot org" <libc-alpha at sourceware dot org>
- Date: Tue, 22 Mar 2016 11:08:28 +0000
- Subject: RE: [PATCH x86_64] Update memcpy, mempcpy and memmove selection order for Excavator CPU BZ #19583
- Authentication-results: sourceware.org; auth=none
- Authentication-results: gmail.com; dkim=none (message not signed) header.d=none;gmail.com; dmarc=none action=none header.from=amd.com;
- References: <SN1PR12MB073325E2FB320E3CECD22660978B0 at SN1PR12MB0733 dot namprd12 dot prod dot outlook dot com> <CAMe9rOo_pgS7Vh1+JGWiYbHr3yXZmRDpxaLX6Xs9dzHr-TSH1A at mail dot gmail dot com> <SN1PR12MB0733B252EEDF7DE08EE91AF9978B0 at SN1PR12MB0733 dot namprd12 dot prod dot outlook dot com> <CAMe9rOqhAUNhvD0=FZm23MDeVMRaYrnkZ51wWB1O4JRu8o2ywg at mail dot gmail dot com> <SN1PR12MB07332500CE527AA6EAC1C360978C0 at SN1PR12MB0733 dot namprd12 dot prod dot outlook dot com> <CAMe9rOqGKGsWHsM1NO7L46QdtMALoG_Wq3mahg=beWSesAg0jg at mail dot gmail dot com> <SN1PR12MB0733A07FB69B2EC3831FB091978C0 at SN1PR12MB0733 dot namprd12 dot prod dot outlook dot com> <CAMe9rOoYXJQWB_T0SOM9+vj38yTndYknBZyjU3MbdAyc9x+g8A at mail dot gmail dot com> <SN1PR12MB0733522F9520520B45459C24978C0 at SN1PR12MB0733 dot namprd12 dot prod dot outlook dot com> <CAMe9rOqpbUF2m40pxjyr+O8pSrA9EmUgNsOcBPuP-wDaMqn+RQ at mail dot gmail dot com> <CAMe9rOr11s9j7i4Gk2yozXY-MWJLKpFXCKkFqoLW553tyJ4LZg at mail dot gmail dot com> <SN1PR12MB0733C372AE1AFC950CFBBF44978C0 at SN1PR12MB0733 dot namprd12 dot prod dot outlook dot com> <CAMe9rOot+1NehTWA-LAurcZwQ-gmtvDzwHBPVro3cadAjsmALA at mail dot gmail dot com>
- Spamdiagnosticmetadata: NSPM
- Spamdiagnosticoutput: 1:23
>It was done based on assumption that AVX enabled machine has fast AVX unaligned load. If it isn't true for AMD CPUs, we can enable it for all Intel AVX CPUs and you can set it for AMD CPUs properly.
memcpy still needs to be fixed; otherwise the SSE2_Unaligned version is selected. Is it OK to fix it in the following way? If not, please suggest an alternative.
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 1787716..e5c7184 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -159,9 +159,17 @@ init_cpu_features (struct cpu_features *cpu_features)
if (family == 0x15)
{
/* "Excavator" */
+#if index_arch_Fast_Unaligned_Load != index_arch_Prefer_Fast_Copy_Backward
+# error index_arch_Fast_Unaligned_Load != index_arch_Prefer_Fast_Copy_Backward
+#endif
+#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward
+# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward
+#endif
if (model >= 0x60 && model <= 0x7f)
cpu_features->feature[index_arch_Fast_Unaligned_Load]
- |= bit_arch_Fast_Unaligned_Load;
+ |= (bit_arch_Fast_Unaligned_Load
+ | bit_arch_Fast_Copy_Backward
+ | bit_arch_Prefer_Fast_Copy_Backward);
}
}
else
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 0624a92..9750f2f 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -35,6 +35,7 @@
#define bit_arch_I686 (1 << 15)
#define bit_arch_Prefer_MAP_32BIT_EXEC (1 << 16)
#define bit_arch_Prefer_No_VZEROUPPER (1 << 17)
+#define bit_arch_Prefer_Fast_Copy_Backward (1 << 18)
/* CPUID Feature flags. */
@@ -101,6 +102,7 @@
# define index_arch_I686 FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1*FEATURE_SIZE
+# define index_arch_Prefer_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE
# if defined (_LIBC) && !IS_IN (nonlib)
@@ -259,6 +261,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_arch_I686 FEATURE_INDEX_1
# define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1
# define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1
+# define index_arch_Prefer_Fast_Copy_Backward FEATURE_INDEX_1
#endif /* !__ASSEMBLER__ */
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index 8882590..6fad5cb 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -40,18 +40,20 @@ ENTRY(__new_memcpy)
#endif
1: lea __memcpy_avx_unaligned(%rip), %RAX_LP
HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
+ jnz 3f
+ HAS_ARCH_FEATURE (Preferred_Fast_Copy_Backward)
jnz 2f
lea __memcpy_sse2_unaligned(%rip), %RAX_LP
HAS_ARCH_FEATURE (Fast_Unaligned_Load)
- jnz 2f
- lea __memcpy_sse2(%rip), %RAX_LP
+ jnz 3f
+2: lea __memcpy_sse2(%rip), %RAX_LP
HAS_CPU_FEATURE (SSSE3)
- jz 2f
+ jz 3f
lea __memcpy_ssse3_back(%rip), %RAX_LP
HAS_ARCH_FEATURE (Fast_Copy_Backward)
- jnz 2f
+ jnz 3f
lea __memcpy_ssse3(%rip), %RAX_LP
-2: ret
+3: ret
END(__new_memcpy)
# undef ENTRY
--Amit
- References:
- [PATCH x86_64] Update memcpy, mempcpy and memmove selection order for Excavator CPU BZ #19583
- Re: [PATCH x86_64] Update memcpy, mempcpy and memmove selection order for Excavator CPU BZ #19583
- RE: [PATCH x86_64] Update memcpy, mempcpy and memmove selection order for Excavator CPU BZ #19583
- Re: [PATCH x86_64] Update memcpy, mempcpy and memmove selection order for Excavator CPU BZ #19583
- RE: [PATCH x86_64] Update memcpy, mempcpy and memmove selection order for Excavator CPU BZ #19583
- Re: [PATCH x86_64] Update memcpy, mempcpy and memmove selection order for Excavator CPU BZ #19583
- RE: [PATCH x86_64] Update memcpy, mempcpy and memmove selection order for Excavator CPU BZ #19583
- Re: [PATCH x86_64] Update memcpy, mempcpy and memmove selection order for Excavator CPU BZ #19583
- RE: [PATCH x86_64] Update memcpy, mempcpy and memmove selection order for Excavator CPU BZ #19583
- Re: [PATCH x86_64] Update memcpy, mempcpy and memmove selection order for Excavator CPU BZ #19583
- Re: [PATCH x86_64] Update memcpy, mempcpy and memmove selection order for Excavator CPU BZ #19583
- RE: [PATCH x86_64] Update memcpy, mempcpy and memmove selection order for Excavator CPU BZ #19583
- Re: [PATCH x86_64] Update memcpy, mempcpy and memmove selection order for Excavator CPU BZ #19583