PATCH: Enable SSE2 memset for AMD's upcoming Orochi processor and bugfix


Hi,
This patch enables SSE2 memset for AMD's upcoming Orochi processor.
This patch also fixes the following bug:
When multiarch is enabled, for misaligned blocks larger than 144 bytes,
memset branches into the integer code path depending on the value of
the misalignment, even though the startup code has already chosen the
SSE2 code path.

-Harsha

2011-03-02  Harsha Jagasia  <harsha.jagasia@amd.com>

	* sysdeps/x86_64/cacheinfo.c (init_cacheinfo):
	Set __x86_64_preferred_memory_instruction for AMD processors.
	* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
	Set bit_Prefer_SSE_for_memop for AMD processors.
	* sysdeps/x86_64/memset.S: After aligning the destination, the code
	branches to different locations depending on the value of the
	misalignment when multiarch is enabled.  Fix this.

diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c
index eae54e7..92bc68c 100644
--- a/sysdeps/x86_64/cacheinfo.c
+++ b/sysdeps/x86_64/cacheinfo.c
@@ -621,6 +621,27 @@ init_cacheinfo (void)
       long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
       shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
 
+      unsigned int ebx_1;
+
+#ifdef USE_MULTIARCH
+      eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
+      ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
+      ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
+      edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
+#else
+      __cpuid (1, eax, ebx_1, ecx, edx);
+#endif
+
+#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
+      /* AMD prefers SSSE3 instructions for memory/string routines
+	 if they are available, otherwise it prefers integer
+	 instructions.  */
+      if ((ecx & 0x200))
+	__x86_64_preferred_memory_instruction = 3;
+      else
+	__x86_64_preferred_memory_instruction = 0;
+#endif
+
       /* Get maximum extended function. */
       __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
 
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index f6eb71f..9aa55ff 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -592,8 +592,11 @@ L(A7Q0):    mov    %dl,-0x7(%rdi)
 L(A6Q0):    mov    %dx,-0x6(%rdi)
 	mov    %edx,-0x4(%rdi)
 
+	.balign     16
+L(aligned_now):
+
 #ifndef USE_MULTIARCH
-	jmp    L(aligned_now)
+	jmp    L(check_code_path)
 
 L(SSE_pre):
 #endif
@@ -1201,7 +1204,7 @@ L(SSExDx):
 #endif /* !defined USE_MULTIARCH || defined USE_SSE2  */
 
 	.balign     16
-L(aligned_now):
+L(check_code_path):
 
 #ifndef USE_MULTIARCH
 	 cmpl   $0x1,__x86_64_preferred_memory_instruction(%rip)
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index f0d2bb7..4716532 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -107,6 +107,18 @@ __init_cpu_features (void)
       kind = arch_kind_amd;
 
       get_common_indeces (&family, &model);
+
+      unsigned int ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
+
+      /* AMD processors prefer SSE instructions for memory/string routines
+	 if they are available, otherwise they prefer integer
+	 instructions.  */
+      if ((ecx & 0x200))
+	__cpu_features.feature[index_Prefer_SSE_for_memop]
+	  |= bit_Prefer_SSE_for_memop;
+      else
+	__cpu_features.feature[index_Prefer_SSE_for_memop]
+	  |= 0;
     }
   else
     kind = arch_kind_other;

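For what it's worth, the CPUID test both hunks rely on can be checked on
its own: leaf 1, ECX bit 9 (mask 0x200) reports SSSE3. The sketch below
uses GCC's <cpuid.h> and only illustrates the detection logic; it is not
glibc code.

#include <stdio.h>
#include <cpuid.h>

int
main (void)
{
  unsigned int eax, ebx, ecx, edx;

  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    {
      puts ("CPUID leaf 1 not supported");
      return 1;
    }

  if (ecx & 0x200)
    /* The patch sets __x86_64_preferred_memory_instruction = 3 and
       bit_Prefer_SSE_for_memop in this case.  */
    puts ("SSSE3 available: SSE2 memset path preferred");
  else
    puts ("no SSSE3: integer memset path preferred");

  return 0;
}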
