GNU C Library master sources branch, master, updated. glibc-2.13-38-g7e4ba49

drepper@sourceware.org drepper@sourceware.org
Sat Mar 5 09:14:00 GMT 2011


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  7e4ba49cd365555ddaff2ae8bba7b912464ad6e5 (commit)
      from  13a804de8f3091e8ccd9b650f61becd6e1304227 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=7e4ba49cd365555ddaff2ae8bba7b912464ad6e5

commit 7e4ba49cd365555ddaff2ae8bba7b912464ad6e5
Author: Harsha Jagasia <harsha.jagasia@amd.com>
Date:   Fri Mar 4 23:30:08 2011 -0500

    Enable SSE2 memset for AMD's upcoming Orochi processor.
    
    This patch enables SSE2 memset for AMD's upcoming Orochi processor.
    This patch also fixes the following bug:
    For misaligned blocks larger than 144 bytes, memset branches into
    the integer code path depending on the value of misalignment even if
    the startup code chooses the SSE2 code path upfront, when multiarch
    is enabled.

diff --git a/ChangeLog b/ChangeLog
index c6d652d..821a581 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2011-03-02  Harsha Jagasia  <harsha.jagasia@amd.com>
+	    Ulrich Drepper  <drepper@gmail.com>
+
+	* sysdeps/x86_64/memset.S: After aligning destination, code
+	branches to different locations depending on the value of
+	misalignment, when multiarch is enabled. Fix this.
+
+2011-03-02  Harsha Jagasia  <harsha.jagasia@amd.com>
+
+	* sysdeps/x86_64/cacheinfo.c (init_cacheinfo):
+	Set __x86_64_preferred_memory_instruction for AMD processors.
+	* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
+	Set bit_Prefer_SSE_for_memop for AMD processors.
+
 2011-03-04  Ulrich Drepper  <drepper@gmail.com>
 
 	* libio/fmemopen.c (fmemopen): Optimize a bit.
@@ -12,7 +26,7 @@
 
 2011-02-28  Aurelien Jarno  <aurelien@aurel32.net>
 
-	* sysdeps/sparc/sparc64/multiarch/memset.S(__bzero): call
+	* sysdeps/sparc/sparc64/multiarch/memset.S(__bzero): Call
 	__bzero_ultra1 instead of __memset_ultra1.
 
 2011-02-23  Andreas Schwab  <schwab@redhat.com>
diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c
index eae54e7..337444d 100644
--- a/sysdeps/x86_64/cacheinfo.c
+++ b/sysdeps/x86_64/cacheinfo.c
@@ -1,5 +1,5 @@
 /* x86_64 cache info.
-   Copyright (C) 2003, 2004, 2006, 2007, 2009 Free Software Foundation, Inc.
+   Copyright (C) 2003,2004,2006,2007,2009,2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -352,11 +352,11 @@ handle_amd (int name)
 
     case _SC_LEVEL2_CACHE_ASSOC:
       switch ((ecx >> 12) & 0xf)
-        {
-        case 0:
-        case 1:
-        case 2:
-        case 4:
+	{
+	case 0:
+	case 1:
+	case 2:
+	case 4:
 	  return (ecx >> 12) & 0xf;
 	case 6:
 	  return 8;
@@ -376,7 +376,7 @@ handle_amd (int name)
 	  return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
 	default:
 	  return 0;
-        }
+	}
       /* NOTREACHED */
 
     case _SC_LEVEL2_CACHE_LINESIZE:
@@ -521,10 +521,10 @@ init_cacheinfo (void)
       shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
 
       if (shared <= 0)
-        {
+	{
 	  /* Try L2 otherwise.  */
-          level  = 2;
-          shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
+	  level  = 2;
+	  shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
 	}
 
       unsigned int ebx_1;
@@ -540,7 +540,7 @@ init_cacheinfo (void)
 
 #ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
       /* Intel prefers SSSE3 instructions for memory/string routines
-	 if they are avaiable.  */
+	 if they are available.  */
       if ((ecx & 0x200))
 	__x86_64_preferred_memory_instruction = 3;
       else
@@ -550,7 +550,7 @@ init_cacheinfo (void)
       /* Figure out the number of logical threads that share the
 	 highest cache level.  */
       if (max_cpuid >= 4)
-        {
+	{
 	  int i = 0;
 
 	  /* Query until desired cache level is enumerated.  */
@@ -565,7 +565,7 @@ init_cacheinfo (void)
 	      if ((eax & 0x1f) == 0)
 		goto intel_bug_no_cache_info;
 	    }
-          while (((eax >> 5) & 0x7) != level);
+	  while (((eax >> 5) & 0x7) != level);
 
 	  threads = (eax >> 14) & 0x3ff;
 
@@ -602,7 +602,7 @@ init_cacheinfo (void)
 	  threads += 1;
 	}
       else
-        {
+	{
 	intel_bug_no_cache_info:
 	  /* Assume that all logical threads share the highest cache level.  */
 
@@ -612,7 +612,7 @@ init_cacheinfo (void)
       /* Cap usage of highest cache level to the number of supported
 	 threads.  */
       if (shared > 0 && threads > 0)
-        shared /= threads;
+	shared /= threads;
     }
   /* This spells out "AuthenticAMD".  */
   else if (is_amd)
@@ -621,6 +621,25 @@ init_cacheinfo (void)
       long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
       shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
 
+#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
+# ifdef USE_MULTIARCH
+      eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
+      ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
+      ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
+      edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
+# else
+      __cpuid (1, eax, ebx, ecx, edx);
+# endif
+
+      /* AMD prefers SSSE3 instructions for memory/string routines
+	 if they are available, otherwise it prefers integer
+	 instructions.  */
+      if ((ecx & 0x200))
+	__x86_64_preferred_memory_instruction = 3;
+      else
+	__x86_64_preferred_memory_instruction = 0;
+#endif
+
       /* Get maximum extended function. */
       __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
 
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index f6eb71f..d43c7f6 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -1,6 +1,6 @@
 /* memset/bzero -- set memory area to CH/0
    Optimized version for x86-64.
-   Copyright (C) 2002-2005, 2007, 2008 Free Software Foundation, Inc.
+   Copyright (C) 2002-2005, 2007, 2008, 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -23,7 +23,7 @@
 #define __STOS_LOWER_BOUNDARY	$8192
 #define __STOS_UPPER_BOUNDARY	$65536
 
-        .text
+	.text
 #if !defined NOT_IN_libc && !defined USE_MULTIARCH
 ENTRY(__bzero)
 	mov	%rsi,%rdx	/* Adjust parameter.  */
@@ -417,7 +417,7 @@ L(P4Q0): mov    %edx,-0x4(%rdi)
 		retq
 
 	.balign     16
-#if defined(USE_EXTRA_TABLE)
+#ifdef USE_EXTRA_TABLE
 L(P5QI): mov    %rdx,-0x95(%rdi)
 #endif
 L(P5QH): mov    %rdx,-0x8d(%rdi)
@@ -596,6 +596,8 @@ L(A6Q0):    mov    %dx,-0x6(%rdi)
 	jmp    L(aligned_now)
 
 L(SSE_pre):
+#else
+L(aligned_now):
 #endif
 #if !defined USE_MULTIARCH || defined USE_SSE2
 	 # fill RegXMM0 with the pattern
@@ -606,16 +608,16 @@ L(SSE_pre):
 	 jge    L(byte32sse2_pre)
 
 	 add    %r8,%rdi
-#ifndef PIC
+# ifndef PIC
 	 lea    L(SSExDx)(%rip),%r9
 	 jmpq   *(%r9,%r8,8)
-#else
+# else
 	 lea    L(SSE0Q0)(%rip),%r9
 	 lea    L(SSExDx)(%rip),%rcx
 	 movswq (%rcx,%r8,2),%rcx
 	 lea    (%rcx,%r9,1),%r9
 	 jmpq   *%r9
-#endif
+# endif
 
 L(SSE0QB):  movdqa %xmm0,-0xb0(%rdi)
 L(SSE0QA):  movdqa %xmm0,-0xa0(%rdi)
@@ -881,16 +883,16 @@ L(byte32sse2):
 	lea    0x80(%rdi),%rdi
 	jge    L(byte32sse2)
 	add    %r8,%rdi
-#ifndef PIC
+# ifndef PIC
 	lea    L(SSExDx)(%rip),%r11
 	jmpq   *(%r11,%r8,8)
-#else
+# else
 	lea    L(SSE0Q0)(%rip),%r11
 	lea    L(SSExDx)(%rip),%rcx
 	movswq (%rcx,%r8,2),%rcx
 	lea    (%rcx,%r11,1),%r11
 	jmpq   *%r11
-#endif
+# endif
 
 	.balign     16
 L(sse2_nt_move_pre):
@@ -916,20 +918,20 @@ L(sse2_nt_move):
 	jge    L(sse2_nt_move)
 	sfence
 	add    %r8,%rdi
-#ifndef PIC
+# ifndef PIC
 	lea    L(SSExDx)(%rip),%r11
 	jmpq   *(%r11,%r8,8)
-#else
+# else
 	lea    L(SSE0Q0)(%rip),%r11
 	lea    L(SSExDx)(%rip),%rcx
 	movswq (%rcx,%r8,2),%rcx
 	lea   (%rcx,%r11,1),%r11
 	jmpq   *%r11
-#endif
+# endif
 
 	.pushsection .rodata
 	.balign     16
-#ifndef PIC
+# ifndef PIC
 L(SSExDx):
 	.quad       L(SSE0Q0), L(SSE1Q0), L(SSE2Q0), L(SSE3Q0)
 	.quad       L(SSE4Q0), L(SSE5Q0), L(SSE6Q0), L(SSE7Q0)
@@ -979,7 +981,7 @@ L(SSExDx):
 	.quad       L(SSE4QB), L(SSE5QB), L(SSE6QB), L(SSE7QB)
 	.quad       L(SSE8QB), L(SSE9QB), L(SSE10QB), L(SSE11QB)
 	.quad       L(SSE12QB), L(SSE13QB), L(SSE14QB), L(SSE15QB)
-#else
+# else
 L(SSExDx):
 	.short     L(SSE0Q0) -L(SSE0Q0)
 	.short     L(SSE1Q0) -L(SSE0Q0)
@@ -1196,14 +1198,14 @@ L(SSExDx):
 	.short     L(SSE13QB)-L(SSE0Q0)
 	.short     L(SSE14QB)-L(SSE0Q0)
 	.short     L(SSE15QB)-L(SSE0Q0)
-#endif
+# endif
 	.popsection
 #endif /* !defined USE_MULTIARCH || defined USE_SSE2  */
 
 	.balign     16
+#ifndef USE_MULTIARCH
 L(aligned_now):
 
-#ifndef USE_MULTIARCH
 	 cmpl   $0x1,__x86_64_preferred_memory_instruction(%rip)
 	 jg     L(SSE_pre)
 #endif /* USE_MULTIARCH */
@@ -1246,17 +1248,17 @@ L(8byte_move_loop):
 
 L(8byte_move_skip):
 	andl	$127,%r8d
-	lea    	(%rdi,%r8,1),%rdi
+	lea	(%rdi,%r8,1),%rdi
 
 #ifndef PIC
-	lea    	L(setPxQx)(%rip),%r11
-	jmpq   	*(%r11,%r8,8) # old scheme remained for nonPIC
+	lea	L(setPxQx)(%rip),%r11
+	jmpq	*(%r11,%r8,8) # old scheme remained for nonPIC
 #else
-	lea    	L(Got0)(%rip),%r11
+	lea	L(Got0)(%rip),%r11
 	lea	L(setPxQx)(%rip),%rcx
 	movswq	(%rcx,%r8,2),%rcx
-	lea    	(%rcx,%r11,1),%r11
-	jmpq   	*%r11
+	lea	(%rcx,%r11,1),%r11
+	jmpq	*%r11
 #endif
 
 	.balign     16
@@ -1290,16 +1292,16 @@ L(8byte_stos_skip):
 	ja	L(8byte_nt_move)
 
 	andl	$7,%r8d
-	lea    	(%rdi,%r8,1),%rdi
+	lea	(%rdi,%r8,1),%rdi
 #ifndef PIC
-	lea    	L(setPxQx)(%rip),%r11
-	jmpq   	*(%r11,%r8,8) # old scheme remained for nonPIC
+	lea	L(setPxQx)(%rip),%r11
+	jmpq	*(%r11,%r8,8) # old scheme remained for nonPIC
 #else
-	lea    	L(Got0)(%rip),%r11
+	lea	L(Got0)(%rip),%r11
 	lea     L(setPxQx)(%rip),%rcx
 	movswq	(%rcx,%r8,2),%rcx
 	lea	(%rcx,%r11,1),%r11
-	jmpq   	*%r11
+	jmpq	*%r11
 #endif
 
 	.balign     16
@@ -1338,16 +1340,16 @@ L(8byte_nt_move_loop):
 L(8byte_nt_move_skip):
 	andl	$127,%r8d
 
-	lea    	(%rdi,%r8,1),%rdi
+	lea	(%rdi,%r8,1),%rdi
 #ifndef PIC
-	lea    	L(setPxQx)(%rip),%r11
-	jmpq   	*(%r11,%r8,8) # old scheme remained for nonPIC
+	lea	L(setPxQx)(%rip),%r11
+	jmpq	*(%r11,%r8,8) # old scheme remained for nonPIC
 #else
-	lea    	L(Got0)(%rip),%r11
+	lea	L(Got0)(%rip),%r11
 	lea     L(setPxQx)(%rip),%rcx
 	movswq	(%rcx,%r8,2),%rcx
-	lea    	(%rcx,%r11,1),%r11
-	jmpq   	*%r11
+	lea	(%rcx,%r11,1),%r11
+	jmpq	*%r11
 #endif
 
 END (memset)
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index f0d2bb7..34ec2df 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -1,6 +1,6 @@
 /* Initialize CPU feature data.
    This file is part of the GNU C Library.
-   Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+   Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
    Contributed by Ulrich Drepper <drepper@redhat.com>.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -60,7 +60,7 @@ __init_cpu_features (void)
       get_common_indeces (&family, &model);
 
       /* Intel processors prefer SSE instruction for memory/string
-	 routines if they are avaiable.  */
+	 routines if they are available.  */
       __cpu_features.feature[index_Prefer_SSE_for_memop]
 	|= bit_Prefer_SSE_for_memop;
 
@@ -107,6 +107,14 @@ __init_cpu_features (void)
       kind = arch_kind_amd;
 
       get_common_indeces (&family, &model);
+
+      unsigned int ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
+
+      /* AMD processors prefer SSE instructions for memory/string routines
+	 if they are available, otherwise they prefer integer instructions.  */
+      if ((ecx & 0x200))
+	__cpu_features.feature[index_Prefer_SSE_for_memop]
+	  |= bit_Prefer_SSE_for_memop;
     }
   else
     kind = arch_kind_other;

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                            |   16 +++++++-
 sysdeps/x86_64/cacheinfo.c           |   49 +++++++++++++++++-------
 sysdeps/x86_64/memset.S              |   68 +++++++++++++++++----------------
 sysdeps/x86_64/multiarch/init-arch.c |   12 +++++-
 4 files changed, 94 insertions(+), 51 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources



More information about the Glibc-cvs mailing list