This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch hjl/ifunc/master created. glibc-2.23-564-gb66d779


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, hjl/ifunc/master has been created
        at  b66d779324e61dabed8913f1ea69575f36978090 (commit)

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=b66d779324e61dabed8913f1ea69575f36978090

commit b66d779324e61dabed8913f1ea69575f36978090
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Thu Jun 30 09:35:29 2016 -0700

    Check -non_temporal_store in GLIBC_IFUNC
    
    The x86 non-temporal threshold is an approximate value.  This patch
    checks -non_temporal_store in GLIBC_IFUNC to disable non-temporal store.
    
    	* sysdeps/x86/cacheinfo.c (init_cacheinfo): Set
    	__x86_shared_non_temporal_threshold only if it is not set.
    	* sysdeps/x86/cpu-features.c (__x86_shared_non_temporal_threshold):
    	New.
    	(init_cpu_features): Check -non_temporal_store in GLIBC_IFUNC
    	to disable non-temporal store.

diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
index cf4f64b..2291ad4 100644
--- a/sysdeps/x86/cacheinfo.c
+++ b/sysdeps/x86/cacheinfo.c
@@ -762,8 +762,13 @@ intel_bug_no_cache_info:
       __x86_shared_cache_size = shared;
     }
 
-  /* The large memcpy micro benchmark in glibc shows that 6 times of
-     shared cache size is the approximate value above which non-temporal
-     store becomes faster.  */
-  __x86_shared_non_temporal_threshold = __x86_shared_cache_size * 6;
+  /* Set non-temporal threshold to an approximate value if it hasn't
+     been set.  */
+  if (__x86_shared_non_temporal_threshold == 0)
+    {
+      /* The large memcpy microbenchmark in glibc shows that 6 times
+         of shared cache size is the approximate value above which
+	 non-temporal store becomes faster.  */
+      __x86_shared_non_temporal_threshold = __x86_shared_cache_size * 6;
+    }
 }
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index c2349c8..3ae44cf 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -220,6 +220,8 @@ equal (const char *a, const char *b, size_t len)
       break;								\
     }
 
+extern long int __x86_shared_non_temporal_threshold attribute_hidden;
+
 static inline void
 init_cpu_features (struct cpu_features *cpu_features, char **env)
 {
@@ -540,6 +542,18 @@ no_cpuid:
 		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Rep_String, disable);
 		    break;
 		  case 18:
+		    if (disable)
+		      {
+			if (equal (n, "non_temporal_store",
+				   sizeof ("non_temporal_store") - 1))
+			  {
+			    /* Disable non-temporal store with
+			       "-non_temporal_store".  */
+			    __x86_shared_non_temporal_threshold
+			      = (long int) -1;
+			    break;
+			  }
+		      }
 		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Copy_Backward,
 						 disable);
 		    break;

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=0bbe8eff8d6d75fb9f547c2e9af88f05ea2df95f

commit 0bbe8eff8d6d75fb9f547c2e9af88f05ea2df95f
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Mon Jun 27 15:13:50 2016 -0700

    Add GLIBC_IFUNC to control IFUNC selection
    
    The current IFUNC selection is based on microbenchmarks in glibc.  It
    should give the best performance for most workloads.  But other choices
    may have better performance for a particular workload or on the hardware
    which wasn't available when the selection was made.  The environment
    variable, GLIBC_IFUNC=-xxx,yyy,-zzz...., can be used to enable CPU/ARCH
    feature yyy, disable CPU/ARCH feature xxx and zzz, where the feature
    name is case-sensitive and has to match the ones in cpu-features.h.  It
    can be used by glibc developers to override the IFUNC selection to tune
    for a new processor or improve performance for a particular workload.
    It isn't intended for normal end users.
    
    Since all CPU/ARCH features are hardware optimizations without security
    implication, except for Prefer_MAP_32BIT_EXEC, which can only be disabled,
    we check GLIBC_IFUNC for programs, including set*id ones.
    
    NOTE: the IFUNC selection may change over time.  Please check all
    multiarch implementations when experimenting.
    
    	* sysdeps/i386/dl-machine.h (dl_platform_init): Pass _environ
    	to init_cpu_features.
    	* sysdeps/x86_64/dl-machine.h (dl_platform_init): Likewise.
    	* sysdeps/x86/libc-start.c (__libc_start_main): Likewise.
    	* sysdeps/x86/cpu-features.c (equal): New function.
    	(CHECK_GLIBC_IFUNC_CPU_OFF): New macro.
    	(CHECK_GLIBC_IFUNC_ARCH_OFF): Likewise.
    	(CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH): Likewise.
    	(CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH): Likewise.
    	(init_cpu_features): Updated to take the array of environment
    	strings.  Process GLIBC_IFUNC environment variable.

diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
index 4e3968a..30e6635 100644
--- a/sysdeps/i386/dl-machine.h
+++ b/sysdeps/i386/dl-machine.h
@@ -240,7 +240,8 @@ dl_platform_init (void)
 #ifdef SHARED
   /* init_cpu_features has been called early from __libc_start_main in
      static executable.  */
-  init_cpu_features (&GLRO(dl_x86_cpu_features));
+  extern char **_environ attribute_hidden;
+  init_cpu_features (&GLRO(dl_x86_cpu_features), _environ);
 #endif
 }
 
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 9ce4b49..c2349c8 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -91,8 +91,137 @@ get_common_indeces (struct cpu_features *cpu_features,
     }
 }
 
+#ifdef __x86_64__
+typedef long long op_t;
+#else
+typedef int op_t;
+#endif
+
+/* Return true if the first LEN bytes of strings A and B are the same
+   where LEN != 0.  We can't use string/memory functions because they
+   trigger an ifunc resolve loop.  */
+
+static bool
+equal (const char *a, const char *b, size_t len)
+{
+  size_t op_len = len % sizeof (op_t);
+  if (op_len)
+    {
+      switch (op_len)
+	{
+	case 1:
+	  if (*(char *) a != *(char *) b)
+	    return false;
+	  break;
+	case 2:
+	  if (*(short *) a != *(short *) b)
+	    return false;
+	  break;
+	case 3:
+	  if (*(short *) a != *(short *) b
+	      || *(char *) (a + 2) != *(char *) (b + 2))
+	    return false;
+	  break;
+#ifdef __x86_64__
+	case 4:
+	  if (*(int *) a != *(int *) b)
+	    return false;
+	  break;
+	default:
+	  if (*(int *) a != *(int *) b
+	      || *(int *) (a + op_len - 4) != *(int *) (b + op_len - 4))
+	    return false;
+	  break;
+#else
+	default:
+	  break;
+#endif
+	}
+      /* Align length to size of op_t.  */
+      len -= op_len;
+      if (len == 0)
+	return true;
+      a += op_len;
+      b += op_len;
+    }
+
+  /* Compare one op_t at a time.  */
+  do
+    {
+      if (*(op_t *) a != *(op_t *) b)
+	return false;
+      len -= sizeof (op_t);
+      if (len == 0)
+	return true;
+      a += sizeof (op_t);
+      b += sizeof (op_t);
+    }
+  while (1);
+}
+
+/* Disable a CPU feature NAME.  We don't enable a CPU feature which isn't
+   available.  */
+#define CHECK_GLIBC_IFUNC_CPU_OFF(name)					\
+  if (equal (n, #name, sizeof (#name) - 1))				\
+    {									\
+      cpu_features->cpuid[index_cpu_##name].reg_##name			\
+	&= ~bit_cpu_##name;						\
+      break;								\
+    }
+
+/* Disable an ARCH feature NAME.  We don't enable an ARCH feature which
+   isn't available or has security implication.  */
+#define CHECK_GLIBC_IFUNC_ARCH_OFF(name)				\
+  if (equal (n, #name, sizeof (#name) - 1))				\
+    {									\
+      cpu_features->feature[index_arch_##name]				\
+	&= ~bit_arch_##name;						\
+      break;								\
+    }
+
+/* Enable/disable an ARCH feature NAME.  */
+#define CHECK_GLIBC_IFUNC_ARCH_BOTH(name, disable)			\
+  if (equal (n, #name, sizeof (#name) - 1))				\
+    {									\
+      if (disable)							\
+	cpu_features->feature[index_arch_##name]			\
+	  &= ~bit_arch_##name;						\
+      else								\
+	cpu_features->feature[index_arch_##name]			\
+	  |= bit_arch_##name;						\
+      break;								\
+    }
+
+/* Enable/disable an ARCH feature NAME.  Enable an ARCH feature only
+   if the ARCH feature NEED is also enabled.  */
+#define CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH(name, need, disable)	\
+  if (equal (n, #name, sizeof (#name) - 1))				\
+    {									\
+      if (disable)							\
+	cpu_features->feature[index_arch_##name]			\
+	  &= ~bit_arch_##name;						\
+      else if (CPU_FEATURES_ARCH_P (cpu_features, need))		\
+	cpu_features->feature[index_arch_##name]			\
+	  |= bit_arch_##name;						\
+      break;								\
+    }
+
+/* Enable/disable an ARCH feature NAME.  Enable an ARCH feature only
+   if the CPU feature NEED is also enabled.  */
+#define CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH(name, need, disable)	\
+  if (equal (n, #name, sizeof (#name) - 1))				\
+    {									\
+      if (disable)							\
+	cpu_features->feature[index_arch_##name]			\
+	  &= ~bit_arch_##name;						\
+      else if (CPU_FEATURES_CPU_P (cpu_features, need))			\
+	cpu_features->feature[index_arch_##name]			\
+	  |= bit_arch_##name;						\
+      break;								\
+    }
+
 static inline void
-init_cpu_features (struct cpu_features *cpu_features)
+init_cpu_features (struct cpu_features *cpu_features, char **env)
 {
   unsigned int ebx, ecx, edx;
   unsigned int family = 0;
@@ -268,4 +397,178 @@ no_cpuid:
   cpu_features->family = family;
   cpu_features->model = model;
   cpu_features->kind = kind;
+
+  /* The current IFUNC selection is based on microbenchmarks in glibc.
+     It should give the best performance for most workloads.  But other
+     choices may have better performance for a particular workload or on
+     the hardware which wasn't available when the selection was made.
+     The environment variable, GLIBC_IFUNC=-xxx,yyy,-zzz...., can be
+     used to enable CPU/ARCH feature yyy, disable CPU/ARCH feature xxx
+     and zzz, where the feature name is case-sensitive and has to match
+     the ones in cpu-features.h.  It can be used by glibc developers to
+     tune for a new processor or override the IFUNC selection to improve
+     performance for a particular workload.
+
+     Since all CPU/ARCH features are hardware optimizations without
+     security implication, except for Prefer_MAP_32BIT_EXEC, which can
+     only be disabled, we check GLIBC_IFUNC for programs, including
+     set*id ones.
+
+     NOTE: the IFUNC selection may change over time.  Please check all
+     multiarch implementations when experimenting.  */
+
+  while (*env != NULL)
+    {
+      const char *p, *end;
+      size_t len = sizeof ("GLIBC_IFUNC=");
+
+      end = *env;
+      for (p = end; *p != '\0'; p++)
+	if (--len == 0 && equal (end, "GLIBC_IFUNC=",
+				 sizeof ("GLIBC_IFUNC=") - 1))
+	  {
+	    /* Can't use strlen because it may trigger an ifunc resolve
+	       loop.  */
+	    for (end = p; *end != '\0'; end++);
+	    do
+	      {
+		const char *c, *n;
+		bool disable;
+		size_t nl;
+
+		for (c = p; *c != ','; c++)
+		  if (c >= end)
+		    break;
+
+		len = c - p;
+		disable = *p == '-';
+		if (disable)
+		  {
+		    n = p + 1;
+		    nl = len - 1;
+		  }
+		else
+		  {
+		    n = p;
+		    nl = len;
+		  }
+		switch (nl)
+		  {
+		  default:
+		    break;
+		  case 3:
+		    if (disable)
+		      {
+			CHECK_GLIBC_IFUNC_CPU_OFF (AVX);
+			CHECK_GLIBC_IFUNC_CPU_OFF (CX8);
+			CHECK_GLIBC_IFUNC_CPU_OFF (FMA);
+			CHECK_GLIBC_IFUNC_CPU_OFF (HTT);
+			CHECK_GLIBC_IFUNC_CPU_OFF (RTM);
+		      }
+		    break;
+		  case 4:
+		    if (disable)
+		      {
+			CHECK_GLIBC_IFUNC_CPU_OFF (AVX2);
+			CHECK_GLIBC_IFUNC_CPU_OFF (CMOV);
+			CHECK_GLIBC_IFUNC_CPU_OFF (ERMS);
+			CHECK_GLIBC_IFUNC_CPU_OFF (FMA4);
+			CHECK_GLIBC_IFUNC_CPU_OFF (SSE2);
+			CHECK_GLIBC_IFUNC_ARCH_OFF (I586);
+			CHECK_GLIBC_IFUNC_ARCH_OFF (I686);
+		      }
+		    break;
+		  case 5:
+		    if (disable)
+		      {
+			CHECK_GLIBC_IFUNC_CPU_OFF (SSSE3);
+		      }
+		    break;
+		  case 6:
+		    if (disable)
+		      {
+			CHECK_GLIBC_IFUNC_CPU_OFF (SSE4_1);
+			CHECK_GLIBC_IFUNC_CPU_OFF (SSE4_2);
+		      }
+		    break;
+		  case 7:
+		    if (disable)
+		      {
+			CHECK_GLIBC_IFUNC_CPU_OFF (AVX512F);
+			CHECK_GLIBC_IFUNC_CPU_OFF (OSXSAVE);
+		      }
+		    break;
+		  case 8:
+		    if (disable)
+		      {
+			CHECK_GLIBC_IFUNC_CPU_OFF (AVX512DQ);
+			CHECK_GLIBC_IFUNC_CPU_OFF (POPCOUNT);
+		      }
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Slow_BSF, disable);
+		    break;
+		  case 10:
+		    if (disable)
+		      {
+			CHECK_GLIBC_IFUNC_ARCH_OFF (AVX_Usable);
+			CHECK_GLIBC_IFUNC_ARCH_OFF (FMA_Usable);
+		      }
+		    break;
+		  case 11:
+		    if (disable)
+		      {
+			CHECK_GLIBC_IFUNC_ARCH_OFF (AVX2_Usable);
+			CHECK_GLIBC_IFUNC_ARCH_OFF (FMA4_Usable);
+		      }
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Prefer_ERMS, disable);
+		    CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH (Slow_SSE4_2,
+							  SSE4_2,
+							  disable);
+		    break;
+		  case 13:
+		    if (disable)
+		      {
+			CHECK_GLIBC_IFUNC_ARCH_OFF (AVX512F_Usable);
+		      }
+		    CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
+		      (AVX_Fast_Unaligned_Load, AVX_Usable, disable);
+		    break;
+		  case 15:
+		    if (disable)
+		      {
+			CHECK_GLIBC_IFUNC_ARCH_OFF (AVX512DQ_Usable);
+		      }
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Rep_String, disable);
+		    break;
+		  case 18:
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Copy_Backward,
+						 disable);
+		    break;
+		  case 19:
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Unaligned_Load,
+						 disable);
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Unaligned_Copy,
+						 disable);
+		    break;
+		  case 20:
+		    CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
+		      (Prefer_No_VZEROUPPER, AVX_Usable, disable);
+		    break;
+		  case 21:
+		    if (disable)
+		      {
+			CHECK_GLIBC_IFUNC_ARCH_OFF (Prefer_MAP_32BIT_EXEC);
+		      }
+		    break;
+		  case 26:
+		    CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH
+		      (Prefer_PMINUB_for_stringop, SSE2, disable);
+		    break;
+		  }
+		p += len + 1;
+	      }
+	    while (p < end);
+	    return;
+	  }
+      env++;
+    }
 }
diff --git a/sysdeps/x86/libc-start.c b/sysdeps/x86/libc-start.c
index 3b5ea6e..7dec1ca 100644
--- a/sysdeps/x86/libc-start.c
+++ b/sysdeps/x86/libc-start.c
@@ -34,7 +34,7 @@ __libc_start_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
 		   void (*fini) (void),
 		   void (*rtld_fini) (void), void *stack_end)
 {
-  init_cpu_features (&_dl_x86_cpu_features);
+  init_cpu_features (&_dl_x86_cpu_features, &argv[argc + 1]);
   return generic_start_main (main, argc, argv, init, fini, rtld_fini,
 			     stack_end);
 }
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index ed0c1a8..1d2b144 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -227,7 +227,8 @@ dl_platform_init (void)
 #ifdef SHARED
   /* init_cpu_features has been called early from __libc_start_main in
      static executable.  */
-  init_cpu_features (&GLRO(dl_x86_cpu_features));
+  extern char **_environ attribute_hidden;
+  init_cpu_features (&GLRO(dl_x86_cpu_features), _environ);
 #endif
 }
 

-----------------------------------------------------------------------


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]