This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch hjl/ifunc/master created. glibc-2.23-523-g638f63c


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, hjl/ifunc/master has been created
        at  638f63c0a15653d7a4ca85ac42df2585c0760c5e (commit)

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=638f63c0a15653d7a4ca85ac42df2585c0760c5e

commit 638f63c0a15653d7a4ca85ac42df2585c0760c5e
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Mon Jun 27 12:27:49 2016 -0700

    Check ERMS in memmove/memcpy/mempcpy/memset
    
    Although the Enhanced REP MOVSB/STOSB (ERMS) implementations of memmove,
    memcpy, mempcpy and memset aren't used by the current processors, this
    patch adds Prefer_ERMS check in memmove, memcpy, mempcpy and memset so
    that glibc developers can experiment with it using GLIBC_IFUNC.
    
    	* sysdeps/x86/cpu-features.c (init_cpu_features): Also
    	check Prefer_ERMS.
    	* sysdeps/x86/cpu-features.h (bit_arch_Prefer_ERMS): New.
    	(index_arch_Prefer_ERMS): Likewise.
    	* sysdeps/x86_64/multiarch/memcpy.S (__new_memcpy): Return
    	__memcpy_erms for Prefer_ERMS.
    	* sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
    	(__memmove_erms): Enabled for libc.a.
    	* sysdeps/x86_64/multiarch/memmove.S (__libc_memmove): Return
    	__memmove_erms for Prefer_ERMS.
    	* sysdeps/x86_64/multiarch/mempcpy.S (__mempcpy): Return
    	__mempcpy_erms for Prefer_ERMS.
    	* sysdeps/x86_64/multiarch/memset.S (memset): Return
    	__memset_erms for Prefer_ERMS.

diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index fcc74c9..2151d9c 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -475,6 +475,7 @@ no_cpuid:
 		  case 13:
 		    CHECK_GLIBC_IFUNC_ARCH_OFF (AVX2_Usable);
 		    CHECK_GLIBC_IFUNC_ARCH_OFF (FMA4_Usable);
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Prefer_ERMS);
 		    CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH (Slow_SSE4_2,
 							  SSE4_2);
 		    break;
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 2bd9371..97ffe76 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -36,6 +36,7 @@
 #define bit_arch_Prefer_MAP_32BIT_EXEC		(1 << 16)
 #define bit_arch_Prefer_No_VZEROUPPER		(1 << 17)
 #define bit_arch_Fast_Unaligned_Copy		(1 << 18)
+#define bit_arch_Prefer_ERMS			(1 << 19)
 
 /* CPUID Feature flags.  */
 
@@ -105,6 +106,7 @@
 # define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1*FEATURE_SIZE
 # define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1*FEATURE_SIZE
 # define index_arch_Fast_Unaligned_Copy	FEATURE_INDEX_1*FEATURE_SIZE
+# define index_arch_Prefer_ERMS		FEATURE_INDEX_1*FEATURE_SIZE
 
 
 # if defined (_LIBC) && !IS_IN (nonlib)
@@ -274,6 +276,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1
 # define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1
 # define index_arch_Fast_Unaligned_Copy	FEATURE_INDEX_1
+# define index_arch_Prefer_ERMS		FEATURE_INDEX_1
 
 #endif	/* !__ASSEMBLER__ */
 
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index f6771a4..df7fbac 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -29,6 +29,9 @@
 ENTRY(__new_memcpy)
 	.type	__new_memcpy, @gnu_indirect_function
 	LOAD_RTLD_GLOBAL_RO_RDX
+	lea	__memcpy_erms(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Prefer_ERMS)
+	jnz	2f
 # ifdef HAVE_AVX512_ASM_SUPPORT
 	HAS_ARCH_FEATURE (AVX512F_Usable)
 	jz	1f
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index a2cce39..4893ea4 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -150,13 +150,15 @@ L(nop):
 #if defined USE_MULTIARCH && IS_IN (libc)
 END (MEMMOVE_SYMBOL (__memmove, unaligned))
 
-# if VEC_SIZE == 16 && defined SHARED
+# if VEC_SIZE == 16
+#  if defined SHARED
 /* Only used to measure performance of REP MOVSB.  */
 ENTRY (__mempcpy_erms)
 	movq	%rdi, %rax
 	addq	%rdx, %rax
 	jmp	L(start_movsb)
 END (__mempcpy_erms)
+#  endif
 
 ENTRY (__memmove_erms)
 	movq	%rdi, %rax
@@ -181,7 +183,9 @@ L(movsb_backward):
 	cld
 	ret
 END (__memmove_erms)
+#  if defined SHARED
 strong_alias (__memmove_erms, __memcpy_erms)
+#  endif
 # endif
 
 # ifdef SHARED
diff --git a/sysdeps/x86_64/multiarch/memmove.S b/sysdeps/x86_64/multiarch/memmove.S
index 25c3586..8e1c6ac 100644
--- a/sysdeps/x86_64/multiarch/memmove.S
+++ b/sysdeps/x86_64/multiarch/memmove.S
@@ -27,6 +27,9 @@
 ENTRY(__libc_memmove)
 	.type	__libc_memmove, @gnu_indirect_function
 	LOAD_RTLD_GLOBAL_RO_RDX
+	lea	__memmove_erms(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Prefer_ERMS)
+	jnz	2f
 # ifdef HAVE_AVX512_ASM_SUPPORT
 	HAS_ARCH_FEATURE (AVX512F_Usable)
 	jz	1f
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index f9c6df3..4011a1a 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -29,6 +29,9 @@
 ENTRY(__mempcpy)
 	.type	__mempcpy, @gnu_indirect_function
 	LOAD_RTLD_GLOBAL_RO_RDX
+	lea	__mempcpy_erms(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Prefer_ERMS)
+	jnz	2f
 # ifdef HAVE_AVX512_ASM_SUPPORT
 	HAS_ARCH_FEATURE (AVX512F_Usable)
 	jz	1f
diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S
index 4e52d8f..2b964a0 100644
--- a/sysdeps/x86_64/multiarch/memset.S
+++ b/sysdeps/x86_64/multiarch/memset.S
@@ -26,6 +26,9 @@
 ENTRY(memset)
 	.type	memset, @gnu_indirect_function
 	LOAD_RTLD_GLOBAL_RO_RDX
+	lea	__memset_erms(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Prefer_ERMS)
+	jnz	2f
 	lea	__memset_sse2_unaligned_erms(%rip), %RAX_LP
 	HAS_CPU_FEATURE (ERMS)
 	jnz	1f

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=4c0c381f8c7e4b37cf955a79720299c53bfe393c

commit 4c0c381f8c7e4b37cf955a79720299c53bfe393c
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Mon Jun 27 15:13:50 2016 -0700

    Add GLIBC_IFUNC to control IFUNC selection
    
    The current IFUNC selection is based on microbenchmarks in glibc.  It
    should give the best performance for most workloads.  But other choices
    may have better performance for a particular workload or on the hardware
    which wasn't available when the selection was made.  The environment
    variable, GLIBC_IFUNC=xxx=0:yyy=1:zzz=0...., can be used to enable
    CPU/ARCH feature yyy, disable CPU/ARCH feature xxx and zzz, where the
    feature name is case-sensitive and has to match the ones in
    cpu-features.h.  It can be used by glibc developers to override the
    IFUNC selection to improve performance for a particular workload or
    tune for a new processor.  It isn't intended for normal end users.
    
    	* sysdeps/i386/dl-machine.h (dl_platform_init): Pass the
    	array of environment strings to init_cpu_features.
    	* sysdeps/x86_64/dl-machine.h (dl_platform_init): Likewise.
    	* sysdeps/x86/libc-start.c (__libc_start_main): Likewise.
    	* sysdeps/x86/cpu-features.c (equal): New function.
    	(CHECK_GLIBC_IFUNC_CPU_OFF): New macro.
    	(CHECK_GLIBC_IFUNC_ARCH_OFF): Likewise.
    	(CHECK_GLIBC_IFUNC_ARCH_BOTH): Likewise.
    	(CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH): Likewise.
    	(CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH): Likewise.
    	(init_cpu_features): Updated to take the array of environment
    	strings.  Process GLIBC_IFUNC environment variable.

diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
index 4e3968a..7584931 100644
--- a/sysdeps/i386/dl-machine.h
+++ b/sysdeps/i386/dl-machine.h
@@ -240,7 +240,8 @@ dl_platform_init (void)
 #ifdef SHARED
   /* init_cpu_features has been called early from __libc_start_main in
      static executable.  */
-  init_cpu_features (&GLRO(dl_x86_cpu_features));
+  init_cpu_features (&GLRO(dl_x86_cpu_features),
+		     &_dl_argv[_dl_argc + 1]);
 #endif
 }
 
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 9ce4b49..fcc74c9 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -91,8 +91,141 @@ get_common_indeces (struct cpu_features *cpu_features,
     }
 }
 
+#ifdef __x86_64__
+typedef long long op_t;
+#else
+typedef int op_t;
+#endif
+
+/* Return true if the first LEN bytes of strings A and B are the same
+   where LEN != 0.  We don't use string/memory functions since they may
+   not be available here and this version is faster for its usage.  */
+
+static bool
+equal (const char *a, const char *b, size_t len)
+{
+  size_t op_len = len % sizeof (op_t);
+  if (op_len)
+    {
+      switch (op_len)
+	{
+	case 1:
+	  if (*(char *) a != *(char *) b)
+	    return false;
+	  break;
+	case 2:
+	  if (*(short *) a != *(short *) b)
+	    return false;
+	  break;
+	case 3:
+	  if (*(short *) a != *(short *) b
+	      || *(char *) (a + 2) != *(char *) (b + 2))
+	    return false;
+	  break;
+#ifdef __x86_64__
+	case 4:
+	  if (*(int *) a != *(int *) b)
+	    return false;
+	  break;
+	default:
+	  if (*(int *) a != *(int *) b
+	      || *(int *) (a + op_len - 4) != *(int *) (b + op_len - 4))
+	    return false;
+	  break;
+#else
+	default:
+	  break;
+#endif
+	}
+      /* Align length to size of op_t.  */
+      len -= op_len;
+      if (len == 0)
+	return true;
+      a += op_len;
+      b += op_len;
+    }
+
+  /* Compare one op_t at a time.  */
+  do
+    {
+      if (*(op_t *) a != *(op_t *) b)
+	return false;
+      len -= sizeof (op_t);
+      if (len == 0)
+	return true;
+      a += sizeof (op_t);
+      b += sizeof (op_t);
+    }
+  while (1);
+}
+
+/* Disable a CPU feature by setting "name=0".  We don't enable a CPU
+   feature which isn't available.  */
+#define CHECK_GLIBC_IFUNC_CPU_OFF(name)					\
+  if (equal (p, #name "=", sizeof (#name)))				\
+    {									\
+      if (p[sizeof (#name)] == '0')					\
+	cpu_features->cpuid[index_cpu_##name].reg_##name		\
+	  &= ~bit_cpu_##name;						\
+      break;								\
+    }
+
+/* Disable an ARCH feature by setting "name=0".  We don't enable an
+   ARCH feature which isn't available or has security implications.  */
+#define CHECK_GLIBC_IFUNC_ARCH_OFF(name)				\
+  if (equal (p, #name "=", sizeof (#name)))				\
+    {									\
+      if (p[sizeof (#name)] == '0')					\
+	cpu_features->feature[index_arch_##name]			\
+	  &= ~bit_arch_##name;						\
+      break;								\
+    }
+
+/* Enable/disable an ARCH feature by setting "name=1"/"name=0".  */
+#define CHECK_GLIBC_IFUNC_ARCH_BOTH(name)				\
+  if (equal (p, #name "=", sizeof (#name)))				\
+    {									\
+      if (p[sizeof (#name)] == '0')					\
+	cpu_features->feature[index_arch_##name]			\
+	  &= ~bit_arch_##name;						\
+      else if (p[sizeof (#name)] == '1')				\
+	cpu_features->feature[index_arch_##name]			\
+	  |= bit_arch_##name;						\
+      break;								\
+    }
+
+/* Disable an ARCH feature by setting "name=0".  Enable an ARCH feature
+   by setting "name=1" if the ARCH feature NEED is also enabled.  */
+#define CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH(name, need)		\
+  if (equal (p, #name "=", sizeof (#name)))				\
+    {									\
+      if (p[sizeof (#name)] == '0')					\
+	cpu_features->feature[index_arch_##name]			\
+	  &= ~bit_arch_##name;						\
+      else if (p[sizeof (#name)] == '1'					\
+	       && CPU_FEATURES_ARCH_P (cpu_features, need))		\
+	cpu_features->feature[index_arch_##name]			\
+	  |= bit_arch_##name;						\
+      break;								\
+    }
+
+/* Disable an ARCH feature by setting "name=0".  Enable an ARCH feature
+   by setting "name=1" if the CPU feature NEED is also enabled.  */
+#define CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH(name, need)		\
+  if (equal (p, #name "=", sizeof (#name)))				\
+    {									\
+      if (p[sizeof (#name)] == '0')					\
+	cpu_features->feature[index_arch_##name]			\
+	  &= ~bit_arch_##name;						\
+      else if (p[sizeof (#name)] == '1'					\
+	       && CPU_FEATURES_CPU_P (cpu_features, need))		\
+	cpu_features->feature[index_arch_##name]			\
+	  |= bit_arch_##name;						\
+      break;								\
+    }
+
 static inline void
-init_cpu_features (struct cpu_features *cpu_features)
+init_cpu_features (struct cpu_features *cpu_features, char **env)
 {
   unsigned int ebx, ecx, edx;
   unsigned int family = 0;
@@ -268,4 +401,116 @@ no_cpuid:
   cpu_features->family = family;
   cpu_features->model = model;
   cpu_features->kind = kind;
+
+  if (env == NULL)
+    return;
+
+  /* The current IFUNC selection is based on microbenchmarks in glibc.
+     It should give the best performance for most workloads.  But other
+     choices may have better performance for a particular workload or on
+     the hardware which wasn't available when the selection was made.  The
+     environment variable, GLIBC_IFUNC=xxx=0:yyy=1:zzz=0...., can be
+     used to enable CPU/ARCH feature yyy, disable CPU/ARCH feature xxx
+     and zzz, where the feature name is case-sensitive and has to match
+     the ones in cpu-features.h.  It can be used by glibc developers to
+     override the IFUNC selection to improve performance for a particular
+     workload or tune for a new processor.  */
+  while (*env != NULL)
+    {
+      const char *p, *end;
+      size_t len = sizeof ("GLIBC_IFUNC=");
+      end = *env;
+      for (p = end; *p != '\0'; p++)
+	if (--len == 0 && equal (end, "GLIBC_IFUNC=", 12))
+	  {
+	    len = strlen (p);
+	    end = p + len;
+	    do
+	      {
+		const char *c;
+		for (c = p; *c != ':'; c++)
+		  if (c >= end)
+		    break;
+		len = c - p;
+		switch (len)
+		  {
+		  default:
+		    break;
+		  case 5:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (AVX);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (CX8);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (FMA);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (HTT);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (RTM);
+		    break;
+		  case 6:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (AVX2);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (CMOV);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (ERMS);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (FMA4);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (SSE2);
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (I586);
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (I686);
+		    break;
+		  case 7:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (SSSE3);
+		    break;
+		  case 8:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (SSE4_1);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (SSE4_2);
+		    break;
+		  case 9:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (AVX512F);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (OSXSAVE);
+		    break;
+		  case 10:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (AVX512DQ);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (POPCOUNT);
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Slow_BSF);
+		    break;
+		  case 12:
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (AVX_Usable);
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (FMA_Usable);
+		    break;
+		  case 13:
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (AVX2_Usable);
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (FMA4_Usable);
+		    CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH (Slow_SSE4_2,
+							  SSE4_2);
+		    break;
+		  case 15:
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (AVX512F_Usable);
+		    CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
+		      (AVX_Fast_Unaligned_Load, AVX_Usable);
+		    break;
+		  case 17:
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (AVX512DQ_Usable);
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Rep_String);
+		    break;
+		  case 20:
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Copy_Backward);
+		    break;
+		  case 21:
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Unaligned_Load);
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Unaligned_Copy);
+		    break;
+		  case 22:
+		    CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
+		      (Prefer_No_VZEROUPPER, AVX_Usable);
+		    break;
+		  case 23:
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (Prefer_MAP_32BIT_EXEC);
+		    break;
+		  case 28:
+		    CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH
+		      (Prefer_PMINUB_for_stringop, SSE2);
+		    break;
+		  }
+		p += len + 1;
+	      }
+	    while (p < end);
+	    return;
+	  }
+      env++;
+    }
 }
diff --git a/sysdeps/x86/libc-start.c b/sysdeps/x86/libc-start.c
index 3b5ea6e..7dec1ca 100644
--- a/sysdeps/x86/libc-start.c
+++ b/sysdeps/x86/libc-start.c
@@ -34,7 +34,7 @@ __libc_start_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
 		   void (*fini) (void),
 		   void (*rtld_fini) (void), void *stack_end)
 {
-  init_cpu_features (&_dl_x86_cpu_features);
+  init_cpu_features (&_dl_x86_cpu_features, &argv[argc + 1]);
   return generic_start_main (main, argc, argv, init, fini, rtld_fini,
 			     stack_end);
 }
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index ed0c1a8..071a2e1 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -227,7 +227,8 @@ dl_platform_init (void)
 #ifdef SHARED
   /* init_cpu_features has been called early from __libc_start_main in
      static executable.  */
-  init_cpu_features (&GLRO(dl_x86_cpu_features));
+  init_cpu_features (&GLRO(dl_x86_cpu_features),
+		     &_dl_argv[_dl_argc + 1]);
 #endif
 }
 

-----------------------------------------------------------------------


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]