This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
PATCH: Check GLIBC_IFUNC to enable/disable ifunc features
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: "Carlos O'Donell" <carlos at redhat dot com>, GNU C Library <libc-alpha at sourceware dot org>, Siddhesh Poyarekar <sid at reserved-bit dot com>
- Date: Wed, 29 Jun 2016 11:15:22 -0700
- Subject: PATCH: Check GLIBC_IFUNC to enable/disable ifunc features
- Authentication-results: sourceware.org; auth=none
On Tue, Jun 28, 2016 at 7:28 PM, Mike Frysinger <vapier@gentoo.org> wrote:
> should have a comment or two mentioning why you need to code all of
> these things ad-hoc
>
> also needs a manual update
>
I am not sure if it needs manual update. I consider it more for glibc
developers than for normal end users.
> seems like the feature compare is case-sensitive ? but some of the
> feature strings are inconsistent case ? would be nice if they were
> standardized (like all lower case).
> -mike
I'd like to match the ones in cpu-features.h so that one can cut and
paste from it.
Here is the updated patch. Any more feedbacks?
Thanks.
--
H.J.
From 471248bb2d34ed027ed06487210db8dcd93cbe1b Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Mon, 27 Jun 2016 15:13:50 -0700
Subject: [PATCH] Add environment variable, GLIBC_IFUNC, to control IFUNC
selection
The current IFUNC selection is based on microbenchmarks in glibc. It
should give the best performance for most workloads. But other choices
may have better performance for a particular workload or the hardware
is too new to make the best choice. We provide an environment variable,
GLIBC_IFUNC=xxx=0:yyy=1:zzz=0...., to enable CPU/ARCH feature yyy,
disable CPU/ARCH feature yyy and zzz, where the feature name is
case-sensitive and has to match the ones in cpu-features.h. It can be
used by glibc developers to override the IFUNC selection to improve
performance for a particular workload or tune for a new processor. It
isn't intended for normal end users.
* sysdeps/i386/dl-machine.h (dl_platform_init): Pass the
array of environment strings to init_cpu_features.
* sysdeps/x86/libc-start.c (__libc_start_main): Likewise.
* sysdeps/x86/cpu-features.c (equal): New function.
(CHECK_GLIBC_IFUNC_CPU_OFF): New macro.
(CHECK_GLIBC_IFUNC_ARCH_OFF): Likewise.
(CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH): Likewise.
(CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH): Likewise.
(init_cpu_features): Updated to take the array of environment
strings. Process GLIBC_IFUNC environment variable.
---
sysdeps/i386/dl-machine.h | 3 +-
sysdeps/x86/cpu-features.c | 245 +++++++++++++++++++++++++++++++++++++++++++-
sysdeps/x86/libc-start.c | 2 +-
sysdeps/x86_64/dl-machine.h | 3 +-
4 files changed, 249 insertions(+), 4 deletions(-)
diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
index 4e3968a..7584931 100644
--- a/sysdeps/i386/dl-machine.h
+++ b/sysdeps/i386/dl-machine.h
@@ -240,7 +240,8 @@ dl_platform_init (void)
#ifdef SHARED
/* init_cpu_features has been called early from __libc_start_main in
static executable. */
- init_cpu_features (&GLRO(dl_x86_cpu_features));
+ init_cpu_features (&GLRO(dl_x86_cpu_features),
+ &_dl_argv[_dl_argc + 1]);
#endif
}
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 9ce4b49..0e5765e 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -91,8 +91,139 @@ get_common_indeces (struct cpu_features *cpu_features,
}
}
+#ifdef __x86_64__
+typedef long long op_t;
+#else
+typedef int op_t;
+#endif
+
+/* Return true if the first LEN bytes of strings A and B are the same
+ where LEN != 0. We don't use string/memory functions since they may
+ not be available here and this version is faster for its usage. */
+
+static bool
+equal (const char *a, const char *b, size_t len)
+{
+ size_t op_len = len % sizeof (op_t);
+ if (op_len)
+ {
+ switch (op_len)
+ {
+ case 1:
+ if (*(char *) a != *(char *) b)
+ return false;
+ break;
+ case 2:
+ if (*(short *) a != *(short *) b)
+ return false;
+ break;
+ case 3:
+ if (*(short *) a != *(short *) b
+ || *(char *) (a + 2) != *(char *) (b + 2))
+ return false;
+ break;
+#ifdef __x86_64__
+ case 4:
+ if (*(int *) a != *(int *) b)
+ return false;
+ break;
+ default:
+ if (*(int *) a != *(int *) b
+ || *(int *) (a + op_len - 4) != *(int *) (b + op_len - 4))
+ return false;
+ break;
+#else
+ default:
+ break;
+#endif
+ }
+ /* Align length to size of op_t. */
+ len -= op_len;
+ if (len == 0)
+ return true;
+ }
+
+ /* Compare one op_t at a time. */
+ do
+ {
+ if (*(op_t *) a != *(op_t *) b)
+ return false;
+ len -= sizeof (op_t);
+ if (len == 0)
+ return true;
+ a += sizeof (op_t);
+ b += sizeof (op_t);
+ }
+ while (1);
+}
+
+/* Disable a CPU feature by setting "name=0". We don't enable a CPU
+ feature which isn't availble. */
+#define CHECK_GLIBC_IFUNC_CPU_OFF(name) \
+ if (equal (p, #name "=", sizeof (#name))) \
+ { \
+ if (p[sizeof (#name)] == '0') \
+ cpu_features->cpuid[index_cpu_##name].reg_##name \
+ &= ~bit_cpu_##name; \
+ break; \
+ }
+
+/* Disable an ARCH feature by setting "name=0". We don't enable an
+ ARCH feature which isn't availble or has security implication. */
+#define CHECK_GLIBC_IFUNC_ARCH_OFF(name) \
+ if (equal (p, #name "=", sizeof (#name))) \
+ { \
+ if (p[sizeof (#name)] == '0') \
+ cpu_features->feature[index_arch_##name] \
+ &= ~bit_arch_##name; \
+ break; \
+ }
+
+/* Enable/disable an ARCH feature by setting "name=1"/"name=0". */
+#define CHECK_GLIBC_IFUNC_ARCH_BOTH(name) \
+ if (equal (p, #name "=", sizeof (#name))) \
+ { \
+ if (p[sizeof (#name)] == '0') \
+ cpu_features->feature[index_arch_##name] \
+ &= ~bit_arch_##name; \
+ else if (p[sizeof (#name)] == '1') \
+ cpu_features->feature[index_arch_##name] \
+ |= bit_arch_##name; \
+ break; \
+ }
+
+/* Disable an ARCH feature by setting "name=0". Enable an ARCH feature
+ by setting "name=1" if the ARCH feature NEED is also enabled. */
+#define CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH(name, need) \
+ if (equal (p, #name "=", sizeof (#name))) \
+ { \
+ if (p[sizeof (#name)] == '0') \
+ cpu_features->feature[index_arch_##name] \
+ &= ~bit_arch_##name; \
+ else if (p[sizeof (#name)] == '1' \
+ && CPU_FEATURES_ARCH_P (cpu_features, need)) \
+ cpu_features->feature[index_arch_##name] \
+ |= bit_arch_##name; \
+ break; \
+ }
+
+/* Disable an ARCH feature by setting "name=0". Enable an ARCH feature
+ by setting "name=1" if the CPU feature NEED is also enabled. */
+#define CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH(name, need) \
+ if (equal (p, #name "=", sizeof (#name))) \
+ { \
+ if (p[sizeof (#name)] == '0') \
+ cpu_features->feature[index_arch_##name] \
+ &= ~bit_arch_##name; \
+ else if (p[sizeof (#name)] == '1' \
+ && CPU_FEATURES_CPU_P (cpu_features, need)) \
+ cpu_features->feature[index_arch_##name] \
+ |= bit_arch_##name; \
+ break; \
+ }
+
static inline void
-init_cpu_features (struct cpu_features *cpu_features)
+init_cpu_features (struct cpu_features *cpu_features, char **env)
{
unsigned int ebx, ecx, edx;
unsigned int family = 0;
@@ -268,4 +399,116 @@ no_cpuid:
cpu_features->family = family;
cpu_features->model = model;
cpu_features->kind = kind;
+
+ if (env == NULL)
+ return;
+
+ /* The current IFUNC selection is based on microbenchmarks in glibc.
+ It should give the best performance for most workloads. But other
+ choices may have better performance for a particular workload or
+ the hardware is too new to make the best choice. We provide an
+ environment variable, GLIBC_IFUNC=xxx=0:yyy=1:zzz=0...., to enable
+ CPU/ARCH feature yyy, disable CPU/ARCH feature yyy and zzz, where
+ the feature name is case-sensitive and has to match the ones in
+ cpu-features.h. It can be used by glibc developers to override
+ the IFUNC selection to improve performance for a particular
+ workload or tune for a new processor. */
+ while (*env != NULL)
+ {
+ const char *p, *end;
+ size_t len = 13;
+ end = *env;
+ for (p = end; *p != '\0'; p++)
+ if (--len == 0 && equal (end, "GLIBC_IFUNC=", 12))
+ {
+ len = strlen (p);
+ end = p + len;
+ do
+ {
+ const char *c;
+ for (c = p; *c != ':'; c++)
+ if (c >= end)
+ break;
+ len = c - p;
+ switch (len)
+ {
+ default:
+ break;
+ case 5:
+ CHECK_GLIBC_IFUNC_CPU_OFF (AVX);
+ CHECK_GLIBC_IFUNC_CPU_OFF (CX8);
+ CHECK_GLIBC_IFUNC_CPU_OFF (FMA);
+ CHECK_GLIBC_IFUNC_CPU_OFF (HTT);
+ CHECK_GLIBC_IFUNC_CPU_OFF (RTM);
+ break;
+ case 6:
+ CHECK_GLIBC_IFUNC_CPU_OFF (AVX2);
+ CHECK_GLIBC_IFUNC_CPU_OFF (CMOV);
+ CHECK_GLIBC_IFUNC_CPU_OFF (ERMS);
+ CHECK_GLIBC_IFUNC_CPU_OFF (FMA4);
+ CHECK_GLIBC_IFUNC_CPU_OFF (SSE2);
+ CHECK_GLIBC_IFUNC_ARCH_OFF (I586);
+ CHECK_GLIBC_IFUNC_ARCH_OFF (I686);
+ break;
+ case 7:
+ CHECK_GLIBC_IFUNC_CPU_OFF (SSSE3);
+ break;
+ case 8:
+ CHECK_GLIBC_IFUNC_CPU_OFF (SSE4_1);
+ CHECK_GLIBC_IFUNC_CPU_OFF (SSE4_2);
+ break;
+ case 9:
+ CHECK_GLIBC_IFUNC_CPU_OFF (AVX512F);
+ CHECK_GLIBC_IFUNC_CPU_OFF (OSXSAVE);
+ break;
+ case 10:
+ CHECK_GLIBC_IFUNC_CPU_OFF (AVX512DQ);
+ CHECK_GLIBC_IFUNC_CPU_OFF (POPCOUNT);
+ CHECK_GLIBC_IFUNC_ARCH_BOTH (Slow_BSF);
+ break;
+ case 12:
+ CHECK_GLIBC_IFUNC_ARCH_OFF (AVX_Usable);
+ CHECK_GLIBC_IFUNC_ARCH_OFF (FMA_Usable);
+ break;
+ case 13:
+ CHECK_GLIBC_IFUNC_ARCH_OFF (AVX2_Usable);
+ CHECK_GLIBC_IFUNC_ARCH_OFF (FMA4_Usable);
+ CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH (Slow_SSE4_2,
+ SSE4_2);
+ break;
+ case 15:
+ CHECK_GLIBC_IFUNC_ARCH_OFF (AVX512F_Usable);
+ CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
+ (AVX_Fast_Unaligned_Load, AVX_Usable);
+ break;
+ case 17:
+ CHECK_GLIBC_IFUNC_ARCH_OFF (AVX512DQ_Usable);
+ CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Rep_String);
+ break;
+ case 20:
+ CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Copy_Backward);
+ break;
+ case 21:
+ CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Unaligned_Load);
+ CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Unaligned_Copy);
+ break;
+ case 22:
+ CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
+ (Prefer_No_VZEROUPPER, AVX_Usable);
+ break;
+ case 23:
+ CHECK_GLIBC_IFUNC_ARCH_OFF (Prefer_MAP_32BIT_EXEC);
+ break;
+ case 28:
+ CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH
+ (Prefer_PMINUB_for_stringop, SSE2);
+ break;
+ }
+ p += len + 1;
+ }
+ while (p < end);
+ return;
+ }
+ env++;
+ }
}
diff --git a/sysdeps/x86/libc-start.c b/sysdeps/x86/libc-start.c
index 3b5ea6e..7dec1ca 100644
--- a/sysdeps/x86/libc-start.c
+++ b/sysdeps/x86/libc-start.c
@@ -34,7 +34,7 @@ __libc_start_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
void (*fini) (void),
void (*rtld_fini) (void), void *stack_end)
{
- init_cpu_features (&_dl_x86_cpu_features);
+ init_cpu_features (&_dl_x86_cpu_features, &argv[argc + 1]);
return generic_start_main (main, argc, argv, init, fini, rtld_fini,
stack_end);
}
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index ed0c1a8..071a2e1 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -227,7 +227,8 @@ dl_platform_init (void)
#ifdef SHARED
/* init_cpu_features has been called early from __libc_start_main in
static executable. */
- init_cpu_features (&GLRO(dl_x86_cpu_features));
+ init_cpu_features (&GLRO(dl_x86_cpu_features),
+ &_dl_argv[_dl_argc + 1]);
#endif
}
--
2.7.4