This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
PATCH: Support multiarch for i686
- From: "H.J. Lu" <hongjiu dot lu at intel dot com>
- To: GNU C Library <libc-alpha at sourceware dot org>
- Date: Thu, 30 Jul 2009 14:33:11 -0700
- Subject: PATCH: Support multiarch for i686
- Reply-to: "H.J. Lu" <hjl dot tools at gmail dot com>
Hi,
This patch adds multiarch support when glibc is configured for i686. I modified
some x86-64 functions so that they also support 32-bit builds. I will contribute
32-bit SSE string and memory functions later.
Thanks.
H.J.
---
2009-07-30 H.J. Lu <hongjiu.lu@intel.com>
* sysdeps/i386/i686/Makefile (sysdep_routines): Add cacheinfo.
* sysdeps/i386/i686/cacheinfo.c: New.
* sysdeps/i386/i686/multiarch/Makefile: Likewise.
* sysdeps/i386/i686/multiarch/ifunc-defines.sym: Likewise.
* sysdeps/i386/i686/multiarch/init-arch.c: Likewise.
* sysdeps/i386/i686/multiarch/init-arch.h: Likewise.
* sysdeps/i386/i686/multiarch/sched_cpucount.c: Likewise.
* sysdeps/unix/sysv/linux/i386/i686/sysconf.c: Likewise.
* sysdeps/x86_64/cacheinfo.c: Include <cpuid.h>.
(intel_check_word): Use __cpuid.
(handle_intel): Likewise.
(handle_amd): Likewise.
(__cache_sysconf): Likewise.
(init_cacheinfo): Likewise.
(__x86_64_prefetchw): Define only if __x86_64__ is defined.
(__x86_64_preferred_memory_instruction): Likewise.
* sysdeps/x86_64/multiarch/init-arch.c: Include <cpuid.h>.
(get_common_indeces): Use __cpuid.
(__init_cpu_features): Likewise. Turn off SSSE3 on Atom only
in 64bit.
* sysdeps/x86_64/multiarch/init-arch.h (HAS_SSE2): New.
* sysdeps/x86_64/multiarch/sched_cpucount.c (POPCNT): Use
popcnt instead of popcntq.
diff --git a/sysdeps/i386/i686/Makefile b/sysdeps/i386/i686/Makefile
index c7378ab..dbcf1c3 100644
--- a/sysdeps/i386/i686/Makefile
+++ b/sysdeps/i386/i686/Makefile
@@ -5,3 +5,7 @@ endif
# So that we can test __m128's alignment
stack-align-test-flags += -msse
+
+ifeq ($(subdir),string)
+sysdep_routines += cacheinfo
+endif
diff --git a/sysdeps/i386/i686/cacheinfo.c b/sysdeps/i386/i686/cacheinfo.c
new file mode 100644
index 0000000..d0f4c0b
--- /dev/null
+++ b/sysdeps/i386/i686/cacheinfo.c
@@ -0,0 +1,5 @@
+#define __x86_64_data_cache_size_half __x86_data_cache_size_half
+#define __x86_64_shared_cache_size __x86_shared_cache_size
+#define __x86_64_shared_cache_size_half __x86_shared_cache_size_half
+
+#include <sysdeps/x86_64/cacheinfo.c>
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
new file mode 100644
index 0000000..33d98c3
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -0,0 +1,4 @@
+ifeq ($(subdir),csu)
+aux += init-arch
+gen-as-const-headers += ifunc-defines.sym
+endif
diff --git a/sysdeps/i386/i686/multiarch/ifunc-defines.sym b/sysdeps/i386/i686/multiarch/ifunc-defines.sym
new file mode 100644
index 0000000..e2021cd
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/ifunc-defines.sym
@@ -0,0 +1,17 @@
+#include "init-arch.h"
+#include <stddef.h>
+
+--
+
+CPU_FEATURES_SIZE sizeof (struct cpu_features)
+KIND_OFFSET offsetof (struct cpu_features, kind)
+CPUID_OFFSET offsetof (struct cpu_features, cpuid)
+CPUID_SIZE sizeof (struct cpuid_registers)
+CPUID_EAX_OFFSET offsetof (struct cpuid_registers, eax)
+CPUID_EBX_OFFSET offsetof (struct cpuid_registers, ebx)
+CPUID_ECX_OFFSET offsetof (struct cpuid_registers, ecx)
+CPUID_EDX_OFFSET offsetof (struct cpuid_registers, edx)
+FAMILY_OFFSET offsetof (struct cpu_features, family)
+MODEL_OFFSET offsetof (struct cpu_features, model)
+
+COMMON_CPUID_INDEX_1
diff --git a/sysdeps/i386/i686/multiarch/init-arch.c b/sysdeps/i386/i686/multiarch/init-arch.c
new file mode 100644
index 0000000..00a94d8
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/init-arch.c
@@ -0,0 +1 @@
+#include <sysdeps/x86_64/multiarch/init-arch.c>
diff --git a/sysdeps/i386/i686/multiarch/init-arch.h b/sysdeps/i386/i686/multiarch/init-arch.h
new file mode 100644
index 0000000..cd2d0be
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/init-arch.h
@@ -0,0 +1 @@
+#include <sysdeps/x86_64/multiarch/init-arch.h>
diff --git a/sysdeps/i386/i686/multiarch/sched_cpucount.c b/sysdeps/i386/i686/multiarch/sched_cpucount.c
new file mode 100644
index 0000000..7db31b0
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/sched_cpucount.c
@@ -0,0 +1 @@
+#include <sysdeps/x86_64/multiarch/sched_cpucount.c>
diff --git a/sysdeps/unix/sysv/linux/i386/i686/sysconf.c b/sysdeps/unix/sysv/linux/i386/i686/sysconf.c
new file mode 100644
index 0000000..cf79750
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/i386/i686/sysconf.c
@@ -0,0 +1 @@
+#include <sysdeps/unix/sysv/linux/x86_64/sysconf.c>
diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c
index 75b8195..4a8119b 100644
--- a/sysdeps/x86_64/cacheinfo.c
+++ b/sysdeps/x86_64/cacheinfo.c
@@ -22,6 +22,7 @@
#include <stdbool.h>
#include <stdlib.h>
#include <unistd.h>
+#include <cpuid.h>
#ifdef USE_MULTIARCH
# include "multiarch/init-arch.h"
@@ -176,9 +177,7 @@ intel_check_word (int name, unsigned int value, bool *has_level_2,
unsigned int ebx;
unsigned int ecx;
unsigned int edx;
- asm volatile ("cpuid"
- : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
- : "0" (1));
+ __cpuid (1, eax, ebx, ecx, edx);
family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
model = (((eax >>16) & 0xf) << 4) + ((eax >> 4) & 0xf);
@@ -250,9 +249,7 @@ handle_intel (int name, unsigned int maxidx)
unsigned int ebx;
unsigned int ecx;
unsigned int edx;
- asm volatile ("cpuid"
- : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
- : "0" (2));
+ __cpuid (2, eax, ebx, ecx, edx);
/* The low byte of EAX in the first round contain the number of
rounds we have to make. At least one, the one we are already
@@ -296,9 +293,7 @@ handle_amd (int name)
unsigned int ebx;
unsigned int ecx;
unsigned int edx;
- asm volatile ("cpuid"
- : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
- : "0" (0x80000000));
+ __cpuid (0x80000000, eax, ebx, ecx, edx);
/* No level 4 cache (yet). */
if (name > _SC_LEVEL3_CACHE_LINESIZE)
@@ -308,9 +303,7 @@ handle_amd (int name)
if (eax < fn)
return 0;
- asm volatile ("cpuid"
- : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
- : "0" (fn));
+ __cpuid (fn, eax, ebx, ecx, edx);
if (name < _SC_LEVEL1_DCACHE_SIZE)
{
@@ -424,9 +417,7 @@ __cache_sysconf (int name)
unsigned int ebx;
unsigned int ecx;
unsigned int edx;
- asm volatile ("cpuid"
- : "=a" (max_cpuid), "=b" (ebx), "=c" (ecx), "=d" (edx)
- : "0" (0));
+ __cpuid (0, max_cpuid, ebx, ecx, edx);
#endif
if (is_intel)
@@ -449,6 +440,8 @@ long int __x86_64_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
L2 or L3 size. */
long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
long int __x86_64_shared_cache_size attribute_hidden = 1024 * 1024;
+
+#ifdef __x86_64__
/* PREFETCHW support flag for use in memory and string routines. */
int __x86_64_prefetchw attribute_hidden;
@@ -461,6 +454,7 @@ int __x86_64_prefetchw attribute_hidden;
*/
int __x86_64_preferred_memory_instruction attribute_hidden;
+#endif
static void
@@ -483,9 +477,7 @@ init_cacheinfo (void)
__init_cpu_features ();
#else
int max_cpuid;
- asm volatile ("cpuid"
- : "=a" (max_cpuid), "=b" (ebx), "=c" (ecx), "=d" (edx)
- : "0" (0));
+ __cpuid (0, max_cpuid, ebx, ecx, edx);
#endif
if (is_intel)
@@ -509,17 +501,17 @@ init_cacheinfo (void)
ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
#else
- asm volatile ("cpuid"
- : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
- : "0" (1));
+ __cpuid (1, eax, ebx, ecx, edx);
#endif
+#ifdef __x86_64__
/* Intel prefers SSSE3 instructions for memory/string routines
if they are avaiable. */
if ((ecx & 0x200))
__x86_64_preferred_memory_instruction = 3;
else
__x86_64_preferred_memory_instruction = 2;
+#endif
/* Figure out the number of logical threads that share the
highest cache level. */
@@ -530,9 +522,7 @@ init_cacheinfo (void)
/* Query until desired cache level is enumerated. */
do
{
- asm volatile ("cpuid"
- : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
- : "0" (4), "2" (i++));
+ __cpuid_count (4, i++, eax, ebx, ecx, edx);
/* There seems to be a bug in at least some Pentium Ds
which sometimes fail to iterate all cache parameters.
@@ -566,9 +556,7 @@ init_cacheinfo (void)
shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
/* Get maximum extended function. */
- asm volatile ("cpuid"
- : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx), "=d" (edx)
- : "0" (0x80000000));
+ __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
if (shared <= 0)
/* No shared L3 cache. All we have is the L2 cache. */
@@ -579,10 +567,7 @@ init_cacheinfo (void)
if (max_cpuid_ex >= 0x80000008)
{
/* Get width of APIC ID. */
- asm volatile ("cpuid"
- : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx),
- "=d" (edx)
- : "0" (0x80000008));
+ __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
threads = 1 << ((ecx >> 12) & 0x0f);
}
@@ -590,10 +575,7 @@ init_cacheinfo (void)
{
/* If APIC ID width is not available, use logical
processor count. */
- asm volatile ("cpuid"
- : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx),
- "=d" (edx)
- : "0" (0x00000001));
+ __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);
if ((edx & (1 << 28)) != 0)
threads = (ebx >> 16) & 0xff;
@@ -608,15 +590,15 @@ init_cacheinfo (void)
shared += core;
}
+#ifdef __x86_64__
if (max_cpuid_ex >= 0x80000001)
{
- asm volatile ("cpuid"
- : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
- : "0" (0x80000001));
+ __cpuid (0x80000001, eax, ebx, ecx, edx);
/* PREFETCHW || 3DNow! */
if ((ecx & 0x100) || (edx & 0x80000000))
__x86_64_prefetchw = -1;
}
+#endif
}
if (data > 0)
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 49b421e..5b7a657 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -18,6 +18,7 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
+#include <cpuid.h>
#include "init-arch.h"
@@ -27,12 +28,10 @@ struct cpu_features __cpu_features attribute_hidden;
static void
get_common_indeces (void)
{
- asm volatile ("cpuid"
- : "=a" (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax),
- "=b" (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx),
- "=c" (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx),
- "=d" (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx)
- : "0" (1));
+ __cpuid (1, __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax,
+ __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx,
+ __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx,
+ __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx);
unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
__cpu_features.family = (eax >> 8) & 0x0f;
@@ -47,10 +46,7 @@ __init_cpu_features (void)
unsigned int ecx;
unsigned int edx;
- asm volatile ("cpuid"
- : "=a" (__cpu_features.max_cpuid), "=b" (ebx), "=c" (ecx),
- "=d" (edx)
- : "0" (0));
+ __cpuid (0, __cpu_features.max_cpuid, ebx, ecx, edx);
/* This spells out "GenuineIntel". */
if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
@@ -71,9 +67,11 @@ __init_cpu_features (void)
{
__cpu_features.model += extended_model;
+#ifdef __x86_64__
if (__cpu_features.model == 0x1c)
- /* Avoid SSSE3 on Atom since it is slow. */
+ /* Avoid SSSE3 on Atom in 64bit since it is slow. */
__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~(1 << 9);
+#endif
}
}
/* This spells out "AuthenticAMD". */
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 0151e8b..8d9b1e8 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -61,6 +61,9 @@ extern const struct cpu_features *__get_cpu_features (void)
/* Following are the feature tests used throughout libc. */
#ifndef NOT_IN_libc
+# define HAS_SSE2 \
+ ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx & (1 << 26)) != 0)
+
# define HAS_POPCOUNT \
((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 23)) != 0)
@@ -70,6 +73,9 @@ extern const struct cpu_features *__get_cpu_features (void)
# define HAS_FMA \
((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 12)) != 0)
#else
+# define HAS_SSE2 \
+ ((__get_cpu_features ()->cpuid[COMMON_CPUID_INDEX_1].edx & (1 << 26)) != 0)
+
# define HAS_POPCOUNT \
((__get_cpu_features ()->cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 23)) != 0)
diff --git a/sysdeps/x86_64/multiarch/sched_cpucount.c b/sysdeps/x86_64/multiarch/sched_cpucount.c
index b6f425e..fde6dcc 100644
--- a/sysdeps/x86_64/multiarch/sched_cpucount.c
+++ b/sysdeps/x86_64/multiarch/sched_cpucount.c
@@ -27,7 +27,7 @@
#define POPCNT(l) \
({ __cpu_mask r; \
- asm ("popcntq %1, %0" : "=r" (r) : "0" (l));\
+ asm ("popcnt %1, %0" : "=r" (r) : "0" (l));\
r; })
#define __sched_cpucount static popcount_cpucount
#include <posix/sched_cpucount.c>