This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch master updated. glibc-2.19-153-g2d63a51
- From: hjl at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 13 Mar 2014 18:20:27 -0000
- Subject: GNU C Library master sources branch master updated. glibc-2.19-153-g2d63a51
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master has been updated
via 2d63a517e4084ec80403cd9f278690fa8b676cc4 (commit)
from 44c4e5d598bfcbb309f05ceb7a57ab02662e7f34 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=2d63a517e4084ec80403cd9f278690fa8b676cc4
commit 2d63a517e4084ec80403cd9f278690fa8b676cc4
Author: Igor Zamyatin <igor.zamyatin@intel.com>
Date: Thu Mar 13 11:10:22 2014 -0700
Save and restore AVX-512 zmm registers to x86-64 ld.so
AVX-512 ISA adds 512-bit zmm registers. This patch updates
_dl_runtime_profile to pass zmm registers to run-time audit. It also
changes _dl_x86_64_save_sse and _dl_x86_64_restore_sse to upport zmm
registers, which are called when only when RTLD_PREPARE_FOREIGN_CALL
is used. Its performance impact is minimum.
* config.h.in (HAVE_AVX512_SUPPORT): New #undef.
(HAVE_AVX512_ASM_SUPPORT): Likewise.
* sysdeps/x86_64/bits/link.h (La_x86_64_zmm): New.
(La_x86_64_vector): Add zmm.
* sysdeps/x86_64/Makefile (tests): Add tst-audit10.
(modules-names): Add tst-auditmod10a and tst-auditmod10b.
($(objpfx)tst-audit10): New target.
($(objpfx)tst-audit10.out): Likewise.
(tst-audit10-ENV): New.
(AVX512-CFLAGS): Likewise.
(CFLAGS-tst-audit10.c): Likewise.
(CFLAGS-tst-auditmod10a.c): Likewise.
(CFLAGS-tst-auditmod10b.c): Likewise.
* sysdeps/x86_64/configure.ac: Set config-cflags-avx512,
HAVE_AVX512_SUPPORT and HAVE_AVX512_ASM_SUPPORT.
* sysdeps/x86_64/configure: Regenerated.
* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Add
AVX-512 zmm register support.
(_dl_x86_64_save_sse): Likewise.
(_dl_x86_64_restore_sse): Likewise.
* sysdeps/x86_64/dl-trampoline.h: Updated to support different
size vector registers.
* sysdeps/x86_64/link-defines.sym (YMM_SIZE): New.
(ZMM_SIZE): Likewise.
* sysdeps/x86_64/tst-audit10.c: New file.
* sysdeps/x86_64/tst-auditmod10a.c: Likewise.
* sysdeps/x86_64/tst-auditmod10b.c: Likewise.
diff --git a/ChangeLog b/ChangeLog
index b9f4790..e6ac778 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,33 @@
+2014-03-13 Igor Zamyatin <igor.zamyatin@intel.com>
+
+ * config.h.in (HAVE_AVX512_SUPPORT): New #undef.
+ (HAVE_AVX512_ASM_SUPPORT): Likewise.
+ * sysdeps/x86_64/bits/link.h (La_x86_64_zmm): New.
+ (La_x86_64_vector): Add zmm.
+ * sysdeps/x86_64/Makefile (tests): Add tst-audit10.
+ (modules-names): Add tst-auditmod10a and tst-auditmod10b.
+ ($(objpfx)tst-audit10): New target.
+ ($(objpfx)tst-audit10.out): Likewise.
+ (tst-audit10-ENV): New.
+ (AVX512-CFLAGS): Likewise.
+ (CFLAGS-tst-audit10.c): Likewise.
+ (CFLAGS-tst-auditmod10a.c): Likewise.
+ (CFLAGS-tst-auditmod10b.c): Likewise.
+ * sysdeps/x86_64/configure.ac: Set config-cflags-avx512,
+ HAVE_AVX512_SUPPORT and HAVE_AVX512_ASM_SUPPORT.
+ * sysdeps/x86_64/configure: Regenerated.
+ * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Add
+ AVX-512 zmm register support.
+ (_dl_x86_64_save_sse): Likewise.
+ (_dl_x86_64_restore_sse): Likewise.
+ * sysdeps/x86_64/dl-trampoline.h: Updated to support different
+ size vector registers.
+ * sysdeps/x86_64/link-defines.sym (YMM_SIZE): New.
+ (ZMM_SIZE): Likewise.
+ * sysdeps/x86_64/tst-audit10.c: New file.
+ * sysdeps/x86_64/tst-auditmod10a.c: Likewise.
+ * sysdeps/x86_64/tst-auditmod10b.c: Likewise.
+
2014-03-13 Roland McGrath <roland@hack.frob.com>
* configure.ac (HAVE_EHDR_START): New check.
diff --git a/config.h.in b/config.h.in
index ed3c593..3fc34bd 100644
--- a/config.h.in
+++ b/config.h.in
@@ -98,6 +98,12 @@
/* Define if gcc supports VEX encoding. */
#undef HAVE_SSE2AVX_SUPPORT
+/* Define if compiler supports AVX512. */
+#undef HAVE_AVX512_SUPPORT
+
+/* Define if assembler supports AVX512. */
+#undef HAVE_AVX512_ASM_SUPPORT
+
/* Define if gcc supports FMA4. */
#undef HAVE_FMA4_SUPPORT
diff --git a/sysdeps/x86/bits/link.h b/sysdeps/x86/bits/link.h
index 4ebc5c1..8673b21 100644
--- a/sysdeps/x86/bits/link.h
+++ b/sysdeps/x86/bits/link.h
@@ -66,6 +66,8 @@ __END_DECLS
typedef float La_x86_64_xmm __attribute__ ((__vector_size__ (16)));
typedef float La_x86_64_ymm
__attribute__ ((__vector_size__ (32), __aligned__ (16)));
+typedef double La_x86_64_zmm
+ __attribute__ ((__vector_size__ (64), __aligned__ (16)));
# else
typedef float La_x86_64_xmm __attribute__ ((__mode__ (__V4SF__)));
# endif
@@ -74,6 +76,7 @@ typedef union
{
# if __GNUC_PREREQ (4,0)
La_x86_64_ymm ymm[2];
+ La_x86_64_zmm zmm[1];
# endif
La_x86_64_xmm xmm[4];
} La_x86_64_vector __attribute__ ((__aligned__ (16)));
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 08db331..58900a5 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -38,7 +38,7 @@ tests-pie += $(quad-pie-test)
$(objpfx)tst-quad1pie: $(objpfx)tst-quadmod1pie.o
$(objpfx)tst-quad2pie: $(objpfx)tst-quadmod2pie.o
-tests += tst-audit3 tst-audit4 tst-audit5
+tests += tst-audit3 tst-audit4 tst-audit5 tst-audit10
ifeq (yes,$(config-cflags-avx))
tests += tst-audit6 tst-audit7
endif
@@ -46,7 +46,8 @@ modules-names += tst-auditmod3a tst-auditmod3b \
tst-auditmod4a tst-auditmod4b \
tst-auditmod5a tst-auditmod5b \
tst-auditmod6a tst-auditmod6b tst-auditmod6c \
- tst-auditmod7a tst-auditmod7b
+ tst-auditmod7a tst-auditmod7b \
+ tst-auditmod10a tst-auditmod10b
$(objpfx)tst-audit3: $(objpfx)tst-auditmod3a.so
$(objpfx)tst-audit3.out: $(objpfx)tst-auditmod3b.so
@@ -69,6 +70,10 @@ $(objpfx)tst-audit7: $(objpfx)tst-auditmod7a.so
$(objpfx)tst-audit7.out: $(objpfx)tst-auditmod7b.so
tst-audit7-ENV = LD_AUDIT=$(objpfx)tst-auditmod7b.so
+$(objpfx)tst-audit10: $(objpfx)tst-auditmod10a.so
+$(objpfx)tst-audit10.out: $(objpfx)tst-auditmod10b.so
+tst-audit10-ENV = LD_AUDIT=$(objpfx)tst-auditmod10b.so
+
ifeq (yes,$(config-cflags-avx))
AVX-CFLAGS=-mavx
ifeq (yes,$(config-cflags-novzeroupper))
@@ -81,6 +86,12 @@ CFLAGS-tst-auditmod6b.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod6c.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod7b.c += $(AVX-CFLAGS)
endif
+ifeq (yes,$(config-cflags-avx512))
+AVX512-CFLAGS = -mavx512f
+CFLAGS-tst-audit10.c += $(AVX512-CFLAGS)
+CFLAGS-tst-auditmod10a.c += $(AVX512-CFLAGS)
+CFLAGS-tst-auditmod10b.c += $(AVX512-CFLAGS)
+endif
endif
ifeq ($(subdir),csu)
diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
index 5a83a53..b931e68 100644
--- a/sysdeps/x86_64/configure
+++ b/sysdeps/x86_64/configure
@@ -95,6 +95,59 @@ fi
config_vars="$config_vars
config-cflags-avx = $libc_cv_cc_avx"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support" >&5
+$as_echo_n "checking for AVX512 support... " >&6; }
+if ${libc_cv_cc_avx512+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if { ac_try='${CC-cc} -mavx512f -xc /dev/null -S -o /dev/null'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+ libc_cv_cc_avx512=yes
+else
+ libc_cv_cc_avx512=no
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx512" >&5
+$as_echo "$libc_cv_cc_avx512" >&6; }
+if test $libc_cv_cc_avx512 = yes; then
+ $as_echo "#define HAVE_AVX512_SUPPORT 1" >>confdefs.h
+
+fi
+config_vars="$config_vars
+config-cflags-avx512 = $libc_cv_cc_avx512"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support in assembler" >&5
+$as_echo_n "checking for AVX512 support in assembler... " >&6; }
+if ${libc_cv_asm_avx512+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat > conftest.s <<\EOF
+ vmovdqu64 %zmm0, (%rsp)
+EOF
+if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then
+ libc_cv_asm_avx512=yes
+else
+ libc_cv_asm_avx512=no
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_avx512" >&5
+$as_echo "$libc_cv_asm_avx512" >&6; }
+if test $libc_cv_asm_avx512 == yes; then
+ $as_echo "#define HAVE_AVX512_ASM_SUPPORT 1" >>confdefs.h
+
+fi
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5
$as_echo_n "checking for AVX encoding of SSE instructions... " >&6; }
if ${libc_cv_cc_sse2avx+:} false; then :
diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
index c682f93..5e5d61b 100644
--- a/sysdeps/x86_64/configure.ac
+++ b/sysdeps/x86_64/configure.ac
@@ -23,6 +23,30 @@ if test $libc_cv_cc_avx = yes; then
fi
LIBC_CONFIG_VAR([config-cflags-avx], [$libc_cv_cc_avx])
+dnl Check if -mavx512f works.
+AC_CACHE_CHECK(for AVX512 support, libc_cv_cc_avx512, [dnl
+LIBC_TRY_CC_OPTION([-mavx512f], [libc_cv_cc_avx512=yes], [libc_cv_cc_avx512=no])
+])
+if test $libc_cv_cc_avx512 = yes; then
+ AC_DEFINE(HAVE_AVX512_SUPPORT)
+fi
+LIBC_CONFIG_VAR([config-cflags-avx512], [$libc_cv_cc_avx512])
+
+dnl Check if asm supports AVX512.
+AC_CACHE_CHECK(for AVX512 support in assembler, libc_cv_asm_avx512, [dnl
+cat > conftest.s <<\EOF
+ vmovdqu64 %zmm0, (%rsp)
+EOF
+if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then
+ libc_cv_asm_avx512=yes
+else
+ libc_cv_asm_avx512=no
+fi
+rm -f conftest*])
+if test $libc_cv_asm_avx512 == yes; then
+ AC_DEFINE(HAVE_AVX512_ASM_SUPPORT)
+fi
+
dnl Check if -msse2avx works.
AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl
LIBC_TRY_CC_OPTION([-msse2avx],
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index ae38677..77c4d0f 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -96,7 +96,7 @@ _dl_runtime_profile:
/* Actively align the La_x86_64_regs structure. */
andq $0xfffffffffffffff0, %rsp
-# ifdef HAVE_AVX_SUPPORT
+# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
/* sizeof(La_x86_64_regs). Need extra space for 8 SSE registers
to detect if any xmm0-xmm7 registers are changed by audit
module. */
@@ -130,7 +130,7 @@ _dl_runtime_profile:
movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
-# ifdef HAVE_AVX_SUPPORT
+# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
.data
L(have_avx):
.zero 4
@@ -138,7 +138,7 @@ L(have_avx):
.previous
cmpl $0, L(have_avx)(%rip)
- jne 1f
+ jne L(defined)
movq %rbx, %r11 # Save rbx
movl $1, %eax
cpuid
@@ -147,18 +147,54 @@ L(have_avx):
// AVX and XSAVE supported?
andl $((1 << 28) | (1 << 27)), %ecx
cmpl $((1 << 28) | (1 << 27)), %ecx
- jne 2f
+ jne 10f
+# ifdef HAVE_AVX512_ASM_SUPPORT
+ // AVX512 supported in processor?
+ movq %rbx, %r11 # Save rbx
+ xorl %ecx, %ecx
+ mov $0x7, %eax
+ cpuid
+ andl $(1 << 16), %ebx
+# endif
xorl %ecx, %ecx
// Get XFEATURE_ENABLED_MASK
xgetbv
- andl $0x6, %eax
-2: subl $0x5, %eax
+# ifdef HAVE_AVX512_ASM_SUPPORT
+ test %ebx, %ebx
+ movq %r11, %rbx # Restore rbx
+ je 20f
+ // Verify that XCR0[7:5] = '111b' and
+ // XCR0[2:1] = '11b' which means
+ // that zmm state is enabled
+ andl $0xe6, %eax
+ cmpl $0xe6, %eax
+ jne 20f
+ movl %eax, L(have_avx)(%rip)
+L(avx512):
+# define RESTORE_AVX
+# define VMOV vmovdqu64
+# define VEC(i) zmm##i
+# define MORE_CODE
+# include "dl-trampoline.h"
+# undef VMOV
+# undef VEC
+# undef RESTORE_AVX
+# endif
+20: andl $0x6, %eax
+10: subl $0x5, %eax
movl %eax, L(have_avx)(%rip)
cmpl $0, %eax
-1: js L(no_avx)
+L(defined):
+ js L(no_avx)
+# ifdef HAVE_AVX512_ASM_SUPPORT
+ cmpl $0xe6, L(have_avx)(%rip)
+ je L(avx512)
+# endif
# define RESTORE_AVX
+# define VMOV vmovdqu
+# define VEC(i) ymm##i
# define MORE_CODE
# include "dl-trampoline.h"
@@ -180,9 +216,9 @@ L(no_avx):
.align 16
cfi_startproc
_dl_x86_64_save_sse:
-# ifdef HAVE_AVX_SUPPORT
+# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
cmpl $0, L(have_avx)(%rip)
- jne 1f
+ jne L(defined_5)
movq %rbx, %r11 # Save rbx
movl $1, %eax
cpuid
@@ -191,21 +227,43 @@ _dl_x86_64_save_sse:
// AVX and XSAVE supported?
andl $((1 << 28) | (1 << 27)), %ecx
cmpl $((1 << 28) | (1 << 27)), %ecx
- jne 2f
+ jne 1f
+# ifdef HAVE_AVX512_ASM_SUPPORT
+ // AVX512 supported in a processor?
+ movq %rbx, %r11 # Save rbx
+ xorl %ecx,%ecx
+ mov $0x7,%eax
+ cpuid
+ andl $(1 << 16), %ebx
+# endif
xorl %ecx, %ecx
// Get XFEATURE_ENABLED_MASK
xgetbv
- andl $0x6, %eax
- cmpl $0x6, %eax
- // Nonzero if SSE and AVX state saving is enabled.
- sete %al
-2: leal -1(%eax,%eax), %eax
+# ifdef HAVE_AVX512_ASM_SUPPORT
+ test %ebx, %ebx
+ movq %r11, %rbx # Restore rbx
+ je 2f
+ // Verify that XCR0[7:5] = '111b' and
+ // XCR0[2:1] = '11b' which means
+ // that zmm state is enabled
+ andl $0xe6, %eax
+ movl %eax, L(have_avx)(%rip)
+ cmpl $0xe6, %eax
+ je L(avx512_5)
+# endif
+
+2: andl $0x6, %eax
+1: subl $0x5, %eax
movl %eax, L(have_avx)(%rip)
cmpl $0, %eax
-1: js L(no_avx5)
+L(defined_5):
+ js L(no_avx5)
+# ifdef HAVE_AVX512_ASM_SUPPORT
+ cmpl $0xe6, L(have_avx)(%rip)
+ je L(avx512_5)
+# endif
-# define YMM_SIZE 32
vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
@@ -215,6 +273,18 @@ _dl_x86_64_save_sse:
vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
ret
+# ifdef HAVE_AVX512_ASM_SUPPORT
+L(avx512_5):
+ vmovdqu64 %zmm0, %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE
+ vmovdqu64 %zmm1, %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE
+ vmovdqu64 %zmm2, %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE
+ vmovdqu64 %zmm3, %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE
+ vmovdqu64 %zmm4, %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE
+ vmovdqu64 %zmm5, %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE
+ vmovdqu64 %zmm6, %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE
+ vmovdqu64 %zmm7, %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE
+ ret
+# endif
L(no_avx5):
# endif
movdqa %xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
@@ -235,9 +305,13 @@ L(no_avx5):
.align 16
cfi_startproc
_dl_x86_64_restore_sse:
-# ifdef HAVE_AVX_SUPPORT
+# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
cmpl $0, L(have_avx)(%rip)
js L(no_avx6)
+# ifdef HAVE_AVX512_ASM_SUPPORT
+ cmpl $0xe6, L(have_avx)(%rip)
+ je L(avx512_6)
+# endif
vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0
vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
@@ -248,6 +322,18 @@ _dl_x86_64_restore_sse:
vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
ret
+# ifdef HAVE_AVX512_ASM_SUPPORT
+L(avx512_6):
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE, %zmm0
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE, %zmm1
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE, %zmm2
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE, %zmm3
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE, %zmm4
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE, %zmm5
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE, %zmm6
+ vmovdqu64 %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE, %zmm7
+ ret
+# endif
L(no_avx6):
# endif
movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0
diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h
index 5d1b75f..161af0f 100644
--- a/sysdeps/x86_64/dl-trampoline.h
+++ b/sysdeps/x86_64/dl-trampoline.h
@@ -19,14 +19,14 @@
#ifdef RESTORE_AVX
/* This is to support AVX audit modules. */
- vmovdqu %ymm0, (LR_VECTOR_OFFSET)(%rsp)
- vmovdqu %ymm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
- vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
- vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
- vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
- vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
- vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
- vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
+ VMOV %VEC(0), (LR_VECTOR_OFFSET)(%rsp)
+ VMOV %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
+ VMOV %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
+ VMOV %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
+ VMOV %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
+ VMOV %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
+ VMOV %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
+ VMOV %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
/* Save xmm0-xmm7 registers to detect if any of them are
changed by audit module. */
@@ -72,7 +72,7 @@
je 2f
vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp)
jmp 1f
-2: vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0
+2: VMOV (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
@@ -81,7 +81,7 @@
je 2f
vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
jmp 1f
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
@@ -90,7 +90,7 @@
je 2f
vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
jmp 1f
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
@@ -99,7 +99,7 @@
je 2f
vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
jmp 1f
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
@@ -108,7 +108,7 @@
je 2f
vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
jmp 1f
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
@@ -117,7 +117,7 @@
je 2f
vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
jmp 1f
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
@@ -126,7 +126,7 @@
je 2f
vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
jmp 1f
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
@@ -135,7 +135,7 @@
je 2f
vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
jmp 1f
-2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7
+2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7)
vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
1:
@@ -213,8 +213,8 @@
#ifdef RESTORE_AVX
/* This is to support AVX audit modules. */
- vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
- vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)
+ VMOV %VEC(0), LRV_VECTOR0_OFFSET(%rcx)
+ VMOV %VEC(1), LRV_VECTOR1_OFFSET(%rcx)
/* Save xmm0/xmm1 registers to detect if they are changed
by audit module. */
@@ -243,13 +243,13 @@
vpmovmskb %xmm2, %esi
cmpl $0xffff, %esi
jne 1f
- vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0
+ VMOV LRV_VECTOR0_OFFSET(%rsp), %VEC(0)
1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
vpmovmskb %xmm2, %esi
cmpl $0xffff, %esi
jne 1f
- vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
+ VMOV LRV_VECTOR1_OFFSET(%rsp), %VEC(1)
1:
#endif
diff --git a/sysdeps/x86_64/link-defines.sym b/sysdeps/x86_64/link-defines.sym
index 1694d88..85d35ad 100644
--- a/sysdeps/x86_64/link-defines.sym
+++ b/sysdeps/x86_64/link-defines.sym
@@ -4,6 +4,8 @@
--
VECTOR_SIZE sizeof (La_x86_64_vector)
XMM_SIZE sizeof (La_x86_64_xmm)
+YMM_SIZE sizeof (La_x86_64_ymm)
+ZMM_SIZE sizeof (La_x86_64_zmm)
LR_SIZE sizeof (struct La_x86_64_regs)
LR_RDX_OFFSET offsetof (struct La_x86_64_regs, lr_rdx)
diff --git a/sysdeps/x86_64/tst-audit10.c b/sysdeps/x86_64/tst-audit10.c
new file mode 100644
index 0000000..24c9696
--- /dev/null
+++ b/sysdeps/x86_64/tst-audit10.c
@@ -0,0 +1,70 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Test case for x86-64 preserved registers in dynamic linker. */
+
+#ifdef __AVX512F__
+#include <stdlib.h>
+#include <string.h>
+#include <cpuid.h>
+#include <immintrin.h>
+
+static int
+avx512_enabled (void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
+ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
+ return 0;
+
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+ if (!(ebx & bit_AVX512F))
+ return 0;
+
+ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
+
+ /* Verify that ZMM, YMM and XMM states are enabled. */
+ return (eax & 0xe6) == 0xe6;
+}
+
+
+extern __m512i audit_test (__m512i, __m512i, __m512i, __m512i,
+ __m512i, __m512i, __m512i, __m512i);
+int
+main (void)
+{
+ /* Run AVX512 test only if AVX512 is supported. */
+ if (avx512_enabled ())
+ {
+ __m512i zmm = _mm512_setzero_si512 ();
+ __m512i ret = audit_test (zmm, zmm, zmm, zmm, zmm, zmm, zmm, zmm);
+
+ zmm = _mm512_set1_epi64 (0x12349876);
+
+ if (memcmp (&zmm, &ret, sizeof (ret)))
+ abort ();
+ }
+ return 0;
+}
+#else
+int
+main (void)
+{
+ return 0;
+}
+#endif
diff --git a/sysdeps/x86_64/tst-auditmod10a.c b/sysdeps/x86_64/tst-auditmod10a.c
new file mode 100644
index 0000000..ea1809c
--- /dev/null
+++ b/sysdeps/x86_64/tst-auditmod10a.c
@@ -0,0 +1,65 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Test case for x86-64 preserved registers in dynamic linker. */
+
+#ifdef __AVX512F__
+#include <stdlib.h>
+#include <string.h>
+#include <immintrin.h>
+
+__m512i
+audit_test (__m512i x0, __m512i x1, __m512i x2, __m512i x3,
+ __m512i x4, __m512i x5, __m512i x6, __m512i x7)
+{
+ __m512i zmm;
+
+ zmm = _mm512_set1_epi64 (1);
+ if (memcmp (&zmm, &x0, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi64 (2);
+ if (memcmp (&zmm, &x1, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi64 (3);
+ if (memcmp (&zmm, &x2, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi64 (4);
+ if (memcmp (&zmm, &x3, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi64 (5);
+ if (memcmp (&zmm, &x4, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi64 (6);
+ if (memcmp (&zmm, &x5, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi64 (7);
+ if (memcmp (&zmm, &x6, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi64 (8);
+ if (memcmp (&zmm, &x7, sizeof (zmm)))
+ abort ();
+
+ return _mm512_setzero_si512 ();
+}
+#endif
diff --git a/sysdeps/x86_64/tst-auditmod10b.c b/sysdeps/x86_64/tst-auditmod10b.c
new file mode 100644
index 0000000..358f50e
--- /dev/null
+++ b/sysdeps/x86_64/tst-auditmod10b.c
@@ -0,0 +1,219 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Verify that changing AVX512 registers in audit library won't affect
+ function parameter passing/return. */
+
+#include <dlfcn.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bits/wordsize.h>
+#include <gnu/lib-names.h>
+
+unsigned int
+la_version (unsigned int v)
+{
+ setlinebuf (stdout);
+
+ printf ("version: %u\n", v);
+
+ char buf[20];
+ sprintf (buf, "%u", v);
+
+ return v;
+}
+
+void
+la_activity (uintptr_t *cookie, unsigned int flag)
+{
+ if (flag == LA_ACT_CONSISTENT)
+ printf ("activity: consistent\n");
+ else if (flag == LA_ACT_ADD)
+ printf ("activity: add\n");
+ else if (flag == LA_ACT_DELETE)
+ printf ("activity: delete\n");
+ else
+ printf ("activity: unknown activity %u\n", flag);
+}
+
+char *
+la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag)
+{
+ char buf[100];
+ const char *flagstr;
+ if (flag == LA_SER_ORIG)
+ flagstr = "LA_SET_ORIG";
+ else if (flag == LA_SER_LIBPATH)
+ flagstr = "LA_SER_LIBPATH";
+ else if (flag == LA_SER_RUNPATH)
+ flagstr = "LA_SER_RUNPATH";
+ else if (flag == LA_SER_CONFIG)
+ flagstr = "LA_SER_CONFIG";
+ else if (flag == LA_SER_DEFAULT)
+ flagstr = "LA_SER_DEFAULT";
+ else if (flag == LA_SER_SECURE)
+ flagstr = "LA_SER_SECURE";
+ else
+ {
+ sprintf (buf, "unknown flag %d", flag);
+ flagstr = buf;
+ }
+ printf ("objsearch: %s, %s\n", name, flagstr);
+
+ return (char *) name;
+}
+
+unsigned int
+la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie)
+{
+ printf ("objopen: %ld, %s\n", lmid, l->l_name);
+
+ return 3;
+}
+
+void
+la_preinit (uintptr_t *cookie)
+{
+ printf ("preinit\n");
+}
+
+unsigned int
+la_objclose (uintptr_t *cookie)
+{
+ printf ("objclose\n");
+ return 0;
+}
+
+uintptr_t
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, unsigned int *flags, const char *symname)
+{
+ printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+ return sym->st_value;
+}
+
+#include <tst-audit.h>
+
+#ifdef __AVX512F__
+#include <immintrin.h>
+#include <cpuid.h>
+
+static int
+check_avx512 (void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
+ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
+ return 0;
+
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+ if (!(ebx & bit_AVX512F))
+ return 0;
+
+ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
+
+ /* Verify that ZMM, YMM and XMM states are enabled. */
+ return (eax & 0xe6) == 0xe6;
+}
+
+#else
+#include <emmintrin.h>
+#endif
+
+ElfW(Addr)
+pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, La_regs *regs, unsigned int *flags,
+ const char *symname, long int *framesizep)
+{
+ printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+#ifdef __AVX512F__
+ if (check_avx512 () && strcmp (symname, "audit_test") == 0)
+ {
+ __m512i zero = _mm512_setzero_si512 ();
+ if (memcmp (®s->lr_vector[0], &zero, sizeof (zero))
+ || memcmp (®s->lr_vector[1], &zero, sizeof (zero))
+ || memcmp (®s->lr_vector[2], &zero, sizeof (zero))
+ || memcmp (®s->lr_vector[3], &zero, sizeof (zero))
+ || memcmp (®s->lr_vector[4], &zero, sizeof (zero))
+ || memcmp (®s->lr_vector[5], &zero, sizeof (zero))
+ || memcmp (®s->lr_vector[6], &zero, sizeof (zero))
+ || memcmp (®s->lr_vector[7], &zero, sizeof (zero)))
+ abort ();
+
+ for (int i = 0; i < 8; i++)
+ regs->lr_vector[i].zmm[0]
+ = (La_x86_64_zmm) _mm512_set1_epi64 (i + 1);
+
+ __m512i zmm = _mm512_set1_epi64 (-1);
+ asm volatile ("vmovdqa64 %0, %%zmm0" : : "x" (zmm) : "xmm0" );
+ asm volatile ("vmovdqa64 %0, %%zmm1" : : "x" (zmm) : "xmm1" );
+ asm volatile ("vmovdqa64 %0, %%zmm2" : : "x" (zmm) : "xmm2" );
+ asm volatile ("vmovdqa64 %0, %%zmm3" : : "x" (zmm) : "xmm3" );
+ asm volatile ("vmovdqa64 %0, %%zmm4" : : "x" (zmm) : "xmm4" );
+ asm volatile ("vmovdqa64 %0, %%zmm5" : : "x" (zmm) : "xmm5" );
+ asm volatile ("vmovdqa64 %0, %%zmm6" : : "x" (zmm) : "xmm6" );
+ asm volatile ("vmovdqa64 %0, %%zmm7" : : "x" (zmm) : "xmm7" );
+
+ *framesizep = 1024;
+ }
+#endif
+
+ return sym->st_value;
+}
+
+unsigned int
+pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, const La_regs *inregs, La_retval *outregs,
+ const char *symname)
+{
+ printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n",
+ symname, (long int) sym->st_value, ndx,
+ (ptrdiff_t) outregs->int_retval);
+
+#ifdef __AVX512F__
+ if (check_avx512 () && strcmp (symname, "audit_test") == 0)
+ {
+ __m512i zero = _mm512_setzero_si512 ();
+ if (memcmp (&outregs->lrv_vector0, &zero, sizeof (zero)))
+ abort ();
+
+ for (int i = 0; i < 8; i++)
+ {
+ __m512i zmm = _mm512_set1_epi64 (i + 1);
+ if (memcmp (&inregs->lr_vector[i], &zmm, sizeof (zmm)) != 0)
+ abort ();
+ }
+
+ outregs->lrv_vector0.zmm[0]
+ = (La_x86_64_zmm) _mm512_set1_epi64 (0x12349876);
+
+ __m512i zmm = _mm512_set1_epi64 (-1);
+ asm volatile ("vmovdqa64 %0, %%zmm0" : : "x" (zmm) : "xmm0" );
+ asm volatile ("vmovdqa64 %0, %%zmm1" : : "x" (zmm) : "xmm1" );
+ }
+#endif
+
+ return 0;
+}
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 30 +++++
config.h.in | 6 +
sysdeps/x86/bits/link.h | 3 +
sysdeps/x86_64/Makefile | 15 +++-
sysdeps/x86_64/configure | 53 +++++++++
sysdeps/x86_64/configure.ac | 24 ++++
sysdeps/x86_64/dl-trampoline.S | 122 ++++++++++++++++++---
sysdeps/x86_64/dl-trampoline.h | 40 ++++----
sysdeps/x86_64/link-defines.sym | 2 +
sysdeps/x86_64/tst-audit10.c | 70 ++++++++++++
sysdeps/x86_64/tst-auditmod10a.c | 65 +++++++++++
sysdeps/x86_64/tst-auditmod10b.c | 219 ++++++++++++++++++++++++++++++++++++++
12 files changed, 609 insertions(+), 40 deletions(-)
create mode 100644 sysdeps/x86_64/tst-audit10.c
create mode 100644 sysdeps/x86_64/tst-auditmod10a.c
create mode 100644 sysdeps/x86_64/tst-auditmod10b.c
hooks/post-receive
--
GNU C Library master sources