This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch master updated. glibc-2.26-175-gb9eaca8
- From: hjl at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 23 Aug 2017 13:30:37 -0000
- Subject: GNU C Library master sources branch master updated. glibc-2.26-175-gb9eaca8
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master, has been updated
via b9eaca8fa0a9628a992e0f1478aaadde576804e1 (commit)
from 5a706f649de3952271930a8340db4ca8aa50f485 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=b9eaca8fa0a9628a992e0f1478aaadde576804e1
commit b9eaca8fa0a9628a992e0f1478aaadde576804e1
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Wed Aug 23 06:16:12 2017 -0700
x86_64: Replace AVX512F .byte sequences with instructions
Since binutils 2.25 or later is required to build glibc, we can replace
AVX512F .byte sequences with AVX512F instructions.
Tested on x86-64 and x32. There are no code differences in libmvec.so
and libmvec.a.
* sysdeps/x86_64/fpu/svml_d_sincos8_core.S: Replace AVX512F
.byte sequences with AVX512F instructions.
* sysdeps/x86_64/fpu/svml_d_wrapper_impl.h: Likewise.
* sysdeps/x86_64/fpu/svml_s_sincosf16_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_s_wrapper_impl.h: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S:
Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S:
Likewise.
diff --git a/ChangeLog b/ChangeLog
index 43c8880..a58de05 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2017-08-23 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/x86_64/fpu/svml_d_sincos8_core.S: Replace AVX512F
+ .byte sequences with AVX512F instructions.
+ * sysdeps/x86_64/fpu/svml_d_wrapper_impl.h: Likewise.
+ * sysdeps/x86_64/fpu/svml_s_sincosf16_core.S: Likewise.
+ * sysdeps/x86_64/fpu/svml_s_wrapper_impl.h: Likewise.
+ * sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S:
+ Likewise.
+ * sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S:
+ Likewise.
+
2017-08-22 Szabolcs Nagy <szabolcs.nagy@arm.com>
Steve Ellcey <sellcey@cavium.com>
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
index c920755..3667faa 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
@@ -599,24 +599,9 @@ libmvec_hidden_def(_ZGVeN8vl8l8_sincos_skx)
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $256, %rsp
- /* Encoding for vmovups %zmm1, 128(%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x4c
- .byte 0x24
- .byte 0x02
+ vmovups %zmm1, 128(%rsp)
lea (%rsp), %rdi
- /* Encoding for vmovups %zmm2, 192(%rdi). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x57
- .byte 0x03
+ vmovups %zmm2, 192(%rdi)
lea 64(%rsp), %rsi
call HIDDEN_JUMPTARGET(\callee)
movq 128(%rsp), %rdx
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
index f73ab7d..8fa4255 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
@@ -510,40 +510,11 @@ libmvec_hidden_def(_ZGVeN16vl4l4_sincosf_skx)
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $384, %rsp
- /* Encoding for vmovups %zmm1, 128(%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x4c
- .byte 0x24
- .byte 0x02
+ vmovups %zmm1, 128(%rsp)
lea (%rsp), %rdi
- /* Encoding for vmovups %zmm2, 192(%rdi). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x57
- .byte 0x03
- /* Encoding for vmovups %zmm3, 256(%rdi). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x5f
- .byte 0x04
- /* Encoding for vmovups %zmm4, 320(%rdi). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x67
- .byte 0x05
+ vmovups %zmm2, 192(%rdi)
+ vmovups %zmm3, 256(%rdi)
+ vmovups %zmm4, 320(%rdi)
lea 64(%rsp), %rsi
call HIDDEN_JUMPTARGET(\callee)
movq 128(%rsp), %rdx
@@ -661,30 +632,8 @@ libmvec_hidden_def(_ZGVeN16vl4l4_sincosf_skx)
leal -112(%rbp), %esi
leal -176(%rbp), %edi
subl $296, %esp
- /* Encoding for vmovdqa64 %zmm1, -240(%ebp). */
- .byte 0x67
- .byte 0x62
- .byte 0xf1
- .byte 0xfd
- .byte 0x48
- .byte 0x7f
- .byte 0x8d
- .byte 0x10
- .byte 0xff
- .byte 0xff
- .byte 0xff
- /* Encoding for vmovdqa64 %zmm2, -304(%ebp). */
- .byte 0x67
- .byte 0x62
- .byte 0xf1
- .byte 0xfd
- .byte 0x48
- .byte 0x7f
- .byte 0x95
- .byte 0xd0
- .byte 0xfe
- .byte 0xff
- .byte 0xff
+ vmovdqa64 %zmm1, -240(%ebp)
+ vmovdqa64 %zmm2, -304(%ebp)
call HIDDEN_JUMPTARGET(\callee)
movl -240(%ebp), %eax
vmovss -176(%ebp), %xmm0
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
index c104539..cdea304 100644
--- a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
@@ -35,32 +35,10 @@ END (_ZGVeN8vl8l8_sincos)
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $320, %rsp
- /* Encoding for vmovups %zmm0, 256(%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x44
- .byte 0x24
- .byte 0x04
+ vmovups %zmm0, 256(%rsp)
lea (%rsp), %rdi
- /* Encoding for vmovups %zmm1, 128(%rdi). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x4f
- .byte 0x02
- /* Encoding for vmovups %zmm2, 192(%rdi). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x57
- .byte 0x03
+ vmovups %zmm1, 128(%rdi)
+ vmovups %zmm2, 192(%rdi)
lea 64(%rsp), %rsi
call HIDDEN_JUMPTARGET(\callee)
vmovdqu 288(%rsp), %ymm0
@@ -142,18 +120,7 @@ END (_ZGVeN8vl8l8_sincos)
subl $280, %esp
vmovdqa %ymm1, -208(%ebp)
vmovdqa %ymm2, -240(%ebp)
- /* Encoding for vmovapd %zmm0, -304(%ebp). */
- .byte 0x67
- .byte 0x62
- .byte 0xf1
- .byte 0xfd
- .byte 0x48
- .byte 0x29
- .byte 0x85
- .byte 0xd0
- .byte 0xfe
- .byte 0xff
- .byte 0xff
+ vmovapd %zmm0, -304(%ebp)
call HIDDEN_JUMPTARGET(\callee)
leal 32(%r12), %esi
vmovupd -272(%ebp), %ymm0
diff --git a/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h
index 625eb66..3933644 100644
--- a/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h
+++ b/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h
@@ -201,29 +201,14 @@
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $128, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x04
- .byte 0x24
+ vmovups %zmm0, (%rsp)
vmovupd (%rsp), %ymm0
call HIDDEN_JUMPTARGET(\callee)
vmovupd %ymm0, 64(%rsp)
vmovupd 32(%rsp), %ymm0
call HIDDEN_JUMPTARGET(\callee)
vmovupd %ymm0, 96(%rsp)
-/* Below is encoding for vmovups 64(%rsp), %zmm0. */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x10
- .byte 0x44
- .byte 0x24
- .byte 0x01
+ vmovups 64(%rsp), %zmm0
movq %rbp, %rsp
cfi_def_cfa_register (%rsp)
popq %rbp
@@ -241,23 +226,8 @@
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $192, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x04
- .byte 0x24
-/* Below is encoding for vmovups %zmm1, 64(%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x4c
- .byte 0x24
- .byte 0x01
+ vmovups %zmm0, (%rsp)
+ vmovups %zmm1, 64(%rsp)
vmovupd (%rsp), %ymm0
vmovupd 64(%rsp), %ymm1
call HIDDEN_JUMPTARGET(\callee)
@@ -266,15 +236,7 @@
vmovupd 96(%rsp), %ymm1
call HIDDEN_JUMPTARGET(\callee)
vmovupd %ymm0, 160(%rsp)
-/* Below is encoding for vmovups 128(%rsp), %zmm0. */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x10
- .byte 0x44
- .byte 0x24
- .byte 0x02
+ vmovups 128(%rsp), %zmm0
movq %rbp, %rsp
cfi_def_cfa_register (%rsp)
popq %rbp
@@ -299,14 +261,7 @@
cfi_rel_offset (%r13, 0)
subq $176, %rsp
movq %rsi, %r13
-/* Below is encoding for vmovups %zmm0, (%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x04
- .byte 0x24
+ vmovups %zmm0, (%rsp)
movq %rdi, %r12
vmovupd (%rsp), %ymm0
call HIDDEN_JUMPTARGET(\callee)
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S b/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
index d86c913..8ebcebb 100644
--- a/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
@@ -35,48 +35,12 @@ END (_ZGVeN16vl4l4_sincosf)
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $448, %rsp
- /* Encoding for vmovups %zmm0, 384(%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x44
- .byte 0x24
- .byte 0x06
+ vmovups %zmm0, 384(%rsp)
lea (%rsp), %rdi
- /* Encoding for vmovups %zmm1, 128(%rdi). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x4f
- .byte 0x02
- /* Encoding for vmovups %zmm2, 192(%rdi). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x57
- .byte 0x03
- /* Encoding for vmovups %zmm3, 256(%rdi). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x5f
- .byte 0x04
- /* Encoding for vmovups %zmm4, 320(%rdi). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x67
- .byte 0x05
+ vmovups %zmm1, 128(%rdi)
+ vmovups %zmm2, 192(%rdi)
+ vmovups %zmm3, 256(%rdi)
+ vmovups %zmm4, 320(%rdi)
lea 64(%rsp), %rsi
call HIDDEN_JUMPTARGET(\callee)
vmovdqu 416(%rsp), %ymm0
@@ -204,42 +168,9 @@ END (_ZGVeN16vl4l4_sincosf)
.cfi_escape 0x10,0x3,0x2,0x76,0x68
movq %rdi, %rbx
subl $344, %esp
- /* Encoding for vmovdqa64 %zmm1, -240(%ebp). */
- .byte 0x67
- .byte 0x62
- .byte 0xf1
- .byte 0xfd
- .byte 0x48
- .byte 0x7f
- .byte 0x8d
- .byte 0x10
- .byte 0xff
- .byte 0xff
- .byte 0xff
- /* Encoding for vmovdqa64 %zmm2, -304(%ebp). */
- .byte 0x67
- .byte 0x62
- .byte 0xf1
- .byte 0xfd
- .byte 0x48
- .byte 0x7f
- .byte 0x95
- .byte 0xd0
- .byte 0xfe
- .byte 0xff
- .byte 0xff
- /* Encoding for vmovaps %zmm0, -368(%ebp). */
- .byte 0x67
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x29
- .byte 0x85
- .byte 0x90
- .byte 0xfe
- .byte 0xff
- .byte 0xff
+ vmovdqa64 %zmm1, -240(%ebp)
+ vmovdqa64 %zmm2, -304(%ebp)
+ vmovaps %zmm0, -368(%ebp)
call HIDDEN_JUMPTARGET(\callee)
leal 32(%r12), %esi
vmovups -336(%ebp), %ymm0
diff --git a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
index cd6d583..00b86cd 100644
--- a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
+++ b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
@@ -246,29 +246,14 @@
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $128, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x04
- .byte 0x24
+ vmovups %zmm0, (%rsp)
vmovupd (%rsp), %ymm0
call HIDDEN_JUMPTARGET(\callee)
vmovupd %ymm0, 64(%rsp)
vmovupd 32(%rsp), %ymm0
call HIDDEN_JUMPTARGET(\callee)
vmovupd %ymm0, 96(%rsp)
-/* Below is encoding for vmovups 64(%rsp), %zmm0. */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x10
- .byte 0x44
- .byte 0x24
- .byte 0x01
+ vmovups 64(%rsp), %zmm0
movq %rbp, %rsp
cfi_def_cfa_register (%rsp)
popq %rbp
@@ -286,23 +271,8 @@
cfi_def_cfa_register (%rbp)
andq $-64, %rsp
subq $192, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x04
- .byte 0x24
-/* Below is encoding for vmovups %zmm1, 64(%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x11
- .byte 0x4c
- .byte 0x24
- .byte 0x01
+ vmovups %zmm0, (%rsp)
+ vmovups %zmm1, 64(%rsp)
vmovups (%rsp), %ymm0
vmovups 64(%rsp), %ymm1
call HIDDEN_JUMPTARGET(\callee)
@@ -311,15 +281,7 @@
vmovups 96(%rsp), %ymm1
call HIDDEN_JUMPTARGET(\callee)
vmovups %ymm0, 160(%rsp)
-/* Below is encoding for vmovups 128(%rsp), %zmm0. */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x10
- .byte 0x44
- .byte 0x24
- .byte 0x02
+ vmovups 128(%rsp), %zmm0
movq %rbp, %rsp
cfi_def_cfa_register (%rsp)
popq %rbp
@@ -340,14 +302,7 @@
pushq %r13
subq $176, %rsp
movq %rsi, %r13
-/* Below is encoding for vmovaps %zmm0, (%rsp). */
- .byte 0x62
- .byte 0xf1
- .byte 0x7c
- .byte 0x48
- .byte 0x29
- .byte 0x04
- .byte 0x24
+ vmovaps %zmm0, (%rsp)
movq %rdi, %r12
vmovaps (%rsp), %ymm0
call HIDDEN_JUMPTARGET(\callee)
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 12 +++
.../fpu/multiarch/svml_d_sincos8_core_avx512.S | 19 +----
.../fpu/multiarch/svml_s_sincosf16_core_avx512.S | 63 ++-------------
sysdeps/x86_64/fpu/svml_d_sincos8_core.S | 41 +---------
sysdeps/x86_64/fpu/svml_d_wrapper_impl.h | 57 ++------------
sysdeps/x86_64/fpu/svml_s_sincosf16_core.S | 85 ++------------------
sysdeps/x86_64/fpu/svml_s_wrapper_impl.h | 57 ++------------
7 files changed, 44 insertions(+), 290 deletions(-)
hooks/post-receive
--
GNU C Library master sources