This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
GNU C Library master sources branch master updated. glibc-2.26-175-gb9eaca8

From: hjl at sourceware dot org
To: glibc-cvs at sourceware dot org
Date: 23 Aug 2017 13:30:37 -0000
Subject: GNU C Library master sources branch master updated. glibc-2.26-175-gb9eaca8
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master, has been updated
       via  b9eaca8fa0a9628a992e0f1478aaadde576804e1 (commit)
      from  5a706f649de3952271930a8340db4ca8aa50f485 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=b9eaca8fa0a9628a992e0f1478aaadde576804e1

commit b9eaca8fa0a9628a992e0f1478aaadde576804e1
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Wed Aug 23 06:16:12 2017 -0700

    x86_64: Replace AVX512F .byte sequences with instructions
    
    Since binutils 2.25 or later is required to build glibc, we can replace
    AVX512F .byte sequences with AVX512F instructions.
    
    Tested on x86-64 and x32.  There are no code differences in libmvec.so
    and libmvec.a.
    
    	* sysdeps/x86_64/fpu/svml_d_sincos8_core.S: Replace AVX512F
    	.byte sequences with AVX512F instructions.
    	* sysdeps/x86_64/fpu/svml_d_wrapper_impl.h: Likewise.
    	* sysdeps/x86_64/fpu/svml_s_sincosf16_core.S: Likewise.
    	* sysdeps/x86_64/fpu/svml_s_wrapper_impl.h: Likewise.
    	* sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S:
    	Likewise.
    	* sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S:
    	Likewise.

diff --git a/ChangeLog b/ChangeLog
index 43c8880..a58de05 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2017-08-23  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/x86_64/fpu/svml_d_sincos8_core.S: Replace AVX512F
+	.byte sequences with AVX512F instructions.
+	* sysdeps/x86_64/fpu/svml_d_wrapper_impl.h: Likewise.
+	* sysdeps/x86_64/fpu/svml_s_sincosf16_core.S: Likewise.
+	* sysdeps/x86_64/fpu/svml_s_wrapper_impl.h: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S:
+	Likewise.
+	* sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S:
+	Likewise.
+
 2017-08-22  Szabolcs Nagy  <szabolcs.nagy@arm.com>
 	    Steve Ellcey  <sellcey@cavium.com>
 
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
index c920755..3667faa 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
@@ -599,24 +599,9 @@ libmvec_hidden_def(_ZGVeN8vl8l8_sincos_skx)
         cfi_def_cfa_register (%rbp)
         andq      $-64, %rsp
         subq      $256, %rsp
-        /* Encoding for vmovups %zmm1, 128(%rsp).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x4c
-        .byte 0x24
-        .byte 0x02
+        vmovups   %zmm1, 128(%rsp)
         lea       (%rsp), %rdi
-        /* Encoding for vmovups %zmm2, 192(%rdi).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x57
-        .byte 0x03
+        vmovups   %zmm2, 192(%rdi)
         lea       64(%rsp), %rsi
         call      HIDDEN_JUMPTARGET(\callee)
         movq      128(%rsp), %rdx
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
index f73ab7d..8fa4255 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
@@ -510,40 +510,11 @@ libmvec_hidden_def(_ZGVeN16vl4l4_sincosf_skx)
         cfi_def_cfa_register (%rbp)
         andq      $-64, %rsp
         subq      $384, %rsp
-        /* Encoding for vmovups %zmm1, 128(%rsp).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x4c
-        .byte 0x24
-        .byte 0x02
+        vmovups   %zmm1, 128(%rsp)
         lea       (%rsp), %rdi
-        /* Encoding for vmovups %zmm2, 192(%rdi).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x57
-        .byte 0x03
-        /* Encoding for vmovups %zmm3, 256(%rdi).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x5f
-        .byte 0x04
-        /* Encoding for vmovups %zmm4, 320(%rdi).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x67
-        .byte 0x05
+        vmovups   %zmm2, 192(%rdi)
+        vmovups   %zmm3, 256(%rdi)
+        vmovups   %zmm4, 320(%rdi)
         lea       64(%rsp), %rsi
         call      HIDDEN_JUMPTARGET(\callee)
         movq      128(%rsp), %rdx
@@ -661,30 +632,8 @@ libmvec_hidden_def(_ZGVeN16vl4l4_sincosf_skx)
         leal    -112(%rbp), %esi
         leal    -176(%rbp), %edi
         subl    $296, %esp
-        /* Encoding for vmovdqa64 %zmm1, -240(%ebp).  */
-        .byte 0x67
-        .byte 0x62
-        .byte 0xf1
-        .byte 0xfd
-        .byte 0x48
-        .byte 0x7f
-        .byte 0x8d
-        .byte 0x10
-        .byte 0xff
-        .byte 0xff
-        .byte 0xff
-        /* Encoding for vmovdqa64 %zmm2, -304(%ebp).  */
-        .byte 0x67
-        .byte 0x62
-        .byte 0xf1
-        .byte 0xfd
-        .byte 0x48
-        .byte 0x7f
-        .byte 0x95
-        .byte 0xd0
-        .byte 0xfe
-        .byte 0xff
-        .byte 0xff
+        vmovdqa64 %zmm1, -240(%ebp)
+        vmovdqa64 %zmm2, -304(%ebp)
         call    HIDDEN_JUMPTARGET(\callee)
         movl    -240(%ebp), %eax
         vmovss  -176(%ebp), %xmm0
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
index c104539..cdea304 100644
--- a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
@@ -35,32 +35,10 @@ END (_ZGVeN8vl8l8_sincos)
         cfi_def_cfa_register (%rbp)
         andq      $-64, %rsp
         subq      $320, %rsp
-        /* Encoding for vmovups %zmm0, 256(%rsp).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x44
-        .byte 0x24
-        .byte 0x04
+        vmovups    %zmm0, 256(%rsp)
         lea       (%rsp), %rdi
-        /* Encoding for vmovups %zmm1, 128(%rdi).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x4f
-        .byte 0x02
-        /* Encoding for vmovups %zmm2, 192(%rdi).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x57
-        .byte 0x03
+        vmovups   %zmm1, 128(%rdi)
+        vmovups   %zmm2, 192(%rdi)
         lea       64(%rsp), %rsi
         call      HIDDEN_JUMPTARGET(\callee)
         vmovdqu   288(%rsp), %ymm0
@@ -142,18 +120,7 @@ END (_ZGVeN8vl8l8_sincos)
         subl    $280, %esp
         vmovdqa %ymm1, -208(%ebp)
         vmovdqa %ymm2, -240(%ebp)
-        /* Encoding for vmovapd %zmm0, -304(%ebp).  */
-        .byte 0x67
-        .byte 0x62
-        .byte 0xf1
-        .byte 0xfd
-        .byte 0x48
-        .byte 0x29
-        .byte 0x85
-        .byte 0xd0
-        .byte 0xfe
-        .byte 0xff
-        .byte 0xff
+        vmovapd %zmm0, -304(%ebp)
         call    HIDDEN_JUMPTARGET(\callee)
         leal    32(%r12), %esi
         vmovupd -272(%ebp), %ymm0
diff --git a/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h
index 625eb66..3933644 100644
--- a/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h
+++ b/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h
@@ -201,29 +201,14 @@
         cfi_def_cfa_register (%rbp)
         andq      $-64, %rsp
         subq      $128, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp).  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x11
-        .byte   0x04
-        .byte   0x24
+        vmovups   %zmm0, (%rsp)
         vmovupd   (%rsp), %ymm0
         call      HIDDEN_JUMPTARGET(\callee)
         vmovupd   %ymm0, 64(%rsp)
         vmovupd   32(%rsp), %ymm0
         call      HIDDEN_JUMPTARGET(\callee)
         vmovupd   %ymm0, 96(%rsp)
-/* Below is encoding for vmovups 64(%rsp), %zmm0.  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x10
-        .byte   0x44
-        .byte   0x24
-        .byte   0x01
+        vmovups   64(%rsp), %zmm0
         movq      %rbp, %rsp
         cfi_def_cfa_register (%rsp)
         popq      %rbp
@@ -241,23 +226,8 @@
         cfi_def_cfa_register (%rbp)
         andq      $-64, %rsp
         subq      $192, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp).  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x11
-        .byte   0x04
-        .byte   0x24
-/* Below is encoding for vmovups %zmm1, 64(%rsp).  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x11
-        .byte   0x4c
-        .byte   0x24
-        .byte   0x01
+        vmovups   %zmm0, (%rsp)
+        vmovups   %zmm1, 64(%rsp)
         vmovupd   (%rsp), %ymm0
         vmovupd   64(%rsp), %ymm1
         call      HIDDEN_JUMPTARGET(\callee)
@@ -266,15 +236,7 @@
         vmovupd   96(%rsp), %ymm1
         call      HIDDEN_JUMPTARGET(\callee)
         vmovupd   %ymm0, 160(%rsp)
-/* Below is encoding for vmovups 128(%rsp), %zmm0.  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x10
-        .byte   0x44
-        .byte   0x24
-        .byte   0x02
+        vmovups   128(%rsp), %zmm0
         movq      %rbp, %rsp
         cfi_def_cfa_register (%rsp)
         popq      %rbp
@@ -299,14 +261,7 @@
         cfi_rel_offset (%r13, 0)
         subq      $176, %rsp
         movq      %rsi, %r13
-/* Below is encoding for vmovups %zmm0, (%rsp).  */
-        .byte	0x62
-        .byte	0xf1
-        .byte	0x7c
-        .byte	0x48
-        .byte	0x11
-        .byte	0x04
-        .byte	0x24
+        vmovups   %zmm0, (%rsp)
         movq    %rdi, %r12
         vmovupd (%rsp), %ymm0
         call      HIDDEN_JUMPTARGET(\callee)
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S b/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
index d86c913..8ebcebb 100644
--- a/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
@@ -35,48 +35,12 @@ END (_ZGVeN16vl4l4_sincosf)
         cfi_def_cfa_register (%rbp)
         andq      $-64, %rsp
         subq      $448, %rsp
-        /* Encoding for vmovups %zmm0, 384(%rsp).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x44
-        .byte 0x24
-        .byte 0x06
+        vmovups   %zmm0, 384(%rsp)
         lea       (%rsp), %rdi
-        /* Encoding for vmovups %zmm1, 128(%rdi).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x4f
-        .byte 0x02
-        /* Encoding for vmovups %zmm2, 192(%rdi).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x57
-        .byte 0x03
-        /* Encoding for vmovups %zmm3, 256(%rdi).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x5f
-        .byte 0x04
-        /* Encoding for vmovups %zmm4, 320(%rdi).  */
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x11
-        .byte 0x67
-        .byte 0x05
+        vmovups   %zmm1, 128(%rdi)
+        vmovups   %zmm2, 192(%rdi)
+        vmovups   %zmm3, 256(%rdi)
+        vmovups   %zmm4, 320(%rdi)
         lea       64(%rsp), %rsi
         call      HIDDEN_JUMPTARGET(\callee)
         vmovdqu   416(%rsp), %ymm0
@@ -204,42 +168,9 @@ END (_ZGVeN16vl4l4_sincosf)
         .cfi_escape 0x10,0x3,0x2,0x76,0x68
         movq    %rdi, %rbx
         subl    $344, %esp
-        /* Encoding for vmovdqa64 %zmm1, -240(%ebp).  */
-        .byte 0x67
-        .byte 0x62
-        .byte 0xf1
-        .byte 0xfd
-        .byte 0x48
-        .byte 0x7f
-        .byte 0x8d
-        .byte 0x10
-        .byte 0xff
-        .byte 0xff
-        .byte 0xff
-        /* Encoding for vmovdqa64 %zmm2, -304(%ebp).  */
-        .byte 0x67
-        .byte 0x62
-        .byte 0xf1
-        .byte 0xfd
-        .byte 0x48
-        .byte 0x7f
-        .byte 0x95
-        .byte 0xd0
-        .byte 0xfe
-        .byte 0xff
-        .byte 0xff
-        /* Encoding for vmovaps %zmm0, -368(%ebp).  */
-        .byte 0x67
-        .byte 0x62
-        .byte 0xf1
-        .byte 0x7c
-        .byte 0x48
-        .byte 0x29
-        .byte 0x85
-        .byte 0x90
-        .byte 0xfe
-        .byte 0xff
-        .byte 0xff
+        vmovdqa64 %zmm1, -240(%ebp)
+        vmovdqa64 %zmm2, -304(%ebp)
+        vmovaps   %zmm0, -368(%ebp)
         call    HIDDEN_JUMPTARGET(\callee)
         leal    32(%r12), %esi
         vmovups -336(%ebp), %ymm0
diff --git a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
index cd6d583..00b86cd 100644
--- a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
+++ b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
@@ -246,29 +246,14 @@
         cfi_def_cfa_register (%rbp)
         andq      $-64, %rsp
         subq      $128, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp).  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x11
-        .byte   0x04
-        .byte   0x24
+        vmovups   %zmm0, (%rsp)
         vmovupd   (%rsp), %ymm0
         call      HIDDEN_JUMPTARGET(\callee)
         vmovupd   %ymm0, 64(%rsp)
         vmovupd   32(%rsp), %ymm0
         call      HIDDEN_JUMPTARGET(\callee)
         vmovupd   %ymm0, 96(%rsp)
-/* Below is encoding for vmovups 64(%rsp), %zmm0.  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x10
-        .byte   0x44
-        .byte   0x24
-        .byte   0x01
+        vmovups   64(%rsp), %zmm0
         movq      %rbp, %rsp
         cfi_def_cfa_register (%rsp)
         popq      %rbp
@@ -286,23 +271,8 @@
         cfi_def_cfa_register (%rbp)
         andq      $-64, %rsp
         subq      $192, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp).  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x11
-        .byte   0x04
-        .byte   0x24
-/* Below is encoding for vmovups %zmm1, 64(%rsp).  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x11
-        .byte   0x4c
-        .byte   0x24
-        .byte   0x01
+        vmovups   %zmm0, (%rsp)
+        vmovups   %zmm1, 64(%rsp)
         vmovups   (%rsp), %ymm0
         vmovups   64(%rsp), %ymm1
         call      HIDDEN_JUMPTARGET(\callee)
@@ -311,15 +281,7 @@
         vmovups   96(%rsp), %ymm1
         call      HIDDEN_JUMPTARGET(\callee)
         vmovups   %ymm0, 160(%rsp)
-/* Below is encoding for vmovups 128(%rsp), %zmm0.  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x10
-        .byte   0x44
-        .byte   0x24
-        .byte   0x02
+        vmovups   128(%rsp), %zmm0
         movq      %rbp, %rsp
         cfi_def_cfa_register (%rsp)
         popq      %rbp
@@ -340,14 +302,7 @@
         pushq     %r13
         subq      $176, %rsp
         movq      %rsi, %r13
-/* Below is encoding for vmovaps %zmm0, (%rsp).  */
-        .byte	0x62
-        .byte	0xf1
-        .byte	0x7c
-        .byte	0x48
-        .byte	0x29
-        .byte	0x04
-        .byte	0x24
+        vmovaps   %zmm0, (%rsp)
         movq      %rdi, %r12
         vmovaps   (%rsp), %ymm0
         call      HIDDEN_JUMPTARGET(\callee)

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                          |   12 +++
 .../fpu/multiarch/svml_d_sincos8_core_avx512.S     |   19 +----
 .../fpu/multiarch/svml_s_sincosf16_core_avx512.S   |   63 ++-------------
 sysdeps/x86_64/fpu/svml_d_sincos8_core.S           |   41 +---------
 sysdeps/x86_64/fpu/svml_d_wrapper_impl.h           |   57 ++------------
 sysdeps/x86_64/fpu/svml_s_sincosf16_core.S         |   85 ++------------------
 sysdeps/x86_64/fpu/svml_s_wrapper_impl.h           |   57 ++------------
 7 files changed, 44 insertions(+), 290 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]