This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH x86-64][BZ #20024] Fixed vector sincos/sincosf ABI
- From: Andrew Senkevich <andrew dot n dot senkevich at gmail dot com>
- To: libc-alpha <libc-alpha at sourceware dot org>
- Date: Tue, 31 May 2016 22:25:17 +0300
- Subject: [PATCH x86-64][BZ #20024] Fixed vector sincos/sincosf ABI
- Authentication-results: sourceware.org; auth=none
Hi,
this patch fixes wrong vector sincos/sincosf ABI to have it compatible with
current vector function declaration. According to current vector function
declaration vectorized sincos should have vector of pointers for second and
third parameters, so it is fixed with implementation as wrapper to version
having second and third parameters as pointers.
Is it Ok for trunk, 2.22 and 2.23 releases branches?
2016-05-31 Andrew Senkevich <andrew.senkevich@intel.com>
[BZ #20024]
* sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S: Fixed ABI
of this implementation of vector function.
* sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S:
Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_sincos2_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_sincos4_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_sincos8_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_s_sincosf16_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_s_sincosf4_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_s_sincosf8_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S: Likewise.
* sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c: Redefined wrapper
for testing vector function with fixed ABI.
* sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c: Likewise.
* sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c: Likewise.
* sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c: Likewise.
* sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c: Likewise.
* sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c: Likewise.
* sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c: Likewise.
* sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c: Likewise.
* sysdeps/x86_64/fpu/libm-test-ulps: Regenerated on KNL.
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps
b/sysdeps/x86_64/fpu/libm-test-ulps
index 7e7707b..38c4218 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -1029,7 +1029,7 @@ Function: "cos_vlen4_avx2":
double: 2
Function: "cos_vlen8":
-double: 1
+double: 2
float: 1
Function: "cos_vlen8_avx2":
@@ -2125,7 +2125,7 @@ Function: "sincos_vlen4_avx2":
double: 2
Function: "sincos_vlen8":
-double: 1
+double: 2
float: 1
Function: "sincos_vlen8_avx2":
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
index d37275d..56e9c57
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
@@ -20,7 +20,7 @@
#include "svml_d_trig_data.h"
.text
-ENTRY (_ZGVbN2vvv_sincos_sse4)
+ENTRY (_ZGVbN2vl8l8_sincos_sse4)
/*
ALGORITHM DESCRIPTION:
@@ -311,4 +311,31 @@ ENTRY (_ZGVbN2vvv_sincos_sse4)
movsd %xmm0, 256(%rsp,%r15)
jmp .LBL_1_7
+END (_ZGVbN2vl8l8_sincos_sse4)
+libmvec_hidden_def(_ZGVbN2vl8l8_sincos_sse4)
+
+/* vvv version implemented with wrapper to vl8l8 variant. */
+ENTRY (_ZGVbN2vvv_sincos_sse4)
+ subq $72, %rsp
+ .cfi_def_cfa_offset 80
+ movdqu %xmm1, 32(%rsp)
+ lea (%rsp), %rdi
+ movdqu %xmm2, 48(%rdi)
+ lea 16(%rsp), %rsi
+ call HIDDEN_JUMPTARGET(_ZGVbN2vl8l8_sincos_sse4)
+ movq 32(%rsp), %rdx
+ movq 48(%rsp), %rsi
+ movq 40(%rsp), %r8
+ movq 56(%rsp), %r10
+ movq (%rsp), %rax
+ movq 16(%rsp), %rcx
+ movq 8(%rsp), %rdi
+ movq 24(%rsp), %r9
+ movq %rax, (%rdx)
+ movq %rcx, (%rsi)
+ movq %rdi, (%r8)
+ movq %r9, (%r10)
+ addq $72, %rsp
+ .cfi_def_cfa_offset 8
+ ret
END (_ZGVbN2vvv_sincos_sse4)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
index 24b57f4..fc2b526
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
@@ -20,7 +20,7 @@
#include "svml_d_trig_data.h"
.text
-ENTRY (_ZGVdN4vvv_sincos_avx2)
+ENTRY (_ZGVdN4vl8l8_sincos_avx2)
/*
ALGORITHM DESCRIPTION:
@@ -274,4 +274,51 @@ ENTRY (_ZGVdN4vvv_sincos_avx2)
vmovsd %xmm0, 384(%rsp,%r15)
jmp .LBL_1_7
+END (_ZGVdN4vl8l8_sincos_avx2)
+libmvec_hidden_def(_ZGVdN4vl8l8_sincos_avx2)
+
+/* vvv version implemented with wrapper to vl8l8 variant. */
+ENTRY (_ZGVdN4vvv_sincos_avx2)
+ pushq %rbp
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbp, 0)
+ movq %rsp, %rbp
+ cfi_def_cfa_register (%rbp)
+ andq $-32, %rsp
+ subq $128, %rsp
+ vmovdqu %ymm1, 64(%rsp)
+ lea (%rsp), %rdi
+ vmovdqu %ymm2, 96(%rdi)
+ lea 32(%rsp), %rsi
+ call HIDDEN_JUMPTARGET(_ZGVdN4vl8l8_sincos_avx2)
+ movq 64(%rsp), %rdx
+ movq 96(%rsp), %rsi
+ movq 72(%rsp), %r8
+ movq 104(%rsp), %r10
+ movq (%rsp), %rax
+ movq 32(%rsp), %rcx
+ movq 8(%rsp), %rdi
+ movq 40(%rsp), %r9
+ movq %rax, (%rdx)
+ movq %rcx, (%rsi)
+ movq 80(%rsp), %rax
+ movq 112(%rsp), %rcx
+ movq %rdi, (%r8)
+ movq %r9, (%r10)
+ movq 88(%rsp), %rdi
+ movq 120(%rsp), %r9
+ movq 16(%rsp), %r11
+ movq 48(%rsp), %rdx
+ movq 24(%rsp), %rsi
+ movq 56(%rsp), %r8
+ movq %r11, (%rax)
+ movq %rdx, (%rcx)
+ movq %rsi, (%rdi)
+ movq %r8, (%r9)
+ movq %rbp, %rsp
+ cfi_def_cfa_register (%rsp)
+ popq %rbp
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbp)
+ ret
END (_ZGVdN4vvv_sincos_avx2)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
index 1d9f426..1e1f220
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
@@ -36,9 +36,9 @@
sin(R), sin(R') are approximated by corresponding polynomial. */
.text
-ENTRY (_ZGVeN8vvv_sincos_knl)
+ENTRY (_ZGVeN8vl8l8_sincos_knl)
#ifndef HAVE_AVX512_ASM_SUPPORT
-WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
+WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos
#else
pushq %rbp
cfi_adjust_cfa_offset (8)
@@ -304,11 +304,12 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
jmp .LBL_1_7
#endif
-END (_ZGVeN8vvv_sincos_knl)
+END (_ZGVeN8vl8l8_sincos_knl)
+libmvec_hidden_def(_ZGVeN8vl8l8_sincos_knl)
-ENTRY (_ZGVeN8vvv_sincos_skx)
+ENTRY (_ZGVeN8vl8l8_sincos_skx)
#ifndef HAVE_AVX512_ASM_SUPPORT
-WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
+WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos
#else
pushq %rbp
cfi_adjust_cfa_offset (8)
@@ -585,6 +586,100 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
jmp .LBL_2_7
#endif
+END (_ZGVeN8vl8l8_sincos_skx)
+libmvec_hidden_def(_ZGVeN8vl8l8_sincos_skx)
+
+/* Wrapper between vvv and vl8l8 vector variants. */
+.macro WRAPPER_AVX512_vvv_vl8l8 callee
+ pushq %rbp
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbp, 0)
+ movq %rsp, %rbp
+ cfi_def_cfa_register (%rbp)
+ andq $-64, %rsp
+ subq $256, %rsp
+ /* Encoding for vmovups %zmm1, 128(%rsp). */
+ .byte 0x62
+ .byte 0xf1
+ .byte 0x7c
+ .byte 0x48
+ .byte 0x11
+ .byte 0x4c
+ .byte 0x24
+ .byte 0x02
+ lea (%rsp), %rdi
+ /* Encoding for vmovups %zmm2, 192(%rdi). */
+ .byte 0x62
+ .byte 0xf1
+ .byte 0x7c
+ .byte 0x48
+ .byte 0x11
+ .byte 0x57
+ .byte 0x03
+ lea 64(%rsp), %rsi
+ call HIDDEN_JUMPTARGET(\callee)
+ movq 128(%rsp), %rdx
+ movq 136(%rsp), %rsi
+ movq 144(%rsp), %r8
+ movq 152(%rsp), %r10
+ movq (%rsp), %rax
+ movq 8(%rsp), %rcx
+ movq 16(%rsp), %rdi
+ movq 24(%rsp), %r9
+ movq %rax, (%rdx)
+ movq %rcx, (%rsi)
+ movq 160(%rsp), %rax
+ movq 168(%rsp), %rcx
+ movq %rdi, (%r8)
+ movq %r9, (%r10)
+ movq 176(%rsp), %rdi
+ movq 184(%rsp), %r9
+ movq 32(%rsp), %r11
+ movq 40(%rsp), %rdx
+ movq 48(%rsp), %rsi
+ movq 56(%rsp), %r8
+ movq %r11, (%rax)
+ movq %rdx, (%rcx)
+ movq 192(%rsp), %r11
+ movq 200(%rsp), %rdx
+ movq %rsi, (%rdi)
+ movq %r8, (%r9)
+ movq 208(%rsp), %rsi
+ movq 216(%rsp), %r8
+ movq 64(%rsp), %r10
+ movq 72(%rsp), %rax
+ movq 80(%rsp), %rcx
+ movq 88(%rsp), %rdi
+ movq %r10, (%r11)
+ movq %rax, (%rdx)
+ movq 224(%rsp), %r10
+ movq 232(%rsp), %rax
+ movq %rcx, (%rsi)
+ movq %rdi, (%r8)
+ movq 240(%rsp), %rcx
+ movq 248(%rsp), %rdi
+ movq 96(%rsp), %r9
+ movq 104(%rsp), %r11
+ movq 112(%rsp), %rdx
+ movq 120(%rsp), %rsi
+ movq %r9, (%r10)
+ movq %r11, (%rax)
+ movq %rdx, (%rcx)
+ movq %rsi, (%rdi)
+ movq %rbp, %rsp
+ cfi_def_cfa_register (%rsp)
+ popq %rbp
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbp)
+ ret
+.endm
+
+ENTRY (_ZGVeN8vvv_sincos_knl)
+WRAPPER_AVX512_vvv_vl8l8 _ZGVeN8vl8l8_sincos_knl
+END (_ZGVeN8vvv_sincos_knl)
+
+ENTRY (_ZGVeN8vvv_sincos_skx)
+WRAPPER_AVX512_vvv_vl8l8 _ZGVeN8vl8l8_sincos_skx
END (_ZGVeN8vvv_sincos_skx)
.section .rodata, "a"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
index e375de8..c26ee0d
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
@@ -49,9 +49,9 @@
R2 = XOR( RC, SC ). */
.text
-ENTRY (_ZGVeN16vvv_sincosf_knl)
+ENTRY (_ZGVeN16vl4l4_sincosf_knl)
#ifndef HAVE_AVX512_ASM_SUPPORT
-WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
+WRAPPER_IMPL_AVX512_fFF _ZGVdN8vl4l4_sincosf
#else
pushq %rbp
cfi_adjust_cfa_offset (8)
@@ -267,9 +267,10 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
vmovss %xmm0, 1280(%rsp,%r15,8)
jmp .LBL_1_7
#endif
-END (_ZGVeN16vvv_sincosf_knl)
+END (_ZGVeN16vl4l4_sincosf_knl)
+libmvec_hidden_def(_ZGVeN16vl4l4_sincosf_knl)
-ENTRY (_ZGVeN16vvv_sincosf_skx)
+ENTRY (_ZGVeN16vl4l4_sincosf_skx)
#ifndef HAVE_AVX512_ASM_SUPPORT
WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
#else
@@ -496,6 +497,164 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
vmovss %xmm0, 1280(%rsp,%r15,8)
jmp .LBL_2_7
#endif
+END (_ZGVeN16vl4l4_sincosf_skx)
+libmvec_hidden_def(_ZGVeN16vl4l4_sincosf_skx)
+
+/* Wrapper between vvv and vl4l4 vector variants. */
+.macro WRAPPER_AVX512_vvv_vl4l4 callee
+ pushq %rbp
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbp, 0)
+ movq %rsp, %rbp
+ cfi_def_cfa_register (%rbp)
+ andq $-64, %rsp
+ subq $384, %rsp
+ /* Encoding for vmovups %zmm1, 128(%rsp). */
+ .byte 0x62
+ .byte 0xf1
+ .byte 0x7c
+ .byte 0x48
+ .byte 0x11
+ .byte 0x4c
+ .byte 0x24
+ .byte 0x02
+ lea (%rsp), %rdi
+ /* Encoding for vmovups %zmm2, 192(%rdi). */
+ .byte 0x62
+ .byte 0xf1
+ .byte 0x7c
+ .byte 0x48
+ .byte 0x11
+ .byte 0x57
+ .byte 0x03
+ /* Encoding for vmovups %zmm3, 256(%rdi). */
+ .byte 0x62
+ .byte 0xf1
+ .byte 0x7c
+ .byte 0x48
+ .byte 0x11
+ .byte 0x5f
+ .byte 0x04
+ /* Encoding for vmovups %zmm4, 320(%rdi). */
+ .byte 0x62
+ .byte 0xf1
+ .byte 0x7c
+ .byte 0x48
+ .byte 0x11
+ .byte 0x67
+ .byte 0x05
+ lea 64(%rsp), %rsi
+ call HIDDEN_JUMPTARGET(\callee)
+ movq 128(%rsp), %rdx
+ movq 136(%rsp), %rsi
+ movq 144(%rsp), %r8
+ movq 152(%rsp), %r10
+ movl (%rsp), %eax
+ movl 4(%rsp), %ecx
+ movl 8(%rsp), %edi
+ movl 12(%rsp), %r9d
+ movl %eax, (%rdx)
+ movl %ecx, (%rsi)
+ movq 160(%rsp), %rax
+ movq 168(%rsp), %rcx
+ movl %edi, (%r8)
+ movl %r9d, (%r10)
+ movq 176(%rsp), %rdi
+ movq 184(%rsp), %r9
+ movl 16(%rsp), %r11d
+ movl 20(%rsp), %edx
+ movl 24(%rsp), %esi
+ movl 28(%rsp), %r8d
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movq 192(%rsp), %r11
+ movq 200(%rsp), %rdx
+ movl %esi, (%rdi)
+ movl %r8d, (%r9)
+ movq 208(%rsp), %rsi
+ movq 216(%rsp), %r8
+ movl 32(%rsp), %r10d
+ movl 36(%rsp), %eax
+ movl 40(%rsp), %ecx
+ movl 44(%rsp), %edi
+ movl %r10d, (%r11)
+ movl %eax, (%rdx)
+ movq 224(%rsp), %r10
+ movq 232(%rsp), %rax
+ movl %ecx, (%rsi)
+ movl %edi, (%r8)
+ movq 240(%rsp), %rcx
+ movq 248(%rsp), %rdi
+ movl 48(%rsp), %r9d
+ movl 52(%rsp), %r11d
+ movl 56(%rsp), %edx
+ movl 60(%rsp), %esi
+ movl %r9d, (%r10)
+ movl %r11d, (%rax)
+ movq 256(%rsp), %r9
+ movq 264(%rsp), %r11
+ movl %edx, (%rcx)
+ movl %esi, (%rdi)
+ movq 272(%rsp), %rdx
+ movq 280(%rsp), %rsi
+ movl 64(%rsp), %r8d
+ movl 68(%rsp), %r10d
+ movl 72(%rsp), %eax
+ movl 76(%rsp), %ecx
+ movl %r8d, (%r9)
+ movl %r10d, (%r11)
+ movq 288(%rsp), %r8
+ movq 296(%rsp), %r10
+ movl %eax, (%rdx)
+ movl %ecx, (%rsi)
+ movq 304(%rsp), %rax
+ movq 312(%rsp), %rcx
+ movl 80(%rsp), %edi
+ movl 84(%rsp), %r9d
+ movl 88(%rsp), %r11d
+ movl 92(%rsp), %edx
+ movl %edi, (%r8)
+ movl %r9d, (%r10)
+ movq 320(%rsp), %rdi
+ movq 328(%rsp), %r9
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movq 336(%rsp), %r11
+ movq 344(%rsp), %rdx
+ movl 96(%rsp), %esi
+ movl 100(%rsp), %r8d
+ movl 104(%rsp), %r10d
+ movl 108(%rsp), %eax
+ movl %esi, (%rdi)
+ movl %r8d, (%r9)
+ movq 352(%rsp), %rsi
+ movq 360(%rsp), %r8
+ movl %r10d, (%r11)
+ movl %eax, (%rdx)
+ movq 368(%rsp), %r10
+ movq 376(%rsp), %rax
+ movl 112(%rsp), %ecx
+ movl 116(%rsp), %edi
+ movl 120(%rsp), %r9d
+ movl 124(%rsp), %r11d
+ movl %ecx, (%rsi)
+ movl %edi, (%r8)
+ movl %r9d, (%r10)
+ movl %r11d, (%rax)
+ movq %rbp, %rsp
+ cfi_def_cfa_register (%rsp)
+ popq %rbp
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbp)
+ ret
+.endm
+
+ENTRY (_ZGVeN16vvv_sincosf_knl)
+WRAPPER_AVX512_vvv_vl4l4 _ZGVeN16vl4l4_sincosf_knl
+END (_ZGVeN16vvv_sincosf_knl)
+
+ENTRY (_ZGVeN16vvv_sincosf_skx)
+WRAPPER_AVX512_vvv_vl4l4 _ZGVeN16vl4l4_sincosf_skx
END (_ZGVeN16vvv_sincosf_skx)
.section .rodata, "a"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S
b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S
index 562367b..54205ce
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S
@@ -20,7 +20,7 @@
#include "svml_s_trig_data.h"
.text
-ENTRY (_ZGVbN4vvv_sincosf_sse4)
+ENTRY (_ZGVbN4vl4l4_sincosf_sse4)
/*
ALGORITHM DESCRIPTION:
@@ -265,4 +265,45 @@ ENTRY (_ZGVbN4vvv_sincosf_sse4)
movss %xmm0, 256(%rsp,%r15,8)
jmp .LBL_1_7
+END (_ZGVbN4vl4l4_sincosf_sse4)
+libmvec_hidden_def(_ZGVbN4vl4l4_sincosf_sse4)
+
+/* vvv version implemented with wrapper to vl4l4 variant. */
+ENTRY (_ZGVbN4vvv_sincosf_sse4)
+ subq $104, %rsp
+ .cfi_def_cfa_offset 112
+ movdqu %xmm1, 32(%rsp)
+ lea (%rsp), %rdi
+ movdqu %xmm2, 48(%rdi)
+ lea 16(%rsp), %rsi
+ movdqu %xmm3, 48(%rsi)
+ movdqu %xmm4, 64(%rsi)
+ call HIDDEN_JUMPTARGET(_ZGVbN4vl4l4_sincosf_sse4)
+ movq 32(%rsp), %rdx
+ movq 40(%rsp), %rsi
+ movq 48(%rsp), %r8
+ movq 56(%rsp), %r10
+ movl (%rsp), %eax
+ movl 4(%rsp), %ecx
+ movl 8(%rsp), %edi
+ movl 12(%rsp), %r9d
+ movl %eax, (%rdx)
+ movl %ecx, (%rsi)
+ movq 64(%rsp), %rax
+ movq 72(%rsp), %rcx
+ movl %edi, (%r8)
+ movl %r9d, (%r10)
+ movq 80(%rsp), %rdi
+ movq 88(%rsp), %r9
+ movl 16(%rsp), %r11d
+ movl 20(%rsp), %edx
+ movl 24(%rsp), %esi
+ movl 28(%rsp), %r8d
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movl %esi, (%rdi)
+ movl %r8d, (%r9)
+ addq $104, %rsp
+ .cfi_def_cfa_offset 8
+ ret
END (_ZGVbN4vvv_sincosf_sse4)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S
b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S
index baf887d..fef0b75
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S
@@ -20,7 +20,7 @@
#include "svml_s_trig_data.h"
.text
-ENTRY(_ZGVdN8vvv_sincosf_avx2)
+ENTRY (_ZGVdN8vl4l4_sincosf_avx2)
/*
ALGORITHM DESCRIPTION:
@@ -238,4 +238,77 @@ ENTRY(_ZGVdN8vvv_sincosf_avx2)
vmovss %xmm0, 384(%rsp,%r15,8)
jmp .LBL_1_7
-END(_ZGVdN8vvv_sincosf_avx2)
+END (_ZGVdN8vl4l4_sincosf_avx2)
+libmvec_hidden_def(_ZGVdN8vl4l4_sincosf_avx2)
+
+/* vvv version implemented with wrapper to vl4l4 variant. */
+ENTRY (_ZGVdN8vvv_sincosf_avx2)
+ pushq %rbp
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbp, 0)
+ movq %rsp, %rbp
+ cfi_def_cfa_register (%rbp)
+ andq $-32, %rsp
+ subq $192, %rsp
+ vmovdqu %ymm1, 64(%rsp)
+ lea (%rsp), %rdi
+ vmovdqu %ymm2, 96(%rdi)
+ vmovdqu %ymm3, 128(%rdi)
+ vmovdqu %ymm4, 160(%rdi)
+ lea 32(%rsp), %rsi
+ call HIDDEN_JUMPTARGET(_ZGVdN8vl4l4_sincosf_avx2)
+ movq 64(%rsp), %rdx
+ movq 72(%rsp), %rsi
+ movq 80(%rsp), %r8
+ movq 88(%rsp), %r10
+ movl (%rsp), %eax
+ movl 4(%rsp), %ecx
+ movl 8(%rsp), %edi
+ movl 12(%rsp), %r9d
+ movl %eax, (%rdx)
+ movl %ecx, (%rsi)
+ movq 96(%rsp), %rax
+ movq 104(%rsp), %rcx
+ movl %edi, (%r8)
+ movl %r9d, (%r10)
+ movq 112(%rsp), %rdi
+ movq 120(%rsp), %r9
+ movl 16(%rsp), %r11d
+ movl 20(%rsp), %edx
+ movl 24(%rsp), %esi
+ movl 28(%rsp), %r8d
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movq 128(%rsp), %r11
+ movq 136(%rsp), %rdx
+ movl %esi, (%rdi)
+ movl %r8d, (%r9)
+ movq 144(%rsp), %rsi
+ movq 152(%rsp), %r8
+ movl 32(%rsp), %r10d
+ movl 36(%rsp), %eax
+ movl 40(%rsp), %ecx
+ movl 44(%rsp), %edi
+ movl %r10d, (%r11)
+ movl %eax, (%rdx)
+ movq 160(%rsp), %r10
+ movq 168(%rsp), %rax
+ movl %ecx, (%rsi)
+ movl %edi, (%r8)
+ movq 176(%rsp), %rcx
+ movq 184(%rsp), %rdi
+ movl 48(%rsp), %r9d
+ movl 52(%rsp), %r11d
+ movl 56(%rsp), %edx
+ movl 60(%rsp), %esi
+ movl %r9d, (%r10)
+ movl %r11d, (%rax)
+ movl %edx, (%rcx)
+ movl %esi, (%rdi)
+ movq %rbp, %rsp
+ cfi_def_cfa_register (%rsp)
+ popq %rbp
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbp)
+ ret
+END (_ZGVdN8vvv_sincosf_avx2)
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos2_core.S
b/sysdeps/x86_64/fpu/svml_d_sincos2_core.S
index 74afa0a..3dbc692
--- a/sysdeps/x86_64/fpu/svml_d_sincos2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos2_core.S
@@ -20,8 +20,13 @@
#include "svml_d_wrapper_impl.h"
.text
-ENTRY (_ZGVbN2vvv_sincos)
+ENTRY (_ZGVbN2vl8l8_sincos)
WRAPPER_IMPL_SSE2_fFF sincos
+END (_ZGVbN2vl8l8_sincos)
+libmvec_hidden_def (_ZGVbN2vl8l8_sincos)
+
+ENTRY (_ZGVbN2vvv_sincos)
+WRAPPER_IMPL_SSE2_fFF_vvv sincos
END (_ZGVbN2vvv_sincos)
#ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos4_core.S
b/sysdeps/x86_64/fpu/svml_d_sincos4_core.S
index 2c0b011..f2cf1c7
--- a/sysdeps/x86_64/fpu/svml_d_sincos4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos4_core.S
@@ -20,8 +20,13 @@
#include "svml_d_wrapper_impl.h"
.text
+ENTRY (_ZGVdN4vl8l8_sincos)
+WRAPPER_IMPL_AVX_fFF _ZGVbN2vl8l8_sincos
+END (_ZGVdN4vl8l8_sincos)
+libmvec_hidden_def (_ZGVdN4vl8l8_sincos)
+
ENTRY (_ZGVdN4vvv_sincos)
-WRAPPER_IMPL_AVX_fFF _ZGVbN2vvv_sincos
+WRAPPER_IMPL_AVX2_fFF_vvv _ZGVbN2vl8l8_sincos
END (_ZGVdN4vvv_sincos)
#ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
b/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
index e4320a9..cf3cd79
--- a/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
@@ -20,6 +20,10 @@
#include "svml_d_wrapper_impl.h"
.text
+ENTRY (_ZGVcN4vl8l8_sincos)
+WRAPPER_IMPL_AVX_fFF _ZGVbN2vl8l8_sincos
+END (_ZGVcN4vl8l8_sincos)
+
ENTRY (_ZGVcN4vvv_sincos)
-WRAPPER_IMPL_AVX_fFF _ZGVbN2vvv_sincos
+WRAPPER_IMPL_AVX_fFF_vvv _ZGVbN2vl8l8_sincos
END (_ZGVcN4vvv_sincos)
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
index 68d490e..7aba5f7
--- a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
@@ -20,6 +20,10 @@
#include "svml_d_wrapper_impl.h"
.text
+ENTRY (_ZGVeN8vl8l8_sincos)
+WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos
+END (_ZGVeN8vl8l8_sincos)
+
ENTRY (_ZGVeN8vvv_sincos)
-WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
+WRAPPER_IMPL_AVX512_fFF_vvv _ZGVdN4vl8l8_sincos
END (_ZGVeN8vvv_sincos)
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
b/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
index 5cbf10b..e6a83e6
--- a/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
@@ -20,6 +20,10 @@
#include "svml_s_wrapper_impl.h"
.text
+ENTRY (_ZGVeN16vl4l4_sincosf)
+WRAPPER_IMPL_AVX512_fFF _ZGVdN8vl4l4_sincosf
+END (_ZGVeN16vl4l4_sincosf)
+
ENTRY (_ZGVeN16vvv_sincosf)
-WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
+WRAPPER_IMPL_AVX512_fFF_vvv _ZGVdN8vl4l4_sincosf
END (_ZGVeN16vvv_sincosf)
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
b/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
index 1a7d273..e546c1c
--- a/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
@@ -16,13 +16,17 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-
#include <sysdep.h>
#include "svml_s_wrapper_impl.h"
.text
-ENTRY (_ZGVbN4vvv_sincosf)
+ENTRY (_ZGVbN4vl4l4_sincosf)
WRAPPER_IMPL_SSE2_fFF sincosf
+END (_ZGVbN4vl4l4_sincosf)
+libmvec_hidden_def (_ZGVbN4vl4l4_sincosf)
+
+ENTRY (_ZGVbN4vvv_sincosf)
+WRAPPER_IMPL_SSE2_fFF_vvv sincosf
END (_ZGVbN4vvv_sincosf)
#ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S
b/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S
index 74d1dfd..0cffa1f
--- a/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S
@@ -20,8 +20,13 @@
#include "svml_s_wrapper_impl.h"
.text
+ENTRY (_ZGVdN8vl4l4_sincosf)
+WRAPPER_IMPL_AVX_fFF _ZGVbN4vl4l4_sincosf
+END (_ZGVdN8vl4l4_sincosf)
+libmvec_hidden_def (_ZGVdN8vl4l4_sincosf)
+
ENTRY (_ZGVdN8vvv_sincosf)
-WRAPPER_IMPL_AVX_fFF _ZGVbN4vvv_sincosf
+WRAPPER_IMPL_AVX2_fFF_vvv _ZGVbN4vl4l4_sincosf
END (_ZGVdN8vvv_sincosf)
#ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S
b/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S
index 55b8b2d..0ccd9b5
--- a/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S
@@ -20,6 +20,10 @@
#include "svml_s_wrapper_impl.h"
.text
-ENTRY(_ZGVcN8vvv_sincosf)
-WRAPPER_IMPL_AVX_fFF _ZGVbN4vvv_sincosf
-END(_ZGVcN8vvv_sincosf)
+ENTRY (_ZGVcN8vl4l4_sincosf)
+WRAPPER_IMPL_AVX_fFF _ZGVbN4vl4l4_sincosf
+END (_ZGVcN8vl4l4_sincosf)
+
+ENTRY (_ZGVcN8vvv_sincosf)
+WRAPPER_IMPL_AVX_fFF_vvv _ZGVbN4vl4l4_sincosf
+END (_ZGVcN8vvv_sincosf)
diff --git a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
index a9d1597..dc393be
--- a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
@@ -23,7 +23,28 @@
VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVbN2v_cos)
VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVbN2v_sin)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVbN2vvv_sincos)
VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVbN2v_log)
VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVbN2v_exp)
VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVbN2vv_pow)
+
+#define VEC_INT_TYPE __m128i
+
+/* Redefinition of wrapper to be compatible with _ZGVbN2vvv_sincos. */
+#undef VECTOR_WRAPPER_fFF
+#define VECTOR_WRAPPER_fFF(scalar_func, vector_func) \
+extern void vector_func (VEC_TYPE, VEC_INT_TYPE, VEC_INT_TYPE); \
+void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1) \
+{ \
+ int i; \
+ VEC_TYPE mx; \
+ VEC_INT_TYPE mr, mr1; \
+ INIT_VEC_LOOP (mx, x, VEC_LEN); \
+ INIT_VEC_LOOP (mr, (long int)r, VEC_LEN); \
+ INIT_VEC_LOOP (mr1, (long int)r1, VEC_LEN); \
+ vector_func (mx, mr, mr1); \
+ TEST_VEC_LOOP (*(FLOAT*)mr, VEC_LEN); \
+ TEST_VEC_LOOP (*(FLOAT*)mr1, VEC_LEN); \
+ return; \
+}
+
+VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVbN2vvv_sincos)
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
index eb6a531..26448ea
--- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
@@ -26,7 +26,28 @@
VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVdN4v_cos)
VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVdN4v_sin)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVdN4vvv_sincos)
VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVdN4v_log)
VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVdN4v_exp)
VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVdN4vv_pow)
+
+#define VEC_INT_TYPE __m256i
+
+/* Redefinition of wrapper to be compatible with _ZGVdN4vvv_sincos. */
+#undef VECTOR_WRAPPER_fFF
+#define VECTOR_WRAPPER_fFF(scalar_func, vector_func) \
+extern void vector_func (VEC_TYPE, VEC_INT_TYPE, VEC_INT_TYPE); \
+void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1) \
+{ \
+ int i; \
+ VEC_TYPE mx; \
+ VEC_INT_TYPE mr, mr1; \
+ INIT_VEC_LOOP (mx, x, VEC_LEN); \
+ INIT_VEC_LOOP (mr, (long int)r, VEC_LEN); \
+ INIT_VEC_LOOP (mr1, (long int)r1, VEC_LEN); \
+ vector_func (mx, mr, mr1); \
+ TEST_VEC_LOOP (*(FLOAT*)mr, VEC_LEN); \
+ TEST_VEC_LOOP (*(FLOAT*)mr1, VEC_LEN); \
+ return; \
+}
+
+VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVdN4vvv_sincos)
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
index 52b81da..52a67be
--- a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
@@ -23,7 +23,29 @@
VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVcN4v_cos)
VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVcN4v_sin)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVcN4vvv_sincos)
VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVcN4v_log)
VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVcN4v_exp)
VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVcN4vv_pow)
+
+#define VEC_INT_TYPE __m128i
+
+/* Redefinition of wrapper to be compatible with _ZGVcN4vvv_sincos. */
+#undef VECTOR_WRAPPER_fFF
+#define VECTOR_WRAPPER_fFF(scalar_func, vector_func) \
+extern void vector_func (VEC_TYPE, VEC_INT_TYPE, VEC_INT_TYPE, \
+ VEC_INT_TYPE, VEC_INT_TYPE); \
+void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1) \
+{ \
+ int i; \
+ VEC_TYPE mx; \
+ VEC_INT_TYPE mr, mr1; \
+ INIT_VEC_LOOP (mx, x, VEC_LEN); \
+ INIT_VEC_LOOP (mr, (long int)r, VEC_LEN/2); \
+ INIT_VEC_LOOP (mr1, (long int)r1, VEC_LEN/2); \
+ vector_func (mx, mr, mr, mr1, mr1); \
+ TEST_VEC_LOOP (*(FLOAT*)mr, VEC_LEN/2); \
+ TEST_VEC_LOOP (*(FLOAT*)mr1, VEC_LEN/2); \
+ return; \
+}
+
+VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVcN4vvv_sincos)
diff --git a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
index c10bb9c..557cb1e
--- a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
@@ -23,7 +23,28 @@
VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVeN8v_cos)
VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVeN8v_sin)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVeN8vvv_sincos)
VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVeN8v_log)
VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVeN8v_exp)
VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVeN8vv_pow)
+
+#define VEC_INT_TYPE __m512i
+
+/* Redefinition of wrapper to be compatible with _ZGVeN8vvv_sincos. */
+#undef VECTOR_WRAPPER_fFF
+#define VECTOR_WRAPPER_fFF(scalar_func, vector_func) \
+extern void vector_func (VEC_TYPE, VEC_INT_TYPE, VEC_INT_TYPE); \
+void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1) \
+{ \
+ int i; \
+ VEC_TYPE mx; \
+ VEC_INT_TYPE mr, mr1; \
+ INIT_VEC_LOOP (mx, x, VEC_LEN); \
+ INIT_VEC_LOOP (mr, (long int)r, VEC_LEN); \
+ INIT_VEC_LOOP (mr1, (long int)r1, VEC_LEN); \
+ vector_func (mx, mr, mr1); \
+ TEST_VEC_LOOP (*(FLOAT*)mr, VEC_LEN); \
+ TEST_VEC_LOOP (*(FLOAT*)mr1, VEC_LEN); \
+ return; \
+}
+
+VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVeN8vvv_sincos)
diff --git a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
index dc09e4a..9137dbe
--- a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
@@ -23,7 +23,29 @@
VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVeN16v_cosf)
VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVeN16v_sinf)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVeN16vvv_sincosf)
VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVeN16v_logf)
VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVeN16v_expf)
VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVeN16vv_powf)
+
+#define VEC_INT_TYPE __m512i
+
+/* Redefinition of wrapper to be compatible with _ZGVeN16vvv_sincosf. */
+#undef VECTOR_WRAPPER_fFF
+#define VECTOR_WRAPPER_fFF(scalar_func, vector_func) \
+extern void vector_func (VEC_TYPE, VEC_INT_TYPE, VEC_INT_TYPE, \
+ VEC_INT_TYPE, VEC_INT_TYPE); \
+void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1) \
+{ \
+ int i; \
+ VEC_TYPE mx; \
+ VEC_INT_TYPE mr, mr1; \
+ INIT_VEC_LOOP (mx, x, VEC_LEN); \
+ INIT_VEC_LOOP (mr, (long int)r, VEC_LEN/2); \
+ INIT_VEC_LOOP (mr1, (long int)r1, VEC_LEN/2); \
+ vector_func (mx, mr, mr, mr1, mr1); \
+ TEST_VEC_LOOP (*(FLOAT*)mr, VEC_LEN/2); \
+ TEST_VEC_LOOP (*(FLOAT*)mr1, VEC_LEN/2); \
+ return; \
+}
+
+VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVeN16vvv_sincosf)
diff --git a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
index 0bb9818..005ad22
--- a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
@@ -23,7 +23,29 @@
VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVbN4v_cosf)
VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVbN4v_sinf)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVbN4vvv_sincosf)
VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVbN4v_logf)
VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVbN4v_expf)
VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVbN4vv_powf)
+
+#define VEC_INT_TYPE __m128i
+
+/* Redefinition of wrapper to be compatible with _ZGVbN4vvv_sincosf. */
+#undef VECTOR_WRAPPER_fFF
+#define VECTOR_WRAPPER_fFF(scalar_func, vector_func) \
+extern void vector_func (VEC_TYPE, VEC_INT_TYPE, VEC_INT_TYPE, \
+ VEC_INT_TYPE, VEC_INT_TYPE); \
+void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1) \
+{ \
+ int i; \
+ VEC_TYPE mx; \
+ VEC_INT_TYPE mr, mr1; \
+ INIT_VEC_LOOP (mx, x, VEC_LEN); \
+ INIT_VEC_LOOP (mr, (long int)r, VEC_LEN/2); \
+ INIT_VEC_LOOP (mr1, (long int)r1, VEC_LEN/2); \
+ vector_func (mx, mr, mr, mr1, mr1); \
+ TEST_VEC_LOOP (*(FLOAT*)mr, VEC_LEN/2); \
+ TEST_VEC_LOOP (*(FLOAT*)mr1, VEC_LEN/2); \
+ return; \
+}
+
+VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVbN4vvv_sincosf)
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
index 4985ac2..53f4221
--- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
@@ -26,7 +26,29 @@
VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVdN8v_cosf)
VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVdN8v_sinf)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVdN8vvv_sincosf)
VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVdN8v_logf)
VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVdN8v_expf)
VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVdN8vv_powf)
+
+#define VEC_INT_TYPE __m256i
+
+/* Redefinition of wrapper to be compatible with _ZGVdN8vvv_sincosf. */
+#undef VECTOR_WRAPPER_fFF
+#define VECTOR_WRAPPER_fFF(scalar_func, vector_func) \
+extern void vector_func (VEC_TYPE, VEC_INT_TYPE, VEC_INT_TYPE, \
+ VEC_INT_TYPE, VEC_INT_TYPE); \
+void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1) \
+{ \
+ int i; \
+ VEC_TYPE mx; \
+ VEC_INT_TYPE mr, mr1; \
+ INIT_VEC_LOOP (mx, x, VEC_LEN); \
+ INIT_VEC_LOOP (mr, (long int)r, VEC_LEN/2); \
+ INIT_VEC_LOOP (mr1, (long int)r1, VEC_LEN/2); \
+ vector_func (mx, mr, mr, mr1, mr1); \
+ TEST_VEC_LOOP (*(FLOAT*)mr, VEC_LEN/2); \
+ TEST_VEC_LOOP (*(FLOAT*)mr1, VEC_LEN/2); \
+ return; \
+}
+
+VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVdN8vvv_sincosf)
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
index 9cc2883..12dc4b9
--- a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
@@ -23,7 +23,31 @@
VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVcN8v_cosf)
VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVcN8v_sinf)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVcN8vvv_sincosf)
VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVcN8v_logf)
VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVcN8v_expf)
VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVcN8vv_powf)
+
+#define VEC_INT_TYPE __m128i
+
+/* Redefinition of wrapper to be compatible with _ZGVcN8vvv_sincosf. */
+#undef VECTOR_WRAPPER_fFF
+#define VECTOR_WRAPPER_fFF(scalar_func, vector_func) \
+extern void vector_func (VEC_TYPE, VEC_INT_TYPE, VEC_INT_TYPE, \
+ VEC_INT_TYPE, VEC_INT_TYPE, \
+ VEC_INT_TYPE, VEC_INT_TYPE, \
+ VEC_INT_TYPE, VEC_INT_TYPE); \
+void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1) \
+{ \
+ int i; \
+ VEC_TYPE mx; \
+ VEC_INT_TYPE mr, mr1; \
+ INIT_VEC_LOOP (mx, x, VEC_LEN); \
+ INIT_VEC_LOOP (mr, (long int)r, VEC_LEN/4); \
+ INIT_VEC_LOOP (mr1, (long int)r1, VEC_LEN/4); \
+ vector_func (mx, mr, mr, mr, mr, mr1, mr1, mr1, mr1); \
+ TEST_VEC_LOOP (*(FLOAT*)mr, VEC_LEN/4); \
+ TEST_VEC_LOOP (*(FLOAT*)mr1, VEC_LEN/4); \
+ return; \
+}
+
+VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVcN8vvv_sincosf)
--
WBR,
Andrew