This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[RFC PATCH 2/2] aarch64: add vector sin, cos, log and pow abi symbols
- From: Szabolcs Nagy <Szabolcs dot Nagy at arm dot com>
- To: GNU C Library <libc-alpha at sourceware dot org>, Steve Ellcey <sellcey at marvell dot com>
- Cc: nd <nd at arm dot com>
- Date: Thu, 4 Jul 2019 11:09:42 +0000
- Subject: [RFC PATCH 2/2] aarch64: add vector sin, cos, log and pow abi symbols
Add simple assembly implementations that fall back to scalar code,
similar to the vector exp code.
These are the symbols we expect to optimize.
(TODO: deal with sincos)
2019-07-04 Szabolcs Nagy <szabolcs.nagy@arm.com>
* sysdeps/aarch64/fpu/Makefile: Add functions.
* sysdeps/aarch64/fpu/Versions: Add symbols.
* sysdeps/aarch64/fpu/libmvec_double_vlen2_cos.S: New file.
* sysdeps/aarch64/fpu/libmvec_double_vlen2_log.S: New file.
* sysdeps/aarch64/fpu/libmvec_double_vlen2_pow.S: New file.
* sysdeps/aarch64/fpu/libmvec_double_vlen2_sin.S: New file.
* sysdeps/aarch64/fpu/libmvec_float_vlen4_cosf.S: New file.
* sysdeps/aarch64/fpu/libmvec_float_vlen4_logf.S: New file.
* sysdeps/aarch64/fpu/libmvec_float_vlen4_powf.S: New file.
* sysdeps/aarch64/fpu/libmvec_float_vlen4_sinf.S: New file.
* sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c: Add wrappers.
* sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c: Add wrappers.
* sysdeps/aarch64/libm-test-ulps: Update.
* sysdeps/unix/sysv/linux/aarch64/libmvec.abilist: Update.
---
sysdeps/aarch64/fpu/Makefile | 12 +++-
sysdeps/aarch64/fpu/Versions | 4 ++
.../aarch64/fpu/libmvec_double_vlen2_cos.S | 21 ++++++
.../aarch64/fpu/libmvec_double_vlen2_log.S | 21 ++++++
.../aarch64/fpu/libmvec_double_vlen2_pow.S | 62 ++++++++++++++++
.../aarch64/fpu/libmvec_double_vlen2_sin.S | 21 ++++++
.../aarch64/fpu/libmvec_float_vlen4_cosf.S | 21 ++++++
.../aarch64/fpu/libmvec_float_vlen4_logf.S | 21 ++++++
.../aarch64/fpu/libmvec_float_vlen4_powf.S | 70 +++++++++++++++++++
.../aarch64/fpu/libmvec_float_vlen4_sinf.S | 21 ++++++
.../aarch64/fpu/test-double-vlen2-wrappers.c | 12 ++++
.../aarch64/fpu/test-float-vlen4-wrappers.c | 12 ++++
sysdeps/aarch64/libm-test-ulps | 18 +++++
.../unix/sysv/linux/aarch64/libmvec.abilist | 8 +++
14 files changed, 322 insertions(+), 2 deletions(-)
create mode 100644 sysdeps/aarch64/fpu/libmvec_double_vlen2_cos.S
create mode 100644 sysdeps/aarch64/fpu/libmvec_double_vlen2_log.S
create mode 100644 sysdeps/aarch64/fpu/libmvec_double_vlen2_pow.S
create mode 100644 sysdeps/aarch64/fpu/libmvec_double_vlen2_sin.S
create mode 100644 sysdeps/aarch64/fpu/libmvec_float_vlen4_cosf.S
create mode 100644 sysdeps/aarch64/fpu/libmvec_float_vlen4_logf.S
create mode 100644 sysdeps/aarch64/fpu/libmvec_float_vlen4_powf.S
create mode 100644 sysdeps/aarch64/fpu/libmvec_float_vlen4_sinf.S
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index 220b664323..fe72a74aec 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -15,15 +15,23 @@ endif
ifeq ($(subdir),mathvec)
libmvec-support += \
+ libmvec_double_vlen2_cos \
libmvec_double_vlen2_exp \
+ libmvec_double_vlen2_log \
+ libmvec_double_vlen2_pow \
+ libmvec_double_vlen2_sin \
+ libmvec_float_vlen4_cosf \
libmvec_float_vlen4_expf \
+ libmvec_float_vlen4_logf \
+ libmvec_float_vlen4_powf \
+ libmvec_float_vlen4_sinf \
endif
ifeq ($(subdir),math)
ifeq ($(build-mathvec),yes)
-double-vlen2-funcs = exp
-float-vlen4-funcs = exp
+double-vlen2-funcs = cos exp log pow sin
+float-vlen4-funcs = cos exp log pow sin
ifeq ($(test-mathvec),yes)
libmvec-tests += double-vlen2 float-vlen4
endif
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
index da36f3c495..94ffaeee6d 100644
--- a/sysdeps/aarch64/fpu/Versions
+++ b/sysdeps/aarch64/fpu/Versions
@@ -1,5 +1,9 @@
libmvec {
GLIBC_2.30 {
+ _ZGVnN2v_cos; _ZGVnN4v_cosf;
_ZGVnN2v_exp; _ZGVnN4v_expf;
+ _ZGVnN2v_log; _ZGVnN4v_logf;
+ _ZGVnN2vv_pow; _ZGVnN4vv_powf;
+ _ZGVnN2v_sin; _ZGVnN4v_sinf;
}
}
diff --git a/sysdeps/aarch64/fpu/libmvec_double_vlen2_cos.S b/sysdeps/aarch64/fpu/libmvec_double_vlen2_cos.S
new file mode 100644
index 0000000000..f4ad3c75f4
--- /dev/null
+++ b/sysdeps/aarch64/fpu/libmvec_double_vlen2_cos.S
@@ -0,0 +1,21 @@
+/* Double-precision 2 element vector cos function.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SCALAR_FUNCTION cos
+#define VECTOR_FUNCTION _ZGVnN2v_cos
+#include "libmvec_double_vlen2.h"
diff --git a/sysdeps/aarch64/fpu/libmvec_double_vlen2_log.S b/sysdeps/aarch64/fpu/libmvec_double_vlen2_log.S
new file mode 100644
index 0000000000..b802a2608a
--- /dev/null
+++ b/sysdeps/aarch64/fpu/libmvec_double_vlen2_log.S
@@ -0,0 +1,21 @@
+/* Double-precision 2 element vector log function.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SCALAR_FUNCTION log
+#define VECTOR_FUNCTION _ZGVnN2v_log
+#include "libmvec_double_vlen2.h"
diff --git a/sysdeps/aarch64/fpu/libmvec_double_vlen2_pow.S b/sysdeps/aarch64/fpu/libmvec_double_vlen2_pow.S
new file mode 100644
index 0000000000..85151482bf
--- /dev/null
+++ b/sysdeps/aarch64/fpu/libmvec_double_vlen2_pow.S
@@ -0,0 +1,62 @@
+/* Double-precision 2 element vector x^y function.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ENTRY (_ZGVnN2vv_pow) // q0 = x lanes, q1 = y lanes; returns pow (x, y) per lane in q0.
+ stp x29, x30, [sp, -304]! // Frame: 16 (x29/x30) + 256 (q8-q23) + 32 (q0/q1 spill).
+ cfi_adjust_cfa_offset (304)
+ cfi_rel_offset (x29, 0)
+ cfi_rel_offset (x30, 8)
+ mov x29, sp
+ stp q8, q9, [sp, 16] // q8-q23 are saved across the scalar calls and restored below.
+ stp q10, q11, [sp, 48]
+ stp q12, q13, [sp, 80]
+ stp q14, q15, [sp, 112]
+ stp q16, q17, [sp, 144]
+ stp q18, q19, [sp, 176]
+ stp q20, q21, [sp, 208]
+ stp q22, q23, [sp, 240]
+
+ // Use per lane load/store to avoid endianness issues.
+ str q0, [sp, 272] // x vector spilled to [sp+272, sp+288).
+ str q1, [sp, 288] // y vector spilled to [sp+288, sp+304).
+ ldr d0, [sp, 272] // First 8-byte slot of x and of y.
+ ldr d1, [sp, 288]
+ bl pow
+ str d0, [sp, 272] // Result overwrites the x slot it was computed from.
+ ldr d0, [sp, 280] // Second 8-byte slot of x and of y.
+ ldr d1, [sp, 296]
+ bl pow
+ str d0, [sp, 280]
+ ldr q0, [sp, 272] // Reload both results as the vector return value.
+
+ ldp q8, q9, [sp, 16]
+ ldp q10, q11, [sp, 48]
+ ldp q12, q13, [sp, 80]
+ ldp q14, q15, [sp, 112]
+ ldp q16, q17, [sp, 144]
+ ldp q18, q19, [sp, 176]
+ ldp q20, q21, [sp, 208]
+ ldp q22, q23, [sp, 240]
+ ldp x29, x30, [sp], 304
+ cfi_adjust_cfa_offset (-304) // The frame was just popped, so the CFA offset shrinks by 304.
+ cfi_restore (x29)
+ cfi_restore (x30)
+ ret
+END (_ZGVnN2vv_pow)
diff --git a/sysdeps/aarch64/fpu/libmvec_double_vlen2_sin.S b/sysdeps/aarch64/fpu/libmvec_double_vlen2_sin.S
new file mode 100644
index 0000000000..c01e4399cd
--- /dev/null
+++ b/sysdeps/aarch64/fpu/libmvec_double_vlen2_sin.S
@@ -0,0 +1,21 @@
+/* Double-precision 2 element vector sin function.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SCALAR_FUNCTION sin
+#define VECTOR_FUNCTION _ZGVnN2v_sin
+#include "libmvec_double_vlen2.h"
diff --git a/sysdeps/aarch64/fpu/libmvec_float_vlen4_cosf.S b/sysdeps/aarch64/fpu/libmvec_float_vlen4_cosf.S
new file mode 100644
index 0000000000..2d9ea9fb36
--- /dev/null
+++ b/sysdeps/aarch64/fpu/libmvec_float_vlen4_cosf.S
@@ -0,0 +1,21 @@
+/* Single-precision 4 element vector cos function.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SCALAR_FUNCTION cosf
+#define VECTOR_FUNCTION _ZGVnN4v_cosf
+#include "libmvec_float_vlen4.h"
diff --git a/sysdeps/aarch64/fpu/libmvec_float_vlen4_logf.S b/sysdeps/aarch64/fpu/libmvec_float_vlen4_logf.S
new file mode 100644
index 0000000000..df961eadba
--- /dev/null
+++ b/sysdeps/aarch64/fpu/libmvec_float_vlen4_logf.S
@@ -0,0 +1,21 @@
+/* Single-precision 4 element vector log function.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SCALAR_FUNCTION logf
+#define VECTOR_FUNCTION _ZGVnN4v_logf
+#include "libmvec_float_vlen4.h"
diff --git a/sysdeps/aarch64/fpu/libmvec_float_vlen4_powf.S b/sysdeps/aarch64/fpu/libmvec_float_vlen4_powf.S
new file mode 100644
index 0000000000..95e593c151
--- /dev/null
+++ b/sysdeps/aarch64/fpu/libmvec_float_vlen4_powf.S
@@ -0,0 +1,70 @@
+/* Single-precision 4 element vector x^y function.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ENTRY (_ZGVnN4vv_powf) // q0 = x lanes, q1 = y lanes; returns powf (x, y) per lane in q0.
+ stp x29, x30, [sp, -304]! // Frame: 16 (x29/x30) + 256 (q8-q23) + 32 (q0/q1 spill).
+ cfi_adjust_cfa_offset (304)
+ cfi_rel_offset (x29, 0)
+ cfi_rel_offset (x30, 8)
+ mov x29, sp
+ stp q8, q9, [sp, 16] // q8-q23 are saved across the scalar calls and restored below.
+ stp q10, q11, [sp, 48]
+ stp q12, q13, [sp, 80]
+ stp q14, q15, [sp, 112]
+ stp q16, q17, [sp, 144]
+ stp q18, q19, [sp, 176]
+ stp q20, q21, [sp, 208]
+ stp q22, q23, [sp, 240]
+
+ // Use per lane load/store to avoid endianness issues.
+ str q0, [sp, 272] // x vector spilled to [sp+272, sp+288).
+ str q1, [sp, 288] // y vector spilled to [sp+288, sp+304).
+ ldr s0, [sp, 272] // First 4-byte slot of x and of y.
+ ldr s1, [sp, 288]
+ bl powf
+ str s0, [sp, 272] // Each result overwrites the x slot it was computed from.
+ ldr s0, [sp, 276] // Second slot.
+ ldr s1, [sp, 292]
+ bl powf
+ str s0, [sp, 276]
+ ldr s0, [sp, 280] // Third slot.
+ ldr s1, [sp, 296]
+ bl powf
+ str s0, [sp, 280]
+ ldr s0, [sp, 284] // Fourth slot.
+ ldr s1, [sp, 300]
+ bl powf
+ str s0, [sp, 284]
+ ldr q0, [sp, 272] // Reload all four results as the vector return value.
+
+ ldp q8, q9, [sp, 16]
+ ldp q10, q11, [sp, 48]
+ ldp q12, q13, [sp, 80]
+ ldp q14, q15, [sp, 112]
+ ldp q16, q17, [sp, 144]
+ ldp q18, q19, [sp, 176]
+ ldp q20, q21, [sp, 208]
+ ldp q22, q23, [sp, 240]
+ ldp x29, x30, [sp], 304
+ cfi_adjust_cfa_offset (-304) // The frame was just popped, so the CFA offset shrinks by 304.
+ cfi_restore (x29)
+ cfi_restore (x30)
+ ret
+END (_ZGVnN4vv_powf)
diff --git a/sysdeps/aarch64/fpu/libmvec_float_vlen4_sinf.S b/sysdeps/aarch64/fpu/libmvec_float_vlen4_sinf.S
new file mode 100644
index 0000000000..49b8e95a91
--- /dev/null
+++ b/sysdeps/aarch64/fpu/libmvec_float_vlen4_sinf.S
@@ -0,0 +1,21 @@
+/* Single-precision 4 element vector sin function.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SCALAR_FUNCTION sinf
+#define VECTOR_FUNCTION _ZGVnN4v_sinf
+#include "libmvec_float_vlen4.h"
diff --git a/sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c b/sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c
index 6c6c44d6b5..00c5f5bd4b 100644
--- a/sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-vlen2-wrappers.c
@@ -25,4 +25,16 @@
placing it here happens to work, should be fixed in test-math-vector.h. */
__attribute__ ((aarch64_vector_pcs))
+VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVnN2v_cos)
+
+__attribute__ ((aarch64_vector_pcs))
VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVnN2v_exp)
+
+__attribute__ ((aarch64_vector_pcs))
+VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVnN2v_log)
+
+__attribute__ ((aarch64_vector_pcs))
+VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVnN2vv_pow)
+
+__attribute__ ((aarch64_vector_pcs))
+VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVnN2v_sin)
diff --git a/sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c b/sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c
index 5117633f1f..2b9cf6d31f 100644
--- a/sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-vlen4-wrappers.c
@@ -25,4 +25,16 @@
placing it here happens to work, should be fixed in test-math-vector.h. */
__attribute__ ((aarch64_vector_pcs))
+VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVnN4v_cosf)
+
+__attribute__ ((aarch64_vector_pcs))
VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVnN4v_expf)
+
+__attribute__ ((aarch64_vector_pcs))
+VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVnN4v_logf)
+
+__attribute__ ((aarch64_vector_pcs))
+VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVnN4vv_powf)
+
+__attribute__ ((aarch64_vector_pcs))
+VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVnN4v_sinf)
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index 1ed4af9e55..f83213c48c 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -1043,6 +1043,12 @@ ifloat: 1
ildouble: 2
ldouble: 2
+Function: "cos_vlen2":
+double: 1
+
+Function: "cos_vlen4":
+float: 1
+
Function: "cosh":
double: 1
float: 1
@@ -1977,6 +1983,12 @@ ifloat: 1
ildouble: 2
ldouble: 2
+Function: "pow_vlen2":
+double: 1
+
+Function: "pow_vlen4":
+float: 1
+
Function: "sin":
double: 1
float: 1
@@ -2009,6 +2021,12 @@ ifloat: 1
ildouble: 3
ldouble: 3
+Function: "sin_vlen2":
+double: 1
+
+Function: "sin_vlen4":
+float: 1
+
Function: "sincos":
double: 1
float: 1
diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
index 9e178253f7..20cc3dcd7f 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
@@ -1,2 +1,10 @@
+GLIBC_2.30 _ZGVnN2v_cos F
GLIBC_2.30 _ZGVnN2v_exp F
+GLIBC_2.30 _ZGVnN2v_log F
+GLIBC_2.30 _ZGVnN2v_sin F
+GLIBC_2.30 _ZGVnN2vv_pow F
+GLIBC_2.30 _ZGVnN4v_cosf F
GLIBC_2.30 _ZGVnN4v_expf F
+GLIBC_2.30 _ZGVnN4v_logf F
+GLIBC_2.30 _ZGVnN4v_sinf F
+GLIBC_2.30 _ZGVnN4vv_powf F