diff --git a/Makeconfig b/Makeconfig index 24a3b82..4672008 100644 --- a/Makeconfig +++ b/Makeconfig @@ -476,7 +476,7 @@ link-libc = $(link-libc-rpath-link) $(link-libc-before-gnulib) $(gnulib) link-libc-tests = $(link-libc-tests-rpath-link) \ $(link-libc-before-gnulib) $(gnulib-tests) # This is how to find at build-time things that will be installed there. -rpath-dirs = math elf dlfcn nss nis rt resolv crypt +rpath-dirs = math elf dlfcn nss nis rt resolv crypt mathvec rpath-link = \ $(common-objdir):$(subst $(empty) ,:,$(patsubst ../$(subdir),.,$(rpath-dirs:%=$(common-objpfx)%))) else @@ -1018,7 +1018,7 @@ all-subdirs = csu assert ctype locale intl catgets math setjmp signal \ stdlib stdio-common libio malloc string wcsmbs time dirent \ grp pwd posix io termios resource misc socket sysvipc gmon \ gnulib iconv iconvdata wctype manual shadow gshadow po argp \ - crypt localedata timezone rt conform debug \ + crypt localedata timezone rt conform debug mathvec \ $(add-on-subdirs) dlfcn elf ifndef avoid-generated diff --git a/bits/math-vector.h b/bits/math-vector.h new file mode 100644 index 0000000..c8fe5cb --- /dev/null +++ b/bits/math-vector.h @@ -0,0 +1,25 @@ +/* Platform-specific SIMD declarations of math functions. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _MATH_H +# error "Never include <bits/math-vector.h> directly; \ + include <math.h> instead." +#endif + +/* Get default empty definitions for simd declarations. */ +#include <libm-simd-decl-stubs.h> diff --git a/configure b/configure index 24888d9..fd1a9b9 100755 --- a/configure +++ b/configure @@ -774,6 +774,7 @@ enable_systemtap enable_build_nscd enable_nscd enable_pt_chown +enable_mathvec with_cpu ' ac_precious_vars='build_alias @@ -1437,6 +1438,8 @@ Optional Features: --disable-build-nscd disable building and installing the nscd daemon --disable-nscd library functions will not contact the nscd daemon --enable-pt_chown Enable building and installing pt_chown + --enable-mathvec Enable building and installing mathvec [default + depends on architecture] Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -3730,6 +3733,14 @@ if test "$build_pt_chown" = yes; then fi +# Check whether --enable-mathvec was given. +if test "${enable_mathvec+set}" = set; then : + enableval=$enable_mathvec; build_mathvec=$enableval +else + build_mathvec=notset +fi + + # We keep the original values in `$config_*' and never modify them, so we # can write them unchanged into config.make. Everything else uses # $machine, $vendor, and $os, and changes them whenever convenient. diff --git a/configure.ac b/configure.ac index 9dd2c68..f86ed2e 100644 --- a/configure.ac +++ b/configure.ac @@ -353,6 +353,12 @@ if test "$build_pt_chown" = yes; then AC_DEFINE(HAVE_PT_CHOWN) fi +AC_ARG_ENABLE([mathvec], + [AS_HELP_STRING([--enable-mathvec], + [Enable building and installing mathvec @<:@default depends on architecture@:>@])], + [build_mathvec=$enableval], + [build_mathvec=notset]) + # We keep the original values in `$config_*' and never modify them, so we # can write them unchanged into config.make. Everything else uses # $machine, $vendor, and $os, and changes them whenever convenient. 
diff --git a/include/libm-simd-decl-stubs.h b/include/libm-simd-decl-stubs.h new file mode 100644 index 0000000..0048717 --- /dev/null +++ b/include/libm-simd-decl-stubs.h @@ -0,0 +1,35 @@ +/* Empty definitions required for __MATHCALL_VEC unfolding in mathcalls.h. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Needed definitions could be generated with: + for func in $(grep __MATHCALL_VEC math/bits/mathcalls.h |\ + sed -r "s|__MATHCALL_VEC.?\(||; s|,.*||"); do + echo "#define __DECL_SIMD_${func}"; + echo "#define __DECL_SIMD_${func}f"; + echo "#define __DECL_SIMD_${func}l"; + done + */ + +#ifndef _LIBM_SIMD_DECL_STUBS_H +#define _LIBM_SIMD_DECL_STUBS_H 1 + +#define __DECL_SIMD_cos +#define __DECL_SIMD_cosf +#define __DECL_SIMD_cosl + +#endif diff --git a/math/Makefile b/math/Makefile index 866bc0f..dee39d1 100644 --- a/math/Makefile +++ b/math/Makefile @@ -26,7 +26,7 @@ headers := math.h bits/mathcalls.h bits/mathinline.h bits/huge_val.h \ bits/huge_valf.h bits/huge_vall.h bits/inf.h bits/nan.h \ fpu_control.h complex.h bits/cmathcalls.h fenv.h \ bits/fenv.h bits/fenvinline.h bits/mathdef.h tgmath.h \ - bits/math-finite.h + bits/math-finite.h bits/math-vector.h libm-simd-decl-stubs.h # FPU support code. 
aux := setfpucw fpu_control @@ -85,6 +85,22 @@ generated += $(foreach s,.c .S l.c l.S f.c f.S,$(calls:s_%=m_%$s)) routines = $(calls) $(calls:=f) $(long-c-$(long-double-fcts)) long-c-yes = $(calls:=l) +ifeq ($(build-mathvec),yes) +# Install libm.so as a linker script so that linking with -lm also +# pulls in the vector math library when it is needed. +install-lib-ldscripts := libm.so +install_subdir: $(inst_libdir)/libm.so +$(inst_libdir)/libm.so: $(common-objpfx)format.lds \ + $(libm) \ + $(common-objpfx)mathvec/libmvec.so$(libmvec.so-version) \ + $(+force) + (echo '/* GNU ld script'; echo '*/';\ + cat $< && \ + echo 'GROUP ( $(slibdir)/libm.so$(libm.so-version) ' \ + 'AS_NEEDED ( $(slibdir)/libmvec.so$(libmvec.so-version) ) )' \ + ) > $@.new && mv -f $@.new $@ +endif + # Rules for the test suite. tests = test-matherr test-fenv atest-exp atest-sincos atest-exp2 basic-test \ test-misc test-fpucw test-fpucw-ieee tst-definitions test-tgmath \ @@ -97,12 +113,13 @@ tests-static = test-fpucw-static test-fpucw-ieee-static test-longdouble-yes = test-ldouble test-ildoubl ifneq (no,$(PERL)) +libm-vec-tests = $(addprefix test-,$(libmvec-tests)) libm-tests = test-float test-double $(test-longdouble-$(long-double-fcts)) \ - test-ifloat test-idouble + test-ifloat test-idouble $(libm-vec-tests) libm-tests.o = $(addsuffix .o,$(libm-tests)) tests += $(libm-tests) -libm-tests-generated = libm-test-ulps.h libm-test.c +libm-tests-generated = libm-test-ulps.h libm-have-vector-test.h libm-test.c generated += $(libm-tests-generated) libm-test.stmp # This is needed for dependencies @@ -113,9 +130,10 @@ ulps-file = $(firstword $(wildcard $(sysdirs:%=%/libm-test-ulps))) $(addprefix $(objpfx), $(libm-tests-generated)): $(objpfx)libm-test.stmp $(objpfx)libm-test.stmp: $(ulps-file) libm-test.inc gen-libm-test.pl \ - auto-libm-test-out + gen-libm-have-vector-test.sh auto-libm-test-out $(make-target-directory) $(PERL) gen-libm-test.pl -u $< -o "$(objpfx)" + $(SHELL) gen-libm-have-vector-test.sh > $(objpfx)libm-have-vector-test.h @echo > $@ 
$(objpfx)test-float.o: $(objpfx)libm-test.stmp @@ -124,8 +142,22 @@ $(objpfx)test-double.o: $(objpfx)libm-test.stmp $(objpfx)test-idouble.o: $(objpfx)libm-test.stmp $(objpfx)test-ldouble.o: $(objpfx)libm-test.stmp $(objpfx)test-ildoubl.o: $(objpfx)libm-test.stmp + +$(objpfx)test-double-vlen4.o: $(objpfx)libm-test.stmp +$(objpfx)test-float-vlen8.o: $(objpfx)libm-test.stmp + +$(objpfx)test-double-vlen4: $(common-objpfx)mathvec/libmvec.so \ + $(objpfx)init-arch.o +$(objpfx)test-float-vlen8: $(common-objpfx)mathvec/libmvec.so \ + $(objpfx)init-arch.o endif +CFLAGS-test-double-vlen4.c = -fno-inline -ffloat-store -fno-builtin -frounding-math \ + -D__FAST_MATH__ -DTEST_FAST_MATH -D_OPENMP=201307 \ + -Wno-unknown-pragmas $(arch-ext-cflags) +CFLAGS-test-float-vlen8.c = -fno-inline -ffloat-store -fno-builtin -frounding-math \ + -D__FAST_MATH__ -DTEST_FAST_MATH -D_OPENMP=201307 \ + -Wno-unknown-pragmas $(arch-ext-cflags) CFLAGS-test-float.c = -fno-inline -ffloat-store -fno-builtin -frounding-math CFLAGS-test-double.c = -fno-inline -ffloat-store -fno-builtin -frounding-math CFLAGS-test-ldouble.c = -fno-inline -ffloat-store -fno-builtin -frounding-math diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h index 8a94a7e..82928a1 100644 --- a/math/bits/mathcalls.h +++ b/math/bits/mathcalls.h @@ -60,7 +60,7 @@ __MATHCALL (atan,, (_Mdouble_ __x)); __MATHCALL (atan2,, (_Mdouble_ __y, _Mdouble_ __x)); /* Cosine of X. */ -__MATHCALL (cos,, (_Mdouble_ __x)); +__MATHCALL_VEC (cos,, (_Mdouble_ __x)); /* Sine of X. */ __MATHCALL (sin,, (_Mdouble_ __x)); /* Tangent of X. */ diff --git a/math/gen-libm-have-vector-test.sh b/math/gen-libm-have-vector-test.sh new file mode 100755 index 0000000..95c7bef --- /dev/null +++ b/math/gen-libm-have-vector-test.sh @@ -0,0 +1,48 @@ +#!/bin/sh +# Copyright (C) 1999-2014 Free Software Foundation, Inc. +# This file is part of the GNU C Library. 
+ +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + +# Generate series of definitions used for vector math functions tests. +print_defs() +{ + echo "#if defined TEST_VECTOR_$1 && TEST_VECTOR_$1" + echo "# define HAVE_VECTOR_$1 1" + echo "# define VEC_PREFIX_$1 WRAPPER_NAME($1)" + echo "#else" + echo "# define HAVE_VECTOR_$1 0" + echo "# define VEC_PREFIX_$1 $1" + echo "#endif" + echo +} + +for func in $(grep ALL_RM_TEST libm-test.inc | grep -v define | sed -r "s/.*\(//; s/,.*//"); do + print_defs ${func} + print_defs ${func}f + print_defs ${func}l +done + +print_defs jn +print_defs jnf +print_defs jnl + +print_defs cexp +print_defs cexpf +print_defs cexpl + +print_defs tgamma +print_defs tgammaf +print_defs tgammal diff --git a/math/libm-test.inc b/math/libm-test.inc index f86a4fa..79bcfca 100644 --- a/math/libm-test.inc +++ b/math/libm-test.inc @@ -678,13 +678,17 @@ test_exceptions (const char *test_name, int exception) feclearexcept (FE_ALL_EXCEPT); } +#ifndef TEST_MATHVEC +# define TEST_MATHVEC 0 +#endif + /* Test whether errno for TEST_NAME, set to ERRNO_VALUE, has value EXPECTED_VALUE (description EXPECTED_NAME). 
*/ static void test_single_errno (const char *test_name, int errno_value, int expected_value, const char *expected_name) { -#ifndef TEST_INLINE +#if !defined TEST_INLINE && !TEST_MATHVEC if (errno_value == expected_value) { if (print_screen (1)) @@ -1295,16 +1299,17 @@ struct test_fFF_11_data /* Run an individual test, including any required setup and checking of results, or loop over all tests in an array. */ -#define RUN_TEST_f_f(ARG_STR, FUNC_NAME, ARG, EXPECTED, \ - EXCEPTIONS) \ - do \ - if (enable_test (EXCEPTIONS)) \ - { \ - COMMON_TEST_SETUP (ARG_STR); \ - check_float (test_name, FUNC (FUNC_NAME) (ARG), EXPECTED, \ - EXCEPTIONS); \ - COMMON_TEST_CLEANUP; \ - } \ +#define RUN_TEST_f_f(ARG_STR, FUNC_NAME, ARG, EXPECTED, \ + EXCEPTIONS) \ + do \ + if (enable_test (EXCEPTIONS)) \ + { \ + COMMON_TEST_SETUP (ARG_STR); \ + check_float (test_name, FUNC_TEST (FUNC_NAME) (ARG), \ + EXPECTED, \ + EXCEPTIONS); \ + COMMON_TEST_CLEANUP; \ + } \ while (0) #define RUN_TEST_LOOP_f_f(FUNC_NAME, ARRAY, ROUNDING_MODE) \ IF_ROUND_INIT_ ## ROUNDING_MODE \ @@ -1313,16 +1318,16 @@ struct test_fFF_11_data (ARRAY)[i].RM_##ROUNDING_MODE.expected, \ (ARRAY)[i].RM_##ROUNDING_MODE.exceptions); \ ROUND_RESTORE_ ## ROUNDING_MODE -#define RUN_TEST_2_f(ARG_STR, FUNC_NAME, ARG1, ARG2, EXPECTED, \ - EXCEPTIONS) \ - do \ - if (enable_test (EXCEPTIONS)) \ - { \ - COMMON_TEST_SETUP (ARG_STR); \ - check_float (test_name, FUNC (FUNC_NAME) (ARG1, ARG2), \ - EXPECTED, EXCEPTIONS); \ - COMMON_TEST_CLEANUP; \ - } \ +#define RUN_TEST_2_f(ARG_STR, FUNC_NAME, ARG1, ARG2, EXPECTED, \ + EXCEPTIONS) \ + do \ + if (enable_test (EXCEPTIONS)) \ + { \ + COMMON_TEST_SETUP (ARG_STR); \ + check_float (test_name, FUNC_TEST (FUNC_NAME) (ARG1, ARG2), \ + EXPECTED, EXCEPTIONS); \ + COMMON_TEST_CLEANUP; \ + } \ while (0) #define RUN_TEST_LOOP_2_f(FUNC_NAME, ARRAY, ROUNDING_MODE) \ IF_ROUND_INIT_ ## ROUNDING_MODE \ @@ -1340,16 +1345,16 @@ struct test_fFF_11_data #define RUN_TEST_LOOP_fl_f RUN_TEST_LOOP_2_f #define 
RUN_TEST_if_f RUN_TEST_2_f #define RUN_TEST_LOOP_if_f RUN_TEST_LOOP_2_f -#define RUN_TEST_fff_f(ARG_STR, FUNC_NAME, ARG1, ARG2, ARG3, \ - EXPECTED, EXCEPTIONS) \ - do \ - if (enable_test (EXCEPTIONS)) \ - { \ - COMMON_TEST_SETUP (ARG_STR); \ - check_float (test_name, FUNC (FUNC_NAME) (ARG1, ARG2, ARG3), \ - EXPECTED, EXCEPTIONS); \ - COMMON_TEST_CLEANUP; \ - } \ +#define RUN_TEST_fff_f(ARG_STR, FUNC_NAME, ARG1, ARG2, ARG3, \ + EXPECTED, EXCEPTIONS) \ + do \ + if (enable_test (EXCEPTIONS)) \ + { \ + COMMON_TEST_SETUP (ARG_STR); \ + check_float (test_name, FUNC_TEST (FUNC_NAME) (ARG1, ARG2, ARG3), \ + EXPECTED, EXCEPTIONS); \ + COMMON_TEST_CLEANUP; \ + } \ while (0) #define RUN_TEST_LOOP_fff_f(FUNC_NAME, ARRAY, ROUNDING_MODE) \ IF_ROUND_INIT_ ## ROUNDING_MODE \ @@ -1359,17 +1364,17 @@ struct test_fFF_11_data (ARRAY)[i].RM_##ROUNDING_MODE.expected, \ (ARRAY)[i].RM_##ROUNDING_MODE.exceptions); \ ROUND_RESTORE_ ## ROUNDING_MODE -#define RUN_TEST_c_f(ARG_STR, FUNC_NAME, ARG1, ARG2, EXPECTED, \ - EXCEPTIONS) \ - do \ - if (enable_test (EXCEPTIONS)) \ - { \ - COMMON_TEST_SETUP (ARG_STR); \ - check_float (test_name, \ - FUNC (FUNC_NAME) (BUILD_COMPLEX (ARG1, ARG2)), \ - EXPECTED, EXCEPTIONS); \ - COMMON_TEST_CLEANUP; \ - } \ +#define RUN_TEST_c_f(ARG_STR, FUNC_NAME, ARG1, ARG2, EXPECTED, \ + EXCEPTIONS) \ + do \ + if (enable_test (EXCEPTIONS)) \ + { \ + COMMON_TEST_SETUP (ARG_STR); \ + check_float (test_name, \ + FUNC_TEST (FUNC_NAME) (BUILD_COMPLEX (ARG1, ARG2)), \ + EXPECTED, EXCEPTIONS); \ + COMMON_TEST_CLEANUP; \ + } \ while (0) #define RUN_TEST_LOOP_c_f(FUNC_NAME, ARRAY, ROUNDING_MODE) \ IF_ROUND_INIT_ ## ROUNDING_MODE \ @@ -1387,7 +1392,7 @@ struct test_fFF_11_data { \ COMMON_TEST_SETUP (ARG_STR); \ (EXTRA_VAR) = (EXTRA_EXPECTED) == 0 ? 
1 : 0; \ - check_float (test_name, FUNC (FUNC_NAME) (ARG), EXPECTED, \ + check_float (test_name, FUNC_TEST (FUNC_NAME) (ARG), EXPECTED, \ EXCEPTIONS); \ EXTRA_OUTPUT_TEST_SETUP (ARG_STR, 1); \ if (EXTRA_TEST) \ @@ -1406,22 +1411,22 @@ struct test_fFF_11_data (ARRAY)[i].RM_##ROUNDING_MODE.extra_test, \ (ARRAY)[i].RM_##ROUNDING_MODE.extra_expected); \ ROUND_RESTORE_ ## ROUNDING_MODE -#define RUN_TEST_fF_f1(ARG_STR, FUNC_NAME, ARG, EXPECTED, \ - EXCEPTIONS, EXTRA_VAR, EXTRA_TEST, \ - EXTRA_EXPECTED) \ - do \ - if (enable_test (EXCEPTIONS)) \ - { \ - COMMON_TEST_SETUP (ARG_STR); \ - (EXTRA_VAR) = (EXTRA_EXPECTED) == 0 ? 1 : 0; \ - check_float (test_name, FUNC (FUNC_NAME) (ARG, &(EXTRA_VAR)), \ - EXPECTED, EXCEPTIONS); \ - EXTRA_OUTPUT_TEST_SETUP (ARG_STR, 1); \ - if (EXTRA_TEST) \ - check_float (extra1_name, EXTRA_VAR, EXTRA_EXPECTED, 0); \ - EXTRA_OUTPUT_TEST_CLEANUP (1); \ - COMMON_TEST_CLEANUP; \ - } \ +#define RUN_TEST_fF_f1(ARG_STR, FUNC_NAME, ARG, EXPECTED, \ + EXCEPTIONS, EXTRA_VAR, EXTRA_TEST, \ + EXTRA_EXPECTED) \ + do \ + if (enable_test (EXCEPTIONS)) \ + { \ + COMMON_TEST_SETUP (ARG_STR); \ + (EXTRA_VAR) = (EXTRA_EXPECTED) == 0 ? 1 : 0; \ + check_float (test_name, FUNC_TEST (FUNC_NAME) (ARG, &(EXTRA_VAR)), \ + EXPECTED, EXCEPTIONS); \ + EXTRA_OUTPUT_TEST_SETUP (ARG_STR, 1); \ + if (EXTRA_TEST) \ + check_float (extra1_name, EXTRA_VAR, EXTRA_EXPECTED, 0); \ + EXTRA_OUTPUT_TEST_CLEANUP (1); \ + COMMON_TEST_CLEANUP; \ + } \ while (0) #define RUN_TEST_LOOP_fF_f1(FUNC_NAME, ARRAY, ROUNDING_MODE, EXTRA_VAR) \ IF_ROUND_INIT_ ## ROUNDING_MODE \ @@ -1433,22 +1438,22 @@ struct test_fFF_11_data (ARRAY)[i].RM_##ROUNDING_MODE.extra_test, \ (ARRAY)[i].RM_##ROUNDING_MODE.extra_expected); \ ROUND_RESTORE_ ## ROUNDING_MODE -#define RUN_TEST_fI_f1(ARG_STR, FUNC_NAME, ARG, EXPECTED, \ - EXCEPTIONS, EXTRA_VAR, EXTRA_TEST, \ - EXTRA_EXPECTED) \ - do \ - if (enable_test (EXCEPTIONS)) \ - { \ - COMMON_TEST_SETUP (ARG_STR); \ - (EXTRA_VAR) = (EXTRA_EXPECTED) == 0 ? 
1 : 0; \ - check_float (test_name, FUNC (FUNC_NAME) (ARG, &(EXTRA_VAR)), \ - EXPECTED, EXCEPTIONS); \ - EXTRA_OUTPUT_TEST_SETUP (ARG_STR, 1); \ - if (EXTRA_TEST) \ - check_int (extra1_name, EXTRA_VAR, EXTRA_EXPECTED, 0); \ - EXTRA_OUTPUT_TEST_CLEANUP (1); \ - COMMON_TEST_CLEANUP; \ - } \ +#define RUN_TEST_fI_f1(ARG_STR, FUNC_NAME, ARG, EXPECTED, \ + EXCEPTIONS, EXTRA_VAR, EXTRA_TEST, \ + EXTRA_EXPECTED) \ + do \ + if (enable_test (EXCEPTIONS)) \ + { \ + COMMON_TEST_SETUP (ARG_STR); \ + (EXTRA_VAR) = (EXTRA_EXPECTED) == 0 ? 1 : 0; \ + check_float (test_name, FUNC_TEST (FUNC_NAME) (ARG, &(EXTRA_VAR)), \ + EXPECTED, EXCEPTIONS); \ + EXTRA_OUTPUT_TEST_SETUP (ARG_STR, 1); \ + if (EXTRA_TEST) \ + check_int (extra1_name, EXTRA_VAR, EXTRA_EXPECTED, 0); \ + EXTRA_OUTPUT_TEST_CLEANUP (1); \ + COMMON_TEST_CLEANUP; \ + } \ while (0) #define RUN_TEST_LOOP_fI_f1(FUNC_NAME, ARRAY, ROUNDING_MODE, EXTRA_VAR) \ IF_ROUND_INIT_ ## ROUNDING_MODE \ @@ -1469,7 +1474,7 @@ struct test_fFF_11_data COMMON_TEST_SETUP (ARG_STR); \ (EXTRA_VAR) = (EXTRA_EXPECTED) == 0 ? 
1 : 0; \ check_float (test_name, \ - FUNC (FUNC_NAME) (ARG1, ARG2, &(EXTRA_VAR)), \ + FUNC_TEST (FUNC_NAME) (ARG1, ARG2, &(EXTRA_VAR)), \ EXPECTED, EXCEPTIONS); \ EXTRA_OUTPUT_TEST_SETUP (ARG_STR, 1); \ if (EXTRA_TEST) \ @@ -1490,17 +1495,17 @@ struct test_fFF_11_data (ARRAY)[i].RM_##ROUNDING_MODE.extra_test, \ (ARRAY)[i].RM_##ROUNDING_MODE.extra_expected); \ ROUND_RESTORE_ ## ROUNDING_MODE -#define RUN_TEST_c_c(ARG_STR, FUNC_NAME, ARGR, ARGC, EXPR, EXPC, \ - EXCEPTIONS) \ - do \ - if (enable_test (EXCEPTIONS)) \ - { \ - COMMON_TEST_SETUP (ARG_STR); \ - check_complex (test_name, \ - FUNC (FUNC_NAME) (BUILD_COMPLEX (ARGR, ARGC)), \ - BUILD_COMPLEX (EXPR, EXPC), EXCEPTIONS); \ - COMMON_TEST_CLEANUP; \ - } \ +#define RUN_TEST_c_c(ARG_STR, FUNC_NAME, ARGR, ARGC, EXPR, EXPC, \ + EXCEPTIONS) \ + do \ + if (enable_test (EXCEPTIONS)) \ + { \ + COMMON_TEST_SETUP (ARG_STR); \ + check_complex (test_name, \ + FUNC_TEST (FUNC_NAME) (BUILD_COMPLEX (ARGR, ARGC)), \ + BUILD_COMPLEX (EXPR, EXPC), EXCEPTIONS); \ + COMMON_TEST_CLEANUP; \ + } \ while (0) #define RUN_TEST_LOOP_c_c(FUNC_NAME, ARRAY, ROUNDING_MODE) \ IF_ROUND_INIT_ ## ROUNDING_MODE \ @@ -1511,18 +1516,18 @@ struct test_fFF_11_data (ARRAY)[i].RM_##ROUNDING_MODE.expc, \ (ARRAY)[i].RM_##ROUNDING_MODE.exceptions); \ ROUND_RESTORE_ ## ROUNDING_MODE -#define RUN_TEST_cc_c(ARG_STR, FUNC_NAME, ARG1R, ARG1C, ARG2R, ARG2C, \ - EXPR, EXPC, EXCEPTIONS) \ - do \ - if (enable_test (EXCEPTIONS)) \ - { \ - COMMON_TEST_SETUP (ARG_STR); \ - check_complex (test_name, \ - FUNC (FUNC_NAME) (BUILD_COMPLEX (ARG1R, ARG1C), \ - BUILD_COMPLEX (ARG2R, ARG2C)), \ - BUILD_COMPLEX (EXPR, EXPC), EXCEPTIONS); \ - COMMON_TEST_CLEANUP; \ - } \ +#define RUN_TEST_cc_c(ARG_STR, FUNC_NAME, ARG1R, ARG1C, ARG2R, ARG2C, \ + EXPR, EXPC, EXCEPTIONS) \ + do \ + if (enable_test (EXCEPTIONS)) \ + { \ + COMMON_TEST_SETUP (ARG_STR); \ + check_complex (test_name, \ + FUNC_TEST (FUNC_NAME) (BUILD_COMPLEX (ARG1R, ARG1C), \ + BUILD_COMPLEX (ARG2R, ARG2C)), \ + 
BUILD_COMPLEX (EXPR, EXPC), EXCEPTIONS); \ + COMMON_TEST_CLEANUP; \ + } \ while (0) #define RUN_TEST_LOOP_cc_c(FUNC_NAME, ARRAY, ROUNDING_MODE) \ IF_ROUND_INIT_ ## ROUNDING_MODE \ @@ -1539,7 +1544,7 @@ struct test_fFF_11_data if (enable_test (EXCEPTIONS)) \ { \ COMMON_TEST_SETUP (ARG_STR); \ - check_int (test_name, FUNC (FUNC_NAME) (ARG), EXPECTED, \ + check_int (test_name, FUNC_TEST (FUNC_NAME) (ARG), EXPECTED, \ EXCEPTIONS); \ COMMON_TEST_CLEANUP; \ } \ @@ -1592,7 +1597,7 @@ struct test_fFF_11_data if (enable_test (EXCEPTIONS)) \ { \ COMMON_TEST_SETUP (ARG_STR); \ - check_bool (test_name, FUNC (FUNC_NAME) (ARG), EXPECTED, \ + check_bool (test_name, FUNC_TEST (FUNC_NAME) (ARG), EXPECTED, \ EXCEPTIONS); \ COMMON_TEST_CLEANUP; \ } \ @@ -1626,7 +1631,7 @@ struct test_fFF_11_data if (enable_test (EXCEPTIONS)) \ { \ COMMON_TEST_SETUP (ARG_STR); \ - check_long (test_name, FUNC (FUNC_NAME) (ARG), EXPECTED, \ + check_long (test_name, FUNC_TEST (FUNC_NAME) (ARG), EXPECTED, \ EXCEPTIONS); \ COMMON_TEST_CLEANUP; \ } \ @@ -1643,8 +1648,8 @@ struct test_fFF_11_data if (enable_test (EXCEPTIONS)) \ { \ COMMON_TEST_SETUP (ARG_STR); \ - check_longlong (test_name, FUNC (FUNC_NAME) (ARG), EXPECTED, \ - EXCEPTIONS); \ + check_longlong (test_name, FUNC_TEST (FUNC_NAME) (ARG), \ + EXPECTED, EXCEPTIONS); \ COMMON_TEST_CLEANUP; \ } \ while (0) @@ -1663,7 +1668,7 @@ struct test_fFF_11_data if (enable_test (EXCEPTIONS)) \ { \ COMMON_TEST_SETUP (ARG_STR); \ - FUNC (FUNC_NAME) (ARG, &(EXTRA1_VAR), &(EXTRA2_VAR)); \ + FUNC_TEST (FUNC_NAME) (ARG, &(EXTRA1_VAR), &(EXTRA2_VAR)); \ EXTRA_OUTPUT_TEST_SETUP (ARG_STR, 1); \ if (EXTRA1_TEST) \ check_float (extra1_name, EXTRA1_VAR, EXTRA1_EXPECTED, \ @@ -1690,9 +1695,31 @@ struct test_fFF_11_data (ARRAY)[i].RM_##ROUNDING_MODE.extra2_expected); \ ROUND_RESTORE_ ## ROUNDING_MODE +#ifndef INIT_ARCH_EXT +# define INIT_ARCH_EXT +# define CHECK_ARCH_EXT +#endif + +#ifndef VEC_PREFIX +# define VEC_PREFIX +#endif + +#ifndef FUNC_TEST +# define FUNC_TEST FUNC 
+#endif + +#include "libm-have-vector-test.h" + +#define STR_CONCAT(a,b,c) __STRING(a##b##c) +#define STR_CON3(a,b,c) STR_CONCAT(a,b,c) + +#define HAVE_VECTOR(func) __CONCAT(HAVE_VECTOR_,func) + /* Start and end the tests for a given function. */ -#define START(FUNC, EXACT) \ - const char *this_func = #FUNC; \ +#define START(FUN, SUFF, EXACT) \ + CHECK_ARCH_EXT \ + if (TEST_MATHVEC && !HAVE_VECTOR(FUNC(FUN))) return; \ + const char *this_func = STR_CON3(VEC_PREFIX,FUN,SUFF); \ init_max_error (this_func, EXACT) #define END \ print_max_error (this_func) @@ -1705,28 +1732,28 @@ struct test_fFF_11_data { \ do \ { \ - START (FUNC, EXACT); \ + START (FUNC, , EXACT); \ LOOP_MACRO (FUNC, ARRAY, , ## __VA_ARGS__); \ END_MACRO; \ } \ while (0); \ do \ { \ - START (FUNC ## _downward, EXACT); \ + START (FUNC, _downward, EXACT); \ LOOP_MACRO (FUNC, ARRAY, FE_DOWNWARD, ## __VA_ARGS__); \ END_MACRO; \ } \ while (0); \ do \ { \ - START (FUNC ## _towardzero, EXACT); \ + START (FUNC, _towardzero, EXACT); \ LOOP_MACRO (FUNC, ARRAY, FE_TOWARDZERO, ## __VA_ARGS__); \ END_MACRO; \ } \ while (0); \ do \ { \ - START (FUNC ## _upward, EXACT); \ + START (FUNC, _upward, EXACT); \ LOOP_MACRO (FUNC, ARRAY, FE_UPWARD, ## __VA_ARGS__); \ END_MACRO; \ } \ @@ -6034,7 +6061,7 @@ static const struct test_c_c_data cexp_test_data[] = static void cexp_test (void) { - START (cexp, 0); + START (cexp, , 0); RUN_TEST_LOOP_c_c (cexp, cexp_test_data, ); END_COMPLEX; } @@ -7548,7 +7575,7 @@ static const struct test_if_f_data jn_test_data[] = static void jn_test (void) { - START (jn, 0); + START (jn, , 0); RUN_TEST_LOOP_if_f (jn, jn_test_data, ); END; } @@ -9374,7 +9401,7 @@ static const struct test_f_f_data tgamma_test_data[] = static void tgamma_test (void) { - START (tgamma, 0); + START (tgamma, , 0); RUN_TEST_LOOP_f_f (tgamma, tgamma_test_data, ); END; } @@ -9824,6 +9851,8 @@ main (int argc, char **argv) initialize (); printf (TEST_MSG); + INIT_ARCH_EXT + check_ulp (); /* Keep the tests a wee bit ordered 
(according to ISO C99). */ diff --git a/math/math.h b/math/math.h index dc532b7..b44a23b 100644 --- a/math/math.h +++ b/math/math.h @@ -27,6 +27,9 @@ __BEGIN_DECLS +/* Get machine-dependent vector math functions declarations. */ +#include <bits/math-vector.h> + /* Get machine-dependent HUGE_VAL value (returned on overflow). On all IEEE754 machines, this is +Infinity. */ #include <bits/huge_val.h> @@ -49,6 +52,12 @@ __BEGIN_DECLS so we can easily declare each function as both `name' and `__name', and can declare the float versions `namef' and `__namef'. */ +#define __SIMD_DECL(function) __CONCAT(__DECL_SIMD_,function) + +#define __MATHCALL_VEC(function,suffix, args) \ + __SIMD_DECL(__MATH_PRECNAME(function,suffix)) \ + __MATHCALL(function,suffix, args) + #define __MATHCALL(function,suffix, args) \ __MATHDECL (_Mdouble_,function,suffix, args) #define __MATHDECL(type, function,suffix, args) \ diff --git a/math/test-double-vlen4.h b/math/test-double-vlen4.h new file mode 100644 index 0000000..a71a3d0 --- /dev/null +++ b/math/test-double-vlen4.h @@ -0,0 +1,42 @@ +/* Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define FLOAT double +#define FUNC(function) function +#define TEST_MSG "testing double vector math (without inline functions)\n" +#define MATHCONST(x) x +#define CHOOSE(Clongdouble,Cdouble,Cfloat,Cinlinelongdouble,Cinlinedouble,Cinlinefloat) Cdouble +#define PRINTF_EXPR "e" +#define PRINTF_XEXPR "a" +#define PRINTF_NEXPR "f" +#define TEST_DOUBLE 1 +#define TEST_MATHVEC 1 + +#ifndef __NO_MATH_INLINES +# define __NO_MATH_INLINES +#endif + +#define EXCEPTION_TESTS_double 0 +#define ROUNDING_TESTS_double(MODE) ((MODE) == FE_TONEAREST) + +#define VEC_PREFIX vlen4_ + +#define CONCAT(prefix,func) __CONCAT(prefix,func) + +#define WRAPPER_NAME(function) CONCAT(VEC_PREFIX,function) + +#define FUNC_TEST(function) VEC_PREFIX_ ## function diff --git a/math/test-float-vlen8.h b/math/test-float-vlen8.h new file mode 100644 index 0000000..a1a86a1 --- /dev/null +++ b/math/test-float-vlen8.h @@ -0,0 +1,42 @@ +/* Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define FLOAT float +#define FUNC(function) function ## f +#define TEST_MSG "testing float vector math (without inline functions)\n" +#define MATHCONST(x) x +#define CHOOSE(Clongdouble,Cdouble,Cfloat,Cinlinelongdouble,Cinlinedouble,Cinlinefloat) Cfloat +#define PRINTF_EXPR "e" +#define PRINTF_XEXPR "a" +#define PRINTF_NEXPR "f" +#define TEST_FLOAT 1 +#define TEST_MATHVEC 1 + +#ifndef __NO_MATH_INLINES +# define __NO_MATH_INLINES +#endif + +#define EXCEPTION_TESTS_float 0 +#define ROUNDING_TESTS_float(MODE) ((MODE) == FE_TONEAREST) + +#define VEC_PREFIX vlen8_ + +#define CONCAT(prefix,func) __CONCAT(prefix,func) + +#define WRAPPER_NAME(function) CONCAT(VEC_PREFIX,function) + +#define FUNC_TEST(function) VEC_PREFIX_ ## function ## f diff --git a/mathvec/Depend b/mathvec/Depend new file mode 100644 index 0000000..ede10ab --- /dev/null +++ b/mathvec/Depend @@ -0,0 +1 @@ +math diff --git a/mathvec/Makefile b/mathvec/Makefile new file mode 100644 index 0000000..26c552c --- /dev/null +++ b/mathvec/Makefile @@ -0,0 +1,35 @@ +# Copyright (C) 2014 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + +# Makefile for the vector math library. 
+ +subdir := mathvec + +include ../Makeconfig + +ifeq ($(build-mathvec),yes) +extra-libs := libmvec +extra-libs-others = $(extra-libs) + +libmvec-routines = $(strip $(libmvec-support)) + +$(objpfx)libmvec.so: $(libm) +endif + +# Rules for the test suite are in math directory + +include ../Rules diff --git a/shlib-versions b/shlib-versions index e05b248..fa3cf1d 100644 --- a/shlib-versions +++ b/shlib-versions @@ -71,3 +71,6 @@ libanl=1 # This defines the libgcc soname version this glibc is to load for # asynchronous cancellation to work correctly. libgcc_s=1 + +# The vector math library +libmvec=1 diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist new file mode 100644 index 0000000..8272ddd --- /dev/null +++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist @@ -0,0 +1,3 @@ +GLIBC_2.21 + GLIBC_2.21 A + _ZGVdN4v_cos F diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h new file mode 100644 index 0000000..33ffabb --- /dev/null +++ b/sysdeps/x86/fpu/bits/math-vector.h @@ -0,0 +1,53 @@ +/* Platform-specific SIMD declarations of math functions. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_H +# error "Never include <bits/math-vector.h> directly; \ + include <math.h> instead." 
+#endif + +/* Get default empty definitions for simd declarations. */ +#include <libm-simd-decl-stubs.h> + +#if defined __x86_64__ && defined __FAST_MATH__ +# if defined _OPENMP && _OPENMP >= 201307 +/* OpenMP case. */ +# define __DECL_SIMD_AVX2 _Pragma("omp declare simd notinbranch simdlen(4)") +# define __DECL_SIMD_SSE4 _Pragma("omp declare simd notinbranch simdlen(8)") +# undef __DECL_SIMD_cos +# define __DECL_SIMD_cos __DECL_SIMD_AVX2 +/* cosf keeps its empty stub: libmvec exports only _ZGVdN4v_cos, so + a SIMD declaration for cosf would reference a missing symbol. */ +# elif defined _CILKPLUS && _CILKPLUS >= 0 +/* CilkPlus case. + * TODO _CILKPLUS currently nowhere defined, + * add reserved-namespace versions and __GNUC_PREREQ +# define __DECL_SIMD_AVX2 __attribute__((__vector__(__vectorlength__(4),\ + __nomask__,\ + __processor__(\ + __core_4th_gen_avx__)))) +# define __DECL_SIMD_SSE4 __attribute__((__vector__(__vectorlength__(8),\ + __nomask__,\ + __processor__(\ + __core_i7_sse4_2__)))) +# undef __DECL_SIMD_cos +# define __DECL_SIMD_cos __DECL_SIMD_AVX2 +# undef __DECL_SIMD_cosf +# define __DECL_SIMD_cosf __DECL_SIMD_SSE4 */ +# endif +#endif diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure index 7d4dadd..9773770 100644 --- a/sysdeps/x86_64/configure +++ b/sysdeps/x86_64/configure @@ -275,6 +275,16 @@ fi config_vars="$config_vars config-cflags-avx2 = $libc_cv_cc_avx2" +if test x"$build_mathvec" = xnotset; then + if test x"$base_machine" = xx86_64; then + build_mathvec=yes + else + build_mathvec=no + fi +fi +config_vars="$config_vars +build-mathvec = $build_mathvec" + $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h # work around problem with autoconf and empty lines at the end of files diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac index c9f9a51..0b73d5b 100644 --- a/sysdeps/x86_64/configure.ac +++ b/sysdeps/x86_64/configure.ac @@ -99,6 +99,15 @@ if test $libc_cv_cc_avx2 = yes; then fi LIBC_CONFIG_VAR([config-cflags-avx2], [$libc_cv_cc_avx2]) +if test x"$build_mathvec" = xnotset; then + if test 
x"$base_machine" = xx86_64; then + build_mathvec=yes + else + build_mathvec=no + fi +fi +LIBC_CONFIG_VAR([build-mathvec], [$build_mathvec]) + dnl It is always possible to access static and hidden symbols in an dnl position independent way. AC_DEFINE(PI_STATIC_AND_HIDDEN) diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile new file mode 100644 index 0000000..1b65b09 --- /dev/null +++ b/sysdeps/x86_64/fpu/Makefile @@ -0,0 +1,13 @@ +ifeq ($(subdir),mathvec) +libmvec-support += svml_d_cos4_core svml_d_cos_data +endif + +# Rules for libmvec tests +ifeq ($(subdir),math) +ifeq ($(build-mathvec),yes) +libmvec-tests += double-vlen4 float-vlen8 + +arch-ext-cflags = -mavx2 + +endif +endif diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions new file mode 100644 index 0000000..3d433d2 --- /dev/null +++ b/sysdeps/x86_64/fpu/Versions @@ -0,0 +1,5 @@ +libmvec { + GLIBC_2.21 { + _ZGVdN4v_cos; + } +} diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index 36e1b76..9e4f8cd 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -1961,6 +1961,12 @@ ifloat: 3 ildouble: 4 ldouble: 4 +Function: "vlen4_cos": +double: 1 + +Function: "vlen8_cos": +float: 1 + Function: "y0": double: 2 float: 1 diff --git a/sysdeps/x86_64/fpu/math-tests.h b/sysdeps/x86_64/fpu/math-tests.h new file mode 100644 index 0000000..466b97b --- /dev/null +++ b/sysdeps/x86_64/fpu/math-tests.h @@ -0,0 +1,34 @@ +/* Configuration for math tests. x86_64 version. + Copyright (C) 2013-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef REQUIRE_AVX2 /* Defined by the vector test drivers before libm-test.c.  */
+# include
+
+ static int avx2_usable; /* Set to 1 if AVX2 supported */
+/* Query the CPU feature bits once and remember whether AVX2 is usable.  */
+# define INIT_ARCH_EXT \
+  __init_cpu_features (); \
+  avx2_usable = __cpu_features.feature[index_AVX2_Usable] \
+  & bit_AVX2_Usable;
+/* Leave the enclosing void function early when AVX2 is not usable.  */
+# define CHECK_ARCH_EXT \
+  if (!avx2_usable) return;
+
+#endif
+/* Chain to the next math-tests.h on the include path.  */
+#include_next <math-tests.h>
diff --git a/sysdeps/x86_64/fpu/svml_d_cos4_core.S b/sysdeps/x86_64/fpu/svml_d_cos4_core.S
new file mode 100644
index 0000000..7c9f62e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_cos4_core.S
@@ -0,0 +1,186 @@
+/* Function cos vectorized with AVX2.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include
+
+        .text
+ENTRY(_ZGVdN4v_cos)
+
+/* ALGORITHM DESCRIPTION:
+ *
+ * ( low accuracy ( < 4ulp ) or enhanced performance
+ * ( half of correct mantissa ) implementation )
+ *
+ * Argument representation:
+ * arg + Pi/2 = (N*Pi + R)
+ *
+ * Result calculation:
+ * cos(arg) = sin(arg+Pi/2) = sin(N*Pi + R) = (-1)^N * sin(R)
+ * sin(R) is approximated by corresponding polynomial
+ */
+        pushq %rbp
+        movq %rsp, %rbp
+        andq $-64, %rsp /* align the frame to 64 bytes */
+        subq $448, %rsp /* scratch area, touched only on the slow path */
+        movq __gnu_svml_dcos_data@GOTPCREL(%rip), %rax /* rax = constant table */
+        vmovapd %ymm0, %ymm1 /* ymm1 = X, the 4 input doubles */
+        vmovupd 192(%rax), %ymm4 /* InvPI */
+        vmovupd 256(%rax), %ymm5 /* RShifter */
+
+/* ARGUMENT RANGE REDUCTION:
+ * Add Pi/2 to argument: X' = X+Pi/2
+ */
+        vaddpd 128(%rax), %ymm1, %ymm7 /* HalfPI */
+
+/* Get absolute argument value: X' = |X'| */
+        vandpd (%rax), %ymm7, %ymm2 /* lAbsMask */
+
+/* Y = X'*InvPi + RS : right shifter add */
+        vfmadd213pd %ymm5, %ymm4, %ymm7
+        vmovupd 1216(%rax), %ymm4 /* C7 */
+
+/* Check for large arguments path */
+        vcmpnle_uqpd 64(%rax), %ymm2, %ymm3 /* |X'| vs. lRangeVal */
+
+/* N = Y - RS : right shifter sub */
+        vsubpd %ymm5, %ymm7, %ymm6
+        vmovupd 640(%rax), %ymm2 /* PI1_FMA */
+
+/* SignRes = Y<<63 : shift LSB to MSB place for result sign */
+        vpsllq $63, %ymm7, %ymm7
+
+/* N = N - 0.5 */
+        vsubpd 320(%rax), %ymm6, %ymm0 /* OneHalf */
+        vmovmskpd %ymm3, %ecx /* ecx = one "large argument" bit per lane */
+
+/* R = X - N*Pi1 */
+        vmovapd %ymm1, %ymm3
+        vfnmadd231pd %ymm0, %ymm2, %ymm3
+
+/* R = R - N*Pi2 */
+        vfnmadd231pd 704(%rax), %ymm0, %ymm3 /* PI2_FMA */
+
+/* R = R - N*Pi3 */
+        vfnmadd132pd 768(%rax), %ymm3, %ymm0 /* PI3_FMA; ymm0 = R */
+
+/* POLYNOMIAL APPROXIMATION:
+ * R2 = R*R
+ */
+        vmulpd %ymm0, %ymm0, %ymm5
+        vfmadd213pd 1152(%rax), %ymm5, %ymm4 /* C6 + R2*C7 */
+        vfmadd213pd 1088(%rax), %ymm5, %ymm4 /* C5 + R2*(...) */
+        vfmadd213pd 1024(%rax), %ymm5, %ymm4 /* C4 + R2*(...) */
+
+/* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */
+        vfmadd213pd 960(%rax), %ymm5, %ymm4 /* C3 + R2*(...) */
+        vfmadd213pd 896(%rax), %ymm5, %ymm4 /* C2 + R2*(...) */
+        vfmadd213pd 832(%rax), %ymm5, %ymm4 /* C1 + R2*(...) */
+        vmulpd %ymm5, %ymm4, %ymm6 /* R2 * polynomial */
+        vfmadd213pd %ymm0, %ymm0, %ymm6 /* R + R*(R2 * polynomial) */
+
+/* RECONSTRUCTION:
+ * Final sign setting: Res = Poly^SignRes
+ */
+        vxorpd %ymm7, %ymm6, %ymm0
+        testl %ecx, %ecx
+        jne _LBL_1_3 /* some lane is out of range: patch it with scalar cos */
+
+_LBL_1_2:
+        movq %rbp, %rsp
+        popq %rbp
+        ret
+
+_LBL_1_3:
+        vmovupd %ymm1, 320(%rsp) /* spill the inputs X */
+        vmovupd %ymm0, 384(%rsp) /* spill the fast-path results */
+        je _LBL_1_2 /* not taken when reached via the jne above: flags are unchanged */
+
+        xorb %dl, %dl
+        xorl %eax, %eax
+        vmovups %ymm8, 224(%rsp)
+        vmovups %ymm9, 192(%rsp)
+        vmovups %ymm10, 160(%rsp)
+        vmovups %ymm11, 128(%rsp)
+        vmovups %ymm12, 96(%rsp)
+        vmovups %ymm13, 64(%rsp)
+        vmovups %ymm14, 32(%rsp)
+        vmovups %ymm15, (%rsp)
+        movq %rsi, 264(%rsp)
+        movq %rdi, 256(%rsp)
+        movq %r12, 296(%rsp)
+        movb %dl, %r12b /* r12b = lane-pair counter, starts at 0 */
+        movq %r13, 288(%rsp)
+        movl %ecx, %r13d /* r13d = large-argument lane mask */
+        movq %r14, 280(%rsp)
+        movl %eax, %r14d /* r14d = bit index of the even lane, starts at 0 */
+        movq %r15, 272(%rsp)
+
+_LBL_1_6:
+        btl %r14d, %r13d /* even lane of this pair flagged? */
+        jc _LBL_1_12
+
+_LBL_1_7:
+        lea 1(%r14), %esi
+        btl %esi, %r13d /* odd lane of this pair flagged? */
+        jc _LBL_1_10
+
+_LBL_1_8:
+        incb %r12b
+        addl $2, %r14d
+        cmpb $16, %r12b /* loop runs for 16 lane pairs */
+        jb _LBL_1_6
+
+        vmovups 224(%rsp), %ymm8
+        vmovups 192(%rsp), %ymm9
+        vmovups 160(%rsp), %ymm10
+        vmovups 128(%rsp), %ymm11
+        vmovups 96(%rsp), %ymm12
+        vmovups 64(%rsp), %ymm13
+        vmovups 32(%rsp), %ymm14
+        vmovups (%rsp), %ymm15
+        vmovupd 384(%rsp), %ymm0 /* reload the patched result vector */
+        movq 264(%rsp), %rsi
+        movq 256(%rsp), %rdi
+        movq 296(%rsp), %r12
+        movq 288(%rsp), %r13
+        movq 280(%rsp), %r14
+        movq 272(%rsp), %r15
+        jmp _LBL_1_2
+
+_LBL_1_10:
+        movzbl %r12b, %r15d
+        shlq $4, %r15 /* r15 = 16 * pair index (two doubles per pair) */
+        vmovsd 328(%rsp,%r15), %xmm0 /* odd lane input: 8 bytes past the even one */
+        vzeroupper
+
+        call cos@PLT
+
+        vmovsd %xmm0, 392(%rsp,%r15) /* overwrite the odd lane result */
+        jmp _LBL_1_8
+
+_LBL_1_12:
+        movzbl %r12b, %r15d
+        shlq $4, %r15 /* r15 = 16 * pair index (two doubles per pair) */
+        vmovsd 320(%rsp,%r15), %xmm0 /* even lane input */
+        vzeroupper
+
+        call cos@PLT
+
+        vmovsd %xmm0, 384(%rsp,%r15) /* overwrite the even lane result */
+        jmp _LBL_1_7
+END(_ZGVdN4v_cos)
diff --git a/sysdeps/x86_64/fpu/svml_d_cos_data.S b/sysdeps/x86_64/fpu/svml_d_cos_data.S
new file mode 100644
index 0000000..5c4431a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_cos_data.S
@@ -0,0 +1,493 @@
+/* Data for vectorized cos.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* TODO Make tables more readable according to the comments */
+
+        .section .rodata, "a"
+
+        .align 64
+        .globl __gnu_svml_dcos_data
+
+/* Data table for vector implementations of function cos.
+ * The table may contain polynomial, reduction, lookup
+ * coefficients and other constants obtained through different
+ * methods of research and experimental work.
+ */
+__gnu_svml_dcos_data:
+
+/* General constants:
+ * lAbsMask (byte offset 0; every entry below is 64 bytes)
+ */
+        .long 0xffffffff
+        .long 0x7fffffff
+        .long 0xffffffff
+        .long 0x7fffffff
+        .long 0xffffffff
+        .long 0x7fffffff
+        .long 0xffffffff
+        .long 0x7fffffff
+        .long 0xffffffff
+        .long 0x7fffffff
+        .long 0xffffffff
+        .long 0x7fffffff
+        .long 0xffffffff
+        .long 0x7fffffff
+        .long 0xffffffff
+        .long 0x7fffffff
+
+/* lRangeVal (byte offset 64) */
+        .long 0x00000000
+        .long 0x41600000
+        .long 0x00000000
+        .long 0x41600000
+        .long 0x00000000
+        .long 0x41600000
+        .long 0x00000000
+        .long 0x41600000
+        .long 0x00000000
+        .long 0x41600000
+        .long 0x00000000
+        .long 0x41600000
+        .long 0x00000000
+        .long 0x41600000
+        .long 0x00000000
+        .long 0x41600000
+
+/* HalfPI (byte offset 128) */
+        .long 0x54442d18
+        .long 0x3ff921fb
+        .long 0x54442d18
+        .long 0x3ff921fb
+        .long 0x54442d18
+        .long 0x3ff921fb
+        .long 0x54442d18
+        .long 0x3ff921fb
+        .long 0x54442d18
+        .long 0x3ff921fb
+        .long 0x54442d18
+        .long 0x3ff921fb
+        .long 0x54442d18
+        .long 0x3ff921fb
+        .long 0x54442d18
+        .long 0x3ff921fb
+
+/* InvPI (byte offset 192) */
+        .long 0x6dc9c883
+        .long 0x3fd45f30
+        .long 0x6dc9c883
+        .long 0x3fd45f30
+        .long 0x6dc9c883
+        .long 0x3fd45f30
+        .long 0x6dc9c883
+        .long 0x3fd45f30
+        .long 0x6dc9c883
+        .long 0x3fd45f30
+        .long 0x6dc9c883
+        .long 0x3fd45f30
+        .long 0x6dc9c883
+        .long 0x3fd45f30
+        .long 0x6dc9c883
+        .long 0x3fd45f30
+
+/* RShifter (byte offset 256) */
+        .long 0x00000000
+        .long 0x43380000
+        .long 0x00000000
+        .long 0x43380000
+        .long 0x00000000
+        .long 0x43380000
+        .long 0x00000000
+        .long 0x43380000
+        .long 0x00000000
+        .long 0x43380000
+        .long 0x00000000
+        .long 0x43380000
+        .long 0x00000000
+        .long 0x43380000
+        .long 0x00000000
+        .long 0x43380000
+
+/* OneHalf (byte offset 320) */
+        .long 0x00000000
+        .long 0x3fe00000
+        .long 0x00000000
+        .long 0x3fe00000
+        .long 0x00000000
+        .long 0x3fe00000
+        .long 0x00000000
+        .long 0x3fe00000
+        .long 0x00000000
+        .long 0x3fe00000
+        .long 0x00000000
+        .long 0x3fe00000
+        .long 0x00000000
+        .long 0x3fe00000
+        .long 0x00000000
+        .long 0x3fe00000
+
+/* Range reduction PI-based constants:
+ * PI1 (byte offset 384)
+ */
+        .long 0x40000000
+        .long 0x400921fb
+        .long 0x40000000
+        .long 0x400921fb
+        .long 0x40000000
+        .long 0x400921fb
+        .long 0x40000000
+        .long 0x400921fb
+        .long 0x40000000
+        .long 0x400921fb
+        .long 0x40000000
+        .long 0x400921fb
+        .long 0x40000000
+        .long 0x400921fb
+        .long 0x40000000
+        .long 0x400921fb
+
+/* PI2 (byte offset 448) */
+        .long 0x00000000
+        .long 0x3e84442d
+        .long 0x00000000
+        .long 0x3e84442d
+        .long 0x00000000
+        .long 0x3e84442d
+        .long 0x00000000
+        .long 0x3e84442d
+        .long 0x00000000
+        .long 0x3e84442d
+        .long 0x00000000
+        .long 0x3e84442d
+        .long 0x00000000
+        .long 0x3e84442d
+        .long 0x00000000
+        .long 0x3e84442d
+
+/* PI3 (byte offset 512) */
+        .long 0x80000000
+        .long 0x3d084698
+        .long 0x80000000
+        .long 0x3d084698
+        .long 0x80000000
+        .long 0x3d084698
+        .long 0x80000000
+        .long 0x3d084698
+        .long 0x80000000
+        .long 0x3d084698
+        .long 0x80000000
+        .long 0x3d084698
+        .long 0x80000000
+        .long 0x3d084698
+        .long 0x80000000
+        .long 0x3d084698
+
+/* PI4 (byte offset 576) */
+        .long 0x701b839a
+        .long 0x3b88cc51
+        .long 0x701b839a
+        .long 0x3b88cc51
+        .long 0x701b839a
+        .long 0x3b88cc51
+        .long 0x701b839a
+        .long 0x3b88cc51
+        .long 0x701b839a
+        .long 0x3b88cc51
+        .long 0x701b839a
+        .long 0x3b88cc51
+        .long 0x701b839a
+        .long 0x3b88cc51
+        .long 0x701b839a
+        .long 0x3b88cc51
+
+/* Range reduction PI-based constants if FMA available:
+ * PI1_FMA (byte offset 640)
+ */
+        .long 0x54442d18
+        .long 0x400921fb
+        .long 0x54442d18
+        .long 0x400921fb
+        .long 0x54442d18
+        .long 0x400921fb
+        .long 0x54442d18
+        .long 0x400921fb
+        .long 0x54442d18
+        .long 0x400921fb
+        .long 0x54442d18
+        .long 0x400921fb
+        .long 0x54442d18
+        .long 0x400921fb
+        .long 0x54442d18
+        .long 0x400921fb
+
+/* PI2_FMA (byte offset 704) */
+        .long 0x33145c06
+        .long 0x3ca1a626
+        .long 0x33145c06
+        .long 0x3ca1a626
+        .long 0x33145c06
+        .long 0x3ca1a626
+        .long 0x33145c06
+        .long 0x3ca1a626
+        .long 0x33145c06
+        .long 0x3ca1a626
+        .long 0x33145c06
+        .long 0x3ca1a626
+        .long 0x33145c06
+        .long 0x3ca1a626
+        .long 0x33145c06
+        .long 0x3ca1a626
+
+/* PI3_FMA (byte offset 768) */
+        .long 0x29024e09
+        .long 0x395c1cd1
+        .long 0x29024e09
+        .long 0x395c1cd1
+        .long 0x29024e09
+        .long 0x395c1cd1
+        .long 0x29024e09
+        .long 0x395c1cd1
+        .long 0x29024e09
+        .long 0x395c1cd1
+        .long 0x29024e09
+        .long 0x395c1cd1
+        .long 0x29024e09
+        .long 0x395c1cd1
+        .long 0x29024e09
+        .long 0x395c1cd1
+
+/* Polynomial coefficients (relative error 2^(-52.115)):
+ * C1 (byte offset 832)
+ */
+        .long 0x555554a7
+        .long 0xbfc55555
+        .long 0x555554a7
+        .long 0xbfc55555
+        .long 0x555554a7
+        .long 0xbfc55555
+        .long 0x555554a7
+        .long 0xbfc55555
+        .long 0x555554a7
+        .long 0xbfc55555
+        .long 0x555554a7
+        .long 0xbfc55555
+        .long 0x555554a7
+        .long 0xbfc55555
+        .long 0x555554a7
+        .long 0xbfc55555
+
+/* C2 (byte offset 896) */
+        .long 0x1110a4a8
+        .long 0x3f811111
+        .long 0x1110a4a8
+        .long 0x3f811111
+        .long 0x1110a4a8
+        .long 0x3f811111
+        .long 0x1110a4a8
+        .long 0x3f811111
+        .long 0x1110a4a8
+        .long 0x3f811111
+        .long 0x1110a4a8
+        .long 0x3f811111
+        .long 0x1110a4a8
+        .long 0x3f811111
+        .long 0x1110a4a8
+        .long 0x3f811111
+
+/* C3 (byte offset 960) */
+        .long 0x19a5b86d
+        .long 0xbf2a01a0
+        .long 0x19a5b86d
+        .long 0xbf2a01a0
+        .long 0x19a5b86d
+        .long 0xbf2a01a0
+        .long 0x19a5b86d
+        .long 0xbf2a01a0
+        .long 0x19a5b86d
+        .long 0xbf2a01a0
+        .long 0x19a5b86d
+        .long 0xbf2a01a0
+        .long 0x19a5b86d
+        .long 0xbf2a01a0
+        .long 0x19a5b86d
+        .long 0xbf2a01a0
+
+/* C4 (byte offset 1024) */
+        .long 0x8030fea0
+        .long 0x3ec71de3
+        .long 0x8030fea0
+        .long 0x3ec71de3
+        .long 0x8030fea0
+        .long 0x3ec71de3
+        .long 0x8030fea0
+        .long 0x3ec71de3
+        .long 0x8030fea0
+        .long 0x3ec71de3
+        .long 0x8030fea0
+        .long 0x3ec71de3
+        .long 0x8030fea0
+        .long 0x3ec71de3
+        .long 0x8030fea0
+        .long 0x3ec71de3
+
+/* C5 (byte offset 1088) */
+        .long 0x46002231
+        .long 0xbe5ae635
+        .long 0x46002231
+        .long 0xbe5ae635
+        .long 0x46002231
+        .long 0xbe5ae635
+        .long 0x46002231
+        .long 0xbe5ae635
+        .long 0x46002231
+        .long 0xbe5ae635
+        .long 0x46002231
+        .long 0xbe5ae635
+        .long 0x46002231
+        .long 0xbe5ae635
+        .long 0x46002231
+        .long 0xbe5ae635
+
+/* C6 (byte offset 1152) */
+        .long 0x57a2f220
+        .long 0x3de60e68
+        .long 0x57a2f220
+        .long 0x3de60e68
+        .long 0x57a2f220
+        .long 0x3de60e68
+        .long 0x57a2f220
+        .long 0x3de60e68
+        .long 0x57a2f220
+        .long 0x3de60e68
+        .long 0x57a2f220
+        .long 0x3de60e68
+        .long 0x57a2f220
+        .long 0x3de60e68
+        .long 0x57a2f220
+        .long 0x3de60e68
+
+/* C7 (byte offset 1216) */
+        .long 0x0811aac8
+        .long 0xbd69f0d6
+        .long 0x0811aac8
+        .long 0xbd69f0d6
+        .long 0x0811aac8
+        .long 0xbd69f0d6
+        .long 0x0811aac8
+        .long 0xbd69f0d6
+        .long 0x0811aac8
+        .long 0xbd69f0d6
+        .long 0x0811aac8
+        .long 0xbd69f0d6
+        .long 0x0811aac8
+        .long 0xbd69f0d6
+        .long 0x0811aac8
+        .long 0xbd69f0d6
+
+/* Additional constants:
+ * AbsMask (byte offset 1280)
+ */
+        .long 0xffffffff
+        .long 0x7fffffff
+        .long 0xffffffff
+        .long 0x7fffffff
+        .long 0xffffffff
+        .long 0x7fffffff
+        .long 0xffffffff
+        .long 0x7fffffff
+        .long 0xffffffff
+        .long 0x7fffffff
+        .long 0xffffffff
+        .long 0x7fffffff
+        .long 0xffffffff
+        .long 0x7fffffff
+        .long 0xffffffff
+        .long 0x7fffffff
+
+/* InvPI (byte offset 1344) */
+        .long 0x6dc9c883
+        .long 0x3fd45f30
+        .long 0x6dc9c883
+        .long 0x3fd45f30
+        .long 0x6dc9c883
+        .long 0x3fd45f30
+        .long 0x6dc9c883
+        .long 0x3fd45f30
+        .long 0x6dc9c883
+        .long 0x3fd45f30
+        .long 0x6dc9c883
+        .long 0x3fd45f30
+        .long 0x6dc9c883
+        .long 0x3fd45f30
+        .long 0x6dc9c883
+        .long 0x3fd45f30
+
+/* RShifter_la (byte offset 1408) */
+        .long 0x00000000
+        .long 0x43300000
+        .long 0x00000000
+        .long 0x43300000
+        .long 0x00000000
+        .long 0x43300000
+        .long 0x00000000
+        .long 0x43300000
+        .long 0x00000000
+        .long 0x43300000
+        .long 0x00000000
+        .long 0x43300000
+        .long 0x00000000
+        .long 0x43300000
+        .long 0x00000000
+        .long 0x43300000
+
+/* RShifter_la (byte offset 1472; label repeats the entry above but the value is the next lower double -- TODO confirm the intended name) */
+        .long 0xffffffff
+        .long 0x432fffff
+        .long 0xffffffff
+        .long 0x432fffff
+        .long 0xffffffff
+        .long 0x432fffff
+        .long 0xffffffff
+        .long 0x432fffff
+        .long 0xffffffff
+        .long 0x432fffff
+        .long 0xffffffff
+        .long 0x432fffff
+        .long 0xffffffff
+        .long 0x432fffff
+        .long 0xffffffff
+        .long 0x432fffff
+
+/* RSXmax_la (byte offset 1536) */
+        .long 0x007ffffe
+        .long 0x43300000
+        .long 0x007ffffe
+        .long 0x43300000
+        .long 0x007ffffe
+        .long 0x43300000
+        .long 0x007ffffe
+        .long 0x43300000
+        .long 0x007ffffe
+        .long 0x43300000
+        .long 0x007ffffe
+        .long 0x43300000
+        .long 0x007ffffe
+        .long 0x43300000
+        .long 0x007ffffe
+        .long 0x43300000
+        .type __gnu_svml_dcos_data,@object
+        .size __gnu_svml_dcos_data,.-__gnu_svml_dcos_data
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4.c b/sysdeps/x86_64/fpu/test-double-vlen4.c
new file mode 100644
index 0000000..68b07ca
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-vlen4.c
@@ -0,0 +1,46 @@
+/* Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "test-double-vlen4.h"
+
+// Wrapper from scalar to vector function implemented in AVX2.
+#define VECTOR_WRAPPER(scalar_func, vector_func) \
+extern __m256d vector_func (__m256d); \
+FLOAT scalar_func (FLOAT x) \
+{ \
+  __m256d vx = _mm256_set1_pd (x); /* broadcast x to all 4 lanes */ \
+  __m256d vr = vector_func (vx); \
+  const FLOAT *res = (const FLOAT *) &vr; \
+  int lane = 1; \
+  while (lane < 4) \
+    { \
+      if (res[0] != res[lane]) /* lanes disagree: perturb the result */ \
+        return res[0] + 0.1; \
+      lane++; \
+    } \
+  return res[0]; \
+}
+
+#include
+
+VECTOR_WRAPPER (WRAPPER_NAME(cos),_ZGVdN4v_cos)
+
+#define TEST_VECTOR_cos 1
+
+#define REQUIRE_AVX2
+
+#include "libm-test.c"
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8.c b/sysdeps/x86_64/fpu/test-float-vlen8.c
new file mode 100644
index 0000000..3898df9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-vlen8.c
@@ -0,0 +1,45 @@
+/* Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "test-float-vlen8.h"
+
+#define VECTOR_WRAPPER(scalar_func, vector_func) \
+/* vector_func is not declared or called here; see the disabled call below.  */ \
+FLOAT scalar_func (FLOAT x) \
+{ \
+  __m256 vx = _mm256_set1_ps (x); /* broadcast x to all 8 lanes */ \
+  __m256 vr = vx; /* stands in for the disabled vector_func (vx) */ \
+  const FLOAT *res = (const FLOAT *) &vr; \
+  int lane = 1; \
+  while (lane < 8) \
+    { \
+      if (res[0] != res[lane]) /* lanes disagree: perturb the result */ \
+        return res[0] + 0.1; \
+      lane++; \
+    } \
+  return res[0]; \
+}
+
+#include
+
+VECTOR_WRAPPER (WRAPPER_NAME(cosf),_ZGVdN8v_cosf)
+
+#define TEST_VECTOR_cosf 0
+
+#define REQUIRE_AVX2
+
+#include "libm-test.c"