diff --git a/Makeconfig b/Makeconfig
index 24a3b82..4672008 100644
--- a/Makeconfig
+++ b/Makeconfig
@@ -476,7 +476,7 @@ link-libc = $(link-libc-rpath-link) $(link-libc-before-gnulib) $(gnulib)
 link-libc-tests = $(link-libc-tests-rpath-link) \
 		  $(link-libc-before-gnulib) $(gnulib-tests)
 # This is how to find at build-time things that will be installed there.
-rpath-dirs = math elf dlfcn nss nis rt resolv crypt
+rpath-dirs = math elf dlfcn nss nis rt resolv crypt mathvec
 rpath-link = \
 $(common-objdir):$(subst $(empty) ,:,$(patsubst ../$(subdir),.,$(rpath-dirs:%=$(common-objpfx)%)))
 else
@@ -1018,7 +1018,7 @@ all-subdirs = csu assert ctype locale intl catgets math setjmp signal \
 	      stdlib stdio-common libio malloc string wcsmbs time dirent \
 	      grp pwd posix io termios resource misc socket sysvipc gmon \
 	      gnulib iconv iconvdata wctype manual shadow gshadow po argp \
-	      crypt localedata timezone rt conform debug \
+	      crypt localedata timezone rt conform debug mathvec \
 	      $(add-on-subdirs) dlfcn elf
 
 ifndef avoid-generated
diff --git a/bits/math-vector.h b/bits/math-vector.h
new file mode 100644
index 0000000..c8fe5cb
--- /dev/null
+++ b/bits/math-vector.h
@@ -0,0 +1,22 @@
+/* Platform-specific SIMD declarations of math functions.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.
*/ + +#ifndef _MATH_H +# error "Never include directly; \ + include instead." +#endif diff --git a/configure b/configure index 0cb54ec..a3ea531 100755 --- a/configure +++ b/configure @@ -774,6 +774,7 @@ enable_systemtap enable_build_nscd enable_nscd enable_pt_chown +enable_mathvec with_cpu ' ac_precious_vars='build_alias @@ -1437,6 +1438,8 @@ Optional Features: --disable-build-nscd disable building and installing the nscd daemon --disable-nscd library functions will not contact the nscd daemon --enable-pt_chown Enable building and installing pt_chown + --enable-mathvec Enable building and installing mathvec [default + depends on architecture] Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -3730,6 +3733,14 @@ if test "$build_pt_chown" = yes; then fi +# Check whether --enable-mathvec was given. +if test "${enable_mathvec+set}" = set; then : + enableval=$enable_mathvec; build_mathvec=$enableval +else + build_mathvec=notset +fi + + # We keep the original values in `$config_*' and never modify them, so we # can write them unchanged into config.make. Everything else uses # $machine, $vendor, and $os, and changes them whenever convenient. @@ -7039,6 +7050,12 @@ $as_echo "running configure fragment for $dir" >&6; } fi done +if test x"$build_mathvec" = xnotset; then + build_mathvec=no +fi +config_vars="$config_vars +build-mathvec = $build_mathvec" + diff --git a/configure.ac b/configure.ac index b2c4b1f..f6805aa 100644 --- a/configure.ac +++ b/configure.ac @@ -353,6 +353,12 @@ if test "$build_pt_chown" = yes; then AC_DEFINE(HAVE_PT_CHOWN) fi +AC_ARG_ENABLE([mathvec], + [AS_HELP_STRING([--enable-mathvec], + [Enable building and installing mathvec @<:@default depends on architecture@:>@])], + [build_mathvec=$enableval], + [build_mathvec=notset]) + # We keep the original values in `$config_*' and never modify them, so we # can write them unchanged into config.make. Everything else uses # $machine, $vendor, and $os, and changes them whenever convenient. 
@@ -1939,6 +1945,11 @@ for dir in $sysnames; do fi done +if test x"$build_mathvec" = xnotset; then + build_mathvec=no +fi +LIBC_CONFIG_VAR([build-mathvec], [$build_mathvec]) + AC_SUBST(libc_extra_cflags) AC_SUBST(libc_extra_cppflags) diff --git a/include/libm-simd-decl-stubs.h b/include/libm-simd-decl-stubs.h new file mode 100644 index 0000000..0048717 --- /dev/null +++ b/include/libm-simd-decl-stubs.h @@ -0,0 +1,35 @@ +/* Empty definitions required for __MATHCALL_VEC unfolding in mathcalls.h. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +/* Needed definitions could be generated with: + for func in $(grep __MATHCALL_VEC math/bits/mathcalls.h |\ + sed -r "s|__MATHCALL_VEC.?\(||; s|,.*||"); do + echo "#define __DECL_SIMD_${func}"; + echo "#define __DECL_SIMD_${func}f"; + echo "#define __DECL_SIMD_${func}l"; + done + */ + +#ifndef _LIBM_SIMD_DECL_STUBS_H +#define _LIBM_SIMD_DECL_STUBS_H 1 + +#define __DECL_SIMD_cos +#define __DECL_SIMD_cosf +#define __DECL_SIMD_cosl + +#endif diff --git a/math/Makefile b/math/Makefile index 866bc0f..4981358 100644 --- a/math/Makefile +++ b/math/Makefile @@ -26,7 +26,7 @@ headers := math.h bits/mathcalls.h bits/mathinline.h bits/huge_val.h \ bits/huge_valf.h bits/huge_vall.h bits/inf.h bits/nan.h \ fpu_control.h complex.h bits/cmathcalls.h fenv.h \ bits/fenv.h bits/fenvinline.h bits/mathdef.h tgmath.h \ - bits/math-finite.h + bits/math-finite.h bits/math-vector.h libm-simd-decl-stubs.h # FPU support code. aux := setfpucw fpu_control @@ -85,6 +85,22 @@ generated += $(foreach s,.c .S l.c l.S f.c f.S,$(calls:s_%=m_%$s)) routines = $(calls) $(calls:=f) $(long-c-$(long-double-fcts)) long-c-yes = $(calls:=l) +ifeq ($(build-mathvec),yes) +# We need to install libm.so as linker script +# for more comfortable use of vector math library. +install-lib-ldscripts := libm.so +install_subdir: $(inst_libdir)/libm.so +$(inst_libdir)/libm.so: $(common-objpfx)format.lds \ + $(libm) \ + $(common-objpfx)mathvec/libmvec.so$(libmvec.so-version) \ + $(+force) + (echo '/* GNU ld script'; echo '*/';\ + cat $<; \ + echo 'GROUP ( $(slibdir)/libm.so$(libm.so-version) ' \ + 'AS_NEEDED ( $(slibdir)/libmvec.so$(libmvec.so-version) ) )' \ + ) > $@ +endif + # Rules for the test suite. 
tests = test-matherr test-fenv atest-exp atest-sincos atest-exp2 basic-test \
 	test-misc test-fpucw test-fpucw-ieee tst-definitions test-tgmath \
@@ -97,12 +113,13 @@ tests-static = test-fpucw-static test-fpucw-ieee-static
 test-longdouble-yes = test-ldouble test-ildoubl
 
 ifneq (no,$(PERL))
+libm-vec-tests = $(addprefix test-,$(libmvec-tests))
 libm-tests = test-float test-double $(test-longdouble-$(long-double-fcts)) \
-	test-ifloat test-idouble
+	test-ifloat test-idouble $(libm-vec-tests)
 libm-tests.o = $(addsuffix .o,$(libm-tests))
 
 tests += $(libm-tests)
-libm-tests-generated = libm-test-ulps.h libm-test.c
+libm-tests-generated = libm-test-ulps.h libm-have-vector-test.h libm-test.c
 generated += $(libm-tests-generated) libm-test.stmp
 
 # This is needed for dependencies
@@ -113,9 +130,10 @@ ulps-file = $(firstword $(wildcard $(sysdirs:%=%/libm-test-ulps)))
 $(addprefix $(objpfx), $(libm-tests-generated)): $(objpfx)libm-test.stmp
 
 $(objpfx)libm-test.stmp: $(ulps-file) libm-test.inc gen-libm-test.pl \
-			 auto-libm-test-out
+			 gen-libm-have-vector-test.sh auto-libm-test-out
 	$(make-target-directory)
 	$(PERL) gen-libm-test.pl -u $< -o "$(objpfx)"
+	$(SHELL) gen-libm-have-vector-test.sh > $(objpfx)libm-have-vector-test.h
 	@echo > $@
 
 $(objpfx)test-float.o: $(objpfx)libm-test.stmp
@@ -124,8 +142,22 @@ $(objpfx)test-double.o: $(objpfx)libm-test.stmp
 $(objpfx)test-idouble.o: $(objpfx)libm-test.stmp
 $(objpfx)test-ldouble.o: $(objpfx)libm-test.stmp
 $(objpfx)test-ildoubl.o: $(objpfx)libm-test.stmp
+
+$(objpfx)test-double-vlen2.o: $(objpfx)libm-test.stmp
+$(objpfx)test-double-vlen4.o: $(objpfx)libm-test.stmp
+
+$(objpfx)test-double-vlen2: $(common-objpfx)mathvec/libmvec.so \
+			    $(objpfx)init-arch.o
+$(objpfx)test-double-vlen4: $(common-objpfx)mathvec/libmvec.so \
+			    $(objpfx)init-arch.o
 endif
 
+CFLAGS-test-double-vlen2.c = -fno-inline -ffloat-store -fno-builtin -frounding-math \
+			     -D__FAST_MATH__ -DTEST_FAST_MATH -D_OPENMP=201307 \
+			     -Wno-unknown-pragmas
+CFLAGS-test-double-vlen4.c =
-fno-inline -ffloat-store -fno-builtin -frounding-math \ + -D__FAST_MATH__ -DTEST_FAST_MATH -D_OPENMP=201307 \ + -Wno-unknown-pragmas $(arch-ext-cflags) CFLAGS-test-float.c = -fno-inline -ffloat-store -fno-builtin -frounding-math CFLAGS-test-double.c = -fno-inline -ffloat-store -fno-builtin -frounding-math CFLAGS-test-ldouble.c = -fno-inline -ffloat-store -fno-builtin -frounding-math diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h index 8a94a7e..82928a1 100644 --- a/math/bits/mathcalls.h +++ b/math/bits/mathcalls.h @@ -60,7 +60,7 @@ __MATHCALL (atan,, (_Mdouble_ __x)); __MATHCALL (atan2,, (_Mdouble_ __y, _Mdouble_ __x)); /* Cosine of X. */ -__MATHCALL (cos,, (_Mdouble_ __x)); +__MATHCALL_VEC (cos,, (_Mdouble_ __x)); /* Sine of X. */ __MATHCALL (sin,, (_Mdouble_ __x)); /* Tangent of X. */ diff --git a/math/gen-libm-have-vector-test.sh b/math/gen-libm-have-vector-test.sh new file mode 100755 index 0000000..95c7bef --- /dev/null +++ b/math/gen-libm-have-vector-test.sh @@ -0,0 +1,48 @@ +#!/bin/sh +# Copyright (C) 1999-2014 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# . + +# Generate series of definitions used for vector math functions tests. 
+print_defs() +{ + echo "#if defined TEST_VECTOR_$1 && TEST_VECTOR_$1" + echo "# define HAVE_VECTOR_$1 1" + echo "# define VEC_PREFIX_$1 WRAPPER_NAME($1)" + echo "#else" + echo "# define HAVE_VECTOR_$1 0" + echo "# define VEC_PREFIX_$1 $1" + echo "#endif" + echo +} + +for func in $(grep ALL_RM_TEST libm-test.inc | grep -v define | sed -r "s/.*\(//; s/,.*//"); do + print_defs ${func} + print_defs ${func}f + print_defs ${func}l +done + +print_defs jn +print_defs jnf +print_defs jnl + +print_defs cexp +print_defs cexpf +print_defs cexpl + +print_defs tgamma +print_defs tgammaf +print_defs tgammal diff --git a/math/libm-test.inc b/math/libm-test.inc index f86a4fa..b22bbad 100644 --- a/math/libm-test.inc +++ b/math/libm-test.inc @@ -678,13 +678,17 @@ test_exceptions (const char *test_name, int exception) feclearexcept (FE_ALL_EXCEPT); } +#ifndef TEST_MATHVEC +# define TEST_MATHVEC 0 +#endif + /* Test whether errno for TEST_NAME, set to ERRNO_VALUE, has value EXPECTED_VALUE (description EXPECTED_NAME). */ static void test_single_errno (const char *test_name, int errno_value, int expected_value, const char *expected_name) { -#ifndef TEST_INLINE +#if !defined TEST_INLINE && !TEST_MATHVEC if (errno_value == expected_value) { if (print_screen (1)) @@ -1295,16 +1299,17 @@ struct test_fFF_11_data /* Run an individual test, including any required setup and checking of results, or loop over all tests in an array. 
*/ -#define RUN_TEST_f_f(ARG_STR, FUNC_NAME, ARG, EXPECTED, \ - EXCEPTIONS) \ - do \ - if (enable_test (EXCEPTIONS)) \ - { \ - COMMON_TEST_SETUP (ARG_STR); \ - check_float (test_name, FUNC (FUNC_NAME) (ARG), EXPECTED, \ - EXCEPTIONS); \ - COMMON_TEST_CLEANUP; \ - } \ +#define RUN_TEST_f_f(ARG_STR, FUNC_NAME, ARG, EXPECTED, \ + EXCEPTIONS) \ + do \ + if (enable_test (EXCEPTIONS)) \ + { \ + COMMON_TEST_SETUP (ARG_STR); \ + check_float (test_name, FUNC_TEST (FUNC_NAME) (ARG), \ + EXPECTED, \ + EXCEPTIONS); \ + COMMON_TEST_CLEANUP; \ + } \ while (0) #define RUN_TEST_LOOP_f_f(FUNC_NAME, ARRAY, ROUNDING_MODE) \ IF_ROUND_INIT_ ## ROUNDING_MODE \ @@ -1313,16 +1318,16 @@ struct test_fFF_11_data (ARRAY)[i].RM_##ROUNDING_MODE.expected, \ (ARRAY)[i].RM_##ROUNDING_MODE.exceptions); \ ROUND_RESTORE_ ## ROUNDING_MODE -#define RUN_TEST_2_f(ARG_STR, FUNC_NAME, ARG1, ARG2, EXPECTED, \ - EXCEPTIONS) \ - do \ - if (enable_test (EXCEPTIONS)) \ - { \ - COMMON_TEST_SETUP (ARG_STR); \ - check_float (test_name, FUNC (FUNC_NAME) (ARG1, ARG2), \ - EXPECTED, EXCEPTIONS); \ - COMMON_TEST_CLEANUP; \ - } \ +#define RUN_TEST_2_f(ARG_STR, FUNC_NAME, ARG1, ARG2, EXPECTED, \ + EXCEPTIONS) \ + do \ + if (enable_test (EXCEPTIONS)) \ + { \ + COMMON_TEST_SETUP (ARG_STR); \ + check_float (test_name, FUNC_TEST (FUNC_NAME) (ARG1, ARG2), \ + EXPECTED, EXCEPTIONS); \ + COMMON_TEST_CLEANUP; \ + } \ while (0) #define RUN_TEST_LOOP_2_f(FUNC_NAME, ARRAY, ROUNDING_MODE) \ IF_ROUND_INIT_ ## ROUNDING_MODE \ @@ -1340,16 +1345,16 @@ struct test_fFF_11_data #define RUN_TEST_LOOP_fl_f RUN_TEST_LOOP_2_f #define RUN_TEST_if_f RUN_TEST_2_f #define RUN_TEST_LOOP_if_f RUN_TEST_LOOP_2_f -#define RUN_TEST_fff_f(ARG_STR, FUNC_NAME, ARG1, ARG2, ARG3, \ - EXPECTED, EXCEPTIONS) \ - do \ - if (enable_test (EXCEPTIONS)) \ - { \ - COMMON_TEST_SETUP (ARG_STR); \ - check_float (test_name, FUNC (FUNC_NAME) (ARG1, ARG2, ARG3), \ - EXPECTED, EXCEPTIONS); \ - COMMON_TEST_CLEANUP; \ - } \ +#define RUN_TEST_fff_f(ARG_STR, FUNC_NAME, ARG1, 
ARG2, ARG3, \ + EXPECTED, EXCEPTIONS) \ + do \ + if (enable_test (EXCEPTIONS)) \ + { \ + COMMON_TEST_SETUP (ARG_STR); \ + check_float (test_name, FUNC_TEST (FUNC_NAME) (ARG1, ARG2, ARG3), \ + EXPECTED, EXCEPTIONS); \ + COMMON_TEST_CLEANUP; \ + } \ while (0) #define RUN_TEST_LOOP_fff_f(FUNC_NAME, ARRAY, ROUNDING_MODE) \ IF_ROUND_INIT_ ## ROUNDING_MODE \ @@ -1359,17 +1364,17 @@ struct test_fFF_11_data (ARRAY)[i].RM_##ROUNDING_MODE.expected, \ (ARRAY)[i].RM_##ROUNDING_MODE.exceptions); \ ROUND_RESTORE_ ## ROUNDING_MODE -#define RUN_TEST_c_f(ARG_STR, FUNC_NAME, ARG1, ARG2, EXPECTED, \ - EXCEPTIONS) \ - do \ - if (enable_test (EXCEPTIONS)) \ - { \ - COMMON_TEST_SETUP (ARG_STR); \ - check_float (test_name, \ - FUNC (FUNC_NAME) (BUILD_COMPLEX (ARG1, ARG2)), \ - EXPECTED, EXCEPTIONS); \ - COMMON_TEST_CLEANUP; \ - } \ +#define RUN_TEST_c_f(ARG_STR, FUNC_NAME, ARG1, ARG2, EXPECTED, \ + EXCEPTIONS) \ + do \ + if (enable_test (EXCEPTIONS)) \ + { \ + COMMON_TEST_SETUP (ARG_STR); \ + check_float (test_name, \ + FUNC_TEST (FUNC_NAME) (BUILD_COMPLEX (ARG1, ARG2)), \ + EXPECTED, EXCEPTIONS); \ + COMMON_TEST_CLEANUP; \ + } \ while (0) #define RUN_TEST_LOOP_c_f(FUNC_NAME, ARRAY, ROUNDING_MODE) \ IF_ROUND_INIT_ ## ROUNDING_MODE \ @@ -1387,7 +1392,7 @@ struct test_fFF_11_data { \ COMMON_TEST_SETUP (ARG_STR); \ (EXTRA_VAR) = (EXTRA_EXPECTED) == 0 ? 1 : 0; \ - check_float (test_name, FUNC (FUNC_NAME) (ARG), EXPECTED, \ + check_float (test_name, FUNC_TEST (FUNC_NAME) (ARG), EXPECTED, \ EXCEPTIONS); \ EXTRA_OUTPUT_TEST_SETUP (ARG_STR, 1); \ if (EXTRA_TEST) \ @@ -1406,22 +1411,22 @@ struct test_fFF_11_data (ARRAY)[i].RM_##ROUNDING_MODE.extra_test, \ (ARRAY)[i].RM_##ROUNDING_MODE.extra_expected); \ ROUND_RESTORE_ ## ROUNDING_MODE -#define RUN_TEST_fF_f1(ARG_STR, FUNC_NAME, ARG, EXPECTED, \ - EXCEPTIONS, EXTRA_VAR, EXTRA_TEST, \ - EXTRA_EXPECTED) \ - do \ - if (enable_test (EXCEPTIONS)) \ - { \ - COMMON_TEST_SETUP (ARG_STR); \ - (EXTRA_VAR) = (EXTRA_EXPECTED) == 0 ? 
1 : 0; \ - check_float (test_name, FUNC (FUNC_NAME) (ARG, &(EXTRA_VAR)), \ - EXPECTED, EXCEPTIONS); \ - EXTRA_OUTPUT_TEST_SETUP (ARG_STR, 1); \ - if (EXTRA_TEST) \ - check_float (extra1_name, EXTRA_VAR, EXTRA_EXPECTED, 0); \ - EXTRA_OUTPUT_TEST_CLEANUP (1); \ - COMMON_TEST_CLEANUP; \ - } \ +#define RUN_TEST_fF_f1(ARG_STR, FUNC_NAME, ARG, EXPECTED, \ + EXCEPTIONS, EXTRA_VAR, EXTRA_TEST, \ + EXTRA_EXPECTED) \ + do \ + if (enable_test (EXCEPTIONS)) \ + { \ + COMMON_TEST_SETUP (ARG_STR); \ + (EXTRA_VAR) = (EXTRA_EXPECTED) == 0 ? 1 : 0; \ + check_float (test_name, FUNC_TEST (FUNC_NAME) (ARG, &(EXTRA_VAR)), \ + EXPECTED, EXCEPTIONS); \ + EXTRA_OUTPUT_TEST_SETUP (ARG_STR, 1); \ + if (EXTRA_TEST) \ + check_float (extra1_name, EXTRA_VAR, EXTRA_EXPECTED, 0); \ + EXTRA_OUTPUT_TEST_CLEANUP (1); \ + COMMON_TEST_CLEANUP; \ + } \ while (0) #define RUN_TEST_LOOP_fF_f1(FUNC_NAME, ARRAY, ROUNDING_MODE, EXTRA_VAR) \ IF_ROUND_INIT_ ## ROUNDING_MODE \ @@ -1433,22 +1438,22 @@ struct test_fFF_11_data (ARRAY)[i].RM_##ROUNDING_MODE.extra_test, \ (ARRAY)[i].RM_##ROUNDING_MODE.extra_expected); \ ROUND_RESTORE_ ## ROUNDING_MODE -#define RUN_TEST_fI_f1(ARG_STR, FUNC_NAME, ARG, EXPECTED, \ - EXCEPTIONS, EXTRA_VAR, EXTRA_TEST, \ - EXTRA_EXPECTED) \ - do \ - if (enable_test (EXCEPTIONS)) \ - { \ - COMMON_TEST_SETUP (ARG_STR); \ - (EXTRA_VAR) = (EXTRA_EXPECTED) == 0 ? 1 : 0; \ - check_float (test_name, FUNC (FUNC_NAME) (ARG, &(EXTRA_VAR)), \ - EXPECTED, EXCEPTIONS); \ - EXTRA_OUTPUT_TEST_SETUP (ARG_STR, 1); \ - if (EXTRA_TEST) \ - check_int (extra1_name, EXTRA_VAR, EXTRA_EXPECTED, 0); \ - EXTRA_OUTPUT_TEST_CLEANUP (1); \ - COMMON_TEST_CLEANUP; \ - } \ +#define RUN_TEST_fI_f1(ARG_STR, FUNC_NAME, ARG, EXPECTED, \ + EXCEPTIONS, EXTRA_VAR, EXTRA_TEST, \ + EXTRA_EXPECTED) \ + do \ + if (enable_test (EXCEPTIONS)) \ + { \ + COMMON_TEST_SETUP (ARG_STR); \ + (EXTRA_VAR) = (EXTRA_EXPECTED) == 0 ? 
1 : 0; \ + check_float (test_name, FUNC_TEST (FUNC_NAME) (ARG, &(EXTRA_VAR)), \ + EXPECTED, EXCEPTIONS); \ + EXTRA_OUTPUT_TEST_SETUP (ARG_STR, 1); \ + if (EXTRA_TEST) \ + check_int (extra1_name, EXTRA_VAR, EXTRA_EXPECTED, 0); \ + EXTRA_OUTPUT_TEST_CLEANUP (1); \ + COMMON_TEST_CLEANUP; \ + } \ while (0) #define RUN_TEST_LOOP_fI_f1(FUNC_NAME, ARRAY, ROUNDING_MODE, EXTRA_VAR) \ IF_ROUND_INIT_ ## ROUNDING_MODE \ @@ -1469,7 +1474,7 @@ struct test_fFF_11_data COMMON_TEST_SETUP (ARG_STR); \ (EXTRA_VAR) = (EXTRA_EXPECTED) == 0 ? 1 : 0; \ check_float (test_name, \ - FUNC (FUNC_NAME) (ARG1, ARG2, &(EXTRA_VAR)), \ + FUNC_TEST (FUNC_NAME) (ARG1, ARG2, &(EXTRA_VAR)), \ EXPECTED, EXCEPTIONS); \ EXTRA_OUTPUT_TEST_SETUP (ARG_STR, 1); \ if (EXTRA_TEST) \ @@ -1490,17 +1495,17 @@ struct test_fFF_11_data (ARRAY)[i].RM_##ROUNDING_MODE.extra_test, \ (ARRAY)[i].RM_##ROUNDING_MODE.extra_expected); \ ROUND_RESTORE_ ## ROUNDING_MODE -#define RUN_TEST_c_c(ARG_STR, FUNC_NAME, ARGR, ARGC, EXPR, EXPC, \ - EXCEPTIONS) \ - do \ - if (enable_test (EXCEPTIONS)) \ - { \ - COMMON_TEST_SETUP (ARG_STR); \ - check_complex (test_name, \ - FUNC (FUNC_NAME) (BUILD_COMPLEX (ARGR, ARGC)), \ - BUILD_COMPLEX (EXPR, EXPC), EXCEPTIONS); \ - COMMON_TEST_CLEANUP; \ - } \ +#define RUN_TEST_c_c(ARG_STR, FUNC_NAME, ARGR, ARGC, EXPR, EXPC, \ + EXCEPTIONS) \ + do \ + if (enable_test (EXCEPTIONS)) \ + { \ + COMMON_TEST_SETUP (ARG_STR); \ + check_complex (test_name, \ + FUNC_TEST (FUNC_NAME) (BUILD_COMPLEX (ARGR, ARGC)), \ + BUILD_COMPLEX (EXPR, EXPC), EXCEPTIONS); \ + COMMON_TEST_CLEANUP; \ + } \ while (0) #define RUN_TEST_LOOP_c_c(FUNC_NAME, ARRAY, ROUNDING_MODE) \ IF_ROUND_INIT_ ## ROUNDING_MODE \ @@ -1511,18 +1516,18 @@ struct test_fFF_11_data (ARRAY)[i].RM_##ROUNDING_MODE.expc, \ (ARRAY)[i].RM_##ROUNDING_MODE.exceptions); \ ROUND_RESTORE_ ## ROUNDING_MODE -#define RUN_TEST_cc_c(ARG_STR, FUNC_NAME, ARG1R, ARG1C, ARG2R, ARG2C, \ - EXPR, EXPC, EXCEPTIONS) \ - do \ - if (enable_test (EXCEPTIONS)) \ - { \ - 
COMMON_TEST_SETUP (ARG_STR); \ - check_complex (test_name, \ - FUNC (FUNC_NAME) (BUILD_COMPLEX (ARG1R, ARG1C), \ - BUILD_COMPLEX (ARG2R, ARG2C)), \ - BUILD_COMPLEX (EXPR, EXPC), EXCEPTIONS); \ - COMMON_TEST_CLEANUP; \ - } \ +#define RUN_TEST_cc_c(ARG_STR, FUNC_NAME, ARG1R, ARG1C, ARG2R, ARG2C, \ + EXPR, EXPC, EXCEPTIONS) \ + do \ + if (enable_test (EXCEPTIONS)) \ + { \ + COMMON_TEST_SETUP (ARG_STR); \ + check_complex (test_name, \ + FUNC_TEST (FUNC_NAME) (BUILD_COMPLEX (ARG1R, ARG1C), \ + BUILD_COMPLEX (ARG2R, ARG2C)), \ + BUILD_COMPLEX (EXPR, EXPC), EXCEPTIONS); \ + COMMON_TEST_CLEANUP; \ + } \ while (0) #define RUN_TEST_LOOP_cc_c(FUNC_NAME, ARRAY, ROUNDING_MODE) \ IF_ROUND_INIT_ ## ROUNDING_MODE \ @@ -1539,7 +1544,7 @@ struct test_fFF_11_data if (enable_test (EXCEPTIONS)) \ { \ COMMON_TEST_SETUP (ARG_STR); \ - check_int (test_name, FUNC (FUNC_NAME) (ARG), EXPECTED, \ + check_int (test_name, FUNC_TEST (FUNC_NAME) (ARG), EXPECTED, \ EXCEPTIONS); \ COMMON_TEST_CLEANUP; \ } \ @@ -1592,7 +1597,7 @@ struct test_fFF_11_data if (enable_test (EXCEPTIONS)) \ { \ COMMON_TEST_SETUP (ARG_STR); \ - check_bool (test_name, FUNC (FUNC_NAME) (ARG), EXPECTED, \ + check_bool (test_name, FUNC_TEST (FUNC_NAME) (ARG), EXPECTED, \ EXCEPTIONS); \ COMMON_TEST_CLEANUP; \ } \ @@ -1626,7 +1631,7 @@ struct test_fFF_11_data if (enable_test (EXCEPTIONS)) \ { \ COMMON_TEST_SETUP (ARG_STR); \ - check_long (test_name, FUNC (FUNC_NAME) (ARG), EXPECTED, \ + check_long (test_name, FUNC_TEST (FUNC_NAME) (ARG), EXPECTED, \ EXCEPTIONS); \ COMMON_TEST_CLEANUP; \ } \ @@ -1643,8 +1648,8 @@ struct test_fFF_11_data if (enable_test (EXCEPTIONS)) \ { \ COMMON_TEST_SETUP (ARG_STR); \ - check_longlong (test_name, FUNC (FUNC_NAME) (ARG), EXPECTED, \ - EXCEPTIONS); \ + check_longlong (test_name, FUNC_TEST (FUNC_NAME) (ARG), \ + EXPECTED, EXCEPTIONS); \ COMMON_TEST_CLEANUP; \ } \ while (0) @@ -1663,7 +1668,7 @@ struct test_fFF_11_data if (enable_test (EXCEPTIONS)) \ { \ COMMON_TEST_SETUP (ARG_STR); \ - FUNC 
(FUNC_NAME) (ARG, &(EXTRA1_VAR), &(EXTRA2_VAR)); \ + FUNC_TEST (FUNC_NAME) (ARG, &(EXTRA1_VAR), &(EXTRA2_VAR)); \ EXTRA_OUTPUT_TEST_SETUP (ARG_STR, 1); \ if (EXTRA1_TEST) \ check_float (extra1_name, EXTRA1_VAR, EXTRA1_EXPECTED, \ @@ -1690,9 +1695,31 @@ struct test_fFF_11_data (ARRAY)[i].RM_##ROUNDING_MODE.extra2_expected); \ ROUND_RESTORE_ ## ROUNDING_MODE +#ifndef INIT_ARCH_EXT +# define INIT_ARCH_EXT +# define CHECK_ARCH_EXT +#endif + +#ifndef VEC_PREFIX +# define VEC_PREFIX +#endif + +#ifndef FUNC_TEST +# define FUNC_TEST FUNC +#endif + +#include "libm-have-vector-test.h" + +#define STR_CONCAT(a,b,c) __STRING (a##b##c) +#define STR_CON3(a,b,c) STR_CONCAT (a,b,c) + +#define HAVE_VECTOR(func) __CONCAT (HAVE_VECTOR_,func) + /* Start and end the tests for a given function. */ -#define START(FUNC, EXACT) \ - const char *this_func = #FUNC; \ +#define START(FUN, SUFF, EXACT) \ + CHECK_ARCH_EXT \ + if (TEST_MATHVEC && !HAVE_VECTOR (FUNC (FUN))) return; \ + const char *this_func = STR_CON3 (VEC_PREFIX,FUN,SUFF); \ init_max_error (this_func, EXACT) #define END \ print_max_error (this_func) @@ -1705,28 +1732,28 @@ struct test_fFF_11_data { \ do \ { \ - START (FUNC, EXACT); \ + START (FUNC, , EXACT); \ LOOP_MACRO (FUNC, ARRAY, , ## __VA_ARGS__); \ END_MACRO; \ } \ while (0); \ do \ { \ - START (FUNC ## _downward, EXACT); \ + START (FUNC, _downward, EXACT); \ LOOP_MACRO (FUNC, ARRAY, FE_DOWNWARD, ## __VA_ARGS__); \ END_MACRO; \ } \ while (0); \ do \ { \ - START (FUNC ## _towardzero, EXACT); \ + START (FUNC, _towardzero, EXACT); \ LOOP_MACRO (FUNC, ARRAY, FE_TOWARDZERO, ## __VA_ARGS__); \ END_MACRO; \ } \ while (0); \ do \ { \ - START (FUNC ## _upward, EXACT); \ + START (FUNC, _upward, EXACT); \ LOOP_MACRO (FUNC, ARRAY, FE_UPWARD, ## __VA_ARGS__); \ END_MACRO; \ } \ @@ -6034,7 +6061,7 @@ static const struct test_c_c_data cexp_test_data[] = static void cexp_test (void) { - START (cexp, 0); + START (cexp, , 0); RUN_TEST_LOOP_c_c (cexp, cexp_test_data, ); END_COMPLEX; } @@ 
-7548,7 +7575,7 @@ static const struct test_if_f_data jn_test_data[] = static void jn_test (void) { - START (jn, 0); + START (jn, , 0); RUN_TEST_LOOP_if_f (jn, jn_test_data, ); END; } @@ -9374,7 +9401,7 @@ static const struct test_f_f_data tgamma_test_data[] = static void tgamma_test (void) { - START (tgamma, 0); + START (tgamma, , 0); RUN_TEST_LOOP_f_f (tgamma, tgamma_test_data, ); END; } @@ -9824,6 +9851,8 @@ main (int argc, char **argv) initialize (); printf (TEST_MSG); + INIT_ARCH_EXT + check_ulp (); /* Keep the tests a wee bit ordered (according to ISO C99). */ diff --git a/math/math.h b/math/math.h index dc532b7..8609c22 100644 --- a/math/math.h +++ b/math/math.h @@ -27,6 +27,9 @@ __BEGIN_DECLS +/* Get machine-dependent vector math functions declarations */ +#include + /* Get machine-dependent HUGE_VAL value (returned on overflow). On all IEEE754 machines, this is +Infinity. */ #include @@ -49,6 +52,12 @@ __BEGIN_DECLS so we can easily declare each function as both `name' and `__name', and can declare the float versions `namef' and `__namef'. */ +#define __SIMD_DECL(function) __CONCAT (__DECL_SIMD_,function) + +#define __MATHCALL_VEC(function,suffix, args) \ + __SIMD_DECL (__MATH_PRECNAME(function,suffix)) \ + __MATHCALL (function,suffix, args) + #define __MATHCALL(function,suffix, args) \ __MATHDECL (_Mdouble_,function,suffix, args) #define __MATHDECL(type, function,suffix, args) \ diff --git a/math/test-double-vlen2.h b/math/test-double-vlen2.h new file mode 100644 index 0000000..d5e92d1 --- /dev/null +++ b/math/test-double-vlen2.h @@ -0,0 +1,42 @@ +/* Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define FLOAT double +#define FUNC(function) function +#define TEST_MSG "testing double vector math (without inline functions)\n" +#define MATHCONST(x) x +#define CHOOSE(Clongdouble,Cdouble,Cfloat,Cinlinelongdouble,Cinlinedouble,Cinlinefloat) Cdouble +#define PRINTF_EXPR "e" +#define PRINTF_XEXPR "a" +#define PRINTF_NEXPR "f" +#define TEST_DOUBLE 1 +#define TEST_MATHVEC 1 + +#ifndef __NO_MATH_INLINES +# define __NO_MATH_INLINES +#endif + +#define EXCEPTION_TESTS_double 0 +#define ROUNDING_TESTS_double(MODE) ((MODE) == FE_TONEAREST) + +#define VEC_PREFIX vlen2_ + +#define CONCAT(prefix,func) __CONCAT (prefix,func) + +#define WRAPPER_NAME(function) CONCAT (VEC_PREFIX,function) + +#define FUNC_TEST(function) VEC_PREFIX_ ## function diff --git a/math/test-double-vlen4.h b/math/test-double-vlen4.h new file mode 100644 index 0000000..f8fc66e --- /dev/null +++ b/math/test-double-vlen4.h @@ -0,0 +1,40 @@ +/* Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define FLOAT double +#define FUNC(function) function +#define TEST_MSG "testing double vector math (without inline functions)\n" +#define MATHCONST(x) x +#define CHOOSE(Clongdouble,Cdouble,Cfloat,Cinlinelongdouble,Cinlinedouble,Cinlinefloat) Cdouble +#define PRINTF_EXPR "e" +#define PRINTF_XEXPR "a" +#define PRINTF_NEXPR "f" +#define TEST_DOUBLE 1 +#define TEST_MATHVEC 1 + +#ifndef __NO_MATH_INLINES +# define __NO_MATH_INLINES +#endif + +#define EXCEPTION_TESTS_double 0 +#define ROUNDING_TESTS_double(MODE) ((MODE) == FE_TONEAREST) + +#define CONCAT(prefix,func) __CONCAT (prefix,func) + +#define WRAPPER_NAME(function) CONCAT (VEC_PREFIX,function) + +#define FUNC_TEST(function) VEC_PREFIX_ ## function diff --git a/math/test-float-vlen8.h b/math/test-float-vlen8.h new file mode 100644 index 0000000..2984e0c --- /dev/null +++ b/math/test-float-vlen8.h @@ -0,0 +1,42 @@ +/* Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#define FLOAT float +#define FUNC(function) function ## f +#define TEST_MSG "testing float vector math (without inline functions)\n" +#define MATHCONST(x) x +#define CHOOSE(Clongdouble,Cdouble,Cfloat,Cinlinelongdouble,Cinlinedouble,Cinlinefloat) Cfloat +#define PRINTF_EXPR "e" +#define PRINTF_XEXPR "a" +#define PRINTF_NEXPR "f" +#define TEST_FLOAT 1 +#define TEST_MATHVEC 1 + +#ifndef __NO_MATH_INLINES +# define __NO_MATH_INLINES +#endif + +#define EXCEPTION_TESTS_float 0 +#define ROUNDING_TESTS_float(MODE) ((MODE) == FE_TONEAREST) + +#define VEC_PREFIX vlen8_ + +#define CONCAT(prefix,func) __CONCAT (prefix,func) + +#define WRAPPER_NAME(function) CONCAT (VEC_PREFIX,function) + +#define FUNC_TEST(function) VEC_PREFIX_ ## function ## f diff --git a/mathvec/Depend b/mathvec/Depend new file mode 100644 index 0000000..ede10ab --- /dev/null +++ b/mathvec/Depend @@ -0,0 +1 @@ +math diff --git a/mathvec/Makefile b/mathvec/Makefile new file mode 100644 index 0000000..26c552c --- /dev/null +++ b/mathvec/Makefile @@ -0,0 +1,35 @@ +# Copyright (C) 2014 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# . + +# Makefile for the vector math library. 
+ +subdir := mathvec + +include ../Makeconfig + +ifeq ($(build-mathvec),yes) +extra-libs := libmvec +extra-libs-others = $(extra-libs) + +libmvec-routines = $(strip $(libmvec-support)) + +$(objpfx)libmvec.so: $(libm) +endif + +# Rules for the test suite are in math directory + +include ../Rules diff --git a/shlib-versions b/shlib-versions index e05b248..fa3cf1d 100644 --- a/shlib-versions +++ b/shlib-versions @@ -71,3 +71,6 @@ libanl=1 # This defines the libgcc soname version this glibc is to load for # asynchronous cancellation to work correctly. libgcc_s=1 + +# The vector math library +libmvec=1 diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist new file mode 100644 index 0000000..b984207 --- /dev/null +++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist @@ -0,0 +1,5 @@ +GLIBC_2.21 + GLIBC_2.21 A + _ZGVbN2v_cos F + _ZGVcN4v_cos F + _ZGVdN4v_cos F diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h new file mode 100644 index 0000000..0d71ce9 --- /dev/null +++ b/sysdeps/x86/fpu/bits/math-vector.h @@ -0,0 +1,50 @@ +/* Platform-specific SIMD declarations of math functions. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _MATH_H +# error "Never include directly; \ + include instead." 
+#endif + +/* Get default empty definitions for simd declarations */ +#include + +#if defined __x86_64__ && defined __FAST_MATH__ +# if defined _OPENMP && _OPENMP >= 201307 +/* OpenMP case. NOTE(review): libmvec exports no cosf variant in this patch; confirm that advertising __DECL_SIMD_cosf is intended. */ +/* "omp declare simd" tells the compiler that SIMD variants of the function exist; notinbranch: never called from a conditional inside the SIMD loop; simdlen(N): each variant processes N arguments per call. */ +# define __DECL_SIMD_AVX2 _Pragma ("omp declare simd notinbranch simdlen(4)") +# define __DECL_SIMD_SSE4 _Pragma ("omp declare simd notinbranch simdlen(8)") +# undef __DECL_SIMD_cos +# define __DECL_SIMD_cos __DECL_SIMD_AVX2 +# undef __DECL_SIMD_cosf +# define __DECL_SIMD_cosf __DECL_SIMD_SSE4 +# elif defined _CILKPLUS && _CILKPLUS >= 0 +/* CilkPlus case. */ +/* TODO _CILKPLUS currently nowhere defined, + * add reserved-namespace versions and __GNUC_PREREQ +# define __DECL_SIMD_AVX2 __attribute__ ((__vector__ (__vectorlength__(4),\ + __nomask__))) +# define __DECL_SIMD_SSE4 __attribute__ ((__vector__ (__vectorlength__(8),\ + __nomask__))) +# undef __DECL_SIMD_cos +# define __DECL_SIMD_cos __DECL_SIMD_AVX2 +# undef __DECL_SIMD_cosf +# define __DECL_SIMD_cosf __DECL_SIMD_SSE4 */ +# endif +#endif diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure index 7d4dadd..685c036 100644 --- a/sysdeps/x86_64/configure +++ b/sysdeps/x86_64/configure @@ -275,6 +275,10 @@ fi config_vars="$config_vars config-cflags-avx2 = $libc_cv_cc_avx2" +if test x"$build_mathvec" = xnotset; then + build_mathvec=yes +fi + $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h # work around problem with autoconf and empty lines at the end of files diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac index c9f9a51..e9eceb1 100644 --- a/sysdeps/x86_64/configure.ac +++ b/sysdeps/x86_64/configure.ac @@ -99,6 +99,11 @@ if test $libc_cv_cc_avx2 = yes; then fi LIBC_CONFIG_VAR([config-cflags-avx2], [$libc_cv_cc_avx2]) +dnl Default build_mathvec to yes on x86_64, but keep an explicit --enable-mathvec/--disable-mathvec choice. +if test x"$build_mathvec" = xnotset; then + build_mathvec=yes +fi + dnl It is always possible to access static and hidden symbols in an dnl position independent way.
AC_DEFINE(PI_STATIC_AND_HIDDEN) diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile new file mode 100644 index 0000000..a994c73 --- /dev/null +++ b/sysdeps/x86_64/fpu/Makefile @@ -0,0 +1,22 @@ +ifeq ($(subdir),mathvec) +libmvec-support += svml_d_cos2_core svml_d_cos4_core_avx \ + svml_d_cos4_core_avx2 svml_d_cos_data +endif + +# Rules for libmvec tests +ifeq ($(subdir),math) +ifeq ($(build-mathvec),yes) +libmvec-tests += double-vlen2 double-vlen4 double-vlen4-avx2 + +arch-ext-cflags = -mavx + +$(objpfx)test-double-vlen4-avx2.o: $(objpfx)libm-test.stmp + +$(objpfx)test-double-vlen4-avx2: $(common-objpfx)mathvec/libmvec.so \ + $(objpfx)init-arch.o + +CFLAGS-test-double-vlen4-avx2.c = -fno-inline -ffloat-store -fno-builtin -frounding-math \ + -D__FAST_MATH__ -DTEST_FAST_MATH -D_OPENMP=201307 \ + -Wno-unknown-pragmas -mavx2 +endif +endif diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions new file mode 100644 index 0000000..c18d985 --- /dev/null +++ b/sysdeps/x86_64/fpu/Versions @@ -0,0 +1,7 @@ +libmvec { + GLIBC_2.21 { + _ZGVbN2v_cos; + _ZGVcN4v_cos; + _ZGVdN4v_cos; + } +} diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index 36e1b76..b5c88d4 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -1961,6 +1961,15 @@ ifloat: 3 ildouble: 4 ldouble: 4 +Function: "vlen2_cos": +double: 1 + +Function: "vlen4_avx2_cos": +double: 1 + +Function: "vlen4_avx_cos": +double: 1 + Function: "y0": double: 2 float: 1 diff --git a/sysdeps/x86_64/fpu/math-tests.h b/sysdeps/x86_64/fpu/math-tests.h new file mode 100644 index 0000000..466b97b --- /dev/null +++ b/sysdeps/x86_64/fpu/math-tests.h @@ -0,0 +1,34 @@ +/* Configuration for math tests. x86_64 version. + Copyright (C) 2013-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef REQUIRE_AVX2 +# include + + static int avx2_usable; /* Nonzero if AVX2 is usable (holds the masked feature bit, not 1). */ + +# define INIT_ARCH_EXT \ + __init_cpu_features (); \ + avx2_usable = __cpu_features.feature[index_AVX2_Usable] \ + & bit_AVX2_Usable; + +# define CHECK_ARCH_EXT \ + if (!avx2_usable) return; + +#endif + +#include_next diff --git a/sysdeps/x86_64/fpu/svml_d_cos2_core.S b/sysdeps/x86_64/fpu/svml_d_cos2_core.S new file mode 100644 index 0000000..47288c2 --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_d_cos2_core.S @@ -0,0 +1,210 @@ +/* Function cos vectorized with SSE4. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include + +#define _DATA_TABLE_OFFSETS_ONLY_ +#include "svml_d_cos_data.S" + + .text +ENTRY(_ZGVbN2v_cos) + +/* ALGORITHM DESCRIPTION: + * + * ( low accuracy ( < 4ulp ) or enhanced performance ( half of correct mantissa ) implementation ) + * + * Argument representation: + * arg + Pi/2 = (N*Pi + R) + * + * Result calculation: + * cos(arg) = sin(arg+Pi/2) = sin(N*Pi + R) = (-1)^N * sin(R) + * sin(R) is approximated by corresponding polynomial + */ + pushq %rbp + movq %rsp, %rbp + andq $-64, %rsp + subq $320, %rsp + movaps %xmm0, %xmm3 + movq __svml_dcos_data@GOTPCREL(%rip), %rax + movups __dHalfPI(%rax), %xmm2 + +/* ARGUMENT RANGE REDUCTION: + * Add Pi/2 to argument: X' = X+Pi/2 + */ + addpd %xmm3, %xmm2 + movups __dInvPI(%rax), %xmm5 + movups __dAbsMask(%rax), %xmm4 + +/* Get absolute argument value: X' = |X'| */ + andps %xmm2, %xmm4 + +/* Y = X'*InvPi + RS : right shifter add */ + mulpd %xmm5, %xmm2 + +/* Check for large arguments path */ + cmpnlepd __dRangeVal(%rax), %xmm4 + movups __dRShifter(%rax), %xmm6 + addpd %xmm6, %xmm2 + movmskpd %xmm4, %ecx + +/* N = Y - RS : right shifter sub */ + movaps %xmm2, %xmm1 + +/* SignRes = Y<<63 : shift LSB to MSB place for result sign */ + psllq $63, %xmm2 + subpd %xmm6, %xmm1 + +/* N = N - 0.5 */ + subpd __dOneHalf(%rax), %xmm1 + movups __dPI1(%rax), %xmm7 + +/* R = X - N*Pi1 */ + mulpd %xmm1, %xmm7 + movups __dPI2(%rax), %xmm4 + +/* R = R - N*Pi2 */ + mulpd %xmm1, %xmm4 + subpd %xmm7, %xmm0 + movups __dPI3(%rax), %xmm5 + +/* R = R - N*Pi3 */ + mulpd %xmm1, %xmm5 + subpd %xmm4, %xmm0 + +/* R = R - N*Pi4 */ + movups __dPI4(%rax), %xmm6 + mulpd %xmm6, %xmm1 + subpd %xmm5, %xmm0 + subpd %xmm1, %xmm0 + +/* POLYNOMIAL APPROXIMATION: + * R2 = R*R + */ + movaps %xmm0, %xmm4 + mulpd %xmm0, %xmm4 + movups __dC7(%rax), %xmm1 + mulpd %xmm4, %xmm1 + addpd __dC6(%rax), %xmm1 + mulpd %xmm4, %xmm1 + addpd __dC5(%rax), %xmm1 + mulpd %xmm4, %xmm1 + addpd __dC4(%rax), %xmm1 + +/* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */ + mulpd 
%xmm4, %xmm1 + addpd __dC3(%rax), %xmm1 + +/* Poly = R+R*(R2*(C1+R2*(C2+R2*Poly))) */ + mulpd %xmm4, %xmm1 + addpd __dC2(%rax), %xmm1 + mulpd %xmm4, %xmm1 + addpd __dC1(%rax), %xmm1 + mulpd %xmm1, %xmm4 + mulpd %xmm0, %xmm4 + addpd %xmm4, %xmm0 + +/* RECONSTRUCTION: + * Final sign setting: Res = Poly^SignRes + */ + xorps %xmm2, %xmm0 + testl %ecx, %ecx + jne .LBL_1_3 + +.LBL_1_2: + movq %rbp, %rsp + popq %rbp + ret + +.LBL_1_3: + movups %xmm3, 192(%rsp) + movups %xmm0, 256(%rsp) + je .LBL_1_2 + + xorb %dl, %dl + xorl %eax, %eax + movups %xmm8, 112(%rsp) + movups %xmm9, 96(%rsp) + movups %xmm10, 80(%rsp) + movups %xmm11, 64(%rsp) + movups %xmm12, 48(%rsp) + movups %xmm13, 32(%rsp) + movups %xmm14, 16(%rsp) + movups %xmm15, (%rsp) + movq %rsi, 136(%rsp) + movq %rdi, 128(%rsp) + movq %r12, 168(%rsp) + movb %dl, %r12b + movq %r13, 160(%rsp) + movl %ecx, %r13d + movq %r14, 152(%rsp) + movl %eax, %r14d + movq %r15, 144(%rsp) + +.LBL_1_6: + btl %r14d, %r13d + jc .LBL_1_12 + +.LBL_1_7: + lea 1(%r14), %esi + btl %esi, %r13d + jc .LBL_1_10 + +.LBL_1_8: + incb %r12b + addl $2, %r14d + cmpb $16, %r12b + jb .LBL_1_6 + + movups 112(%rsp), %xmm8 + movups 96(%rsp), %xmm9 + movups 80(%rsp), %xmm10 + movups 64(%rsp), %xmm11 + movups 48(%rsp), %xmm12 + movups 32(%rsp), %xmm13 + movups 16(%rsp), %xmm14 + movups (%rsp), %xmm15 + movq 136(%rsp), %rsi + movq 128(%rsp), %rdi + movq 168(%rsp), %r12 + movq 160(%rsp), %r13 + movq 152(%rsp), %r14 + movq 144(%rsp), %r15 + movups 256(%rsp), %xmm0 + jmp .LBL_1_2 + +.LBL_1_10: + movzbl %r12b, %r15d + shlq $4, %r15 + movsd 200(%rsp,%r15), %xmm0 + + call cos@PLT + + movsd %xmm0, 264(%rsp,%r15) + jmp .LBL_1_8 + +.LBL_1_12: + movzbl %r12b, %r15d + shlq $4, %r15 + movsd 192(%rsp,%r15), %xmm0 + + call cos@PLT + + movsd %xmm0, 256(%rsp,%r15) + jmp .LBL_1_7 + +END(_ZGVbN2v_cos) diff --git a/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S new file mode 100644 index 0000000..24b4f75 --- /dev/null +++ 
b/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S @@ -0,0 +1,39 @@ +/* Function cos vectorized in AVX ISA as wrapper to SSE4 ISA version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + + .text +ENTRY(_ZGVcN4v_cos) + pushq %rbp + movq %rsp, %rbp + andq $-32, %rsp + subq $32, %rsp + vextractf128 $1, %ymm0, (%rsp) + vzeroupper + call _ZGVbN2v_cos@PLT + vmovapd %xmm0, 16(%rsp) + vmovaps (%rsp), %xmm0 + call _ZGVbN2v_cos@PLT + vmovapd %xmm0, %xmm1 + vmovapd 16(%rsp), %xmm0 + vinsertf128 $1, %xmm1, %ymm0, %ymm0 + movq %rbp, %rsp + popq %rbp + ret +END(_ZGVcN4v_cos) diff --git a/sysdeps/x86_64/fpu/svml_d_cos4_core_avx2.S b/sysdeps/x86_64/fpu/svml_d_cos4_core_avx2.S new file mode 100644 index 0000000..95db6b3 --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_d_cos4_core_avx2.S @@ -0,0 +1,195 @@ +/* Function cos vectorized with AVX2. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define _DATA_TABLE_OFFSETS_ONLY_ +#include "svml_d_cos_data.S" + + .text +ENTRY(_ZGVdN4v_cos) + +/* ALGORITHM DESCRIPTION: + * + * ( low accuracy ( < 4ulp ) or enhanced performance + * ( half of correct mantissa ) implementation ) + * + * Argument representation: + * arg + Pi/2 = (N*Pi + R) + * + * Result calculation: + * cos(arg) = sin(arg+Pi/2) = sin(N*Pi + R) = (-1)^N * sin(R) + * sin(R) is approximated by corresponding polynomial + */ + pushq %rbp + movq %rsp, %rbp + andq $-64, %rsp + subq $448, %rsp + movq __svml_dcos_data@GOTPCREL(%rip), %rax + vmovapd %ymm0, %ymm1 + vmovupd __dInvPI(%rax), %ymm4 + vmovupd __dRShifter(%rax), %ymm5 + +/* + * ARGUMENT RANGE REDUCTION: + * Add Pi/2 to argument: X' = X+Pi/2 + */ + vaddpd __dHalfPI(%rax), %ymm1, %ymm7 + +/* Get absolute argument value: X' = |X'| */ + vandpd __dAbsMask(%rax), %ymm7, %ymm2 + +/* Y = X'*InvPi + RS : right shifter add */ + vfmadd213pd %ymm5, %ymm4, %ymm7 + vmovupd __dC7(%rax), %ymm4 + +/* Check for large arguments path */ + vcmpnle_uqpd __dRangeVal(%rax), %ymm2, %ymm3 + +/* N = Y - RS : right shifter sub */ + vsubpd %ymm5, %ymm7, %ymm6 + vmovupd __dPI1_FMA(%rax), %ymm2 + +/* SignRes = Y<<63 : shift LSB to MSB place for result sign */ + vpsllq $63, %ymm7, %ymm7 + +/* N = N - 0.5 */ + vsubpd __dOneHalf(%rax), %ymm6, %ymm0 + vmovmskpd %ymm3, %ecx + +/* R = X - N*Pi1 */ + vmovapd %ymm1, %ymm3 + vfnmadd231pd %ymm0, %ymm2, %ymm3 + +/* R = R - N*Pi2 */ + vfnmadd231pd __dPI2_FMA(%rax), %ymm0, %ymm3 + +/* R = R - N*Pi3 */ + vfnmadd132pd __dPI3_FMA(%rax), %ymm3, %ymm0 + +/* + * POLYNOMIAL APPROXIMATION: + * R2 = R*R + 
*/ + vmulpd %ymm0, %ymm0, %ymm5 + vfmadd213pd __dC6(%rax), %ymm5, %ymm4 + vfmadd213pd __dC5(%rax), %ymm5, %ymm4 + vfmadd213pd __dC4(%rax), %ymm5, %ymm4 + +/* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */ + vfmadd213pd __dC3(%rax), %ymm5, %ymm4 + +/* Poly = R+R*(R2*(C1+R2*(C2+R2*Poly))) */ + vfmadd213pd __dC2(%rax), %ymm5, %ymm4 + vfmadd213pd __dC1(%rax), %ymm5, %ymm4 + vmulpd %ymm5, %ymm4, %ymm6 + vfmadd213pd %ymm0, %ymm0, %ymm6 + +/* + * RECONSTRUCTION: + * Final sign setting: Res = Poly^SignRes + */ + vxorpd %ymm7, %ymm6, %ymm0 + testl %ecx, %ecx + jne .LBL_1_3 + +.LBL_1_2: + movq %rbp, %rsp + popq %rbp + ret + +.LBL_1_3: + vmovupd %ymm1, 320(%rsp) + vmovupd %ymm0, 384(%rsp) + je .LBL_1_2 + + xorb %dl, %dl + xorl %eax, %eax + vmovups %ymm8, 224(%rsp) + vmovups %ymm9, 192(%rsp) + vmovups %ymm10, 160(%rsp) + vmovups %ymm11, 128(%rsp) + vmovups %ymm12, 96(%rsp) + vmovups %ymm13, 64(%rsp) + vmovups %ymm14, 32(%rsp) + vmovups %ymm15, (%rsp) + movq %rsi, 264(%rsp) + movq %rdi, 256(%rsp) + movq %r12, 296(%rsp) + movb %dl, %r12b + movq %r13, 288(%rsp) + movl %ecx, %r13d + movq %r14, 280(%rsp) + movl %eax, %r14d + movq %r15, 272(%rsp) + +.LBL_1_6: + btl %r14d, %r13d + jc .LBL_1_12 + +.LBL_1_7: + lea 1(%r14), %esi + btl %esi, %r13d + jc .LBL_1_10 + +.LBL_1_8: + incb %r12b + addl $2, %r14d + cmpb $16, %r12b + jb .LBL_1_6 + + vmovups 224(%rsp), %ymm8 + vmovups 192(%rsp), %ymm9 + vmovups 160(%rsp), %ymm10 + vmovups 128(%rsp), %ymm11 + vmovups 96(%rsp), %ymm12 + vmovups 64(%rsp), %ymm13 + vmovups 32(%rsp), %ymm14 + vmovups (%rsp), %ymm15 + vmovupd 384(%rsp), %ymm0 + movq 264(%rsp), %rsi + movq 256(%rsp), %rdi + movq 296(%rsp), %r12 + movq 288(%rsp), %r13 + movq 280(%rsp), %r14 + movq 272(%rsp), %r15 + jmp .LBL_1_2 + +.LBL_1_10: + movzbl %r12b, %r15d + shlq $4, %r15 + vmovsd 328(%rsp,%r15), %xmm0 + vzeroupper + + call cos@PLT + + vmovsd %xmm0, 392(%rsp,%r15) + jmp .LBL_1_8 + +.LBL_1_12: + movzbl %r12b, %r15d + shlq $4, %r15 + vmovsd 320(%rsp,%r15), %xmm0 + vzeroupper + + call 
cos@PLT + + vmovsd %xmm0, 384(%rsp,%r15) + jmp .LBL_1_7 + +END(_ZGVdN4v_cos) diff --git a/sysdeps/x86_64/fpu/svml_d_cos_data.S b/sysdeps/x86_64/fpu/svml_d_cos_data.S new file mode 100644 index 0000000..4e9f36b --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_d_cos_data.S @@ -0,0 +1,147 @@ +/* Data for vectorized cos. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef D_COS_DATA +#define D_COS_DATA + +/* Offsets for data table + */ +#define __dAbsMask 0 +#define __dRangeVal 64 +#define __dHalfPI 128 +#define __dInvPI 192 +#define __dRShifter 256 +#define __dOneHalf 320 +#define __dPI1 384 +#define __dPI2 448 +#define __dPI3 512 +#define __dPI4 576 +#define __dPI1_FMA 640 +#define __dPI2_FMA 704 +#define __dPI3_FMA 768 +#define __dC1 832 +#define __dC2 896 +#define __dC3 960 +#define __dC4 1024 +#define __dC5 1088 +#define __dC6 1152 +#define __dC7 1216 +#define __dAbsMask_la 1280 +#define __dInvPI_la 1344 +#define __dRShifter_la 1408 +#define __dRShifterm5_la 1472 +#define __dRXmax_la 1536 + +#ifndef _DATA_TABLE_OFFSETS_ONLY_ + +.macro double_vector offset value +.if .-__svml_dcos_data != \offset +.err +.endif +.rept 8 +.quad \value +.endr +.endm + + .section .rodata, "a" + .align 64 + +/* Data table for vector implementations of function cos. 
+ * The table may contain polynomial, reduction, lookup + * coefficients and other constants obtained through different + * methods of research and experimental work. + */ + .globl __svml_dcos_data +__svml_dcos_data: + +/* General purpose constants: + * absolute value mask + */ +double_vector __dAbsMask 0x7fffffffffffffff + +/* working range threshold (2^23); lanes with a larger |x + Pi/2| take the scalar cos fallback */ +double_vector __dRangeVal 0x4160000000000000 + +/* PI/2 */ +double_vector __dHalfPI 0x3ff921fb54442d18 + +/* 1/PI */ +double_vector __dInvPI 0x3fd45f306dc9c883 + +/* right-shifter constant (1.5 * 2^52) */ +double_vector __dRShifter 0x4338000000000000 + +/* 0.5 */ +double_vector __dOneHalf 0x3fe0000000000000 + +/* Range reduction PI-based constants: + * PI high part + */ +double_vector __dPI1 0x400921fb40000000 + +/* PI mid part 1 */ +double_vector __dPI2 0x3e84442d00000000 + +/* PI mid part 2 */ +double_vector __dPI3 0x3d08469880000000 + +/* PI low part */ +double_vector __dPI4 0x3b88cc51701b839a + +/* Range reduction PI-based constants if FMA available: + * PI high part (FMA available) + */ +double_vector __dPI1_FMA 0x400921fb54442d18 + +/* PI mid part (FMA available) */ +double_vector __dPI2_FMA 0x3ca1a62633145c06 + +/* PI low part (FMA available) */ +double_vector __dPI3_FMA 0x395c1cd129024e09 + +/* Polynomial coefficients (relative error 2^(-52.115)): */ +double_vector __dC1 0xbfc55555555554a7 +double_vector __dC2 0x3f8111111110a4a8 +double_vector __dC3 0xbf2a01a019a5b86d +double_vector __dC4 0x3ec71de38030fea0 +double_vector __dC5 0xbe5ae63546002231 +double_vector __dC6 0x3de60e6857a2f220 +double_vector __dC7 0xbd69f0d60811aac8 + +/* + * Additional constants: + * absolute value mask + */ +double_vector __dAbsMask_la 0x7fffffffffffffff + +/* 1/PI */ +double_vector __dInvPI_la 0x3fd45f306dc9c883 + +/* right-shifter for low accuracy version (2^52) */ +double_vector __dRShifter_la 0x4330000000000000 + +/* right-shifter - 0.5 for low accuracy version (2^52 - 0.5, the largest double below 2^52) */ +double_vector __dRShifterm5_la 0x432fffffffffffff + +/* right-shifter with low mask
for low accuracy version */ +double_vector __dRXmax_la 0x43300000007ffffe + + .type __svml_dcos_data,@object + .size __svml_dcos_data,.-__svml_dcos_data +#endif +#endif diff --git a/sysdeps/x86_64/fpu/test-double-vlen2.c b/sysdeps/x86_64/fpu/test-double-vlen2.c new file mode 100755 index 0000000..674c5de --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-vlen2.c @@ -0,0 +1,44 @@ +/* Tests for SSE4 ISA versions of vector math functions. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include "test-double-vlen2.h" +#include + +// Wrapper from scalar to vector function implemented in SSE4. 
+#define VECTOR_WRAPPER(scalar_func,vector_func) \ +extern __m128d vector_func ( __m128d);\ +FLOAT scalar_func (FLOAT x)\ +{\ + int i;\ + __m128d mx = _mm_set1_pd (x);\ + __m128d mr = vector_func (mx);\ + for (i=1;i<2;i++)\ + {\ + if (((FLOAT*)&mr)[0]!=((FLOAT*)&mr)[i])\ + {\ + return ((FLOAT*)&mr)[0]+0.1;\ + }\ + }\ + return ((FLOAT*)&mr)[0];\ +} + +VECTOR_WRAPPER (WRAPPER_NAME (cos),_ZGVbN2v_cos) + +#define TEST_VECTOR_cos 1 + +#include "libm-test.c" diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c new file mode 100644 index 0000000..15b7930 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c @@ -0,0 +1,48 @@ +/* Tests for AVX2 ISA versions of vector math functions. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include "test-double-vlen4.h" +#include + +// Wrapper from scalar to vector function implemented in AVX2. 
+#define VECTOR_WRAPPER(scalar_func,vector_func) \ +extern __m256d vector_func (__m256d);\ +FLOAT scalar_func (FLOAT x)\ +{\ + int i;\ + __m256d mx = _mm256_set1_pd (x);\ + __m256d mr = vector_func (mx);\ + for (i=1;i<4;i++)\ + {\ + if (((FLOAT*)&mr)[0]!=((FLOAT*)&mr)[i])\ + {\ + return ((FLOAT*)&mr)[0]+0.1;\ + }\ + }\ + return ((FLOAT*)&mr)[0];\ +} + +#define VEC_PREFIX vlen4_avx2_ + +VECTOR_WRAPPER (WRAPPER_NAME (cos),_ZGVdN4v_cos) + +#define TEST_VECTOR_cos 1 + +#define REQUIRE_AVX2 + +#include "libm-test.c" diff --git a/sysdeps/x86_64/fpu/test-double-vlen4.c b/sysdeps/x86_64/fpu/test-double-vlen4.c new file mode 100644 index 0000000..5f68af5 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-vlen4.c @@ -0,0 +1,46 @@ +/* Tests for AVX ISA versions of vector math functions. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include "test-double-vlen4.h" +#include + +// Wrapper from scalar to vector function implemented in AVX. 
+#define VECTOR_WRAPPER(scalar_func,vector_func) \ +extern __m256d vector_func (__m256d);\ +FLOAT scalar_func (FLOAT x)\ +{\ + int i;\ + __m256d mx = _mm256_set1_pd (x);\ + __m256d mr = vector_func (mx);\ + for (i=1;i<4;i++)\ + {\ + if (((FLOAT*)&mr)[0]!=((FLOAT*)&mr)[i])\ + {\ + return ((FLOAT*)&mr)[0]+0.1;\ + }\ + }\ + return ((FLOAT*)&mr)[0];\ +} + +#define VEC_PREFIX vlen4_avx_ + +VECTOR_WRAPPER (WRAPPER_NAME (cos),_ZGVcN4v_cos) + +#define TEST_VECTOR_cos 1 + +#include "libm-test.c" diff --git a/sysdeps/x86_64/fpu/test-float-vlen8.c b/sysdeps/x86_64/fpu/test-float-vlen8.c new file mode 100644 index 0000000..fdb3b5f --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-vlen8.c @@ -0,0 +1,45 @@ +/* Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include "test-float-vlen8.h" + +#define VECTOR_WRAPPER(scalar_func,vector_func) \ +/*extern __m256 vector_func (__m256);*/\ +FLOAT scalar_func (FLOAT x)\ +{\ + int i;\ + __m256 mx = _mm256_set1_ps (x);\ + __m256 mr = mx; /*vector_func (mx);*/\ + for (i=1;i<8;i++)\ + {\ + if (((FLOAT*)&mr)[0]!=((FLOAT*)&mr)[i])\ + {\ + return ((FLOAT*)&mr)[0]+0.1;\ + }\ + }\ + return ((FLOAT*)&mr)[0];\ +} + +#include + +VECTOR_WRAPPER (WRAPPER_NAME (cosf),_ZGVdN8v_cosf) + +#define TEST_VECTOR_cosf 0 + +#define REQUIRE_AVX2 + +#include "libm-test.c"