[PATCH 2/2] Enable libmvec support for AArch64
Szabolcs Nagy
szabolcs.nagy@arm.com
Thu Apr 6 16:26:13 GMT 2023
The 03/24/2023 12:10, Joe Ramsay via Libc-alpha wrote:
> The proposed change is mainly implementing build infrastructure to add
> the new routines to ABI, tests and benchmarks. I have demonstrated how
> this all fits together by adding implementations for vector cos, in
> both single and double precision, targeting both Advanced SIMD and
> SVE.
>
> The implementations of the routines themselves are just loops over the
> scalar routine from libm for now, as we are more concerned with
> getting the plumbing right at this point. We plan to contribute vector
> routines from the Arm Optimized Routines repo that are compliant with
> requirements described in the libmvec wiki.
>
> Building libmvec requires minimum GCC 10 for SVE ACLE. To avoid raising
> the minimum GCC by such a big jump, we allow users to disable libmvec
> if their compiler is too old.
>
> Note that at this point users have to manually call the vector math
> functions. This seems to be acceptable to some downstream users.
>
> Thanks,
> Joe
looks mostly ok, some fixes are needed:
> rename from sysdeps/x86_64/fpu/bench-libmvec-skeleton.c
> rename to benchtests/bench-libmvec-skeleton.c
this has some x86 specific code (cpu feature tests) so i think
we should probably create a copy or factor out the cpu tests
e.g. like
#include "bench-libmvec-arch.h"
...
#ifdef INIT_ARCH
INIT_ARCH
#endif
bench_start ();
and in bench-libmvec-arch.h
#define INIT_ARCH do{if (!supported()) return 77;}while(0)
static bool supported () {...}
but if this becomes to hairy change then just copy.
> --- a/sysdeps/aarch64/configure.ac
> +++ b/sysdeps/aarch64/configure.ac
> @@ -101,3 +101,35 @@ rm -f conftest*])
> if test $libc_cv_aarch64_sve_asm = yes; then
> AC_DEFINE(HAVE_AARCH64_SVE_ASM)
> fi
> +
> +if test x"$build_mathvec" = xnotset; then
> + build_mathvec=yes
> +fi
> +
> +# Check if compiler is sufficient to build mathvec (needs SVE ACLE)
> +AC_CACHE_CHECK(for availability of SVE ACLE, libc_cv_has_sve_acle, [dnl
> + if test $build_mathvec = yes; then
> + cat > conftest.c <<EOF
> +#include <arm_sve.h>
> +EOF
> + if ! ${CC-cc} conftest.c -fsyntax-only; then
> + as_fn_error 1 "mathvec is enabled but compiler does not have SVE ACLE. Either use a compatible compiler or disable mathvec (this results in incomplete ABI)."
please include the config option in the error message, e.g.
s/disable mathvec/configure with --disable-mathvec/
> + fi
> + rm conftest.c
> + fi])
> +
> +# Check if the local system can run SVE binary
> +AC_CACHE_CHECK(for local SVE hardware, libc_cv_can_run_sve, [dnl
> + cat > conftest.c <<EOF
> +#include <sys/auxv.h>
> +int main(void) {
> + if (! (getauxval (AT_HWCAP) & HWCAP_SVE))
> + return 1;
> + return 0;
> +}
> +EOF
> + libc_cv_can_run_sve=yes
> + ${CC-cc} conftest.c -o conftest
> + ./conftest || libc_cv_can_run_sve=no
> + rm -f conftest*])
> +LIBC_CONFIG_VAR([aarch64-can-run-sve], [$libc_cv_can_run_sve])
in case of cross build we should not try running a target exe.
and i think benchmarks can be run in cross environment when the
test-wrapper make variable is specified, so configure time test
does not help. i.e. update bench-libmvec-skeleton.c with runtime
hwcap check.
> --- /dev/null
> +++ b/sysdeps/aarch64/fpu/Makefile
> @@ -0,0 +1,66 @@
> +float-advsimd-funcs = cos
> +
> +double-advsimd-funcs = cos
> +
> +float-sve-funcs = cos
> +
> +double-sve-funcs = cos
> +
> +ifeq ($(subdir),mathvec)
> +libmvec-support = $(addsuffix f_advsimd,$(float-advsimd-funcs)) \
> + $(addsuffix _advsimd,$(double-advsimd-funcs)) \
> + $(addsuffix f_sve,$(float-sve-funcs)) \
> + $(addsuffix _sve,$(double-sve-funcs))
> +endif
> +
> +sve-cflags = -march=armv8-a+sve
> +
> +
> +ifeq ($(build-mathvec),yes)
> +bench-libmvec = $(addprefix float-advsimd-,$(float-advsimd-funcs)) \
> + $(addprefix double-advsimd-,$(double-advsimd-funcs))
> +
> +# If not on an SVE-enabled machine, do not add SVE routines to benchmarks.
> +# The routines are still built.
> +ifeq ($(aarch64-can-run-sve),yes)
run the benchmarks just exit early if no sve.
> + bench-libmvec += $(addprefix float-sve-,$(float-sve-funcs)) \
> + $(addprefix double-sve-,$(double-sve-funcs))
> +endif
> +endif
> +
> +$(objpfx)bench-float-advsimd-%.c:
> + $(PYTHON) $(..)sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py $(basename $(@F)) > $@
> +$(objpfx)bench-double-advsimd-%.c:
> + $(PYTHON) $(..)sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py $(basename $(@F)) > $@
> +$(objpfx)bench-float-sve-%.c:
> + $(PYTHON) $(..)sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py $(basename $(@F)) > $@
> +$(objpfx)bench-double-sve-%.c:
> + $(PYTHON) $(..)sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py $(basename $(@F)) > $@
> +
> +ifeq (${STATIC-BENCHTESTS},yes)
> +libmvec-benchtests = $(common-objpfx)mathvec/libmvec.a $(common-objpfx)math/libm.a
> +else
> +libmvec-benchtests = $(libmvec) $(libm)
> +endif
> +
> +$(addprefix $(objpfx)bench-,$(bench-libmvec)): $(libmvec-benchtests)
> +
> +ifeq ($(build-mathvec),yes)
> +libmvec-tests += float-advsimd double-advsimd float-sve double-sve
> +endif
> +
> +define sve-float-cflags-template
> +CFLAGS-$(1)f_sve.c += $(sve-cflags)
> +CFLAGS-bench-float-sve-$(1).c += $(sve-cflags)
> +endef
> +
> +define sve-double-cflags-template
> +CFLAGS-$(1)_sve.c += $(sve-cflags)
> +CFLAGS-bench-double-sve-$(1).c += $(sve-cflags)
> +endef
> +
> +$(foreach f,$(float-sve-funcs), $(eval $(call sve-float-cflags-template,$(f))))
> +$(foreach f,$(double-sve-funcs), $(eval $(call sve-double-cflags-template,$(f))))
> +
> +CFLAGS-test-float-sve-wrappers.c = $(sve-cflags)
> +CFLAGS-test-double-sve-wrappers.c = $(sve-cflags)
> --- /dev/null
> +++ b/sysdeps/aarch64/fpu/bits/math-vector.h
> @@ -0,0 +1,64 @@
> +/* Platform-specific SIMD declarations of math functions.
> +
> + Copyright (C) 2023 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#ifndef _MATH_H
> +# error "Never include <bits/math-vector.h> directly;\
> + include <math.h> instead."
> +#endif
> +
> +/* Get default empty definitions for simd declarations. */
> +#include <bits/libm-simd-decl-stubs.h>
> +
> +#if __GNUC_PREREQ (9, 0)
> +# define __ADVSIMD_VEC_MATH_SUPPORTED
> +typedef __Float32x4_t __f32x4_t;
> +typedef __Float64x2_t __f64x2_t;
> +#elif __clang_major__ >= 8
replace with __glibc_clang_prereq (8, 0)
gcc-8 fails to build glibc otherwise (since -Wundef -Werror).
> +# define __ADVSIMD_VEC_MATH_SUPPORTED
> +typedef __attribute__((__neon_vector_type__(4))) float __f32x4_t;
> +typedef __attribute__((__neon_vector_type__(2))) double __f64x2_t;
> +#endif
> +
> +#if __GNUC_PREREQ (10, 0) || __clang_major >= 11
likewise, use __glibc_clang_prereq (11, 0)
> +# define __SVE_VEC_MATH_SUPPORTED
> +typedef __SVFloat32_t __sv_f32_t;
> +typedef __SVFloat64_t __sv_f64_t;
> +typedef __SVBool_t __sv_bool_t;
> +#endif
> +
> +/* If vector types and vector PCS are unsupported in the working
> + compiler, no choice but to omit vector math declarations. */
> +
> +#ifdef __ADVSIMD_VEC_MATH_SUPPORTED
> +
> +# define __vpcs __attribute__((__aarch64_vector_pcs__))
> +
> +__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
> +__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
> +
> +#undef __ADVSIMD_VEC_MATH_SUPPORTED
> +#endif /* __ADVSIMD_VEC_MATH_SUPPORTED */
> +
> +#ifdef __SVE_VEC_MATH_SUPPORTED
> +
> +__sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t);
> +__sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t);
> +
> +#undef __SVE_VEC_MATH_SUPPORTED
> +#endif /* __SVE_VEC_MATH_SUPPORTED */
> --- /dev/null
> +++ b/sysdeps/aarch64/fpu/math-tests-arch.h
> @@ -0,0 +1,33 @@
> +/* Runtime architecture check for math tests. AArch64 version.
> +
> + Copyright (C) 2023 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#ifdef REQUIRE_SVE
> +# include <sys/auxv.h>
> +
> +# define INIT_ARCH_EXT
> +# define CHECK_ARCH_EXT \
> + do \
> + { \
> + if (!(getauxval (AT_HWCAP) & HWCAP_SVE)) return; \
> + } \
> + while (0)
> +
> +#else
> +# include <sysdeps/generic/math-tests-arch.h>
> +#endif
OK.
the bench code might need something similar
maybe even share the code.
> --- a/sysdeps/x86_64/fpu/Makefile
> +++ b/sysdeps/x86_64/fpu/Makefile
> @@ -94,7 +94,7 @@ endif
>
> $(addprefix $(objpfx)bench-,$(bench-libmvec-double)): $(libmvec-benchtests)
> $(addprefix $(objpfx)bench-,$(bench-libmvec-float)): $(libmvec-benchtests)
> -bench-libmvec-deps = $(..)sysdeps/x86_64/fpu/bench-libmvec-skeleton.c bench-timing.h Makefile
> +bench-libmvec-deps = $(..)benchtests/bench-libmvec-skeleton.c bench-timing.h Makefile
>
> $(objpfx)bench-float-%.c: $(bench-libmvec-deps)
> { if [ -n "$($*-INCLUDE)" ]; then \
this might need to change.
More information about the Libc-alpha
mailing list