This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] [ARM] Add support for fenv_private on ARM
- From: "Wilco" <wdijkstr at arm dot com>
- To: <libc-alpha at sourceware dot org>
- Date: Thu, 6 Mar 2014 14:36:42 -0000
- Subject: [PATCH] [ARM] Add support for fenv_private on ARM
- Authentication-results: sourceware.org; auth=none
Hi,
This patch improves the performance of common math functions by avoiding unnecessary
writes to the FPSCR. It adds fenv_private.h with faster inline variants of the fenv
functions that skip writes which would not change the FPSCR. For a call to sin(), the
number of FPSCR reads/writes drops from 4/3 to 3/1 with the inline fenv implementation,
and to 1/0 with the HAVE_RM_CTX implementation.
A summary of performance on Cortex-A15:
No fenv_private.h:
cos(): ITERS:2.07e+07: TOTAL:10.6831s, MAX:1519.12ns, MIN:231.833ns, 1.93763e+06 iter/s
exp(): ITERS:3.598e+06: TOTAL:10.6089s, MAX:11415.5ns, MIN:175.375ns, 339148 iter/s
pow(): ITERS:3.3712e+07: TOTAL:9.91444s, MAX:531.669ns, MIN:57.833ns, 3.40029e+06 iter/s
sin(): ITERS:1.96e+07: TOTAL:10.5283s, MAX:1498.83ns, MIN:224.166ns, 1.86165e+06 iter/s
sincos(): ITERS:1.8684e+07: TOTAL:9.84671s, MAX:1599.79ns, MIN:499.417ns, 1.89749e+06 iter/s
tan(): ITERS:2.2701e+07: TOTAL:11.0817s, MAX:1001.79ns, MIN:225.333ns, 2.04852e+06 iter/s
With fenv_private.h:
cos(): ITERS:2.99e+07: TOTAL:9.93882s, MAX:2341.34ns, MIN:43.875ns, 3.00841e+06 iter/s
exp(): ITERS:3.598e+06: TOTAL:10.0066s, MAX:10440.2ns, MIN:26.5ns, 359562 iter/s
pow(): ITERS:5.8093e+07: TOTAL:9.86581s, MAX:1102.29ns, MIN:63.042ns, 5.88832e+06 iter/s
sin(): ITERS:3.08e+07: TOTAL:10.8619s, MAX:3371.59ns, MIN:37.708ns, 2.8356e+06 iter/s
sincos(): ITERS:5.7708e+07: TOTAL:9.88083s, MAX:1348.21ns, MIN:148.875ns, 5.8404e+06 iter/s
tan(): ITERS:3.243e+07: TOTAL:10.1926s, MAX:1840.3ns, MIN:50.042ns, 3.18171e+06 iter/s
The glibc tests pass with the same number of failures as before when the new
fenv_private.h is used (both with and without HAVE_RM_CTX).
OK for commit?
Wilco
From ba7c978b428967ee8217f7edef88156a288c8014 Mon Sep 17 00:00:00 2001
From: Wilco Dijkstra <wdijkstr@localhost.localdomain>
Date: Tue, 4 Mar 2014 13:44:44 +0000
Subject: [PATCH 1/2] Add support for fenv_private on ARM.
---
sysdeps/arm/fenv_private.h | 250 ++++++++++++++++++++++++++++++++++++++++++++
sysdeps/arm/fpu_control.h | 7 +-
sysdeps/arm/math_private.h | 6 ++
3 files changed, 262 insertions(+), 1 deletion(-)
create mode 100644 sysdeps/arm/fenv_private.h
create mode 100644 sysdeps/arm/math_private.h
diff --git a/sysdeps/arm/fenv_private.h b/sysdeps/arm/fenv_private.h
new file mode 100644
index 0000000..6c65cfa
--- /dev/null
+++ b/sysdeps/arm/fenv_private.h
@@ -0,0 +1,250 @@
+/* Private floating point rounding and exceptions handling. ARM VFP version.
+ Copyright (C) 2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef FENV_PRIVATE_H
+#define FENV_PRIVATE_H 1
+
+#include <fenv.h>
+#include <fpu_control.h>
+#include <arm-features.h>
+
+#if ARM_HAVE_VFP
+
+static __always_inline void
+libc_feholdexcept_vfp (fenv_t *envp) /* Save the FPSCR in *ENVP, then clear every _FPU_MASK_EXCEPT bit. */
+{
+ fpu_control_t fpscr;
+
+ _FPU_GETCW (fpscr); /* Read the current control word. */
+ envp->__cw = fpscr; /* Stored before modification, so *ENVP holds the caller's state. */
+
+ /* Clear exception flags and set all exceptions to non-stop. */
+ fpscr &= ~_FPU_MASK_EXCEPT;
+ _FPU_SETCW (fpscr); /* Unlike the rounding-mode helpers, this write is unconditional. */
+}
+
+static __always_inline void
+libc_fesetround_vfp (int round) /* Switch the FPSCR rounding mode to ROUND; no write if it is already selected. */
+{
+ fpu_control_t fpscr;
+
+ _FPU_GETCW (fpscr);
+
+ /* Set new rounding mode if different. FE_TOWARDZERO is used here as the mask for the rounding-mode bits; assumes ROUND has no bits outside that mask. */
+ if (__glibc_unlikely ((fpscr & FE_TOWARDZERO) != round))
+ _FPU_SETCW ((fpscr & ~FE_TOWARDZERO) | round);
+}
+
+static __always_inline void
+libc_feholdexcept_setround_vfp (fenv_t *envp, int round) /* Save the FPSCR in *ENVP, clear the _FPU_MASK_EXCEPT bits and install ROUND with one write. */
+{
+ fpu_control_t fpscr;
+
+ _FPU_GETCW (fpscr);
+ envp->__cw = fpscr; /* Saved before any bit is changed. */
+
+ /* Clear exception flags, set all exceptions to non-stop,
+ and set new rounding mode. The old rounding bits are cleared first so ROUND can simply be ORed in. */
+ fpscr &= ~(_FPU_MASK_EXCEPT | FE_TOWARDZERO);
+ _FPU_SETCW (fpscr | round); /* Always written, even if the resulting value is unchanged. */
+}
+
+static __always_inline void
+libc_feholdsetround_vfp (fenv_t *envp, int round) /* Save the FPSCR in *ENVP and switch to rounding mode ROUND. */
+{
+ fpu_control_t fpscr;
+
+ _FPU_GETCW (fpscr);
+ envp->__cw = fpscr; /* Saved unconditionally, whether or not the mode changes below. */
+
+ /* Set new rounding mode if different. Only the bits under the FE_TOWARDZERO mask are replaced. */
+ if (__glibc_unlikely ((fpscr & FE_TOWARDZERO) != round))
+ _FPU_SETCW ((fpscr & ~FE_TOWARDZERO) | round);
+}
+
+static __always_inline void
+libc_feresetround_vfp (fenv_t *envp) /* Restore the rounding mode saved in *ENVP, keeping all other current FPSCR bits. */
+{
+ fpu_control_t fpscr, round;
+
+ _FPU_GETCW (fpscr);
+
+ /* Check whether rounding modes are different. ROUND holds exactly the rounding bits that differ. */
+ round = (envp->__cw ^ fpscr) & FE_TOWARDZERO;
+
+ /* Restore the rounding mode if it was changed. XOR flips only the differing bits, which yields the saved mode. */
+ if (__glibc_unlikely (round != 0))
+ _FPU_SETCW (fpscr ^ round);
+}
+
+static __always_inline int
+libc_fetestexcept_vfp (int ex) /* Return the bits of EX, limited to FE_ALL_EXCEPT, that are currently set in the FPSCR. */
+{
+ fpu_control_t fpscr;
+
+ _FPU_GETCW (fpscr); /* Read only; the FPSCR is never written here. */
+ return fpscr & ex & FE_ALL_EXCEPT;
+}
+
+static __always_inline void
+libc_fesetenv_vfp (fenv_t *envp) /* Install the control word saved in *ENVP unless it already matches outside the NZCV bits. */
+{
+ fpu_control_t fpscr, new_fpscr;
+
+ _FPU_GETCW (fpscr);
+ new_fpscr = envp->__cw;
+
+ /* Write new FPSCR if different (ignoring NZCV flags). When the write happens, the whole saved word is written, including its NZCV bits. */
+ if (__glibc_unlikely (((fpscr ^ new_fpscr) & ~_FPU_MASK_NZCV) != 0))
+ _FPU_SETCW (new_fpscr);
+}
+
+static __always_inline int
+libc_feupdateenv_test_vfp (fenv_t *envp, int ex) /* Restore *ENVP while keeping the currently set exception flags; return those flags masked by EX. */
+{
+ fpu_control_t fpscr, new_fpscr;
+ int excepts;
+
+ _FPU_GETCW (fpscr);
+
+ /* Merge current exception flags with the saved fenv. */
+ excepts = fpscr & FE_ALL_EXCEPT;
+ new_fpscr = envp->__cw | excepts;
+
+ /* Write new FPSCR if different (ignoring NZCV flags). */
+ if (__glibc_unlikely (((fpscr ^ new_fpscr) & ~_FPU_MASK_NZCV) != 0))
+ _FPU_SETCW (new_fpscr);
+
+ /* Raise the exceptions if enabled in the new FP state. NOTE(review): presumably FE_EXCEPT_SHIFT moves the enable bits onto the flag positions -- confirm against fenv.h. */
+ if (__glibc_unlikely (excepts & (new_fpscr >> FE_EXCEPT_SHIFT)))
+ feraiseexcept (excepts);
+
+ return excepts & ex; /* EXCEPTS was sampled before the restore above. */
+}
+
+static __always_inline void
+libc_feupdateenv_vfp (fenv_t *envp) /* Same as libc_feupdateenv_test_vfp, with the result discarded. */
+{
+ libc_feupdateenv_test_vfp (envp, 0); /* EX = 0 selects no flags; only the restore side effects are wanted. */
+}
+
+#define libc_feholdexcept libc_feholdexcept_vfp /* Each group below maps the unsuffixed, f and l names onto one _vfp helper. */
+#define libc_feholdexceptf libc_feholdexcept_vfp
+#define libc_feholdexceptl libc_feholdexcept_vfp
+
+#define libc_fesetround libc_fesetround_vfp
+#define libc_fesetroundf libc_fesetround_vfp
+#define libc_fesetroundl libc_fesetround_vfp
+
+#define libc_feresetround libc_feresetround_vfp
+#define libc_feresetroundf libc_feresetround_vfp
+#define libc_feresetroundl libc_feresetround_vfp
+
+#define libc_feresetround_noex libc_fesetenv_vfp /* The _noex reset installs the whole saved environment, not just the rounding bits. */
+#define libc_feresetround_noexf libc_fesetenv_vfp
+#define libc_feresetround_noexl libc_fesetenv_vfp
+
+#define libc_feholdexcept_setround libc_feholdexcept_setround_vfp
+#define libc_feholdexcept_setroundf libc_feholdexcept_setround_vfp
+#define libc_feholdexcept_setroundl libc_feholdexcept_setround_vfp
+
+#define libc_feholdsetround libc_feholdsetround_vfp
+#define libc_feholdsetroundf libc_feholdsetround_vfp
+#define libc_feholdsetroundl libc_feholdsetround_vfp
+
+#define libc_fetestexcept libc_fetestexcept_vfp
+#define libc_fetestexceptf libc_fetestexcept_vfp
+#define libc_fetestexceptl libc_fetestexcept_vfp
+
+#define libc_fesetenv libc_fesetenv_vfp
+#define libc_fesetenvf libc_fesetenv_vfp
+#define libc_fesetenvl libc_fesetenv_vfp
+
+#define libc_feupdateenv libc_feupdateenv_vfp
+#define libc_feupdateenvf libc_feupdateenv_vfp
+#define libc_feupdateenvl libc_feupdateenv_vfp
+
+#define libc_feupdateenv_test libc_feupdateenv_test_vfp
+#define libc_feupdateenv_testf libc_feupdateenv_test_vfp
+#define libc_feupdateenv_testl libc_feupdateenv_test_vfp
+
+/* We have support for rounding mode context. The *_vfp_ctx helpers that follow operate on a struct rm_ctx instead of a bare fenv_t. */
+#define HAVE_RM_CTX 1
+
+static __always_inline void
+libc_feholdsetround_vfp_ctx (struct rm_ctx *ctx, int r) /* Save the FPSCR in CTX and switch to rounding mode R, recording whether a write was needed. */
+{
+ fpu_control_t fpscr, round;
+
+ _FPU_GETCW (fpscr);
+ ctx->updated_status = false; /* Stays false when the mode already matches R. */
+ ctx->env.__cw = fpscr;
+
+ /* Check whether rounding modes are different. ROUND holds exactly the rounding bits that differ. */
+ round = (fpscr ^ r) & FE_TOWARDZERO;
+
+ /* Set the rounding mode if changed. */
+ if (__glibc_unlikely (round != 0))
+ {
+ ctx->updated_status = true; /* Read back by libc_feresetround_vfp_ctx to decide whether to restore. */
+ _FPU_SETCW (fpscr ^ round); /* XOR flips only the differing rounding bits. */
+ }
+}
+
+static __always_inline void
+libc_feresetround_vfp_ctx (struct rm_ctx *ctx) /* Put back the rounding mode saved in CTX; touches the FPSCR only if CTX says it was changed. */
+{
+ /* Restore the rounding mode if updated. When updated_status is false the FPSCR is neither read nor written. */
+ if (__glibc_unlikely (ctx->updated_status))
+ {
+ fpu_control_t fpscr;
+
+ _FPU_GETCW (fpscr);
+ fpscr = (fpscr & ~FE_TOWARDZERO) | (ctx->env.__cw & FE_TOWARDZERO); /* Keep the current non-rounding bits; take the rounding bits from the saved word. */
+ _FPU_SETCW (fpscr);
+ }
+}
+
+static __always_inline void
+libc_fesetenv_vfp_ctx (struct rm_ctx *ctx) /* Install the control word saved in CTX unless it already matches outside the NZCV bits. */
+{
+ fpu_control_t fpscr, new_fpscr;
+
+ _FPU_GETCW (fpscr); /* Always read: this helper does not consult ctx->updated_status. */
+ new_fpscr = ctx->env.__cw;
+
+ /* Write new FPSCR if different (ignoring NZCV flags). */
+ if (__glibc_unlikely (((fpscr ^ new_fpscr) & ~_FPU_MASK_NZCV) != 0))
+ _FPU_SETCW (new_fpscr);
+}
+
+#define libc_feholdsetround_ctx libc_feholdsetround_vfp_ctx /* The unsuffixed, f and l _ctx names all share the same _vfp_ctx helpers. */
+#define libc_feresetround_ctx libc_feresetround_vfp_ctx
+#define libc_feresetround_noex_ctx libc_fesetenv_vfp_ctx /* The _noex reset installs the whole saved environment. */
+
+#define libc_feholdsetroundf_ctx libc_feholdsetround_vfp_ctx
+#define libc_feresetroundf_ctx libc_feresetround_vfp_ctx
+#define libc_feresetround_noexf_ctx libc_fesetenv_vfp_ctx
+
+#define libc_feholdsetroundl_ctx libc_feholdsetround_vfp_ctx
+#define libc_feresetroundl_ctx libc_feresetround_vfp_ctx
+#define libc_feresetround_noexl_ctx libc_fesetenv_vfp_ctx
+
+#endif
+
+#endif /* FENV_PRIVATE_H */
diff --git a/sysdeps/arm/fpu_control.h b/sysdeps/arm/fpu_control.h
index 6d54b9b..0377697 100644
--- a/sysdeps/arm/fpu_control.h
+++ b/sysdeps/arm/fpu_control.h
@@ -37,11 +37,16 @@ extern fpu_control_t __fpu_control;
#define _FPU_MASK_UM 0x00000800 /* underflow */
#define _FPU_MASK_PM 0x00001000 /* inexact */
+#define _FPU_MASK_NZCV 0xF0000000 /* NZCV flags */
+
+#define _FPU_MASK_EXCEPT 0x00001f1f /* all exception flags */
+
/* Some bits in the FPSCR are not yet defined. They must be preserved when
modifying the contents. */
#define _FPU_RESERVED 0x00086060
#define _FPU_DEFAULT 0x00000000
-/* Default + exceptions enabled. */
+
+/* Default + exceptions enabled. */
#define _FPU_IEEE (_FPU_DEFAULT | 0x00001f00)
/* Type of the control word. */
diff --git a/sysdeps/arm/math_private.h b/sysdeps/arm/math_private.h
new file mode 100644
index 0000000..541a7f8
--- /dev/null
+++ b/sysdeps/arm/math_private.h
@@ -0,0 +1,6 @@
+#ifndef _MATH_PRIVATE_H /* NOTE(review): never defined in this file; presumably the next math_private.h defines it -- confirm. */
+
+#include "fenv_private.h" /* Pull in the ARM inline fenv helpers first. */
+#include_next <math_private.h> /* Then continue with the next math_private.h on the include path. */
+
+#endif /* _MATH_PRIVATE_H */
--
1.7.9.5