This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] [ARM] Add support for fenv_private on ARM


Hi,

This patch improves the performance of common math functions by avoiding
unnecessary writes to FPSCR. It adds fenv_private.h with faster inline variants
of the fenv functions that skip the FPSCR write when the value would not change.
For a call to sin(), the number of FPSCR reads/writes drops from 4/3 to 3/1 with
the inline fenv implementation, and to 1/0 with the HAVE_RM_CTX implementation.

A summary of performance on Cortex-A15:

Without fenv_private.h:

cos(): ITERS:2.07e+07: TOTAL:10.6831s, MAX:1519.12ns, MIN:231.833ns, 1.93763e+06 iter/s
exp(): ITERS:3.598e+06: TOTAL:10.6089s, MAX:11415.5ns, MIN:175.375ns, 339148 iter/s
pow(): ITERS:3.3712e+07: TOTAL:9.91444s, MAX:531.669ns, MIN:57.833ns, 3.40029e+06 iter/s
sin(): ITERS:1.96e+07: TOTAL:10.5283s, MAX:1498.83ns, MIN:224.166ns, 1.86165e+06 iter/s
sincos(): ITERS:1.8684e+07: TOTAL:9.84671s, MAX:1599.79ns, MIN:499.417ns, 1.89749e+06 iter/s
tan(): ITERS:2.2701e+07: TOTAL:11.0817s, MAX:1001.79ns, MIN:225.333ns, 2.04852e+06 iter/s

With fenv_private.h:

cos(): ITERS:2.99e+07: TOTAL:9.93882s, MAX:2341.34ns, MIN:43.875ns, 3.00841e+06 iter/s
exp(): ITERS:3.598e+06: TOTAL:10.0066s, MAX:10440.2ns, MIN:26.5ns, 359562 iter/s
pow(): ITERS:5.8093e+07: TOTAL:9.86581s, MAX:1102.29ns, MIN:63.042ns, 5.88832e+06 iter/s
sin(): ITERS:3.08e+07: TOTAL:10.8619s, MAX:3371.59ns, MIN:37.708ns, 2.8356e+06 iter/s
sincos(): ITERS:5.7708e+07: TOTAL:9.88083s, MAX:1348.21ns, MIN:148.875ns, 5.8404e+06 iter/s
tan(): ITERS:3.243e+07: TOTAL:10.1926s, MAX:1840.3ns, MIN:50.042ns, 3.18171e+06 iter/s

The glibc tests pass with the same number of failures as before with the new
fenv_private.h (both with and without HAVE_RM_CTX).

OK for commit?

Wilco

From ba7c978b428967ee8217f7edef88156a288c8014 Mon Sep 17 00:00:00 2001
From: Wilco Dijkstra <wdijkstr@localhost.localdomain>
Date: Tue, 4 Mar 2014 13:44:44 +0000
Subject: [PATCH 1/2] Add support for fenv_private on ARM.

---
 sysdeps/arm/fenv_private.h |  250 ++++++++++++++++++++++++++++++++++++++++++++
 sysdeps/arm/fpu_control.h  |    7 +-
 sysdeps/arm/math_private.h |    6 ++
 3 files changed, 262 insertions(+), 1 deletion(-)
 create mode 100644 sysdeps/arm/fenv_private.h
 create mode 100644 sysdeps/arm/math_private.h

diff --git a/sysdeps/arm/fenv_private.h b/sysdeps/arm/fenv_private.h
new file mode 100644
index 0000000..6c65cfa
--- /dev/null
+++ b/sysdeps/arm/fenv_private.h
@@ -0,0 +1,250 @@
+/* Private floating point rounding and exceptions handling.  ARM VFP version.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef FENV_PRIVATE_H
+#define FENV_PRIVATE_H 1
+
+#include <fenv.h>
+#include <fpu_control.h>
+#include <arm-features.h>
+
+#if ARM_HAVE_VFP
+
+static __always_inline void
+libc_feholdexcept_vfp (fenv_t *envp)
+{
+  fpu_control_t fpscr;
+
+  _FPU_GETCW (fpscr);
+  envp->__cw = fpscr;
+
+  /* Clear exception flags and set all exceptions to non-stop.  */
+  fpscr &= ~_FPU_MASK_EXCEPT;
+  _FPU_SETCW (fpscr);
+}
+
+static __always_inline void
+libc_fesetround_vfp (int round)
+{
+  fpu_control_t fpscr;
+
+  _FPU_GETCW (fpscr);
+
+  /* Set new rounding mode if different.  */
+  if (__glibc_unlikely ((fpscr & FE_TOWARDZERO) != round))
+    _FPU_SETCW ((fpscr & ~FE_TOWARDZERO) | round);
+}
+
+static __always_inline void
+libc_feholdexcept_setround_vfp (fenv_t *envp, int round)
+{
+  fpu_control_t fpscr;
+
+  _FPU_GETCW (fpscr);
+  envp->__cw = fpscr;
+
+  /* Clear exception flags, set all exceptions to non-stop,
+     and set new rounding mode.  */
+  fpscr &= ~(_FPU_MASK_EXCEPT | FE_TOWARDZERO);
+  _FPU_SETCW (fpscr | round);
+}
+
+static __always_inline void
+libc_feholdsetround_vfp (fenv_t *envp, int round)
+{
+  fpu_control_t fpscr;
+
+  _FPU_GETCW (fpscr);
+  envp->__cw = fpscr;
+
+  /* Set new rounding mode if different.  */
+  if (__glibc_unlikely ((fpscr & FE_TOWARDZERO) != round))
+    _FPU_SETCW ((fpscr & ~FE_TOWARDZERO) | round);
+}
+
+static __always_inline void
+libc_feresetround_vfp (fenv_t *envp)
+{
+  fpu_control_t fpscr, round;
+
+  _FPU_GETCW (fpscr);
+
+  /* Check whether rounding modes are different.  */
+  round = (envp->__cw ^ fpscr) & FE_TOWARDZERO;
+
+  /* Restore the rounding mode if it was changed.  */
+  if (__glibc_unlikely (round != 0))
+    _FPU_SETCW (fpscr ^ round);
+}
+
+static __always_inline int
+libc_fetestexcept_vfp (int ex)
+{
+  fpu_control_t fpscr;
+
+  _FPU_GETCW (fpscr);
+  return fpscr & ex & FE_ALL_EXCEPT;
+}
+
+static __always_inline void
+libc_fesetenv_vfp (fenv_t *envp)
+{
+  fpu_control_t fpscr, new_fpscr;
+
+  _FPU_GETCW (fpscr);
+  new_fpscr = envp->__cw;
+
+  /* Write new FPSCR if different (ignoring NZCV flags).  */
+  if (__glibc_unlikely (((fpscr ^ new_fpscr) & ~_FPU_MASK_NZCV) != 0))
+    _FPU_SETCW (new_fpscr);
+}
+
+static __always_inline int
+libc_feupdateenv_test_vfp (fenv_t *envp, int ex)
+{
+  fpu_control_t fpscr, new_fpscr;
+  int excepts;
+
+  _FPU_GETCW (fpscr);
+
+  /* Merge current exception flags with the saved fenv.  */
+  excepts = fpscr & FE_ALL_EXCEPT;
+  new_fpscr = envp->__cw | excepts;
+
+  /* Write new FPSCR if different (ignoring NZCV flags).  */
+  if (__glibc_unlikely (((fpscr ^ new_fpscr) & ~_FPU_MASK_NZCV) != 0))
+    _FPU_SETCW (new_fpscr);
+
+  /* Raise the exceptions if enabled in the new FP state.  */
+  if (__glibc_unlikely (excepts & (new_fpscr >> FE_EXCEPT_SHIFT)))
+    feraiseexcept (excepts);
+
+  return excepts & ex;
+}
+
+static __always_inline void
+libc_feupdateenv_vfp (fenv_t *envp)
+{
+  libc_feupdateenv_test_vfp (envp, 0);
+}
+
+#define libc_feholdexcept  libc_feholdexcept_vfp
+#define libc_feholdexceptf libc_feholdexcept_vfp
+#define libc_feholdexceptl libc_feholdexcept_vfp
+
+#define libc_fesetround  libc_fesetround_vfp
+#define libc_fesetroundf libc_fesetround_vfp
+#define libc_fesetroundl libc_fesetround_vfp
+
+#define libc_feresetround  libc_feresetround_vfp
+#define libc_feresetroundf libc_feresetround_vfp
+#define libc_feresetroundl libc_feresetround_vfp
+
+#define libc_feresetround_noex  libc_fesetenv_vfp
+#define libc_feresetround_noexf libc_fesetenv_vfp
+#define libc_feresetround_noexl libc_fesetenv_vfp
+
+#define libc_feholdexcept_setround  libc_feholdexcept_setround_vfp
+#define libc_feholdexcept_setroundf libc_feholdexcept_setround_vfp
+#define libc_feholdexcept_setroundl libc_feholdexcept_setround_vfp
+
+#define libc_feholdsetround  libc_feholdsetround_vfp
+#define libc_feholdsetroundf libc_feholdsetround_vfp
+#define libc_feholdsetroundl libc_feholdsetround_vfp
+
+#define libc_fetestexcept  libc_fetestexcept_vfp
+#define libc_fetestexceptf libc_fetestexcept_vfp
+#define libc_fetestexceptl libc_fetestexcept_vfp
+
+#define libc_fesetenv  libc_fesetenv_vfp
+#define libc_fesetenvf libc_fesetenv_vfp
+#define libc_fesetenvl libc_fesetenv_vfp
+
+#define libc_feupdateenv  libc_feupdateenv_vfp
+#define libc_feupdateenvf libc_feupdateenv_vfp
+#define libc_feupdateenvl libc_feupdateenv_vfp
+
+#define libc_feupdateenv_test  libc_feupdateenv_test_vfp
+#define libc_feupdateenv_testf libc_feupdateenv_test_vfp
+#define libc_feupdateenv_testl libc_feupdateenv_test_vfp
+
+/* We have support for rounding mode context.  */
+#define HAVE_RM_CTX 1
+
+static __always_inline void
+libc_feholdsetround_vfp_ctx (struct rm_ctx *ctx, int r)
+{
+  fpu_control_t fpscr, round;
+
+  _FPU_GETCW (fpscr);
+  ctx->updated_status = false;
+  ctx->env.__cw = fpscr;
+
+  /* Check whether rounding modes are different.  */
+  round = (fpscr ^ r) & FE_TOWARDZERO;
+
+  /* Set the rounding mode if changed.  */
+  if (__glibc_unlikely (round != 0))
+    {
+      ctx->updated_status = true;
+      _FPU_SETCW (fpscr ^ round);
+    }
+}
+
+static __always_inline void
+libc_feresetround_vfp_ctx (struct rm_ctx *ctx)
+{
+  /* Restore the rounding mode if updated.  */
+  if (__glibc_unlikely (ctx->updated_status))
+    {
+      fpu_control_t fpscr;
+
+      _FPU_GETCW (fpscr);
+      fpscr = (fpscr & ~FE_TOWARDZERO) | (ctx->env.__cw & FE_TOWARDZERO);
+      _FPU_SETCW (fpscr);
+    }
+}
+
+static __always_inline void
+libc_fesetenv_vfp_ctx (struct rm_ctx *ctx)
+{
+  fpu_control_t fpscr, new_fpscr;
+
+  _FPU_GETCW (fpscr);
+  new_fpscr = ctx->env.__cw;
+
+  /* Write new FPSCR if different (ignoring NZCV flags).  */
+  if (__glibc_unlikely (((fpscr ^ new_fpscr) & ~_FPU_MASK_NZCV) != 0))
+    _FPU_SETCW (new_fpscr);
+}
+
+#define libc_feholdsetround_ctx		libc_feholdsetround_vfp_ctx
+#define libc_feresetround_ctx		libc_feresetround_vfp_ctx
+#define libc_feresetround_noex_ctx	libc_fesetenv_vfp_ctx
+
+#define libc_feholdsetroundf_ctx	libc_feholdsetround_vfp_ctx
+#define libc_feresetroundf_ctx		libc_feresetround_vfp_ctx
+#define libc_feresetround_noexf_ctx	libc_fesetenv_vfp_ctx
+
+#define libc_feholdsetroundl_ctx	libc_feholdsetround_vfp_ctx
+#define libc_feresetroundl_ctx		libc_feresetround_vfp_ctx
+#define libc_feresetround_noexl_ctx	libc_fesetenv_vfp_ctx
+
+#endif
+
+#endif /* FENV_PRIVATE_H */
diff --git a/sysdeps/arm/fpu_control.h b/sysdeps/arm/fpu_control.h
index 6d54b9b..0377697 100644
--- a/sysdeps/arm/fpu_control.h
+++ b/sysdeps/arm/fpu_control.h
@@ -37,11 +37,16 @@ extern fpu_control_t __fpu_control;
 #define _FPU_MASK_UM	0x00000800	/* underflow */
 #define _FPU_MASK_PM	0x00001000	/* inexact */
 
+#define _FPU_MASK_NZCV	0xF0000000	/* NZCV flags */
+
+#define _FPU_MASK_EXCEPT 0x00001f1f	/* all exception flags */
+
 /* Some bits in the FPSCR are not yet defined.  They must be preserved when
    modifying the contents.  */
 #define _FPU_RESERVED	0x00086060
 #define _FPU_DEFAULT    0x00000000
-/* Default + exceptions enabled. */
+
+/* Default + exceptions enabled.  */
 #define _FPU_IEEE	(_FPU_DEFAULT | 0x00001f00)
 
 /* Type of the control word.  */
diff --git a/sysdeps/arm/math_private.h b/sysdeps/arm/math_private.h
new file mode 100644
index 0000000..541a7f8
--- /dev/null
+++ b/sysdeps/arm/math_private.h
@@ -0,0 +1,6 @@
+#ifndef _MATH_PRIVATE_H
+
+#include "fenv_private.h"
+#include_next <math_private.h>
+
+#endif
-- 
1.7.9.5





Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]