This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch master updated. glibc-2.18-432-g41e8926
- From: azanella at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 25 Nov 2013 12:35:11 -0000
- Subject: GNU C Library master sources branch master updated. glibc-2.18-432-g41e8926
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master has been updated
via 41e8926aa4b7f17bc95984737ee82a254ad0911c (commit)
from 085b5ddfe301eb74bc9f824ff293f054c1c910b7 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=41e8926aa4b7f17bc95984737ee82a254ad0911c
commit 41e8926aa4b7f17bc95984737ee82a254ad0911c
Author: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Date: Mon Nov 25 06:12:30 2013 -0600
PowerPC: Set/restore rounding mode only when needed
This patch helps some math functions performance by adding the libc_fexxx
variant of inline functions to handle both FPU round and exception set/restore
and by using them on the libc_fexxx_ctx functions. It is based on already coded
fexxx family functions for PPC with fpu.
Here is the summary of performance improvements due this patch (measured on a
POWER7 machine):
Before:
cos(): ITERS:9.5895e+07: TOTAL:5116.03Mcy, MAX:77.6cy, MIN:49.792cy, 18744 calls/Mcy
exp(): ITERS:2.827e+07: TOTAL:5187.15Mcy, MAX:494.018cy, MIN:38.422cy, 5450.01 calls/Mcy
pow(): ITERS:6.1705e+07: TOTAL:5144.26Mcy, MAX:171.95cy, MIN:29.935cy, 11994.9 calls/Mcy
sin(): ITERS:8.6898e+07: TOTAL:5117.06Mcy, MAX:83.841cy, MIN:46.582cy, 16982 calls/Mcy
tan(): ITERS:2.9473e+07: TOTAL:5115.39Mcy, MAX:191.017cy, MIN:172.352cy, 5761.63 calls/Mcy
After:
cos(): ITERS:2.05265e+08: TOTAL:5111.37Mcy, MAX:78.754cy, MIN:24.196cy, 40158.5 calls/Mcy
exp(): ITERS:3.341e+07: TOTAL:5170.84Mcy, MAX:476.317cy, MIN:15.574cy, 6461.23 calls/Mcy
pow(): ITERS:7.6153e+07: TOTAL:5129.1Mcy, MAX:147.5cy, MIN:30.916cy, 14847.2 calls/Mcy
sin(): ITERS:1.58816e+08: TOTAL:5115.11Mcy, MAX:1490.39cy, MIN:22.341cy, 31048.4 calls/Mcy
tan(): ITERS:3.4964e+07: TOTAL:5114.18Mcy, MAX:177.422cy, MIN:146.115cy, 6836.68 calls/Mcy
diff --git a/ChangeLog b/ChangeLog
index b9201fc..9e2a0c6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2013-11-25 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
+
+ * sysdeps/ieee754/ldbl-128ibm/e_expl.c (__ieee754_expl): Use
+ SET_RESTORE_ROUND instead of feholdexcept/fesetround/fesetenv.
+ * sysdeps/powerpc/fpu/fenv_libc.h (__fegetround): Remove define.
+ (__fesetround): Remove define.
+ * sysdeps/powerpc/fpu/fenv_private.h: New file: Inline floating point
+ rounding and exceptions handling.
+ * sysdeps/powerpc/fpu/math_private.h: Include fenv_private.h.
+ * sysdeps/powerpc/fpu/fenv_libc.h (__fe_mask_env): Define as hidden.
+ (__fe_nomask_env): Likewise.
+ * sysdeps/ieee754/ldbl-128ibm/s_llrintl.c (__llrintl): Use
+ __fegetround instead of fegetround.
+ * sysdeps/ieee754/ldbl-128ibm/s_lrintl.c (__lrintl): Likewise.
+ * sysdeps/ieee754/ldbl-128ibm/s_rintl.c (__rintl): Likewise.
+
2013-11-21 Roland McGrath <roland@hack.frob.com>
* malloc/malloc.c: Move #include <sys/param.h> to the top; comment why
diff --git a/sysdeps/ieee754/ldbl-128ibm/e_expl.c b/sysdeps/ieee754/ldbl-128ibm/e_expl.c
index f7c50bf..65ef185 100644
--- a/sysdeps/ieee754/ldbl-128ibm/e_expl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/e_expl.c
@@ -134,18 +134,17 @@ static const long double C[] = {
long double
__ieee754_expl (long double x)
{
+ long double result, x22;
+ union ibm_extended_long_double ex2_u, scale_u;
+ int unsafe;
+
/* Check for usual case. */
if (isless (x, himark) && isgreater (x, lomark))
{
- int tval1, tval2, unsafe, n_i, exponent2;
- long double x22, n, result, xl;
- union ibm_extended_long_double ex2_u, scale_u;
- fenv_t oldenv;
-
- feholdexcept (&oldenv);
-#ifdef FE_TONEAREST
- fesetround (FE_TONEAREST);
-#endif
+ int tval1, tval2, n_i, exponent2;
+ long double n, xl;
+
+ SET_RESTORE_ROUND (FE_TONEAREST);
n = __roundl (x*M_1_LN2);
x = x-n*M_LN2_0;
@@ -201,11 +200,6 @@ __ieee754_expl (long double x)
less than 4.8e-39. */
x22 = x + x*x*(P1+x*(P2+x*(P3+x*(P4+x*(P5+x*P6)))));
- /* Return result. */
- fesetenv (&oldenv);
-
- result = x22 * ex2_u.ld + ex2_u.ld;
-
/* Now we can test whether the result is ultimate or if we are unsure.
In the later case we should probably call a mpn based routine to give
the ultimate result.
@@ -235,10 +229,6 @@ __ieee754_expl (long double x)
return __ieee754_expl_proc2 (origx);
}
*/
- if (!unsafe)
- return result;
- else
- return result * scale_u.ld;
}
/* Exceptional cases: */
else if (isless (x, himark))
@@ -253,5 +243,10 @@ __ieee754_expl (long double x)
else
/* Return x, if x is a NaN or Inf; or overflow, otherwise. */
return TWO1023*x;
+
+ result = x22 * ex2_u.ld + ex2_u.ld;
+ if (!unsafe)
+ return result;
+ return result * scale_u.ld;
}
strong_alias (__ieee754_expl, __expl_finite)
diff --git a/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c b/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c
index 8560349..3503973 100644
--- a/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c
@@ -43,7 +43,7 @@ __llrintl (long double x)
#endif
)
{
- save_round = fegetround ();
+ save_round = __fegetround ();
if (__builtin_expect ((xh == -(double) (-__LONG_LONG_MAX__ - 1)), 0))
{
diff --git a/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c b/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c
index 588098d..49dbd42 100644
--- a/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c
@@ -49,7 +49,7 @@ __lrintl (long double x)
#endif
)
{
- save_round = fegetround ();
+ save_round = __fegetround ();
#if __LONG_MAX__ == 2147483647
long long llhi = (long long) xh;
diff --git a/sysdeps/ieee754/ldbl-128ibm/s_rintl.c b/sysdeps/ieee754/ldbl-128ibm/s_rintl.c
index 48dbe85..5fd6bb8 100644
--- a/sysdeps/ieee754/ldbl-128ibm/s_rintl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/s_rintl.c
@@ -40,7 +40,7 @@ __rintl (long double x)
__builtin_inf ()), 1))
{
double orig_xh;
- int save_round = fegetround ();
+ int save_round = __fegetround ();
/* Long double arithmetic, including the canonicalisation below,
only works in round-to-nearest mode. */
diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
index cb15c1c..ecd6b91 100644
--- a/sysdeps/powerpc/fpu/fenv_libc.h
+++ b/sysdeps/powerpc/fpu/fenv_libc.h
@@ -23,9 +23,9 @@
#include <ldsodefs.h>
#include <sysdep.h>
-extern const fenv_t *__fe_nomask_env (void);
+extern const fenv_t *__fe_nomask_env (void) attribute_hidden;
-extern const fenv_t *__fe_mask_env (void);
+extern const fenv_t *__fe_mask_env (void) attribute_hidden;
/* The sticky bits in the FPSCR indicating exceptions have occurred. */
#define FPSCR_STICKY_BITS ((FE_ALL_EXCEPT | FE_ALL_INVALID) & ~FE_INVALID)
@@ -83,7 +83,6 @@ __fegetround (void)
"mfcr %0" : "=r"(result) : : "cr7");
return result & 3;
}
-#define fegetround() __fegetround()
static inline int
__fesetround (int round)
@@ -107,7 +106,6 @@ __fesetround (int round)
return 0;
}
-#define fesetround(mode) __fesetround(mode)
/* Definitions of all the FPSCR bit numbers */
enum {
diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h
new file mode 100644
index 0000000..293f840
--- /dev/null
+++ b/sysdeps/powerpc/fpu/fenv_private.h
@@ -0,0 +1,274 @@
+/* Private floating point rounding and exceptions handling. PowerPC version.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef FENV_PRIVATE_H
+#define FENV_PRIVATE_H 1
+
+#include <fenv.h>
+#include <fenv_libc.h>
+#include <fpu_control.h>
+
+#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM \
+ | _FPU_MASK_XM | _FPU_MASK_IM)
+
+/* Mask everything but the rounding moded and non-IEEE arithmetic flags. */
+#define _FPU_MASK_ROUNDING 0xffffffff00000007LL
+
+/* Mask restore rounding mode and exception enabled. */
+#define _FPU_MASK_EXCEPT_ROUND 0xffffffff1fffff00LL
+
+/* Mask exception enable but fraction rounded/inexact and FP result/CC
+ bits. */
+#define _FPU_MASK_FRAC_INEX_RET_CC 0x1ff80fff
+
+static __always_inline void
+libc_feholdexcept_ppc (fenv_t *envp)
+{
+ fenv_union_t old, new;
+
+ old.fenv = *envp = fegetenv_register ();
+
+ new.l = old.l & _FPU_MASK_ROUNDING;
+
+ /* If the old env had any enabled exceptions, then mask SIGFPE in the
+ MSR FE0/FE1 bits. This may allow the FPU to run faster because it
+ always takes the default action and can not generate SIGFPE. */
+ if ((old.l & _FPU_MASK_ALL) != 0)
+ (void) __fe_mask_env ();
+
+ fesetenv_register (new.fenv);
+}
+
+static __always_inline void
+libc_fesetround_ppc (int r)
+{
+ __fesetround (r);
+}
+
+static __always_inline void
+libc_feholdexcept_setround_ppc (fenv_t *envp, int r)
+{
+ fenv_union_t old, new;
+
+ old.fenv = *envp = fegetenv_register ();
+
+ new.l = (old.l & _FPU_MASK_ROUNDING) | r;
+
+ if ((old.l & _FPU_MASK_ALL) != 0)
+ (void) __fe_mask_env ();
+
+ fesetenv_register (new.fenv);
+}
+
+static __always_inline int
+libc_fetestexcept_ppc (int e)
+{
+ fenv_union_t u;
+ u.fenv = fegetenv_register ();
+ return u.l & e;
+}
+
+static __always_inline void
+libc_fesetenv_ppc (const fenv_t *envp)
+{
+ fenv_union_t old, new;
+
+ new.fenv = *envp;
+ old.fenv = fegetenv_register ();
+
+ /* If the old env has no enabled exceptions and the new env has any enabled
+ exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the
+ hardware into "precise mode" and may cause the FPU to run slower on some
+ hardware. */
+ if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
+ (void) __fe_nomask_env ();
+
+ /* If the old env had any enabled exceptions and the new env has no enabled
+ exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the
+ FPU to run faster because it always takes the default action and can not
+ generate SIGFPE. */
+ if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
+ (void) __fe_mask_env ();
+
+ fesetenv_register (*envp);
+}
+
+static __always_inline int
+libc_feupdateenv_test_ppc (fenv_t *envp, int ex)
+{
+ fenv_union_t old, new;
+
+ new.fenv = *envp;
+ old.fenv = fegetenv_register ();
+
+ /* Restore rounding mode and exception enable from *envp and merge
+ exceptions. Leave fraction rounded/inexact and FP result/CC bits
+ unchanged. */
+ new.l = (old.l & _FPU_MASK_EXCEPT_ROUND)
+ | (new.l & _FPU_MASK_FRAC_INEX_RET_CC);
+
+ if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
+ (void) __fe_nomask_env ();
+
+ if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
+ (void) __fe_mask_env ();
+
+ fesetenv_register (new.fenv);
+
+ return old.l & ex;
+}
+
+static __always_inline void
+libc_feupdateenv_ppc (fenv_t *e)
+{
+ libc_feupdateenv_test_ppc (e, 0);
+}
+
+static __always_inline void
+libc_feholdsetround_ppc (fenv_t *e, int r)
+{
+ fenv_union_t old, new;
+
+ old.fenv = fegetenv_register ();
+ /* Clear current precision and set newer one. */
+ new.l = (old.l & ~0x3) | r;
+ *e = old.fenv;
+
+ if ((old.l & _FPU_MASK_ALL) != 0)
+ (void) __fe_mask_env ();
+ fesetenv_register (new.fenv);
+}
+
+static __always_inline void
+libc_feresetround_ppc (fenv_t *envp)
+{
+ fenv_union_t old, new;
+
+ new.fenv = *envp;
+ old.fenv = fegetenv_register ();
+
+ /* Restore rounding mode and exception enable from *envp and merge
+ exceptions. Leave fraction rounded/inexact and FP result/CC bits
+ unchanged. */
+ new.l = (old.l & _FPU_MASK_EXCEPT_ROUND)
+ | (new.l & _FPU_MASK_FRAC_INEX_RET_CC);
+
+ if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
+ (void) __fe_nomask_env ();
+
+ if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
+ (void) __fe_mask_env ();
+
+ /* Atomically enable and raise (if appropriate) exceptions set in `new'. */
+ fesetenv_register (new.fenv);
+}
+
+#define libc_feholdexceptf libc_feholdexcept_ppc
+#define libc_feholdexcept libc_feholdexcept_ppc
+#define libc_feholdexcept_setroundf libc_feholdexcept_setround_ppc
+#define libc_feholdexcept_setround libc_feholdexcept_setround_ppc
+#define libc_fetestexceptf libc_fetestexcept_ppc
+#define libc_fetestexcept libc_fetestexcept_ppc
+#define libc_fesetroundf libc_fesetround_ppc
+#define libc_fesetround libc_fesetround_ppc
+#define libc_fesetenvf libc_fesetenv_ppc
+#define libc_fesetenv libc_fesetenv_ppc
+#define libc_feupdateenv_testf libc_feupdateenv_test_ppc
+#define libc_feupdateenv_test libc_feupdateenv_test_ppc
+#define libc_feupdateenvf libc_feupdateenv_ppc
+#define libc_feupdateenv libc_feupdateenv_ppc
+#define libc_feholdsetroundf libc_feholdsetround_ppc
+#define libc_feholdsetround libc_feholdsetround_ppc
+#define libc_feresetroundf libc_feresetround_ppc
+#define libc_feresetround libc_feresetround_ppc
+
+
+/* We have support for rounding mode context. */
+#define HAVE_RM_CTX 1
+
+static __always_inline void
+libc_feholdexcept_setround_ppc_ctx (struct rm_ctx *ctx, int r)
+{
+ fenv_union_t old, new;
+
+ old.fenv = fegetenv_register ();
+
+ new.l = (old.l & _FPU_MASK_ROUNDING) | r;
+ ctx->env = old.fenv;
+ if (__glibc_unlikely (new.l != old.l))
+ {
+ if ((old.l & _FPU_MASK_ALL) != 0)
+ (void) __fe_mask_env ();
+ fesetenv_register (new.fenv);
+ ctx->updated_status = true;
+ }
+ else
+ ctx->updated_status = false;
+}
+
+static __always_inline void
+libc_fesetenv_ppc_ctx (struct rm_ctx *ctx)
+{
+ libc_fesetenv_ppc (&ctx->env);
+}
+
+static __always_inline void
+libc_feupdateenv_ppc_ctx (struct rm_ctx *ctx)
+{
+ if (__glibc_unlikely (ctx->updated_status))
+ libc_feupdateenv_test_ppc (&ctx->env, 0);
+}
+
+static __always_inline void
+libc_feholdsetround_ppc_ctx (struct rm_ctx *ctx, int r)
+{
+ fenv_union_t old, new;
+
+ old.fenv = fegetenv_register ();
+ new.l = (old.l & ~0x3) | r;
+ ctx->env = old.fenv;
+ if (__glibc_unlikely (new.l != old.l))
+ {
+ if ((old.l & _FPU_MASK_ALL) != 0)
+ (void) __fe_mask_env ();
+ fesetenv_register (new.fenv);
+ ctx->updated_status = true;
+ }
+ else
+ ctx->updated_status = false;
+}
+
+static __always_inline void
+libc_feresetround_ppc_ctx (struct rm_ctx *ctx)
+{
+ if (__glibc_unlikely (ctx->updated_status))
+ libc_feresetround_ppc (&ctx->env);
+}
+
+#define libc_feholdexcept_setroundf_ctx libc_feholdexcept_setround_ppc_ctx
+#define libc_feholdexcept_setround_ctx libc_feholdexcept_setround_ppc_ctx
+#define libc_fesetenv_ctx libc_fesetenv_ppc_ctx
+#define libc_fesetenvf_ctx libc_fesetenv_ppc_ctx
+#define libc_feholdsetround_ctx libc_feholdsetround_ppc_ctx
+#define libc_feholdsetroundf_ctx libc_feholdsetround_ppc_ctx
+#define libc_feresetround_ctx libc_feresetround_ppc_ctx
+#define libc_feresetroundf_ctx libc_feresetround_ppc_ctx
+#define libc_feupdateenvf_ctx libc_feupdateenv_ppc_ctx
+#define libc_feupdateenv_ctx libc_feupdateenv_ppc_ctx
+
+#endif
diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
index 6c00785..c8833d6 100644
--- a/sysdeps/powerpc/fpu/math_private.h
+++ b/sysdeps/powerpc/fpu/math_private.h
@@ -22,6 +22,7 @@
#include <sysdep.h>
#include <ldsodefs.h>
#include <dl-procinfo.h>
+#include <fenv_private.h>
#include_next <math_private.h>
# if __WORDSIZE == 64 || defined _ARCH_PWR4
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 16 ++
sysdeps/ieee754/ldbl-128ibm/e_expl.c | 31 ++--
sysdeps/ieee754/ldbl-128ibm/s_llrintl.c | 2 +-
sysdeps/ieee754/ldbl-128ibm/s_lrintl.c | 2 +-
sysdeps/ieee754/ldbl-128ibm/s_rintl.c | 2 +-
sysdeps/powerpc/fpu/fenv_libc.h | 6 +-
sysdeps/powerpc/fpu/fenv_private.h | 274 +++++++++++++++++++++++++++++++
sysdeps/powerpc/fpu/math_private.h | 1 +
8 files changed, 309 insertions(+), 25 deletions(-)
create mode 100644 sysdeps/powerpc/fpu/fenv_private.h
hooks/post-receive
--
GNU C Library master sources