This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch master updated. glibc-2.18-432-g41e8926


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  41e8926aa4b7f17bc95984737ee82a254ad0911c (commit)
      from  085b5ddfe301eb74bc9f824ff293f054c1c910b7 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=41e8926aa4b7f17bc95984737ee82a254ad0911c

commit 41e8926aa4b7f17bc95984737ee82a254ad0911c
Author: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Date:   Mon Nov 25 06:12:30 2013 -0600

    PowerPC: Set/restore rounding mode only when needed
    
    This patch improves the performance of some math functions by adding libc_fexxx
    variants of the inline functions that handle both FPU rounding mode and
    exception set/restore, and by using them in the libc_fexxx_ctx functions.  It is
    based on the already implemented fexxx family of functions for PPC with FPU.
    
    Here is a summary of the performance improvements due to this patch (measured
    on a POWER7 machine):
    
    Before:
    
    cos(): ITERS:9.5895e+07: TOTAL:5116.03Mcy, MAX:77.6cy, MIN:49.792cy, 18744 calls/Mcy
    exp(): ITERS:2.827e+07: TOTAL:5187.15Mcy, MAX:494.018cy, MIN:38.422cy, 5450.01 calls/Mcy
    pow(): ITERS:6.1705e+07: TOTAL:5144.26Mcy, MAX:171.95cy, MIN:29.935cy, 11994.9 calls/Mcy
    sin(): ITERS:8.6898e+07: TOTAL:5117.06Mcy, MAX:83.841cy, MIN:46.582cy, 16982 calls/Mcy
    tan(): ITERS:2.9473e+07: TOTAL:5115.39Mcy, MAX:191.017cy, MIN:172.352cy, 5761.63 calls/Mcy
    
    After:
    
    cos(): ITERS:2.05265e+08: TOTAL:5111.37Mcy, MAX:78.754cy, MIN:24.196cy, 40158.5 calls/Mcy
    exp(): ITERS:3.341e+07: TOTAL:5170.84Mcy, MAX:476.317cy, MIN:15.574cy, 6461.23 calls/Mcy
    pow(): ITERS:7.6153e+07: TOTAL:5129.1Mcy, MAX:147.5cy, MIN:30.916cy, 14847.2 calls/Mcy
    sin(): ITERS:1.58816e+08: TOTAL:5115.11Mcy, MAX:1490.39cy, MIN:22.341cy, 31048.4 calls/Mcy
    tan(): ITERS:3.4964e+07: TOTAL:5114.18Mcy, MAX:177.422cy, MIN:146.115cy, 6836.68 calls/Mcy

diff --git a/ChangeLog b/ChangeLog
index b9201fc..9e2a0c6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2013-11-25  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
+
+	* sysdeps/ieee754/ldbl-128ibm/e_expl.c (__ieee754_expl): Use
+	SET_RESTORE_ROUND instead of feholdexcept/fesetround/fesetenv.
+	* sysdeps/powerpc/fpu/fenv_libc.h (__fegetround): Remove define.
+	(__fesetround): Remove define.
+	* sysdeps/powerpc/fpu/fenv_private.h: New file: Inline floating point
+	rounding and exceptions handling.
+	* sysdeps/powerpc/fpu/math_private.h: Include fenv_private.h.
+	* sysdeps/powerpc/fpu/fenv_libc.h (__fe_mask_env): Define as hidden.
+	(__fe_nomask_env): Likewise.
+	* sysdeps/ieee754/ldbl-128ibm/s_llrintl.c (__llrintl): Use
+	__fegetround instead of fegetround.
+	* sysdeps/ieee754/ldbl-128ibm/s_lrintl.c (__lrintl): Likewise.
+	* sysdeps/ieee754/ldbl-128ibm/s_rintl.c (__rintl): Likewise.
+
 2013-11-21  Roland McGrath  <roland@hack.frob.com>
 
 	* malloc/malloc.c: Move #include <sys/param.h> to the top; comment why
diff --git a/sysdeps/ieee754/ldbl-128ibm/e_expl.c b/sysdeps/ieee754/ldbl-128ibm/e_expl.c
index f7c50bf..65ef185 100644
--- a/sysdeps/ieee754/ldbl-128ibm/e_expl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/e_expl.c
@@ -134,18 +134,17 @@ static const long double C[] = {
 long double
 __ieee754_expl (long double x)
 {
+  long double result, x22;
+  union ibm_extended_long_double ex2_u, scale_u;
+  int unsafe;
+
   /* Check for usual case.  */
   if (isless (x, himark) && isgreater (x, lomark))
     {
-      int tval1, tval2, unsafe, n_i, exponent2;
-      long double x22, n, result, xl;
-      union ibm_extended_long_double ex2_u, scale_u;
-      fenv_t oldenv;
-
-      feholdexcept (&oldenv);
-#ifdef FE_TONEAREST
-      fesetround (FE_TONEAREST);
-#endif
+      int tval1, tval2, n_i, exponent2;
+      long double n, xl;
+
+      SET_RESTORE_ROUND (FE_TONEAREST);
 
       n = __roundl (x*M_1_LN2);
       x = x-n*M_LN2_0;
@@ -201,11 +200,6 @@ __ieee754_expl (long double x)
 	 less than 4.8e-39.  */
       x22 = x + x*x*(P1+x*(P2+x*(P3+x*(P4+x*(P5+x*P6)))));
 
-      /* Return result.  */
-      fesetenv (&oldenv);
-
-      result = x22 * ex2_u.ld + ex2_u.ld;
-
       /* Now we can test whether the result is ultimate or if we are unsure.
 	 In the later case we should probably call a mpn based routine to give
 	 the ultimate result.
@@ -235,10 +229,6 @@ __ieee754_expl (long double x)
 	    return __ieee754_expl_proc2 (origx);
 	  }
        */
-      if (!unsafe)
-	return result;
-      else
-	return result * scale_u.ld;
     }
   /* Exceptional cases:  */
   else if (isless (x, himark))
@@ -253,5 +243,10 @@ __ieee754_expl (long double x)
   else
     /* Return x, if x is a NaN or Inf; or overflow, otherwise.  */
     return TWO1023*x;
+
+  result = x22 * ex2_u.ld + ex2_u.ld;
+  if (!unsafe)
+    return result;
+  return result * scale_u.ld;
 }
 strong_alias (__ieee754_expl, __expl_finite)
diff --git a/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c b/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c
index 8560349..3503973 100644
--- a/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c
@@ -43,7 +43,7 @@ __llrintl (long double x)
 #endif
     )
     {
-      save_round = fegetround ();
+      save_round = __fegetround ();
 
       if (__builtin_expect ((xh == -(double) (-__LONG_LONG_MAX__ - 1)), 0))
 	{
diff --git a/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c b/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c
index 588098d..49dbd42 100644
--- a/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c
@@ -49,7 +49,7 @@ __lrintl (long double x)
 #endif
     )
     {
-      save_round = fegetround ();
+      save_round = __fegetround ();
 
 #if __LONG_MAX__ == 2147483647
       long long llhi = (long long) xh;
diff --git a/sysdeps/ieee754/ldbl-128ibm/s_rintl.c b/sysdeps/ieee754/ldbl-128ibm/s_rintl.c
index 48dbe85..5fd6bb8 100644
--- a/sysdeps/ieee754/ldbl-128ibm/s_rintl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/s_rintl.c
@@ -40,7 +40,7 @@ __rintl (long double x)
 					     __builtin_inf ()), 1))
     {
       double orig_xh;
-      int save_round = fegetround ();
+      int save_round = __fegetround ();
 
       /* Long double arithmetic, including the canonicalisation below,
 	 only works in round-to-nearest mode.  */
diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
index cb15c1c..ecd6b91 100644
--- a/sysdeps/powerpc/fpu/fenv_libc.h
+++ b/sysdeps/powerpc/fpu/fenv_libc.h
@@ -23,9 +23,9 @@
 #include <ldsodefs.h>
 #include <sysdep.h>
 
-extern const fenv_t *__fe_nomask_env (void);
+extern const fenv_t *__fe_nomask_env (void) attribute_hidden;
 
-extern const fenv_t *__fe_mask_env (void);
+extern const fenv_t *__fe_mask_env (void) attribute_hidden;
 
 /* The sticky bits in the FPSCR indicating exceptions have occurred.  */
 #define FPSCR_STICKY_BITS ((FE_ALL_EXCEPT | FE_ALL_INVALID) & ~FE_INVALID)
@@ -83,7 +83,6 @@ __fegetround (void)
 		"mfcr  %0" : "=r"(result) : : "cr7");
   return result & 3;
 }
-#define fegetround() __fegetround()
 
 static inline int
 __fesetround (int round)
@@ -107,7 +106,6 @@ __fesetround (int round)
 
   return 0;
 }
-#define fesetround(mode) __fesetround(mode)
 
 /* Definitions of all the FPSCR bit numbers */
 enum {
diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h
new file mode 100644
index 0000000..293f840
--- /dev/null
+++ b/sysdeps/powerpc/fpu/fenv_private.h
@@ -0,0 +1,274 @@
+/* Private floating point rounding and exceptions handling. PowerPC version.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef FENV_PRIVATE_H
+#define FENV_PRIVATE_H 1
+
+#include <fenv.h>
+#include <fenv_libc.h>
+#include <fpu_control.h>
+
+#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM \
+                      | _FPU_MASK_XM | _FPU_MASK_IM)
+
+/* Mask everything but the rounding mode and non-IEEE arithmetic flags.  */
+#define _FPU_MASK_ROUNDING 0xffffffff00000007LL
+
+/* Mask restore rounding mode and exception enabled.  */
+#define _FPU_MASK_EXCEPT_ROUND 0xffffffff1fffff00LL
+
+/* Mask exception enable but fraction rounded/inexact and FP result/CC
+   bits.  */
+#define _FPU_MASK_FRAC_INEX_RET_CC 0x1ff80fff
+
+static __always_inline void
+libc_feholdexcept_ppc (fenv_t *envp)
+{
+  fenv_union_t old, new;
+
+  old.fenv = *envp = fegetenv_register ();
+
+  new.l = old.l & _FPU_MASK_ROUNDING;
+
+  /* If the old env had any enabled exceptions, then mask SIGFPE in the
+     MSR FE0/FE1 bits.  This may allow the FPU to run faster because it
+     always takes the default action and can not generate SIGFPE.  */
+  if ((old.l & _FPU_MASK_ALL) != 0)
+    (void) __fe_mask_env ();
+
+  fesetenv_register (new.fenv);
+}
+
+static __always_inline void
+libc_fesetround_ppc (int r)
+{
+  __fesetround (r);
+}
+
+static __always_inline void
+libc_feholdexcept_setround_ppc (fenv_t *envp, int r)
+{
+  fenv_union_t old, new;
+
+  old.fenv = *envp = fegetenv_register ();
+
+  new.l = (old.l & _FPU_MASK_ROUNDING) | r;
+
+  if ((old.l & _FPU_MASK_ALL) != 0)
+    (void) __fe_mask_env ();
+
+  fesetenv_register (new.fenv);
+}
+
+static __always_inline int
+libc_fetestexcept_ppc (int e)
+{
+  fenv_union_t u;
+  u.fenv = fegetenv_register ();
+  return u.l & e;
+}
+
+static __always_inline void
+libc_fesetenv_ppc (const fenv_t *envp)
+{
+  fenv_union_t old, new;
+
+  new.fenv = *envp;
+  old.fenv = fegetenv_register ();
+
+  /* If the old env has no enabled exceptions and the new env has any enabled
+     exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits.  This will put the
+     hardware into "precise mode" and may cause the FPU to run slower on some
+     hardware.  */
+  if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
+    (void) __fe_nomask_env ();
+
+  /* If the old env had any enabled exceptions and the new env has no enabled
+     exceptions, then mask SIGFPE in the MSR FE0/FE1 bits.  This may allow the
+     FPU to run faster because it always takes the default action and can not
+     generate SIGFPE.  */
+  if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
+    (void) __fe_mask_env ();
+
+  fesetenv_register (*envp);
+}
+
+static __always_inline int
+libc_feupdateenv_test_ppc (fenv_t *envp, int ex)
+{
+  fenv_union_t old, new;
+
+  new.fenv = *envp;
+  old.fenv = fegetenv_register ();
+
+  /* Restore rounding mode and exception enable from *envp and merge
+     exceptions.  Leave fraction rounded/inexact and FP result/CC bits
+     unchanged.  */
+  new.l = (old.l & _FPU_MASK_EXCEPT_ROUND)
+          | (new.l & _FPU_MASK_FRAC_INEX_RET_CC);
+
+  if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
+    (void) __fe_nomask_env ();
+
+  if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
+    (void) __fe_mask_env ();
+
+  fesetenv_register (new.fenv);
+
+  return old.l & ex;
+}
+
+static __always_inline void
+libc_feupdateenv_ppc (fenv_t *e)
+{
+  libc_feupdateenv_test_ppc (e, 0);
+}
+
+static __always_inline void
+libc_feholdsetround_ppc (fenv_t *e, int r)
+{
+  fenv_union_t old, new;
+
+  old.fenv = fegetenv_register ();
+  /* Clear the current rounding mode and set the new one.  */
+  new.l = (old.l & ~0x3) | r;
+  *e = old.fenv;
+
+  if ((old.l & _FPU_MASK_ALL) != 0)
+    (void) __fe_mask_env ();
+  fesetenv_register (new.fenv);
+}
+
+static __always_inline void
+libc_feresetround_ppc (fenv_t *envp)
+{
+  fenv_union_t old, new;
+
+  new.fenv = *envp;
+  old.fenv = fegetenv_register ();
+
+  /* Restore rounding mode and exception enable from *envp and merge
+     exceptions.  Leave fraction rounded/inexact and FP result/CC bits
+     unchanged.  */
+  new.l = (old.l & _FPU_MASK_EXCEPT_ROUND)
+          | (new.l & _FPU_MASK_FRAC_INEX_RET_CC);
+
+  if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
+    (void) __fe_nomask_env ();
+
+  if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
+    (void) __fe_mask_env ();
+
+  /* Atomically enable and raise (if appropriate) exceptions set in `new'.  */
+  fesetenv_register (new.fenv);
+}
+
+#define libc_feholdexceptf           libc_feholdexcept_ppc
+#define libc_feholdexcept            libc_feholdexcept_ppc
+#define libc_feholdexcept_setroundf  libc_feholdexcept_setround_ppc
+#define libc_feholdexcept_setround   libc_feholdexcept_setround_ppc
+#define libc_fetestexceptf           libc_fetestexcept_ppc
+#define libc_fetestexcept            libc_fetestexcept_ppc
+#define libc_fesetroundf             libc_fesetround_ppc
+#define libc_fesetround              libc_fesetround_ppc
+#define libc_fesetenvf               libc_fesetenv_ppc
+#define libc_fesetenv                libc_fesetenv_ppc
+#define libc_feupdateenv_testf       libc_feupdateenv_test_ppc
+#define libc_feupdateenv_test        libc_feupdateenv_test_ppc
+#define libc_feupdateenvf            libc_feupdateenv_ppc
+#define libc_feupdateenv             libc_feupdateenv_ppc
+#define libc_feholdsetroundf         libc_feholdsetround_ppc
+#define libc_feholdsetround          libc_feholdsetround_ppc
+#define libc_feresetroundf           libc_feresetround_ppc
+#define libc_feresetround            libc_feresetround_ppc
+
+
+/* We have support for rounding mode context.  */
+#define HAVE_RM_CTX 1
+
+static __always_inline void
+libc_feholdexcept_setround_ppc_ctx (struct rm_ctx *ctx, int r)
+{
+  fenv_union_t old, new;
+
+  old.fenv = fegetenv_register ();
+
+  new.l = (old.l & _FPU_MASK_ROUNDING) | r;
+  ctx->env = old.fenv;
+  if (__glibc_unlikely (new.l != old.l))
+    {
+      if ((old.l & _FPU_MASK_ALL) != 0)
+	(void) __fe_mask_env ();
+      fesetenv_register (new.fenv);
+      ctx->updated_status = true;
+    }
+  else
+    ctx->updated_status = false;
+}
+
+static __always_inline void
+libc_fesetenv_ppc_ctx (struct rm_ctx *ctx)
+{
+  libc_fesetenv_ppc (&ctx->env);
+}
+
+static __always_inline void
+libc_feupdateenv_ppc_ctx (struct rm_ctx *ctx)
+{
+  if (__glibc_unlikely (ctx->updated_status))
+    libc_feupdateenv_test_ppc (&ctx->env, 0);
+}
+
+static __always_inline void
+libc_feholdsetround_ppc_ctx (struct rm_ctx *ctx, int r)
+{
+  fenv_union_t old, new;
+
+  old.fenv = fegetenv_register ();
+  new.l = (old.l & ~0x3) | r;
+  ctx->env = old.fenv;
+  if (__glibc_unlikely (new.l != old.l))
+    {
+      if ((old.l & _FPU_MASK_ALL) != 0)
+	(void) __fe_mask_env ();
+      fesetenv_register (new.fenv);
+      ctx->updated_status = true;
+    }
+  else
+    ctx->updated_status = false;
+}
+
+static __always_inline void
+libc_feresetround_ppc_ctx (struct rm_ctx *ctx)
+{
+  if (__glibc_unlikely (ctx->updated_status))
+    libc_feresetround_ppc (&ctx->env);
+}
+
+#define libc_feholdexcept_setroundf_ctx  libc_feholdexcept_setround_ppc_ctx
+#define libc_feholdexcept_setround_ctx   libc_feholdexcept_setround_ppc_ctx
+#define libc_fesetenv_ctx                libc_fesetenv_ppc_ctx
+#define libc_fesetenvf_ctx               libc_fesetenv_ppc_ctx
+#define libc_feholdsetround_ctx          libc_feholdsetround_ppc_ctx
+#define libc_feholdsetroundf_ctx         libc_feholdsetround_ppc_ctx
+#define libc_feresetround_ctx            libc_feresetround_ppc_ctx
+#define libc_feresetroundf_ctx           libc_feresetround_ppc_ctx
+#define libc_feupdateenvf_ctx            libc_feupdateenv_ppc_ctx
+#define libc_feupdateenv_ctx             libc_feupdateenv_ppc_ctx
+
+#endif
diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
index 6c00785..c8833d6 100644
--- a/sysdeps/powerpc/fpu/math_private.h
+++ b/sysdeps/powerpc/fpu/math_private.h
@@ -22,6 +22,7 @@
 #include <sysdep.h>
 #include <ldsodefs.h>
 #include <dl-procinfo.h>
+#include <fenv_private.h>
 #include_next <math_private.h>
 
 # if __WORDSIZE == 64 || defined _ARCH_PWR4

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                               |   16 ++
 sysdeps/ieee754/ldbl-128ibm/e_expl.c    |   31 ++--
 sysdeps/ieee754/ldbl-128ibm/s_llrintl.c |    2 +-
 sysdeps/ieee754/ldbl-128ibm/s_lrintl.c  |    2 +-
 sysdeps/ieee754/ldbl-128ibm/s_rintl.c   |    2 +-
 sysdeps/powerpc/fpu/fenv_libc.h         |    6 +-
 sysdeps/powerpc/fpu/fenv_private.h      |  274 +++++++++++++++++++++++++++++++
 sysdeps/powerpc/fpu/math_private.h      |    1 +
 8 files changed, 309 insertions(+), 25 deletions(-)
 create mode 100644 sysdeps/powerpc/fpu/fenv_private.h


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]