This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 7/8] Create and use SET_RESTORE_{ENV_,}ROUND{,F,L}.


	* math/math_private.h (SET_RESTORE_ROUND, SET_RESTORE_ROUNDF,
	SET_RESTORE_ROUNDL, SET_RESTORE_ENV_ROUND, SET_RESTORE_ENV_ROUNDF,
	SET_RESTORE_ENV_ROUNDL): New.
	* sysdeps/ieee754/dbl-64/e_exp2.c (__ieee754_exp2): Use
	SET_RESTORE_ENV_ROUND.
	* sysdeps/ieee754/flt-32/e_exp2f.c (__ieee754_exp2f): Use
	SET_RESTORE_ENV_ROUNDF.
	* sysdeps/ieee754/flt-32/e_expf.c (__ieee754_expf): Likewise.
	* sysdeps/ieee754/dbl-64/s_fma.c (__fma): Use SET_RESTORE_ROUND.
	* sysdeps/ieee754/dbl-64/e_exp.c (__ieee754_exp): Likewise.
	* sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Likewise.
	* sysdeps/ieee754/dbl-64/s_fmaf.c (__fmaf): Likewise.
	* sysdeps/ieee754/dbl-64/s_sin.c (__sin, __cos): Likewise.
	* sysdeps/ieee754/dbl-64/s_tan.c (tan): Likewise.

---
 math/math_private.h              |   25 ++++++++
 sysdeps/ieee754/dbl-64/e_exp.c   |    4 +-
 sysdeps/ieee754/dbl-64/e_exp2.c  |   97 +++++++++++++++---------------
 sysdeps/ieee754/dbl-64/e_pow.c   |    4 +-
 sysdeps/ieee754/dbl-64/s_fma.c   |  121 ++++++++++++++++++-------------------
 sysdeps/ieee754/dbl-64/s_fmaf.c  |   16 +++--
 sysdeps/ieee754/dbl-64/s_sin.c   |    8 +--
 sysdeps/ieee754/dbl-64/s_tan.c   |    4 +-
 sysdeps/ieee754/flt-32/e_exp2f.c |   89 ++++++++++++++--------------
 sysdeps/ieee754/flt-32/e_expf.c  |   49 ++++++++--------
 10 files changed, 214 insertions(+), 203 deletions(-)

diff --git a/math/math_private.h b/math/math_private.h
index 20b4b5a..c9cb4e8 100644
--- a/math/math_private.h
+++ b/math/math_private.h
@@ -449,6 +449,31 @@ default_libc_feupdateenv (fenv_t *e)
 # define libc_feupdateenvl default_libc_feupdateenv
 #endif
 
+/* Save and restore the rounding mode within a lexical block.  */
+
+#define SET_RESTORE_ROUND(RM) \
+  fenv_t __libc_save_rm __attribute__((cleanup(libc_feupdateenv)));	\
+  libc_feholdexcept_setround (&__libc_save_rm, (RM))
+#define SET_RESTORE_ROUNDF(RM) \
+  fenv_t __libc_save_rm __attribute__((cleanup(libc_feupdateenvf)));	\
+  libc_feholdexcept_setroundf (&__libc_save_rm, (RM))
+#define SET_RESTORE_ROUNDL(RM) \
+  fenv_t __libc_save_rm __attribute__((cleanup(libc_feupdateenvl)));	\
+  libc_feholdexcept_setroundl (&__libc_save_rm, (RM))
+
+/* Save and restore the rounding mode within a lexical block, and also
+   the set of exceptions raised within the block may be discarded.  */
+
+#define SET_RESTORE_ENV_ROUND(RM) \
+  fenv_t __libc_save_rm __attribute__((cleanup(libc_fesetenv)));	\
+  libc_feholdexcept_setround (&__libc_save_rm, (RM))
+#define SET_RESTORE_ENV_ROUNDF(RM) \
+  fenv_t __libc_save_rm __attribute__((cleanup(libc_fesetenvf)));	\
+  libc_feholdexcept_setroundf (&__libc_save_rm, (RM))
+#define SET_RESTORE_ENV_ROUNDL(RM) \
+  fenv_t __libc_save_rm __attribute__((cleanup(libc_fesetenvl)));	\
+  libc_feholdexcept_setroundl (&__libc_save_rm, (RM))
+
 #define __nan(str) \
   (__builtin_constant_p (str) && str[0] == '\0' ? NAN : __nan (str))
 #define __nanf(str) \
diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c
index 8231c56..e6af6b8 100644
--- a/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/sysdeps/ieee754/dbl-64/e_exp.c
@@ -59,10 +59,9 @@ __ieee754_exp(double x) {
   int4 k;
 #endif
   int4 i,j,m,n,ex;
-  fenv_t env;
   double retval;
 
-  libc_feholdexcept_setround (&env, FE_TONEAREST);
+  SET_RESTORE_ROUND (FE_TONEAREST);
 
   junk1.x = x;
   m = junk1.i[HIGH_HALF];
@@ -157,7 +156,6 @@ __ieee754_exp(double x) {
     else { retval = __slowexp(x); goto ret; }
   }
  ret:
-  libc_feupdateenv (&env);
   return retval;
 }
 #ifndef __ieee754_exp
diff --git a/sysdeps/ieee754/dbl-64/e_exp2.c b/sysdeps/ieee754/dbl-64/e_exp2.c
index 4cf879b..477ecf2 100644
--- a/sysdeps/ieee754/dbl-64/e_exp2.c
+++ b/sysdeps/ieee754/dbl-64/e_exp2.c
@@ -61,57 +61,56 @@ __ieee754_exp2 (double x)
       int tval, unsafe;
       double rx, x22, result;
       union ieee754_double ex2_u, scale_u;
-      fenv_t oldenv;
-
-      libc_feholdexcept_setround (&oldenv, FE_TONEAREST);
-
-      /* 1. Argument reduction.
-	 Choose integers ex, -256 <= t < 256, and some real
-	 -1/1024 <= x1 <= 1024 so that
-	 x = ex + t/512 + x1.
-
-	 First, calculate rx = ex + t/512.  */
-      rx = x + THREEp42;
-      rx -= THREEp42;
-      x -= rx;  /* Compute x=x1. */
-      /* Compute tval = (ex*512 + t)+256.
-	 Now, t = (tval mod 512)-256 and ex=tval/512  [that's mod, NOT %; and
-	 /-round-to-nearest not the usual c integer /].  */
-      tval = (int) (rx * 512.0 + 256.0);
-
-      /* 2. Adjust for accurate table entry.
-	 Find e so that
-	 x = ex + t/512 + e + x2
-	 where -1e6 < e < 1e6, and
-	 (double)(2^(t/512+e))
-	 is accurate to one part in 2^-64.  */
-
-      /* 'tval & 511' is the same as 'tval%512' except that it's always
-	 positive.
-	 Compute x = x2.  */
-      x -= exp2_deltatable[tval & 511];
-
-      /* 3. Compute ex2 = 2^(t/512+e+ex).  */
-      ex2_u.d = exp2_accuratetable[tval & 511];
-      tval >>= 9;
-      unsafe = abs(tval) >= -DBL_MIN_EXP - 1;
-      ex2_u.ieee.exponent += tval >> unsafe;
-      scale_u.d = 1.0;
-      scale_u.ieee.exponent += tval - (tval >> unsafe);
-
-      /* 4. Approximate 2^x2 - 1, using a fourth-degree polynomial,
-	 with maximum error in [-2^-10-2^-30,2^-10+2^-30]
-	 less than 10^-19.  */
-
-      x22 = (((.0096181293647031180
-	       * x + .055504110254308625)
-	      * x + .240226506959100583)
-	     * x + .69314718055994495) * ex2_u.d;
-      math_opt_barrier (x22);
 
-      /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex).  */
-      libc_fesetenv (&oldenv);
+      {
+	SET_RESTORE_ENV_ROUND (FE_TONEAREST);
+
+	/* 1. Argument reduction.
+	   Choose integers ex, -256 <= t < 256, and some real
+	   -1/1024 <= x1 <= 1024 so that
+	   x = ex + t/512 + x1.
+
+	   First, calculate rx = ex + t/512.  */
+	rx = x + THREEp42;
+	rx -= THREEp42;
+	x -= rx;  /* Compute x=x1. */
+	/* Compute tval = (ex*512 + t)+256.
+	   Now, t = (tval mod 512)-256 and ex=tval/512  [that's mod, NOT %;
+	   and /-round-to-nearest not the usual c integer /].  */
+	tval = (int) (rx * 512.0 + 256.0);
+
+	/* 2. Adjust for accurate table entry.
+	   Find e so that
+	   x = ex + t/512 + e + x2
+	   where -1e6 < e < 1e6, and
+	   (double)(2^(t/512+e))
+	   is accurate to one part in 2^-64.  */
+
+	/* 'tval & 511' is the same as 'tval%512' except that it's always
+	   positive.
+	   Compute x = x2.  */
+	x -= exp2_deltatable[tval & 511];
+
+	/* 3. Compute ex2 = 2^(t/512+e+ex).  */
+	ex2_u.d = exp2_accuratetable[tval & 511];
+	tval >>= 9;
+	unsafe = abs(tval) >= -DBL_MIN_EXP - 1;
+	ex2_u.ieee.exponent += tval >> unsafe;
+	scale_u.d = 1.0;
+	scale_u.ieee.exponent += tval - (tval >> unsafe);
+
+	/* 4. Approximate 2^x2 - 1, using a fourth-degree polynomial,
+	   with maximum error in [-2^-10-2^-30,2^-10+2^-30]
+	   less than 10^-19.  */
+
+	x22 = (((.0096181293647031180
+		 * x + .055504110254308625)
+		* x + .240226506959100583)
+	       * x + .69314718055994495) * ex2_u.d;
+        math_opt_barrier (x22);
+      }
 
+      /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex).  */
       result = x22 * x + ex2_u.d;
 
       if (!unsafe)
diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c
index f668b4b..aa73a9f 100644
--- a/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/sysdeps/ieee754/dbl-64/e_pow.c
@@ -85,10 +85,9 @@ __ieee754_pow(double x, double y) {
        (u.i[HIGH_HALF]==0 && u.i[LOW_HALF]!=0))  &&
 				      /*   2^-1023< x<= 2^-1023 * 0x1.0000ffffffff */
       (v.i[HIGH_HALF]&0x7fffffff) < 0x4ff00000) {              /* if y<-1 or y>1   */
-    fenv_t env;
     double retval;
 
-    libc_feholdexcept_setround (&env, FE_TONEAREST);
+    SET_RESTORE_ROUND (FE_TONEAREST);
 
     z = log1(x,&aa,&error);                                 /* x^y  =e^(y log (X)) */
     t = y*134217729.0;
@@ -105,7 +104,6 @@ __ieee754_pow(double x, double y) {
     t = __exp1(a1,a2,1.9e16*error);     /* return -10 or 0 if wasn't computed exactly */
     retval = (t>0)?t:power1(x,y);
 
-    libc_feupdateenv (&env);
     return retval;
   }
 
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c
index a27e246..54a69d4 100644
--- a/sysdeps/ieee754/dbl-64/s_fma.c
+++ b/sysdeps/ieee754/dbl-64/s_fma.c
@@ -147,74 +147,71 @@ __fma (double x, double y, double z)
   t2 = z - t2;
   double a2 = t1 + t2;
 
-  fenv_t env;
-  libc_feholdexcept_setround (&env, FE_TOWARDZERO);
-  /* Perform m2 + a2 addition with round to odd.  */
-  u.d = a2 + m2;
+  {
+    SET_RESTORE_ROUND (FE_TOWARDZERO);
 
-  if (__builtin_expect (adjust == 0, 1))
-    {
-      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
-	u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
-      libc_feupdateenv (&env);
-      /* Result is a1 + u.d.  */
-      return a1 + u.d;
-    }
-  else if (__builtin_expect (adjust > 0, 1))
-    {
-      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
-	u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
-      libc_feupdateenv (&env);
-      /* Result is a1 + u.d, scaled up.  */
-      return (a1 + u.d) * 0x1p53;
-    }
-  else
+    /* Perform m2 + a2 addition with round to odd.  */
+    u.d = a2 + m2;
+
+    if (__builtin_expect (adjust == 0, 1))
+      {
+	if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
+	  u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
+	/* Result is a1 + u.d.  */
+	return a1 + u.d;
+      }
+    else if (__builtin_expect (adjust > 0, 1))
+      {
+	if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
+	  u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
+	/* Result is a1 + u.d, scaled up.  */
+	return (a1 + u.d) * 0x1p53;
+      }
+
+    if ((u.ieee.mantissa1 & 1) == 0)
+      u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
+    v.d = a1 + u.d;
+  }
+
+  /* Ensure the following computations are performed in default rounding
+     mode instead of just reusing the round to zero computation.  */
+  math_opt_barrier (u);
+
+  int j = libc_fetestexcept (FE_INEXACT) != 0;
+  /* If a1 + u.d is exact, the only rounding happens during scaling down.  */
+  if (j == 0)
+    return v.d * 0x1p-106;
+  /* If result rounded to zero is not subnormal, no double
+     rounding will occur.  */
+  if (v.ieee.exponent > 106)
+    return (a1 + u.d) * 0x1p-106;
+  /* If v.d * 0x1p-106 with round to zero is a subnormal above
+     or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa
+     down just by 1 bit, which means v.ieee.mantissa1 |= j would
+     change the round bit, not sticky or guard bit.
+     v.d * 0x1p-106 never normalizes by shifting up,
+     so round bit plus sticky bit should be already enough
+     for proper rounding.  */
+  if (v.ieee.exponent == 106)
     {
-      if ((u.ieee.mantissa1 & 1) == 0)
-	u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
-      v.d = a1 + u.d;
-      int j = libc_fetestexcept (FE_INEXACT) != 0;
-      libc_feupdateenv (&env);
-      /* Ensure the following computations are performed in default rounding
-	 mode instead of just reusing the round to zero computation.  */
-      asm volatile ("" : "=m" (u) : "m" (u));
-      /* If a1 + u.d is exact, the only rounding happens during
-	 scaling down.  */
-      if (j == 0)
-	return v.d * 0x1p-106;
-      /* If result rounded to zero is not subnormal, no double
-	 rounding will occur.  */
-      if (v.ieee.exponent > 106)
-	return (a1 + u.d) * 0x1p-106;
-      /* If v.d * 0x1p-106 with round to zero is a subnormal above
-	 or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa
-	 down just by 1 bit, which means v.ieee.mantissa1 |= j would
-	 change the round bit, not sticky or guard bit.
-	 v.d * 0x1p-106 never normalizes by shifting up,
-	 so round bit plus sticky bit should be already enough
-	 for proper rounding.  */
-      if (v.ieee.exponent == 106)
+      /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
+         v.ieee.mantissa1 & 1 is the round bit and j is our sticky
+         bit.  In round-to-nearest 001 rounds down like 00,
+         011 rounds up, even though 01 rounds down (thus we need
+         to adjust), 101 rounds down like 10 and 111 rounds up like 11.  */
+      if ((v.ieee.mantissa1 & 3) == 1)
 	{
-	  /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
-	     v.ieee.mantissa1 & 1 is the round bit and j is our sticky
-	     bit.  In round-to-nearest 001 rounds down like 00,
-	     011 rounds up, even though 01 rounds down (thus we need
-	     to adjust), 101 rounds down like 10 and 111 rounds up
-	     like 11.  */
-	  if ((v.ieee.mantissa1 & 3) == 1)
-	    {
-	      v.d *= 0x1p-106;
-	      if (v.ieee.negative)
-		return v.d - 0x1p-1074 /* __DBL_DENORM_MIN__ */;
-	      else
-		return v.d + 0x1p-1074 /* __DBL_DENORM_MIN__ */;
-	    }
+	  v.d *= 0x1p-106;
+	  if (v.ieee.negative)
+	    return v.d - 0x1p-1074 /* __DBL_DENORM_MIN__ */;
 	  else
-	    return v.d * 0x1p-106;
+	    return v.d + 0x1p-1074 /* __DBL_DENORM_MIN__ */;
 	}
-      v.ieee.mantissa1 |= j;
-      return v.d * 0x1p-106;
+      else
+	return v.d * 0x1p-106;
     }
+  v.ieee.mantissa1 |= j;
+  return v.d * 0x1p-106;
 }
 #ifndef __fma
 weak_alias (__fma, fma)
diff --git a/sysdeps/ieee754/dbl-64/s_fmaf.c b/sysdeps/ieee754/dbl-64/s_fmaf.c
index 00cd382..30e4e33 100644
--- a/sysdeps/ieee754/dbl-64/s_fmaf.c
+++ b/sysdeps/ieee754/dbl-64/s_fmaf.c
@@ -31,16 +31,18 @@
 float
 __fmaf (float x, float y, float z)
 {
-  fenv_t env;
   /* Multiplication is always exact.  */
   double temp = (double) x * (double) y;
   union ieee754_double u;
-  libc_feholdexcept_setroundf (&env, FE_TOWARDZERO);
-  /* Perform addition with round to odd.  */
-  u.d = temp + (double) z;
-  if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
-    u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
-  libc_feupdateenv (&env);
+
+  {
+    SET_RESTORE_ROUND (FE_TOWARDZERO);
+    /* Perform addition with round to odd.  */
+    u.d = temp + (double) z;
+    if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
+      u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
+  }
+
   /* And finally truncation with round to nearest.  */
   return (float) u.d;
 }
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index 32ba66d..1f02a54 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -108,10 +108,9 @@ __sin(double x){
 #if 0
 	int4 nn;
 #endif
-	fenv_t env;
 	double retval = 0;
 
-	libc_feholdexcept_setround (&env, FE_TONEAREST);
+	SET_RESTORE_ROUND (FE_TONEAREST);
 
 	u.x = x;
 	m = u.i[HIGH_HALF];
@@ -365,7 +364,6 @@ __sin(double x){
 	}
 
  ret:
-	libc_feupdateenv (&env);
 	return retval;
 }
 
@@ -383,10 +381,9 @@ __cos(double x)
   mynumber u,v;
   int4 k,m,n;
 
-  fenv_t env;
   double retval = 0;
 
-  libc_feholdexcept_setround (&env, FE_TONEAREST);
+  SET_RESTORE_ROUND (FE_TONEAREST);
 
   u.x = x;
   m = u.i[HIGH_HALF];
@@ -635,7 +632,6 @@ __cos(double x)
   }
 
  ret:
-  libc_feupdateenv (&env);
   return retval;
 }
 
diff --git a/sysdeps/ieee754/dbl-64/s_tan.c b/sysdeps/ieee754/dbl-64/s_tan.c
index 2c26756..c155603 100644
--- a/sysdeps/ieee754/dbl-64/s_tan.c
+++ b/sysdeps/ieee754/dbl-64/s_tan.c
@@ -68,13 +68,12 @@ tan(double x) {
   mp_no mpy;
 #endif
 
-  fenv_t env;
   double retval;
 
   int __branred(double, double *, double *);
   int __mpranred(double, mp_no *, int);
 
-  libc_feholdexcept_setround (&env, FE_TONEAREST);
+  SET_RESTORE_ROUND (FE_TONEAREST);
 
   /* x=+-INF, x=NaN */
   num.d = x;  ux = num.i[HIGH_HALF];
@@ -503,7 +502,6 @@ tan(double x) {
   goto ret;
 
  ret:
-  libc_feupdateenv (&env);
   return retval;
 }
 
diff --git a/sysdeps/ieee754/flt-32/e_exp2f.c b/sysdeps/ieee754/flt-32/e_exp2f.c
index e728e6e..4ea4811 100644
--- a/sysdeps/ieee754/flt-32/e_exp2f.c
+++ b/sysdeps/ieee754/flt-32/e_exp2f.c
@@ -54,53 +54,52 @@ __ieee754_exp2f (float x)
       int tval, unsafe;
       float rx, x22, result;
       union ieee754_float ex2_u, scale_u;
-      fenv_t oldenv;
-
-      libc_feholdexcept_setroundf (&oldenv, FE_TONEAREST);
-
-      /* 1. Argument reduction.
-	 Choose integers ex, -128 <= t < 128, and some real
-	 -1/512 <= x1 <= 1/512 so that
-	 x = ex + t/512 + x1.
-
-	 First, calculate rx = ex + t/256.  */
-      rx = x + THREEp14;
-      rx -= THREEp14;
-      x -= rx;  /* Compute x=x1. */
-      /* Compute tval = (ex*256 + t)+128.
-	 Now, t = (tval mod 256)-128 and ex=tval/256  [that's mod, NOT %; and
-	 /-round-to-nearest not the usual c integer /].  */
-      tval = (int) (rx * 256.0f + 128.0f);
-
-      /* 2. Adjust for accurate table entry.
-	 Find e so that
-	 x = ex + t/256 + e + x2
-	 where -7e-4 < e < 7e-4, and
-	 (float)(2^(t/256+e))
-	 is accurate to one part in 2^-64.  */
-
-      /* 'tval & 255' is the same as 'tval%256' except that it's always
-	 positive.
-	 Compute x = x2.  */
-      x -= __exp2f_deltatable[tval & 255];
-
-      /* 3. Compute ex2 = 2^(t/255+e+ex).  */
-      ex2_u.f = __exp2f_atable[tval & 255];
-      tval >>= 8;
-      unsafe = abs(tval) >= -FLT_MIN_EXP - 1;
-      ex2_u.ieee.exponent += tval >> unsafe;
-      scale_u.f = 1.0;
-      scale_u.ieee.exponent += tval - (tval >> unsafe);
-
-      /* 4. Approximate 2^x2 - 1, using a second-degree polynomial,
-	 with maximum error in [-2^-9 - 2^-14, 2^-9 + 2^-14]
-	 less than 1.3e-10.  */
-
-      x22 = (.24022656679f * x + .69314736128f) * ex2_u.f;
 
-      /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex).  */
-      libc_fesetenv (&oldenv);
+      {
+	SET_RESTORE_ENV_ROUNDF (FE_TONEAREST);
+
+	/* 1. Argument reduction.
+	   Choose integers ex, -128 <= t < 128, and some real
+	   -1/512 <= x1 <= 1/512 so that
+	   x = ex + t/512 + x1.
+
+	   First, calculate rx = ex + t/256.  */
+	rx = x + THREEp14;
+	rx -= THREEp14;
+	x -= rx;  /* Compute x=x1. */
+	/* Compute tval = (ex*256 + t)+128.
+	   Now, t = (tval mod 256)-128 and ex=tval/256  [that's mod, NOT %;
+	   and /-round-to-nearest not the usual c integer /].  */
+	tval = (int) (rx * 256.0f + 128.0f);
+
+	/* 2. Adjust for accurate table entry.
+	   Find e so that
+	   x = ex + t/256 + e + x2
+	   where -7e-4 < e < 7e-4, and
+	   (float)(2^(t/256+e))
+	   is accurate to one part in 2^-64.  */
+
+	/* 'tval & 255' is the same as 'tval%256' except that it's always
+	   positive.
+	   Compute x = x2.  */
+	x -= __exp2f_deltatable[tval & 255];
+
+	/* 3. Compute ex2 = 2^(t/255+e+ex).  */
+	ex2_u.f = __exp2f_atable[tval & 255];
+	tval >>= 8;
+	unsafe = abs(tval) >= -FLT_MIN_EXP - 1;
+	ex2_u.ieee.exponent += tval >> unsafe;
+	scale_u.f = 1.0;
+	scale_u.ieee.exponent += tval - (tval >> unsafe);
+
+	/* 4. Approximate 2^x2 - 1, using a second-degree polynomial,
+	   with maximum error in [-2^-9 - 2^-14, 2^-9 + 2^-14]
+	   less than 1.3e-10.  */
+
+	x22 = (.24022656679f * x + .69314736128f) * ex2_u.f;
+      }
 
+      /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex).  */
       result = x22 * x + ex2_u.f;
 
       if (!unsafe)
diff --git a/sysdeps/ieee754/flt-32/e_expf.c b/sysdeps/ieee754/flt-32/e_expf.c
index e69e7f6..af5bf9e 100644
--- a/sysdeps/ieee754/flt-32/e_expf.c
+++ b/sysdeps/ieee754/flt-32/e_expf.c
@@ -80,40 +80,39 @@ __ieee754_expf (float x)
       double x22, t, result, dx;
       float n, delta;
       union ieee754_double ex2_u;
-      fenv_t oldenv;
 
-      libc_feholdexcept_setroundf (&oldenv, FE_TONEAREST);
+      {
+	SET_RESTORE_ENV_ROUNDF (FE_TONEAREST);
 
-      /* Calculate n.  */
-      n = x * M_1_LN2 + THREEp22;
-      n -= THREEp22;
-      dx = x - n*M_LN2;
+	/* Calculate n.  */
+	n = x * M_1_LN2 + THREEp22;
+	n -= THREEp22;
+	dx = x - n*M_LN2;
 
-      /* Calculate t/512.  */
-      t = dx + THREEp42;
-      t -= THREEp42;
-      dx -= t;
+	/* Calculate t/512.  */
+	t = dx + THREEp42;
+	t -= THREEp42;
+	dx -= t;
 
-      /* Compute tval = t.  */
-      tval = (int) (t * 512.0);
+	/* Compute tval = t.  */
+	tval = (int) (t * 512.0);
 
-      if (t >= 0)
-	delta = - __exp_deltatable[tval];
-      else
-	delta = __exp_deltatable[-tval];
+	if (t >= 0)
+	  delta = - __exp_deltatable[tval];
+	else
+	  delta = __exp_deltatable[-tval];
 
-      /* Compute ex2 = 2^n e^(t/512+delta[t]).  */
-      ex2_u.d = __exp_atable[tval+177];
-      ex2_u.ieee.exponent += (int) n;
+	/* Compute ex2 = 2^n e^(t/512+delta[t]).  */
+	ex2_u.d = __exp_atable[tval+177];
+	ex2_u.ieee.exponent += (int) n;
 
-      /* Approximate e^(dx+delta) - 1, using a second-degree polynomial,
-	 with maximum error in [-2^-10-2^-28,2^-10+2^-28]
-	 less than 5e-11.  */
-      x22 = (0.5000000496709180453 * dx + 1.0000001192102037084) * dx + delta;
+	/* Approximate e^(dx+delta) - 1, using a second-degree polynomial,
+	   with maximum error in [-2^-10-2^-28,2^-10+2^-28]
+	   less than 5e-11.  */
+	x22 = (0.5000000496709180453 * dx + 1.0000001192102037084) * dx + delta;
+      }
 
       /* Return result.  */
-      libc_fesetenvf (&oldenv);
-
       result = x22 * ex2_u.d + ex2_u.d;
       return (float) result;
     }
-- 
1.7.7.6


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]