This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH 3/4] Create and use SET_RESTORE_ROUND{,_NOEX}{,F,L}.
- From: Richard Henderson <rth at twiddle dot net>
- To: libc-alpha at sourceware dot org
- Date: Sat, 10 Mar 2012 09:25:58 -0800
- Subject: [PATCH 3/4] Create and use SET_RESTORE_ROUND{,_NOEX}{,F,L}.
- References: <1331400359-25803-1-git-send-email-rth@twiddle.net>
---
sysdeps/generic/math_private.h | 64 ++++++++++++++++++++++++
sysdeps/ieee754/dbl-64/e_exp.c | 4 +-
sysdeps/ieee754/dbl-64/e_exp2.c | 97 ++++++++++++++++++-------------------
sysdeps/ieee754/dbl-64/e_pow.c | 4 +-
sysdeps/ieee754/dbl-64/s_sin.c | 8 +--
sysdeps/ieee754/dbl-64/s_tan.c | 4 +-
sysdeps/ieee754/flt-32/e_exp2f.c | 89 +++++++++++++++++-----------------
sysdeps/ieee754/flt-32/e_expf.c | 49 +++++++++----------
sysdeps/x86_64/fpu/math_private.h | 23 +++++++++
9 files changed, 208 insertions(+), 134 deletions(-)
diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h
index 66bd778..5b07986 100644
--- a/sysdeps/generic/math_private.h
+++ b/sysdeps/generic/math_private.h
@@ -449,6 +449,70 @@ default_libc_feupdateenv (fenv_t *e)
# define libc_feupdateenvl default_libc_feupdateenv
#endif
+/* Save and set the rounding mode. The use of fenv_t to store the old mode
+ allows a target-specific version of this function to avoid converting the
+ rounding mode from the fpu format. By default we have no choice but to
+ manipulate the entire env. */
+
+#ifndef libc_feholdsetround
+# define libc_feholdsetround libc_feholdexcept_setround
+#endif
+#ifndef libc_feholdsetroundf
+# define libc_feholdsetroundf libc_feholdexcept_setroundf
+#endif
+#ifndef libc_feholdsetroundl
+# define libc_feholdsetroundl libc_feholdexcept_setroundl
+#endif
+
+/* ... and the reverse. */
+
+#ifndef libc_feresetround
+# define libc_feresetround libc_feupdateenv
+#endif
+#ifndef libc_feresetroundf
+# define libc_feresetroundf libc_feupdateenvf
+#endif
+#ifndef libc_feresetroundl
+# define libc_feresetroundl libc_feupdateenvl
+#endif
+
+/* ... and a version that may also discard exceptions. */
+
+#ifndef libc_feresetround_noex
+# define libc_feresetround_noex libc_fesetenv
+#endif
+#ifndef libc_feresetround_noexf
+# define libc_feresetround_noexf libc_fesetenvf
+#endif
+#ifndef libc_feresetround_noexl
+# define libc_feresetround_noexl libc_fesetenvl
+#endif
+
+/* Save and restore the rounding mode within a lexical block. */
+
+#define SET_RESTORE_ROUND(RM) \
+ fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetround))); \
+ libc_feholdsetround (&__libc_save_rm, (RM))
+#define SET_RESTORE_ROUNDF(RM) \
+ fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetroundf))); \
+ libc_feholdsetroundf (&__libc_save_rm, (RM))
+#define SET_RESTORE_ROUNDL(RM) \
+ fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetroundl))); \
+ libc_feholdsetroundl (&__libc_save_rm, (RM))
+
+/* Save and restore the rounding mode within a lexical block, and also
+ the set of exceptions raised within the block may be discarded. */
+
+#define SET_RESTORE_ROUND_NOEX(RM) \
+ fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetround_noex))); \
+ libc_feholdsetround (&__libc_save_rm, (RM))
+#define SET_RESTORE_ROUND_NOEXF(RM) \
+ fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetround_noexf))); \
+ libc_feholdsetroundf (&__libc_save_rm, (RM))
+#define SET_RESTORE_ROUND_NOEXL(RM) \
+ fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetround_noexl))); \
+ libc_feholdsetroundl (&__libc_save_rm, (RM))
+
#define __nan(str) \
(__builtin_constant_p (str) && str[0] == '\0' ? NAN : __nan (str))
#define __nanf(str) \
diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c
index cb8d9e8..5deba5e 100644
--- a/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/sysdeps/ieee754/dbl-64/e_exp.c
@@ -59,10 +59,9 @@ __ieee754_exp(double x) {
int4 k;
#endif
int4 i,j,m,n,ex;
- fenv_t env;
double retval;
- libc_feholdexcept_setround (&env, FE_TONEAREST);
+ SET_RESTORE_ROUND (FE_TONEAREST);
junk1.x = x;
m = junk1.i[HIGH_HALF];
@@ -157,7 +156,6 @@ __ieee754_exp(double x) {
else { retval = __slowexp(x); goto ret; }
}
ret:
- libc_feupdateenv (&env);
return retval;
}
#ifndef __ieee754_exp
diff --git a/sysdeps/ieee754/dbl-64/e_exp2.c b/sysdeps/ieee754/dbl-64/e_exp2.c
index 4cf879b..e57ec92 100644
--- a/sysdeps/ieee754/dbl-64/e_exp2.c
+++ b/sysdeps/ieee754/dbl-64/e_exp2.c
@@ -61,57 +61,56 @@ __ieee754_exp2 (double x)
int tval, unsafe;
double rx, x22, result;
union ieee754_double ex2_u, scale_u;
- fenv_t oldenv;
-
- libc_feholdexcept_setround (&oldenv, FE_TONEAREST);
-
- /* 1. Argument reduction.
- Choose integers ex, -256 <= t < 256, and some real
- -1/1024 <= x1 <= 1024 so that
- x = ex + t/512 + x1.
-
- First, calculate rx = ex + t/512. */
- rx = x + THREEp42;
- rx -= THREEp42;
- x -= rx; /* Compute x=x1. */
- /* Compute tval = (ex*512 + t)+256.
- Now, t = (tval mod 512)-256 and ex=tval/512 [that's mod, NOT %; and
- /-round-to-nearest not the usual c integer /]. */
- tval = (int) (rx * 512.0 + 256.0);
-
- /* 2. Adjust for accurate table entry.
- Find e so that
- x = ex + t/512 + e + x2
- where -1e6 < e < 1e6, and
- (double)(2^(t/512+e))
- is accurate to one part in 2^-64. */
-
- /* 'tval & 511' is the same as 'tval%512' except that it's always
- positive.
- Compute x = x2. */
- x -= exp2_deltatable[tval & 511];
-
- /* 3. Compute ex2 = 2^(t/512+e+ex). */
- ex2_u.d = exp2_accuratetable[tval & 511];
- tval >>= 9;
- unsafe = abs(tval) >= -DBL_MIN_EXP - 1;
- ex2_u.ieee.exponent += tval >> unsafe;
- scale_u.d = 1.0;
- scale_u.ieee.exponent += tval - (tval >> unsafe);
-
- /* 4. Approximate 2^x2 - 1, using a fourth-degree polynomial,
- with maximum error in [-2^-10-2^-30,2^-10+2^-30]
- less than 10^-19. */
-
- x22 = (((.0096181293647031180
- * x + .055504110254308625)
- * x + .240226506959100583)
- * x + .69314718055994495) * ex2_u.d;
- math_opt_barrier (x22);
- /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex). */
- libc_fesetenv (&oldenv);
+ {
+ SET_RESTORE_ROUND_NOEX (FE_TONEAREST);
+
+ /* 1. Argument reduction.
+ Choose integers ex, -256 <= t < 256, and some real
+ -1/1024 <= x1 <= 1024 so that
+ x = ex + t/512 + x1.
+
+ First, calculate rx = ex + t/512. */
+ rx = x + THREEp42;
+ rx -= THREEp42;
+ x -= rx; /* Compute x=x1. */
+ /* Compute tval = (ex*512 + t)+256.
+ Now, t = (tval mod 512)-256 and ex=tval/512 [that's mod, NOT %;
+ and /-round-to-nearest not the usual c integer /]. */
+ tval = (int) (rx * 512.0 + 256.0);
+
+ /* 2. Adjust for accurate table entry.
+ Find e so that
+ x = ex + t/512 + e + x2
+ where -1e6 < e < 1e6, and
+ (double)(2^(t/512+e))
+ is accurate to one part in 2^-64. */
+
+ /* 'tval & 511' is the same as 'tval%512' except that it's always
+ positive.
+ Compute x = x2. */
+ x -= exp2_deltatable[tval & 511];
+
+ /* 3. Compute ex2 = 2^(t/512+e+ex). */
+ ex2_u.d = exp2_accuratetable[tval & 511];
+ tval >>= 9;
+ unsafe = abs(tval) >= -DBL_MIN_EXP - 1;
+ ex2_u.ieee.exponent += tval >> unsafe;
+ scale_u.d = 1.0;
+ scale_u.ieee.exponent += tval - (tval >> unsafe);
+
+ /* 4. Approximate 2^x2 - 1, using a fourth-degree polynomial,
+ with maximum error in [-2^-10-2^-30,2^-10+2^-30]
+ less than 10^-19. */
+
+ x22 = (((.0096181293647031180
+ * x + .055504110254308625)
+ * x + .240226506959100583)
+ * x + .69314718055994495) * ex2_u.d;
+ math_opt_barrier (x22);
+ }
+ /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex). */
result = x22 * x + ex2_u.d;
if (!unsafe)
diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c
index 550633c..f936a72 100644
--- a/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/sysdeps/ieee754/dbl-64/e_pow.c
@@ -85,10 +85,9 @@ __ieee754_pow(double x, double y) {
(u.i[HIGH_HALF]==0 && u.i[LOW_HALF]!=0)) &&
/* 2^-1023< x<= 2^-1023 * 0x1.0000ffffffff */
(v.i[HIGH_HALF]&0x7fffffff) < 0x4ff00000) { /* if y<-1 or y>1 */
- fenv_t env;
double retval;
- libc_feholdexcept_setround (&env, FE_TONEAREST);
+ SET_RESTORE_ROUND (FE_TONEAREST);
z = log1(x,&aa,&error); /* x^y =e^(y log (X)) */
t = y*134217729.0;
@@ -105,7 +104,6 @@ __ieee754_pow(double x, double y) {
t = __exp1(a1,a2,1.9e16*error); /* return -10 or 0 if wasn't computed exactly */
retval = (t>0)?t:power1(x,y);
- libc_feupdateenv (&env);
return retval;
}
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index e3e3a2a..7289800 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -108,10 +108,9 @@ __sin(double x){
#if 0
int4 nn;
#endif
- fenv_t env;
double retval = 0;
- libc_feholdexcept_setround (&env, FE_TONEAREST);
+ SET_RESTORE_ROUND (FE_TONEAREST);
u.x = x;
m = u.i[HIGH_HALF];
@@ -365,7 +364,6 @@ __sin(double x){
}
ret:
- libc_feupdateenv (&env);
return retval;
}
@@ -383,10 +381,9 @@ __cos(double x)
mynumber u,v;
int4 k,m,n;
- fenv_t env;
double retval = 0;
- libc_feholdexcept_setround (&env, FE_TONEAREST);
+ SET_RESTORE_ROUND (FE_TONEAREST);
u.x = x;
m = u.i[HIGH_HALF];
@@ -635,7 +632,6 @@ __cos(double x)
}
ret:
- libc_feupdateenv (&env);
return retval;
}
diff --git a/sysdeps/ieee754/dbl-64/s_tan.c b/sysdeps/ieee754/dbl-64/s_tan.c
index acff67c..b163f1d 100644
--- a/sysdeps/ieee754/dbl-64/s_tan.c
+++ b/sysdeps/ieee754/dbl-64/s_tan.c
@@ -68,13 +68,12 @@ tan(double x) {
mp_no mpy;
#endif
- fenv_t env;
double retval;
int __branred(double, double *, double *);
int __mpranred(double, mp_no *, int);
- libc_feholdexcept_setround (&env, FE_TONEAREST);
+ SET_RESTORE_ROUND (FE_TONEAREST);
/* x=+-INF, x=NaN */
num.d = x; ux = num.i[HIGH_HALF];
@@ -503,7 +502,6 @@ tan(double x) {
goto ret;
ret:
- libc_feupdateenv (&env);
return retval;
}
diff --git a/sysdeps/ieee754/flt-32/e_exp2f.c b/sysdeps/ieee754/flt-32/e_exp2f.c
index e728e6e..267d81b 100644
--- a/sysdeps/ieee754/flt-32/e_exp2f.c
+++ b/sysdeps/ieee754/flt-32/e_exp2f.c
@@ -54,53 +54,52 @@ __ieee754_exp2f (float x)
int tval, unsafe;
float rx, x22, result;
union ieee754_float ex2_u, scale_u;
- fenv_t oldenv;
-
- libc_feholdexcept_setroundf (&oldenv, FE_TONEAREST);
-
- /* 1. Argument reduction.
- Choose integers ex, -128 <= t < 128, and some real
- -1/512 <= x1 <= 1/512 so that
- x = ex + t/512 + x1.
-
- First, calculate rx = ex + t/256. */
- rx = x + THREEp14;
- rx -= THREEp14;
- x -= rx; /* Compute x=x1. */
- /* Compute tval = (ex*256 + t)+128.
- Now, t = (tval mod 256)-128 and ex=tval/256 [that's mod, NOT %; and
- /-round-to-nearest not the usual c integer /]. */
- tval = (int) (rx * 256.0f + 128.0f);
-
- /* 2. Adjust for accurate table entry.
- Find e so that
- x = ex + t/256 + e + x2
- where -7e-4 < e < 7e-4, and
- (float)(2^(t/256+e))
- is accurate to one part in 2^-64. */
-
- /* 'tval & 255' is the same as 'tval%256' except that it's always
- positive.
- Compute x = x2. */
- x -= __exp2f_deltatable[tval & 255];
-
- /* 3. Compute ex2 = 2^(t/255+e+ex). */
- ex2_u.f = __exp2f_atable[tval & 255];
- tval >>= 8;
- unsafe = abs(tval) >= -FLT_MIN_EXP - 1;
- ex2_u.ieee.exponent += tval >> unsafe;
- scale_u.f = 1.0;
- scale_u.ieee.exponent += tval - (tval >> unsafe);
-
- /* 4. Approximate 2^x2 - 1, using a second-degree polynomial,
- with maximum error in [-2^-9 - 2^-14, 2^-9 + 2^-14]
- less than 1.3e-10. */
-
- x22 = (.24022656679f * x + .69314736128f) * ex2_u.f;
- /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex). */
- libc_fesetenv (&oldenv);
+ {
+ SET_RESTORE_ROUND_NOEXF (FE_TONEAREST);
+
+ /* 1. Argument reduction.
+ Choose integers ex, -128 <= t < 128, and some real
+ -1/512 <= x1 <= 1/512 so that
+ x = ex + t/512 + x1.
+
+ First, calculate rx = ex + t/256. */
+ rx = x + THREEp14;
+ rx -= THREEp14;
+ x -= rx; /* Compute x=x1. */
+ /* Compute tval = (ex*256 + t)+128.
+ Now, t = (tval mod 256)-128 and ex=tval/256 [that's mod, NOT %;
+ and /-round-to-nearest not the usual c integer /]. */
+ tval = (int) (rx * 256.0f + 128.0f);
+
+ /* 2. Adjust for accurate table entry.
+ Find e so that
+ x = ex + t/256 + e + x2
+ where -7e-4 < e < 7e-4, and
+ (float)(2^(t/256+e))
+ is accurate to one part in 2^-64. */
+
+ /* 'tval & 255' is the same as 'tval%256' except that it's always
+ positive.
+ Compute x = x2. */
+ x -= __exp2f_deltatable[tval & 255];
+
+ /* 3. Compute ex2 = 2^(t/255+e+ex). */
+ ex2_u.f = __exp2f_atable[tval & 255];
+ tval >>= 8;
+ unsafe = abs(tval) >= -FLT_MIN_EXP - 1;
+ ex2_u.ieee.exponent += tval >> unsafe;
+ scale_u.f = 1.0;
+ scale_u.ieee.exponent += tval - (tval >> unsafe);
+
+ /* 4. Approximate 2^x2 - 1, using a second-degree polynomial,
+ with maximum error in [-2^-9 - 2^-14, 2^-9 + 2^-14]
+ less than 1.3e-10. */
+
+ x22 = (.24022656679f * x + .69314736128f) * ex2_u.f;
+ }
+ /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex). */
result = x22 * x + ex2_u.f;
if (!unsafe)
diff --git a/sysdeps/ieee754/flt-32/e_expf.c b/sysdeps/ieee754/flt-32/e_expf.c
index e69e7f6..57aff16 100644
--- a/sysdeps/ieee754/flt-32/e_expf.c
+++ b/sysdeps/ieee754/flt-32/e_expf.c
@@ -80,40 +80,39 @@ __ieee754_expf (float x)
double x22, t, result, dx;
float n, delta;
union ieee754_double ex2_u;
- fenv_t oldenv;
- libc_feholdexcept_setroundf (&oldenv, FE_TONEAREST);
+ {
+ SET_RESTORE_ROUND_NOEXF (FE_TONEAREST);
- /* Calculate n. */
- n = x * M_1_LN2 + THREEp22;
- n -= THREEp22;
- dx = x - n*M_LN2;
+ /* Calculate n. */
+ n = x * M_1_LN2 + THREEp22;
+ n -= THREEp22;
+ dx = x - n*M_LN2;
- /* Calculate t/512. */
- t = dx + THREEp42;
- t -= THREEp42;
- dx -= t;
+ /* Calculate t/512. */
+ t = dx + THREEp42;
+ t -= THREEp42;
+ dx -= t;
- /* Compute tval = t. */
- tval = (int) (t * 512.0);
+ /* Compute tval = t. */
+ tval = (int) (t * 512.0);
- if (t >= 0)
- delta = - __exp_deltatable[tval];
- else
- delta = __exp_deltatable[-tval];
+ if (t >= 0)
+ delta = - __exp_deltatable[tval];
+ else
+ delta = __exp_deltatable[-tval];
- /* Compute ex2 = 2^n e^(t/512+delta[t]). */
- ex2_u.d = __exp_atable[tval+177];
- ex2_u.ieee.exponent += (int) n;
+ /* Compute ex2 = 2^n e^(t/512+delta[t]). */
+ ex2_u.d = __exp_atable[tval+177];
+ ex2_u.ieee.exponent += (int) n;
- /* Approximate e^(dx+delta) - 1, using a second-degree polynomial,
- with maximum error in [-2^-10-2^-28,2^-10+2^-28]
- less than 5e-11. */
- x22 = (0.5000000496709180453 * dx + 1.0000001192102037084) * dx + delta;
+ /* Approximate e^(dx+delta) - 1, using a second-degree polynomial,
+ with maximum error in [-2^-10-2^-28,2^-10+2^-28]
+ less than 5e-11. */
+ x22 = (0.5000000496709180453 * dx + 1.0000001192102037084) * dx + delta;
+ }
/* Return result. */
- libc_fesetenvf (&oldenv);
-
result = x22 * ex2_u.d + ex2_u.d;
return (float) result;
}
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 8b1fe70..3289afc 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -119,6 +119,29 @@ libc_feupdateenv (fenv_t *e)
#define libc_feupdateenv libc_feupdateenv
#define libc_feupdateenvf libc_feupdateenv
+static __always_inline void
+libc_feholdsetround (fenv_t *e, int r)
+{
+ unsigned int mxcsr;
+ asm (STMXCSR " %0" : "=m" (*&mxcsr));
+ e->__mxcsr = mxcsr;
+ mxcsr = (mxcsr & ~0x6000) | (r << 3);
+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+}
+#define libc_feholdsetround libc_feholdsetround
+#define libc_feholdsetroundf libc_feholdsetround
+
+static __always_inline void
+libc_feresetround (fenv_t *e)
+{
+ unsigned int mxcsr;
+ asm (STMXCSR " %0" : "=m" (*&mxcsr));
+ mxcsr = (mxcsr & ~0x6000) | (e->__mxcsr & 0x6000);
+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+}
+#define libc_feresetround libc_feresetround
+#define libc_feresetroundf libc_feresetround
+
#include_next <math_private.h>
extern __always_inline double
--
1.7.7.6