From eb92c487b35e26aa1e08815c4480d0bc5cc9f370 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 10 Mar 2012 08:55:53 -0800 Subject: [PATCH] Create and use SET_RESTORE_ROUND{,_NOEX,_53BIT}{,F,L}. --- ChangeLog | 24 ++++++++ sysdeps/generic/math_private.h | 69 ++++++++++++++++++++++ sysdeps/ieee754/dbl-64/e_exp.c | 4 +- sysdeps/ieee754/dbl-64/e_exp2.c | 97 +++++++++++++++---------------- sysdeps/ieee754/dbl-64/e_pow.c | 4 +- sysdeps/ieee754/dbl-64/s_sin.c | 8 +-- sysdeps/ieee754/dbl-64/s_tan.c | 4 +- sysdeps/ieee754/flt-32/e_exp2f.c | 89 ++++++++++++++-------------- sysdeps/ieee754/flt-32/e_expf.c | 49 ++++++++-------- sysdeps/x86_64/fpu/math_private.h | 23 ++++++++ 10 files changed, 237 insertions(+), 134 deletions(-) diff --git a/ChangeLog b/ChangeLog index aace9ef3db..277a201fa6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,29 @@ 2012-03-19 Richard Henderson + * sysdeps/generic/math_private.h (libc_feholdsetround): New. + (libc_feholdsetroundf, libc_feholdsetroundl): New. + (libc_feresetround, libc_feresetroundf, libc_feresetroundl): New. + (libc_feresetround_noex): New. + (libc_feresetround_noexf): New. + (libc_feresetround_noexl): New. + (SET_RESTORE_ROUND, SET_RESTORE_ROUNDF, SET_RESTORE_ROUNDL): New. + (SET_RESTORE_ROUND_NOEX, SET_RESTORE_ROUND_NOEXF): New. + (SET_RESTORE_ROUND_NOEXL, SET_RESTORE_ROUND_53BIT): New. + * sysdeps/ieee754/dbl-64/e_exp.c (__ieee754_exp): Use + SET_RESTORE_ROUND. + * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Likewise. + * sysdeps/ieee754/dbl-64/s_sin.c (__sin): Use SET_RESTORE_ROUND_53BIT. + (__cos): Likewise. + * sysdeps/ieee754/dbl-64/s_tan.c (__tan): Likewise. + * sysdeps/ieee754/dbl-64/e_exp2.c (__ieee754_exp2): Use + SET_RESTORE_ROUND_NOEX. + * sysdeps/ieee754/dbl-64/e_exp2f.c (__ieee754_exp2f): Use + SET_RESTORE_ROUND_NOEXF. + * sysdeps/ieee754/flt-32/e_expf.c (__ieee754_expf): Likewise. + * sysdeps/x86_64/fpu/math_private.h (libc_feholdsetround): New. + (libc_feholdsetroundf): New. + (libc_feresetround, libc_feresetroundf): New. + * sysdeps/i386/fpu/math_private.h: Include , . (libc_feholdexcept_setround_53bit): Convert from macro to function. (libc_feupdateenv_53bit): Likewise. Don't force _FPU_EXTENDED. diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h index ab4b47bfe0..0b945f9afc 100644 --- a/sysdeps/generic/math_private.h +++ b/sysdeps/generic/math_private.h @@ -457,6 +457,75 @@ default_libc_feupdateenv (fenv_t *e) # define libc_feupdateenv_53bit libc_feupdateenv #endif +/* Save and set the rounding mode. The use of fenv_t to store the old mode + allows a target-specific version of this function to avoid converting the + rounding mode from the fpu format. By default we have no choice but to + manipulate the entire env. */ + +#ifndef libc_feholdsetround +# define libc_feholdsetround libc_feholdexcept_setround +#endif +#ifndef libc_feholdsetroundf +# define libc_feholdsetroundf libc_feholdexcept_setroundf +#endif +#ifndef libc_feholdsetroundl +# define libc_feholdsetroundl libc_feholdexcept_setroundl +#endif + +/* ... and the reverse. */ + +#ifndef libc_feresetround +# define libc_feresetround libc_feupdateenv +#endif +#ifndef libc_feresetroundf +# define libc_feresetroundf libc_feupdateenvf +#endif +#ifndef libc_feresetroundl +# define libc_feresetroundl libc_feupdateenvl +#endif + +/* ... and a version that may also discard exceptions. */ + +#ifndef libc_feresetround_noex +# define libc_feresetround_noex libc_fesetenv +#endif +#ifndef libc_feresetround_noexf +# define libc_feresetround_noexf libc_fesetenvf +#endif +#ifndef libc_feresetround_noexl +# define libc_feresetround_noexl libc_fesetenvl +#endif + +/* Save and restore the rounding mode within a lexical block. */ + +#define SET_RESTORE_ROUND(RM) \ + fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetround))); \ + libc_feholdsetround (&__libc_save_rm, (RM)) +#define SET_RESTORE_ROUNDF(RM) \ + fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetroundf))); \ + libc_feholdsetroundf (&__libc_save_rm, (RM)) +#define SET_RESTORE_ROUNDL(RM) \ + fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetroundl))); \ + libc_feholdsetroundl (&__libc_save_rm, (RM)) + +/* Save and restore the rounding mode within a lexical block, and also + the set of exceptions raised within the block may be discarded. */ + +#define SET_RESTORE_ROUND_NOEX(RM) \ + fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetround_noex))); \ + libc_feholdsetround (&__libc_save_rm, (RM)) +#define SET_RESTORE_ROUND_NOEXF(RM) \ + fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetround_noexf))); \ + libc_feholdsetroundf (&__libc_save_rm, (RM)) +#define SET_RESTORE_ROUND_NOEXL(RM) \ + fenv_t __libc_save_rm __attribute__((cleanup(libc_feresetround_noexl))); \ + libc_feholdsetroundl (&__libc_save_rm, (RM)) + +/* Like SET_RESTORE_ROUND, but also set rounding precision to 53 bits. */ +#define SET_RESTORE_ROUND_53BIT(RM) \ + fenv_t __libc_save_rm __attribute__((cleanup(libc_feupdateenv_53bit))); \ + libc_feholdexcept_setround_53bit (&__libc_save_rm, (RM)) + #define __nan(str) \ (__builtin_constant_p (str) && str[0] == '\0' ? NAN : __nan (str)) #define __nanf(str) \ diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c index cb8d9e8d9d..5deba5e445 100644 --- a/sysdeps/ieee754/dbl-64/e_exp.c +++ b/sysdeps/ieee754/dbl-64/e_exp.c @@ -59,10 +59,9 @@ __ieee754_exp(double x) { int4 k; #endif int4 i,j,m,n,ex; - fenv_t env; double retval; - libc_feholdexcept_setround (&env, FE_TONEAREST); + SET_RESTORE_ROUND (FE_TONEAREST); junk1.x = x; m = junk1.i[HIGH_HALF]; @@ -157,7 +156,6 @@ __ieee754_exp(double x) { else { retval = __slowexp(x); goto ret; } } ret: - libc_feupdateenv (&env); return retval; } #ifndef __ieee754_exp diff --git a/sysdeps/ieee754/dbl-64/e_exp2.c b/sysdeps/ieee754/dbl-64/e_exp2.c index 4cf879b7f9..e57ec92116 100644 --- a/sysdeps/ieee754/dbl-64/e_exp2.c +++ b/sysdeps/ieee754/dbl-64/e_exp2.c @@ -61,57 +61,56 @@ __ieee754_exp2 (double x) int tval, unsafe; double rx, x22, result; union ieee754_double ex2_u, scale_u; - fenv_t oldenv; - - libc_feholdexcept_setround (&oldenv, FE_TONEAREST); - - /* 1. Argument reduction. - Choose integers ex, -256 <= t < 256, and some real - -1/1024 <= x1 <= 1024 so that - x = ex + t/512 + x1. - - First, calculate rx = ex + t/512. */ - rx = x + THREEp42; - rx -= THREEp42; - x -= rx; /* Compute x=x1. */ - /* Compute tval = (ex*512 + t)+256. - Now, t = (tval mod 512)-256 and ex=tval/512 [that's mod, NOT %; and - /-round-to-nearest not the usual c integer /]. */ - tval = (int) (rx * 512.0 + 256.0); - - /* 2. Adjust for accurate table entry. - Find e so that - x = ex + t/512 + e + x2 - where -1e6 < e < 1e6, and - (double)(2^(t/512+e)) - is accurate to one part in 2^-64. */ - - /* 'tval & 511' is the same as 'tval%512' except that it's always - positive. - Compute x = x2. */ - x -= exp2_deltatable[tval & 511]; - - /* 3. Compute ex2 = 2^(t/512+e+ex). */ - ex2_u.d = exp2_accuratetable[tval & 511]; - tval >>= 9; - unsafe = abs(tval) >= -DBL_MIN_EXP - 1; - ex2_u.ieee.exponent += tval >> unsafe; - scale_u.d = 1.0; - scale_u.ieee.exponent += tval - (tval >> unsafe); - - /* 4. Approximate 2^x2 - 1, using a fourth-degree polynomial, - with maximum error in [-2^-10-2^-30,2^-10+2^-30] - less than 10^-19. */ - - x22 = (((.0096181293647031180 - * x + .055504110254308625) - * x + .240226506959100583) - * x + .69314718055994495) * ex2_u.d; - math_opt_barrier (x22); - /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex). */ - libc_fesetenv (&oldenv); + { + SET_RESTORE_ROUND_NOEX (FE_TONEAREST); + + /* 1. Argument reduction. + Choose integers ex, -256 <= t < 256, and some real + -1/1024 <= x1 <= 1024 so that + x = ex + t/512 + x1. + + First, calculate rx = ex + t/512. */ + rx = x + THREEp42; + rx -= THREEp42; + x -= rx; /* Compute x=x1. */ + /* Compute tval = (ex*512 + t)+256. + Now, t = (tval mod 512)-256 and ex=tval/512 [that's mod, NOT %; + and /-round-to-nearest not the usual c integer /]. */ + tval = (int) (rx * 512.0 + 256.0); + + /* 2. Adjust for accurate table entry. + Find e so that + x = ex + t/512 + e + x2 + where -1e6 < e < 1e6, and + (double)(2^(t/512+e)) + is accurate to one part in 2^-64. */ + + /* 'tval & 511' is the same as 'tval%512' except that it's always + positive. + Compute x = x2. */ + x -= exp2_deltatable[tval & 511]; + + /* 3. Compute ex2 = 2^(t/512+e+ex). */ + ex2_u.d = exp2_accuratetable[tval & 511]; + tval >>= 9; + unsafe = abs(tval) >= -DBL_MIN_EXP - 1; + ex2_u.ieee.exponent += tval >> unsafe; + scale_u.d = 1.0; + scale_u.ieee.exponent += tval - (tval >> unsafe); + + /* 4. Approximate 2^x2 - 1, using a fourth-degree polynomial, + with maximum error in [-2^-10-2^-30,2^-10+2^-30] + less than 10^-19. */ + + x22 = (((.0096181293647031180 + * x + .055504110254308625) + * x + .240226506959100583) + * x + .69314718055994495) * ex2_u.d; + math_opt_barrier (x22); + } + /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex). */ result = x22 * x + ex2_u.d; if (!unsafe) diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c index 550633cf9b..f936a72de7 100644 --- a/sysdeps/ieee754/dbl-64/e_pow.c +++ b/sysdeps/ieee754/dbl-64/e_pow.c @@ -85,10 +85,9 @@ __ieee754_pow(double x, double y) { (u.i[HIGH_HALF]==0 && u.i[LOW_HALF]!=0)) && /* 2^-1023< x<= 2^-1023 * 0x1.0000ffffffff */ (v.i[HIGH_HALF]&0x7fffffff) < 0x4ff00000) { /* if y<-1 or y>1 */ - fenv_t env; double retval; - libc_feholdexcept_setround (&env, FE_TONEAREST); + SET_RESTORE_ROUND (FE_TONEAREST); z = log1(x,&aa,&error); /* x^y =e^(y log (X)) */ t = y*134217729.0; @@ -105,7 +104,6 @@ __ieee754_pow(double x, double y) { t = __exp1(a1,a2,1.9e16*error); /* return -10 or 0 if wasn't computed exactly */ retval = (t>0)?t:power1(x,y); - libc_feupdateenv (&env); return retval; } diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c index 4b4b67573d..7b9252f81e 100644 --- a/sysdeps/ieee754/dbl-64/s_sin.c +++ b/sysdeps/ieee754/dbl-64/s_sin.c @@ -108,10 +108,9 @@ __sin(double x){ #if 0 int4 nn; #endif - fenv_t env; double retval = 0; - libc_feholdexcept_setround_53bit (&env, FE_TONEAREST); + SET_RESTORE_ROUND_53BIT (FE_TONEAREST); u.x = x; m = u.i[HIGH_HALF]; @@ -365,7 +364,6 @@ __sin(double x){ } ret: - libc_feupdateenv_53bit (&env); return retval; } @@ -383,10 +381,9 @@ __cos(double x) mynumber u,v; int4 k,m,n; - fenv_t env; double retval = 0; - libc_feholdexcept_setround_53bit (&env, FE_TONEAREST); + SET_RESTORE_ROUND_53BIT (FE_TONEAREST); u.x = x; m = u.i[HIGH_HALF]; @@ -635,7 +632,6 @@ __cos(double x) } ret: - libc_feupdateenv_53bit (&env); return retval; } diff --git a/sysdeps/ieee754/dbl-64/s_tan.c b/sysdeps/ieee754/dbl-64/s_tan.c index 8eee383933..f8507eaa4c 100644 --- a/sysdeps/ieee754/dbl-64/s_tan.c +++ b/sysdeps/ieee754/dbl-64/s_tan.c @@ -68,13 +68,12 @@ tan(double x) { mp_no mpy; #endif - fenv_t env; double retval; int __branred(double, double *, double *); int __mpranred(double, mp_no *, int); - libc_feholdexcept_setround_53bit (&env, FE_TONEAREST); + SET_RESTORE_ROUND_53BIT (FE_TONEAREST); /* x=+-INF, x=NaN */ num.d = x; ux = num.i[HIGH_HALF]; @@ -503,7 +502,6 @@ tan(double x) { goto ret; ret: - libc_feupdateenv_53bit (&env); return retval; } diff --git a/sysdeps/ieee754/flt-32/e_exp2f.c b/sysdeps/ieee754/flt-32/e_exp2f.c index e728e6ec74..267d81b23f 100644 --- a/sysdeps/ieee754/flt-32/e_exp2f.c +++ b/sysdeps/ieee754/flt-32/e_exp2f.c @@ -54,53 +54,52 @@ __ieee754_exp2f (float x) int tval, unsafe; float rx, x22, result; union ieee754_float ex2_u, scale_u; - fenv_t oldenv; - - libc_feholdexcept_setroundf (&oldenv, FE_TONEAREST); - - /* 1. Argument reduction. - Choose integers ex, -128 <= t < 128, and some real - -1/512 <= x1 <= 1/512 so that - x = ex + t/512 + x1. - - First, calculate rx = ex + t/256. */ - rx = x + THREEp14; - rx -= THREEp14; - x -= rx; /* Compute x=x1. */ - /* Compute tval = (ex*256 + t)+128. - Now, t = (tval mod 256)-128 and ex=tval/256 [that's mod, NOT %; and - /-round-to-nearest not the usual c integer /]. */ - tval = (int) (rx * 256.0f + 128.0f); - - /* 2. Adjust for accurate table entry. - Find e so that - x = ex + t/256 + e + x2 - where -7e-4 < e < 7e-4, and - (float)(2^(t/256+e)) - is accurate to one part in 2^-64. */ - - /* 'tval & 255' is the same as 'tval%256' except that it's always - positive. - Compute x = x2. */ - x -= __exp2f_deltatable[tval & 255]; - - /* 3. Compute ex2 = 2^(t/255+e+ex). */ - ex2_u.f = __exp2f_atable[tval & 255]; - tval >>= 8; - unsafe = abs(tval) >= -FLT_MIN_EXP - 1; - ex2_u.ieee.exponent += tval >> unsafe; - scale_u.f = 1.0; - scale_u.ieee.exponent += tval - (tval >> unsafe); - - /* 4. Approximate 2^x2 - 1, using a second-degree polynomial, - with maximum error in [-2^-9 - 2^-14, 2^-9 + 2^-14] - less than 1.3e-10. */ - - x22 = (.24022656679f * x + .69314736128f) * ex2_u.f; - /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex). */ - libc_fesetenv (&oldenv); + { + SET_RESTORE_ROUND_NOEXF (FE_TONEAREST); + + /* 1. Argument reduction. + Choose integers ex, -128 <= t < 128, and some real + -1/512 <= x1 <= 1/512 so that + x = ex + t/512 + x1. + + First, calculate rx = ex + t/256. */ + rx = x + THREEp14; + rx -= THREEp14; + x -= rx; /* Compute x=x1. */ + /* Compute tval = (ex*256 + t)+128. + Now, t = (tval mod 256)-128 and ex=tval/256 [that's mod, NOT %; + and /-round-to-nearest not the usual c integer /]. */ + tval = (int) (rx * 256.0f + 128.0f); + + /* 2. Adjust for accurate table entry. + Find e so that + x = ex + t/256 + e + x2 + where -7e-4 < e < 7e-4, and + (float)(2^(t/256+e)) + is accurate to one part in 2^-64. */ + + /* 'tval & 255' is the same as 'tval%256' except that it's always + positive. + Compute x = x2. */ + x -= __exp2f_deltatable[tval & 255]; + + /* 3. Compute ex2 = 2^(t/255+e+ex). */ + ex2_u.f = __exp2f_atable[tval & 255]; + tval >>= 8; + unsafe = abs(tval) >= -FLT_MIN_EXP - 1; + ex2_u.ieee.exponent += tval >> unsafe; + scale_u.f = 1.0; + scale_u.ieee.exponent += tval - (tval >> unsafe); + + /* 4. Approximate 2^x2 - 1, using a second-degree polynomial, + with maximum error in [-2^-9 - 2^-14, 2^-9 + 2^-14] + less than 1.3e-10. */ + + x22 = (.24022656679f * x + .69314736128f) * ex2_u.f; + } + /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex). */ result = x22 * x + ex2_u.f; if (!unsafe) diff --git a/sysdeps/ieee754/flt-32/e_expf.c b/sysdeps/ieee754/flt-32/e_expf.c index e69e7f6ae0..57aff16ab7 100644 --- a/sysdeps/ieee754/flt-32/e_expf.c +++ b/sysdeps/ieee754/flt-32/e_expf.c @@ -80,40 +80,39 @@ __ieee754_expf (float x) double x22, t, result, dx; float n, delta; union ieee754_double ex2_u; - fenv_t oldenv; - libc_feholdexcept_setroundf (&oldenv, FE_TONEAREST); + { + SET_RESTORE_ROUND_NOEXF (FE_TONEAREST); - /* Calculate n. */ - n = x * M_1_LN2 + THREEp22; - n -= THREEp22; - dx = x - n*M_LN2; + /* Calculate n. */ + n = x * M_1_LN2 + THREEp22; + n -= THREEp22; + dx = x - n*M_LN2; - /* Calculate t/512. */ - t = dx + THREEp42; - t -= THREEp42; - dx -= t; + /* Calculate t/512. */ + t = dx + THREEp42; + t -= THREEp42; + dx -= t; - /* Compute tval = t. */ - tval = (int) (t * 512.0); + /* Compute tval = t. */ + tval = (int) (t * 512.0); - if (t >= 0) - delta = - __exp_deltatable[tval]; - else - delta = __exp_deltatable[-tval]; + if (t >= 0) + delta = - __exp_deltatable[tval]; + else + delta = __exp_deltatable[-tval]; - /* Compute ex2 = 2^n e^(t/512+delta[t]). */ - ex2_u.d = __exp_atable[tval+177]; - ex2_u.ieee.exponent += (int) n; + /* Compute ex2 = 2^n e^(t/512+delta[t]). */ + ex2_u.d = __exp_atable[tval+177]; + ex2_u.ieee.exponent += (int) n; - /* Approximate e^(dx+delta) - 1, using a second-degree polynomial, - with maximum error in [-2^-10-2^-28,2^-10+2^-28] - less than 5e-11. */ - x22 = (0.5000000496709180453 * dx + 1.0000001192102037084) * dx + delta; + /* Approximate e^(dx+delta) - 1, using a second-degree polynomial, + with maximum error in [-2^-10-2^-28,2^-10+2^-28] + less than 5e-11. */ + x22 = (0.5000000496709180453 * dx + 1.0000001192102037084) * dx + delta; + } /* Return result. */ - libc_fesetenvf (&oldenv); - result = x22 * ex2_u.d + ex2_u.d; return (float) result; } diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h index 8b1fe70d2c..3289afc58b 100644 --- a/sysdeps/x86_64/fpu/math_private.h +++ b/sysdeps/x86_64/fpu/math_private.h @@ -119,6 +119,29 @@ libc_feupdateenv (fenv_t *e) #define libc_feupdateenv libc_feupdateenv #define libc_feupdateenvf libc_feupdateenv +static __always_inline void +libc_feholdsetround (fenv_t *e, int r) +{ + unsigned int mxcsr; + asm (STMXCSR " %0" : "=m" (*&mxcsr)); + e->__mxcsr = mxcsr; + mxcsr = (mxcsr & ~0x6000) | (r << 3); + asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); +} +#define libc_feholdsetround libc_feholdsetround +#define libc_feholdsetroundf libc_feholdsetround + +static __always_inline void +libc_feresetround (fenv_t *e) +{ + unsigned int mxcsr; + asm (STMXCSR " %0" : "=m" (*&mxcsr)); + mxcsr = (mxcsr & ~0x6000) | (e->__mxcsr & 0x6000); + asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); +} +#define libc_feresetround libc_feresetround +#define libc_feresetroundf libc_feresetround + #include_next extern __always_inline double -- 2.43.5