From d38f1dba009689d78af371cffa091b27e4ebe17d Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Tue, 18 Oct 2011 09:00:46 -0400 Subject: [PATCH] Start optimizing the use of the fenv interfaces in libm itself --- ChangeLog | 15 +++++++ math/math_private.h | 22 ++++++++++ sysdeps/ieee754/dbl-64/e_exp2.c | 19 +++------ .../ieee754/dbl-64/wordsize-64/s_nearbyint.c | 28 +++++-------- sysdeps/x86_64/fpu/math_private.h | 42 +++++++++++++++++++ 5 files changed, 95 insertions(+), 31 deletions(-) diff --git a/ChangeLog b/ChangeLog index 1ca1cca3c4..c391f612ea 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +2011-10-18 Ulrich Drepper + + * math/math_private.h: Define defaults for libc_fegetround, + libc_fegetroundf, libc_fegetroundl, libc_fesetround, libc_fesetroundf, + libc_fesetroundl, libc_feholdexcept, libc_feholdexceptf, + libc_feholdexceptl, libc_fesetenv, libc_fesetenvf, libc_fesetenvl. + * sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c: Use + libc_feholdexcept, libc_fesetround, libc_fesetenv instead of the + standard functions. + * sysdeps/ieee754/dbl-64/e_exp2.c: Likewise. + Remove comments and hacks for old compiler versions. + * sysdeps/x86_64/fpu/math_private.h: Define special versions of + libc_fegetround, libc_fesetround, libc_feholdexcept, and + libc_fesetenv. + 2011-10-18 Andreas Schwab * sysdeps/x86_64/fpu/bits/fenv.h: Add C linkage markers. diff --git a/math/math_private.h b/math/math_private.h index c5fbf15f65..a1ce0142b1 100644 --- a/math/math_private.h +++ b/math/math_private.h @@ -358,4 +358,26 @@ extern void __docos (double __x, double __dx, double __v[]); #define math_force_eval(x) __asm __volatile ("" : : "m" (x)) #endif + +/* The standards only specify one variant of the fenv.h interfaces. + But at least for some architectures we can be more efficient if we + know what operations are going to be performed. Therefore we + define additional interfaces. By default they refer to the normal + interfaces. 
*/ +#define libc_fegetround() fegetround () +#define libc_fegetroundf() fegetround () +#define libc_fegetroundl() fegetround () + +#define libc_fesetround(r) (void) fesetround (r) +#define libc_fesetroundf(r) (void) fesetround (r) +#define libc_fesetroundl(r) (void) fesetround (r) + +#define libc_feholdexcept(e) (void) feholdexcept (e) +#define libc_feholdexceptf(e) (void) feholdexcept (e) +#define libc_feholdexceptl(e) (void) feholdexcept (e) + +#define libc_fesetenv(e) (void) fesetenv (e) +#define libc_fesetenvf(e) (void) fesetenv (e) +#define libc_fesetenvl(e) (void) fesetenv (e) + #endif /* _MATH_PRIVATE_H_ */ diff --git a/sysdeps/ieee754/dbl-64/e_exp2.c b/sysdeps/ieee754/dbl-64/e_exp2.c index c973f35673..734e476ce5 100644 --- a/sysdeps/ieee754/dbl-64/e_exp2.c +++ b/sysdeps/ieee754/dbl-64/e_exp2.c @@ -25,9 +25,6 @@ 17 (1), March 1991, pp. 26-45. It has been slightly modified to compute 2^x instead of e^x. */ -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif #include #include #include @@ -38,13 +35,8 @@ #include "t_exp2.h" -/* XXX I know the assembler generates a warning about incorrect section - attributes. But without the attribute here the compiler places the - constants in the .data section. Ideally the constant is placed in - .rodata.cst8 so that it can be merged, but gcc sucks, it ICEs when - we try to force this section on it. --drepper */ -static const volatile double TWO1023 = 8.988465674311579539e+307; -static const volatile double TWOM1000 = 9.3326361850321887899e-302; +static const double TWO1023 = 8.988465674311579539e+307; +static const double TWOM1000 = 9.3326361850321887899e-302; double __ieee754_exp2 (double x) @@ -72,10 +64,10 @@ __ieee754_exp2 (double x) union ieee754_double ex2_u, scale_u; fenv_t oldenv; - feholdexcept (&oldenv); + libc_feholdexcept (&oldenv); #ifdef FE_TONEAREST /* If we don't have this, it's too bad. */ - fesetround (FE_TONEAREST); + libc_fesetround (FE_TONEAREST); #endif /* 1. Argument reduction. 
@@ -120,9 +112,10 @@ __ieee754_exp2 (double x) * x + .055504110254308625) * x + .240226506959100583) * x + .69314718055994495) * ex2_u.d; + math_opt_barrier (x22); /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex). */ - fesetenv (&oldenv); + libc_fesetenv (&oldenv); result = x22 * x + ex2_u.d; diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c b/sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c index cb49019ddb..861da20b10 100644 --- a/sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c +++ b/sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c @@ -24,22 +24,14 @@ #include "math.h" #include "math_private.h" -#ifdef __STDC__ static const double -#else -static double -#endif TWO52[2]={ 4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */ -4.50359962737049600000e+15, /* 0xC3300000, 0x00000000 */ }; -#ifdef __STDC__ - double __nearbyint(double x) -#else - double __nearbyint(x) - double x; -#endif +double +__nearbyint(double x) { fenv_t env; int64_t i0,sx; @@ -47,20 +39,19 @@ TWO52[2]={ EXTRACT_WORDS64(i0,x); sx = (i0>>63)&1; j0 = ((i0>>52)&0x7ff)-0x3ff; - if(j0<52) { + if(__builtin_expect(j0<52, 1)) { if(j0<0) { if((i0&UINT64_C(0x7fffffffffffffff))==0) return x; uint64_t i = i0 & UINT64_C(0xfffffffffffff); i0 &= UINT64_C(0xfffe000000000000); i0 |= (((i|-i) >> 12) & UINT64_C(0x8000000000000)); INSERT_WORDS64(x,i0); - feholdexcept (&env); + libc_feholdexcept (&env); double w = TWO52[sx]+x; double t = w-TWO52[sx]; - fesetenv (&env); - EXTRACT_WORDS64(i0,t); - INSERT_WORDS64(t,(i0&UINT64_C(0x7fffffffffffffff))|(sx<<63)); - return t; + math_opt_barrier(t); + libc_fesetenv (&env); + return copysign(t, x); } else { uint64_t i = UINT64_C(0x000fffffffffffff)>>j0; if((i0&i)==0) return x; /* x is integral */ @@ -73,10 +64,11 @@ TWO52[2]={ else return x; /* x is integral */ } INSERT_WORDS64(x,i0); - feholdexcept (&env); + libc_feholdexcept (&env); double w = TWO52[sx]+x; double t = w-TWO52[sx]; - fesetenv (&env); + math_opt_barrier (t); + libc_fesetenv (&env); return 
t; } weak_alias (__nearbyint, nearbyint) diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h index 6cff8b3161..4886c64dc3 100644 --- a/sysdeps/x86_64/fpu/math_private.h +++ b/sysdeps/x86_64/fpu/math_private.h @@ -118,3 +118,45 @@ do { \ __res; }) # endif #endif + + +/* Specialized variants of the interfaces which only handle + either the FPU or the SSE unit. */ +#undef libc_fegetround +#define libc_fegetround() \ + ({ \ + unsigned int mxcsr; \ + asm volatile ("stmxcsr %0" : "=m" (*&mxcsr)); \ + (mxcsr & 0x6000) >> 3; \ + }) +// #define libc_fegetroundf() fegetround () +// #define libc_fegetroundl() fegetround () + +#undef libc_fesetround +#define libc_fesetround(r) \ + do { \ + unsigned int mxcsr; \ + asm ("stmxcsr %0" : "=m" (*&mxcsr)); \ + mxcsr = (mxcsr & ~0x6000) | ((r) << 3); \ + asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \ + } while (0) +// #define libc_fesetroundf(r) (void) fesetround (r) +// #define libc_fesetroundl(r) (void) fesetround (r) + +#undef libc_feholdexcept +#define libc_feholdexcept(e) \ + do { \ + unsigned int mxcsr; \ + asm ("stmxcsr %0" : "=m" (*&mxcsr)); \ + (e)->__mxcsr = mxcsr; \ + mxcsr = (mxcsr | 0x1f80) & ~0x3f; \ + asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \ + } while (0) +// #define libc_feholdexceptf(e) (void) feholdexcept (e) +// #define libc_feholdexceptl(e) (void) feholdexcept (e) + +#undef libc_fesetenv +#define libc_fesetenv(e) \ + asm volatile ("ldmxcsr %0" : : "m" ((e)->__mxcsr)) +// #define libc_fesetenvf(e) (void) fesetenv (e) +// #define libc_fesetenvl(e) (void) fesetenv (e) -- 2.43.5