From d38f1dba009689d78af371cffa091b27e4ebe17d Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@gmail.com>
Date: Tue, 18 Oct 2011 09:00:46 -0400
Subject: [PATCH] Start optimizing the use of the fenv interfaces in libm
 itself

---
 ChangeLog                                     | 15 +++++++
 math/math_private.h                           | 22 ++++++++++
 sysdeps/ieee754/dbl-64/e_exp2.c               | 19 +++------
 .../ieee754/dbl-64/wordsize-64/s_nearbyint.c  | 28 +++++--------
 sysdeps/x86_64/fpu/math_private.h             | 42 +++++++++++++++++++
 5 files changed, 95 insertions(+), 31 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 1ca1cca3c4..c391f612ea 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2011-10-18  Ulrich Drepper  <drepper@gmail.com>
+
+	* math/math_private.h: Define defaults for libc_fegetround,
+	libc_fegetroundf, libc_fegetroundl, libc_fesetround, libc_fesetroundf,
+	libc_fesetroundl, libc_feholdexcept, libc_feholdexceptf,
+	libc_feholdexceptl, libc_fesetenv, libc_fesetenvf, libc_fesetenvl.
+	* sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c: Use
+	libc_feholdexcept and libc_fesetenv instead of the standard
+	functions.
+	* sysdeps/ieee754/dbl-64/e_exp2.c: Likewise; also use libc_fesetround.
+	Remove comments and hacks for old compiler versions.
+	* sysdeps/x86_64/fpu/math_private.h: Define special versions of
+	libc_fegetround, libc_fesetround, libc_feholdexcept, and
+	libc_fesetenv.
+
 2011-10-18  Andreas Schwab  <schwab@redhat.com>
 
 	* sysdeps/x86_64/fpu/bits/fenv.h: Add C linkage markers.
diff --git a/math/math_private.h b/math/math_private.h
index c5fbf15f65..a1ce0142b1 100644
--- a/math/math_private.h
+++ b/math/math_private.h
@@ -358,4 +358,26 @@ extern void __docos (double __x, double __dx, double __v[]);
 #define math_force_eval(x) __asm __volatile ("" : : "m" (x))
 #endif
 
+
+/* <fenv.h> as standardized offers just one variant of each interface.
+   On at least some architectures libm can be more efficient when the
+   kind of operations to be performed is known, so additional
+   internal interfaces are defined here.  These defaults map every
+   variant onto the corresponding standard function.  */
+#define libc_fegetround()	(fegetround ())
+#define libc_fegetroundf()	(fegetround ())
+#define libc_fegetroundl()	(fegetround ())
+
+#define libc_fesetround(r)	((void) fesetround (r))
+#define libc_fesetroundf(r)	((void) fesetround (r))
+#define libc_fesetroundl(r)	((void) fesetround (r))
+
+#define libc_feholdexcept(e)	((void) feholdexcept (e))
+#define libc_feholdexceptf(e)	((void) feholdexcept (e))
+#define libc_feholdexceptl(e)	((void) feholdexcept (e))
+
+#define libc_fesetenv(e)	((void) fesetenv (e))
+#define libc_fesetenvf(e)	((void) fesetenv (e))
+#define libc_fesetenvl(e)	((void) fesetenv (e))
+
 #endif /* _MATH_PRIVATE_H_ */
diff --git a/sysdeps/ieee754/dbl-64/e_exp2.c b/sysdeps/ieee754/dbl-64/e_exp2.c
index c973f35673..734e476ce5 100644
--- a/sysdeps/ieee754/dbl-64/e_exp2.c
+++ b/sysdeps/ieee754/dbl-64/e_exp2.c
@@ -25,9 +25,6 @@
    17 (1), March 1991, pp. 26-45.
    It has been slightly modified to compute 2^x instead of e^x.
    */
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
 #include <stdlib.h>
 #include <float.h>
 #include <ieee754.h>
@@ -38,13 +35,8 @@
 
 #include "t_exp2.h"
 
-/* XXX I know the assembler generates a warning about incorrect section
-   attributes. But without the attribute here the compiler places the
-   constants in the .data section.  Ideally the constant is placed in
-   .rodata.cst8 so that it can be merged, but gcc sucks, it ICEs when
-   we try to force this section on it.  --drepper  */
-static const volatile double TWO1023 = 8.988465674311579539e+307;
-static const volatile double TWOM1000 = 9.3326361850321887899e-302;
+static const double TWO1023 = 8.988465674311579539e+307; /* 2^1023 */
+static const double TWOM1000 = 9.3326361850321887899e-302; /* 2^-1000 */
 
 double
 __ieee754_exp2 (double x)
@@ -72,10 +64,10 @@ __ieee754_exp2 (double x)
       union ieee754_double ex2_u, scale_u;
       fenv_t oldenv;
 
-      feholdexcept (&oldenv);
+      libc_feholdexcept (&oldenv);
 #ifdef FE_TONEAREST
       /* If we don't have this, it's too bad.  */
-      fesetround (FE_TONEAREST);
+      libc_fesetround (FE_TONEAREST);
 #endif
 
       /* 1. Argument reduction.
@@ -120,9 +112,10 @@ __ieee754_exp2 (double x)
 	       * x + .055504110254308625)
 	      * x + .240226506959100583)
 	     * x + .69314718055994495) * ex2_u.d;
+      math_force_eval (x22);  /* Compute x22 before restoring env.  */
 
       /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex).  */
-      fesetenv (&oldenv);
+      libc_fesetenv (&oldenv);
 
       result = x22 * x + ex2_u.d;
 
diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c b/sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c
index cb49019ddb..861da20b10 100644
--- a/sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c
+++ b/sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c
@@ -24,22 +24,14 @@
 #include "math.h"
 #include "math_private.h"
 
-#ifdef __STDC__
 static const double
-#else
-static double
-#endif
 TWO52[2]={
   4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
  -4.50359962737049600000e+15, /* 0xC3300000, 0x00000000 */
 };
 
-#ifdef __STDC__
-	double __nearbyint(double x)
-#else
-	double __nearbyint(x)
-	double x;
-#endif
+double
+__nearbyint(double x)
 {
 	fenv_t env;
 	int64_t i0,sx;
@@ -47,20 +39,19 @@ TWO52[2]={
 	EXTRACT_WORDS64(i0,x);
 	sx = (i0>>63)&1;
 	j0 = ((i0>>52)&0x7ff)-0x3ff;
-	if(j0<52) {
+	if(__builtin_expect(j0<52, 1)) {
 	    if(j0<0) {
 	      if((i0&UINT64_C(0x7fffffffffffffff))==0) return x;
 		uint64_t i = i0 & UINT64_C(0xfffffffffffff);
 		i0 &= UINT64_C(0xfffe000000000000);
 		i0 |= (((i|-i) >> 12) & UINT64_C(0x8000000000000));
 		INSERT_WORDS64(x,i0);
-		feholdexcept (&env);
+		libc_feholdexcept (&env);
 		double w = TWO52[sx]+x;
 		double t =  w-TWO52[sx];
-		fesetenv (&env);
-		EXTRACT_WORDS64(i0,t);
-		INSERT_WORDS64(t,(i0&UINT64_C(0x7fffffffffffffff))|(sx<<63));
-		return t;
+		math_force_eval(t);  /* Compute t before restoring env.  */
+		libc_fesetenv (&env);
+		return copysign(t, x);
 	    } else {
 		uint64_t i = UINT64_C(0x000fffffffffffff)>>j0;
 		if((i0&i)==0) return x; /* x is integral */
@@ -73,10 +64,11 @@ TWO52[2]={
 	    else return x;		/* x is integral */
 	}
 	INSERT_WORDS64(x,i0);
-	feholdexcept (&env);
+	libc_feholdexcept (&env);
 	double w = TWO52[sx]+x;
 	double t = w-TWO52[sx];
-	fesetenv (&env);
+	math_force_eval (t);  /* Compute t before restoring env.  */
+	libc_fesetenv (&env);
 	return t;
 }
 weak_alias (__nearbyint, nearbyint)
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 6cff8b3161..4886c64dc3 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -118,3 +118,45 @@ do {								\
      __res; })
 # endif
 #endif
+
+
+/* Specialized variants of the <fenv.h> interfaces which only handle
+   either the FPU or the SSE unit.  */
+/* SSE-only fegetround: MXCSR rounding-control field (0x6000) >> 3, as an
+   int like fegetround.  The f and l variants are not specialized here.  */
+#undef libc_fegetround
+#define libc_fegetround() \
+  ({									      \
+     unsigned int __csr;						      \
+     asm volatile ("stmxcsr %0" : "=m" (*&__csr));			      \
+     (int) ((__csr & 0x6000) >> 3);					      \
+  })
+
+/* SSE-only fesetround: replace the rounding-control field (0x6000) of
+   MXCSR with R << 3.  The f and l variants are not specialized here.  */
+#undef libc_fesetround
+#define libc_fesetround(r) \
+  do {									      \
+     unsigned int __csr;						      \
+     asm volatile ("stmxcsr %0" : "=m" (*&__csr));			      \
+     __csr = (__csr & ~0x6000) | ((r) << 3);				      \
+     asm volatile ("ldmxcsr %0" : : "m" (*&__csr));			      \
+  } while (0)
+
+/* SSE-only feholdexcept: save MXCSR in E->__mxcsr (nothing else in *E is
+   written), then set the mask bits 0x1f80 and clear the flag bits 0x3f.  */
+#undef libc_feholdexcept
+#define libc_feholdexcept(e) \
+  do {									      \
+     unsigned int __csr;						      \
+     asm volatile ("stmxcsr %0" : "=m" (*&__csr));			      \
+     (e)->__mxcsr = __csr;						      \
+     __csr = (__csr | 0x1f80) & ~0x3f;					      \
+     asm volatile ("ldmxcsr %0" : : "m" (*&__csr));			      \
+  } while (0)
+
+/* SSE-only fesetenv: reload MXCSR from E->__mxcsr and nothing else.
+   libc_fesetenvf and libc_fesetenvl are not specialized here.  */
+#undef libc_fesetenv
+#define libc_fesetenv(e) \
+  asm volatile ("ldmxcsr %0" : : "m" ((e)->__mxcsr))
-- 
2.43.5