sourceware.org Git - glibc.git/commitdiff
powerpc: Use faster means to access FPSCR when possible in some cases
author: Paul A. Clarke <pc@us.ibm.com>
Thu, 20 Jun 2019 16:57:18 +0000 (11:57 -0500)
committer: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
Sun, 30 Jun 2019 11:40:44 +0000 (08:40 -0300)
Using the 'mffs' instruction to read the Floating-Point Status and Control
Register (FPSCR) can force a processor flush in some cases, with undesirable
performance impact.  If the values of the bits in the FPSCR which force the
flush are not needed, an instruction that is new to POWER9 (ISA version 3.0),
'mffsl', can be used instead.

Cases included:  get_rounding_mode, fegetround, fegetmode, fegetexcept.

* sysdeps/powerpc/bits/fenvinline.h (__fegetround): Use
__fegetround_ISA300() or __fegetround_ISA2() as appropriate.
(__fegetround_ISA300): New.
(__fegetround_ISA2): New.
* sysdeps/powerpc/fpu_control.h (IS_ISA300): New.
(__FPU_MFFS): Move implementation...
(_FPU_GETCW): Here.
(__FPU_MFFSL): Move implementation...
(_FPU_GET_RC_ISA300): Here.  New.
(_FPU_GET_RC): Use _FPU_GET_RC_ISA300() or _FPU_GETCW() as appropriate.
* sysdeps/powerpc/fpu/fenv_libc.h (fegetenv_status_ISA300): New.
(fegetenv_status): New.
* sysdeps/powerpc/fpu/fegetmode.c (fegetmode): Use fegetenv_status()
instead of fegetenv_register().
* sysdeps/powerpc/fpu/fegetexcept.c (__fegetexcept): Likewise.

Reviewed-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
ChangeLog
sysdeps/powerpc/bits/fenvinline.h
sysdeps/powerpc/fpu/fegetexcept.c
sysdeps/powerpc/fpu/fegetmode.c
sysdeps/powerpc/fpu/fenv_libc.h
sysdeps/powerpc/fpu_control.h

index aece032385bdb6af8a8a46aec45e68e3f126137c..abab02f1540be04dc5cc0ae574d435e6dbc098ab 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+2019-06-30  Paul A. Clarke  <pc@us.ibm.com>
+
+       * sysdeps/powerpc/bits/fenvinline.h (__fegetround): Use
+       __fegetround_ISA300() or __fegetround_ISA2() as appropriate.
+       (__fegetround_ISA300) New.
+       (__fegetround_ISA2) New.
+       * sysdeps/powerpc/fpu_control.h (IS_ISA300): New.
+       (_FPU_MFFS): Move implementation...
+       (_FPU_GETCW): Here.
+       (_FPU_MFFSL): Move implementation....
+       (_FPU_GET_RC_ISA300): Here. New.
+       (_FPU_GET_RC): Use _FPU_GET_RC_ISA300() or _FPU_GETCW() as appropriate.
+       * sysdeps/powerpc/fpu/fenv_libc.h (fegetenv_status_ISA300): New.
+       (fegetenv_status): New.
+       * sysdeps/powerpc/fpu/fegetmode.c (fegetmode): Use fegetenv_status()
+       instead of fegetenv_register().
+       * sysdeps/powerpc/fpu/fegetexcept.c (__fegetexcept): Likewise.
+
 2019-06-28  Wilco Dijkstra  <wdijkstr@arm.com>
 
        * benchtests/bench-math-inlines.c: Increase iterations.
index 7079d1a46d44c6cebc4e9d9b58218cb57a0c6c8d..56ac0f3ef3315e1c045a2da5773d410d37dbaec8 100644 (file)
 
 #if defined __GNUC__ && !defined _SOFT_FLOAT && !defined __NO_FPRS__
 
-/* Inline definition for fegetround.  */
-# define __fegetround() \
-  (__extension__  ({ int __fegetround_result;                                \
-                    __asm__ __volatile__                                     \
-                      ("mcrfs 7,7 ; mfcr %0"                                 \
-                       : "=r"(__fegetround_result) : : "cr7");               \
-                    __fegetround_result & 3; }))
+/* Inline definitions for fegetround.  */
+# define __fegetround_ISA300()                                         \
+  (__extension__  ({                                                   \
+    union { double __d; unsigned long long __ll; } __u;                        \
+    __asm__ __volatile__ (                                             \
+      ".machine push; .machine \"power9\"; mffsl %0; .machine pop"     \
+      : "=f" (__u.__d));                                               \
+    __u.__ll & 0x0000000000000003LL;                                   \
+  }))
+
+# define __fegetround_ISA2()                                           \
+  (__extension__  ({                                                   \
+     int __fegetround_result;                                          \
+     __asm__ __volatile__ ("mcrfs 7,7 ; mfcr %0"                       \
+                          : "=r"(__fegetround_result) : : "cr7");      \
+     __fegetround_result & 3;                                          \
+  }))
+
+# ifdef _ARCH_PWR9
+#  define __fegetround() __fegetround_ISA300()
+# elif defined __BUILTIN_CPU_SUPPORTS__
+#  define __fegetround()                                               \
+  (__glibc_likely (__builtin_cpu_supports ("arch_3_00"))               \
+   ? __fegetround_ISA300()                                             \
+   : __fegetround_ISA2()                                               \
+  )
+# else
+#  define __fegetround() __fegetround_ISA2()
+# endif
+
 # define fegetround() __fegetround ()
 
 # ifndef __NO_MATH_INLINES
index 2173d77d1dd284c4a2c3a6f3ba3dcc7768c91f83..10a37f0d44b20b4a9e61b166e7d4a922f2913e89 100644 (file)
@@ -24,7 +24,7 @@ __fegetexcept (void)
 {
   fenv_union_t fe;
 
-  fe.fenv = fegetenv_register ();
+  fe.fenv = fegetenv_status ();
 
   return fenv_reg_to_exceptions (fe.l);
 }
index f43ab60f333b4b752d513587661507076c38cad3..466f5b70986d56e48ae82a817610722229fbaae8 100644 (file)
@@ -21,6 +21,6 @@
 int
 fegetmode (femode_t *modep)
 {
-  *modep = fegetenv_register ();
+  *modep = fegetenv_status ();
   return 0;
 }
index f66bf246cb89c2b53b5ceae0a8c2ca4c6d1cd54e..55b1697c03b8b336fe96d9dc3a375f5620446c00 100644 (file)
@@ -34,6 +34,27 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
    pointer.  */
 #define fegetenv_register() __builtin_mffs()
 
+/* Equivalent to fegetenv_register, but only returns bits for
+   status, exception enables, and mode.  */
+
+#define fegetenv_status_ISA300()                                       \
+  ({register double __fr;                                              \
+    __asm__ __volatile__ (                                             \
+      ".machine push; .machine \"power9\"; mffsl %0; .machine pop"     \
+      : "=f" (__fr));                                                  \
+    __fr;                                                              \
+  })
+
+#ifdef _ARCH_PWR9
+# define fegetenv_status() fegetenv_status_ISA300()
+#else
+# define fegetenv_status()                                             \
+  (__glibc_likely (__builtin_cpu_supports ("arch_3_00"))               \
+   ? fegetenv_status_ISA300()                                          \
+   : fegetenv_register()                                               \
+  )
+#endif
+
 /* Equivalent to fesetenv, but takes a fenv_t instead of a pointer.  */
 #define fesetenv_register(env) \
        do { \
index fa04a67643599973a3e15c1aaab59993cfc678be..d0fdad7b45c6c878d5d2ee887e434a9fe762b58f 100644 (file)
@@ -65,35 +65,37 @@ extern fpu_control_t __fpu_control;
 typedef unsigned int fpu_control_t;
 
 /* Macros for accessing the hardware control word.  */
-# define __FPU_MFFS()                                          \
-  ({register double __fr;                                      \
-    __asm__ __volatile__("mffs %0" : "=f" (__fr));             \
-    __fr;                                                      \
-  })
-
 # define _FPU_GETCW(cw)                                                \
   ({union { double __d; unsigned long long __ll; } __u;                \
-    __u.__d = __FPU_MFFS();                                    \
+    __asm__ __volatile__("mffs %0" : "=f" (__u.__d));          \
     (cw) = (fpu_control_t) __u.__ll;                           \
     (fpu_control_t) __u.__ll;                                  \
   })
 
-#ifdef _ARCH_PWR9
-# define __FPU_MFFSL()                                         \
-  ({register double __fr;                                      \
-    __asm__ __volatile__("mffsl %0" : "=f" (__fr));            \
-    __fr;                                                      \
+# define _FPU_GET_RC_ISA300()                                          \
+  ({union { double __d; unsigned long long __ll; } __u;                        \
+    __asm__ __volatile__(                                              \
+      ".machine push; .machine \"power9\"; mffsl %0; .machine pop"     \
+      : "=f" (__u.__d));                                               \
+    (fpu_control_t) (__u.__ll & _FPU_MASK_RC);                         \
   })
-#else
-# define __FPU_MFFSL() __FPU_MFFS()
-#endif
-    
-# define _FPU_GET_RC()                                         \
-  ({union { double __d; unsigned long long __ll; } __u;                \
-    __u.__d = __FPU_MFFSL();                                   \
-    __u.__ll &= _FPU_MASK_RC;                                  \
-    (fpu_control_t) __u.__ll;                                  \
+
+# ifdef _ARCH_PWR9
+#  define _FPU_GET_RC() _FPU_GET_RC_ISA300()
+# elif defined __BUILTIN_CPU_SUPPORTS__
+#  define _FPU_GET_RC()                                                        \
+  ({fpu_control_t __rc;                                                        \
+    __rc = __glibc_likely (__builtin_cpu_supports ("arch_3_00"))       \
+      ? _FPU_GET_RC_ISA300 ()                                          \
+      : _FPU_GETCW (__rc) & _FPU_MASK_RC;                              \
+    __rc;                                                              \
+  })
+# else
+#  define _FPU_GET_RC()                                                \
+  ({fpu_control_t __rc = _FPU_GETCW (__rc) & _FPU_MASK_RC;     \
+    __rc;                                                      \
   })
+# endif
 
 # define _FPU_SETCW(cw)                                                \
   { union { double __d; unsigned long long __ll; } __u;                \
This page took 0.080291 seconds and 5 git commands to generate.