This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] PowerPC - ilogb[f|l] optimization for POWER7


On 05/10/2012 04:46 PM, Adhemerval Zanella wrote:
> On 05/10/2012 12:55 PM, Richard Henderson wrote:
>> Is this really so much better than what the compiler can generate?
>> If the dispatch group split is that critical, you ought to fix it
>> in the scheduler -- it's not like these are the only two double->int
>> conversions in the entire library.
>>
>> The documentation for this macro is sorely lacking too.  At first I
>> thought you meant EXTRACT_WORDS64 but hadn't known what the macro
>> was supposed to be called.
> Currently, yes it is. For POWER7, on FP<->INT transformations using unions or casts
> the compiler will not reschedule neither, in case the reschedule is not possible, 
> it will generate dispatch groups split by inserting an 'ori 2,0,0' between the
> load and/or store instructions. And libm uses it extensively.
>
> I already point out some improvements I got by reimplementing somes FP<->INT macros
> in "sysdeps/generic/math_private.h" by manually inserting the dispatch group split,
> but I'm not sure why compiler guys didn't take a look at it yet.
>
Your questions made me reevaluate my previous patch, and I was able to create a
faster and simpler version. I reimplemented ilogb for wordsize-64 and added new
macros to move the bits of a double or float into an int64_t or int32_t. The
macros do pretty much what the compiler would do, but with the optimization of
explicitly adding an 'ori 2,2,0' (as in the macros below) between the store and
the load.

This patch speeds up ilogb (75% on PPC32 and PPC64) and ilogbf
(60% on PPC32 and 50% on PPC64). I also got a small speedup (about 5%)
on x86_64.

Another advantage of this patch is that it optimizes not only ilogb for POWER7,
but also the various libm functions that rely on these macros.

Tested on ppc32, ppc64 and x86_64.

---

2012-05-11 Adhemerval Zanella  <azanella@linux.vnet.ibm.com>

	* sysdeps/ieee754/dbl-64/wordsize-64/e_ilogb.c: New file.
	* sysdeps/powerpc/fpu/math_private.h (EXTRACT_WORDS64, GET_HIGH_WORD,
	GET_LOW_WORD, GET_FLOAT_WORD): Add floating-point to integer macros
	for POWER7 with an explicit dispatch group split instruction.
	* sysdeps/powerpc/powerpc64/Implies: Add ieee754/dbl-64/wordsize-64.
	* math/libm-test.inc: Add more ilogb tests.


diff --git a/math/libm-test.inc b/math/libm-test.inc
index 5a38dbf..170d3fd 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -4100,6 +4100,22 @@ ilogb_test (void)
   TEST_f_i (ilogb, minus_infty, INT_MAX, INVALID_EXCEPTION);
   check_int ("errno for ilogb(-Inf) unchanged", errno, EDOM, 0, 0, 0);
 
+  TEST_f_f (ilogb, 0x0.1p-127, -131);
+  TEST_f_f (ilogb, 0x0.01p-127, -135);
+  TEST_f_f (ilogb, 0x0.011p-127, -135);
+#ifndef TEST_FLOAT
+  TEST_f_f (ilogb, 0x0.8p-1022, -1023);
+  TEST_f_f (ilogb, 0x0.1p-1022, -1026);
+  TEST_f_f (ilogb, 0x0.00111p-1022, -1034);
+  TEST_f_f (ilogb, 0x0.00001p-1022, -1042);
+  TEST_f_f (ilogb, 0x0.000011p-1022, -1042);
+  TEST_f_f (ilogb, 0x0.0000000000001p-1022, -1074);
+#endif
+#if defined TEST_LDOUBLE && LDBL_MIN_EXP - LDBL_MANT_DIG <= -16400
+  TEST_f_f (ilogb, 0x1p-16400L, -16400);
+  TEST_f_f (ilogb, 0x.00000000001p-16382L, -16426);
+#endif
+
   END (ilogb);
 }
 
diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/e_ilogb.c b/sysdeps/ieee754/dbl-64/wordsize-64/e_ilogb.c
new file mode 100644
index 0000000..428c676
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/wordsize-64/e_ilogb.c
@@ -0,0 +1,49 @@
+/* @(#)s_ilogb.c 5.1 93/09/24 */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+
+/* ilogb(double x)
+ * return the unbiased binary exponent of non-zero x (subnormals included)
+ * ilogb(0) = FP_ILOGB0
+ * ilogb(NaN) = FP_ILOGBNAN (no signal is raised)
+ * ilogb(+-Inf) = INT_MAX (no signal is raised)
+ */
+
+#include <limits.h>
+#include <math.h>
+#include <math_private.h>
+
+int
+__ieee754_ilogb (double x)
+{
+  int64_t lx;			/* bit image of x */
+  EXTRACT_WORDS64(lx, x);
+
+  lx &= INT64_C (0x7fffffffffffffff);	/* clear bit 63 (the sign) */
+  if (lx < INT64_C (0x0010000000000000))	/* bits 52..62 all zero: zero or subnormal */
+    {
+      if (lx == 0)
+	return FP_ILOGB0;	/* ilogb(0) = FP_ILOGB0 */
+      /* subnormal x: highest set bit is bit 63 - m, worth 2^(63 - m - 1074) */
+      int m = __builtin_clzll (lx);
+      return -1022 + (11 - m);
+    }
+  else if (lx < INT64_C (0x7ff0000000000000))	/* exponent field not all ones */
+    return (lx >> 52) - 1023;	/* exponent field minus the bias 1023 */
+  else if (FP_ILOGBNAN != INT_MAX)
+    {
+      /* ISO C99 requires ilogb(+-Inf) == INT_MAX.  */
+      if ((lx ^ INT64_C (0x7ff0000000000000)) == 0)	/* fraction bits all zero */
+	return INT_MAX;
+    }
+  return FP_ILOGBNAN;	/* NaN; also Inf when FP_ILOGBNAN == INT_MAX */
+}
diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
index a916be3..921994f 100644
--- a/sysdeps/powerpc/fpu/math_private.h
+++ b/sysdeps/powerpc/fpu/math_private.h
@@ -25,6 +25,73 @@
 #include <dl-procinfo.h>
 #include_next <math_private.h>
 
+#if defined(_ARCH_PWR7)
+
+# if __WORDSIZE == 64
+
+/* Copy the 64-bit image of double D into integer I: stfd stores D to a stack temporary, 'ori 2,2,0' splits the dispatch group, ld reloads it.  NOTE(review): the stfd destination iw_u.value is declared as an "m" input although the asm writes it -- confirm whether it should be an output.  */
+#undef EXTRACT_WORDS64	/* drop any definition from the math_private.h included above */
+#define EXTRACT_WORDS64(i, d)                          \
+  do {                                                 \
+    int64_t i_;                                        \
+    ieee_double_shape_type iw_u;                       \
+    __asm (                                            \
+      "stfd   %1,%2\n"                                 \
+      "ori    2,2,0\n"                                 \
+      "ld     %0,%3"                                   \
+      : "=r" (i_)                                      \
+      : "f" (d), "m" (iw_u.value), "m" (iw_u.word));   \
+    (i) = i_;                                          \
+  } while (0)
+# endif /* __WORDSIZE == 64  */
+
+#undef GET_HIGH_WORD	/* Redefined below: I = the iw_u.parts.msw 32-bit word of double D, via stfd / 'ori 2,2,0' (dispatch group split) / lwz.  */
+#define GET_HIGH_WORD(i, d)                            \
+  do {                                                 \
+    int32_t i_;                                        \
+    ieee_double_shape_type iw_u;                       \
+    __asm (                                            \
+      "stfd   %1,%2\n"                                 \
+      "ori    2,2,0\n"                                 \
+      "lwz    %0,%3"                                   \
+      : "=r" (i_)                                      \
+      : "f" (d), "m" (iw_u.value),                     \
+        "m" (iw_u.parts.msw));                         \
+    (i) = i_;                                          \
+  } while (0)
+
+#undef GET_LOW_WORD	/* Redefined below: I = the iw_u.parts.lsw 32-bit word of double D, via stfd / 'ori 2,2,0' (dispatch group split) / lwz.  */
+#define GET_LOW_WORD(i, d)                             \
+  do {                                                 \
+    int32_t i_;                                        \
+    ieee_double_shape_type iw_u;                       \
+    __asm (                                            \
+      "stfd   %1,%2\n"                                 \
+      "ori    2,2,0\n"                                 \
+      "lwz    %0,%3"                                   \
+      : "=r" (i_)                                      \
+      : "f" (d), "m" (iw_u.value),                     \
+        "m" (iw_u.parts.lsw));                         \
+    (i) = i_;                                          \
+  } while (0)
+
+/* Copy the 32-bit image of float F into integer I: stfs stores F to a stack temporary, 'ori 2,2,0' splits the dispatch group, lwz reloads it.  NOTE(review): unlike the sibling macros, the asm template ends with a stray "\n" after lwz; presumably harmless -- confirm.  */
+#undef GET_FLOAT_WORD
+#define GET_FLOAT_WORD(i, f)                           \
+  do {                                                 \
+    int32_t i_;                                        \
+    ieee_float_shape_type gf_u;                        \
+    __asm (                                            \
+      "stfs   %1,%2\n"                                 \
+      "ori    2,2,0\n"                                 \
+      "lwz    %0,%3\n"                                 \
+      : "=r" (i_)                                      \
+      : "f" (f), "m" (gf_u.value), "m" (gf_u.word));   \
+    (i) = i_;                                          \
+  } while (0)
+#endif
+
+
 # if __WORDSIZE == 64 || defined _ARCH_PWR4
 #  define __CPU_HAS_FSQRT 1
 # else
diff --git a/sysdeps/powerpc/powerpc64/Implies b/sysdeps/powerpc/powerpc64/Implies
index a8cae95..7603c98 100644
--- a/sysdeps/powerpc/powerpc64/Implies
+++ b/sysdeps/powerpc/powerpc64/Implies
@@ -1 +1,2 @@
 wordsize-64
+ieee754/dbl-64/wordsize-64
-- 
1.7.9.5



-- 
Adhemerval Zanella Netto
  Software Engineer
  Linux Technology Center Brazil
  Toolchain / GLIBC on Power Architecture
  azanella@linux.vnet.ibm.com / azanella@br.ibm.com
  +55 61 8642-9890


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]