This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] PowerPC - ilogb[f|l] optimization for POWER7


This patch provides optimized ilogb (60% faster on PPC32 and 20% on PPC64),
ilogbf (60% on PPC32 and 50% on PPC64), and ilogbl (3% on PPC32
and 8% on PPC64). The optimization is done by avoiding floating-point
to integer transformation and by using VSX floating-point bitwise
instructions.

---

2012-05-08  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>

	* sysdeps/powerpc/powerpc32/power7/fpu/w_ilogb.c: New file: optimized
	ilogb for POWER7.
	* sysdeps/powerpc/powerpc32/power7/fpu/w_ilogbf.c: New file: optimized
	ilogbf for POWER7.
	* sysdeps/powerpc/powerpc32/power7/fpu/w_ilogbl.c: New file: optimized
	ilogbl for POWER7.
	* sysdeps/powerpc/powerpc64/power7/fpu/w_ilogb.c: New file: wrapper
	for the optimized ilogb for PPC64.
	* sysdeps/powerpc/powerpc64/power7/fpu/w_ilogbf.c: New file: wrapper
	for the optimized ilogbf for PPC64.
	* sysdeps/powerpc/powerpc64/power7/fpu/w_ilogbl.c: New file: wrapper
	for the optimized ilogbl for PPC64.
	* sysdeps/powerpc/powerpc32/power7/fpu/e_ilogb.c: New file: blank file
	to avoid compilation of default implementation.
	* sysdeps/powerpc/powerpc32/power7/fpu/e_ilogbf.c: Likewise.
	* sysdeps/powerpc/powerpc32/power7/fpu/e_ilogbl.c: Likewise.
	* sysdeps/powerpc/powerpc64/power7/fpu/e_ilogb.c: Likewise.
	* sysdeps/powerpc/powerpc64/power7/fpu/e_ilogbf.c: Likewise.
	* sysdeps/powerpc/powerpc64/power7/fpu/e_ilogbl.c: Likewise.
	* sysdeps/powerpc/fpu/math_private.h: Add optimized double to word
	cast for POWER7.
	* math/libm-test.inc (ilogb_test): New ilogb tests. 

diff --git a/math/libm-test.inc b/math/libm-test.inc
index 542131d..677e5e9 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -4082,6 +4082,10 @@ ilogb_test (void)
   TEST_f_i (ilogb, M_El, 1);
   TEST_f_i (ilogb, 1024, 10);
   TEST_f_i (ilogb, -2000, 10);
+  TEST_f_i (ilogb, 1.701412e+38, 127);
+#ifndef TEST_FLOAT
+  TEST_f_i (ilogb, 8.988466e+307, 1023);
+#endif
 
   /* ilogb (0.0) == FP_ILOGB0 plus invalid exception  */
   errno = 0;
diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
index a916be3..aa3053a 100644
--- a/sysdeps/powerpc/fpu/math_private.h
+++ b/sysdeps/powerpc/fpu/math_private.h
@@ -25,6 +25,29 @@
 #include <dl-procinfo.h>
 #include_next <math_private.h>
 
+#if defined(_ARCH_PWR7)
+/* Optimized double to word cast for POWER7: convert D to a 32-bit
+   integer, rounding toward zero, and assign it to I.  The 'ori 2,2,0'
+   between the store double / load integer forces a new dispatch group.
+   NOTE(review): check that iw_u.word addresses the word fctiwz fills.  */
+#undef DOUBLE_TO_WORDS
+#define DOUBLE_TO_WORDS(d, i)				\
+  do {							\
+    double d__ = (d);					\
+    int32_t i__;					\
+    ieee_double_shape_type iw_u;			\
+    __asm (						\
+      "fctiwz  %1,%1\n"					\
+      "stfd    %1,%2\n"					\
+      "ori     2,2,0\n"					\
+      "lwz     %0,%3\n"					\
+      : "=r" (i__), "+f" (d__), "=m" (iw_u.value)	\
+      : "m" (iw_u.word));				\
+    (i) = i__;						\
+  } while (0)
+
+#endif /* _ARCH_PWR7  */
+
 # if __WORDSIZE == 64 || defined _ARCH_PWR4
 #  define __CPU_HAS_FSQRT 1
 # else
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/e_ilogb.c b/sysdeps/powerpc/powerpc32/power7/fpu/e_ilogb.c
new file mode 100644
index 0000000..34f1a9b
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/e_ilogb.c
@@ -0,0 +1 @@
+/* ilogb implementation is at w_ilogb.c  */
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/e_ilogbf.c b/sysdeps/powerpc/powerpc32/power7/fpu/e_ilogbf.c
new file mode 100644
index 0000000..1bbe157
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/e_ilogbf.c
@@ -0,0 +1 @@
+/* ilogbf implementation is at w_ilogbf.c  */
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/e_ilogbl.c b/sysdeps/powerpc/powerpc32/power7/fpu/e_ilogbl.c
new file mode 100644
index 0000000..7684390
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/e_ilogbl.c
@@ -0,0 +1 @@
+/* ilogbl implementation is at w_ilogbl.c  */
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/w_ilogb.c b/sysdeps/powerpc/powerpc32/power7/fpu/w_ilogb.c
new file mode 100644
index 0000000..525f76b
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/w_ilogb.c
@@ -0,0 +1,83 @@
+/* ilogb(). PowerPC64/POWER7 version.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   Contributed by Adhemerval Zanella Netto <azanella@br.ibm.com>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <errno.h>
+#include <math.h>
+#include <math_private.h>
+
+
+/* This implementation avoids FP to INT conversions by using VSX bitwise
+ * instructions over FP values.  */
+
+static const double two1div52 = 2.220446049250313e-16;	/* 1/2**52 */
+static const double two10m1   = -1023.0;		/* -2**10 + 1 */
+
+static const union {
+  unsigned long long mask;	/* Raw 64-bit pattern.  */
+  double d;			/* Same bits, usable as an FP asm operand.  */
+}
+exp_mask = { 0x7ff0000000000000ULL },	/* Exponent field of a double.  */
+sig_mask = { 0x000FFFFFFFFFFFFFULL };	/* Significand field of a double.  */
+
+/* Return the unbiased exponent of X as an int.  Zero, infinity and NaN
+   raise FE_INVALID, set errno to EDOM and return a sentinel value.  */
+int
+__ilogb (double x)
+{
+  double dret;
+  int ret;
+  if (__builtin_expect (x == 0.0, 0))
+    {
+      feraiseexcept (FE_INVALID);
+      __set_errno (EDOM);
+      return FP_ILOGB0;
+    }
+  /* dret = x & 0x7ff0000000000000;  */
+  asm ("xxland %x0,%x1,%x2\n"
+       "fcfid  %0,%0"
+       : "=f" (dret)
+       : "f" (x), "f" (exp_mask.d));
+  /* dret = (x >> 52) - 1023.0;  */
+  dret = (dret * two1div52) + two10m1;
+  /* Subnormal (zero exponent field): rescale by 2**52, which is exact.  */
+  if (__builtin_expect (dret == two10m1, 0))
+    return __ilogb (x * 0x1p52) - 52;
+  if (__builtin_expect (dret > -two10m1, 0))
+    {
+      double sig;
+      /* sig = x & 0x000FFFFFFFFFFFFF;  */
+      asm ("xxland %x0,%x1,%x2\n"
+	   : "=f" (sig)
+	   : "f" (x), "f" (sig_mask.d));
+      feraiseexcept (FE_INVALID);
+      __set_errno (EDOM);
+      /* NaN differs from Inf by having some significand bits set.  */
+      return sig > 0.0 ? FP_ILOGBNAN : INT_MAX;
+    }
+  DOUBLE_TO_WORDS (dret, ret);
+  return ret;
+}
+
+weak_alias (__ilogb, ilogb)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__ilogb, __ilogbl)
+weak_alias (__ilogb, ilogbl)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/w_ilogbf.c b/sysdeps/powerpc/powerpc32/power7/fpu/w_ilogbf.c
new file mode 100644
index 0000000..a8b56e5
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/w_ilogbf.c
@@ -0,0 +1,78 @@
+/* ilogbf(). PowerPC64/POWER7 version.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   Contributed by Adhemerval Zanella Netto <azanella@br.ibm.com>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <errno.h>
+#include <math.h>
+#include <math_private.h>
+
+
+/* This implementation avoids FP to INT conversions by using VSX bitwise
+ * instructions over FP values.  */
+
+static const double two1div52 = 2.220446049250313e-16;	/* 1/2**52  */
+static const double two10m1   = -1023.0;		/* -2**10 + 1  */
+static const double two7m1    = -127.0;			/* -2**7 + 1  */
+
+static const union {
+  unsigned long long mask;	/* Raw 64-bit pattern.  */
+  double d;			/* Same bits, usable as an FP asm operand.  */
+}
+exp_mask = { 0x7ff0000000000000ULL },	/* Exponent field of a double.  */
+sig_mask = { 0x000FFFFFFFFFFFFFULL };	/* Significand field of a double.  */
+
+/* Return the unbiased exponent of X as an int.  Zero, infinity and NaN
+   raise FE_INVALID, set errno to EDOM and yield FP_ILOGB0, INT_MAX and
+   FP_ILOGBNAN respectively.  */
+int
+__ilogbf (float x)
+{
+  double expo;
+  int result;
+
+  if (__builtin_expect (x == 0.0, 0))
+    {
+      feraiseexcept (FE_INVALID);
+      __set_errno (EDOM);
+      return FP_ILOGB0;
+    }
+
+  /* expo = x & 0x7ff0000000000000, converted from integer to double.  */
+  asm ("xxland %x0,%x1,%x2\n"
+       "fcfid  %0,%0"
+       : "=f" (expo)
+       : "f" (x), "f" (exp_mask.d));
+  /* expo = (x >> 52) - 1023.0;  */
+  expo = (expo * two1div52) + two10m1;
+  if (__builtin_expect (expo > -two7m1, 0))
+    {
+      double frac;
+      /* frac = x & 0x000FFFFFFFFFFFFF;  */
+      asm ("xxland %x0,%x1,%x2\n"
+	   : "=f" (frac)
+	   : "f" (x), "f" (sig_mask.d));
+      feraiseexcept (FE_INVALID);
+      __set_errno (EDOM);
+      /* Only a NaN has significand bits set; otherwise X is infinite.  */
+      return frac > 0.0 ? FP_ILOGBNAN : INT_MAX;
+    }
+  DOUBLE_TO_WORDS (expo, result);
+  return result;
+}
+weak_alias (__ilogbf, ilogbf)
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/w_ilogbl.c b/sysdeps/powerpc/powerpc32/power7/fpu/w_ilogbl.c
new file mode 100644
index 0000000..7563020
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/w_ilogbl.c
@@ -0,0 +1,81 @@
+/* ilogbl(). PowerPC64/POWER7 version.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   Contributed by Adhemerval Zanella Netto <azanella@br.ibm.com>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <errno.h>
+#include <math.h>
+#include <math_private.h>
+#include <math_ldbl_opt.h>
+
+/* This implementation avoids FP to INT conversions by using VSX bitwise
+ * instructions over FP values.  */
+
+static const double two1div52 = 2.220446049250313e-16;	/* 1/2**52 */
+static const double two10m1   = -1023.0;		/* -2**10 + 1 */
+
+static const union {
+  unsigned long long mask;	/* Raw 64-bit pattern.  */
+  double d;			/* Same bits, usable as an FP asm operand.  */
+}
+exp_mask = { 0x7ff0000000000000ULL },	/* Exponent field of a double.  */
+sig_mask = { 0x000FFFFFFFFFFFFFULL };	/* Significand field of a double.  */
+
+/* Return the unbiased exponent of X, computed from XH, the first double
+   ldbl_unpack yields.  Zero, infinity and NaN raise FE_INVALID, set EDOM.  */
+int
+__ilogbl (long double x)
+{
+  double xh, xl;
+  double dret;
+  int ret;
+
+  if (__builtin_expect (x == 0.0L, 0))
+    {
+      feraiseexcept (FE_INVALID);
+      __set_errno (EDOM);
+      return FP_ILOGB0;
+    }
+  ldbl_unpack (x, &xh, &xl);
+  /* dret = xh & 0x7ff0000000000000;  */
+  asm ("xxland %x0,%x1,%x2\n"
+       "fcfid  %0,%0"
+       : "=f" (dret)
+       : "f" (xh), "f" (exp_mask.d));
+  /* dret = (xh >> 52) - 1023.0;  */
+  dret = (dret * two1div52) + two10m1;
+  /* Subnormal high part (zero exponent field): rescale by 2**52.  */
+  if (__builtin_expect (dret == two10m1, 0))
+    return __ilogbl (x * 0x1p52L) - 52;
+  if (__builtin_expect (dret > -two10m1, 0))
+    {
+      double sig;
+      /* sig = xh & 0x000FFFFFFFFFFFFF (the high double, not X itself).  */
+      asm ("xxland %x0,%x1,%x2\n"
+	   : "=f" (sig)
+	   : "f" (xh), "f" (sig_mask.d));
+      feraiseexcept (FE_INVALID);
+      __set_errno (EDOM);
+      /* NaN differs from Inf by having some significand bits set.  */
+      return sig > 0.0 ? FP_ILOGBNAN : INT_MAX;
+    }
+  DOUBLE_TO_WORDS (dret, ret);
+  return ret;
+}
+
+long_double_symbol (libm, __ilogbl, ilogbl);
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/e_ilogb.c b/sysdeps/powerpc/powerpc64/power7/fpu/e_ilogb.c
new file mode 100644
index 0000000..34f1a9b
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/e_ilogb.c
@@ -0,0 +1 @@
+/* ilogb implementation is at w_ilogb.c  */
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/e_ilogbf.c b/sysdeps/powerpc/powerpc64/power7/fpu/e_ilogbf.c
new file mode 100644
index 0000000..1bbe157
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/e_ilogbf.c
@@ -0,0 +1 @@
+/* ilogbf implementation is at w_ilogbf.c  */
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/e_ilogbl.c b/sysdeps/powerpc/powerpc64/power7/fpu/e_ilogbl.c
new file mode 100644
index 0000000..7684390
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/e_ilogbl.c
@@ -0,0 +1 @@
+/* ilogbl implementation is at w_ilogbl.c  */
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/w_ilogb.c b/sysdeps/powerpc/powerpc64/power7/fpu/w_ilogb.c
new file mode 100644
index 0000000..48a85ca
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/w_ilogb.c
@@ -0,0 +1 @@
+#include <sysdeps/powerpc/powerpc32/power7/fpu/w_ilogb.c>
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/w_ilogbf.c b/sysdeps/powerpc/powerpc64/power7/fpu/w_ilogbf.c
new file mode 100644
index 0000000..9220977
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/w_ilogbf.c
@@ -0,0 +1 @@
+#include <sysdeps/powerpc/powerpc32/power7/fpu/w_ilogbf.c>
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/w_ilogbl.c b/sysdeps/powerpc/powerpc64/power7/fpu/w_ilogbl.c
new file mode 100644
index 0000000..bd76c6e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/w_ilogbl.c
@@ -0,0 +1 @@
+#include <sysdeps/powerpc/powerpc32/power7/fpu/w_ilogbl.c>
-- 
1.6.0.2


-- 
Adhemerval Zanella Netto
  Software Engineer
  Linux Technology Center Brazil
  Toolchain / GLIBC on Power Architecture
  azanella@linux.vnet.ibm.com / azanella@br.ibm.com
  +55 61 8642-9890


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]