This is the mail archive of the
libc-ports@sources.redhat.com
mailing list for the libc-ports project.
[PATCH 2/5] alpha: fix corner cases in ceil, floor, rint.
- From: Matt Turner <mattst88 at gmail dot com>
- To: Richard Henderson <rth at twiddle dot net>, "Joseph S. Myers" <joseph at codesourcery dot com>
- Cc: libc-ports at sourceware dot org, toolchain at gentoo dot org, Aurelien Jarno <aurelien at aurel32 dot net>, Matt Turner <mattst88 at gmail dot com>
- Date: Mon, 3 May 2010 23:25:05 -0400
- Subject: [PATCH 2/5] alpha: fix corner cases in ceil, floor, rint.
- References: <1272943508-11016-1-git-send-email-mattst88@gmail.com> <1272943508-11016-2-git-send-email-mattst88@gmail.com>
From: Aurelien Jarno <aurelien@aurel32.net>
Partial revert of bebc49030c15. Even with the revert, ceil and floor are
still faster than libcpml's equivalent.
Fixes bug 5350.
Signed-off-by: Matt Turner <mattst88@gmail.com>
---
ChangeLog.alpha | 9 +++++++++
sysdeps/alpha/fpu/s_ceil.c | 27 ++++++++++++++++-----------
sysdeps/alpha/fpu/s_ceilf.c | 32 +++++++++++++++++++++-----------
sysdeps/alpha/fpu/s_floor.c | 32 +++++++++++++++++++-------------
sysdeps/alpha/fpu/s_floorf.c | 37 ++++++++++++++++++++++++-------------
sysdeps/alpha/fpu/s_rint.c | 27 ++++++++++++++++++---------
sysdeps/alpha/fpu/s_rintf.c | 33 ++++++++++++++++++++++++---------
7 files changed, 131 insertions(+), 66 deletions(-)
diff --git a/ChangeLog.alpha b/ChangeLog.alpha
index 80236fe..8b0a824 100644
--- a/ChangeLog.alpha
+++ b/ChangeLog.alpha
@@ -1,3 +1,12 @@
+2010-05-03 Aurelien Jarno <aurelien@aurel32.net>
+
+ * sysdeps/alpha/fpu/s_ceil.c: Fix corner cases.
+ * sysdeps/alpha/fpu/s_ceilf.c: Likewise.
+ * sysdeps/alpha/fpu/s_floor.c: Likewise.
+ * sysdeps/alpha/fpu/s_floorf.c: Likewise.
+ * sysdeps/alpha/fpu/s_rint.c: Likewise.
+ * sysdeps/alpha/fpu/s_rintf.c: Likewise.
+
2010-05-03 GOTO Masanori <gotom@debian.or.jp>
* sysdeps/unix/sysv/linux/alpha/kernel-features.h: Define
diff --git a/sysdeps/alpha/fpu/s_ceil.c b/sysdeps/alpha/fpu/s_ceil.c
index 40c2379..fe20902 100644
--- a/sysdeps/alpha/fpu/s_ceil.c
+++ b/sysdeps/alpha/fpu/s_ceil.c
@@ -27,20 +27,25 @@
double
__ceil (double x)
{
- double two52 = copysign (0x1.0p52, x);
- double r, tmp;
-
- __asm (
+ if (isless (fabs (x), 9007199254740992.0)) /* 1 << DBL_MANT_DIG */
+ {
+ double tmp1, new_x;
+
+ new_x = -x;
+ __asm (
#ifdef _IEEE_FP_INEXACT
- "addt/suim %2, %3, %1\n\tsubt/suim %1, %3, %0"
+ "cvttq/svim %2,%1\n\t"
#else
- "addt/sum %2, %3, %1\n\tsubt/sum %1, %3, %0"
+ "cvttq/svm %2,%1\n\t"
#endif
- : "=&f"(r), "=&f"(tmp)
- : "f"(-x), "f"(-two52));
-
- /* Fix up the negation we did above, as well as handling -0 properly. */
- return copysign (r, x);
+ "cvtqt/m %1,%0\n\t"
+ : "=f"(new_x), "=&f"(tmp1)
+ : "f"(new_x));
+
+ /* Fix up the negation we did above, as well as handling -0 properly. */
+ x = copysign(new_x, x);
+ }
+ return x;
}
weak_alias (__ceil, ceil)
diff --git a/sysdeps/alpha/fpu/s_ceilf.c b/sysdeps/alpha/fpu/s_ceilf.c
index 0df651f..c722e72 100644
--- a/sysdeps/alpha/fpu/s_ceilf.c
+++ b/sysdeps/alpha/fpu/s_ceilf.c
@@ -26,20 +26,30 @@
float
__ceilf (float x)
{
- float two23 = copysignf (0x1.0p23, x);
- float r, tmp;
-
- __asm (
+ if (isless (fabsf (x), 16777216.0f)) /* 1 << FLT_MANT_DIG */
+ {
+ /* Note that Alpha S_Floating is stored in registers in a
+ restricted T_Floating format, so we don't even need to
+ convert back to S_Floating in the end. The initial
+ conversion to T_Floating is needed to handle denormals. */
+
+ float tmp1, tmp2, new_x;
+
+ new_x = -x;
+ __asm ("cvtst/s %3,%2\n\t"
#ifdef _IEEE_FP_INEXACT
- "adds/suim %2, %3, %1\n\tsubs/suim %1, %3, %0"
+ "cvttq/svim %2,%1\n\t"
#else
- "adds/sum %2, %3, %1\n\tsubs/sum %1, %3, %0"
+ "cvttq/svm %2,%1\n\t"
#endif
- : "=&f"(r), "=&f"(tmp)
- : "f"(-x), "f"(-two23));
-
- /* Fix up the negation we did above, as well as handling -0 properly. */
- return copysignf (r, x);
+ "cvtqt/m %1,%0\n\t"
+ : "=f"(new_x), "=&f"(tmp1), "=&f"(tmp2)
+ : "f"(new_x));
+
+ /* Fix up the negation we did above, as well as handling -0 properly. */
+ x = copysignf(new_x, x);
+ }
+ return x;
}
weak_alias (__ceilf, ceilf)
diff --git a/sysdeps/alpha/fpu/s_floor.c b/sysdeps/alpha/fpu/s_floor.c
index 5af6386..6b16401 100644
--- a/sysdeps/alpha/fpu/s_floor.c
+++ b/sysdeps/alpha/fpu/s_floor.c
@@ -21,26 +21,32 @@
#include <math_ldbl_opt.h>
-/* Use the -inf rounding mode conversion instructions to implement floor. */
+/* Use the -inf rounding mode conversion instructions to implement
+ floor. We note when the exponent is large enough that the value
+ must be integral, as this avoids unpleasant integer overflows. */
double
__floor (double x)
{
- double two52 = copysign (0x1.0p52, x);
- double r, tmp;
-
- __asm (
+ if (isless (fabs (x), 9007199254740992.0)) /* 1 << DBL_MANT_DIG */
+ {
+ double tmp1, new_x;
+
+ __asm (
#ifdef _IEEE_FP_INEXACT
- "addt/suim %2, %3, %1\n\tsubt/suim %1, %3, %0"
+ "cvttq/svim %2,%1\n\t"
#else
- "addt/sum %2, %3, %1\n\tsubt/sum %1, %3, %0"
+ "cvttq/svm %2,%1\n\t"
#endif
- : "=&f"(r), "=&f"(tmp)
- : "f"(x), "f"(two52));
-
- /* floor(-0) == -0, and in general we'll always have the same
- sign as our input. */
- return copysign (r, x);
+ "cvtqt/m %1,%0\n\t"
+ : "=f"(new_x), "=&f"(tmp1)
+ : "f"(x));
+
+ /* floor(-0) == -0, and in general we'll always have the same
+ sign as our input. */
+ x = copysign(new_x, x);
+ }
+ return x;
}
weak_alias (__floor, floor)
diff --git a/sysdeps/alpha/fpu/s_floorf.c b/sysdeps/alpha/fpu/s_floorf.c
index 8b42170..5da08ae 100644
--- a/sysdeps/alpha/fpu/s_floorf.c
+++ b/sysdeps/alpha/fpu/s_floorf.c
@@ -20,26 +20,37 @@
#include <math.h>
-/* Use the -inf rounding mode conversion instructions to implement floor. */
+/* Use the -inf rounding mode conversion instructions to implement
+ floor. We note when the exponent is large enough that the value
+ must be integral, as this avoids unpleasant integer overflows. */
float
__floorf (float x)
{
- float two23 = copysignf (0x1.0p23, x);
- float r, tmp;
-
- __asm (
+ if (isless (fabsf (x), 16777216.0f)) /* 1 << FLT_MANT_DIG */
+ {
+ /* Note that Alpha S_Floating is stored in registers in a
+ restricted T_Floating format, so we don't even need to
+ convert back to S_Floating in the end. The initial
+ conversion to T_Floating is needed to handle denormals. */
+
+ float tmp1, tmp2, new_x;
+
+ __asm ("cvtst/s %3,%2\n\t"
#ifdef _IEEE_FP_INEXACT
- "adds/suim %2, %3, %1\n\tsubs/suim %1, %3, %0"
+ "cvttq/svim %2,%1\n\t"
#else
- "adds/sum %2, %3, %1\n\tsubs/sum %1, %3, %0"
+ "cvttq/svm %2,%1\n\t"
#endif
- : "=&f"(r), "=&f"(tmp)
- : "f"(x), "f"(two23));
-
- /* floor(-0) == -0, and in general we'll always have the same
- sign as our input. */
- return copysignf (r, x);
+ "cvtqt/m %1,%0\n\t"
+ : "=f"(new_x), "=&f"(tmp1), "=&f"(tmp2)
+ : "f"(x));
+
+ /* floor(-0) == -0, and in general we'll always have the same
+ sign as our input. */
+ x = copysignf(new_x, x);
+ }
+ return x;
}
weak_alias (__floorf, floorf)
diff --git a/sysdeps/alpha/fpu/s_rint.c b/sysdeps/alpha/fpu/s_rint.c
index e9aa028..9624631 100644
--- a/sysdeps/alpha/fpu/s_rint.c
+++ b/sysdeps/alpha/fpu/s_rint.c
@@ -24,15 +24,24 @@
double
__rint (double x)
{
- double two52 = copysign (0x1.0p52, x);
- double r;
-
- r = x + two52;
- r = r - two52;
-
- /* rint(-0.1) == -0, and in general we'll always have the same sign
- as our input. */
- return copysign (r, x);
+ if (isless (fabs (x), 9007199254740992.0)) /* 1 << DBL_MANT_DIG */
+ {
+ double tmp1, new_x;
+ __asm (
+#ifdef _IEEE_FP_INEXACT
+ "cvttq/svid %2,%1\n\t"
+#else
+ "cvttq/svd %2,%1\n\t"
+#endif
+ "cvtqt/d %1,%0\n\t"
+ : "=f"(new_x), "=&f"(tmp1)
+ : "f"(x));
+
+ /* rint(-0.1) == -0, and in general we'll always have the same
+ sign as our input. */
+ x = copysign(new_x, x);
+ }
+ return x;
}
weak_alias (__rint, rint)
diff --git a/sysdeps/alpha/fpu/s_rintf.c b/sysdeps/alpha/fpu/s_rintf.c
index 9e4cbd1..39fb72f 100644
--- a/sysdeps/alpha/fpu/s_rintf.c
+++ b/sysdeps/alpha/fpu/s_rintf.c
@@ -23,15 +23,30 @@
float
__rintf (float x)
{
- float two23 = copysignf (0x1.0p23, x);
- float r;
-
- r = x + two23;
- r = r - two23;
-
- /* rint(-0.1) == -0, and in general we'll always have the same sign
- as our input. */
- return copysign (r, x);
+ if (isless (fabsf (x), 16777216.0f)) /* 1 << FLT_MANT_DIG */
+ {
+ /* Note that Alpha S_Floating is stored in registers in a
+ restricted T_Floating format, so we don't even need to
+ convert back to S_Floating in the end. The initial
+ conversion to T_Floating is needed to handle denormals. */
+
+ float tmp1, tmp2, new_x;
+
+ __asm ("cvtst/s %3,%2\n\t"
+#ifdef _IEEE_FP_INEXACT
+ "cvttq/svid %2,%1\n\t"
+#else
+ "cvttq/svd %2,%1\n\t"
+#endif
+ "cvtqt/d %1,%0\n\t"
+ : "=f"(new_x), "=&f"(tmp1), "=&f"(tmp2)
+ : "f"(x));
+
+ /* rint(-0.1) == -0, and in general we'll always have the same
+ sign as our input. */
+ x = copysignf(new_x, x);
+ }
+ return x;
}
weak_alias (__rintf, rintf)
--
1.6.4.4