This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
[PATCH v3] force eval for fma implementations
- From: Richard Henderson <rth at twiddle dot net>
- To: libc-alpha at sourceware dot org
- Cc: joseph at codesourcery dot com
- Date: Fri, 1 Aug 2014 07:51:55 -1000
- Subject: [PATCH v3] force eval for fma implementations
- Authentication-results: sourceware.org; auth=none
Now with corresponding fixes to ldbl-96/s_fma.c, ldbl-96/s_fmal.c
and ldbl-128/s_fmal.c, as suggested by Joseph.
Ok?
r~
* sysdeps/ieee754/dbl-64/s_fma.c (__fma): Use math_force_eval before
feclearexcept; use math_opt_barrier instead of open-coded asm; fix
typo in exact zero test.
* sysdeps/ieee754/ldbl-128/s_fmal.c (__fmal): Likewise.
* sysdeps/ieee754/ldbl-96/s_fma.c (__fma): Likewise.
* sysdeps/ieee754/ldbl-96/s_fmal.c (__fmal): Likewise.
---
sysdeps/ieee754/dbl-64/s_fma.c | 11 ++++++-----
sysdeps/ieee754/ldbl-128/s_fmal.c | 11 ++++++-----
sysdeps/ieee754/ldbl-96/s_fma.c | 11 ++++++-----
sysdeps/ieee754/ldbl-96/s_fmal.c | 11 ++++++-----
4 files changed, 24 insertions(+), 20 deletions(-)
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c
index 389acd4..77065aa 100644
--- a/sysdeps/ieee754/dbl-64/s_fma.c
+++ b/sysdeps/ieee754/dbl-64/s_fma.c
@@ -198,16 +198,17 @@ __fma (double x, double y, double z)
t1 = m1 - t1;
t2 = z - t2;
double a2 = t1 + t2;
+ /* Ensure the arithmetic is not scheduled after feclearexcept call. */
+ math_force_eval (m2);
+ math_force_eval (a2);
feclearexcept (FE_INEXACT);
- /* If the result is an exact zero, ensure it has the correct
- sign. */
+ /* If the result is an exact zero, ensure it has the correct sign. */
if (a1 == 0 && m2 == 0)
{
libc_feupdateenv (&env);
- /* Ensure that round-to-nearest value of z + m1 is not
- reused. */
- asm volatile ("" : "=m" (z) : "m" (z));
+ /* Ensure that round-to-nearest value of z + m1 is not reused. */
+ z = math_opt_barrier (z);
return z + m1;
}
diff --git a/sysdeps/ieee754/ldbl-128/s_fmal.c b/sysdeps/ieee754/ldbl-128/s_fmal.c
index 1d5f27b..8147094 100644
--- a/sysdeps/ieee754/ldbl-128/s_fmal.c
+++ b/sysdeps/ieee754/ldbl-128/s_fmal.c
@@ -203,16 +203,17 @@ __fmal (long double x, long double y, long double z)
t1 = m1 - t1;
t2 = z - t2;
long double a2 = t1 + t2;
+ /* Ensure the arithmetic is not scheduled after feclearexcept call. */
+ math_force_eval (m2);
+ math_force_eval (a2);
feclearexcept (FE_INEXACT);
- /* If the result is an exact zero, ensure it has the correct
- sign. */
+ /* If the result is an exact zero, ensure it has the correct sign. */
if (a1 == 0 && m2 == 0)
{
feupdateenv (&env);
- /* Ensure that round-to-nearest value of z + m1 is not
- reused. */
- asm volatile ("" : "=m" (z) : "m" (z));
+ /* Ensure that round-to-nearest value of z + m1 is not reused. */
+ z = math_opt_barrier (z);
return z + m1;
}
diff --git a/sysdeps/ieee754/ldbl-96/s_fma.c b/sysdeps/ieee754/ldbl-96/s_fma.c
index 354be4e..a27102e 100644
--- a/sysdeps/ieee754/ldbl-96/s_fma.c
+++ b/sysdeps/ieee754/ldbl-96/s_fma.c
@@ -64,16 +64,17 @@ __fma (double x, double y, double z)
t1 = m1 - t1;
t2 = z - t2;
long double a2 = t1 + t2;
+ /* Ensure the arithmetic is not scheduled after feclearexcept call. */
+ math_force_eval (m2);
+ math_force_eval (a2);
feclearexcept (FE_INEXACT);
- /* If the result is an exact zero, ensure it has the correct
- sign. */
+ /* If the result is an exact zero, ensure it has the correct sign. */
if (a1 == 0 && m2 == 0)
{
feupdateenv (&env);
- /* Ensure that round-to-nearest value of z + m1 is not
- reused. */
- asm volatile ("" : "=m" (z) : "m" (z));
+ /* Ensure that round-to-nearest value of z + m1 is not reused. */
+ z = math_opt_barrier (z);
return z + m1;
}
diff --git a/sysdeps/ieee754/ldbl-96/s_fmal.c b/sysdeps/ieee754/ldbl-96/s_fmal.c
index 4983eda..0f299da 100644
--- a/sysdeps/ieee754/ldbl-96/s_fmal.c
+++ b/sysdeps/ieee754/ldbl-96/s_fmal.c
@@ -201,16 +201,17 @@ __fmal (long double x, long double y, long double z)
t1 = m1 - t1;
t2 = z - t2;
long double a2 = t1 + t2;
+ /* Ensure the arithmetic is not scheduled after feclearexcept call. */
+ math_force_eval (m2);
+ math_force_eval (a2);
feclearexcept (FE_INEXACT);
- /* If the result is an exact zero, ensure it has the correct
- sign. */
+ /* If the result is an exact zero, ensure it has the correct sign. */
if (a1 == 0 && m2 == 0)
{
feupdateenv (&env);
- /* Ensure that round-to-nearest value of z + m1 is not
- reused. */
- asm volatile ("" : "=m" (z) : "m" (z));
+ /* Ensure that round-to-nearest value of z + m1 is not reused. */
+ z = math_opt_barrier (z);
return z + m1;
}
--
1.9.3