This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
GNU C Library master sources branch, master, updated. glibc-2.12-188-g5e90846

From: drepper at sourceware dot org
To: glibc-cvs at sourceware dot org
Date: 14 Oct 2010 02:27:27 -0000
Subject: GNU C Library master sources branch, master, updated. glibc-2.12-188-g5e90846
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  5e908464b9603e9cd95047d2edd00de5f78fedaf (commit)
      from  f90681487d90a6eea8f1c60021bc8dd83313c59b (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=5e908464b9603e9cd95047d2edd00de5f78fedaf

commit 5e908464b9603e9cd95047d2edd00de5f78fedaf
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Wed Oct 13 22:27:03 2010 -0400

    Implement accurate fma.

diff --git a/ChangeLog b/ChangeLog
index e2a4a70..2fdca19 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,35 @@
+2010-10-13  Jakub Jelinek  <jakub@redhat.com>
+
+	[BZ #3268]
+	* math/libm-test.inc (fma_test): Some more fmaf and fma tests.
+	* sysdeps/i386/i686/multiarch/s_fma.c: Include ldbl-96 version
+	instead of dbl-64.
+	* sysdeps/i386/fpu/bits/mathinline.h (fma, fmaf, fmal): Remove
+	inlines.
+	* sysdeps/ieee754/ldbl-96/s_fma.c: New file.
+	* sysdeps/ieee754/dbl-64/s_fma.c (__fma): Fix exponent adjustment
+	if one of x and y is very large and the other is subnormal.
+	* sysdeps/s390/fpu/s_fmaf.c: New file.
+	* sysdeps/s390/fpu/s_fma.c: New file.
+	* sysdeps/powerpc/fpu/s_fmaf.S: New file.
+	* sysdeps/powerpc/fpu/s_fma.S: New file.
+	* sysdeps/powerpc/powerpc32/fpu/s_fma.S: New file.
+	* sysdeps/powerpc/powerpc64/fpu/s_fma.S: New file.
+	* sysdeps/unix/sysv/linux/s390/fpu/s_fma.c: New file.
+
+2010-10-12  Jakub Jelinek  <jakub@redhat.com>
+
+	[BZ #3268]
+	* math/libm-test.inc (fma_test): Add some more fmaf tests, add
+	fma tests.
+	* sysdeps/ieee754/dbl-64/s_fmaf.c (__fmaf): Fix Inf/Nan check.
+	* sysdeps/ieee754/dbl-64/s_fma.c: New file.
+	* sysdeps/i386/i686/multiarch/s_fma.c: Include
+	sysdeps/ieee754/dbl-64/s_fma.c instead of math/s_fma.c.
+	* sysdeps/x86_64/multiarch/s_fma.c: Likewise.
+	* sysdeps/ieee754/ldbl-opt/s_fma.c: Likewise.
+	* sysdeps/ieee754/ldbl-128/s_fma.c: New file.
+
 2010-10-12  Ulrich Drepper  <drepper@redhat.com>
 
 	[BZ #12078]
diff --git a/math/libm-test.inc b/math/libm-test.inc
index 2d48f71..34c4fa9 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -2789,9 +2789,25 @@ fma_test (void)
   TEST_fff_f (fma, minus_infty, minus_infty, minus_infty, nan_value, INVALID_EXCEPTION);
 
   TEST_fff_f (fma, 1.25L, 0.75L, 0.0625L, 1.0L);
-#ifdef TEST_FLOAT
+#if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
   TEST_fff_f (fma, 0x1.7ff8p+13, 0x1.000002p+0, 0x1.ffffp-24, 0x1.7ff802p+13);
   TEST_fff_f (fma, 0x1.fffp+0, 0x1.00001p+0, -0x1.fffp+0, 0x1.fffp-20);
+  TEST_fff_f (fma, 0x1.9abcdep+127, 0x0.9abcdep-126, -0x1.f08948p+0, 0x1.bb421p-25);
+  TEST_fff_f (fma, 0x1.9abcdep+100, 0x0.9abcdep-126, -0x1.f08948p-27, 0x1.bb421p-52);
+  TEST_fff_f (fma, 0x1.fffffep+127, 0x1.001p+0, -0x1.fffffep+127, 0x1.fffffep+115);
+  TEST_fff_f (fma, -0x1.fffffep+127, 0x1.fffffep+0, 0x1.fffffep+127, -0x1.fffffap+127);
+  TEST_fff_f (fma, 0x1.fffffep+127, 2.0, -0x1.fffffep+127, 0x1.fffffep+127);
+#endif
+#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
+  TEST_fff_f (fma, 0x1.7fp+13, 0x1.0000000000001p+0, 0x1.ffep-48, 0x1.7f00000000001p+13);
+  TEST_fff_f (fma, 0x1.fffp+0, 0x1.0000000000001p+0, -0x1.fffp+0, 0x1.fffp-52);
+  TEST_fff_f (fma, 0x1.0000002p+0, 0x1.ffffffcp-1, 0x1p-300, 1.0);
+  TEST_fff_f (fma, 0x1.0000002p+0, 0x1.ffffffcp-1, -0x1p-300, 0x1.fffffffffffffp-1);
+  TEST_fff_f (fma, 0x1.deadbeef2feedp+1023, 0x0.deadbeef2feedp-1022, -0x1.a05f8c01a4bfbp+1, 0x1.0989687bc9da4p-53);
+  TEST_fff_f (fma, 0x1.deadbeef2feedp+900, 0x0.deadbeef2feedp-1022, -0x1.a05f8c01a4bfbp-122, 0x1.0989687bc9da4p-176);
+  TEST_fff_f (fma, 0x1.fffffffffffffp+1023, 0x1.001p+0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1011);
+  TEST_fff_f (fma, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+0, 0x1.fffffffffffffp+1023, -0x1.ffffffffffffdp+1023);
+  TEST_fff_f (fma, 0x1.fffffffffffffp+1023, 2.0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023);
 #endif
 
   END (fma);
diff --git a/sysdeps/i386/fpu/bits/mathinline.h b/sysdeps/i386/fpu/bits/mathinline.h
index 8005997..666ec2b 100644
--- a/sysdeps/i386/fpu/bits/mathinline.h
+++ b/sysdeps/i386/fpu/bits/mathinline.h
@@ -1,6 +1,6 @@
 /* Inline math functions for i387.
-   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2003,2004,2006,2007,2009
-   Free Software Foundation, Inc.
+   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2003,2004,2006,2007,2009,
+   2010 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by John C. Bowman <bowman@math.ualberta.ca>, 1995.
 
@@ -657,8 +657,6 @@ __NTH (ldexpl (long double __x, int __y))
   __ldexp_code;
 }
 
-__inline_mathcodeNP3 (fma, __x, __y, __z, return (__x * __y) + __z)
-
 __inline_mathopNP (rint, "frndint")
 # endif /* __FAST_MATH__ */
 
diff --git a/sysdeps/i386/i686/multiarch/s_fma.c b/sysdeps/i386/i686/multiarch/s_fma.c
index d9291b0..2373caa 100644
--- a/sysdeps/i386/i686/multiarch/s_fma.c
+++ b/sysdeps/i386/i686/multiarch/s_fma.c
@@ -33,4 +33,4 @@ weak_alias (__fma, fma)
 # define __fma __fma_ia32
 #endif
 
-#include <math/s_fma.c>
+#include <sysdeps/ieee754/ldbl-96/s_fma.c>
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c
new file mode 100644
index 0000000..ca7300c
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/s_fma.c
@@ -0,0 +1,146 @@
+/* Compute x * y + z as ternary operation.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <float.h>
+#include <math.h>
+#include <fenv.h>
+#include <ieee754.h>
+
+/* This implementation uses rounding to odd to avoid problems with
+   double rounding.  See a paper by Boldo and Melquiond:
+   http://www.lri.fr/~melquion/doc/08-tc.pdf  */
+
+double
+__fma (double x, double y, double z)
+{
+  union ieee754_double u, v, w;
+  int adjust = 0;
+  u.d = x;
+  v.d = y;
+  w.d = z;
+  if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
+			>= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, 0)
+      || __builtin_expect (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
+      || __builtin_expect (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
+      || __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0))
+    {
+      /* If x or y or z is Inf/NaN or if fma will certainly overflow,
+	 compute as x * y + z.  */
+      if (u.ieee.exponent == 0x7ff
+	  || v.ieee.exponent == 0x7ff
+	  || w.ieee.exponent == 0x7ff
+	  || u.ieee.exponent + v.ieee.exponent
+	     > 0x7ff + IEEE754_DOUBLE_BIAS)
+	return x * y + z;
+      if (u.ieee.exponent + v.ieee.exponent
+	  >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
+	{
+	  /* Compute 1p-53 times smaller result and multiply
+	     at the end.  */
+	  if (u.ieee.exponent > v.ieee.exponent)
+	    u.ieee.exponent -= DBL_MANT_DIG;
+	  else
+	    v.ieee.exponent -= DBL_MANT_DIG;
+	  /* If x + y exponent is very large and z exponent is very small,
+	     it doesn't matter if we don't adjust it.  */
+	  if (w.ieee.exponent > DBL_MANT_DIG)
+	    w.ieee.exponent -= DBL_MANT_DIG;
+	  adjust = 1;
+	}
+      else if (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
+	{
+	  /* Similarly.
+	     If z exponent is very large and x and y exponents are
+	     very small, it doesn't matter if we don't adjust it.  */
+	  if (u.ieee.exponent > v.ieee.exponent)
+	    {
+	      if (u.ieee.exponent > DBL_MANT_DIG)
+		u.ieee.exponent -= DBL_MANT_DIG;
+	    }
+	  else if (v.ieee.exponent > DBL_MANT_DIG)
+	    v.ieee.exponent -= DBL_MANT_DIG;
+	  w.ieee.exponent -= DBL_MANT_DIG;
+	  adjust = 1;
+	}
+      else if (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
+	{
+	  u.ieee.exponent -= DBL_MANT_DIG;
+	  if (v.ieee.exponent)
+	    v.ieee.exponent += DBL_MANT_DIG;
+	  else
+	    v.d *= 0x1p53;
+	}
+      else
+	{
+	  v.ieee.exponent -= DBL_MANT_DIG;
+	  if (u.ieee.exponent)
+	    u.ieee.exponent += DBL_MANT_DIG;
+	  else
+	    u.d *= 0x1p53;
+	}
+      x = u.d;
+      y = v.d;
+      z = w.d;
+    }
+  /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
+#define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
+  double x1 = x * C;
+  double y1 = y * C;
+  double m1 = x * y;
+  x1 = (x - x1) + x1;
+  y1 = (y - y1) + y1;
+  double x2 = x - x1;
+  double y2 = y - y1;
+  double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
+
+  /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
+  double a1 = z + m1;
+  double t1 = a1 - z;
+  double t2 = a1 - t1;
+  t1 = m1 - t1;
+  t2 = z - t2;
+  double a2 = t1 + t2;
+
+  fenv_t env;
+  feholdexcept (&env);
+  fesetround (FE_TOWARDZERO);
+  /* Perform m2 + a2 addition with round to odd.  */
+  u.d = a2 + m2;
+  if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
+    u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
+  feupdateenv (&env);
+
+  /* Add that to a1.  */
+  a1 = a1 + u.d;
+
+  /* And adjust exponent if needed.  */
+  if (__builtin_expect (adjust, 0))
+    a1 *= 0x1p53;
+
+  return a1;
+}
+#ifndef __fma
+weak_alias (__fma, fma)
+#endif
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__fma, __fmal)
+weak_alias (__fmal, fmal)
+#endif
diff --git a/sysdeps/ieee754/dbl-64/s_fmaf.c b/sysdeps/ieee754/dbl-64/s_fmaf.c
index 9721876..cd16cd1 100644
--- a/sysdeps/ieee754/dbl-64/s_fmaf.c
+++ b/sysdeps/ieee754/dbl-64/s_fmaf.c
@@ -39,7 +39,7 @@ __fmaf (float x, float y, float z)
   fesetround (FE_TOWARDZERO);
   /* Perform addition with round to odd.  */
   u.d = temp + (double) z;
-  if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0xff)
+  if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
     u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
   feupdateenv (&env);
   /* And finally truncation with round to nearest.  */
diff --git a/sysdeps/ieee754/dbl-64/s_fmaf.c b/sysdeps/ieee754/ldbl-128/s_fma.c
similarity index 73%
copy from sysdeps/ieee754/dbl-64/s_fmaf.c
copy to sysdeps/ieee754/ldbl-128/s_fma.c
index 9721876..78055e5 100644
--- a/sysdeps/ieee754/dbl-64/s_fmaf.c
+++ b/sysdeps/ieee754/ldbl-128/s_fma.c
@@ -22,29 +22,29 @@
 #include <fenv.h>
 #include <ieee754.h>
 
-/* This implementation relies on double being more than twice as
-   precise as float and uses rounding to odd in order to avoid problems
+/* This implementation relies on long double being more than twice as
+   precise as double and uses rounding to odd in order to avoid problems
    with double rounding.
    See a paper by Boldo and Melquiond:
    http://www.lri.fr/~melquion/doc/08-tc.pdf  */
 
-float
-__fmaf (float x, float y, float z)
+double
+__fma (double x, double y, double z)
 {
   fenv_t env;
   /* Multiplication is always exact.  */
-  double temp = (double) x * (double) y;
-  union ieee754_double u;
+  long double temp = (long double) x * (long double) y;
+  union ieee854_long_double u;
   feholdexcept (&env);
   fesetround (FE_TOWARDZERO);
   /* Perform addition with round to odd.  */
-  u.d = temp + (double) z;
-  if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0xff)
-    u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
+  u.d = temp + (long double) z;
+  if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
+    u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
   feupdateenv (&env);
   /* And finally truncation with round to nearest.  */
-  return (float) u.d;
+  return (double) u.d;
 }
-#ifndef __fmaf
-weak_alias (__fmaf, fmaf)
+#ifndef __fma
+weak_alias (__fma, fma)
 #endif
diff --git a/sysdeps/ieee754/dbl-64/s_fmaf.c b/sysdeps/ieee754/ldbl-96/s_fma.c
similarity index 52%
copy from sysdeps/ieee754/dbl-64/s_fmaf.c
copy to sysdeps/ieee754/ldbl-96/s_fma.c
index 9721876..90c6d1f 100644
--- a/sysdeps/ieee754/dbl-64/s_fmaf.c
+++ b/sysdeps/ieee754/ldbl-96/s_fma.c
@@ -18,33 +18,53 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+#include <float.h>
 #include <math.h>
 #include <fenv.h>
 #include <ieee754.h>
 
-/* This implementation relies on double being more than twice as
-   precise as float and uses rounding to odd in order to avoid problems
-   with double rounding.
-   See a paper by Boldo and Melquiond:
+/* This implementation uses rounding to odd to avoid problems with
+   double rounding.  See a paper by Boldo and Melquiond:
    http://www.lri.fr/~melquion/doc/08-tc.pdf  */
 
-float
-__fmaf (float x, float y, float z)
+double
+__fma (double x, double y, double z)
 {
+  /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
+#define C ((1ULL << (LDBL_MANT_DIG + 1) / 2) + 1)
+  long double x1 = x * C;
+  long double y1 = y * C;
+  long double m1 = x * y;
+  x1 = (x - x1) + x1;
+  y1 = (y - y1) + y1;
+  long double x2 = x - x1;
+  long double y2 = y - y1;
+  long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
+
+  /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
+  long double a1 = z + m1;
+  long double t1 = a1 - z;
+  long double t2 = a1 - t1;
+  t1 = m1 - t1;
+  t2 = z - t2;
+  long double a2 = t1 + t2;
+
   fenv_t env;
-  /* Multiplication is always exact.  */
-  double temp = (double) x * (double) y;
-  union ieee754_double u;
   feholdexcept (&env);
   fesetround (FE_TOWARDZERO);
-  /* Perform addition with round to odd.  */
-  u.d = temp + (double) z;
-  if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0xff)
+  /* Perform m2 + a2 addition with round to odd.  */
+  a2 = a2 + m2;
+
+  /* Add that to a1 again using rounding to odd.  */
+  union ieee854_long_double u;
+  u.d = a1 + a2;
+  if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
     u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
   feupdateenv (&env);
-  /* And finally truncation with round to nearest.  */
-  return (float) u.d;
+
+  /* Add finally round to double precision.  */
+  return u.d;
 }
-#ifndef __fmaf
-weak_alias (__fmaf, fmaf)
+#ifndef __fma
+weak_alias (__fma, fma)
 #endif
diff --git a/sysdeps/ieee754/ldbl-opt/s_fma.c b/sysdeps/ieee754/ldbl-opt/s_fma.c
index 2ee7214..1723c5c 100644
--- a/sysdeps/ieee754/ldbl-opt/s_fma.c
+++ b/sysdeps/ieee754/ldbl-opt/s_fma.c
@@ -1,5 +1,5 @@
 #include <math_ldbl_opt.h>
-#include <math/s_fma.c>
+#include <sysdeps/ieee754/dbl-64/s_fma.c>
 #if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
 compat_symbol (libm, __fma, fmal, GLIBC_2_1);
 #endif
diff --git a/sysdeps/i386/i686/multiarch/s_fma.c b/sysdeps/powerpc/fpu/s_fma.S
similarity index 59%
copy from sysdeps/i386/i686/multiarch/s_fma.c
copy to sysdeps/powerpc/fpu/s_fma.S
index d9291b0..9c650ea 100644
--- a/sysdeps/i386/i686/multiarch/s_fma.c
+++ b/sysdeps/powerpc/fpu/s_fma.S
@@ -1,6 +1,5 @@
-/* Multiple versions of fma.
+/* Compute x * y + z as ternary operation.  PowerPC version.
    Copyright (C) 2010 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -10,27 +9,25 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
    License along with the GNU C Library; if not, write to the Free
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA. */
+   02111-1307 USA.  */
 
-#include <config.h>
+#include <sysdep.h>
 
-#ifdef HAVE_AVX_SUPPORT
-#include <math.h>
-#include <init-arch.h>
+ENTRY(__fma)
+/* double [f1] fma (double [f1] x, double [f2] y, double [f3] z); */
+	fmadd	fp1,fp1,fp2,fp3
+	blr
+END(__fma)
 
-extern double __fma_ia32 (double x, double y, double z) attribute_hidden;
-extern double __fma_fma (double x, double y, double z) attribute_hidden;
+weak_alias (__fma,fma)
 
-libm_ifunc (__fma, HAS_FMA ? __fma_fma : __fma_ia32);
-weak_alias (__fma, fma)
-
-# define __fma __fma_ia32
+#ifdef NO_LONG_DOUBLE
+weak_alias (__fma,__fmal)
+weak_alias (__fma,fmal)
 #endif
-
-#include <math/s_fma.c>
diff --git a/sysdeps/i386/i686/multiarch/s_fma.c b/sysdeps/powerpc/fpu/s_fmaf.S
similarity index 59%
copy from sysdeps/i386/i686/multiarch/s_fma.c
copy to sysdeps/powerpc/fpu/s_fmaf.S
index d9291b0..1a755fe 100644
--- a/sysdeps/i386/i686/multiarch/s_fma.c
+++ b/sysdeps/powerpc/fpu/s_fmaf.S
@@ -1,6 +1,5 @@
-/* Multiple versions of fma.
+/* Compute x * y + z as ternary operation.  PowerPC version.
    Copyright (C) 2010 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -10,27 +9,20 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
    License along with the GNU C Library; if not, write to the Free
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA. */
+   02111-1307 USA.  */
 
-#include <config.h>
+#include <sysdep.h>
 
-#ifdef HAVE_AVX_SUPPORT
-#include <math.h>
-#include <init-arch.h>
+ENTRY(__fmaf)
+/* float [f1] fmaf (float [f1] x, float [f2] y, float [f3] z); */
+	fmadds	fp1,fp1,fp2,fp3
+	blr
+END(__fmaf)
 
-extern double __fma_ia32 (double x, double y, double z) attribute_hidden;
-extern double __fma_fma (double x, double y, double z) attribute_hidden;
-
-libm_ifunc (__fma, HAS_FMA ? __fma_fma : __fma_ia32);
-weak_alias (__fma, fma)
-
-# define __fma __fma_ia32
-#endif
-
-#include <math/s_fma.c>
+weak_alias (__fmaf,fmaf)
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_fma.S b/sysdeps/powerpc/powerpc32/fpu/s_fma.S
new file mode 100644
index 0000000..d40695c
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/fpu/s_fma.S
@@ -0,0 +1,5 @@
+#include <math_ldbl_opt.h>
+#include <sysdeps/powerpc/fpu/s_fma.S>
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __fma, fmal, GLIBC_2_1)
+#endif
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_fma.S b/sysdeps/powerpc/powerpc64/fpu/s_fma.S
new file mode 100644
index 0000000..d40695c
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/s_fma.S
@@ -0,0 +1,5 @@
+#include <math_ldbl_opt.h>
+#include <sysdeps/powerpc/fpu/s_fma.S>
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __fma, fmal, GLIBC_2_1)
+#endif
diff --git a/sysdeps/i386/i686/multiarch/s_fma.c b/sysdeps/s390/fpu/s_fma.c
similarity index 62%
copy from sysdeps/i386/i686/multiarch/s_fma.c
copy to sysdeps/s390/fpu/s_fma.c
index d9291b0..f682362 100644
--- a/sysdeps/i386/i686/multiarch/s_fma.c
+++ b/sysdeps/s390/fpu/s_fma.c
@@ -1,7 +1,7 @@
-/* Multiple versions of fma.
+/* Compute x * y + z as ternary operation.  S/390 version.
    Copyright (C) 2010 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
    This file is part of the GNU C Library.
+   Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -10,27 +10,28 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
    License along with the GNU C Library; if not, write to the Free
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA. */
+   02111-1307 USA.  */
 
-#include <config.h>
-
-#ifdef HAVE_AVX_SUPPORT
 #include <math.h>
-#include <init-arch.h>
-
-extern double __fma_ia32 (double x, double y, double z) attribute_hidden;
-extern double __fma_fma (double x, double y, double z) attribute_hidden;
 
-libm_ifunc (__fma, HAS_FMA ? __fma_fma : __fma_ia32);
+double
+__fma (double x, double y, double z)
+{
+  double r;
+  asm ("madbr %0,%1,%2" : "=f" (r) : "%f" (x), "fR" (y), "0" (z));
+  return r;
+}
+#ifndef __fma
 weak_alias (__fma, fma)
-
-# define __fma __fma_ia32
 #endif
 
-#include <math/s_fma.c>
+#ifdef NO_LONG_DOUBLE
+strong_alias (__fma, __fmal)
+weak_alias (__fmal, fmal)
+#endif
diff --git a/sysdeps/i386/i686/multiarch/s_fma.c b/sysdeps/s390/fpu/s_fmaf.c
similarity index 60%
copy from sysdeps/i386/i686/multiarch/s_fma.c
copy to sysdeps/s390/fpu/s_fmaf.c
index d9291b0..030d7b2 100644
--- a/sysdeps/i386/i686/multiarch/s_fma.c
+++ b/sysdeps/s390/fpu/s_fmaf.c
@@ -1,7 +1,7 @@
-/* Multiple versions of fma.
+/* Compute x * y + z as ternary operation.  S/390 version.
    Copyright (C) 2010 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
    This file is part of the GNU C Library.
+   Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -10,27 +10,23 @@
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
    License along with the GNU C Library; if not, write to the Free
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA. */
+   02111-1307 USA.  */
 
-#include <config.h>
-
-#ifdef HAVE_AVX_SUPPORT
 #include <math.h>
-#include <init-arch.h>
-
-extern double __fma_ia32 (double x, double y, double z) attribute_hidden;
-extern double __fma_fma (double x, double y, double z) attribute_hidden;
 
-libm_ifunc (__fma, HAS_FMA ? __fma_fma : __fma_ia32);
-weak_alias (__fma, fma)
-
-# define __fma __fma_ia32
+float
+__fmaf (float x, float y, float z)
+{
+  float r;
+  asm ("maebr %0,%1,%2" : "=f" (r) : "%f" (x), "fR" (y), "0" (z));
+  return r;
+}
+#ifndef __fmaf
+weak_alias (__fmaf, fmaf)
 #endif
-
-#include <math/s_fma.c>
diff --git a/sysdeps/ieee754/ldbl-opt/s_fma.c b/sysdeps/unix/sysv/linux/s390/fpu/s_fma.c
similarity index 76%
copy from sysdeps/ieee754/ldbl-opt/s_fma.c
copy to sysdeps/unix/sysv/linux/s390/fpu/s_fma.c
index 2ee7214..2b2d505 100644
--- a/sysdeps/ieee754/ldbl-opt/s_fma.c
+++ b/sysdeps/unix/sysv/linux/s390/fpu/s_fma.c
@@ -1,5 +1,5 @@
 #include <math_ldbl_opt.h>
-#include <math/s_fma.c>
+#include <sysdeps/s390/fpu/s_fma.c>
 #if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
 compat_symbol (libm, __fma, fmal, GLIBC_2_1);
 #endif
diff --git a/sysdeps/x86_64/multiarch/s_fma.c b/sysdeps/x86_64/multiarch/s_fma.c
index cfecf9b..9a680c6 100644
--- a/sysdeps/x86_64/multiarch/s_fma.c
+++ b/sysdeps/x86_64/multiarch/s_fma.c
@@ -1,5 +1,5 @@
 /* FMA version of fma.
-   Copyright (C) 2009 Free Software Foundation, Inc.
+   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -40,4 +40,4 @@ weak_alias (__fma, fma)
 # define __fma __fma_sse2
 #endif
 
-#include <math/s_fma.c>
+#include <sysdeps/ieee754/dbl-64/s_fma.c>

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                |   32 +++++++
 math/libm-test.inc                       |   18 ++++-
 sysdeps/i386/fpu/bits/mathinline.h       |    6 +-
 sysdeps/i386/i686/multiarch/s_fma.c      |    2 +-
 sysdeps/ieee754/dbl-64/s_fma.c           |  146 ++++++++++++++++++++++++++++++
 sysdeps/ieee754/dbl-64/s_fmaf.c          |    2 +-
 sysdeps/ieee754/ldbl-128/s_fma.c         |   50 ++++++++++
 sysdeps/ieee754/ldbl-96/s_fma.c          |   70 ++++++++++++++
 sysdeps/ieee754/ldbl-opt/s_fma.c         |    2 +-
 sysdeps/powerpc/fpu/s_fma.S              |   33 +++++++
 sysdeps/powerpc/fpu/s_fmaf.S             |   28 ++++++
 sysdeps/powerpc/powerpc32/fpu/s_fma.S    |    5 +
 sysdeps/powerpc/powerpc64/fpu/s_fma.S    |    5 +
 sysdeps/s390/fpu/s_fma.c                 |   37 ++++++++
 sysdeps/s390/fpu/s_fmaf.c                |   32 +++++++
 sysdeps/unix/sysv/linux/s390/fpu/s_fma.c |    5 +
 sysdeps/x86_64/multiarch/s_fma.c         |    4 +-
 17 files changed, 467 insertions(+), 10 deletions(-)
 create mode 100644 sysdeps/ieee754/dbl-64/s_fma.c
 create mode 100644 sysdeps/ieee754/ldbl-128/s_fma.c
 create mode 100644 sysdeps/ieee754/ldbl-96/s_fma.c
 create mode 100644 sysdeps/powerpc/fpu/s_fma.S
 create mode 100644 sysdeps/powerpc/fpu/s_fmaf.S
 create mode 100644 sysdeps/powerpc/powerpc32/fpu/s_fma.S
 create mode 100644 sysdeps/powerpc/powerpc64/fpu/s_fma.S
 create mode 100644 sysdeps/s390/fpu/s_fma.c
 create mode 100644 sysdeps/s390/fpu/s_fmaf.c
 create mode 100644 sysdeps/unix/sysv/linux/s390/fpu/s_fma.c


hooks/post-receive
-- 
GNU C Library master sources
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]