This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch siddhesh/libm-mpa updated. glibc-2.17-174-g21b0d2d


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, siddhesh/libm-mpa has been updated
       via  21b0d2d025b3949198a246ac183bfcc1b9fcb5bb (commit)
       via  3bb4b7f80010b0d8237c887e681c1e7a3e6ebe35 (commit)
       via  f32ec4787e50436065f150b7aca181e576a2dcc5 (commit)
       via  ffde6a00cac4320960dd81afd5970403b890ab88 (commit)
       via  fffdf8b075f7e62f8d2dc999e9a290963838803f (commit)
       via  26126c66e09412bef9b3782a0aab137905bd52dd (commit)
      from  7974e4817608adf4e1e3e9132309a90335928557 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=21b0d2d025b3949198a246ac183bfcc1b9fcb5bb

commit 21b0d2d025b3949198a246ac183bfcc1b9fcb5bb
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
Date:   Tue Feb 12 19:37:41 2013 +0530

    Clean up add_magnitudes and sub_magnitudes

diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index d170248..f9e2488 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -335,7 +335,7 @@ static void
 SECTION
 add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p2)
 {
-  long i, j, k, p = p2;
+  long i, j, k, p = p2, zk;
 
   EZ = EX;
 
@@ -343,35 +343,38 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p2)
   j = p + EY - EX;
   k = p + 1;
 
-  if (j < 1)
+  if (__glibc_unlikely (j < 1))
     {
       __cpy (x, z, p);
       return;
     }
-  else
-    Z[k] = 0;
+
+  zk = 0;
 
   for (; j > 0; i--, j--)
     {
-      long tmp = Z[k] + X[i] + Y[j];
-      Z[k] = tmp % I_RADIX;
-      Z[--k] = tmp / I_RADIX;
+      zk += X[i] + Y[j];
+      Z[k--] = zk % I_RADIX;
+      zk /= I_RADIX;
     }
 
   for (; i > 0; i--)
     {
-      long tmp = Z[k] + X[i];
-      Z[k] = tmp % I_RADIX;
-      Z[--k] = tmp / I_RADIX;
+      zk += X[i];
+      Z[k--] = zk % I_RADIX;
+      zk /= I_RADIX;
     }
 
-  if (Z[1] == 0)
+  if (zk == 0)
     {
       for (i = 1; i <= p; i++)
 	Z[i] = Z[i + 1];
     }
   else
-    EZ++;
+    {
+      Z[1] = zk;
+      EZ++;
+    }
 }
 
 /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
@@ -382,52 +385,46 @@ static void
 SECTION
 sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p2)
 {
-  long i, j, k, p = p2;
+  long i, j, k, p = p2, zk;
 
   EZ = EX;
 
-  if (EX == EY)
+  i = p;
+  j = p + EY - EX;
+  k = p;
+
+  /* Y is too small compared to X, copy X over to the result.  */
+  if (__glibc_unlikely (j < 1))
     {
-      i = j = k = p;
-      Z[k] = Z[k + 1] = 0;
+      __cpy (x, z, p);
+      return;
     }
-  else
-    {
-      j = EX - EY;
-      if (j > p)
-	{
-	  __cpy (x, z, p);
-	  return;
-	}
-      else
-	{
-	  long tmp;
 
-	  i = p;
-	  j = p + 1 - j;
-	  k = p;
-
-	  tmp = I_RADIX - Y[j];
-	  Z[k + 1] = tmp % I_RADIX;
-	  Z[k] = tmp / I_RADIX - 1;
-	  j--;
-	}
+  /* The relevant least significant digit in Y is non-zero, so we factor it in
+     to enhance accuracy.  */
+  if (j < p && Y[j + 1] > 0)
+    {
+      Z[k + 1] = RADIX - Y[j + 1];
+      zk = -1;
     }
+  else
+    zk = Z[k + 1] = ZERO;
 
+  /* Subtract and borrow.  */
   for (; j > 0; i--, j--)
     {
-      long tmp = I_RADIX + Z[k] + X[i] - Y[j];
+      zk += I_RADIX + X[i] - Y[j];
 
-      Z[k] = tmp % I_RADIX;
-      Z[--k] = tmp / I_RADIX - 1;
+      Z[k--] = zk % I_RADIX;
+      zk = zk / I_RADIX - 1;
     }
 
   for (; i > 0; i--)
     {
-      long tmp = I_RADIX + Z[k] + X[i];
+      zk += I_RADIX + X[i];
 
-      Z[k] = tmp % I_RADIX;
-      Z[--k] = tmp / I_RADIX - 1;
+      Z[k--] = zk % I_RADIX;
+      zk = zk / I_RADIX - 1;
     }
 
   for (i = 1; Z[i] == 0; i++);

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=3bb4b7f80010b0d8237c887e681c1e7a3e6ebe35

commit 3bb4b7f80010b0d8237c887e681c1e7a3e6ebe35
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
Date:   Tue Feb 12 19:21:53 2013 +0530

    Sync up latest patches
    
    Port __sqr to this branch and also fix up the broken parts of __mul.

diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index 967921a..d170248 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -552,6 +552,7 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p2)
   long i, j, k, ip, ip2, p = p2;
   int64_t zk;
   int64_t *diag;
+  const mp_no *a;
 
   /* Is z=0?  */
   if (__glibc_unlikely (X[0] * Y[0] == 0))
@@ -566,9 +567,14 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p2)
     if (X[ip2] | Y[ip2])
       break;
 
+  if (X[ip2])
+    a = y;
+  else
+    a = x;
+
   /* ... and here, at least one of them is still zero.  */
   for (ip = ip2; ip > 0; ip--)
-    if (X[ip] * Y[ip])
+    if (a->d[ip])
       break;
 
   /* Multiply, add and carry.  */
@@ -581,19 +587,19 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p2)
   while (k > ip + ip2 + 1)
     Z[k--] = 0;
 
-  zk = Z[k] = 0;
+  zk = 0;
 
   /* Precompute sums of diagonal elements so that we can directly use them
      later.  See the next comment to know we why need them.  */
   diag = alloca (k * sizeof (int64_t));
-  diag[0] = 0;
+  int64_t d = ZERO;
   for (i = 1; i <= ip; i++)
     {
-      diag[i] = X[i] * Y[i];
-      diag[i] += diag[i - 1];
+      d += X[i] * Y[i];
+      diag[i] = d;
     }
   while (i < k)
-    diag[i++] = diag[ip];
+    diag[i++] = d;
 
   /* This gives us additional precision if required.  This is only executed
      when P < IP1 + IP2 + 1, i.e. at least one of the numbers have precision
@@ -677,31 +683,31 @@ SECTION
 __sqr (const mp_no *x, mp_no *y, int p)
 {
   long i, j, k, ip;
-  double u, yk;
+  int64_t yk;
 
   /* Is z=0?  */
-  if (__glibc_unlikely (X[0] == ZERO))
+  if (__glibc_unlikely (X[0] == 0))
     {
-      Y[0] = ZERO;
+      Y[0] = 0;
       return;
     }
 
   /* We need not iterate through all X's since it's pointless to
      multiply zeroes.  */
   for (ip = p; ip > 0; ip--)
-    if (X[ip] != ZERO)
+    if (X[ip])
       break;
 
   k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
 
   while (k > 2 * ip + 1)
-    Y[k--] = ZERO;
+    Y[k--] = 0;
 
-  yk = ZERO;
+  yk = 0;
 
   while (k > p)
     {
-      double yk2 = 0.0;
+      int64_t yk2 = 0;
       long lim = k / 2;
 
       if (k % 2 == 0)
@@ -713,21 +719,15 @@ __sqr (const mp_no *x, mp_no *y, int p)
       for (i = k - p, j = p; i <= lim; i++, j--)
 	yk2 += X[i] * X[j];
 
-      yk += 2.0 * yk2;
-
-      if (i == j)
-          yk +=  X[i] * X[i];
+      yk += yk2 * 2;
 
-      u = (yk + CUTTER) - CUTTER;
-      if (u > yk)
-	u -= RADIX;
-      Y[k--] = yk - u;
-      yk = u * RADIXI;
+      Y[k--] = yk % I_RADIX;
+      yk /= I_RADIX;
     }
 
   while (k > 1)
     {
-      double yk2 = 0.0;
+      int64_t yk2 = 0;
       long lim = k / 2;
 
       if (k % 2 == 0)
@@ -739,22 +739,19 @@ __sqr (const mp_no *x, mp_no *y, int p)
       for (i = 1, j = k - 1; i <= lim; i++, j--)
 	yk2 += X[i] * X[j];
 
-      yk += 2.0 * yk2;
+      yk += yk2 * 2;
 
-      u = (yk + CUTTER) - CUTTER;
-      if (u > yk)
-	u -= RADIX;
-      Y[k--] = yk - u;
-      yk = u * RADIXI;
+      Y[k--] = yk % I_RADIX;
+      yk /= I_RADIX;
     }
   Y[k] = yk;
 
   /* Squares are always positive.  */
-  Y[0] = 1.0;
+  Y[0] = 1;
 
   EY = 2 * EX;
   /* Is there a carry beyond the most significant digit?  */
-  if (__glibc_unlikely (Y[1] == ZERO))
+  if (__glibc_unlikely (Y[1] == 0))
     {
       for (i = 1; i <= p; i++)
 	Y[i] = Y[i + 1];

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=f32ec4787e50436065f150b7aca181e576a2dcc5

commit f32ec4787e50436065f150b7aca181e576a2dcc5
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
Date:   Tue Feb 12 18:54:23 2013 +0530

    Use reduced mantissa calculation for the truncated mantissa as well

diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index 064953b..967921a 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -583,20 +583,6 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p2)
 
   zk = Z[k] = 0;
 
-  /* This gives us additional precision if required.  This is only executed
-     when P < IP1 + IP2 + 1, i.e. at least one of the numbers have precision
-     of greater than or equal to half of what's required (P).  Anything less
-     and we're just wasting our time since we'll be spinning around
-     multiplying zeroes.  */
-  while (k > p)
-    {
-      for (i = k - p, j = p; i < p + 1; i++, j--)
-	zk += (int64_t) X[i] * Y[j];
-
-      Z[k--] = zk % I_RADIX;
-      zk /= I_RADIX;
-    }
-
   /* Precompute sums of diagonal elements so that we can directly use them
      later.  See the next comment to know we why need them.  */
   diag = alloca (k * sizeof (int64_t));
@@ -609,6 +595,32 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p2)
   while (i < k)
     diag[i++] = diag[ip];
 
+  /* This gives us additional precision if required.  This is only executed
+     when P < IP1 + IP2 + 1, i.e. at least one of the numbers have precision
+     of greater than or equal to half of what's required (P).  Anything less
+     and we're just wasting our time since we'll be spinning around
+     multiplying zeroes.  */
+  while (k > p)
+    {
+      long lim = k / 2;
+
+      if (k % 2 == 0)
+        {
+	  /* We want to add this only once, but since we subtract it in the sum
+	     of products above, we add twice.  */
+          zk += 2 * X[lim] * Y[lim];
+	  lim--;
+	}
+
+      for (i = k - p, j = p; i <= lim; i++, j--)
+	zk += (int64_t) (X[i] + X[j]) * (Y[i] + Y[j]);
+
+      zk -= diag[k - 1];
+
+      Z[k--] = zk % I_RADIX;
+      zk /= I_RADIX;
+    }
+
   /* The real deal.  Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
      goes from 1 -> k - 1 and j goes the same range in reverse.  To reduce the
      number of multiplications, we halve the range and if k is an even number,
@@ -624,7 +636,7 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p2)
      used in the loop below.  */
   while (k > 1)
     {
-      int lim = k / 2;
+      long lim = k / 2;
 
       if (k % 2 == 0)
         {

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=ffde6a00cac4320960dd81afd5970403b890ab88

commit ffde6a00cac4320960dd81afd5970403b890ab88
Author: Andreas Schwab <schwab@linux-m68k.org>
Date:   Sun Jan 20 02:03:42 2013 +0100

    Remove use of mpa2.h

diff --git a/ChangeLog b/ChangeLog
index 647ce1d..5e2ace9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2013-01-20  Andreas Schwab  <schwab@linux-m68k.org>
+
+	* sysdeps/powerpc/powerpc32/power4/fpu/mpa.c: Don't include
+	"mpa2.h".
+	* sysdeps/powerpc/powerpc64/power4/fpu/mpa.c: Likewise.
+
 2013-01-18  Siddhesh Poyarekar  <siddhesh@redhat.com>
 
 	[BZ #14496]
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c b/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
index e83943c..ef7a57f 100644
--- a/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
@@ -43,7 +43,6 @@
 
 #include "endian.h"
 #include "mpa.h"
-#include "mpa2.h"
 #include <sys/param.h>
 
 const mp_no mpone = {1, {1.0, 1.0}};
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
index e83943c..ef7a57f 100644
--- a/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
+++ b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
@@ -43,7 +43,6 @@
 
 #include "endian.h"
 #include "mpa.h"
-#include "mpa2.h"
 #include <sys/param.h>
 
 const mp_no mpone = {1, {1.0, 1.0}};

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=fffdf8b075f7e62f8d2dc999e9a290963838803f

commit fffdf8b075f7e62f8d2dc999e9a290963838803f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
Date:   Tue Feb 12 12:49:39 2013 +0530

    New __sqr function as a faster special case of __mul

diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index 830e157..064953b 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -656,6 +656,100 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p2)
   Z[0] = X[0] * Y[0];
 }
 
+/* Square *X and store result in *Y.  X and Y may not overlap.  For P in
+   [1, 2, 3], the exact result is truncated to P digits.  In case P > 3 the
+   error is bounded by 1.001 ULP.  This is a faster special case of
+   multiplication.  */
+void
+SECTION
+__sqr (const mp_no *x, mp_no *y, int p)
+{
+  long i, j, k, ip;
+  double u, yk;
+
+  /* Is z=0?  */
+  if (__glibc_unlikely (X[0] == ZERO))
+    {
+      Y[0] = ZERO;
+      return;
+    }
+
+  /* We need not iterate through all X's since it's pointless to
+     multiply zeroes.  */
+  for (ip = p; ip > 0; ip--)
+    if (X[ip] != ZERO)
+      break;
+
+  k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
+
+  while (k > 2 * ip + 1)
+    Y[k--] = ZERO;
+
+  yk = ZERO;
+
+  while (k > p)
+    {
+      double yk2 = 0.0;
+      long lim = k / 2;
+
+      if (k % 2 == 0)
+        {
+	  yk += X[lim] * X[lim];
+	  lim--;
+	}
+
+      for (i = k - p, j = p; i <= lim; i++, j--)
+	yk2 += X[i] * X[j];
+
+      yk += 2.0 * yk2;
+
+      if (i == j)
+          yk +=  X[i] * X[i];
+
+      u = (yk + CUTTER) - CUTTER;
+      if (u > yk)
+	u -= RADIX;
+      Y[k--] = yk - u;
+      yk = u * RADIXI;
+    }
+
+  while (k > 1)
+    {
+      double yk2 = 0.0;
+      long lim = k / 2;
+
+      if (k % 2 == 0)
+        {
+	  yk += X[lim] * X[lim];
+	  lim--;
+	}
+
+      for (i = 1, j = k - 1; i <= lim; i++, j--)
+	yk2 += X[i] * X[j];
+
+      yk += 2.0 * yk2;
+
+      u = (yk + CUTTER) - CUTTER;
+      if (u > yk)
+	u -= RADIX;
+      Y[k--] = yk - u;
+      yk = u * RADIXI;
+    }
+  Y[k] = yk;
+
+  /* Squares are always positive.  */
+  Y[0] = 1.0;
+
+  EY = 2 * EX;
+  /* Is there a carry beyond the most significant digit?  */
+  if (__glibc_unlikely (Y[1] == ZERO))
+    {
+      for (i = 1; i <= p; i++)
+	Y[i] = Y[i + 1];
+      EY--;
+    }
+}
+
 /* Invert *X and store in *Y.  Relative error bound:
    - For P = 2: 1.001 * R ^ (1 - P)
    - For P = 3: 1.063 * R ^ (1 - P)
diff --git a/sysdeps/ieee754/dbl-64/mpa.h b/sysdeps/ieee754/dbl-64/mpa.h
index 3185299..581c401 100644
--- a/sysdeps/ieee754/dbl-64/mpa.h
+++ b/sysdeps/ieee754/dbl-64/mpa.h
@@ -118,6 +118,7 @@ void __dbl_mp (double, mp_no *, int);
 void __add (const mp_no *, const mp_no *, mp_no *, int);
 void __sub (const mp_no *, const mp_no *, mp_no *, int);
 void __mul (const mp_no *, const mp_no *, mp_no *, int);
+void __sqr (const mp_no *, mp_no *, int);
 void __dvd (const mp_no *, const mp_no *, mp_no *, int);
 
 extern void __mpatan (mp_no *, mp_no *, int);
diff --git a/sysdeps/ieee754/dbl-64/mpexp.c b/sysdeps/ieee754/dbl-64/mpexp.c
index 35c4e80..2db4536 100644
--- a/sysdeps/ieee754/dbl-64/mpexp.c
+++ b/sysdeps/ieee754/dbl-64/mpexp.c
@@ -152,14 +152,14 @@ __mpexp (mp_no *x, mp_no *y, int p)
   /* Raise polynomial value to the power of 2**m. Put result in y.  */
   for (k = 0, j = 0; k < m;)
     {
-      __mul (&mpt2, &mpt2, &mpt1, p);
+      __sqr (&mpt2, &mpt1, p);
       k++;
       if (k == m)
 	{
 	  j = 1;
 	  break;
 	}
-      __mul (&mpt1, &mpt1, &mpt2, p);
+      __sqr (&mpt1, &mpt2, p);
       k++;
     }
   if (j)
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c b/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
index 58c0c54..e83943c 100644
--- a/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
@@ -688,6 +688,99 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
   return;
 }
 
+/* Square *X and store result in *Y.  X and Y may not overlap.  For P in
+   [1, 2, 3], the exact result is truncated to P digits.  In case P > 3 the
+   error is bounded by 1.001 ULP.  This is a faster special case of
+   multiplication.  */
+void
+__sqr (const mp_no *x, mp_no *y, int p)
+{
+  long i, j, k, ip;
+  double u, yk;
+
+  /* Is z=0?  */
+  if (__glibc_unlikely (X[0] == ZERO))
+    {
+      Y[0] = ZERO;
+      return;
+    }
+
+  /* We need not iterate through all X's since it's pointless to
+     multiply zeroes.  */
+  for (ip = p; ip > 0; ip--)
+    if (X[ip] != ZERO)
+      break;
+
+  k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
+
+  while (k > 2 * ip + 1)
+    Y[k--] = ZERO;
+
+  yk = ZERO;
+
+  while (k > p)
+    {
+      double yk2 = 0.0;
+      long lim = k / 2;
+
+      if (k % 2 == 0)
+        {
+	  yk += X[lim] * X[lim];
+	  lim--;
+	}
+
+      for (i = k - p, j = p; i <= lim; i++, j--)
+	yk2 += X[i] * X[j];
+
+      yk += 2.0 * yk2;
+
+      if (i == j)
+          yk +=  X[i] * X[i];
+
+      u = (yk + CUTTER) - CUTTER;
+      if (u > yk)
+	u -= RADIX;
+      Y[k--] = yk - u;
+      yk = u * RADIXI;
+    }
+
+  while (k > 1)
+    {
+      double yk2 = 0.0;
+      long lim = k / 2;
+
+      if (k % 2 == 0)
+        {
+	  yk += X[lim] * X[lim];
+	  lim--;
+	}
+
+      for (i = 1, j = k - 1; i <= lim; i++, j--)
+	yk2 += X[i] * X[j];
+
+      yk += 2.0 * yk2;
+
+      u = (yk + CUTTER) - CUTTER;
+      if (u > yk)
+	u -= RADIX;
+      Y[k--] = yk - u;
+      yk = u * RADIXI;
+    }
+  Y[k] = yk;
+
+  /* Squares are always positive.  */
+  Y[0] = 1.0;
+
+  EY = 2 * EX;
+  /* Is there a carry beyond the most significant digit?  */
+  if (__glibc_unlikely (Y[1] == ZERO))
+    {
+      for (i = 1; i <= p; i++)
+	Y[i] = Y[i + 1];
+      EY--;
+    }
+}
+
 /* Invert *X and store in *Y.  Relative error bound:
    - For P = 2: 1.001 * R ^ (1 - P)
    - For P = 3: 1.063 * R ^ (1 - P)
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
index 58c0c54..e83943c 100644
--- a/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
+++ b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
@@ -688,6 +688,99 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
   return;
 }
 
+/* Square *X and store result in *Y.  X and Y may not overlap.  For P in
+   [1, 2, 3], the exact result is truncated to P digits.  In case P > 3 the
+   error is bounded by 1.001 ULP.  This is a faster special case of
+   multiplication.  */
+void
+__sqr (const mp_no *x, mp_no *y, int p)
+{
+  long i, j, k, ip;
+  double u, yk;
+
+  /* Is z=0?  */
+  if (__glibc_unlikely (X[0] == ZERO))
+    {
+      Y[0] = ZERO;
+      return;
+    }
+
+  /* We need not iterate through all X's since it's pointless to
+     multiply zeroes.  */
+  for (ip = p; ip > 0; ip--)
+    if (X[ip] != ZERO)
+      break;
+
+  k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
+
+  while (k > 2 * ip + 1)
+    Y[k--] = ZERO;
+
+  yk = ZERO;
+
+  while (k > p)
+    {
+      double yk2 = 0.0;
+      long lim = k / 2;
+
+      if (k % 2 == 0)
+        {
+	  yk += X[lim] * X[lim];
+	  lim--;
+	}
+
+      for (i = k - p, j = p; i <= lim; i++, j--)
+	yk2 += X[i] * X[j];
+
+      yk += 2.0 * yk2;
+
+      if (i == j)
+          yk +=  X[i] * X[i];
+
+      u = (yk + CUTTER) - CUTTER;
+      if (u > yk)
+	u -= RADIX;
+      Y[k--] = yk - u;
+      yk = u * RADIXI;
+    }
+
+  while (k > 1)
+    {
+      double yk2 = 0.0;
+      long lim = k / 2;
+
+      if (k % 2 == 0)
+        {
+	  yk += X[lim] * X[lim];
+	  lim--;
+	}
+
+      for (i = 1, j = k - 1; i <= lim; i++, j--)
+	yk2 += X[i] * X[j];
+
+      yk += 2.0 * yk2;
+
+      u = (yk + CUTTER) - CUTTER;
+      if (u > yk)
+	u -= RADIX;
+      Y[k--] = yk - u;
+      yk = u * RADIXI;
+    }
+  Y[k] = yk;
+
+  /* Squares are always positive.  */
+  Y[0] = 1.0;
+
+  EY = 2 * EX;
+  /* Is there a carry beyond the most significant digit?  */
+  if (__glibc_unlikely (Y[1] == ZERO))
+    {
+      for (i = 1; i <= p; i++)
+	Y[i] = Y[i + 1];
+      EY--;
+    }
+}
+
 /* Invert *X and store in *Y.  Relative error bound:
    - For P = 2: 1.001 * R ^ (1 - P)
    - For P = 3: 1.063 * R ^ (1 - P)
diff --git a/sysdeps/x86_64/fpu/multiarch/mpa-avx.c b/sysdeps/x86_64/fpu/multiarch/mpa-avx.c
index d3f4d7a..366b0b7 100644
--- a/sysdeps/x86_64/fpu/multiarch/mpa-avx.c
+++ b/sysdeps/x86_64/fpu/multiarch/mpa-avx.c
@@ -1,5 +1,6 @@
 #define __add __add_avx
 #define __mul __mul_avx
+#define __sqr __sqr_avx
 #define __sub __sub_avx
 #define __dbl_mp __dbl_mp_avx
 #define __dvd __dvd_avx
diff --git a/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c
index 6abb671..a4a7594 100644
--- a/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c
@@ -1,5 +1,6 @@
 #define __add __add_fma4
 #define __mul __mul_fma4
+#define __sqr __sqr_fma4
 #define __sub __sub_fma4
 #define __dbl_mp __dbl_mp_fma4
 #define __dvd __dvd_fma4

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=26126c66e09412bef9b3782a0aab137905bd52dd

commit 26126c66e09412bef9b3782a0aab137905bd52dd
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
Date:   Mon Feb 11 19:45:03 2013 +0530

    Better exp polynomial

diff --git a/sysdeps/ieee754/dbl-64/mpexp.c b/sysdeps/ieee754/dbl-64/mpexp.c
index 8d288ff..35c4e80 100644
--- a/sysdeps/ieee754/dbl-64/mpexp.c
+++ b/sysdeps/ieee754/dbl-64/mpexp.c
@@ -49,6 +49,15 @@ __mpexp (mp_no *x, mp_no *y, int p)
       0, 0, 0, 0, 3, 3, 4, 4, 5, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8
     };
+
+  /* Factorials for the values of np above.  */
+  static const double nfa[33] =
+    {
+      1.0, 1.0, 1.0, 1.0, 6.0, 6.0, 24.0, 24.0, 120.0, 24.0, 24.0,
+      120.0, 120.0, 120.0, 720.0, 720.0, 720.0, 720.0, 720.0, 720.0,
+      720.0, 720.0, 720.0, 720.0, 5040.0, 5040.0, 5040.0, 5040.0,
+      40320.0, 40320.0, 40320.0, 40320.0, 40320.0
+    };
   static const int m1p[33] =
     {
       0, 0, 0, 0,
@@ -71,16 +80,7 @@ __mpexp (mp_no *x, mp_no *y, int p)
       {0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 39, 43, 47, 51, 55, 59, 63},
       {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 47, 50, 54}
     };
-  mp_no mpk =
-    {
-      0,
-      {
-	0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-	0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-	0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-      }
-    };
-  mp_no mps, mpak, mpt1, mpt2;
+  mp_no mps, mpk, mpt1, mpt2;
 
   /* Choose m,n and compute a=2**(-m).  */
   n = np[p];
@@ -115,24 +115,38 @@ __mpexp (mp_no *x, mp_no *y, int p)
 	  break;
     }
 
-  /* Compute s=x*2**(-m). Put result in mps.  */
+  /* Compute s=x*2**(-m). Put result in mps.  This is the range-reduced input
+     that we will use to compute e^s.  For the final result, simply raise it
+     to 2^m.  */
   __pow_mp (-m, &mpt1, p);
   __mul (x, &mpt1, &mps, p);
 
-  /* Evaluate the polynomial. Put result in mpt2.  */
-  mpk.e = 1;
-  mpk.d[0] = ONE;
-  mpk.d[1] = n;
-  __dvd (&mps, &mpk, &mpt1, p);
-  __add (&mpone, &mpt1, &mpak, p);
-  for (k = n - 1; k > 1; k--)
+  /* Compute the Taylor series for e^s:
+
+         1 + x/1! + x^2/2! + x^3/3! ...
+
+     for N iterations.  We compute this as:
+
+         e^x = 1 + (x * n!/1! + x^2 * n!/2! + x^3 * n!/3!) / n!
+             = 1 + (x * (n!/1! + x * (n!/2! + x * (n!/3! + x ...)))) / n!
+
+     n! is pre-computed and saved while k! is computed on the fly.  */
+  __cpy (&mps, &mpt2, p);
+
+  double kf = 1.0;
+
+  /* Evaluate the rest.  The result will be in mpt2.  */
+  for (k = n - 1; k > 0; k--)
     {
-      __mul (&mps, &mpak, &mpt1, p);
-      mpk.d[1] = k;
-      __dvd (&mpt1, &mpk, &mpt2, p);
-      __add (&mpone, &mpt2, &mpak, p);
+      /* n! / k! = n * (n - 1) ... * (n - k + 1) */
+      kf *= k + 1;
+
+      __dbl_mp (kf, &mpk, p);
+      __add (&mpt2, &mpk, &mpt1, p);
+      __mul (&mps, &mpt1, &mpt2, p);
     }
-  __mul (&mps, &mpak, &mpt1, p);
+  __dbl_mp (nfa[p], &mpk, p);
+  __dvd (&mpt2, &mpk, &mpt1, p);
   __add (&mpone, &mpt1, &mpt2, p);
 
   /* Raise polynomial value to the power of 2**m. Put result in y.  */

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                  |    6 +
 sysdeps/ieee754/dbl-64/mpa.c               |  218 ++++++++++++++++++++--------
 sysdeps/ieee754/dbl-64/mpa.h               |    1 +
 sysdeps/ieee754/dbl-64/mpexp.c             |   64 +++++---
 sysdeps/powerpc/powerpc32/power4/fpu/mpa.c |   94 ++++++++++++-
 sysdeps/powerpc/powerpc64/power4/fpu/mpa.c |   94 ++++++++++++-
 sysdeps/x86_64/fpu/multiarch/mpa-avx.c     |    1 +
 sysdeps/x86_64/fpu/multiarch/mpa-fma4.c    |    1 +
 8 files changed, 393 insertions(+), 86 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]