This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] sync up add_magnitudes and sub_magnitudes in ppc mpa.c
- From: Siddhesh Poyarekar <siddhesh at redhat dot com>
- To: libc-alpha at sourceware dot org
- Cc: rsa at us dot ibm dot com
- Date: Thu, 21 Feb 2013 12:26:10 +0530
- Subject: Re: [PATCH] sync up add_magnitudes and sub_magnitudes in ppc mpa.c
- References: <20130214092726.GA11884@spoyarek.pnq.redhat.com>
Ping!
On Thu, Feb 14, 2013 at 02:57:26PM +0530, Siddhesh Poyarekar wrote:
> Hi,
>
> The patch below syncs up the logic in add_magnitudes and
> sub_magnitudes in the powerpc code, which makes the code run a *tiny* bit
> faster. Built and tested on ppc64 and ppc on Power7. OK to commit?
>
> Siddhesh
>
> * sysdeps/powerpc/powerpc32/power4/fpu/mpa.c (add_magnitudes):
> Use ZK to minimize writes to Z.
> (sub_magnitudes): Simplify code a bit.
> * sysdeps/powerpc/powerpc64/power4/fpu/mpa.c (add_magnitudes):
> Use ZK to minimize writes to Z.
> (sub_magnitudes): Simplify code a bit.
>
> diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c b/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
> index 7ebf50b..f948293 100644
> --- a/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
> +++ b/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
> @@ -371,6 +371,7 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
> {
> long i, j, k;
> long p2 = p;
> + double zk;
>
> EZ = EX;
>
> @@ -378,45 +379,54 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
> j = p2 + EY - EX;
> k = p2 + 1;
>
> - if (j < 1)
> + if (__glibc_unlikely (j < 1))
> {
> __cpy (x, z, p);
> return;
> }
> - else
> - Z[k] = ZERO;
> +
> + zk = ZERO;
>
> for (; j > 0; i--, j--)
> {
> - Z[k] += X[i] + Y[j];
> - if (Z[k] >= RADIX)
> + zk += X[i] + Y[j];
> + if (zk >= RADIX)
> {
> - Z[k] -= RADIX;
> - Z[--k] = ONE;
> + Z[k--] = zk - RADIX;
> + zk = ONE;
> }
> else
> - Z[--k] = ZERO;
> + {
> + Z[k--] = zk;
> + zk = ZERO;
> + }
> }
>
> for (; i > 0; i--)
> {
> - Z[k] += X[i];
> - if (Z[k] >= RADIX)
> + zk += X[i];
> + if (zk >= RADIX)
> {
> - Z[k] -= RADIX;
> - Z[--k] = ONE;
> + Z[k--] = zk - RADIX;
> + zk = ONE;
> }
> else
> - Z[--k] = ZERO;
> + {
> + Z[k--] = zk;
> + zk = ZERO;
> + }
> }
>
> - if (Z[1] == ZERO)
> + if (zk == ZERO)
> {
> for (i = 1; i <= p2; i++)
> Z[i] = Z[i + 1];
> }
> else
> - EZ += ONE;
> + {
> + Z[1] = zk;
> + EZ += ONE;
> + }
> }
>
> /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
> @@ -428,65 +438,63 @@ sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
> {
> long i, j, k;
> long p2 = p;
> + double zk;
>
> EZ = EX;
> + i = p2;
> + j = p2 + EY - EX;
> + k = p2;
>
> - if (EX == EY)
> + /* Y is too small compared to X, copy X over to the result. */
> + if (__glibc_unlikely (j < 1))
> {
> - i = j = k = p2;
> - Z[k] = Z[k + 1] = ZERO;
> + __cpy (x, z, p);
> + return;
> }
> - else
> +
> + /* The relevant least significant digit in Y is non-zero, so we factor it in
> + to enhance accuracy. */
> + if (j < p2 && Y[j + 1] > ZERO)
> {
> - j = EX - EY;
> - if (j > p2)
> - {
> - __cpy (x, z, p);
> - return;
> - }
> - else
> - {
> - i = p2;
> - j = p2 + 1 - j;
> - k = p2;
> - if (Y[j] > ZERO)
> - {
> - Z[k + 1] = RADIX - Y[j--];
> - Z[k] = MONE;
> - }
> - else
> - {
> - Z[k + 1] = ZERO;
> - Z[k] = ZERO;
> - j--;
> - }
> - }
> + Z[k + 1] = RADIX - Y[j + 1];
> + zk = MONE;
> }
> + else
> + zk = Z[k + 1] = ZERO;
>
> + /* Subtract and borrow. */
> for (; j > 0; i--, j--)
> {
> - Z[k] += (X[i] - Y[j]);
> - if (Z[k] < ZERO)
> + zk += (X[i] - Y[j]);
> + if (zk < ZERO)
> {
> - Z[k] += RADIX;
> - Z[--k] = MONE;
> + Z[k--] = zk + RADIX;
> + zk = MONE;
> }
> else
> - Z[--k] = ZERO;
> + {
> + Z[k--] = zk;
> + zk = ZERO;
> + }
> }
>
> + /* We're done with digits from Y, so it's just digits in X. */
> for (; i > 0; i--)
> {
> - Z[k] += X[i];
> - if (Z[k] < ZERO)
> + zk += X[i];
> + if (zk < ZERO)
> {
> - Z[k] += RADIX;
> - Z[--k] = MONE;
> + Z[k--] = zk + RADIX;
> + zk = MONE;
> }
> else
> - Z[--k] = ZERO;
> + {
> + Z[k--] = zk;
> + zk = ZERO;
> + }
> }
>
> + /* Normalize. */
> for (i = 1; Z[i] == ZERO; i++);
> EZ = EZ - i + 1;
> for (k = 1; i <= p2 + 1;)
> diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
> index 7ebf50b..f948293 100644
> --- a/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
> +++ b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
> @@ -371,6 +371,7 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
> {
> long i, j, k;
> long p2 = p;
> + double zk;
>
> EZ = EX;
>
> @@ -378,45 +379,54 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
> j = p2 + EY - EX;
> k = p2 + 1;
>
> - if (j < 1)
> + if (__glibc_unlikely (j < 1))
> {
> __cpy (x, z, p);
> return;
> }
> - else
> - Z[k] = ZERO;
> +
> + zk = ZERO;
>
> for (; j > 0; i--, j--)
> {
> - Z[k] += X[i] + Y[j];
> - if (Z[k] >= RADIX)
> + zk += X[i] + Y[j];
> + if (zk >= RADIX)
> {
> - Z[k] -= RADIX;
> - Z[--k] = ONE;
> + Z[k--] = zk - RADIX;
> + zk = ONE;
> }
> else
> - Z[--k] = ZERO;
> + {
> + Z[k--] = zk;
> + zk = ZERO;
> + }
> }
>
> for (; i > 0; i--)
> {
> - Z[k] += X[i];
> - if (Z[k] >= RADIX)
> + zk += X[i];
> + if (zk >= RADIX)
> {
> - Z[k] -= RADIX;
> - Z[--k] = ONE;
> + Z[k--] = zk - RADIX;
> + zk = ONE;
> }
> else
> - Z[--k] = ZERO;
> + {
> + Z[k--] = zk;
> + zk = ZERO;
> + }
> }
>
> - if (Z[1] == ZERO)
> + if (zk == ZERO)
> {
> for (i = 1; i <= p2; i++)
> Z[i] = Z[i + 1];
> }
> else
> - EZ += ONE;
> + {
> + Z[1] = zk;
> + EZ += ONE;
> + }
> }
>
> /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
> @@ -428,65 +438,63 @@ sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
> {
> long i, j, k;
> long p2 = p;
> + double zk;
>
> EZ = EX;
> + i = p2;
> + j = p2 + EY - EX;
> + k = p2;
>
> - if (EX == EY)
> + /* Y is too small compared to X, copy X over to the result. */
> + if (__glibc_unlikely (j < 1))
> {
> - i = j = k = p2;
> - Z[k] = Z[k + 1] = ZERO;
> + __cpy (x, z, p);
> + return;
> }
> - else
> +
> + /* The relevant least significant digit in Y is non-zero, so we factor it in
> + to enhance accuracy. */
> + if (j < p2 && Y[j + 1] > ZERO)
> {
> - j = EX - EY;
> - if (j > p2)
> - {
> - __cpy (x, z, p);
> - return;
> - }
> - else
> - {
> - i = p2;
> - j = p2 + 1 - j;
> - k = p2;
> - if (Y[j] > ZERO)
> - {
> - Z[k + 1] = RADIX - Y[j--];
> - Z[k] = MONE;
> - }
> - else
> - {
> - Z[k + 1] = ZERO;
> - Z[k] = ZERO;
> - j--;
> - }
> - }
> + Z[k + 1] = RADIX - Y[j + 1];
> + zk = MONE;
> }
> + else
> + zk = Z[k + 1] = ZERO;
>
> + /* Subtract and borrow. */
> for (; j > 0; i--, j--)
> {
> - Z[k] += (X[i] - Y[j]);
> - if (Z[k] < ZERO)
> + zk += (X[i] - Y[j]);
> + if (zk < ZERO)
> {
> - Z[k] += RADIX;
> - Z[--k] = MONE;
> + Z[k--] = zk + RADIX;
> + zk = MONE;
> }
> else
> - Z[--k] = ZERO;
> + {
> + Z[k--] = zk;
> + zk = ZERO;
> + }
> }
>
> + /* We're done with digits from Y, so it's just digits in X. */
> for (; i > 0; i--)
> {
> - Z[k] += X[i];
> - if (Z[k] < ZERO)
> + zk += X[i];
> + if (zk < ZERO)
> {
> - Z[k] += RADIX;
> - Z[--k] = MONE;
> + Z[k--] = zk + RADIX;
> + zk = MONE;
> }
> else
> - Z[--k] = ZERO;
> + {
> + Z[k--] = zk;
> + zk = ZERO;
> + }
> }
>
> + /* Normalize. */
> for (i = 1; Z[i] == ZERO; i++);
> EZ = EZ - i + 1;
> for (k = 1; i <= p2 + 1;)