This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] Consolidate and inline calls to __acr
"Joseph S. Myers" <joseph@codesourcery.com> writes:
> how they were originally different from the architecture-independent
> versions,
$ git diff d9204af:sysdeps/ieee754/dbl-64/mpa.c d9204af:sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
diff --git a/d9204af:sysdeps/ieee754/dbl-64/mpa.c b/d9204af:sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
index 68647ba..4a232e2 100644
--- a/d9204af:sysdeps/ieee754/dbl-64/mpa.c
+++ b/d9204af:sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
@@ -2,7 +2,7 @@
/*
* IBM Accurate Mathematical Library
* written by International Business Machines Corp.
- * Copyright (C) 2001 Free Software Foundation
+ * Copyright (C) 2001, 2006 Free Software Foundation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
@@ -53,8 +53,9 @@
/* numbers, even if x->d[0] or y->d[0] are zero. Exponents are also */
/* disregarded. */
static int mcr(const mp_no *x, const mp_no *y, int p) {
- int i;
- for (i=1; i<=p; i++) {
+ long i;
+ long p2 = p;
+ for (i=1; i<=p2; i++) {
if (X[i] == Y[i]) continue;
else if (X[i] > Y[i]) return 1;
else return -1; }
@@ -65,7 +66,7 @@ static int mcr(const mp_no *x, const mp_no *y, int p) {
/* acr() compares the absolute values of two multiple precision numbers */
int __acr(const mp_no *x, const mp_no *y, int p) {
- int i;
+ long i;
if (X[0] == ZERO) {
if (Y[0] == ZERO) i= 0;
@@ -97,7 +98,7 @@ int __cr(const mp_no *x, const mp_no *y, int p) {
/* Copy a multiple precision number. Set *y=*x. x=y is permissible. */
void __cpy(const mp_no *x, mp_no *y, int p) {
- int i;
+ long i;
EY = EX;
for (i=0; i <= p; i++) Y[i] = X[i];
@@ -114,11 +115,13 @@ void __cpy(const mp_no *x, mp_no *y, int p) {
void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
- int i,k;
+ long i,k;
+ long n2 = n;
+ long m2 = m;
- EY = EX; k=MIN(m,n);
+ EY = EX; k=MIN(m2,n2);
for (i=0; i <= k; i++) Y[i] = X[i];
- for ( ; i <= n; i++) Y[i] = ZERO;
+ for ( ; i <= n2; i++) Y[i] = ZERO;
return;
}
@@ -128,7 +131,7 @@ void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
static void norm(const mp_no *x, double *y, int p)
{
#define R radixi.d
- int i;
+ long i;
#if 0
int k;
#endif
@@ -182,7 +185,8 @@ static void norm(const mp_no *x, double *y, int p)
/* number *y, denormalized case (|x| < 2**(-1022))) */
static void denorm(const mp_no *x, double *y, int p)
{
- int i,k;
+ long i,k;
+ long p2 = p;
double c,u,z[5];
#if 0
double a,v;
@@ -192,12 +196,12 @@ static void denorm(const mp_no *x, double *y, int p)
if (EX<-44 || (EX==-44 && X[1]<TWO5))
{ *y=ZERO; return; }
- if (p==1) {
+ if (p2==1) {
if (EX==-42) {z[1]=X[1]+TWO10; z[2]=ZERO; z[3]=ZERO; k=3;}
else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=ZERO; k=2;}
else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;}
}
- else if (p==2) {
+ else if (p2==2) {
if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; z[3]=ZERO; k=3;}
else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=X[2]; k=2;}
else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;}
@@ -213,7 +217,7 @@ static void denorm(const mp_no *x, double *y, int p)
if (u > z[3]) u -= TWO5;
if (u==z[3]) {
- for (i=k+1; i <= p; i++) {
+ for (i=k+1; i <= p2; i++) {
if (X[i] == ZERO) continue;
else {z[3] += ONE; break; }
}
@@ -250,7 +254,8 @@ void __mp_dbl(const mp_no *x, double *y, int p) {
void __dbl_mp(double x, mp_no *y, int p) {
- int i,n;
+ long i,n;
+ long p2 = p;
double u;
/* Sign */
@@ -263,12 +268,12 @@ void __dbl_mp(double x, mp_no *y, int p) {
for ( ; x < ONE; EY -= ONE) x *= RADIX;
/* Digits */
- n=MIN(p,4);
+ n=MIN(p2,4);
for (i=1; i<=n; i++) {
u = (x + TWO52) - TWO52;
if (u>x) u -= ONE;
Y[i] = u; x -= u; x *= RADIX; }
- for ( ; i<=p; i++) Y[i] = ZERO;
+ for ( ; i<=p2; i++) Y[i] = ZERO;
return;
}
@@ -281,11 +286,12 @@ void __dbl_mp(double x, mp_no *y, int p) {
static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
- int i,j,k;
+ long i,j,k;
+ long p2 = p;
EZ = EX;
- i=p; j=p+ EY - EX; k=p+1;
+ i=p2; j=p2+ EY - EX; k=p2+1;
if (j<1)
{__cpy(x,z,p); return; }
@@ -310,7 +316,7 @@ static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
}
if (Z[1] == ZERO) {
- for (i=1; i<=p; i++) Z[i] = Z[i+1]; }
+ for (i=1; i<=p2; i++) Z[i] = Z[i+1]; }
else EZ += ONE;
}
@@ -323,18 +329,19 @@ static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
- int i,j,k;
+ long i,j,k;
+ long p2 = p;
EZ = EX;
if (EX == EY) {
- i=j=k=p;
+ i=j=k=p2;
Z[k] = Z[k+1] = ZERO; }
else {
j= EX - EY;
- if (j > p) {__cpy(x,z,p); return; }
+ if (j > p2) {__cpy(x,z,p); return; }
else {
- i=p; j=p+1-j; k=p;
+ i=p2; j=p2+1-j; k=p2;
if (Y[j] > ZERO) {
Z[k+1] = RADIX - Y[j--];
Z[k] = MONE; }
@@ -364,9 +371,9 @@ static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
for (i=1; Z[i] == ZERO; i++) ;
EZ = EZ - i + 1;
- for (k=1; i <= p+1; )
+ for (k=1; i <= p2+1; )
Z[k++] = Z[i++];
- for (; k <= p; )
+ for (; k <= p2; )
Z[k++] = ZERO;
return;
@@ -428,30 +435,64 @@ void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) {
void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
- int i, i1, i2, j, k, k2;
- double u;
+ long i, i1, i2, j, k, k2;
+ long p2 = p;
+ double u, zk, zk2;
/* Is z=0? */
if (X[0]*Y[0]==ZERO)
{ Z[0]=ZERO; return; }
/* Multiply, add and carry */
- k2 = (p<3) ? p+p : p+3;
- Z[k2]=ZERO;
+ k2 = (p2<3) ? p2+p2 : p2+3;
+ zk = Z[k2]=ZERO;
for (k=k2; k>1; ) {
- if (k > p) {i1=k-p; i2=p+1; }
+ if (k > p2) {i1=k-p2; i2=p2+1; }
else {i1=1; i2=k; }
- for (i=i1,j=i2-1; i<i2; i++,j--) Z[k] += X[i]*Y[j];
+#if 1
+ /* rearange this inner loop to allow the fmadd instructions to be
+ independent and execute in parallel on processors that have
+ dual symetrical FP pipelines. */
+ if (i1 < (i2-1))
+ {
+ /* make sure we have at least 2 iterations */
+ if (((i2 - i1) & 1L) == 1L)
+ {
+ /* Handle the odd iterations case. */
+ zk2 = x->d[i2-1]*y->d[i1];
+ }
+ else
+ zk2 = zero.d;
+ /* Do two multiply/adds per loop iteration, using independent
+ accumulators; zk and zk2. */
+ for (i=i1,j=i2-1; i<i2-1; i+=2,j-=2)
+ {
+ zk += x->d[i]*y->d[j];
+ zk2 += x->d[i+1]*y->d[j-1];
+ }
+ zk += zk2; /* final sum. */
+ }
+ else
+ {
+ /* Special case when iterations is 1. */
+ zk += x->d[i1]*y->d[i1];
+ }
+#else
+ /* The orginal code. */
+ for (i=i1,j=i2-1; i<i2; i++,j--) zk += X[i]*Y[j];
+#endif
- u = (Z[k] + CUTTER)-CUTTER;
- if (u > Z[k]) u -= RADIX;
- Z[k] -= u;
- Z[--k] = u*RADIXI;
+ u = (zk + CUTTER)-CUTTER;
+ if (u > zk) u -= RADIX;
+ Z[k] = zk - u;
+ zk = u*RADIXI;
+ --k;
}
+ Z[k] = zk;
/* Is there a carry beyond the most significant digit? */
if (Z[1] == ZERO) {
- for (i=1; i<=p; i++) Z[i]=Z[i+1];
+ for (i=1; i<=p2; i++) Z[i]=Z[i+1];
EZ = EX + EY - 1; }
else
EZ = EX + EY;
@@ -467,7 +508,7 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
/* *x=0 is not permissible. *x is left unchanged. */
void __inv(const mp_no *x, mp_no *y, int p) {
- int i;
+ long i;
#if 0
int l;
#endif
Andreas.
--
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756 01D3 44D5 214B 8276 4ED5
"And now for something completely different."