This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: PowerPC's math_ldbl.h
- From: Adhemerval Zanella <azanella at linux dot vnet dot ibm dot com>
- To: libc-alpha at sourceware dot org
- Date: Thu, 07 Mar 2013 14:32:36 -0300
- Subject: Re: PowerPC's math_ldbl.h
- References: <87ehg9akpn.fsf@kepler.schwinge.homeip.net> <512626E1.4090801@linux.vnet.ibm.com> <5126DF54.4020806@linux.vnet.ibm.com> <Pine.LNX.4.64.1302221545410.6146@digraph.polyomino.org.uk> <5127C2DD.4090406@linux.vnet.ibm.com> <512E5E17.9000102@linux.vnet.ibm.com>
Ping.
On 02/27/2013 04:27 PM, Adhemerval Zanella wrote:
> Ping.
>
> On 02/22/2013 04:11 PM, Adhemerval Zanella wrote:
>> On 02/22/2013 12:46 PM, Joseph S. Myers wrote:
>>> On Fri, 22 Feb 2013, Adhemerval Zanella wrote:
>>>
>>>> * sysdeps/unix/sysv/linux/powerpc/Implies: Add powerpc/fpu prior to
>>>> ieee754/ldbl-128ibm so sysdeps/powerpc/fpu/math_ldbl.h will be included
>>>> first.
>>> That's wrong - powerpc --without-fp will be using
>>> sysdeps/unix/sysv/linux/powerpc but can't use sysdeps/powerpc/fpu.
>>>
>> Indeed, I didn't take nofpu into consideration. This patch now removes
>> sysdeps/unix/sysv/linux/powerpc/Implies and adds ieee754/ldbl-opt after
>> ieee754/ldbl-128ibm in sysdeps/powerpc/Implies. This keeps sysdeps/powerpc/fpu
>> out of the nofpu build while still making it appear before
>> sysdeps/ieee754/ldbl-128ibm, so the PPC-specific optimization is still used.
>>
>>
>> 2013-02-22 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
>>
>> * sysdeps/ieee754/ldbl-128ibm/math_ldbl.h (ldbl_pack): Rename to
>> default_ldbl_pack and use as the default implementation.
>> (ldbl_unpack): Rename to default_ldbl_unpack and use as the default
>> implementation.
>> * sysdeps/powerpc/fpu/math_ldbl.h (ldbl_extract_mantissa): Remove
>> redundant definition.
>> (ldbl_insert_mantissa): Likewise.
>> (ldbl_canonicalize): Likewise.
>> (ldbl_nearbyint): Likewise.
>> (ldbl_pack): Rename to ldbl_pack_ppc.
>> (ldbl_unpack): Rename to ldbl_unpack_ppc.
>> * sysdeps/unix/sysv/linux/powerpc/Implies: Remove file.
>> * sysdeps/powerpc/Implies: Add ieee754/ldbl-opt after ieee754/ldbl-128ibm.
>>
>> --
>>
>> diff --git a/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h b/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
>> index be9ac71..1cce1fc 100644
>> --- a/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
>> +++ b/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
>> @@ -125,7 +125,7 @@ ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64)
>> /* Handy utility functions to pack/unpack/cononicalize and find the nearbyint
>> of long double implemented as double double. */
>> static inline long double
>> -ldbl_pack (double a, double aa)
>> +default_ldbl_pack (double a, double aa)
>> {
>> union ibm_extended_long_double u;
>> u.dd[0] = a;
>> @@ -134,7 +134,7 @@ ldbl_pack (double a, double aa)
>> }
>>
>> static inline void
>> -ldbl_unpack (long double l, double *a, double *aa)
>> +default_ldbl_unpack (long double l, double *a, double *aa)
>> {
>> union ibm_extended_long_double u;
>> u.d = l;
>> @@ -142,6 +142,12 @@ ldbl_unpack (long double l, double *a, double *aa)
>> *aa = u.dd[1];
>> }
>>
>> +#ifndef ldbl_pack
>> +# define ldbl_pack default_ldbl_pack
>> +#endif
>> +#ifndef ldbl_unpack
>> +# define ldbl_unpack default_ldbl_unpack
>> +#endif
>>
>> /* Convert a finite long double to canonical form.
>> Does not handle +/-Inf properly. */
>> diff --git a/sysdeps/powerpc/Implies b/sysdeps/powerpc/Implies
>> index 7ccf9a7..78dba95 100644
>> --- a/sysdeps/powerpc/Implies
>> +++ b/sysdeps/powerpc/Implies
>> @@ -1,4 +1,5 @@
>> # On PowerPC we use the IBM extended long double format.
>> ieee754/ldbl-128ibm
>> +ieee754/ldbl-opt
>> ieee754/dbl-64
>> ieee754/flt-32
>> diff --git a/sysdeps/powerpc/fpu/math_ldbl.h b/sysdeps/powerpc/fpu/math_ldbl.h
>> index 20224e6..36378c0 100644
>> --- a/sysdeps/powerpc/fpu/math_ldbl.h
>> +++ b/sysdeps/powerpc/fpu/math_ldbl.h
>> @@ -2,132 +2,12 @@
>> #error "Never use <math_ldbl.h> directly; include <math_private.h> instead."
>> #endif
>>
>> -#include <sysdeps/ieee754/ldbl-128/math_ldbl.h>
>> -#include <ieee754.h>
>> -
>> -static inline void
>> -ldbl_extract_mantissa (int64_t *hi64, u_int64_t *lo64, int *exp, long double x)
>> -{
>> - /* We have 105 bits of mantissa plus one implicit digit. Since
>> - 106 bits are representable we use the first implicit digit for
>> - the number before the decimal point and the second implicit bit
>> - as bit 53 of the mantissa. */
>> - unsigned long long hi, lo;
>> - int ediff;
>> - union ibm_extended_long_double eldbl;
>> - eldbl.d = x;
>> - *exp = eldbl.ieee.exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS;
>> -
>> - lo = ((long long)eldbl.ieee.mantissa2 << 32) | eldbl.ieee.mantissa3;
>> - hi = ((long long)eldbl.ieee.mantissa0 << 32) | eldbl.ieee.mantissa1;
>> - /* If the lower double is not a denomal or zero then set the hidden
>> - 53rd bit. */
>> - if (eldbl.ieee.exponent2 > 0x001)
>> - {
>> - lo |= (1ULL << 52);
>> - lo = lo << 7; /* pre-shift lo to match ieee854. */
>> - /* The lower double is normalized separately from the upper. We
>> - may need to adjust the lower mantissa to reflect this. */
>> - ediff = eldbl.ieee.exponent - eldbl.ieee.exponent2;
>> - if (ediff > 53)
>> - lo = lo >> (ediff-53);
>> - }
>> - hi |= (1ULL << 52);
>> -
>> - if ((eldbl.ieee.negative != eldbl.ieee.negative2)
>> - && ((eldbl.ieee.exponent2 != 0) && (lo != 0LL)))
>> - {
>> - hi--;
>> - lo = (1ULL << 60) - lo;
>> - if (hi < (1ULL << 52))
>> - {
>> - /* we have a borrow from the hidden bit, so shift left 1. */
>> - hi = (hi << 1) | (lo >> 59);
>> - lo = 0xfffffffffffffffLL & (lo << 1);
>> - *exp = *exp - 1;
>> - }
>> - }
>> - *lo64 = (hi << 60) | lo;
>> - *hi64 = hi >> 4;
>> -}
>> -
>> -static inline long double
>> -ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64)
>> -{
>> - union ibm_extended_long_double u;
>> - unsigned long hidden2, lzcount;
>> - unsigned long long hi, lo;
>> -
>> - u.ieee.negative = sign;
>> - u.ieee.negative2 = sign;
>> - u.ieee.exponent = exp + IBM_EXTENDED_LONG_DOUBLE_BIAS;
>> - u.ieee.exponent2 = exp-53 + IBM_EXTENDED_LONG_DOUBLE_BIAS;
>> - /* Expect 113 bits (112 bits + hidden) right justified in two longs.
>> - The low order 53 bits (52 + hidden) go into the lower double */
>> - lo = (lo64 >> 7)& ((1ULL << 53) - 1);
>> - hidden2 = (lo64 >> 59) & 1ULL;
>> - /* The high order 53 bits (52 + hidden) go into the upper double */
>> - hi = (lo64 >> 60) & ((1ULL << 11) - 1);
>> - hi |= (hi64 << 4);
>> -
>> - if (lo != 0LL)
>> - {
>> - /* hidden2 bit of low double controls rounding of the high double.
>> - If hidden2 is '1' then round up hi and adjust lo (2nd mantissa)
>> - plus change the sign of the low double to compensate. */
>> - if (hidden2)
>> - {
>> - hi++;
>> - u.ieee.negative2 = !sign;
>> - lo = (1ULL << 53) - lo;
>> - }
>> - /* The hidden bit of the lo mantissa is zero so we need to
>> - normalize the it for the low double. Shift it left until the
>> - hidden bit is '1' then adjust the 2nd exponent accordingly. */
>> -
>> - if (sizeof (lo) == sizeof (long))
>> - lzcount = __builtin_clzl (lo);
>> - else if ((lo >> 32) != 0)
>> - lzcount = __builtin_clzl ((long) (lo >> 32));
>> - else
>> - lzcount = __builtin_clzl ((long) lo) + 32;
>> - lzcount = lzcount - 11;
>> - if (lzcount > 0)
>> - {
>> - int expnt2 = u.ieee.exponent2 - lzcount;
>> - if (expnt2 >= 1)
>> - {
>> - /* Not denormal. Normalize and set low exponent. */
>> - lo = lo << lzcount;
>> - u.ieee.exponent2 = expnt2;
>> - }
>> - else
>> - {
>> - /* Is denormal. */
>> - lo = lo << (lzcount + expnt2);
>> - u.ieee.exponent2 = 0;
>> - }
>> - }
>> - }
>> - else
>> - {
>> - u.ieee.negative2 = 0;
>> - u.ieee.exponent2 = 0;
>> - }
>> -
>> - u.ieee.mantissa3 = lo & ((1ULL << 32) - 1);
>> - u.ieee.mantissa2 = (lo >> 32) & ((1ULL << 20) - 1);
>> - u.ieee.mantissa1 = hi & ((1ULL << 32) - 1);
>> - u.ieee.mantissa0 = (hi >> 32) & ((1ULL << 20) - 1);
>> - return u.d;
>> -}
>> -
>> -/* gcc generates disgusting code to pack and unpack long doubles.
>> - This tells gcc that pack/unpack is really a nop. We use fr1/fr2
>> - because those are the regs used to pass/return a single
>> - long double arg. */
>> +/* GCC does not optimize the default ldbl_pack code well enough to avoid
>> + spilling registers to the stack. The following code tells GCC that
>> + pack/unpack is really a nop. We use fr1/fr2 because those are the
>> + registers used to pass/return a single long double argument. */
>> static inline long double
>> -ldbl_pack (double a, double aa)
>> +ldbl_pack_ppc (double a, double aa)
>> {
>> register long double x __asm__ ("fr1");
>> register double xh __asm__ ("fr1");
>> @@ -139,7 +19,7 @@ ldbl_pack (double a, double aa)
>> }
>>
>> static inline void
>> -ldbl_unpack (long double l, double *a, double *aa)
>> +ldbl_unpack_ppc (long double l, double *a, double *aa)
>> {
>> register long double x __asm__ ("fr1");
>> register double xh __asm__ ("fr1");
>> @@ -150,40 +30,7 @@ ldbl_unpack (long double l, double *a, double *aa)
>> *aa = xl;
>> }
>>
>> +#define ldbl_pack ldbl_pack_ppc
>> +#define ldbl_unpack ldbl_unpack_ppc
>>
>> -/* Convert a finite long double to canonical form.
>> - Does not handle +/-Inf properly. */
>> -static inline void
>> -ldbl_canonicalize (double *a, double *aa)
>> -{
>> - double xh, xl;
>> -
>> - xh = *a + *aa;
>> - xl = (*a - xh) + *aa;
>> - *a = xh;
>> - *aa = xl;
>> -}
>> -
>> -/* Simple inline nearbyint (double) function .
>> - Only works in the default rounding mode
>> - but is useful in long double rounding functions. */
>> -static inline double
>> -ldbl_nearbyint (double a)
>> -{
>> - double two52 = 0x10000000000000LL;
>> -
>> - if (__builtin_expect ((__builtin_fabs (a) < two52), 1))
>> - {
>> - if (__builtin_expect ((a > 0.0), 1))
>> - {
>> - a += two52;
>> - a -= two52;
>> - }
>> - else if (__builtin_expect ((a < 0.0), 1))
>> - {
>> - a = two52 - a;
>> - a = -(a - two52);
>> - }
>> - }
>> - return a;
>> -}
>> +#include <sysdeps/ieee754/ldbl-128ibm/math_ldbl.h>
>> diff --git a/sysdeps/unix/sysv/linux/powerpc/Implies b/sysdeps/unix/sysv/linux/powerpc/Implies
>> deleted file mode 100644
>> index ff27cdb..0000000
>> --- a/sysdeps/unix/sysv/linux/powerpc/Implies
>> +++ /dev/null
>> @@ -1,4 +0,0 @@
>> -# Make sure these routines come before ldbl-opt.
>> -ieee754/ldbl-128ibm
>> -# These supply the ABI compatibility for when long double was double.
>> -ieee754/ldbl-opt