[PATCH 10/11] math: Use lgammaf from CORE-MATH

DJ Delorie dj@redhat.com
Thu Nov 21 02:38:59 GMT 2024


LGTM
Reviewed-by: DJ Delorie <dj@redhat.com>

Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
> The CORE-MATH implementation is correctly rounded (for any rounding mode)
> and shows better performance to the generic lgammaf.
> diff --git a/SHARED-FILES b/SHARED-FILES
> index e6e29bcadc..033ce7f092 100644
> --- a/SHARED-FILES
> +++ b/SHARED-FILES
> @@ -280,3 +280,11 @@ sysdeps/ieee754/flt-32/s_erfcf.c
>    (file src/binary32/erfc/erfcf.c in CORE-MATH)
>    - The code was adapted to use glibc code style and internal
>      functions to handle errno, overflow, and underflow.
> +sysdeps/ieee754/flt-32/e_lgammaf_r.c:
> +  (file src/binary32/lgamma/lgammaf.c in CORE-MATH)
> +  - change the function name from cr_lgammaf to __ieee754_lgammaf_r
> +  - add "int *signgamp" as 2nd argument and add at the beginning:
> +    if (signgamp != NULL) *signgamp = 1;
> +  - remove the errno stuff (this is done by the wrapper)
> +  - replace 0x1p127f * 0x1p127f by math_narrow_eval (x * 0x1p127f)
> +  - add libm_alias_finite (__ieee754_lgammaf_r, __lgammaf_r) at the end

Ok.

> diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
> diff --git a/sysdeps/alpha/fpu/libm-test-ulps b/sysdeps/alpha/fpu/libm-test-ulps
> diff --git a/sysdeps/arc/fpu/libm-test-ulps b/sysdeps/arc/fpu/libm-test-ulps
> diff --git a/sysdeps/arc/nofpu/libm-test-ulps b/sysdeps/arc/nofpu/libm-test-ulps
> diff --git a/sysdeps/arm/libm-test-ulps b/sysdeps/arm/libm-test-ulps
> diff --git a/sysdeps/csky/fpu/libm-test-ulps b/sysdeps/csky/fpu/libm-test-ulps
> diff --git a/sysdeps/csky/nofpu/libm-test-ulps b/sysdeps/csky/nofpu/libm-test-ulps
> diff --git a/sysdeps/hppa/fpu/libm-test-ulps b/sysdeps/hppa/fpu/libm-test-ulps
> diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps
> diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps

Ok.

> diff --git a/sysdeps/ieee754/flt-32/e_lgammaf_r.c b/sysdeps/ieee754/flt-32/e_lgammaf_r.c

> -/* e_lgammaf_r.c -- float version of e_lgamma_r.c.
> - */
> -
> -/*
> - * ====================================================
> - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
> - *
> - * Developed at SunPro, a Sun Microsystems, Inc. business.
> - * Permission to use, copy, modify, and distribute this
> - * software is freely granted, provided that this notice
> - * is preserved.
> - * ====================================================
> - */
> +/* Correctly-rounded logarithm of the absolute value of the gamma function
> +   for binary32 value.
>  
> +Copyright (c) 2023, 2024 Alexei Sibidanov.
> +
> +This file is part of the CORE-MATH project
> +project (file src/binary32/lgamma/lgammaf.c, revision bc385c2).
> +
> +Permission is hereby granted, free of charge, to any person obtaining a copy
> +of this software and associated documentation files (the "Software"), to deal
> +in the Software without restriction, including without limitation the rights
> +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> +copies of the Software, and to permit persons to whom the Software is
> +furnished to do so, subject to the following conditions:
> +
> +The above copyright notice and this permission notice shall be included in all
> +copies or substantial portions of the Software.
> +
> +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> +SOFTWARE.
> +*/
> +

Ok.

> +/* Changes with respect to the original CORE-MATH code:
> +   - removed the dealing with errno
> +     (this is done in the wrapper math/w_lgammaf_compat2.c).
> +   - usage of math_narrow_eval to deal with underflow/overflow.
> +   - deal with signamp.  */
> +
> +#include <array_length.h>
> +#include <stdint.h>
>  #include <math.h>
> -#include <math-narrow-eval.h>
> -#include <math_private.h>
> -#include <libc-diag.h>
>  #include <libm-alias-finite.h>
> +#include <limits.h>
> +#include <math-narrow-eval.h>
> +#include "math_config.h"

Ok.

> -static const float
> -two23=  8.3886080000e+06, /* 0x4b000000 */
> -half=  5.0000000000e-01, /* 0x3f000000 */
> -one =  1.0000000000e+00, /* 0x3f800000 */
> -pi  =  3.1415927410e+00, /* 0x40490fdb */
> -a0  =  7.7215664089e-02, /* 0x3d9e233f */
> -a1  =  3.2246702909e-01, /* 0x3ea51a66 */
> -a2  =  6.7352302372e-02, /* 0x3d89f001 */
> -a3  =  2.0580807701e-02, /* 0x3ca89915 */
> -a4  =  7.3855509982e-03, /* 0x3bf2027e */
> -a5  =  2.8905137442e-03, /* 0x3b3d6ec6 */
> -a6  =  1.1927076848e-03, /* 0x3a9c54a1 */
> -a7  =  5.1006977446e-04, /* 0x3a05b634 */
> -a8  =  2.2086278477e-04, /* 0x39679767 */
> -a9  =  1.0801156895e-04, /* 0x38e28445 */
> -a10 =  2.5214456400e-05, /* 0x37d383a2 */
> -a11 =  4.4864096708e-05, /* 0x383c2c75 */
> -tc  =  1.4616321325e+00, /* 0x3fbb16c3 */
> -tf  = -1.2148628384e-01, /* 0xbdf8cdcd */
> -/* tt = -(tail of tf) */
> -tt  =  6.6971006518e-09, /* 0x31e61c52 */
> -t0  =  4.8383611441e-01, /* 0x3ef7b95e */
> -t1  = -1.4758771658e-01, /* 0xbe17213c */
> -t2  =  6.4624942839e-02, /* 0x3d845a15 */
> -t3  = -3.2788541168e-02, /* 0xbd064d47 */
> -t4  =  1.7970675603e-02, /* 0x3c93373d */
> -t5  = -1.0314224288e-02, /* 0xbc28fcfe */
> -t6  =  6.1005386524e-03, /* 0x3bc7e707 */
> -t7  = -3.6845202558e-03, /* 0xbb7177fe */
> -t8  =  2.2596477065e-03, /* 0x3b141699 */
> -t9  = -1.4034647029e-03, /* 0xbab7f476 */
> -t10 =  8.8108185446e-04, /* 0x3a66f867 */
> -t11 = -5.3859531181e-04, /* 0xba0d3085 */
> -t12 =  3.1563205994e-04, /* 0x39a57b6b */
> -t13 = -3.1275415677e-04, /* 0xb9a3f927 */
> -t14 =  3.3552918467e-04, /* 0x39afe9f7 */
> -u0  = -7.7215664089e-02, /* 0xbd9e233f */
> -u1  =  6.3282704353e-01, /* 0x3f2200f4 */
> -u2  =  1.4549225569e+00, /* 0x3fba3ae7 */
> -u3  =  9.7771751881e-01, /* 0x3f7a4bb2 */
> -u4  =  2.2896373272e-01, /* 0x3e6a7578 */
> -u5  =  1.3381091878e-02, /* 0x3c5b3c5e */
> -v1  =  2.4559779167e+00, /* 0x401d2ebe */
> -v2  =  2.1284897327e+00, /* 0x4008392d */
> -v3  =  7.6928514242e-01, /* 0x3f44efdf */
> -v4  =  1.0422264785e-01, /* 0x3dd572af */
> -v5  =  3.2170924824e-03, /* 0x3b52d5db */
> -s0  = -7.7215664089e-02, /* 0xbd9e233f */
> -s1  =  2.1498242021e-01, /* 0x3e5c245a */
> -s2  =  3.2577878237e-01, /* 0x3ea6cc7a */
> -s3  =  1.4635047317e-01, /* 0x3e15dce6 */
> -s4  =  2.6642270386e-02, /* 0x3cda40e4 */
> -s5  =  1.8402845599e-03, /* 0x3af135b4 */
> -s6  =  3.1947532989e-05, /* 0x3805ff67 */
> -r1  =  1.3920053244e+00, /* 0x3fb22d3b */
> -r2  =  7.2193557024e-01, /* 0x3f38d0c5 */
> -r3  =  1.7193385959e-01, /* 0x3e300f6e */
> -r4  =  1.8645919859e-02, /* 0x3c98bf54 */
> -r5  =  7.7794247773e-04, /* 0x3a4beed6 */
> -r6  =  7.3266842264e-06, /* 0x36f5d7bd */
> -w0  =  4.1893854737e-01, /* 0x3ed67f1d */
> -w1  =  8.3333335817e-02, /* 0x3daaaaab */
> -w2  = -2.7777778450e-03, /* 0xbb360b61 */
> -w3  =  7.9365057172e-04, /* 0x3a500cfd */
> -w4  = -5.9518753551e-04, /* 0xba1c065c */
> -w5  =  8.3633989561e-04, /* 0x3a5b3dd2 */
> -w6  = -1.6309292987e-03; /* 0xbad5c4e8 */
> -
> -static const float zero=  0.0000000000e+00;
> -

Ok.

> -static float
> -sin_pif(float x)
> -	float y,z;
> -	int n,ix;
> -
> -	GET_FLOAT_WORD(ix,x);
> -	ix &= 0x7fffffff;
> -
> -	if(ix<0x3e800000) return __sinf (pi*x);
> -	y = -x;		/* x is assume negative */
> -
> -    /*
> -     * argument reduction, make sure inexact flag not raised if input
> -     * is an integer
> -     */
> -	z = floorf(y);
> -	if(z!=y) {				/* inexact anyway */
> -	    y  *= (float)0.5;
> -	    y   = (float)2.0*(y - floorf(y));	/* y = |x| mod 2.0 */
> -	    n   = (int) (y*(float)4.0);
> -	} else {
> -	    if(ix>=0x4b800000) {
> -		y = zero; n = 0;                 /* y must be even */
> -	    } else {
> -		if(ix<0x4b000000) z = y+two23;	/* exact */
> -		GET_FLOAT_WORD(n,z);
> -		n &= 1;
> -		y  = n;
> -		n<<= 2;
> -	    }
> -	}
> -	switch (n) {
> -	    case 0:   y =  __sinf (pi*y); break;
> -	    case 1:
> -	    case 2:   y =  __cosf (pi*((float)0.5-y)); break;
> -	    case 3:
> -	    case 4:   y =  __sinf (pi*(one-y)); break;
> -	    case 5:
> -	    case 6:   y = -__cosf (pi*(y-(float)1.5)); break;
> -	    default:  y =  __sinf (pi*(y-(float)2.0)); break;
> -	    }
> -	return -y;

Ok.

> +static double
> +as_r7 (double x, const double *c)
>  {
> +  return (((x - c[0]) * (x - c[1])) * ((x - c[2]) * (x - c[3])))
> +	 * (((x - c[4]) * (x - c[5])) * ((x - c[6])));
>  }

Ok

> +static double
> +as_r8 (double x, const double *c)
> +{
> +  return (((x - c[0]) * (x - c[1])) * ((x - c[2]) * (x - c[3])))
> +	 * (((x - c[4]) * (x - c[5])) * ((x - c[6]) * (x - c[7])));
> +}

Ok.

> +static double
> +as_sinpi (double x)
> +{
> +  static const double c[] =
> +    {
> +       0x1p+2,                -0x1.de9e64df22ea4p+1,   0x1.472be122401f8p+0,
> +      -0x1.d4fcd82df91bp-3,    0x1.9f05c97e0aab2p-6,  -0x1.f3091c427b611p-10,
> +       0x1.b22c9bfdca547p-14, -0x1.15484325ef569p-18
> +    };
> +  x -= 0.5;
> +  double x2 = x * x, x4 = x2 * x2, x8 = x4 * x4;
> +  return (0.25 - x2)
> +	 * ((c[0] + x2 * c[1]) + x4 * (c[2] + x2 * c[3])
> +	    + x8 * ((c[4] + x2 * c[5]) + x4 * (c[6] + x2 * c[7])));
> +}

Ok.

> +static double
> +as_ln (double x)
> +{
> +  uint64_t t = asuint64 (x);
> +  int e = (t >> 52) - 0x3ff;
> +  static const double c[] =
> +    {
> +       0x1.fffffffffff24p-1, -0x1.ffffffffd1d67p-2,  0x1.55555537802dep-2,
> +      -0x1.ffffeca81b866p-3,  0x1.999611761d772p-3, -0x1.54f3e581b61bfp-3,
> +       0x1.1e642b4cb5143p-3, -0x1.9115a5af1e1edp-4
> +    };
> +  static const double il[] =
> +    {
> +      0x1.59caeec280116p-57, 0x1.f0a30c01162aap-5, 0x1.e27076e2af2ebp-4,
> +      0x1.5ff3070a793d6p-3,  0x1.c8ff7c79a9a2p-3,  0x1.1675cababa60fp-2,
> +      0x1.4618bc21c5ec2p-2,  0x1.739d7f6bbd007p-2, 0x1.9f323ecbf984dp-2,
> +      0x1.c8ff7c79a9a21p-2,  0x1.f128f5faf06ecp-2, 0x1.0be72e4252a83p-1,
> +      0x1.1e85f5e7040d1p-1,  0x1.307d7334f10bep-1, 0x1.41d8fe84672afp-1,
> +      0x1.52a2d265bc5abp-1
> +    };
> +  static const double ix[] =
> +    {
> +      0x1p+0,                0x1.e1e1e1e1e1e1ep-1, 0x1.c71c71c71c71cp-1,
> +      0x1.af286bca1af28p-1,  0x1.999999999999ap-1, 0x1.8618618618618p-1,
> +      0x1.745d1745d1746p-1,  0x1.642c8590b2164p-1, 0x1.5555555555555p-1,
> +      0x1.47ae147ae147bp-1,  0x1.3b13b13b13b14p-1, 0x1.2f684bda12f68p-1,
> +      0x1.2492492492492p-1,  0x1.1a7b9611a7b96p-1, 0x1.1111111111111p-1,
> +      0x1.0842108421084p-1
> +    };
> +  int i = (t >> 48) & 0xf;
> +  t = (t & (~UINT64_C(0) >> 12)) | (INT64_C(0x3ff) << 52);
> +  double z = ix[i] * asdouble (t) - 1;
> +  double z2 = z * z, z4 = z2 * z2;
> +  return e * 0x1.62e42fefa39efp-1 + il[i]
> +	 + z * ((c[0] + z * c[1]) + z2 * (c[2] + z * c[3])
> +		+ z4 * ((c[4] + z * c[5]) + z2 * (c[6] + z * c[7])));
> +}

Ok.

>  float
> -__ieee754_lgammaf_r(float x, int *signgamp)
> +__ieee754_lgammaf_r (float x, int *signgamp)
>  {

Ok.

> -	float t,y,z,nadj,p,p1,p2,p3,q,r,w;
> -	int i,hx,ix;
> -
> -	GET_FLOAT_WORD(hx,x);
> -
> -    /* purge off +-inf, NaN, +-0, and negative arguments */
> -	*signgamp = 1;
> -	ix = hx&0x7fffffff;
> -	if(__builtin_expect(ix>=0x7f800000, 0)) return x*x;
> -	if(__builtin_expect(ix==0, 0))
> -	  {
> -	    if (hx < 0)
> -	      *signgamp = -1;
> -	    return one/fabsf(x);
> -	  }
> -	if(__builtin_expect(ix<0x30800000, 0)) {
> -	    /* |x|<2**-30, return -log(|x|) */
> -	    if(hx<0) {
> -		*signgamp = -1;
> -		return -__ieee754_logf(-x);
> -	    } else return -__ieee754_logf(x);

Ok.

> +  static const struct
> +  {
> +    float x;
> +    float f;
> +    float df;
> +  } tb[] = {
> +    { -0x1.efc2a2p+14, -0x1.222dbcp+18,   -0x1p-7 },
> +    { -0x1.627346p+7,  -0x1.73235ep+9,   -0x1p-16 },
> +    { -0x1.08b14p+4,   -0x1.f0cbe6p+4,   -0x1p-21 },
> +    { -0x1.69d628p+3,  -0x1.0eac2ap+4,   -0x1p-21 },
> +    { -0x1.904902p+2,  -0x1.65532cp+2,    0x1p-23 },
> +    { -0x1.9272d2p+1,  -0x1.170b98p-8,    0x1p-33 },
> +    { -0x1.625edap+1,   0x1.6a6c4ap-5,   -0x1p-30 },
> +    { -0x1.5fc2aep+1,   0x1.c0a484p-11,  -0x1p-36 },
> +    { -0x1.5fb43ep+1,   0x1.5b697p-17,    0x1p-42 },
> +    { -0x1.5fa20cp+1,  -0x1.132f7ap-10,   0x1p-35 },
> +    { -0x1.580c1ep+1,  -0x1.5787c6p-4,    0x1p-29 },
> +    { -0x1.3a7fcap+1,  -0x1.e4cf24p-24,  -0x1p-49 },
> +    { -0x1.c2f04p-30,   0x1.43a6f6p+4,    0x1p-21 },
> +    { -0x1.ade594p-30,  0x1.446ab2p+4,   -0x1p-21 },
> +    { -0x1.437e74p-40,  0x1.b7dec2p+4,   -0x1p-21 },
> +    { -0x1.d85bfep-43,  0x1.d31592p+4,   -0x1p-21 },
> +    { -0x1.f51c8ep-49,  0x1.0a572ap+5,   -0x1p-20 },
> +    { -0x1.108a5ap-66,  0x1.6d7b18p+5,   -0x1p-20 },
> +    { -0x1.ecf3fep-73,  0x1.8f8e5ap+5,   -0x1p-20 },
> +    { -0x1.25cb66p-123, 0x1.547a44p+6,   -0x1p-19 },
> +    { 0x1.ecf3fep-73,   0x1.8f8e5ap+5,   -0x1p-20 },
> +    { 0x1.108a5ap-66,   0x1.6d7b18p+5,   -0x1p-20 },
> +    { 0x1.a68bbcp-42,   0x1.c9c6e8p+4,    0x1p-21 },
> +    { 0x1.ddfd06p-12,   0x1.ec5ba8p+2,   -0x1p-23 },
> +    { 0x1.f8a754p-9,    0x1.63acc2p+2,    0x1p-23 },
> +    { 0x1.8d16b2p+5,    0x1.1e4b4ep+7,    0x1p-18 },
> +    { 0x1.359e0ep+10,   0x1.d9ad02p+12,  -0x1p-13 },
> +    { 0x1.a82a2cp+13,   0x1.c38036p+16,   0x1p-9 },
> +    { 0x1.62c646p+14,   0x1.9075bep+17,  -0x1p-8 },
> +    { 0x1.7f298p+31,    0x1.f44946p+35,  -0x1p+10 },
> +    { 0x1.a45ea4p+33,   0x1.25dcbcp+38,  -0x1p+13 },
> +    { 0x1.f9413ep+76,   0x1.9d5ab4p+82,  -0x1p+57 },
> +    { 0x1.dcbbaap+99,   0x1.fc5772p+105,  0x1p+80 },
> +    { 0x1.58ace8p+112,  0x1.9e4f66p+118, -0x1p+93 },
> +    { 0x1.87bdfp+115,   0x1.e465aep+121,  0x1p+96 },
> +  };

Ok.

> +  float fx = floor (x);
> +  float ax = fabsf (x);
> +  uint32_t t = asuint (ax);
> +  if (__glibc_unlikely (t >= (0xffu << 23)))
> +    {
> +      *signgamp = 1;
> +      if (t == (0xffu << 23))
> +	return INFINITY;
> +      return x + x; /* nan */
> +    }

Ok.

> +  if (__glibc_unlikely (fx == x))
> +    {
> +      if (x <= 0.0f)
> +	{
> +	  *signgamp = asuint (x) >> 31 ? -1 : 1;
> +	  return 1.0f / 0.0f;
>  	}

Ok.

> -	if(hx<0) {
> -	    if(ix>=0x4b000000)	/* |x|>=2**23, must be -integer */
> -		return fabsf (x)/zero;
> -	    if (ix > 0x40000000 /* X < 2.0f.  */
> -		&& ix < 0x41700000 /* X > -15.0f.  */)
> -		return __lgamma_negf (x, signgamp);
> -	    t = sin_pif(x);
> -	    if(t==zero) return one/fabsf(t); /* -integer */
> -	    nadj = __ieee754_logf(pi/fabsf(t*x));
> -	    if(t<zero) *signgamp = -1;
> -	    x = -x;

Ok.

> +      if (x == 1.0f || x == 2.0f)
> +	{
> +	  *signgamp = 1;
> +	  return 0.0f;
>  	}
> +    }
> +

Ok.

> +  /* Check the value of fx to avoid a spurious invalid exception.
> +     Note that for a binary32 |x| >= 2^23, x is necessarily an integer,
> +     and we already dealed with negative integers, thus now:
> +     -2^23 < x < +Inf and x is not a negative integer nor 0, 1, 2. */
> +  int k;
> +  if (__builtin_expect (fx >= 0x1p31f, 0))
> +    k = INT_MAX;
> +  else
> +    k = fx;
> +  *signgamp = 1 - (((k & (k >> 31)) & 1) << 1);
>  

Ok.

> -    /* purge off 1 and 2 */
> -	if (ix==0x3f800000||ix==0x40000000) r = 0;
> -    /* for x < 2.0 */
> -	else if(ix<0x40000000) {
> -	    if(ix<=0x3f666666) {	/* lgamma(x) = lgamma(x+1)-log(x) */
> -		r = -__ieee754_logf(x);
> -		if(ix>=0x3f3b4a20) {y = one-x; i= 0;}
> -		else if(ix>=0x3e6d3308) {y= x-(tc-one); i=1;}
> -		else {y = x; i=2;}
> -	    } else {
> -		r = zero;
> -		if(ix>=0x3fdda618) {y=(float)2.0-x;i=0;} /* [1.7316,2] */
> -		else if(ix>=0x3F9da620) {y=x-tc;i=1;} /* [1.23,1.73] */
> -		else {y=x-one;i=2;}

Ok.

> +  double z = ax, f;
> +  if (__glibc_unlikely (ax < 0x1.52p-1f))
> +    {
> +      static const double rn[] =
> +	{
> +	  -0x1.505bdf4b65acp+4,  -0x1.51c80eb47e068p+2,
> +	   0x1.0000000007cb8p+0, -0x1.4ac529250a1fcp+1,
> +	  -0x1.a8c99dbe1621ap+0, -0x1.4abdcc74115eap+0,
> +	  -0x1.1b87fe5a5b923p+0, -0x1.05b8a4d47ff64p+0
> +	};
> +      const double c0 = 0x1.0fc0fad268c4dp+2;
> +      static const double rd[] =
> +	{
> +	  -0x1.4db2cfe9a5265p+5, -0x1.062e99d1c4f27p+3,
> +	  -0x1.c81bc2ecf25f6p+1, -0x1.108e55c10091bp+1,
> +	  -0x1.7dd25af0b83d4p+0, -0x1.36bf1880125fcp+0,
> +	  -0x1.1379fc8023d9cp+0, -0x1.03712e41525d2p+0
> +	};
> +      double s = x;
> +      f = (c0 * s) * as_r8 (s, rn) / as_r8 (s, rd) - as_ln (z);
> +    }

Ok.

> +  else
> +    {
> +      if (ax > 0x1.afc1ap+1f)
> +	{
> +	  if (__glibc_unlikely (x > 0x1.895f1cp+121f))
> +	    return math_narrow_eval (0x1p127f * 0x1p127f);
> +	  /* |x|>=2**23, must be -integer */
> +	  if (__glibc_unlikely (x < 0.0f && ax > 0x1p+23))
> +	    return ax / 0.0f;
> +	  double lz = as_ln (z);
> +	  f = (z - 0.5) * (lz - 1) + 0x1.acfe390c97d69p-2;
> +	  if (ax < 0x1.0p+20f)
> +	    {
> +	      double iz = 1.0 / z, iz2 = iz * iz;
> +	      if (ax > 1198.0f)
> +		f += iz * (1. / 12.);

Ok.

> +	      else if (ax > 0x1.279a7p+6f)
> +		{
> +		  static const double c[] =
> +		    {
> +		      0x1.555555547fbadp-4, -0x1.6c0fd270c465p-9
> +		    };
> +		  f += iz * (c[0] + iz2 * c[1]);
> +		}

Ok.

> +	      else if (ax > 0x1.555556p+3f)
> +		{
> +		  static const double c[] =
> +		    {
> +		      0x1.555555554de0bp-4,  -0x1.6c16bdc45944fp-9,
> +		      0x1.a0077f300ecb3p-11, -0x1.2e9cfff3b29c2p-11
> +		    };
> +		  double iz4 = iz2 * iz2;
> +		  f += iz * ((c[0] + iz2 * c[1]) + iz4 * (c[2] + iz2 * c[3]));
> +		}

Ok.

> +	      else
> +		{
> +		  static const double c[] =
> +		    {
> +		      0x1.5555555551286p-4,  -0x1.6c16c0e7c4cf4p-9,
> +		      0x1.a0193267fe6f2p-11, -0x1.37e87ec19cb45p-11,
> +		      0x1.b40011dfff081p-11, -0x1.c16c8946b19b6p-10,
> +		      0x1.e9f47ace150d8p-9,  -0x1.4f5843a71a338p-8
> +		    };
> +		  double iz4 = iz2 * iz2, iz8 = iz4 * iz4;
> +		  double p = ((c[0] + iz2 * c[1]) + iz4 * (c[2] + iz2 * c[3]))
> +			     + iz8 * ((c[4] + iz2 * c[5])
> +				      + iz4 * (c[6] + iz2 * c[7]));
> +		  f += iz * p;
> +		}
>  	    }

Ok.

> -	    switch(i) {
> -	      case 0:
> -		z = y*y;
> -		p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))));
> -		p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))));
> -		p  = y*p1+p2;
> -		r  += (p-(float)0.5*y); break;
> -	      case 1:
> -		z = y*y;
> -		w = z*y;
> -		p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12)));	/* parallel comp */
> -		p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)));
> -		p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)));
> -		p  = z*p1-(tt-w*(p2+y*p3));
> -		r += (tf + p); break;
> -	      case 2:
> -		p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))));
> -		p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))));
> -		r += (-(float)0.5*y + p1/p2);

Ok.

> +	  if (x < 0.0f)
> +	    {
> +	      f = 0x1.250d048e7a1bdp+0 - f - lz;
> +	      double lp = as_ln (as_sinpi (x - fx));
> +	      f -= lp;
>  	    }
>  	}

Ok.

> -	else if(ix<0x41000000) {			/* x < 8.0 */
> -	    i = (int)x;
> -	    t = zero;
> -	    y = x-(float)i;
> -	    p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
> -	    q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
> -	    r = half*y+p/q;
> -	    z = one;	/* lgamma(1+s) = log(s) + lgamma(s) */
> -	    switch(i) {
> -	    case 7: z *= (y+(float)6.0);	/* FALLTHRU */
> -	    case 6: z *= (y+(float)5.0);	/* FALLTHRU */
> -	    case 5: z *= (y+(float)4.0);	/* FALLTHRU */
> -	    case 4: z *= (y+(float)3.0);	/* FALLTHRU */
> -	    case 3: z *= (y+(float)2.0);	/* FALLTHRU */
> -		    r += __ieee754_logf(z); break;

Ok.

> +      else
> +	{
> +	  static const double rn[] =
> +	    {
> +	      -0x1.667923ff14df7p+5, -0x1.2d35f25ad8f64p+3,
> +	      -0x1.b8c9eab9d5bd3p+1, -0x1.7a4a97f494127p+0,
> +	      -0x1.3a6c8295b4445p-1, -0x1.da44e8b810024p-3,
> +	      -0x1.9061e81c77e4ap-5
> +	    };
> +	  if (x < 0.0f)
> +	    {
> +	      int ni = floorf (-2 * x);
> +	      if ((ni & 1) == 0 && ni == -2 * x)
> +		return 1.0f / 0.0f;
> +	    }
> +	  const double c0 = 0x1.3cc0e6a0106b3p+2;
> +	  static const double rd[] =
> +	    {
> +	      -0x1.491a899e84c52p+6, -0x1.d202961b9e098p+3,
> +	      -0x1.4ced68c631ed6p+2, -0x1.2589eedf40738p+1,
> +	      -0x1.1302e3337271p+0,  -0x1.c36b802f26dffp-2,
> +	      -0x1.3ded448acc39dp-3, -0x1.bffc491078eafp-6
> +	    };
> +	  f = (z - 1) * (z - 2) * c0 * as_r7 (z, rn) / as_r8 (z, rd);

Ok.

> +	  if (x < 0.0f)
> +	    {
> +	      if (__glibc_unlikely (t < 0x40301b93u && t > 0x402f95c2u))
> +		{
> +		  double h = (x + 0x1.5fb410a1bd901p+1)
> +		    - 0x1.a19a96d2e6f85p-54;
> +		  double h2 = h * h;
> +		  double h4 = h2 * h2;
> +		  static const double c[] =
> +		    {
> +		      -0x1.ea12da904b18cp+0,  0x1.3267f3c265a54p+3,
> +		      -0x1.4185ac30cadb3p+4,  0x1.f504accc3f2e4p+5,
> +		      -0x1.8588444c679b4p+7,  0x1.43740491dc22p+9,
> +		      -0x1.12400ea23f9e6p+11, 0x1.dac829f365795p+12
> +		    };
> +		  f = h * ((c[0] + h * c[1]) + h2 * (c[2] + h * c[3])
> +			 + h4 * ((c[4] + h * c[5]) + h2 * (c[6] + h * c[7])));
> +		}

Ok.

> +	      else if (__glibc_unlikely (t > 0x401ceccbu && t < 0x401d95cau))
> +		{
> +		  double h = (x + 0x1.3a7fc9600f86cp+1)
> +		    + 0x1.55f64f98af8dp-55;
> +		  double h2 = h * h;
> +		  double h4 = h2 * h2;
> +		  static const double c[] =
> +		    {
> +		      0x1.83fe966af535fp+0, 0x1.36eebb002f61ap+2,
> +		      0x1.694a60589a0b3p+0, 0x1.1718d7aedb0b5p+3,
> +		      0x1.733a045eca0d3p+2, 0x1.8d4297421205bp+4,
> +		      0x1.7feea5fb29965p+4
> +		    };
> +		  f = h
> +		      * ((c[0] + h * c[1]) + h2 * (c[2] + h * c[3])
> +			 + h4 * ((c[4] + h * c[5]) + h2 * (c[6])));
> +		}

Ok.

> +	      else if (__glibc_unlikely (t > 0x40492009u && t < 0x404940efu))
> +		{
> +		  double h = (x + 0x1.9260dbc9e59afp+1)
> +		    + 0x1.f717cd335a7b3p-53;
> +		  double h2 = h * h;
> +		  double h4 = h2 * h2;
> +		  static const double c[] =
> +		    {
> +		      0x1.f20a65f2fac55p+2,  0x1.9d4d297715105p+4,
> +		      0x1.c1137124d5b21p+6,  0x1.267203d24de38p+9,
> +		      0x1.99a63399a0b44p+11, 0x1.2941214faaf0cp+14,
> +		      0x1.bb912c0c9cdd1p+16
> +		    };
> +		  f = h * ((c[0] + h * c[1]) + h2 * (c[2] + h * c[3])
> +			 + h4 * ((c[4] + h * c[5]) + h2 * (c[6])));
> +		}

Ok.

> +	      else
> +		{
> +		  f = 0x1.250d048e7a1bdp+0 - f;
> +		  double lp = as_ln (as_sinpi (x - fx) * z);
> +		  f -= lp;
> +		}

Ok.

>  	    }
> -    /* 8.0 <= x < 2**26 */
> -	} else if (ix < 0x4c800000) {
> -	    t = __ieee754_logf(x);
> -	    z = one/x;
> -	    y = z*z;
> -	    w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6)))));
> -	    r = (x-half)*(t-one)+w;
> -	} else
> -    /* 2**26 <= x <= inf */
> -	    r =  math_narrow_eval (x*(__ieee754_logf(x)-one));
> -	/* NADJ is set for negative arguments but not otherwise,
> -	   resulting in warnings that it may be used uninitialized
> -	   although in the cases where it is used it has always been
> -	   set.  */
> -	DIAG_PUSH_NEEDS_COMMENT;
> -	DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Wmaybe-uninitialized");
> -	if(hx<0) r = nadj - r;
> -	DIAG_POP_NEEDS_COMMENT;
> -	return r;

Ok.

> +	}
> +    }
> +
> +  uint64_t tl = (asuint64 (f) + 5) & 0xfffffff;
> +  float r = f;
> +  if (__glibc_unlikely (tl <= 31u))
> +    {
> +      t = asuint (x);
> +      for (unsigned i = 0; i < array_length (tb); i++)
> +	{
> +	  if (t == asuint (tb[i].x))
> +	    return tb[i].f + tb[i].df;
> +	}
> +    }
> +  return r;
>  }
>  libm_alias_finite (__ieee754_lgammaf_r, __lgammaf_r)

Ok.

> diff --git a/sysdeps/ieee754/flt-32/lgamma_negf.c b/sysdeps/ieee754/flt-32/lgamma_negf.c
> -/* lgammaf expanding around zeros.
> -   Copyright (C) 2015-2024 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> . . .
> -}
> +/* Not needed.  */

Ok.  I assume the file must continue to exist to mask a generic
implementation of it.

> diff --git a/sysdeps/loongarch/lp64/libm-test-ulps b/sysdeps/loongarch/lp64/libm-test-ulps
> diff --git a/sysdeps/m68k/coldfire/fpu/libm-test-ulps b/sysdeps/m68k/coldfire/fpu/libm-test-ulps
> diff --git a/sysdeps/m68k/m680x0/fpu/libm-test-ulps b/sysdeps/m68k/m680x0/fpu/libm-test-ulps
> diff --git a/sysdeps/microblaze/libm-test-ulps b/sysdeps/microblaze/libm-test-ulps
> diff --git a/sysdeps/mips/mips32/libm-test-ulps b/sysdeps/mips/mips32/libm-test-ulps
> diff --git a/sysdeps/mips/mips64/libm-test-ulps b/sysdeps/mips/mips64/libm-test-ulps
> diff --git a/sysdeps/nios2/libm-test-ulps b/sysdeps/nios2/libm-test-ulps
> diff --git a/sysdeps/or1k/fpu/libm-test-ulps b/sysdeps/or1k/fpu/libm-test-ulps
> diff --git a/sysdeps/or1k/nofpu/libm-test-ulps b/sysdeps/or1k/nofpu/libm-test-ulps
> diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps
> diff --git a/sysdeps/powerpc/nofpu/libm-test-ulps b/sysdeps/powerpc/nofpu/libm-test-ulps
> diff --git a/sysdeps/riscv/nofpu/libm-test-ulps b/sysdeps/riscv/nofpu/libm-test-ulps
> diff --git a/sysdeps/riscv/rvd/libm-test-ulps b/sysdeps/riscv/rvd/libm-test-ulps
> diff --git a/sysdeps/s390/fpu/libm-test-ulps b/sysdeps/s390/fpu/libm-test-ulps
> diff --git a/sysdeps/sh/libm-test-ulps b/sysdeps/sh/libm-test-ulps
> diff --git a/sysdeps/sparc/fpu/libm-test-ulps b/sysdeps/sparc/fpu/libm-test-ulps
> diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps

Ok.



More information about the Libc-alpha mailing list