This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] PowerPC - logb[f|l] optimization for POWER7
- From: "Ryan S. Arnold" <ryan dot arnold at gmail dot com>
- To: Adhemerval Zanella <azanella at linux dot vnet dot ibm dot com>
- Cc: "GNU C. Library" <libc-alpha at sourceware dot org>
- Date: Tue, 8 May 2012 10:45:37 -0500
- Subject: Re: [PATCH] PowerPC - logb[f|l] optimization for POWER7
- References: <4FA911C7.4000301@linux.vnet.ibm.com>
On Tue, May 8, 2012 at 7:29 AM, Adhemerval Zanella
<azanella@linux.vnet.ibm.com> wrote:
> This patch provides optimized logb (1.2x on PPC32 and 2.5x on PPC64),
> logbf (1.1x on PPC32 and 2.2x on PPC64), and logbl (1.3x on PPC32 and
> 50% on PPC64).
>
> ---
>
> 2012-05-08 ÂAdhemerval Zanella Â<azanella@linux.vnet.ibm.com>
>
> Â Â Â Â* sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c: New file: optimized
> Â Â Â Âlogb for POWER7.
> Â Â Â Â* sysdeps/powerpc/powerpc32/power7/fpu/s_logbf.c: New file: optimized
> Â Â Â Âlogbf for POWER7.
> Â Â Â Â* sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c: New file: optimized
> Â Â Â Âlogbl for POWER7.
No colon necessary after "New file:". It should be "New file.
Optimize logb for POWER7".
> Â Â Â Â* sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c: New file: wrapper for
> Â Â Â Âthe optimized logb for PPC64.
> Â Â Â Â* sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c: New file: wrapper for
> Â Â Â Âthe optimized logbf for PPC64.
> Â Â Â Â* sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c: New file: wrapper for
> Â Â Â Âthe optimized logbl for PPC64.
I prefer that this mention that it #includes the powerpc32/power7/ .c
file. Something like:
"New file. Use powerpc32/power7/logb[fl].c via #include.
> diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c b/sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c
> new file mode 100644
> index 0000000..5f57c49
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c
> @@ -0,0 +1,74 @@
> +/* logb(). PowerPC/POWER7 version.
> + Â Copyright (C) 2012 Free Software Foundation, Inc.
> + Â Contributed by Adhemerval Zanella Netto <azanella@br.ibm.com>.
Roland has indicated that he no longer wants "Contributed by"
statements.. The git log will serve as attribution. This applies to
all files in this patchset.
> + Â This file is part of the GNU C Library.
> +
> + Â The GNU C Library is free software; you can redistribute it and/or
> + Â modify it under the terms of the GNU Lesser General Public
> + Â License as published by the Free Software Foundation; either
> + Â version 2.1 of the License, or (at your option) any later version.
> +
> + Â The GNU C Library is distributed in the hope that it will be useful,
> + Â but WITHOUT ANY WARRANTY; without even the implied warranty of
> + Â MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ÂSee the GNU
> + Â Lesser General Public License for more details.
> +
> + Â You should have received a copy of the GNU Lesser General Public
> + Â License along with the GNU C Library; if not, see
> + Â <http://www.gnu.org/licenses/>. Â*/
> +
> +#include "math_private.h"
> +
> +/* This implementation avoid FP to INT conversions by using VSX bitwise
> + Â instructions over FP values. Â*/
> +
> +static const double two1div52 = 2.220446049250313e-16; /* 1/2**52 Â*/
> +static const double two10m1 Â = -1023.0; Â Â Â Â Â Â Â /* 2**10 -1 Â*/
> +
> +/* FP mask to extract the exponent Â*/
> +static const union {
> + Âunsigned long long mask;
> + Âdouble d;
> +} mask = { 0x7ff0000000000000ULL };
> +
> +double
> +__logb (double x)
> +{
> + Âdouble ret;
> +
> + Âif (__builtin_expect(x == 0.0, 0))
This should have a space between __builtin_expect and the ().
> + Â Âreturn -1.0 / __builtin_fabs (x);
I would like if there were a comment indicating what's going on, i.e.,
/* Raise FE_DIVBYZERO and return -HUGE_VAL[LF]. */
> +
> + Â/* ret = x & 0x7ff0000000000000; Â*/
> + Âasm (
> + Â Â"xxland %x0,%x1,%x2\n"
> + Â Â"fcfid Â%0,%0"
> + Â Â: "=f" (ret)
> + Â Â: "f" (x), "f" (mask.d));
> + Â/* ret = (ret >> 52) - 1023.0; Â*/
> + Âret = (ret * two1div52) + two10m1;
> + Âif (__builtin_expect (ret > -two10m1, 0))
> + Â Âreturn (x * x);
Once again, please comment the special case with an indication of
what's being returned (if it is a special value indicated by the
spec). Please indicate the special value in the comment rather than
just a magic number that represents it. This applies to all files in
this patchset.
> + Âelse if (__builtin_expect (ret == two10m1, 0))
> + Â Â{
> + Â Â Â/* POSIX specifies that denormal number is treated as
> + Â Â Â Â though it were normalized. Â*/
> + Â Â Âint32_t lx, ix;
> + Â Â Âint m1, m2, ma;
> +
> + Â Â ÂEXTRACT_WORDS (ix , lx, x);
> + Â Â Âm1 = (ix == 0) ? 0 : __builtin_clz (ix);
> + Â Â Âm2 = (lx == 0) ? 0 : __builtin_clz (lx);
> + Â Â Âma = (m1 == 0) ? m2 + 32 : m1;
> + Â Â Âreturn -1022.0 + (double)(11 - ma);
> + Â Â}
> + Â/* Test to avoid logb_downward (0.0) == -0.0 Â*/
> + Âreturn ret == -0.0 ? 0.0 : ret;
Is this faster than an builtin abs call?
> +}
> +
> +weak_alias (__logb, logb)
> +
> +#ifdef NO_LONG_DOUBLE
> +strong_alias (__logb, __logbl)
> +weak_alias (__logb, logbl)
> +#endif
> diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_logbf.c b/sysdeps/powerpc/powerpc32/power7/fpu/s_logbf.c
> new file mode 100644
> index 0000000..eb276f8
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc32/power7/fpu/s_logbf.c
> @@ -0,0 +1,59 @@
> +/* logbf(). PowerPC/POWER7 version.
> + Â Copyright (C) 2012 Free Software Foundation, Inc.
> + Â Contributed by Adhemerval Zanella Netto <azanella@br.ibm.com>.
> + Â This file is part of the GNU C Library.
> +
> + Â The GNU C Library is free software; you can redistribute it and/or
> + Â modify it under the terms of the GNU Lesser General Public
> + Â License as published by the Free Software Foundation; either
> + Â version 2.1 of the License, or (at your option) any later version.
> +
> + Â The GNU C Library is distributed in the hope that it will be useful,
> + Â but WITHOUT ANY WARRANTY; without even the implied warranty of
> + Â MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ÂSee the GNU
> + Â Lesser General Public License for more details.
> +
> + Â You should have received a copy of the GNU Lesser General Public
> + Â License along with the GNU C Library; if not, see
> + Â <http://www.gnu.org/licenses/>. Â*/
> +
> +#include "math_private.h"
> +
> +/* This implementation avoid FP to INT conversions by using VSX bitwise
> + Â instructions over FP values. Â*/
> +
> +static const double two1div52 = 2.220446049250313e-16; /* 1/2**52 Â*/
> +static const double two10m1 Â = -1023.0; Â Â Â Â Â Â Â /* -2**10 + 1 Â*/
> +static const double two7m1 Â Â= -127.0; Â Â Â Â Â Â Â Â Â Â Â Â/* -2**7 + 1 Â*/
> +
> +/* FP mask to extract the exponent Â*/
> +static const union {
> + Âunsigned long long mask;
> + Âdouble d;
> +} mask = { 0x7ff0000000000000ULL };
> +
> +float
> +__logbf (float x)
> +{
> + Â/* VSX operation are all done internally as double Â*/
> + Âdouble ret;
> +
> + Âif (__builtin_expect (x == 0.0, 0))
> + Â Âreturn -1.0 / __builtin_fabsf (x);
> +
> + Â/* ret = x & 0x7f800000; Â*/
> + Âasm (
> + Â Â"xxland %x0,%x1,%x2\n"
> + Â Â"fcfid Â%0,%0"
> + Â Â: "=f"(ret)
> + Â Â: "f" (x), "f" (mask.d));
> + Â/* ret = (ret >> 52) - 1023.0, since ret is double Â*/
> + Âret = (ret * two1div52) + two10m1;
> + Âif (__builtin_expect (ret > -two7m1, 0))
> + Â Âreturn (x * x);
> + Â/* Since operations are done with double, not need to
> + Â Â additional tests for subnormal numbers.
Should be "Since operations are done with double we don't need additional..."
> + Â Â The test is to avoid logb_downward (0.0) == -0.0 Â*/
> + Âreturn ret == -0.0 ? 0.0 : ret;
> +}
> +weak_alias (__logbf, logbf)
Ryan S. Arnold