This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] PowerPC: Set/restore rounding mode only when needed
- From: Siddhesh Poyarekar <siddhesh at redhat dot com>
- To: Adhemerval Zanella <azanella at linux dot vnet dot ibm dot com>
- Cc: "GNU C. Library" <libc-alpha at sourceware dot org>
- Date: Mon, 25 Nov 2013 15:23:30 +0530
- Subject: Re: [PATCH] PowerPC: Set/restore rounding mode only when needed
- Authentication-results: sourceware.org; auth=none
- References: <5280DDA5 dot 1060906 at linux dot vnet dot ibm dot com> <20131119131935 dot GE24544 at spoyarek dot pnq dot redhat dot com> <528CBBE7 dot 8040109 at linux dot vnet dot ibm dot com>
On Wed, Nov 20, 2013 at 11:40:55AM -0200, Adhemerval Zanella wrote:
> Thanks for the review Siddhesh, below I'm sending a version 2 of my patch.
> I have corrected all the suggestion you have made and I also included a fix
> to work correctly with my patch 'PowerPC: Fix __fe_mask_env export'. I also
> added some ldbl-128 fixes that I forgot to add on previous patch (change the
> fegetround to __fegetround to avoid PLT calls since fegetround if not an
> inline version anymore).
>
> ---
>
> 2013-11-20 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
>
> * sysdeps/ieee754/ldbl-128ibm/e_expl.c (__ieee754_expl): Use
> SET_RESTORE_ROUND instead of feholdexcept/fesetround/fesetenv.
> * sysdeps/powerpc/fpu/fenv_libc.h (__fegetround): Remove define.
> (__fesetround): Remove define.
> * sysdeps/powerpc/fpu/fenv_private.h: New file: Inline floating point
> rounding and exceptions handling.
> * sysdeps/powerpc/fpu/math_private.h: Include fenv_private.h.
> * sysdeps/powerpc/fpu/fenv_libc.h (__fe_mask_env): Define as hidden.
> (__fe_nomask_env): Likewise.
> * sysdeps/ieee754/ldbl-128ibm/s_llrintl.c (__llrintl): Use
> __fegetround instead of fegetround.
> * sysdeps/ieee754/ldbl-128ibm/s_lrintl.c (__lrintl): Likewise.
> * sysdeps/ieee754/ldbl-128ibm/s_rintl.c (__rintl): Likewise.
>
> --
>
> diff --git a/sysdeps/ieee754/ldbl-128ibm/e_expl.c b/sysdeps/ieee754/ldbl-128ibm/e_expl.c
> index f7c50bf..65ef185 100644
> --- a/sysdeps/ieee754/ldbl-128ibm/e_expl.c
> +++ b/sysdeps/ieee754/ldbl-128ibm/e_expl.c
> @@ -134,18 +134,17 @@ static const long double C[] = {
> long double
> __ieee754_expl (long double x)
> {
> + long double result, x22;
> + union ibm_extended_long_double ex2_u, scale_u;
> + int unsafe;
> +
> /* Check for usual case. */
> if (isless (x, himark) && isgreater (x, lomark))
> {
> - int tval1, tval2, unsafe, n_i, exponent2;
> - long double x22, n, result, xl;
> - union ibm_extended_long_double ex2_u, scale_u;
> - fenv_t oldenv;
> -
> - feholdexcept (&oldenv);
> -#ifdef FE_TONEAREST
> - fesetround (FE_TONEAREST);
> -#endif
> + int tval1, tval2, n_i, exponent2;
> + long double n, xl;
> +
> + SET_RESTORE_ROUND (FE_TONEAREST);
>
> n = __roundl (x*M_1_LN2);
> x = x-n*M_LN2_0;
> @@ -201,11 +200,6 @@ __ieee754_expl (long double x)
> less than 4.8e-39. */
> x22 = x + x*x*(P1+x*(P2+x*(P3+x*(P4+x*(P5+x*P6)))));
>
> - /* Return result. */
> - fesetenv (&oldenv);
> -
> - result = x22 * ex2_u.ld + ex2_u.ld;
> -
> /* Now we can test whether the result is ultimate or if we are unsure.
> In the later case we should probably call a mpn based routine to give
> the ultimate result.
> @@ -235,10 +229,6 @@ __ieee754_expl (long double x)
> return __ieee754_expl_proc2 (origx);
> }
> */
> - if (!unsafe)
> - return result;
> - else
> - return result * scale_u.ld;
> }
> /* Exceptional cases: */
> else if (isless (x, himark))
> @@ -253,5 +243,10 @@ __ieee754_expl (long double x)
> else
> /* Return x, if x is a NaN or Inf; or overflow, otherwise. */
> return TWO1023*x;
> +
> + result = x22 * ex2_u.ld + ex2_u.ld;
> + if (!unsafe)
> + return result;
> + return result * scale_u.ld;
> }
> strong_alias (__ieee754_expl, __expl_finite)
> diff --git a/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c b/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c
> index 8560349..3503973 100644
> --- a/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c
> +++ b/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c
> @@ -43,7 +43,7 @@ __llrintl (long double x)
> #endif
> )
> {
> - save_round = fegetround ();
> + save_round = __fegetround ();
>
> if (__builtin_expect ((xh == -(double) (-__LONG_LONG_MAX__ - 1)), 0))
> {
> diff --git a/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c b/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c
> index 588098d..49dbd42 100644
> --- a/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c
> +++ b/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c
> @@ -49,7 +49,7 @@ __lrintl (long double x)
> #endif
> )
> {
> - save_round = fegetround ();
> + save_round = __fegetround ();
>
> #if __LONG_MAX__ == 2147483647
> long long llhi = (long long) xh;
> diff --git a/sysdeps/ieee754/ldbl-128ibm/s_rintl.c b/sysdeps/ieee754/ldbl-128ibm/s_rintl.c
> index 48dbe85..5fd6bb8 100644
> --- a/sysdeps/ieee754/ldbl-128ibm/s_rintl.c
> +++ b/sysdeps/ieee754/ldbl-128ibm/s_rintl.c
> @@ -40,7 +40,7 @@ __rintl (long double x)
> __builtin_inf ()), 1))
> {
> double orig_xh;
> - int save_round = fegetround ();
> + int save_round = __fegetround ();
>
> /* Long double arithmetic, including the canonicalisation below,
> only works in round-to-nearest mode. */
> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
> index cb15c1c..ecd6b91 100644
> --- a/sysdeps/powerpc/fpu/fenv_libc.h
> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
> @@ -23,9 +23,9 @@
> #include <ldsodefs.h>
> #include <sysdep.h>
>
> -extern const fenv_t *__fe_nomask_env (void);
> +extern const fenv_t *__fe_nomask_env (void) attribute_hidden;
>
> -extern const fenv_t *__fe_mask_env (void);
> +extern const fenv_t *__fe_mask_env (void) attribute_hidden;
>
> /* The sticky bits in the FPSCR indicating exceptions have occurred. */
> #define FPSCR_STICKY_BITS ((FE_ALL_EXCEPT | FE_ALL_INVALID) & ~FE_INVALID)
> @@ -83,7 +83,6 @@ __fegetround (void)
> "mfcr %0" : "=r"(result) : : "cr7");
> return result & 3;
> }
> -#define fegetround() __fegetround()
>
> static inline int
> __fesetround (int round)
> @@ -107,7 +106,6 @@ __fesetround (int round)
>
> return 0;
> }
> -#define fesetround(mode) __fesetround(mode)
>
> /* Definitions of all the FPSCR bit numbers */
> enum {
> diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h
> new file mode 100644
> index 0000000..52db35a
> --- /dev/null
> +++ b/sysdeps/powerpc/fpu/fenv_private.h
> @@ -0,0 +1,274 @@
> +/* Private floating point rounding and exceptions handling. PowerPC version.
> + Copyright (C) 2013 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library. If not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#ifndef FENV_PRIVATE_H
> +#define FENV_PRIVATE_H 1
> +
> +#include <fenv.h>
> +#include <fenv_libc.h>
> +#include <fpu_control.h>
> +
> +#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM \
> + | _FPU_MASK_XM | _FPU_MASK_IM)
> +
> +/* Mask everything but the rounding moded and non-IEEE arithmetic flags. */
> +#define _FPU_MASK_ROUNDING 0xffffffff00000007LL
> +
> +/* Mask restore rounding mode and exception enabled. */
> +#define _FPU_MASK_EXCEPT_ROUND 0xffffffff1fffff00LL
> +
> +/* Mask exception enable but fraction rounded/inexact and FP result/CC
> + bits. */
> +#define _FPU_MASK_FRAC_INEX_RET_CC 0x1ff80fff
> +
> +static __always_inline void
> +libc_feholdexcept_ppc (fenv_t *envp)
> +{
> + fenv_union_t old, new;
> +
> + old.fenv = *envp = fegetenv_register ();
> +
> + new.l = old.l & _FPU_MASK_ROUNDING;
> +
> + /* If the old env had any enabled exceptions, then mask SIGFPE in the
> + MSR FE0/FE1 bits. This may allow the FPU to run faster because it
> + always takes the default action and can not generate SIGFPE. */
> + if ((old.l & _FPU_MASK_ALL) != 0)
> + (void) __fe_mask_env ();
> +
> + fesetenv_register (new.fenv);
> +}
> +
> +static __always_inline void
> +libc_fesetround_ppc (int r)
> +{
> + fesetround (r);
Shouldn't this be __fesetround? The rest of the patch looks OK to me.
Siddhesh
> +}
> +
> +static __always_inline void
> +libc_feholdexcept_setround_ppc (fenv_t *envp, int r)
> +{
> + fenv_union_t old, new;
> +
> + old.fenv = *envp = fegetenv_register ();
> +
> + new.l = (old.l & _FPU_MASK_ROUNDING) | r;
> +
> + if ((old.l & _FPU_MASK_ALL) != 0)
> + (void) __fe_mask_env ();
> +
> + fesetenv_register (new.fenv);
> +}
> +
> +static __always_inline int
> +libc_fetestexcept_ppc (int e)
> +{
> + fenv_union_t u;
> + u.fenv = fegetenv_register ();
> + return u.l & e;
> +}
> +
> +static __always_inline void
> +libc_fesetenv_ppc (const fenv_t *envp)
> +{
> + fenv_union_t old, new;
> +
> + new.fenv = *envp;
> + old.fenv = fegetenv_register ();
> +
> + /* If the old env has no enabled exceptions and the new env has any enabled
> + exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the
> + hardware into "precise mode" and may cause the FPU to run slower on some
> + hardware. */
> + if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
> + (void) __fe_nomask_env ();
> +
> + /* If the old env had any enabled exceptions and the new env has no enabled
> + exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the
> + FPU to run faster because it always takes the default action and can not
> + generate SIGFPE. */
> + if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
> + (void) __fe_mask_env ();
> +
> + fesetenv_register (*envp);
> +}
> +
> +static __always_inline int
> +libc_feupdateenv_test_ppc (fenv_t *envp, int ex)
> +{
> + fenv_union_t old, new;
> +
> + new.fenv = *envp;
> + old.fenv = fegetenv_register ();
> +
> + /* Restore rounding mode and exception enable from *envp and merge
> + exceptions. Leave fraction rounded/inexact and FP result/CC bits
> + unchanged. */
> + new.l = (old.l & _FPU_MASK_EXCEPT_ROUND)
> + | (new.l & _FPU_MASK_FRAC_INEX_RET_CC);
> +
> + if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
> + (void) __fe_nomask_env ();
> +
> + if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
> + (void) __fe_mask_env ();
> +
> + fesetenv_register (new.fenv);
> +
> + return old.l & ex;
> +}
> +
> +static __always_inline void
> +libc_feupdateenv_ppc (fenv_t *e)
> +{
> + libc_feupdateenv_test_ppc (e, 0);
> +}
> +
> +static __always_inline void
> +libc_feholdsetround_ppc (fenv_t *e, int r)
> +{
> + fenv_union_t old, new;
> +
> + old.fenv = fegetenv_register ();
> + /* Clear current precision and set newer one. */
> + new.l = (old.l & ~0x3) | r;
> + *e = old.fenv;
> +
> + if ((old.l & _FPU_MASK_ALL) != 0)
> + (void) __fe_mask_env ();
> + fesetenv_register (new.fenv);
> +}
> +
> +static __always_inline void
> +libc_feresetround_ppc (fenv_t *envp)
> +{
> + fenv_union_t old, new;
> +
> + new.fenv = *envp;
> + old.fenv = fegetenv_register ();
> +
> + /* Restore rounding mode and exception enable from *envp and merge
> + exceptions. Leave fraction rounded/inexact and FP result/CC bits
> + unchanged. */
> + new.l = (old.l & _FPU_MASK_EXCEPT_ROUND)
> + | (new.l & _FPU_MASK_FRAC_INEX_RET_CC);
> +
> + if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
> + (void) __fe_nomask_env ();
> +
> + if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
> + (void) __fe_mask_env ();
> +
> + /* Atomically enable and raise (if appropriate) exceptions set in `new'. */
> + fesetenv_register (new.fenv);
> +}
> +
> +#define libc_feholdexceptf libc_feholdexcept_ppc
> +#define libc_feholdexcept libc_feholdexcept_ppc
> +#define libc_feholdexcept_setroundf libc_feholdexcept_setround_ppc
> +#define libc_feholdexcept_setround libc_feholdexcept_setround_ppc
> +#define libc_fetestexceptf libc_fetestexcept_ppc
> +#define libc_fetestexcept libc_fetestexcept_ppc
> +#define libc_fesetroundf libc_fesetround_ppc
> +#define libc_fesetround libc_fesetround_ppc
> +#define libc_fesetenvf libc_fesetenv_ppc
> +#define libc_fesetenv libc_fesetenv_ppc
> +#define libc_feupdateenv_testf libc_feupdateenv_test_ppc
> +#define libc_feupdateenv_test libc_feupdateenv_test_ppc
> +#define libc_feupdateenvf libc_feupdateenv_ppc
> +#define libc_feupdateenv libc_feupdateenv_ppc
> +#define libc_feholdsetroundf libc_feholdsetround_ppc
> +#define libc_feholdsetround libc_feholdsetround_ppc
> +#define libc_feresetroundf libc_feresetround_ppc
> +#define libc_feresetround libc_feresetround_ppc
> +
> +
> +/* We have support for rounding mode context. */
> +#define HAVE_RM_CTX 1
> +
> +static __always_inline void
> +libc_feholdexcept_setround_ppc_ctx (struct rm_ctx *ctx, int r)
> +{
> + fenv_union_t old, new;
> +
> + old.fenv = fegetenv_register ();
> +
> + new.l = (old.l & _FPU_MASK_ROUNDING) | r;
> + ctx->env = old.fenv;
> + if (__glibc_unlikely (new.l != old.l))
> + {
> + if ((old.l & _FPU_MASK_ALL) != 0)
> + (void) __fe_mask_env ();
> + fesetenv_register (new.fenv);
> + ctx->updated_status = true;
> + }
> + else
> + ctx->updated_status = false;
> +}
> +
> +static __always_inline void
> +libc_fesetenv_ppc_ctx (struct rm_ctx *ctx)
> +{
> + libc_fesetenv_ppc (&ctx->env);
> +}
> +
> +static __always_inline void
> +libc_feupdateenv_ppc_ctx (struct rm_ctx *ctx)
> +{
> + if (__glibc_unlikely (ctx->updated_status))
> + libc_feupdateenv_test_ppc (&ctx->env, 0);
> +}
> +
> +static __always_inline void
> +libc_feholdsetround_ppc_ctx (struct rm_ctx *ctx, int r)
> +{
> + fenv_union_t old, new;
> +
> + old.fenv = fegetenv_register ();
> + new.l = (old.l & ~0x3) | r;
> + ctx->env = old.fenv;
> + if (__glibc_unlikely (new.l != old.l))
> + {
> + if ((old.l & _FPU_MASK_ALL) != 0)
> + (void) __fe_mask_env ();
> + fesetenv_register (new.fenv);
> + ctx->updated_status = true;
> + }
> + else
> + ctx->updated_status = false;
> +}
> +
> +static __always_inline void
> +libc_feresetround_ppc_ctx (struct rm_ctx *ctx)
> +{
> + if (__glibc_unlikely (ctx->updated_status))
> + libc_feresetround_ppc (&ctx->env);
> +}
> +
> +#define libc_feholdexcept_setroundf_ctx libc_feholdexcept_setround_ppc_ctx
> +#define libc_feholdexcept_setround_ctx libc_feholdexcept_setround_ppc_ctx
> +#define libc_fesetenv_ctx libc_fesetenv_ppc_ctx
> +#define libc_fesetenvf_ctx libc_fesetenv_ppc_ctx
> +#define libc_feholdsetround_ctx libc_feholdsetround_ppc_ctx
> +#define libc_feholdsetroundf_ctx libc_feholdsetround_ppc_ctx
> +#define libc_feresetround_ctx libc_feresetround_ppc_ctx
> +#define libc_feresetroundf_ctx libc_feresetround_ppc_ctx
> +#define libc_feupdateenvf_ctx libc_feupdateenv_ppc_ctx
> +#define libc_feupdateenv_ctx libc_feupdateenv_ppc_ctx
> +
> +#endif
> diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
> index 6c00785..c8833d6 100644
> --- a/sysdeps/powerpc/fpu/math_private.h
> +++ b/sysdeps/powerpc/fpu/math_private.h
> @@ -22,6 +22,7 @@
> #include <sysdep.h>
> #include <ldsodefs.h>
> #include <dl-procinfo.h>
> +#include <fenv_private.h>
> #include_next <math_private.h>
>
> # if __WORDSIZE == 64 || defined _ARCH_PWR4
>