This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] powerpc: Cleanup fenv_private.h
- From: Adhemerval Zanella <adhemerval dot zanella at linaro dot org>
- To: libc-alpha at sourceware dot org
- Date: Fri, 10 Jun 2016 17:25:13 -0300
- Subject: Re: [PATCH] powerpc: Cleanup fenv_private.h
- Authentication-results: sourceware.org; auth=none
- References: <cffe19b8-a09a-fadb-7dd3-ef5088d58556 at linux dot vnet dot ibm dot com>
On 10/06/2016 16:50, Paul E. Murphy wrote:
> Tested on ppc64le, with results before and after.
>
> ---8<---
> Some of the masks are wrong, and the naming is confusing.
>
> There are two basic cases we really care about:
>
> 1. Stacking a new rounding mode when running certain
> sections of code, and pausing exception handling.
>
> 2. Likewise, but discarding any exceptions which occur
> while running under the new rounding mode.
>
> libc_feholdexcept_setround_ppc_ctx has been removed as it basically
> does the same thing as libc_feholdsetround_ppc_ctx but also clearing
> any sticky bits. The restore behavior is what differentiates these
> two cases as the SET_RESTORE_ROUND{,_NOEX} macros will either merge
> or discard all exceptions occurring during scope of their usage.
>
> Likewise, there are a number of routines to swap, replace,
> or merge FP environments. This change reduces much of
> the common and sometimes wrong code.
It seems a nice cleanup and about the wrong code, I presume it is
the one that might use _FPU_MASK_FRAC_INEX_RET_CC (which was indeed
wrongly defined). Does it trigger any user visible issue in math
tests?
>
> * sysdeps/powerpc/fpu/fenv_private.h:
> (_FPU_MASK_ALL): Rename to
> (_FPU_ALL_TRAPS): New macro representing ISA VE OE UE ZE and
> XE FPSCR bits.
>
> (_FPU_MASK_RN): New macro to mask out ISA RN bits in FPSCR.
>
> (_FPU_MASK_ROUNDING): Rename to
> (_FPU_MASK_NOT_RN_NI): New macro to mask out all but ISA RN and
> NI bits.
>
> (_FPU_MASK_EXCEPT_ROUND): Rename to
> (_FPU_MASK_TRAPS_RN): New macro to mask out exception enable
> bits and rounding bits.
>
> (__libc_feholdbits_ppc): New inline function to mask, set,
> and pontentially clear FSPCR bits, and change MSR[FE] bits.
> (libc_feholdexcept_ppc): Redefine using __libc_feholdbits_ppc.
> (libc_feholdexcept_setround_ppc): Likewise.
>
> (__libc_femergeenv_ppc): New function to dynamically mask both
> old and new FP environments and merge.
> (libc_fesetenv_ppc): Redefine in terms of __libc_femergeenv_ppc.
> (libc_feresetround_ppc): Likewise.
> (libc_feupdateenv_test_ppc): Likewise.
> (libc_feupdateenv_ppc): Likewise.
>
> (libc_feholdsetround_ppc_ctx): Fix usage to include masking
> of ISA RN bits, and update macro names.
> (libc_feholdexcept_setround_ppc_ctx): Remove as it is
> effectively the same as the previously mentioned function.
>
> (libc_feupdateenv_ppc_ctx): Replace libc_feupdatedenv_test_ppc
> usage with fe_resetround_ppc.
>
> (libc_feholdexcept_setround_ctx): Remove, this doesn't appear
> to be used.
> (libc_feholdexcept_setround_ctxf): Likewise.
> (libc_feholdexcept_setround_ctxl): Likewise.
> ---
> sysdeps/powerpc/fpu/fenv_private.h | 156 +++++++++++++------------------------
> 1 file changed, 53 insertions(+), 103 deletions(-)
>
> diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h
> index 02ac980..ecc05bc 100644
> --- a/sysdeps/powerpc/fpu/fenv_private.h
> +++ b/sysdeps/powerpc/fpu/fenv_private.h
> @@ -23,56 +23,58 @@
> #include <fenv_libc.h>
> #include <fpu_control.h>
>
> -#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM \
> +/* Mask for the exception enable bits. */
> +#define _FPU_ALL_TRAPS (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM \
> | _FPU_MASK_XM | _FPU_MASK_IM)
>
> +/* Mask the rounding mode bits. */
> +#define _FPU_MASK_RN (~0x3)
> +
> /* Mask everything but the rounding moded and non-IEEE arithmetic flags. */
> -#define _FPU_MASK_ROUNDING 0xffffffff00000007LL
> +#define _FPU_MASK_NOT_RN_NI 0xffffffff00000007LL
>
> /* Mask restore rounding mode and exception enabled. */
> -#define _FPU_MASK_EXCEPT_ROUND 0xffffffff1fffff00LL
> +#define _FPU_MASK_TRAPS_RN 0xffffffff1fffff00LL
>
> /* Mask exception enable but fraction rounded/inexact and FP result/CC
> bits. */
> -#define _FPU_MASK_FRAC_INEX_RET_CC 0x1ff80fff
> +#define _FPU_MASK_FRAC_INEX_RET_CC 0xffffffff1ff80fff
>
> static __always_inline void
> -libc_feholdexcept_ppc (fenv_t *envp)
> +__libc_feholdbits_ppc (fenv_t *envp, unsigned long long mask,
> + unsigned long long bits)
> {
> fenv_union_t old, new;
>
> old.fenv = *envp = fegetenv_register ();
>
> - new.l = old.l & _FPU_MASK_ROUNDING;
> + new.l = (old.l & mask) | bits;
>
> /* If the old env had any enabled exceptions, then mask SIGFPE in the
> MSR FE0/FE1 bits. This may allow the FPU to run faster because it
> always takes the default action and can not generate SIGFPE. */
> - if ((old.l & _FPU_MASK_ALL) != 0)
> + if ((old.l & _FPU_ALL_TRAPS) != 0)
> (void) __fe_mask_env ();
>
> fesetenv_register (new.fenv);
> }
>
> static __always_inline void
> -libc_fesetround_ppc (int r)
> +libc_feholdexcept_ppc (fenv_t *envp)
> {
> - __fesetround_inline (r);
> + __libc_feholdbits_ppc (envp, _FPU_MASK_NOT_RN_NI, 0LL);
> }
>
> static __always_inline void
> libc_feholdexcept_setround_ppc (fenv_t *envp, int r)
> {
> - fenv_union_t old, new;
> -
> - old.fenv = *envp = fegetenv_register ();
> -
> - new.l = (old.l & _FPU_MASK_ROUNDING) | r;
> -
> - if ((old.l & _FPU_MASK_ALL) != 0)
> - (void) __fe_mask_env ();
> + __libc_feholdbits_ppc (envp, _FPU_MASK_NOT_RN_NI & _FPU_MASK_RN, r);
> +}
>
> - fesetenv_register (new.fenv);
> +static __always_inline void
> +libc_fesetround_ppc (int r)
> +{
> + __fesetround_inline (r);
> }
>
> static __always_inline int
> @@ -84,98 +86,67 @@ libc_fetestexcept_ppc (int e)
> }
>
> static __always_inline void
> -libc_fesetenv_ppc (const fenv_t *envp)
> +libc_feholdsetround_ppc (fenv_t *e, int r)
> +{
> + __libc_feholdbits_ppc (e, _FPU_MASK_TRAPS_RN, r);
> +}
> +
> +static __always_inline unsigned long long
> +__libc_femergeenv_ppc (const fenv_t *envp, unsigned long long old_mask,
> + unsigned long long new_mask)
> {
> fenv_union_t old, new;
>
> new.fenv = *envp;
> old.fenv = fegetenv_register ();
>
> + /* Merge bits while masking unwanted bits from new and old env. */
> + new.l = (old.l & old_mask) | (new.l & new_mask);
> +
> /* If the old env has no enabled exceptions and the new env has any enabled
> exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the
> hardware into "precise mode" and may cause the FPU to run slower on some
> hardware. */
> - if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
> + if ((old.l & _FPU_ALL_TRAPS) == 0 && (new.l & _FPU_ALL_TRAPS) != 0)
> (void) __fe_nomask_env_priv ();
>
> /* If the old env had any enabled exceptions and the new env has no enabled
> exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the
> FPU to run faster because it always takes the default action and can not
> generate SIGFPE. */
> - if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
> - (void) __fe_mask_env ();
> -
> - fesetenv_register (*envp);
> -}
> -
> -static __always_inline int
> -libc_feupdateenv_test_ppc (fenv_t *envp, int ex)
> -{
> - fenv_union_t old, new;
> -
> - new.fenv = *envp;
> - old.fenv = fegetenv_register ();
> -
> - /* Restore rounding mode and exception enable from *envp and merge
> - exceptions. Leave fraction rounded/inexact and FP result/CC bits
> - unchanged. */
> - new.l = (old.l & _FPU_MASK_EXCEPT_ROUND)
> - | (new.l & _FPU_MASK_FRAC_INEX_RET_CC);
> -
> - if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
> - (void) __fe_nomask_env_priv ();
> -
> - if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
> + if ((old.l & _FPU_ALL_TRAPS) != 0 && (new.l & _FPU_ALL_TRAPS) == 0)
> (void) __fe_mask_env ();
>
> + /* Atomically enable and raise (if appropriate) exceptions set in `new'. */
> fesetenv_register (new.fenv);
>
> - return old.l & ex;
> + return old.l;
> }
>
> static __always_inline void
> -libc_feupdateenv_ppc (fenv_t *e)
> +libc_fesetenv_ppc (const fenv_t *envp)
> {
> - libc_feupdateenv_test_ppc (e, 0);
> + /* Replace the entire environment. */
> + __libc_femergeenv_ppc (envp, 0LL, -1LL);
> }
>
> static __always_inline void
> -libc_feholdsetround_ppc (fenv_t *e, int r)
> +libc_feresetround_ppc (fenv_t *envp)
> {
> - fenv_union_t old, new;
> -
> - old.fenv = fegetenv_register ();
> - /* Clear current precision and set newer one. */
> - new.l = (old.l & ~0x3 & ~_FPU_MASK_ALL) | r;
> - *e = old.fenv;
> + __libc_femergeenv_ppc (envp, _FPU_MASK_TRAPS_RN, _FPU_MASK_FRAC_INEX_RET_CC);
> +}
>
> - if ((old.l & _FPU_MASK_ALL) != 0)
> - (void) __fe_mask_env ();
> - fesetenv_register (new.fenv);
> +static __always_inline int
> +libc_feupdateenv_test_ppc (fenv_t *envp, int ex)
> +{
> + return __libc_femergeenv_ppc (envp, _FPU_MASK_TRAPS_RN,
> + _FPU_MASK_FRAC_INEX_RET_CC) & ex;
> }
>
> static __always_inline void
> -libc_feresetround_ppc (fenv_t *envp)
> +libc_feupdateenv_ppc (fenv_t *e)
> {
> - fenv_union_t old, new;
> -
> - new.fenv = *envp;
> - old.fenv = fegetenv_register ();
> -
> - /* Restore rounding mode and exception enable from *envp and merge
> - exceptions. Leave fraction rounded/inexact and FP result/CC bits
> - unchanged. */
> - new.l = (old.l & _FPU_MASK_EXCEPT_ROUND)
> - | (new.l & _FPU_MASK_FRAC_INEX_RET_CC);
> -
> - if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
> - (void) __fe_nomask_env_priv ();
> -
> - if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
> - (void) __fe_mask_env ();
> -
> - /* Atomically enable and raise (if appropriate) exceptions set in `new'. */
> - fesetenv_register (new.fenv);
> + libc_feupdateenv_test_ppc (e, 0);
> }
>
> #define libc_feholdexceptf libc_feholdexcept_ppc
> @@ -202,17 +173,18 @@ libc_feresetround_ppc (fenv_t *envp)
> #define HAVE_RM_CTX 1
>
> static __always_inline void
> -libc_feholdexcept_setround_ppc_ctx (struct rm_ctx *ctx, int r)
> +libc_feholdsetround_ppc_ctx (struct rm_ctx *ctx, int r)
> {
> fenv_union_t old, new;
>
> old.fenv = fegetenv_register ();
>
> - new.l = (old.l & _FPU_MASK_ROUNDING) | r;
> + new.l = (old.l & _FPU_MASK_TRAPS_RN) | r;
> +
> ctx->env = old.fenv;
> if (__glibc_unlikely (new.l != old.l))
> {
> - if ((old.l & _FPU_MASK_ALL) != 0)
> + if ((old.l & _FPU_ALL_TRAPS) != 0)
> (void) __fe_mask_env ();
> fesetenv_register (new.fenv);
> ctx->updated_status = true;
> @@ -231,26 +203,7 @@ static __always_inline void
> libc_feupdateenv_ppc_ctx (struct rm_ctx *ctx)
> {
> if (__glibc_unlikely (ctx->updated_status))
> - libc_feupdateenv_test_ppc (&ctx->env, 0);
> -}
> -
> -static __always_inline void
> -libc_feholdsetround_ppc_ctx (struct rm_ctx *ctx, int r)
> -{
> - fenv_union_t old, new;
> -
> - old.fenv = fegetenv_register ();
> - new.l = (old.l & ~0x3 & ~_FPU_MASK_ALL) | r;
> - ctx->env = old.fenv;
> - if (__glibc_unlikely (new.l != old.l))
> - {
> - if ((old.l & _FPU_MASK_ALL) != 0)
> - (void) __fe_mask_env ();
> - fesetenv_register (new.fenv);
> - ctx->updated_status = true;
> - }
> - else
> - ctx->updated_status = false;
> + libc_feresetround_ppc (&ctx->env);
> }
>
> static __always_inline void
> @@ -260,9 +213,6 @@ libc_feresetround_ppc_ctx (struct rm_ctx *ctx)
> libc_feresetround_ppc (&ctx->env);
> }
>
> -#define libc_feholdexcept_setround_ctx libc_feholdexcept_setround_ppc_ctx
> -#define libc_feholdexcept_setroundf_ctx libc_feholdexcept_setround_ppc_ctx
> -#define libc_feholdexcept_setroundl_ctx libc_feholdexcept_setround_ppc_ctx
> #define libc_fesetenv_ctx libc_fesetenv_ppc_ctx
> #define libc_fesetenvf_ctx libc_fesetenv_ppc_ctx
> #define libc_fesetenvl_ctx libc_fesetenv_ppc_ctx
>