[PATCH] [powerpc] fe{en,dis}ableexcept, fesetmode: optimize FPSCR accesses
Paul E Murphy
murphyp@linux.ibm.com
Thu Aug 1 23:04:00 GMT 2019
On 8/1/19 2:36 PM, Paul A. Clarke wrote:
> From: "Paul A. Clarke" <pc@us.ibm.com>
>
> Since fe{en,dis}ableexcept() and fesetmode() read-modify-write just the
> "mode" (exception enable and rounding mode) bits of the Floating Point Status
> Control Register (FPSCR), the lighter weight 'mffsl' instruction can be used
> to read the FPSCR (enables and rounding mode), and 'mtfsf 0b00000011' can be
> used to write just those bits back to the FPSCR. The net is better performance.
>
> In addition, fe{en,dis}ableexcept() read the FPSCR again after writing it, or
> they determine that it doesn't need to be written because it is not changing.
> In either case, the local variable holds the current values of the enable
> bits in the FPSCR. This local variable can be used instead of again reading
> the FPSCR.
>
> Also, that value of the FPSCR which is read the second time is validated
> against the requested enables. Since the write can't fail, this validation
> step is unnecessary, and can be removed.
The write to FPSCR may not fail, but it may not change all the requested
bits, e.g. fedisableexcept(FE_INVALID_SQRT). Should the existing behavior
be preserved?
>
> Finally, convert the local macros in fesetmode.c to more generally useful
> macros in fenv_libc.h.
>
> 2019-08-01 Paul A. Clarke <pc@us.ibm.com>
>
> * sysdeps/powerpc/fpu/fenv_libc.h (fesetenv_mode): New.
> (FPSCR_FPRF_MASK): New. (FPSCR_STATUS_MASK): New.
> * sysdeps/powerpc/fpu/feenablxcpt.c (feenableexcept): Use lighter-
> weight access to FPSCR; remove unnecessary second FPSCR read and
> validate.
> * sysdeps/powerpc/fpu/fedisblxcpt.c (fedisableexcept): Likewise.
> * sysdeps/powerpc/fpu/fesetmode.c (fesetmode): Use lighter-weight
> access to FPSCR; Use macros in fenv_libc.h in favor of local.
> ---
> sysdeps/powerpc/fpu/fedisblxcpt.c | 8 +++-----
> sysdeps/powerpc/fpu/feenablxcpt.c | 9 +++------
> sysdeps/powerpc/fpu/fenv_libc.h | 10 +++++++++-
> sysdeps/powerpc/fpu/fesetmode.c | 15 +++++----------
> 4 files changed, 20 insertions(+), 22 deletions(-)
>
> diff --git a/sysdeps/powerpc/fpu/fedisblxcpt.c b/sysdeps/powerpc/fpu/fedisblxcpt.c
> index 5cc8799..fa666b0 100644
> --- a/sysdeps/powerpc/fpu/fedisblxcpt.c
> +++ b/sysdeps/powerpc/fpu/fedisblxcpt.c
> @@ -26,7 +26,7 @@ fedisableexcept (int excepts)
> int result, new;
>
> /* Get current exception mask to return. */
> - fe.fenv = curr.fenv = fegetenv_register ();
> + fe.fenv = curr.fenv = fegetenv_status ();
> result = fenv_reg_to_exceptions (fe.l);
>
> if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
> @@ -36,13 +36,11 @@ fedisableexcept (int excepts)
> fe.l &= ~ fenv_exceptions_to_reg (excepts);
>
> if (fe.l != curr.l)
> - fesetenv_register (fe.fenv);
> + fesetenv_mode (fe.fenv);
>
> - new = __fegetexcept ();
> + new = fenv_reg_to_exceptions (fe.l);
> if (new == 0 && result != 0)
> (void)__fe_mask_env ();
>
> - if ((new & excepts) != 0)
> - result = -1;
> return result;
> }
> diff --git a/sysdeps/powerpc/fpu/feenablxcpt.c b/sysdeps/powerpc/fpu/feenablxcpt.c
> index 3b64398..c658290 100644
> --- a/sysdeps/powerpc/fpu/feenablxcpt.c
> +++ b/sysdeps/powerpc/fpu/feenablxcpt.c
> @@ -26,7 +26,7 @@ feenableexcept (int excepts)
> int result, new;
>
> /* Get current exception mask to return. */
> - fe.fenv = curr.fenv = fegetenv_register ();
> + fe.fenv = curr.fenv = fegetenv_status ();
> result = fenv_reg_to_exceptions (fe.l);
>
> if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
> @@ -36,14 +36,11 @@ feenableexcept (int excepts)
> fe.l |= fenv_exceptions_to_reg (excepts);
>
> if (fe.l != curr.l)
> - fesetenv_register (fe.fenv);
> + fesetenv_mode (fe.fenv);
>
> - new = __fegetexcept ();
> + new = fenv_reg_to_exceptions (fe.l);
> if (new != 0 && result == 0)
> (void) __fe_nomask_env_priv ();
>
> - if ((new & excepts) != excepts)
> - result = -1;
> -
> return result;
> }
> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
> index 853239f..8ba4832 100644
> --- a/sysdeps/powerpc/fpu/fenv_libc.h
> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
> @@ -70,6 +70,11 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
> __builtin_mtfsf (0xff, d); \
> } while(0)
>
> +/* Set the last 2 nibbles of the FPSCR, which contain the
> + exception enables and the rounding mode.
> + 'fegetenv_status' retrieves these bits by reading the FPSCR. */
> +#define fesetenv_mode(env) __builtin_mtfsf (0b00000011, (env));
> +
> /* This very handy macro:
> - Sets the rounding mode to 'round to nearest';
> - Sets the processor into IEEE mode; and
> @@ -206,8 +211,11 @@ enum {
> (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
> #define FPSCR_BASIC_EXCEPTIONS_MASK \
> (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
> -
> +#define FPSCR_FPRF_MASK \
> + (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \
> + FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK)
> #define FPSCR_CONTROL_MASK (FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK)
> +#define FPSCR_STATUS_MASK (FPSCR_FR_MASK|FPSCR_FI_MASK|FPSCR_FPRF_MASK)
>
> /* The bits in the FENV(1) ABI for exceptions correspond one-to-one with bits
> in the FPSCR, albeit shifted to different but corresponding locations.
> diff --git a/sysdeps/powerpc/fpu/fesetmode.c b/sysdeps/powerpc/fpu/fesetmode.c
> index 4f4f71a..e92559b 100644
> --- a/sysdeps/powerpc/fpu/fesetmode.c
> +++ b/sysdeps/powerpc/fpu/fesetmode.c
> @@ -19,11 +19,6 @@
> #include <fenv_libc.h>
> #include <fpu_control.h>
>
> -#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM \
> - | _FPU_MASK_XM | _FPU_MASK_IM)
> -
> -#define FPU_STATUS 0xbffff700ULL
> -
> int
> fesetmode (const femode_t *modep)
> {
> @@ -32,18 +27,18 @@ fesetmode (const femode_t *modep)
> /* Logic regarding enabled exceptions as in fesetenv. */
>
> new.fenv = *modep;
> - old.fenv = fegetenv_register ();
> - new.l = (new.l & ~FPU_STATUS) | (old.l & FPU_STATUS);
> + old.fenv = fegetenv_status ();
> + new.l = (new.l & ~FPSCR_STATUS_MASK) | (old.l & FPSCR_STATUS_MASK);
>
> if (old.l == new.l)
> return 0;
>
> - if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
> + if ((old.l & FPSCR_ENABLES_MASK) == 0 && (new.l & FPSCR_ENABLES_MASK) != 0)
> (void) __fe_nomask_env_priv ();
>
> - if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
> + if ((old.l & FPSCR_ENABLES_MASK) != 0 && (new.l & FPSCR_ENABLES_MASK) == 0)
> (void) __fe_mask_env ();
>
> - fesetenv_register (new.fenv);
> + fesetenv_mode (new.fenv);
> return 0;
> }
>
More information about the Libc-alpha
mailing list