[PATCH] [powerpc] fe{en,dis}ableexcept, fesetmode: optimize FPSCR accesses

Paul E Murphy murphyp@linux.ibm.com
Thu Aug 1 23:04:00 GMT 2019



On 8/1/19 2:36 PM, Paul A. Clarke wrote:
> From: "Paul A. Clarke" <pc@us.ibm.com>
> 
> Since fe{en,dis}ableexcept() and fesetmode() read-modify-write just the
> "mode" (exception enable and rounding mode) bits of the Floating Point Status
> Control Register (FPSCR), the lighter weight 'mffsl' instruction can be used
> to read the FPSCR (enables and rounding mode), and 'mtfsf 0b00000011' can be
> used to write just those bits back to the FPSCR.  The net is better performance.
> 
> In addition, fe{en,dis}ableexcept() read the FPSCR again after writing it, or
> they determine that it doesn't need to be written because it is not changing.
> In either case, the local variable holds the current values of the enable
> bits in the FPSCR.  This local variable can be used instead of again reading
> the FPSCR.
> 
> Also, that value of the FPSCR which is read the second time is validated
> against the requested enables.  Since the write can't fail, this validation
> step is unnecessary, and can be removed.

The write to FPSCR may not fail, but it may not change all the requested 
bits. e.g fedisableexcept(FE_INVALID_SQRT). Should the existing behavior 
be preserved?

> 
> Finally, convert the local macros in fesetmode.c to more generally useful
> macros in fenv_libc.h.
> 
> 2019-08-01  Paul A. Clarke  <pc@us.ibm.com>
> 
> 	* sysdeps/powerpc/fpu/fenv_libc.h (fesetenv_mode): New.
> 	(FPSCR_FPRF_MASK): New. (FPSCR_STATUS_MASK): New.
> 	* sysdeps/powerpc/fpu/feenablxcpt.c (feenableexcept): Use lighter-
> 	weight access to FPSCR; remove unnecessary second FPSCR read and
> 	validate.
> 	* sysdeps/powerpc/fpu/fedisblxcpt.c (fedisableexcept): Likewise.
> 	* sysdeps/powerpc/fpu/fesetmode.c (fesetmode): Use lighter-weight
> 	access to FPSCR; Use macros in fenv_libc.h in favor of local.
> ---
>   sysdeps/powerpc/fpu/fedisblxcpt.c |  8 +++-----
>   sysdeps/powerpc/fpu/feenablxcpt.c |  9 +++------
>   sysdeps/powerpc/fpu/fenv_libc.h   | 10 +++++++++-
>   sysdeps/powerpc/fpu/fesetmode.c   | 15 +++++----------
>   4 files changed, 20 insertions(+), 22 deletions(-)
> 
> diff --git a/sysdeps/powerpc/fpu/fedisblxcpt.c b/sysdeps/powerpc/fpu/fedisblxcpt.c
> index 5cc8799..fa666b0 100644
> --- a/sysdeps/powerpc/fpu/fedisblxcpt.c
> +++ b/sysdeps/powerpc/fpu/fedisblxcpt.c
> @@ -26,7 +26,7 @@ fedisableexcept (int excepts)
>     int result, new;
> 
>     /* Get current exception mask to return.  */
> -  fe.fenv = curr.fenv = fegetenv_register ();
> +  fe.fenv = curr.fenv = fegetenv_status ();
>     result = fenv_reg_to_exceptions (fe.l);
> 
>     if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
> @@ -36,13 +36,11 @@ fedisableexcept (int excepts)
>     fe.l &= ~ fenv_exceptions_to_reg (excepts);
> 
>     if (fe.l != curr.l)
> -    fesetenv_register (fe.fenv);
> +    fesetenv_mode (fe.fenv);
> 
> -  new = __fegetexcept ();
> +  new = fenv_reg_to_exceptions (fe.l);
>     if (new == 0 && result != 0)
>       (void)__fe_mask_env ();
> 
> -  if ((new & excepts) != 0)
> -    result = -1;
>     return result;
>   }
> diff --git a/sysdeps/powerpc/fpu/feenablxcpt.c b/sysdeps/powerpc/fpu/feenablxcpt.c
> index 3b64398..c658290 100644
> --- a/sysdeps/powerpc/fpu/feenablxcpt.c
> +++ b/sysdeps/powerpc/fpu/feenablxcpt.c
> @@ -26,7 +26,7 @@ feenableexcept (int excepts)
>     int result, new;
> 
>     /* Get current exception mask to return.  */
> -  fe.fenv = curr.fenv = fegetenv_register ();
> +  fe.fenv = curr.fenv = fegetenv_status ();
>     result = fenv_reg_to_exceptions (fe.l);
> 
>     if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
> @@ -36,14 +36,11 @@ feenableexcept (int excepts)
>     fe.l |= fenv_exceptions_to_reg (excepts);
> 
>     if (fe.l != curr.l)
> -    fesetenv_register (fe.fenv);
> +    fesetenv_mode (fe.fenv);
> 
> -  new = __fegetexcept ();
> +  new = fenv_reg_to_exceptions (fe.l);
>     if (new != 0 && result == 0)
>       (void) __fe_nomask_env_priv ();
> 
> -  if ((new & excepts) != excepts)
> -    result = -1;
> -
>     return result;
>   }
> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
> index 853239f..8ba4832 100644
> --- a/sysdeps/powerpc/fpu/fenv_libc.h
> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
> @@ -70,6 +70,11 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
>   	    __builtin_mtfsf (0xff, d); \
>   	} while(0)
> 
> +/* Set the last 2 nibbles of the FPSCR, which contain the
> +   exception enables and the rounding mode.
> +   'fegetenv_status' retrieves these bits by reading the FPSCR.  */
> +#define fesetenv_mode(env) __builtin_mtfsf (0b00000011, (env));
> +
>   /* This very handy macro:
>      - Sets the rounding mode to 'round to nearest';
>      - Sets the processor into IEEE mode; and
> @@ -206,8 +211,11 @@ enum {
>     (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
>   #define FPSCR_BASIC_EXCEPTIONS_MASK \
>     (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
> -
> +#define FPSCR_FPRF_MASK \
> +  (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \
> +   FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK)
>   #define FPSCR_CONTROL_MASK (FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK)
> +#define FPSCR_STATUS_MASK (FPSCR_FR_MASK|FPSCR_FI_MASK|FPSCR_FPRF_MASK)
> 
>   /* The bits in the FENV(1) ABI for exceptions correspond one-to-one with bits
>      in the FPSCR, albeit shifted to different but corresponding locations.
> diff --git a/sysdeps/powerpc/fpu/fesetmode.c b/sysdeps/powerpc/fpu/fesetmode.c
> index 4f4f71a..e92559b 100644
> --- a/sysdeps/powerpc/fpu/fesetmode.c
> +++ b/sysdeps/powerpc/fpu/fesetmode.c
> @@ -19,11 +19,6 @@
>   #include <fenv_libc.h>
>   #include <fpu_control.h>
> 
> -#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM	\
> -		       | _FPU_MASK_XM | _FPU_MASK_IM)
> -
> -#define FPU_STATUS 0xbffff700ULL
> -
>   int
>   fesetmode (const femode_t *modep)
>   {
> @@ -32,18 +27,18 @@ fesetmode (const femode_t *modep)
>     /* Logic regarding enabled exceptions as in fesetenv.  */
> 
>     new.fenv = *modep;
> -  old.fenv = fegetenv_register ();
> -  new.l = (new.l & ~FPU_STATUS) | (old.l & FPU_STATUS);
> +  old.fenv = fegetenv_status ();
> +  new.l = (new.l & ~FPSCR_STATUS_MASK) | (old.l & FPSCR_STATUS_MASK);
> 
>     if (old.l == new.l)
>       return 0;
> 
> -  if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
> +  if ((old.l & FPSCR_ENABLES_MASK) == 0 && (new.l & FPSCR_ENABLES_MASK) != 0)
>       (void) __fe_nomask_env_priv ();
> 
> -  if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
> +  if ((old.l & FPSCR_ENABLES_MASK) != 0 && (new.l & FPSCR_ENABLES_MASK) == 0)
>       (void) __fe_mask_env ();
> 
> -  fesetenv_register (new.fenv);
> +  fesetenv_mode (new.fenv);
>     return 0;
>   }
> 



More information about the Libc-alpha mailing list