[PATCH] [powerpc] fe{en,dis}ableexcept, fesetmode: optimize FPSCR accesses

Paul Clarke pc@us.ibm.com
Fri Aug 2 04:01:00 GMT 2019


On 8/1/19 7:04 PM, Paul E Murphy wrote:
> On 8/1/19 2:36 PM, Paul A. Clarke wrote:
>> Since fe{en,dis}ableexcept() and fesetmode() read-modify-write just the
>> "mode" (exception enable and rounding mode) bits of the Floating Point Status
>> Control Register (FPSCR), the lighter weight 'mffsl' instruction can be used
>> to read the FPSCR (enables and rounding mode), and 'mtfsf 0b00000011' can be
>> used to write just those bits back to the FPSCR.  The net is better performance.
>>
>> In addition, fe{en,dis}ableexcept() read the FPSCR again after writing it, or
>> they determine that it doesn't need to be written because it is not changing.
>> In either case, the local variable holds the current values of the enable
>> bits in the FPSCR.  This local variable can be used instead of again reading
>> the FPSCR.
>>
>> Also, that value of the FPSCR which is read the second time is validated
>> against the requested enables.  Since the write can't fail, this validation
>> step is unnecessary, and can be removed.
> 
> The write to FPSCR may not fail, but it may not change all the
> requested bits. e.g fedisableexcept(FE_INVALID_SQRT). Should the
> existing behavior be preserved?

That's an interesting question.  The current unpatched code will apparently accept all possible exception bits, enable (or disable) the ones it can, and return -1 if the set (or unset) bits don't match the requested bits.

For the INVALID bits, these are transformed into FE_INVALID iff all of the defined INVALID bits are set.

So, the case of "feenableexcept (FE_OVERFLOW|FE_INVALID_SQRT)" would result in FE_OVERFLOW being enabled, yet the function returns failure because FE_INVALID_SQRT is not.  Is the side effect of enabling FE_OVERFLOW but returning failure appropriate?  Should the enablement be reverted in that case (all-or-nothing enabled) to avoid side-effects on failure?

Are you just asking about the whether the requested bits _can_ be set?  That could be determined without reading (or re-reading) the FPSCR.  Should that be done up-front, before attempting to set the FPSCR at all, and returning failure if so?

Is there a crisp definition of the expected operation of these functions?  The man page is wanting.

>> Finally, convert the local macros in fesetmode.c to more generally useful
>> macros in fenv_libc.h.
>>
>> 2019-08-01  Paul A. Clarke  <pc@us.ibm.com>
>>
>>     * sysdeps/powerpc/fpu/fenv_libc.h (fesetenv_mode): New.
>>     (FPSCR_FPRF_MASK): New. (FPSCR_STATUS_MASK): New.
>>     * sysdeps/powerpc/fpu/feenablxcpt.c (feenableexcept): Use lighter-
>>     weight access to FPSCR; remove unnecessary second FPSCR read and
>>     validate.
>>     * sysdeps/powerpc/fpu/fedisblxcpt.c (fedisableexcept): Likewise.
>>     * sysdeps/powerpc/fpu/fesetmode.c (fesetmode): Use lighter-weight
>>     access to FPSCR; Use macros in fenv_libc.h in favor of local.
>> ---
>>   sysdeps/powerpc/fpu/fedisblxcpt.c |  8 +++-----
>>   sysdeps/powerpc/fpu/feenablxcpt.c |  9 +++------
>>   sysdeps/powerpc/fpu/fenv_libc.h   | 10 +++++++++-
>>   sysdeps/powerpc/fpu/fesetmode.c   | 15 +++++----------
>>   4 files changed, 20 insertions(+), 22 deletions(-)
>>
>> diff --git a/sysdeps/powerpc/fpu/fedisblxcpt.c b/sysdeps/powerpc/fpu/fedisblxcpt.c
>> index 5cc8799..fa666b0 100644
>> --- a/sysdeps/powerpc/fpu/fedisblxcpt.c
>> +++ b/sysdeps/powerpc/fpu/fedisblxcpt.c
>> @@ -26,7 +26,7 @@ fedisableexcept (int excepts)
>>     int result, new;
>>
>>     /* Get current exception mask to return.  */
>> -  fe.fenv = curr.fenv = fegetenv_register ();
>> +  fe.fenv = curr.fenv = fegetenv_status ();
>>     result = fenv_reg_to_exceptions (fe.l);
>>
>>     if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
>> @@ -36,13 +36,11 @@ fedisableexcept (int excepts)
>>     fe.l &= ~ fenv_exceptions_to_reg (excepts);
>>
>>     if (fe.l != curr.l)
>> -    fesetenv_register (fe.fenv);
>> +    fesetenv_mode (fe.fenv);
>>
>> -  new = __fegetexcept ();
>> +  new = fenv_reg_to_exceptions (fe.l);
>>     if (new == 0 && result != 0)
>>       (void)__fe_mask_env ();
>>
>> -  if ((new & excepts) != 0)
>> -    result = -1;
>>     return result;
>>   }
>> diff --git a/sysdeps/powerpc/fpu/feenablxcpt.c b/sysdeps/powerpc/fpu/feenablxcpt.c
>> index 3b64398..c658290 100644
>> --- a/sysdeps/powerpc/fpu/feenablxcpt.c
>> +++ b/sysdeps/powerpc/fpu/feenablxcpt.c
>> @@ -26,7 +26,7 @@ feenableexcept (int excepts)
>>     int result, new;
>>
>>     /* Get current exception mask to return.  */
>> -  fe.fenv = curr.fenv = fegetenv_register ();
>> +  fe.fenv = curr.fenv = fegetenv_status ();
>>     result = fenv_reg_to_exceptions (fe.l);
>>
>>     if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
>> @@ -36,14 +36,11 @@ feenableexcept (int excepts)
>>     fe.l |= fenv_exceptions_to_reg (excepts);
>>
>>     if (fe.l != curr.l)
>> -    fesetenv_register (fe.fenv);
>> +    fesetenv_mode (fe.fenv);
>>
>> -  new = __fegetexcept ();
>> +  new = fenv_reg_to_exceptions (fe.l);
>>     if (new != 0 && result == 0)
>>       (void) __fe_nomask_env_priv ();
>>
>> -  if ((new & excepts) != excepts)
>> -    result = -1;
>> -
>>     return result;
>>   }
>> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
>> index 853239f..8ba4832 100644
>> --- a/sysdeps/powerpc/fpu/fenv_libc.h
>> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
>> @@ -70,6 +70,11 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
>>           __builtin_mtfsf (0xff, d); \
>>       } while(0)
>>
>> +/* Set the last 2 nibbles of the FPSCR, which contain the
>> +   exception enables and the rounding mode.
>> +   'fegetenv_status' retrieves these bits by reading the FPSCR.  */
>> +#define fesetenv_mode(env) __builtin_mtfsf (0b00000011, (env));
>> +
>>   /* This very handy macro:
>>      - Sets the rounding mode to 'round to nearest';
>>      - Sets the processor into IEEE mode; and
>> @@ -206,8 +211,11 @@ enum {
>>     (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
>>   #define FPSCR_BASIC_EXCEPTIONS_MASK \
>>     (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
>> -
>> +#define FPSCR_FPRF_MASK \
>> +  (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \
>> +   FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK)
>>   #define FPSCR_CONTROL_MASK (FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK)
>> +#define FPSCR_STATUS_MASK (FPSCR_FR_MASK|FPSCR_FI_MASK|FPSCR_FPRF_MASK)
>>
>>   /* The bits in the FENV(1) ABI for exceptions correspond one-to-one with bits
>>      in the FPSCR, albeit shifted to different but corresponding locations.
>> diff --git a/sysdeps/powerpc/fpu/fesetmode.c b/sysdeps/powerpc/fpu/fesetmode.c
>> index 4f4f71a..e92559b 100644
>> --- a/sysdeps/powerpc/fpu/fesetmode.c
>> +++ b/sysdeps/powerpc/fpu/fesetmode.c
>> @@ -19,11 +19,6 @@
>>   #include <fenv_libc.h>
>>   #include <fpu_control.h>
>>
>> -#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM    \
>> -               | _FPU_MASK_XM | _FPU_MASK_IM)
>> -
>> -#define FPU_STATUS 0xbffff700ULL
>> -
>>   int
>>   fesetmode (const femode_t *modep)
>>   {
>> @@ -32,18 +27,18 @@ fesetmode (const femode_t *modep)
>>     /* Logic regarding enabled exceptions as in fesetenv.  */
>>
>>     new.fenv = *modep;
>> -  old.fenv = fegetenv_register ();
>> -  new.l = (new.l & ~FPU_STATUS) | (old.l & FPU_STATUS);
>> +  old.fenv = fegetenv_status ();
>> +  new.l = (new.l & ~FPSCR_STATUS_MASK) | (old.l & FPSCR_STATUS_MASK);
>>
>>     if (old.l == new.l)
>>       return 0;
>>
>> -  if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
>> +  if ((old.l & FPSCR_ENABLES_MASK) == 0 && (new.l & FPSCR_ENABLES_MASK) != 0)
>>       (void) __fe_nomask_env_priv ();
>>
>> -  if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
>> +  if ((old.l & FPSCR_ENABLES_MASK) != 0 && (new.l & FPSCR_ENABLES_MASK) == 0)
>>       (void) __fe_mask_env ();
>>
>> -  fesetenv_register (new.fenv);
>> +  fesetenv_mode (new.fenv);
>>     return 0;
>>   }

PC



More information about the Libc-alpha mailing list