This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] [powerpc] fe{en,dis}ableexcept optimize bit translations
- From: Adhemerval Zanella <adhemerval dot zanella at linaro dot org>
- To: libc-alpha at sourceware dot org
- Date: Wed, 28 Aug 2019 09:58:19 -0300
- Subject: Re: [PATCH] [powerpc] fe{en,dis}ableexcept optimize bit translations
- References: <1564802553-6645-1-git-send-email-pc@us.ibm.com>
On 03/08/2019 00:22, Paul A. Clarke wrote:
> From: "Paul A. Clarke" <pc@us.ibm.com>
>
> The exceptions passed to fe{en,dis}ableexcept() are defined in the ABI
> as a bitmask, a combination of FE_INVALID, FE_OVERFLOW, etc.
> Within the functions, these bits must be translated to/from the corresponding
> enable bits in the Floating Point Status Control Register (FPSCR).
> This translation is currently done bit-by-bit. The compiler generates
> a series of conditional bit operations. Nicely, the "FE" exception
> bits are all a uniform offset from the FPSCR enable bits, so the bit-by-bit
> operation can instead be performed by a shift with appropriate masking.
>
> 2019-08-02 Paul A. Clarke <pc@us.ibm.com>
>
> * sysdeps/powerpc/fpu/fenv_libc.h: Define FPSCR bitmasks.
> (fenv_reg_to_exceptions): Replace bitwise operations with mask-shift.
> (fenv_exceptions_to_reg): New.
> * sysdeps/powerpc/fpu/fedisblxcpt.c (fedisableexcept): Replace bitwise
> operation with call to fenv_exceptions_to_reg().
> * sysdeps/powerpc/fpu/feenablxcpt.c (feenableexcept): Likewise.
>
> This patch is a prerequisite for the two patches I sent over the past two days:
> - [powerpc] fe{en,dis}ableexcept, fesetmode: optimize FPSCR accesses
> - [powerpc] SET_RESTORE_ROUND improvements
> Apologies for sending these out-of-order. I forgot about this one during the
> freeze window.
LGTM with a suggestion below.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
>
> ---
> sysdeps/powerpc/fpu/fedisblxcpt.c | 11 +-----
> sysdeps/powerpc/fpu/feenablxcpt.c | 11 +-----
> sysdeps/powerpc/fpu/fenv_libc.h | 72 ++++++++++++++++++++++++++++++++-------
> 3 files changed, 61 insertions(+), 33 deletions(-)
>
> diff --git a/sysdeps/powerpc/fpu/fedisblxcpt.c b/sysdeps/powerpc/fpu/fedisblxcpt.c
> index 2872b1b..5cc8799 100644
> --- a/sysdeps/powerpc/fpu/fedisblxcpt.c
> +++ b/sysdeps/powerpc/fpu/fedisblxcpt.c
> @@ -33,16 +33,7 @@ fedisableexcept (int excepts)
> excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
>
> /* Sets the new exception mask. */
> - if (excepts & FE_INEXACT)
> - fe.l &= ~(1 << (31 - FPSCR_XE));
> - if (excepts & FE_DIVBYZERO)
> - fe.l &= ~(1 << (31 - FPSCR_ZE));
> - if (excepts & FE_UNDERFLOW)
> - fe.l &= ~(1 << (31 - FPSCR_UE));
> - if (excepts & FE_OVERFLOW)
> - fe.l &= ~(1 << (31 - FPSCR_OE));
> - if (excepts & FE_INVALID)
> - fe.l &= ~(1 << (31 - FPSCR_VE));
> + fe.l &= ~ fenv_exceptions_to_reg (excepts);
>
> if (fe.l != curr.l)
> fesetenv_register (fe.fenv);
Ok.
> diff --git a/sysdeps/powerpc/fpu/feenablxcpt.c b/sysdeps/powerpc/fpu/feenablxcpt.c
> index dbaffdc..3b64398 100644
> --- a/sysdeps/powerpc/fpu/feenablxcpt.c
> +++ b/sysdeps/powerpc/fpu/feenablxcpt.c
> @@ -33,16 +33,7 @@ feenableexcept (int excepts)
> excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
>
> /* Sets the new exception mask. */
> - if (excepts & FE_INEXACT)
> - fe.l |= (1 << (31 - FPSCR_XE));
> - if (excepts & FE_DIVBYZERO)
> - fe.l |= (1 << (31 - FPSCR_ZE));
> - if (excepts & FE_UNDERFLOW)
> - fe.l |= (1 << (31 - FPSCR_UE));
> - if (excepts & FE_OVERFLOW)
> - fe.l |= (1 << (31 - FPSCR_OE));
> - if (excepts & FE_INVALID)
> - fe.l |= (1 << (31 - FPSCR_VE));
> + fe.l |= fenv_exceptions_to_reg (excepts);
>
> if (fe.l != curr.l)
> fesetenv_register (fe.fenv);
Ok.
> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
> index 9861f18..853239f 100644
> --- a/sysdeps/powerpc/fpu/fenv_libc.h
> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
> @@ -131,57 +131,103 @@ __fesetround_inline_nocheck (const int round)
> /* Definitions of all the FPSCR bit numbers */
> enum {
> FPSCR_FX = 0, /* exception summary */
> +#define FPSCR_FX_MASK (1 << (31 - FPSCR_FX))
> FPSCR_FEX, /* enabled exception summary */
> +#define FPSCR_FEX_MASK (1 << (31 - FPSCR_FEX))
> FPSCR_VX, /* invalid operation summary */
> +#define FPSCR_VX_MASK (1 << (31 - FPSCR_VX))
> FPSCR_OX, /* overflow */
> +#define FPSCR_OX_MASK (1 << (31 - FPSCR_OX))
> FPSCR_UX, /* underflow */
> +#define FPSCR_UX_MASK (1 << (31 - FPSCR_UX))
> FPSCR_ZX, /* zero divide */
> +#define FPSCR_ZX_MASK (1 << (31 - FPSCR_ZX))
> FPSCR_XX, /* inexact */
> +#define FPSCR_XX_MASK (1 << (31 - FPSCR_XX))
> FPSCR_VXSNAN, /* invalid operation for sNaN */
> +#define FPSCR_VXSNAN_MASK (1 << (31 - FPSCR_VXSNAN))
> FPSCR_VXISI, /* invalid operation for Inf-Inf */
> +#define FPSCR_VXISI_MASK (1 << (31 - FPSCR_VXISI))
> FPSCR_VXIDI, /* invalid operation for Inf/Inf */
> +#define FPSCR_VXIDI_MASK (1 << (31 - FPSCR_VXIDI))
> FPSCR_VXZDZ, /* invalid operation for 0/0 */
> +#define FPSCR_VXZDZ_MASK (1 << (31 - FPSCR_VXZDZ))
> FPSCR_VXIMZ, /* invalid operation for Inf*0 */
> +#define FPSCR_VXIMZ_MASK (1 << (31 - FPSCR_VXIMZ))
> FPSCR_VXVC, /* invalid operation for invalid compare */
> +#define FPSCR_VXVC_MASK (1 << (31 - FPSCR_VXVC))
> FPSCR_FR, /* fraction rounded [fraction was incremented by round] */
> +#define FPSCR_FR_MASK (1 << (31 - FPSCR_FR))
> FPSCR_FI, /* fraction inexact */
> +#define FPSCR_FI_MASK (1 << (31 - FPSCR_FI))
> FPSCR_FPRF_C, /* result class descriptor */
> +#define FPSCR_FPRF_C_MASK (1 << (31 - FPSCR_FPRF_C))
> FPSCR_FPRF_FL, /* result less than (usually, less than 0) */
> +#define FPSCR_FPRF_FL_MASK (1 << (31 - FPSCR_FPRF_FL))
> FPSCR_FPRF_FG, /* result greater than */
> +#define FPSCR_FPRF_FG_MASK (1 << (31 - FPSCR_FPRF_FG))
> FPSCR_FPRF_FE, /* result equal to */
> +#define FPSCR_FPRF_FE_MASK (1 << (31 - FPSCR_FPRF_FE))
> FPSCR_FPRF_FU, /* result unordered */
> +#define FPSCR_FPRF_FU_MASK (1 << (31 - FPSCR_FPRF_FU))
> FPSCR_20, /* reserved */
> FPSCR_VXSOFT, /* invalid operation set by software */
> +#define FPSCR_VXSOFT_MASK (1 << (31 - FPSCR_VXSOFT))
> FPSCR_VXSQRT, /* invalid operation for square root */
> +#define FPSCR_VXSQRT_MASK (1 << (31 - FPSCR_VXSQRT))
> FPSCR_VXCVI, /* invalid operation for invalid integer convert */
> +#define FPSCR_VXCVI_MASK (1 << (31 - FPSCR_VXCVI))
> FPSCR_VE, /* invalid operation exception enable */
> +#define FPSCR_VE_MASK (1 << (31 - FPSCR_VE))
> FPSCR_OE, /* overflow exception enable */
> +#define FPSCR_OE_MASK (1 << (31 - FPSCR_OE))
> FPSCR_UE, /* underflow exception enable */
> +#define FPSCR_UE_MASK (1 << (31 - FPSCR_UE))
> FPSCR_ZE, /* zero divide exception enable */
> +#define FPSCR_ZE_MASK (1 << (31 - FPSCR_ZE))
> FPSCR_XE, /* inexact exception enable */
> +#define FPSCR_XE_MASK (1 << (31 - FPSCR_XE))
> #ifdef _ARCH_PWR6
> FPSCR_29, /* Reserved in ISA 2.05 */
> +#define FPSCR_NI_MASK (1 << (31 - FPSCR_29))
> #else
> - FPSCR_NI /* non-IEEE mode (typically, no denormalised numbers) */
> + FPSCR_NI, /* non-IEEE mode (typically, no denormalised numbers) */
> +#define FPSCR_NI_MASK (1 << (31 - FPSCR_NI))
> #endif /* _ARCH_PWR6 */
> /* the remaining two least-significant bits keep the rounding mode */
> + FPSCR_RN_hi,
> +#define FPSCR_RN_hi_MASK (1 << (31 - FPSCR_RN_hi))
> + FPSCR_RN_lo
> +#define FPSCR_RN_lo_MASK (1 << (31 - FPSCR_RN_lo))
> };
>
> +#define FPSCR_RN_MASK (FPSCR_RN_hi_MASK|FPSCR_RN_lo_MASK)
> +#define FPSCR_ENABLES_MASK \
> + (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
> +#define FPSCR_BASIC_EXCEPTIONS_MASK \
> + (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
> +
> +#define FPSCR_CONTROL_MASK (FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK)
It is ok, but one suggestion: it would be simpler to define a single macro
that creates the mask and use it. For example:
#define FPSCR_MASK(bit) (1 << (31 - (bit)))
#define FPSCR_RN_MASK (FPSCR_MASK (FPSCR_RN_hi) | FPSCR_MASK (FPSCR_RN_lo))
#define FPSCR_ENABLES_MASK \
(FPSCR_MASK(FPSCR_VE) | FPSCR_MASK (FPSCR_OE) | FPSCR_MASK (FPSCR_UE) \
| FPSCR_MASK (FPSCR_ZE) | FPSCR_MASK (FPSCR_XE))
#define FPSCR_BASIC_EXCEPTIONS_MASK \
(FPSCR_MASK (FPSCR_VX) | FPSCR_MASK (FPSCR_OX) | FPSCR_MASK (FPSCR_UX) \
| FPSCR_MASK (FPSCR_ZX) | FPSCR_MASK (FPSCR_XX))
> +
> +/* The bits in the FENV(1) ABI for exceptions correspond one-to-one with bits
> + in the FPSCR, albeit shifted to different but corresponding locations.
> + Similarly, the exception indicator bits in the FPSCR correspond one-to-one
> + with the exception enable bits. It is thus possible to map the FENV(1)
> + exceptions directly to the FPSCR enables with a simple mask and shift,
> + and vice versa. */
> +#define FPSCR_EXCEPT_TO_ENABLE_SHIFT 22
> +
> static inline int
> fenv_reg_to_exceptions (unsigned long long l)
> {
> - int result = 0;
> - if (l & (1 << (31 - FPSCR_XE)))
> - result |= FE_INEXACT;
> - if (l & (1 << (31 - FPSCR_ZE)))
> - result |= FE_DIVBYZERO;
> - if (l & (1 << (31 - FPSCR_UE)))
> - result |= FE_UNDERFLOW;
> - if (l & (1 << (31 - FPSCR_OE)))
> - result |= FE_OVERFLOW;
> - if (l & (1 << (31 - FPSCR_VE)))
> - result |= FE_INVALID;
> - return result;
> + return (((int)l) & FPSCR_ENABLES_MASK) << FPSCR_EXCEPT_TO_ENABLE_SHIFT;
> +}
> +
> +static inline unsigned long long
> +fenv_exceptions_to_reg (int excepts)
> +{
> + return (unsigned long long)
> + (excepts & FE_ALL_EXCEPT) >> FPSCR_EXCEPT_TO_ENABLE_SHIFT;
> }
>
> #ifdef _ARCH_PWR6
>
Ok.