This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] PowerPC: Suppress unnecessary FPSCR write


On Mon, Apr 28, 2014 at 06:39:53PM -0300, Adhemerval Zanella wrote:
> This patch optimizes the FPSCR update on exception and rounding change
> functions by just updating its value if new value if different from
> current one.  It also optimizes fedisableexcept and feenableexcept by
> removing an unecessary FPSCR read.
> 
> Tested on powerpc32, powerpc64, and powerpc64le.
> 
> ---
> 
> 2014-04-28  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
> 
> 	* sysdeps/powerpc/fpu/fclrexcpt.c (__feclearexcept): Do not update
> 	FPSCR if value do not change.
> 	* sysdeps/powerpc/fpu/fedisblxcpt.c (fedisableexcept): Likewise.
> 	* sysdeps/powerpc/fpu/feenablxcpt.c (feenableexcept): Likewise.
> 	* sysdeps/powerpc/fpu/feholdexcpt.c (feholdexcept): Likewise.
> 	* sysdeps/powerpc/fpu/fesetenv.c (__fesetenv): Likewise.
> 	* sysdeps/powerpc/fpu/fsetexcptflg.c (__fesetexceptflag): Likewise.
> 	* sysdeps/powerpc/fpu/fenv_libc.h (fenv_reg_to_exceptions): New helper
> 	function.
> 
> diff --git a/sysdeps/powerpc/fpu/fclrexcpt.c b/sysdeps/powerpc/fpu/fclrexcpt.c
> index cda2810..4607f62 100644
> --- a/sysdeps/powerpc/fpu/fclrexcpt.c
> +++ b/sysdeps/powerpc/fpu/fclrexcpt.c
> @@ -22,17 +22,18 @@
>  int
>  __feclearexcept (int excepts)
>  {
> -  fenv_union_t u;
> +  fenv_union_t u, n;
>  
>    /* Get the current state.  */
>    u.fenv = fegetenv_register ();
>  
>    /* Clear the relevant bits.  */
> -  u.l = u.l & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID)
> +  n.l = u.l & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID)
>  		| (excepts & FPSCR_STICKY_BITS));
>  
>    /* Put the new state in effect.  */
> -  fesetenv_register (u.fenv);
> +  if (u.l != n.l)
> +    fesetenv_register (n.fenv);
>  
>    /* Success.  */
>    return 0;
> diff --git a/sysdeps/powerpc/fpu/fedisblxcpt.c b/sysdeps/powerpc/fpu/fedisblxcpt.c
> index 5883e09..94c01ab 100644
> --- a/sysdeps/powerpc/fpu/fedisblxcpt.c
> +++ b/sysdeps/powerpc/fpu/fedisblxcpt.c
> @@ -22,15 +22,17 @@
>  int
>  fedisableexcept (int excepts)
>  {
> -  fenv_union_t fe;
> -  int result, new;
> +  fenv_union_t fe, curr;
> +  int result = 0, new;

result doesn't need to be initialized.

>  
> -  result = __fegetexcept ();
> +  /* Get current exception mask to return.  */
> +  fe.fenv = curr.fenv = fegetenv_register ();
> +  result = fenv_reg_to_exceptions (fe.l);
>  
>    if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
>      excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
>  
> -  fe.fenv = fegetenv_register ();
> +  /* Sets the new exception mask.  */
>    if (excepts & FE_INEXACT)
>      fe.l &= ~(1 << (31 - FPSCR_XE));
>    if (excepts & FE_DIVBYZERO)
> @@ -41,7 +43,9 @@ fedisableexcept (int excepts)
>      fe.l &= ~(1 << (31 - FPSCR_OE));
>    if (excepts & FE_INVALID)
>      fe.l &= ~(1 << (31 - FPSCR_VE));
> -  fesetenv_register (fe.fenv);
> +
> +  if (fe.l != curr.l)
> +    fesetenv_register (fe.fenv);
>  
>    new = __fegetexcept ();
>    if (new == 0 && result != 0)
> diff --git a/sysdeps/powerpc/fpu/feenablxcpt.c b/sysdeps/powerpc/fpu/feenablxcpt.c
> index 8ad0f97..01a68cf 100644
> --- a/sysdeps/powerpc/fpu/feenablxcpt.c
> +++ b/sysdeps/powerpc/fpu/feenablxcpt.c
> @@ -22,15 +22,17 @@
>  int
>  feenableexcept (int excepts)
>  {
> -  fenv_union_t fe;
> -  int result, new;
> +  fenv_union_t fe, curr;
> +  int result = 0, new;
>  

Likewise.

> -  result = __fegetexcept ();
> +  /* Get current exception mask to return.  */
> +  fe.fenv = curr.fenv = fegetenv_register ();
> +  result = fenv_reg_to_exceptions (fe.l);
>  
>    if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
>      excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
>  
> -  fe.fenv = fegetenv_register ();
> +  /* Sets the new exception mask.  */
>    if (excepts & FE_INEXACT)
>      fe.l |= (1 << (31 - FPSCR_XE));
>    if (excepts & FE_DIVBYZERO)
> @@ -41,7 +43,9 @@ feenableexcept (int excepts)
>      fe.l |= (1 << (31 - FPSCR_OE));
>    if (excepts & FE_INVALID)
>      fe.l |= (1 << (31 - FPSCR_VE));
> -  fesetenv_register (fe.fenv);
> +
> +  if (fe.l != curr.l)
> +    fesetenv_register (fe.fenv);
>  
>    new = __fegetexcept ();
>    if (new != 0 && result == 0)
> diff --git a/sysdeps/powerpc/fpu/feholdexcpt.c b/sysdeps/powerpc/fpu/feholdexcpt.c
> index 1375a2f..764dd38 100644
> --- a/sysdeps/powerpc/fpu/feholdexcpt.c
> +++ b/sysdeps/powerpc/fpu/feholdexcpt.c
> @@ -32,6 +32,9 @@ feholdexcept (fenv_t *envp)
>       flag.  */
>    new.l = old.l & 0xffffffff00000007LL;
>  
> +  if (new.l == old.l)
> +    return 0;
> +
>    /* If the old env had any enabled exceptions, then mask SIGFPE in the
>       MSR FE0/FE1 bits.  This may allow the FPU to run faster because it
>       always takes the default action and can not generate SIGFPE. */
> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
> index 28e4d1f..c1df5ce 100644
> --- a/sysdeps/powerpc/fpu/fenv_libc.h
> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
> @@ -146,6 +146,23 @@ enum {
>    /* the remaining two least-significant bits keep the rounding mode */
>  };
>  
> +static inline int
> +fenv_reg_to_exceptions (unsigned long long l)
> +{
> +  int result = 0;
> +  if (l & (1 << (31 - FPSCR_XE)))
> +    result |= FE_INEXACT;
> +  if (l & (1 << (31 - FPSCR_ZE)))
> +    result |= FE_DIVBYZERO;
> +  if (l & (1 << (31 - FPSCR_UE)))
> +    result |= FE_UNDERFLOW;
> +  if (l & (1 << (31 - FPSCR_OE)))
> +    result |= FE_OVERFLOW;
> +  if (l & (1 << (31 - FPSCR_VE)))
> +    result |= FE_INVALID;
> +  return result;
> +}
> +
>  #ifdef _ARCH_PWR6
>    /* Not supported in ISA 2.05.  Provided for source compat only.  */
>  # define FPSCR_NI 29
> diff --git a/sysdeps/powerpc/fpu/fesetenv.c b/sysdeps/powerpc/fpu/fesetenv.c
> index fa99ddb..138bde0 100644
> --- a/sysdeps/powerpc/fpu/fesetenv.c
> +++ b/sysdeps/powerpc/fpu/fesetenv.c
> @@ -29,6 +29,8 @@ __fesetenv (const fenv_t *envp)
>    /* get the currently set exceptions.  */
>    new.fenv = *envp;
>    old.fenv = fegetenv_register ();
> +  if (old.l == new.l)
> +    return 0;
>  
>    /* If the old env has no enabled exceptions and the new env has any enabled
>       exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits.  This will put the
> diff --git a/sysdeps/powerpc/fpu/fsetexcptflg.c b/sysdeps/powerpc/fpu/fsetexcptflg.c
> index 95193c1..cee6682 100644
> --- a/sysdeps/powerpc/fpu/fsetexcptflg.c
> +++ b/sysdeps/powerpc/fpu/fsetexcptflg.c
> @@ -21,7 +21,7 @@
>  int
>  __fesetexceptflag (const fexcept_t *flagp, int excepts)
>  {
> -  fenv_union_t u;
> +  fenv_union_t u, n;
>    fexcept_t flag;
>  
>    /* Get the current state.  */
> @@ -31,7 +31,7 @@ __fesetexceptflag (const fexcept_t *flagp, int excepts)
>    flag = *flagp & excepts;
>  
>    /* Replace the exception status */
> -  u.l = ((u.l & ~(FPSCR_STICKY_BITS & excepts))
> +  n.l = ((u.l & ~(FPSCR_STICKY_BITS & excepts))
>  	 | (flag & FPSCR_STICKY_BITS)
>  	 | (flag >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
>  	    & FE_INVALID_SOFTWARE));
> @@ -39,7 +39,8 @@ __fesetexceptflag (const fexcept_t *flagp, int excepts)
>    /* Store the new status word (along with the rest of the environment).
>       This may cause floating-point exceptions if the restored state
>       requests it.  */
> -  fesetenv_register (u.fenv);
> +  if (n.l != u.l)
> +    fesetenv_register (u.fenv);
>  
>    /* Deal with FE_INVALID_SOFTWARE not being implemented on some chips.  */
>    if (flag & FE_INVALID)
> 

Looks good to me with those changes.

Siddhesh

Attachment: pgpAfmrMnp2JV.pgp
Description: PGP signature


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]