This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 2/2 v2] [powerpc] Use faster means to access FPSCR when possible in some cases


"Paul A. Clarke" <pc@us.ibm.com> writes:

> diff --git a/sysdeps/powerpc/bits/fenvinline.h b/sysdeps/powerpc/bits/fenvinline.h
> index 7079d1a..72cd263 100644
> --- a/sysdeps/powerpc/bits/fenvinline.h
> +++ b/sysdeps/powerpc/bits/fenvinline.h
> @@ -19,12 +19,22 @@
>  #if defined __GNUC__ && !defined _SOFT_FLOAT && !defined __NO_FPRS__
>  
>  /* Inline definition for fegetround.  */
> -# define __fegetround() \
> -  (__extension__  ({ int __fegetround_result;				      \
> -		     __asm__ __volatile__				      \
> -		       ("mcrfs 7,7 ; mfcr %0"				      \
> -			: "=r"(__fegetround_result) : : "cr7");		      \
> -		     __fegetround_result & 3; }))
> +#ifdef _ARCH_PWR9
> +# define __fegetround()							\
> +  __extension__  ({							\
> +    union { double __d; unsigned long long __ll; } __u;			\
> +    __asm__ ("mffsl %0" : "=f" (__u.__d));				\
> +    __u.__ll & 0x0000000000000003LL;					\
> +  })
> +#else
> +# define __fegetround()							\
> +  __extension__  ({							\
> +        int __fegetround_result;					\
> +        __asm__ __volatile__ ("mcrfs 7,7 ; mfcr %0"			\
> +                              : "=r"(__fegetround_result) : : "cr7");	\
> +        __fegetround_result & 3;					\
> +  })

This is removing a parenthesis around "__extension__ ...".
Indentation needs to be reviewed too.

> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
> index f66bf24..79e70df 100644
> --- a/sysdeps/powerpc/fpu/fenv_libc.h
> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
> @@ -33,6 +33,25 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
>  /* Equivalent to fegetenv, but returns a fenv_t instead of taking a
>     pointer.  */
>  #define fegetenv_register() __builtin_mffs()
> +

Trailing whitespace here.

> +#ifdef _ARCH_PWR9
> +#define IS_ISA300() 1
> +#else
> +#define IS_ISA300() __builtin_cpu_supports ("arch_3_00")
> +#endif
> +
> +/* Equivalent to fegetenv_register, but only returns bits for
> +   status, exception enables, and mode.  */
> +#define fegetenv_status()						\
> +  ({register double __fr;						\
> +    if (__glibc_likely(IS_ISA300())) 					\
> +      __asm__ __volatile__ (						\
> +        ".machine push; .machine \"power9\"; mffsl %0; .machine pop"	\
> +        : "=f" (__fr));							\
> +    else								\
> +      __fr = __builtin_mffs();						\
> +    __fr;								\
> +  })
>  
>  /* Equivalent to fesetenv, but takes a fenv_t instead of a pointer.  */
>  #define fesetenv_register(env) \
> diff --git a/sysdeps/powerpc/fpu_control.h b/sysdeps/powerpc/fpu_control.h
> index 0ab9331..4c151e9 100644
> --- a/sysdeps/powerpc/fpu_control.h
> +++ b/sysdeps/powerpc/fpu_control.h
> @@ -23,6 +23,12 @@
>  # error "SPE/e500 is no longer supported"
>  #endif
>  
> +#ifdef _ARCH_PWR9
> +#define IS_ISA300() 1

Indentation missing.
This is an installed header.  The macro should start with __ in order to avoid
a namespace conflict.

> +#else
> +#define IS_ISA300() __builtin_cpu_supports ("arch_3_00")

Likewise.

> @@ -65,34 +71,31 @@ extern fpu_control_t __fpu_control;
>  typedef unsigned int fpu_control_t;
>  
>  /* Macros for accessing the hardware control word.  */
> -# define __FPU_MFFS()						\
> -  ({register double __fr;					\
> -    __asm__ __volatile__("mffs %0" : "=f" (__fr));				\
> -    __fr;							\
> -  })
> -

You've just added it, so no problem if you remove before the release.

>  # define _FPU_GETCW(cw)						\
>    ({union { double __d; unsigned long long __ll; } __u;		\
> -    __u.__d = __FPU_MFFS();					\
> +    register double __fr;					\
> +    __asm__ __volatile__("mffs %0" : "=f" (__fr));		\
> +    __u.__d = __fr;						\
>      (cw) = (fpu_control_t) __u.__ll;				\
>      (fpu_control_t) __u.__ll;					\
>    })
>  
> -#ifdef _ARCH_PWR9
> -# define __FPU_MFFSL()						\
> -  ({register double __fr;					\
> -    __asm__ __volatile__("mffsl %0" : "=f" (__fr));				\
> -    __fr;							\
> +# define _FPU_GET_RC_FAST()						\
> +  ({union { double __d; unsigned long long __ll; } __u;			\
> +    register double __fr;						\
> +    __asm__ __volatile__(						\
> +      ".machine push; .machine \"power9\"; mffsl %0; .machine pop" 	\
> +      : "=f" (__fr));							\
> +    __u.__d = __fr;							\
> +    __u.__ll &= _FPU_MASK_RC;						\
> +    (fpu_control_t) __u.__ll;						\
>    })
> -#else
> -# define __FPU_MFFSL() __FPU_MFFS()
> -#endif
> -    
> +
>  # define _FPU_GET_RC()						\
> -  ({union { double __d; unsigned long long __ll; } __u;		\
> -    __u.__d = __FPU_MFFSL();					\
> -    __u.__ll &= _FPU_MASK_RC;					\
> -    (fpu_control_t) __u.__ll;					\
> +  ({fpu_control_t rc = __glibc_likely(IS_ISA300())		\
> +    ? _FPU_GET_RC_FAST()					\
> +    : _FPU_GETCW(rc);						\

This is missing '& _FPU_MASK_RC'.
s/rc/__rc/ too

-- 
Tulio Magno


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]