This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH 2/2 v2] [powerpc] Use faster means to access FPSCR when possible in some cases
"Paul A. Clarke" <pc@us.ibm.com> writes:
> diff --git a/sysdeps/powerpc/bits/fenvinline.h b/sysdeps/powerpc/bits/fenvinline.h
> index 7079d1a..72cd263 100644
> --- a/sysdeps/powerpc/bits/fenvinline.h
> +++ b/sysdeps/powerpc/bits/fenvinline.h
> @@ -19,12 +19,22 @@
> #if defined __GNUC__ && !defined _SOFT_FLOAT && !defined __NO_FPRS__
>
> /* Inline definition for fegetround. */
> -# define __fegetround() \
> - (__extension__ ({ int __fegetround_result; \
> - __asm__ __volatile__ \
> - ("mcrfs 7,7 ; mfcr %0" \
> - : "=r"(__fegetround_result) : : "cr7"); \
> - __fegetround_result & 3; }))
> +#ifdef _ARCH_PWR9
> +# define __fegetround() \
> + __extension__ ({ \
> + union { double __d; unsigned long long __ll; } __u; \
> + __asm__ ("mffsl %0" : "=f" (__u.__d)); \
> + __u.__ll & 0x0000000000000003LL; \
> + })
> +#else
> +# define __fegetround() \
> + __extension__ ({ \
> + int __fegetround_result; \
> + __asm__ __volatile__ ("mcrfs 7,7 ; mfcr %0" \
> + : "=r"(__fegetround_result) : : "cr7"); \
> + __fegetround_result & 3; \
> + })
This is removing a parenthesis around "__extension__ ...".
Indentation needs to be reviewed too.
> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
> index f66bf24..79e70df 100644
> --- a/sysdeps/powerpc/fpu/fenv_libc.h
> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
> @@ -33,6 +33,25 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
> /* Equivalent to fegetenv, but returns a fenv_t instead of taking a
> pointer. */
> #define fegetenv_register() __builtin_mffs()
> +
Trailing whitespace here.
> +#ifdef _ARCH_PWR9
> +#define IS_ISA300() 1
> +#else
> +#define IS_ISA300() __builtin_cpu_supports ("arch_3_00")
> +#endif
> +
> +/* Equivalent to fegetenv_register, but only returns bits for
> + status, exception enables, and mode. */
> +#define fegetenv_status() \
> + ({register double __fr; \
> + if (__glibc_likely(IS_ISA300())) \
> + __asm__ __volatile__ ( \
> + ".machine push; .machine \"power9\"; mffsl %0; .machine pop" \
> + : "=f" (__fr)); \
> + else \
> + __fr = __builtin_mffs(); \
> + __fr; \
> + })
>
> /* Equivalent to fesetenv, but takes a fenv_t instead of a pointer. */
> #define fesetenv_register(env) \
> diff --git a/sysdeps/powerpc/fpu_control.h b/sysdeps/powerpc/fpu_control.h
> index 0ab9331..4c151e9 100644
> --- a/sysdeps/powerpc/fpu_control.h
> +++ b/sysdeps/powerpc/fpu_control.h
> @@ -23,6 +23,12 @@
> # error "SPE/e500 is no longer supported"
> #endif
>
> +#ifdef _ARCH_PWR9
> +#define IS_ISA300() 1
Indentation missing.
This is an installed header. The macro should start with __ in order to avoid
a namespace conflict.
> +#else
> +#define IS_ISA300() __builtin_cpu_supports ("arch_3_00")
Likewise.
> @@ -65,34 +71,31 @@ extern fpu_control_t __fpu_control;
> typedef unsigned int fpu_control_t;
>
> /* Macros for accessing the hardware control word. */
> -# define __FPU_MFFS() \
> - ({register double __fr; \
> - __asm__ __volatile__("mffs %0" : "=f" (__fr)); \
> - __fr; \
> - })
> -
You've just added it, so no problem if you remove before the release.
> # define _FPU_GETCW(cw) \
> ({union { double __d; unsigned long long __ll; } __u; \
> - __u.__d = __FPU_MFFS(); \
> + register double __fr; \
> + __asm__ __volatile__("mffs %0" : "=f" (__fr)); \
> + __u.__d = __fr; \
> (cw) = (fpu_control_t) __u.__ll; \
> (fpu_control_t) __u.__ll; \
> })
>
> -#ifdef _ARCH_PWR9
> -# define __FPU_MFFSL() \
> - ({register double __fr; \
> - __asm__ __volatile__("mffsl %0" : "=f" (__fr)); \
> - __fr; \
> +# define _FPU_GET_RC_FAST() \
> + ({union { double __d; unsigned long long __ll; } __u; \
> + register double __fr; \
> + __asm__ __volatile__( \
> + ".machine push; .machine \"power9\"; mffsl %0; .machine pop" \
> + : "=f" (__fr)); \
> + __u.__d = __fr; \
> + __u.__ll &= _FPU_MASK_RC; \
> + (fpu_control_t) __u.__ll; \
> })
> -#else
> -# define __FPU_MFFSL() __FPU_MFFS()
> -#endif
> -
> +
> # define _FPU_GET_RC() \
> - ({union { double __d; unsigned long long __ll; } __u; \
> - __u.__d = __FPU_MFFSL(); \
> - __u.__ll &= _FPU_MASK_RC; \
> - (fpu_control_t) __u.__ll; \
> + ({fpu_control_t rc = __glibc_likely(IS_ISA300()) \
> + ? _FPU_GET_RC_FAST() \
> + : _FPU_GETCW(rc); \
This is missing '& _FPU_MASK_RC'.
s/rc/__rc/ too
--
Tulio Magno