This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
RE: [PATCH 12/13] AArch64: Cleanup fenv implementation
- From: "Wilco Dijkstra" <wdijkstr at arm dot com>
- To: <marcus dot shawcroft at gmail dot com>
- Cc: <libc-alpha at sourceware dot org>
- Date: Thu, 18 Dec 2014 15:58:11 -0000
- Subject: RE: [PATCH 12/13] AArch64: Cleanup fenv implementation
- Authentication-results: sourceware.org; auth=none
- References:
ping
> -----Original Message-----
> From: Wilco Dijkstra [mailto:wdijkstr@arm.com]
> Sent: 23 October 2014 18:36
> To: 'libc-alpha@sourceware.org'
> Subject: [PATCH 12/13] AArch64: Cleanup fenv implementation
>
> Improve fesetenv performance by avoiding unnecessary FPSR/FPCR reads/writes.
> It uses the same logic as the ARM version. In the common case (a
> caller-supplied environment) this removes one FPSR read and one FPCR read.
> For FE_DFL_ENV and FE_NOMASK_ENV, the FPCR read-back is avoided when the
> FPCR does not change.
>
> ChangeLog:
> 2014-10-23 Wilco Dijkstra <wdijkstr@arm.com>
>
> * sysdeps/aarch64/fpu/fesetenv.c (fesetenv):
> Optimize to reduce FPCR/FPSR accesses.
>
> ---
> sysdeps/aarch64/fpu/fesetenv.c | 40 +++++++++++++++++++++++-----------------
> 1 file changed, 23 insertions(+), 17 deletions(-)
>
> diff --git a/sysdeps/aarch64/fpu/fesetenv.c b/sysdeps/aarch64/fpu/fesetenv.c
> index c19680d..f71014d 100644
> --- a/sysdeps/aarch64/fpu/fesetenv.c
> +++ b/sysdeps/aarch64/fpu/fesetenv.c
> @@ -29,8 +29,20 @@ fesetenv (const fenv_t *envp)
> fpu_fpsr_t fpsr_new;
>
> _FPU_GETCW (fpcr);
> - _FPU_GETFPSR (fpsr);
>
> + if ((envp != FE_DFL_ENV) && (envp != FE_NOMASK_ENV))
> + {
> + /* The new FPCR/FPSR are valid, so don't merge the reserved flags. */
> + fpcr_new = envp->__fpcr;
> +
> + if (fpcr != fpcr_new)
> + _FPU_SETCW (fpcr_new);
> +
> + _FPU_SETFPSR (envp->__fpsr);
> + return 0;
> + }
> +
> + _FPU_GETFPSR (fpsr);
> fpcr_new = fpcr & _FPU_RESERVED;
> fpsr_new = fpsr & _FPU_FPSR_RESERVED;
>
> @@ -39,31 +51,25 @@ fesetenv (const fenv_t *envp)
> fpcr_new |= _FPU_DEFAULT;
> fpsr_new |= _FPU_FPSR_DEFAULT;
> }
> - else if (envp == FE_NOMASK_ENV)
> + else
> {
> fpcr_new |= _FPU_FPCR_IEEE;
> fpsr_new |= _FPU_FPSR_IEEE;
> }
> - else
> - {
> - fpcr_new |= envp->__fpcr & ~_FPU_RESERVED;
> - fpsr_new |= envp->__fpsr & ~_FPU_FPSR_RESERVED;
> - }
>
> - if (fpsr != fpsr_new)
> - _FPU_SETFPSR (fpsr_new);
> + _FPU_SETFPSR (fpsr_new);
>
> if (fpcr != fpcr_new)
> - _FPU_SETCW (fpcr_new);
> + {
> + _FPU_SETCW (fpcr_new);
>
> - /* Trapping exceptions are optional in AArch64 the relevant enable
> - bits in FPCR are RES0 hence the absence of support can be
> - detected by reading back the FPCR and comparing with the required
> - value. */
> + /* Trapping exceptions are optional in AArch64; the relevant enable
> + bits in FPCR are RES0 hence the absence of support can be detected
> + by reading back the FPCR and comparing with the required value. */
> + _FPU_GETCW (updated_fpcr);
>
> - _FPU_GETCW (updated_fpcr);
> - if ((updated_fpcr & fpcr_new) != fpcr_new)
> - return 1;
> + return fpcr_new & ~updated_fpcr;
> + }
>
> return 0;
> }
> --
> 1.9.1