This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] [powerpc] SET_RESTORE_ROUND optimizations and bug fix


On 9/10/19 5:06 PM, Paul E Murphy wrote:
> On 9/10/19 1:15 PM, Paul A. Clarke wrote:
>> SET_RESTORE_ROUND brackets a block of code, temporarily setting and
>> restoring the rounding mode and letting everything else, including
>> exceptions generated within the block, pass through.
>>
>> On powerpc, the current code clears the exception enables, which will hide
>> exceptions generated within the block.  This issue was introduced by me
>> in commit e905212627350d54b58426214b5a54ddc852b0c9.
>>
>> Fix this by not clearing exception enable bits in the prologue.
>>
>> Also, since we are no longer changing the enable bits in either the
>> prologue or the epilogue, there is no need to test for entering/exiting
>> non-stop mode.
>>
>> Also, optimize the prologue get/save/set rounding mode operations for
>> POWER9 and later by using 'mffscrn' when possible.
>>
>> Fixes: e905212627350d54b58426214b5a54ddc852b0c9
>>
>> 2019-09-10  Paul A. Clarke  <pc@us.ibm.com>
>>
>>     * sysdeps/powerpc/fpu/fenv_libc.h (fegetenv_and_set_rn): New.
>>     (__fe_mffscrn): New.
>>     * sysdeps/powerpc/fpu/fenv_private.h (libc_feholdsetround_ppc_ctx):
>>     Do not clear enable bits, remove obsolete code, use
>>     fegetenv_and_set_rn.
>>     (libc_feresetround_ppc): Remove obsolete code, use
>>     fegetenv_and_set_rn.
>> ---
>>   sysdeps/powerpc/fpu/fenv_libc.h    | 32 ++++++++++++++++++++++++++++++++
>>   sysdeps/powerpc/fpu/fenv_private.h | 23 ++++-------------------
>>   2 files changed, 36 insertions(+), 19 deletions(-)
>>
>> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
>> index 0aad897..3173bc2 100644
>> --- a/sysdeps/powerpc/fpu/fenv_libc.h
>> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
>> @@ -48,6 +48,38 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
>>       __fr;                                \
>>     })
>>   +#define __fe_mffscrn(rn)                        \
>> +  ({register fenv_union_t __fr;                        \
>> +    if (__builtin_constant_p (rn))                    \
>> +      __asm__ __volatile__ (                        \
>> +        ".machine push; .machine \"power9\"; mffscrni %0,%1; .machine pop" \
>> +        : "=f" (__fr.fenv) : "i" (rn));                    \
>> +    else                                \
>> +    {                                    \
>> +      __fr.l = (rn);                            \
>> +      __asm__ __volatile__ (                        \
>> +        ".machine push; .machine \"power9\"; mffscrn %0,%1; .machine pop" \
>> +        : "=f" (__fr.fenv) : "f" (__fr.fenv));                \
>> +    }                                    \
>> +    __fr.fenv;                                \
>> +  })
> OK
> 
>> +
>> +/* Like fegetenv_status, but also sets the rounding mode.  */
>> +#ifdef _ARCH_PWR9
>> +#define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
>> +#else
>> +/* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
>> +   but not sufficient, because it does not set the rounding mode.
>> +   Explicitly set the rounding mode when 'mffscrn' actually doesn't.  */
>> +#define fegetenv_and_set_rn(rn)                        \
>> +  ({register fenv_union_t __fr;                        \
>> +    __fr.fenv = __fe_mffscrn (rn);                    \
>> +    if (__glibc_unlikely (!(GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00)))    \
>> +      __fesetround_inline (rn);                        \
>> +    __fr.fenv;                                \
>> +  })
>> +#endif

> Is the conditional code faster than unconditionally calling __fesetround_inline?

My measurements indicate that unconditionally calling _fesetround_inline is a bit slower than with the condition (on POWER9, where the condition will always fail).

>> +
>>   /* Equivalent to fesetenv, but takes a fenv_t instead of a pointer.  */
>>   #define fesetenv_register(env) \
>>       do { \
>> diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h
>> index 3286b4e..504f7b8 100644
>> --- a/sysdeps/powerpc/fpu/fenv_private.h
>> +++ b/sysdeps/powerpc/fpu/fenv_private.h
>> @@ -101,11 +101,7 @@ static __always_inline void
>>   libc_feresetround_ppc (fenv_t *envp)
>>   {
>>     fenv_union_t new = { .fenv = *envp };
>> -
>> -  __TEST_AND_EXIT_NON_STOP (-1ULL, new.l);
>> -
>> -  /* Atomically enable and raise (if appropriate) exceptions set in `new'.  */
>> -  fesetenv_mode (new.fenv);
>> +  fegetenv_and_set_rn (new.l & FPSCR_RN_MASK);
>>   }
> OK
> 
> 
>>     static __always_inline int
>> @@ -147,21 +143,10 @@ libc_feupdateenv_ppc (fenv_t *e)
>>   static __always_inline void
>>   libc_feholdsetround_ppc_ctx (struct rm_ctx *ctx, int r)
>>   {
>> -  fenv_union_t old, new;
>> -
>> -  old.fenv = fegetenv_status ();
>> +  fenv_union_t old;
>>   -  new.l = (old.l & ~(FPSCR_ENABLES_MASK|FPSCR_RN_MASK)) | r;
>> -
>> -  ctx->env = old.fenv;
>> -  if (__glibc_unlikely (new.l != old.l))
>> -    {
>> -      __TEST_AND_ENTER_NON_STOP (old.l, 0ULL);
>> -      fesetenv_mode (new.fenv);
>> -      ctx->updated_status = true;
>> -    }
>> -  else
>> -    ctx->updated_status = false;
>> +  ctx->env = old.fenv = fegetenv_and_set_rn (r);
>> +  ctx->updated_status = (r != (old.l & FPSCR_RN_MASK))OK

PC


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]