Improvement of fmod()
Edison von Myositis
edison.von.myosotis@gmail.com
Tue Mar 7 15:46:11 GMT 2023
I've implemented fmod() in a way that it runs +105% to +150% faster than
the 30 yrs. old implementation from sun. It requires sth. like SETcc and BSR
/ LZCNT on x86.
#include <stdint.h>
#include <string.h>
#if defined(_MSC_VER)
#include <intrin.h>
#endif
#define LIKELY(x) __builtin_expect((x), 1)
#define UNLIKELY(x) __builtin_expect((x), 0)
#define MAX_EXP (0x7FF)
#define SIGN_BIT ((uint64_t)1 << 63)
#define EXP_MASK ((uint64_t)MAX_EXP << 52)
#define IMPLCIT_BIT ((uint64_t)1 << 52)
#define MANT_MASK (IMPLCIT_BIT - 1)
#define HAS_MAX_EXP(b) ((b) >= EXP_MASK)
#define HAS_INF_MANT(b) (!((b) & MANT_MASK))
inline uint64_t bin( double d )
{
uint64_t u;
memcpy( &u, &d, sizeof d );
return u;
}
inline double dbl( uint64_t u )
{
double d;
memcpy( &d, &u, sizeof u );
return d;
}
inline void normalize( uint64_t *mant, int *exp )
{
unsigned bits = __builtin_clzll( *mant ) - 11;
*mant <<= bits;
*exp -= bits;
}
double myFmodC<( double counter, double denom )
{
uint64_t
bCounter = bin( counter ),
bDenom = bin( denom ) & ~SIGN_BIT,
bSign = bCounter & SIGN_BIT;
bCounter &= ~SIGN_BIT;
if( UNLIKELY(!bDenom) || UNLIKELY(HAS_MAX_EXP(bCounter)) )
return (counter * denom) / (counter * denom);
if( UNLIKELY(HAS_MAX_EXP(bDenom)) )
if( LIKELY(HAS_INF_MANT(bDenom)) )
return counter;
else
return (counter * denom) / (counter * denom);
if( UNLIKELY(!bCounter) )
return counter;
int
counterExp = bCounter >> 52 & MAX_EXP,
denomExp = bDenom >> 52 & MAX_EXP;
uint64_t
counterMant = (uint64_t)(counterExp != 0) << 52 | bCounter &
MANT_MASK,
denomMant = (uint64_t)(denomExp != 0) << 52 | bDenom & MANT_MASK;
if( UNLIKELY(!counterExp) )
// normalize counter
normalize( &counterMant, &counterExp ),
++counterExp;
if( UNLIKELY(!denomExp) )
// normalize denominator
normalize( &denomMant, &denomExp ),
++denomExp;
int remExp = counterExp;
uint64_t remMant = counterMant;
for( ; ; )
{
int below = remMant < denomMant;
if( UNLIKELY(remExp - below < denomExp) )
break;
remExp -= below;
remMant <<= below;
if( UNLIKELY(!(remMant -= denomMant)) )
{
remExp = 0;
break;
}
normalize( &remMant, &remExp );
};
if( UNLIKELY(remExp <= 0) )
// denormal result
remMant >>= -remExp + 1,
remExp = 0;
return dbl( bSign | (uint64_t)remExp << 52 | remMant & MANT_MASK );
}
The results are binary-compatible to those of glibc i.e. all the (S)NaN-
and Inf-results are all the same and all finite results are the same.
More information about the Libc-alpha
mailing list