This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Improve erfcf accuracy (bugs 2541, 4108) (and erfcl for ldbl-128ibm)

From: "Joseph S. Myers" <joseph at codesourcery dot com>
To: libc-alpha at sourceware dot org
Date: Sun, 26 Feb 2012 16:31:14 +0000 (UTC)
Subject: Improve erfcf accuracy (bugs 2541, 4108) (and erfcl for ldbl-128ibm)
Bugs 2541 and 4108 report problems with the accuracy of erfcf for
arguments slightly below 4 and 8.

The problems arise from the code

            GET_FLOAT_WORD(ix,x);
            SET_FLOAT_WORD(z,ix&0xfffff000);
            r  =  __ieee754_expf(-z*z-(float)0.5625)*
                        __ieee754_expf((z-x)*(z+x)+R/S);

which splits x into upper and lower parts before squaring, rather than
directly computing expf (-x*x), because a direct computation of x*x
would be inexact.  Computing exp of an inexact value magifies rounding
error roughly if the inexact value has positive exponent (so fewer
than 24 bits after the point, for float).  The idea is that z*z is
exact; (z-x)*(z+x) may not be exact but the inexactness is in bits low
enough not to matter for the final result.

However, while z*z is exact, the problem is that adding 0.5625
increases the exponent, so (-z*z-(float)0.5625) is not exact - one bit
has been lost - and expf then magnifies the effect of losing that one
bit into the errors reported in those bug reports.  As long as,
roughly, z has as many bits after the point as before it, and
z*z+0.5625 is exact, the precise number of bits masked off does not
matter.  This patch fixes the problem by putting one fewer bit in z
(so z has 11 of the 24 bits of x).

Examining the other implementations shows that the only one that looks
like it has a similar problem is the ldbl-128ibm implementation.  (The
long double test added by this patch had a 23ulp error before the
patch (and 1ulp after) - there are probably cases which had larger
errors.)  The code in that implementation zeroed 45 bits of the low
double - but that is not enough for z*z to be exact.  Since addition
and multiplication of IBM long doubles are not always exact when the
low double is involved, the safest approach seemed to be to zero
enough bits for z*z+0.5625 to be exact as a double (which still leaves
enough nonzeroed bits for the algorithm to work), which this patch
does.

Tested and ulps updated for x86_64, i386, powerpc (on the basis that
other ulps files will be updated before 2.16 release, whether by
architecture maintainers or by others testing on those architectures).

2012-02-26  Joseph Myers  <joseph@codesourcery.com>

	[BZ #2541]
	[BZ #4108]
	* sysdeps/ieee754/flt-32/s_erff.c (__erfcf): Mask out one more bit
	before squaring exponent.
	* sysdeps/ieee754/ldbl-128ibm/s_erfl.c (__erfcl): Mask out whole
	bottom long double and 27 bits of top long double before squaring
	exponent.
	* math/libm-test.inc (erfc_test): Add more tests.
	* sysdeps/i386/fpu/libm-test-ulps: Update.
	* sysdeps/powerpc/fpu/libm-test-ulps: Likewise.
	* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.

diff --git a/math/libm-test.inc b/math/libm-test.inc
index 1016753..5b254cd 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -2478,12 +2478,18 @@ erfc_test (void)
   TEST_f_f (erfc, 0.75L, 0.288844366346484868401062165408589223L);
   TEST_f_f (erfc, 1.25L, 0.0770998717435417698634765188027188596L);
   TEST_f_f (erfc, 2.0L, 0.00467773498104726583793074363274707139L);
+  TEST_f_f (erfc, 0x1.f7303cp+1L, 2.705500297238986897105236321218861842255e-8L);
   TEST_f_f (erfc, 4.125L, 0.542340079956506600531223408575531062e-8L);
+  TEST_f_f (erfc, 0x1.ffa002p+2L, 1.233585992097580296336099501489175967033e-29L);
+  TEST_f_f (erfc, 0x1.ffffc8p+2L, 1.122671365033056305522366683719541099329e-29L);
 #ifdef TEST_LDOUBLE
   /* The result can only be represented in long double.  */
 # if LDBL_MIN_10_EXP < -319
   TEST_f_f (erfc, 27.0L, 0.523704892378925568501606768284954709e-318L);
 # endif
+# if LDBL_MANT_DIG >= 106
+  TEST_f_f (erfc, 0x1.ffff56789abcdef0123456789a8p+2L, 1.123161416304655390092138725253789378459e-29L);
+# endif
 #endif
 
   END (erfc);
diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps
index 83a68af..e687c1e 100644
--- a/sysdeps/i386/fpu/libm-test-ulps
+++ b/sysdeps/i386/fpu/libm-test-ulps
@@ -422,6 +422,17 @@ idouble: 1
 Test "erfc (0.75) == 0.288844366346484868401062165408589223":
 float: 1
 ifloat: 1
+Test "erfc (0x1.f7303cp+1) == 2.705500297238986897105236321218861842255e-8":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "erfc (0x1.ffa002p+2) == 1.233585992097580296336099501489175967033e-29":
+ildouble: 1
+ldouble: 1
+Test "erfc (0x1.ffffc8p+2) == 1.122671365033056305522366683719541099329e-29":
+double: 1
+idouble: 1
 Test "erfc (1.25) == 0.0770998717435417698634765188027188596":
 ildouble: 1
 ldouble: 1
diff --git a/sysdeps/ieee754/flt-32/s_erff.c b/sysdeps/ieee754/flt-32/s_erff.c
index 5766183..8a0610d 100644
--- a/sysdeps/ieee754/flt-32/s_erff.c
+++ b/sysdeps/ieee754/flt-32/s_erff.c
@@ -200,7 +200,7 @@ float __erfcf(float x)
 				sb5+s*(sb6+s*sb7))))));
 	    }
 	    GET_FLOAT_WORD(ix,x);
-	    SET_FLOAT_WORD(z,ix&0xfffff000);
+	    SET_FLOAT_WORD(z,ix&0xffffe000);
 	    r  =  __ieee754_expf(-z*z-(float)0.5625)*
 			__ieee754_expf((z-x)*(z+x)+R/S);
 	    if(hx>0) return r/x; else return two-r/x;
diff --git a/sysdeps/ieee754/ldbl-128ibm/s_erfl.c b/sysdeps/ieee754/ldbl-128ibm/s_erfl.c
index 85cdbe0..8868f7e 100644
--- a/sysdeps/ieee754/ldbl-128ibm/s_erfl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/s_erfl.c
@@ -920,7 +920,8 @@ __erfcl (long double x)
 	}
       u.value = x;
       u.parts32.w3 = 0;
-      u.parts32.w2 &= 0xffffe000;
+      u.parts32.w2 = 0;
+      u.parts32.w1 &= 0xf8000000;
       z = u.value;
       r = __ieee754_expl (-z * z - 0.5625) *
 	__ieee754_expl ((z - x) * (z + x) + p);
diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps
index 589bae1..00e994c 100644
--- a/sysdeps/powerpc/fpu/libm-test-ulps
+++ b/sysdeps/powerpc/fpu/libm-test-ulps
@@ -422,6 +422,15 @@ idouble: 1
 Test "erfc (0.75) == 0.288844366346484868401062165408589223":
 float: 1
 ifloat: 1
+Test "erfc (0x1.f7303cp+1) == 2.705500297238986897105236321218861842255e-8":
+double: 1
+idouble: 1
+Test "erfc (0x1.ffa002p+2) == 1.233585992097580296336099501489175967033e-29":
+float: 1
+ifloat: 1
+Test "erfc (0x1.ffff56789abcdef0123456789a8p+2) == 1.123161416304655390092138725253789378459e-29":
+ildouble: 1
+ldouble: 1
 Test "erfc (2.0) == 0.00467773498104726583793074363274707139":
 double: 1
 idouble: 1
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index a2a82e6..75b2922 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -467,6 +467,16 @@ double: 1
 idouble: 1
 
 # erfc
+Test "erfc (0x1.f7303cp+1) == 2.705500297238986897105236321218861842255e-8":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "erfc (0x1.ffa002p+2) == 1.233585992097580296336099501489175967033e-29":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
 Test "erfc (1.25) == 0.0770998717435417698634765188027188596":
 ildouble: 1
 ldouble: 1
@@ -1248,7 +1258,9 @@ idouble: 1
 
 Function: "erfc":
 double: 1
+float: 1
 idouble: 1
+ifloat: 1
 ildouble: 1
 ldouble: 1
 

-- 
Joseph S. Myers
joseph@codesourcery.com
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]