This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Fix fma missing underflows and bad results for some subnormal results(bugs 14152, 14783)


As noted in bug 14783, various fma implementations try to replicate
round-to-nearest rounding in the case of a result (rounded toward
zero) that is a subnormal value with the most significant mantissa bit
set.  This yields both missing underflow exceptions (bug 14152) and
incorrect results in other rounding modes (bug 14783).

This patch fixes this by changing the strategy (for dealing with the
issue of shifting the adjusted result only shifting out a single
mantissa bit, so being unable to merge in a sticky bit in the usual
way) to one of extracting the low order two bits of the value to be
shifted and putting those in a separate floating-point number together
with the sticky bit, then shifting the two numbers separately and
adding them.  This both ensures an underflow exception (the sticky bit
is always 1 in the problem case, so the underflow when the three-bit
value is shifted is inexact and results in such an exception) and
ensures results are correct for the current rounding direction.

Note that this leaves corner cases where, on after-rounding machines,
there may be an underflow exception that should only occur on
before-rounding machines.  I propose to fix this separately (using
tininess.h from my strtod patch
<http://sourceware.org/ml/libc-alpha/2012-10/msg00815.html> to provide
the relevant information to the implementation and the testcases) to
keep the individual patches more straightforward.  Note also that
there are other fma bugs (14784, 14785) with spurious underflows and
incorrect results in certain rounding modes, in the part of the
function that uses x * y + z "if x * y is less than half of
DBL_DENORM_MIN" (actually, the logic appears to mean this case is only
active if x * y is less than 1/8 of DBL_DENORM_MIN); again, I propose
to fix those separately.

Tested x86_64 (both multi-arch enabled, and multi-arch disabled) and
x86.  Could someone test on sparc or s390 to confirm the ldbl-128
changes work (there should be no ulps for any fma test)?

2012-10-30  Joseph Myers  <joseph@codesourcery.com>

	[BZ #14152]
	[BZ #14783]
	* sysdeps/ieee754/dbl-64/s_fma.c (__fma): Extract low bits of
	result and shift together with sticky bit instead of replicating
	round-to-nearest rounding.
	* sysdeps/ieee754/ldbl-128/s_fmal.c (__fmal): Likewise.
	* sysdeps/ieee754/ldbl-96/s_fmal.c (__fmal): Likewise.
	* math/libm-test.inc (fma_test): Add more tests.  Do not permit
	missing underflow exceptions.
	(fma_test_towardzero): Add more tests.
	(fma_test_downward): Likewise.
	(fma_test_upward): Likewise.

diff --git a/math/libm-test.inc b/math/libm-test.inc
index e828fc2..1e067fe 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -4616,6 +4616,8 @@ fma_test (void)
   TEST_fff_f (fma, 0x1.fffffep+127, 0x1.001p+0, -0x1.fffffep+127, 0x1.fffffep+115);
   TEST_fff_f (fma, -0x1.fffffep+127, 0x1.fffffep+0, 0x1.fffffep+127, -0x1.fffffap+127);
   TEST_fff_f (fma, 0x1.fffffep+127, 2.0, -0x1.fffffep+127, 0x1.fffffep+127);
+  TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00004p-127, UNDERFLOW_EXCEPTION);
+  TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00004p-127, UNDERFLOW_EXCEPTION);
 #endif
 #if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
   TEST_fff_f (fma, 0x1.7fp+13, 0x1.0000000000001p+0, 0x1.ffep-48, 0x1.7f00000000001p+13);
@@ -4635,10 +4637,11 @@ fma_test (void)
   TEST_fff_f (fma, 0x1.4000004p-967, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000003p-1022, UNDERFLOW_EXCEPTION);
   TEST_fff_f (fma, 0x1.4p-967, -0x1p-106, -0x0.000001p-1022, -0x0.0000010000002p-1022, UNDERFLOW_EXCEPTION);
   TEST_fff_f (fma, -0x1.19cab66d73e17p-959, 0x1.c7108a8c5ff51p-107, -0x0.80b0ad65d9b64p-1022, -0x0.80b0ad65d9d59p-1022, UNDERFLOW_EXCEPTION);
-  /* Sometimes the FE_UNDERFLOW is not set, so be prepared.  See Bug 14152.  */
-  TEST_fff_f (fma, -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022, UNDERFLOW_EXCEPTION_OK);
+  TEST_fff_f (fma, -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022, UNDERFLOW_EXCEPTION);
   TEST_fff_f (fma, 0x1.153d650bb9f06p-907, 0x1.2d01230d48407p-125, -0x0.b278d5acfc3cp-1022, -0x0.b22757123bbe9p-1022, UNDERFLOW_EXCEPTION);
   TEST_fff_f (fma, -0x1.fffffffffffffp-711, 0x1.fffffffffffffp-275, 0x1.fffffe00007ffp-983, 0x1.7ffffe00007ffp-983);
+  TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
+  TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
 #endif
 #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
   TEST_fff_f (fma, -0x8.03fcp+3696L, 0xf.fffffffffffffffp-6140L, 0x8.3ffffffffffffffp-2450L, -0x8.01ecp-2440L);
@@ -4646,8 +4649,9 @@ fma_test (void)
   TEST_fff_f (fma, 0xc.7fc000003ffffffp-1194L, 0x8.1e0003fffffffffp+15327L, -0x8.fffep+14072L, 0xc.ae9f164020effffp+14136L);
   TEST_fff_f (fma, -0x8.0001fc000000003p+1798L, 0xcp-2230L, 0x8.f7e000000000007p-468L, -0xc.0002f9ffee10404p-429L);
   TEST_fff_f (fma, 0xc.0000000000007ffp+10130L, -0x8.000000000000001p+4430L, 0xc.07000000001ffffp+14513L, -0xb.fffffffffffd7e4p+14563L);
-  /* Bug 14152: underflow exception may be missing.  */
-  TEST_fff_f (fma, 0xb.ffffp-4777L, 0x8.000000fffffffffp-11612L, -0x0.3800fff8p-16385L, 0x5.c7fe80c7ffeffffp-16385L, UNDERFLOW_EXCEPTION_OK);
+  TEST_fff_f (fma, 0xb.ffffp-4777L, 0x8.000000fffffffffp-11612L, -0x0.3800fff8p-16385L, 0x5.c7fe80c7ffeffffp-16385L, UNDERFLOW_EXCEPTION);
+  TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+  TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
 #endif
 #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
   TEST_fff_f (fma, 0x1.bb2de33e02ccbbfa6e245a7c1f71p-2584L, -0x1.6b500daf0580d987f1bc0cadfcddp-13777L, 0x1.613cd91d9fed34b33820e5ab9d8dp-16378L, -0x1.3a79fb50eb9ce887cffa0f09bd9fp-16360L);
@@ -4663,6 +4667,8 @@ fma_test (void)
   TEST_fff_f (fma, 0x1.00000000000007ffffffffffffffp-9045L, -0x1.ffffffffffff80000001ffffffffp+4773L, -0x1.f8p-4316L, -0x1.00000000000f88000000fffffdffp-4271L);
   TEST_fff_f (fma, 0x1.4e922764c90701d4a2f21d01893dp-8683L, -0x1.955a12e2d7c9447c27fa022fc865p+212L, -0x1.e9634462eaef96528b90b6944578p-8521L, -0x1.08e1783184a371943d3598e10865p-8470L);
   TEST_fff_f (fma, 0x1.801181509c03bdbef10d6165588cp-15131L, 0x1.ad86f8e57d3d40bfa8007780af63p-368L, -0x1.6e9df0dab1c9f1d7a6043c390741p-15507L, 0x1.417c9b2b15e2ad57dc9e0e920844p-15498L);
+  TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
+  TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
 #endif
 
   END (fma);
@@ -4717,6 +4723,23 @@ fma_test_towardzero (void)
       TEST_fff_f (fma, -min_value, min_value, minus_zero, minus_zero, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -min_value, -min_value, plus_zero, plus_zero, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -min_value, -min_value, minus_zero, plus_zero, UNDERFLOW_EXCEPTION);
+
+#if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
+      TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00004p-127, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00004p-127, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
+      TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
+      TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
+      TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
+#endif
     }
 
   fesetround (save_round_mode);
@@ -4773,6 +4796,23 @@ fma_test_downward (void)
       TEST_fff_f (fma, -min_value, min_value, minus_zero, -min_subnorm_value, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -min_value, -min_value, plus_zero, plus_zero, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -min_value, -min_value, minus_zero, plus_zero, UNDERFLOW_EXCEPTION);
+
+#if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
+      TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00004p-127, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00008p-127, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
+      TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000004p-1023, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
+      TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000008p-16383L, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
+      TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+#endif
     }
 
   fesetround (save_round_mode);
@@ -4829,6 +4869,23 @@ fma_test_upward (void)
       TEST_fff_f (fma, -min_value, min_value, minus_zero, minus_zero, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -min_value, -min_value, plus_zero, min_subnorm_value, UNDERFLOW_EXCEPTION);
       TEST_fff_f (fma, -min_value, -min_value, minus_zero, min_subnorm_value, UNDERFLOW_EXCEPTION);
+
+#if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
+      TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00008p-127, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00004p-127, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
+      TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000004p-1023, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
+      TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000008p-16383L, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
+      TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+      TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
+#endif
     }
 
   fesetround (save_round_mode);
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c
index 5e21461..108fd66 100644
--- a/sysdeps/ieee754/dbl-64/s_fma.c
+++ b/sysdeps/ieee754/dbl-64/s_fma.c
@@ -210,20 +210,14 @@ __fma (double x, double y, double z)
 	{
 	  /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
 	     v.ieee.mantissa1 & 1 is the round bit and j is our sticky
-	     bit.  In round-to-nearest 001 rounds down like 00,
-	     011 rounds up, even though 01 rounds down (thus we need
-	     to adjust), 101 rounds down like 10 and 111 rounds up
-	     like 11.  */
-	  if ((v.ieee.mantissa1 & 3) == 1)
-	    {
-	      v.d *= 0x1p-106;
-	      if (v.ieee.negative)
-		return v.d - 0x1p-1074 /* __DBL_DENORM_MIN__ */;
-	      else
-		return v.d + 0x1p-1074 /* __DBL_DENORM_MIN__ */;
-	    }
-	  else
-	    return v.d * 0x1p-106;
+	     bit.  */
+	  w.d = 0.0;
+	  w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
+	  w.ieee.negative = v.ieee.negative;
+	  v.ieee.mantissa1 &= ~3U;
+	  v.d *= 0x1p-106;
+	  w.d *= 0x1p-2;
+	  return v.d + w.d;
 	}
       v.ieee.mantissa1 |= j;
       return v.d * 0x1p-106;
diff --git a/sysdeps/ieee754/ldbl-128/s_fmal.c b/sysdeps/ieee754/ldbl-128/s_fmal.c
index 46b3d81..f26eaad 100644
--- a/sysdeps/ieee754/ldbl-128/s_fmal.c
+++ b/sysdeps/ieee754/ldbl-128/s_fmal.c
@@ -211,20 +211,14 @@ __fmal (long double x, long double y, long double z)
 	{
 	  /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
 	     v.ieee.mantissa3 & 1 is the round bit and j is our sticky
-	     bit.  In round-to-nearest 001 rounds down like 00,
-	     011 rounds up, even though 01 rounds down (thus we need
-	     to adjust), 101 rounds down like 10 and 111 rounds up
-	     like 11.  */
-	  if ((v.ieee.mantissa3 & 3) == 1)
-	    {
-	      v.d *= 0x1p-226L;
-	      if (v.ieee.negative)
-		return v.d - 0x1p-16494L /* __LDBL_DENORM_MIN__ */;
-	      else
-		return v.d + 0x1p-16494L /* __LDBL_DENORM_MIN__ */;
-	    }
-	  else
-	    return v.d * 0x1p-226L;
+	     bit.  */
+	  w.d = 0.0L;
+	  w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j;
+	  w.ieee.negative = v.ieee.negative;
+	  v.ieee.mantissa3 &= ~3U;
+	  v.d *= 0x1p-226L;
+	  w.d *= 0x1p-2L;
+	  return v.d + w.d;
 	}
       v.ieee.mantissa3 |= j;
       return v.d * 0x1p-226L;
diff --git a/sysdeps/ieee754/ldbl-96/s_fmal.c b/sysdeps/ieee754/ldbl-96/s_fmal.c
index d125124..7310491 100644
--- a/sysdeps/ieee754/ldbl-96/s_fmal.c
+++ b/sysdeps/ieee754/ldbl-96/s_fmal.c
@@ -211,20 +211,14 @@ __fmal (long double x, long double y, long double z)
 	{
 	  /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
 	     v.ieee.mantissa1 & 1 is the round bit and j is our sticky
-	     bit.  In round-to-nearest 001 rounds down like 00,
-	     011 rounds up, even though 01 rounds down (thus we need
-	     to adjust), 101 rounds down like 10 and 111 rounds up
-	     like 11.  */
-	  if ((v.ieee.mantissa1 & 3) == 1)
-	    {
-	      v.d *= 0x1p-128L;
-	      if (v.ieee.negative)
-		return v.d - 0x1p-16445L /* __LDBL_DENORM_MIN__ */;
-	      else
-		return v.d + 0x1p-16445L /* __LDBL_DENORM_MIN__ */;
-	    }
-	  else
-	    return v.d * 0x1p-128L;
+	     bit.  */
+	  w.d = 0.0L;
+	  w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
+	  w.ieee.negative = v.ieee.negative;
+	  v.ieee.mantissa1 &= ~3U;
+	  v.d *= 0x1p-128L;
+	  w.d *= 0x1p-2L;
+	  return v.d + w.d;
 	}
       v.ieee.mantissa1 |= j;
       return v.d * 0x1p-128L;

-- 
Joseph S. Myers
joseph@codesourcery.com


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]