Fix spurious underflows from pow with results close to 1 (bug 14811).

author Joseph Myers <joseph@codesourcery.com>

Wed, 7 Nov 2012 13:03:31 +0000 (13:03 +0000)

committer Joseph Myers <joseph@codesourcery.com>

Wed, 7 Nov 2012 13:03:31 +0000 (13:03 +0000)
author Joseph Myers <joseph@codesourcery.com>
Wed, 7 Nov 2012 13:03:31 +0000 (13:03 +0000)
committer Joseph Myers <joseph@codesourcery.com>
Wed, 7 Nov 2012 13:03:31 +0000 (13:03 +0000)
diff --git a/ChangeLog b/ChangeLog

index f811967cfe78b1802f5c38ff12ac8c3d876c7a8a..aeebbc462c3be4d17a41966dd905dbc0e0c777d8 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2012-11-07  Joseph Myers  <joseph@codesourcery.com>
+
+       [BZ #14811]
+       * sysdeps/i386/fpu/e_powl.S (pm79): New object.
+       (__ieee754_powl): Saturate nonzero exponents with absolute value
+       below 0x1p-79 to +/- 0x1p-79.
+       * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Saturate nonzero
+       exponents with absolute value below 0x1p-64 to +/- 0x1p-64.
+       * sysdeps/ieee754/flt-32/e_powf.c (__ieee754_powf): Saturate
+       nonzero exponents with absolute value below 0x1p-32 to +/-
+       0x1p-32.
+       * sysdeps/x86_64/fpu/e_powl.S (pm79): New object.
+       (__ieee754_powl): Saturate nonzero exponents with absolute value
+       below 0x1p-79 to +/- 0x1p-79.
+       * math/libm-test.inc (pow_test): Add more tests.
+
  2012-11-07  Andreas Krebbel  <Andreas.Krebbel@de.ibm.com>
  
         * sysdeps/s390/dl-procinfo.c (_dl_s390_cap_flags): Sync
diff --git a/NEWS b/NEWS

index 331d21263fb6f94360e9747c1d11fa267395b67c..dbe5e3128182a2fbfbe277606d751e2b5b45b3d5 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -18,7 +18,7 @@ Version 2.17
    14518, 14519, 14530, 14532, 14538, 14543, 14544, 14545, 14557, 14562,
    14568, 14576, 14579, 14583, 14587, 14595, 14602, 14610, 14621, 14638,
    14645, 14648, 14652, 14660, 14661, 14669, 14683, 14694, 14716, 14743,
-  14767, 14783, 14784, 14785, 14793, 14796, 14797, 14801, 14805.
+  14767, 14783, 14784, 14785, 14793, 14796, 14797, 14801, 14805, 14811.
  
  * Support for STT_GNU_IFUNC symbols added for s390 and s390x.
    Optimized versions of memcpy, memset, and memcmp added for System z10 and
diff --git a/math/libm-test.inc b/math/libm-test.inc

index a52ce6aa2d985a9249417c07f36eb6de5f66131b..74488e7b6a8ae923864991f5ab7f74fd3cc457e2 100644 (file)
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -7743,10 +7743,12 @@ pow_test (void)
    TEST_ff_f (pow, plus_infty, 1e-7L, plus_infty);
    TEST_ff_f (pow, plus_infty, 1, plus_infty);
    TEST_ff_f (pow, plus_infty, 1e7L, plus_infty);
+  TEST_ff_f (pow, plus_infty, min_subnorm_value, plus_infty);
  
    TEST_ff_f (pow, plus_infty, -1e-7L, 0);
    TEST_ff_f (pow, plus_infty, -1, 0);
    TEST_ff_f (pow, plus_infty, -1e7L, 0);
+  TEST_ff_f (pow, plus_infty, -min_subnorm_value, 0);
  
    TEST_ff_f (pow, minus_infty, 1, minus_infty);
    TEST_ff_f (pow, minus_infty, 11, minus_infty);
@@ -7759,6 +7761,7 @@ pow_test (void)
    TEST_ff_f (pow, minus_infty, 1.1L, plus_infty);
    TEST_ff_f (pow, minus_infty, 11.1L, plus_infty);
    TEST_ff_f (pow, minus_infty, 1001.1L, plus_infty);
+  TEST_ff_f (pow, minus_infty, min_subnorm_value, plus_infty);
  
    TEST_ff_f (pow, minus_infty, -1, minus_zero);
    TEST_ff_f (pow, minus_infty, -11, minus_zero);
@@ -7771,6 +7774,7 @@ pow_test (void)
    TEST_ff_f (pow, minus_infty, -1.1L, 0);
    TEST_ff_f (pow, minus_infty, -11.1L, 0);
    TEST_ff_f (pow, minus_infty, -1001.1L, 0);
+  TEST_ff_f (pow, minus_infty, -min_subnorm_value, 0);
  #endif
  
    TEST_ff_f (pow, nan_value, nan_value, nan_value);
@@ -7793,6 +7797,8 @@ pow_test (void)
    TEST_ff_f (pow, nan_value, minus_infty, nan_value);
    TEST_ff_f (pow, nan_value, 2.5, nan_value);
    TEST_ff_f (pow, nan_value, -2.5, nan_value);
+  TEST_ff_f (pow, nan_value, min_subnorm_value, nan_value);
+  TEST_ff_f (pow, nan_value, -min_subnorm_value, nan_value);
  
    TEST_ff_f (pow, 1, plus_infty, 1);
    TEST_ff_f (pow, -1, plus_infty, 1);
@@ -7806,6 +7812,8 @@ pow_test (void)
    TEST_ff_f (pow, 1, 0x1p63L, 1);
    TEST_ff_f (pow, 1, 0x1p64L, 1);
    TEST_ff_f (pow, 1, 0x1p72L, 1);
+  TEST_ff_f (pow, 1, min_subnorm_value, 1);
+  TEST_ff_f (pow, 1, -min_subnorm_value, 1);
  
    /* pow (x, +-0) == 1.  */
    TEST_ff_f (pow, plus_infty, 0, 1);
@@ -7825,6 +7833,10 @@ pow_test (void)
    TEST_ff_f (pow, -0.1L, -1.1L, nan_value, INVALID_EXCEPTION);
    TEST_ff_f (pow, -10.1L, 1.1L, nan_value, INVALID_EXCEPTION);
    TEST_ff_f (pow, -10.1L, -1.1L, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.01L, min_subnorm_value, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.01L, -min_subnorm_value, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.0L, min_subnorm_value, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.0L, -min_subnorm_value, nan_value, INVALID_EXCEPTION);
  
    errno = 0;
    TEST_ff_f (pow, 0, -1, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
@@ -7910,6 +7922,9 @@ pow_test (void)
    TEST_ff_f (pow, 0, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
    check_int ("errno for pow(0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
    errno = 0;
+  TEST_ff_f (pow, 0, -min_subnorm_value, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
+  check_int ("errno for pow(0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
+  errno = 0;
    TEST_ff_f (pow, 0, -0x1p24, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
    check_int ("errno for pow(0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
    errno = 0;
@@ -7925,6 +7940,9 @@ pow_test (void)
    TEST_ff_f (pow, minus_zero, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
    check_int ("errno for pow(-0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
    errno = 0;
+  TEST_ff_f (pow, minus_zero, -min_subnorm_value, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
+  check_int ("errno for pow(-0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
+  errno = 0;
    TEST_ff_f (pow, minus_zero, -0x1p24, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
    check_int ("errno for pow(-0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
    errno = 0;
@@ -8114,12 +8132,14 @@ pow_test (void)
    TEST_ff_f (pow, 0.0, 0x1p24, 0.0);
    TEST_ff_f (pow, 0.0, 0x1p127, 0.0);
    TEST_ff_f (pow, 0.0, max_value, 0.0);
+  TEST_ff_f (pow, 0.0, min_subnorm_value, 0.0);
  
    /* pow (-0, y) == +0 for y > 0 and not an odd integer.  */
    TEST_ff_f (pow, minus_zero, 4, 0.0);
    TEST_ff_f (pow, minus_zero, 0x1p24, 0.0);
    TEST_ff_f (pow, minus_zero, 0x1p127, 0.0);
    TEST_ff_f (pow, minus_zero, max_value, 0.0);
+  TEST_ff_f (pow, minus_zero, min_subnorm_value, 0.0);
  
    TEST_ff_f (pow, 16, 0.25L, 2);
    TEST_ff_f (pow, 0x1p64L, 0.125L, 256);
@@ -8407,6 +8427,15 @@ pow_test (void)
    TEST_ff_f (pow, 0x1.0000000000001p0L, -0x1.23456789abcdfp61L, 1.0118762747828234466621210689458255908670e-253L);
  #endif
  
+  TEST_ff_f (pow, min_subnorm_value, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, min_subnorm_value, -min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, max_value, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, max_value, -min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 0.99L, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 0.99L, -min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 1.01L, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 1.01L, -min_subnorm_value, 1.0L);
+
    TEST_ff_f (pow, 2.0L, -100000.0L, plus_zero, UNDERFLOW_EXCEPTION);
  
    END (pow);
diff --git a/sysdeps/i386/fpu/e_powl.S b/sysdeps/i386/fpu/e_powl.S

index 933418cf82aa111fed5736f9e7e694954e3be2b9..ac4842cf639f873d3c10fa25e4684601b4fc92e1 100644 (file)
--- a/sysdeps/i386/fpu/e_powl.S
+++ b/sysdeps/i386/fpu/e_powl.S
@@ -38,6 +38,9 @@ p64:  .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
         .type p78,@object
  p78:   .byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44
         ASM_SIZE_DIRECTIVE(p78)
+       .type pm79,@object
+pm79:  .byte 0, 0, 0, 0, 0, 0, 0, 0x3b
+       ASM_SIZE_DIRECTIVE(pm79)
  
         .section .rodata.cst16,"aM",@progbits,16
  
@@ -120,9 +123,25 @@ ENTRY(__ieee754_powl)
         fucomp  %st(1)          // y : x
         fnstsw
         sahf
-       jne     3f
+       je      9f
  
-       /* OK, we have an integer value for y.  */
+       // If y has absolute value at most 0x1p-79, then any finite
+       // nonzero x will result in 1.  Saturate y to those bounds to
+       // avoid underflow in the calculation of y*log2(x).
+       fld     %st             // y : y : x
+       fabs                    // |y| : y : x
+       fcompl  MO(pm79)        // y : x
+       fnstsw
+       sahf
+       jnc     3f
+       fstp    %st(0)          // pop y
+       fldl    MO(pm79)        // 0x1p-79 : x
+       testb   $2, %dl
+       jnz     3f              // y > 0
+       fchs                    // -0x1p-79 : x
+       jmp     3f
+
+9:     /* OK, we have an integer value for y.  */
         popl    %eax
         cfi_adjust_cfa_offset (-4)
         popl    %edx
diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c

index 3fd5e6507f05d9b4bdcc2c9fa6c75fddea94d9ff..513171891c9826566fda77ce50b0a2d0a6b247d4 100644 (file)
--- a/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/sysdeps/ieee754/dbl-64/e_pow.c
@@ -90,6 +90,10 @@ __ieee754_pow(double x, double y) {
  
      SET_RESTORE_ROUND (FE_TONEAREST);
  
+    /* Avoid internal underflow for tiny y.  The exact value of y does
+       not matter if |y| <= 2**-64.  */
+    if (ABS (y) < 0x1p-64)
+      y = y < 0 ? -0x1p-64 : 0x1p-64;
      z = log1(x,&aa,&error);                                 /* x^y  =e^(y log (X)) */
      t = y*134217729.0;
      y1 = t - (t-y);
diff --git a/sysdeps/ieee754/flt-32/e_powf.c b/sysdeps/ieee754/flt-32/e_powf.c

index 43069479a5934437b028db165519408cf72f7828..12c408f93c6c7b2326430968cc10ee42f504690c 100644 (file)
--- a/sysdeps/ieee754/flt-32/e_powf.c
+++ b/sysdeps/ieee754/flt-32/e_powf.c
@@ -141,6 +141,10 @@ __ieee754_powf(float x, float y)
             t2 = v-(t1-u);
         } else {
             float s2,s_h,s_l,t_h,t_l;
+           /* Avoid internal underflow for tiny y.  The exact value
+              of y does not matter if |y| <= 2**-32.  */
+           if (iy < 0x2f800000)
+             SET_FLOAT_WORD (y, (hy & 0x80000000) | 0x2f800000);
             n = 0;
         /* take care subnormal number */
             if(ix<0x00800000)
diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S

index 4fe23c06af87a57c6a60f8b5a69f4c0f112eb77d..1b3718522d53ea2b03a26737b8a2f9579bc920ba 100644 (file)
--- a/sysdeps/x86_64/fpu/e_powl.S
+++ b/sysdeps/x86_64/fpu/e_powl.S
@@ -38,6 +38,9 @@ p64:  .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
         .type p78,@object
  p78:   .byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44
         ASM_SIZE_DIRECTIVE(p78)
+       .type pm79,@object
+pm79:  .byte 0, 0, 0, 0, 0, 0, 0, 0x3b
+       ASM_SIZE_DIRECTIVE(pm79)
  
         .section .rodata.cst16,"aM",@progbits,16
  
@@ -110,9 +113,25 @@ ENTRY(__ieee754_powl)
         fistpll -8(%rsp)        // y : x
         fildll  -8(%rsp)        // int(y) : y : x
         fucomip %st(1),%st      // y : x
-       jne     3f
+       je      9f
+
+       // If y has absolute value at most 0x1p-79, then any finite
+       // nonzero x will result in 1.  Saturate y to those bounds to
+       // avoid underflow in the calculation of y*log2(x).
+       fldl    MO(pm79)        // 0x1p-79 : y : x
+       fld     %st(1)          // y : 0x1p-79 : y : x
+       fabs                    // |y| : 0x1p-79 : y : x
+       fcomip  %st(1), %st     // 0x1p-79 : y : x
+       fstp    %st(0)          // y : x
+       jnc     3f
+       fstp    %st(0)          // pop y
+       fldl    MO(pm79)        // 0x1p-79 : x
+       testb   $2, %dl
+       jnz     3f              // y > 0
+       fchs                    // -0x1p-79 : x
+       jmp     3f
  
-       /* OK, we have an integer value for y.  */
+9:     /* OK, we have an integer value for y.  */
         mov     -8(%rsp),%eax
         mov     -4(%rsp),%edx
         orl     $0, %edx
author	Joseph Myers <joseph@codesourcery.com>
	Wed, 7 Nov 2012 13:03:31 +0000 (13:03 +0000)
committer	Joseph Myers <joseph@codesourcery.com>
	Wed, 7 Nov 2012 13:03:31 +0000 (13:03 +0000)
ChangeLog		patch \| blob \| blame \| history
NEWS		patch \| blob \| blame \| history
math/libm-test.inc		patch \| blob \| blame \| history
sysdeps/i386/fpu/e_powl.S		patch \| blob \| blame \| history
sysdeps/ieee754/dbl-64/e_pow.c		patch \| blob \| blame \| history
sysdeps/ieee754/flt-32/e_powf.c		patch \| blob \| blame \| history
sysdeps/x86_64/fpu/e_powl.S		patch \| blob \| blame \| history