Fix spurious underflows from pow with results close to 1 (bug 14811).

author: Joseph Myers <joseph@codesourcery.com> 2012-11-07 13:03:31 +0000
committer: Joseph Myers <joseph@codesourcery.com> 2012-11-07 13:03:31 +0000
commit: 60e235ee2ae834bb9f7a884f1b192304b9fdcf33 (patch)
tree: 4053809680e9b6def9eab8272fc70ac6c6edb16c
parent: 0ab234b7db4991121eb572bf5c4971bfeb2d49a2 (diff)
download: glibc-60e235ee2ae834bb9f7a884f1b192304b9fdcf33.tar
glibc-60e235ee2ae834bb9f7a884f1b192304b9fdcf33.tar.gz
glibc-60e235ee2ae834bb9f7a884f1b192304b9fdcf33.tar.bz2
glibc-60e235ee2ae834bb9f7a884f1b192304b9fdcf33.zip
7 files changed, 96 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index f811967cfe..aeebbc462c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2012-11-07  Joseph Myers  <joseph@codesourcery.com>
+
+	[BZ #14811]
+	* sysdeps/i386/fpu/e_powl.S (pm79): New object.
+	(__ieee754_powl): Saturate nonzero exponents with absolute value
+	below 0x1p-79 to +/- 0x1p-79.
+	* sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Saturate nonzero
+	exponents with absolute value below 0x1p-64 to +/- 0x1p-64.
+	* sysdeps/ieee754/flt-32/e_powf.c (__ieee754_powf): Saturate
+	nonzero exponents with absolute value below 0x1p-32 to +/-
+	0x1p-32.
+	* sysdeps/x86_64/fpu/e_powl.S (pm79): New object.
+	(__ieee754_powl): Saturate nonzero exponents with absolute value
+	below 0x1p-79 to +/- 0x1p-79.
+	* math/libm-test.inc (pow_test): Add more tests.
+
 2012-11-07  Andreas Krebbel  <Andreas.Krebbel@de.ibm.com>
 
 	* sysdeps/s390/dl-procinfo.c (_dl_s390_cap_flags): Sync
diff --git a/NEWS b/NEWS
index 331d21263f..dbe5e31281 100644
--- a/NEWS
+++ b/NEWS
@@ -18,7 +18,7 @@ Version 2.17
   14518, 14519, 14530, 14532, 14538, 14543, 14544, 14545, 14557, 14562,
   14568, 14576, 14579, 14583, 14587, 14595, 14602, 14610, 14621, 14638,
   14645, 14648, 14652, 14660, 14661, 14669, 14683, 14694, 14716, 14743,
-  14767, 14783, 14784, 14785, 14793, 14796, 14797, 14801, 14805.
+  14767, 14783, 14784, 14785, 14793, 14796, 14797, 14801, 14805, 14811.
 
 * Support for STT_GNU_IFUNC symbols added for s390 and s390x.
   Optimized versions of memcpy, memset, and memcmp added for System z10 and
diff --git a/math/libm-test.inc b/math/libm-test.inc
index a52ce6aa2d..74488e7b6a 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -7743,10 +7743,12 @@ pow_test (void)
   TEST_ff_f (pow, plus_infty, 1e-7L, plus_infty);
   TEST_ff_f (pow, plus_infty, 1, plus_infty);
   TEST_ff_f (pow, plus_infty, 1e7L, plus_infty);
+  TEST_ff_f (pow, plus_infty, min_subnorm_value, plus_infty);
 
   TEST_ff_f (pow, plus_infty, -1e-7L, 0);
   TEST_ff_f (pow, plus_infty, -1, 0);
   TEST_ff_f (pow, plus_infty, -1e7L, 0);
+  TEST_ff_f (pow, plus_infty, -min_subnorm_value, 0);
 
   TEST_ff_f (pow, minus_infty, 1, minus_infty);
   TEST_ff_f (pow, minus_infty, 11, minus_infty);
@@ -7759,6 +7761,7 @@ pow_test (void)
   TEST_ff_f (pow, minus_infty, 1.1L, plus_infty);
   TEST_ff_f (pow, minus_infty, 11.1L, plus_infty);
   TEST_ff_f (pow, minus_infty, 1001.1L, plus_infty);
+  TEST_ff_f (pow, minus_infty, min_subnorm_value, plus_infty);
 
   TEST_ff_f (pow, minus_infty, -1, minus_zero);
   TEST_ff_f (pow, minus_infty, -11, minus_zero);
@@ -7771,6 +7774,7 @@ pow_test (void)
   TEST_ff_f (pow, minus_infty, -1.1L, 0);
   TEST_ff_f (pow, minus_infty, -11.1L, 0);
   TEST_ff_f (pow, minus_infty, -1001.1L, 0);
+  TEST_ff_f (pow, minus_infty, -min_subnorm_value, 0);
 #endif
 
   TEST_ff_f (pow, nan_value, nan_value, nan_value);
@@ -7793,6 +7797,8 @@ pow_test (void)
   TEST_ff_f (pow, nan_value, minus_infty, nan_value);
   TEST_ff_f (pow, nan_value, 2.5, nan_value);
   TEST_ff_f (pow, nan_value, -2.5, nan_value);
+  TEST_ff_f (pow, nan_value, min_subnorm_value, nan_value);
+  TEST_ff_f (pow, nan_value, -min_subnorm_value, nan_value);
 
   TEST_ff_f (pow, 1, plus_infty, 1);
   TEST_ff_f (pow, -1, plus_infty, 1);
@@ -7806,6 +7812,8 @@ pow_test (void)
   TEST_ff_f (pow, 1, 0x1p63L, 1);
   TEST_ff_f (pow, 1, 0x1p64L, 1);
   TEST_ff_f (pow, 1, 0x1p72L, 1);
+  TEST_ff_f (pow, 1, min_subnorm_value, 1);
+  TEST_ff_f (pow, 1, -min_subnorm_value, 1);
 
   /* pow (x, +-0) == 1.  */
   TEST_ff_f (pow, plus_infty, 0, 1);
@@ -7825,6 +7833,10 @@ pow_test (void)
   TEST_ff_f (pow, -0.1L, -1.1L, nan_value, INVALID_EXCEPTION);
   TEST_ff_f (pow, -10.1L, 1.1L, nan_value, INVALID_EXCEPTION);
   TEST_ff_f (pow, -10.1L, -1.1L, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.01L, min_subnorm_value, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.01L, -min_subnorm_value, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.0L, min_subnorm_value, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.0L, -min_subnorm_value, nan_value, INVALID_EXCEPTION);
 
   errno = 0;
   TEST_ff_f (pow, 0, -1, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
@@ -7910,6 +7922,9 @@ pow_test (void)
   TEST_ff_f (pow, 0, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
   check_int ("errno for pow(0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
   errno = 0;
+  TEST_ff_f (pow, 0, -min_subnorm_value, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
+  check_int ("errno for pow(0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
+  errno = 0;
   TEST_ff_f (pow, 0, -0x1p24, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
   check_int ("errno for pow(0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
   errno = 0;
@@ -7925,6 +7940,9 @@ pow_test (void)
   TEST_ff_f (pow, minus_zero, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
   check_int ("errno for pow(-0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
   errno = 0;
+  TEST_ff_f (pow, minus_zero, -min_subnorm_value, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
+  check_int ("errno for pow(-0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
+  errno = 0;
   TEST_ff_f (pow, minus_zero, -0x1p24, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
   check_int ("errno for pow(-0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
   errno = 0;
@@ -8114,12 +8132,14 @@ pow_test (void)
   TEST_ff_f (pow, 0.0, 0x1p24, 0.0);
   TEST_ff_f (pow, 0.0, 0x1p127, 0.0);
   TEST_ff_f (pow, 0.0, max_value, 0.0);
+  TEST_ff_f (pow, 0.0, min_subnorm_value, 0.0);
 
   /* pow (-0, y) == +0 for y > 0 and not an odd integer.  */
   TEST_ff_f (pow, minus_zero, 4, 0.0);
   TEST_ff_f (pow, minus_zero, 0x1p24, 0.0);
   TEST_ff_f (pow, minus_zero, 0x1p127, 0.0);
   TEST_ff_f (pow, minus_zero, max_value, 0.0);
+  TEST_ff_f (pow, minus_zero, min_subnorm_value, 0.0);
 
   TEST_ff_f (pow, 16, 0.25L, 2);
   TEST_ff_f (pow, 0x1p64L, 0.125L, 256);
@@ -8407,6 +8427,15 @@ pow_test (void)
   TEST_ff_f (pow, 0x1.0000000000001p0L, -0x1.23456789abcdfp61L, 1.0118762747828234466621210689458255908670e-253L);
 #endif
 
+  TEST_ff_f (pow, min_subnorm_value, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, min_subnorm_value, -min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, max_value, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, max_value, -min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 0.99L, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 0.99L, -min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 1.01L, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 1.01L, -min_subnorm_value, 1.0L);
+
   TEST_ff_f (pow, 2.0L, -100000.0L, plus_zero, UNDERFLOW_EXCEPTION);
 
   END (pow);
diff --git a/sysdeps/i386/fpu/e_powl.S b/sysdeps/i386/fpu/e_powl.S
index 933418cf82..ac4842cf63 100644
--- a/sysdeps/i386/fpu/e_powl.S
+++ b/sysdeps/i386/fpu/e_powl.S
@@ -38,6 +38,9 @@ p64:	.byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
 	.type p78,@object
 p78:	.byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44
 	ASM_SIZE_DIRECTIVE(p78)
+	.type pm79,@object
+pm79:	.byte 0, 0, 0, 0, 0, 0, 0, 0x3b
+	ASM_SIZE_DIRECTIVE(pm79)
 
 	.section .rodata.cst16,"aM",@progbits,16
 
@@ -120,9 +123,25 @@ ENTRY(__ieee754_powl)
 	fucomp	%st(1)		// y : x
 	fnstsw
 	sahf
-	jne	3f
+	je	9f
 
-	/* OK, we have an integer value for y.  */
+	// If y has absolute value at most 0x1p-79, then any finite
+	// nonzero x will result in 1.  Saturate y to those bounds to
+	// avoid underflow in the calculation of y*log2(x).
+	fld	%st		// y : y : x
+	fabs			// |y| : y : x
+	fcompl	MO(pm79)	// y : x
+	fnstsw
+	sahf
+	jnc	3f
+	fstp	%st(0)		// pop y
+	fldl	MO(pm79)	// 0x1p-79 : x
+	testb	$2, %dl
+	jnz	3f		// y > 0
+	fchs			// -0x1p-79 : x
+	jmp	3f
+
+9:	/* OK, we have an integer value for y.  */
 	popl	%eax
 	cfi_adjust_cfa_offset (-4)
 	popl	%edx
diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c
index 3fd5e6507f..513171891c 100644
--- a/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/sysdeps/ieee754/dbl-64/e_pow.c
@@ -90,6 +90,10 @@ __ieee754_pow(double x, double y) {
 
     SET_RESTORE_ROUND (FE_TONEAREST);
 
+    /* Avoid internal underflow for tiny y.  The exact value of y does
+       not matter if |y| <= 2**-64.  */
+    if (ABS (y) < 0x1p-64)
+      y = y < 0 ? -0x1p-64 : 0x1p-64;
     z = log1(x,&aa,&error);                                 /* x^y  =e^(y log (X)) */
     t = y*134217729.0;
     y1 = t - (t-y);
diff --git a/sysdeps/ieee754/flt-32/e_powf.c b/sysdeps/ieee754/flt-32/e_powf.c
index 43069479a5..12c408f93c 100644
--- a/sysdeps/ieee754/flt-32/e_powf.c
+++ b/sysdeps/ieee754/flt-32/e_powf.c
@@ -141,6 +141,10 @@ __ieee754_powf(float x, float y)
 	    t2 = v-(t1-u);
 	} else {
 	    float s2,s_h,s_l,t_h,t_l;
+	    /* Avoid internal underflow for tiny y.  The exact value
+	       of y does not matter if |y| <= 2**-32.  */
+	    if (iy < 0x2f800000)
+	      SET_FLOAT_WORD (y, (hy & 0x80000000) | 0x2f800000);
 	    n = 0;
 	/* take care subnormal number */
 	    if(ix<0x00800000)
diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S
index 4fe23c06af..1b3718522d 100644
--- a/sysdeps/x86_64/fpu/e_powl.S
+++ b/sysdeps/x86_64/fpu/e_powl.S
@@ -38,6 +38,9 @@ p64:	.byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
 	.type p78,@object
 p78:	.byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44
 	ASM_SIZE_DIRECTIVE(p78)
+	.type pm79,@object
+pm79:	.byte 0, 0, 0, 0, 0, 0, 0, 0x3b
+	ASM_SIZE_DIRECTIVE(pm79)
 
 	.section .rodata.cst16,"aM",@progbits,16
 
@@ -110,9 +113,25 @@ ENTRY(__ieee754_powl)
 	fistpll	-8(%rsp)	// y : x
 	fildll	-8(%rsp)	// int(y) : y : x
 	fucomip	%st(1),%st	// y : x
-	jne	3f
+	je	9f
+
+	// If y has absolute value at most 0x1p-79, then any finite
+	// nonzero x will result in 1.  Saturate y to those bounds to
+	// avoid underflow in the calculation of y*log2(x).
+	fldl	MO(pm79)	// 0x1p-79 : y : x
+	fld	%st(1)		// y : 0x1p-79 : y : x
+	fabs			// |y| : 0x1p-79 : y : x
+	fcomip	%st(1), %st	// 0x1p-79 : y : x
+	fstp	%st(0)		// y : x
+	jnc	3f
+	fstp	%st(0)		// pop y
+	fldl	MO(pm79)	// 0x1p-79 : x
+	testb	$2, %dl
+	jnz	3f		// y > 0
+	fchs			// -0x1p-79 : x
+	jmp	3f
 
-	/* OK, we have an integer value for y.  */
+9:	/* OK, we have an integer value for y.  */
 	mov	-8(%rsp),%eax
 	mov	-4(%rsp),%edx
 	orl	$0, %edx
author	Joseph Myers <joseph@codesourcery.com>	2012-11-07 13:03:31 +0000
committer	Joseph Myers <joseph@codesourcery.com>	2012-11-07 13:03:31 +0000
commit	60e235ee2ae834bb9f7a884f1b192304b9fdcf33 (patch)
tree	4053809680e9b6def9eab8272fc70ac6c6edb16c
parent	0ab234b7db4991121eb572bf5c4971bfeb2d49a2 (diff)
download	glibc-60e235ee2ae834bb9f7a884f1b192304b9fdcf33.tar glibc-60e235ee2ae834bb9f7a884f1b192304b9fdcf33.tar.gz glibc-60e235ee2ae834bb9f7a884f1b192304b9fdcf33.tar.bz2 glibc-60e235ee2ae834bb9f7a884f1b192304b9fdcf33.zip