Fix x86 pow inaccuracy for large integer exponents (bug 706).

author: Joseph Myers <joseph@codesourcery.com> 2012-04-09 09:42:05 +0000
committer: Joseph Myers <joseph@codesourcery.com> 2012-04-09 09:42:05 +0000
commit: c483f6b4a4277bc209820efc1ae35d976af57b4e (patch)
tree: 501d3025d498d3e27308f32db0838e19c13f826e /sysdeps
parent: d2de7579f257386ba5c28dfca94fa8aef143b4e0 (diff)
download: glibc-c483f6b4a4277bc209820efc1ae35d976af57b4e.tar
glibc-c483f6b4a4277bc209820efc1ae35d976af57b4e.tar.gz
glibc-c483f6b4a4277bc209820efc1ae35d976af57b4e.tar.bz2
glibc-c483f6b4a4277bc209820efc1ae35d976af57b4e.zip
2 files changed, 64 insertions, 4 deletions
diff --git a/sysdeps/i386/fpu/e_pow.S b/sysdeps/i386/fpu/e_pow.S
index b61a946082..73d2421162 100644
--- a/sysdeps/i386/fpu/e_pow.S
+++ b/sysdeps/i386/fpu/e_pow.S
@@ -32,6 +32,9 @@ limit:	.double 0.29
 	ASM_TYPE_DIRECTIVE(p63,@object)
 p63:	.byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
 	ASM_SIZE_DIRECTIVE(p63)
+	ASM_TYPE_DIRECTIVE(p10,@object)
+p10:	.byte 0, 0, 0, 0, 0, 0, 0x90, 0x40
+	ASM_SIZE_DIRECTIVE(p10)
 
 	.section .rodata.cst16,"aM",@progbits,16
 
@@ -116,7 +119,15 @@ ENTRY(__ieee754_pow)
 	sahf
 	jne	3f
 
-	/* OK, we have an integer value for y.  */
+	/* OK, we have an integer value for y.  If |y| >= 1L<<10, errors
+	   may propagate out of the 11 bits of excess precision, so use
+	   the algorithm for a real exponent instead.  */
+	fld	%st		// y : y : x
+	fabs			// |y| : y : x
+	fcompl	MO(p10)		// y : x
+	fnstsw
+	sahf
+	jnc	2f
 	popl	%eax
 	cfi_adjust_cfa_offset (-4)
 	popl	%edx
@@ -157,7 +168,9 @@ ENTRY(__ieee754_pow)
 
 	cfi_adjust_cfa_offset (8)
 	.align ALIGNARG(4)
-2:	/* y is a large integer (so even).  */
+2:	// y is a large integer (absolute value at least 1L<<10), but
+	// may be odd unless at least 1L<<63.  So it may be necessary
+	// to adjust the sign of a negative result afterwards.
 	fxch			// x : y
 	fabs			// |x| : y
 	fxch			// y : x
@@ -187,9 +200,41 @@ ENTRY(__ieee754_pow)
 	f2xm1			// 2^fract(y*log2(x))-1 : int(y*log2(x))
 	faddl	MO(one)		// 2^fract(y*log2(x)) : int(y*log2(x))
 	fscale			// 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
-	addl	$8, %esp
-	cfi_adjust_cfa_offset (-8)
 	fstp	%st(1)		// 2^fract(y*log2(x))*2^int(y*log2(x))
+	testb	$2, %dh
+	jz	292f
+	// x is negative.  If y is an odd integer, negate the result.
+	fldl	20(%esp)	// y : abs(result)
+	fld	%st		// y : y : abs(result)
+	fabs			// |y| : y : abs(result)
+	fcompl	MO(p63)		// y : abs(result)
+	fnstsw
+	sahf
+	jnc	291f
+
+	// We must find out whether y is an odd integer.
+	fld	%st		// y : y : abs(result)
+	fistpll	(%esp)		// y : abs(result)
+	fildll	(%esp)		// int(y) : y : abs(result)
+	fucompp			// abs(result)
+	fnstsw
+	sahf
+	jne	292f
+
+	// OK, the value is an integer, but is it odd?
+	popl	%eax
+	cfi_adjust_cfa_offset (-4)
+	popl	%edx
+	cfi_adjust_cfa_offset (-4)
+	andb	$1, %al
+	jz	290f		// jump if not odd
+	// It's an odd integer.
+	fchs
+290:	ret
+	cfi_adjust_cfa_offset (8)
+291:	fstp	%st(0)		// abs(result)
+292:	addl	$8, %esp
+	cfi_adjust_cfa_offset (-8)
 	ret
 
 
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index d43955aff8..dce60cfb5b 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -1444,6 +1444,17 @@ Test "log1p (-0.25) == -0.287682072451780927439219005993827432":
 float: 1
 ifloat: 1
 
+# pow
+Test "pow (0x0.ffffffp0, -0x1p24) == 2.7182819094701610539628664526874952929416":
+float: 1
+ifloat: 1
+Test "pow (0x0.ffffffp0, 0x1p24) == 0.3678794302077803437135155590023422899744":
+float: 1
+ifloat: 1
+Test "pow (0x1.000002p0, 0x1p24) == 7.3890552180866447284268641248075832310141":
+float: 1
+ifloat: 1
+
 # pow_downward
 Test "pow_downward (1.0625, 1.125) == 1.070582293028761362162622578677070098674":
 ildouble: 1
@@ -2413,6 +2424,10 @@ Function: "log1p":
 float: 1
 ifloat: 1
 
+Function: "pow":
+float: 1
+ifloat: 1
+
 Function: "pow_downward":
 float: 1
 ifloat: 1
author	Joseph Myers <joseph@codesourcery.com>	2012-04-09 09:42:05 +0000
committer	Joseph Myers <joseph@codesourcery.com>	2012-04-09 09:42:05 +0000
commit	c483f6b4a4277bc209820efc1ae35d976af57b4e (patch)
tree	501d3025d498d3e27308f32db0838e19c13f826e /sysdeps
parent	d2de7579f257386ba5c28dfca94fa8aef143b4e0 (diff)
download	glibc-c483f6b4a4277bc209820efc1ae35d976af57b4e.tar glibc-c483f6b4a4277bc209820efc1ae35d976af57b4e.tar.gz glibc-c483f6b4a4277bc209820efc1ae35d976af57b4e.tar.bz2 glibc-c483f6b4a4277bc209820efc1ae35d976af57b4e.zip