diff options
Diffstat (limited to 'sysdeps')
-rw-r--r-- | sysdeps/i386/fpu/e_powl.S | 23 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/e_pow.c | 4 | ||||
-rw-r--r-- | sysdeps/ieee754/flt-32/e_powf.c | 4 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/e_powl.S | 23 |
4 files changed, 50 insertions, 4 deletions
diff --git a/sysdeps/i386/fpu/e_powl.S b/sysdeps/i386/fpu/e_powl.S index 933418cf82..ac4842cf63 100644 --- a/sysdeps/i386/fpu/e_powl.S +++ b/sysdeps/i386/fpu/e_powl.S @@ -38,6 +38,9 @@ p64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43 .type p78,@object p78: .byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44 ASM_SIZE_DIRECTIVE(p78) + .type pm79,@object +pm79: .byte 0, 0, 0, 0, 0, 0, 0, 0x3b + ASM_SIZE_DIRECTIVE(pm79) .section .rodata.cst16,"aM",@progbits,16 @@ -120,9 +123,25 @@ ENTRY(__ieee754_powl) fucomp %st(1) // y : x fnstsw sahf - jne 3f + je 9f - /* OK, we have an integer value for y. */ + // If y has absolute value at most 0x1p-79, then any finite + // nonzero x will result in 1. Saturate y to those bounds to + // avoid underflow in the calculation of y*log2(x). + fld %st // y : y : x + fabs // |y| : y : x + fcompl MO(pm79) // y : x + fnstsw + sahf + jnc 3f + fstp %st(0) // pop y + fldl MO(pm79) // 0x1p-79 : x + testb $2, %dl + jnz 3f // y > 0 + fchs // -0x1p-79 : x + jmp 3f + +9: /* OK, we have an integer value for y. */ popl %eax cfi_adjust_cfa_offset (-4) popl %edx diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c index 3fd5e6507f..513171891c 100644 --- a/sysdeps/ieee754/dbl-64/e_pow.c +++ b/sysdeps/ieee754/dbl-64/e_pow.c @@ -90,6 +90,10 @@ __ieee754_pow(double x, double y) { SET_RESTORE_ROUND (FE_TONEAREST); + /* Avoid internal underflow for tiny y. The exact value of y does + not matter if |y| <= 2**-64. */ + if (ABS (y) < 0x1p-64) + y = y < 0 ? -0x1p-64 : 0x1p-64; z = log1(x,&aa,&error); /* x^y =e^(y log (X)) */ t = y*134217729.0; y1 = t - (t-y); diff --git a/sysdeps/ieee754/flt-32/e_powf.c b/sysdeps/ieee754/flt-32/e_powf.c index 43069479a5..12c408f93c 100644 --- a/sysdeps/ieee754/flt-32/e_powf.c +++ b/sysdeps/ieee754/flt-32/e_powf.c @@ -141,6 +141,10 @@ __ieee754_powf(float x, float y) t2 = v-(t1-u); } else { float s2,s_h,s_l,t_h,t_l; + /* Avoid internal underflow for tiny y. The exact value + of y does not matter if |y| <= 2**-32. */ + if (iy < 0x2f800000) + SET_FLOAT_WORD (y, (hy & 0x80000000) | 0x2f800000); n = 0; /* take care subnormal number */ if(ix<0x00800000) diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S index 4fe23c06af..1b3718522d 100644 --- a/sysdeps/x86_64/fpu/e_powl.S +++ b/sysdeps/x86_64/fpu/e_powl.S @@ -38,6 +38,9 @@ p64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43 .type p78,@object p78: .byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44 ASM_SIZE_DIRECTIVE(p78) + .type pm79,@object +pm79: .byte 0, 0, 0, 0, 0, 0, 0, 0x3b + ASM_SIZE_DIRECTIVE(pm79) .section .rodata.cst16,"aM",@progbits,16 @@ -110,9 +113,25 @@ ENTRY(__ieee754_powl) fistpll -8(%rsp) // y : x fildll -8(%rsp) // int(y) : y : x fucomip %st(1),%st // y : x - jne 3f + je 9f + + // If y has absolute value at most 0x1p-79, then any finite + // nonzero x will result in 1. Saturate y to those bounds to + // avoid underflow in the calculation of y*log2(x). + fldl MO(pm79) // 0x1p-79 : y : x + fld %st(1) // y : 0x1p-79 : y : x + fabs // |y| : 0x1p-79 : y : x + fcomip %st(1), %st // 0x1p-79 : y : x + fstp %st(0) // y : x + jnc 3f + fstp %st(0) // pop y + fldl MO(pm79) // 0x1p-79 : x + testb $2, %dl + jnz 3f // y > 0 + fchs // -0x1p-79 : x + jmp 3f - /* OK, we have an integer value for y. */ +9: /* OK, we have an integer value for y. */ mov -8(%rsp),%eax mov -4(%rsp),%edx orl $0, %edx |