diff options
Diffstat (limited to 'sysdeps/libm-ieee754/e_exp.c')
-rw-r--r-- | sysdeps/libm-ieee754/e_exp.c | 330 |
1 files changed, 171 insertions, 159 deletions
diff --git a/sysdeps/libm-ieee754/e_exp.c b/sysdeps/libm-ieee754/e_exp.c index 9eba853c8f..a6d53eb9df 100644 --- a/sysdeps/libm-ieee754/e_exp.c +++ b/sysdeps/libm-ieee754/e_exp.c @@ -1,167 +1,179 @@ -/* @(#)e_exp.c 5.1 93/09/24 */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: e_exp.c,v 1.8 1995/05/10 20:45:03 jtc Exp $"; -#endif +/* Double-precision floating point e^x. + Copyright (C) 1997, 1998 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Geoffrey Keating <geoffk@ozemail.com.au> -/* __ieee754_exp(x) - * Returns the exponential of x. - * - * Method - * 1. Argument reduction: - * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. - * Given x, find r and integer k such that - * - * x = k*ln2 + r, |r| <= 0.5*ln2. - * - * Here r will be represented as r = hi-lo for better - * accuracy. - * - * 2. Approximation of exp(r) by a special rational function on - * the interval [0,0.34658]: - * Write - * R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... - * We use a special Reme algorithm on [0,0.34658] to generate - * a polynomial of degree 5 to approximate R. The maximum error - * of this polynomial approximation is bounded by 2**-59. In - * other words, - * R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 - * (where z=r*r, and the values of P1 to P5 are listed below) - * and - * | 5 | -59 - * | 2.0+P1*z+...+P5*z - R(z) | <= 2 - * | | - * The computation of exp(r) thus becomes - * 2*r - * exp(r) = 1 + ------- - * R - r - * r*R1(r) - * = 1 + r + ----------- (for better accuracy) - * 2 - R1(r) - * where - * 2 4 10 - * R1(r) = r - (P1*r + P2*r + ... + P5*r ). - * - * 3. Scale back to obtain exp(x): - * From step 1, we have - * exp(x) = 2^k * exp(r) - * - * Special cases: - * exp(INF) is INF, exp(NaN) is NaN; - * exp(-INF) is 0, and - * for finite argument, only exp(0)=1 is exact. - * - * Accuracy: - * according to an error analysis, the error is always less than - * 1 ulp (unit in the last place). - * - * Misc. info. - * For IEEE double - * if x > 7.09782712893383973096e+02 then exp(x) overflow - * if x < -7.45133219101941108420e+02 then exp(x) underflow - * - * Constants: - * The hexadecimal values are the intended ones for the following - * constants. The decimal values may be used, provided that the - * compiler will convert from decimal to binary accurately enough - * to produce the hexadecimal values shown. - */ - -#include "math.h" -#include "math_private.h" - -#ifdef __STDC__ -static const double -#else -static double -#endif -one = 1.0, -halF[2] = {0.5,-0.5,}, -huge = 1.0e+300, -twom1000= 9.33263618503218878990e-302, /* 2**-1000=0x01700000,0*/ -o_threshold= 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */ -u_threshold= -7.45133219101941108420e+02, /* 0xc0874910, 0xD52D3051 */ -ln2HI[2] ={ 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */ - -6.93147180369123816490e-01,},/* 0xbfe62e42, 0xfee00000 */ -ln2LO[2] ={ 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */ - -1.90821492927058770002e-10,},/* 0xbdea39ef, 0x35793c76 */ -invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */ -P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */ -P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */ -P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */ -P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */ -P5 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ - - -#ifdef __STDC__ - double __ieee754_exp(double x) /* default IEEE double exp */ -#else - double __ieee754_exp(x) /* default IEEE double exp */ - double x; + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* How this works: + The basic design here is from + Shmuel Gal and Boris Bachelis, "An Accurate Elementary Mathematical + Library for the IEEE Floating Point Standard", ACM Trans. Math. Soft., + 17 (1), March 1991, pp. 26-45. + + The input value, x, is written as + + x = n * ln(2)_0 + t/512 + delta[t] + x + n * ln(2)_1 + + where: + - n is an integer, 1024 >= n >= -1075; + - ln(2)_0 is the first 43 bits of ln(2), and ln(2)_1 is the remainder, so + that |ln(2)_1| < 2^-32; + - t is an integer, 177 >= t >= -177 + - delta is based on a table entry, delta[t] < 2^-28 + - x is whatever is left, |x| < 2^-10 + + Then e^x is approximated as + + e^x = 2^n_1 ( 2^n_0 e^(t/512 + delta[t]) + + ( 2^n_0 e^(t/512 + delta[t]) + * ( p(x + n * ln(2)_1) + - n*ln(2)_1 + - n*ln(2)_1 * p(x + n * ln(2)_1) ) ) ) + + where + - p(x) is a polynomial approximating e(x)-1; + - e^(t/512 + delta[t]) is obtained from a table; + - n_1 + n_0 = n, so that |n_0| < DBL_MIN_EXP-1. + + If it happens that n_1 == 0 (this is the usual case), that multiplication + is omitted. + */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE #endif +#include <float.h> +#include <ieee754.h> +#include <math.h> +#include <fenv.h> +#include <inttypes.h> +#include <math_private.h> + +extern const float __exp_deltatable[178]; +extern const double __exp_atable[355] /* __attribute__((mode(DF))) */; + +static const volatile double TWO1023 = 8.988465674311579539e+307; +static const volatile double TWOM1000 = 9.3326361850321887899e-302; + +double +__ieee754_exp (double x) { - double y,hi,lo,c,t; - int32_t k,xsb; - u_int32_t hx; - - GET_HIGH_WORD(hx,x); - xsb = (hx>>31)&1; /* sign bit of x */ - hx &= 0x7fffffff; /* high word of |x| */ - - /* filter out non-finite argument */ - if(hx >= 0x40862E42) { /* if |x|>=709.78... */ - if(hx>=0x7ff00000) { - u_int32_t lx; - GET_LOW_WORD(lx,x); - if(((hx&0xfffff)|lx)!=0) - return x+x; /* NaN */ - else return (xsb==0)? x:0.0; /* exp(+-inf)={inf,0} */ - } - if(x > o_threshold) return huge*huge; /* overflow */ - if(x < u_threshold) return twom1000*twom1000; /* underflow */ + static const uint32_t a_minf = 0xff800000; + static const double himark = 709.7827128933840868; + static const double lomark = -745.1332191019412221; + /* Check for usual case. */ + if (isless (x, himark) && isgreater (x, lomark)) + { + static const float TWO43 = 8796093022208.0; + static const float TWO52 = 4503599627370496.0; + /* 1/ln(2). */ + static const double M_1_LN2 = 1.442695040888963387; + /* ln(2), part 1 */ + static const double M_LN2_0 = .6931471805598903302; + /* ln(2), part 2 */ + static const double M_LN2_1 = 5.497923018708371155e-14; + + int tval, unsafe, n_i; + double x22, n, t, dely, result; + union ieee754_double ex2_u, scale_u; + fenv_t oldenv; + + feholdexcept (&oldenv); + fesetround (FE_TONEAREST); + + /* Calculate n. */ + if (x >= 0) + { + n = x * M_1_LN2 + TWO52; + n -= TWO52; } + else + { + n = x * M_1_LN2 - TWO52; + n += TWO52; + } + x = x - n*M_LN2_0; + if (x >= 0) + { + /* Calculate t/512. */ + t = x + TWO43; + t -= TWO43; + x -= t; + + /* Compute tval = t. */ + tval = (int) (t * 512.0); - /* argument reduction */ - if(hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */ - if(hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */ - hi = x-ln2HI[xsb]; lo=ln2LO[xsb]; k = 1-xsb-xsb; - } else { - k = invln2*x+halF[xsb]; - t = k; - hi = x - t*ln2HI[0]; /* t*ln2HI is exact here */ - lo = t*ln2LO[0]; - } - x = hi - lo; - } - else if(hx < 0x3e300000) { /* when |x|<2**-28 */ - if(huge+x>one) return one+x;/* trigger inexact */ + x -= __exp_deltatable[tval]; } - else k = 0; - - /* x is now in primary range */ - t = x*x; - c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))); - if(k==0) return one-((x*c)/(c-2.0)-x); - else y = one-((lo-(x*c)/(2.0-c))-hi); - if(k >= -1021) { - u_int32_t hy; - GET_HIGH_WORD(hy,y); - SET_HIGH_WORD(y,hy+(k<<20)); /* add k to y's exponent */ - return y; - } else { - u_int32_t hy; - GET_HIGH_WORD(hy,y); - SET_HIGH_WORD(y,hy+((k+1000)<<20)); /* add k to y's exponent */ - return y*twom1000; + else + { + /* As above, but x is negative. */ + t = x - TWO43; + t += TWO43; + x -= t; + + tval = (int) (t * 512.0); + + x += __exp_deltatable[-tval]; } + + /* Now, the variable x contains x + n*ln(2)_1. */ + dely = n*M_LN2_1; + + /* Compute ex2 = 2^n_0 e^(t/512+delta[t]). */ + ex2_u.d = __exp_atable[tval+177]; + n_i = (int)n; + /* 'unsafe' is 1 iff n_1 != 0. */ + unsafe = abs(n_i) >= -DBL_MIN_EXP - 1; + ex2_u.ieee.exponent += n_i >> unsafe; + + /* Compute scale = 2^n_1. */ + scale_u.d = 1.0; + scale_u.ieee.exponent += n_i - (n_i >> unsafe); + + /* Approximate e^x2 - 1, using a fourth-degree polynomial, + with maximum error in [-2^-10-2^-28,2^-10+2^-28] + less than 4.9e-19. */ + x22 = (((0.04166666898464281565 + * x + 0.1666666766008501610) + * x + 0.499999999999990008) + * x + 0.9999999999999976685) * x; + /* Allow for impact of dely. */ + x22 -= dely + dely*x22; + + /* Return result. */ + fesetenv (&oldenv); + + result = x22 * ex2_u.d + ex2_u.d; + if (!unsafe) + return result; + else + return result * scale_u.d; + } + /* Exceptional cases: */ + else if (isless (x, himark)) + { + if (x == *(const float *) &a_minf) + /* e^-inf == 0, with no error. */ + return 0; + else + /* Underflow */ + return TWOM1000 * TWOM1000; + } + else + /* Return x, if x is a NaN or Inf; or overflow, otherwise. */ + return TWO1023*x; } |