aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/ieee754/dbl-64/e_exp2.c
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/ieee754/dbl-64/e_exp2.c')
-rw-r--r--sysdeps/ieee754/dbl-64/e_exp2.c221
1 files changed, 115 insertions, 106 deletions
diff --git a/sysdeps/ieee754/dbl-64/e_exp2.c b/sysdeps/ieee754/dbl-64/e_exp2.c
index d07e787ec8..96afcf9c20 100644
--- a/sysdeps/ieee754/dbl-64/e_exp2.c
+++ b/sysdeps/ieee754/dbl-64/e_exp2.c
@@ -1,7 +1,6 @@
-/* Double-precision floating point 2^x.
- Copyright (C) 1997-2018 Free Software Foundation, Inc.
+/* Double-precision 2^x function.
+ Copyright (C) 2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Geoffrey Keating <geoffk@ozemail.com.au>
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -17,120 +16,130 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-/* The basic design here is from
- Shmuel Gal and Boris Bachelis, "An Accurate Elementary Mathematical
- Library for the IEEE Floating Point Standard", ACM Trans. Math. Soft.,
- 17 (1), March 1991, pp. 26-45.
- It has been slightly modified to compute 2^x instead of e^x.
- */
-#include <stdlib.h>
-#include <float.h>
-#include <ieee754.h>
#include <math.h>
-#include <fenv.h>
-#include <inttypes.h>
+#include <stdint.h>
#include <math-barriers.h>
-#include <math_private.h>
-#include <fenv_private.h>
-#include <math-underflow.h>
+#include <math-narrow-eval.h>
+#include "math_config.h"
+
+#define N (1 << EXP_TABLE_BITS)
+#define Shift __exp_data.exp2_shift
+#define T __exp_data.tab
+#define C1 __exp_data.exp2_poly[0]
+#define C2 __exp_data.exp2_poly[1]
+#define C3 __exp_data.exp2_poly[2]
+#define C4 __exp_data.exp2_poly[3]
+#define C5 __exp_data.exp2_poly[4]
+
+/* Handle cases that may overflow or underflow when computing the result that
+ is scale*(1+TMP) without intermediate rounding. The bit representation of
+ scale is in SBITS, however it has a computed exponent that may have
+ overflown into the sign bit so that needs to be adjusted before using it as
+ a double. (int32_t)KI is the k used in the argument reduction and exponent
+ adjustment of scale, positive k here means the result may overflow and
+ negative k means the result may underflow. */
+static inline double
+specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
+{
+ double_t scale, y;
-#include "t_exp2.h"
+ if ((ki & 0x80000000) == 0)
+ {
+ /* k > 0, the exponent of scale might have overflowed by 1. */
+ sbits -= 1ull << 52;
+ scale = asdouble (sbits);
+ y = 2 * (scale + scale * tmp);
+ return check_oflow (y);
+ }
+ /* k < 0, need special care in the subnormal range. */
+ sbits += 1022ull << 52;
+ scale = asdouble (sbits);
+ y = scale + scale * tmp;
+ if (y < 1.0)
+ {
+ /* Round y to the right precision before scaling it into the subnormal
+ range to avoid double rounding that can cause 0.5+E/2 ulp error where
+ E is the worst-case ulp error outside the subnormal range. So this
+ is only useful if the goal is better than 1 ulp worst-case error. */
+ double_t hi, lo;
+ lo = scale - y + scale * tmp;
+ hi = 1.0 + y;
+ lo = 1.0 - hi + y + lo;
+ y = math_narrow_eval (hi + lo) - 1.0;
+ /* Avoid -0.0 with downward rounding. */
+ if (WANT_ROUNDING && y == 0.0)
+ y = 0.0;
+ /* The underflow exception needs to be signaled explicitly. */
+ math_force_eval (math_opt_barrier (0x1p-1022) * 0x1p-1022);
+ }
+ y = 0x1p-1022 * y;
+ return check_uflow (y);
+}
-static const double TWO1023 = 8.988465674311579539e+307;
-static const double TWOM1000 = 9.3326361850321887899e-302;
+/* Top 12 bits of a double (sign and exponent bits). */
+static inline uint32_t
+top12 (double x)
+{
+ return asuint64 (x) >> 52;
+}
double
__ieee754_exp2 (double x)
{
- static const double himark = (double) DBL_MAX_EXP;
- static const double lomark = (double) (DBL_MIN_EXP - DBL_MANT_DIG - 1);
-
- /* Check for usual case. */
- if (__glibc_likely (isless (x, himark)))
+ uint32_t abstop;
+ uint64_t ki, idx, top, sbits;
+ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
+ double_t kd, r, r2, scale, tail, tmp;
+
+ abstop = top12 (x) & 0x7ff;
+ if (__glibc_unlikely (abstop - top12 (0x1p-54)
+ >= top12 (512.0) - top12 (0x1p-54)))
{
- /* Exceptional cases: */
- if (__glibc_unlikely (!isgreaterequal (x, lomark)))
- {
- if (isinf (x))
- /* e^-inf == 0, with no error. */
- return 0;
- else
- /* Underflow */
- return TWOM1000 * TWOM1000;
- }
-
- static const double THREEp42 = 13194139533312.0;
- int tval, unsafe;
- double rx, x22, result;
- union ieee754_double ex2_u, scale_u;
-
- if (fabs (x) < DBL_EPSILON / 4.0)
- return 1.0 + x;
-
- {
- SET_RESTORE_ROUND_NOEX (FE_TONEAREST);
-
- /* 1. Argument reduction.
- Choose integers ex, -256 <= t < 256, and some real
- -1/1024 <= x1 <= 1024 so that
- x = ex + t/512 + x1.
-
- First, calculate rx = ex + t/512. */
- rx = x + THREEp42;
- rx -= THREEp42;
- x -= rx; /* Compute x=x1. */
- /* Compute tval = (ex*512 + t)+256.
- Now, t = (tval mod 512)-256 and ex=tval/512 [that's mod, NOT %;
- and /-round-to-nearest not the usual c integer /]. */
- tval = (int) (rx * 512.0 + 256.0);
-
- /* 2. Adjust for accurate table entry.
- Find e so that
- x = ex + t/512 + e + x2
- where -1e6 < e < 1e6, and
- (double)(2^(t/512+e))
- is accurate to one part in 2^-64. */
-
- /* 'tval & 511' is the same as 'tval%512' except that it's always
- positive.
- Compute x = x2. */
- x -= exp2_deltatable[tval & 511];
-
- /* 3. Compute ex2 = 2^(t/512+e+ex). */
- ex2_u.d = exp2_accuratetable[tval & 511];
- tval >>= 9;
- /* x2 is an integer multiple of 2^-54; avoid intermediate
- underflow from the calculation of x22 * x. */
- unsafe = abs (tval) >= -DBL_MIN_EXP - 56;
- ex2_u.ieee.exponent += tval >> unsafe;
- scale_u.d = 1.0;
- scale_u.ieee.exponent += tval - (tval >> unsafe);
-
- /* 4. Approximate 2^x2 - 1, using a fourth-degree polynomial,
- with maximum error in [-2^-10-2^-30,2^-10+2^-30]
- less than 10^-19. */
-
- x22 = (((.0096181293647031180
- * x + .055504110254308625)
- * x + .240226506959100583)
- * x + .69314718055994495) * ex2_u.d;
- math_opt_barrier (x22);
- }
-
- /* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex). */
- result = x22 * x + ex2_u.d;
-
- if (!unsafe)
- return result;
- else
+ if (abstop - top12 (0x1p-54) >= 0x80000000)
+ /* Avoid spurious underflow for tiny x. */
+ /* Note: 0 is common input. */
+ return WANT_ROUNDING ? 1.0 + x : 1.0;
+ if (abstop >= top12 (1024.0))
{
- result *= scale_u.d;
- math_check_force_underflow_nonneg (result);
- return result;
+ if (asuint64 (x) == asuint64 (-INFINITY))
+ return 0.0;
+ if (abstop >= top12 (INFINITY))
+ return 1.0 + x;
+ if (!(asuint64 (x) >> 63))
+ return __math_oflow (0);
+ else if (asuint64 (x) >= asuint64 (-1075.0))
+ return __math_uflow (0);
}
+ if (2 * asuint64 (x) > 2 * asuint64 (928.0))
+ /* Large x is special cased below. */
+ abstop = 0;
}
- else
- /* Return x, if x is a NaN or Inf; or overflow, otherwise. */
- return TWO1023 * x;
+
+ /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
+ /* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
+ kd = math_narrow_eval (x + Shift);
+ ki = asuint64 (kd); /* k. */
+ kd -= Shift; /* k/N for int k. */
+ r = x - kd;
+ /* 2^(k/N) ~= scale * (1 + tail). */
+ idx = 2 * (ki % N);
+ top = ki << (52 - EXP_TABLE_BITS);
+ tail = asdouble (T[idx]);
+ /* This is only a valid scale when -1023*N < k < 1024*N. */
+ sbits = T[idx + 1] + top;
+ /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
+ /* Evaluation is optimized assuming superscalar pipelined execution. */
+ r2 = r * r;
+ /* Without fma the worst case error is 0.5/N ulp larger. */
+ /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
+ tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
+ if (__glibc_unlikely (abstop == 0))
+ return specialcase (tmp, sbits, ki);
+ scale = asdouble (sbits);
+ /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
+ is no spurious underflow here even without fma. */
+ return scale + scale * tmp;
}
+#ifndef __ieee754_exp2
strong_alias (__ieee754_exp2, __exp2_finite)
+#endif