Revert exp reimplementation (causes test failures).

Revert: 2017-12-19 Joseph Myers <joseph@codesourcery.com> * sysdeps/x86_64/fpu/libm-test-ulps: Update. 2017-12-19 Patrick McGehearty <patrick.mcgehearty@oracle.com> * sysdeps/ieee754/dbl-64/e_exp.c: Include <math-svid-compat.h> and <errno.h>. Include "eexp.tbl". (half): New constant. (one): Likewise. (__ieee754_exp): Rewrite. (__slowexp): Remove prototype. * sysdeps/ieee754/dbl-64/eexp.tbl: New file. * sysdeps/ieee754/dbl-64/slowexp.c: Remove file. * sysdeps/i386/fpu/slowexp.c: Likewise. * sysdeps/ia64/fpu/slowexp.c: Likewise. * sysdeps/m68k/m680x0/fpu/slowexp.c: Likewise. * sysdeps/x86_64/fpu/multiarch/slowexp-avx.c: Likewise. * sysdeps/x86_64/fpu/multiarch/slowexp-fma.c: Likewise. * sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c: Likewise. * sysdeps/generic/math_private.h (__slowexp): Remove prototype. * sysdeps/ieee754/dbl-64/e_pow.c: Remove mention of slowexp.c in comment. * sysdeps/powerpc/power4/fpu/Makefile [$(subdir) = math] (CPPFLAGS-slowexp.c): Remove variable. * sysdeps/x86_64/fpu/multiarch/Makefile (libm-sysdep_routines): Remove slowexp-fma, slowexp-fma4 and slowexp-avx. (CFLAGS-slowexp-fma.c): Remove variable. (CFLAGS-slowexp-fma4.c): Likewise. (CFLAGS-slowexp-avx.c): Likewise. * sysdeps/x86_64/fpu/multiarch/e_exp-avx.c (__slowexp): Do not define as macro. * sysdeps/x86_64/fpu/multiarch/e_exp-fma.c (__slowexp): Likewise. * sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c (__slowexp): Likewise. * math/Makefile (type-double-routines): Remove slowexp. * manual/probes.texi (slowexp_p6): Remove. (slowexp_p32): Likewise.
author: Joseph Myers <joseph@codesourcery.com> 2017-12-19 18:11:37 +0000
committer: Joseph Myers <joseph@codesourcery.com> 2017-12-19 18:11:37 +0000
commit: f1e005022ebd246e1541386cd2f3286f008d2d98 (patch)
tree: dc80cd25916cd4cb63da26f9a6e32036157977af /sysdeps/ieee754/dbl-64/e_exp.c
parent: e184ac3a105a4a45b920bf2cdaa701a683c781a2 (diff)
download: glibc-f1e005022ebd246e1541386cd2f3286f008d2d98.tar
glibc-f1e005022ebd246e1541386cd2f3286f008d2d98.tar.gz
glibc-f1e005022ebd246e1541386cd2f3286f008d2d98.tar.bz2
glibc-f1e005022ebd246e1541386cd2f3286f008d2d98.zip
1 files changed, 183 insertions, 215 deletions
diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c
index 6a7122f585..6757a14ce1 100644
--- a/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/sysdeps/ieee754/dbl-64/e_exp.c
@@ -1,4 +1,3 @@
-/* EXP function - Compute double precision exponential */
 /*
  * IBM Accurate Mathematical Library
  * written by International Business Machines Corp.
@@ -24,7 +23,7 @@
 /*           exp1                                                          */
 /*                                                                         */
 /* FILES NEEDED:dla.h endian.h mpa.h mydefs.h uexp.h                       */
-/*              mpa.c mpexp.x                                              */
+/*              mpa.c mpexp.x slowexp.c                                    */
 /*                                                                         */
 /* An ultimate exp routine. Given an IEEE double machine number x          */
 /* it computes the correctly rounded (to nearest) value of e^x             */
@@ -33,238 +32,207 @@
 /*                                                                         */
 /***************************************************************************/
 
-/*  IBM exp(x) replaced by following exp(x) in 2017. IBM exp1(x,xx) remains.  */
-/* exp(x)
-   Hybrid algorithm of Peter Tang's Table driven method (for large
-   arguments) and an accurate table (for small arguments).
-   Written by K.C. Ng, November 1988.
-   Revised by Patrick McGehearty, Nov 2017 to use j/64 instead of j/32
-   Method (large arguments):
-	1. Argument Reduction: given the input x, find r and integer k
-	   and j such that
-	             x = (k+j/64)*(ln2) + r,  |r| <= (1/128)*ln2
-
-	2. exp(x) = 2^k * (2^(j/64) + 2^(j/64)*expm1(r))
-	   a. expm1(r) is approximated by a polynomial:
-	      expm1(r) ~ r + t1*r^2 + t2*r^3 + ... + t5*r^6
-	      Here t1 = 1/2 exactly.
-	   b. 2^(j/64) is represented to twice double precision
-	      as TBL[2j]+TBL[2j+1].
-
-   Note: If divide were fast enough, we could use another approximation
-	 in 2.a:
-	      expm1(r) ~ (2r)/(2-R), R = r - r^2*(t1 + t2*r^2)
-	      (for the same t1 and t2 as above)
-
-   Special cases:
-	exp(INF) is INF, exp(NaN) is NaN;
-	exp(-INF)=  0;
-	for finite argument, only exp(0)=1 is exact.
-
-   Accuracy:
-	According to an error analysis, the error is always less than
-	an ulp (unit in the last place).  The largest errors observed
-	are less than 0.55 ulp for normal results and less than 0.75 ulp
-	for subnormal results.
-
-   Misc. info.
-	For IEEE double
-		if x >  7.09782712893383973096e+02 then exp(x) overflow
-		if x < -7.45133219101941108420e+02 then exp(x) underflow.  */
-
 #include <math.h>
-#include <math-svid-compat.h>
-#include <math_private.h>
-#include <errno.h>
 #include "endian.h"
 #include "uexp.h"
-#include "uexp.tbl"
 #include "mydefs.h"
 #include "MathLib.h"
+#include "uexp.tbl"
+#include <math_private.h>
 #include <fenv.h>
 #include <float.h>
 
-extern double __ieee754_exp (double);
-
-#include "eexp.tbl"
-
-static const double
-  half = 0.5,
-  one = 1.0;
+#ifndef SECTION
+# define SECTION
+#endif
 
+double __slowexp (double);
 
+/* An ultimate exp routine. Given an IEEE double machine number x it computes
+   the correctly rounded (to nearest) value of e^x.  */
 double
-__ieee754_exp (double x_arg)
+SECTION
+__ieee754_exp (double x)
 {
-  double z, t;
+  double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
+  mynumber junk1, junk2, binexp = {{0, 0}};
+  int4 i, j, m, n, ex;
   double retval;
-  int hx, ix, k, j, m;
-  int fe_val;
-  union
-  {
-    int i_part[2];
-    double x;
-  } xx;
-  union
-  {
-    int y_part[2];
-    double y;
-  } yy;
-  xx.x = x_arg;
-
-  ix = xx.i_part[HIGH_HALF];
-  hx = ix & ~0x80000000;
-
-  if (hx < 0x3ff0a2b2)
-    {				/* |x| < 3/2 ln 2 */
-      if (hx < 0x3f862e42)
-	{			/* |x| < 1/64 ln 2 */
-	  if (hx < 0x3ed00000)
-	    {			/* |x| < 2^-18 */
-	      if (hx < 0x3e300000)
-		{
-		  retval = one + xx.x;
-		  return retval;
-		}
-	      retval = one + xx.x * (one + half * xx.x);
-	      return retval;
-	    }
-	  /* Use FE_TONEAREST rounding mode for computing yy.y.
-	     Avoid set/reset of rounding mode if in FE_TONEAREST mode.  */
-	  fe_val = get_rounding_mode ();
-	  if (fe_val == FE_TONEAREST)
-	    {
-	      t = xx.x * xx.x;
-	      yy.y = xx.x + (t * (half + xx.x * t2)
-			     + (t * t) * (t3 + xx.x * t4 + t * t5));
-	      retval = one + yy.y;
-	    }
-	  else
-	    {
-	      libc_fesetround (FE_TONEAREST);
-	      t = xx.x * xx.x;
-	      yy.y = xx.x + (t * (half + xx.x * t2)
-			     + (t * t) * (t3 + xx.x * t4 + t * t5));
-	      retval = one + yy.y;
-	      libc_fesetround (fe_val);
-	    }
-	  return retval;
-	}
-
-      /* Find the multiple of 2^-6 nearest x.  */
-      k = hx >> 20;
-      j = (0x00100000 | (hx & 0x000fffff)) >> (0x40c - k);
-      j = (j - 1) & ~1;
-      if (ix < 0)
-	j += 134;
-      /* Use FE_TONEAREST rounding mode for computing yy.y.
-	 Avoid set/reset of rounding mode if in FE_TONEAREST mode.  */
-      fe_val = get_rounding_mode ();
-      if (fe_val == FE_TONEAREST)
-	{
-	  z = xx.x - TBL2[j];
-	  t = z * z;
-	  yy.y = z + (t * (half + (z * t2))
-		      + (t * t) * (t3 + z * t4 + t * t5));
-	  retval = TBL2[j + 1] + TBL2[j + 1] * yy.y;
-	}
-      else
-	{
-	  libc_fesetround (FE_TONEAREST);
-	  z = xx.x - TBL2[j];
-	  t = z * z;
-	  yy.y = z + (t * (half + (z * t2))
-		      + (t * t) * (t3 + z * t4 + t * t5));
-	  retval = TBL2[j + 1] + TBL2[j + 1] * yy.y;
-	  libc_fesetround (fe_val);
-	}
-      return retval;
-    }
-
-  if (hx >= 0x40862e42)
-    {				/* x is large, infinite, or nan.  */
-      if (hx >= 0x7ff00000)
-	{
-	  if (ix == 0xfff00000 && xx.i_part[LOW_HALF] == 0)
-	    return zero;	/* exp(-inf) = 0.  */
-	  return (xx.x * xx.x);	/* exp(nan/inf) is nan or inf.  */
-	}
-      if (xx.x > threshold1)
-	{			/* Set overflow error condition.  */
-	  retval = hhuge * hhuge;
-	  return retval;
-	}
-      if (-xx.x > threshold2)
-	{			/* Set underflow error condition.  */
-	  double force_underflow = tiny * tiny;
-	  math_force_eval (force_underflow);
-	  retval = force_underflow;
-	  return retval;
-	}
-    }
-
-  /* Use FE_TONEAREST rounding mode for computing yy.y.
-     Avoid set/reset of rounding mode if already in FE_TONEAREST mode.  */
-  fe_val = get_rounding_mode ();
-  if (fe_val == FE_TONEAREST)
-    {
-      t = invln2_64 * xx.x;
-      if (ix < 0)
-	t -= half;
-      else
-	t += half;
-      k = (int) t;
-      j = (k & 0x3f) << 1;
-      m = k >> 6;
-      z = (xx.x - k * ln2_64hi) - k * ln2_64lo;
-
-      /* z is now in primary range.  */
-      t = z * z;
-      yy.y = z + (t * (half + z * t2) + (t * t) * (t3 + z * t4 + t * t5));
-      yy.y = TBL[j] + (TBL[j + 1] + TBL[j] * yy.y);
-    }
-  else
-    {
-      libc_fesetround (FE_TONEAREST);
-      t = invln2_64 * xx.x;
-      if (ix < 0)
-	t -= half;
-      else
-	t += half;
-      k = (int) t;
-      j = (k & 0x3f) << 1;
-      m = k >> 6;
-      z = (xx.x - k * ln2_64hi) - k * ln2_64lo;
-
-      /* z is now in primary range.  */
-      t = z * z;
-      yy.y = z + (t * (half + z * t2) + (t * t) * (t3 + z * t4 + t * t5));
-      yy.y = TBL[j] + (TBL[j + 1] + TBL[j] * yy.y);
-      libc_fesetround (fe_val);
-    }
 
-  if (m < -1021)
-    {
-      yy.y_part[HIGH_HALF] += (m + 54) << 20;
-      retval = twom54 * yy.y;
-      if (retval < DBL_MIN)
-	{
-	  double force_underflow = tiny * tiny;
-	  math_force_eval (force_underflow);
-	}
-      return retval;
-    }
-  yy.y_part[HIGH_HALF] += m << 20;
-  return yy.y;
+  {
+    SET_RESTORE_ROUND (FE_TONEAREST);
+
+    junk1.x = x;
+    m = junk1.i[HIGH_HALF];
+    n = m & hugeint;
+
+    if (n > smallint && n < bigint)
+      {
+	y = x * log2e.x + three51.x;
+	bexp = y - three51.x;	/*  multiply the result by 2**bexp        */
+
+	junk1.x = y;
+
+	eps = bexp * ln_two2.x;	/* x = bexp*ln(2) + t - eps               */
+	t = x - bexp * ln_two1.x;
+
+	y = t + three33.x;
+	base = y - three33.x;	/* t rounded to a multiple of 2**-18      */
+	junk2.x = y;
+	del = (t - base) - eps;	/*  x = bexp*ln(2) + base + del           */
+	eps = del + del * del * (p3.x * del + p2.x);
+
+	binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 1023) << 20;
+
+	i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+	j = (junk2.i[LOW_HALF] & 511) << 1;
+
+	al = coar.x[i] * fine.x[j];
+	bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+	       + coar.x[i + 1] * fine.x[j + 1]);
+
+	rem = (bet + bet * eps) + al * eps;
+	res = al + rem;
+	cor = (al - res) + rem;
+	if (res == (res + cor * err_0))
+	  {
+	    retval = res * binexp.x;
+	    goto ret;
+	  }
+	else
+	  {
+	    retval = __slowexp (x);
+	    goto ret;
+	  }			/*if error is over bound */
+      }
+
+    if (n <= smallint)
+      {
+	retval = 1.0;
+	goto ret;
+      }
+
+    if (n >= badint)
+      {
+	if (n > infint)
+	  {
+	    retval = x + x;
+	    goto ret;
+	  }			/* x is NaN */
+	if (n < infint)
+	  {
+	    if (x > 0)
+	      goto ret_huge;
+	    else
+	      goto ret_tiny;
+	  }
+	/* x is finite,  cause either overflow or underflow  */
+	if (junk1.i[LOW_HALF] != 0)
+	  {
+	    retval = x + x;
+	    goto ret;
+	  }			/*  x is NaN  */
+	retval = (x > 0) ? inf.x : zero;	/* |x| = inf;  return either inf or 0 */
+	goto ret;
+      }
+
+    y = x * log2e.x + three51.x;
+    bexp = y - three51.x;
+    junk1.x = y;
+    eps = bexp * ln_two2.x;
+    t = x - bexp * ln_two1.x;
+    y = t + three33.x;
+    base = y - three33.x;
+    junk2.x = y;
+    del = (t - base) - eps;
+    eps = del + del * del * (p3.x * del + p2.x);
+    i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+    j = (junk2.i[LOW_HALF] & 511) << 1;
+    al = coar.x[i] * fine.x[j];
+    bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+	   + coar.x[i + 1] * fine.x[j + 1]);
+    rem = (bet + bet * eps) + al * eps;
+    res = al + rem;
+    cor = (al - res) + rem;
+    if (m >> 31)
+      {
+	ex = junk1.i[LOW_HALF];
+	if (res < 1.0)
+	  {
+	    res += res;
+	    cor += cor;
+	    ex -= 1;
+	  }
+	if (ex >= -1022)
+	  {
+	    binexp.i[HIGH_HALF] = (1023 + ex) << 20;
+	    if (res == (res + cor * err_0))
+	      {
+		retval = res * binexp.x;
+		goto ret;
+	      }
+	    else
+	      {
+		retval = __slowexp (x);
+		goto check_uflow_ret;
+	      }			/*if error is over bound */
+	  }
+	ex = -(1022 + ex);
+	binexp.i[HIGH_HALF] = (1023 - ex) << 20;
+	res *= binexp.x;
+	cor *= binexp.x;
+	eps = 1.0000000001 + err_0 * binexp.x;
+	t = 1.0 + res;
+	y = ((1.0 - t) + res) + cor;
+	res = t + y;
+	cor = (t - res) + y;
+	if (res == (res + eps * cor))
+	  {
+	    binexp.i[HIGH_HALF] = 0x00100000;
+	    retval = (res - 1.0) * binexp.x;
+	    goto check_uflow_ret;
+	  }
+	else
+	  {
+	    retval = __slowexp (x);
+	    goto check_uflow_ret;
+	  }			/*   if error is over bound    */
+      check_uflow_ret:
+	if (retval < DBL_MIN)
+	  {
+	    double force_underflow = tiny * tiny;
+	    math_force_eval (force_underflow);
+	  }
+	if (retval == 0)
+	  goto ret_tiny;
+	goto ret;
+      }
+    else
+      {
+	binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 767) << 20;
+	if (res == (res + cor * err_0))
+	  retval = res * binexp.x * t256.x;
+	else
+	  retval = __slowexp (x);
+	if (isinf (retval))
+	  goto ret_huge;
+	else
+	  goto ret;
+      }
+  }
+ret:
+  return retval;
+
+ ret_huge:
+  return hhuge * hhuge;
+
+ ret_tiny:
+  return tiny * tiny;
 }
 #ifndef __ieee754_exp
 strong_alias (__ieee754_exp, __exp_finite)
 #endif
 
-#ifndef SECTION
-# define SECTION
-#endif
-
 /* Compute e^(x+xx).  The routine also receives bound of error of previous
    calculation.  If after computing exp the error exceeds the allowed bounds,
    the routine returns a non-positive number.  Otherwise it returns the
author	Joseph Myers <joseph@codesourcery.com>	2017-12-19 18:11:37 +0000
committer	Joseph Myers <joseph@codesourcery.com>	2017-12-19 18:11:37 +0000
commit	f1e005022ebd246e1541386cd2f3286f008d2d98 (patch)
tree	dc80cd25916cd4cb63da26f9a6e32036157977af /sysdeps/ieee754/dbl-64/e_exp.c
parent	e184ac3a105a4a45b920bf2cdaa701a683c781a2 (diff)
download	glibc-f1e005022ebd246e1541386cd2f3286f008d2d98.tar glibc-f1e005022ebd246e1541386cd2f3286f008d2d98.tar.gz glibc-f1e005022ebd246e1541386cd2f3286f008d2d98.tar.bz2 glibc-f1e005022ebd246e1541386cd2f3286f008d2d98.zip