3 files changed, 98 insertions, 12 deletions
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c
index 155a773e36..68c63a1987 100644
--- a/sysdeps/ieee754/dbl-64/s_fma.c
+++ b/sysdeps/ieee754/dbl-64/s_fma.c
@@ -56,16 +56,44 @@ __fma (double x, double y, double z)
       if (z == 0 && x != 0 && y != 0)
 	return x * y;
       /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
-	 or if x * y is less than half of DBL_DENORM_MIN,
-	 compute as x * y + z.  */
+	 or if x * y is zero, compute as x * y + z.  */
       if (u.ieee.exponent == 0x7ff
 	  || v.ieee.exponent == 0x7ff
 	  || w.ieee.exponent == 0x7ff
 	  || u.ieee.exponent + v.ieee.exponent
 	     > 0x7ff + IEEE754_DOUBLE_BIAS
-	  || u.ieee.exponent + v.ieee.exponent
-	     < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
+	  || x == 0
+	  || y == 0)
 	return x * y + z;
+      /* If x * y is less than 1/4 of DBL_DENORM_MIN, neither the
+	 result nor whether there is underflow depends on its exact
+	 value, only on its sign.  */
+      if (u.ieee.exponent + v.ieee.exponent
+	  < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
+	{
+	  int neg = u.ieee.negative ^ v.ieee.negative;
+	  double tiny = neg ? -0x1p-1074 : 0x1p-1074;
+	  if (w.ieee.exponent >= 3)
+	    return tiny + z;
+	  /* Scaling up, adding TINY and scaling down produces the
+	     correct result, because in round-to-nearest mode adding
+	     TINY has no effect and in other modes double rounding is
+	     harmless.  But it may not produce required underflow
+	     exceptions.  */
+	  v.d = z * 0x1p54 + tiny;
+	  if (TININESS_AFTER_ROUNDING
+	      ? v.ieee.exponent < 55
+	      : (w.ieee.exponent == 0
+		 || (w.ieee.exponent == 1
+		     && w.ieee.negative != neg
+		     && w.ieee.mantissa1 == 0
+		     && w.ieee.mantissa0 == 0)))
+	    {
+	      volatile double force_underflow = x * y;
+	      (void) force_underflow;
+	    }
+	  return v.d * 0x1p-54;
+	}
       if (u.ieee.exponent + v.ieee.exponent
 	  >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
 	{
diff --git a/sysdeps/ieee754/ldbl-128/s_fmal.c b/sysdeps/ieee754/ldbl-128/s_fmal.c
index 201bff94ca..c6a3d71f1c 100644
--- a/sysdeps/ieee754/ldbl-128/s_fmal.c
+++ b/sysdeps/ieee754/ldbl-128/s_fmal.c
@@ -57,16 +57,46 @@ __fmal (long double x, long double y, long double z)
       if (z == 0 && x != 0 && y != 0)
 	return x * y;
       /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
-	 or if x * y is less than half of LDBL_DENORM_MIN,
-	 compute as x * y + z.  */
+	 or if x * y is zero, compute as x * y + z.  */
       if (u.ieee.exponent == 0x7fff
 	  || v.ieee.exponent == 0x7fff
 	  || w.ieee.exponent == 0x7fff
 	  || u.ieee.exponent + v.ieee.exponent
 	     > 0x7fff + IEEE854_LONG_DOUBLE_BIAS
-	  || u.ieee.exponent + v.ieee.exponent
-	     < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
+	  || x == 0
+	  || y == 0)
 	return x * y + z;
+      /* If x * y is less than 1/4 of LDBL_DENORM_MIN, neither the
+	 result nor whether there is underflow depends on its exact
+	 value, only on its sign.  */
+      if (u.ieee.exponent + v.ieee.exponent
+	  < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
+	{
+	  int neg = u.ieee.negative ^ v.ieee.negative;
+	  long double tiny = neg ? -0x1p-16494L : 0x1p-16494L;
+	  if (w.ieee.exponent >= 3)
+	    return tiny + z;
+	  /* Scaling up, adding TINY and scaling down produces the
+	     correct result, because in round-to-nearest mode adding
+	     TINY has no effect and in other modes double rounding is
+	     harmless.  But it may not produce required underflow
+	     exceptions.  */
+	  v.d = z * 0x1p114L + tiny;
+	  if (TININESS_AFTER_ROUNDING
+	      ? v.ieee.exponent < 115
+	      : (w.ieee.exponent == 0
+		 || (w.ieee.exponent == 1
+		     && w.ieee.negative != neg
+		     && w.ieee.mantissa3 == 0
+		     && w.ieee.mantissa2 == 0
+		     && w.ieee.mantissa1 == 0
+		     && w.ieee.mantissa0 == 0)))
+	    {
+	      volatile long double force_underflow = x * y;
+	      (void) force_underflow;
+	    }
+	  return v.d * 0x1p-114L;
+	}
       if (u.ieee.exponent + v.ieee.exponent
 	  >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
 	{
diff --git a/sysdeps/ieee754/ldbl-96/s_fmal.c b/sysdeps/ieee754/ldbl-96/s_fmal.c
index 91333af353..b5fdcba6bc 100644
--- a/sysdeps/ieee754/ldbl-96/s_fmal.c
+++ b/sysdeps/ieee754/ldbl-96/s_fmal.c
@@ -57,16 +57,44 @@ __fmal (long double x, long double y, long double z)
       if (z == 0 && x != 0 && y != 0)
 	return x * y;
       /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
-	 or if x * y is less than half of LDBL_DENORM_MIN,
-	 compute as x * y + z.  */
+	 or if x * y is zero, compute as x * y + z.  */
       if (u.ieee.exponent == 0x7fff
 	  || v.ieee.exponent == 0x7fff
 	  || w.ieee.exponent == 0x7fff
 	  || u.ieee.exponent + v.ieee.exponent
 	     > 0x7fff + IEEE854_LONG_DOUBLE_BIAS
-	  || u.ieee.exponent + v.ieee.exponent
-	     < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
+	  || x == 0
+	  || y == 0)
 	return x * y + z;
+      /* If x * y is less than 1/4 of LDBL_DENORM_MIN, neither the
+	 result nor whether there is underflow depends on its exact
+	 value, only on its sign.  */
+      if (u.ieee.exponent + v.ieee.exponent
+	  < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
+	{
+	  int neg = u.ieee.negative ^ v.ieee.negative;
+	  long double tiny = neg ? -0x1p-16445L : 0x1p-16445L;
+	  if (w.ieee.exponent >= 3)
+	    return tiny + z;
+	  /* Scaling up, adding TINY and scaling down produces the
+	     correct result, because in round-to-nearest mode adding
+	     TINY has no effect and in other modes double rounding is
+	     harmless.  But it may not produce required underflow
+	     exceptions.  */
+	  v.d = z * 0x1p65L + tiny;
+	  if (TININESS_AFTER_ROUNDING
+	      ? v.ieee.exponent < 66
+	      : (w.ieee.exponent == 0
+		 || (w.ieee.exponent == 1
+		     && w.ieee.negative != neg
+		     && w.ieee.mantissa1 == 0
+		     && w.ieee.mantissa0 == 0x80000000)))
+	    {
+	      volatile long double force_underflow = x * y;
+	      (void) force_underflow;
+	    }
+	  return v.d * 0x1p-65L;
+	}
       if (u.ieee.exponent + v.ieee.exponent
 	  >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
 	{