diff options
author | Joseph Myers <joseph@codesourcery.com> | 2012-09-29 18:31:54 +0000 |
---|---|---|
committer | Joseph Myers <joseph@codesourcery.com> | 2012-09-29 18:31:54 +0000 |
commit | 8ec5b01346114da38e806ca1867da688d3a360e2 (patch) | |
tree | 8a55bd704c2a6ca8164202217e2048bb33a7dacb | |
parent | b1fa802e1ad4104060fe93b4b3b078ba46be0933 (diff) | |
download | glibc-8ec5b01346114da38e806ca1867da688d3a360e2.tar glibc-8ec5b01346114da38e806ca1867da688d3a360e2.tar.gz glibc-8ec5b01346114da38e806ca1867da688d3a360e2.tar.bz2 glibc-8ec5b01346114da38e806ca1867da688d3a360e2.zip |
Fix sign of exact zero return from fma (bug 14638).
-rw-r--r-- | ChangeLog | 18 | ||||
-rw-r--r-- | NEWS | 2 | ||||
-rw-r--r-- | math/libm-test.inc | 174 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/s_fma.c | 5 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/s_fmaf.c | 7 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-128/s_fma.c | 8 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-128/s_fmal.c | 5 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-96/s_fma.c | 6 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-96/s_fmal.c | 5 |
9 files changed, 227 insertions, 3 deletions
@@ -1,3 +1,21 @@ +2012-09-29 Joseph Myers <joseph@codesourcery.com> + + [BZ #14638] + * sysdeps/ieee754/dbl-64/s_fma.c (__fma): Use x * y + z for exact + 0 + 0. + * sysdeps/ieee754/dbl-64/s_fmaf.c (__fmaf): Use original rounding + mode for addition resulting in exact zero. + * sysdeps/ieee754/ldbl-128/s_fma.c (__fma): Likewise. + * sysdeps/ieee754/ldbl-128/s_fmal.c (__fmal): Use x * y + z for + exact 0 + 0. + * sysdeps/ieee754/ldbl-96/s_fma.c (__fma): Likewise. + * sysdeps/ieee754/ldbl-96/s_fmal.c (__fmal): Likewise. + * math/libm-test.inc (fma_test): Add more tests. + (fma_test_towardzero): New function. + (fma_test_downward): Likewise. + (fma_test_upward): Likewise. + (main): Call the new functions. + 2012-09-28 David S. Miller <davem@davemloft.net> * sysdeps/sparc/fpu/libm-test-ulps: Fix garbage in file. @@ -15,7 +15,7 @@ Version 2.17 14195, 14237, 14252, 14283, 14298, 14303, 14307, 14328, 14331, 14336, 14337, 14347, 14349, 14376, 14459, 14476, 14505, 14510, 14516, 14518, 14519, 14530, 14532, 14538, 14543, 14544, 14545, 14562, 14576, 14579, - 14583, 14587, 14621. + 14583, 14587, 14621, 14638. * Support for STT_GNU_IFUNC symbols added for s390 and s390x. Optimized versions of memcpy, memset, and memcmp added for System z10 and diff --git a/math/libm-test.inc b/math/libm-test.inc index e8398bd0ee..007eea1f30 100644 --- a/math/libm-test.inc +++ b/math/libm-test.inc @@ -4546,6 +4546,36 @@ fma_test (void) TEST_fff_f (fma, minus_infty, minus_infty, plus_infty, plus_infty); TEST_fff_f (fma, plus_infty, minus_infty, minus_infty, minus_infty); + TEST_fff_f (fma, plus_zero, plus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, plus_zero, minus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, minus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, minus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, plus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, minus_zero, plus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, minus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, minus_zero, minus_zero, minus_zero, plus_zero); + TEST_fff_f (fma, 1.0, plus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, 1.0, plus_zero, minus_zero, plus_zero); + TEST_fff_f (fma, 1.0, minus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, 1.0, minus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, -1.0, plus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, -1.0, plus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, -1.0, minus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, -1.0, minus_zero, minus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, 1.0, plus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, 1.0, minus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, -1.0, plus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, -1.0, minus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, 1.0, plus_zero, plus_zero); + TEST_fff_f (fma, minus_zero, 1.0, minus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, -1.0, plus_zero, plus_zero); + TEST_fff_f (fma, minus_zero, -1.0, minus_zero, plus_zero); + + TEST_fff_f (fma, 1.0, 1.0, -1.0, plus_zero); + TEST_fff_f (fma, 1.0, -1.0, 1.0, plus_zero); + TEST_fff_f (fma, -1.0, 1.0, 1.0, plus_zero); + TEST_fff_f (fma, -1.0, -1.0, -1.0, plus_zero); + #if defined (TEST_FLOAT) && FLT_MANT_DIG == 24 TEST_fff_f (fma, 0x1.7ff8p+13, 0x1.000002p+0, 0x1.ffffp-24, 0x1.7ff802p+13); TEST_fff_f (fma, 0x1.fffp+0, 0x1.00001p+0, -0x1.fffp+0, 0x1.fffp-20); @@ -4608,6 +4638,147 @@ fma_test (void) static void +fma_test_towardzero (void) +{ + int save_round_mode; + START (fma_towardzero); + + save_round_mode = fegetround (); + + if (!fesetround (FE_TOWARDZERO)) + { + TEST_fff_f (fma, plus_zero, plus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, plus_zero, minus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, minus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, minus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, plus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, minus_zero, plus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, minus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, minus_zero, minus_zero, minus_zero, plus_zero); + TEST_fff_f (fma, 1.0, plus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, 1.0, plus_zero, minus_zero, plus_zero); + TEST_fff_f (fma, 1.0, minus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, 1.0, minus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, -1.0, plus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, -1.0, plus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, -1.0, minus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, -1.0, minus_zero, minus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, 1.0, plus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, 1.0, minus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, -1.0, plus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, -1.0, minus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, 1.0, plus_zero, plus_zero); + TEST_fff_f (fma, minus_zero, 1.0, minus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, -1.0, plus_zero, plus_zero); + TEST_fff_f (fma, minus_zero, -1.0, minus_zero, plus_zero); + + TEST_fff_f (fma, 1.0, 1.0, -1.0, plus_zero); + TEST_fff_f (fma, 1.0, -1.0, 1.0, plus_zero); + TEST_fff_f (fma, -1.0, 1.0, 1.0, plus_zero); + TEST_fff_f (fma, -1.0, -1.0, -1.0, plus_zero); + } + + fesetround (save_round_mode); + + END (fma_towardzero); +} + + +static void +fma_test_downward (void) +{ + int save_round_mode; + START (fma_downward); + + save_round_mode = fegetround (); + + if (!fesetround (FE_DOWNWARD)) + { + TEST_fff_f (fma, plus_zero, plus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, plus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, plus_zero, minus_zero, plus_zero, minus_zero); + TEST_fff_f (fma, plus_zero, minus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, plus_zero, plus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, plus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, minus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, minus_zero, minus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, 1.0, plus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, 1.0, plus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, 1.0, minus_zero, plus_zero, minus_zero); + TEST_fff_f (fma, 1.0, minus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, -1.0, plus_zero, plus_zero, minus_zero); + TEST_fff_f (fma, -1.0, plus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, -1.0, minus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, -1.0, minus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, plus_zero, 1.0, plus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, 1.0, minus_zero, minus_zero); + TEST_fff_f (fma, plus_zero, -1.0, plus_zero, minus_zero); + TEST_fff_f (fma, plus_zero, -1.0, minus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, 1.0, plus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, 1.0, minus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, -1.0, plus_zero, plus_zero); + TEST_fff_f (fma, minus_zero, -1.0, minus_zero, minus_zero); + + TEST_fff_f (fma, 1.0, 1.0, -1.0, minus_zero); + TEST_fff_f (fma, 1.0, -1.0, 1.0, minus_zero); + TEST_fff_f (fma, -1.0, 1.0, 1.0, minus_zero); + TEST_fff_f (fma, -1.0, -1.0, -1.0, minus_zero); + } + + fesetround (save_round_mode); + + END (fma_downward); +} + + +static void +fma_test_upward (void) +{ + int save_round_mode; + START (fma_upward); + + save_round_mode = fegetround (); + + if (!fesetround (FE_UPWARD)) + { + TEST_fff_f (fma, plus_zero, plus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, plus_zero, minus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, minus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, minus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, plus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, minus_zero, plus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, minus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, minus_zero, minus_zero, minus_zero, plus_zero); + TEST_fff_f (fma, 1.0, plus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, 1.0, plus_zero, minus_zero, plus_zero); + TEST_fff_f (fma, 1.0, minus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, 1.0, minus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, -1.0, plus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, -1.0, plus_zero, minus_zero, minus_zero); + TEST_fff_f (fma, -1.0, minus_zero, plus_zero, plus_zero); + TEST_fff_f (fma, -1.0, minus_zero, minus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, 1.0, plus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, 1.0, minus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, -1.0, plus_zero, plus_zero); + TEST_fff_f (fma, plus_zero, -1.0, minus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, 1.0, plus_zero, plus_zero); + TEST_fff_f (fma, minus_zero, 1.0, minus_zero, minus_zero); + TEST_fff_f (fma, minus_zero, -1.0, plus_zero, plus_zero); + TEST_fff_f (fma, minus_zero, -1.0, minus_zero, plus_zero); + + TEST_fff_f (fma, 1.0, 1.0, -1.0, plus_zero); + TEST_fff_f (fma, 1.0, -1.0, 1.0, plus_zero); + TEST_fff_f (fma, -1.0, 1.0, 1.0, plus_zero); + TEST_fff_f (fma, -1.0, -1.0, -1.0, plus_zero); + } + + fesetround (save_round_mode); + + END (fma_upward); +} + + +static void fmax_test (void) { START (fmax); @@ -9539,6 +9710,9 @@ main (int argc, char **argv) /* Multiply and add: */ fma_test (); + fma_test_towardzero (); + fma_test_downward (); + fma_test_upward (); /* Complex functions: */ cabs_test (); diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c index ce3bd36fac..c9809fb180 100644 --- a/sysdeps/ieee754/dbl-64/s_fma.c +++ b/sysdeps/ieee754/dbl-64/s_fma.c @@ -128,6 +128,11 @@ __fma (double x, double y, double z) y = v.d; z = w.d; } + + /* Ensure correct sign of exact 0 + 0. */ + if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) + return x * y + z; + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1) double x1 = x * C; diff --git a/sysdeps/ieee754/dbl-64/s_fmaf.c b/sysdeps/ieee754/dbl-64/s_fmaf.c index e7a0650f0f..a4f12d9f76 100644 --- a/sysdeps/ieee754/dbl-64/s_fmaf.c +++ b/sysdeps/ieee754/dbl-64/s_fmaf.c @@ -32,8 +32,15 @@ float __fmaf (float x, float y, float z) { fenv_t env; + /* Multiplication is always exact. */ double temp = (double) x * (double) y; + + /* Ensure correct sign of an exact zero result by performing the + addition in the original rounding mode in that case. */ + if (temp == -z) + return (float) temp + z; + union ieee754_double u; libc_feholdexcept_setround (&env, FE_TOWARDZERO); diff --git a/sysdeps/ieee754/ldbl-128/s_fma.c b/sysdeps/ieee754/ldbl-128/s_fma.c index 355b60ebbb..b08ff29c04 100644 --- a/sysdeps/ieee754/ldbl-128/s_fma.c +++ b/sysdeps/ieee754/ldbl-128/s_fma.c @@ -1,5 +1,5 @@ /* Compute x * y + z as ternary operation. - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Jakub Jelinek <jakub@redhat.com>, 2010. @@ -33,6 +33,12 @@ __fma (double x, double y, double z) fenv_t env; /* Multiplication is always exact. */ long double temp = (long double) x * (long double) y; + + /* Ensure correct sign of an exact zero result by performing the + addition in the original rounding mode in that case. */ + if (temp == -z) + return (double) temp + z; + union ieee854_long_double u; feholdexcept (&env); fesetround (FE_TOWARDZERO); diff --git a/sysdeps/ieee754/ldbl-128/s_fmal.c b/sysdeps/ieee754/ldbl-128/s_fmal.c index 963bbd7345..df68ade435 100644 --- a/sysdeps/ieee754/ldbl-128/s_fmal.c +++ b/sysdeps/ieee754/ldbl-128/s_fmal.c @@ -129,6 +129,11 @@ __fmal (long double x, long double y, long double z) y = v.d; z = w.d; } + + /* Ensure correct sign of exact 0 + 0. */ + if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) + return x * y + z; + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = x * C; diff --git a/sysdeps/ieee754/ldbl-96/s_fma.c b/sysdeps/ieee754/ldbl-96/s_fma.c index 78c0b0db18..001d8063d4 100644 --- a/sysdeps/ieee754/ldbl-96/s_fma.c +++ b/sysdeps/ieee754/ldbl-96/s_fma.c @@ -1,5 +1,5 @@ /* Compute x * y + z as ternary operation. - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Jakub Jelinek <jakub@redhat.com>, 2010. @@ -38,6 +38,10 @@ __fma (double x, double y, double z) return (x * y) + z; } + /* Ensure correct sign of exact 0 + 0. */ + if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) + return x * y + z; + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1ULL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = (long double) x * C; diff --git a/sysdeps/ieee754/ldbl-96/s_fmal.c b/sysdeps/ieee754/ldbl-96/s_fmal.c index ca1e0905a7..c27b0bd852 100644 --- a/sysdeps/ieee754/ldbl-96/s_fmal.c +++ b/sysdeps/ieee754/ldbl-96/s_fmal.c @@ -129,6 +129,11 @@ __fmal (long double x, long double y, long double z) y = v.d; z = w.d; } + + /* Ensure correct sign of exact 0 + 0. */ + if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) + return x * y + z; + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = x * C; |