aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdhemerval Zanella <adhemerval.zanella@linaro.org>2019-03-18 17:40:50 +0000
committerAdhemerval Zanella <adhemerval.zanella@linaro.org>2019-07-08 17:21:15 -0300
commit69461d989669d3da051a2bfdae8d5b0ff3dc0749 (patch)
treeed4ae60dcce96b86f7ca77e556273b0f9c3814c8
parentf215dbbdf18893b08e0b00eb12d369e7cf4a946d (diff)
downloadglibc-69461d989669d3da051a2bfdae8d5b0ff3dc0749.tar
glibc-69461d989669d3da051a2bfdae8d5b0ff3dc0749.tar.gz
glibc-69461d989669d3da051a2bfdae8d5b0ff3dc0749.tar.bz2
glibc-69461d989669d3da051a2bfdae8d5b0ff3dc0749.zip
powerpc: hypot refactor and optimization
The powerpc hypot is slightly optimized by: - Since commit 8df4e219e43, both isnan and isinf are always inlined, and thus having separate TEST_INF_NAN variants does not make sense anymore. The check previously used only for POWER7 should be faster on all powerpc configurations. - The redundant check 'y > two60factor && (x / y) > two60' is removed. Both changes make the ifunc specialization for power7 unnecessary, and thus the ifunc variants are removed. Finally, the code is also cleaned up a bit by inlining the floating-point constants. The performance changes using the hypot benchtests are: - POWER9 without patch: "hypot": { "overflow": { "duration": 4.98585e+09, "iterations": 4.84932e+08, "max": 46.551, "min": 10.229, "mean": 10.2815 }, "higher_two500": { "duration": 5.00192e+09, "iterations": 4.24843e+08, "max": 33.319, "min": 11.606, "mean": 11.7736 }, "subnormal": { "duration": 5.0075e+09, "iterations": 4.06792e+08, "max": 22.178, "min": 12.15, "mean": 12.3097 }, "less_two500": { "duration": 5.00685e+09, "iterations": 4.08772e+08, "max": 22.784, "min": 12.052, "mean": 12.2485 }, "default": { "duration": 5.06002e+09, "iterations": 4.09894e+08, "max": 20.648, "min": 11.874, "mean": 12.3447 } } - POWER9 with patch: "hypot": { "overflow": { "duration": 4.91848e+09, "iterations": 7.28039e+08, "max": 47.958, "min": 6.436, "mean": 6.75579 }, "higher_two500": { "duration": 4.9359e+09, "iterations": 6.63376e+08, "max": 20.783, "min": 7.321, "mean": 7.44057 }, "subnormal": { "duration": 4.9479e+09, "iterations": 6.19772e+08, "max": 18.856, "min": 7.817, "mean": 7.98341 }, "less_two500": { "duration": 4.94275e+09, "iterations": 6.3889e+08, "max": 17.452, "min": 7.597, "mean": 7.73647 }, "default": { "duration": 5.03645e+09, "iterations": 5.70718e+08, "max": 18.904, "min": 8.55, "mean": 8.82476 } } - POWER7 without patch: "hypot": { "overflow": { "duration": 4.86637e+09, "iterations": 6.43196e+08, "max": 53.958, "min": 7.328, "mean": 7.56592 }, "higher_two500": { "duration": 4.99842e+09, "iterations": 3.11012e+08, "max": 
78.227, "min": 15.696, "mean": 16.0715 }, "subnormal": { "duration": 4.99841e+09, "iterations": 3.08935e+08, "max": 51.392, "min": 15.983, "mean": 16.1795 }, "less_two500": { "duration": 5.00108e+09, "iterations": 2.99464e+08, "max": 73.247, "min": 16.416, "mean": 16.7001 }, "default": { "duration": 5.04645e+09, "iterations": 3.52608e+08, "max": 70.073, "min": 13.38, "mean": 14.3118 } } - POWER7 with patch: "hypot": { "overflow": { "duration": 4.80785e+09, "iterations": 8.00001e+08, "max": 66.262, "min": 5.888, "mean": 6.00981 }, "higher_two500": { "duration": 4.9859e+09, "iterations": 3.39449e+08, "max": 5148.44, "min": 14.539, "mean": 14.6882 }, "subnormal": { "duration": 4.9905e+09, "iterations": 3.28874e+08, "max": 64.905, "min": 14.971, "mean": 15.1745 }, "less_two500": { "duration": 4.99494e+09, "iterations": 3.19755e+08, "max": 103.696, "min": 14.972, "mean": 15.6211 }, "default": { "duration": 5.03951e+09, "iterations": 4.02502e+08, "max": 61.008, "min": 12.368, "mean": 12.5205 } } Checked on powerpc-linux-gnu (built without --with-cpu, with --with-cpu=power4 and with --with-cpu=power5+ and --disable-multi-arch), powerpc64-linux-gnu (built without --with-cpu and with --with-cpu=power5+ and --disable-multi-arch). * sysdeps/powerpc/fpu/e_hypot.c (two60, two500, two600, two1022, twoM500, twoM600, two60factor, pdnum): Remove. (TEST_INF_NAN, GET_TW0_HIGH_WORD): Remove macros. (__ieee754_hypot): Replace static variables with inline definitions, remove unused branches. * sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile (libm-sysdep_routines): Remove e_hypot-* objects. (CFLAGS-e_hypot-power7.c, CFLAGS-e_hypotf-power7.c): Remove rules. * sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c: Remove file. * sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c: Likewise. * sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c: Likewise. * sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c: Likewise. 
* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c: Likewise. * sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c: Likewise. Reviewed-by: Gabriel F. T. Gomes <gabrielftg@linux.ibm.com>
-rw-r--r--ChangeLog16
-rw-r--r--sysdeps/powerpc/fpu/e_hypot.c94
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile5
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c19
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c26
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c33
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c19
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c26
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c33
9 files changed, 39 insertions, 232 deletions
diff --git a/ChangeLog b/ChangeLog
index d335dd173a..0b2d80ffcc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,21 @@
2019-07-08 Adhemerval Zanella <adhemerval.zanella@linaro.org>
+ * sysdeps/powerpc/fpu/e_hypot.c (two60, two500, two600, two1022,
+ twoM500, twoM600, two60factor, pdnum): Remove.
+ (TEST_INF_NAN, GET_TW0_HIGH_WORD): Remove macros.
+ (__ieee754_hypot): Replace static variables with inline definitions,
+ remove unused branches.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
+ (libm-sysdep_routines): Remove e_hypot-* objects.
+ (CFLAGS-e_hypot-power7.c, CFLAGS-e_hypotf-power7.c): Remove rule.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c: Remove
+ file.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c: Likewise.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c: Likewise.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c: Likewise.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c: Likewise.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c: Likewise.
+
* benchtests/Makefile (bench-math): Add hypot.
* benchtests/hypot-inputs: New file.
diff --git a/sysdeps/powerpc/fpu/e_hypot.c b/sysdeps/powerpc/fpu/e_hypot.c
index 039e5be430..16cc55bed9 100644
--- a/sysdeps/powerpc/fpu/e_hypot.c
+++ b/sysdeps/powerpc/fpu/e_hypot.c
@@ -22,15 +22,6 @@
#include <math-underflow.h>
#include <stdint.h>
-static const double two60 = 1.152921504606847e+18;
-static const double two500 = 3.2733906078961419e+150;
-static const double two600 = 4.149515568880993e+180;
-static const double two1022 = 4.49423283715579e+307;
-static const double twoM500 = 3.054936363499605e-151;
-static const double twoM600 = 2.4099198651028841e-181;
-static const double two60factor = 1.5592502418239997e+290;
-static const double pdnum = 2.225073858507201e-308;
-
/* __ieee754_hypot(x,y)
*
* This is an FP-only version without any FP->INT conversion.
@@ -39,53 +30,18 @@ static const double pdnum = 2.225073858507201e-308;
* is needed.
*/
-#ifdef _ARCH_PWR7
-/* POWER7 isinf and isnan optimization are fast. */
-# define TEST_INF_NAN(x, y) \
- if ((isinf(x) || isinf(y)) \
- && !issignaling (x) && !issignaling (y)) \
- return INFINITY; \
- if (isnan(x) || isnan(y)) \
- return x + y;
-# else
-/* For POWER6 and below isinf/isnan triggers LHS and PLT calls are
- * costly (especially for POWER6). */
-# define GET_TW0_HIGH_WORD(d1,d2,i1,i2) \
- do { \
- ieee_double_shape_type gh_u1; \
- ieee_double_shape_type gh_u2; \
- gh_u1.value = (d1); \
- gh_u2.value = (d2); \
- (i1) = gh_u1.parts.msw & 0x7fffffff; \
- (i2) = gh_u2.parts.msw & 0x7fffffff; \
- } while (0)
-
-# define TEST_INF_NAN(x, y) \
- do { \
- uint32_t hx, hy; \
- GET_TW0_HIGH_WORD(x, y, hx, hy); \
- if (hy > hx) { \
- uint32_t ht = hx; hx = hy; hy = ht; \
- } \
- if (hx >= 0x7ff00000) { \
- if ((hx == 0x7ff00000 || hy == 0x7ff00000) \
- && !issignaling (x) && !issignaling (y)) \
- return INFINITY; \
- return x + y; \
- } \
- } while (0)
-
-#endif
-
-
double
__ieee754_hypot (double x, double y)
{
+ if ((isinf (x) || isinf (y))
+ && !issignaling (x) && !issignaling (y))
+ return INFINITY;
+ if (isnan (x) || isnan (y))
+ return x + y;
+
x = fabs (x);
y = fabs (y);
- TEST_INF_NAN (x, y);
-
if (y > x)
{
double t = x;
@@ -94,40 +50,34 @@ __ieee754_hypot (double x, double y)
}
if (y == 0.0)
return x;
+
/* If y is large enough, y * 2^60 might overflow. Test whether
y >= 1.7976931348623157e+308/2^60 (two60factor) and use the
appropriate check to avoid generating an overflow exception. */
- if (y > two60factor)
- {
- if ((x / y) > two60)
- return x + y;
- }
- else
- {
- if (x > (y * two60))
- return x + y;
- }
- if (x > two500)
+ if (y <= 0x1.fffffffffffffp+963 && x > (y * 0x1p+60))
+ return x + y;
+
+ if (x > 0x1p+500)
{
- x *= twoM600;
- y *= twoM600;
- return sqrt (x * x + y * y) / twoM600;
+ x *= 0x1p-600;
+ y *= 0x1p-600;
+ return sqrt (x * x + y * y) / 0x1p-600;
}
- if (y < twoM500)
+ if (y < 0x1p-500)
{
- if (y <= pdnum)
+ if (y <= 0x0.fffffffffffffp-1022)
{
- x *= two1022;
- y *= two1022;
- double ret = sqrt (x * x + y * y) / two1022;
+ x *= 0x1p+1022;
+ y *= 0x1p+1022;
+ double ret = sqrt (x * x + y * y) / 0x1p+1022;
math_check_force_underflow_nonneg (ret);
return ret;
}
else
{
- x *= two600;
- y *= two600;
- return sqrt (x * x + y * y) / two600;
+ x *= 0x1p+600;
+ y *= 0x1p+600;
+ return sqrt (x * x + y * y) / 0x1p+600;
}
}
return sqrt (x * x + y * y);
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
index 118865a1a9..534d5a7133 100644
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
@@ -8,8 +8,7 @@ sysdep_calls := s_modf-power5+ s_modf-ppc64 \
sysdep_routines += $(sysdep_calls)
libm-sysdep_routines += s_logb-power7 s_logbf-power7 \
s_logbl-power7 s_logb-ppc64 s_logbf-ppc64 \
- s_logbl-ppc64 e_hypot-ppc64 \
- e_hypot-power7 e_hypotf-ppc64 e_hypotf-power7 \
+ s_logbl-ppc64 \
$(sysdep_calls:s_%=m_%)
CFLAGS-s_logbf-power7.c = -mcpu=power7
@@ -17,8 +16,6 @@ CFLAGS-s_logbl-power7.c = -mcpu=power7
CFLAGS-s_logb-power7.c = -mcpu=power7
CFLAGS-s_modf-power5+.c = -mcpu=power5+
CFLAGS-s_modff-power5+.c = -mcpu=power5+
-CFLAGS-e_hypot-power7.c = -mcpu=power7
-CFLAGS-e_hypotf-power7.c = -mcpu=power7
# These files quiet sNaNs in a way that is optimized away without
# -fsignaling-nans.
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c
deleted file mode 100644
index 69818d8438..0000000000
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/* __ieee_hypot() POWER7 version.
- Copyright (C) 2013-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-power7.c>
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c
deleted file mode 100644
index da1e80f0d6..0000000000
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/* __ieee_hypot() PowerPC64 version.
- Copyright (C) 2013-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <math.h>
-
-#undef strong_alias
-#define strong_alias(a, b)
-
-#define __ieee754_hypot __ieee754_hypot_ppc64
-
-#include <sysdeps/powerpc/fpu/e_hypot.c>
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c
deleted file mode 100644
index 3bd04e9517..0000000000
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Multiple versions of ieee754_hypot.
- Copyright (C) 2013-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <math.h>
-#include <math_private.h>
-#include <math_ldbl_opt.h>
-#include <shlib-compat.h>
-#include "init-arch.h"
-
-extern __typeof (__ieee754_hypot) __ieee754_hypot_ppc64 attribute_hidden;
-extern __typeof (__ieee754_hypot) __ieee754_hypot_power7 attribute_hidden;
-
-libc_ifunc (__ieee754_hypot,
- (hwcap & PPC_FEATURE_ARCH_2_06)
- ? __ieee754_hypot_power7
- : __ieee754_hypot_ppc64);
-
-strong_alias (__ieee754_hypot, __hypot_finite)
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c
deleted file mode 100644
index 223947a617..0000000000
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/* __ieee_hypotf() POWER7 version.
- Copyright (C) 2013-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-power7.c>
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c
deleted file mode 100644
index 6d5d54bb79..0000000000
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/* __ieee_hypot() PowerPC64 version.
- Copyright (C) 2013-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <math.h>
-
-#undef strong_alias
-#define strong_alias(a, b)
-
-#define __ieee754_hypotf __ieee754_hypotf_ppc64
-
-#include <sysdeps/powerpc/fpu/e_hypotf.c>
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c
deleted file mode 100644
index 02c0ab497f..0000000000
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Multiple versions of ieee754_hypot.
- Copyright (C) 2013-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <math.h>
-#include <math_private.h>
-#include <math_ldbl_opt.h>
-#include <shlib-compat.h>
-#include "init-arch.h"
-
-extern __typeof (__ieee754_hypotf) __ieee754_hypotf_ppc64 attribute_hidden;
-extern __typeof (__ieee754_hypotf) __ieee754_hypotf_power7 attribute_hidden;
-
-libc_ifunc (__ieee754_hypotf,
- (hwcap & PPC_FEATURE_ARCH_2_06)
- ? __ieee754_hypotf_power7
- : __ieee754_hypotf_ppc64);
-
-strong_alias (__ieee754_hypotf, __hypotf_finite)