aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/aarch64/fpu/exp10f_advsimd.c
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/aarch64/fpu/exp10f_advsimd.c')
-rw-r--r--sysdeps/aarch64/fpu/exp10f_advsimd.c21
1 files changed, 10 insertions, 11 deletions
diff --git a/sysdeps/aarch64/fpu/exp10f_advsimd.c b/sysdeps/aarch64/fpu/exp10f_advsimd.c
index 7ee0c90948..ab117b69da 100644
--- a/sysdeps/aarch64/fpu/exp10f_advsimd.c
+++ b/sysdeps/aarch64/fpu/exp10f_advsimd.c
@@ -25,7 +25,8 @@
static const struct data
{
float32x4_t poly[5];
- float32x4_t shift, log10_2, log2_10_hi, log2_10_lo;
+ float32x4_t log10_2_and_inv, shift;
+
#if !WANT_SIMD_EXCEPT
float32x4_t scale_thresh;
#endif
@@ -38,9 +39,9 @@ static const struct data
.poly = { V4 (0x1.26bb16p+1f), V4 (0x1.5350d2p+1f), V4 (0x1.04744ap+1f),
V4 (0x1.2d8176p+0f), V4 (0x1.12b41ap-1f) },
.shift = V4 (0x1.8p23f),
- .log10_2 = V4 (0x1.a934fp+1),
- .log2_10_hi = V4 (0x1.344136p-2),
- .log2_10_lo = V4 (-0x1.ec10cp-27),
+
+ /* Stores constants 1/log10(2), log10(2)_high, log10(2)_low, 0. */
+ .log10_2_and_inv = { 0x1.a934fp+1, 0x1.344136p-2, -0x1.ec10cp-27, 0 },
#if !WANT_SIMD_EXCEPT
.scale_thresh = V4 (ScaleBound)
#endif
@@ -98,24 +99,22 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp10) (float32x4_t x)
#if WANT_SIMD_EXCEPT
/* asuint(x) - TinyBound >= BigBound - TinyBound. */
uint32x4_t cmp = vcgeq_u32 (
- vsubq_u32 (vandq_u32 (vreinterpretq_u32_f32 (x), v_u32 (0x7fffffff)),
- TinyBound),
- Thres);
+ vsubq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (x)), TinyBound), Thres);
float32x4_t xm = x;
/* If any lanes are special, mask them with 1 and retain a copy of x to allow
special case handler to fix special lanes later. This is only necessary if
fenv exceptions are to be triggered correctly. */
if (__glibc_unlikely (v_any_u32 (cmp)))
- x = vbslq_f32 (cmp, v_f32 (1), x);
+ x = v_zerofy_f32 (x, cmp);
#endif
/* exp10(x) = 2^n * 10^r = 2^n * (1 + poly (r)),
with poly(r) in [1/sqrt(2), sqrt(2)] and
x = r + n * log10 (2), with r in [-log10(2)/2, log10(2)/2]. */
- float32x4_t z = vfmaq_f32 (d->shift, x, d->log10_2);
+ float32x4_t z = vfmaq_laneq_f32 (d->shift, x, d->log10_2_and_inv, 0);
float32x4_t n = vsubq_f32 (z, d->shift);
- float32x4_t r = vfmsq_f32 (x, n, d->log2_10_hi);
- r = vfmsq_f32 (r, n, d->log2_10_lo);
+ float32x4_t r = vfmsq_laneq_f32 (x, n, d->log10_2_and_inv, 1);
+ r = vfmsq_laneq_f32 (r, n, d->log10_2_and_inv, 2);
uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23);
float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias));