aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog81
-rw-r--r--elf/get-dynamic-info.h3
-rw-r--r--nptl/pthread_mutex_unlock.c4
-rw-r--r--sysdeps/powerpc/fpu/libm-test-ulps2
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile4
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S26
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c24
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c31
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S26
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c26
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c31
-rw-r--r--sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S303
-rw-r--r--sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S519
-rw-r--r--sysdeps/unix/sysv/linux/i386/lowlevellock.h2
-rw-r--r--sysdeps/unix/sysv/linux/powerpc/elision-lock.c22
-rw-r--r--sysdeps/unix/sysv/linux/powerpc/elision-trylock.c12
-rw-r--r--sysdeps/unix/sysv/linux/powerpc/elision-unlock.c12
-rw-r--r--sysdeps/unix/sysv/linux/powerpc/htm.h12
-rw-r--r--sysdeps/unix/sysv/linux/powerpc/lowlevellock.h6
-rw-r--r--sysdeps/unix/sysv/linux/s390/lowlevellock.h2
-rw-r--r--sysdeps/unix/sysv/linux/x86_64/lowlevellock.h2
21 files changed, 1103 insertions, 47 deletions
diff --git a/ChangeLog b/ChangeLog
index 5aa721d3be..34ab625d0b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -251,6 +251,58 @@
* sysdeps/x86_64/fpu/test-float-libmvec-sincosf.c: Likewise.
* sysdeps/x86_64/fpu/Makefile: Added new tests.
+2016-10-14 Anton Blanchard <anton@samba.org>
+
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
+ [$(subdir) = math] (libm-sysdep_routines): Add s_sinf-power8 and
+ s_sinf-ppc64.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S: New file.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c: Likewise.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c: Likewise.
+ * sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S: Likewise.
+
+2016-10-14 Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>
+
+ * sysdeps/powerpc/fpu/libm-test-ulps: Update.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
+ [$(subdir) = math] (libm-sysdep_routines): Add e_expf-power8 and
+ e_expf-ppc64.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S: New file.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c: Likewise.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c: Likewise.
+ * sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S: Likewise.
+
+2016-04-12 Joseph Myers <joseph@codesourcery.com>
+
+ * sysdeps/unix/sysv/linux/i386/lowlevellock.h
+ (lll_unlock_elision): Add adapt_count parameter.
+
+2016-04-12 Paul E. Murphy <murphyp@linux.vnet.ibm.com>
+
+ * sysdeps/unix/sysv/linux/powerpc/elision-lock.c
+ (__lll_lock_elision): Remove adapt_count decrement...
+ * sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
+ (__lll_trylock_elision): Likewise.
+ * sysdeps/unix/sysv/linux/powerpc/elision-unlock.c
+ (__lll_unlock_elision): ... to here. And utilize
+ new adapt_count parameter.
+ * sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
+ (__lll_unlock_elision): Update to include adapt_count
+ parameter.
+ (lll_unlock_elision): Pass pointer to adapt_count
+ variable.
+
+2016-04-12 Paul E. Murphy <murphyp@linux.vnet.ibm.com>
+
+ * nptl/pthread_mutex_unlock.c (lll_unlock_elision):
+ Add elision adapt_count parameter to list of arguments.
+ * sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
+ (lll_unlock_elision): Update with new parameter list
+ * sysdeps/unix/sysv/linux/s390/lowlevellock.h
+ (lll_unlock_elision): Likewise
+ * sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
+ (lll_unlock_elision): Likewise
+
2016-08-17 Florian Weimer <fweimer@redhat.com>
Reduce time to expected nptl/tst-once5 failure.
@@ -700,6 +752,16 @@
2016-02-22 Paul E. Murphy <murphyp@linux.vnet.ibm.com>
+ * sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
+ (__lll_trylock_elision): Fix setting of adapt_count.
+ * sysdeps/unix/sysv/linux/powerpc/htm.h
+ (_ABORT_PERSISTENT): Define to clarify persistent aborts.
+ (_ABORT_NESTED_TRYLOCK): Renumber, and make persistent.
+ (_ABORT_SYSCALL): Renumber, and clarify definition.
+ (_ABORT_LOCK_BUSY): Renumber, make non-persistent.
+
+2016-02-22 Paul E. Murphy <murphyp@linux.vnet.ibm.com>
+
* sysdeps/unix/sysv/linux/powerpc/htm.h (__libc_tbegin): Remove
semicolon.
(__libc_tend): Likewise.
@@ -1042,6 +1104,25 @@
* sysdeps/hppa/dl-symaddr.c (_dl_symbol_address): Add rtld_hidden_def.
+2015-08-26 Paul E. Murphy <murphyp@linux.vnet.ibm.com>
+
+ * sysdeps/unix/sysv/linux/powerpc/elision-lock.c
+ (__arch_compare_and_exchange_val_32_acq): Remove and use common
+ definition. ISA 2.07B no longer requires full sync.
+
+2015-08-26 Paul E. Murphy <murphyp@linux.vnet.ibm.com>
+
+ * sysdeps/powerpc/powerpc32/sysdep.h (ABORT_TRANSACTION): Use
+ register other than r0 for tabort, it has special meaning.
+ * sysdeps/powerpc/powerpc64/sysdep.h (ABORT_TRANSACTION): Likewise
+ * sysdeps/unix.sysv/linux/powerpc/syscall.S (syscall): Abort
+ transaction before starting syscall.
+
+2015-08-06 Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>
+
+ * elf/get-dynamic-info.h (elf_get_dynamic_info): Remove assert
+ if DT_RUNPATH and DT_RPATH flags are found in ld.so.
+
2015-08-05 Zack Weinberg <zackw@panix.com>
* misc/regexp.h: Update comments.
diff --git a/elf/get-dynamic-info.h b/elf/get-dynamic-info.h
index dc8359d36a..529ed6c178 100644
--- a/elf/get-dynamic-info.h
+++ b/elf/get-dynamic-info.h
@@ -138,9 +138,6 @@ elf_get_dynamic_info (struct link_map *l, ElfW(Dyn) *temp)
|| (info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val & ~DF_1_NOW) == 0);
assert (info[DT_FLAGS] == NULL
|| (info[DT_FLAGS]->d_un.d_val & ~DF_BIND_NOW) == 0);
- /* Flags must not be set for ld.so. */
- assert (info[DT_RUNPATH] == NULL);
- assert (info[DT_RPATH] == NULL);
#else
if (info[DT_FLAGS] != NULL)
{
diff --git a/nptl/pthread_mutex_unlock.c b/nptl/pthread_mutex_unlock.c
index 80939ba8b0..9e864c1832 100644
--- a/nptl/pthread_mutex_unlock.c
+++ b/nptl/pthread_mutex_unlock.c
@@ -24,7 +24,7 @@
#include <stap-probe.h>
#ifndef lll_unlock_elision
-#define lll_unlock_elision(a,b) ({ lll_unlock (a,b); 0; })
+#define lll_unlock_elision(a,b,c) ({ lll_unlock (a,c); 0; })
#endif
static int
@@ -63,7 +63,7 @@ __pthread_mutex_unlock_usercnt (mutex, decr)
else if (__glibc_likely (type == PTHREAD_MUTEX_TIMED_ELISION_NP))
{
/* Don't reset the owner/users fields for elision. */
- return lll_unlock_elision (mutex->__data.__lock,
+ return lll_unlock_elision (mutex->__data.__lock, mutex->__data.__elision,
PTHREAD_MUTEX_PSHARED (mutex));
}
else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex)
diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps
index 5a78b0baf1..3a47163837 100644
--- a/sysdeps/powerpc/fpu/libm-test-ulps
+++ b/sysdeps/powerpc/fpu/libm-test-ulps
@@ -1574,8 +1574,10 @@ ildouble: 1
ldouble: 1
Function: "exp_upward":
+float: 1
double: 1
idouble: 1
+ifloat: 1
ildouble: 1
ldouble: 1
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
index 0e3eac7190..add1fb85f0 100644
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
@@ -24,7 +24,9 @@ libm-sysdep_routines += s_isnan-power7 s_isnan-power6x s_isnan-power6 \
s_modff-power5+ s_modff-ppc64 e_hypot-ppc64 \
e_hypot-power7 e_hypotf-ppc64 e_hypotf-power7 \
s_isnan-power8 s_isinf-power8 s_finite-power8 \
- s_llrint-power8 s_llround-power8
+ s_llrint-power8 s_llround-power8 \
+ e_expf-power8 e_expf-ppc64 \
+ s_sinf-ppc64 s_sinf-power8
CFLAGS-s_logbf-power7.c = -mcpu=power7
CFLAGS-s_logbl-power7.c = -mcpu=power7
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S
new file mode 100644
index 0000000000..02eff243e1
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S
@@ -0,0 +1,26 @@
+/* __ieee754_expf() POWER8 version.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#undef strong_alias
+#define strong_alias(a, b)
+
+#define __ieee754_expf __ieee754_expf_power8
+
+#include <sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S>
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c
new file mode 100644
index 0000000000..40f9e3a8fa
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c
@@ -0,0 +1,24 @@
+/* __ieee_expf() PowerPC64 version.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#undef strong_alias
+#define strong_alias(a, b)
+
+#define __ieee754_expf __ieee754_expf_ppc64
+
+#include <sysdeps/ieee754/flt-32/e_expf.c>
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c
new file mode 100644
index 0000000000..1d9a8c69a0
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c
@@ -0,0 +1,31 @@
+/* Multiple versions of ieee754_expf.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include <math_ldbl_opt.h>
+#include "init-arch.h"
+
+extern __typeof (__ieee754_expf) __ieee754_expf_ppc64 attribute_hidden;
+extern __typeof (__ieee754_expf) __ieee754_expf_power8 attribute_hidden;
+
+libc_ifunc (__ieee754_expf,
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+ ? __ieee754_expf_power8
+ : __ieee754_expf_ppc64);
+
+strong_alias (__ieee754_expf, __expf_finite)
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S
new file mode 100644
index 0000000000..579019c66d
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S
@@ -0,0 +1,26 @@
+/* sinf(). PowerPC64/POWER8 version.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#undef weak_alias
+#define weak_alias(a, b)
+
+#define __sinf __sinf_power8
+
+#include <sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S>
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c
new file mode 100644
index 0000000000..eaf83fa427
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c
@@ -0,0 +1,26 @@
+/* sinf(). PowerPC64 default version.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#undef weak_alias
+#define weak_alias(a, b)
+
+#define __sinf __sinf_ppc64
+
+#include <sysdeps/ieee754/flt-32/s_sinf.c>
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c
new file mode 100644
index 0000000000..4269d58d32
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c
@@ -0,0 +1,31 @@
+/* Multiple versions of sinf.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
+
+extern __typeof (__sinf) __sinf_ppc64 attribute_hidden;
+extern __typeof (__sinf) __sinf_power8 attribute_hidden;
+
+libc_ifunc (__sinf,
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+ ? __sinf_power8
+ : __sinf_ppc64);
+
+weak_alias (__sinf, sinf)
diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S b/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S
new file mode 100644
index 0000000000..a5e68bbf7c
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S
@@ -0,0 +1,303 @@
+/* Optimized expf(). PowerPC64/POWER8 version.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* Short algorithm description:
+ *
+ * Let K = 64 (table size).
+ * e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
+ * where:
+ * x = m*log(2)/K + y, y in [0.0..log(2)/K]
+ * m = n*K + j, m,n,j - signed integer, j in [0..K-1]
+ * values of 2^(j/K) are tabulated as T[j].
+ *
+ * P(y) is a minimax polynomial approximation of expf(y)-1
+ * on small interval [0.0..log(2)/K].
+ *
+ * P(y) = P3*y*y*y*y + P2*y*y*y + P1*y*y + P0*y, calculated as
+ * z = y*y; P(y) = (P3*z + P1)*z + (P2*z + P0)*y
+ *
+ * Special cases:
+ * expf(NaN) = NaN
+ * expf(+INF) = +INF
+ * expf(-INF) = 0
+ * expf(x) = 1 for subnormals
+ * for finite argument, only expf(0)=1 is exact
+ * expf(x) overflows if x>88.7228317260742190
+ * expf(x) underflows if x<-103.972076416015620
+ */
+
+#define C1 0x42ad496b /* Single precision 125*log(2). */
+#define C2 0x31800000 /* Single precision 2^(-28). */
+#define SP_INF 0x7f800000 /* Single precision Inf. */
+#define SP_EXP_BIAS 0x1fc0 /* Single precision exponent bias. */
+
+#define DATA_OFFSET r9
+
+/* Implements the function
+
+ float [fp1] expf (float [fp1] x) */
+
+ .machine power8
+EALIGN(__ieee754_expf, 4, 0)
+ addis DATA_OFFSET,r2,.Lanchor@toc@ha
+ addi DATA_OFFSET,DATA_OFFSET,.Lanchor@toc@l
+
+ xscvdpspn v0,v1
+ mfvsrd r8,v0 /* r8 = x */
+ lfd fp2,(.KLN2-.Lanchor)(DATA_OFFSET)
+ lfd fp3,(.P2-.Lanchor)(DATA_OFFSET)
+ rldicl r3,r8,32,33 /* r3 = |x| */
+ lis r4,C1@ha /* r4 = 125*log(2) */
+ ori r4,r4,C1@l
+ cmpw r3,r4
+ lfd fp5,(.P3-.Lanchor)(DATA_OFFSET)
+ lfd fp4,(.RS-.Lanchor)(DATA_OFFSET)
+ fmadd fp2,fp1,fp2,fp4 /* fp2 = x * K/log(2) + (2^23 + 2^22) */
+ bge L(special_paths) /* |x| >= 125*log(2) ? */
+
+ lis r4,C2@ha
+ ori r4,r4,C2@l
+ cmpw r3,r4
+ blt L(small_args) /* |x| < 2^(-28) ? */
+
+ /* Main path: here if 2^(-28) <= |x| < 125*log(2) */
+ frsp fp6,fp2
+ xscvdpsp v2,v2
+ mfvsrd r8,v2
+ mr r3,r8 /* r3 = m */
+ rldicl r8,r8,32,58 /* r8 = j */
+ lfs fp4,(.SP_RS-.Lanchor)(DATA_OFFSET)
+ fsubs fp2,fp6,fp4 /* fp2 = m = x * K/log(2) */
+ srdi r3,r3,32
+ clrrwi r3,r3,6 /* r3 = n */
+ lfd fp6,(.NLN2K-.Lanchor)(DATA_OFFSET)
+ fmadd fp0,fp2,fp6,fp1 /* fp0 = y = x - m*log(2)/K */
+ fmul fp2,fp0,fp0 /* fp2 = z = y^2 */
+ lfd fp4,(.P1-.Lanchor)(DATA_OFFSET)
+ lfd fp6,(.P0-.Lanchor)(DATA_OFFSET)
+ lis r4,SP_EXP_BIAS@ha
+ ori r4,r4,SP_EXP_BIAS@l
+ add r3,r3,r4
+ rldic r3,r3,49,1 /* r3 = 2^n */
+ fmadd fp4,fp5,fp2,fp4 /* fp4 = P3 * z + P1 */
+ fmadd fp6,fp3,fp2,fp6 /* fp6 = P2 * z + P0 */
+ mtvsrd v1,r3
+ xscvspdp v1,v1
+ fmul fp4,fp4,fp2 /* fp4 = (P3 * z + P1)*z */
+ fmadd fp0,fp0,fp6,fp4 /* fp0 = P(y) */
+ sldi r8,r8,3 /* Access doublewords from T[j]. */
+ addi r6,DATA_OFFSET,(.Ttable-.Lanchor)
+ lfdx fp3,r6,r8
+ fmadd fp0,fp0,fp3,fp3 /* fp0 = T[j] * (1 + P(y)) */
+ fmul fp1,fp1,fp0 /* fp1 = 2^n * T[j] * (1 + P(y)) */
+ frsp fp1,fp1
+ blr
+
+ .align 4
+/* x is either underflow, overflow, infinite or NaN. */
+L(special_paths):
+ srdi r8,r8,32
+ rlwinm r8,r8,3,29,29 /* r8 = 0, if x positive.
+ r8 = 4, otherwise. */
+ addi r6,DATA_OFFSET,(.SPRANGE-.Lanchor)
+ lwzx r4,r6,r8 /* r4 = .SPRANGE[signbit(x)] */
+ cmpw r3,r4
+ /* |x| <= .SPRANGE[signbit(x)] */
+ ble L(near_under_or_overflow)
+
+ lis r4,SP_INF@ha
+ ori r4,r4,SP_INF@l
+ cmpw r3,r4
+ bge L(arg_inf_or_nan) /* |x| > Infinite ? */
+
+ addi r6,DATA_OFFSET,(.SPLARGE_SMALL-.Lanchor)
+ lfsx fp1,r6,r8
+ fmuls fp1,fp1,fp1
+ blr
+
+
+ .align 4
+L(small_args):
+ /* expf(x) = 1.0, where |x| < |2^(-28)| */
+ lfs fp2,(.SPone-.Lanchor)(DATA_OFFSET)
+ fadds fp1,fp1,fp2
+ blr
+
+
+ .align 4
+L(arg_inf_or_nan:)
+ bne L(arg_nan)
+
+ /* expf(+INF) = +INF
+ expf(-INF) = 0 */
+ addi r6,DATA_OFFSET,(.INF_ZERO-.Lanchor)
+ lfsx fp1,r6,r8
+ blr
+
+
+ .align 4
+L(arg_nan):
+ /* expf(NaN) = NaN */
+ fadd fp1,fp1,fp1
+ frsp fp1,fp1
+ blr
+
+ .align 4
+L(near_under_or_overflow):
+ frsp fp6,fp2
+ xscvdpsp v2,v2
+ mfvsrd r8,v2
+ mr r3,r8 /* r3 = m */
+ rldicl r8,r8,32,58 /* r8 = j */
+ lfs fp4,(.SP_RS-.Lanchor)(DATA_OFFSET)
+ fsubs fp2,fp6,fp4 /* fp2 = m = x * K/log(2) */
+ srdi r3,r3,32
+ clrrwi r3,r3,6 /* r3 = n */
+ lfd fp6,(.NLN2K-.Lanchor)(DATA_OFFSET)
+ fmadd fp0,fp2,fp6,fp1 /* fp0 = y = x - m*log(2)/K */
+ fmul fp2,fp0,fp0 /* fp2 = z = y^2 */
+ lfd fp4,(.P1-.Lanchor)(DATA_OFFSET)
+ lfd fp6,(.P0-.Lanchor)(DATA_OFFSET)
+ ld r4,(.DP_EXP_BIAS-.Lanchor)(DATA_OFFSET)
+ add r3,r3,r4
+ rldic r3,r3,46,1 /* r3 = 2 */
+ fmadd fp4,fp5,fp2,fp4 /* fp4 = P3 * z + P1 */
+ fmadd fp6,fp3,fp2,fp6 /* fp6 = P2 * z + P0 */
+ mtvsrd v1,r3
+ fmul fp4,fp4,fp2 /* fp4 = (P3*z + P1)*z */
+ fmadd fp0,fp0,fp6,fp4 /* fp0 = P(y) */
+ sldi r8,r8,3 /* Access doublewords from T[j]. */
+ addi r6,DATA_OFFSET,(.Ttable-.Lanchor)
+ lfdx fp3,r6,r8
+ fmadd fp0,fp0,fp3,fp3 /* fp0 = T[j] * (1 + T[j]) */
+ fmul fp1,fp1,fp0 /* fp1 = 2^n * T[j] * (1 + T[j]) */
+ frsp fp1,fp1
+ blr
+END(__ieee754_expf)
+
+ .section .rodata, "a",@progbits
+.Lanchor:
+ .balign 8
+/* Table T[j] = 2^(j/K). Double precision. */
+.Ttable:
+ .8byte 0x3ff0000000000000
+ .8byte 0x3ff02c9a3e778061
+ .8byte 0x3ff059b0d3158574
+ .8byte 0x3ff0874518759bc8
+ .8byte 0x3ff0b5586cf9890f
+ .8byte 0x3ff0e3ec32d3d1a2
+ .8byte 0x3ff11301d0125b51
+ .8byte 0x3ff1429aaea92de0
+ .8byte 0x3ff172b83c7d517b
+ .8byte 0x3ff1a35beb6fcb75
+ .8byte 0x3ff1d4873168b9aa
+ .8byte 0x3ff2063b88628cd6
+ .8byte 0x3ff2387a6e756238
+ .8byte 0x3ff26b4565e27cdd
+ .8byte 0x3ff29e9df51fdee1
+ .8byte 0x3ff2d285a6e4030b
+ .8byte 0x3ff306fe0a31b715
+ .8byte 0x3ff33c08b26416ff
+ .8byte 0x3ff371a7373aa9cb
+ .8byte 0x3ff3a7db34e59ff7
+ .8byte 0x3ff3dea64c123422
+ .8byte 0x3ff4160a21f72e2a
+ .8byte 0x3ff44e086061892d
+ .8byte 0x3ff486a2b5c13cd0
+ .8byte 0x3ff4bfdad5362a27
+ .8byte 0x3ff4f9b2769d2ca7
+ .8byte 0x3ff5342b569d4f82
+ .8byte 0x3ff56f4736b527da
+ .8byte 0x3ff5ab07dd485429
+ .8byte 0x3ff5e76f15ad2148
+ .8byte 0x3ff6247eb03a5585
+ .8byte 0x3ff6623882552225
+ .8byte 0x3ff6a09e667f3bcd
+ .8byte 0x3ff6dfb23c651a2f
+ .8byte 0x3ff71f75e8ec5f74
+ .8byte 0x3ff75feb564267c9
+ .8byte 0x3ff7a11473eb0187
+ .8byte 0x3ff7e2f336cf4e62
+ .8byte 0x3ff82589994cce13
+ .8byte 0x3ff868d99b4492ed
+ .8byte 0x3ff8ace5422aa0db
+ .8byte 0x3ff8f1ae99157736
+ .8byte 0x3ff93737b0cdc5e5
+ .8byte 0x3ff97d829fde4e50
+ .8byte 0x3ff9c49182a3f090
+ .8byte 0x3ffa0c667b5de565
+ .8byte 0x3ffa5503b23e255d
+ .8byte 0x3ffa9e6b5579fdbf
+ .8byte 0x3ffae89f995ad3ad
+ .8byte 0x3ffb33a2b84f15fb
+ .8byte 0x3ffb7f76f2fb5e47
+ .8byte 0x3ffbcc1e904bc1d2
+ .8byte 0x3ffc199bdd85529c
+ .8byte 0x3ffc67f12e57d14b
+ .8byte 0x3ffcb720dcef9069
+ .8byte 0x3ffd072d4a07897c
+ .8byte 0x3ffd5818dcfba487
+ .8byte 0x3ffda9e603db3285
+ .8byte 0x3ffdfc97337b9b5f
+ .8byte 0x3ffe502ee78b3ff6
+ .8byte 0x3ffea4afa2a490da
+ .8byte 0x3ffefa1bee615a27
+ .8byte 0x3fff50765b6e4540
+ .8byte 0x3fffa7c1819e90d8
+
+.KLN2:
+ .8byte 0x40571547652b82fe /* Double precision K/log(2). */
+
+/* Double precision polynomial coefficients. */
+.P0:
+ .8byte 0x3fefffffffffe7c6
+.P1:
+ .8byte 0x3fe00000008d6118
+.P2:
+ .8byte 0x3fc55550da752d4f
+.P3:
+ .8byte 0x3fa56420eb78fa85
+
+.RS:
+ .8byte 0x4168000000000000 /* Double precision 2^23 + 2^22. */
+.NLN2K:
+ .8byte 0xbf862e42fefa39ef /* Double precision -log(2)/K. */
+.DP_EXP_BIAS:
+ .8byte 0x000000000000ffc0 /* Double precision exponent bias. */
+
+ .balign 4
+.SPone:
+ .4byte 0x3f800000 /* Single precision 1.0. */
+.SP_RS:
+ .4byte 0x4b400000 /* Single precision 2^23 + 2^22. */
+
+.SPRANGE: /* Single precision overflow/underflow bounds. */
+ .4byte 0x42b17217 /* if x>this bound, then result overflows. */
+ .4byte 0x42cff1b4 /* if x<this bound, then result underflows. */
+
+.SPLARGE_SMALL:
+ .4byte 0x71800000 /* 2^100. */
+ .4byte 0x0d800000 /* 2^-100. */
+
+.INF_ZERO:
+ .4byte 0x7f800000 /* Single precision Inf. */
+ .4byte 0 /* Single precision zero. */
+
+strong_alias (__ieee754_expf, __expf_finite)
diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S
new file mode 100644
index 0000000000..3b8f5af292
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S
@@ -0,0 +1,519 @@
+/* Optimized sinf(). PowerPC64/POWER8 version.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#define _ERRNO_H 1
+#include <bits/errno.h>
+
+#define FRAMESIZE (FRAME_MIN_SIZE+16)
+
+#define FLOAT_EXPONENT_SHIFT 23
+#define FLOAT_EXPONENT_BIAS 127
+#define INTEGER_BITS 3
+
+#define PI_4 0x3f490fdb /* PI/4 */
+#define NINEPI_4 0x40e231d6 /* 9 * PI/4 */
+#define TWO_PN5 0x3d000000 /* 2^-5 */
+#define TWO_PN27 0x32000000 /* 2^-27 */
+#define INFINITY 0x7f800000
+#define TWO_P23 0x4b000000 /* 2^27 */
+#define FX_FRACTION_1_28 0x9249250 /* 0x100000000 / 28 + 1 */
+
+ /* Implements the function
+
+ float [fp1] sinf (float [fp1] x) */
+
+ .machine power8
+EALIGN(__sinf, 4, 0)
+ addis r9,r2,L(anchor)@toc@ha
+ addi r9,r9,L(anchor)@toc@l
+
+ lis r4,PI_4@h
+ ori r4,r4,PI_4@l
+
+ xscvdpspn v0,v1
+ mfvsrd r8,v0
+ rldicl r3,r8,32,33 /* Remove sign bit. */
+
+ cmpw r3,r4
+ bge L(greater_or_equal_pio4)
+
+ lis r4,TWO_PN5@h
+ ori r4,r4,TWO_PN5@l
+
+ cmpw r3,r4
+ blt L(less_2pn5)
+
+ /* Chebyshev polynomial of the form:
+ * x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */
+
+ lfd fp9,(L(S0)-L(anchor))(r9)
+ lfd fp10,(L(S1)-L(anchor))(r9)
+ lfd fp11,(L(S2)-L(anchor))(r9)
+ lfd fp12,(L(S3)-L(anchor))(r9)
+ lfd fp13,(L(S4)-L(anchor))(r9)
+
+ fmul fp2,fp1,fp1 /* x^2 */
+ fmul fp3,fp2,fp1 /* x^3 */
+
+ fmadd fp4,fp2,fp13,fp12 /* S3+x^2*S4 */
+ fmadd fp4,fp2,fp4,fp11 /* S2+x^2*(S3+x^2*S4) */
+ fmadd fp4,fp2,fp4,fp10 /* S1+x^2*(S2+x^2*(S3+x^2*S4)) */
+ fmadd fp4,fp2,fp4,fp9 /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))) */
+ fmadd fp1,fp3,fp4,fp1 /* x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))) */
+ frsp fp1,fp1 /* Round to single precision. */
+
+ blr
+
+ .balign 16
+L(greater_or_equal_pio4):
+ lis r4,NINEPI_4@h
+ ori r4,r4,NINEPI_4@l
+ cmpw r3,r4
+ bge L(greater_or_equal_9pio4)
+
+ /* Calculate quotient of |x|/(PI/4). */
+ lfd fp2,(L(invpio4)-L(anchor))(r9)
+ fabs fp1,fp1 /* |x| */
+ fmul fp2,fp1,fp2 /* |x|/(PI/4) */
+ fctiduz fp2,fp2
+ mfvsrd r3,v2 /* n = |x| mod PI/4 */
+
+ /* Now use that quotient to find |x| mod (PI/2). */
+ addi r7,r3,1
+ rldicr r5,r7,2,60 /* ((n+1) >> 1) << 3 */
+ addi r6,r9,(L(pio2_table)-L(anchor))
+ lfdx fp4,r5,r6
+ fsub fp1,fp1,fp4
+
+ .balign 16
+L(reduced):
+ /* Now we are in the range -PI/4 to PI/4. */
+
+ /* Work out if we are in a positive or negative primary interval. */
+ rldicl r4,r7,62,63 /* ((n+1) >> 2) & 1 */
+
+ /* We are operating on |x|, so we need to add back the original
+ sign. */
+ rldicl r8,r8,33,63 /* (x >> 31) & 1, ie the sign bit. */
+ xor r4,r4,r8 /* 0 if result should be positive,
+ 1 if negative. */
+
+ /* Load a 1.0 or -1.0. */
+ addi r5,r9,(L(ones)-L(anchor))
+ sldi r4,r4,3
+ lfdx fp0,r4,r5
+
+ /* Are we in the primary interval of sin or cos? */
+ andi. r4,r7,0x2
+ bne L(cos)
+
+ /* Chebyshev polynomial of the form:
+ x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */
+
+ lfd fp9,(L(S0)-L(anchor))(r9)
+ lfd fp10,(L(S1)-L(anchor))(r9)
+ lfd fp11,(L(S2)-L(anchor))(r9)
+ lfd fp12,(L(S3)-L(anchor))(r9)
+ lfd fp13,(L(S4)-L(anchor))(r9)
+
+ fmul fp2,fp1,fp1 /* x^2 */
+ fmul fp3,fp2,fp1 /* x^3 */
+
+ fmadd fp4,fp2,fp13,fp12 /* S3+x^2*S4 */
+ fmadd fp4,fp2,fp4,fp11 /* S2+x^2*(S3+x^2*S4) */
+ fmadd fp4,fp2,fp4,fp10 /* S1+x^2*(S2+x^2*(S3+x^2*S4)) */
+ fmadd fp4,fp2,fp4,fp9 /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))) */
+ fmadd fp4,fp3,fp4,fp1 /* x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))) */
+ fmul fp4,fp4,fp0 /* Add in the sign. */
+ frsp fp1,fp4 /* Round to single precision. */
+
+ blr
+
+ .balign 16
+L(cos):
+ /* Chebyshev polynomial of the form:
+ 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). */
+
+ lfd fp9,(L(C0)-L(anchor))(r9)
+ lfd fp10,(L(C1)-L(anchor))(r9)
+ lfd fp11,(L(C2)-L(anchor))(r9)
+ lfd fp12,(L(C3)-L(anchor))(r9)
+ lfd fp13,(L(C4)-L(anchor))(r9)
+
+ fmul fp2,fp1,fp1 /* x^2 */
+ lfd fp3,(L(DPone)-L(anchor))(r9)
+
+ fmadd fp4,fp2,fp13,fp12 /* C3+x^2*C4 */
+ fmadd fp4,fp2,fp4,fp11 /* C2+x^2*(C3+x^2*C4) */
+ fmadd fp4,fp2,fp4,fp10 /* C1+x^2*(C2+x^2*(C3+x^2*C4)) */
+ fmadd fp4,fp2,fp4,fp9 /* C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4))) */
+ fmadd fp4,fp2,fp4,fp3 /* 1.0 + x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))) */
+ fmul fp4,fp4,fp0 /* Add in the sign. */
+ frsp fp1,fp4 /* Round to single precision. */
+
+ blr
+
+ .balign 16
+L(greater_or_equal_9pio4):
+ lis r4,INFINITY@h
+ ori r4,r4,INFINITY@l
+ cmpw r3,r4
+ bge L(inf_or_nan)
+
+ lis r4,TWO_P23@h
+ ori r4,r4,TWO_P23@l
+ cmpw r3,r4
+ bge L(greater_or_equal_2p23)
+
+ fabs fp1,fp1 /* |x| */
+
+ /* Calculate quotient of |x|/(PI/4). */
+ lfd fp2,(L(invpio4)-L(anchor))(r9)
+
+ lfd fp3,(L(DPone)-L(anchor))(r9)
+ lfd fp4,(L(DPhalf)-L(anchor))(r9)
+ fmul fp2,fp1,fp2 /* |x|/(PI/4) */
+ friz fp2,fp2 /* n = floor(|x|/(PI/4)) */
+
+ /* Calculate (n + 1) / 2. */
+ fadd fp2,fp2,fp3 /* n + 1 */
+ fmul fp3,fp2,fp4 /* (n + 1) / 2 */
+ friz fp3,fp3
+
+ lfd fp4,(L(pio2hi)-L(anchor))(r9)
+ lfd fp5,(L(pio2lo)-L(anchor))(r9)
+
+ fmul fp6,fp4,fp3
+ fadd fp6,fp6,fp1
+ fmadd fp1,fp5,fp3,fp6
+
+ fctiduz fp2,fp2
+ mfvsrd r7,v2 /* n + 1 */
+
+ b L(reduced)
+
+ .balign 16
+L(inf_or_nan):
+ bne L(skip_errno_setting) /* Is a NAN? */
+
+ /* We delayed the creation of the stack frame, as well as the saving of
+ the link register, because only at this point, we are sure that
+ doing so is actually needed. */
+
+ stfd fp1,-8(r1)
+
+ /* Save the link register. */
+ mflr r0
+ std r0,16(r1)
+ cfi_offset(lr, 16)
+
+ /* Create the stack frame. */
+ stdu r1,-FRAMESIZE(r1)
+ cfi_adjust_cfa_offset(FRAMESIZE)
+
+ bl JUMPTARGET(__errno_location)
+ nop
+
+ /* Restore the stack frame. */
+ addi r1,r1,FRAMESIZE
+ cfi_adjust_cfa_offset(-FRAMESIZE)
+ /* Restore the link register. */
+ ld r0,16(r1)
+ mtlr r0
+
+ lfd fp1,-8(r1)
+
+ /* errno = EDOM */
+ li r4,EDOM
+ stw r4,0(r3)
+
+L(skip_errno_setting):
+ fsub fp1,fp1,fp1 /* x - x */
+ blr
+
+ .balign 16
+L(greater_or_equal_2p23):
+ fabs fp1,fp1
+
+ srwi r4,r3,FLOAT_EXPONENT_SHIFT
+ subi r4,r4,FLOAT_EXPONENT_BIAS
+
+ /* We reduce the input modulo pi/4, so we need 3 bits of integer
+ to determine where in 2*pi we are. Index into our array
+ accordingly. */
+ addi r4,r4,INTEGER_BITS
+
+ /* To avoid an expensive divide, for the range we care about (0 - 127)
+ we can transform x/28 into:
+
+ x/28 = (x * ((0x100000000 / 28) + 1)) >> 32
+
+ mulhwu returns the top 32 bits of the 64 bit result, doing the
+ shift for us in the same instruction. The top 32 bits are undefined,
+ so we have to mask them. */
+
+ lis r6,FX_FRACTION_1_28@h
+ ori r6,r6,FX_FRACTION_1_28@l
+ mulhwu r5,r4,r6
+ clrldi r5,r5,32
+
+ /* Get our pointer into the invpio4_table array. */
+ sldi r4,r5,3
+ addi r6,r9,(L(invpio4_table)-L(anchor))
+ add r4,r4,r6
+
+ lfd fp2,0(r4)
+ lfd fp3,8(r4)
+ lfd fp4,16(r4)
+ lfd fp5,24(r4)
+
+ fmul fp6,fp2,fp1
+ fmul fp7,fp3,fp1
+ fmul fp8,fp4,fp1
+ fmul fp9,fp5,fp1
+
+ /* Mask off larger integer bits in highest double word that we don't
+ care about to avoid losing precision when combining with smaller
+ values. */
+ fctiduz fp10,fp6
+ mfvsrd r7,v10
+ rldicr r7,r7,0,(63-INTEGER_BITS)
+ mtvsrd v10,r7
+ fcfidu fp10,fp10 /* Integer bits. */
+
+ fsub fp6,fp6,fp10 /* highest -= integer bits */
+
+ /* Work out the integer component, rounded down. Use the top two
+ limbs for this. */
+ fadd fp10,fp6,fp7 /* highest + higher */
+
+ fctiduz fp10,fp10
+ mfvsrd r7,v10
+ andi. r0,r7,1
+ fcfidu fp10,fp10
+
+ /* Subtract integer component from highest limb. */
+ fsub fp12,fp6,fp10
+
+ beq L(even_integer)
+
+ /* Our integer component is odd, so we are in the -PI/4 to 0 primary
+ region. We need to shift our result down by PI/4, and to do this
+ in the mod (4/PI) space we simply subtract 1. */
+ lfd fp11,(L(DPone)-L(anchor))(r9)
+ fsub fp12,fp12,fp11
+
+ /* Now add up all the limbs in order. */
+ fadd fp12,fp12,fp7
+ fadd fp12,fp12,fp8
+ fadd fp12,fp12,fp9
+
+ /* And finally multiply by pi/4. */
+ lfd fp13,(L(pio4)-L(anchor))(r9)
+ fmul fp1,fp12,fp13
+
+ addi r7,r7,1
+ b L(reduced)
+
+L(even_integer):
+ lfd fp11,(L(DPone)-L(anchor))(r9)
+
+ /* Now add up all the limbs in order. */
+ fadd fp12,fp12,fp7
+ fadd fp12,r12,fp8
+ fadd fp12,r12,fp9
+
+ /* We need to check if the addition of all the limbs resulted in us
+ overflowing 1.0. */
+ fcmpu 0,fp12,fp11
+ bgt L(greater_than_one)
+
+ /* And finally multiply by pi/4. */
+ lfd fp13,(L(pio4)-L(anchor))(r9)
+ fmul fp1,fp12,fp13
+
+ addi r7,r7,1
+ b L(reduced)
+
+L(greater_than_one):
+ /* We did overflow 1.0 when adding up all the limbs. Add 1.0 to our
+ integer, and subtract 1.0 from our result. Since that makes the
+ integer component odd, we need to subtract another 1.0 as
+ explained above. */
+ addi r7,r7,1
+
+ lfd fp11,(L(DPtwo)-L(anchor))(r9)
+ fsub fp12,fp12,fp11
+
+ /* And finally multiply by pi/4. */
+ lfd fp13,(L(pio4)-L(anchor))(r9)
+ fmul fp1,fp12,fp13
+
+ addi r7,r7,1
+ b L(reduced)
+
+ .balign 16
+L(less_2pn5):
+ lis r4,TWO_PN27@h
+ ori r4,r4,TWO_PN27@l
+
+ cmpw r3,r4
+ blt L(less_2pn27)
+
+ /* A simpler Chebyshev approximation is close enough for this range:
+ x+x^3*(SS0+x^2*SS1). */
+
+ lfd fp10,(L(SS0)-L(anchor))(r9)
+ lfd fp11,(L(SS1)-L(anchor))(r9)
+
+ fmul fp2,fp1,fp1 /* x^2 */
+ fmul fp3,fp2,fp1 /* x^3 */
+
+ fmadd fp4,fp2,fp11,fp10 /* SS0+x^2*SS1 */
+ fmadd fp1,fp3,fp4,fp1 /* x+x^3*(SS0+x^2*SS1) */
+
+ frsp fp1,fp1 /* Round to single precision. */
+
+ blr
+
+ .balign 16
+L(less_2pn27):
+ cmpwi r3,0
+ beq L(zero)
+
+ /* Handle some special cases:
+
+ sinf(subnormal) raises inexact/underflow
+ sinf(min_normalized) raises inexact/underflow
+ sinf(normalized) raises inexact. */
+
+ lfd fp2,(L(small)-L(anchor))(r9)
+
+ fmul fp2,fp1,fp2 /* x * small */
+ fsub fp1,fp1,fp2 /* x - x * small */
+
+ frsp fp1,fp1
+
+ blr
+
+ .balign 16
+L(zero):
+ blr
+
+END (__sinf)
+
+ .section .rodata, "a"
+
+ .balign 8
+
+L(anchor):
+
+ /* Chebyshev constants for sin, range -PI/4 - PI/4. */
+L(S0): .8byte 0xbfc5555555551cd9
+L(S1): .8byte 0x3f81111110c2688b
+L(S2): .8byte 0xbf2a019f8b4bd1f9
+L(S3): .8byte 0x3ec71d7264e6b5b4
+L(S4): .8byte 0xbe5a947e1674b58a
+
+ /* Chebyshev constants for sin, range 2^-27 - 2^-5. */
+L(SS0): .8byte 0xbfc555555543d49d
+L(SS1): .8byte 0x3f8110f475cec8c5
+
+ /* Chebyshev constants for cos, range -PI/4 - PI/4. */
+L(C0): .8byte 0xbfdffffffffe98ae
+L(C1): .8byte 0x3fa55555545c50c7
+L(C2): .8byte 0xbf56c16b348b6874
+L(C3): .8byte 0x3efa00eb9ac43cc0
+L(C4): .8byte 0xbe923c97dd8844d7
+
+L(invpio2):
+ .8byte 0x3fe45f306dc9c883 /* 2/PI */
+
+L(invpio4):
+ .8byte 0x3ff45f306dc9c883 /* 4/PI */
+
+L(invpio4_table):
+ .8byte 0x0000000000000000
+ .8byte 0x3ff45f306c000000
+ .8byte 0x3e3c9c882a000000
+ .8byte 0x3c54fe13a8000000
+ .8byte 0x3aaf47d4d0000000
+ .8byte 0x38fbb81b6c000000
+ .8byte 0x3714acc9e0000000
+ .8byte 0x3560e4107c000000
+ .8byte 0x33bca2c756000000
+ .8byte 0x31fbd778ac000000
+ .8byte 0x300b7246e0000000
+ .8byte 0x2e5d2126e8000000
+ .8byte 0x2c97003248000000
+ .8byte 0x2ad77504e8000000
+ .8byte 0x290921cfe0000000
+ .8byte 0x274deb1cb0000000
+ .8byte 0x25829a73e0000000
+ .8byte 0x23fd1046be000000
+ .8byte 0x2224baed10000000
+ .8byte 0x20709d338e000000
+ .8byte 0x1e535a2f80000000
+ .8byte 0x1cef904e64000000
+ .8byte 0x1b0d639830000000
+ .8byte 0x1964ce7d24000000
+ .8byte 0x17b908bf16000000
+
+L(pio4):
+ .8byte 0x3fe921fb54442d18 /* PI/4 */
+
+/* PI/2 as a sum of two doubles. We only use 32 bits of the upper limb
+ to avoid losing significant bits when multiplying with up to
+ (2^22)/(pi/2). */
+L(pio2hi):
+ .8byte 0xbff921fb54400000
+
+L(pio2lo):
+ .8byte 0xbdd0b4611a626332
+
+L(pio2_table):
+ .8byte 0
+ .8byte 0x3ff921fb54442d18 /* 1 * PI/2 */
+ .8byte 0x400921fb54442d18 /* 2 * PI/2 */
+ .8byte 0x4012d97c7f3321d2 /* 3 * PI/2 */
+ .8byte 0x401921fb54442d18 /* 4 * PI/2 */
+ .8byte 0x401f6a7a2955385e /* 5 * PI/2 */
+ .8byte 0x4022d97c7f3321d2 /* 6 * PI/2 */
+ .8byte 0x4025fdbbe9bba775 /* 7 * PI/2 */
+ .8byte 0x402921fb54442d18 /* 8 * PI/2 */
+ .8byte 0x402c463abeccb2bb /* 9 * PI/2 */
+ .8byte 0x402f6a7a2955385e /* 10 * PI/2 */
+
+L(small):
+ .8byte 0x3cd0000000000000 /* 2^-50 */
+
+L(ones):
+ .8byte 0x3ff0000000000000 /* +1.0 */
+ .8byte 0xbff0000000000000 /* -1.0 */
+
+L(DPhalf):
+ .8byte 0x3fe0000000000000 /* 0.5 */
+
+L(DPone):
+ .8byte 0x3ff0000000000000 /* 1.0 */
+
+L(DPtwo):
+ .8byte 0x4000000000000000 /* 2.0 */
+
+weak_alias(__sinf, sinf)
diff --git a/sysdeps/unix/sysv/linux/i386/lowlevellock.h b/sysdeps/unix/sysv/linux/i386/lowlevellock.h
index 58f5638e37..b8ccd31c0f 100644
--- a/sysdeps/unix/sysv/linux/i386/lowlevellock.h
+++ b/sysdeps/unix/sysv/linux/i386/lowlevellock.h
@@ -317,7 +317,7 @@ extern int __lll_trylock_elision(int *lock, short *adapt_count)
#define lll_lock_elision(futex, adapt_count, private) \
__lll_lock_elision (&(futex), &(adapt_count), private)
-#define lll_unlock_elision(futex, private) \
+#define lll_unlock_elision(futex, adapt_count, private) \
__lll_unlock_elision (&(futex), private)
#define lll_trylock_elision(futex, adapt_count) \
__lll_trylock_elision(&(futex), &(adapt_count))
diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-lock.c b/sysdeps/unix/sysv/linux/powerpc/elision-lock.c
index e11ad1dc21..2a0e5407dd 100644
--- a/sysdeps/unix/sysv/linux/powerpc/elision-lock.c
+++ b/sysdeps/unix/sysv/linux/powerpc/elision-lock.c
@@ -23,27 +23,6 @@
#include <elision-conf.h>
#include "htm.h"
-/* PowerISA 2.0.7 Section B.5.5 defines isync to be insufficient as a
- barrier in acquire mechanism for HTM operations, a strong 'sync' is
- required. */
-#undef __arch_compare_and_exchange_val_32_acq
-#define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
- ({ \
- __typeof (*(mem)) __tmp; \
- __typeof (mem) __memp = (mem); \
- __asm __volatile ( \
- "1: lwarx %0,0,%1" MUTEX_HINT_ACQ "\n" \
- " cmpw %0,%2\n" \
- " bne 2f\n" \
- " stwcx. %3,0,%1\n" \
- " bne- 1b\n" \
- "2: sync" \
- : "=&r" (__tmp) \
- : "b" (__memp), "r" (oldval), "r" (newval) \
- : "cr0", "memory"); \
- __tmp; \
- })
-
#if !defined(LLL_LOCK) && !defined(EXTRAARG)
/* Make sure the configuration code is always linked in for static
libraries. */
@@ -68,7 +47,6 @@ __lll_lock_elision (int *lock, short *adapt_count, EXTRAARG int pshared)
{
if (*adapt_count > 0)
{
- (*adapt_count)--;
goto use_lock;
}
diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c b/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
index edec155058..b391116b64 100644
--- a/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
+++ b/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
@@ -36,7 +36,6 @@ __lll_trylock_elision (int *futex, short *adapt_count)
/* Only try a transaction if it's worth it. */
if (*adapt_count > 0)
{
- (*adapt_count)--;
goto use_lock;
}
@@ -45,8 +44,12 @@ __lll_trylock_elision (int *futex, short *adapt_count)
if (*futex == 0)
return 0;
- /* Lock was busy. Fall back to normal locking. */
- __libc_tabort (_ABORT_LOCK_BUSY);
+ /* Lock was busy. This is never a nested transaction.
+ End it, and set the adapt count. */
+ __libc_tend (0);
+
+ if (aconf.skip_lock_busy > 0)
+ *adapt_count = aconf.skip_lock_busy;
}
else
{
@@ -58,9 +61,6 @@ __lll_trylock_elision (int *futex, short *adapt_count)
if (aconf.skip_trylock_internal_abort > 0)
*adapt_count = aconf.skip_trylock_internal_abort;
}
-
- if (aconf.skip_lock_busy > 0)
- *adapt_count = aconf.skip_lock_busy;
}
use_lock:
diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c b/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c
index 7234db6e2d..4b4ae62d9f 100644
--- a/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c
+++ b/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c
@@ -21,12 +21,20 @@
#include "htm.h"
int
-__lll_unlock_elision(int *lock, int pshared)
+__lll_unlock_elision (int *lock, short *adapt_count, int pshared)
{
/* When the lock was free we're in a transaction. */
if (*lock == 0)
__libc_tend (0);
else
- lll_unlock ((*lock), pshared);
+ {
+ lll_unlock ((*lock), pshared);
+
+ /* Update the adapt count AFTER completing the critical section.
+ Doing this here prevents unneeded stalling when entering
+ a critical section. Saving about 8% runtime on P8. */
+ if (*adapt_count > 0)
+ (*adapt_count)--;
+ }
return 0;
}
diff --git a/sysdeps/unix/sysv/linux/powerpc/htm.h b/sysdeps/unix/sysv/linux/powerpc/htm.h
index 7b49817710..75c99c236b 100644
--- a/sysdeps/unix/sysv/linux/powerpc/htm.h
+++ b/sysdeps/unix/sysv/linux/powerpc/htm.h
@@ -160,10 +160,12 @@
#endif /* __ASSEMBLER__ */
-/* Definitions used for TEXASR Failure code (bits 0:6), they need to be even
- because tabort. always sets the first bit. */
-#define _ABORT_LOCK_BUSY 0x3f /* Lock already used. */
-#define _ABORT_NESTED_TRYLOCK 0x3e /* Write operation in trylock. */
-#define _ABORT_SYSCALL 0x3d /* Syscall issued. */
+/* Definitions used for TEXASR Failure code (bits 0:7). If the failure
+ should be persistent, the abort code must be odd. 0xd0 through 0xff
+ are reserved for the kernel and potential hypervisor. */
+#define _ABORT_PERSISTENT 0x01 /* An unspecified persistent abort. */
+#define _ABORT_LOCK_BUSY 0x34 /* Busy lock, not persistent. */
+#define _ABORT_NESTED_TRYLOCK (0x32 | _ABORT_PERSISTENT)
+#define _ABORT_SYSCALL (0x30 | _ABORT_PERSISTENT)
#endif
diff --git a/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h b/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
index 67db1dece2..6769c253ce 100644
--- a/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
+++ b/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
@@ -32,7 +32,7 @@ extern int __lll_timedlock_elision
extern int __lll_lock_elision (int *futex, short *adapt_count, int private)
attribute_hidden;
-extern int __lll_unlock_elision(int *lock, int private)
+extern int __lll_unlock_elision (int *lock, short *adapt_count, int private)
attribute_hidden;
extern int __lll_trylock_elision(int *lock, short *adapt_count)
@@ -40,8 +40,8 @@ extern int __lll_trylock_elision(int *lock, short *adapt_count)
#define lll_lock_elision(futex, adapt_count, private) \
__lll_lock_elision (&(futex), &(adapt_count), private)
-#define lll_unlock_elision(futex, private) \
- __lll_unlock_elision (&(futex), private)
+#define lll_unlock_elision(futex, adapt_count, private) \
+ __lll_unlock_elision (&(futex), &(adapt_count), private)
#define lll_trylock_elision(futex, adapt_count) \
__lll_trylock_elision (&(futex), &(adapt_count))
diff --git a/sysdeps/unix/sysv/linux/s390/lowlevellock.h b/sysdeps/unix/sysv/linux/s390/lowlevellock.h
index 163a731bd3..cab5f4c7a3 100644
--- a/sysdeps/unix/sysv/linux/s390/lowlevellock.h
+++ b/sysdeps/unix/sysv/linux/s390/lowlevellock.h
@@ -41,7 +41,7 @@ extern int __lll_trylock_elision(int *futex, short *adapt_count)
# define lll_lock_elision(futex, adapt_count, private) \
__lll_lock_elision (&(futex), &(adapt_count), private)
-# define lll_unlock_elision(futex, private) \
+# define lll_unlock_elision(futex, adapt_count, private) \
__lll_unlock_elision (&(futex), private)
# define lll_trylock_elision(futex, adapt_count) \
__lll_trylock_elision(&(futex), &(adapt_count))
diff --git a/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h b/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
index de525cd4c7..1fbd31e1fa 100644
--- a/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
+++ b/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
@@ -342,7 +342,7 @@ extern int __lll_trylock_elision (int *lock, short *adapt_count)
#define lll_lock_elision(futex, adapt_count, private) \
__lll_lock_elision (&(futex), &(adapt_count), private)
-#define lll_unlock_elision(futex, private) \
+#define lll_unlock_elision(futex, adapt_count, private) \
__lll_unlock_elision (&(futex), private)
#define lll_trylock_elision(futex, adapt_count) \
__lll_trylock_elision (&(futex), &(adapt_count))