Diffstat (limited to 'sysdeps/powerpc/powerpc64/power7')
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/Implies                |    2
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/Makefile               |   11
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/add_n.S                |   98
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/bcopy.c                |    1
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/Implies            |    1
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/multiarch/Implies  |    1
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S         |   70
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S        |    1
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S          |   69
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S         |    1
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S          |   68
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S         |    1
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c           |    1
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c          |    1
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c          |    1
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/memchr.S               |  199
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/memcmp.S               | 1061
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/memcpy.S               |  430
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/memmove.S              |  835
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/mempcpy.S              |  472
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/memrchr.S              |  201
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/memset.S               |  399
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/multiarch/Implies      |    1
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/rawmemchr.S            |  115
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/stpncpy.S              |   24
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/strcasecmp.S           |  126
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S         |    5
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/strchr.S               |  230
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/strchrnul.S            |  131
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/strcmp.S               |  168
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/strlen.S               |  107
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/strncmp.S              |  227
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/strncpy.S              |  722
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/strnlen.S              |  182
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/strrchr.S              |  260
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c         |   27
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/strstr.S               |  521
-rw-r--r--  sysdeps/powerpc/powerpc64/power7/sub_n.S                |   23
38 files changed, 0 insertions, 6793 deletions
diff --git a/sysdeps/powerpc/powerpc64/power7/Implies b/sysdeps/powerpc/powerpc64/power7/Implies
deleted file mode 100644
index 9d68f39d22..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/Implies
+++ /dev/null
@@ -1,2 +0,0 @@
-powerpc/powerpc64/power6/fpu
-powerpc/powerpc64/power6
diff --git a/sysdeps/powerpc/powerpc64/power7/Makefile b/sysdeps/powerpc/powerpc64/power7/Makefile
deleted file mode 100644
index 89a2296085..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-ifeq ($(subdir),elf)
-# Prevent the use of VSX registers and insns in _dl_start, which under -O3
-# optimization may require a TOC reference before relocations are resolved.
-CFLAGS-rtld.c += -mno-vsx
-endif
-
-ifeq ($(subdir),string)
-sysdep_routines += strstr-ppc64
-CFLAGS-strncase.c += -funroll-loops
-CFLAGS-strncase_l.c += -funroll-loops
-endif
diff --git a/sysdeps/powerpc/powerpc64/power7/add_n.S b/sysdeps/powerpc/powerpc64/power7/add_n.S
deleted file mode 100644
index 6425afbc9f..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/add_n.S
+++ /dev/null
@@ -1,98 +0,0 @@
-/* PowerPC64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
- Copyright (C) 2003-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-/* cycles/limb
- * POWER7 2.18
- */
-
-#ifdef USE_AS_SUB
-# define FUNC __mpn_sub_n
-# define ADDSUBC subfe
-#else
-# define FUNC __mpn_add_n
-# define ADDSUBC adde
-#endif
-
-#define RP r3
-#define UP r4
-#define VP r5
-#define N r6
-
-EALIGN(FUNC, 5, 0)
-#ifdef USE_AS_SUB
- addic r0, r1, -1
-#else
- addic r0, r0, 0
-#endif
- andi. r7, N, 1
- beq L(bx0)
-
- ld r7, 0(UP)
- ld r9, 0(VP)
- ADDSUBC r11, r9, r7
- std r11, 0(RP)
- cmpldi N, 1
- beq L(end)
- addi UP, UP, 8
- addi VP, VP, 8
- addi RP, RP, 8
-
-L(bx0): addi r0, N, 2
- srdi r0, r0, 2
- mtctr r0
-
- andi. r7, N, 2
- bne L(mid)
-
- addi UP, UP, 16
- addi VP, VP, 16
- addi RP, RP, 16
-
- .align 5
-L(top): ld r6, -16(UP)
- ld r7, -8(UP)
- ld r8, -16(VP)
- ld r9, -8(VP)
- ADDSUBC r10, r8, r6
- ADDSUBC r11, r9, r7
- std r10, -16(RP)
- std r11, -8(RP)
-L(mid): ld r6, 0(UP)
- ld r7, 8(UP)
- ld r8, 0(VP)
- ld r9, 8(VP)
- ADDSUBC r10, r8, r6
- ADDSUBC r11, r9, r7
- std r10, 0(RP)
- std r11, 8(RP)
- addi UP, UP, 32
- addi VP, VP, 32
- addi RP, RP, 32
- bdnz L(top)
-
-L(end): subfe r3, r0, r0
-#ifdef USE_AS_SUB
- neg r3, r3
-#else
- addi r3, r3, 1
-#endif
- blr
-END(FUNC)
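
The loop above keeps the inter-limb carry in the CA bit (seeded by the addic in the preamble) and retires four limbs per iteration. A minimal C sketch of the same GMP-style __mpn_add_n contract; the function name and types here are illustrative, not glibc's internal API:

#include <stddef.h>
#include <stdint.h>

typedef uint64_t mp_limb_t;

/* rp[i] = up[i] + vp[i] with carry propagation; returns the final
   carry (0 or 1).  The assembly keeps this carry in CA instead of a
   register and unrolls the loop four limbs deep.  */
static mp_limb_t
mpn_add_n_sketch (mp_limb_t *rp, const mp_limb_t *up,
                  const mp_limb_t *vp, size_t n)
{
  mp_limb_t carry = 0;
  for (size_t i = 0; i < n; i++)
    {
      mp_limb_t u = up[i];
      mp_limb_t sum = u + vp[i];
      mp_limb_t c = sum < u;            /* Carry out of u + v.  */
      rp[i] = sum + carry;
      carry = c | (rp[i] < sum);        /* Carry out of adding old carry.  */
    }
  return carry;
}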
diff --git a/sysdeps/powerpc/powerpc64/power7/bcopy.c b/sysdeps/powerpc/powerpc64/power7/bcopy.c
deleted file mode 100644
index 4a6a400e7a..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/bcopy.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Implemented in memmove.S. */
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/Implies b/sysdeps/powerpc/powerpc64/power7/fpu/Implies
deleted file mode 100644
index 30fa17646e..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/fpu/Implies
+++ /dev/null
@@ -1 +0,0 @@
-powerpc/powerpc64/power6/fpu
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/power7/fpu/multiarch/Implies
deleted file mode 100644
index 410d289a6d..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/fpu/multiarch/Implies
+++ /dev/null
@@ -1 +0,0 @@
-powerpc/powerpc64/power6/fpu/multiarch
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S b/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
deleted file mode 100644
index 9ccc758c9e..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
+++ /dev/null
@@ -1,70 +0,0 @@
-/* finite(). PowerPC64/POWER7 version.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Luis Machado <luisgpm@br.ibm.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <math_ldbl_opt.h>
-
-/* int __finite(x) */
- .section ".toc","aw"
-.LC0: /* 1.0 */
- .tc FD_ONE[TC],0x3ff0000000000000
- .section ".text"
- .type __finite, @function
- .machine power7
-EALIGN (__finite, 4, 0)
- CALL_MCOUNT 0
- lfd fp0,.LC0@toc(r2)
- ftdiv cr7,fp1,fp0
- li r3,1
- bflr 30
-
- /* If we are here, we either have +/-INF,
- NaN or denormal. */
-
- stfd fp1,-16(r1) /* Transfer FP to GPR's. */
- ori 2,2,0 /* Force a new dispatch group. */
- lhz r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
- (biased exponent and sign bit). */
- clrlwi r4,r4,17 /* r4 = abs(r4). */
- cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
- bltlr cr7 /* LT means finite, otherwise non-finite. */
- li r3,0
- blr
- END (__finite)
-
-hidden_def (__finite)
-weak_alias (__finite, finite)
-
-/* It turns out that the 'double' version will also always work for
- single-precision. */
-strong_alias (__finite, __finitef)
-hidden_def (__finitef)
-weak_alias (__finitef, finitef)
-
-#if IS_IN (libm)
-# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0)
-compat_symbol (libm, __finite, __finitel, GLIBC_2_0)
-compat_symbol (libm, finite, finitel, GLIBC_2_0)
-# endif
-#else
-# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0)
-compat_symbol (libc, __finite, __finitel, GLIBC_2_0);
-compat_symbol (libc, finite, finitel, GLIBC_2_0);
-# endif
-#endif
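
The fast path above classifies x with ftdiv against 1.0 and only falls back to integer code for the ambiguous INF/NaN/denormal cases, where it inspects the top 16 bits of the value. A hedged C sketch of that fallback test (a double is finite iff its biased exponent is not all ones):

#include <stdint.h>
#include <string.h>

static int
finite_sketch (double x)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);      /* Transfer FP to integer.  */
  /* Clear the sign bit of the top halfword, then compare against
     0x7ff0, exactly as the lhz/clrlwi/cmpwi sequence above does.  */
  return ((bits >> 48) & 0x7fff) < 0x7ff0;
}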
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S b/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S
deleted file mode 100644
index 54bd94176d..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S
+++ /dev/null
@@ -1 +0,0 @@
-/* This function uses the same code as s_finite.S. */
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S b/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
deleted file mode 100644
index 4482cddcfa..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
+++ /dev/null
@@ -1,69 +0,0 @@
-/* isinf(). PowerPC64/POWER7 version.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Luis Machado <luisgpm@br.ibm.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <math_ldbl_opt.h>
-
-/* int __isinf(x) */
- .section ".toc","aw"
-.LC0: /* 1.0 */
- .tc FD_ONE[TC],0x3ff0000000000000
- .section ".text"
- .type __isinf, @function
- .machine power7
-EALIGN (__isinf, 4, 0)
- CALL_MCOUNT 0
- lfd fp0,.LC0@toc(r2)
- ftdiv cr7,fp1,fp0
- li r3,0
- bflr 29 /* If not INF, return. */
-
- /* Either we have -INF/+INF or a denormal. */
-
- stfd fp1,-16(r1) /* Transfer FP to GPR's. */
- ori 2,2,0 /* Force a new dispatch group. */
- lhz r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
- (biased exponent and sign bit). */
- cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
- li r3,1
- beqlr cr7 /* EQ means INF, otherwise -INF. */
- li r3,-1
- blr
- END (__isinf)
-
-hidden_def (__isinf)
-weak_alias (__isinf, isinf)
-
-/* It turns out that the 'double' version will also always work for
- single-precision. */
-strong_alias (__isinf, __isinff)
-hidden_def (__isinff)
-weak_alias (__isinff, isinff)
-
-#ifdef NO_LONG_DOUBLE
-strong_alias (__isinf, __isinfl)
-weak_alias (__isinf, isinfl)
-#endif
-
-#if !IS_IN (libm)
-# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
-compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0);
-compat_symbol (libc, isinf, isinfl, GLIBC_2_0);
-# endif
-#endif
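
Like __finite, this routine prescreens with ftdiv and only inspects bits for the INF/denormal cases, returning 1 for +INF and -1 for -INF. A rough C equivalent of the bit-level test:

#include <stdint.h>
#include <string.h>

static int
isinf_sketch (double x)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);
  if ((bits & 0x7fffffffffffffffULL) != 0x7ff0000000000000ULL)
    return 0;                       /* Not an infinity.  */
  return (bits >> 63) ? -1 : 1;     /* Sign bit selects -INF vs +INF.  */
}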
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S b/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S
deleted file mode 100644
index be759e091e..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S
+++ /dev/null
@@ -1 +0,0 @@
-/* This function uses the same code as s_isinf.S. */
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S b/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S
deleted file mode 100644
index 46b08a0d37..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S
+++ /dev/null
@@ -1,68 +0,0 @@
-/* isnan(). PowerPC64/POWER7 version.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Luis Machado <luisgpm@br.ibm.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <math_ldbl_opt.h>
-
-/* int __isnan(x) */
- .section ".toc","aw"
-.LC0: /* 1.0 */
- .tc FD_ONE[TC],0x3ff0000000000000
- .section ".text"
- .type __isnan, @function
- .machine power7
-EALIGN (__isnan, 4, 0)
- CALL_MCOUNT 0
- lfd fp0,.LC0@toc(r2)
- ftdiv cr7,fp1,fp0
- li r3,0
- bflr 30 /* If not NaN, finish. */
-
- stfd fp1,-16(r1) /* Transfer FP to GPR's. */
- ori 2,2,0 /* Force a new dispatch group. */
- ld r4,-16(r1) /* Load FP into GPR. */
- lis r0,0x7ff0
- sldi r0,r0,32 /* const long r0 0x7ff00000 00000000. */
- clrldi r4,r4,1 /* x = fabs(x) */
- cmpd cr7,r4,r0 /* if (fabs(x) <= inf) */
- blelr cr7 /* LE means not NaN. */
- li r3,1 /* else return 1 */
- blr
- END (__isnan)
-
-hidden_def (__isnan)
-weak_alias (__isnan, isnan)
-
-/* It turns out that the 'double' version will also always work for
- single-precision. */
-strong_alias (__isnan, __isnanf)
-hidden_def (__isnanf)
-weak_alias (__isnanf, isnanf)
-
-#ifdef NO_LONG_DOUBLE
-strong_alias (__isnan, __isnanl)
-weak_alias (__isnan, isnanl)
-#endif
-
-#if !IS_IN (libm)
-# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
-compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0);
-compat_symbol (libc, isnan, isnanl, GLIBC_2_0);
-# endif
-#endif
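
The integer fallback above relies on NaN being the only encoding whose absolute value compares greater than +INF's bit pattern. A compact C sketch of that comparison:

#include <stdint.h>
#include <string.h>

static int
isnan_sketch (double x)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);
  /* fabs: clear the sign bit.  Anything strictly above +INF's pattern
     has exponent 0x7ff and a nonzero mantissa, i.e. is a NaN.  */
  return (bits & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL;
}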
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S b/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S
deleted file mode 100644
index b48c85e0d3..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S
+++ /dev/null
@@ -1 +0,0 @@
-/* This function uses the same code as s_isnan.S. */
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c b/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c
deleted file mode 100644
index 2599c771d9..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/powerpc/power7/fpu/s_logb.c>
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c b/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c
deleted file mode 100644
index 7a5a8032e0..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/powerpc/power7/fpu/s_logbf.c>
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c b/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c
deleted file mode 100644
index 524ae2c78d..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/powerpc/power7/fpu/s_logbl.c>
diff --git a/sysdeps/powerpc/powerpc64/power7/memchr.S b/sysdeps/powerpc/powerpc64/power7/memchr.S
deleted file mode 100644
index 5e9707aa02..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/memchr.S
+++ /dev/null
@@ -1,199 +0,0 @@
-/* Optimized memchr implementation for PowerPC64/POWER7 using cmpb insn.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Luis Machado <luisgpm@br.ibm.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-/* void * [r3] memchr (const void *s [r3], int byte [r4], size_t size [r5]) */
-
-#ifndef MEMCHR
-# define MEMCHR __memchr
-#endif
- .machine power7
-ENTRY (MEMCHR)
- CALL_MCOUNT 3
- dcbt 0,r3
- clrrdi r8,r3,3
- insrdi r4,r4,8,48
-
- /* Calculate the last acceptable address and check for possible
- addition overflow by using saturated math:
- r7 = r3 + r5
- r7 |= -(r7 < x) */
- add r7,r3,r5
- subfc r6,r3,r7
- subfe r9,r9,r9
- extsw r6,r9
- or r7,r7,r6
-
- insrdi r4,r4,16,32
- cmpldi r5,32
- li r9, -1
- rlwinm r6,r3,3,26,28 /* Calculate padding. */
- insrdi r4,r4,32,0
- addi r7,r7,-1
-#ifdef __LITTLE_ENDIAN__
- sld r9,r9,r6
-#else
- srd r9,r9,r6
-#endif
- ble L(small_range)
-
- ld r12,0(r8) /* Load doubleword from memory. */
- cmpb r3,r12,r4 /* Check for BYTEs in DWORD1. */
- and r3,r3,r9
- clrldi r5,r7,61 /* Byte count - 1 in last dword. */
- clrrdi r7,r7,3 /* Address of last doubleword. */
- cmpldi cr7,r3,0 /* Does r3 indicate we got a hit? */
- bne cr7,L(done)
-
- mtcrf 0x01,r8
- /* Are we now aligned to a quadword boundary? If so, skip to
- the main loop. Otherwise, go through the alignment code. */
- bt 28,L(loop_setup)
-
- /* Handle DWORD2 of pair. */
- ldu r12,8(r8)
- cmpb r3,r12,r4
- cmpldi cr7,r3,0
- bne cr7,L(done)
-
-L(loop_setup):
- /* The last dword we want to read in the loop below is the one
- containing the last byte of the string, ie. the dword at
- (s + size - 1) & ~7, or r7. The first dword read is at
- r8 + 8, we read 2 * cnt dwords, so the last dword read will
- be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives
- cnt = (r7 - r8) / 16 */
- sub r6,r7,r8
- srdi r6,r6,4 /* Number of loop iterations. */
- mtctr r6 /* Setup the counter. */
-
- /* Main loop to look for BYTE in the string. Since
- it's a small loop (8 instructions), align it to 32-bytes. */
- .align 5
-L(loop):
- /* Load two doublewords, compare and merge in a
- single register for speed. This is an attempt
- to speed up the byte-checking process for bigger strings. */
- ld r12,8(r8)
- ldu r11,16(r8)
- cmpb r3,r12,r4
- cmpb r9,r11,r4
- or r6,r9,r3 /* Merge everything in one doubleword. */
- cmpldi cr7,r6,0
- bne cr7,L(found)
- bdnz L(loop)
-
- /* We may have one more dword to read. */
- cmpld r8,r7
- beqlr
-
- ldu r12,8(r8)
- cmpb r3,r12,r4
- cmpldi cr6,r3,0
- bne cr6,L(done)
- blr
-
- .align 4
-L(found):
- /* OK, one (or both) of the doublewords contains BYTE. Check
- the first doubleword and decrement the address in case the first
- doubleword really contains BYTE. */
- cmpldi cr6,r3,0
- addi r8,r8,-8
- bne cr6,L(done)
-
- /* BYTE must be in the second doubleword. Adjust the address
- again and move the result of cmpb to r3 so we can calculate the
- pointer. */
-
- mr r3,r9
- addi r8,r8,8
-
- /* r3 has the output of the cmpb instruction, that is, it contains
- 0xff in the same position as BYTE in the original
- doubleword from the string. Use that to calculate the pointer.
- We need to make sure BYTE is *before* the end of the range. */
-L(done):
-#ifdef __LITTLE_ENDIAN__
- addi r0,r3,-1
- andc r0,r0,r3
- popcntd r0,r0 /* Count trailing zeros. */
-#else
- cntlzd r0,r3 /* Count leading zeros before the match. */
-#endif
- cmpld r8,r7 /* Are we on the last dword? */
- srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
- add r3,r8,r0
- cmpld cr7,r0,r5 /* If on the last dword, check byte offset. */
- bnelr
- blelr cr7
- li r3,0
- blr
-
- .align 4
-L(null):
- li r3,0
- blr
-
-/* Deals with size <= 32. */
- .align 4
-L(small_range):
- cmpldi r5,0
- beq L(null)
- ld r12,0(r8) /* Load word from memory. */
- cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
- and r3,r3,r9
- cmpldi cr7,r3,0
- clrldi r5,r7,61 /* Byte count - 1 in last dword. */
- clrrdi r7,r7,3 /* Address of last doubleword. */
- cmpld r8,r7 /* Are we done already? */
- bne cr7,L(done)
- beqlr
-
- ldu r12,8(r8)
- cmpb r3,r12,r4
- cmpldi cr6,r3,0
- cmpld r8,r7
- bne cr6,L(done) /* Found something. */
- beqlr /* Hit end of string (length). */
-
- ldu r12,8(r8)
- cmpb r3,r12,r4
- cmpldi cr6,r3,0
- cmpld r8,r7
- bne cr6,L(done)
- beqlr
-
- ldu r12,8(r8)
- cmpb r3,r12,r4
- cmpldi cr6,r3,0
- cmpld r8,r7
- bne cr6,L(done)
- beqlr
-
- ldu r12,8(r8)
- cmpb r3,r12,r4
- cmpldi cr6,r3,0
- bne cr6,L(done)
- blr
-
-END (MEMCHR)
-weak_alias (__memchr, memchr)
-libc_hidden_builtin_def (memchr)
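
Two ideas carry this routine: the saturated computation of the last acceptable address (so s + size cannot wrap past the end of the address space), and the cmpb instruction, which compares two doublewords lane-by-lane and yields 0xff in every matching byte. A portable C sketch of the cmpb approach, assuming little-endian match ordering (the real code handles both endiannesses and the partial first/last doublewords):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Emulate cmpb: 0xff in each byte lane where a and b agree.  */
static uint64_t
cmpb_sketch (uint64_t a, uint64_t b)
{
  uint64_t x = a ^ b;                        /* Zero byte where equal.  */
  uint64_t m = (x & 0x7f7f7f7f7f7f7f7fULL) + 0x7f7f7f7f7f7f7f7fULL;
  m = ~(m | x | 0x7f7f7f7f7f7f7f7fULL);      /* 0x80 where byte was zero.  */
  return m | ((m >> 7) * 0x7f);              /* Widen 0x80 lanes to 0xff.  */
}

static void *
memchr_sketch (const void *s, int byte, size_t n)
{
  const unsigned char *p = s;
  uint64_t pat = 0x0101010101010101ULL * (unsigned char) byte;

  while (n >= 8)
    {
      uint64_t w;
      memcpy (&w, p, 8);             /* The assembly loads aligned dwords.  */
      uint64_t hits = cmpb_sketch (w, pat);
      if (hits != 0)                 /* First match = lowest set lane.  */
        return (void *) (p + (__builtin_ctzll (hits) >> 3));
      p += 8;
      n -= 8;
    }
  for (; n > 0; n--, p++)
    if (*p == (unsigned char) byte)
      return (void *) p;
  return NULL;
}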
diff --git a/sysdeps/powerpc/powerpc64/power7/memcmp.S b/sysdeps/powerpc/powerpc64/power7/memcmp.S
deleted file mode 100644
index 96ce8cee25..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/memcmp.S
+++ /dev/null
@@ -1,1061 +0,0 @@
-/* Optimized memcmp implementation for POWER7/PowerPC64.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-/* int [r3] memcmp (const char *s1 [r3],
- const char *s2 [r4],
- size_t size [r5]) */
-#ifndef MEMCMP
-# define MEMCMP memcmp
-#endif
- .machine power7
-EALIGN (MEMCMP, 4, 0)
- CALL_MCOUNT 3
-
-#define rRTN r3
-#define rSTR1 r3 /* first string arg */
-#define rSTR2 r4 /* second string arg */
-#define rN r5 /* max string length */
-#define rWORD1 r6 /* current word in s1 */
-#define rWORD2 r7 /* current word in s2 */
-#define rWORD3 r8 /* next word in s1 */
-#define rWORD4 r9 /* next word in s2 */
-#define rWORD5 r10 /* next word in s1 */
-#define rWORD6 r11 /* next word in s2 */
-
-#define rOFF8 r20 /* 8 bytes offset. */
-#define rOFF16 r21 /* 16 bytes offset. */
-#define rOFF24 r22 /* 24 bytes offset. */
-#define rOFF32 r23 /* 32 bytes offset. */
-#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */
-#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */
-#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */
-#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */
-#define rSHR r28 /* Unaligned shift right count. */
-#define rSHL r29 /* Unaligned shift left count. */
-#define rWORD7 r30 /* next word in s1 */
-#define rWORD8 r31 /* next word in s2 */
-
-#define rWORD8SAVE (-8)
-#define rWORD7SAVE (-16)
-#define rOFF8SAVE (-24)
-#define rOFF16SAVE (-32)
-#define rOFF24SAVE (-40)
-#define rOFF32SAVE (-48)
-#define rSHRSAVE (-56)
-#define rSHLSAVE (-64)
-#define rWORD8SHIFTSAVE (-72)
-#define rWORD2SHIFTSAVE (-80)
-#define rWORD4SHIFTSAVE (-88)
-#define rWORD6SHIFTSAVE (-96)
-
-#ifdef __LITTLE_ENDIAN__
-# define LD ldbrx
-#else
-# define LD ldx
-#endif
-
- xor r0, rSTR2, rSTR1
- cmpldi cr6, rN, 0
- cmpldi cr1, rN, 12
- clrldi. r0, r0, 61
- clrldi r12, rSTR1, 61
- cmpldi cr5, r12, 0
- beq- cr6, L(zeroLength)
- dcbt 0, rSTR1
- dcbt 0, rSTR2
-/* If less than 8 bytes or not aligned, use the unaligned
- byte loop. */
- blt cr1, L(bytealigned)
- std rWORD8, rWORD8SAVE(r1)
- std rWORD7, rWORD7SAVE(r1)
- std rOFF8, rOFF8SAVE(r1)
- std rOFF16, rOFF16SAVE(r1)
- std rOFF24, rOFF24SAVE(r1)
- std rOFF32, rOFF32SAVE(r1)
- cfi_offset(rWORD8, rWORD8SAVE)
- cfi_offset(rWORD7, rWORD7SAVE)
- cfi_offset(rOFF8, rOFF8SAVE)
- cfi_offset(rOFF16, rOFF16SAVE)
- cfi_offset(rOFF24, rOFF24SAVE)
- cfi_offset(rOFF32, rOFF32SAVE)
-
- li rOFF8,8
- li rOFF16,16
- li rOFF24,24
- li rOFF32,32
-
- bne L(unaligned)
-/* At this point we know both strings have the same alignment and the
- compare length is at least 8 bytes. r12 contains the low order
- 3 bits of rSTR1 and cr5 contains the result of the logical compare
- of r12 to 0. If r12 == 0 then we are already double word
- aligned and can perform the DW aligned loop.
-
- Otherwise we know the two strings have the same alignment (but not
- yet DW). So we force the string addresses to the next lower DW
- boundary and special case this first DW using shift left to
- eliminate bits preceding the first byte. Since we want to join the
- normal (DW aligned) compare loop, starting at the second double word,
- we need to adjust the length (rN) and special case the loop
- versioning for the first DW. This ensures that the loop count is
- correct and the first DW (shifted) is in the expected register pair. */
- .align 4
-L(samealignment):
- clrrdi rSTR1, rSTR1, 3
- clrrdi rSTR2, rSTR2, 3
- beq cr5, L(DWaligned)
- add rN, rN, r12
- sldi rWORD6, r12, 3
- srdi r0, rN, 5 /* Divide by 32 */
- andi. r12, rN, 24 /* Get the DW remainder */
- LD rWORD1, 0, rSTR1
- LD rWORD2, 0, rSTR2
- cmpldi cr1, r12, 16
- cmpldi cr7, rN, 32
- clrldi rN, rN, 61
- beq L(dPs4)
- mtctr r0
- bgt cr1, L(dPs3)
- beq cr1, L(dPs2)
-
-/* Remainder is 8 */
- .align 3
-L(dsP1):
- sld rWORD5, rWORD1, rWORD6
- sld rWORD6, rWORD2, rWORD6
- cmpld cr5, rWORD5, rWORD6
- blt cr7, L(dP1x)
-/* Do something useful in this cycle since we have to branch anyway. */
- LD rWORD1, rOFF8, rSTR1
- LD rWORD2, rOFF8, rSTR2
- cmpld cr7, rWORD1, rWORD2
- b L(dP1e)
-/* Remainder is 16 */
- .align 4
-L(dPs2):
- sld rWORD5, rWORD1, rWORD6
- sld rWORD6, rWORD2, rWORD6
- cmpld cr6, rWORD5, rWORD6
- blt cr7, L(dP2x)
-/* Do something useful in this cycle since we have to branch anyway. */
- LD rWORD7, rOFF8, rSTR1
- LD rWORD8, rOFF8, rSTR2
- cmpld cr5, rWORD7, rWORD8
- b L(dP2e)
-/* Remainder is 24 */
- .align 4
-L(dPs3):
- sld rWORD3, rWORD1, rWORD6
- sld rWORD4, rWORD2, rWORD6
- cmpld cr1, rWORD3, rWORD4
- b L(dP3e)
-/* Count is a multiple of 32, remainder is 0 */
- .align 4
-L(dPs4):
- mtctr r0
- sld rWORD1, rWORD1, rWORD6
- sld rWORD2, rWORD2, rWORD6
- cmpld cr7, rWORD1, rWORD2
- b L(dP4e)
-
-/* At this point we know both strings are double word aligned and the
- compare length is at least 8 bytes. */
- .align 4
-L(DWaligned):
- andi. r12, rN, 24 /* Get the DW remainder */
- srdi r0, rN, 5 /* Divide by 32 */
- cmpldi cr1, r12, 16
- cmpldi cr7, rN, 32
- clrldi rN, rN, 61
- beq L(dP4)
- bgt cr1, L(dP3)
- beq cr1, L(dP2)
-
-/* Remainder is 8 */
- .align 4
-L(dP1):
- mtctr r0
-/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
- (8-15 byte compare), we want to use only volatile registers. This
- means we can avoid restoring non-volatile registers since we did not
- change any on the early exit path. The key here is the non-early
- exit path only cares about the condition code (cr5), not about which
- register pair was used. */
- LD rWORD5, 0, rSTR1
- LD rWORD6, 0, rSTR2
- cmpld cr5, rWORD5, rWORD6
- blt cr7, L(dP1x)
- LD rWORD1, rOFF8, rSTR1
- LD rWORD2, rOFF8, rSTR2
- cmpld cr7, rWORD1, rWORD2
-L(dP1e):
- LD rWORD3, rOFF16, rSTR1
- LD rWORD4, rOFF16, rSTR2
- cmpld cr1, rWORD3, rWORD4
- LD rWORD5, rOFF24, rSTR1
- LD rWORD6, rOFF24, rSTR2
- cmpld cr6, rWORD5, rWORD6
- bne cr5, L(dLcr5x)
- bne cr7, L(dLcr7x)
-
- LD rWORD7, rOFF32, rSTR1
- LD rWORD8, rOFF32, rSTR2
- addi rSTR1, rSTR1, 32
- addi rSTR2, rSTR2, 32
- bne cr1, L(dLcr1)
- cmpld cr5, rWORD7, rWORD8
- bdnz L(dLoop)
- bne cr6, L(dLcr6)
- ld rWORD8, rWORD8SAVE(r1)
- ld rWORD7, rWORD7SAVE(r1)
- .align 3
-L(dP1x):
- sldi. r12, rN, 3
- bne cr5, L(dLcr5x)
- subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
- bne L(d00)
- ld rOFF8, rOFF8SAVE(r1)
- ld rOFF16, rOFF16SAVE(r1)
- ld rOFF24, rOFF24SAVE(r1)
- ld rOFF32, rOFF32SAVE(r1)
- li rRTN, 0
- blr
-
-/* Remainder is 16 */
- .align 4
-L(dP2):
- mtctr r0
- LD rWORD5, 0, rSTR1
- LD rWORD6, 0, rSTR2
- cmpld cr6, rWORD5, rWORD6
- blt cr7, L(dP2x)
- LD rWORD7, rOFF8, rSTR1
- LD rWORD8, rOFF8, rSTR2
- cmpld cr5, rWORD7, rWORD8
-L(dP2e):
- LD rWORD1, rOFF16, rSTR1
- LD rWORD2, rOFF16, rSTR2
- cmpld cr7, rWORD1, rWORD2
- LD rWORD3, rOFF24, rSTR1
- LD rWORD4, rOFF24, rSTR2
- cmpld cr1, rWORD3, rWORD4
- addi rSTR1, rSTR1, 8
- addi rSTR2, rSTR2, 8
- bne cr6, L(dLcr6)
- bne cr5, L(dLcr5)
- b L(dLoop2)
- .align 4
-L(dP2x):
- LD rWORD3, rOFF8, rSTR1
- LD rWORD4, rOFF8, rSTR2
- cmpld cr1, rWORD3, rWORD4
- sldi. r12, rN, 3
- bne cr6, L(dLcr6x)
- addi rSTR1, rSTR1, 8
- addi rSTR2, rSTR2, 8
- bne cr1, L(dLcr1x)
- subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
- bne L(d00)
- ld rOFF8, rOFF8SAVE(r1)
- ld rOFF16, rOFF16SAVE(r1)
- ld rOFF24, rOFF24SAVE(r1)
- ld rOFF32, rOFF32SAVE(r1)
- li rRTN, 0
- blr
-
-/* Remainder is 24 */
- .align 4
-L(dP3):
- mtctr r0
- LD rWORD3, 0, rSTR1
- LD rWORD4, 0, rSTR2
- cmpld cr1, rWORD3, rWORD4
-L(dP3e):
- LD rWORD5, rOFF8, rSTR1
- LD rWORD6, rOFF8, rSTR2
- cmpld cr6, rWORD5, rWORD6
- blt cr7, L(dP3x)
- LD rWORD7, rOFF16, rSTR1
- LD rWORD8, rOFF16, rSTR2
- cmpld cr5, rWORD7, rWORD8
- LD rWORD1, rOFF24, rSTR1
- LD rWORD2, rOFF24, rSTR2
- cmpld cr7, rWORD1, rWORD2
- addi rSTR1, rSTR1, 16
- addi rSTR2, rSTR2, 16
- bne cr1, L(dLcr1)
- bne cr6, L(dLcr6)
- b L(dLoop1)
-/* Again we are on an early exit path (24-31 byte compare); we want to
- use only volatile registers and avoid restoring non-volatile
- registers. */
- .align 4
-L(dP3x):
- LD rWORD1, rOFF16, rSTR1
- LD rWORD2, rOFF16, rSTR2
- cmpld cr7, rWORD1, rWORD2
- sldi. r12, rN, 3
- bne cr1, L(dLcr1x)
- addi rSTR1, rSTR1, 16
- addi rSTR2, rSTR2, 16
- bne cr6, L(dLcr6x)
- subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
- bne cr7, L(dLcr7x)
- bne L(d00)
- ld rOFF8, rOFF8SAVE(r1)
- ld rOFF16, rOFF16SAVE(r1)
- ld rOFF24, rOFF24SAVE(r1)
- ld rOFF32, rOFF32SAVE(r1)
- li rRTN, 0
- blr
-
-/* Count is a multiple of 32, remainder is 0 */
- .align 4
-L(dP4):
- mtctr r0
- LD rWORD1, 0, rSTR1
- LD rWORD2, 0, rSTR2
- cmpld cr7, rWORD1, rWORD2
-L(dP4e):
- LD rWORD3, rOFF8, rSTR1
- LD rWORD4, rOFF8, rSTR2
- cmpld cr1, rWORD3, rWORD4
- LD rWORD5, rOFF16, rSTR1
- LD rWORD6, rOFF16, rSTR2
- cmpld cr6, rWORD5, rWORD6
- LD rWORD7, rOFF24, rSTR1
- LD rWORD8, rOFF24, rSTR2
- addi rSTR1, rSTR1, 24
- addi rSTR2, rSTR2, 24
- cmpld cr5, rWORD7, rWORD8
- bne cr7, L(dLcr7)
- bne cr1, L(dLcr1)
- bdz- L(d24) /* Adjust CTR as we start with +4 */
-/* This is the primary loop */
- .align 4
-L(dLoop):
- LD rWORD1, rOFF8, rSTR1
- LD rWORD2, rOFF8, rSTR2
- cmpld cr1, rWORD3, rWORD4
- bne cr6, L(dLcr6)
-L(dLoop1):
- LD rWORD3, rOFF16, rSTR1
- LD rWORD4, rOFF16, rSTR2
- cmpld cr6, rWORD5, rWORD6
- bne cr5, L(dLcr5)
-L(dLoop2):
- LD rWORD5, rOFF24, rSTR1
- LD rWORD6, rOFF24, rSTR2
- cmpld cr5, rWORD7, rWORD8
- bne cr7, L(dLcr7)
-L(dLoop3):
- LD rWORD7, rOFF32, rSTR1
- LD rWORD8, rOFF32, rSTR2
- addi rSTR1, rSTR1, 32
- addi rSTR2, rSTR2, 32
- bne cr1, L(dLcr1)
- cmpld cr7, rWORD1, rWORD2
- bdnz L(dLoop)
-
-L(dL4):
- cmpld cr1, rWORD3, rWORD4
- bne cr6, L(dLcr6)
- cmpld cr6, rWORD5, rWORD6
- bne cr5, L(dLcr5)
- cmpld cr5, rWORD7, rWORD8
-L(d44):
- bne cr7, L(dLcr7)
-L(d34):
- bne cr1, L(dLcr1)
-L(d24):
- bne cr6, L(dLcr6)
-L(d14):
- sldi. r12, rN, 3
- bne cr5, L(dLcr5)
-L(d04):
- ld rWORD8, rWORD8SAVE(r1)
- ld rWORD7, rWORD7SAVE(r1)
- subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
- beq L(duzeroLength)
-/* At this point we have a remainder of 1 to 7 bytes to compare. Since
- we are aligned it is safe to load the whole double word, and use
- shift right double to eliminate bits beyond the compare length. */
-L(d00):
- LD rWORD1, rOFF8, rSTR1
- LD rWORD2, rOFF8, rSTR2
- srd rWORD1, rWORD1, rN
- srd rWORD2, rWORD2, rN
- cmpld cr7, rWORD1, rWORD2
- bne cr7, L(dLcr7x)
- ld rOFF8, rOFF8SAVE(r1)
- ld rOFF16, rOFF16SAVE(r1)
- ld rOFF24, rOFF24SAVE(r1)
- ld rOFF32, rOFF32SAVE(r1)
- li rRTN, 0
- blr
-
- .align 4
-L(dLcr7):
- ld rWORD8, rWORD8SAVE(r1)
- ld rWORD7, rWORD7SAVE(r1)
-L(dLcr7x):
- ld rOFF8, rOFF8SAVE(r1)
- ld rOFF16, rOFF16SAVE(r1)
- ld rOFF24, rOFF24SAVE(r1)
- ld rOFF32, rOFF32SAVE(r1)
- li rRTN, 1
- bgtlr cr7
- li rRTN, -1
- blr
- .align 4
-L(dLcr1):
- ld rWORD8, rWORD8SAVE(r1)
- ld rWORD7, rWORD7SAVE(r1)
-L(dLcr1x):
- ld rOFF8, rOFF8SAVE(r1)
- ld rOFF16, rOFF16SAVE(r1)
- ld rOFF24, rOFF24SAVE(r1)
- ld rOFF32, rOFF32SAVE(r1)
- li rRTN, 1
- bgtlr cr1
- li rRTN, -1
- blr
- .align 4
-L(dLcr6):
- ld rWORD8, rWORD8SAVE(r1)
- ld rWORD7, rWORD7SAVE(r1)
-L(dLcr6x):
- ld rOFF8, rOFF8SAVE(r1)
- ld rOFF16, rOFF16SAVE(r1)
- ld rOFF24, rOFF24SAVE(r1)
- ld rOFF32, rOFF32SAVE(r1)
- li rRTN, 1
- bgtlr cr6
- li rRTN, -1
- blr
- .align 4
-L(dLcr5):
- ld rWORD8, rWORD8SAVE(r1)
- ld rWORD7, rWORD7SAVE(r1)
-L(dLcr5x):
- ld rOFF8, rOFF8SAVE(r1)
- ld rOFF16, rOFF16SAVE(r1)
- ld rOFF24, rOFF24SAVE(r1)
- ld rOFF32, rOFF32SAVE(r1)
- li rRTN, 1
- bgtlr cr5
- li rRTN, -1
- blr
-
- .align 4
-L(bytealigned):
- mtctr rN
-
-/* We need to prime this loop. This loop is swing modulo scheduled
- to avoid pipe delays. The dependent instruction latencies (load to
- compare to conditional branch) are 2 to 3 cycles. In this loop each
- dispatch group ends in a branch and takes 1 cycle. Effectively
- the first iteration of the loop only serves to load operands and
- branches based on compares are delayed until the next loop.
-
- So we must precondition some registers and condition codes so that
- we don't exit the loop early on the first iteration. */
-
- lbz rWORD1, 0(rSTR1)
- lbz rWORD2, 0(rSTR2)
- bdz L(b11)
- cmpld cr7, rWORD1, rWORD2
- lbz rWORD3, 1(rSTR1)
- lbz rWORD4, 1(rSTR2)
- bdz L(b12)
- cmpld cr1, rWORD3, rWORD4
- lbzu rWORD5, 2(rSTR1)
- lbzu rWORD6, 2(rSTR2)
- bdz L(b13)
- .align 4
-L(bLoop):
- lbzu rWORD1, 1(rSTR1)
- lbzu rWORD2, 1(rSTR2)
- bne cr7, L(bLcr7)
-
- cmpld cr6, rWORD5, rWORD6
- bdz L(b3i)
-
- lbzu rWORD3, 1(rSTR1)
- lbzu rWORD4, 1(rSTR2)
- bne cr1, L(bLcr1)
-
- cmpld cr7, rWORD1, rWORD2
- bdz L(b2i)
-
- lbzu rWORD5, 1(rSTR1)
- lbzu rWORD6, 1(rSTR2)
- bne cr6, L(bLcr6)
-
- cmpld cr1, rWORD3, rWORD4
- bdnz L(bLoop)
-
-/* We speculatively load bytes before we have tested the previous
- bytes. But we must avoid overrunning the length (in the ctr) to
- prevent these speculative loads from causing a segfault. In that
- case the loop will exit early (before all pending bytes are
- tested), so we must complete the pending operations before
- returning. */
-L(b1i):
- bne cr7, L(bLcr7)
- bne cr1, L(bLcr1)
- b L(bx56)
- .align 4
-L(b2i):
- bne cr6, L(bLcr6)
- bne cr7, L(bLcr7)
- b L(bx34)
- .align 4
-L(b3i):
- bne cr1, L(bLcr1)
- bne cr6, L(bLcr6)
- b L(bx12)
- .align 4
-L(bLcr7):
- li rRTN, 1
- bgtlr cr7
- li rRTN, -1
- blr
-L(bLcr1):
- li rRTN, 1
- bgtlr cr1
- li rRTN, -1
- blr
-L(bLcr6):
- li rRTN, 1
- bgtlr cr6
- li rRTN, -1
- blr
-
-L(b13):
- bne cr7, L(bx12)
- bne cr1, L(bx34)
-L(bx56):
- sub rRTN, rWORD5, rWORD6
- blr
- nop
-L(b12):
- bne cr7, L(bx12)
-L(bx34):
- sub rRTN, rWORD3, rWORD4
- blr
-L(b11):
-L(bx12):
- sub rRTN, rWORD1, rWORD2
- blr
-
- .align 4
-L(zeroLength):
- li rRTN, 0
- blr
-
- .align 4
-/* At this point we know the strings have different alignment and the
- compare length is at least 8 bytes. r12 contains the low order
- 3 bits of rSTR1 and cr5 contains the result of the logical compare
- of r12 to 0. If r12 == 0 then rSTR1 is double word
- aligned and we can perform the DWunaligned loop.
-
- Otherwise we know that rSTR1 is not yet DW aligned.
- So we can force the string addresses to the next lower DW
- boundary and special case this first DW using shift left to
- eliminate bits preceding the first byte. Since we want to join the
- normal (DWaligned) compare loop, starting at the second double word,
- we need to adjust the length (rN) and special case the loop
- versioning for the first DW. This ensures that the loop count is
- correct and the first DW (shifted) is in the expected register pair. */
-L(unaligned):
- std rSHL, rSHLSAVE(r1)
- cfi_offset(rSHL, rSHLSAVE)
- clrldi rSHL, rSTR2, 61
- beq cr6, L(duzeroLength)
- std rSHR, rSHRSAVE(r1)
- cfi_offset(rSHR, rSHRSAVE)
- beq cr5, L(DWunaligned)
- std rWORD8_SHIFT, rWORD8SHIFTSAVE(r1)
- cfi_offset(rWORD8_SHIFT, rWORD8SHIFTSAVE)
-/* Adjust the logical start of rSTR2 to compensate for the extra bits
- in the 1st rSTR1 DW. */
- sub rWORD8_SHIFT, rSTR2, r12
-/* But do not attempt to address the DW before that DW that contains
- the actual start of rSTR2. */
- clrrdi rSTR2, rSTR2, 3
- std rWORD2_SHIFT, rWORD2SHIFTSAVE(r1)
-/* Compute the left/right shift counts for the unaligned rSTR2,
- compensating for the logical (DW aligned) start of rSTR1. */
- clrldi rSHL, rWORD8_SHIFT, 61
- clrrdi rSTR1, rSTR1, 3
- std rWORD4_SHIFT, rWORD4SHIFTSAVE(r1)
- sldi rSHL, rSHL, 3
- cmpld cr5, rWORD8_SHIFT, rSTR2
- add rN, rN, r12
- sldi rWORD6, r12, 3
- std rWORD6_SHIFT, rWORD6SHIFTSAVE(r1)
- cfi_offset(rWORD2_SHIFT, rWORD2SHIFTSAVE)
- cfi_offset(rWORD4_SHIFT, rWORD4SHIFTSAVE)
- cfi_offset(rWORD6_SHIFT, rWORD6SHIFTSAVE)
- subfic rSHR, rSHL, 64
- srdi r0, rN, 5 /* Divide by 32 */
- andi. r12, rN, 24 /* Get the DW remainder */
-/* We normally need to load 2 DWs to start the unaligned rSTR2, but in
- this special case those bits may be discarded anyway. Also we
- must avoid loading a DW where none of the bits are part of rSTR2 as
- this may cross a page boundary and cause a page fault. */
- li rWORD8, 0
- blt cr5, L(dus0)
- LD rWORD8, 0, rSTR2
- addi rSTR2, rSTR2, 8
- sld rWORD8, rWORD8, rSHL
-
-L(dus0):
- LD rWORD1, 0, rSTR1
- LD rWORD2, 0, rSTR2
- cmpldi cr1, r12, 16
- cmpldi cr7, rN, 32
- srd r12, rWORD2, rSHR
- clrldi rN, rN, 61
- beq L(duPs4)
- mtctr r0
- or rWORD8, r12, rWORD8
- bgt cr1, L(duPs3)
- beq cr1, L(duPs2)
-
-/* Remainder is 8 */
- .align 4
-L(dusP1):
- sld rWORD8_SHIFT, rWORD2, rSHL
- sld rWORD7, rWORD1, rWORD6
- sld rWORD8, rWORD8, rWORD6
- bge cr7, L(duP1e)
-/* At this point we exit early with the first double word compare
- complete and remainder of 0 to 7 bytes. See L(du14) for details on
- how we handle the remaining bytes. */
- cmpld cr5, rWORD7, rWORD8
- sldi. rN, rN, 3
- bne cr5, L(duLcr5)
- cmpld cr7, rN, rSHR
- beq L(duZeroReturn)
- li r0, 0
- ble cr7, L(dutrim)
- LD rWORD2, rOFF8, rSTR2
- srd r0, rWORD2, rSHR
- b L(dutrim)
-/* Remainder is 16 */
- .align 4
-L(duPs2):
- sld rWORD6_SHIFT, rWORD2, rSHL
- sld rWORD5, rWORD1, rWORD6
- sld rWORD6, rWORD8, rWORD6
- b L(duP2e)
-/* Remainder is 24 */
- .align 4
-L(duPs3):
- sld rWORD4_SHIFT, rWORD2, rSHL
- sld rWORD3, rWORD1, rWORD6
- sld rWORD4, rWORD8, rWORD6
- b L(duP3e)
-/* Count is a multiple of 32, remainder is 0 */
- .align 4
-L(duPs4):
- mtctr r0
- or rWORD8, r12, rWORD8
- sld rWORD2_SHIFT, rWORD2, rSHL
- sld rWORD1, rWORD1, rWORD6
- sld rWORD2, rWORD8, rWORD6
- b L(duP4e)
-
-/* At this point we know rSTR1 is double word aligned and the
- compare length is at least 8 bytes. */
- .align 4
-L(DWunaligned):
- std rWORD8_SHIFT, rWORD8SHIFTSAVE(r1)
- clrrdi rSTR2, rSTR2, 3
- std rWORD2_SHIFT, rWORD2SHIFTSAVE(r1)
- srdi r0, rN, 5 /* Divide by 32 */
- std rWORD4_SHIFT, rWORD4SHIFTSAVE(r1)
- andi. r12, rN, 24 /* Get the DW remainder */
- std rWORD6_SHIFT, rWORD6SHIFTSAVE(r1)
- cfi_offset(rWORD8_SHIFT, rWORD8SHIFTSAVE)
- cfi_offset(rWORD2_SHIFT, rWORD2SHIFTSAVE)
- cfi_offset(rWORD4_SHIFT, rWORD4SHIFTSAVE)
- cfi_offset(rWORD6_SHIFT, rWORD6SHIFTSAVE)
- sldi rSHL, rSHL, 3
- LD rWORD6, 0, rSTR2
- LD rWORD8, rOFF8, rSTR2
- addi rSTR2, rSTR2, 8
- cmpldi cr1, r12, 16
- cmpldi cr7, rN, 32
- clrldi rN, rN, 61
- subfic rSHR, rSHL, 64
- sld rWORD6_SHIFT, rWORD6, rSHL
- beq L(duP4)
- mtctr r0
- bgt cr1, L(duP3)
- beq cr1, L(duP2)
-
-/* Remainder is 8 */
- .align 4
-L(duP1):
- srd r12, rWORD8, rSHR
- LD rWORD7, 0, rSTR1
- sld rWORD8_SHIFT, rWORD8, rSHL
- or rWORD8, r12, rWORD6_SHIFT
- blt cr7, L(duP1x)
-L(duP1e):
- LD rWORD1, rOFF8, rSTR1
- LD rWORD2, rOFF8, rSTR2
- cmpld cr5, rWORD7, rWORD8
- srd r0, rWORD2, rSHR
- sld rWORD2_SHIFT, rWORD2, rSHL
- or rWORD2, r0, rWORD8_SHIFT
- LD rWORD3, rOFF16, rSTR1
- LD rWORD4, rOFF16, rSTR2
- cmpld cr7, rWORD1, rWORD2
- srd r12, rWORD4, rSHR
- sld rWORD4_SHIFT, rWORD4, rSHL
- bne cr5, L(duLcr5)
- or rWORD4, r12, rWORD2_SHIFT
- LD rWORD5, rOFF24, rSTR1
- LD rWORD6, rOFF24, rSTR2
- cmpld cr1, rWORD3, rWORD4
- srd r0, rWORD6, rSHR
- sld rWORD6_SHIFT, rWORD6, rSHL
- bne cr7, L(duLcr7)
- or rWORD6, r0, rWORD4_SHIFT
- cmpld cr6, rWORD5, rWORD6
- b L(duLoop3)
- .align 4
-/* At this point we exit early with the first double word compare
- complete and remainder of 0 to 7 bytes. See L(du14) for details on
- how we handle the remaining bytes. */
-L(duP1x):
- cmpld cr5, rWORD7, rWORD8
- sldi. rN, rN, 3
- bne cr5, L(duLcr5)
- cmpld cr7, rN, rSHR
- beq L(duZeroReturn)
- li r0, 0
- ble cr7, L(dutrim)
- LD rWORD2, rOFF8, rSTR2
- srd r0, rWORD2, rSHR
- b L(dutrim)
-/* Remainder is 16 */
- .align 4
-L(duP2):
- srd r0, rWORD8, rSHR
- LD rWORD5, 0, rSTR1
- or rWORD6, r0, rWORD6_SHIFT
- sld rWORD6_SHIFT, rWORD8, rSHL
-L(duP2e):
- LD rWORD7, rOFF8, rSTR1
- LD rWORD8, rOFF8, rSTR2
- cmpld cr6, rWORD5, rWORD6
- srd r12, rWORD8, rSHR
- sld rWORD8_SHIFT, rWORD8, rSHL
- or rWORD8, r12, rWORD6_SHIFT
- blt cr7, L(duP2x)
- LD rWORD1, rOFF16, rSTR1
- LD rWORD2, rOFF16, rSTR2
- cmpld cr5, rWORD7, rWORD8
- bne cr6, L(duLcr6)
- srd r0, rWORD2, rSHR
- sld rWORD2_SHIFT, rWORD2, rSHL
- or rWORD2, r0, rWORD8_SHIFT
- LD rWORD3, rOFF24, rSTR1
- LD rWORD4, rOFF24, rSTR2
- cmpld cr7, rWORD1, rWORD2
- bne cr5, L(duLcr5)
- srd r12, rWORD4, rSHR
- sld rWORD4_SHIFT, rWORD4, rSHL
- or rWORD4, r12, rWORD2_SHIFT
- addi rSTR1, rSTR1, 8
- addi rSTR2, rSTR2, 8
- cmpld cr1, rWORD3, rWORD4
- b L(duLoop2)
- .align 4
-L(duP2x):
- cmpld cr5, rWORD7, rWORD8
- addi rSTR1, rSTR1, 8
- addi rSTR2, rSTR2, 8
- bne cr6, L(duLcr6)
- sldi. rN, rN, 3
- bne cr5, L(duLcr5)
- cmpld cr7, rN, rSHR
- beq L(duZeroReturn)
- li r0, 0
- ble cr7, L(dutrim)
- LD rWORD2, rOFF8, rSTR2
- srd r0, rWORD2, rSHR
- b L(dutrim)
-
-/* Remainder is 24 */
- .align 4
-L(duP3):
- srd r12, rWORD8, rSHR
- LD rWORD3, 0, rSTR1
- sld rWORD4_SHIFT, rWORD8, rSHL
- or rWORD4, r12, rWORD6_SHIFT
-L(duP3e):
- LD rWORD5, rOFF8, rSTR1
- LD rWORD6, rOFF8, rSTR2
- cmpld cr1, rWORD3, rWORD4
- srd r0, rWORD6, rSHR
- sld rWORD6_SHIFT, rWORD6, rSHL
- or rWORD6, r0, rWORD4_SHIFT
- LD rWORD7, rOFF16, rSTR1
- LD rWORD8, rOFF16, rSTR2
- cmpld cr6, rWORD5, rWORD6
- bne cr1, L(duLcr1)
- srd r12, rWORD8, rSHR
- sld rWORD8_SHIFT, rWORD8, rSHL
- or rWORD8, r12, rWORD6_SHIFT
- blt cr7, L(duP3x)
- LD rWORD1, rOFF24, rSTR1
- LD rWORD2, rOFF24, rSTR2
- cmpld cr5, rWORD7, rWORD8
- bne cr6, L(duLcr6)
- srd r0, rWORD2, rSHR
- sld rWORD2_SHIFT, rWORD2, rSHL
- or rWORD2, r0, rWORD8_SHIFT
- addi rSTR1, rSTR1, 16
- addi rSTR2, rSTR2, 16
- cmpld cr7, rWORD1, rWORD2
- b L(duLoop1)
- .align 4
-L(duP3x):
- addi rSTR1, rSTR1, 16
- addi rSTR2, rSTR2, 16
- cmpld cr5, rWORD7, rWORD8
- bne cr6, L(duLcr6)
- sldi. rN, rN, 3
- bne cr5, L(duLcr5)
- cmpld cr7, rN, rSHR
- beq L(duZeroReturn)
- li r0, 0
- ble cr7, L(dutrim)
- LD rWORD2, rOFF8, rSTR2
- srd r0, rWORD2, rSHR
- b L(dutrim)
-
-/* Count is a multiple of 32, remainder is 0 */
- .align 4
-L(duP4):
- mtctr r0
- srd r0, rWORD8, rSHR
- LD rWORD1, 0, rSTR1
- sld rWORD2_SHIFT, rWORD8, rSHL
- or rWORD2, r0, rWORD6_SHIFT
-L(duP4e):
- LD rWORD3, rOFF8, rSTR1
- LD rWORD4, rOFF8, rSTR2
- cmpld cr7, rWORD1, rWORD2
- srd r12, rWORD4, rSHR
- sld rWORD4_SHIFT, rWORD4, rSHL
- or rWORD4, r12, rWORD2_SHIFT
- LD rWORD5, rOFF16, rSTR1
- LD rWORD6, rOFF16, rSTR2
- cmpld cr1, rWORD3, rWORD4
- bne cr7, L(duLcr7)
- srd r0, rWORD6, rSHR
- sld rWORD6_SHIFT, rWORD6, rSHL
- or rWORD6, r0, rWORD4_SHIFT
- LD rWORD7, rOFF24, rSTR1
- LD rWORD8, rOFF24, rSTR2
- addi rSTR1, rSTR1, 24
- addi rSTR2, rSTR2, 24
- cmpld cr6, rWORD5, rWORD6
- bne cr1, L(duLcr1)
- srd r12, rWORD8, rSHR
- sld rWORD8_SHIFT, rWORD8, rSHL
- or rWORD8, r12, rWORD6_SHIFT
- cmpld cr5, rWORD7, rWORD8
- bdz L(du24) /* Adjust CTR as we start with +4 */
-/* This is the primary loop */
- .align 4
-L(duLoop):
- LD rWORD1, rOFF8, rSTR1
- LD rWORD2, rOFF8, rSTR2
- cmpld cr1, rWORD3, rWORD4
- bne cr6, L(duLcr6)
- srd r0, rWORD2, rSHR
- sld rWORD2_SHIFT, rWORD2, rSHL
- or rWORD2, r0, rWORD8_SHIFT
-L(duLoop1):
- LD rWORD3, rOFF16, rSTR1
- LD rWORD4, rOFF16, rSTR2
- cmpld cr6, rWORD5, rWORD6
- bne cr5, L(duLcr5)
- srd r12, rWORD4, rSHR
- sld rWORD4_SHIFT, rWORD4, rSHL
- or rWORD4, r12, rWORD2_SHIFT
-L(duLoop2):
- LD rWORD5, rOFF24, rSTR1
- LD rWORD6, rOFF24, rSTR2
- cmpld cr5, rWORD7, rWORD8
- bne cr7, L(duLcr7)
- srd r0, rWORD6, rSHR
- sld rWORD6_SHIFT, rWORD6, rSHL
- or rWORD6, r0, rWORD4_SHIFT
-L(duLoop3):
- LD rWORD7, rOFF32, rSTR1
- LD rWORD8, rOFF32, rSTR2
- addi rSTR1, rSTR1, 32
- addi rSTR2, rSTR2, 32
- cmpld cr7, rWORD1, rWORD2
- bne cr1, L(duLcr1)
- srd r12, rWORD8, rSHR
- sld rWORD8_SHIFT, rWORD8, rSHL
- or rWORD8, r12, rWORD6_SHIFT
- bdnz L(duLoop)
-
-L(duL4):
- cmpld cr1, rWORD3, rWORD4
- bne cr6, L(duLcr6)
- cmpld cr6, rWORD5, rWORD6
- bne cr5, L(duLcr5)
- cmpld cr5, rWORD7, rWORD8
-L(du44):
- bne cr7, L(duLcr7)
-L(du34):
- bne cr1, L(duLcr1)
-L(du24):
- bne cr6, L(duLcr6)
-L(du14):
- sldi. rN, rN, 3
- bne cr5, L(duLcr5)
-/* At this point we have a remainder of 1 to 7 bytes to compare. We use
- shift right double to eliminate bits beyond the compare length.
-
- However it may not be safe to load rWORD2 which may be beyond the
- string length. So we compare the bit length of the remainder to
- the right shift count (rSHR). If the bit count is less than or equal
- we do not need to load rWORD2 (all significant bits are already in
- rWORD8_SHIFT). */
- cmpld cr7, rN, rSHR
- beq L(duZeroReturn)
- li r0, 0
- ble cr7, L(dutrim)
- LD rWORD2, rOFF8, rSTR2
- srd r0, rWORD2, rSHR
- .align 4
-L(dutrim):
- LD rWORD1, rOFF8, rSTR1
- ld rWORD8, -8(r1)
- subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */
- or rWORD2, r0, rWORD8_SHIFT
- ld rWORD7, rWORD7SAVE(r1)
- ld rSHL, rSHLSAVE(r1)
- srd rWORD1, rWORD1, rN
- srd rWORD2, rWORD2, rN
- ld rSHR, rSHRSAVE(r1)
- ld rWORD8_SHIFT, rWORD8SHIFTSAVE(r1)
- li rRTN, 0
- cmpld cr7, rWORD1, rWORD2
- ld rWORD2_SHIFT, rWORD2SHIFTSAVE(r1)
- ld rWORD4_SHIFT, rWORD4SHIFTSAVE(r1)
- beq cr7, L(dureturn24)
- li rRTN, 1
- ld rWORD6_SHIFT, rWORD6SHIFTSAVE(r1)
- ld rOFF8, rOFF8SAVE(r1)
- ld rOFF16, rOFF16SAVE(r1)
- ld rOFF24, rOFF24SAVE(r1)
- ld rOFF32, rOFF32SAVE(r1)
- bgtlr cr7
- li rRTN, -1
- blr
- .align 4
-L(duLcr7):
- ld rWORD8, rWORD8SAVE(r1)
- ld rWORD7, rWORD7SAVE(r1)
- li rRTN, 1
- bgt cr7, L(dureturn29)
- ld rSHL, rSHLSAVE(r1)
- ld rSHR, rSHRSAVE(r1)
- li rRTN, -1
- b L(dureturn27)
- .align 4
-L(duLcr1):
- ld rWORD8, rWORD8SAVE(r1)
- ld rWORD7, rWORD7SAVE(r1)
- li rRTN, 1
- bgt cr1, L(dureturn29)
- ld rSHL, rSHLSAVE(r1)
- ld rSHR, rSHRSAVE(r1)
- li rRTN, -1
- b L(dureturn27)
- .align 4
-L(duLcr6):
- ld rWORD8, rWORD8SAVE(r1)
- ld rWORD7, rWORD7SAVE(r1)
- li rRTN, 1
- bgt cr6, L(dureturn29)
- ld rSHL, rSHLSAVE(r1)
- ld rSHR, rSHRSAVE(r1)
- li rRTN, -1
- b L(dureturn27)
- .align 4
-L(duLcr5):
- ld rWORD8, rWORD8SAVE(r1)
- ld rWORD7, rWORD7SAVE(r1)
- li rRTN, 1
- bgt cr5, L(dureturn29)
- ld rSHL, rSHLSAVE(r1)
- ld rSHR, rSHRSAVE(r1)
- li rRTN, -1
- b L(dureturn27)
-
- .align 3
-L(duZeroReturn):
- li rRTN, 0
- .align 4
-L(dureturn):
- ld rWORD8, rWORD8SAVE(r1)
- ld rWORD7, rWORD7SAVE(r1)
-L(dureturn29):
- ld rSHL, rSHLSAVE(r1)
- ld rSHR, rSHRSAVE(r1)
-L(dureturn27):
- ld rWORD8_SHIFT, rWORD8SHIFTSAVE(r1)
- ld rWORD2_SHIFT, rWORD2SHIFTSAVE(r1)
- ld rWORD4_SHIFT, rWORD4SHIFTSAVE(r1)
-L(dureturn24):
- ld rWORD6_SHIFT, rWORD6SHIFTSAVE(r1)
- ld rOFF8, rOFF8SAVE(r1)
- ld rOFF16, rOFF16SAVE(r1)
- ld rOFF24, rOFF24SAVE(r1)
- ld rOFF32, rOFF32SAVE(r1)
- blr
-
-L(duzeroLength):
- ld rOFF8, rOFF8SAVE(r1)
- ld rOFF16, rOFF16SAVE(r1)
- ld rOFF24, rOFF24SAVE(r1)
- ld rOFF32, rOFF32SAVE(r1)
- li rRTN, 0
- blr
-
-END (MEMCMP)
-libc_hidden_builtin_def (memcmp)
-weak_alias (memcmp, bcmp)
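
Stripped of the alignment cases, register scheduling, and four-way unrolling, the core of this routine is an eight-bytes-at-a-time compare in which doublewords are ordered big-endian (hence ldbrx on little-endian). A simplified C sketch of that idea:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static int
memcmp_sketch (const void *s1, const void *s2, size_t n)
{
  const unsigned char *p1 = s1, *p2 = s2;

  while (n >= 8)
    {
      uint64_t w1, w2;
      memcpy (&w1, p1, 8);
      memcpy (&w2, p2, 8);
      if (w1 != w2)
        {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
          /* Byte-reverse so the most significant byte is the first
             differing byte in memory order (what ldbrx provides).  */
          w1 = __builtin_bswap64 (w1);
          w2 = __builtin_bswap64 (w2);
#endif
          return w1 > w2 ? 1 : -1;
        }
      p1 += 8;
      p2 += 8;
      n -= 8;
    }
  for (; n > 0; n--, p1++, p2++)
    if (*p1 != *p2)
      return *p1 < *p2 ? -1 : 1;
  return 0;
}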
diff --git a/sysdeps/powerpc/powerpc64/power7/memcpy.S b/sysdeps/powerpc/powerpc64/power7/memcpy.S
deleted file mode 100644
index e08993cbc3..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/memcpy.S
+++ /dev/null
@@ -1,430 +0,0 @@
-/* Optimized memcpy implementation for PowerPC64/POWER7.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Luis Machado <luisgpm@br.ibm.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-
-/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
- Returns 'dst'. */
-
-#ifndef MEMCPY
-# define MEMCPY memcpy
-#endif
-
-#define dst 11 /* Use r11 so r3 kept unchanged. */
-#define src 4
-#define cnt 5
-
- .machine power7
-EALIGN (MEMCPY, 5, 0)
- CALL_MCOUNT 3
-
- cmpldi cr1,cnt,31
- neg 0,3
- ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move
- code. */
-
-/* Align copies using VSX instructions to a quadword. This avoids alignment
- traps when memcpy is used on non-cacheable memory (for instance,
- memory-mapped I/O). */
- andi. 10,3,15
- clrldi 11,4,60
- cmpld cr6,10,11 /* SRC and DST alignments match? */
-
- mr dst,3
- bne cr6,L(copy_GE_32_unaligned)
- beq L(aligned_copy)
-
- mtocrf 0x01,0
- clrldi 0,0,60
-
-/* Get the DST and SRC aligned to 16 bytes. */
-1:
- bf 31,2f
- lbz 6,0(src)
- addi src,src,1
- stb 6,0(dst)
- addi dst,dst,1
-2:
- bf 30,4f
- lhz 6,0(src)
- addi src,src,2
- sth 6,0(dst)
- addi dst,dst,2
-4:
- bf 29,8f
- lwz 6,0(src)
- addi src,src,4
- stw 6,0(dst)
- addi dst,dst,4
-8:
- bf 28,16f
- ld 6,0(src)
- addi src,src,8
- std 6,0(dst)
- addi dst,dst,8
-16:
- subf cnt,0,cnt
-
-/* Main aligned copy loop. Copies 128 bytes at a time. */
-L(aligned_copy):
- li 6,16
- li 7,32
- li 8,48
- mtocrf 0x02,cnt
- srdi 12,cnt,7
- cmpdi 12,0
- beq L(aligned_tail)
- lxvd2x 6,0,src
- lxvd2x 7,src,6
- mtctr 12
- b L(aligned_128loop)
-
- .align 4
-L(aligned_128head):
- /* For the second and subsequent iterations of this loop. */
- lxvd2x 6,0,src
- lxvd2x 7,src,6
-L(aligned_128loop):
- lxvd2x 8,src,7
- lxvd2x 9,src,8
- stxvd2x 6,0,dst
- addi src,src,64
- stxvd2x 7,dst,6
- stxvd2x 8,dst,7
- stxvd2x 9,dst,8
- lxvd2x 6,0,src
- lxvd2x 7,src,6
- addi dst,dst,64
- lxvd2x 8,src,7
- lxvd2x 9,src,8
- addi src,src,64
- stxvd2x 6,0,dst
- stxvd2x 7,dst,6
- stxvd2x 8,dst,7
- stxvd2x 9,dst,8
- addi dst,dst,64
- bdnz L(aligned_128head)
-
-L(aligned_tail):
- mtocrf 0x01,cnt
- bf 25,32f
- lxvd2x 6,0,src
- lxvd2x 7,src,6
- lxvd2x 8,src,7
- lxvd2x 9,src,8
- addi src,src,64
- stxvd2x 6,0,dst
- stxvd2x 7,dst,6
- stxvd2x 8,dst,7
- stxvd2x 9,dst,8
- addi dst,dst,64
-32:
- bf 26,16f
- lxvd2x 6,0,src
- lxvd2x 7,src,6
- addi src,src,32
- stxvd2x 6,0,dst
- stxvd2x 7,dst,6
- addi dst,dst,32
-16:
- bf 27,8f
- lxvd2x 6,0,src
- addi src,src,16
- stxvd2x 6,0,dst
- addi dst,dst,16
-8:
- bf 28,4f
- ld 6,0(src)
- addi src,src,8
- std 6,0(dst)
- addi dst,dst,8
-4: /* Copies 4~7 bytes. */
- bf 29,L(tail2)
- lwz 6,0(src)
- stw 6,0(dst)
- bf 30,L(tail5)
- lhz 7,4(src)
- sth 7,4(dst)
- bflr 31
- lbz 8,6(src)
- stb 8,6(dst)
- /* Return original DST pointer. */
- blr
-
-
-/* Handle copies of 0~31 bytes. */
- .align 4
-L(copy_LT_32):
- mr dst,3
- cmpldi cr6,cnt,8
- mtocrf 0x01,cnt
- ble cr6,L(copy_LE_8)
-
- /* At least 9 bytes to go. */
- neg 8,4
- andi. 0,8,3
- cmpldi cr1,cnt,16
- beq L(copy_LT_32_aligned)
-
- /* Force 4-byte alignment for SRC. */
- mtocrf 0x01,0
- subf cnt,0,cnt
-2:
- bf 30,1f
- lhz 6,0(src)
- addi src,src,2
- sth 6,0(dst)
- addi dst,dst,2
-1:
- bf 31,L(end_4bytes_alignment)
- lbz 6,0(src)
- addi src,src,1
- stb 6,0(dst)
- addi dst,dst,1
-
- .align 4
-L(end_4bytes_alignment):
- cmpldi cr1,cnt,16
- mtocrf 0x01,cnt
-
-L(copy_LT_32_aligned):
- /* At least 6 bytes to go, and SRC is word-aligned. */
- blt cr1,8f
-
- /* Copy 16 bytes. */
- lwz 6,0(src)
- lwz 7,4(src)
- stw 6,0(dst)
- lwz 8,8(src)
- stw 7,4(dst)
- lwz 6,12(src)
- addi src,src,16
- stw 8,8(dst)
- stw 6,12(dst)
- addi dst,dst,16
-8: /* Copy 8 bytes. */
- bf 28,L(tail4)
- lwz 6,0(src)
- lwz 7,4(src)
- addi src,src,8
- stw 6,0(dst)
- stw 7,4(dst)
- addi dst,dst,8
-
- .align 4
-/* Copies 4~7 bytes. */
-L(tail4):
- bf 29,L(tail2)
- lwz 6,0(src)
- stw 6,0(dst)
- bf 30,L(tail5)
- lhz 7,4(src)
- sth 7,4(dst)
- bflr 31
- lbz 8,6(src)
- stb 8,6(dst)
- /* Return original DST pointer. */
- blr
-
- .align 4
-/* Copies 2~3 bytes. */
-L(tail2):
- bf 30,1f
- lhz 6,0(src)
- sth 6,0(dst)
- bflr 31
- lbz 7,2(src)
- stb 7,2(dst)
- blr
-
- .align 4
-L(tail5):
- bflr 31
- lbz 6,4(src)
- stb 6,4(dst)
- blr
-
- .align 4
-1:
- bflr 31
- lbz 6,0(src)
- stb 6,0(dst)
- /* Return original DST pointer. */
- blr
-
-
-/* Handles copies of 0~8 bytes. */
- .align 4
-L(copy_LE_8):
- bne cr6,L(tail4)
-
- /* Though we could've used ld/std here, they are still
- slow for unaligned cases. */
-
- lwz 6,0(src)
- lwz 7,4(src)
- stw 6,0(dst)
- stw 7,4(dst)
- blr
-
-
-/* Handle copies of 32+ bytes where DST is aligned (to quadword) but
- SRC is not. Use aligned quadword loads from SRC, shifted to realign
- the data, allowing for aligned DST stores. */
- .align 4
-L(copy_GE_32_unaligned):
- clrldi 0,0,60 /* Number of bytes until the 1st dst quadword. */
- srdi 9,cnt,4 /* Number of full quadwords remaining. */
-
- beq L(copy_GE_32_unaligned_cont)
-
- /* DST is not quadword aligned, get it aligned. */
-
- mtocrf 0x01,0
- subf cnt,0,cnt
-
- /* Vector instructions work best when proper alignment (16-bytes)
- is present. Move 0~15 bytes as needed to get DST quadword-aligned. */
-1:
- bf 31,2f
- lbz 6,0(src)
- addi src,src,1
- stb 6,0(dst)
- addi dst,dst,1
-2:
- bf 30,4f
- lhz 6,0(src)
- addi src,src,2
- sth 6,0(dst)
- addi dst,dst,2
-4:
- bf 29,8f
- lwz 6,0(src)
- addi src,src,4
- stw 6,0(dst)
- addi dst,dst,4
-8:
- bf 28,0f
- ld 6,0(src)
- addi src,src,8
- std 6,0(dst)
- addi dst,dst,8
-0:
- srdi 9,cnt,4 /* Number of full quadwords remaining. */
-
- /* The proper alignment is present, it is OK to copy the bytes now. */
-L(copy_GE_32_unaligned_cont):
-
- /* Setup two indexes to speed up the indexed vector operations. */
- clrldi 10,cnt,60
- li 6,16 /* Index for 16-bytes offsets. */
- li 7,32 /* Index for 32-bytes offsets. */
- cmpldi cr1,10,0
- srdi 8,cnt,5 /* Setup the loop counter. */
- mtocrf 0x01,9
- cmpldi cr6,9,1
-#ifdef __LITTLE_ENDIAN__
- lvsr 5,0,src
-#else
- lvsl 5,0,src
-#endif
- lvx 3,0,src
- li 0,0
- bf 31,L(setup_unaligned_loop)
-
- /* Copy another 16 bytes to align to 32-bytes due to the loop. */
- lvx 4,src,6
-#ifdef __LITTLE_ENDIAN__
- vperm 6,4,3,5
-#else
- vperm 6,3,4,5
-#endif
- addi src,src,16
- stvx 6,0,dst
- addi dst,dst,16
- vor 3,4,4
- clrrdi 0,src,60
-
-L(setup_unaligned_loop):
- mtctr 8
- ble cr6,L(end_unaligned_loop)
-
- /* Copy 32 bytes at a time using vector instructions. */
- .align 4
-L(unaligned_loop):
-
- /* Note: vr6/vr10 may contain data that was already copied,
- but in order to get proper alignment, we may have to copy
- some portions again. This is faster than having unaligned
- vector instructions though. */
-
- lvx 4,src,6
-#ifdef __LITTLE_ENDIAN__
- vperm 6,4,3,5
-#else
- vperm 6,3,4,5
-#endif
- lvx 3,src,7
-#ifdef __LITTLE_ENDIAN__
- vperm 10,3,4,5
-#else
- vperm 10,4,3,5
-#endif
- addi src,src,32
- stvx 6,0,dst
- stvx 10,dst,6
- addi dst,dst,32
- bdnz L(unaligned_loop)
-
- clrrdi 0,src,60
-
- .align 4
-L(end_unaligned_loop):
-
- /* Check for tail bytes. */
- mtocrf 0x01,cnt
- beqlr cr1
-
- add src,src,0
-
- /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */
- /* Copy 8 bytes. */
- bf 28,4f
- lwz 6,0(src)
- lwz 7,4(src)
- addi src,src,8
- stw 6,0(dst)
- stw 7,4(dst)
- addi dst,dst,8
-4: /* Copy 4~7 bytes. */
- bf 29,L(tail2)
- lwz 6,0(src)
- stw 6,0(dst)
- bf 30,L(tail5)
- lhz 7,4(src)
- sth 7,4(dst)
- bflr 31
- lbz 8,6(src)
- stb 8,6(dst)
- /* Return original DST pointer. */
- blr
-
-END_GEN_TB (MEMCPY,TB_TOCLESS)
-libc_hidden_builtin_def (memcpy)
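
The structure of the copy is: peel single bytes/halfwords/words/doublewords until the destination is quadword aligned, stream 128 bytes per iteration through lxvd2x/stxvd2x pairs, then mop up the tail with the same descending ladder. A structural C sketch, with 16-byte chunks standing in for the VSX block moves:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void *
memcpy_sketch (void *dst, const void *src, size_t len)
{
  unsigned char *d = dst;
  const unsigned char *s = src;

  /* Prologue: mirror the bf 31/30/29/28 ladder that aligns DST.  */
  while (len > 0 && ((uintptr_t) d & 15) != 0)
    {
      *d++ = *s++;
      len--;
    }

  /* Main loop: wide aligned chunks (the assembly moves 128 bytes per
     iteration through four VSX register pairs).  */
  for (; len >= 16; len -= 16, d += 16, s += 16)
    memcpy (d, s, 16);

  /* Tail: remaining 0..15 bytes.  */
  memcpy (d, s, len);
  return dst;
}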
diff --git a/sysdeps/powerpc/powerpc64/power7/memmove.S b/sysdeps/powerpc/powerpc64/power7/memmove.S
deleted file mode 100644
index 4c0f7c3571..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/memmove.S
+++ /dev/null
@@ -1,835 +0,0 @@
-/* Optimized memmove implementation for PowerPC64/POWER7.
- Copyright (C) 2014-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-
-/* void* [r3] memmove (void *dest [r3], const void *src [r4], size_t len [r5])
-
-   This optimization checks whether memory 'dest' overlaps with 'src'.  If it
-   does not, it performs an optimized memcpy (similar to the POWER7 memcpy,
-   embedded here to save some cycles).
-   If source and destination overlap, an optimized backwards memcpy is used
-   instead. */
-
-#ifndef MEMMOVE
-# define MEMMOVE memmove
-#endif
- .machine power7
-EALIGN (MEMMOVE, 5, 0)
- CALL_MCOUNT 3
-
-L(_memmove):
- subf r9,r4,r3
- cmpld cr7,r9,r5
- blt cr7,L(memmove_bwd)
-
- cmpldi cr1,r5,31
- neg 0,3
- ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move
- code. */
-
- andi. 10,3,15
- clrldi 11,4,60
- cmpld cr6,10,11 /* SRC and DST alignments match? */
-
- mr r11,3
- bne cr6,L(copy_GE_32_unaligned)
- beq L(aligned_copy)
-
- mtocrf 0x01,0
- clrldi 0,0,60
-
-/* Get the DST and SRC aligned to 8 bytes (16 for little-endian). */
-1:
- bf 31,2f
- lbz 6,0(r4)
- addi r4,r4,1
- stb 6,0(r11)
- addi r11,r11,1
-2:
- bf 30,4f
- lhz 6,0(r4)
- addi r4,r4,2
- sth 6,0(r11)
- addi r11,r11,2
-4:
- bf 29,8f
- lwz 6,0(r4)
- addi r4,r4,4
- stw 6,0(r11)
- addi r11,r11,4
-8:
- bf 28,16f
- ld 6,0(r4)
- addi r4,r4,8
- std 6,0(r11)
- addi r11,r11,8
-16:
- subf r5,0,r5
-
-/* Main aligned copy loop. Copies 128 bytes at a time. */
-L(aligned_copy):
- li 6,16
- li 7,32
- li 8,48
- mtocrf 0x02,r5
- srdi 12,r5,7
- cmpdi 12,0
- beq L(aligned_tail)
- lxvd2x 6,0,r4
- lxvd2x 7,r4,6
- mtctr 12
- b L(aligned_128loop)
-
- .align 4
-L(aligned_128head):
- /* for the 2nd + iteration of this loop. */
- lxvd2x 6,0,r4
- lxvd2x 7,r4,6
-L(aligned_128loop):
- lxvd2x 8,r4,7
- lxvd2x 9,r4,8
- stxvd2x 6,0,r11
- addi r4,r4,64
- stxvd2x 7,r11,6
- stxvd2x 8,r11,7
- stxvd2x 9,r11,8
- lxvd2x 6,0,r4
- lxvd2x 7,r4,6
- addi r11,r11,64
- lxvd2x 8,r4,7
- lxvd2x 9,r4,8
- addi r4,r4,64
- stxvd2x 6,0,r11
- stxvd2x 7,r11,6
- stxvd2x 8,r11,7
- stxvd2x 9,r11,8
- addi r11,r11,64
- bdnz L(aligned_128head)
-
-L(aligned_tail):
- mtocrf 0x01,r5
- bf 25,32f
- lxvd2x 6,0,r4
- lxvd2x 7,r4,6
- lxvd2x 8,r4,7
- lxvd2x 9,r4,8
- addi r4,r4,64
- stxvd2x 6,0,r11
- stxvd2x 7,r11,6
- stxvd2x 8,r11,7
- stxvd2x 9,r11,8
- addi r11,r11,64
-32:
- bf 26,16f
- lxvd2x 6,0,r4
- lxvd2x 7,r4,6
- addi r4,r4,32
- stxvd2x 6,0,r11
- stxvd2x 7,r11,6
- addi r11,r11,32
-16:
- bf 27,8f
- lxvd2x 6,0,r4
- addi r4,r4,16
- stxvd2x 6,0,r11
- addi r11,r11,16
-8:
- bf 28,4f
- ld 6,0(r4)
- addi r4,r4,8
- std 6,0(r11)
- addi r11,r11,8
-4: /* Copies 4~7 bytes. */
- bf 29,L(tail2)
- lwz 6,0(r4)
- stw 6,0(r11)
- bf 30,L(tail5)
- lhz 7,4(r4)
- sth 7,4(r11)
- bflr 31
- lbz 8,6(r4)
- stb 8,6(r11)
- /* Return original DST pointer. */
- blr
-
-/* Handle copies of 0~31 bytes. */
- .align 4
-L(copy_LT_32):
- mr r11,3
- cmpldi cr6,r5,8
- mtocrf 0x01,r5
- ble cr6,L(copy_LE_8)
-
- /* At least 9 bytes to go. */
- neg 8,4
- andi. 0,8,3
- cmpldi cr1,r5,16
- beq L(copy_LT_32_aligned)
-
- /* Force 4-byte alignment for SRC. */
- mtocrf 0x01,0
- subf r5,0,r5
-2:
- bf 30,1f
- lhz 6,0(r4)
- addi r4,r4,2
- sth 6,0(r11)
- addi r11,r11,2
-1:
- bf 31,L(end_4bytes_alignment)
- lbz 6,0(r4)
- addi r4,r4,1
- stb 6,0(r11)
- addi r11,r11,1
-
- .align 4
-L(end_4bytes_alignment):
- cmpldi cr1,r5,16
- mtocrf 0x01,r5
-
-L(copy_LT_32_aligned):
- /* At least 6 bytes to go, and SRC is word-aligned. */
- blt cr1,8f
-
- /* Copy 16 bytes. */
- lwz 6,0(r4)
- lwz 7,4(r4)
- stw 6,0(r11)
- lwz 8,8(r4)
- stw 7,4(r11)
- lwz 6,12(r4)
- addi r4,r4,16
- stw 8,8(r11)
- stw 6,12(r11)
- addi r11,r11,16
-8: /* Copy 8 bytes. */
- bf 28,L(tail4)
- lwz 6,0(r4)
- lwz 7,4(r4)
- addi r4,r4,8
- stw 6,0(r11)
- stw 7,4(r11)
- addi r11,r11,8
-
- .align 4
-/* Copies 4~7 bytes. */
-L(tail4):
- bf 29,L(tail2)
- lwz 6,0(r4)
- stw 6,0(r11)
- bf 30,L(tail5)
- lhz 7,4(r4)
- sth 7,4(r11)
- bflr 31
- lbz 8,6(r4)
- stb 8,6(r11)
- /* Return original DST pointer. */
- blr
-
- .align 4
-/* Copies 2~3 bytes. */
-L(tail2):
- bf 30,1f
- lhz 6,0(r4)
- sth 6,0(r11)
- bflr 31
- lbz 7,2(r4)
- stb 7,2(r11)
- blr
-
- .align 4
-L(tail5):
- bflr 31
- lbz 6,4(r4)
- stb 6,4(r11)
- blr
-
- .align 4
-1:
- bflr 31
- lbz 6,0(r4)
- stb 6,0(r11)
- /* Return original DST pointer. */
- blr
-
-/* Handles copies of 0~8 bytes. */
- .align 4
-L(copy_LE_8):
- bne cr6,L(tail4)
-
- /* Though we could've used ld/std here, they are still
- slow for unaligned cases. */
-
- lwz 6,0(r4)
- lwz 7,4(r4)
- stw 6,0(r11)
- stw 7,4(r11)
- blr
-
-
-/* Handle copies of 32+ bytes where DST is aligned (to quadword) but
- SRC is not. Use aligned quadword loads from SRC, shifted to realign
- the data, allowing for aligned DST stores. */
- .align 4
-L(copy_GE_32_unaligned):
- clrldi 0,0,60 /* Number of bytes until the 1st r11 quadword. */
- srdi 9,r5,4 /* Number of full quadwords remaining. */
-
- beq L(copy_GE_32_unaligned_cont)
-
- /* DST is not quadword aligned, get it aligned. */
-
- mtocrf 0x01,0
- subf r5,0,r5
-
- /* Vector instructions work best when proper alignment (16-bytes)
- is present. Move 0~15 bytes as needed to get DST quadword-aligned. */
-1:
- bf 31,2f
- lbz 6,0(r4)
- addi r4,r4,1
- stb 6,0(r11)
- addi r11,r11,1
-2:
- bf 30,4f
- lhz 6,0(r4)
- addi r4,r4,2
- sth 6,0(r11)
- addi r11,r11,2
-4:
- bf 29,8f
- lwz 6,0(r4)
- addi r4,r4,4
- stw 6,0(r11)
- addi r11,r11,4
-8:
- bf 28,0f
- ld 6,0(r4)
- addi r4,r4,8
- std 6,0(r11)
- addi r11,r11,8
-0:
- srdi 9,r5,4 /* Number of full quadwords remaining. */
-
- /* The proper alignment is present; it is OK to copy the bytes now. */
-L(copy_GE_32_unaligned_cont):
-
- /* Setup two indexes to speed up the indexed vector operations. */
- clrldi 10,r5,60
- li 6,16 /* Index for 16-bytes offsets. */
- li 7,32 /* Index for 32-bytes offsets. */
- cmpldi cr1,10,0
- srdi 8,r5,5 /* Setup the loop counter. */
- mtocrf 0x01,9
- cmpldi cr6,9,1
-#ifdef __LITTLE_ENDIAN__
- lvsr 5,0,r4
-#else
- lvsl 5,0,r4
-#endif
- lvx 3,0,r4
- li 0,0
- bf 31,L(setup_unaligned_loop)
-
- /* Copy another 16 bytes to align to 32-bytes due to the loop. */
- lvx 4,r4,6
-#ifdef __LITTLE_ENDIAN__
- vperm 6,4,3,5
-#else
- vperm 6,3,4,5
-#endif
- addi r4,r4,16
- stvx 6,0,r11
- addi r11,r11,16
- vor 3,4,4
- clrrdi 0,r4,60
-
-L(setup_unaligned_loop):
- mtctr 8
- ble cr6,L(end_unaligned_loop)
-
- /* Copy 32 bytes at a time using vector instructions. */
- .align 4
-L(unaligned_loop):
-
- /* Note: vr6/vr10 may contain data that was already copied,
- but in order to get proper alignment, we may have to copy
- some portions again. This is faster than having unaligned
- vector instructions though. */
-
- lvx 4,r4,6
-#ifdef __LITTLE_ENDIAN__
- vperm 6,4,3,5
-#else
- vperm 6,3,4,5
-#endif
- lvx 3,r4,7
-#ifdef __LITTLE_ENDIAN__
- vperm 10,3,4,5
-#else
- vperm 10,4,3,5
-#endif
- addi r4,r4,32
- stvx 6,0,r11
- stvx 10,r11,6
- addi r11,r11,32
- bdnz L(unaligned_loop)
-
- clrrdi 0,r4,60
-
- .align 4
-L(end_unaligned_loop):
-
- /* Check for tail bytes. */
- mtocrf 0x01,r5
- beqlr cr1
-
- add r4,r4,0
-
- /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */
- /* Copy 8 bytes. */
- bf 28,4f
- lwz 6,0(r4)
- lwz 7,4(r4)
- addi r4,r4,8
- stw 6,0(r11)
- stw 7,4(r11)
- addi r11,r11,8
-4: /* Copy 4~7 bytes. */
- bf 29,L(tail2)
- lwz 6,0(r4)
- stw 6,0(r11)
- bf 30,L(tail5)
- lhz 7,4(r4)
- sth 7,4(r11)
- bflr 31
- lbz 8,6(r4)
- stb 8,6(r11)
- /* Return original DST pointer. */
- blr
-
- /* Start of the backward memcpy implementation: the algorithm first checks
-    whether src and dest share the same alignment; if so, it aligns both to 16
-    bytes and copies using VSX instructions.
-    If they do not, it aligns dest to 16 bytes and uses VMX (Altivec)
-    instructions to read two 16-byte chunks at a time, shift/permute the
-    bytes read, and store them aligned to dest. */
-L(memmove_bwd):
- cmpldi cr1,r5,31
- /* Copy is done backwards: update the pointers and check alignment. */
- add r11,r3,r5
- add r4,r4,r5
- mr r0,r11
- ble cr1, L(copy_LT_32_bwd) /* If move < 32 bytes use short move
- code. */
-
- andi. r10,r11,15 /* Check if r11 is aligned to 16 bytes */
- clrldi r9,r4,60 /* Check if r4 is aligned to 16 bytes */
- cmpld cr6,r10,r9 /* SRC and DST alignments match? */
-
- bne cr6,L(copy_GE_32_unaligned_bwd)
- beq L(aligned_copy_bwd)
-
- mtocrf 0x01,r0
- clrldi r0,r0,60
-
-/* Get the DST and SRC aligned to 16 bytes. */
-1:
- bf 31,2f
- lbz r6,-1(r4)
- subi r4,r4,1
- stb r6,-1(r11)
- subi r11,r11,1
-2:
- bf 30,4f
- lhz r6,-2(r4)
- subi r4,r4,2
- sth r6,-2(r11)
- subi r11,r11,2
-4:
- bf 29,8f
- lwz r6,-4(r4)
- subi r4,r4,4
- stw r6,-4(r11)
- subi r11,r11,4
-8:
- bf 28,16f
- ld r6,-8(r4)
- subi r4,r4,8
- std r6,-8(r11)
- subi r11,r11,8
-16:
- subf r5,0,r5
-
-/* Main aligned copy loop. Copies 128 bytes at a time. */
-L(aligned_copy_bwd):
- li r6,-16
- li r7,-32
- li r8,-48
- li r9,-64
- mtocrf 0x02,r5
- srdi r12,r5,7
- cmpdi r12,0
- beq L(aligned_tail_bwd)
- lxvd2x v6,r4,r6
- lxvd2x v7,r4,r7
- mtctr 12
- b L(aligned_128loop_bwd)
-
- .align 4
-L(aligned_128head_bwd):
- /* for the 2nd + iteration of this loop. */
- lxvd2x v6,r4,r6
- lxvd2x v7,r4,r7
-L(aligned_128loop_bwd):
- lxvd2x v8,r4,r8
- lxvd2x v9,r4,r9
- stxvd2x v6,r11,r6
- subi r4,r4,64
- stxvd2x v7,r11,r7
- stxvd2x v8,r11,r8
- stxvd2x v9,r11,r9
- lxvd2x v6,r4,r6
- lxvd2x v7,r4,7
- subi r11,r11,64
- lxvd2x v8,r4,r8
- lxvd2x v9,r4,r9
- subi r4,r4,64
- stxvd2x v6,r11,r6
- stxvd2x v7,r11,r7
- stxvd2x v8,r11,r8
- stxvd2x v9,r11,r9
- subi r11,r11,64
- bdnz L(aligned_128head_bwd)
-
-L(aligned_tail_bwd):
- mtocrf 0x01,r5
- bf 25,32f
- lxvd2x v6,r4,r6
- lxvd2x v7,r4,r7
- lxvd2x v8,r4,r8
- lxvd2x v9,r4,r9
- subi r4,r4,64
- stxvd2x v6,r11,r6
- stxvd2x v7,r11,r7
- stxvd2x v8,r11,r8
- stxvd2x v9,r11,r9
- subi r11,r11,64
-32:
- bf 26,16f
- lxvd2x v6,r4,r6
- lxvd2x v7,r4,r7
- subi r4,r4,32
- stxvd2x v6,r11,r6
- stxvd2x v7,r11,r7
- subi r11,r11,32
-16:
- bf 27,8f
- lxvd2x v6,r4,r6
- subi r4,r4,16
- stxvd2x v6,r11,r6
- subi r11,r11,16
-8:
- bf 28,4f
- ld r6,-8(r4)
- subi r4,r4,8
- std r6,-8(r11)
- subi r11,r11,8
-4: /* Copies 4~7 bytes. */
- bf 29,L(tail2_bwd)
- lwz r6,-4(r4)
- stw r6,-4(r11)
- bf 30,L(tail5_bwd)
- lhz r7,-6(r4)
- sth r7,-6(r11)
- bflr 31
- lbz r8,-7(r4)
- stb r8,-7(r11)
- /* Return original DST pointer. */
- blr
-
-/* Handle copies of 0~31 bytes. */
- .align 4
-L(copy_LT_32_bwd):
- cmpldi cr6,r5,8
- mtocrf 0x01,r5
- ble cr6,L(copy_LE_8_bwd)
-
- /* At least 9 bytes to go. */
- neg r8,r4
- andi. r0,r8,3
- cmpldi cr1,r5,16
- beq L(copy_LT_32_aligned_bwd)
-
- /* Force 4-byte alignment for SRC. */
- mtocrf 0x01,0
- subf r5,0,r5
-2:
- bf 30,1f
- lhz r6,-2(r4)
- subi r4,r4,2
- sth r6,-2(r11)
- subi r11,r11,2
-1:
- bf 31,L(end_4bytes_alignment_bwd)
- lbz 6,-1(r4)
- subi r4,r4,1
- stb 6,-1(r11)
- subi r11,r11,1
-
- .align 4
-L(end_4bytes_alignment_bwd):
- cmpldi cr1,r5,16
- mtocrf 0x01,r5
-
-L(copy_LT_32_aligned_bwd):
- /* At least 6 bytes to go, and SRC is word-aligned. */
- blt cr1,8f
-
- /* Copy 16 bytes. */
- lwz r6,-4(r4)
- lwz r7,-8(r4)
- stw r6,-4(r11)
- lwz r8,-12(r4)
- stw r7,-8(r11)
- lwz r6,-16(r4)
- subi r4,r4,16
- stw r8,-12(r11)
- stw r6,-16(r11)
- subi r11,r11,16
-8: /* Copy 8 bytes. */
- bf 28,L(tail4_bwd)
- lwz r6,-4(r4)
- lwz r7,-8(r4)
- subi r4,r4,8
- stw r6,-4(r11)
- stw r7,-8(r11)
- subi r11,r11,8
-
- .align 4
-/* Copies 4~7 bytes. */
-L(tail4_bwd):
- bf 29,L(tail2_bwd)
- lwz 6,-4(r4)
- stw 6,-4(r11)
- bf 30,L(tail5_bwd)
- lhz 7,-6(r4)
- sth 7,-6(r11)
- bflr 31
- lbz 8,-7(r4)
- stb 8,-7(r11)
- /* Return original DST pointer. */
- blr
-
- .align 4
-/* Copies 2~3 bytes. */
-L(tail2_bwd):
- bf 30,1f
- lhz 6,-2(r4)
- sth 6,-2(r11)
- bflr 31
- lbz 7,-3(r4)
- stb 7,-3(r11)
- blr
-
- .align 4
-L(tail5_bwd):
- bflr 31
- lbz 6,-5(r4)
- stb 6,-5(r11)
- blr
-
- .align 4
-1:
- bflr 31
- lbz 6,-1(r4)
- stb 6,-1(r11)
- /* Return original DST pointer. */
- blr
-
-
-/* Handles copies of 0~8 bytes. */
- .align 4
-L(copy_LE_8_bwd):
- bne cr6,L(tail4_bwd)
-
- /* Though we could've used ld/std here, they are still
- slow for unaligned cases. */
- lwz 6,-8(r4)
- lwz 7,-4(r4)
- stw 6,-8(r11)
- stw 7,-4(r11)
- blr
-
-
-/* Handle copies of 32+ bytes where DST is aligned (to quadword) but
- SRC is not. Use aligned quadword loads from SRC, shifted to realign
- the data, allowing for aligned DST stores. */
- .align 4
-L(copy_GE_32_unaligned_bwd):
- andi. r10,r11,15 /* Check alignment of DST against 16 bytes. */
- srdi r9,r5,4 /* Number of full quadwords remaining. */
-
- beq L(copy_GE_32_unaligned_cont_bwd)
-
- /* DST is not quadword aligned and r10 holds the address masked to
- compare alignments. */
- mtocrf 0x01,r10
- subf r5,r10,r5
-
- /* Vector instructions work best when proper alignment (16-bytes)
- is present. Move 0~15 bytes as needed to get DST quadword-aligned. */
-1:
- bf 31,2f
- lbz r6,-1(r4)
- subi r4,r4,1
- stb r6,-1(r11)
- subi r11,r11,1
-2:
- bf 30,4f
- lhz r6,-2(r4)
- subi r4,r4,2
- sth r6,-2(r11)
- subi r11,r11,2
-4:
- bf 29,8f
- lwz r6,-4(r4)
- subi r4,r4,4
- stw r6,-4(r11)
- subi r11,r11,4
-8:
- bf 28,0f
- ld r6,-8(r4)
- subi r4,r4,8
- std r6,-8(r11)
- subi r11,r11,8
-0:
- srdi r9,r5,4 /* Number of full quadwords remaining. */
-
- /* The proper alignment is present; it is OK to copy the bytes now. */
-L(copy_GE_32_unaligned_cont_bwd):
-
- /* Setup two indexes to speed up the indexed vector operations. */
- clrldi r10,r5,60
- li r6,-16 /* Index for 16-bytes offsets. */
- li r7,-32 /* Index for 32-bytes offsets. */
- cmpldi cr1,10,0
- srdi r8,r5,5 /* Setup the loop counter. */
- mtocrf 0x01,9
- cmpldi cr6,r9,1
-#ifdef __LITTLE_ENDIAN__
- lvsr v5,r0,r4
-#else
- lvsl v5,r0,r4
-#endif
- lvx v3,0,r4
- li r0,0
- bf 31,L(setup_unaligned_loop_bwd)
-
- /* Copy another 16 bytes to align to 32-bytes due to the loop. */
- lvx v4,r4,r6
-#ifdef __LITTLE_ENDIAN__
- vperm v6,v3,v4,v5
-#else
- vperm v6,v4,v3,v5
-#endif
- subi r4,r4,16
- stvx v6,r11,r6
- subi r11,r11,16
- vor v3,v4,v4
- clrrdi r0,r4,60
-
-L(setup_unaligned_loop_bwd):
- mtctr r8
- ble cr6,L(end_unaligned_loop_bwd)
-
- /* Copy 32 bytes at a time using vector instructions. */
- .align 4
-L(unaligned_loop_bwd):
-
- /* Note: vr6/vr10 may contain data that was already copied,
- but in order to get proper alignment, we may have to copy
- some portions again. This is faster than having unaligned
- vector instructions though. */
-
- lvx v4,r4,r6
-#ifdef __LITTLE_ENDIAN__
- vperm v6,v3,v4,v5
-#else
- vperm v6,v4,v3,v5
-#endif
- lvx v3,r4,r7
-#ifdef __LITTLE_ENDIAN__
- vperm v10,v4,v3,v5
-#else
- vperm v10,v3,v4,v5
-#endif
- subi r4,r4,32
- stvx v6,r11,r6
- stvx v10,r11,r7
- subi r11,r11,32
- bdnz L(unaligned_loop_bwd)
-
- clrrdi r0,r4,60
-
- .align 4
-L(end_unaligned_loop_bwd):
-
- /* Check for tail bytes. */
- mtocrf 0x01,r5
- beqlr cr1
-
- add r4,r4,0
-
- /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */
- /* Copy 8 bytes. */
- bf 28,4f
- lwz r6,-4(r4)
- lwz r7,-8(r4)
- subi r4,r4,8
- stw r6,-4(r11)
- stw r7,-8(r11)
- subi r11,r11,8
-4: /* Copy 4~7 bytes. */
- bf 29,L(tail2_bwd)
- lwz r6,-4(r4)
- stw r6,-4(r11)
- bf 30,L(tail5_bwd)
- lhz r7,-6(r4)
- sth r7,-6(r11)
- bflr 31
- lbz r8,-7(r4)
- stb r8,-7(r11)
- /* Return original DST pointer. */
- blr
-END_GEN_TB (MEMMOVE, TB_TOCLESS)
-libc_hidden_builtin_def (memmove)
-
-
-/* void bcopy(const void *src [r3], void *dest [r4], size_t n [r5])
-   Implemented in this file to avoid the linker creating a stub function
-   call for the branch to '_memmove'. */
-ENTRY (__bcopy)
- mr r6,r3
- mr r3,r4
- mr r4,r6
- b L(_memmove)
-END (__bcopy)
-weak_alias (__bcopy, bcopy)
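
The branch to L(memmove_bwd) hinges on a single unsigned compare: subf r9,r4,r3 computes dest - src, and the cmpld against len decides whether a forward copy would overwrite still-unread source bytes. A C model of that test (reference semantics only, byte-wise instead of vectorized):

#include <stddef.h>
#include <stdint.h>

void *
model_memmove (void *dest, const void *src, size_t len)
{
  unsigned char *d = dest;
  const unsigned char *s = src;
  /* (dest - src) as an unsigned value is >= len exactly when dest does
     not point into [src, src + len), so copying forward is safe.  */
  if ((uintptr_t) d - (uintptr_t) s >= len)
    for (size_t i = 0; i < len; i++)    /* forward, memcpy-style */
      d[i] = s[i];
  else
    for (size_t i = len; i-- > 0; )     /* backward, for overlap */
      d[i] = s[i];
  return dest;
}

__bcopy itself only swaps its first two arguments into memmove order before branching to L(_memmove), which is why embedding it in the same file avoids a linker stub.
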
diff --git a/sysdeps/powerpc/powerpc64/power7/mempcpy.S b/sysdeps/powerpc/powerpc64/power7/mempcpy.S
deleted file mode 100644
index 4e15d1e40c..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/mempcpy.S
+++ /dev/null
@@ -1,472 +0,0 @@
-/* Optimized mempcpy implementation for POWER7.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Luis Machado <luisgpm@br.ibm.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-
-/* __ptr_t [r3] __mempcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
- Returns 'dst' + 'len'. */
-
-#ifndef MEMPCPY
-# define MEMPCPY __mempcpy
-#endif
- .machine power7
-EALIGN (MEMPCPY, 5, 0)
- CALL_MCOUNT 3
-
- cmpldi cr1,5,31
- neg 0,3
- std 3,-16(1)
- std 31,-8(1)
- cfi_offset(31,-8)
- ble cr1,L(copy_LT_32) /* If move < 32 bytes use short move
- code. */
-
- andi. 11,3,7 /* Check alignment of DST. */
-
-
- clrldi 10,4,61 /* Check alignment of SRC. */
- cmpld cr6,10,11 /* SRC and DST alignments match? */
- mr 12,4
- mr 31,5
- bne cr6,L(copy_GE_32_unaligned)
-
- srdi 9,5,3 /* Number of full doublewords remaining. */
-
- beq L(copy_GE_32_aligned_cont)
-
- clrldi 0,0,61
- mtcrf 0x01,0
- subf 31,0,5
-
- /* Get the SRC aligned to 8 bytes. */
-
-1: bf 31,2f
- lbz 6,0(12)
- addi 12,12,1
- stb 6,0(3)
- addi 3,3,1
-2: bf 30,4f
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-4: bf 29,0f
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-0:
- clrldi 10,12,61 /* Check alignment of SRC again. */
- srdi 9,31,3 /* Number of full doublewords remaining. */
-
-L(copy_GE_32_aligned_cont):
-
- clrldi 11,31,61
- mtcrf 0x01,9
-
- srdi 8,31,5
- cmpldi cr1,9,4
- cmpldi cr6,11,0
- mr 11,12
-
- /* Copy 1~3 doublewords so the main loop starts
- at a multiple of 32 bytes. */
-
- bf 30,1f
- ld 6,0(12)
- ld 7,8(12)
- addi 11,12,16
- mtctr 8
- std 6,0(3)
- std 7,8(3)
- addi 10,3,16
- bf 31,4f
- ld 0,16(12)
- std 0,16(3)
- blt cr1,3f
- addi 11,12,24
- addi 10,3,24
- b 4f
-
- .align 4
-1: /* Copy 1 doubleword and set the counter. */
- mr 10,3
- mtctr 8
- bf 31,4f
- ld 6,0(12)
- addi 11,12,8
- std 6,0(3)
- addi 10,3,8
-
- /* Main aligned copy loop. Copies 32 bytes at a time. */
- .align 4
-4:
- ld 6,0(11)
- ld 7,8(11)
- ld 8,16(11)
- ld 0,24(11)
- addi 11,11,32
-
- std 6,0(10)
- std 7,8(10)
- std 8,16(10)
- std 0,24(10)
- addi 10,10,32
- bdnz 4b
-3:
-
- /* Check for tail bytes. */
- rldicr 0,31,0,60
- mtcrf 0x01,31
- beq cr6,0f
-
-.L9:
- add 3,3,0
- add 12,12,0
-
- /* At this point we have a tail of 0-7 bytes and we know that the
- destination is doubleword-aligned. */
-4: /* Copy 4 bytes. */
- bf 29,2f
-
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-2: /* Copy 2 bytes. */
- bf 30,1f
-
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-1: /* Copy 1 byte. */
- bf 31,0f
-
- lbz 6,0(12)
- stb 6,0(3)
-0: /* Return DST + LEN pointer. */
- ld 31,-8(1)
- ld 3,-16(1)
- add 3,3,5
- blr
-
- /* Handle copies of 0~31 bytes. */
- .align 4
-L(copy_LT_32):
- cmpldi cr6,5,8
- mr 12,4
- mtcrf 0x01,5
- ble cr6,L(copy_LE_8)
-
- /* At least 9 bytes to go. */
- neg 8,4
- clrrdi 11,4,2
- andi. 0,8,3
- cmpldi cr1,5,16
- mr 10,5
- beq L(copy_LT_32_aligned)
-
- /* Force 4-byte alignment for SRC. */
- mtocrf 0x01,0
- subf 10,0,5
-2: bf 30,1f
-
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-1: bf 31,L(end_4bytes_alignment)
-
- lbz 6,0(12)
- addi 12,12,1
- stb 6,0(3)
- addi 3,3,1
-
- .align 4
-L(end_4bytes_alignment):
- cmpldi cr1,10,16
- mtcrf 0x01,10
-
-L(copy_LT_32_aligned):
- /* At least 6 bytes to go, and SRC is word-aligned. */
- blt cr1,8f
-
- /* Copy 16 bytes. */
- lwz 6,0(12)
- lwz 7,4(12)
- stw 6,0(3)
- lwz 8,8(12)
- stw 7,4(3)
- lwz 6,12(12)
- addi 12,12,16
- stw 8,8(3)
- stw 6,12(3)
- addi 3,3,16
-8: /* Copy 8 bytes. */
- bf 28,4f
-
- lwz 6,0(12)
- lwz 7,4(12)
- addi 12,12,8
- stw 6,0(3)
- stw 7,4(3)
- addi 3,3,8
-4: /* Copy 4 bytes. */
- bf 29,2f
-
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-2: /* Copy 2-3 bytes. */
- bf 30,1f
-
- lhz 6,0(12)
- sth 6,0(3)
- bf 31,0f
- lbz 7,2(12)
- stb 7,2(3)
- ld 3,-16(1)
- add 3,3,5
- blr
-
- .align 4
-1: /* Copy 1 byte. */
- bf 31,0f
-
- lbz 6,0(12)
- stb 6,0(3)
-0: /* Return DST + LEN pointer. */
- ld 3,-16(1)
- add 3,3,5
- blr
-
- /* Handles copies of 0~8 bytes. */
- .align 4
-L(copy_LE_8):
- bne cr6,4f
-
- /* Though we could've used ld/std here, they are still
- slow for unaligned cases. */
-
- lwz 6,0(4)
- lwz 7,4(4)
- stw 6,0(3)
- stw 7,4(3)
- ld 3,-16(1) /* Return DST + LEN pointer. */
- add 3,3,5
- blr
-
- .align 4
-4: /* Copies 4~7 bytes. */
- bf 29,2b
-
- lwz 6,0(4)
- stw 6,0(3)
- bf 30,5f
- lhz 7,4(4)
- sth 7,4(3)
- bf 31,0f
- lbz 8,6(4)
- stb 8,6(3)
- ld 3,-16(1)
- add 3,3,5
- blr
-
- .align 4
-5: /* Copy 1 byte. */
- bf 31,0f
-
- lbz 6,4(4)
- stb 6,4(3)
-
-0: /* Return DST + LEN pointer. */
- ld 3,-16(1)
- add 3,3,5
- blr
-
- /* Handle copies of 32+ bytes where DST is aligned (to quadword) but
- SRC is not. Use aligned quadword loads from SRC, shifted to realign
- the data, allowing for aligned DST stores. */
- .align 4
-L(copy_GE_32_unaligned):
- clrldi 0,0,60 /* Number of bytes until the 1st
- quadword. */
- andi. 11,3,15 /* Check alignment of DST (against
- quadwords). */
- srdi 9,5,4 /* Number of full quadwords remaining. */
-
- beq L(copy_GE_32_unaligned_cont)
-
- /* DST is not quadword aligned, get it aligned. */
-
- mtcrf 0x01,0
- subf 31,0,5
-
- /* Vector instructions work best when proper alignment (16-bytes)
- is present. Move 0~15 bytes as needed to get DST quadword-aligned. */
-1: /* Copy 1 byte. */
- bf 31,2f
-
- lbz 6,0(12)
- addi 12,12,1
- stb 6,0(3)
- addi 3,3,1
-2: /* Copy 2 bytes. */
- bf 30,4f
-
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-4: /* Copy 4 bytes. */
- bf 29,8f
-
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-8: /* Copy 8 bytes. */
- bf 28,0f
-
- ld 6,0(12)
- addi 12,12,8
- std 6,0(3)
- addi 3,3,8
-0:
- clrldi 10,12,60 /* Check alignment of SRC. */
- srdi 9,31,4 /* Number of full quadwords remaining. */
-
- /* The proper alignment is present; it is OK to copy the bytes now. */
-L(copy_GE_32_unaligned_cont):
-
- /* Setup two indexes to speed up the indexed vector operations. */
- clrldi 11,31,60
- li 6,16 /* Index for 16-bytes offsets. */
- li 7,32 /* Index for 32-bytes offsets. */
- cmpldi cr1,11,0
- srdi 8,31,5 /* Setup the loop counter. */
- mr 10,3
- mr 11,12
- mtcrf 0x01,9
- cmpldi cr6,9,1
-#ifdef __LITTLE_ENDIAN__
- lvsr 5,0,12
-#else
- lvsl 5,0,12
-#endif
- lvx 3,0,12
- bf 31,L(setup_unaligned_loop)
-
- /* Copy another 16 bytes to align to 32-bytes due to the loop. */
- lvx 4,12,6
-#ifdef __LITTLE_ENDIAN__
- vperm 6,4,3,5
-#else
- vperm 6,3,4,5
-#endif
- addi 11,12,16
- addi 10,3,16
- stvx 6,0,3
- vor 3,4,4
-
-L(setup_unaligned_loop):
- mtctr 8
- ble cr6,L(end_unaligned_loop)
-
- /* Copy 32 bytes at a time using vector instructions. */
- .align 4
-L(unaligned_loop):
-
- /* Note: vr6/vr10 may contain data that was already copied,
- but in order to get proper alignment, we may have to copy
- some portions again. This is faster than having unaligned
- vector instructions though. */
-
- lvx 4,11,6 /* vr4 = r11+16. */
-#ifdef __LITTLE_ENDIAN__
- vperm 6,4,3,5
-#else
- vperm 6,3,4,5
-#endif
- lvx 3,11,7 /* vr3 = r11+32. */
-#ifdef __LITTLE_ENDIAN__
- vperm 10,3,4,5
-#else
- vperm 10,4,3,5
-#endif
- addi 11,11,32
- stvx 6,0,10
- stvx 10,10,6
- addi 10,10,32
-
- bdnz L(unaligned_loop)
-
- .align 4
-L(end_unaligned_loop):
-
- /* Check for tail bytes. */
- rldicr 0,31,0,59
- mtcrf 0x01,31
- beq cr1,0f
-
- add 3,3,0
- add 12,12,0
-
- /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */
-8: /* Copy 8 bytes. */
- bf 28,4f
-
- lwz 6,0(12)
- lwz 7,4(12)
- addi 12,12,8
- stw 6,0(3)
- stw 7,4(3)
- addi 3,3,8
-4: /* Copy 4 bytes. */
- bf 29,2f
-
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-2: /* Copy 2~3 bytes. */
- bf 30,1f
-
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-1: /* Copy 1 byte. */
- bf 31,0f
-
- lbz 6,0(12)
- stb 6,0(3)
-0: /* Return DST + LEN pointer. */
- ld 31,-8(1)
- ld 3,-16(1)
- add 3,3,5
- blr
-
-END_GEN_TB (MEMPCPY,TB_TOCLESS)
-libc_hidden_def (__mempcpy)
-weak_alias (__mempcpy, mempcpy)
-libc_hidden_builtin_def (mempcpy)
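
The only behavioral difference from memcpy is the return value, which is why the prologue stashes DST at -16(r1) and every exit path reloads it and adds LEN. Reference semantics only:

#include <string.h>

void *
model_mempcpy (void *dst, const void *src, size_t len)
{
  return (char *) memcpy (dst, src, len) + len;   /* end pointer, not dst */
}
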
diff --git a/sysdeps/powerpc/powerpc64/power7/memrchr.S b/sysdeps/powerpc/powerpc64/power7/memrchr.S
deleted file mode 100644
index 4276768915..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/memrchr.S
+++ /dev/null
@@ -1,201 +0,0 @@
-/* Optimized memrchr implementation for PowerPC64/POWER7 using cmpb insn.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Luis Machado <luisgpm@br.ibm.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-/* char * [r3] memrchr (char *s [r3], int byte [r4], size_t size [r5]) */
-
-#ifndef MEMRCHR
-# define MEMRCHR __memrchr
-#endif
- .machine power7
-ENTRY (MEMRCHR)
- CALL_MCOUNT 3
- add r7,r3,r5 /* Calculate the last acceptable address. */
- neg r0,r7
- addi r7,r7,-1
- mr r10,r3
- clrrdi r6,r7,7
- li r9,3<<5
- dcbt r9,r6,8 /* Stream hint, decreasing addresses. */
-
- /* Replicate BYTE to doubleword. */
- insrdi r4,r4,8,48
- insrdi r4,r4,16,32
- insrdi r4,r4,32,0
- li r6,-8
- li r9,-1
- rlwinm r0,r0,3,26,28 /* Calculate padding. */
- clrrdi r8,r7,3
- srd r9,r9,r0
- cmpldi r5,32
- clrrdi r0,r10,3
- ble L(small_range)
-
-#ifdef __LITTLE_ENDIAN__
- ldx r12,0,r8
-#else
- ldbrx r12,0,r8 /* Load reversed doubleword from memory. */
-#endif
- cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
- and r3,r3,r9
- cmpldi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */
- bne cr7,L(done)
-
- mtcrf 0x01,r8
- /* Are we now aligned to a quadword boundary? If so, skip to
- the main loop. Otherwise, go through the alignment code. */
- bf 28,L(loop_setup)
-
- /* Handle DWORD2 of pair. */
-#ifdef __LITTLE_ENDIAN__
- ldx r12,r8,r6
-#else
- ldbrx r12,r8,r6
-#endif
- addi r8,r8,-8
- cmpb r3,r12,r4
- cmpldi cr7,r3,0
- bne cr7,L(done)
-
-L(loop_setup):
- /* The last dword we want to read in the loop below is the one
-      containing the first byte of the string, i.e. the dword at
-      s & ~7, or r0.  The first dword read is at r8 - 8; we
-      read 2 * cnt dwords, so the last dword read will be at
-      r8 - 8 - 16 * cnt + 8.  Solving for cnt gives
-      cnt = (r8 - r0) / 16. */
- sub r5,r8,r0
- addi r8,r8,-8
- srdi r9,r5,4 /* Number of loop iterations. */
- mtctr r9 /* Setup the counter. */
-
- /* Main loop to look for BYTE backwards in the string.
- FIXME: Investigate whether 32 byte align helps with this
- 9 instruction loop. */
- .align 5
-L(loop):
- /* Load two doublewords, compare and merge in a
- single register for speed. This is an attempt
- to speed up the byte-checking process for bigger strings. */
-
-#ifdef __LITTLE_ENDIAN__
- ldx r12,0,r8
- ldx r11,r8,r6
-#else
- ldbrx r12,0,r8
- ldbrx r11,r8,r6
-#endif
- cmpb r3,r12,r4
- cmpb r9,r11,r4
- or r5,r9,r3 /* Merge everything in one doubleword. */
- cmpldi cr7,r5,0
- bne cr7,L(found)
- addi r8,r8,-16
- bdnz L(loop)
-
- /* We may have one more doubleword to read. */
- cmpld r8,r0
- bnelr
-
-#ifdef __LITTLE_ENDIAN__
- ldx r12,0,r8
-#else
- ldbrx r12,0,r8
-#endif
- cmpb r3,r12,r4
- cmpldi cr7,r3,0
- bne cr7,L(done)
- blr
-
- .align 4
-L(found):
- /* OK, one (or both) of the dwords contains BYTE. Check
- the first dword. */
- cmpldi cr6,r3,0
- bne cr6,L(done)
-
- /* BYTE must be in the second word. Adjust the address
- again and move the result of cmpb to r3 so we can calculate the
- pointer. */
-
- mr r3,r9
- addi r8,r8,-8
-
- /* r3 has the output of the cmpb instruction, that is, it contains
- 0xff in the same position as BYTE in the original
- word from the string. Use that to calculate the pointer.
- We need to make sure BYTE is *before* the end of the
- range. */
-L(done):
- cntlzd r9,r3 /* Count leading zeros before the match. */
- cmpld r8,r0 /* Are we on the last word? */
- srdi r6,r9,3 /* Convert leading zeros to bytes. */
- addi r0,r6,-7
- sub r3,r8,r0
- cmpld cr7,r3,r10
- bnelr
- bgelr cr7
- li r3,0
- blr
-
- .align 4
-L(null):
- li r3,0
- blr
-
-/* Deals with size <= 32. */
- .align 4
-L(small_range):
- cmpldi r5,0
- beq L(null)
-
-#ifdef __LITTLE_ENDIAN__
- ldx r12,0,r8
-#else
- ldbrx r12,0,r8 /* Load reversed doubleword from memory. */
-#endif
- cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
- and r3,r3,r9
- cmpldi cr7,r3,0
- bne cr7,L(done)
-
- /* Are we done already? */
- cmpld r8,r0
- addi r8,r8,-8
- beqlr
-
- .align 5
-L(loop_small):
-#ifdef __LITTLE_ENDIAN__
- ldx r12,0,r8
-#else
- ldbrx r12,0,r8
-#endif
- cmpb r3,r12,r4
- cmpld r8,r0
- cmpldi cr7,r3,0
- bne cr7,L(done)
- addi r8,r8,-8
- bne L(loop_small)
- blr
-
-END (MEMRCHR)
-weak_alias (__memrchr, memrchr)
-libc_hidden_builtin_def (memrchr)
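
The scan relies on cmpb, which compares two doublewords bytewise and yields 0xff in every matching byte lane, so a nonzero result flags a hit somewhere in the 8-byte block; ldbrx on big-endian (a plain ldx on little-endian) loads the block byte-reversed so the same leading-zero arithmetic locates the last match. A portable model of cmpb and of the byte replication done by the insrdi sequence (model code under those assumptions, not the glibc sources):

#include <stdint.h>

/* Replicate the search byte into all 8 lanes, like the insrdi sequence. */
static uint64_t
replicate_byte (unsigned char c)
{
  return c * 0x0101010101010101ULL;
}

/* Bytewise equality mask, like the cmpb instruction. */
static uint64_t
cmpb_model (uint64_t a, uint64_t b)
{
  uint64_t r = 0;
  for (int i = 0; i < 8; i++)
    {
      uint64_t lane = 0xffULL << (8 * i);
      if ((a & lane) == (b & lane))
        r |= lane;
    }
  return r;
}

memrchr then walks doublewords from the end of the range toward r0, masks off lanes past the last acceptable address, and converts the first nonzero mask into a byte offset.
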
diff --git a/sysdeps/powerpc/powerpc64/power7/memset.S b/sysdeps/powerpc/powerpc64/power7/memset.S
deleted file mode 100644
index 21933c0672..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/memset.S
+++ /dev/null
@@ -1,399 +0,0 @@
-/* Optimized memset implementation for PowerPC64/POWER7.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Luis Machado <luisgpm@br.ibm.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
- Returns 's'. */
-
-#ifndef MEMSET
-# define MEMSET memset
-#endif
- .machine power7
-EALIGN (MEMSET, 5, 0)
- CALL_MCOUNT 3
-
-L(_memset):
- cmpldi cr7,5,31
- cmpldi cr6,5,8
- mr 10,3
-
- /* Replicate byte to word. */
- insrdi 4,4,8,48
- insrdi 4,4,16,32
- ble cr6,L(small) /* If length <= 8, use short copy code. */
-
- neg 0,3
- ble cr7,L(medium) /* If length < 32, use medium copy code. */
-
- andi. 11,10,7 /* Check alignment of DST. */
- insrdi 4,4,32,0 /* Replicate word to double word. */
-
- mr 12,5
- beq L(big_aligned)
-
- clrldi 0,0,61
- mtocrf 0x01,0
- subf 5,0,5
-
- /* Get DST aligned to 8 bytes. */
-1: bf 31,2f
-
- stb 4,0(10)
- addi 10,10,1
-2: bf 30,4f
-
- sth 4,0(10)
- addi 10,10,2
-4: bf 29,L(big_aligned)
-
- stw 4,0(10)
- addi 10,10,4
-
- .align 4
-L(big_aligned):
-
- cmpldi cr5,5,255
- li 0,32
- dcbtst 0,10
- cmpldi cr6,4,0
- srdi 9,5,3 /* Number of full doublewords remaining. */
- crand 27,26,21
- mtocrf 0x01,9
- bt 27,L(huge)
-
- /* From this point on, we'll copy 32+ bytes and the value
- isn't 0 (so we can't use dcbz). */
-
- srdi 8,5,5
- clrldi 11,5,61
- cmpldi cr6,11,0
- cmpldi cr1,9,4
- mtctr 8
-
- /* Copy 1~3 doublewords so the main loop starts
- at a multiple of 32 bytes. */
-
- bf 30,1f
-
- std 4,0(10)
- std 4,8(10)
- addi 10,10,16
- bf 31,L(big_loop)
-
- std 4,0(10)
- addi 10,10,8
- mr 12,10
- blt cr1,L(tail_bytes)
- b L(big_loop)
-
- .align 4
-1: /* Copy 1 doubleword. */
- bf 31,L(big_loop)
-
- std 4,0(10)
- addi 10,10,8
-
- /* Main aligned copy loop. Copies 32 bytes at a time and
-    ping-pongs through r10 and r12 to avoid AGEN delays. */
- .align 4
-L(big_loop):
- addi 12,10,32
- std 4,0(10)
- std 4,8(10)
- std 4,16(10)
- std 4,24(10)
- bdz L(tail_bytes)
-
- addi 10,10,64
- std 4,0(12)
- std 4,8(12)
- std 4,16(12)
- std 4,24(12)
- bdnz L(big_loop)
-
- mr 12,10
- b L(tail_bytes)
-
- .align 4
-L(tail_bytes):
-
- /* Check for tail bytes. */
- beqlr cr6
-
- clrldi 0,5,61
- mtocrf 0x01,0
-
- /* At this point we have a tail of 0-7 bytes and we know that the
- destination is doubleword-aligned. */
-4: /* Copy 4 bytes. */
- bf 29,2f
-
- stw 4,0(12)
- addi 12,12,4
-2: /* Copy 2 bytes. */
- bf 30,1f
-
- sth 4,0(12)
- addi 12,12,2
-1: /* Copy 1 byte. */
- bflr 31
-
- stb 4,0(12)
- blr
-
- /* Special case when the value is 0 and we have a long length to deal
-    with. Use dcbz to zero out 128 bytes at a time. Before using
-    dcbz though, we need to get the destination 128-byte aligned. */
- .align 4
-L(huge):
- andi. 11,10,127
- neg 0,10
- beq L(huge_aligned)
-
- clrldi 0,0,57
- subf 5,0,5
- srdi 0,0,3
- mtocrf 0x01,0
-
- /* Get DST aligned to 128 bytes. */
-8: bf 28,4f
-
- std 4,0(10)
- std 4,8(10)
- std 4,16(10)
- std 4,24(10)
- std 4,32(10)
- std 4,40(10)
- std 4,48(10)
- std 4,56(10)
- addi 10,10,64
- .align 4
-4: bf 29,2f
-
- std 4,0(10)
- std 4,8(10)
- std 4,16(10)
- std 4,24(10)
- addi 10,10,32
- .align 4
-2: bf 30,1f
-
- std 4,0(10)
- std 4,8(10)
- addi 10,10,16
- .align 4
-1: bf 31,L(huge_aligned)
-
- std 4,0(10)
- addi 10,10,8
-
-
-L(huge_aligned):
- srdi 8,5,7
- clrldi 11,5,57
- cmpldi cr6,11,0
- mtctr 8
-
- .align 4
-L(huge_loop):
- dcbz 0,10
- addi 10,10,128
- bdnz L(huge_loop)
-
- /* Check how many bytes are still left. */
- beqlr cr6
-
- subf 9,3,10
- subf 5,9,12
- srdi 8,5,3
- cmpldi cr6,8,0
- mtocrf 0x01,8
-
- /* We have a tail of 1~127 bytes. Copy up to 15 doublewords for
- speed. We'll handle the resulting tail bytes later. */
- beq cr6,L(tail)
-
-8: bf 28,4f
-
- std 4,0(10)
- std 4,8(10)
- std 4,16(10)
- std 4,24(10)
- std 4,32(10)
- std 4,40(10)
- std 4,48(10)
- std 4,56(10)
- addi 10,10,64
- .align 4
-4: bf 29,2f
-
- std 4,0(10)
- std 4,8(10)
- std 4,16(10)
- std 4,24(10)
- addi 10,10,32
- .align 4
-2: bf 30,1f
-
- std 4,0(10)
- std 4,8(10)
- addi 10,10,16
- .align 4
-1: bf 31,L(tail)
-
- std 4,0(10)
- addi 10,10,8
-
- /* Handle the rest of the tail bytes here. */
-L(tail):
- mtocrf 0x01,5
-
- .align 4
-4: bf 29,2f
-
- stw 4,0(10)
- addi 10,10,4
- .align 4
-2: bf 30,1f
-
- sth 4,0(10)
- addi 10,10,2
- .align 4
-1: bflr 31
-
- stb 4,0(10)
- blr
-
- /* Expanded tree to copy tail bytes without increments. */
- .align 4
-L(copy_tail):
- bf 29,L(FXX)
-
- stw 4,0(10)
- bf 30,L(TFX)
-
- sth 4,4(10)
- bflr 31
-
- stb 4,6(10)
- blr
-
- .align 4
-L(FXX): bf 30,L(FFX)
-
- sth 4,0(10)
- bflr 31
-
- stb 4,2(10)
- blr
-
- .align 4
-L(TFX): bflr 31
-
- stb 4,4(10)
- blr
-
- .align 4
-L(FFX): bflr 31
-
- stb 4,0(10)
- blr
-
- /* Handle copies of 9~31 bytes. */
- .align 4
-L(medium):
- /* At least 9 bytes to go. */
- andi. 11,10,3
- clrldi 0,0,62
- beq L(medium_aligned)
-
- /* Force 4-byte alignment for DST. */
- mtocrf 0x01,0
- subf 5,0,5
-1: /* Copy 1 byte. */
- bf 31,2f
-
- stb 4,0(10)
- addi 10,10,1
-2: /* Copy 2 bytes. */
- bf 30,L(medium_aligned)
-
- sth 4,0(10)
- addi 10,10,2
-
- .align 4
-L(medium_aligned):
- /* At least 6 bytes to go, and DST is word-aligned. */
- cmpldi cr1,5,16
- mtocrf 0x01,5
- blt cr1,8f
-
- /* Copy 16 bytes. */
- stw 4,0(10)
- stw 4,4(10)
- stw 4,8(10)
- stw 4,12(10)
- addi 10,10,16
-8: /* Copy 8 bytes. */
- bf 28,4f
-
- stw 4,0(10)
- stw 4,4(10)
- addi 10,10,8
-4: /* Copy 4 bytes. */
- bf 29,2f
-
- stw 4,0(10)
- addi 10,10,4
-2: /* Copy 2-3 bytes. */
- bf 30,1f
-
- sth 4,0(10)
- addi 10,10,2
-1: /* Copy 1 byte. */
- bflr 31
-
- stb 4,0(10)
- blr
-
- /* Handles copies of 0~8 bytes. */
- .align 4
-L(small):
- mtocrf 0x01,5
- bne cr6,L(copy_tail)
-
- stw 4,0(10)
- stw 4,4(10)
- blr
-
-END_GEN_TB (MEMSET,TB_TOCLESS)
-libc_hidden_builtin_def (memset)
-
-/* Copied from bzero.S to prevent the linker from inserting a stub
- between bzero and memset. */
-ENTRY (__bzero)
- CALL_MCOUNT 3
- mr r5,r4
- li r4,0
- b L(_memset)
-END (__bzero)
-#ifndef __bzero
-weak_alias (__bzero, bzero)
-#endif
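
Two ideas carry this memset: the three insrdi instructions replicate the fill byte across a doubleword (equivalent to multiplying by 0x0101010101010101), and when the value is zero and the length is large, dcbz clears a whole 128-byte cache line per iteration. A byte-level C model of the same structure (the dcbz path is shown as a plain store loop; a sketch, not the glibc code):

#include <stdint.h>
#include <stddef.h>

void *
model_memset (void *s, int c, size_t n)
{
  uint64_t pat = (unsigned char) c * 0x0101010101010101ULL;
  unsigned char *p = s;
  while (n && ((uintptr_t) p & 7))      /* align DST to 8 bytes */
    { *p++ = (unsigned char) c; n--; }
  uint64_t *dp = (uint64_t *) p;
  for (; n >= 8; n -= 8)                /* dcbz replaces this with whole
                                           zeroed cache lines when c == 0 */
    *dp++ = pat;
  p = (unsigned char *) dp;
  while (n--)                           /* 0~7 tail bytes */
    *p++ = (unsigned char) c;
  return s;
}
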
diff --git a/sysdeps/powerpc/powerpc64/power7/multiarch/Implies b/sysdeps/powerpc/powerpc64/power7/multiarch/Implies
deleted file mode 100644
index bf5d6171a5..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/multiarch/Implies
+++ /dev/null
@@ -1 +0,0 @@
-powerpc/powerpc64/power6/multiarch
diff --git a/sysdeps/powerpc/powerpc64/power7/rawmemchr.S b/sysdeps/powerpc/powerpc64/power7/rawmemchr.S
deleted file mode 100644
index 48afb75943..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/rawmemchr.S
+++ /dev/null
@@ -1,115 +0,0 @@
-/* Optimized rawmemchr implementation for PowerPC64/POWER7 using cmpb insn.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Luis Machado <luisgpm@br.ibm.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-/* void * [r3] rawmemchr (void *s [r3], int c [r4]) */
-
-#ifndef RAWMEMCHR
-# define RAWMEMCHR __rawmemchr
-#endif
- .machine power7
-ENTRY (RAWMEMCHR)
- CALL_MCOUNT 2
- dcbt 0,r3
- clrrdi r8,r3,3 /* Align the address to doubleword boundary. */
-
- /* Replicate byte to doubleword. */
- insrdi r4,r4,8,48
- insrdi r4,r4,16,32
- insrdi r4,r4,32,0
-
- /* Now r4 has a doubleword of c bytes. */
-
- rlwinm r6,r3,3,26,28 /* Calculate padding. */
- ld r12,0(r8) /* Load doubleword from memory. */
- cmpb r5,r12,r4 /* Compare each byte against c byte. */
-#ifdef __LITTLE_ENDIAN__
- srd r5,r5,r6
- sld r5,r5,r6
-#else
- sld r5,r5,r6 /* Move left to discard ignored bits. */
- srd r5,r5,r6 /* Bring the bits back as zeros. */
-#endif
- cmpdi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */
- bne cr7,L(done)
-
- mtcrf 0x01,r8
-
- /* Are we now aligned to a quadword boundary? If so, skip to
- the main loop. Otherwise, go through the alignment code. */
-
- bt 28,L(loop)
-
- /* Handle DWORD2 of pair. */
- ldu r12,8(r8)
- cmpb r5,r12,r4
- cmpdi cr7,r5,0
- bne cr7,L(done)
- b L(loop) /* We branch here (rather than falling through)
- to skip the nops due to heavy alignment
- of the loop below. */
-
- /* Main loop to look for the end of the string. Since it's a
- small loop (< 8 instructions), align it to 32-bytes. */
- .p2align 5
-L(loop):
- /* Load two doublewords, compare and merge in a
- single register for speed. This is an attempt
- to speed up the byte-checking process for bigger strings. */
- ld r12,8(r8)
- ldu r11,16(r8)
- cmpb r5,r12,r4
- cmpb r6,r11,r4
- or r7,r5,r6
- cmpdi cr7,r7,0
- beq cr7,L(loop)
-
- /* OK, one (or both) of the doublewords contains a 'c' byte. Check
- the first doubleword and decrement the address in case the first
- doubleword really contains a c byte. */
-
- cmpdi cr6,r5,0
- addi r8,r8,-8
- bne cr6,L(done)
-
- /* The 'c' byte must be in the second doubleword. Adjust the address
-    again and move the result of cmpb to r5 so we can calculate the
- pointer. */
- mr r5,r6
- addi r8,r8,8
-
- /* r5 has the output of the cmpb instruction, that is, it contains
- 0xff in the same position as the 'c' byte in the original
-    doubleword from the string. Use that fact to find the position
-    of the byte within the string. */
-L(done):
-#ifdef __LITTLE_ENDIAN__
- addi r0,r5,-1
- andc r0,r0,r5
- popcntd r0,r0 /* Count trailing zeros. */
-#else
- cntlzd r0,r5 /* Count leading zeros before the match. */
-#endif
- srdi r0,r0,3 /* Convert leading zeros to bytes. */
- add r3,r8,r0 /* Return address of the matching char. */
- blr
-END (RAWMEMCHR)
-weak_alias (__rawmemchr,rawmemchr)
-libc_hidden_builtin_def (__rawmemchr)
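
On little-endian the L(done) path isolates the first match with the (x - 1) & ~x idiom: it turns every bit below the lowest set bit into ones, so popcntd effectively counts trailing zeros, and dividing by 8 yields the byte index. A minimal model of that computation (assuming a nonzero cmpb mask):

#include <stdint.h>

static unsigned
first_match_index_le (uint64_t cmpb_mask /* must be nonzero */)
{
  uint64_t below = (cmpb_mask - 1) & ~cmpb_mask;  /* ones below lowest set bit */
  return (unsigned) __builtin_popcountll (below) / 8;   /* byte index */
}
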
diff --git a/sysdeps/powerpc/powerpc64/power7/stpncpy.S b/sysdeps/powerpc/powerpc64/power7/stpncpy.S
deleted file mode 100644
index a346dd7e28..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/stpncpy.S
+++ /dev/null
@@ -1,24 +0,0 @@
-/* Optimized stpncpy implementation for PowerPC64/POWER7.
- Copyright (C) 2014-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#define USE_AS_STPNCPY
-#include <sysdeps/powerpc/powerpc64/power7/strncpy.S>
-
-weak_alias (__stpncpy, stpncpy)
-libc_hidden_def (__stpncpy)
-libc_hidden_builtin_def (stpncpy)
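
USE_AS_STPNCPY reuses the strncpy body and changes only the return value. Reference semantics of what that flag selects (a plain C model, not the shared assembly):

#include <stddef.h>

char *
model_stpncpy (char *dst, const char *src, size_t n)
{
  size_t i = 0;
  for (; i < n && src[i] != '\0'; i++)
    dst[i] = src[i];
  char *end = dst + i;          /* first '\0' written, or dst + n */
  for (; i < n; i++)            /* strncpy-style zero padding */
    dst[i] = '\0';
  return end;                   /* strncpy would return dst instead */
}
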
diff --git a/sysdeps/powerpc/powerpc64/power7/strcasecmp.S b/sysdeps/powerpc/powerpc64/power7/strcasecmp.S
deleted file mode 100644
index e856b8a593..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/strcasecmp.S
+++ /dev/null
@@ -1,126 +0,0 @@
-/* Optimized strcasecmp implementation for PowerPC64.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <locale-defines.h>
-
-/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] )
-
- or if defined USE_IN_EXTENDED_LOCALE_MODEL:
-
- int [r3] strcasecmp_l (const char *s1 [r3], const char *s2 [r4],
- __locale_t loc [r5]) */
-
-#ifndef STRCMP
-# define __STRCMP __strcasecmp
-# define STRCMP strcasecmp
-#endif
-
-ENTRY (__STRCMP)
-#ifndef USE_IN_EXTENDED_LOCALE_MODEL
- CALL_MCOUNT 2
-#else
- CALL_MCOUNT 3
-#endif
-
-#define rRTN r3 /* Return value */
-#define rSTR1 r5 /* 1st string */
-#define rSTR2 r4 /* 2nd string */
-#define rLOCARG r5 /* 3rd argument: locale_t */
-#define rCHAR1 r6 /* Byte read from 1st string */
-#define rCHAR2 r7 /* Byte read from 2nd string */
-#define rADDR1 r8 /* Address of tolower(rCHAR1) */
-#define rADDR2 r12 /* Address of tolower(rCHAR2) */
-#define rLWR1 r8 /* Word tolower(rCHAR1) */
-#define rLWR2 r12 /* Word tolower(rCHAR2) */
-#define rTMP r9
-#define rLOC r11 /* Default locale address */
-
- cmpd cr7, r3, r4
-#ifndef USE_IN_EXTENDED_LOCALE_MODEL
- ld rTMP, __libc_tsd_LOCALE@got@tprel(r2)
- add rLOC, rTMP, __libc_tsd_LOCALE@tls
- ld rLOC, 0(rLOC)
-#else
- mr rLOC, rLOCARG
-#endif
- ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC)
- mr rSTR1, rRTN
- li rRTN, 0
- beqlr cr7
-
-
- /* Unrolled loop for POWER: loads are done with 'lbz' plus an
-    offset, and the string pointers are only updated at the end
-    of each unrolled iteration. */
-
- lbz rCHAR1, 0(rSTR1) /* Load char from s1 */
- lbz rCHAR2, 0(rSTR2) /* Load char from s2 */
-L(loop):
- cmpdi rCHAR1, 0 /* *s1 == '\0' ? */
- sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */
- sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */
- lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */
- lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */
- cmpw cr1, rLWR1, rLWR2 /* r = tolower(*s1) == tolower(*s2) ? */
- crorc 4*cr1+eq,eq,4*cr1+eq /* (*s1 != '\0') || (r == 1) */
- beq cr1, L(done)
- lbz rCHAR1, 1(rSTR1)
- lbz rCHAR2, 1(rSTR2)
- cmpdi rCHAR1, 0
- sldi rADDR1, rCHAR1, 2
- sldi rADDR2, rCHAR2, 2
- lwzx rLWR1, rLOC, rADDR1
- lwzx rLWR2, rLOC, rADDR2
- cmpw cr1, rLWR1, rLWR2
- crorc 4*cr1+eq,eq,4*cr1+eq
- beq cr1, L(done)
- lbz rCHAR1, 2(rSTR1)
- lbz rCHAR2, 2(rSTR2)
- cmpdi rCHAR1, 0
- sldi rADDR1, rCHAR1, 2
- sldi rADDR2, rCHAR2, 2
- lwzx rLWR1, rLOC, rADDR1
- lwzx rLWR2, rLOC, rADDR2
- cmpw cr1, rLWR1, rLWR2
- crorc 4*cr1+eq,eq,4*cr1+eq
- beq cr1, L(done)
- lbz rCHAR1, 3(rSTR1)
- lbz rCHAR2, 3(rSTR2)
- cmpdi rCHAR1, 0
- /* Increment both string descriptors */
- addi rSTR1, rSTR1, 4
- addi rSTR2, rSTR2, 4
- sldi rADDR1, rCHAR1, 2
- sldi rADDR2, rCHAR2, 2
- lwzx rLWR1, rLOC, rADDR1
- lwzx rLWR2, rLOC, rADDR2
- cmpw cr1, rLWR1, rLWR2
- crorc 4*cr1+eq,eq,4*cr1+eq
- beq cr1,L(done)
- lbz rCHAR1, 0(rSTR1) /* Load char from s1 */
- lbz rCHAR2, 0(rSTR2) /* Load char from s2 */
- b L(loop)
-L(done):
- subf r0, rLWR2, rLWR1
- extsw rRTN, r0
- blr
-END (__STRCMP)
-
-weak_alias (__STRCMP, STRCMP)
-libc_hidden_builtin_def (__STRCMP)
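
Each iteration scales the byte by 4 (the sldi by 2) to index the locale's array of 32-bit tolower entries and compares the two lowered words, exiting on mismatch or on a '\0' in s1. The equivalent portable loop (a reference model; the assembly indexes the LOCALE_CTYPE_TOLOWER table directly rather than calling tolower):

#include <ctype.h>

int
model_strcasecmp (const char *s1, const char *s2)
{
  const unsigned char *p1 = (const unsigned char *) s1;
  const unsigned char *p2 = (const unsigned char *) s2;
  int c1, c2;
  do
    {
      c1 = tolower (*p1++);     /* lwzx rLWR1,rLOC,rADDR1 above */
      c2 = tolower (*p2++);
    }
  while (c1 != '\0' && c1 == c2);
  return c1 - c2;               /* like the subf/extsw at L(done) */
}
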
diff --git a/sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S b/sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S
deleted file mode 100644
index c13c4ebcb8..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S
+++ /dev/null
@@ -1,5 +0,0 @@
-#define USE_IN_EXTENDED_LOCALE_MODEL
-#define STRCMP strcasecmp_l
-#define __STRCMP __strcasecmp_l
-
-#include "strcasecmp.S"
diff --git a/sysdeps/powerpc/powerpc64/power7/strchr.S b/sysdeps/powerpc/powerpc64/power7/strchr.S
deleted file mode 100644
index a18e2e101c..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/strchr.S
+++ /dev/null
@@ -1,230 +0,0 @@
-/* Optimized strchr implementation for PowerPC64/POWER7 using cmpb insn.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Luis Machado <luisgpm@br.ibm.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-#ifndef STRCHR
-# define STRCHR strchr
-#endif
-
-/* char * [r3] strchr (char *s [r3], int c [r4]) */
- .machine power7
-ENTRY (STRCHR)
- CALL_MCOUNT 2
- dcbt 0,r3
- clrrdi r8,r3,3 /* Align the address to doubleword boundary. */
- cmpdi cr7,r4,0
- ld r12,0(r8) /* Load doubleword from memory. */
- li r0,0 /* Doubleword with null chars to use
- with cmpb. */
-
- rlwinm r6,r3,3,26,28 /* Calculate padding. */
-
- beq cr7,L(null_match)
-
- /* Replicate byte to doubleword. */
- insrdi r4,r4,8,48
- insrdi r4,r4,16,32
- insrdi r4,r4,32,0
-
- /* Now r4 has a doubleword of c bytes and r0 has
- a doubleword of null bytes. */
-
- cmpb r10,r12,r4 /* Compare each byte against c byte. */
- cmpb r11,r12,r0 /* Compare each byte against null byte. */
-
- /* Move the doublewords left and right to discard the bits that are
- not part of the string and bring them back as zeros. */
-#ifdef __LITTLE_ENDIAN__
- srd r10,r10,r6
- srd r11,r11,r6
- sld r10,r10,r6
- sld r11,r11,r6
-#else
- sld r10,r10,r6
- sld r11,r11,r6
- srd r10,r10,r6
- srd r11,r11,r6
-#endif
- or r5,r10,r11 /* OR the results to speed things up. */
- cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes
- have been found. */
- bne cr7,L(done)
-
- mtcrf 0x01,r8
-
- /* Are we now aligned to a quadword boundary? If so, skip to
-    the main loop. Otherwise, go through the alignment code. */
-
- bt 28,L(loop)
-
- /* Handle DWORD2 of pair. */
- ldu r12,8(r8)
- cmpb r10,r12,r4
- cmpb r11,r12,r0
- or r5,r10,r11
- cmpdi cr7,r5,0
- bne cr7,L(done)
- b L(loop) /* We branch here (rather than falling through)
- to skip the nops due to heavy alignment
- of the loop below. */
-
- .p2align 5
-L(loop):
- /* Load two doublewords, compare and merge in a
- single register for speed. This is an attempt
- to speed up the null-checking process for bigger strings. */
- ld r12,8(r8)
- ldu r9,16(r8)
- cmpb r10,r12,r4
- cmpb r11,r12,r0
- cmpb r6,r9,r4
- cmpb r7,r9,r0
- or r12,r10,r11
- or r9,r6,r7
- or r5,r12,r9
- cmpdi cr7,r5,0
- beq cr7,L(loop)
-
- /* OK, one (or both) of the doublewords contains a c/null byte. Check
- the first doubleword and decrement the address in case the first
- doubleword really contains a c/null byte. */
-
- cmpdi cr6,r12,0
- addi r8,r8,-8
- bne cr6,L(done)
-
- /* The c/null byte must be in the second doubleword. Adjust the
- address again and move the result of cmpb to r10 so we can calculate
- the pointer. */
-
- mr r10,r6
- mr r11,r7
- addi r8,r8,8
-
- /* r10/r11 have the output of the cmpb instructions, that is,
- 0xff in the same position as the c/null byte in the original
- doubleword from the string. Use that to calculate the pointer. */
-L(done):
-#ifdef __LITTLE_ENDIAN__
- addi r3,r10,-1
- andc r3,r3,r10
- popcntd r0,r3
- addi r4,r11,-1
- andc r4,r4,r11
- cmpld cr7,r3,r4
- bgt cr7,L(no_match)
-#else
- cntlzd r0,r10 /* Count leading zeros before c matches. */
- cmpld cr7,r11,r10
- bgt cr7,L(no_match)
-#endif
- srdi r0,r0,3 /* Convert leading zeros to bytes. */
- add r3,r8,r0 /* Return address of the matching c byte
- or null in case c was not found. */
- blr
-
- .align 4
-L(no_match):
- li r3,0
- blr
-
-/* We are here because strchr was called with a null byte. */
- .align 4
-L(null_match):
- /* r0 has a doubleword of null bytes. */
-
- cmpb r5,r12,r0 /* Compare each byte against null bytes. */
-
- /* Move the doublewords left and right to discard the bits that are
- not part of the string and bring them back as zeros. */
-#ifdef __LITTLE_ENDIAN__
- srd r5,r5,r6
- sld r5,r5,r6
-#else
- sld r5,r5,r6
- srd r5,r5,r6
-#endif
- cmpdi cr7,r5,0 /* If r5 == 0, no null bytes
-    have been found. */
- bne cr7,L(done_null)
-
- mtcrf 0x01,r8
-
- /* Are we now aligned to a quadword boundary? If so, skip to
- the main loop. Otherwise, go through the alignment code. */
-
- bt 28,L(loop_null)
-
- /* Handle DWORD2 of pair. */
- ldu r12,8(r8)
- cmpb r5,r12,r0
- cmpdi cr7,r5,0
- bne cr7,L(done_null)
- b L(loop_null) /* We branch here (rather than falling through)
- to skip the nops due to heavy alignment
- of the loop below. */
-
- /* Main loop to look for the end of the string. Since it's a
- small loop (< 8 instructions), align it to 32-bytes. */
- .p2align 5
-L(loop_null):
- /* Load two doublewords, compare and merge in a
- single register for speed. This is an attempt
- to speed up the null-checking process for bigger strings. */
- ld r12,8(r8)
- ldu r11,16(r8)
- cmpb r5,r12,r0
- cmpb r10,r11,r0
- or r6,r5,r10
- cmpdi cr7,r6,0
- beq cr7,L(loop_null)
-
- /* OK, one (or both) of the doublewords contains a null byte. Check
- the first doubleword and decrement the address in case the first
- doubleword really contains a null byte. */
-
- cmpdi cr6,r5,0
- addi r8,r8,-8
- bne cr6,L(done_null)
-
- /* The null byte must be in the second doubleword. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
- pointer. */
-
- mr r5,r10
- addi r8,r8,8
-
- /* r5 has the output of the cmpb instruction, that is, it contains
- 0xff in the same position as the null byte in the original
- doubleword from the string. Use that to calculate the pointer. */
-L(done_null):
-#ifdef __LITTLE_ENDIAN__
- addi r0,r5,-1
- andc r0,r0,r5
- popcntd r0,r0
-#else
- cntlzd r0,r5 /* Count leading zeros before the match. */
-#endif
- srdi r0,r0,3 /* Convert leading zeros to bytes. */
- add r3,r8,r0 /* Return address of the matching null byte. */
- blr
-END (STRCHR)
-weak_alias (strchr, index)
-libc_hidden_builtin_def (strchr)
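
The forward scan builds two cmpb masks per doubleword, one for c and one for '\0', ORs them to detect any event quickly, and at L(done) compares which mask fires first: if the null byte precedes the first c match, the result is NULL. Byte-wise reference semantics of that rule (a model only; the assembly does this eight bytes at a time):

#include <stddef.h>

char *
model_strchr (const char *s, int c)
{
  for (;; s++)
    {
      if (*s == (char) c)
        return (char *) s;      /* also matches when c == '\0' */
      if (*s == '\0')
        return NULL;            /* terminator seen before c */
    }
}
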
diff --git a/sysdeps/powerpc/powerpc64/power7/strchrnul.S b/sysdeps/powerpc/powerpc64/power7/strchrnul.S
deleted file mode 100644
index 27bc1f0682..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/strchrnul.S
+++ /dev/null
@@ -1,131 +0,0 @@
-/* Optimized strchrnul implementation for PowerPC64/POWER7 using cmpb insn.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Luis Machado <luisgpm@br.ibm.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-#ifndef STRCHRNUL
-# define STRCHRNUL __strchrnul
-#endif
-/* char * [r3] strchrnul (char *s [r3], int c [r4]) */
- .machine power7
-ENTRY (STRCHRNUL)
- CALL_MCOUNT 2
- dcbt 0,r3
- clrrdi r8,r3,3 /* Align the address to doubleword boundary. */
-
- /* Replicate byte to doubleword. */
- insrdi r4,r4,8,48
- insrdi r4,r4,16,32
- insrdi r4,r4,32,0
-
- rlwinm r6,r3,3,26,28 /* Calculate padding. */
- ld r12,0(r8) /* Load doubleword from memory. */
- li r0,0 /* Doubleword with null chars to use
- with cmpb. */
-
- /* Now r4 has a doubleword of c bytes and r0 has
- a doubleword of null bytes. */
-
- cmpb r10,r12,r0 /* Compare each byte against null byte. */
- cmpb r9,r12,r4 /* Compare each byte against c byte. */
-
- /* Move the doublewords left and right to discard the bits that are
- not part of the string and to bring them back as zeros. */
-#ifdef __LITTLE_ENDIAN__
- srd r10,r10,r6
- srd r9,r9,r6
- sld r10,r10,r6
- sld r9,r9,r6
-#else
- sld r10,r10,r6
- sld r9,r9,r6
- srd r10,r10,r6
- srd r9,r9,r6
-#endif
- or r5,r9,r10 /* OR the results to speed things up. */
- cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes
- have been found. */
- bne cr7,L(done)
-
- mtcrf 0x01,r8
-
- /* Are we now aligned to a quadword boundary? If so, skip to
- the main loop. Otherwise, go through the alignment code. */
-
- bt 28,L(loop)
-
- /* Handle DWORD2 of pair. */
- ldu r12,8(r8)
- cmpb r10,r12,r0
- cmpb r9,r12,r4
- or r5,r9,r10
- cmpdi cr7,r5,0
- bne cr7,L(done)
- b L(loop) /* We branch here (rather than falling through)
- to skip the nops due to heavy alignment
- of the loop below. */
-
- .p2align 5
-L(loop):
- /* Load two doublewords, compare and merge in a
- single register for speed. This is an attempt
- to speed up the null-checking process for bigger strings. */
- ld r12,8(r8)
- ldu r11,16(r8)
- cmpb r10,r12,r0
- cmpb r9,r12,r4
- cmpb r6,r11,r0
- cmpb r7,r11,r4
- or r5,r9,r10
- or r10,r6,r7
- or r11,r5,r10
- cmpdi cr7,r11,0
- beq cr7,L(loop)
-
- /* OK, one (or both) of the doublewords contains a c/null byte. Check
- the first doubleword and decrement the address in case the first
- doubleword really contains a c/null byte. */
-
- cmpdi cr6,r5,0
- addi r8,r8,-8
- bne cr6,L(done)
-
- /* The c/null byte must be in the second doubleword. Adjust the
- address again and move the result of cmpb to r5 so we can calculate
- the pointer. */
- mr r5,r10
- addi r8,r8,8
-
- /* r5 has the output of the cmpb instruction, that is, it contains
- 0xff in the same position as the c/null byte in the original
- doubleword from the string. Use that to calculate the pointer. */
-L(done):
-#ifdef __LITTLE_ENDIAN__
- addi r0,r5,-1
- andc r0,r0,r5
- popcntd r0,r0
-#else
- cntlzd r0,r5 /* Count leading zeros before the match. */
-#endif
-	srdi	r0,r0,3	      /* Convert leading/trailing zeros to bytes.  */
- add r3,r8,r0 /* Return address of matching c/null byte. */
- blr
-END (STRCHRNUL)
-weak_alias (STRCHRNUL, strchrnul)
-libc_hidden_builtin_def (STRCHRNUL)
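For reference, the doubleword scan implemented by strchrnul.S above can be sketched in portable C. This is a hedged illustration, not glibc code: repeat_byte, match_bytes and my_strchrnul are invented names, the carry-based zero-byte test stands in for cmpb, and the sketch assumes a little-endian machine and an 8-byte-aligned input instead of doing the entry masking the assembly performs. The SWAR mask may flag bytes past the first match, but taking the lowest set bit keeps the result exact.

#include <stdint.h>
#include <string.h>

/* Replicate byte C into every byte of a 64-bit word (mirrors the
   insrdi sequence in the assembly).  */
static uint64_t repeat_byte (unsigned char c)
{
  uint64_t w = c;
  w |= w << 8;
  w |= w << 16;
  w |= w << 32;
  return w;
}

/* Return a word with 0x80 set in each byte of X that equals the
   corresponding byte of PAT -- a portable stand-in for cmpb.  */
static uint64_t match_bytes (uint64_t x, uint64_t pat)
{
  uint64_t v = x ^ pat;                /* zero bytes where they match */
  return (v - 0x0101010101010101ULL) & ~v & 0x8080808080808080ULL;
}

/* Minimal strchrnul: scan one aligned doubleword at a time, stopping
   at the first byte equal to C or to NUL, whichever comes first.  */
char *my_strchrnul (const char *s, int c)
{
  const uint64_t pat = repeat_byte ((unsigned char) c);
  const uint64_t *p = (const uint64_t *) s;  /* assume 8-byte aligned */
  for (;;)
    {
      uint64_t w;
      memcpy (&w, p, 8);
      uint64_t hit = match_bytes (w, pat) | match_bytes (w, 0);
      if (hit != 0)                    /* lowest marked byte wins */
        return (char *) p + (__builtin_ctzll (hit) >> 3);
      p++;
    }
}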
diff --git a/sysdeps/powerpc/powerpc64/power7/strcmp.S b/sysdeps/powerpc/powerpc64/power7/strcmp.S
deleted file mode 100644
index 14e14f457e..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/strcmp.S
+++ /dev/null
@@ -1,168 +0,0 @@
-/* Optimized strcmp implementation for Power7 using 'cmpb' instruction
- Copyright (C) 2014-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-/* The optimization here relies on the cmpb instruction.  8-byte
-   aligned strings are processed with doubleword comparisons, and
-   unaligned strings are handled with an unrolled byte-by-byte
-   loop.  */
-
-#include <sysdep.h>
-
-#ifndef STRCMP
-# define STRCMP strcmp
-#endif
-
-/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) */
-
- .machine power7
-EALIGN (STRCMP, 4, 0)
- CALL_MCOUNT 2
-
- or r9, r3, r4
-	rldicl. r10, r9, 0, 61	/* Are s1 and s2 8-byte aligned?  */
- bne cr0, L(process_unaligned_bytes)
- li r5, 0
-
- .align 4
-/* process input parameters on double word aligned boundary */
-L(unrollDword):
- ld r8,0(r3)
- ld r10,0(r4)
- cmpb r7,r8,r5
- cmpdi cr7,r7,0
- mr r9,r7
- bne cr7,L(null_found)
- cmpld cr7,r8,r10
- bne cr7,L(different)
-
- ld r8,8(r3)
- ld r10,8(r4)
- cmpb r7,r8,r5
- cmpdi cr7,r7,0
- mr r9,r7
- bne cr7,L(null_found)
- cmpld cr7,r8,r10
- bne cr7,L(different)
-
- ld r8,16(r3)
- ld r10,16(r4)
- cmpb r7,r8,r5
- cmpdi cr7,r7,0
- mr r9,r7
- bne cr7,L(null_found)
- cmpld cr7,r8,r10
- bne cr7,L(different)
-
- ld r8,24(r3)
- ld r10,24(r4)
- cmpb r7,r8,r5
- cmpdi cr7,r7,0
- mr r9,r7
- bne cr7,L(null_found)
- cmpld cr7,r8,r10
- bne cr7,L(different)
-
- addi r3, r3, 32
- addi r4, r4, 32
- beq cr7, L(unrollDword)
-
- .align 4
-L(null_found):
-#ifdef __LITTLE_ENDIAN__
- neg r7,r9
- and r9,r9,r7
- li r7,-1
- cntlzd r9,r9
- subfic r9,r9,71
- sld r9,r7,r9
-#else
- cntlzd r9,r9
- li r7,-1
- addi r9,r9,8
- srd r9,r7,r9
-#endif
- or r8,r8,r9
- or r10,r10,r9
-
-L(different):
- cmpb r9,r8,r10
-#ifdef __LITTLE_ENDIAN__
- addi r7,r9,1
- andc r9,r7,r9
- cntlzd r9,r9
- subfic r9,r9,63
-#else
- not r9,r9
- cntlzd r9,r9
- subfic r9,r9,56
-#endif
- srd r3,r8,r9
- srd r10,r10,r9
- rldicl r10,r10,0,56
- rldicl r3,r3,0,56
- subf r3,r10,r3
- blr
-
- .align 4
-L(process_unaligned_bytes):
- lbz r9, 0(r3) /* load byte from s1 */
- lbz r10, 0(r4) /* load byte from s2 */
- cmpdi cr7, r9, 0 /* compare *s1 with NULL */
-	beq cr7, L(diffOfNULL)	/* if *s1 is NUL, return *s1 - *s2 */
- cmplw cr7, r9, r10 /* compare *s1 and *s2 */
- bne cr7, L(ComputeDiff) /* branch to compute difference and return */
-
- lbz r9, 1(r3) /* load next byte from s1 */
- lbz r10, 1(r4) /* load next byte from s2 */
- cmpdi cr7, r9, 0 /* compare *s1 with NULL */
-	beq cr7, L(diffOfNULL)	/* if *s1 is NUL, return *s1 - *s2 */
- cmplw cr7, r9, r10 /* compare *s1 and *s2 */
- bne cr7, L(ComputeDiff) /* branch to compute difference and return */
-
- lbz r9, 2(r3) /* unroll 3rd byte here */
- lbz r10, 2(r4)
- cmpdi cr7, r9, 0
- beq cr7, L(diffOfNULL)
- cmplw cr7, r9, r10
-	bne cr7, L(ComputeDiff)
-
- lbz r9, 3(r3) /* unroll 4th byte now */
- lbz r10, 3(r4)
- addi r3, r3, 4 /* increment s1 by unroll factor */
- cmpdi cr7, r9, 0
-	cmplw cr6, r9, r10
- beq cr7, L(diffOfNULL)
- addi r4, r4, 4 /* increment s2 by unroll factor */
- beq cr6, L(process_unaligned_bytes) /* unroll byte processing */
-
- .align 4
-L(ComputeDiff):
- extsw r9, r9
- subf r10, r10, r9 /* compute s1 - s2 */
- extsw r3, r10
- blr /* return */
-
- .align 4
-L(diffOfNULL):
- li r9, 0
- subf r10, r10, r9 /* compute s1 - s2 */
- extsw r3, r10 /* sign extend result */
- blr /* return */
-
-END (STRCMP)
-libc_hidden_builtin_def (strcmp)
diff --git a/sysdeps/powerpc/powerpc64/power7/strlen.S b/sysdeps/powerpc/powerpc64/power7/strlen.S
deleted file mode 100644
index 63848c460c..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/strlen.S
+++ /dev/null
@@ -1,107 +0,0 @@
-/* Optimized strlen implementation for PowerPC64/POWER7 using cmpb insn.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Luis Machado <luisgpm@br.ibm.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-/* size_t [r3] strlen (char *s [r3])  */
-
-#ifndef STRLEN
-# define STRLEN strlen
-#endif
- .machine power7
-ENTRY (STRLEN)
- CALL_MCOUNT 1
- dcbt 0,r3
- clrrdi r4,r3,3 /* Align the address to doubleword boundary. */
- rlwinm r6,r3,3,26,28 /* Calculate padding. */
- li r0,0 /* Doubleword with null chars to use
- with cmpb. */
- li r5,-1 /* MASK = 0xffffffffffffffff. */
- ld r12,0(r4) /* Load doubleword from memory. */
-#ifdef __LITTLE_ENDIAN__
- sld r5,r5,r6
-#else
- srd r5,r5,r6 /* MASK = MASK >> padding. */
-#endif
- orc r9,r12,r5 /* Mask bits that are not part of the string. */
- cmpb r10,r9,r0 /* Check for null bytes in DWORD1. */
-	cmpdi	cr7,r10,0	/* If r10 == 0, no nulls have been found.  */
- bne cr7,L(done)
-
- mtcrf 0x01,r4
-
- /* Are we now aligned to a quadword boundary? If so, skip to
- the main loop. Otherwise, go through the alignment code. */
-
- bt 28,L(loop)
-
- /* Handle DWORD2 of pair. */
- ldu r12,8(r4)
- cmpb r10,r12,r0
- cmpdi cr7,r10,0
- bne cr7,L(done)
-
- /* Main loop to look for the end of the string. Since it's a
- small loop (< 8 instructions), align it to 32-bytes. */
- .p2align 5
-L(loop):
- /* Load two doublewords, compare and merge in a
- single register for speed. This is an attempt
- to speed up the null-checking process for bigger strings. */
-
- ld r12, 8(r4)
- ldu r11, 16(r4)
- cmpb r10,r12,r0
- cmpb r9,r11,r0
- or r8,r9,r10 /* Merge everything in one doubleword. */
- cmpdi cr7,r8,0
- beq cr7,L(loop)
-
- /* OK, one (or both) of the doublewords contains a null byte. Check
- the first doubleword and decrement the address in case the first
- doubleword really contains a null byte. */
-
- cmpdi cr6,r10,0
- addi r4,r4,-8
- bne cr6,L(done)
-
- /* The null byte must be in the second doubleword. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
- length. */
-
- mr r10,r9
- addi r4,r4,8
-
- /* r10 has the output of the cmpb instruction, that is, it contains
- 0xff in the same position as the null byte in the original
- doubleword from the string. Use that to calculate the length. */
-L(done):
-#ifdef __LITTLE_ENDIAN__
- addi r9, r10, -1 /* Form a mask from trailing zeros. */
- andc r9, r9, r10
- popcntd r0, r9 /* Count the bits in the mask. */
-#else
- cntlzd r0,r10 /* Count leading zeros before the match. */
-#endif
- subf r5,r3,r4
- srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
- add r3,r5,r0 /* Compute final length. */
- blr
-END (STRLEN)
-libc_hidden_builtin_def (strlen)
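The little-endian epilogue above leans on an identity worth making explicit: POWER7 has popcntd but no count-trailing-zeros instruction, so it computes popcount((x - 1) & ~x), which equals the number of trailing zero bits of x whenever x is nonzero. A small self-checking C program (invented names):

#include <stdint.h>
#include <assert.h>

static unsigned trailing_zeros_via_popcount (uint64_t x)
{
  /* (x - 1) & ~x sets exactly the bits below the lowest set bit.  */
  return (unsigned) __builtin_popcountll ((x - 1) & ~x);
}

int main (void)
{
  for (unsigned i = 0; i < 64; i++)
    {
      uint64_t x = 1ULL << i;
      assert (trailing_zeros_via_popcount (x) == i);
      assert (trailing_zeros_via_popcount (x | (1ULL << 63)) == i);
    }
  return 0;
}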
diff --git a/sysdeps/powerpc/powerpc64/power7/strncmp.S b/sysdeps/powerpc/powerpc64/power7/strncmp.S
deleted file mode 100644
index d53b31be8e..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/strncmp.S
+++ /dev/null
@@ -1,227 +0,0 @@
-/* Optimized strncmp implementation for POWER7/PowerPC64.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-#ifndef STRNCMP
-# define STRNCMP strncmp
-#endif
-
-/* See strlen.S for comments on how the end-of-string testing works.  */
-
-/* int [r3] strncmp (const char *s1 [r3],
- const char *s2 [r4],
- size_t size [r5]) */
-
-EALIGN (STRNCMP,5,0)
- CALL_MCOUNT 3
-
-#define rTMP2 r0
-#define rRTN r3
-#define rSTR1 r3 /* first string arg */
-#define rSTR2 r4 /* second string arg */
-#define rN r5 /* max string length */
-#define rWORD1 r6 /* current word in s1 */
-#define rWORD2 r7 /* current word in s2 */
-#define rWORD3 r10
-#define rWORD4 r11
-#define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
-#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
-#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
-#define rBITDIF r11 /* bits that differ in s1 & s2 words */
-#define rTMP r12
-
- dcbt 0,rSTR1
- nop
- or rTMP,rSTR2,rSTR1
- lis r7F7F,0x7f7f
- dcbt 0,rSTR2
- nop
- clrldi. rTMP,rTMP,61
- cmpldi cr1,rN,0
- lis rFEFE,-0x101
- bne L(unaligned)
-/* We are doubleword aligned, so set up for two loops: first a
-   doubleword loop, then fall into the byte loop for any residual.  */
- srdi. rTMP,rN,3
- clrldi rN,rN,61
- addi rFEFE,rFEFE,-0x101
- addi r7F7F,r7F7F,0x7f7f
- cmpldi cr1,rN,0
- beq L(unaligned)
-
- mtctr rTMP
- ld rWORD1,0(rSTR1)
- ld rWORD2,0(rSTR2)
- sldi rTMP,rFEFE,32
- insrdi r7F7F,r7F7F,32,0
- add rFEFE,rFEFE,rTMP
- b L(g1)
-
-L(g0):
- ldu rWORD1,8(rSTR1)
- bne cr1,L(different)
- ldu rWORD2,8(rSTR2)
-L(g1): add rTMP,rFEFE,rWORD1
- nor rNEG,r7F7F,rWORD1
- bdz L(tail)
- and. rTMP,rTMP,rNEG
- cmpd cr1,rWORD1,rWORD2
- beq L(g0)
-
-/* OK. We've hit the end of the string. We need to be careful that
- we don't compare two strings as different because of gunk beyond
- the end of the strings... */
-
-#ifdef __LITTLE_ENDIAN__
-L(endstring):
- addi rTMP2, rTMP, -1
- beq cr1, L(equal)
- andc rTMP2, rTMP2, rTMP
- rldimi rTMP2, rTMP2, 1, 0
- and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
- and rWORD1, rWORD1, rTMP2
- cmpd cr1, rWORD1, rWORD2
- beq cr1, L(equal)
- cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */
- addi rNEG, rBITDIF, 1
- orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */
- sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */
- andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */
- andc rWORD2, rWORD2, rNEG
- xor. rBITDIF, rWORD1, rWORD2
- sub rRTN, rWORD1, rWORD2
- blt L(highbit)
- sradi rRTN, rRTN, 63 /* must return an int. */
- ori rRTN, rRTN, 1
- blr
-L(equal):
- li rRTN, 0
- blr
-
-L(different):
- ld rWORD1, -8(rSTR1)
- cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */
- addi rNEG, rBITDIF, 1
- orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */
- sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */
- andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */
- andc rWORD2, rWORD2, rNEG
- xor. rBITDIF, rWORD1, rWORD2
- sub rRTN, rWORD1, rWORD2
- blt L(highbit)
- sradi rRTN, rRTN, 63
- ori rRTN, rRTN, 1
- blr
-L(highbit):
- sradi rRTN, rWORD2, 63
- ori rRTN, rRTN, 1
- blr
-
-#else
-L(endstring):
- and rTMP,r7F7F,rWORD1
- beq cr1,L(equal)
- add rTMP,rTMP,r7F7F
- xor. rBITDIF,rWORD1,rWORD2
- andc rNEG,rNEG,rTMP
- blt L(highbit)
- cntlzd rBITDIF,rBITDIF
- cntlzd rNEG,rNEG
- addi rNEG,rNEG,7
- cmpd cr1,rNEG,rBITDIF
- sub rRTN,rWORD1,rWORD2
- blt cr1,L(equal)
- sradi rRTN,rRTN,63 /* must return an int. */
- ori rRTN,rRTN,1
- blr
-L(equal):
- li rRTN,0
- blr
-
-L(different):
- ld rWORD1,-8(rSTR1)
- xor. rBITDIF,rWORD1,rWORD2
- sub rRTN,rWORD1,rWORD2
- blt L(highbit)
- sradi rRTN,rRTN,63
- ori rRTN,rRTN,1
- blr
-L(highbit):
- sradi rRTN,rWORD2,63
- ori rRTN,rRTN,1
- blr
-#endif
-
-/* Oh well. In this case, we just do a byte-by-byte comparison. */
- .align 4
-L(tail):
- and. rTMP,rTMP,rNEG
- cmpd cr1,rWORD1,rWORD2
- bne L(endstring)
- addi rSTR1,rSTR1,8
- bne cr1,L(different)
- addi rSTR2,rSTR2,8
- cmpldi cr1,rN,0
-L(unaligned):
- mtctr rN
- ble cr1,L(ux)
-L(uz):
- lbz rWORD1,0(rSTR1)
- lbz rWORD2,0(rSTR2)
- .align 4
-L(u1):
- cmpdi cr1,rWORD1,0
- bdz L(u4)
- cmpd rWORD1,rWORD2
- beq cr1,L(u4)
- bne L(u4)
- lbzu rWORD3,1(rSTR1)
- lbzu rWORD4,1(rSTR2)
- cmpdi cr1,rWORD3,0
- bdz L(u3)
- cmpd rWORD3,rWORD4
- beq cr1,L(u3)
- bne L(u3)
- lbzu rWORD1,1(rSTR1)
- lbzu rWORD2,1(rSTR2)
- cmpdi cr1,rWORD1,0
- bdz L(u4)
- cmpd rWORD1,rWORD2
- beq cr1,L(u4)
- bne L(u4)
- lbzu rWORD3,1(rSTR1)
- lbzu rWORD4,1(rSTR2)
- cmpdi cr1,rWORD3,0
- bdz L(u3)
- cmpd rWORD3,rWORD4
- beq cr1,L(u3)
- bne L(u3)
- lbzu rWORD1,1(rSTR1)
- lbzu rWORD2,1(rSTR2)
- b L(u1)
-
-L(u3): sub rRTN,rWORD3,rWORD4
- blr
-L(u4): sub rRTN,rWORD1,rWORD2
- blr
-L(ux):
- li rRTN,0
- blr
-END (STRNCMP)
-libc_hidden_builtin_def (strncmp)
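The rFEFE/r7F7F register pair above implements the classic carry-based zero-byte test: (x + 0xfefefefefefefeff) & ~(x | 0x7f7f7f7f7f7f7f7f) is nonzero exactly when some byte of x is zero, since 0xfefefefefefefeff is -0x0101010101010101 and ~(x | 0x7f7f...) equals ~x & 0x8080.... A hedged C demonstration (has_nul_byte is an invented name):

#include <stdint.h>
#include <stdio.h>

static int has_nul_byte (uint64_t x)
{
  const uint64_t fefe = 0xfefefefefefefeffULL; /* -0x0101010101010101 */
  const uint64_t m7f  = 0x7f7f7f7f7f7f7f7fULL;
  return ((x + fefe) & ~(x | m7f)) != 0;
}

int main (void)
{
  printf ("%d\n", has_nul_byte (0x4142434445464748ULL)); /* 0: no zero byte */
  printf ("%d\n", has_nul_byte (0x4142434400464748ULL)); /* 1: one byte is 0x00 */
  return 0;
}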
diff --git a/sysdeps/powerpc/powerpc64/power7/strncpy.S b/sysdeps/powerpc/powerpc64/power7/strncpy.S
deleted file mode 100644
index 0224f74898..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/strncpy.S
+++ /dev/null
@@ -1,722 +0,0 @@
-/* Copyright (C) 2014-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-/* Implements the functions
-
- char * [r3] strncpy (char *dst [r3], const char *src [r4], size_t n [r5])
-
- AND
-
- char * [r3] stpncpy (char *dst [r3], const char *src [r4], size_t n [r5])
-
-   The algorithm is as follows:
-   > if src and dst are 8-byte aligned, copy doubleword by doubleword;
-     else
-   > copy byte by byte on unaligned addresses.
-
-   The aligned comparisons are made using the cmpb instruction.  */
-
-/* The performance optimizations focus on:
-   1. data alignment [gain from aligned memory access on read/write];
-   2. loop unrolling/unwinding, which POWER7 rewards
-      [gain from reduced branch penalty];
-   3. the final padding with null bytes, done by calling an optimized
-      memset.  */
-
-#ifdef USE_AS_STPNCPY
-# ifndef STPNCPY
-# define FUNC_NAME __stpncpy
-# else
-# define FUNC_NAME STPNCPY
-# endif
-#else
-# ifndef STRNCPY
-# define FUNC_NAME strncpy
-# else
-# define FUNC_NAME STRNCPY
-# endif
-#endif /* !USE_AS_STPNCPY */
-
-#define FRAMESIZE (FRAME_MIN_SIZE+32)
-
-#ifndef MEMSET
-/* For builds with no IFUNC support, local calls should be made to internal
- GLIBC symbol (created by libc_hidden_builtin_def). */
-# ifdef SHARED
-# define MEMSET __GI_memset
-# else
-# define MEMSET memset
-# endif
-#endif
-
- .machine power7
-EALIGN(FUNC_NAME, 4, 0)
- CALL_MCOUNT 3
-
-	mflr r0			/* load link register LR into r0 */
-	or r10, r3, r4		/* merge src and dst to test alignment */
-	rldicl. r8, r10, 0, 61	/* are both doubleword aligned? */
-
-	std r19, -8(r1)		/* save caller's register r19 */
-	std r18, -16(r1)	/* save caller's register r18 */
-	std r0, 16(r1)		/* store the link register */
-	stdu r1, -FRAMESIZE(r1)	/* create the stack frame */
-
-	mr r9, r3		/* save r3 into r9 for use */
-	mr r18, r3		/* save r3 for the return value of strncpy */
-	bne cr0, L(unaligned)
-
-L(aligned):
-	srdi r11, r5, 3		/* compute count for CTR; count = n/8 */
-	cmpldi cr7, r11, 3	/* if count > 3, unroll the copy 4 times */
-	ble cr7, L(update1)
-
-	ld r10, 0(r4)		/* load doubleword from src */
-	cmpb r8, r10, r8	/* check it for NUL bytes (r8 is 0 here) */
-	cmpdi cr7, r8, 0	/* if a NUL byte was found ... */
-	bne cr7, L(update3)	/* ... stop the doubleword copy */
-
- std r10, 0(r3) /* copy doubleword at offset=0 */
- ld r10, 8(r4) /* load next doubleword from offset=8 */
-	cmpb r8, r10, r8	/* check it for NUL bytes */
-	cmpdi cr7, r8, 0	/* if a NUL byte was found ... */
-	bne cr7, L(HopBy8)	/* ... stop the doubleword copy */
-
- addi r8, r11, -4
- mr r7, r3
- srdi r8, r8, 2
- mr r6, r4
- addi r8, r8, 1
- li r12, 0
- mtctr r8
- b L(dwordCopy)
-
- .p2align 4
-L(dWordUnroll):
- std r8, 16(r9)
- ld r8, 24(r4) /* load dword,perform loop unrolling again */
- cmpb r10, r8, r10
- cmpdi cr7, r10, 0
- bne cr7, L(HopBy24)
-
- std r8, 24(r7) /* copy dword at offset=24 */
- addi r9, r9, 32
- addi r4, r4, 32
- bdz L(leftDwords) /* continue with loop on counter */
-
- ld r3, 32(r6)
- cmpb r8, r3, r10
- cmpdi cr7, r8, 0
- bne cr7, L(update2)
-
- std r3, 32(r7)
- ld r10, 40(r6)
- cmpb r8, r10, r8
- cmpdi cr7, r8, 0
- bne cr7, L(HopBy40)
-
- mr r6, r4 /* update values */
- mr r7, r9
- mr r11, r0
- mr r5, r19
-
-L(dwordCopy):
- std r10, 8(r9) /* copy dword at offset=8 */
- addi r19, r5, -32
- addi r0, r11, -4
- ld r8, 16(r4)
- cmpb r10, r8, r12
- cmpdi cr7, r10, 0
- beq cr7, L(dWordUnroll)
-
- addi r9, r9, 16 /* increment dst by 16 */
- addi r4, r4, 16 /* increment src by 16 */
- addi r5, r5, -16 /* decrement length 'n' by 16 */
- addi r0, r11, -2 /* decrement loop counter */
-
-L(dWordUnrollOFF):
- ld r10, 0(r4) /* load first dword */
- li r8, 0 /* load mask */
- cmpb r8, r10, r8
- cmpdi cr7, r8, 0
- bne cr7, L(byte_by_byte)
- mtctr r0
- li r7, 0
- b L(CopyDword)
-
- .p2align 4
-L(loadDWordandCompare):
- ld r10, 0(r4)
- cmpb r8, r10, r7
- cmpdi cr7, r8, 0
- bne cr7, L(byte_by_byte)
-
-L(CopyDword):
- addi r9, r9, 8
- std r10, -8(r9)
- addi r4, r4, 8
- addi r5, r5, -8
- bdnz L(loadDWordandCompare)
-
-L(byte_by_byte):
- cmpldi cr7, r5, 3
- ble cr7, L(verifyByte)
- srdi r10, r5, 2
- mr r19, r9
- mtctr r10
- b L(firstByteUnroll)
-
- .p2align 4
-L(bytes_unroll):
- lbz r10, 1(r4) /* load byte from src */
- cmpdi cr7, r10, 0 /* compare for NULL */
- stb r10, 1(r19) /* store byte to dst */
- beq cr7, L(updtDestComputeN2ndByte)
-
- addi r4, r4, 4 /* advance src */
-
- lbz r10, -2(r4) /* perform loop unrolling for byte r/w */
- cmpdi cr7, r10, 0
- stb r10, 2(r19)
- beq cr7, L(updtDestComputeN3rdByte)
-
- lbz r10, -1(r4) /* perform loop unrolling for byte r/w */
- addi r19, r19, 4
- cmpdi cr7, r10, 0
- stb r10, -1(r19)
- beq cr7, L(ComputeNByte)
-
- bdz L(update0)
-
-L(firstByteUnroll):
- lbz r10, 0(r4) /* perform loop unrolling for byte r/w */
-	cmpdi cr7, r10, 0
- stb r10, 0(r19)
- bne cr7, L(bytes_unroll)
- addi r19, r19, 1
-
-L(ComputeNByte):
-	subf r9, r19, r9	/* compute number of bytes to fill */
- add r8, r9, r5
-
-L(zeroFill):
- cmpdi cr7, r8, 0 /* compare if length is zero */
- beq cr7, L(update3return)
-
-	mr r3, r19		/* dst address for memset */
-	li r4, 0		/* fill with zero bytes */
-	mr r5, r8		/* number of bytes to fill */
- bl MEMSET /* call optimized memset */
- nop
-
-L(update3return):
-#ifdef USE_AS_STPNCPY
- addi r3, r19, -1 /* update return value */
-#endif
-
-L(hop2return):
-#ifndef USE_AS_STPNCPY
- mr r3, r18 /* set return value */
-#endif
- addi r1, r1, FRAMESIZE /* restore stack pointer */
- ld r0, 16(r1) /* read the saved link register */
- ld r18, -16(r1) /* restore callers save register, r18 */
- ld r19, -8(r1) /* restore callers save register, r19 */
-	mtlr r0			/* restore the link register */
- blr /* return */
-
- .p2align 4
-L(update0):
- mr r9, r19
-
- .p2align 4
-L(verifyByte):
- rldicl. r8, r5, 0, 62
-#ifdef USE_AS_STPNCPY
- mr r3, r9
-#endif
- beq cr0, L(hop2return)
- mtctr r8
- addi r4, r4, -1
- mr r19, r9
- b L(oneBYone)
-
- .p2align 4
-L(proceed):
- bdz L(done)
-
-L(oneBYone):
- lbzu r10, 1(r4) /* copy byte */
- addi r19, r19, 1
- addi r8, r8, -1
- cmpdi cr7, r10, 0
- stb r10, -1(r19)
- bne cr7, L(proceed)
- b L(zeroFill)
-
- .p2align 4
-L(done):
- addi r1, r1, FRAMESIZE /* restore stack pointer */
-#ifdef USE_AS_STPNCPY
- mr r3, r19 /* set the return value */
-#else
- mr r3, r18 /* set the return value */
-#endif
- ld r0, 16(r1) /* read the saved link register */
- ld r18, -16(r1) /* restore callers save register, r18 */
- ld r19, -8(r1) /* restore callers save register, r19 */
-	mtlr r0			/* restore the link register */
- blr /* return */
-
-L(update1):
- mr r0, r11
- mr r19, r5
-
- .p2align 4
-L(leftDwords):
- cmpdi cr7, r0, 0
- mr r5, r19
- bne cr7, L(dWordUnrollOFF)
- b L(byte_by_byte)
-
- .p2align 4
-L(updtDestComputeN2ndByte):
- addi r19, r19, 2 /* update dst by 2 */
- subf r9, r19, r9 /* compute distance covered */
- add r8, r9, r5
- b L(zeroFill)
-
- .p2align 4
-L(updtDestComputeN3rdByte):
- addi r19, r19, 3 /* update dst by 3 */
- subf r9, r19, r9 /* compute distance covered */
- add r8, r9, r5
- b L(zeroFill)
-
- .p2align 4
-L(HopBy24):
- addi r9, r9, 24 /* increment dst by 24 */
- addi r4, r4, 24 /* increment src by 24 */
- addi r5, r5, -24 /* decrement length 'n' by 24 */
- addi r0, r11, -3 /* decrement loop counter */
- b L(dWordUnrollOFF)
-
- .p2align 4
-L(update2):
- mr r5, r19
- b L(dWordUnrollOFF)
-
- .p2align 4
-L(HopBy40):
- addi r9, r7, 40 /* increment dst by 40 */
- addi r4, r6, 40 /* increment src by 40 */
- addi r5, r5, -40 /* decrement length 'n' by 40 */
- addi r0, r11, -5 /* decrement loop counter */
- b L(dWordUnrollOFF)
-
-L(update3):
- mr r0, r11
- b L(dWordUnrollOFF)
-
-L(HopBy8):
- addi r9, r3, 8 /* increment dst by 8 */
- addi r4, r4, 8 /* increment src by 8 */
- addi r5, r5, -8 /* decrement length 'n' by 8 */
- addi r0, r11, -1 /* decrement loop counter */
- b L(dWordUnrollOFF)
-
-L(unaligned):
-	cmpdi r5, 16		/* Proceed byte by byte for fewer than 16 bytes */
- ble L(byte_by_byte)
- rldicl r7, r3, 0, 61
- rldicl r6, r4, 0, 61
- cmpdi r6, 0 /* Check src alignment */
- beq L(srcaligndstunalign)
- /* src is unaligned */
- rlwinm r10, r4, 3,26,28 /* Calculate padding. */
- clrrdi r4, r4, 3 /* Align the addr to dw boundary */
- ld r8, 0(r4) /* Load doubleword from memory. */
- li r0, 0
- /* Discard bits not part of the string */
-#ifdef __LITTLE_ENDIAN__
- srd r7, r8, r10
-#else
- sld r7, r8, r10
-#endif
- cmpb r0, r7, r0 /* Compare each byte against null */
- /* Discard bits not part of the string */
-#ifdef __LITTLE_ENDIAN__
- sld r0, r0, r10
-#else
- srd r0, r0, r10
-#endif
- cmpdi r0, 0
- bne L(bytebybyte) /* if it has null, copy byte by byte */
- subfic r6, r6, 8
- rlwinm r12, r3, 3,26,28 /* Calculate padding in bits. */
- rldicl r9, r3, 0, 61 /* Calculate padding in bytes. */
- addi r3, r3, -1
-
- cmpdi r12, 0 /* check dest alignment */
- beq L(srcunaligndstalign)
-
- /* both src and dst unaligned */
-#ifdef __LITTLE_ENDIAN__
- sld r8, r7, r10
- mr r11, r10
- addi r11, r11, -8 /* Adjust byte pointer on loaded dw */
-#else
- srd r8, r7, r10
- subfic r11, r10, 64
-#endif
-	/* Is dst alignment greater than src alignment?  */
- cmpd cr7, r12, r10
- ble cr7, L(dst_align_small)
- /* src alignment is less than dst */
-
- /* Calculate the dst alignment difference */
- subfic r7, r9, 8
- mtctr r7
-
- /* Write until dst is aligned */
-	cmpdi cr0, r7, 4
- blt L(storebyte1) /* less than 4, store byte by byte */
- beq L(equal1) /* if its 4, store word */
- addi r0, r7, -4 /* greater than 4, so stb and stw */
- mtctr r0
-L(storebyte1):
-#ifdef __LITTLE_ENDIAN__
- addi r11, r11, 8 /* Adjust byte pointer on loaded dw */
-#else
- addi r11, r11, -8
-#endif
- srd r7, r8, r11
- stbu r7, 1(r3)
- addi r5, r5, -1
- bdnz L(storebyte1)
-
- subfic r7, r9, 8 /* Check the remaining bytes */
-	cmpdi cr0, r7, 4
- blt L(proceed1)
-
- .align 4
-L(equal1):
-#ifdef __LITTLE_ENDIAN__
- addi r11, r11, 8 /* Adjust byte pointer on loaded dw */
- srd r7, r8, r11
-#else
- subfic r11, r11, 64
- sld r7, r8, r11
- srdi r7, r7, 32
-#endif
- stw r7, 1(r3)
- addi r3, r3, 4
- addi r5, r5, -4
-
-L(proceed1):
- mr r7, r8
-	/* Calculate the leftover bytes to be written.  */
- subfic r11, r10, 64
- subfic r12, r12, 64
- subf r12, r12, r11 /* remaining bytes on second dw */
- subfic r10, r12, 64 /* remaining bytes on first dw */
- subfic r9, r9, 8
- subf r6, r9, r6 /* recalculate padding */
-L(srcunaligndstalign):
- addi r3, r3, 1
- subfic r12, r10, 64 /* remaining bytes on second dw */
- addi r4, r4, 8
- li r0,0
- b L(storedouble)
-
- .align 4
-L(dst_align_small):
- mtctr r6
- /* Write until src is aligned */
-L(storebyte2):
-#ifdef __LITTLE_ENDIAN__
- addi r11, r11, 8 /* Adjust byte pointer on dw */
-#else
- addi r11, r11, -8
-#endif
- srd r7, r8, r11
- stbu r7, 1(r3)
- addi r5, r5, -1
- bdnz L(storebyte2)
-
- addi r4, r4, 8 /* Increment src pointer */
- addi r3, r3, 1 /* Increment dst pointer */
- mr r9, r3
- li r8, 0
- cmpd cr7, r12, r10
- beq cr7, L(aligned)
- rldicl r6, r3, 0, 61 /* Recalculate padding */
- mr r7, r6
-
-	/* src is aligned */
-L(srcaligndstunalign):
- mr r9, r3
- mr r6, r7
- ld r8, 0(r4)
- subfic r10, r7, 8
- mr r7, r8
- li r0, 0 /* Check null */
- cmpb r0, r8, r0
- cmpdi r0, 0
- bne L(byte_by_byte) /* Do byte by byte if there is NULL */
- rlwinm r12, r3, 3,26,28 /* Calculate padding */
- addi r3, r3, -1
- /* write byte by byte until aligned */
-#ifdef __LITTLE_ENDIAN__
- li r11, -8
-#else
- li r11, 64
-#endif
- mtctr r10
-	cmpdi cr0, r10, 4
- blt L(storebyte)
- beq L(equal)
- addi r0, r10, -4
- mtctr r0
-L(storebyte):
-#ifdef __LITTLE_ENDIAN__
- addi r11, r11, 8 /* Adjust byte pointer on dw */
-#else
- addi r11, r11, -8
-#endif
- srd r7, r8, r11
- stbu r7, 1(r3)
- addi r5, r5, -1
- bdnz L(storebyte)
-
-	cmpdi cr0, r10, 4
- blt L(align)
-
- .align 4
-L(equal):
-#ifdef __LITTLE_ENDIAN__
- addi r11, r11, 8
- srd r7, r8, r11
-#else
- subfic r11, r11, 64
- sld r7, r8, r11
- srdi r7, r7, 32
-#endif
- stw r7, 1(r3)
- addi r5, r5, -4
- addi r3, r3, 4
-L(align):
- addi r3, r3, 1
- addi r4, r4, 8 /* Increment src pointer */
- subfic r10, r12, 64
- li r0, 0
- /* dst addr aligned to 8 */
-L(storedouble):
- cmpdi r5, 8
- ble L(null1)
- ld r7, 0(r4) /* load next dw */
- cmpb r0, r7, r0
- cmpdi r0, 0 /* check for null on each new dw */
- bne L(null)
-#ifdef __LITTLE_ENDIAN__
- srd r9, r8, r10 /* bytes from first dw */
- sld r11, r7, r12 /* bytes from second dw */
-#else
- sld r9, r8, r10
- srd r11, r7, r12
-#endif
- or r11, r9, r11 /* make as a single dw */
- std r11, 0(r3) /* store as std on aligned addr */
- mr r8, r7 /* still few bytes left to be written */
- addi r3, r3, 8 /* increment dst addr */
- addi r4, r4, 8 /* increment src addr */
- addi r5, r5, -8
- b L(storedouble) /* Loop until NULL */
-
- .align 4
-
-/* We've hit the end of the string. Do the rest byte-by-byte. */
-L(null):
- addi r3, r3, -1
- mr r10, r12
- mtctr r6
-#ifdef __LITTLE_ENDIAN__
- subfic r10, r10, 64
- addi r10, r10, -8
-#endif
-	cmpdi cr0, r5, 4
- blt L(loop)
-	cmpdi cr0, r6, 4
- blt L(loop)
-
- /* we can still use stw if leftover >= 4 */
-#ifdef __LITTLE_ENDIAN__
- addi r10, r10, 8
- srd r11, r8, r10
-#else
- subfic r10, r10, 64
- sld r11, r8, r10
- srdi r11, r11, 32
-#endif
- stw r11, 1(r3)
- addi r5, r5, -4
- addi r3, r3, 4
-	cmpdi cr0, r5, 0
- beq L(g1)
-	cmpdi cr0, r6, 4
- beq L(bytebybyte1)
- addi r10, r10, 32
-#ifdef __LITTLE_ENDIAN__
- addi r10, r10, -8
-#else
- subfic r10, r10, 64
-#endif
- addi r0, r6, -4
- mtctr r0
- /* remaining byte by byte part of first dw */
-L(loop):
-#ifdef __LITTLE_ENDIAN__
- addi r10, r10, 8
-#else
- addi r10, r10, -8
-#endif
- srd r0, r8, r10
- stbu r0, 1(r3)
- addi r5, r5, -1
-	cmpdi cr0, r5, 0
- beq L(g1)
- bdnz L(loop)
-L(bytebybyte1):
- addi r3, r3, 1
- /* remaining byte by byte part of second dw */
-L(bytebybyte):
- addi r3, r3, -8
- addi r4, r4, -1
-
-#ifdef __LITTLE_ENDIAN__
- extrdi. r0, r7, 8, 56
- stbu r7, 8(r3)
- addi r5, r5, -1
- beq L(g2)
- cmpdi r5, 0
- beq L(g1)
- extrdi. r0, r7, 8, 48
- stbu r0, 1(r3)
- addi r5, r5, -1
- beq L(g2)
- cmpdi r5, 0
- beq L(g1)
- extrdi. r0, r7, 8, 40
- stbu r0, 1(r3)
- addi r5, r5, -1
- beq L(g2)
- cmpdi r5, 0
- beq L(g1)
- extrdi. r0, r7, 8, 32
- stbu r0, 1(r3)
- addi r5, r5, -1
- beq L(g2)
- cmpdi r5, 0
- beq L(g1)
- extrdi. r0, r7, 8, 24
- stbu r0, 1(r3)
- addi r5, r5, -1
- beq L(g2)
- cmpdi r5, 0
- beq L(g1)
- extrdi. r0, r7, 8, 16
- stbu r0, 1(r3)
- addi r5, r5, -1
- beq L(g2)
- cmpdi r5, 0
- beq L(g1)
- extrdi. r0, r7, 8, 8
- stbu r0, 1(r3)
- addi r5, r5, -1
- beq L(g2)
- cmpdi r5, 0
- beq L(g1)
- extrdi r0, r7, 8, 0
- stbu r0, 1(r3)
- addi r5, r5, -1
- b L(g2)
-#else
- extrdi. r0, r7, 8, 0
- stbu r0, 8(r3)
- addi r5, r5, -1
- beq L(g2)
- cmpdi r5, 0
- beq L(g1)
- extrdi. r0, r7, 8, 8
- stbu r0, 1(r3)
- addi r5, r5, -1
- beq L(g2)
- cmpdi r5, 0
- beq L(g1)
- extrdi. r0, r7, 8, 16
- stbu r0, 1(r3)
- addi r5, r5, -1
- beq L(g2)
- cmpdi r5, 0
- beq L(g1)
- extrdi. r0, r7, 8, 24
- stbu r0, 1(r3)
- addi r5, r5, -1
- beq L(g2)
- cmpdi r5, 0
- beq L(g1)
- extrdi. r0, r7, 8, 32
- stbu r0, 1(r3)
- addi r5, r5, -1
- beq L(g2)
- cmpdi r5, 0
- beq L(g1)
- extrdi. r0, r7, 8, 40
- stbu r0, 1(r3)
- addi r5, r5, -1
- beq L(g2)
- cmpdi r5, 0
- beq L(g1)
- extrdi. r0, r7, 8, 48
- stbu r0, 1(r3)
- addi r5, r5, -1
- beq L(g2)
- cmpdi r5, 0
- beq L(g1)
- stbu r7, 1(r3)
- addi r5, r5, -1
- b L(g2)
-#endif
-L(g1):
-#ifdef USE_AS_STPNCPY
- addi r3, r3, 1
-#endif
-L(g2):
- addi r3, r3, 1
- mr r19, r3
- mr r8, r5
- b L(zeroFill)
-L(null1):
- mr r9, r3
- subf r4, r6, r4
- b L(byte_by_byte)
-END(FUNC_NAME)
-#ifndef USE_AS_STPNCPY
-libc_hidden_builtin_def (strncpy)
-#endif
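For orientation, the semantics this strncpy/stpncpy implementation provides -- copy at most n bytes, stop at the source NUL, zero-fill the tail with one memset call (the L(zeroFill) path) -- reduce to the following C reference. A sketch with invented names, not the optimized algorithm itself.

#include <string.h>

char *my_stpncpy (char *dst, const char *src, size_t n)
{
  size_t copied = 0;
  while (copied < n && src[copied] != '\0')
    {
      dst[copied] = src[copied];
      copied++;
    }
  memset (dst + copied, 0, n - copied);  /* the L(zeroFill) path */
  return dst + copied;   /* stpncpy: first padding byte (or dst + n) */
}

char *my_strncpy (char *dst, const char *src, size_t n)
{
  my_stpncpy (dst, src, n);
  return dst;            /* strncpy: always the destination */
}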
diff --git a/sysdeps/powerpc/powerpc64/power7/strnlen.S b/sysdeps/powerpc/powerpc64/power7/strnlen.S
deleted file mode 100644
index a970b6ce30..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/strnlen.S
+++ /dev/null
@@ -1,182 +0,0 @@
-/* Optimized strnlen implementation for PowerPC64/POWER7 using cmpb insn.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Luis Machado <luisgpm@br.ibm.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-#ifndef STRNLEN
-# define STRNLEN __strnlen
-#endif
-
-/* size_t [r3] strnlen (char *s [r3], size_t maxlen [r4])  */
- .machine power7
-ENTRY (STRNLEN)
- CALL_MCOUNT 2
- dcbt 0,r3
- clrrdi r8,r3,3
- add r7,r3,r4 /* Calculate the last acceptable address. */
- cmpldi r4,32
- li r0,0 /* Doubleword with null chars. */
- addi r7,r7,-1
-
-	/* If we have fewer than 33 bytes to search, use the faster
-	   small-range code below.  */
- ble L(small_range)
-
- rlwinm r6,r3,3,26,28 /* Calculate padding. */
- ld r12,0(r8) /* Load doubleword from memory. */
- cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */
-#ifdef __LITTLE_ENDIAN__
- srd r10,r10,r6
- sld r10,r10,r6
-#else
- sld r10,r10,r6
- srd r10,r10,r6
-#endif
-	cmpldi	cr7,r10,0	/* If r10 == 0, no nulls have been found.  */
- bne cr7,L(done)
-
- clrrdi r7,r7,3 /* Address of last doubleword. */
- mtcrf 0x01,r8
- /* Are we now aligned to a quadword boundary? If so, skip to
- the main loop. Otherwise, go through the alignment code. */
-
- bt 28,L(loop_setup)
-
- /* Handle DWORD2 of pair. */
- ldu r12,8(r8)
- cmpb r10,r12,r0
- cmpldi cr7,r10,0
- bne cr7,L(done)
-
-L(loop_setup):
- /* The last dword we want to read in the loop below is the one
-      containing the last byte of the string, i.e. the dword at
- (s + size - 1) & ~7, or r7. The first dword read is at
- r8 + 8, we read 2 * cnt dwords, so the last dword read will
- be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives
- cnt = (r7 - r8) / 16 */
- sub r5,r7,r8
- srdi r6,r5,4 /* Number of loop iterations. */
- mtctr r6 /* Setup the counter. */
-
- /* Main loop to look for the null byte in the string. Since
- it's a small loop (< 8 instructions), align it to 32-bytes. */
- .p2align 5
-L(loop):
- /* Load two doublewords, compare and merge in a
- single register for speed. This is an attempt
- to speed up the null-checking process for bigger strings. */
-
- ld r12,8(r8)
- ldu r11,16(r8)
- cmpb r10,r12,r0
- cmpb r9,r11,r0
- or r5,r9,r10 /* Merge everything in one doubleword. */
- cmpldi cr7,r5,0
- bne cr7,L(found)
- bdnz L(loop)
-
- /* We may have one more dword to read. */
- cmpld cr6,r8,r7
- beq cr6,L(end_max)
-
- ldu r12,8(r8)
- cmpb r10,r12,r0
- cmpldi cr6,r10,0
- bne cr6,L(done)
-
-L(end_max):
- mr r3,r4
- blr
-
- /* OK, one (or both) of the doublewords contains a null byte. Check
- the first doubleword and decrement the address in case the first
- doubleword really contains a null byte. */
- .align 4
-L(found):
- cmpldi cr6,r10,0
- addi r8,r8,-8
- bne cr6,L(done)
-
- /* The null byte must be in the second doubleword. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
- length. */
-
- mr r10,r9
- addi r8,r8,8
-
- /* r10 has the output of the cmpb instruction, that is, it contains
- 0xff in the same position as the null byte in the original
- doubleword from the string. Use that to calculate the length.
- We need to make sure the null char is *before* the end of the
- range. */
-L(done):
-#ifdef __LITTLE_ENDIAN__
- addi r0,r10,-1
- andc r0,r0,r10
- popcntd r0,r0
-#else
- cntlzd r0,r10 /* Count leading zeros before the match. */
-#endif
- sub r3,r8,r3
- srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
- add r3,r3,r0 /* Length until the match. */
- cmpld r3,r4
- blelr
- mr r3,r4
- blr
-
-/* Deals with size <= 32. */
- .align 4
-L(small_range):
- cmpldi r4,0
- beq L(end_max)
-
- clrrdi r7,r7,3 /* Address of last doubleword. */
-
- rlwinm r6,r3,3,26,28 /* Calculate padding. */
- ld r12,0(r8) /* Load doubleword from memory. */
- cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */
-#ifdef __LITTLE_ENDIAN__
- srd r10,r10,r6
- sld r10,r10,r6
-#else
- sld r10,r10,r6
- srd r10,r10,r6
-#endif
- cmpldi cr7,r10,0
- bne cr7,L(done)
-
- cmpld r8,r7
- beq L(end_max)
-
- .p2align 5
-L(loop_small):
- ldu r12,8(r8)
- cmpb r10,r12,r0
- cmpldi cr6,r10,0
- bne cr6,L(done)
- cmpld r8,r7
- bne L(loop_small)
- mr r3,r4
- blr
-
-END (STRNLEN)
-libc_hidden_def (__strnlen)
-weak_alias (__strnlen, strnlen)
-libc_hidden_def (strnlen)
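The bounded scan above boils down to: compute the last acceptable address once, stop at a NUL or at that bound, and clamp the result to maxlen exactly as the final cmpld/blelr pair does. A hedged byte-wise C sketch (the assembly does this two doublewords per iteration):

#include <stddef.h>

size_t my_strnlen (const char *s, size_t maxlen)
{
  const char *end = s + maxlen;   /* one past the last valid byte */
  const char *p = s;
  while (p < end && *p != '\0')
    p++;
  return (size_t) (p - s);        /* == maxlen if no NUL was seen */
}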
diff --git a/sysdeps/powerpc/powerpc64/power7/strrchr.S b/sysdeps/powerpc/powerpc64/power7/strrchr.S
deleted file mode 100644
index c22393deb5..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/strrchr.S
+++ /dev/null
@@ -1,260 +0,0 @@
-/* Optimized strrchr implementation for PowerPC64/POWER7 using cmpb insn.
- Copyright (C) 2014-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-/* char * [r3] strrchr (char *s [r3], int c [r4])  */
-
-#ifndef STRRCHR
-# define STRRCHR strrchr
-#endif
-
- .machine power7
-ENTRY (STRRCHR)
- CALL_MCOUNT 2
- dcbt 0,r3
- clrrdi r8,r3,3 /* Align the address to doubleword boundary. */
- cmpdi cr7,r4,0
- ld r12,0(r8) /* Load doubleword from memory. */
-	li	r9,0		/* used to store last occurrence */
- li r0,0 /* Doubleword with null chars to use
- with cmpb. */
-
- rlwinm r6,r3,3,26,28 /* Calculate padding. */
-
- beq cr7,L(null_match)
-
- /* Replicate byte to doubleword. */
- insrdi r4,r4,8,48
- insrdi r4,r4,16,32
- insrdi r4,r4,32,0
-
-	/* r4 is now byte-replicated; if c has a zero low byte the
-	   replicated value is zero, so check for null again.  */
- cmpdi cr7,r4,0
- beq cr7,L(null_match)
- /* Now r4 has a doubleword of c bytes and r0 has
- a doubleword of null bytes. */
-
- cmpb r10,r12,r4 /* Compare each byte against c byte. */
- cmpb r11,r12,r0 /* Compare each byte against null byte. */
-
- /* Move the doublewords left and right to discard the bits that are
- not part of the string and bring them back as zeros. */
-#ifdef __LITTLE_ENDIAN__
- srd r10,r10,r6
- srd r11,r11,r6
- sld r10,r10,r6
- sld r11,r11,r6
-#else
- sld r10,r10,r6
- sld r11,r11,r6
- srd r10,r10,r6
- srd r11,r11,r6
-#endif
- or r5,r10,r11 /* OR the results to speed things up. */
- cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes
- have been found. */
- bne cr7,L(done)
-
-L(align):
- mtcrf 0x01,r8
-
-	/* Are we now aligned to a quadword boundary?  If so, skip to
- the main loop. Otherwise, go through the alignment code. */
-
- bt 28,L(loop)
-
-	/* Handle DWORD2 of pair.  */
- ldu r12,8(r8)
- cmpb r10,r12,r4
- cmpb r11,r12,r0
- or r5,r10,r11
- cmpdi cr7,r5,0
- bne cr7,L(done)
- b L(loop) /* We branch here (rather than falling through)
- to skip the nops due to heavy alignment
- of the loop below. */
- .p2align 5
-L(loop):
- /* Load two doublewords, compare and merge in a
- single register for speed. This is an attempt
- to speed up the null-checking process for bigger strings. */
- ld r12,8(r8)
- ldu r7,16(r8)
- cmpb r10,r12,r4
- cmpb r11,r12,r0
- cmpb r6,r7,r4
- cmpb r7,r7,r0
- or r12,r10,r11
- or r5,r6,r7
- or r5,r12,r5
- cmpdi cr7,r5,0
- beq cr7,L(loop)
-
- /* OK, one (or both) of the doublewords contains a c/null byte. Check
- the first doubleword and decrement the address in case the first
- doubleword really contains a c/null byte. */
- cmpdi cr6,r12,0
- addi r8,r8,-8
- bne cr6,L(done)
-
- /* The c/null byte must be in the second doubleword. Adjust the
- address again and move the result of cmpb to r10 so we can calculate
- the pointer. */
-
- mr r10,r6
- mr r11,r7
- addi r8,r8,8
-
- /* r10/r11 have the output of the cmpb instructions, that is,
- 0xff in the same position as the c/null byte in the original
- doubleword from the string. Use that to calculate the pointer. */
-
-L(done):
-	/* If r11 is nonzero, a null was found: build a mask that keeps
-	   only the matches in r10 at or before the first null byte.  */
- cmpdi cr7,r11,0
- beq cr7,L(no_null)
-#ifdef __LITTLE_ENDIAN__
- addi r3,r11,-1
- andc r3,r3,r11
- popcntd r0,r3
-#else
- cntlzd r0,r11
-#endif
- subfic r0,r0,63
- li r6,-1
-#ifdef __LITTLE_ENDIAN__
- srd r0,r6,r0
-#else
- sld r0,r6,r0
-#endif
- and r10,r0,r10
-L(no_null):
-#ifdef __LITTLE_ENDIAN__
- cntlzd r0,r10 /* Count leading zeros before c matches. */
- addi r3,r10,-1
- andc r3,r3,r10
- addi r10,r11,-1
- andc r10,r10,r11
- cmpld cr7,r3,r10
- bgt cr7,L(no_match)
-#else
- addi r3,r10,-1 /* Count trailing zeros before c matches. */
- andc r3,r3,r10
- popcntd r0,r3
- cmpld cr7,r11,r10
- bgt cr7,L(no_match)
-#endif
- srdi r0,r0,3 /* Convert trailing zeros to bytes. */
- subfic r0,r0,7
- add r9,r8,r0 /* Return address of the matching c byte
- or null in case c was not found. */
- li r0,0
-	cmpdi	cr7,r11,0	/* If r11 == 0, no nulls have been found.  */
- beq cr7,L(align)
-
- .align 4
-L(no_match):
- mr r3,r9
- blr
-
-/* We are here because strrchr was called with a null byte. */
- .align 4
-L(null_match):
- /* r0 has a doubleword of null bytes. */
-
- cmpb r5,r12,r0 /* Compare each byte against null bytes. */
-
- /* Move the doublewords left and right to discard the bits that are
- not part of the string and bring them back as zeros. */
-#ifdef __LITTLE_ENDIAN__
- srd r5,r5,r6
- sld r5,r5,r6
-#else
- sld r5,r5,r6
- srd r5,r5,r6
-#endif
-	cmpdi	cr7,r5,0	/* If r5 == 0, no null bytes
-				   have been found.  */
- bne cr7,L(done_null)
-
- mtcrf 0x01,r8
-
- /* Are we now aligned to a quadword boundary? If so, skip to
- the main loop. Otherwise, go through the alignment code. */
-
- bt 28,L(loop_null)
-
-	/* Handle DWORD2 of pair.  */
- ldu r12,8(r8)
- cmpb r5,r12,r0
- cmpdi cr7,r5,0
- bne cr7,L(done_null)
- b L(loop_null) /* We branch here (rather than falling through)
- to skip the nops due to heavy alignment
- of the loop below. */
-
- /* Main loop to look for the end of the string. Since it's a
- small loop (< 8 instructions), align it to 32-bytes. */
- .p2align 5
-L(loop_null):
- /* Load two doublewords, compare and merge in a
- single register for speed. This is an attempt
- to speed up the null-checking process for bigger strings. */
- ld r12,8(r8)
- ldu r11,16(r8)
- cmpb r5,r12,r0
- cmpb r10,r11,r0
- or r6,r5,r10
- cmpdi cr7,r6,0
- beq cr7,L(loop_null)
-
- /* OK, one (or both) of the doublewords contains a null byte. Check
- the first doubleword and decrement the address in case the first
- doubleword really contains a null byte. */
-
- cmpdi cr6,r5,0
- addi r8,r8,-8
- bne cr6,L(done_null)
-
- /* The null byte must be in the second doubleword. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
- pointer. */
-
- mr r5,r10
- addi r8,r8,8
-
- /* r5 has the output of the cmpb instruction, that is, it contains
- 0xff in the same position as the null byte in the original
- doubleword from the string. Use that to calculate the pointer. */
-L(done_null):
-#ifdef __LITTLE_ENDIAN__
- addi r0,r5,-1
- andc r0,r0,r5
- popcntd r0,r0
-#else
- cntlzd r0,r5 /* Count leading zeros before the match. */
-#endif
-	srdi	r0,r0,3		/* Convert leading/trailing zeros to bytes.  */
- add r3,r8,r0 /* Return address of the matching null byte. */
- blr
-END (STRRCHR)
-weak_alias (strrchr, rindex)
-libc_hidden_builtin_def (strrchr)
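Stripped of the doubleword masking, the bookkeeping in strrchr.S is: remember the most recent match (the r9 role) and, when a block holds both a match and a NUL, discard matches positioned after the NUL -- the job of the mask built in L(done). A hedged byte-wise C sketch with an invented name:

char *my_strrchr (const char *s, int c)
{
  const char *last = 0;              /* r9: last occurrence seen */
  unsigned char ch = (unsigned char) c;
  for (;; s++)
    {
      if ((unsigned char) *s == ch)
        last = s;                    /* includes the terminator if c == 0 */
      if (*s == '\0')
        return (char *) last;        /* NULL if c never appeared */
    }
}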
diff --git a/sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c b/sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c
deleted file mode 100644
index a917b2157e..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/* Optimized strstr implementation for PowerPC64/POWER7.
- Copyright (C) 2015-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <string.h>
-
-#define STRSTR __strstr_ppc
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(__name)
-
-extern __typeof (strstr) __strstr_ppc attribute_hidden;
-
-#include <string/strstr.c>
diff --git a/sysdeps/powerpc/powerpc64/power7/strstr.S b/sysdeps/powerpc/powerpc64/power7/strstr.S
deleted file mode 100644
index 260db2ed6d..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/strstr.S
+++ /dev/null
@@ -1,521 +0,0 @@
-/* Optimized strstr implementation for PowerPC64/POWER7.
- Copyright (C) 2015-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-/* char * [r3] strstr (char *s [r3], char *pat [r4])  */
-
-/* The performance gain comes from aligned memory accesses, doubleword
-   loads, and the cmpb instruction for quicker comparisons.  */
-
-#define ITERATIONS 64
-
-#ifndef STRSTR
-# define STRSTR strstr
-#endif
-
-#ifndef STRLEN
-/* For builds with no IFUNC support, local calls should be made to internal
- GLIBC symbol (created by libc_hidden_builtin_def). */
-# ifdef SHARED
-# define STRLEN __GI_strlen
-# else
-# define STRLEN strlen
-# endif
-#endif
-
-#ifndef STRNLEN
-/* For builds with no IFUNC support, local calls should be made to internal
- GLIBC symbol (created by libc_hidden_builtin_def). */
-# ifdef SHARED
-# define STRNLEN __GI_strnlen
-# else
-# define STRNLEN __strnlen
-# endif
-#endif
-
-#ifndef STRCHR
-# ifdef SHARED
-# define STRCHR __GI_strchr
-# else
-# define STRCHR strchr
-# endif
-#endif
-
-#define FRAMESIZE (FRAME_MIN_SIZE+32)
- .machine power7
-EALIGN (STRSTR, 4, 0)
- CALL_MCOUNT 2
- mflr r0 /* Load link register LR to r0. */
- std r31, -8(r1) /* Save callers register r31. */
- std r30, -16(r1) /* Save callers register r30. */
- std r29, -24(r1) /* Save callers register r29. */
- std r28, -32(r1) /* Save callers register r28. */
- std r0, 16(r1) /* Store the link register. */
- cfi_offset(r31, -8)
- cfi_offset(r30, -16)
- cfi_offset(r28, -32)
- cfi_offset(r29, -24)
- cfi_offset(lr, 16)
- stdu r1, -FRAMESIZE(r1) /* Create the stack frame. */
- cfi_adjust_cfa_offset(FRAMESIZE)
-
- dcbt 0, r3
- dcbt 0, r4
- cmpdi cr7, r3, 0
- beq cr7, L(retnull)
- cmpdi cr7, r4, 0
- beq cr7, L(retnull)
-
- mr r29, r3
- mr r30, r4
- mr r3, r4
- bl STRLEN
- nop
-
-	cmpdi cr7, r3, 0	/* If the search string is empty.  */
- beq cr7, L(ret_r3)
-
- mr r31, r3
- mr r4, r3
- mr r3, r29
- bl STRNLEN
- nop
-
-	cmpd cr7, r3, r31	/* If len(s) < len(pat), no match.  */
- blt cr7, L(retnull)
- mr r3, r29
- lbz r4, 0(r30)
- bl STRCHR
- nop
-
- mr r11, r3
- /* If first char of search str is not present. */
- cmpdi cr7, r3, 0
- ble cr7, L(end)
- /* Reg r28 is used to count the number of iterations. */
- li r28, 0
- rldicl r8, r3, 0, 52 /* Page cross check. */
- cmpldi cr7, r8, 4096-16
- bgt cr7, L(bytebybyte)
-
- rldicl r8, r30, 0, 52
- cmpldi cr7, r8, 4096-16
- bgt cr7, L(bytebybyte)
-
-	/* If len(pat) < 8, handle it in a different way.  */
- /* Shift position based on null and use cmpb. */
- cmpdi cr7, r31, 8
- blt cr7, L(lessthan8)
-
- /* Len(r4) >= 8 reaches here. */
- mr r8, r3 /* Save r3 for future use. */
- mr r4, r30 /* Restore r4. */
- li r0, 0
- rlwinm r10, r30, 3, 26, 28 /* Calculate padding in bits. */
- clrrdi r4, r4, 3 /* Make r4 aligned to 8. */
- ld r6, 0(r4)
- addi r4, r4, 8
-	cmpdi cr7, r10, 0	/* Already aligned?  */
- beq cr7, L(begin1)
-#ifdef __LITTLE_ENDIAN__
- srd r6, r6, r10 /* Discard unwanted bits. */
-#else
- sld r6, r6, r10
-#endif
- ld r9, 0(r4)
- subfic r10, r10, 64
-#ifdef __LITTLE_ENDIAN__
- sld r9, r9, r10 /* Discard unwanted bits. */
-#else
- srd r9, r9, r10
-#endif
- or r6, r6, r9 /* Form complete search str. */
-L(begin1):
- mr r29, r6
- rlwinm r10, r3, 3, 26, 28
- clrrdi r3, r3, 3
- ld r5, 0(r3)
- cmpb r9, r0, r6 /* Check if input has null. */
- cmpdi cr7, r9, 0
- bne cr7, L(return3)
- cmpb r9, r0, r5 /* Check if input has null. */
-#ifdef __LITTLE_ENDIAN__
- srd r9, r9, r10
-#else
- sld r9, r9, r10
-#endif
- cmpdi cr7, r9, 0
- bne cr7, L(retnull)
-
- li r12, -8 /* Shift values. */
- li r11, 72 /* Shift values. */
- cmpdi cr7, r10, 0
- beq cr7, L(nextbyte1)
- mr r12, r10
- addi r12, r12, -8
- subfic r11, r12, 64
-
-L(nextbyte1):
- ldu r7, 8(r3) /* Load next dw. */
- addi r12, r12, 8 /* Shift one byte and compare. */
- addi r11, r11, -8
-#ifdef __LITTLE_ENDIAN__
- srd r9, r5, r12 /* Rotate based on mask. */
- sld r10, r7, r11
-#else
- sld r9, r5, r12
- srd r10, r7, r11
-#endif
-	/* Form a single dw from bytes of the first and second loads.  */
- or r10, r9, r10
- /* Check for null in the formed dw. */
- cmpb r9, r0, r10
- cmpdi cr7, r9, 0
- bne cr7, L(retnull)
- /* Cmpb search str and input str. */
- cmpb r9, r10, r6
- cmpdi cr7, r9, -1
- beq cr7, L(match)
- addi r8, r8, 1
- b L(begin)
-
- .align 4
-L(match):
- /* There is a match of 8 bytes, check next bytes. */
- cmpdi cr7, r31, 8
- beq cr7, L(return)
- /* Update next starting point r8. */
- srdi r9, r11, 3
- subf r9, r9, r3
- mr r8, r9
-
-L(secondmatch):
- mr r5, r7
- rlwinm r10, r30, 3, 26, 28 /* Calculate padding in bits. */
- ld r6, 0(r4)
- addi r4, r4, 8
-	cmpdi cr7, r10, 0	/* Already aligned?  */
- beq cr7, L(proceed3)
-#ifdef __LITTLE_ENDIAN__
- srd r6, r6, r10 /* Discard unwanted bits. */
- cmpb r9, r0, r6
- sld r9, r9, r10
-#else
- sld r6, r6, r10
- cmpb r9, r0, r6
- srd r9, r9, r10
-#endif
- cmpdi cr7, r9, 0
- bne cr7, L(proceed3)
- ld r9, 0(r4)
- subfic r10, r10, 64
-#ifdef __LITTLE_ENDIAN__
- sld r9, r9, r10 /* Discard unwanted bits. */
-#else
- srd r9, r9, r10
-#endif
- or r6, r6, r9 /* Form complete search str. */
-
-L(proceed3):
- li r7, 0
- addi r3, r3, 8
- cmpb r9, r0, r5
- cmpdi cr7, r9, 0
- bne cr7, L(proceed4)
- ld r7, 0(r3)
-L(proceed4):
-#ifdef __LITTLE_ENDIAN__
- srd r9, r5, r12
- sld r10, r7, r11
-#else
- sld r9, r5, r12
- srd r10, r7, r11
-#endif
-	/* Form a single dw from bytes of the first and second loads.  */
- or r10, r9, r10
- cmpb r9, r0, r6
- cmpdi cr7, r9, 0
- bne cr7, L(return4)
- /* Check for null in the formed dw. */
- cmpb r9, r0, r10
- cmpdi cr7, r9, 0
- bne cr7, L(retnull)
-	/* If the next 8 bytes don't match, start the search again.  */
- cmpb r9, r10, r6
- cmpdi cr7, r9, -1
- bne cr7, L(reset)
- /* If the next 8 bytes match, load and compare next 8. */
- b L(secondmatch)
-
- .align 4
-L(reset):
- /* Start the search again. */
- addi r8, r8, 1
- b L(begin)
-
- .align 4
-L(return3):
- /* Count leading zeros and compare partial dw. */
-#ifdef __LITTLE_ENDIAN__
- addi r7, r9, -1
- andc r7, r7, r9
- popcntd r7, r7
- subfic r7, r7, 64
- sld r10, r5, r7
- sld r6, r6, r7
-#else
- cntlzd r7, r9
- subfic r7, r7, 64
- srd r10, r5, r7
- srd r6, r6, r7
-#endif
- cmpb r9, r10, r6
- cmpdi cr7, r9, -1
- addi r8, r8, 1
- /* Start search again if there is no match. */
- bne cr7, L(begin)
- /* If the words match, update return values. */
- subfic r7, r7, 64
- srdi r7, r7, 3
- add r3, r3, r7
- subf r3, r31, r3
- b L(end)
-
- .align 4
-L(return4):
- /* Count leading zeros and compare partial dw. */
-#ifdef __LITTLE_ENDIAN__
- addi r7, r9, -1
- andc r7, r7, r9
- popcntd r7, r7
- subfic r7, r7, 64
- sld r10, r10, r7
- sld r6, r6, r7
-#else
- cntlzd r7, r9
- subfic r7, r7, 64
- srd r10, r10, r7
- srd r6, r6, r7
-#endif
- cmpb r9, r10, r6
- cmpdi cr7, r9, -1
- addi r8, r8, 1
- bne cr7, L(begin)
- subfic r7, r7, 64
- srdi r11, r11, 3
- subf r3, r11, r3
- srdi r7, r7, 3
- add r3, r3, r7
- subf r3, r31, r3
- b L(end)
-
- .align 4
-L(begin):
- mr r3, r8
-	/* When our iterations exceed ITERATIONS, fall back to default.  */
- addi r28, r28, 1
- cmpdi cr7, r28, ITERATIONS
- beq cr7, L(default)
- lbz r4, 0(r30)
- bl STRCHR
- nop
- /* If first char of search str is not present. */
- cmpdi cr7, r3, 0
- ble cr7, L(end)
- mr r8, r3
- mr r4, r30 /* Restore r4. */
- li r0, 0
- mr r6, r29
- clrrdi r4, r4, 3
- addi r4, r4, 8
- b L(begin1)
-
- /* Handle less than 8 search string. */
- .align 4
-L(lessthan8):
- mr r4, r3
- mr r9, r30
- li r0, 0
-
- rlwinm r10, r9, 3, 26, 28 /* Calculate padding in bits. */
- srdi r8, r10, 3 /* Padding in bytes. */
-	clrrdi r9, r9, 3	/* Make r9 (pat pointer) aligned to 8.  */
- ld r6, 0(r9)
-	cmpdi cr7, r10, 0	/* Already aligned?  */
- beq cr7, L(proceed2)
-#ifdef __LITTLE_ENDIAN__
- srd r6, r6, r10 /* Discard unwanted bits. */
-#else
- sld r6, r6, r10
-#endif
- subfic r8, r8, 8
- cmpd cr7, r8, r31 /* Next load needed? */
- bge cr7, L(proceed2)
- ld r7, 8(r9)
- subfic r10, r10, 64
-#ifdef __LITTLE_ENDIAN__
- sld r7, r7, r10 /* Discard unwanted bits. */
-#else
- srd r7, r7, r10
-#endif
- or r6, r6, r7 /* Form complete search str. */
-L(proceed2):
- mr r29, r6
- rlwinm r10, r3, 3, 26, 28
- clrrdi r7, r3, 3 /* Make r3 aligned. */
- ld r5, 0(r7)
- sldi r8, r31, 3
- subfic r8, r8, 64
-#ifdef __LITTLE_ENDIAN__
- sld r6, r6, r8
- cmpb r9, r0, r5
- srd r9, r9, r10
-#else
- srd r6, r6, r8
- cmpb r9, r0, r5
- sld r9, r9, r10
-#endif
- cmpdi cr7, r9, 0
- bne cr7, L(noload)
- cmpdi cr7, r10, 0
- beq cr7, L(continue)
- ld r7, 8(r7)
-L(continue1):
- mr r12, r10
- addi r12, r12, -8
- subfic r11, r12, 64
- b L(nextbyte)
-
- .align 4
-L(continue):
- ld r7, 8(r7)
- li r12, -8 /* Shift values. */
- li r11, 72 /* Shift values. */
-L(nextbyte):
- addi r12, r12, 8 /* Mask for rotation. */
- addi r11, r11, -8
-#ifdef __LITTLE_ENDIAN__
- srd r9, r5, r12
- sld r10, r7, r11
- or r10, r9, r10
- sld r10, r10, r8
- cmpb r9, r0, r10
- srd r9, r9, r8
-#else
- sld r9, r5, r12
- srd r10, r7, r11
- or r10, r9, r10
- srd r10, r10, r8
- cmpb r9, r0, r10
- sld r9, r9, r8
-#endif
- cmpdi cr7, r9, 0
- bne cr7, L(retnull)
- cmpb r9, r10, r6
- cmpdi cr7, r9, -1
- beq cr7, L(end)
- addi r3, r4, 1
-	/* When our iterations exceed ITERATIONS, fall back to default.  */
- addi r28, r28, 1
- cmpdi cr7, r28, ITERATIONS
- beq cr7, L(default)
- lbz r4, 0(r30)
- bl STRCHR
- nop
- /* If first char of search str is not present. */
- cmpdi cr7, r3, 0
- ble cr7, L(end)
- mr r4, r3
- mr r6, r29
- li r0, 0
- b L(proceed2)
-
- .align 4
-L(noload):
- /* Reached null in r3, so skip next load. */
- li r7, 0
- b L(continue1)
-
- .align 4
-L(return):
- /* Update return values. */
- srdi r9, r11, 3
- subf r3, r9, r3
- b L(end)
-
- /* Handling byte by byte. */
- .align 4
-L(bytebybyte):
- mr r8, r3
- addi r8, r8, -1
-L(loop1):
- addi r8, r8, 1
- mr r3, r8
- mr r4, r30
- lbz r6, 0(r4)
- cmpdi cr7, r6, 0
- beq cr7, L(updater3)
-L(loop):
- lbz r5, 0(r3)
- cmpdi cr7, r5, 0
- beq cr7, L(retnull)
- cmpld cr7, r6, r5
- bne cr7, L(loop1)
- addi r3, r3, 1
- addi r4, r4, 1
- lbz r6, 0(r4)
- cmpdi cr7, r6, 0
- beq cr7, L(updater3)
- b L(loop)
-
- /* Handling return values. */
- .align 4
-L(updater3):
-	subf r3, r31, r3	/* Point r3 back at the start of the match.  */
- b L(end)
-
- .align 4
-L(ret_r3):
-	mr r3, r29		/* Empty pattern: return the input string.  */
- b L(end)
-
- .align 4
-L(retnull):
- li r3, 0 /* Return NULL. */
- b L(end)
-
- .align 4
-L(default):
- mr r4, r30
- bl __strstr_ppc
- nop
-
- .align 4
-L(end):
- addi r1, r1, FRAMESIZE /* Restore stack pointer. */
- cfi_adjust_cfa_offset(-FRAMESIZE)
- ld r0, 16(r1) /* Restore the saved link register. */
- ld r28, -32(r1) /* Restore callers save register r28. */
- ld r29, -24(r1) /* Restore callers save register r29. */
- ld r30, -16(r1) /* Restore callers save register r30. */
- ld r31, -8(r1) /* Restore callers save register r31. */
- mtlr r0 /* Branch to link register. */
- blr
-END (STRSTR)
-libc_hidden_builtin_def (strstr)
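The control flow above can be summarized in C: rule out haystacks shorter than the pattern, locate candidates with strchr, verify each candidate window (strncmp here plays the role of the cmpb window compare), and after ITERATIONS failed candidates hand off to the generic implementation so the worst case stays bounded. A hedged sketch; fallback_strstr stands in for __strstr_ppc and simply reuses libc strstr.

#include <string.h>
#include <stddef.h>

#define ITERATIONS 64   /* same candidate budget as the assembly */

static char *fallback_strstr (const char *s, const char *pat)
{
  return strstr (s, pat);           /* placeholder for __strstr_ppc */
}

char *my_strstr (const char *s, const char *pat)
{
  size_t patlen = strlen (pat);
  if (patlen == 0)
    return (char *) s;

  /* Reject haystacks shorter than the pattern (the strnlen call).  */
  for (size_t i = 0; i < patlen; i++)
    if (s[i] == '\0')
      return NULL;

  int tries = 0;
  const char *p = s;
  while ((p = strchr (p, pat[0])) != NULL)
    {
      if (strncmp (p, pat, patlen) == 0)
        return (char *) p;
      if (++tries == ITERATIONS)    /* bound the worst case */
        return fallback_strstr (p + 1, pat);
      p++;
    }
  return NULL;
}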
diff --git a/sysdeps/powerpc/powerpc64/power7/sub_n.S b/sysdeps/powerpc/powerpc64/power7/sub_n.S
deleted file mode 100644
index 848dad5718..0000000000
--- a/sysdeps/powerpc/powerpc64/power7/sub_n.S
+++ /dev/null
@@ -1,23 +0,0 @@
-/* PowerPC64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
- Copyright (C) 2013-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-#define USE_AS_SUB
-#include "add_n.S"