diff options
Diffstat (limited to 'sysdeps/powerpc/powerpc64')
-rw-r--r-- | sysdeps/powerpc/powerpc64/dl-machine.h | 4 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/Makefile | 4 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c | 2 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/memcpy-power8-cached.S | 176 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/memcpy.c | 23 |
5 files changed, 196 insertions, 13 deletions
diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h
index aeb91b8f69..76dceee80b 100644
--- a/sysdeps/powerpc/powerpc64/dl-machine.h
+++ b/sysdeps/powerpc/powerpc64/dl-machine.h
@@ -27,6 +27,7 @@
 #include <dl-tls.h>
 #include <sysdep.h>
 #include <hwcapinfo.h>
+#include <cpu-features.c>
 
 /* Translate a processor specific dynamic tag to the index
    in l_info array.  */
@@ -300,13 +301,14 @@ BODY_PREFIX "_dl_start_user:\n" \
 /* We define an initialization function to initialize HWCAP/HWCAP2 and
    platform data so it can be copied into the TCB later.  This is called
    very early in _dl_sysdep_start for dynamically linked binaries.  */
-#ifdef SHARED
+#if defined(SHARED) && IS_IN (rtld)
 # define DL_PLATFORM_INIT dl_platform_init ()
 
 static inline void __attribute__ ((unused))
 dl_platform_init (void)
 {
   __tcb_parse_hwcap_and_convert_at_platform ();
+  init_cpu_features (&GLRO(dl_powerpc_cpu_features));
 }
 #endif
 
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index dea49acff5..4df6b45c4c 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -1,6 +1,6 @@
 ifeq ($(subdir),string)
-sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
-		   memcpy-power4 memcpy-ppc64 \
+sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \
+		   memcpy-cell memcpy-power4 memcpy-ppc64 \
 		   memcmp-power8 memcmp-power7 memcmp-power4 memcmp-ppc64 \
 		   memset-power7 memset-power6 memset-power4 \
 		   memset-ppc64 memset-power8 \
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index 6a88536c98..77a60eaf27 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -51,6 +51,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 #ifdef SHARED
   /* Support sysdeps/powerpc/powerpc64/multiarch/memcpy.c.  */
   IFUNC_IMPL (i, name, memcpy,
+	      IFUNC_IMPL_ADD (array, i, memcpy, hwcap2 & PPC_FEATURE2_ARCH_2_07,
+			      __memcpy_power8_cached)
 	      IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_HAS_VSX,
 			      __memcpy_power7)
 	      IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_ARCH_2_06,
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memcpy-power8-cached.S b/sysdeps/powerpc/powerpc64/multiarch/memcpy-power8-cached.S
new file mode 100644
index 0000000000..e8bea91ca6
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/memcpy-power8-cached.S
@@ -0,0 +1,176 @@
+/* Optimized memcpy implementation for cached memory on PowerPC64/POWER8.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+   Returns 'dst'.  */
+
+	.machine power8
+ENTRY_TOCLESS (__memcpy_power8_cached, 5)
+	CALL_MCOUNT 3
+
+	cmpldi	cr7,r5,15	/* Lengths above 15 go to the vector paths.  */
+	bgt	cr7,L(ge_16)
+	andi.	r9,r5,0x1	/* cr0 <- bit 0 of len; r9 is only a scratch here.  */
+	mr	r9,r3		/* r9 = running dst; r3 itself is never written.  */
+	beq	cr0,1f
+	lbz	r10,0(r4)	/* Bit 0 set: copy one byte.  */
+	addi	r9,r3,1
+	addi	r4,r4,1
+	stb	r10,0(r3)
+1:
+	andi.	r10,r5,0x2
+	beq	cr0,2f
+	lhz	r10,0(r4)	/* Bit 1 set: copy one halfword.  */
+	addi	r9,r9,2
+	addi	r4,r4,2
+	sth	r10,-2(r9)	/* r9 was already advanced, hence the -2.  */
+2:
+	andi.	r10,r5,0x4
+	beq	cr0,3f
+	lwz	r10,0(r4)	/* Bit 2 set: copy one word.  */
+	addi	r9,9,4		/* NOTE(review): bare register number 9, unlike the rN spelling used elsewhere.  */
+	addi	r4,4,4		/* NOTE(review): bare register number 4, unlike the rN spelling used elsewhere.  */
+	stw	r10,-4(r9)
+3:
+	andi.	r10,r5,0x8
+	beqlr	cr0		/* Bit 3 clear: nothing left, return.  */
+	ld	r10,0(r4)	/* Bit 3 set: copy one doubleword.  */
+	std	r10,0(r9)
+	blr
+
+	.align	4
+L(ge_16):
+	cmpldi	cr7,r5,32	/* 16 <= len <= 32.  */
+	ble	cr7,L(ge_16_le_32)
+	cmpldi	cr7,r5,64	/* 32 < len <= 64.  */
+	ble	cr7,L(gt_32_le_64)
+
+	/* Align dst to 16 bytes.  */
+	andi.	r9,r3,0xf	/* r9 = dst & 15.  */
+	mr	r12,r3		/* r12 = running dst for the rest of the copy.  */
+	beq	cr0,L(dst_is_align_16)
+	lxvd2x	v0,0,r4		/* Load the first 16 bytes of src.  */
+	subfic	r12,r9,16	/* r12 = 16 - (dst & 15).  */
+	subf	r5,r12,r5	/* len -= r12.  */
+	add	r4,r4,r12	/* src += r12.  */
+	add	r12,r3,r12	/* r12 = dst advanced to the 16-byte boundary.  */
+	stxvd2x	v0,0,r3		/* Store those 16 bytes at the original dst.  */
+L(dst_is_align_16):
+	cmpldi	cr7,r5,127	/* Fewer than 128 bytes left: skip the loop.  */
+	ble	cr7,L(tail_copy)
+	mr	r9,r12
+	srdi	r10,r5,7	/* r10 = len / 128.  */
+	li	r11,16
+	li	r6,32
+	li	r7,48
+	mtctr	r10		/* CTR = number of 128-byte iterations.  */
+	clrrdi	r0,r5,7		/* r0 = len with the low 7 bits cleared.  */
+
+	/* Main loop, copy 128 bytes each time.  */
+	.align	4
+L(copy_128):
+	lxvd2x	v10,0,r4
+	lxvd2x	v11,r4,r11
+	addi	r8,r4,64	/* r8 = src + 64, base for the second half.  */
+	addi	r10,r9,64	/* r10 = dst + 64, base for the second half.  */
+	lxvd2x	v12,r4,r6
+	lxvd2x	v0,r4,r7
+	addi	r4,r4,128
+	stxvd2x	v10,0,r9
+	stxvd2x	v11,r9,r11
+	stxvd2x	v12,r9,r6
+	stxvd2x	v0,r9,r7
+	addi	r9,r9,128
+	lxvd2x	v10,0,r8
+	lxvd2x	v11,r8,r11
+	lxvd2x	v12,r8,r6
+	lxvd2x	v0,r8,r7
+	stxvd2x	v10,0,r10
+	stxvd2x	v11,r10,r11
+	stxvd2x	v12,r10,r6
+	stxvd2x	v0,r10,r7
+	bdnz	L(copy_128)
+
+	add	r12,r12,r0	/* Advance dst past the bytes the loop copied.  */
+	rldicl	r5,r5,0,57	/* Keep only the low 7 bits of len (len & 127).  */
+L(tail_copy):
+	cmpldi	cr7,r5,63	/* At least 64 bytes left: copy one 64-byte chunk.  */
+	ble	cr7,L(tail_le_64)
+	li	r8,16
+	li	r10,32
+	lxvd2x	v10,0,r4
+	li	r9,48
+	addi	r5,r5,-64
+	lxvd2x	v11,r4,r8
+	lxvd2x	v12,r4,r10
+	lxvd2x	v0,r4,r9
+	addi	r4,r4,64
+	stxvd2x	v10,0,r12
+	stxvd2x	v11,r12,r8
+	stxvd2x	v12,r12,r10
+	stxvd2x	v0,r12,9	/* NOTE(review): bare register number 9 as the index, unlike the rN spelling used elsewhere.  */
+	addi	r12,r12,64
+
+L(tail_le_64):
+	cmpldi	cr7,r5,32
+	bgt	cr7,L(tail_gt_32_le_64)
+	cmpdi	cr7,r5,0
+	beqlr	cr7		/* Nothing left: return.  */
+	addi	r5,r5,-32	/* r5 = len - 32, which is <= 0 on this path.  */
+	li	r9,16
+	add	r8,r4,r5
+	add	r10,r12,r5
+	lxvd2x	v12,r4,r5	/* Copy the 32 bytes ending at src + len; may re-copy bytes already stored.  */
+	lxvd2x	v0,r8,r9
+	stxvd2x	v12,r12,r5
+	stxvd2x	v0,r10,r9
+	blr
+
+	.align	4
+L(ge_16_le_32):
+	addi	r5,r5,-16	/* r5 = offset of the last 16 bytes.  */
+	lxvd2x	v0,0,r4		/* First 16 bytes.  */
+	lxvd2x	v1,r4,r5	/* Last 16 bytes; overlaps the first when len < 32.  */
+	stxvd2x	v0,0,r3
+	stxvd2x	v1,r3,r5
+	blr
+
+	.align	4
+L(gt_32_le_64):
+	mr	r12,r3		/* r12 = dst, then fall through to the shared tail below.  */
+
+	.align	4
+L(tail_gt_32_le_64):
+	li	r9,16
+	lxvd2x	v0,0,r4		/* First 32 bytes: v0, v1.  */
+	addi	r5,r5,-32	/* r5 = offset of the last 32 bytes.  */
+	lxvd2x	v1,r4,r9
+	add	r8,r4,r5
+	lxvd2x	v2,r4,r5	/* Last 32 bytes: v2, v3; overlaps the first 32 when len < 64.  */
+	add	r10,r12,r5
+	lxvd2x	v3,r8,r9
+	stxvd2x	v0,0,r12
+	stxvd2x	v1,r12,r9
+	stxvd2x	v2,r12,r5
+	stxvd2x	v3,r10,r9
+	blr
+
+END_GEN_TB (__memcpy_power8_cached,TB_TOCLESS)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memcpy.c b/sysdeps/powerpc/powerpc64/multiarch/memcpy.c
index 9f4286c4fe..fb49fe161f 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memcpy.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/memcpy.c
@@ -35,18 +35,21 @@ extern __typeof (__redirect_memcpy) __memcpy_cell attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_power6 attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_a2 attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_power7 attribute_hidden;
+extern __typeof (__redirect_memcpy) __memcpy_power8_cached attribute_hidden;
 
 libc_ifunc (__libc_memcpy,
-            (hwcap & PPC_FEATURE_HAS_VSX)
-            ? __memcpy_power7 :
-	      (hwcap & PPC_FEATURE_ARCH_2_06)
-	      ? __memcpy_a2 :
-		(hwcap & PPC_FEATURE_ARCH_2_05)
-		? __memcpy_power6 :
-		  (hwcap & PPC_FEATURE_CELL_BE)
-		  ? __memcpy_cell :
-		    (hwcap & PPC_FEATURE_POWER4)
-		    ? __memcpy_power4
+	    ((hwcap2 & PPC_FEATURE2_ARCH_2_07) && use_cached_memopt) /* NOTE(review): use_cached_memopt is not defined in any hunk shown here; confirm its source.  */
+	    ? __memcpy_power8_cached :
+	      (hwcap & PPC_FEATURE_HAS_VSX)
+	      ? __memcpy_power7 :
+		(hwcap & PPC_FEATURE_ARCH_2_06)
+		? __memcpy_a2 :
+		  (hwcap & PPC_FEATURE_ARCH_2_05)
+		  ? __memcpy_power6 :
+		    (hwcap & PPC_FEATURE_CELL_BE)
+		    ? __memcpy_cell :
+		      (hwcap & PPC_FEATURE_POWER4)
+		      ? __memcpy_power4
             : __memcpy_ppc);
 
 #undef memcpy
|