diff options
author | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2014-07-15 12:19:09 -0400 |
---|---|---|
committer | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2014-09-10 07:39:46 -0400 |
commit | 71ae86478edc7b21872464f43fb29ff650c1681a (patch) | |
tree | a75679fa464a1d19543020ef0c4f4f982d099d99 /sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c | |
parent | 3b473fecdf4c52989cd915b649bb6d26c042d048 (diff) | |
download | glibc-71ae86478edc7b21872464f43fb29ff650c1681a.tar glibc-71ae86478edc7b21872464f43fb29ff650c1681a.tar.gz glibc-71ae86478edc7b21872464f43fb29ff650c1681a.tar.bz2 glibc-71ae86478edc7b21872464f43fb29ff650c1681a.zip |
PowerPC: memset optimization for POWER8/PPC64
This patch adds an optimized memset implementation for POWER8. For
sizes from 0 to 255 bytes, a word/doubleword algorithm similar to
the POWER7-optimized one is used.
For sizes greater than 255 bytes, two strategies are used:
1. If the constant is different from 0, the memory is written with
altivec vector instructions;
2. If the constant is 0, dcbz instructions are used. The loop is unrolled
to clear 512 bytes at a time.
Using vector instructions increases throughput considerably, doubling
performance for sizes larger than 1024 bytes. Unrolling the dcbz loop
also improves performance, doubling throughput for sizes
larger than 8192 bytes.
Diffstat (limited to 'sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c')
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c | 6 |
1 files changed, 6 insertions, 0 deletions
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c index a574487f2f..06d5be9efb 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c @@ -34,6 +34,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, size_t i = 0; unsigned long int hwcap = GLRO(dl_hwcap); + unsigned long int hwcap2 = GLRO(dl_hwcap2); + /* hwcap contains only the latest supported ISA, the code checks which is and fills the previous supported ones. */ if (hwcap & PPC_FEATURE_ARCH_2_06) @@ -69,6 +71,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/powerpc/powerpc64/multiarch/memset.c. */ IFUNC_IMPL (i, name, memset, + IFUNC_IMPL_ADD (array, i, memset, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __memset_power8) IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_HAS_VSX, __memset_power7) IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_ARCH_2_05, @@ -134,6 +138,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/powerpc/powerpc64/multiarch/bzero.c. */ IFUNC_IMPL (i, name, bzero, + IFUNC_IMPL_ADD (array, i, bzero, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __bzero_power8) IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_HAS_VSX, __bzero_power7) IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_ARCH_2_05, |