aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/powerpc/powerpc64/multiarch
diff options
context:
space:
mode:
authorAdhemerval Zanella <azanella@linux.vnet.ibm.com>2014-07-15 12:19:09 -0400
committerAdhemerval Zanella <azanella@linux.vnet.ibm.com>2014-09-10 07:39:46 -0400
commit71ae86478edc7b21872464f43fb29ff650c1681a (patch)
treea75679fa464a1d19543020ef0c4f4f982d099d99 /sysdeps/powerpc/powerpc64/multiarch
parent3b473fecdf4c52989cd915b649bb6d26c042d048 (diff)
downloadglibc-71ae86478edc7b21872464f43fb29ff650c1681a.tar
glibc-71ae86478edc7b21872464f43fb29ff650c1681a.tar.gz
glibc-71ae86478edc7b21872464f43fb29ff650c1681a.tar.bz2
glibc-71ae86478edc7b21872464f43fb29ff650c1681a.zip
PowerPC: memset optimization for POWER8/PPC64
This patch adds an optimized memset implementation for POWER8. For sizes from 0 to 255 bytes, a word/doubleword algorithm similar to POWER7 optimized one is used. For size higher than 255 two strategies are used: 1. If the constant is different than 0, the memory is written with altivec vector instruction; 2. If constant is 0, dbcz instructions are used. The loop is unrolled to clear 512 byte at time. Using vector instructions increases throughput considerable, with a double performance for sizes larger than 1024. The dcbz loops unrolls also shows performance improvement, by doubling throughput for sizes larger than 8192 bytes.
Diffstat (limited to 'sysdeps/powerpc/powerpc64/multiarch')
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/Makefile2
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/bzero.c11
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c6
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/memset-power8.S43
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/memset.c11
5 files changed, 64 insertions, 9 deletions
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index 0de3804c0c..abc9d2e973 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -2,7 +2,7 @@ ifeq ($(subdir),string)
sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
memcpy-power4 memcpy-ppc64 memcmp-power7 memcmp-power4 \
memcmp-ppc64 memset-power7 memset-power6 memset-power4 \
- memset-ppc64 \
+ memset-ppc64 memset-power8 \
mempcpy-power7 mempcpy-ppc64 memchr-power7 memchr-ppc64 \
memrchr-power7 memrchr-ppc64 rawmemchr-power7 \
rawmemchr-ppc64 strlen-power7 strlen-ppc64 strnlen-power7 \
diff --git a/sysdeps/powerpc/powerpc64/multiarch/bzero.c b/sysdeps/powerpc/powerpc64/multiarch/bzero.c
index ed83541fa5..298cf005a1 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/bzero.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/bzero.c
@@ -26,14 +26,17 @@ extern __typeof (bzero) __bzero_ppc attribute_hidden;
extern __typeof (bzero) __bzero_power4 attribute_hidden;
extern __typeof (bzero) __bzero_power6 attribute_hidden;
extern __typeof (bzero) __bzero_power7 attribute_hidden;
+extern __typeof (bzero) __bzero_power8 attribute_hidden;
libc_ifunc (__bzero,
- (hwcap & PPC_FEATURE_HAS_VSX)
- ? __bzero_power7 :
- (hwcap & PPC_FEATURE_ARCH_2_05)
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+ ? __bzero_power8 :
+ (hwcap & PPC_FEATURE_HAS_VSX)
+ ? __bzero_power7 :
+ (hwcap & PPC_FEATURE_ARCH_2_05)
? __bzero_power6 :
(hwcap & PPC_FEATURE_POWER4)
- ? __bzero_power4
+ ? __bzero_power4
: __bzero_ppc);
weak_alias (__bzero, bzero)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index a574487f2f..06d5be9efb 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -34,6 +34,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
size_t i = 0;
unsigned long int hwcap = GLRO(dl_hwcap);
+ unsigned long int hwcap2 = GLRO(dl_hwcap2);
+
/* hwcap contains only the latest supported ISA, the code checks which is
and fills the previous supported ones. */
if (hwcap & PPC_FEATURE_ARCH_2_06)
@@ -69,6 +71,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/powerpc/powerpc64/multiarch/memset.c. */
IFUNC_IMPL (i, name, memset,
+ IFUNC_IMPL_ADD (array, i, memset, hwcap2 & PPC_FEATURE2_ARCH_2_07,
+ __memset_power8)
IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_HAS_VSX,
__memset_power7)
IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_ARCH_2_05,
@@ -134,6 +138,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/powerpc/powerpc64/multiarch/bzero.c. */
IFUNC_IMPL (i, name, bzero,
+ IFUNC_IMPL_ADD (array, i, bzero, hwcap2 & PPC_FEATURE2_ARCH_2_07,
+ __bzero_power8)
IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_HAS_VSX,
__bzero_power7)
IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_ARCH_2_05,
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S b/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S
new file mode 100644
index 0000000000..e8a604b000
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S
@@ -0,0 +1,43 @@
+/* Optimized memset implementation for PowerPC64/POWER8.
+ Copyright (C) 2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#undef EALIGN
+#define EALIGN(name, alignt, words) \
+ .section ".text"; \
+ ENTRY_2(__memset_power8) \
+ .align ALIGNARG(alignt); \
+ EALIGN_W_##words; \
+ BODY_LABEL(__memset_power8): \
+ cfi_startproc; \
+ LOCALENTRY(__memset_power8)
+
+#undef END_GEN_TB
+#define END_GEN_TB(name, mask) \
+ cfi_endproc; \
+ TRACEBACK_MASK(__memset_power8,mask) \
+ END_2(__memset_power8)
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#undef __bzero
+#define __bzero __bzero_power8
+
+#include <sysdeps/powerpc/powerpc64/power8/memset.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset.c b/sysdeps/powerpc/powerpc64/multiarch/memset.c
index aa2ae7056e..9c7ed10c87 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memset.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/memset.c
@@ -32,16 +32,19 @@ extern __typeof (__redirect_memset) __memset_ppc attribute_hidden;
extern __typeof (__redirect_memset) __memset_power4 attribute_hidden;
extern __typeof (__redirect_memset) __memset_power6 attribute_hidden;
extern __typeof (__redirect_memset) __memset_power7 attribute_hidden;
+extern __typeof (__redirect_memset) __memset_power8 attribute_hidden;
/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
ifunc symbol properly. */
libc_ifunc (__libc_memset,
- (hwcap & PPC_FEATURE_HAS_VSX)
- ? __memset_power7 :
- (hwcap & PPC_FEATURE_ARCH_2_05)
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+ ? __memset_power8 :
+ (hwcap & PPC_FEATURE_HAS_VSX)
+ ? __memset_power7 :
+ (hwcap & PPC_FEATURE_ARCH_2_05)
? __memset_power6 :
(hwcap & PPC_FEATURE_POWER4)
- ? __memset_power4
+ ? __memset_power4
: __memset_ppc);
#undef memset