diff options
author | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2014-07-15 12:19:09 -0400 |
---|---|---|
committer | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2014-09-10 07:39:46 -0400 |
commit | 71ae86478edc7b21872464f43fb29ff650c1681a (patch) | |
tree | a75679fa464a1d19543020ef0c4f4f982d099d99 /sysdeps/powerpc/powerpc64/multiarch/memset-power8.S | |
parent | 3b473fecdf4c52989cd915b649bb6d26c042d048 (diff) | |
download | glibc-71ae86478edc7b21872464f43fb29ff650c1681a.tar glibc-71ae86478edc7b21872464f43fb29ff650c1681a.tar.gz glibc-71ae86478edc7b21872464f43fb29ff650c1681a.tar.bz2 glibc-71ae86478edc7b21872464f43fb29ff650c1681a.zip |
PowerPC: memset optimization for POWER8/PPC64
This patch adds an optimized memset implementation for POWER8. For
sizes from 0 to 255 bytes, a word/doubleword algorithm similar to the
POWER7-optimized one is used.
For sizes larger than 255 bytes, two strategies are used:
1. If the constant is nonzero, the memory is written with
Altivec vector instructions;
2. If the constant is 0, dcbz instructions are used. The loop is unrolled
to clear 512 bytes at a time.
Using vector instructions increases throughput considerably, doubling
performance for sizes larger than 1024 bytes. Unrolling the dcbz loop
also improves performance, doubling throughput for sizes
larger than 8192 bytes.
Diffstat (limited to 'sysdeps/powerpc/powerpc64/multiarch/memset-power8.S')
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/memset-power8.S | 43 |
1 files changed, 43 insertions, 0 deletions
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S b/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S new file mode 100644 index 0000000000..e8a604b000 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S @@ -0,0 +1,43 @@ +/* Optimized memset implementation for PowerPC64/POWER8. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .section ".text"; \ + ENTRY_2(__memset_power8) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + BODY_LABEL(__memset_power8): \ + cfi_startproc; \ + LOCALENTRY(__memset_power8) + +#undef END_GEN_TB +#define END_GEN_TB(name, mask) \ + cfi_endproc; \ + TRACEBACK_MASK(__memset_power8,mask) \ + END_2(__memset_power8) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef __bzero +#define __bzero __bzero_power8 + +#include <sysdeps/powerpc/powerpc64/power8/memset.S> |