From 71ae86478edc7b21872464f43fb29ff650c1681a Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Date: Tue, 15 Jul 2014 12:19:09 -0400 Subject: PowerPC: memset optimization for POWER8/PPC64 This patch adds an optimized memset implementation for POWER8. For sizes from 0 to 255 bytes, a word/doubleword algorithm similar to the POWER7 optimized one is used. For sizes larger than 255 bytes, two strategies are used: 1. If the constant is different from 0, the memory is written with Altivec vector instructions; 2. If the constant is 0, dcbz instructions are used. The loop is unrolled to clear 512 bytes at a time. Using vector instructions increases throughput considerably, doubling performance for sizes larger than 1024. The dcbz loop unrolling also shows a performance improvement, doubling throughput for sizes larger than 8192 bytes. --- .../powerpc/powerpc64/multiarch/memset-power8.S | 43 ++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 sysdeps/powerpc/powerpc64/multiarch/memset-power8.S (limited to 'sysdeps/powerpc/powerpc64/multiarch/memset-power8.S') diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S b/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S new file mode 100644 index 0000000000..e8a604b000 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S @@ -0,0 +1,43 @@ +/* Optimized memset implementation for PowerPC64/POWER8. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#undef EALIGN /* Replaced: NAME is ignored, entry is always __memset_power8.  */
+#define EALIGN(name, alignt, words) \
+  .section ".text"; \
+  ENTRY_2(__memset_power8) \
+  .align ALIGNARG(alignt); \
+  EALIGN_W_##words; \
+  BODY_LABEL(__memset_power8): \
+  cfi_startproc; \
+  LOCALENTRY(__memset_power8)
+
+#undef END_GEN_TB /* Replaced: NAME is ignored, end is always __memset_power8.  */
+#define END_GEN_TB(name, mask) \
+  cfi_endproc; \
+  TRACEBACK_MASK(__memset_power8,mask) \
+  END_2(__memset_power8)
+
+#undef libc_hidden_builtin_def /* Replaced: expands to nothing here.  */
+#define libc_hidden_builtin_def(name)
+
+#undef __bzero /* Replaced: __bzero is renamed to __bzero_power8.  */
+#define __bzero __bzero_power8
+
+#include <sysdeps/powerpc/powerpc64/power8/memset.S> /* Uses the overrides above.  */
-- 
cgit v1.2.3-70-g09d2