Diffstat (limited to 'sysdeps/aarch64/multiarch')
-rw-r--r--  sysdeps/aarch64/multiarch/Makefile           |   3 -
-rw-r--r--  sysdeps/aarch64/multiarch/ifunc-impl-list.c  |  51 -
-rw-r--r--  sysdeps/aarch64/multiarch/init-arch.h        |  23 -
-rw-r--r--  sysdeps/aarch64/multiarch/memcpy.c           |  39 -
-rw-r--r--  sysdeps/aarch64/multiarch/memcpy_generic.S   |  42 -
-rw-r--r--  sysdeps/aarch64/multiarch/memcpy_thunderx.S  | 326 -
-rw-r--r--  sysdeps/aarch64/multiarch/memmove.c          |  39 -
7 files changed, 0 insertions, 523 deletions
diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
deleted file mode 100644
index 78d52c717d..0000000000
--- a/sysdeps/aarch64/multiarch/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-ifeq ($(subdir),string)
-sysdep_routines += memcpy_generic memcpy_thunderx
-endif
diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
deleted file mode 100644
index c4f23dfb87..0000000000
--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+++ /dev/null
@@ -1,51 +0,0 @@
-/* Enumerate available IFUNC implementations of a function.  AARCH64 version.
-   Copyright (C) 2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <assert.h>
-#include <string.h>
-#include <wchar.h>
-#include <ldsodefs.h>
-#include <ifunc-impl-list.h>
-#include <init-arch.h>
-#include <stdio.h>
-
-/* Maximum number of IFUNC implementations.  */
-#define MAX_IFUNC	2
-
-size_t
-__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
-			size_t max)
-{
-  assert (max >= MAX_IFUNC);
-
-  size_t i = 0;
-
-  INIT_ARCH ();
-
-  /* Support sysdeps/aarch64/multiarch/memcpy.c and memmove.c.  */
-  IFUNC_IMPL (i, name, memcpy,
-	      IFUNC_IMPL_ADD (array, i, memcpy, IS_THUNDERX (midr),
-			      __memcpy_thunderx)
-	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
-  IFUNC_IMPL (i, name, memmove,
-	      IFUNC_IMPL_ADD (array, i, memmove, IS_THUNDERX (midr),
-			      __memmove_thunderx)
-	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
-
-  return i;
-}
diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
deleted file mode 100644
index 3af442c538..0000000000
--- a/sysdeps/aarch64/multiarch/init-arch.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Define INIT_ARCH so that midr is initialized before use by IFUNCs.
-   This file is part of the GNU C Library.
-   Copyright (C) 2017 Free Software Foundation, Inc.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <ldsodefs.h>
-
-#define INIT_ARCH()				\
-  uint64_t __attribute__((unused)) midr =	\
-    GLRO(dl_aarch64_cpu_features).midr_el1;
diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
deleted file mode 100644
index 9f73efbba7..0000000000
--- a/sysdeps/aarch64/multiarch/memcpy.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Multiple versions of memcpy.  AARCH64 version.
-   Copyright (C) 2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* Define multiple versions only for the definition in libc.  */
-
-#if IS_IN (libc)
-/* Redefine memcpy so that the compiler won't complain about the type
-   mismatch with the IFUNC selector in strong_alias, below.  */
-# undef memcpy
-# define memcpy __redirect_memcpy
-# include <string.h>
-# include <init-arch.h>
-
-extern __typeof (__redirect_memcpy) __libc_memcpy;
-
-extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
-extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden;
-
-libc_ifunc (__libc_memcpy,
-	    IS_THUNDERX (midr) ? __memcpy_thunderx : __memcpy_generic);
-
-# undef memcpy
-strong_alias (__libc_memcpy, memcpy);
-#endif
diff --git a/sysdeps/aarch64/multiarch/memcpy_generic.S b/sysdeps/aarch64/multiarch/memcpy_generic.S
deleted file mode 100644
index 041a77943d..0000000000
--- a/sysdeps/aarch64/multiarch/memcpy_generic.S
+++ /dev/null
@@ -1,42 +0,0 @@
-/* A Generic Optimized memcpy implementation for AARCH64.
-   Copyright (C) 2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* The actual memcpy and memmove code is in ../memcpy.S.  If we are
-   building libc this file defines __memcpy_generic and __memmove_generic.
-   Otherwise the include of ../memcpy.S will define the normal __memcpy
-   and __memmove entry points.  */
-
-#include <sysdep.h>
-
-#if IS_IN (libc)
-
-# define MEMCPY __memcpy_generic
-# define MEMMOVE __memmove_generic
-
-/* Do not hide the generic versions of memcpy and memmove, we use them
-   internally.  */
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name)
-
-/* It doesn't make sense to send libc-internal memcpy calls through a PLT.  */
-	.globl __GI_memcpy; __GI_memcpy = __memcpy_generic
-	.globl __GI_memmove; __GI_memmove = __memmove_generic
-
-#endif
-
-#include "../memcpy.S"
diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
deleted file mode 100644
index 5ac9e341bb..0000000000
--- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S
+++ /dev/null
@@ -1,326 +0,0 @@
-/* A Thunderx Optimized memcpy implementation for AARCH64.
-   Copyright (C) 2017 Free Software Foundation, Inc.
-
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* The actual code in this memcpy and memmove should be identical to the
-   generic version except for the code under '#ifdef THUNDERX'.  This is
-   to make is easier to keep this version and the generic version in sync
-   for changes that are not specific to thunderx.  */
-
-#include <sysdep.h>
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64, unaligned accesses.
- *
- */
-
-#define dstin	x0
-#define src	x1
-#define count	x2
-#define dst	x3
-#define srcend	x4
-#define dstend	x5
-#define A_l	x6
-#define A_lw	w6
-#define A_h	x7
-#define A_hw	w7
-#define B_l	x8
-#define B_lw	w8
-#define B_h	x9
-#define C_l	x10
-#define C_h	x11
-#define D_l	x12
-#define D_h	x13
-#define E_l	src
-#define E_h	count
-#define F_l	srcend
-#define F_h	dst
-#define G_l	count
-#define G_h	dst
-#define tmp1	x14
-
-/* Copies are split into 3 main cases: small copies of up to 16 bytes,
-   medium copies of 17..96 bytes which are fully unrolled.  Large copies
-   of more than 96 bytes align the destination and use an unrolled loop
-   processing 64 bytes per iteration.
-   In order to share code with memmove, small and medium copies read all
-   data before writing, allowing any kind of overlap.  So small, medium
-   and large backwards memmoves are handled by falling through into memcpy.
-   Overlapping large forward memmoves use a loop that copies backwards.
-*/
-
-#ifndef MEMMOVE
-# define MEMMOVE memmove
-#endif
-#ifndef MEMCPY
-# define MEMCPY memcpy
-#endif
-
-#if IS_IN (libc)
-
-# undef MEMCPY
-# define MEMCPY __memcpy_thunderx
-# undef MEMMOVE
-# define MEMMOVE __memmove_thunderx
-# define USE_THUNDERX
-
-ENTRY_ALIGN (MEMMOVE, 6)
-
-	DELOUSE (0)
-	DELOUSE (1)
-	DELOUSE (2)
-
-	sub	tmp1, dstin, src
-	cmp	count, 96
-	ccmp	tmp1, count, 2, hi
-	b.lo	L(move_long)
-
-	/* Common case falls through into memcpy.  */
-END (MEMMOVE)
-libc_hidden_builtin_def (MEMMOVE)
-ENTRY (MEMCPY)
-
-	DELOUSE (0)
-	DELOUSE (1)
-	DELOUSE (2)
-
-	prfm	PLDL1KEEP, [src]
-	add	srcend, src, count
-	add	dstend, dstin, count
-	cmp	count, 16
-	b.ls	L(copy16)
-	cmp	count, 96
-	b.hi	L(copy_long)
-
-	/* Medium copies: 17..96 bytes.  */
-	sub	tmp1, count, 1
-	ldp	A_l, A_h, [src]
-	tbnz	tmp1, 6, L(copy96)
-	ldp	D_l, D_h, [srcend, -16]
-	tbz	tmp1, 5, 1f
-	ldp	B_l, B_h, [src, 16]
-	ldp	C_l, C_h, [srcend, -32]
-	stp	B_l, B_h, [dstin, 16]
-	stp	C_l, C_h, [dstend, -32]
-1:
-	stp	A_l, A_h, [dstin]
-	stp	D_l, D_h, [dstend, -16]
-	ret
-
-	.p2align 4
-	/* Small copies: 0..16 bytes.  */
-L(copy16):
-	cmp	count, 8
-	b.lo	1f
-	ldr	A_l, [src]
-	ldr	A_h, [srcend, -8]
-	str	A_l, [dstin]
-	str	A_h, [dstend, -8]
-	ret
-	.p2align 4
-1:
-	tbz	count, 2, 1f
-	ldr	A_lw, [src]
-	ldr	A_hw, [srcend, -4]
-	str	A_lw, [dstin]
-	str	A_hw, [dstend, -4]
-	ret
-
-	/* Copy 0..3 bytes.  Use a branchless sequence that copies the same
-	   byte 3 times if count==1, or the 2nd byte twice if count==2.  */
-1:
-	cbz	count, 2f
-	lsr	tmp1, count, 1
-	ldrb	A_lw, [src]
-	ldrb	A_hw, [srcend, -1]
-	ldrb	B_lw, [src, tmp1]
-	strb	A_lw, [dstin]
-	strb	B_lw, [dstin, tmp1]
-	strb	A_hw, [dstend, -1]
-2:	ret
-
-	.p2align 4
-	/* Copy 64..96 bytes.  Copy 64 bytes from the start and
-	   32 bytes from the end.  */
-L(copy96):
-	ldp	B_l, B_h, [src, 16]
-	ldp	C_l, C_h, [src, 32]
-	ldp	D_l, D_h, [src, 48]
-	ldp	E_l, E_h, [srcend, -32]
-	ldp	F_l, F_h, [srcend, -16]
-	stp	A_l, A_h, [dstin]
-	stp	B_l, B_h, [dstin, 16]
-	stp	C_l, C_h, [dstin, 32]
-	stp	D_l, D_h, [dstin, 48]
-	stp	E_l, E_h, [dstend, -32]
-	stp	F_l, F_h, [dstend, -16]
-	ret
-
-	/* Align DST to 16 byte alignment so that we don't cross cache line
-	   boundaries on both loads and stores.  There are at least 96 bytes
-	   to copy, so copy 16 bytes unaligned and then align.  The loop
-	   copies 64 bytes per iteration and prefetches one iteration ahead.  */
-
-	.p2align 4
-L(copy_long):
-
-# ifdef USE_THUNDERX
-
-	/* On thunderx, large memcpy's are helped by software prefetching.
-	   This loop is identical to the one below it but with prefetching
-	   instructions included.  For loops that are less than 32768 bytes,
-	   the prefetching does not help and slow the code down so we only
-	   use the prefetching loop for the largest memcpys.  */
-
-	cmp	count, #32768
-	b.lo	L(copy_long_without_prefetch)
-	and	tmp1, dstin, 15
-	bic	dst, dstin, 15
-	ldp	D_l, D_h, [src]
-	sub	src, src, tmp1
-	prfm	pldl1strm, [src, 384]
-	add	count, count, tmp1	/* Count is now 16 too large.  */
-	ldp	A_l, A_h, [src, 16]
-	stp	D_l, D_h, [dstin]
-	ldp	B_l, B_h, [src, 32]
-	ldp	C_l, C_h, [src, 48]
-	ldp	D_l, D_h, [src, 64]!
-	subs	count, count, 128 + 16	/* Test and readjust count.  */
-
-L(prefetch_loop64):
-	tbz	src, #6, 1f
-	prfm	pldl1strm, [src, 512]
-1:
-	stp	A_l, A_h, [dst, 16]
-	ldp	A_l, A_h, [src, 16]
-	stp	B_l, B_h, [dst, 32]
-	ldp	B_l, B_h, [src, 32]
-	stp	C_l, C_h, [dst, 48]
-	ldp	C_l, C_h, [src, 48]
-	stp	D_l, D_h, [dst, 64]!
-	ldp	D_l, D_h, [src, 64]!
-	subs	count, count, 64
-	b.hi	L(prefetch_loop64)
-	b	L(last64)
-
-L(copy_long_without_prefetch):
-# endif
-
-	and	tmp1, dstin, 15
-	bic	dst, dstin, 15
-	ldp	D_l, D_h, [src]
-	sub	src, src, tmp1
-	add	count, count, tmp1	/* Count is now 16 too large.  */
-	ldp	A_l, A_h, [src, 16]
-	stp	D_l, D_h, [dstin]
-	ldp	B_l, B_h, [src, 32]
-	ldp	C_l, C_h, [src, 48]
-	ldp	D_l, D_h, [src, 64]!
-	subs	count, count, 128 + 16	/* Test and readjust count.  */
-	b.ls	L(last64)
-L(loop64):
-	stp	A_l, A_h, [dst, 16]
-	ldp	A_l, A_h, [src, 16]
-	stp	B_l, B_h, [dst, 32]
-	ldp	B_l, B_h, [src, 32]
-	stp	C_l, C_h, [dst, 48]
-	ldp	C_l, C_h, [src, 48]
-	stp	D_l, D_h, [dst, 64]!
-	ldp	D_l, D_h, [src, 64]!
-	subs	count, count, 64
-	b.hi	L(loop64)
-
-	/* Write the last full set of 64 bytes.  The remainder is at most 64
-	   bytes, so it is safe to always copy 64 bytes from the end even if
-	   there is just 1 byte left.  */
-L(last64):
-	ldp	E_l, E_h, [srcend, -64]
-	stp	A_l, A_h, [dst, 16]
-	ldp	A_l, A_h, [srcend, -48]
-	stp	B_l, B_h, [dst, 32]
-	ldp	B_l, B_h, [srcend, -32]
-	stp	C_l, C_h, [dst, 48]
-	ldp	C_l, C_h, [srcend, -16]
-	stp	D_l, D_h, [dst, 64]
-	stp	E_l, E_h, [dstend, -64]
-	stp	A_l, A_h, [dstend, -48]
-	stp	B_l, B_h, [dstend, -32]
-	stp	C_l, C_h, [dstend, -16]
-	ret
-
-	.p2align 4
-L(move_long):
-	cbz	tmp1, 3f
-
-	add	srcend, src, count
-	add	dstend, dstin, count
-
-	/* Align dstend to 16 byte alignment so that we don't cross cache line
-	   boundaries on both loads and stores.  There are at least 96 bytes
-	   to copy, so copy 16 bytes unaligned and then align.  The loop
-	   copies 64 bytes per iteration and prefetches one iteration ahead.  */
-
-	and	tmp1, dstend, 15
-	ldp	D_l, D_h, [srcend, -16]
-	sub	srcend, srcend, tmp1
-	sub	count, count, tmp1
-	ldp	A_l, A_h, [srcend, -16]
-	stp	D_l, D_h, [dstend, -16]
-	ldp	B_l, B_h, [srcend, -32]
-	ldp	C_l, C_h, [srcend, -48]
-	ldp	D_l, D_h, [srcend, -64]!
-	sub	dstend, dstend, tmp1
-	subs	count, count, 128
-	b.ls	2f
-
-	nop
-1:
-	stp	A_l, A_h, [dstend, -16]
-	ldp	A_l, A_h, [srcend, -16]
-	stp	B_l, B_h, [dstend, -32]
-	ldp	B_l, B_h, [srcend, -32]
-	stp	C_l, C_h, [dstend, -48]
-	ldp	C_l, C_h, [srcend, -48]
-	stp	D_l, D_h, [dstend, -64]!
-	ldp	D_l, D_h, [srcend, -64]!
-	subs	count, count, 64
-	b.hi	1b
-
-	/* Write the last full set of 64 bytes.  The remainder is at most 64
-	   bytes, so it is safe to always copy 64 bytes from the start even if
-	   there is just 1 byte left.  */
-2:
-	ldp	G_l, G_h, [src, 48]
-	stp	A_l, A_h, [dstend, -16]
-	ldp	A_l, A_h, [src, 32]
-	stp	B_l, B_h, [dstend, -32]
-	ldp	B_l, B_h, [src, 16]
-	stp	C_l, C_h, [dstend, -48]
-	ldp	C_l, C_h, [src]
-	stp	D_l, D_h, [dstend, -64]
-	stp	G_l, G_h, [dstin, 48]
-	stp	A_l, A_h, [dstin, 32]
-	stp	B_l, B_h, [dstin, 16]
-	stp	C_l, C_h, [dstin]
-3:	ret
-
-END (MEMCPY)
-libc_hidden_builtin_def (MEMCPY)
-
-#endif
diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
deleted file mode 100644
index 34c6b29bd5..0000000000
--- a/sysdeps/aarch64/multiarch/memmove.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Multiple versions of memmove.  AARCH64 version.
-   Copyright (C) 2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* Define multiple versions only for the definition in libc.  */
-
-#if IS_IN (libc)
-/* Redefine memmove so that the compiler won't complain about the type
-   mismatch with the IFUNC selector in strong_alias, below.  */
-# undef memmove
-# define memmove __redirect_memmove
-# include <string.h>
-# include <init-arch.h>
-
-extern __typeof (__redirect_memmove) __libc_memmove;
-
-extern __typeof (__redirect_memmove) __memmove_generic attribute_hidden;
-extern __typeof (__redirect_memmove) __memmove_thunderx attribute_hidden;
-
-libc_ifunc (__libc_memmove,
-	    IS_THUNDERX (midr) ? __memmove_thunderx : __memmove_generic);
-
-# undef memmove
-strong_alias (__libc_memmove, memmove);
-#endif
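
Note: the memcpy.c and memmove.c deleted above select an implementation at load time through glibc's libc_ifunc macro, which wraps the ELF IFUNC mechanism: a resolver looks at the cached MIDR_EL1 value and returns either the ThunderX or the generic entry point. The following is a rough sketch only, not glibc code, of the same pattern written with the plain GCC/Clang ifunc attribute; cpu_is_thunderx() is a hypothetical stand-in for glibc's internal IS_THUNDERX (midr) test, and my_memcpy is an illustrative name.

    #include <stddef.h>
    #include <string.h>

    /* Two candidate implementations; a real build would supply tuned bodies.  */
    static void *my_memcpy_generic (void *dst, const void *src, size_t n)
    { return memcpy (dst, src, n); }

    static void *my_memcpy_thunderx (void *dst, const void *src, size_t n)
    { return memcpy (dst, src, n); }

    /* Hypothetical CPU probe; glibc instead reads MIDR_EL1 once in its
       cpu-features code and caches it in GLRO(dl_aarch64_cpu_features).  */
    static int cpu_is_thunderx (void) { return 0; }

    /* The resolver runs once, when the symbol is bound; whatever it returns
       becomes the target of every later call to my_memcpy.  */
    static void *(*resolve_my_memcpy (void)) (void *, const void *, size_t)
    {
      return cpu_is_thunderx () ? my_memcpy_thunderx : my_memcpy_generic;
    }

    void *my_memcpy (void *dst, const void *src, size_t n)
         __attribute__ ((ifunc ("resolve_my_memcpy")));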