aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/aarch64/multiarch/memcpy_thunderx.S
diff options
context:
space:
mode:
author: Steve Ellcey <sellcey@caviumnetworks.com> 2018-02-22 08:38:47 -0800
committer: Steve Ellcey <sellcey@caviumnetworks.com> 2018-02-22 08:38:47 -0800
commit: e9537dddc7c7c7b60b55ed845542c8d586164488 (patch)
tree: ae653efab8d31c8d5056f0d29fef30c19c0fd260 /sysdeps/aarch64/multiarch/memcpy_thunderx.S
parent: da81ae645d8ee89052f109c814a68a9489f562e6 (diff)
download: glibc-e9537dddc7c7c7b60b55ed845542c8d586164488.tar
glibc-e9537dddc7c7c7b60b55ed845542c8d586164488.tar.gz
glibc-e9537dddc7c7c7b60b55ed845542c8d586164488.tar.bz2
glibc-e9537dddc7c7c7b60b55ed845542c8d586164488.zip
IFUNC for Cavium ThunderX2
* sysdeps/aarch64/multiarch/Makefile (sysdep_routines): Add memcpy_thunderx2.
* sysdeps/aarch64/multiarch/ifunc-impl-list.c (MAX_IFUNC): Increment to 4.
(__libc_ifunc_impl_list): Add __memcpy_thunderx2.
* sysdeps/aarch64/multiarch/memcpy.c (libc_ifunc): Add IS_THUNDERX2 and IS_THUNDERX2PA checks.
* sysdeps/aarch64/multiarch/memcpy_thunderx.S (USE_THUNDERX2): Use macro to set name appropriately.
(memcpy): Use USE_THUNDERX2 macro to modify prefetches.
* sysdeps/aarch64/multiarch/memcpy_thunderx2.S: New file.
* sysdeps/unix/sysv/linux/aarch64/cpu-features.h (IS_THUNDERX2PA): New macro.
(IS_THUNDERX2): New macro.
Diffstat (limited to 'sysdeps/aarch64/multiarch/memcpy_thunderx.S')
-rw-r--r-- sysdeps/aarch64/multiarch/memcpy_thunderx.S 22
1 files changed, 16 insertions, 6 deletions
diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
index 4f6921d680..de494d933d 100644
--- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S
+++ b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
@@ -74,11 +74,13 @@
#if IS_IN (libc)
-# undef MEMCPY
-# define MEMCPY __memcpy_thunderx
-# undef MEMMOVE
-# define MEMMOVE __memmove_thunderx
-# define USE_THUNDERX
+# ifndef USE_THUNDERX2
+# undef MEMCPY
+# define MEMCPY __memcpy_thunderx
+# undef MEMMOVE
+# define MEMMOVE __memmove_thunderx
+# define USE_THUNDERX
+# endif
ENTRY_ALIGN (MEMMOVE, 6)
@@ -180,7 +182,7 @@ L(copy96):
.p2align 4
L(copy_long):
-# ifdef USE_THUNDERX
+# if defined(USE_THUNDERX) || defined (USE_THUNDERX2)
/* On thunderx, large memcpy's are helped by software prefetching.
This loop is identical to the one below it but with prefetching
@@ -194,7 +196,11 @@ L(copy_long):
bic dst, dstin, 15
ldp D_l, D_h, [src]
sub src, src, tmp1
+# if defined(USE_THUNDERX)
prfm pldl1strm, [src, 384]
+# elif defined(USE_THUNDERX2)
+ prfm pldl1strm, [src, 256]
+# endif
add count, count, tmp1 /* Count is now 16 too large. */
ldp A_l, A_h, [src, 16]
stp D_l, D_h, [dstin]
@@ -204,9 +210,13 @@ L(copy_long):
subs count, count, 128 + 16 /* Test and readjust count. */
L(prefetch_loop64):
+# if defined(USE_THUNDERX)
tbz src, #6, 1f
prfm pldl1strm, [src, 512]
1:
+# elif defined(USE_THUNDERX2)
+ prfm pldl1strm, [src, 256]
+# endif
stp A_l, A_h, [dst, 16]
ldp A_l, A_h, [src, 16]
stp B_l, B_h, [dst, 32]