diff options
author | Steve Ellcey <sellcey@caviumnetworks.com> | 2018-02-22 08:38:47 -0800 |
---|---|---|
committer | Steve Ellcey <sellcey@caviumnetworks.com> | 2018-02-22 08:38:47 -0800 |
commit | e9537dddc7c7c7b60b55ed845542c8d586164488 (patch) | |
tree | ae653efab8d31c8d5056f0d29fef30c19c0fd260 /sysdeps/aarch64/multiarch/memcpy_thunderx.S | |
parent | da81ae645d8ee89052f109c814a68a9489f562e6 (diff) | |
download | glibc-e9537dddc7c7c7b60b55ed845542c8d586164488.tar glibc-e9537dddc7c7c7b60b55ed845542c8d586164488.tar.gz glibc-e9537dddc7c7c7b60b55ed845542c8d586164488.tar.bz2 glibc-e9537dddc7c7c7b60b55ed845542c8d586164488.zip |
IFUNC for Cavium ThunderX2
* sysdeps/aarch64/multiarch/Makefile (sysdep_routines):
Add memcpy_thunderx2.
* sysdeps/aarch64/multiarch/ifunc-impl-list.c (MAX_IFUNC):
Increment to 4.
(__libc_ifunc_impl_list): Add __memcpy_thunderx2.
* sysdeps/aarch64/multiarch/memcpy.c (libc_ifunc): Add IS_THUNDERX2
and IS_THUNDERX2PA checks.
* sysdeps/aarch64/multiarch/memcpy_thunderx.S (USE_THUNDERX2):
Use macro to set name appropriately.
(memcpy): Use USE_THUNDERX2 macro to modify prefetches.
* sysdeps/aarch64/multiarch/memcpy_thunderx2.S: New file.
* sysdeps/unix/sysv/linux/aarch64/cpu-features.h (IS_THUNDERX2PA):
New macro.
(IS_THUNDERX2): New macro.
Diffstat (limited to 'sysdeps/aarch64/multiarch/memcpy_thunderx.S')
-rw-r--r-- | sysdeps/aarch64/multiarch/memcpy_thunderx.S | 22 |
1 files changed, 16 insertions, 6 deletions
diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
index 4f6921d680..de494d933d 100644
--- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S
+++ b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
@@ -74,11 +74,13 @@
 
 #if IS_IN (libc)
 
-# undef MEMCPY
-# define MEMCPY __memcpy_thunderx
-# undef MEMMOVE
-# define MEMMOVE __memmove_thunderx
-# define USE_THUNDERX
+# ifndef USE_THUNDERX2
+# undef MEMCPY
+# define MEMCPY __memcpy_thunderx
+# undef MEMMOVE
+# define MEMMOVE __memmove_thunderx
+# define USE_THUNDERX
+# endif
 
 ENTRY_ALIGN (MEMMOVE, 6)
 
@@ -180,7 +182,7 @@ L(copy96):
 	.p2align 4
 L(copy_long):
 
-# ifdef USE_THUNDERX
+# if defined(USE_THUNDERX) || defined (USE_THUNDERX2)
 
 	/* On thunderx, large memcpy's are helped by software prefetching.
 	   This loop is identical to the one below it but with prefetching
@@ -194,7 +196,11 @@ L(copy_long):
 	bic	dst, dstin, 15
 	ldp	D_l, D_h, [src]
 	sub	src, src, tmp1
+# if defined(USE_THUNDERX)
 	prfm	pldl1strm, [src, 384]
+# elif defined(USE_THUNDERX2)
+	prfm	pldl1strm, [src, 256]
+# endif
 	add	count, count, tmp1	/* Count is now 16 too large.  */
 	ldp	A_l, A_h, [src, 16]
 	stp	D_l, D_h, [dstin]
@@ -204,9 +210,13 @@ L(copy_long):
 	subs	count, count, 128 + 16	/* Test and readjust count.  */
 
 L(prefetch_loop64):
+# if defined(USE_THUNDERX)
 	tbz	src, #6, 1f
 	prfm	pldl1strm, [src, 512]
 1:
+# elif defined(USE_THUNDERX2)
+	prfm	pldl1strm, [src, 256]
+# endif
 	stp	A_l, A_h, [dst, 16]
 	ldp	A_l, A_h, [src, 16]
 	stp	B_l, B_h, [dst, 32]