aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/sparc/sparc64/multiarch
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2012-05-31 14:19:30 -0700
committer: David S. Miller <davem@davemloft.net> 2012-05-31 14:19:30 -0700
commit: f230c29b40cc36ce62387664be92c3cf94119efe (patch)
tree: 26028666b279654830d54846e9194429accfd9f5 /sysdeps/sparc/sparc64/multiarch
parent: fed806c3af7b6421e17ac50f8129dbe4fd693e4a (diff)
downloadglibc-f230c29b40cc36ce62387664be92c3cf94119efe.tar
glibc-f230c29b40cc36ce62387664be92c3cf94119efe.tar.gz
glibc-f230c29b40cc36ce62387664be92c3cf94119efe.tar.bz2
glibc-f230c29b40cc36ce62387664be92c3cf94119efe.zip
Avoid performance penalty in sparc optimized memcpy/memset.
fmovd clears the current exception field in the %fsr; fsrc2 does not, and therefore runs more efficiently on some CPUs.
* sysdeps/sparc/sparc64/memcpy.S: Use fsrc2 to move 64-bit values between float registers.
* sysdeps/sparc/sparc64/memset.S: Likewise.
* sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S: Likewise.
Diffstat (limited to 'sysdeps/sparc/sparc64/multiarch')
-rw-r--r-- sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S 72
1 files changed, 36 insertions, 36 deletions
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
index 0e9442de5f..fb815e5e57 100644
--- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
@@ -58,49 +58,49 @@
faligndata %x7, %x8, %f14;
#define FREG_MOVE_1(x0) \
- fmovd %x0, %f0;
+ fsrc2 %x0, %f0;
#define FREG_MOVE_2(x0, x1) \
- fmovd %x0, %f0; \
- fmovd %x1, %f2;
+ fsrc2 %x0, %f0; \
+ fsrc2 %x1, %f2;
#define FREG_MOVE_3(x0, x1, x2) \
- fmovd %x0, %f0; \
- fmovd %x1, %f2; \
- fmovd %x2, %f4;
+ fsrc2 %x0, %f0; \
+ fsrc2 %x1, %f2; \
+ fsrc2 %x2, %f4;
#define FREG_MOVE_4(x0, x1, x2, x3) \
- fmovd %x0, %f0; \
- fmovd %x1, %f2; \
- fmovd %x2, %f4; \
- fmovd %x3, %f6;
+ fsrc2 %x0, %f0; \
+ fsrc2 %x1, %f2; \
+ fsrc2 %x2, %f4; \
+ fsrc2 %x3, %f6;
#define FREG_MOVE_5(x0, x1, x2, x3, x4) \
- fmovd %x0, %f0; \
- fmovd %x1, %f2; \
- fmovd %x2, %f4; \
- fmovd %x3, %f6; \
- fmovd %x4, %f8;
+ fsrc2 %x0, %f0; \
+ fsrc2 %x1, %f2; \
+ fsrc2 %x2, %f4; \
+ fsrc2 %x3, %f6; \
+ fsrc2 %x4, %f8;
#define FREG_MOVE_6(x0, x1, x2, x3, x4, x5) \
- fmovd %x0, %f0; \
- fmovd %x1, %f2; \
- fmovd %x2, %f4; \
- fmovd %x3, %f6; \
- fmovd %x4, %f8; \
- fmovd %x5, %f10;
+ fsrc2 %x0, %f0; \
+ fsrc2 %x1, %f2; \
+ fsrc2 %x2, %f4; \
+ fsrc2 %x3, %f6; \
+ fsrc2 %x4, %f8; \
+ fsrc2 %x5, %f10;
#define FREG_MOVE_7(x0, x1, x2, x3, x4, x5, x6) \
- fmovd %x0, %f0; \
- fmovd %x1, %f2; \
- fmovd %x2, %f4; \
- fmovd %x3, %f6; \
- fmovd %x4, %f8; \
- fmovd %x5, %f10; \
- fmovd %x6, %f12;
+ fsrc2 %x0, %f0; \
+ fsrc2 %x1, %f2; \
+ fsrc2 %x2, %f4; \
+ fsrc2 %x3, %f6; \
+ fsrc2 %x4, %f8; \
+ fsrc2 %x5, %f10; \
+ fsrc2 %x6, %f12;
#define FREG_MOVE_8(x0, x1, x2, x3, x4, x5, x6, x7) \
- fmovd %x0, %f0; \
- fmovd %x1, %f2; \
- fmovd %x2, %f4; \
- fmovd %x3, %f6; \
- fmovd %x4, %f8; \
- fmovd %x5, %f10; \
- fmovd %x6, %f12; \
- fmovd %x7, %f14;
+ fsrc2 %x0, %f0; \
+ fsrc2 %x1, %f2; \
+ fsrc2 %x2, %f4; \
+ fsrc2 %x3, %f6; \
+ fsrc2 %x4, %f8; \
+ fsrc2 %x5, %f10; \
+ fsrc2 %x6, %f12; \
+ fsrc2 %x7, %f14;
#define FREG_LOAD_1(base, x0) \
LOAD(ldd, base + 0x00, %x0)
#define FREG_LOAD_2(base, x0, x1) \