diff options
author | David S. Miller <davem@davemloft.net> | 2013-01-14 21:47:29 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-01-14 21:47:29 -0800 |
commit | 8b954ab9b808a51d8cf6a90c7d1d46a3366e3274 (patch) | |
tree | cbf6dba7b97106b9ce476078cda8e8f0a1e7bf7c /sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S | |
parent | 65a82e3dd538aabcdbeacf87fcee32f4e355e5b6 (diff) | |
download | glibc-8b954ab9b808a51d8cf6a90c7d1d46a3366e3274.tar glibc-8b954ab9b808a51d8cf6a90c7d1d46a3366e3274.tar.gz glibc-8b954ab9b808a51d8cf6a90c7d1d46a3366e3274.tar.bz2 glibc-8b954ab9b808a51d8cf6a90c7d1d46a3366e3274.zip |
Optimize sparc {ceil,floor}{,f} using vis2 'siam' instruction.
* sysdeps/sparc/sparc-ifunc.h (SPARC_ASM_IFUNC2): New macro.
(SPARC_ASM_VIS2_IFUNC): Likewise.
(SPARC_ASM_VIS3_VIS2_IFUNC): Likewise.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis3.S: Make
use of 'siam' instruction.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis3.S:
Likewise.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis3.S:
Likewise.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis3.S:
Likewise.
* sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.S: Likewise.
* sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S: Likewise.
* sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.S: Likewise.
* sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.S: Likewise.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis2.S: New
file.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis2.S: New
file.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis2.S: New
file.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis2.S: New
file.
* sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis2.S: New file.
* sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis2.S: New file.
* sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis2.S: New file.
* sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis2.S: New file.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil.S: Hook in
new VIS2 routines.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf.S: Likewise.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor.S: Likewise.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf.S:
Likewise.
* sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.S: Likewise.
* sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.S: Likewise.
* sysdeps/sparc/sparc64/fpu/multiarch/s_floor.S: Likewise.
* sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.S: Likewise.
* sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile: Add new VIS2
routines to libm-sysdep_routines.
* sysdeps/sparc/sparc64/fpu/multiarch/Makefile: Likewise.
Diffstat (limited to 'sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S')
-rw-r--r-- | sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S | 37 |
1 files changed, 11 insertions, 26 deletions
diff --git a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S index 8c635907d3..b3ec3484db 100644 --- a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S +++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S @@ -19,55 +19,40 @@ #include <sysdep.h> - /* Since changing the rounding mode is extremely expensive, we - try to round up using a method that is rounding mode - agnostic. + /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override + the rounding mode during this routine. We add then subtract (or subtract then add if the initial value was negative) 2**23 to the value, then subtract it back out. - This will clear out the fractional portion of the value. - One of two things will happen for non-whole initial values. - Either the rounding mode will round it up, or it will be - rounded down. If the value started out whole, it will be - equal after the addition and subtraction. This means we - can accurately detect with one test whether we need to add - another 1.0 to round it up properly. + This will clear out the fractional portion of the value and, + with suitable 'siam' initiated rounding mode settings, round + the final result in the proper direction. - VIS instructions are used to facilitate the formation of - easier constants, and the propagation of the sign bit. */ + We also use VIS3 moves to avoid using the stack to transfer + values between float and integer registers.
*/ #define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ -#define ONE_DOT_ZERO 0x3f800000 /* 1.0 */ #define ZERO %f10 /* 0.0 */ #define SIGN_BIT %f12 /* -0.0 */ ENTRY (__ceilf_vis3) sethi %hi(TWO_TWENTYTHREE), %o2 - sethi %hi(ONE_DOT_ZERO), %o3 fzeros ZERO - fnegs ZERO, SIGN_BIT - movwtos %o2, %f16 fabss %f1, %f14 - fcmps %fcc3, %f14, %f16 - fmovsuge %fcc3, ZERO, %f16 fands %f1, SIGN_BIT, SIGN_BIT - fors %f16, SIGN_BIT, %f16 + siam (1 << 2) | 2 fadds %f1, %f16, %f5 + siam (1 << 2) | 0 fsubs %f5, %f16, %f5 - fcmps %fcc2, %f5, %f1 - movwtos %o3, %f9 - - fmovsuge %fcc2, ZERO, %f9 - fadds %f5, %f9, %f0 - fabss %f0, %f0 + siam (0 << 2) retl - fors %f0, SIGN_BIT, %f0 + fors %f5, SIGN_BIT, %f0 END (__ceilf_vis3) |