diff options
author | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2013-03-29 18:15:28 -0500 |
---|---|---|
committer | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2013-04-01 06:36:51 -0500 |
commit | 60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f (patch) | |
tree | 479eb937a9496619acfb24db9a6a48831f9c3fb3 | |
parent | 6142896d53d0e8e8abe5dd0494380c45acc1a919 (diff) | |
download | glibc-60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f.tar glibc-60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f.tar.gz glibc-60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f.tar.bz2 glibc-60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f.zip |
PowerPC: remove branch prediction from rint implementation
The branch prediction hints actually hurt performance in this case.
The assembly implementation makes two assumptions: 1. 'fabs (x) < 2^52'
is unlikely and 2. 'x > 0.0' is unlikely (if 1. is true). Since it is a
general floating point function, the expected input is not bounded, so
it is better to let the hardware handle the branches.
-rw-r--r-- | benchtests/Makefile | 7 | ||||
-rw-r--r-- | benchtests/rint-inputs | 4 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/s_rint.S | 6 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/fpu/s_rintf.S | 6 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/fpu/s_rint.S | 6 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/fpu/s_rintf.S | 6 |
6 files changed, 22 insertions, 13 deletions
diff --git a/benchtests/Makefile b/benchtests/Makefile index 74938b925c..cc54b81faa 100644 --- a/benchtests/Makefile +++ b/benchtests/Makefile @@ -43,7 +43,7 @@ # See pow-inputs for an example. subdir := benchtests -bench := exp pow +bench := exp pow rint exp-ITER = 100000 exp-ARGLIST = double @@ -55,5 +55,10 @@ pow-ARGLIST = double:double pow-RET = double LDFLAGS-bench-pow = -lm +rint-ITER = 250000000 +rint-ARGLIST = double +rint-RET = double +LDFLAGS-bench-rint = -lm + include ../Makeconfig include ../Rules diff --git a/benchtests/rint-inputs b/benchtests/rint-inputs new file mode 100644 index 0000000000..a5f83dc8f9 --- /dev/null +++ b/benchtests/rint-inputs @@ -0,0 +1,4 @@ +78.5 +-78.5 +4503599627370497.0 +-4503599627370497.0 diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rint.S b/sysdeps/powerpc/powerpc32/fpu/s_rint.S index f3cd036680..f04055f461 100644 --- a/sysdeps/powerpc/powerpc32/fpu/s_rint.S +++ b/sysdeps/powerpc/powerpc32/fpu/s_rint.S @@ -45,14 +45,14 @@ ENTRY (__rint) fsub fp12,fp13,fp13 /* generate 0.0 */ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ - bnllr- cr7 - bng- cr6,.L4 + bnllr cr7 + bng cr6,.L4 fadd fp1,fp1,fp13 /* x+= TWO52; */ fsub fp1,fp1,fp13 /* x-= TWO52; */ fabs fp1,fp1 /* if (x == 0.0) */ blr /* x = 0.0; */ .L4: - bnllr- cr6 /* if (x < 0.0) */ + bnllr cr6 /* if (x < 0.0) */ fsub fp1,fp1,fp13 /* x-= TWO52; */ fadd fp1,fp1,fp13 /* x+= TWO52; */ fnabs fp1,fp1 /* if (x == 0.0) */ diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S index 247dd4a14d..e0301af2e7 100644 --- a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S +++ b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S @@ -41,14 +41,14 @@ ENTRY (__rintf) fsubs fp12,fp13,fp13 /* generate 0.0 */ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ - bnllr- cr7 - bng- cr6,.L4 + bnllr cr7 + bng cr6,.L4 fadds fp1,fp1,fp13 /* x+= TWO23; */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ fabs fp1,fp1 /* if (x == 
0.0) */ blr /* x = 0.0; */ .L4: - bnllr- cr6 /* if (x < 0.0) */ + bnllr cr6 /* if (x < 0.0) */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ fadds fp1,fp1,fp13 /* x+= TWO23; */ fnabs fp1,fp1 /* if (x == 0.0) */ diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rint.S b/sysdeps/powerpc/powerpc64/fpu/s_rint.S index f3339727f4..57e3759bf0 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_rint.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_rint.S @@ -34,14 +34,14 @@ EALIGN (__rint, 4, 0) fsub fp12,fp13,fp13 /* generate 0.0 */ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ - bnllr- cr7 - bng- cr6,.L4 + bnllr cr7 + bng cr6,.L4 fadd fp1,fp1,fp13 /* x+= TWO52; */ fsub fp1,fp1,fp13 /* x-= TWO52; */ fabs fp1,fp1 /* if (x == 0.0) */ blr /* x = 0.0; */ .L4: - bnllr- cr6 /* if (x < 0.0) */ + bnllr cr6 /* if (x < 0.0) */ fsub fp1,fp1,fp13 /* x-= TWO52; */ fadd fp1,fp1,fp13 /* x+= TWO52; */ fnabs fp1,fp1 /* if (x == 0.0) */ diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S index 26b08721c7..cb28ec748d 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S @@ -30,14 +30,14 @@ EALIGN (__rintf, 4, 0) fsubs fp12,fp13,fp13 /* generate 0.0 */ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ - bnllr- cr7 - bng- cr6,.L4 + bnllr cr7 + bng cr6,.L4 fadds fp1,fp1,fp13 /* x+= TWO23; */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ fabs fp1,fp1 /* if (x == 0.0) */ blr /* x = 0.0; */ .L4: - bnllr- cr6 /* if (x < 0.0) */ + bnllr cr6 /* if (x < 0.0) */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ fadds fp1,fp1,fp13 /* x+= TWO23; */ fnabs fp1,fp1 /* if (x == 0.0) */ |