aboutsummaryrefslogtreecommitdiff
path: root/sysdeps
diff options
context:
space:
mode:
authorAdhemerval Zanella <azanella@linux.vnet.ibm.com>2013-03-29 18:15:28 -0500
committerAdhemerval Zanella <azanella@linux.vnet.ibm.com>2013-04-01 06:36:51 -0500
commit60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f (patch)
tree479eb937a9496619acfb24db9a6a48831f9c3fb3 /sysdeps
parent6142896d53d0e8e8abe5dd0494380c45acc1a919 (diff)
downloadglibc-60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f.tar
glibc-60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f.tar.gz
glibc-60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f.tar.bz2
glibc-60c414c346a1d5ef0510ffbdc0ab75f288ee4d3f.zip
PowerPC: remove branch prediction from rint implementation
The branch prediction hints actually hurt performance in this case. The assembly implementation makes two assumptions: 1. 'fabs (x) < 2^52' is unlikely and 2. 'x > 0.0' is unlikely (if 1. is true). Since it is a general floating-point function, the expected input is not bounded, so it is better to let the hardware handle the branches.
Diffstat (limited to 'sysdeps')
-rw-r--r--sysdeps/powerpc/powerpc32/fpu/s_rint.S6
-rw-r--r--sysdeps/powerpc/powerpc32/fpu/s_rintf.S6
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/s_rint.S6
-rw-r--r--sysdeps/powerpc/powerpc64/fpu/s_rintf.S6
4 files changed, 12 insertions, 12 deletions
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rint.S b/sysdeps/powerpc/powerpc32/fpu/s_rint.S
index f3cd036680..f04055f461 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_rint.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_rint.S
@@ -45,14 +45,14 @@ ENTRY (__rint)
fsub fp12,fp13,fp13 /* generate 0.0 */
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
- bnllr- cr7
- bng- cr6,.L4
+ bnllr cr7
+ bng cr6,.L4
fadd fp1,fp1,fp13 /* x+= TWO52; */
fsub fp1,fp1,fp13 /* x-= TWO52; */
fabs fp1,fp1 /* if (x == 0.0) */
blr /* x = 0.0; */
.L4:
- bnllr- cr6 /* if (x < 0.0) */
+ bnllr cr6 /* if (x < 0.0) */
fsub fp1,fp1,fp13 /* x-= TWO52; */
fadd fp1,fp1,fp13 /* x+= TWO52; */
fnabs fp1,fp1 /* if (x == 0.0) */
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
index 247dd4a14d..e0301af2e7 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
@@ -41,14 +41,14 @@ ENTRY (__rintf)
fsubs fp12,fp13,fp13 /* generate 0.0 */
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
- bnllr- cr7
- bng- cr6,.L4
+ bnllr cr7
+ bng cr6,.L4
fadds fp1,fp1,fp13 /* x+= TWO23; */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
fabs fp1,fp1 /* if (x == 0.0) */
blr /* x = 0.0; */
.L4:
- bnllr- cr6 /* if (x < 0.0) */
+ bnllr cr6 /* if (x < 0.0) */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
fadds fp1,fp1,fp13 /* x+= TWO23; */
fnabs fp1,fp1 /* if (x == 0.0) */
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rint.S b/sysdeps/powerpc/powerpc64/fpu/s_rint.S
index f3339727f4..57e3759bf0 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_rint.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_rint.S
@@ -34,14 +34,14 @@ EALIGN (__rint, 4, 0)
fsub fp12,fp13,fp13 /* generate 0.0 */
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
- bnllr- cr7
- bng- cr6,.L4
+ bnllr cr7
+ bng cr6,.L4
fadd fp1,fp1,fp13 /* x+= TWO52; */
fsub fp1,fp1,fp13 /* x-= TWO52; */
fabs fp1,fp1 /* if (x == 0.0) */
blr /* x = 0.0; */
.L4:
- bnllr- cr6 /* if (x < 0.0) */
+ bnllr cr6 /* if (x < 0.0) */
fsub fp1,fp1,fp13 /* x-= TWO52; */
fadd fp1,fp1,fp13 /* x+= TWO52; */
fnabs fp1,fp1 /* if (x == 0.0) */
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
index 26b08721c7..cb28ec748d 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
@@ -30,14 +30,14 @@ EALIGN (__rintf, 4, 0)
fsubs fp12,fp13,fp13 /* generate 0.0 */
fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */
fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
- bnllr- cr7
- bng- cr6,.L4
+ bnllr cr7
+ bng cr6,.L4
fadds fp1,fp1,fp13 /* x+= TWO23; */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
fabs fp1,fp1 /* if (x == 0.0) */
blr /* x = 0.0; */
.L4:
- bnllr- cr6 /* if (x < 0.0) */
+ bnllr cr6 /* if (x < 0.0) */
fsubs fp1,fp1,fp13 /* x-= TWO23; */
fadds fp1,fp1,fp13 /* x+= TWO23; */
fnabs fp1,fp1 /* if (x == 0.0) */