-rw-r--r--  ChangeLog              7
-rw-r--r--  sysdeps/alpha/div.S  102
-rw-r--r--  sysdeps/alpha/divq.S  12
-rw-r--r--  sysdeps/alpha/ldiv.S 228
-rw-r--r--  sysdeps/alpha/remq.S  12
5 files changed, 223 insertions(+), 138 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 15a0012898..38000c9e9d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2004-07-30 Richard Henderson <rth@redhat.com>
+
+ * sysdeps/alpha/divq.S: Save t3 before it gets clobbered.
+ * sysdeps/alpha/remq.S: Likewise.
+ * sysdeps/alpha/div.S, sysdeps/alpha/ldiv.S: Rewrite with the
+ new division algorithms in divl.S and divq.S respectively.
+
2004-07-28 GOTO Masanori <gotom@debian.or.jp>
* timezone/asia: Update from tzdata2004b.
diff --git a/sysdeps/alpha/div.S b/sysdeps/alpha/div.S
index 2bc3d56a11..e0eb7e9796 100644
--- a/sysdeps/alpha/div.S
+++ b/sysdeps/alpha/div.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Richard Henderson <rth@tamu.edu>.
@@ -17,13 +17,13 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
-#include <sysdep.h>
+#include "div_libc.h"
-#ifdef __linux__
-# include <asm/gentrap.h>
-# include <asm/pal.h>
+#undef FRAME
+#ifdef __alpha_fix__
+#define FRAME 0
#else
-# include <machine/pal.h>
+#define FRAME 16
#endif
.set noat
@@ -32,78 +32,54 @@
.globl div
.ent div
div:
- .frame sp, 0, ra
+ .frame sp, FRAME, ra
+#if FRAME > 0
+ lda sp, -FRAME(sp)
+#endif
#ifdef PROF
+ .set macro
ldgp gp, 0(pv)
lda AT, _mcount
jsr AT, (AT), _mcount
+ .set nomacro
.prologue 1
#else
.prologue 0
#endif
-#define divisor t1
-#define mask t2
-#define quotient t3
-#define modulus t4
-#define tmp1 t5
-#define tmp2 t6
-#define compare t7
-
- /* find correct sign for input to unsigned divide loop. */
- negl a1, modulus # e0 :
- negl a2, divisor # .. e1 :
- sextl a1, a1 # e0 :
- sextl a2, a2 # .. e1 :
- mov zero, quotient # e0 :
- mov 1, mask # .. e1 :
- cmovge a1, a1, modulus # e0 :
- cmovge a2, a2, divisor # .. e1 :
- beq a2, $divbyzero # e1 :
- unop # :
-
- /* shift divisor left, using 3-bit shifts for 32-bit divides as we
- can't overflow. Three-bit shifts will result in looping three
- times less here, but can result in two loops more later. Thus
- using a large shift isn't worth it (and s8addq pairs better than
- a shift). */
-
-1: cmpult divisor, modulus, compare # e0 :
- s8addq divisor, zero, divisor # .. e1 :
- s8addq mask, zero, mask # e0 :
- bne compare, 1b # .. e1 :
-
- /* start to go right again. */
-2: addq quotient, mask, tmp2 # e1 :
- srl mask, 1, mask # .. e0 :
- cmpule divisor, modulus, compare # e0 :
- subq modulus, divisor, tmp1 # .. e1 :
- cmovne compare, tmp2, quotient # e1 :
- srl divisor, 1, divisor # .. e0 :
- cmovne compare, tmp1, modulus # e0 :
- bne mask, 2b # .. e1 :
-
- /* find correct sign for result. */
- xor a1, a2, compare # e0 :
- negl quotient, tmp1 # .. e1 :
- negl modulus, tmp2 # e0 :
- cmovlt compare, tmp1, quotient # .. e1 :
- cmovlt a1, tmp2, modulus # e1 :
-
- /* and store it away in the structure. */
- stl quotient, 0(a0) # .. e0 :
- mov a0, v0 # e1 :
- stl modulus, 4(a0) # .. e0 :
- ret # e1 :
+ beq $18, $divbyzero
+
+ _ITOFT2 $17, $f0, 0, $18, $f1, 8
+
+ cvtqt $f0, $f0
+ cvtqt $f1, $f1
+ divt/c $f0, $f1, $f0
+ cvttq/c $f0, $f0
+
+ _FTOIT $f0, $0, 0
+
+ mull $0, $18, $1
+ subl $17, $1, $1
+
+ stl $0, 0(a0)
+ stl $1, 4(a0)
+ mov a0, v0
+
+#if FRAME > 0
+ lda sp, FRAME(sp)
+#endif
+ ret
$divbyzero:
mov a0, v0
- ldiq a0, GEN_INTDIV
+ lda a0, GEN_INTDIV
call_pal PAL_gentrap
-
- /* if trap returns, return zero. */
stl zero, 0(v0)
stl zero, 4(v0)
+
+#if FRAME > 0
+ lda sp, FRAME(sp)
+#endif
ret
.end div
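
The rewritten div() above replaces the old shift-and-subtract loop with a
single pass through the FP divider.  A minimal C model of why that is safe
for 32-bit operands (div_via_fp is an illustrative name, not a glibc
symbol): both inputs convert exactly into a double's 53-bit mantissa, so
the chopped divide (divt/c) followed by cvttq/c reproduces C's truncating
division exactly, and one multiply-subtract recovers the remainder,
mirroring the mull/subl pair.

#include <stdlib.h>

/* Sketch only: 32-bit operands fit exactly in a double, so the
   truncated FP quotient is the exact integer quotient and no fixup
   pass is needed.  */
static div_t
div_via_fp (int numer, int denom)
{
  div_t res;
  /* cvtqt, cvtqt, divt/c, cvttq/c in the assembly above.  */
  res.quot = (int) ((double) numer / (double) denom);
  /* mull/subl: rem = numer - quot*denom.  */
  res.rem = numer - res.quot * denom;
  return res;
}
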
diff --git a/sysdeps/alpha/divq.S b/sysdeps/alpha/divq.S
index 4df79829f9..cab6c34ad1 100644
--- a/sysdeps/alpha/divq.S
+++ b/sysdeps/alpha/divq.S
@@ -115,16 +115,16 @@ $fix_sign_in_ret1:
_FTOIT $f0, Q, 8
.align 3
$fix_sign_in_ret2:
+ ldt $f0, 0(sp)
+ stq t3, 0(sp)
+ cfi_restore ($f0)
+ cfi_rel_offset (t3, 0)
+
mulq Q, Y, QY
+ unop
stq t4, 8(sp)
-
- ldt $f0, 0(sp)
unop
cfi_rel_offset (t4, 8)
- cfi_restore ($f0)
- stq t3, 0(sp)
- unop
- cfi_rel_offset (t3, 0)
subq QY, X, R
mov Y, SY
diff --git a/sysdeps/alpha/ldiv.S b/sysdeps/alpha/ldiv.S
index 81b48cd559..c90edfb784 100644
--- a/sysdeps/alpha/ldiv.S
+++ b/sysdeps/alpha/ldiv.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997, 2001 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 2001, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Richard Henderson <rth@tamu.edu>.
@@ -17,93 +17,195 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
-#include <sysdep.h>
+#include "div_libc.h"
-#ifdef __linux__
-# include <asm/gentrap.h>
-# include <asm/pal.h>
+#undef FRAME
+#ifdef __alpha_fix__
+#define FRAME 0
#else
-# include <machine/pal.h>
+#define FRAME 16
#endif
+#undef X
+#undef Y
+#define X $17
+#define Y $18
+
.set noat
.align 4
.globl ldiv
.ent ldiv
ldiv:
- .frame sp, 0, ra
+ .frame sp, FRAME, ra
+#if FRAME > 0
+ lda sp, -FRAME(sp)
+#endif
#ifdef PROF
+ .set macro
ldgp gp, 0(pv)
lda AT, _mcount
jsr AT, (AT), _mcount
+ .set nomacro
.prologue 1
#else
.prologue 0
#endif
-#define divisor t1
-#define mask t2
-#define quotient t3
-#define modulus t4
-#define tmp1 t5
-#define tmp2 t6
-#define compare t7
-
- /* find correct sign for input to unsigned divide loop. */
- mov a1, modulus # e0 :
- mov a2, divisor # .. e1 :
- negq a1, tmp1 # e0 :
- negq a2, tmp2 # .. e1 :
- mov zero, quotient # e0 :
- mov 1, mask # .. e1 :
- cmovlt a1, tmp1, modulus # e0 :
- cmovlt a2, tmp2, divisor # .. e1 :
- beq a2, $divbyzero # e1 :
- unop # :
-
- /* shift divisor left. */
-1: cmpult divisor, modulus, compare # e0 :
- blt divisor, 2f # .. e1 :
- addq divisor, divisor, divisor # e0 :
- addq mask, mask, mask # .. e1 :
- bne compare, 1b # e1 :
- unop # :
-
- /* start to go right again. */
-2: addq quotient, mask, tmp2 # e1 :
- srl mask, 1, mask # .. e0 :
- cmpule divisor, modulus, compare # e0 :
- subq modulus, divisor, tmp1 # .. e1 :
- cmovne compare, tmp2, quotient # e1 :
- srl divisor, 1, divisor # .. e0 :
- cmovne compare, tmp1, modulus # e0 :
- bne mask, 2b # .. e1 :
-
- /* find correct sign for result. */
- xor a1, a2, compare # e0 :
- negq quotient, tmp1 # .. e1 :
- negq modulus, tmp2 # e0 :
- cmovlt compare, tmp1, quotient # .. e1 :
- cmovlt a1, tmp2, modulus # e1 :
-
- /* and store it away in the structure. */
-9: stq quotient, 0(a0) # .. e0 :
- mov a0, v0 # e1 :
- stq modulus, 8(a0) # .. e0 :
- ret # e1 :
+ beq Y, $divbyzero
+
+ _ITOFT2 X, $f0, 0, Y, $f1, 8
+
+ .align 4
+ cvtqt $f0, $f0
+ cvtqt $f1, $f1
+ divt/c $f0, $f1, $f0
+ unop
+
+ /* Check to see if X fit in the double as an exact value. */
+ sll X, (64-53), AT
+ sra AT, (64-53), AT
+ cmpeq X, AT, AT
+ beq AT, $x_big
+
+ /* If we get here, we're expecting exact results from the division.
+ Do nothing else besides convert and clean up. */
+ cvttq/c $f0, $f0
+ _FTOIT $f0, $0, 0
+
+$egress:
+ mulq $0, Y, $1
+ subq X, $1, $1
+
+ stq $0, 0($16)
+ stq $1, 8($16)
+ mov $16, $0
+
+#if FRAME > 0
+ lda sp, FRAME(sp)
+#endif
+ ret
+
+ .align 4
+$x_big:
+ /* If we get here, X is large enough that we don't expect exact
+ results, and neither X nor Y got mis-translated for the fp
+ division. Our task is to take the fp result, figure out how
+ far it's off from the correct result and compute a fixup. */
+
+#define Q v0 /* quotient */
+#define R t0 /* remainder */
+#define SY t1 /* scaled Y */
+#define S t2 /* scalar */
+#define QY t3 /* Q*Y */
+
+ /* The fixup code below can only handle unsigned values. */
+ or X, Y, AT
+ mov $31, t5
+ blt AT, $fix_sign_in
+$fix_sign_in_ret1:
+ cvttq/c $f0, $f0
+
+ _FTOIT $f0, Q, 8
+ .align 3
+$fix_sign_in_ret2:
+ mulq Q, Y, QY
+
+ .align 4
+ subq QY, X, R
+ mov Y, SY
+ mov 1, S
+ bgt R, $q_high
+
+$q_high_ret:
+ subq X, QY, R
+ mov Y, SY
+ mov 1, S
+ bgt R, $q_low
+
+$q_low_ret:
+ negq Q, t4
+ cmovlbs t5, t4, Q
+ br $egress
+
+ .align 4
+ /* The quotient that we computed was too large. We need to reduce
+ it by S such that Y*S >= R. Obviously the closer we get to the
+ correct value the better, but overshooting high is ok, as we'll
+ fix that up later. */
+0:
+ addq SY, SY, SY
+ addq S, S, S
+$q_high:
+ cmpult SY, R, AT
+ bne AT, 0b
+
+ subq Q, S, Q
+ unop
+ subq QY, SY, QY
+ br $q_high_ret
+
+ .align 4
+ /* The quotient that we computed was too small. Divide the current
+ remainder (R) by Y and add that to the existing quotient (Q).
+ The expectation, of course, is that R is much smaller than X. */
+ /* Begin with a shift-up loop. Compute S such that Y*S >= R. We
+ already have a copy of Y in SY and the value 1 in S. */
+0:
+ addq SY, SY, SY
+ addq S, S, S
+$q_low:
+ cmpult SY, R, AT
+ bne AT, 0b
+
+ /* Shift-down and subtract loop. Each iteration compares our scaled
+ Y (SY) with the remainder (R); if SY <= R then X is divisible by
+ Y's scalar (S) so add it to the quotient (Q). */
+2: addq Q, S, t3
+ srl S, 1, S
+ cmpule SY, R, AT
+ subq R, SY, t4
+
+ cmovne AT, t3, Q
+ cmovne AT, t4, R
+ srl SY, 1, SY
+ bne S, 2b
+
+ br $q_low_ret
+
+ .align 4
+$fix_sign_in:
+ /* If we got here, then X|Y is negative. Need to adjust everything
+ such that we're doing unsigned division in the fixup loop. */
+ /* T5 is true if result should be negative. */
+ xor X, Y, AT
+ cmplt AT, 0, t5
+ cmplt X, 0, AT
+ negq X, t0
+
+ cmovne AT, t0, X
+ cmplt Y, 0, AT
+ negq Y, t0
+
+ cmovne AT, t0, Y
+ blbc t5, $fix_sign_in_ret1
+
+ cvttq/c $f0, $f0
+ _FTOIT $f0, Q, 8
+ .align 3
+ negq Q, Q
+ br $fix_sign_in_ret2
$divbyzero:
mov a0, v0
lda a0, GEN_INTDIV
call_pal PAL_gentrap
-
- /* if trap returns, return zero. */
stq zero, 0(v0)
stq zero, 8(v0)
- ret
- .end ldiv
+#if FRAME > 0
+ lda sp, FRAME(sp)
+#endif
+ ret
-weak_alias(ldiv, lldiv)
-weak_alias(ldiv, imaxdiv)
+ .end ldiv
+
+weak_alias (ldiv, lldiv)
+weak_alias (ldiv, imaxdiv)
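
The $x_big path is the heart of the ldiv() rewrite: once X no longer fits
in the 53-bit mantissa the FP quotient may be off a little in either
direction, and the $q_high/$q_low loops walk it back to the exact value.
A minimal C model of that strategy, assuming x, y > 0 (the real code folds
signs in and out around this, and quot_via_fp_fixup is my name for it, not
glibc's):

#include <stdint.h>

/* Sketch only: exact 64-bit quotient via FP divide plus fixup.  */
static int64_t
quot_via_fp_fixup (int64_t x, int64_t y)
{
  /* divt/c + cvttq/c: approximate quotient, truncated toward zero.  */
  int64_t q = (int64_t) ((double) x / (double) y);

  /* The sll/sra pair by (64-53) asks whether x sign-extends from a
     53-bit field, i.e. whether it converted to double exactly; for
     non-negative x that reduces to x < 2^52, and the FP quotient is
     then already exact.  */
  if (x < ((int64_t) 1 << 52))
    return q;

  int64_t qy = q * y;

  /* $q_high: quotient too large.  Scale y up until s*y >= qy - x,
     then back q off by s; overshooting low is fine, the next loop
     repairs it.  */
  if (qy > x)
    {
      int64_t r = qy - x, sy = y, s = 1;
      while (sy < r)
        {
          sy += sy;
          s += s;
        }
      q -= s;
      qy -= sy;
    }

  /* $q_low: quotient too small.  Shift up until s*y >= r, then the
     shift-down-and-subtract loop adds r/y back into q one power of
     two at a time (classic restoring division on the residue).  */
  if (x > qy)
    {
      int64_t r = x - qy, sy = y, s = 1;
      while (sy < r)
        {
          sy += sy;
          s += s;
        }
      while (s)
        {
          if (sy <= r)
            {
              q += s;
              r -= sy;
            }
          s >>= 1;
          sy >>= 1;
        }
    }
  return q;
}

As in div(), ldiv() then recovers the remainder at $egress with one
multiply-subtract: rem = x - q*y.
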
diff --git a/sysdeps/alpha/remq.S b/sysdeps/alpha/remq.S
index a8795c8d24..40c68d7f10 100644
--- a/sysdeps/alpha/remq.S
+++ b/sysdeps/alpha/remq.S
@@ -116,16 +116,16 @@ $fix_sign_in_ret1:
_FTOIT $f0, Q, 8
.align 3
$fix_sign_in_ret2:
+ ldt $f0, 0(sp)
+ stq t3, 0(sp)
+ cfi_restore ($f0)
+ cfi_rel_offset (t3, 0)
+
mulq Q, Y, QY
+ unop
stq t4, 8(sp)
-
- ldt $f0, 0(sp)
unop
cfi_rel_offset (t4, 8)
- cfi_restore ($f0)
- stq t3, 0(sp)
- unop
- cfi_rel_offset (t3, 0)
subq QY, X, R
mov Y, SY