diff options
Diffstat (limited to 'sysdeps/alpha/divq.S')
-rw-r--r-- | sysdeps/alpha/divq.S | 274 |
1 files changed, 0 insertions, 274 deletions
diff --git a/sysdeps/alpha/divq.S b/sysdeps/alpha/divq.S
deleted file mode 100644
index d2ed2c5af6..0000000000
--- a/sysdeps/alpha/divq.S
+++ /dev/null
@@ -1,274 +0,0 @@
-/* Copyright (C) 2004 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#include "div_libc.h"
-
-
-/* 64-bit signed long divide.  These are not normal C functions.  Argument
-   registers are t10 and t11, the result goes in t12.  Only t12 and AT may
-   be clobbered.
-
-   Theory of operation here is that we can use the FPU divider for virtually
-   all operands that we see: all dividend values between -2**53 and 2**53-1
-   can be computed directly.  Note that divisor values need not be checked
-   against that range because the rounded fp value will be close enough such
-   that the quotient is < 1, which will properly be truncated to zero when we
-   convert back to integer.
-
-   When the dividend is outside the range for which we can compute exact
-   results, we use the fp quotient as an estimate from which we begin refining
-   an exact integral value.  This reduces the number of iterations in the
-   shift-and-subtract loop significantly.
-
-   The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE
-   for cvttq/c even without /sui being set.  It will not, however, properly
-   raise the exception, so we don't have to worry about FPCR_INED being clear
-   and so dying by SIGFPE.  */
-
-	.text
-	.align	4
-	.globl	__divq
-	.type	__divq, @funcnoplt
-	.usepv	__divq, no
-
-	cfi_startproc
-	cfi_return_column (RA)
-__divq:
-	lda	sp, -FRAME(sp)		/* allocate FRAME bytes of scratch stack */
-	cfi_def_cfa_offset (FRAME)
-	CALL_MCOUNT
-
-	/* Get the fp divide insn issued as quickly as possible.  After
-	   that's done, we have at least 22 cycles until its results are
-	   ready -- all the time in the world to figure out how we're
-	   going to use the results.  */
-	stt	$f0, 0(sp)		/* spill $f0 to slot 0 */
-	excb
-	beq	Y, DIVBYZERO		/* Y == 0: take the divide-by-zero path */
-
-	stt	$f1, 8(sp)		/* spill $f1 to slot 8 */
-	stt	$f3, 48(sp)		/* spill $f3 to slot 48 */
-	cfi_rel_offset ($f0, 0)
-	cfi_rel_offset ($f1, 8)
-	cfi_rel_offset ($f3, 48)
-	mf_fpcr	$f3			/* $f3 = FPCR on entry (see header comment) */
-
-	_ITOFT2	X, $f0, 16, Y, $f1, 24	/* presumably X -> $f0, Y -> $f1 via slots 16/24; macro not visible here */
-	cvtqt	$f0, $f0		/* X: integer -> double */
-	cvtqt	$f1, $f1		/* Y: integer -> double */
-	divt/c	$f0, $f1, $f0		/* $f0 = X / Y as a double */
-
-	/* Check to see if X fits in the double as an exact value.  */
-	sll	X, (64-53), AT		/* shift the top 11 bits of X out ... */
-	ldt	$f1, 8(sp)		/* reload the $f1 value spilled above */
-	sra	AT, (64-53), AT		/* ... and sign-extend back down */
-	cmpeq	X, AT, AT		/* AT = 1 iff X survived the round trip */
-	beq	AT, $x_big		/* it did not: go refine the estimate */
-
-	/* If we get here, we're expecting exact results from the division.
-	   Do nothing else besides convert and clean up.  */
-	cvttq/c	$f0, $f0		/* double -> integer */
-	excb
-	mt_fpcr	$f3			/* write back the FPCR value read on entry */
-	_FTOIT	$f0, RV, 16		/* presumably $f0 -> RV via slot 16; macro not visible here */
-
-	ldt	$f0, 0(sp)		/* reload spilled $f0 */
-	ldt	$f3, 48(sp)		/* reload spilled $f3 */
-	cfi_restore ($f1)
-	cfi_remember_state
-	cfi_restore ($f0)
-	cfi_restore ($f3)
-	cfi_def_cfa_offset (0)
-	lda	sp, FRAME(sp)		/* release the frame */
-	ret	$31, (RA), 1
-
-	.align	4
-	cfi_restore_state
-$x_big:
-	/* If we get here, X is large enough that we don't expect exact
-	   results, and neither X nor Y got mis-translated for the fp
-	   division.  Our task is to take the fp result, figure out how
-	   far it's off from the correct result and compute a fixup.  */
-	stq	t0, 16(sp)		/* t0 is written below (R, negation temp) */
-	stq	t1, 24(sp)		/* t1 is written below (SY) */
-	stq	t2, 32(sp)		/* t2 is written below (S) */
-	stq	t5, 40(sp)		/* t5 is written below (sign flags) */
-	cfi_rel_offset (t0, 16)
-	cfi_rel_offset (t1, 24)
-	cfi_rel_offset (t2, 32)
-	cfi_rel_offset (t5, 40)
-
-#define Q	RV		/* quotient */
-#define R	t0		/* remainder */
-#define SY	t1		/* scaled Y */
-#define S	t2		/* scalar */
-#define QY	t3		/* Q*Y */
-
-	/* The fixup code below can only handle unsigned values.  */
-	or	X, Y, AT		/* sign bit of AT set iff X or Y is negative */
-	mov	$31, t5			/* t5 = 0: no sign fixups recorded yet */
-	blt	AT, $fix_sign_in
-$fix_sign_in_ret1:
-	cvttq/c	$f0, $f0		/* fp estimate -> integer */
-
-	_FTOIT	$f0, Q, 8		/* presumably $f0 -> Q via slot 8; macro not visible here */
-	.align	3
-$fix_sign_in_ret2:
-	ldt	$f0, 0(sp)		/* reload spilled $f0 ... */
-	stq	t3, 0(sp)		/* ... and reuse slot 0 to save t3 (QY) */
-	cfi_restore ($f0)
-	cfi_rel_offset (t3, 0)
-
-	mulq	Q, Y, QY		/* QY = Q * Y */
-	excb
-	stq	t4, 8(sp)		/* save t4; it is written in the loops below */
-	mt_fpcr	$f3			/* write back the FPCR value read on entry */
-	cfi_rel_offset (t4, 8)
-
-	subq	QY, X, R		/* R = Q*Y - X */
-	mov	Y, SY
-	mov	1, S
-	bgt	R, $q_high		/* R > 0: estimate was too large */
-
-$q_high_ret:
-	subq	X, QY, R		/* R = X - Q*Y */
-	mov	Y, SY
-	mov	1, S
-	bgt	R, $q_low		/* R > 0: Q may still be too small */
-
-$q_low_ret:
-	ldq	t0, 16(sp)
-	ldq	t1, 24(sp)
-	ldq	t2, 32(sp)
-	bne	t5, $fix_sign_out	/* sign fixups were recorded: undo them */
-
-$fix_sign_out_ret:
-	ldq	t3, 0(sp)
-	ldq	t4, 8(sp)
-	ldq	t5, 40(sp)
-	ldt	$f3, 48(sp)
-	lda	sp, FRAME(sp)		/* release the frame */
-	cfi_remember_state
-	cfi_restore (t0)
-	cfi_restore (t1)
-	cfi_restore (t2)
-	cfi_restore (t3)
-	cfi_restore (t4)
-	cfi_restore (t5)
-	cfi_restore ($f3)
-	cfi_def_cfa_offset (0)
-	ret	$31, (RA), 1
-
-	.align	4
-	cfi_restore_state
-	/* The quotient that we computed was too large.  We need to reduce
-	   it by S such that Y*S >= R.  Obviously the closer we get to the
-	   correct value the better, but overshooting high is ok, as we'll
-	   fix that up later.  */
-0:
-	addq	SY, SY, SY		/* SY *= 2 */
-	addq	S, S, S			/* S *= 2, so SY stays equal to Y*S */
-$q_high:
-	cmpult	SY, R, AT		/* unsigned SY < R ? */
-	bne	AT, 0b			/* yes: keep doubling */
-
-	subq	Q, S, Q			/* Q -= S */
-	unop				/* no-op; presumably scheduling padding */
-	subq	QY, SY, QY		/* QY -= Y*S, keeping QY == Q*Y */
-	br	$q_high_ret
-
-	.align	4
-	/* The quotient that we computed was too small.  Divide Y by the
-	   current remainder (R) and add that to the existing quotient (Q).
-	   The expectation, of course, is that R is much smaller than X.  */
-	/* Begin with a shift-up loop.  Compute S such that Y*S >= R.  We
-	   already have a copy of Y in SY and the value 1 in S.  */
-0:
-	addq	SY, SY, SY		/* SY *= 2 */
-	addq	S, S, S			/* S *= 2 */
-$q_low:
-	cmpult	SY, R, AT		/* unsigned SY < R ? */
-	bne	AT, 0b			/* yes: keep doubling */
-
-	/* Shift-down and subtract loop.  Each iteration compares our scaled
-	   Y (SY) with the remainder (R); if SY <= R then X is divisible by
-	   Y's scalar (S) so add it to the quotient (Q).  */
-2:	addq	Q, S, t3		/* t3 = Q + S (candidate quotient) */
-	srl	S, 1, S			/* S >>= 1 for the next round */
-	cmpule	SY, R, AT		/* AT = (SY <= R), unsigned */
-	subq	R, SY, t4		/* t4 = R - SY (candidate remainder) */
-
-	cmovne	AT, t3, Q		/* commit the quotient if SY <= R */
-	cmovne	AT, t4, R		/* commit the remainder if SY <= R */
-	srl	SY, 1, SY		/* SY >>= 1 for the next round */
-	bne	S, 2b			/* loop until S has shifted down to zero */
-
-	br	$q_low_ret
-
-	.align	4
-$fix_sign_in:
-	/* If we got here, then X|Y is negative.  Need to adjust everything
-	   such that we're doing unsigned division in the fixup loop.  */
-	/* T5 records the changes we had to make:
-		bit 0:	set if result should be negative.
-		bit 2:	set if X was negated.
-		bit 3:	set if Y was negated.
-	*/
-	xor	X, Y, AT		/* sign bit set iff the signs of X and Y differ */
-	cmplt	AT, 0, t5		/* t5 bit 0 = result is negative */
-	cmplt	X, 0, AT		/* AT = (X < 0) */
-	negq	X, t0			/* t0 = -X */
-
-	s4addq	AT, t5, t5		/* t5 += 4*AT: sets bit 2 if X < 0 */
-	cmovne	AT, t0, X		/* X = -X if it was negative */
-	cmplt	Y, 0, AT		/* AT = (Y < 0) */
-	negq	Y, t0			/* t0 = -Y */
-
-	s8addq	AT, t5, t5		/* t5 += 8*AT: sets bit 3 if Y < 0 */
-	cmovne	AT, t0, Y		/* Y = -Y if it was negative */
-	unop				/* no-op; presumably scheduling padding */
-	blbc	t5, $fix_sign_in_ret1	/* bit 0 clear: estimate needs no negation */
-
-	cvttq/c	$f0, $f0		/* fp estimate -> integer */
-	_FTOIT	$f0, Q, 8		/* presumably $f0 -> Q via slot 8; macro not visible here */
-	.align	3
-	negq	Q, Q			/* operands are now non-negative: flip the estimate too */
-	br	$fix_sign_in_ret2
-
-	.align	4
-$fix_sign_out:
-	/* Now we get to undo what we did above.  */
-	/* ??? Is this really faster than just increasing the size of
-	   the stack frame and storing X and Y in memory?  */
-	and	t5, 8, AT		/* bit 3: Y was negated */
-	negq	Y, t4
-	cmovne	AT, t4, Y		/* give the caller back the original Y */
-
-	and	t5, 4, AT		/* bit 2: X was negated */
-	negq	X, t4
-	cmovne	AT, t4, X		/* give the caller back the original X */
-
-	negq	RV, t4
-	cmovlbs	t5, t4, RV		/* bit 0 set: the result is negative */
-
-	br	$fix_sign_out_ret
-
-	cfi_endproc
-	.size	__divq, .-__divq
-
-	DO_DIVBYZERO |