about summary refs log tree commit diff
path: root/sysdeps/i386/i586
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/i386/i586')
-rw-r--r--  sysdeps/i386/i586/add_n.S     133
-rw-r--r--  sysdeps/i386/i586/addmul_1.S   34
-rw-r--r--  sysdeps/i386/i586/mul_1.S      20
-rw-r--r--  sysdeps/i386/i586/sub_n.S     133
-rw-r--r--  sysdeps/i386/i586/submul_1.S   16
5 files changed, 160 insertions, 176 deletions
diff --git a/sysdeps/i386/i586/add_n.S b/sysdeps/i386/i586/add_n.S
index f52f9c60bc..f214c8cb36 100644
--- a/sysdeps/i386/i586/add_n.S
+++ b/sysdeps/i386/i586/add_n.S
@@ -1,7 +1,7 @@
/* Pentium __mpn_add_n -- Add two limb vectors of the same length > 0 and store
sum in a third limb vector.
-Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -30,13 +30,6 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#include "sysdep.h"
#include "asm-syntax.h"
-#define t1 %eax
-#define t2 %edx
-#define src1 %esi
-#define src2 %ebp
-#define dst %edi
-#define x %ebx
-
.text
ALIGN (3)
.globl C_SYMBOL_NAME(__mpn_add_n)
@@ -46,85 +39,85 @@ C_SYMBOL_NAME(__mpn_add_n:)
pushl %ebx
pushl %ebp
- movl 20(%esp),dst /* res_ptr */
- movl 24(%esp),src1 /* s1_ptr */
- movl 28(%esp),src2 /* s2_ptr */
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s1_ptr */
+ movl 28(%esp),%ebp /* s2_ptr */
movl 32(%esp),%ecx /* size */
- movl (src2),x
+ movl (%ebp),%ebx
decl %ecx
- movl %ecx,t2
+ movl %ecx,%edx
shrl $3,%ecx
- andl $7,t2
+ andl $7,%edx
testl %ecx,%ecx /* zero carry flag */
jz Lend
- pushl t2
+ pushl %edx
ALIGN (3)
-Loop: movl 28(dst),%eax /* fetch destination cache line */
- leal 32(dst),dst
-
-L1: movl (src1),t1
- movl 4(src1),t2
- adcl x,t1
- movl 4(src2),x
- adcl x,t2
- movl 8(src2),x
- movl t1,-32(dst)
- movl t2,-28(dst)
-
-L2: movl 8(src1),t1
- movl 12(src1),t2
- adcl x,t1
- movl 12(src2),x
- adcl x,t2
- movl 16(src2),x
- movl t1,-24(dst)
- movl t2,-20(dst)
-
-L3: movl 16(src1),t1
- movl 20(src1),t2
- adcl x,t1
- movl 20(src2),x
- adcl x,t2
- movl 24(src2),x
- movl t1,-16(dst)
- movl t2,-12(dst)
-
-L4: movl 24(src1),t1
- movl 28(src1),t2
- adcl x,t1
- movl 28(src2),x
- adcl x,t2
- movl 32(src2),x
- movl t1,-8(dst)
- movl t2,-4(dst)
-
- leal 32(src1),src1
- leal 32(src2),src2
+Loop: movl 28(%edi),%eax /* fetch destination cache line */
+ leal 32(%edi),%edi
+
+L1: movl (%esi),%eax
+ movl 4(%esi),%edx
+ adcl %ebx,%eax
+ movl 4(%ebp),%ebx
+ adcl %ebx,%edx
+ movl 8(%ebp),%ebx
+ movl %eax,-32(%edi)
+ movl %edx,-28(%edi)
+
+L2: movl 8(%esi),%eax
+ movl 12(%esi),%edx
+ adcl %ebx,%eax
+ movl 12(%ebp),%ebx
+ adcl %ebx,%edx
+ movl 16(%ebp),%ebx
+ movl %eax,-24(%edi)
+ movl %edx,-20(%edi)
+
+L3: movl 16(%esi),%eax
+ movl 20(%esi),%edx
+ adcl %ebx,%eax
+ movl 20(%ebp),%ebx
+ adcl %ebx,%edx
+ movl 24(%ebp),%ebx
+ movl %eax,-16(%edi)
+ movl %edx,-12(%edi)
+
+L4: movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ adcl %ebx,%eax
+ movl 28(%ebp),%ebx
+ adcl %ebx,%edx
+ movl 32(%ebp),%ebx
+ movl %eax,-8(%edi)
+ movl %edx,-4(%edi)
+
+ leal 32(%esi),%esi
+ leal 32(%ebp),%ebp
decl %ecx
jnz Loop
- popl t2
+ popl %edx
Lend:
- decl t2 /* test t2 w/o clobbering carry */
+ decl %edx /* test %edx w/o clobbering carry */
js Lend2
- incl t2
+ incl %edx
Loop2:
- leal 4(dst),dst
- movl (src1),t1
- adcl x,t1
- movl 4(src2),x
- movl t1,-4(dst)
- leal 4(src1),src1
- leal 4(src2),src2
- decl t2
+ leal 4(%edi),%edi
+ movl (%esi),%eax
+ adcl %ebx,%eax
+ movl 4(%ebp),%ebx
+ movl %eax,-4(%edi)
+ leal 4(%esi),%esi
+ leal 4(%ebp),%ebp
+ decl %edx
jnz Loop2
Lend2:
- movl (src1),t1
- adcl x,t1
- movl t1,(dst)
+ movl (%esi),%eax
+ adcl %ebx,%eax
+ movl %eax,(%edi)
sbbl %eax,%eax
negl %eax
diff --git a/sysdeps/i386/i586/addmul_1.S b/sysdeps/i386/i586/addmul_1.S
index b222840591..5bf2603cab 100644
--- a/sysdeps/i386/i586/addmul_1.S
+++ b/sysdeps/i386/i586/addmul_1.S
@@ -1,7 +1,7 @@
/* Pentium __mpn_addmul_1 -- Multiply a limb vector with a limb and add
the result to a second limb vector.
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -32,12 +32,12 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#define res_ptr edi
#define s1_ptr esi
+#define size ecx
#define s2_limb ebp
TEXT
ALIGN (3)
GLOBL C_SYMBOL_NAME(__mpn_addmul_1)
- .type C_SYMBOL_NAME(__mpn_addmul_1),@function
C_SYMBOL_NAME(__mpn_addmul_1:)
INSN1(push,l ,R(edi))
@@ -47,38 +47,36 @@ C_SYMBOL_NAME(__mpn_addmul_1:)
INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
- INSN2(mov,l ,R(ecx),MEM_DISP(esp,28))
+ INSN2(mov,l ,R(size),MEM_DISP(esp,28))
INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
- INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,ecx,4))
- INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,ecx,4))
- INSN1(neg,l ,R(ecx))
- INSN2(xor,l ,R(edx),R(edx))
+ INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+ INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+ INSN1(neg,l ,R(size))
+ INSN2(xor,l ,R(ebx),R(ebx))
ALIGN (3)
-Loop:
- INSN2(mov,l ,R(ebx),R(edx))
- INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,ecx,4))
+
+Loop: INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
INSN1(mul,l ,R(s2_limb))
INSN2(add,l ,R(eax),R(ebx))
- INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,ecx,4))
+ INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4))
INSN2(adc,l ,R(edx),$0)
INSN2(add,l ,R(ebx),R(eax))
- INSN2(adc,l ,R(edx),$0)
- INSN2(mov,l ,MEM_INDEX(res_ptr,ecx,4),R(ebx))
+ INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
+ INSN1(inc,l ,R(size))
- INSN1(inc,l ,R(ecx))
+ INSN2(mov,l ,R(ebx),R(edx))
INSN1(jnz, ,Loop)
-
- INSN2(mov,l ,R(eax),R(edx))
+ INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),R(ebx))
INSN1(pop,l ,R(ebp))
INSN1(pop,l ,R(ebx))
INSN1(pop,l ,R(esi))
INSN1(pop,l ,R(edi))
ret
-Lfe1:
- .size C_SYMBOL_NAME(__mpn_addmul_1),Lfe1-C_SYMBOL_NAME(__mpn_addmul_1)
diff --git a/sysdeps/i386/i586/mul_1.S b/sysdeps/i386/i586/mul_1.S
index 2b7258e130..048c0601f2 100644
--- a/sysdeps/i386/i586/mul_1.S
+++ b/sysdeps/i386/i586/mul_1.S
@@ -1,7 +1,7 @@
/* Pentium __mpn_mul_1 -- Multiply a limb vector with a limb and store
the result in a second limb vector.
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -53,24 +53,24 @@ C_SYMBOL_NAME(__mpn_mul_1:)
INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
INSN1(neg,l ,R(size))
- INSN2(xor,l ,R(edx),R(edx))
+ INSN2(xor,l ,R(ebx),R(ebx))
ALIGN (3)
-Loop:
- INSN2(mov,l ,R(ebx),R(edx))
+
+Loop: INSN2(adc,l ,R(ebx),$0)
INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
INSN1(mul,l ,R(s2_limb))
- INSN2(add,l ,R(eax),R(ebx))
-
- INSN2(adc,l ,R(edx),$0)
- INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(eax))
+ INSN2(add,l ,R(ebx),R(eax))
+ INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
INSN1(inc,l ,R(size))
- INSN1(jnz, ,Loop)
+ INSN2(mov,l ,R(ebx),R(edx))
+ INSN1(jnz, ,Loop)
- INSN2(mov,l ,R(eax),R(edx))
+ INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),R(ebx))
INSN1(pop,l ,R(ebp))
INSN1(pop,l ,R(ebx))
INSN1(pop,l ,R(esi))
diff --git a/sysdeps/i386/i586/sub_n.S b/sysdeps/i386/i586/sub_n.S
index 9c964a82f3..cd158a5469 100644
--- a/sysdeps/i386/i586/sub_n.S
+++ b/sysdeps/i386/i586/sub_n.S
@@ -1,7 +1,7 @@
/* Pentium __mpn_sub_n -- Subtract two limb vectors of the same length > 0
and store difference in a third limb vector.
-Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -30,13 +30,6 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#include "sysdep.h"
#include "asm-syntax.h"
-#define t1 %eax
-#define t2 %edx
-#define src1 %esi
-#define src2 %ebp
-#define dst %edi
-#define x %ebx
-
.text
ALIGN (3)
.globl C_SYMBOL_NAME(__mpn_sub_n)
@@ -46,85 +39,85 @@ C_SYMBOL_NAME(__mpn_sub_n:)
pushl %ebx
pushl %ebp
- movl 20(%esp),dst /* res_ptr */
- movl 24(%esp),src1 /* s1_ptr */
- movl 28(%esp),src2 /* s2_ptr */
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s1_ptr */
+ movl 28(%esp),%ebp /* s2_ptr */
movl 32(%esp),%ecx /* size */
- movl (src2),x
+ movl (%ebp),%ebx
decl %ecx
- movl %ecx,t2
+ movl %ecx,%edx
shrl $3,%ecx
- andl $7,t2
+ andl $7,%edx
testl %ecx,%ecx /* zero carry flag */
jz Lend
- pushl t2
+ pushl %edx
ALIGN (3)
-Loop: movl 28(dst),%eax /* fetch destination cache line */
- leal 32(dst),dst
-
-L1: movl (src1),t1
- movl 4(src1),t2
- sbbl x,t1
- movl 4(src2),x
- sbbl x,t2
- movl 8(src2),x
- movl t1,-32(dst)
- movl t2,-28(dst)
-
-L2: movl 8(src1),t1
- movl 12(src1),t2
- sbbl x,t1
- movl 12(src2),x
- sbbl x,t2
- movl 16(src2),x
- movl t1,-24(dst)
- movl t2,-20(dst)
-
-L3: movl 16(src1),t1
- movl 20(src1),t2
- sbbl x,t1
- movl 20(src2),x
- sbbl x,t2
- movl 24(src2),x
- movl t1,-16(dst)
- movl t2,-12(dst)
-
-L4: movl 24(src1),t1
- movl 28(src1),t2
- sbbl x,t1
- movl 28(src2),x
- sbbl x,t2
- movl 32(src2),x
- movl t1,-8(dst)
- movl t2,-4(dst)
-
- leal 32(src1),src1
- leal 32(src2),src2
+Loop: movl 28(%edi),%eax /* fetch destination cache line */
+ leal 32(%edi),%edi
+
+L1: movl (%esi),%eax
+ movl 4(%esi),%edx
+ sbbl %ebx,%eax
+ movl 4(%ebp),%ebx
+ sbbl %ebx,%edx
+ movl 8(%ebp),%ebx
+ movl %eax,-32(%edi)
+ movl %edx,-28(%edi)
+
+L2: movl 8(%esi),%eax
+ movl 12(%esi),%edx
+ sbbl %ebx,%eax
+ movl 12(%ebp),%ebx
+ sbbl %ebx,%edx
+ movl 16(%ebp),%ebx
+ movl %eax,-24(%edi)
+ movl %edx,-20(%edi)
+
+L3: movl 16(%esi),%eax
+ movl 20(%esi),%edx
+ sbbl %ebx,%eax
+ movl 20(%ebp),%ebx
+ sbbl %ebx,%edx
+ movl 24(%ebp),%ebx
+ movl %eax,-16(%edi)
+ movl %edx,-12(%edi)
+
+L4: movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ sbbl %ebx,%eax
+ movl 28(%ebp),%ebx
+ sbbl %ebx,%edx
+ movl 32(%ebp),%ebx
+ movl %eax,-8(%edi)
+ movl %edx,-4(%edi)
+
+ leal 32(%esi),%esi
+ leal 32(%ebp),%ebp
decl %ecx
jnz Loop
- popl t2
+ popl %edx
Lend:
- decl t2 /* test t2 w/o clobbering carry */
+ decl %edx /* test %edx w/o clobbering carry */
js Lend2
- incl t2
+ incl %edx
Loop2:
- leal 4(dst),dst
- movl (src1),t1
- sbbl x,t1
- movl 4(src2),x
- movl t1,-4(dst)
- leal 4(src1),src1
- leal 4(src2),src2
- decl t2
+ leal 4(%edi),%edi
+ movl (%esi),%eax
+ sbbl %ebx,%eax
+ movl 4(%ebp),%ebx
+ movl %eax,-4(%edi)
+ leal 4(%esi),%esi
+ leal 4(%ebp),%ebp
+ decl %edx
jnz Loop2
Lend2:
- movl (src1),t1
- sbbl x,t1
- movl t1,(dst)
+ movl (%esi),%eax
+ sbbl %ebx,%eax
+ movl %eax,(%edi)
sbbl %eax,%eax
negl %eax
diff --git a/sysdeps/i386/i586/submul_1.S b/sysdeps/i386/i586/submul_1.S
index 14bfe54e24..440f64f358 100644
--- a/sysdeps/i386/i586/submul_1.S
+++ b/sysdeps/i386/i586/submul_1.S
@@ -1,7 +1,7 @@
/* Pentium __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
the result from a second limb vector.
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -53,10 +53,10 @@ C_SYMBOL_NAME(__mpn_submul_1:)
INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
INSN1(neg,l ,R(size))
- INSN2(xor,l ,R(edx),R(edx))
+ INSN2(xor,l ,R(ebx),R(ebx))
ALIGN (3)
-Loop:
- INSN2(mov,l ,R(ebx),R(edx))
+
+Loop: INSN2(adc,l ,R(ebx),$0)
INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
INSN1(mul,l ,R(s2_limb))
@@ -67,14 +67,14 @@ Loop:
INSN2(adc,l ,R(edx),$0)
INSN2(sub,l ,R(ebx),R(eax))
- INSN2(adc,l ,R(edx),$0)
INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
-
INSN1(inc,l ,R(size))
- INSN1(jnz, ,Loop)
+ INSN2(mov,l ,R(ebx),R(edx))
+ INSN1(jnz, ,Loop)
- INSN2(mov,l ,R(eax),R(edx))
+ INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),R(ebx))
INSN1(pop,l ,R(ebp))
INSN1(pop,l ,R(ebx))
INSN1(pop,l ,R(esi))