-rw-r--r--  ChangeLog                  |  20
-rw-r--r--  sysdeps/x86_64/add_n.S     |  99
-rw-r--r--  sysdeps/x86_64/addmul_1.S  | 115
-rw-r--r--  sysdeps/x86_64/lshift.S    | 127
-rw-r--r--  sysdeps/x86_64/mul_1.S     | 119
-rw-r--r--  sysdeps/x86_64/rshift.S    | 129
-rw-r--r--  sysdeps/x86_64/sub_n.S     |  28
-rw-r--r--  sysdeps/x86_64/submul_1.S  |  32
8 files changed, 482 insertions(+), 187 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index fd4b7751ca..fe512db42b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,12 +1,22 @@
+2010-09-02 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/x86_64/add_n.S: Update from GMP 5.0.1.
+ * sysdeps/x86_64/addmul_1.S: Likewise.
+ * sysdeps/x86_64/lshift.S: Likewise.
+ * sysdeps/x86_64/mul_1.S: Likewise.
+ * sysdeps/x86_64/rshift.S: Likewise.
+ * sysdeps/x86_64/sub_n.S: Likewise.
+ * sysdeps/x86_64/submul_1.S: Likewise.
+
2010-09-01 Samuel Thibault <samuel.thibault@ens-lyon.org>
- This aligns bits/sched.h onto sysdeps/unix/sysv/linux/bits/sched.h:
- Define __sched_param instead of SCHED_* and sched_param when
+ This aligns bits/sched.h onto sysdeps/unix/sysv/linux/bits/sched.h:
+ Define __sched_param instead of SCHED_* and sched_param when
<bits/sched.h> is included with __need_schedparam defined.
- * bits/sched.h [__need_schedparam]
+ * bits/sched.h [__need_schedparam]
(SCHED_OTHER, SCHED_FIFO, SCHED_RR, sched_param): Do not define.
- [!__defined_schedparam && (__need_schedparam || _SCHED_H)]
- (__defined_schedparam): Define to 1.
+ [!__defined_schedparam && (__need_schedparam || _SCHED_H)]
+ (__defined_schedparam): Define to 1.
(__sched_param): New structure, identical to sched_param.
(__need_schedparam): Undefine.
diff --git a/sysdeps/x86_64/add_n.S b/sysdeps/x86_64/add_n.S
index 7883f6c840..f0b4c3f78c 100644
--- a/sysdeps/x86_64/add_n.S
+++ b/sysdeps/x86_64/add_n.S
@@ -1,6 +1,6 @@
-/* Add two limb vectors of the same length > 0 and store sum in a third
- limb vector.
- Copyright (C) 2004 Free Software Foundation, Inc.
+/* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+ sum in a third limb vector.
+ Copyright (C) 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
@@ -21,22 +21,81 @@
#include "sysdep.h"
#include "asm-syntax.h"
+#define rp %rdi
+#define up %rsi
+#define vp %rdx
+#define n %rcx
+#define cy %r8
+
+#ifndef func
+# define func __mpn_add_n
+# define ADCSBB adc
+#endif
+
.text
-ENTRY (__mpn_add_n)
- leaq (%rsi,%rcx,8), %rsi
- leaq (%rdi,%rcx,8), %rdi
- leaq (%rdx,%rcx,8), %rdx
- negq %rcx
- xorl %eax, %eax # clear cy
- .p2align 2
-L(loop):
- movq (%rsi,%rcx,8), %rax
- movq (%rdx,%rcx,8), %r10
- adcq %r10, %rax
- movq %rax, (%rdi,%rcx,8)
- incq %rcx
- jne L(loop)
- movq %rcx, %rax # zero %rax
- adcq %rax, %rax
+ENTRY (func)
+ xor %r8, %r8
+ mov (up), %r10
+ mov (vp), %r11
+
+ lea -8(up,n,8), up
+ lea -8(vp,n,8), vp
+ lea -16(rp,n,8), rp
+ mov %ecx, %eax
+ neg n
+ and $3, %eax
+ je L(b00)
+ add %rax, n /* clear low rcx bits for jrcxz */
+ cmp $2, %eax
+ jl L(b01)
+ je L(b10)
+
+L(b11): shr %r8 /* set cy */
+ jmp L(e11)
+
+L(b00): shr %r8 /* set cy */
+ mov %r10, %r8
+ mov %r11, %r9
+ lea 4(n), n
+ jmp L(e00)
+
+L(b01): shr %r8 /* set cy */
+ jmp L(e01)
+
+L(b10): shr %r8 /* set cy */
+ mov %r10, %r8
+ mov %r11, %r9
+ jmp L(e10)
+
+L(end): ADCSBB %r11, %r10
+ mov %r10, 8(rp)
+ mov %ecx, %eax /* clear eax, ecx contains 0 */
+ adc %eax, %eax
ret
-END (__mpn_add_n)
+
+ .p2align 4
+L(top):
+ mov -24(up,n,8), %r8
+ mov -24(vp,n,8), %r9
+ ADCSBB %r11, %r10
+ mov %r10, -24(rp,n,8)
+L(e00):
+ mov -16(up,n,8), %r10
+ mov -16(vp,n,8), %r11
+ ADCSBB %r9, %r8
+ mov %r8, -16(rp,n,8)
+L(e11):
+ mov -8(up,n,8), %r8
+ mov -8(vp,n,8), %r9
+ ADCSBB %r11, %r10
+ mov %r10, -8(rp,n,8)
+L(e10):
+ mov (up,n,8), %r10
+ mov (vp,n,8), %r11
+ ADCSBB %r9, %r8
+ mov %r8, (rp,n,8)
+L(e01):
+ jrcxz L(end)
+ lea 4(n), n
+ jmp L(top)
+END (func)
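Note: the body above is shared; sub_n.S (further down) redefines func and ADCSBB and re-includes this file, turning the adc chain into an sbb chain. As a rough guide to what the unrolled loop computes, here is a minimal portable C sketch of the add_n semantics (ref_add_n is a hypothetical name, not the glibc/GMP code); {rp,n} receives {up,n} + {vp,n} and the final carry is returned:

typedef unsigned long mp_limb_t;   /* one 64-bit limb on x86-64 */

mp_limb_t
ref_add_n (mp_limb_t *rp, const mp_limb_t *up, const mp_limb_t *vp, long n)
{
  mp_limb_t cy = 0;                  /* running carry, 0 or 1 */
  for (long i = 0; i < n; i++)
    {
      mp_limb_t t = up[i] + vp[i];
      mp_limb_t c1 = t < up[i];      /* carry out of the first add */
      mp_limb_t s = t + cy;
      mp_limb_t c2 = s < t;          /* carry out of adding cy */
      rp[i] = s;
      cy = c1 + c2;                  /* at most one of these can be set */
    }
  return cy;
}

The assembly keeps that carry live in the flags across the whole 4-way unrolled loop: lea advances the index and jrcxz tests it for zero, neither of which clobbers the carry the ADCSBB chain depends on.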
diff --git a/sysdeps/x86_64/addmul_1.S b/sysdeps/x86_64/addmul_1.S
index bdb5226a33..e997896703 100644
--- a/sysdeps/x86_64/addmul_1.S
+++ b/sysdeps/x86_64/addmul_1.S
@@ -1,6 +1,6 @@
-/* AMD64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+/* x86-64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
the result to a second limb vector.
- Copyright (C) 2004 Free Software Foundation, Inc.
+ Copyright (C) 2003,2004,2005,2007,2008,2009 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
@@ -21,26 +21,95 @@
#include "sysdep.h"
#include "asm-syntax.h"
+#define rp %rdi
+#define up %rsi
+#define n %rdx
+#define v0 %rcx
+
+#ifndef func
+# define func __mpn_addmul_1
+# define ADDSUB add
+#endif
+
.text
-ENTRY (__mpn_addmul_1)
- movq %rdx, %r11
- leaq (%rsi,%rdx,8), %rsi
- leaq (%rdi,%rdx,8), %rdi
- negq %r11
- xorl %r8d, %r8d
- xorl %r10d, %r10d
- .p2align 2
-L(loop):
- movq (%rsi,%r11,8), %rax
- mulq %rcx
- addq (%rdi,%r11,8), %rax
- adcq %r10, %rdx
- addq %r8, %rax
- movq %r10, %r8
- movq %rax, (%rdi,%r11,8)
- adcq %rdx, %r8
- incq %r11
- jne L(loop)
- movq %r8, %rax
+ENTRY (func)
+ push %rbx
+ push %rbp
+ lea (%rdx), %rbx
+ neg %rbx
+
+ mov (up), %rax
+ mov (rp), %r10
+
+ lea -16(rp,%rdx,8), rp
+ lea (up,%rdx,8), up
+ mul %rcx
+
+ bt $0, %ebx
+ jc L(odd)
+
+ lea (%rax), %r11
+ mov 8(up,%rbx,8), %rax
+ lea (%rdx), %rbp
+ mul %rcx
+ add $2, %rbx
+ jns L(n2)
+
+ lea (%rax), %r8
+ mov (up,%rbx,8), %rax
+ lea (%rdx), %r9
+ jmp L(mid)
+
+L(odd): add $1, %rbx
+ jns L(n1)
+
+ lea (%rax), %r8
+ mov (up,%rbx,8), %rax
+ lea (%rdx), %r9
+ mul %rcx
+ lea (%rax), %r11
+ mov 8(up,%rbx,8), %rax
+ lea (%rdx), %rbp
+ jmp L(e)
+
+ .p2align 4
+L(top): mul %rcx
+ ADDSUB %r8, %r10
+ lea (%rax), %r8
+ mov (up,%rbx,8), %rax
+ adc %r9, %r11
+ mov %r10, -8(rp,%rbx,8)
+ mov (rp,%rbx,8), %r10
+ lea (%rdx), %r9
+ adc $0, %rbp
+L(mid): mul %rcx
+ ADDSUB %r11, %r10
+ lea (%rax), %r11
+ mov 8(up,%rbx,8), %rax
+ adc %rbp, %r8
+ mov %r10, (rp,%rbx,8)
+ mov 8(rp,%rbx,8), %r10
+ lea (%rdx), %rbp
+ adc $0, %r9
+L(e): add $2, %rbx
+ js L(top)
+
+ mul %rcx
+ ADDSUB %r8, %r10
+ adc %r9, %r11
+ mov %r10, -8(rp)
+ adc $0, %rbp
+L(n2): mov (rp), %r10
+ ADDSUB %r11, %r10
+ adc %rbp, %rax
+ mov %r10, (rp)
+ adc $0, %rdx
+L(n1): mov 8(rp), %r10
+ ADDSUB %rax, %r10
+ mov %r10, 8(rp)
+ mov %ebx, %eax /* zero rax */
+ adc %rdx, %rax
+ pop %rbp
+ pop %rbx
ret
-END (__mpn_addmul_1)
+END (func)
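For reference, a minimal portable C sketch of the addmul_1 semantics (ref_addmul_1 is a hypothetical name, not the glibc/GMP code; it assumes the GCC/Clang unsigned __int128 extension for the 64x64->128 multiply that mul performs, and the mp_limb_t typedef from the add_n sketch above). {rp,n} += {up,n} * v0, and the limb that falls out of the top is returned:

mp_limb_t
ref_addmul_1 (mp_limb_t *rp, const mp_limb_t *up, long n, mp_limb_t v0)
{
  mp_limb_t cy = 0;                          /* limb carried into the next position */
  for (long i = 0; i < n; i++)
    {
      unsigned __int128 p = (unsigned __int128) up[i] * v0;
      mp_limb_t lo = (mp_limb_t) p;
      mp_limb_t hi = (mp_limb_t) (p >> 64);
      mp_limb_t s = rp[i] + lo;
      hi += s < lo;                          /* carry from adding into rp[i] */
      mp_limb_t r = s + cy;
      hi += r < s;                           /* carry from the running carry */
      rp[i] = r;
      cy = hi;                               /* never overflows a limb */
    }
  return cy;
}

The assembly version is 2-way unrolled and overlaps the next mul with the adds of the current iteration; submul_1.S (further down) reuses this body with ADDSUB redefined to sub.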
diff --git a/sysdeps/x86_64/lshift.S b/sysdeps/x86_64/lshift.S
index 5ac66f0a36..f89d3e09b3 100644
--- a/sysdeps/x86_64/lshift.S
+++ b/sysdeps/x86_64/lshift.S
@@ -1,5 +1,5 @@
-/* AMD64 __mpn_lshift --
- Copyright 2004, 2006 Free Software Foundation, Inc.
+/* x86-64 __mpn_lshift --
+ Copyright (C) 2007, 2009 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
@@ -20,41 +20,98 @@
#include "sysdep.h"
#include "asm-syntax.h"
+#define rp %rdi
+#define up %rsi
+#define n %rdx
+#define cnt %cl
.text
ENTRY (__mpn_lshift)
- movq -8(%rsi,%rdx,8), %mm7
- movd %ecx, %mm1
- movl $64, %eax
- subl %ecx, %eax
- movd %eax, %mm0
- movq %mm7, %mm3
- psrlq %mm0, %mm7
- movd %mm7, %rax
- subq $2, %rdx
- jl L(endo)
- .p2align 2
-L(loop):
- movq (%rsi,%rdx,8), %mm6
- movq %mm6, %mm2
- psrlq %mm0, %mm6
- psllq %mm1, %mm3
- por %mm6, %mm3
- movq %mm3, 8(%rdi,%rdx,8)
- je L(ende)
- movq -8(%rsi,%rdx,8), %mm7
- movq %mm7, %mm3
- psrlq %mm0, %mm7
- psllq %mm1, %mm2
- por %mm7, %mm2
- movq %mm2, (%rdi,%rdx,8)
- subq $2, %rdx
- jge L(loop)
-L(endo):
- movq %mm3, %mm2
-L(ende):
- psllq %mm1, %mm2
- movq %mm2, (%rdi)
- emms
+ lea -8(rp,n,8), rp
+ lea -8(up,n,8), up
+
+ mov %edx, %eax
+ and $3, %eax
+ jne L(nb00)
+L(b00): /* n = 4, 8, 12, ... */
+ mov (up), %r10
+ mov -8(up), %r11
+ xor %eax, %eax
+ shld %cl, %r10, %rax
+ mov -16(up), %r8
+ lea 24(rp), rp
+ sub $4, n
+ jmp L(00)
+
+L(nb00):/* n = 1, 5, 9, ... */
+ cmp $2, %eax
+ jae L(nb01)
+L(b01): mov (up), %r9
+ xor %eax, %eax
+ shld %cl, %r9, %rax
+ sub $2, n
+ jb L(le1)
+ mov -8(up), %r10
+ mov -16(up), %r11
+ lea -8(up), up
+ lea 16(rp), rp
+ jmp L(01)
+L(le1): shl %cl, %r9
+ mov %r9, (rp)
+ ret
+
+L(nb01):/* n = 2, 6, 10, ... */
+ jne L(b11)
+L(b10): mov (up), %r8
+ mov -8(up), %r9
+ xor %eax, %eax
+ shld %cl, %r8, %rax
+ sub $3, n
+ jb L(le2)
+ mov -16(up), %r10
+ lea -16(up), up
+ lea 8(rp), rp
+ jmp L(10)
+L(le2): shld %cl, %r9, %r8
+ mov %r8, (rp)
+ shl %cl, %r9
+ mov %r9, -8(rp)
+ ret
+
+ .p2align 4 /* performance critical! */
+L(b11): /* n = 3, 7, 11, ... */
+ mov (up), %r11
+ mov -8(up), %r8
+ xor %eax, %eax
+ shld %cl, %r11, %rax
+ mov -16(up), %r9
+ lea -24(up), up
+ sub $4, n
+ jb L(end)
+
+ .p2align 4
+L(top): shld %cl, %r8, %r11
+ mov (up), %r10
+ mov %r11, (rp)
+L(10): shld %cl, %r9, %r8
+ mov -8(up), %r11
+ mov %r8, -8(rp)
+L(01): shld %cl, %r10, %r9
+ mov -16(up), %r8
+ mov %r9, -16(rp)
+L(00): shld %cl, %r11, %r10
+ mov -24(up), %r9
+ mov %r10, -24(rp)
+ add $-32, up
+ lea -32(rp), rp
+ sub $4, n
+ jnc L(top)
+
+L(end): shld %cl, %r8, %r11
+ mov %r11, (rp)
+ shld %cl, %r9, %r8
+ mov %r8, -8(rp)
+ shl %cl, %r9
+ mov %r9, -16(rp)
ret
END (__mpn_lshift)
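A portable sketch of the lshift semantics (ref_lshift is a hypothetical name, not the glibc/GMP code): shift the n-limb operand left by cnt bits, 1 <= cnt <= 63, store the result at rp, and return the bits shifted out of the most significant limb. Working from the top limb down, as the shld-based code does, is what makes an in-place call with rp == up safe:

mp_limb_t
ref_lshift (mp_limb_t *rp, const mp_limb_t *up, long n, unsigned int cnt)
{
  mp_limb_t retval = up[n - 1] >> (64 - cnt);   /* bits pushed out of the top */
  for (long i = n - 1; i > 0; i--)
    rp[i] = (up[i] << cnt) | (up[i - 1] >> (64 - cnt));
  rp[0] = up[0] << cnt;
  return retval;
}

Each shld in the unrolled loop is exactly one of these (up[i] << cnt) | (up[i-1] >> (64 - cnt)) steps; the b00/b01/b10/b11 entry points only handle n mod 4 before the 4-way loop starts.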
diff --git a/sysdeps/x86_64/mul_1.S b/sysdeps/x86_64/mul_1.S
index 978916b72c..676afd1755 100644
--- a/sysdeps/x86_64/mul_1.S
+++ b/sysdeps/x86_64/mul_1.S
@@ -1,6 +1,6 @@
/* AMD64 __mpn_mul_1 -- Multiply a limb vector with a limb and store
the result in a second limb vector.
- Copyright (C) 2004 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
@@ -21,22 +21,109 @@
#include <sysdep.h>
#include "asm-syntax.h"
+#define rp %rdi
+#define up %rsi
+#define n_param %rdx
+#define vl %rcx
+
+#define n %r11
+
.text
ENTRY (__mpn_mul_1)
- movq %rdx, %r11
- leaq (%rsi,%rdx,8), %rsi
- leaq (%rdi,%rdx,8), %rdi
- negq %r11
- xorl %r8d, %r8d
-L(loop):
- movq (%rsi,%r11,8), %rax
- mulq %rcx
- addq %r8, %rax
- movl $0, %r8d
- adcq %rdx, %r8
- movq %rax, (%rdi,%r11,8)
- incq %r11
- jne L(loop)
- movq %r8, %rax
+ push %rbx
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbx, 0)
+ xor %r10, %r10
+ mov (up), %rax /* read first u limb early */
+ mov n_param, %rbx /* move away n from rdx, mul uses it */
+ mul vl
+ mov %rbx, %r11
+
+ add %r10, %rax
+ adc $0, %rdx
+
+ and $3, %ebx
+ jz L(b0)
+ cmp $2, %ebx
+ jz L(b2)
+ jg L(b3)
+
+L(b1): dec n
+ jne L(gt1)
+ mov %rax, (rp)
+ jmp L(ret)
+L(gt1): lea 8(up,n,8), up
+ lea -8(rp,n,8), rp
+ neg n
+ xor %r10, %r10
+ xor %ebx, %ebx
+ mov %rax, %r9
+ mov (up,n,8), %rax
+ mov %rdx, %r8
+ jmp L(L1)
+
+L(b0): lea (up,n,8), up
+ lea -16(rp,n,8), rp
+ neg n
+ xor %r10, %r10
+ mov %rax, %r8
+ mov %rdx, %rbx
+ jmp L(L0)
+
+L(b3): lea -8(up,n,8), up
+ lea -24(rp,n,8), rp
+ neg n
+ mov %rax, %rbx
+ mov %rdx, %r10
+ jmp L(L3)
+
+L(b2): lea -16(up,n,8), up
+ lea -32(rp,n,8), rp
+ neg n
+ xor %r8, %r8
+ xor %ebx, %ebx
+ mov %rax, %r10
+ mov 24(up,n,8), %rax
+ mov %rdx, %r9
+ jmp L(L2)
+
+ .p2align 4
+L(top): mov %r10, (rp,n,8)
+ add %rax, %r9
+ mov (up,n,8), %rax
+ adc %rdx, %r8
+ mov $0, %r10d
+L(L1): mul vl
+ mov %r9, 8(rp,n,8)
+ add %rax, %r8
+ adc %rdx, %rbx
+L(L0): mov 8(up,n,8), %rax
+ mul vl
+ mov %r8, 16(rp,n,8)
+ add %rax, %rbx
+ adc %rdx, %r10
+L(L3): mov 16(up,n,8), %rax
+ mul vl
+ mov %rbx, 24(rp,n,8)
+ mov $0, %r8d # zero
+ mov %r8, %rbx # zero
+ add %rax, %r10
+ mov 24(up,n,8), %rax
+ mov %r8, %r9 # zero
+ adc %rdx, %r9
+L(L2): mul vl
+ add $4, n
+ js L(top)
+
+ mov %r10, (rp,n,8)
+ add %rax, %r9
+ adc %r8, %rdx
+ mov %r9, 8(rp,n,8)
+ add %r8, %rdx
+L(ret): mov %rdx, %rax
+
+ pop %rbx
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbx)
ret
END (__mpn_mul_1)
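mul_1 is the store-only counterpart of addmul_1: {rp,n} = {up,n} * vl, returning the high limb. A minimal portable sketch (ref_mul_1 is a hypothetical name, not the glibc/GMP code, again assuming unsigned __int128 for the widening multiply):

mp_limb_t
ref_mul_1 (mp_limb_t *rp, const mp_limb_t *up, long n, mp_limb_t vl)
{
  mp_limb_t cy = 0;
  for (long i = 0; i < n; i++)
    {
      /* up[i] * vl + cy always fits in 128 bits, so no carry is lost.  */
      unsigned __int128 p = (unsigned __int128) up[i] * vl + cy;
      rp[i] = (mp_limb_t) p;
      cy = (mp_limb_t) (p >> 64);
    }
  return cy;
}

The assembly is 4-way unrolled, with L(b0)..L(b3) selecting the entry point according to n mod 4.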
diff --git a/sysdeps/x86_64/rshift.S b/sysdeps/x86_64/rshift.S
index ee0c8aa15c..8ff055169a 100644
--- a/sysdeps/x86_64/rshift.S
+++ b/sysdeps/x86_64/rshift.S
@@ -1,5 +1,5 @@
-/* AMD64 __mpn_rshift --
- Copyright (C) 2004, 2006 Free Software Foundation, Inc.
+/* x86-64 __mpn_rshift --
+ Copyright (C) 2007, 2009 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
@@ -20,43 +20,96 @@
#include "sysdep.h"
#include "asm-syntax.h"
+#define rp %rdi
+#define up %rsi
+#define n %rdx
+#define cnt %cl
+
.text
ENTRY (__mpn_rshift)
- movq (%rsi), %mm7
- movd %ecx, %mm1
- movl $64, %eax
- subl %ecx, %eax
- movd %eax, %mm0
- movq %mm7, %mm3
- psllq %mm0, %mm7
- movd %mm7, %rax
- leaq (%rsi,%rdx,8), %rsi
- leaq (%rdi,%rdx,8), %rdi
- negq %rdx
- addq $2, %rdx
- jg L(endo)
- .p2align 2
-L(loop):
- movq -8(%rsi,%rdx,8), %mm6
- movq %mm6, %mm2
- psllq %mm0, %mm6
- psrlq %mm1, %mm3
- por %mm6, %mm3
- movq %mm3, -16(%rdi,%rdx,8)
- je L(ende)
- movq (%rsi,%rdx,8), %mm7
- movq %mm7, %mm3
- psllq %mm0, %mm7
- psrlq %mm1, %mm2
- por %mm7, %mm2
- movq %mm2, -8(%rdi,%rdx,8)
- addq $2, %rdx
- jle L(loop)
-L(endo):
- movq %mm3, %mm2
-L(ende):
- psrlq %mm1, %mm2
- movq %mm2, -8(%rdi)
- emms
+ mov %edx, %eax
+ and $3, %eax
+ jne L(nb00)
+L(b00): /* n = 4, 8, 12, ... */
+ mov (up), %r10
+ mov 8(up), %r11
+ xor %eax, %eax
+ shrd %cl, %r10, %rax
+ mov 16(up), %r8
+ lea 8(up), up
+ lea -24(rp), rp
+ sub $4, n
+ jmp L(00)
+
+L(nb00):/* n = 1, 5, 9, ... */
+ cmp $2, %eax
+ jae L(nb01)
+L(b01): mov (up), %r9
+ xor %eax, %eax
+ shrd %cl, %r9, %rax
+ sub $2, n
+ jb L(le1)
+ mov 8(up), %r10
+ mov 16(up), %r11
+ lea 16(up), up
+ lea -16(rp), rp
+ jmp L(01)
+L(le1): shr %cl, %r9
+ mov %r9, (rp)
+ ret
+
+L(nb01):/* n = 2, 6, 10, ... */
+ jne L(b11)
+L(b10): mov (up), %r8
+ mov 8(up), %r9
+ xor %eax, %eax
+ shrd %cl, %r8, %rax
+ sub $3, n
+ jb L(le2)
+ mov 16(up), %r10
+ lea 24(up), up
+ lea -8(rp), rp
+ jmp L(10)
+L(le2): shrd %cl, %r9, %r8
+ mov %r8, (rp)
+ shr %cl, %r9
+ mov %r9, 8(rp)
+ ret
+
+ .p2align 4
+L(b11): /* n = 3, 7, 11, ... */
+ mov (up), %r11
+ mov 8(up), %r8
+ xor %eax, %eax
+ shrd %cl, %r11, %rax
+ mov 16(up), %r9
+ lea 32(up), up
+ sub $4, n
+ jb L(end)
+
+ .p2align 4
+L(top): shrd %cl, %r8, %r11
+ mov -8(up), %r10
+ mov %r11, (rp)
+L(10): shrd %cl, %r9, %r8
+ mov (up), %r11
+ mov %r8, 8(rp)
+L(01): shrd %cl, %r10, %r9
+ mov 8(up), %r8
+ mov %r9, 16(rp)
+L(00): shrd %cl, %r11, %r10
+ mov 16(up), %r9
+ mov %r10, 24(rp)
+ add $32, up
+ lea 32(rp), rp
+ sub $4, n
+ jnc L(top)
+
+L(end): shrd %cl, %r8, %r11
+ mov %r11, (rp)
+ shrd %cl, %r9, %r8
+ mov %r8, 8(rp)
+ shr %cl, %r9
+ mov %r9, 16(rp)
ret
END (__mpn_rshift)
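rshift mirrors lshift: shift right by cnt bits, 1 <= cnt <= 63, and return the bits shifted out of the least significant limb, left-justified in the returned value. A portable sketch (ref_rshift is a hypothetical name, not the glibc/GMP code); here the low-to-high traversal is what allows rp == up in-place use:

mp_limb_t
ref_rshift (mp_limb_t *rp, const mp_limb_t *up, long n, unsigned int cnt)
{
  mp_limb_t retval = up[0] << (64 - cnt);       /* bits pushed out of the bottom */
  for (long i = 0; i < n - 1; i++)
    rp[i] = (up[i] >> cnt) | (up[i + 1] << (64 - cnt));
  rp[n - 1] = up[n - 1] >> cnt;
  return retval;
}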
diff --git a/sysdeps/x86_64/sub_n.S b/sysdeps/x86_64/sub_n.S
index 48e1a2e0f4..60c15fc3e1 100644
--- a/sysdeps/x86_64/sub_n.S
+++ b/sysdeps/x86_64/sub_n.S
@@ -1,6 +1,6 @@
-/* AMD64 __mpn_sub_n -- Add two limb vectors of the same length > 0 and store
+/* x86-64 __mpn_sub_n -- Add two limb vectors of the same length > 0 and store
sum in a third limb vector.
- Copyright (C) 2004 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
@@ -18,25 +18,7 @@
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
-#include "sysdep.h"
-#include "asm-syntax.h"
+#define func __mpn_sub_n
+#define ADCSBB sbb
- .text
-ENTRY (__mpn_sub_n)
- leaq (%rsi,%rcx,8), %rsi
- leaq (%rdi,%rcx,8), %rdi
- leaq (%rdx,%rcx,8), %rdx
- negq %rcx
- xorl %eax, %eax # clear cy
- .p2align 2
-L(loop):
- movq (%rsi,%rcx,8), %rax
- movq (%rdx,%rcx,8), %r10
- sbbq %r10, %rax
- movq %rax, (%rdi,%rcx,8)
- incq %rcx
- jne L(loop)
- movq %rcx, %rax # zero %rax
- adcq %rax, %rax
- ret
-END (__mpn_sub_n)
+#include "add_n.S"
diff --git a/sysdeps/x86_64/submul_1.S b/sysdeps/x86_64/submul_1.S
index e94c9a7bee..150a92762f 100644
--- a/sysdeps/x86_64/submul_1.S
+++ b/sysdeps/x86_64/submul_1.S
@@ -1,6 +1,6 @@
-/* AMD64 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+/* x86-64 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
the result from a second limb vector.
- Copyright (C) 2004 Free Software Foundation, Inc.
+ Copyright (C) 2003,2004,2005,2007,2008,2009 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
@@ -18,29 +18,7 @@
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
-#include "sysdep.h"
-#include "asm-syntax.h"
+#define func __mpn_submul_1
+#define ADDSUB sub
- .text
-ENTRY (__mpn_submul_1)
- movq %rdx, %r11
- leaq (%rsi,%r11,8), %rsi
- leaq (%rdi,%r11,8), %rdi
- negq %r11
- xorl %r8d, %r8d
- .p2align 3
-L(loop):
- movq (%rsi,%r11,8), %rax
- movq (%rdi,%r11,8), %r10
- mulq %rcx
- subq %r8, %r10
- movl $0, %r8d
- adcl %r8d, %r8d
- subq %rax, %r10
- adcq %rdx, %r8
- movq %r10, (%rdi,%r11,8)
- incq %r11
- jne L(loop)
- movq %r8, %rax
- ret
-END (__mpn_submul_1)
+#include "addmul_1.S"