about summary refs log tree commit diff
path: root/REORG.TODO/sysdeps/sparc/sparc32/add_n.S
diff options
context:
space:
mode:
Diffstat (limited to 'REORG.TODO/sysdeps/sparc/sparc32/add_n.S')
-rw-r--r--  REORG.TODO/sysdeps/sparc/sparc32/add_n.S  237
1 files changed, 237 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/sparc/sparc32/add_n.S b/REORG.TODO/sysdeps/sparc/sparc32/add_n.S
new file mode 100644
index 0000000000..75289af5f9
--- /dev/null
+++ b/REORG.TODO/sysdeps/sparc/sparc32/add_n.S
@@ -0,0 +1,237 @@
+! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+! sum in a third limb vector.
+!
+! Copyright (C) 1995-2017 Free Software Foundation, Inc.
+!
+! This file is part of the GNU MP Library.
+!
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+!
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+!
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not,
+! see <http://www.gnu.org/licenses/>.
+
+
+! INPUT PARAMETERS
+#define RES_PTR %o0
+#define S1_PTR %o1
+#define S2_PTR %o2
+#define SIZE %o3
+
+#include <sysdep.h>
+
+ENTRY(__mpn_add_n)
+! mp_limb_t __mpn_add_n (mp_limb_t *RES_PTR, const mp_limb_t *S1_PTR,
+!                        const mp_limb_t *S2_PTR, mp_size_t SIZE)
+! Computes {RES_PTR,SIZE} = {S1_PTR,SIZE} + {S2_PTR,SIZE}; returns the
+! carry-out of the most significant limb in %o0.  SIZE must be > 0.
+!
+! ldd/std move two 32-bit limbs at once but require doubleword (8-byte)
+! aligned addresses, so one of three variants is chosen from the relative
+! word alignment of the three pointers: V1a (S2 aligns with RES), V1b
+! (S1 aligns with RES; swap sources and reuse V1a), V2 (the two sources
+! align with each other but not with RES).
+!
+! NOTE: SPARC branches have a delay slot: the instruction textually after
+! a branch executes before the branch takes effect.  The code exploits
+! this by putting pointer updates and the carry-restore into delay slots.
+!
+! Carry save/restore idiom used throughout:
+!   addx  %g0,%g0,%o4     ! %o4 = current carry bit (0 or 1)
+!   subcc %g0,%o4,%g0     ! computes 0-%o4, setting carry iff %o4 == 1
+! This lets the carry survive the addcc used for the loop counter.
+ xor S2_PTR,RES_PTR,%g1
+ andcc %g1,4,%g0 ! do S2_PTR and RES_PTR have the same word alignment?
+ bne LOC(1) ! branch if alignment differs
+ nop
+! ** V1a **  S2_PTR and RES_PTR share alignment: ldd from S2, std to RES.
+LOC(0): andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0
+ be LOC(v1) ! if no, branch
+ nop
+/* Add least significant limb separately to align RES_PTR and S2_PTR */
+ ld [S1_PTR],%g4
+ add S1_PTR,4,S1_PTR
+ ld [S2_PTR],%g2
+ add S2_PTR,4,S2_PTR
+ add SIZE,-1,SIZE
+ addcc %g4,%g2,%o4
+ st %o4,[RES_PTR]
+ add RES_PTR,4,RES_PTR
+LOC(v1):
+ addx %g0,%g0,%o4 ! save cy in register
+ cmp SIZE,2 ! if SIZE < 2 ...
+ bl LOC(end2) ! ... branch to tail code (shared with V2; uses word loads)
+ subcc %g0,%o4,%g0 ! restore cy
+
+! Software pipeline: preload the first limb pair (S1 via two word loads
+! into %g4/%g1, S2 via one ldd into the %g2/%g3 register pair), then
+! enter the 8-limb unrolled loop only if at least 10 limbs remain.
+ ld [S1_PTR+0],%g4
+ addcc SIZE,-10,SIZE
+ ld [S1_PTR+4],%g1
+ ldd [S2_PTR+0],%g2
+ blt LOC(fin1)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+! Each iteration is four 2-limb groups: add the preloaded %g4/%g1 to
+! %g2/%g3 into %o4/%o5, store the pair with std, and preload the next
+! pair from both sources while the stores drain.
+LOC(loop1):
+ addxcc %g4,%g2,%o4
+ ld [S1_PTR+8],%g4
+ addxcc %g1,%g3,%o5
+ ld [S1_PTR+12],%g1
+ ldd [S2_PTR+8],%g2
+ std %o4,[RES_PTR+0]
+ addxcc %g4,%g2,%o4
+ ld [S1_PTR+16],%g4
+ addxcc %g1,%g3,%o5
+ ld [S1_PTR+20],%g1
+ ldd [S2_PTR+16],%g2
+ std %o4,[RES_PTR+8]
+ addxcc %g4,%g2,%o4
+ ld [S1_PTR+24],%g4
+ addxcc %g1,%g3,%o5
+ ld [S1_PTR+28],%g1
+ ldd [S2_PTR+24],%g2
+ std %o4,[RES_PTR+16]
+ addxcc %g4,%g2,%o4
+ ld [S1_PTR+32],%g4
+ addxcc %g1,%g3,%o5
+ ld [S1_PTR+36],%g1
+ ldd [S2_PTR+32],%g2
+ std %o4,[RES_PTR+24]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc SIZE,-8,SIZE
+ add S1_PTR,32,S1_PTR
+ add S2_PTR,32,S2_PTR
+ add RES_PTR,32,RES_PTR
+ bge LOC(loop1)
+ subcc %g0,%o4,%g0 ! restore cy
+
+LOC(fin1):
+ addcc SIZE,8-2,SIZE ! undo loop bias: SIZE = limbs left minus the 2 preloaded
+ blt LOC(end1)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+LOC(loope1):
+ addxcc %g4,%g2,%o4
+ ld [S1_PTR+8],%g4
+ addxcc %g1,%g3,%o5
+ ld [S1_PTR+12],%g1
+ ldd [S2_PTR+8],%g2
+ std %o4,[RES_PTR+0]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc SIZE,-2,SIZE
+ add S1_PTR,8,S1_PTR
+ add S2_PTR,8,S2_PTR
+ add RES_PTR,8,RES_PTR
+ bge LOC(loope1)
+ subcc %g0,%o4,%g0 ! restore cy
+LOC(end1):
+! Flush the final preloaded limb pair.
+ addxcc %g4,%g2,%o4
+ addxcc %g1,%g3,%o5
+ std %o4,[RES_PTR+0]
+ addx %g0,%g0,%o4 ! save cy in register
+
+ andcc SIZE,1,%g0 ! one odd limb left over?
+ be LOC(ret1)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add last limb */
+ ld [S1_PTR+8],%g4
+ ld [S2_PTR+8],%g2
+ addxcc %g4,%g2,%o4
+ st %o4,[RES_PTR+8]
+
+LOC(ret1):
+ retl
+ addx %g0,%g0,%o0 ! return carry-out from most sign. limb
+
+LOC(1): xor S1_PTR,RES_PTR,%g1 ! do S1_PTR and RES_PTR share alignment?
+ andcc %g1,4,%g0
+ bne LOC(2)
+ nop
+! ** V1b **  S1_PTR aligns with RES_PTR: swap the two source pointers
+! (addition is commutative) and fall into the V1a code above.
+ mov S2_PTR,%g1
+ mov S1_PTR,S2_PTR
+ b LOC(0)
+ mov %g1,S1_PTR ! delay slot: completes the swap before the branch lands
+
+! ** V2 **
+/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
+   alignment of S2_PTR and RES_PTR differ.  Since there are only two ways
+   things can be aligned (that we care about) we now know that the alignment
+   of S1_PTR and S2_PTR are the same.  */
+
+LOC(2): cmp SIZE,1 ! single-limb job: word loads work for any alignment
+ be LOC(jone)
+ nop
+ andcc S1_PTR,4,%g0 ! S1_PTR unaligned? Side effect: cy=0
+ be LOC(v2) ! if no, branch
+ nop
+/* Add least significant limb separately to align S1_PTR and S2_PTR */
+ ld [S1_PTR],%g4
+ add S1_PTR,4,S1_PTR
+ ld [S2_PTR],%g2
+ add S2_PTR,4,S2_PTR
+ add SIZE,-1,SIZE
+ addcc %g4,%g2,%o4
+ st %o4,[RES_PTR]
+ add RES_PTR,4,RES_PTR
+
+LOC(v2):
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc SIZE,-8,SIZE
+ blt LOC(fin2)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+! Sources share doubleword alignment but RES does not: ldd both sources
+! into %g2/%g3 and %o4/%o5, add pairwise, store with two word st's.
+LOC(loop2):
+ ldd [S1_PTR+0],%g2
+ ldd [S2_PTR+0],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[RES_PTR+0]
+ addxcc %g3,%o5,%g3
+ st %g3,[RES_PTR+4]
+ ldd [S1_PTR+8],%g2
+ ldd [S2_PTR+8],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[RES_PTR+8]
+ addxcc %g3,%o5,%g3
+ st %g3,[RES_PTR+12]
+ ldd [S1_PTR+16],%g2
+ ldd [S2_PTR+16],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[RES_PTR+16]
+ addxcc %g3,%o5,%g3
+ st %g3,[RES_PTR+20]
+ ldd [S1_PTR+24],%g2
+ ldd [S2_PTR+24],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[RES_PTR+24]
+ addxcc %g3,%o5,%g3
+ st %g3,[RES_PTR+28]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc SIZE,-8,SIZE
+ add S1_PTR,32,S1_PTR
+ add S2_PTR,32,S2_PTR
+ add RES_PTR,32,RES_PTR
+ bge LOC(loop2)
+ subcc %g0,%o4,%g0 ! restore cy
+
+LOC(fin2):
+ addcc SIZE,8-2,SIZE ! undo loop bias; handle 2 limbs at a time below
+ blt LOC(end2)
+ subcc %g0,%o4,%g0 ! restore cy
+LOC(loope2):
+ ldd [S1_PTR+0],%g2
+ ldd [S2_PTR+0],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[RES_PTR+0]
+ addxcc %g3,%o5,%g3
+ st %g3,[RES_PTR+4]
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc SIZE,-2,SIZE
+ add S1_PTR,8,S1_PTR
+ add S2_PTR,8,S2_PTR
+ add RES_PTR,8,RES_PTR
+ bge LOC(loope2)
+ subcc %g0,%o4,%g0 ! restore cy
+LOC(end2):
+ andcc SIZE,1,%g0 ! one odd limb left over?
+ be LOC(ret2)
+ subcc %g0,%o4,%g0 ! restore cy
+/* Add last limb */
+LOC(jone):
+ ld [S1_PTR],%g4
+ ld [S2_PTR],%g2
+ addxcc %g4,%g2,%o4
+ st %o4,[RES_PTR]
+
+LOC(ret2):
+ retl
+ addx %g0,%g0,%o0 ! return carry-out from most sign. limb
+
+END(__mpn_add_n)