diff options
author | David S. Miller <davem@davemloft.net> | 2013-01-11 20:53:10 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-01-11 20:53:10 -0800 |
commit | 4ae4244d4b63a8974af4b6ac28ff32941086781e (patch) | |
tree | bc6e7fb632db08b0f0e3a03e967834cba36bb57f /sysdeps/sparc/sparc32 | |
parent | 8794a96418e19262d805cba44046482f41906356 (diff) | |
download | glibc-4ae4244d4b63a8974af4b6ac28ff32941086781e.tar glibc-4ae4244d4b63a8974af4b6ac28ff32941086781e.tar.gz glibc-4ae4244d4b63a8974af4b6ac28ff32941086781e.tar.bz2 glibc-4ae4244d4b63a8974af4b6ac28ff32941086781e.zip |
Redo sparc 32-bit V9 GMP optimizations with fixed copyrights.
* sysdeps/sparc/sparc32/sparcv9/mul_1.S: Properly optimize for 32-bit
sparc V9 rather than using V8 code.
* sysdeps/sparc/sparc32/sparcv9/addmul_1.S: Likewise.
* sysdeps/sparc/sparc32/sparcv9/submul_1.S: Likewise.
Diffstat (limited to 'sysdeps/sparc/sparc32')
-rw-r--r-- | sysdeps/sparc/sparc32/sparcv9/addmul_1.S | 82 | ||||
-rw-r--r-- | sysdeps/sparc/sparc32/sparcv9/mul_1.S | 71 | ||||
-rw-r--r-- | sysdeps/sparc/sparc32/sparcv9/submul_1.S | 83 |
3 files changed, 233 insertions, 3 deletions
diff --git a/sysdeps/sparc/sparc32/sparcv9/addmul_1.S b/sysdeps/sparc/sparc32/sparcv9/addmul_1.S index 563bfb1c0a..7ba81d8a6c 100644 --- a/sysdeps/sparc/sparc32/sparcv9/addmul_1.S +++ b/sysdeps/sparc/sparc32/sparcv9/addmul_1.S @@ -1 +1,81 @@ -#include <sparcv8/addmul_1.S> +! SPARC v9 32-bit __mpn_addmul_1 -- Multiply a limb vector with a limb +! and add the result to a second limb vector. +! +! Copyright (C) 2013 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller <davem@davemloft.net> +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! <http://www.gnu.org/licenses/>. + +#include <sysdep.h> + +#define res_ptr %i0 +#define s1_ptr %i1 +#define sz_arg %i2 +#define s2l_arg %i3 +#define sz %o4 +#define carry %o5 +#define s2_limb %g1 +#define tmp1 %l0 +#define tmp2 %l1 +#define tmp3 %l2 +#define tmp4 %l3 +#define tmp64_1 %g3 +#define tmp64_2 %o3 + +ENTRY(__mpn_addmul_1) + save %sp, -96, %sp + srl sz_arg, 0, sz + srl s2l_arg, 0, s2_limb + subcc sz, 1, sz + be,pn %icc, .Lfinal_limb + clr carry + +.Lloop: + lduw [s1_ptr + 0x00], tmp1 + lduw [res_ptr + 0x00], tmp3 + lduw [s1_ptr + 0x04], tmp2 + lduw [res_ptr + 0x04], tmp4 + mulx tmp1, s2_limb, tmp64_1 + add s1_ptr, 8, s1_ptr + mulx tmp2, s2_limb, tmp64_2 + sub sz, 2, sz + add res_ptr, 8, res_ptr + add tmp3, tmp64_1, tmp64_1 + add carry, tmp64_1, tmp64_1 + stw tmp64_1, [res_ptr - 0x08] + srlx tmp64_1, 32, carry + add tmp4, tmp64_2, tmp64_2 + add carry, tmp64_2, tmp64_2 + stw tmp64_2, [res_ptr - 0x04] + brgz sz, .Lloop + srlx tmp64_2, 32, carry + + brlz,pt sz, .Lfinish + nop + +.Lfinal_limb: + lduw [s1_ptr + 0x00], tmp1 + lduw [res_ptr + 0x00], tmp3 + mulx tmp1, s2_limb, tmp64_1 + add tmp3, tmp64_1, tmp64_1 + add carry, tmp64_1, tmp64_1 + stw tmp64_1, [res_ptr + 0x00] + srlx tmp64_1, 32, carry + +.Lfinish: + jmpl %i7 + 0x8, %g0 + restore carry, 0, %o0 +END(__mpn_addmul_1) diff --git a/sysdeps/sparc/sparc32/sparcv9/mul_1.S b/sysdeps/sparc/sparc32/sparcv9/mul_1.S index 42284eada6..e9a537196e 100644 --- a/sysdeps/sparc/sparc32/sparcv9/mul_1.S +++ b/sysdeps/sparc/sparc32/sparcv9/mul_1.S @@ -1 +1,70 @@ -#include <sparcv8/mul_1.S> +! SPARC v9 32-bit __mpn_mul_1 -- Multiply a limb vector with a single +! limb and store the product in a second limb vector. +! +! Copyright (C) 2013 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller <davem@davemloft.net> +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! <http://www.gnu.org/licenses/>. + +#include <sysdep.h> + +#define res_ptr %o0 +#define s1_ptr %o1 +#define sz %o2 +#define s2_limb %o3 +#define carry %o5 +#define tmp1 %g1 +#define tmp2 %g2 +#define tmp3 %g3 +#define tmp4 %o4 + +ENTRY(__mpn_mul_1) + srl sz, 0, sz + srl s2_limb, 0, s2_limb + subcc sz, 1, sz + be,pn %icc, .Lfinal_limb + clr carry + +.Lloop: + lduw [s1_ptr + 0x00], tmp1 + lduw [s1_ptr + 0x04], tmp2 + mulx tmp1, s2_limb, tmp3 + add s1_ptr, 8, s1_ptr + mulx tmp2, s2_limb, tmp4 + sub sz, 2, sz + add res_ptr, 8, res_ptr + add carry, tmp3, tmp3 + stw tmp3, [res_ptr - 0x08] + srlx tmp3, 32, carry + add carry, tmp4, tmp4 + stw tmp4, [res_ptr - 0x04] + brgz sz, .Lloop + srlx tmp4, 32, carry + + brlz,pt sz, .Lfinish + nop + +.Lfinal_limb: + lduw [s1_ptr + 0x00], tmp1 + mulx tmp1, s2_limb, tmp3 + add carry, tmp3, tmp3 + stw tmp3, [res_ptr + 0x00] + srlx tmp3, 32, carry + +.Lfinish: + retl + mov carry, %o0 +END(__mpn_mul_1) diff --git a/sysdeps/sparc/sparc32/sparcv9/submul_1.S b/sysdeps/sparc/sparc32/sparcv9/submul_1.S index de69533f63..8985e2a4cb 100644 --- a/sysdeps/sparc/sparc32/sparcv9/submul_1.S +++ b/sysdeps/sparc/sparc32/sparcv9/submul_1.S @@ -1 +1,82 @@ -#include <sparcv8/submul_1.S> +! SPARC v9 32-bit __mpn_submul_1 -- Multiply a limb vector with a limb +! and subtract the result from a second limb vector. +! +! Copyright (C) 2013 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller <davem@davemloft.net> +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! <http://www.gnu.org/licenses/>. + +#include <sysdep.h> + +#define res_ptr %i0 +#define s1_ptr %i1 +#define sz_arg %i2 +#define s2l_arg %i3 +#define sz %o4 +#define carry %o5 +#define s2_limb %g1 +#define tmp1 %l0 +#define tmp2 %l1 +#define tmp3 %l2 +#define tmp4 %l3 +#define tmp64_1 %g3 +#define tmp64_2 %o3 + +ENTRY(__mpn_submul_1) + save %sp, -96, %sp + srl sz_arg, 0, sz + srl s2l_arg, 0, s2_limb + subcc sz, 1, sz + be,pn %icc, .Lfinal_limb + subcc %g0, 0, carry + +.Lloop: + lduw [s1_ptr + 0x00], tmp1 + lduw [res_ptr + 0x00], tmp3 + lduw [s1_ptr + 0x04], tmp2 + lduw [res_ptr + 0x04], tmp4 + mulx tmp1, s2_limb, tmp64_1 + add s1_ptr, 8, s1_ptr + mulx tmp2, s2_limb, tmp64_2 + sub sz, 2, sz + add res_ptr, 8, res_ptr + addx carry, tmp64_1, tmp64_1 + srlx tmp64_1, 32, carry + subcc tmp3, tmp64_1, tmp64_1 + stw tmp64_1, [res_ptr - 0x08] + addx carry, tmp64_2, tmp64_2 + srlx tmp64_2, 32, carry + subcc tmp4, tmp64_2, tmp64_2 + brgz sz, .Lloop + stw tmp64_2, [res_ptr - 0x04] + + brlz,pt sz, .Lfinish + nop + +.Lfinal_limb: + lduw [s1_ptr + 0x00], tmp1 + lduw [res_ptr + 0x00], tmp3 + mulx tmp1, s2_limb, tmp64_1 + addx carry, tmp64_1, tmp64_1 + srlx tmp64_1, 32, carry + subcc tmp3, tmp64_1, tmp64_1 + stw tmp64_1, [res_ptr + 0x00] + +.Lfinish: + addx carry, 0, carry + jmpl %i7 + 0x8, %g0 + restore carry, 0, %o0 +END(__mpn_submul_1) |