From 800d775e426b9c0af63f711b79b09bf540c97456 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Fri, 29 Aug 1997 21:01:47 +0000 Subject: Update. 1997-08-29 21:45 Ulrich Drepper * sunrpc/auth_des.c: New file. Copied from former secure_rpc add-on. * sunrpc/authdes_prot.c: New file. Likewise. * sunrpc/des.h: New file. Likewise. * sunrpc/des_crypt.c: New file. Likewise. * sunrpc/des_soft.c: New file. Likewise. * sunrpc/key_call.c: New file. Likewise. * sunrpc/key_prot.c: New file. Likewise. * sunrpc/netname.c: New file. Likewise. * sunrpc/openchild.c: New file. Likewise. * sunrpc/rtime.c: New file. Likewise. * sunrpc/svc_auth.c: New file. Likewise. * sunrpc/svcauth_des.c: New file. Likewise. * sunrpc/xcrypt.c: New file. Likewise. * sunrpc/rpc/auth.h: New file. Likewise. * sunrpc/rpc/auth_des.h: New file. Likewise. * sunrpc/rpc/des_crypt.h: New file. Likewise. * sunrpc/rpc/key_prot.h: New file. Likewise. * sunrpc/rpcsvc/key_prot.x: New file. Likewise. * sysdeps/generic/svc_auth.h: Removed. * sysdeps/generic/rpc/auth.h: Removed. * sysdeps/generic/rpc/auth_des.h: Removed. * sysdeps/stub/des_impl.c: New file. Stub version for DES. * sunrpc/Makefile (rpcsvc): Add keyprot.x. (headers): Add des_crypt.h and key_prot.h. (routines): Add auth_des, authdes_prot, des_crypt, des_impl, des_soft, key_call, key_prot, netname, openchild, rtime, svcauth_des, xcrypt. (distribute): Add des.h. * db2/Makefile: Add all headers and other files to distribute. (others): Add db_printlog. * sysdeps/mach/hurd/Dist: Add net/* headers. * sysdeps/mach/hurd/mips/Dist: New file. * sysdeps/powerpc/Dist: Add fe_nomask.c and t_sqrt.c. * sysdeps/sparc/Dist: Add sys/trap.h. * sysdeps/sparc/sparc32/Dist: Remove sys/trap.h. * sysdeps/sparc/sparc32/sparcv8/Dist: New file. * sysdeps/unix/sysv/linux/mips/Dist: Add sgidefs.h. * sysdeps/unix/sysv/linux/sparc/Dist: Add sys/trap.h. * sysdeps/unix/sysv/linux/sparc/sparc32/Dist: Remove sys/trap.h. Add previously missing assembler files for PPC. 
* sysdeps/powerpc/add_n.s: New file. * sysdeps/powerpc/addmul_1.s: New file. * sysdeps/powerpc/lshift.s: New file. * sysdeps/powerpc/memset.s: New file. * sysdeps/powerpc/mul_1.s: New file. * sysdeps/powerpc/rshift.s: New file. * sysdeps/powerpc/strchr.s: New file. * sysdeps/powerpc/strcmp.s: New file. * sysdeps/powerpc/sub_n.s: New file. * sysdeps/powerpc/submul_1.s: New file. 1997-08-28 18:42 Thorsten Kukuk * nis/nis_server.c: Rewritten to fix a lot of bugs. 1997-08-28 Andreas Schwab * md5-crypt/Makefile (LDFLAGS-md5crypt.so, libmd5crypt-map): New variables. in NLSPATH environment variable. Patch by HJ Lu . --- sysdeps/powerpc/Dist | 2 + sysdeps/powerpc/add_n.s | 68 +++++++ sysdeps/powerpc/addmul_1.s | 50 +++++ sysdeps/powerpc/lshift.s | 479 +++++++++++++++++++++++++++++++++++++++++++++ sysdeps/powerpc/memset.s | 202 +++++++++++++++++++ sysdeps/powerpc/mul_1.s | 47 +++++ sysdeps/powerpc/rshift.s | 59 ++++++ sysdeps/powerpc/strchr.s | 118 +++++++++++ sysdeps/powerpc/strcmp.s | 273 ++++++++++++++++++++++++++ sysdeps/powerpc/sub_n.s | 69 +++++++ sysdeps/powerpc/submul_1.s | 52 +++++ 11 files changed, 1419 insertions(+) create mode 100644 sysdeps/powerpc/add_n.s create mode 100644 sysdeps/powerpc/addmul_1.s create mode 100644 sysdeps/powerpc/lshift.s create mode 100644 sysdeps/powerpc/memset.s create mode 100644 sysdeps/powerpc/mul_1.s create mode 100644 sysdeps/powerpc/rshift.s create mode 100644 sysdeps/powerpc/strchr.s create mode 100644 sysdeps/powerpc/strcmp.s create mode 100644 sysdeps/powerpc/sub_n.s create mode 100644 sysdeps/powerpc/submul_1.s (limited to 'sysdeps/powerpc') diff --git a/sysdeps/powerpc/Dist b/sysdeps/powerpc/Dist index 282cf1394e..a3de7b3c96 100644 --- a/sysdeps/powerpc/Dist +++ b/sysdeps/powerpc/Dist @@ -1,3 +1,5 @@ fenv_const.c fenv_libc.h quad_float.h +fe_nomask.c +t_sqrt.c diff --git a/sysdeps/powerpc/add_n.s b/sysdeps/powerpc/add_n.s new file mode 100644 index 0000000000..609f0a502a --- /dev/null +++ b/sysdeps/powerpc/add_n.s @@ -0,0 
+1,68 @@
+ # Add two limb vectors of equal, non-zero length for PowerPC.
+ # Copyright (C) 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB. If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # mp_limb_t mpn_add_n (mp_ptr res_ptr [r3], mp_srcptr s1_ptr [r4],
+ #                      mp_srcptr s2_ptr [r5], mp_size_t size [r6])
+ # Calculate s1+s2 and put result in res_ptr; return carry, 0 or 1, in r3.
+
+ # Note on optimisation: This code is optimal for the 601. Almost every other
+ # possible 2-unrolled inner loop will not be. Also, watch out for the
+ # alignment...
+
+	.align 3
+	.globl __mpn_add_n
+	.type __mpn_add_n,@function
+__mpn_add_n:
+ # Set up for loop below.
+	mtcrf 0x01,%r6		# CR field 7 = low 4 bits of size; CR bit 31 = size is odd
+	srwi. %r7,%r6,1		# r7 = size / 2; cr0.eq is set when that is zero
+	li %r10,0		# r10 = 0, added to CA by the final addze
+	mtctr %r7		# CTR = number of two-limb loop iterations
+	bt 31,2f		# odd size: handle one limb before the loop
+
+ # Clear the carry.
+	addic %r0,%r0,0
+ # Adjust pointers for loop.
+	addi %r3,%r3,-4
+	addi %r4,%r4,-4
+	addi %r5,%r5,-4
+	b 0f
+
+2:	lwz %r7,0(%r5)		# first s2 limb
+	lwz %r6,0(%r4)		# first s1 limb
+	addc %r6,%r6,%r7	# add them, setting CA for the loop
+	stw %r6,0(%r3)
+	beq 1f			# cr0 is still from srwi.: size was 1, skip the loop
+
+ # The loop.
+
+ # Align start of loop to an odd word boundary to guarantee that the
+ # last two words can be fetched in one access (for 601).
+0:	lwz %r9,4(%r4)		# s1 limb at offset 4
+	lwz %r8,4(%r5)		# s2 limb at offset 4
+	lwzu %r6,8(%r4)		# s1 limb at offset 8; r4 += 8
+	lwzu %r7,8(%r5)		# s2 limb at offset 8; r5 += 8
+	adde %r8,%r9,%r8	# first pair plus incoming CA
+	stw %r8,4(%r3)
+	adde %r6,%r6,%r7	# second pair plus CA from the first
+	stwu %r6,8(%r3)		# store and advance r3 by 8
+	bdnz 0b
+ # return the carry
+1:	addze %r3,%r10		# r3 = r10 + CA = 0 + CA
+	blr
diff --git a/sysdeps/powerpc/addmul_1.s b/sysdeps/powerpc/addmul_1.s
new file mode 100644
index 0000000000..cf8fd2a555
--- /dev/null
+++ b/sysdeps/powerpc/addmul_1.s
@@ -0,0 +1,50 @@
+ # Multiply a limb vector by a single limb and add the product to a second limb vector, for PowerPC.
+ # Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB. If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # mp_limb_t mpn_addmul_1 (mp_ptr res_ptr [r3], mp_srcptr s1_ptr [r4],
+ #                         mp_size_t s1_size [r5], mp_limb_t s2_limb [r6])
+ # Calculate res+s1*s2 and put result back in res; return carry in r3.
+
+	.align 2
+	.globl __mpn_addmul_1
+	.type __mpn_addmul_1,@function
+__mpn_addmul_1:
+	mtctr %r5		# CTR = s1_size
+
+	lwz %r0,0(%r4)		# first s1 limb
+	mullw %r7,%r0,%r6	# r7 = low word of s1[0] * s2_limb
+	mulhwu %r10,%r0,%r6	# r10 = high word of that product
+	lwz %r9,0(%r3)		# first res limb
+	addc %r8,%r7,%r9	# r8 = low word + res limb; sets CA
+	addi %r3,%r3,-4 # adjust res_ptr
+	bdz Lend		# a single limb: nothing left for the loop
+
+Loop:	lwzu %r0,4(%r4)		# next s1 limb
+	stwu %r8,4(%r3)		# store the result limb finished last round
+	mullw %r8,%r0,%r6	# low word of the new product
+	adde %r7,%r8,%r10	# + previous high word + CA
+	mulhwu %r10,%r0,%r6	# high word of the new product
+	lwz %r9,4(%r3)		# next res limb
+	addze %r10,%r10		# fold the carry out of adde into the high word
+	addc %r8,%r7,%r9	# add the res limb; sets CA for the next round
+	bdnz Loop
+
+Lend:	stw %r8,4(%r3)		# store the last result limb
+	addze %r3,%r10		# return value = last high word + CA
+	blr
diff --git a/sysdeps/powerpc/lshift.s b/sysdeps/powerpc/lshift.s
new file mode 100644
index 0000000000..9612a3dbec
--- /dev/null
+++ b/sysdeps/powerpc/lshift.s
@@ -0,0 +1,479 @@
+ # Shift a limb vector left, low level routine.
+ # Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB. If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # mp_limb_t mpn_lshift (mp_ptr wp [r3], mp_srcptr up [r4], mp_size_t usize [r5],
+ #                       unsigned int cnt [r6]); returns the bits shifted out, in r3.
+
+	.align 3
+	.globl __mpn_lshift
+	.type __mpn_lshift,@function
+__mpn_lshift:
+	mtctr %r5 # copy size into CTR
+	cmplwi %cr0,%r5,16 # is size < 16
+	slwi %r0,%r5,2		# r0 = size in bytes
+	add %r7,%r3,%r0 # make r7 point at end of res
+	add %r4,%r4,%r0 # make r4 point at end of s1
+	lwzu %r11,-4(%r4) # load first s1 limb
+	subfic %r8,%r6,32	# r8 = 32 - cnt
+	srw %r3,%r11,%r8 # compute function return value
+	bge %cr0,Lbig # branch if size >= 16
+
+	bdz Lend1		# only one limb
+
+Loop:	lwzu %r10,-4(%r4)	# next lower limb
+	slw %r9,%r11,%r6	# previous limb << cnt
+	srw %r12,%r10,%r8	# next limb >> (32 - cnt)
+	or %r9,%r9,%r12
+	stwu %r9,-4(%r7)
+	bdz Lend2		# last limb loaded is in r10
+	lwzu %r11,-4(%r4)	# same again with r10 and r11 swapped
+	slw %r9,%r10,%r6
+	srw %r12,%r11,%r8
+	or %r9,%r9,%r12
+	stwu %r9,-4(%r7)
+	bdnz Loop
+	b Lend1			# last limb loaded is in r11
+
+ # Guaranteed not to succeed.
+LBoom:	tweq %r0,%r0
+
+ # We imitate a case statement, by using (yuk!) fixed-length code chunks,
+ # of size 4*12 bytes. We have to do this (or something) to make this PIC.
+Lbig:	mflr %r9		# save the caller's LR
+	bltl %cr0,LBoom # Never taken, only used to set LR.
+	slwi %r10,%r6,4		# r10 = 16 * cnt
+	mflr %r12		# r12 = address of the slwi above (set by bltl)
+	add %r10,%r12,%r10
+	slwi %r8,%r6,5		# r8 = 32 * cnt
+	add %r10,%r8,%r10	# r10 = r12 + 48 * cnt = chunk for this cnt
+	mtctr %r10
+	addi %r5,%r5,-1		# size - 1; every chunk reloads CTR from r5
+	mtlr %r9		# restore the caller's LR
+	bctr			# jump into the chunk
+
+Lend1:	slw %r0,%r11,%r6	# lowest limb, last loaded into r11
+	stw %r0,-4(%r7)
+	blr
+
+	mtctr %r5		# chunk for cnt == 1
+Loop1:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,1
+	inslwi %r9,%r10,1,31	# insert the top cnt bits of the next limb at the bottom
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,1
+	inslwi %r9,%r11,1,31
+	stwu %r9,-4(%r7)
+	bdnz Loop1
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 2
+Loop2:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,2
+	inslwi %r9,%r10,2,30
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,2
+	inslwi %r9,%r11,2,30
+	stwu %r9,-4(%r7)
+	bdnz Loop2
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 3
+Loop3:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,3
+	inslwi %r9,%r10,3,29
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,3
+	inslwi %r9,%r11,3,29
+	stwu %r9,-4(%r7)
+	bdnz Loop3
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 4
+Loop4:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,4
+	inslwi %r9,%r10,4,28
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,4
+	inslwi %r9,%r11,4,28
+	stwu %r9,-4(%r7)
+	bdnz Loop4
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 5
+Loop5:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,5
+	inslwi %r9,%r10,5,27
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,5
+	inslwi %r9,%r11,5,27
+	stwu %r9,-4(%r7)
+	bdnz Loop5
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 6
+Loop6:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,6
+	inslwi %r9,%r10,6,26
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,6
+	inslwi %r9,%r11,6,26
+	stwu %r9,-4(%r7)
+	bdnz Loop6
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 7
+Loop7:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,7
+	inslwi %r9,%r10,7,25
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,7
+	inslwi %r9,%r11,7,25
+	stwu %r9,-4(%r7)
+	bdnz Loop7
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 8
+Loop8:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,8
+	inslwi %r9,%r10,8,24
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,8
+	inslwi %r9,%r11,8,24
+	stwu %r9,-4(%r7)
+	bdnz Loop8
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 9
+Loop9:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,9
+	inslwi %r9,%r10,9,23
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,9
+	inslwi %r9,%r11,9,23
+	stwu %r9,-4(%r7)
+	bdnz Loop9
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 10
+Loop10:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,10
+	inslwi %r9,%r10,10,22
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,10
+	inslwi %r9,%r11,10,22
+	stwu %r9,-4(%r7)
+	bdnz Loop10
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 11
+Loop11:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,11
+	inslwi %r9,%r10,11,21
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,11
+	inslwi %r9,%r11,11,21
+	stwu %r9,-4(%r7)
+	bdnz Loop11
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 12
+Loop12:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,12
+	inslwi %r9,%r10,12,20
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,12
+	inslwi %r9,%r11,12,20
+	stwu %r9,-4(%r7)
+	bdnz Loop12
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 13
+Loop13:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,13
+	inslwi %r9,%r10,13,19
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,13
+	inslwi %r9,%r11,13,19
+	stwu %r9,-4(%r7)
+	bdnz Loop13
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 14
+Loop14:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,14
+	inslwi %r9,%r10,14,18
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,14
+	inslwi %r9,%r11,14,18
+	stwu %r9,-4(%r7)
+	bdnz Loop14
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 15
+Loop15:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,15
+	inslwi %r9,%r10,15,17
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,15
+	inslwi %r9,%r11,15,17
+	stwu %r9,-4(%r7)
+	bdnz Loop15
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 16
+Loop16:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,16
+	inslwi %r9,%r10,16,16
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,16
+	inslwi %r9,%r11,16,16
+	stwu %r9,-4(%r7)
+	bdnz Loop16
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 17
+Loop17:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,17
+	inslwi %r9,%r10,17,15
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,17
+	inslwi %r9,%r11,17,15
+	stwu %r9,-4(%r7)
+	bdnz Loop17
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 18
+Loop18:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,18
+	inslwi %r9,%r10,18,14
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,18
+	inslwi %r9,%r11,18,14
+	stwu %r9,-4(%r7)
+	bdnz Loop18
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 19
+Loop19:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,19
+	inslwi %r9,%r10,19,13
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,19
+	inslwi %r9,%r11,19,13
+	stwu %r9,-4(%r7)
+	bdnz Loop19
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 20
+Loop20:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,20
+	inslwi %r9,%r10,20,12
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,20
+	inslwi %r9,%r11,20,12
+	stwu %r9,-4(%r7)
+	bdnz Loop20
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 21
+Loop21:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,21
+	inslwi %r9,%r10,21,11
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,21
+	inslwi %r9,%r11,21,11
+	stwu %r9,-4(%r7)
+	bdnz Loop21
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 22
+Loop22:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,22
+	inslwi %r9,%r10,22,10
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,22
+	inslwi %r9,%r11,22,10
+	stwu %r9,-4(%r7)
+	bdnz Loop22
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 23
+Loop23:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,23
+	inslwi %r9,%r10,23,9
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,23
+	inslwi %r9,%r11,23,9
+	stwu %r9,-4(%r7)
+	bdnz Loop23
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 24
+Loop24:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,24
+	inslwi %r9,%r10,24,8
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,24
+	inslwi %r9,%r11,24,8
+	stwu %r9,-4(%r7)
+	bdnz Loop24
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 25
+Loop25:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,25
+	inslwi %r9,%r10,25,7
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,25
+	inslwi %r9,%r11,25,7
+	stwu %r9,-4(%r7)
+	bdnz Loop25
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 26
+Loop26:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,26
+	inslwi %r9,%r10,26,6
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,26
+	inslwi %r9,%r11,26,6
+	stwu %r9,-4(%r7)
+	bdnz Loop26
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 27
+Loop27:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,27
+	inslwi %r9,%r10,27,5
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,27
+	inslwi %r9,%r11,27,5
+	stwu %r9,-4(%r7)
+	bdnz Loop27
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 28
+Loop28:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,28
+	inslwi %r9,%r10,28,4
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,28
+	inslwi %r9,%r11,28,4
+	stwu %r9,-4(%r7)
+	bdnz Loop28
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 29
+Loop29:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,29
+	inslwi %r9,%r10,29,3
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,29
+	inslwi %r9,%r11,29,3
+	stwu %r9,-4(%r7)
+	bdnz Loop29
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 30
+Loop30:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,30
+	inslwi %r9,%r10,30,2
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,30
+	inslwi %r9,%r11,30,2
+	stwu %r9,-4(%r7)
+	bdnz Loop30
+	b Lend1
+
+	mtctr %r5		# chunk for cnt == 31
+Loop31:	lwzu %r10,-4(%r4)
+	slwi %r9,%r11,31
+	inslwi %r9,%r10,31,1
+	stwu %r9,-4(%r7)
+	bdz Lend2
+	lwzu %r11,-4(%r4)
+	slwi %r9,%r10,31
+	inslwi %r9,%r11,31,1
+	stwu %r9,-4(%r7)
+	bdnz Loop31
+	b Lend1
+
+Lend2:	slw %r0,%r10,%r6	# lowest limb, last loaded into r10
+	stw %r0,-4(%r7)
+	blr
diff --git a/sysdeps/powerpc/memset.s b/sysdeps/powerpc/memset.s
new file mode 100644
index 0000000000..4c8bf8c6b4
--- /dev/null
+++ b/sysdeps/powerpc/memset.s
@@ -0,0 +1,202 @@
+ # Optimized memset implementation for PowerPC.
+ # Copyright (C) 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB. If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+	.section ".text"
+	.align 5
+	nop
+
+	.globl memset
+	.type memset,@function
+memset:
+ # __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
+ # Returns 's'.
+
+ # The memset is done in three sizes: byte (8 bits), word (32 bits),
+ # cache line (256 bits). There is a special case for setting cache lines
+ # to 0, to take advantage of the dcbz instruction.
+ # r6: current address we are storing at
+ # r7: number of bytes we are setting now (when aligning)
+
+ # take care of case for size <= 4
+	cmplwi %cr1,%r5,4
+	andi. %r7,%r3,3		# r7 = s & 3; cr0.eq if s is already word-aligned
+	mr %r6,%r3		# work on a copy; r3 is left alone as the return value
+	ble- %cr1,small
+ # align to word boundary
+	cmplwi %cr5,%r5,31	# cr5 = n compared with 31, tested at 'aligned'
+	rlwimi %r4,%r4,8,16,23	# copy the low byte of c into the byte above it
+	beq+ aligned # 8th instruction from .align
+	mtcrf 0x01,%r3		# CR field 7 = low 4 bits of s
+	subfic %r7,%r7,4	# r7 = bytes up to the next word boundary
+	add %r6,%r6,%r7		# r6 = that word boundary
+	sub %r5,%r5,%r7		# n -= bytes used for alignment
+	bf+ 31,0f		# s is even: a halfword store is enough
+	stb %r4,0(%r3)
+	bt 30,aligned		# s & 3 == 3: the single byte was enough
+0:	sth %r4,-2(%r6) # 16th instruction from .align
+ # take care of case for size <= 31
+aligned:
+	mtcrf 0x01,%r5		# CR field 7 = low 4 bits of the remaining n
+	rlwimi %r4,%r4,16,0,15	# copy the low halfword into the high halfword
+	ble %cr5,medium
+ # align to cache line boundary...
+	andi. %r7,%r6,0x1C	# r7 = word offset of r6 within its 32-byte line
+	subfic %r7,%r7,0x20	# r7 = bytes up to the next line boundary
+	beq caligned		# cr0 from andi.: already on a line boundary
+	mtcrf 0x01,%r7		# CR bits 28/29 = the 8 and 4 bits of r7
+	add %r6,%r6,%r7		# r6 = the line boundary
+	sub %r5,%r5,%r7
+	cmplwi %cr1,%r7,0x10
+	mr %r8,%r6		# r8 walks backwards from the line boundary
+	bf 28,1f		# no 8-byte piece
+	stw %r4,-4(%r8)
+	stwu %r4,-8(%r8)
+1:	blt %cr1,2f		# no 16-byte piece
+	stw %r4,-4(%r8) # 32nd instruction from .align
+	stw %r4,-8(%r8)
+	stw %r4,-12(%r8)
+	stwu %r4,-16(%r8)
+2:	bf 29,caligned		# no 4-byte piece
+	stw %r4,-4(%r8)
+ # now aligned to a cache line.
+caligned:
+	cmplwi %cr1,%r4,0	# is the fill word zero?
+	clrrwi. %r7,%r5,5	# r7 = n rounded down to a multiple of 32
+	mtcrf 0x01,%r5 # 40th instruction from .align
+	beq %cr1,zloopstart # special case for clearing memory using dcbz
+	srwi %r0,%r7,5		# r0 = number of whole lines
+	mtctr %r0
+	beq medium # we may not actually get to do a full line
+	clrlwi. %r5,%r5,27	# r5 = n & 31, the bytes left after whole lines
+	add %r6,%r6,%r7		# r6 = end of the whole-line region
+0:	li %r8,-0x40
+	bdz cloopdone # 48th instruction from .align
+
+cloop:	dcbz %r8,%r6		# dcbz on the line at r6-0x40, the one stored to next
+	stw %r4,-4(%r6)
+	stw %r4,-8(%r6)
+	stw %r4,-12(%r6)
+	stw %r4,-16(%r6)
+	nop # let 601 fetch last 4 instructions of loop
+	stw %r4,-20(%r6)
+	stw %r4,-24(%r6) # 56th instruction from .align
+	nop # let 601 fetch first 8 instructions of loop
+	stw %r4,-28(%r6)
+	stwu %r4,-32(%r6)	# last word of the line; r6 -= 32
+	bdnz cloop
+cloopdone:
+	stw %r4,-4(%r6)
+	stw %r4,-8(%r6)
+	stw %r4,-12(%r6)
+	stw %r4,-16(%r6) # 64th instruction from .align
+	stw %r4,-20(%r6)
+	cmplwi %cr1,%r5,16	# set up cr1 for medium_tail2
+	stw %r4,-24(%r6)
+	stw %r4,-28(%r6)
+	stwu %r4,-32(%r6)
+	beqlr			# cr0 from clrlwi.: no leftover bytes
+	add %r6,%r6,%r7		# back to the end of the whole-line region
+	b medium_tail2 # 72nd instruction from .align
+
+	.align 5
+	nop
+# clear lines of memory in 128-byte chunks.
+zloopstart:
+	clrlwi %r5,%r5,27	# r5 = n & 31, the bytes left after whole lines
+	mtcrf 0x02,%r7		# CR field 6 = bits 24-27 of the whole-line byte count
+	srwi. %r0,%r7,7		# r0 = number of 128-byte chunks
+	mtctr %r0
+	li %r7,0x20
+	li %r8,-0x40
+	cmplwi %cr1,%r5,16 # 8
+	bf 26,0f		# CR bit 26 = the 0x20 bit: one odd line
+	dcbz 0,%r6
+	addi %r6,%r6,0x20
+0:	li %r9,-0x20
+	bf 25,1f		# CR bit 25 = the 0x40 bit: one odd pair of lines
+	dcbz 0,%r6
+	dcbz %r7,%r6
+	addi %r6,%r6,0x40 # 16
+1:	cmplwi %cr5,%r5,0
+	beq medium		# cr0 from srwi.: no 128-byte chunks
+zloop:
+	dcbz 0,%r6
+	dcbz %r7,%r6
+	addi %r6,%r6,0x80
+	dcbz %r8,%r6
+	dcbz %r9,%r6
+	bdnz zloop
+	beqlr %cr5		# no leftover bytes
+	b medium_tail2
+
+	.align 5
+small:
+ # Memset of 4 bytes or less.
+	cmplwi %cr5,%r5,1
+	cmplwi %cr1,%r5,3
+	bltlr %cr5		# n == 0
+	stb %r4,0(%r6)
+	beqlr %cr5		# n == 1
+	nop
+	stb %r4,1(%r6)
+	bltlr %cr1		# n == 2
+	stb %r4,2(%r6)
+	beqlr %cr1		# n == 3
+	nop
+	stb %r4,3(%r6)
+	blr
+
+# memset of 0-31 bytes
+	.align 5
+medium:
+	cmplwi %cr1,%r5,16
+medium_tail2:
+	add %r6,%r6,%r5		# r6 = end of the area; stores below go backwards
+medium_tail:
+	bt- 31,medium_31t	# n & 1: a byte to store
+	bt- 30,medium_30t	# n & 2: a halfword to store
+medium_30f:
+	bt- 29,medium_29t	# n & 4: a word to store
+medium_29f:
+	bge- %cr1,medium_27t	# n >= 16: four words to store
+	bflr- 28		# return unless n & 8
+	stw %r4,-4(%r6) # 8th instruction from .align
+	stw %r4,-8(%r6)
+	blr
+
+medium_31t:
+	stbu %r4,-1(%r6)
+	bf- 30,medium_30f
+medium_30t:
+	sthu %r4,-2(%r6)
+	bf- 29,medium_29f
+medium_29t:
+	stwu %r4,-4(%r6)
+	blt- %cr1,medium_27f # 16th instruction from .align
+medium_27t:
+	stw %r4,-4(%r6)
+	stw %r4,-8(%r6)
+	stw %r4,-12(%r6)
+	stwu %r4,-16(%r6)
+medium_27f:
+	bflr- 28		# return unless n & 8
+medium_28t:
+	stw %r4,-4(%r6)
+	stw %r4,-8(%r6)
+	blr
diff --git a/sysdeps/powerpc/mul_1.s b/sysdeps/powerpc/mul_1.s
new file mode 100644
index 0000000000..d6eb623bd4
--- /dev/null
+++ b/sysdeps/powerpc/mul_1.s
@@ -0,0 +1,47 @@
+ # Multiply a limb vector by a limb, for PowerPC.
+ # Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB. If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # mp_limb_t mpn_mul_1 (mp_ptr res_ptr [r3], mp_srcptr s1_ptr [r4],
+ #                      mp_size_t s1_size [r5], mp_limb_t s2_limb [r6])
+ # Calculate s1*s2 and put result in res_ptr; return carry in r3.
+
+	.align 2
+	.globl __mpn_mul_1
+	.type __mpn_mul_1,@function
+
+__mpn_mul_1:
+	mtctr %r5		# CTR = s1_size
+
+	lwz %r0,0(%r4)		# first s1 limb
+	mullw %r7,%r0,%r6	# r7 = low word of s1[0] * s2_limb
+	mulhwu %r10,%r0,%r6	# r10 = high word of that product
+	addi %r3,%r3,-4 # adjust res_ptr
+	addic %r5,%r5,0 # clear cy with dummy insn
+	bdz Lend		# a single limb: nothing left for the loop
+
+Loop:	lwzu %r0,4(%r4)		# next s1 limb
+	stwu %r7,4(%r3)		# store the result limb finished last round
+	mullw %r8,%r0,%r6	# low word of the new product
+	adde %r7,%r8,%r10	# + previous high word + CA
+	mulhwu %r10,%r0,%r6	# high word of the new product
+	bdnz Loop
+
+Lend:	stw %r7,4(%r3)		# store the last result limb
+	addze %r3,%r10		# return value = last high word + CA
+	blr
diff --git a/sysdeps/powerpc/rshift.s b/sysdeps/powerpc/rshift.s
new file mode 100644
index 0000000000..20f09ad86a
--- /dev/null
+++ b/sysdeps/powerpc/rshift.s
@@ -0,0 +1,59 @@
+# PowerPC-32 __mpn_rshift -- Shift a limb vector right.
+
+# Copyright (C) 1995 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# cnt r6
+
+	.align 3
+	.globl __mpn_rshift
+	.type __mpn_rshift,@function
+__mpn_rshift:
+	mtctr 5 # copy size into CTR
+	addi 7,3,-4 # move adjusted res_ptr to free return reg
+	subfic 8,6,32		# r8 = 32 - cnt
+	lwz 11,0(4) # load first s1 limb
+	slw 3,11,8 # compute function return value
+	bdz Lend1		# only one limb
+
+Loop:	lwzu 10,4(4)		# next higher limb
+	srw 9,11,6		# previous limb >> cnt
+	slw 12,10,8		# next limb << (32 - cnt)
+	or 9,9,12
+	stwu 9,4(7)
+	bdz Lend2		# last limb loaded is in r10
+	lwzu 11,4(4)		# same again with r10 and r11 swapped
+	srw 9,10,6
+	slw 12,11,8
+	or 9,9,12
+	stwu 9,4(7)
+	bdnz Loop		# falls through to Lend1 when CTR reaches zero
+
+Lend1:	srw 0,11,6		# highest limb, last loaded into r11
+	stw 0,4(7)
+	blr
+
+Lend2:	srw 0,10,6		# highest limb, last loaded into r10
+	stw 0,4(7)
+	blr
diff --git a/sysdeps/powerpc/strchr.s b/sysdeps/powerpc/strchr.s
new file mode 100644
index 0000000000..c1df66f8dc
--- /dev/null
+++ b/sysdeps/powerpc/strchr.s
@@ -0,0 +1,118 @@
+ # Optimized strchr implementation for PowerPC.
+ # Copyright (C) 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB. If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # See strlen.s for comments on how this works.
+
+	.section ".text"
+	.align 2
+	.globl strchr
+	.type strchr,@function
+strchr:
+ # char * [r3] strchr (const char *s [r3] , int c [r4] )
+
+ # r0: a temporary
+ # r3: our return result.
+ # r4: byte we're looking for, spread over the whole word
+ # r5: the current word
+ # r6: the constant 0xfefefeff (-0x01010101)
+ # r7: the constant 0x7f7f7f7f
+ # r8: pointer to the current word.
+ # r9: a temporary
+ # r10: the number of bits we should ignore in the first word
+ # r11: a mask with the bits to ignore set to 0
+ # r12: a temporary
+
+	rlwimi %r4,%r4,8,16,23	# copy the low byte of c into the byte above it
+	li %r11,-1
+	rlwimi %r4,%r4,16,0,15	# copy the low halfword into the high halfword
+	lis %r6,0xfeff
+	lis %r7,0x7f7f
+	clrrwi %r8,%r3,2	# r8 = s rounded down to a word boundary
+	addi %r7,%r7,0x7f7f	# r7 = 0x7f7f7f7f
+	addi %r6,%r6,0xfffffeff	# r6 = 0xfefefeff
+	rlwinm %r10,%r3,3,27,28	# r10 = (s & 3) * 8
+ # Test the first (partial?) word.
+	lwz %r5,0(%r8)
+	srw %r11,%r11,%r10	# r11 = 0xffffffff >> r10
+	orc %r5,%r5,%r11	# set the bits to ignore to 1 in the current word
+	add %r0,%r6,%r5
+	nor %r9,%r7,%r5
+	and. %r0,%r0,%r9
+	xor %r12,%r4,%r5	# r12 = current word XOR the spread-out c
+	orc %r12,%r12,%r11	# set the bits to ignore to 1 here as well
+	b loopentry
+
+ # The loop.
+
+loop:	lwzu %r5,4(%r8)
+	and. %r0,%r0,%r9
+ # Test for 0
+	add %r0,%r6,%r5
+	nor %r9,%r7,%r5
+	bne foundit
+	and. %r0,%r0,%r9
+ # Start test for the bytes we're looking for
+	xor %r12,%r4,%r5
+loopentry:
+	add %r0,%r6,%r12
+	nor %r9,%r7,%r12
+	beq loop
+ # There is a zero byte in the word, but may also be a matching byte (either
+ # before or after the zero byte). In fact, we may be looking for a
+ # zero byte, in which case we return a match. We guess that this hasn't
+ # happened, though.
+missed:
+	and. %r0,%r0,%r9
+	li %r3,0		# provisional result: NULL
+	beqlr
+ # It did happen. Decide which one was first...
+ # I'm not sure if this is actually faster than a sequence of
+ # rotates, compares, and branches (we use it anyway because it's shorter).
+	and %r6,%r7,%r5
+	or %r11,%r7,%r5
+	and %r0,%r7,%r12
+	or %r10,%r7,%r12
+	add %r6,%r6,%r7
+	add %r0,%r0,%r7
+	nor %r5,%r11,%r6
+	nor %r9,%r10,%r0
+	cmplw %r5,%r9
+	bgtlr			# return with r3 still 0
+	cntlzw %r4,%r9
+	srwi %r4,%r4,3		# leading-zero count / 8 = byte offset in the word
+	add %r3,%r8,%r4
+	blr
+
+foundit:
+	and %r0,%r7,%r12
+	or %r10,%r7,%r12
+	add %r0,%r0,%r7
+	nor %r9,%r10,%r0
+	cntlzw %r4,%r9
+	subi %r8,%r8,4		# step r8 back one word
+	srwi %r4,%r4,3		# leading-zero count / 8 = byte offset in the word
+	add %r3,%r8,%r4
+	blr
+
+0:
+	.size strchr,0b-strchr
+
+	.globl index
+	.weak index
+	.set index,strchr
diff --git a/sysdeps/powerpc/strcmp.s b/sysdeps/powerpc/strcmp.s
new file mode 100644
index 0000000000..f901b82ab1
--- /dev/null
+++ b/sysdeps/powerpc/strcmp.s
@@ -0,0 +1,273 @@
+ # Optimized strcmp implementation for PowerPC.
+ # Copyright (C) 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB. If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # See strlen.s for comments on how the end-of-string testing works.
+
+	.section ".text"
+	.align 3
+	.globl strcmp
+	.type strcmp,@function
+strcmp:
+ # int [r3] strcmp (const char *p1 [r3], const char *p2 [r4])
+
+ # General register assignments:
+ # r0: temporary
+ # r3: pointer to previous word in s1
+ # r4: pointer to previous word in s2
+ # r5: current first word in s1
+ # r6: current first word in s2 (after re-alignment)
+ # r7: 0xfefefeff
+ # r8: 0x7f7f7f7f
+ # r9: ~(word in s1 | 0x7f7f7f7f)
+
+ # Register assignments in the prologue:
+ # r10: low 2 bits of p1-p2
+ # r11: mask to orc with r5/r6
+
+	subf. %r10,%r4,%r3	# r10 = p1 - p2
+	beq- equal		# same pointer: the strings are equal
+	andi. %r10,%r10,3	# keep the low 2 bits; cr0.eq if they are zero
+	cmpi %cr1,%r10,2
+	beq- %cr1,align2
+	lis %r7,0xfeff
+	lis %r8,0x7f7f
+	addi %r8,%r8,0x7f7f	# r8 = 0x7f7f7f7f
+	addi %r7,%r7,0xfffffeff	# r7 = 0xfefefeff
+	bgt- %cr1,align3
+strcmp3:
+	rlwinm %r0,%r3,3,27,28	# r0 = (r3 & 3) * 8
+	li %r11,-1
+	srw %r11,%r11,%r0	# r11 = 0xffffffff >> r0
+	clrrwi %r3,%r3,2	# round both pointers down to a word boundary
+	clrrwi %r4,%r4,2
+	lwz %r5,0(%r3)
+	lwz %r6,0(%r4)
+	bne- align1		# cr0 is still from the andi. above
+
+ # The loop, case when both strings are aligned the same.
+ # on entry, cr1.eq must be 1.
+ # r10: second word in s1
+ # r11: second word in s2 OR mask to orc with first two words.
+align0:
+	andi. %r0,%r3,4
+	orc %r5,%r5,%r11
+	orc %r6,%r6,%r11
+	beq+ a0start
+	add %r0,%r7,%r5
+	nor %r9,%r8,%r5
+	and. %r0,%r0,%r9
+	cmplw %cr1,%r5,%r6
+	subi %r3,%r3,4
+	bne- endstringeq
+	subi %r4,%r4,4
+	bne- %cr1,difference
+
+loopalign0:
+	lwzu %r5,8(%r3)
+	bne- %cr1,difference2
+	lwzu %r6,8(%r4)
+a0start:
+	add %r0,%r7,%r5
+	nor %r9,%r8,%r5
+	and. %r0,%r0,%r9
+	cmplw %cr1,%r5,%r6
+	lwz %r10,4(%r3)
+	bne- endstringeq
+	add %r0,%r7,%r10
+	bne- %cr1,difference
+	nor %r9,%r8,%r10
+	lwz %r11,4(%r4)
+	and. %r0,%r0,%r9
+	cmplw %cr1,%r10,%r11
+	beq+ loopalign0
+
+	mr %r5,%r10
+	mr %r6,%r11
+
+ # fall through to...
+
+endstringeq:
+ # (like 'endstring', but an equality code is in cr1)
+	beq %cr1,equal
+endstring:
+ # OK. We've hit the end of the string. We need to be careful that
+ # we don't compare two strings as different because of gunk beyond
+ # the end of the strings. We do it like this...
+	and %r0,%r8,%r5
+	add %r0,%r0,%r8
+	xor. %r10,%r5,%r6
+	andc %r9,%r9,%r0
+	cntlzw %r10,%r10
+	cntlzw %r9,%r9
+	addi %r9,%r9,7
+	cmpw %cr1,%r9,%r10
+	blt %cr1,equal
+	sub %r3,%r5,%r6
+	bgelr+
+	mr %r3,%r6
+	blr
+equal:	li %r3,0
+	blr
+
+ # The loop, case when s2 is aligned 1 char behind s1.
+ # r10: current word in s2 (before re-alignment)
+
+align1:
+	cmpwi %cr1,%r0,0
+	orc %r5,%r5,%r11
+	bne %cr1,align1_123
+ # When s1 is aligned to a word boundary, the startup processing is special.
+	slwi. %r6,%r6,24
+	bne+ a1entry_0
+	nor %r9,%r8,%r5
+	b endstring
+
+align1_123:
+ # Otherwise (s1 not aligned to a word boundary):
+	mr %r10,%r6
+	add %r0,%r7,%r5
+	nor %r9,%r8,%r5
+	and. %r0,%r0,%r9
+	srwi %r6,%r6,8
+	orc %r6,%r6,%r11
+	cmplw %cr1,%r5,%r6
+	bne- endstringeq
+	bne- %cr1,difference
+
+loopalign1:
+	slwi. %r6,%r10,24
+	bne- %cr1,a1difference
+	lwzu %r5,4(%r3)
+	beq- endstring1
+a1entry_0:
+	lwzu %r10,4(%r4)
+a1entry_123:
+	add %r0,%r7,%r5
+	nor %r9,%r8,%r5
+	and. %r0,%r0,%r9
+	rlwimi %r6,%r10,24,8,31
+	cmplw %cr1,%r5,%r6
+	beq+ loopalign1
+	b endstringeq
+
+endstring1:
+	srwi %r3,%r5,24
+	blr
+
+a1difference:
+	lbz %r6,-1(%r4)
+	slwi %r6,%r6,24
+	rlwimi %r6,%r10,24,8,31
+
+ # fall through to...
+
+difference:
+ # The idea here is that we could just return '%r5 - %r6', except
+ # that the result might overflow. Overflow can only happen when %r5
+ # and %r6 have different signs (thus the xor), in which case we want to
+ # return negative iff %r6 has its high bit set so %r5 < %r6.
+ # A branch-free implementation of this is
+ # xor %r0,%r5,%r6
+ # rlwinm %r0,%r0,1,31,31
+ # rlwnm %r5,%r5,%r0,1,31
+ # rlwnm %r6,%r6,%r0,1,31
+ # sub %r3,%r5,%r6
+ # blr
+ # but this is usually more expensive.
+	xor. %r0,%r5,%r6
+	sub %r3,%r5,%r6
+	bgelr+
+	mr %r3,%r6
+	blr
+
+difference2:
+ # As for 'difference', but use registers r10 and r11 instead of r5 and r6.
+	xor. %r0,%r10,%r11
+	sub %r3,%r10,%r11
+	bgelr+
+	mr %r3,%r11
+	blr
+
+ # For the case when s2 is aligned 3 chars behind s1, we switch
+ # s1 and s2...
+ # r10: used by 'align2' (see below)
+ # r11: used by 'align2' (see below)
+ # r12: saved link register
+ # cr0.eq: must be left as 1.
+
+align3:	mflr %r12
+	mr %r0,%r3		# swap r3 and r4 through r0
+	mr %r3,%r4
+	mr %r4,%r0
+	bl strcmp3
+	mtlr %r12
+	neg %r3,%r3		# the operands were swapped, so negate the result
+	blr
+
+ # The loop, case when s2 and s1's alignments differ by 2
+ # This is the ugly case...
+ # FIXME: on a 601, the loop takes 7 cycles instead of the 6 you'd expect,
+ # because there are too many branches. This loop should probably be
+ # coded like the align1 case.
+
+a2even:	lhz %r5,0(%r3)
+	lhz %r6,0(%r4)
+	b a2entry
+
+align2:
+	andi. %r0,%r3,1
+	beq+ a2even
+	subi %r3,%r3,1
+	subi %r4,%r4,1
+	lbz %r5,1(%r3)
+	lbz %r6,1(%r4)
+	cmpwi %cr0,%r5,0
+	cmpw %cr1,%r5,%r6
+	beq- align2end2
+	lhzu %r5,2(%r3)
+	beq+ %cr1,a2entry1
+	lbz %r5,-1(%r3)
+	sub %r3,%r5,%r6
+	blr
+
+loopalign2:
+	cmpw %cr1,%r5,%r6
+	beq- align2end2
+	lhzu %r5,2(%r3)
+	bne- %cr1,align2different
+a2entry1:
+	lhzu %r6,2(%r4)
+a2entry:
+	cmpwi %cr5,%r5,0x00ff
+	andi. %r0,%r5,0x00ff
+	bgt+ %cr5,loopalign2
+
+align2end:
+	andi. %r3,%r6,0xff00
+	neg %r3,%r3
+	blr
+
+align2different:
+	lhzu %r5,-2(%r3)
+align2end2:
+	sub %r3,%r5,%r6
+	blr
+
+0:
+	.size strcmp,0b-strcmp
diff --git a/sysdeps/powerpc/sub_n.s b/sysdeps/powerpc/sub_n.s
new file mode 100644
index 0000000000..8711bf9a40
--- /dev/null
+++ b/sysdeps/powerpc/sub_n.s
@@ -0,0 +1,69 @@
+ # Subtract two limb vectors of equal, non-zero length for PowerPC.
+ # Copyright (C) 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB. If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # mp_limb_t mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr,
+ # mp_size_t size)
+ # Calculate s1-s2 and put result in res_ptr; return borrow, 0 or 1.
+
+ # Note on optimisation: This code is optimal for the 601. Almost every other
+ # possible 2-unrolled inner loop will not be. Also, watch out for the
+ # alignment...
+
+	.align 3
+	.globl __mpn_sub_n
+	.type __mpn_sub_n,@function
+	nop				# presumably padding for the loop alignment described further down
+__mpn_sub_n:
+ # Set up for loop below (as used here: r3 = res_ptr, r4 = s1_ptr, r5 = s2_ptr, r6 = size).
+	mtcrf	0x01,%r6		# copy the low 4 bits of size into cr7, so CR bit 31 = size & 1
+	srwi.	%r7,%r6,1		# r7 = size / 2; cr0.eq is set when that is zero
+	mtctr	%r7			# loop count: each iteration handles two limbs
+	bt	31,2f			# odd size: subtract one limb before entering the loop
+
+ # Set the carry (clear the borrow).
+	subfc	%r0,%r0,%r0		# r0 - r0 produces no borrow, so CA = 1
+ # Adjust pointers for loop.
+	addi	%r3,%r3,-4		# the loop addresses limbs at offsets 4 and 8
+	addi	%r4,%r4,-4
+	addi	%r5,%r5,-4
+	b	0f
+
+2:	lwz	%r7,0(%r5)		# first limb of s2
+	lwz	%r6,0(%r4)		# first limb of s1 (size in r6 is no longer needed)
+	subfc	%r6,%r7,%r6		# r6 = s1 limb - s2 limb; CA = 1 when there is no borrow
+	stw	%r6,0(%r3)
+	beq	1f			# cr0 from the srwi.: size / 2 == 0, nothing left for the loop
+
+ # Align start of loop to an odd word boundary to guarantee that the
+ # last two words can be fetched in one access (for 601). This turns
+ # out to be important.
+0:
+	lwz	%r9,4(%r4)		# next s1 limb
+	lwz	%r8,4(%r5)		# next s2 limb
+	lwzu	%r6,8(%r4)		# following s1 limb; s1 pointer advances by two limbs
+	lwzu	%r7,8(%r5)		# following s2 limb; s2 pointer advances by two limbs
+	subfe	%r8,%r8,%r9		# r8 = r9 - r8 with the incoming borrow; updates CA
+	stw	%r8,4(%r3)
+	subfe	%r6,%r7,%r6		# r6 = r6 - r7 with borrow; updates CA
+	stwu	%r6,8(%r3)		# store and advance the result pointer by two limbs
+	bdnz	0b
+ # return the borrow
+1:	subfe	%r3,%r3,%r3		# r3 = CA - 1: 0 when no borrow, -1 when a borrow is pending
+	neg	%r3,%r3			# turn that into 0 or 1
+	blr
diff --git a/sysdeps/powerpc/submul_1.s b/sysdeps/powerpc/submul_1.s
new file mode 100644
index 0000000000..999430d744
--- /dev/null
+++ b/sysdeps/powerpc/submul_1.s
@@ -0,0 +1,52 @@
+ # Multiply a limb vector by a single limb and subtract the product from a second limb vector, for PowerPC.
+ # Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB. If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # mp_limb_t mpn_submul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
+ # mp_size_t s1_size, mp_limb_t s2_limb)
+ # Calculate res-s1*s2 and put result back in res; return carry. As used below: r3 = res_ptr, r4 = s1_ptr, r5 = s1_size, r6 = s2_limb.
+
+	.align 2
+	.globl __mpn_submul_1
+	.type __mpn_submul_1,@function
+__mpn_submul_1:
+	mtctr	%r5			# loop counter = s1_size
+
+	lwz	%r0,0(%r4)		# first s1 limb
+	mullw	%r7,%r0,%r6		# low word of s1 limb * s2_limb
+	mulhwu	%r10,%r0,%r6		# high word of the same product
+	lwz	%r9,0(%r3)		# first res limb
+	subf	%r8,%r7,%r9		# r8 = res limb - low product word
+	addc %r7,%r7,%r8 # invert cy (r7 is junk)
+	addi %r3,%r3,-4 # adjust res_ptr
+	bdz	Lend			# only one limb: skip the loop
+
+Loop:	lwzu	%r0,4(%r4)		# next s1 limb
+	stwu	%r8,4(%r3)		# store the previous result limb and advance res_ptr
+	mullw	%r8,%r0,%r6		# low word of the new product
+	adde	%r7,%r8,%r10		# add the previous high word and the incoming carry
+	mulhwu	%r10,%r0,%r6		# high word of the new product
+	lwz	%r9,4(%r3)		# next res limb
+	addze	%r10,%r10		# fold the carry from the adde into the high word
+	subf	%r8,%r7,%r9		# r8 = res limb - r7
+	addc %r7,%r7,%r8 # invert cy (r7 is junk)
+	bdnz	Loop
+
+Lend:	stw	%r8,4(%r3)		# store the final result limb
+	addze	%r3,%r10		# return value = last high word + carry
+	blr
--
cgit v1.2.3-70-g09d2