32 files changed, 1284 insertions, 1700 deletions
diff --git a/sysdeps/powerpc/Makefile b/sysdeps/powerpc/Makefile
index 0a50956640..3e8f22b573 100644
--- a/sysdeps/powerpc/Makefile
+++ b/sysdeps/powerpc/Makefile
@@ -11,3 +11,21 @@ tests += test-arith test-arithf
 LDLIBS-test-arith = libm
 LDLIBS-test-arithf = libm
 endif
+
+ifeq ($(subdir),gmon)
+sysdep_routines += ppc-mcount
+endif
+
+# On PPC, -fpic works until the GOT contains 2^15 bytes, and possibly
+# more depending on how clever the linker is.  Each GOT entry takes 4 bytes,
+# so that's at least 8192 entries.  Since libc only uses about 1200 entries,
+# we want to use -fpic, because this generates fewer relocs.
+ifeq (yes,$(build-shared))
+CFLAGS-.os = -fpic -fno-common
+endif
+
+# The initfini generation code doesn't work in the presence of -fPIC, so
+# we use -fpic instead which is much better.
+ifeq ($(subdir),csu)
+CFLAGS-initfini.s = -g0 -fpic
+endif
diff --git a/sysdeps/powerpc/add_n.S b/sysdeps/powerpc/add_n.S
new file mode 100644
index 0000000000..2bd59ae4a7
--- /dev/null
+++ b/sysdeps/powerpc/add_n.S
@@ -0,0 +1,68 @@
+/* Add two limb vectors of equal, non-zero length for PowerPC.
+   Copyright (C) 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <sysdep.h>
+
+/* mp_limb_t mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr,
+                        mp_size_t size)
+   Calculate s1+s2 and put result in res_ptr; return carry, 0 or 1.  */
+
+/* Note on optimisation: This code is optimal for the 601.  Almost every other
+   possible 2-unrolled inner loop will not be.  Also, watch out for the
+   alignment...  */
+
+EALIGN(__mpn_add_n,3,0)
+/* Set up for loop below (r3 = res_ptr, r4 = s1_ptr, r5 = s2_ptr, r6 = size).  */
+	mtcrf 0x01,%r6		/* Low 4 bits of size -> CR field 7.  */
+	srwi. %r7,%r6,1		/* r7 = size / 2; CR0[EQ] set if that is 0.  */
+	li    %r10,0		/* Zero, used to return the carry at the end.  */
+	mtctr %r7		/* The loop runs size / 2 times.  */
+	bt    31,2f		/* Size is odd: do one limb before the loop.  */
+
+/* Clear the carry.  */
+	addic %r0,%r0,0
+/* Adjust pointers for loop, which addresses limbs at offsets 4 and 8.  */
+	addi  %r3,%r3,-4
+	addi  %r4,%r4,-4
+	addi  %r5,%r5,-4
+	b     0f
+
+2:	lwz  %r7,0(%r5)		/* Odd size: add the first limb pair...  */
+	lwz  %r6,0(%r4)
+	addc %r6,%r6,%r7	/* ...which sets the carry for the loop.  */
+	stw  %r6,0(%r3)
+        beq  1f		/* CR0 is still from srwi.: size / 2 == 0, done.  */
+
+/* The loop.  */
+
+/* Align start of loop to an odd word boundary to guarantee that the
+   last two words can be fetched in one access (for 601).  */
+0:	lwz  %r9,4(%r4)
+	lwz  %r8,4(%r5)
+	lwzu %r6,8(%r4)		/* Second limb; also advances s1_ptr by 8.  */
+	lwzu %r7,8(%r5)		/* Likewise for s2_ptr.  */
+	adde %r8,%r9,%r8	/* Add with carry in and carry out.  */
+	stw  %r8,4(%r3)
+	adde %r6,%r6,%r7
+	stwu %r6,8(%r3)		/* Store and advance res_ptr by 8.  */
+	bdnz 0b
+/* Return the carry.  */
+1:	addze %r3,%r10		/* r3 = 0 + carry.  */
+	blr
+END(__mpn_add_n)
diff --git a/sysdeps/powerpc/add_n.s b/sysdeps/powerpc/add_n.s
deleted file mode 100644
index 609f0a502a..0000000000
--- a/sysdeps/powerpc/add_n.s
+++ /dev/null
@@ -1,68 +0,0 @@
- # Add two limb vectors of equal, non-zero length for PowerPC.
- # Copyright (C) 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB.  If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # mp_limb_t mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr,
- #                      mp_size_t size)
- # Calculate s1+s2 and put result in res_ptr; return carry, 0 or 1.
-
- # Note on optimisation: This code is optimal for the 601.  Almost every other
- # possible 2-unrolled inner loop will not be.  Also, watch out for the
- # alignment...
-
-	.align 3
-	.globl __mpn_add_n
-	.type	 __mpn_add_n,@function
-__mpn_add_n:
- # Set up for loop below.
-	mtcrf 0x01,%r6
-	srwi. %r7,%r6,1
-	li    %r10,0
-	mtctr %r7
-	bt    31,2f
-
- # Clear the carry.
-	addic %r0,%r0,0
- # Adjust pointers for loop.
-	addi  %r3,%r3,-4
-	addi  %r4,%r4,-4
-	addi  %r5,%r5,-4
-	b     0f
-
-2:	lwz  %r7,0(%r5)
-	lwz  %r6,0(%r4)
-	addc %r6,%r6,%r7
-	stw  %r6,0(%r3)
-        beq  1f
-
- # The loop.
-
- # Align start of loop to an odd word boundary to guarantee that the
- # last two words can be fetched in one access (for 601).
-0:	lwz  %r9,4(%r4)
-	lwz  %r8,4(%r5)
-	lwzu %r6,8(%r4)
-	lwzu %r7,8(%r5)
-	adde %r8,%r9,%r8
-	stw  %r8,4(%r3)
-	adde %r6,%r6,%r7
-	stwu %r6,8(%r3)
-	bdnz 0b
- # return the carry
-1:	addze %r3,%r10
-	blr
diff --git a/sysdeps/powerpc/addmul_1.S b/sysdeps/powerpc/addmul_1.S
new file mode 100644
index 0000000000..dc762fcc43
--- /dev/null
+++ b/sysdeps/powerpc/addmul_1.S
@@ -0,0 +1,49 @@
+/* Multiply a limb vector by a limb and add to a second vector, for PowerPC.
+   Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <sysdep.h>
+
+/* mp_limb_t mpn_addmul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
+                           mp_size_t s1_size, mp_limb_t s2_limb)
+   Calculate res+s1*s2 and put result back in res; return carry.  */
+ENTRY(__mpn_addmul_1)
+	mtctr	%r5			/* CTR = s1_size */
+
+	lwz	%r0,0(%r4)		/* first s1 limb */
+	mullw	%r7,%r0,%r6		/* low word of s1 limb * s2_limb */
+	mulhwu	%r10,%r0,%r6		/* high word of the same product */
+	lwz     %r9,0(%r3)		/* first res limb */
+	addc	%r8,%r7,%r9		/* low word + res limb, sets carry */
+	addi	%r3,%r3,-4		/* adjust res_ptr */
+	bdz	1f			/* only one limb: skip the loop */
+
+0:	lwzu	%r0,4(%r4)		/* next s1 limb, advancing s1_ptr */
+	stwu	%r8,4(%r3)		/* store previous result, advancing res_ptr */
+	mullw	%r8,%r0,%r6		/* low word of product */
+	adde	%r7,%r8,%r10		/* plus previous high word and carry */
+	mulhwu	%r10,%r0,%r6		/* high word of product */
+	lwz     %r9,4(%r3)		/* res limb to be updated next */
+	addze   %r10,%r10		/* fold the adde carry into the high word */
+	addc    %r8,%r7,%r9		/* add res limb, sets carry */
+	bdnz	0b
+
+1:	stw	%r8,4(%r3)		/* store the last result limb */
+	addze	%r3,%r10		/* return high word plus carry */
+	blr
+END(__mpn_addmul_1)
diff --git a/sysdeps/powerpc/addmul_1.s b/sysdeps/powerpc/addmul_1.s
deleted file mode 100644
index cf8fd2a555..0000000000
--- a/sysdeps/powerpc/addmul_1.s
+++ /dev/null
@@ -1,50 +0,0 @@
- # Multiply a limb vector by a single limb, for PowerPC.
- # Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB.  If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # mp_limb_t mpn_addmul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
- #                         mp_size_t s1_size, mp_limb_t s2_limb)
- # Calculate res+s1*s2 and put result back in res; return carry.
-
-	.align 2
-	.globl __mpn_addmul_1
-	.type	 __mpn_addmul_1,@function
-__mpn_addmul_1:
-	mtctr	%r5
-
-	lwz	%r0,0(%r4)
-	mullw	%r7,%r0,%r6
-	mulhwu	%r10,%r0,%r6
-	lwz     %r9,0(%r3)
-	addc	%r8,%r7,%r9
-	addi	%r3,%r3,-4		# adjust res_ptr
-	bdz	Lend
-
-Loop:	lwzu	%r0,4(%r4)
-	stwu	%r8,4(%r3)
-	mullw	%r8,%r0,%r6
-	adde	%r7,%r8,%r10
-	mulhwu	%r10,%r0,%r6
-	lwz     %r9,4(%r3)
-	addze   %r10,%r10
-	addc    %r8,%r7,%r9
-	bdnz	Loop
-
-Lend:	stw	%r8,4(%r3)
-	addze	%r3,%r10
-	blr
diff --git a/sysdeps/powerpc/bsd-_setjmp.S b/sysdeps/powerpc/bsd-_setjmp.S
index ffd90d5bd2..ef31f841c4 100644
--- a/sysdeps/powerpc/bsd-_setjmp.S
+++ b/sysdeps/powerpc/bsd-_setjmp.S
@@ -25,9 +25,5 @@
 
 ENTRY (_setjmp)
 	li %r4,0			/* Set second argument to 0.  */
-#ifdef PIC
-	b __sigsetjmp@plt
-#else
-	b __sigsetjmp
-#endif
+	b JUMPTARGET(__sigsetjmp)
 END (_setjmp)
diff --git a/sysdeps/powerpc/bsd-setjmp.S b/sysdeps/powerpc/bsd-setjmp.S
index f02d7815ed..d26b3fc93e 100644
--- a/sysdeps/powerpc/bsd-setjmp.S
+++ b/sysdeps/powerpc/bsd-setjmp.S
@@ -25,11 +25,7 @@
 
 ENTRY (__setjmp)
 	li %r4,1			/* Set second argument to 1.  */
-#ifdef PIC
-	b __sigsetjmp@plt
-#else
-	b __sigsetjmp
-#endif
+	b JUMPTARGET(__sigsetjmp)
 END (__setjmp)
 
 	.globl setjmp
diff --git a/sysdeps/powerpc/dl-machine.h b/sysdeps/powerpc/dl-machine.h
index 917e4f7970..771b711a14 100644
--- a/sysdeps/powerpc/dl-machine.h
+++ b/sysdeps/powerpc/dl-machine.h
@@ -149,33 +149,34 @@ elf_machine_load_address (void)
 #define elf_machine_relplt elf_machine_rela
 
 /* This code is used in dl-runtime.c to call the `fixup' function
-   and then redirect to the address it returns. It is called
-   from code built in the PLT by elf_machine_runtime_setup. */
+   and then redirect to the address it returns.  It is called
+   from code built in the PLT by elf_machine_runtime_setup.  */
 #define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\
 	.section \".text\"
 	.align 2
 	.globl _dl_runtime_resolve
 	.type _dl_runtime_resolve,@function
 _dl_runtime_resolve:
- # We need to save the registers used to pass parameters.
- # We build a stack frame to put them in.
+ # We need to save the registers used to pass parameters, and register 0,
+ # which is used by _mcount; the registers are saved in a stack frame.
 	stwu 1,-48(1)
-	mflr 0
+	stw 0,12(1)
 	stw 3,16(1)
 	stw 4,20(1)
-	stw 0,52(1)
+ # The code that calls this has put parameters for `fixup' in r12 and r11.
+	mr 3,12
 	stw 5,24(1)
- # We also need to save some of the condition register fields.
-	mfcr 0
+	mr 4,11
 	stw 6,28(1)
+	mflr 0
+ # We also need to save some of the condition register fields.
 	stw 7,32(1)
+	stw 0,52(1)
 	stw 8,36(1)
+	mfcr 0
 	stw 9,40(1)
 	stw 10,44(1)
-	stw 0,12(1)
- # The code that calls this has put parameters for `fixup' in r12 and r11.
-	mr 3,12
-	mr 4,11
+	stw 0,8(1)
 	bl fixup@local
  # 'fixup' returns the address we want to branch to.
 	mtctr 3
@@ -184,20 +185,21 @@ _dl_runtime_resolve:
 	lwz 10,44(1)
 	lwz 9,40(1)
 	mtlr 0
-	lwz 0,12(1)
 	lwz 8,36(1)
+	lwz 0,8(1)
 	lwz 7,32(1)
 	lwz 6,28(1)
 	mtcrf 0xFF,0
 	lwz 5,24(1)
 	lwz 4,20(1)
 	lwz 3,16(1)
+	lwz 0,12(1)
  # ...unwind the stack frame, and jump to the PLT entry we updated.
 	addi 1,1,48
 	bctr
 0:
 	.size	 _dl_runtime_resolve,0b-_dl_runtime_resolve
- # undo '.section text'.
+ # Undo '.section text'.
 	.previous
 ");
 
@@ -213,20 +215,20 @@ asm ("\
 	.type _start,@function
 _start:
  # We start with the following on the stack, from top:
- # argc (4 bytes)
- # arguments for program (terminated by NULL)
- # environment variables (terminated by NULL)
- # arguments for the program loader
+ # argc (4 bytes);
+ # arguments for program (terminated by NULL);
+ # environment variables (terminated by NULL);
+ # arguments for the program loader.
  # FIXME: perhaps this should do the same trick as elf/start.c?
 
  # Call _dl_start with one parameter pointing at argc
-	mr 3,1
+	mr   3,1
  #  (we have to frob the stack pointer a bit to allow room for
  #   _dl_start to save the link register)
-	li 4,0
+	li   4,0
 	addi 1,1,-16
-	stw 4,0(1)
-	bl _dl_start@local
+	stw  4,0(1)
+	bl   _dl_start@local
 
  # Now, we do our main work of calling initialisation procedures.
  # The ELF ABI doesn't say anything about parameters for these,
@@ -234,70 +236,72 @@ _start:
  # Changing these is strongly discouraged (not least because argc is
  # passed by value!).
 
- #  put our GOT pointer in r31
-	bl _GLOBAL_OFFSET_TABLE_-4@local
+ #  Put our GOT pointer in r31,
+	bl   _GLOBAL_OFFSET_TABLE_-4@local
 	mflr 31
- #  the address of _start in r30
-	mr 30,3
- #  &_dl_argc in 29, &_dl_argv in 27, and _dl_default_scope in 28
-	lwz 28,_dl_default_scope@got(31)
-	lwz 29,_dl_argc@got(31)
-	lwz 27,_dl_argv@got(31)
+ #  the address of _start in r30,
+	mr   30,3
+ #  &_dl_argc in 29, &_dl_argv in 27, and _dl_default_scope in 28.
+	lwz  28,_dl_default_scope@got(31)
+	lwz  29,_dl_argc@got(31)
+	lwz  27,_dl_argv@got(31)
 0:
- #  call initfunc = _dl_init_next(_dl_default_scope[2])
-	lwz 3,8(28)
-	bl _dl_init_next@plt
- # if initfunc is NULL, we exit the loop
-	mr. 0,3
-	beq 1f
+ #  Set initfunc = _dl_init_next(_dl_default_scope[2])
+	lwz  3,8(28)
+	bl   _dl_init_next@plt
+ # If initfunc is NULL, we exit the loop; otherwise,
+	cmpwi 3,0
+	beq  1f
  # call initfunc(_dl_argc, _dl_argv, _dl_argv+_dl_argc+1)
-	mtlr 0
-	lwz 3,0(29)
-	lwz 4,0(27)
+	mtlr 3
+	lwz  3,0(29)
+	lwz  4,0(27)
 	slwi 5,3,2
-	add 5,4,5
+	add  5,4,5
 	addi 5,5,4
 	blrl
  # and loop.
-	b 0b
+	b    0b
 1:
  # Now, to conform to the ELF ABI, we have to:
- # pass argv (actually _dl_argv) in r4
-	lwz 4,0(27)
- # pass argc (actually _dl_argc) in r3
-	lwz 3,0(29)
- # pass envp (actually _dl_argv+_dl_argc+1) in r5
+ # Pass argc (actually _dl_argc) in r3;
+	lwz  3,0(29)
+ # pass argv (actually _dl_argv) in r4;
+	lwz  4,0(27)
+ # pass envp (actually _dl_argv+_dl_argc+1) in r5;
 	slwi 5,3,2
-	add 5,4,5
-	addi 5,5,4
- # pass the auxilary vector in r6. This is passed just after _envp.
-	addi 6,5,-4
+	add  6,4,5
+	addi 5,6,4
+ # pass the auxiliary vector in r6.  This is passed to us just after _envp.
 2:	lwzu 0,4(6)
-	cmpwi 1,0,0
-	bne 2b
+	cmpwi 0,0,0
+	bne  2b
 	addi 6,6,4
- # pass a termination function pointer (in this case _dl_fini) in r7
-	lwz 7,_dl_fini@got(31)
- # now, call the start function in r30...
+ # Pass a termination function pointer (in this case _dl_fini) in r7.
+	lwz  7,_dl_fini@got(31)
+ # Now, call the start function in r30...
 	mtctr 30
- # pass the stack pointer in r1 (so far so good), pointing to a NULL value
- # (this lets our startup code distinguish between a program linked statically,
+	lwz  26,_dl_starting_up@got(31)
+ # Pass the stack pointer in r1 (so far so good), pointing to a NULL value.
+ # (This lets our startup code distinguish between a program linked statically,
  # which linux will call with argc on top of the stack which will hopefully
  # never be zero, and a dynamically linked program which will always have
  # a NULL on the top of the stack).
  # Take the opportunity to clear LR, so anyone who accidentally returns
- # from _start gets SEGV.
-	li 0,0
-	stw 0,0(1)
-	mtlr 0
- # and also clear _dl_starting_up
-	lwz 26,_dl_starting_up@got(31)
-	stw 0,0(26)
- # go do it!
+ # from _start gets SEGV.  Also clear the next few words of the stack.
+	li   31,0
+	stw  31,0(1)
+	mtlr 31
+	stw  31,4(1)
+ 	stw  31,8(1)
+	stw  31,12(1)
+ # Clear _dl_starting_up.
+	stw  31,0(26)
+ # Go do it!
 	bctr
 0:
 	.size	 _start,0b-_start
- # undo '.section text'.
+ # Undo '.section text'.
 	.previous
 ");
 
@@ -346,7 +350,7 @@ static ElfW(Addr) _dl_preferred_address = 1
 
 /* We require the address of the PLT entry returned from fixup, not
    the first word of the PLT entry. */
-#define ELF_FIXUP_RETURN_VALUE(map, result)  (&(result))
+#define ELF_FIXUP_RETURN_VALUE(map, result)  ((Elf32_Addr) &(result))
 
 /* Nonzero iff TYPE should not be allowed to resolve to one of
    the main executable's symbols, as for a COPY reloc.  */
@@ -396,7 +400,7 @@ elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
 {
   if (map->l_info[DT_JMPREL])
     {
-      int i;
+      Elf32_Word i;
       /* Fill in the PLT. Its initial contents are directed to a
 	 function earlier in the PLT which arranges for the dynamic
 	 linker to be called back.  */
@@ -516,10 +520,10 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
 {
 #ifndef RTLD_BOOTSTRAP
   const Elf32_Sym *const refsym = sym;
+  extern char **_dl_argv;
 #endif
   Elf32_Word loadbase, finaladdr;
   const int rinfo = ELF32_R_TYPE (reloc->r_info);
-  extern char **_dl_argv;
 
   if (rinfo == R_PPC_NONE)
     return;
@@ -551,9 +555,9 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
 		     + reloc->r_addend);
     }
 
-  /* This is an if/else if chain because GCC 2.7.2.[012] turns case
-     statements into non-PIC table lookups.  When a later version
-     comes out that fixes this, this should be changed.  */
+  /* This is still an if/else if chain because GCC uses the GOT to find
+     the table for table-based switch statements, and we haven't set it
+     up yet.  */
   if (rinfo == R_PPC_UADDR32 ||
       rinfo == R_PPC_GLOB_DAT ||
       rinfo == R_PPC_ADDR32 ||
@@ -561,6 +565,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
     {
       *reloc_addr = finaladdr;
     }
+#ifndef RTLD_BOOTSTRAP
   else if (rinfo == R_PPC_ADDR16_LO)
     {
       *(Elf32_Half*) reloc_addr = finaladdr;
@@ -573,7 +578,6 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
     {
       *(Elf32_Half*) reloc_addr = (finaladdr + 0x8000) >> 16;
     }
-#ifndef RTLD_BOOTSTRAP
   else if (rinfo == R_PPC_REL24)
     {
       Elf32_Sword delta = finaladdr - (Elf32_Word) (char *) reloc_addr;
@@ -693,12 +697,14 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
 #endif
     }
 
+#ifndef RTLD_BOOTSTRAP
   if (rinfo == R_PPC_ADDR16_LO ||
       rinfo == R_PPC_ADDR16_HI ||
       rinfo == R_PPC_ADDR16_HA ||
       rinfo == R_PPC_REL24 ||
       rinfo == R_PPC_ADDR24)
     MODIFIED_CODE_NOQUEUE (reloc_addr);
+#endif
 }
 
 #define ELF_MACHINE_NO_REL 1
diff --git a/sysdeps/powerpc/lshift.S b/sysdeps/powerpc/lshift.S
new file mode 100644
index 0000000000..b1487a1c17
--- /dev/null
+++ b/sysdeps/powerpc/lshift.S
@@ -0,0 +1,123 @@
+/* Shift a limb vector left, low level routine.
+   Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <sysdep.h>
+
+/* mp_limb_t mpn_lshift (mp_ptr wp, mp_srcptr up, mp_size_t usize,
+  			 unsigned int cnt)  */
+
+EALIGN(__mpn_lshift,3,0)
+	mtctr	%r5		# copy size into CTR
+	cmplwi	%cr0,%r5,16	# is size < 16
+	slwi	%r0,%r5,2	# r0 = size in bytes
+	add	%r7,%r3,%r0	# make r7 point at end of res
+	add	%r4,%r4,%r0	# make r4 point at end of s1
+	lwzu	%r11,-4(%r4)	# load first s1 limb
+	subfic	%r8,%r6,32	# r8 = 32 - cnt
+	srw	%r3,%r11,%r8	# compute function return value
+	bge	%cr0,L(big)	# branch if size >= 16
+
+	bdz	L(end1)		# only one limb to shift
+
+0:	lwzu	%r10,-4(%r4)	# next lower limb
+	slw	%r9,%r11,%r6
+	srw	%r12,%r10,%r8
+	or	%r9,%r9,%r12	# combine bits from the two adjacent limbs
+	stwu	%r9,-4(%r7)
+	bdz	L(end2)
+	lwzu	%r11,-4(%r4)	# same again with the roles of r10 and r11 swapped
+	slw	%r9,%r10,%r6
+	srw	%r12,%r11,%r8
+	or	%r9,%r9,%r12
+	stwu	%r9,-4(%r7)
+	bdnz	0b
+
+L(end1):slw	%r0,%r11,%r6	# last limb stored: r11 shifted left by cnt
+	stw	%r0,-4(%r7)
+	blr
+
+
+/* Guaranteed not to succeed.  */
+L(boom): tweq    %r0,%r0
+
+/* We imitate a case statement, by using (yuk!) fixed-length code chunks,
+   of size 4*12 bytes.  We have to do this (or something) to make this PIC.  */
+L(big):	mflr    %r9		# save the return address
+	bltl-   %cr0,L(boom)	# Never taken, only used to set LR.
+	slwi    %r10,%r6,4	# cnt*16
+	mflr    %r12		# address of the slwi above
+	add     %r10,%r12,%r10
+	slwi	%r8,%r6,5	# cnt*32
+	add     %r10,%r8,%r10	# r10 = that address + cnt*48
+	mtctr   %r10
+	addi	%r5,%r5,-1	# loop count, loaded into CTR by the chunk
+	mtlr    %r9		# restore the return address
+	bctr			# jump to the DO_LSHIFT(cnt) chunk
+
+L(end2):slw	%r0,%r10,%r6	# last limb stored: r10 shifted left by cnt
+	stw	%r0,-4(%r7)
+	blr
+
+#define DO_LSHIFT(n) \
+	mtctr	%r5;							\
+0:	lwzu	%r10,-4(%r4);						\
+	slwi	%r9,%r11,n;						\
+	inslwi	%r9,%r10,n,32-n;					\
+	stwu	%r9,-4(%r7);						\
+	bdz-	L(end2);						\
+	lwzu	%r11,-4(%r4);						\
+	slwi	%r9,%r10,n;						\
+	inslwi	%r9,%r11,n,32-n;					\
+	stwu	%r9,-4(%r7);						\
+	bdnz	0b;							\
+	b	L(end1)
+
+	DO_LSHIFT(1)
+	DO_LSHIFT(2)
+	DO_LSHIFT(3)
+	DO_LSHIFT(4)
+	DO_LSHIFT(5)
+	DO_LSHIFT(6)
+	DO_LSHIFT(7)
+	DO_LSHIFT(8)
+	DO_LSHIFT(9)
+	DO_LSHIFT(10)
+	DO_LSHIFT(11)
+	DO_LSHIFT(12)
+	DO_LSHIFT(13)
+	DO_LSHIFT(14)
+	DO_LSHIFT(15)
+	DO_LSHIFT(16)
+	DO_LSHIFT(17)
+	DO_LSHIFT(18)
+	DO_LSHIFT(19)
+	DO_LSHIFT(20)
+	DO_LSHIFT(21)
+	DO_LSHIFT(22)
+	DO_LSHIFT(23)
+	DO_LSHIFT(24)
+	DO_LSHIFT(25)
+	DO_LSHIFT(26)
+	DO_LSHIFT(27)
+	DO_LSHIFT(28)
+	DO_LSHIFT(29)
+	DO_LSHIFT(30)
+	DO_LSHIFT(31)
+
+END(__mpn_lshift)
diff --git a/sysdeps/powerpc/lshift.s b/sysdeps/powerpc/lshift.s
deleted file mode 100644
index 9612a3dbec..0000000000
--- a/sysdeps/powerpc/lshift.s
+++ /dev/null
@@ -1,479 +0,0 @@
- # Shift a limb left, low level routine.
- # Copyright (C) 1996, 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB.  If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # mp_limb_t mpn_lshift (mp_ptr wp, mp_srcptr up, mp_size_t usize,
- #			 unsigned int cnt)
-
-	.align 3
-	.globl __mpn_lshift
-	.type	 __mpn_lshift,@function
-__mpn_lshift:
-	mtctr	%r5		# copy size into CTR
-	cmplwi	%cr0,%r5,16	# is size < 16
-	slwi	%r0,%r5,2
-	add	%r7,%r3,%r0	# make r7 point at end of res
-	add	%r4,%r4,%r0	# make r4 point at end of s1
-	lwzu	%r11,-4(%r4)	# load first s1 limb
-	subfic	%r8,%r6,32
-	srw	%r3,%r11,%r8	# compute function return value
-	bge	%cr0,Lbig	# branch if size >= 16
-
-	bdz	Lend1
-
-Loop:	lwzu	%r10,-4(%r4)
-	slw	%r9,%r11,%r6
-	srw	%r12,%r10,%r8
-	or	%r9,%r9,%r12
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slw	%r9,%r10,%r6
-	srw	%r12,%r11,%r8
-	or	%r9,%r9,%r12
-	stwu	%r9,-4(%r7)
-	bdnz	Loop
-	b	Lend1
-
- # Guaranteed not to succeed.
-LBoom:	tweq    %r0,%r0
-
- # We imitate a case statement, by using (yuk!) fixed-length code chunks,
- # of size 4*12 bytes.  We have to do this (or something) to make this PIC.
-Lbig:	mflr    %r9
-	bltl    %cr0,LBoom      # Never taken, only used to set LR.
-	slwi    %r10,%r6,4
-	mflr    %r12
-	add     %r10,%r12,%r10
-	slwi	%r8,%r6,5
-	add     %r10,%r8,%r10
-	mtctr   %r10
-	addi	%r5,%r5,-1
-	mtlr    %r9
-	bctr
-
-Lend1:	slw	%r0,%r11,%r6
-	stw	%r0,-4(%r7)
-	blr
-
-	mtctr	%r5
-Loop1:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,1
-	inslwi	%r9,%r10,1,31
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,1
-	inslwi	%r9,%r11,1,31
-	stwu	%r9,-4(%r7)
-	bdnz	Loop1
-	b	Lend1
-
-	mtctr	%r5
-Loop2:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,2
-	inslwi	%r9,%r10,2,30
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,2
-	inslwi	%r9,%r11,2,30
-	stwu	%r9,-4(%r7)
-	bdnz	Loop2
-	b	Lend1
-
-	mtctr	%r5
-Loop3:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,3
-	inslwi	%r9,%r10,3,29
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,3
-	inslwi	%r9,%r11,3,29
-	stwu	%r9,-4(%r7)
-	bdnz	Loop3
-	b	Lend1
-
-	mtctr	%r5
-Loop4:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,4
-	inslwi	%r9,%r10,4,28
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,4
-	inslwi	%r9,%r11,4,28
-	stwu	%r9,-4(%r7)
-	bdnz	Loop4
-	b	Lend1
-
-	mtctr	%r5
-Loop5:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,5
-	inslwi	%r9,%r10,5,27
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,5
-	inslwi	%r9,%r11,5,27
-	stwu	%r9,-4(%r7)
-	bdnz	Loop5
-	b	Lend1
-
-	mtctr	%r5
-Loop6:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,6
-	inslwi	%r9,%r10,6,26
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,6
-	inslwi	%r9,%r11,6,26
-	stwu	%r9,-4(%r7)
-	bdnz	Loop6
-	b	Lend1
-
-	mtctr	%r5
-Loop7:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,7
-	inslwi	%r9,%r10,7,25
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,7
-	inslwi	%r9,%r11,7,25
-	stwu	%r9,-4(%r7)
-	bdnz	Loop7
-	b	Lend1
-
-	mtctr	%r5
-Loop8:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,8
-	inslwi	%r9,%r10,8,24
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,8
-	inslwi	%r9,%r11,8,24
-	stwu	%r9,-4(%r7)
-	bdnz	Loop8
-	b	Lend1
-
-	mtctr	%r5
-Loop9:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,9
-	inslwi	%r9,%r10,9,23
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,9
-	inslwi	%r9,%r11,9,23
-	stwu	%r9,-4(%r7)
-	bdnz	Loop9
-	b	Lend1
-
-	mtctr	%r5
-Loop10:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,10
-	inslwi	%r9,%r10,10,22
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,10
-	inslwi	%r9,%r11,10,22
-	stwu	%r9,-4(%r7)
-	bdnz	Loop10
-	b	Lend1
-
-	mtctr	%r5
-Loop11:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,11
-	inslwi	%r9,%r10,11,21
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,11
-	inslwi	%r9,%r11,11,21
-	stwu	%r9,-4(%r7)
-	bdnz	Loop11
-	b	Lend1
-
-	mtctr	%r5
-Loop12:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,12
-	inslwi	%r9,%r10,12,20
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,12
-	inslwi	%r9,%r11,12,20
-	stwu	%r9,-4(%r7)
-	bdnz	Loop12
-	b	Lend1
-
-	mtctr	%r5
-Loop13:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,13
-	inslwi	%r9,%r10,13,19
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,13
-	inslwi	%r9,%r11,13,19
-	stwu	%r9,-4(%r7)
-	bdnz	Loop13
-	b	Lend1
-
-	mtctr	%r5
-Loop14:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,14
-	inslwi	%r9,%r10,14,18
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,14
-	inslwi	%r9,%r11,14,18
-	stwu	%r9,-4(%r7)
-	bdnz	Loop14
-	b	Lend1
-
-	mtctr	%r5
-Loop15:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,15
-	inslwi	%r9,%r10,15,17
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,15
-	inslwi	%r9,%r11,15,17
-	stwu	%r9,-4(%r7)
-	bdnz	Loop15
-	b	Lend1
-
-	mtctr	%r5
-Loop16:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,16
-	inslwi	%r9,%r10,16,16
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,16
-	inslwi	%r9,%r11,16,16
-	stwu	%r9,-4(%r7)
-	bdnz	Loop16
-	b	Lend1
-
-	mtctr	%r5
-Loop17:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,17
-	inslwi	%r9,%r10,17,15
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,17
-	inslwi	%r9,%r11,17,15
-	stwu	%r9,-4(%r7)
-	bdnz	Loop17
-	b	Lend1
-
-	mtctr	%r5
-Loop18:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,18
-	inslwi	%r9,%r10,18,14
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,18
-	inslwi	%r9,%r11,18,14
-	stwu	%r9,-4(%r7)
-	bdnz	Loop18
-	b	Lend1
-
-	mtctr	%r5
-Loop19:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,19
-	inslwi	%r9,%r10,19,13
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,19
-	inslwi	%r9,%r11,19,13
-	stwu	%r9,-4(%r7)
-	bdnz	Loop19
-	b	Lend1
-
-	mtctr	%r5
-Loop20:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,20
-	inslwi	%r9,%r10,20,12
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,20
-	inslwi	%r9,%r11,20,12
-	stwu	%r9,-4(%r7)
-	bdnz	Loop20
-	b	Lend1
-
-	mtctr	%r5
-Loop21:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,21
-	inslwi	%r9,%r10,21,11
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,21
-	inslwi	%r9,%r11,21,11
-	stwu	%r9,-4(%r7)
-	bdnz	Loop21
-	b	Lend1
-
-	mtctr	%r5
-Loop22:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,22
-	inslwi	%r9,%r10,22,10
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,22
-	inslwi	%r9,%r11,22,10
-	stwu	%r9,-4(%r7)
-	bdnz	Loop22
-	b	Lend1
-
-	mtctr	%r5
-Loop23:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,23
-	inslwi	%r9,%r10,23,9
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,23
-	inslwi	%r9,%r11,23,9
-	stwu	%r9,-4(%r7)
-	bdnz	Loop23
-	b	Lend1
-
-	mtctr	%r5
-Loop24:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,24
-	inslwi	%r9,%r10,24,8
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,24
-	inslwi	%r9,%r11,24,8
-	stwu	%r9,-4(%r7)
-	bdnz	Loop24
-	b	Lend1
-
-	mtctr	%r5
-Loop25:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,25
-	inslwi	%r9,%r10,25,7
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,25
-	inslwi	%r9,%r11,25,7
-	stwu	%r9,-4(%r7)
-	bdnz	Loop25
-	b	Lend1
-
-	mtctr	%r5
-Loop26:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,26
-	inslwi	%r9,%r10,26,6
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,26
-	inslwi	%r9,%r11,26,6
-	stwu	%r9,-4(%r7)
-	bdnz	Loop26
-	b	Lend1
-
-	mtctr	%r5
-Loop27:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,27
-	inslwi	%r9,%r10,27,5
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,27
-	inslwi	%r9,%r11,27,5
-	stwu	%r9,-4(%r7)
-	bdnz	Loop27
-	b	Lend1
-
-	mtctr	%r5
-Loop28:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,28
-	inslwi	%r9,%r10,28,4
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,28
-	inslwi	%r9,%r11,28,4
-	stwu	%r9,-4(%r7)
-	bdnz	Loop28
-	b	Lend1
-
-	mtctr	%r5
-Loop29:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,29
-	inslwi	%r9,%r10,29,3
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,29
-	inslwi	%r9,%r11,29,3
-	stwu	%r9,-4(%r7)
-	bdnz	Loop29
-	b	Lend1
-
-	mtctr	%r5
-Loop30:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,30
-	inslwi	%r9,%r10,30,2
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,30
-	inslwi	%r9,%r11,30,2
-	stwu	%r9,-4(%r7)
-	bdnz	Loop30
-	b	Lend1
-
-	mtctr	%r5
-Loop31:	lwzu	%r10,-4(%r4)
-	slwi	%r9,%r11,31
-	inslwi	%r9,%r10,31,1
-	stwu	%r9,-4(%r7)
-	bdz	Lend2
-	lwzu	%r11,-4(%r4)
-	slwi	%r9,%r10,31
-	inslwi	%r9,%r11,31,1
-	stwu	%r9,-4(%r7)
-	bdnz	Loop31
-	b	Lend1
-
-Lend2:	slw	%r0,%r10,%r6
-	stw	%r0,-4(%r7)
-	blr
diff --git a/sysdeps/powerpc/machine-gmon.h b/sysdeps/powerpc/machine-gmon.h
new file mode 100644
index 0000000000..ba53807308
--- /dev/null
+++ b/sysdeps/powerpc/machine-gmon.h
@@ -0,0 +1,32 @@
+/* PowerPC-specific implementation of profiling support.
+   Copyright (C) 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/* We need a special version of the `mcount' function because it has to
+   preserve more registers than a usual function (see ppc-mcount.S).  */
+
+void __mcount_internal (unsigned long frompc, unsigned long selfpc);
+
+#define _MCOUNT_DECL(frompc, selfpc) \
+void __mcount_internal (unsigned long frompc, unsigned long selfpc)
+
+
+/* Define MCOUNT as empty since the `_mcount' entry point itself is
+   implemented in assembler, in ppc-mcount.S.  */
+#define MCOUNT
diff --git a/sysdeps/powerpc/memset.S b/sysdeps/powerpc/memset.S
new file mode 100644
index 0000000000..6ac32ddc99
--- /dev/null
+++ b/sysdeps/powerpc/memset.S
@@ -0,0 +1,199 @@
+/* Optimized memset implementation for PowerPC.
+   Copyright (C) 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <sysdep.h>
+
+EALIGN(memset,5,1)
+/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
+   Returns 's'.
+
+   The memset is done in three sizes: byte (8 bits), word (32 bits),
+   cache line (256 bits). There is a special case for setting cache lines
+   to 0, to take advantage of the dcbz instruction.
+   r6:	current address we are storing at
+   r7:	number of bytes we are setting now (when aligning)  */
+
+/* take care of case for size <= 4  */
+	cmplwi %cr1,%r5,4
+	andi.  %r7,%r3,3
+	mr     %r6,%r3
+	ble-   %cr1,L(small)
+/* align to word boundary  */
+	cmplwi %cr5,%r5,31
+	rlwimi %r4,%r4,8,16,23
+	beq+   L(aligned)		# 8th instruction from .align
+	mtcrf  0x01,%r3
+	subfic %r7,%r7,4
+	add    %r6,%r6,%r7
+	sub    %r5,%r5,%r7
+	bf+    31,0f
+	stb    %r4,0(%r3)
+	bt     30,L(aligned)
+0:	sth    %r4,-2(%r6)		#  16th instruction from .align
+/* take care of case for size <= 31 */
+L(aligned):
+	mtcrf  0x01,%r5
+	rlwimi %r4,%r4,16,0,15
+	ble    %cr5,L(medium)
+/* align to cache line boundary...  */
+	andi.  %r7,%r6,0x1C
+	subfic %r7,%r7,0x20
+	beq    L(caligned)
+	mtcrf  0x01,%r7
+	add    %r6,%r6,%r7
+	sub    %r5,%r5,%r7
+	cmplwi %cr1,%r7,0x10
+	mr     %r8,%r6
+	bf     28,1f
+	stw    %r4,-4(%r8)
+	stwu   %r4,-8(%r8)
+1:	blt    %cr1,2f
+	stw    %r4,-4(%r8)	# 32nd instruction from .align
+	stw    %r4,-8(%r8)
+	stw    %r4,-12(%r8)
+	stwu   %r4,-16(%r8)
+2:	bf     29,L(caligned)
+	stw    %r4,-4(%r8)
+/* now aligned to a cache line.  */
+L(caligned):
+	cmplwi %cr1,%r4,0
+	clrrwi. %r7,%r5,5
+	mtcrf  0x01,%r5		# 40th instruction from .align
+	beq    %cr1,L(zloopstart) # special case for clearing memory using dcbz
+	srwi   %r0,%r7,5
+	mtctr  %r0
+	beq    L(medium)	# we may not actually get to do a full line
+	clrlwi. %r5,%r5,27
+	add    %r6,%r6,%r7
+0:	li     %r8,-0x40
+	bdz    L(cloopdone)	# 48th instruction from .align
+
+3:	dcbz   %r8,%r6
+	stw    %r4,-4(%r6)
+	stw    %r4,-8(%r6)
+	stw    %r4,-12(%r6)
+	stw    %r4,-16(%r6)
+	nop			# let 601 fetch last 4 instructions of loop
+	stw    %r4,-20(%r6)
+	stw    %r4,-24(%r6)	# 56th instruction from .align
+	nop			# let 601 fetch first 8 instructions of loop
+	stw    %r4,-28(%r6)
+	stwu   %r4,-32(%r6)
+	bdnz   3b
+L(cloopdone):
+	stw    %r4,-4(%r6)
+	stw    %r4,-8(%r6)
+	stw    %r4,-12(%r6)
+	stw    %r4,-16(%r6)	# 64th instruction from .align
+	stw    %r4,-20(%r6)
+	cmplwi %cr1,%r5,16
+	stw    %r4,-24(%r6)
+	stw    %r4,-28(%r6)
+	stwu   %r4,-32(%r6)
+	beqlr
+	add    %r6,%r6,%r7
+	b      L(medium_tail2)	# 72nd instruction from .align
+
+	.align 5
+	nop
+/* Clear lines of memory in 128-byte chunks.  */
+L(zloopstart):
+	clrlwi %r5,%r5,27
+	mtcrf  0x02,%r7
+	srwi.  %r0,%r7,7
+	mtctr  %r0
+	li     %r7,0x20
+	li     %r8,-0x40
+	cmplwi %cr1,%r5,16	# 8
+	bf     26,0f
+	dcbz   0,%r6
+	addi   %r6,%r6,0x20
+0:	li     %r9,-0x20
+	bf     25,1f
+	dcbz   0,%r6
+	dcbz   %r7,%r6
+	addi   %r6,%r6,0x40	# 16
+1:	cmplwi %cr5,%r5,0
+	beq    L(medium)
+L(zloop):
+	dcbz   0,%r6
+	dcbz   %r7,%r6
+	addi   %r6,%r6,0x80
+	dcbz   %r8,%r6
+	dcbz   %r9,%r6
+	bdnz   L(zloop)
+	beqlr  %cr5
+	b      L(medium_tail2)
+
+	.align 5
+L(small):
+/* Memset of 4 bytes or less.  */
+	cmplwi %cr5,%r5,1
+	cmplwi %cr1,%r5,3
+	bltlr  %cr5
+	stb    %r4,0(%r6)
+	beqlr  %cr5
+	nop
+	stb    %r4,1(%r6)
+	bltlr  %cr1
+	stb    %r4,2(%r6)
+	beqlr  %cr1
+	nop
+	stb    %r4,3(%r6)
+	blr
+
+/* Memset of 0-31 bytes.  */
+	.align 5
+L(medium):
+	cmplwi %cr1,%r5,16
+L(medium_tail2):
+	add    %r6,%r6,%r5
+L(medium_tail):
+	bt-    31,L(medium_31t)
+	bt-    30,L(medium_30t)
+L(medium_30f):
+	bt-    29,L(medium_29t)
+L(medium_29f):
+	bge-   %cr1,L(medium_27t)
+	bflr-  28
+	stw    %r4,-4(%r6)		# 8th instruction from .align
+	stw    %r4,-8(%r6)
+	blr
+
+L(medium_31t):
+	stbu   %r4,-1(%r6)
+	bf-    30,L(medium_30f)
+L(medium_30t):
+	sthu   %r4,-2(%r6)
+	bf-    29,L(medium_29f)
+L(medium_29t):
+	stwu   %r4,-4(%r6)
+	blt-   %cr1,L(medium_27f)	# 16th instruction from .align
+L(medium_27t):
+	stw    %r4,-4(%r6)
+	stw    %r4,-8(%r6)
+	stw    %r4,-12(%r6)
+	stwu   %r4,-16(%r6)
+L(medium_27f):
+	bflr-  28
+L(medium_28t):
+	stw    %r4,-4(%r6)
+	stw    %r4,-8(%r6)
+	blr
+END(memset)
diff --git a/sysdeps/powerpc/memset.s b/sysdeps/powerpc/memset.s
deleted file mode 100644
index 4c8bf8c6b4..0000000000
--- a/sysdeps/powerpc/memset.s
+++ /dev/null
@@ -1,202 +0,0 @@
- # Optimized memset implementation for PowerPC.
- # Copyright (C) 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB.  If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
-	.section ".text"
-	.align 5
-	nop
-	
-	.globl memset
-	.type memset,@function
-memset:	
- # __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
- # Returns 's'.
-
- # The memset is done in three sizes: byte (8 bits), word (32 bits),
- # cache line (256 bits). There is a special case for setting cache lines
- # to 0, to take advantage of the dcbz instruction.
- # r6:	current address we are storing at
- # r7:	number of bytes we are setting now (when aligning)
-
- # take care of case for size <= 4
-	cmplwi %cr1,%r5,4	
-	andi.  %r7,%r3,3
-	mr     %r6,%r3
-	ble-   %cr1,small
- # align to word boundary
-	cmplwi %cr5,%r5,31
-	rlwimi %r4,%r4,8,16,23
-	beq+   aligned			# 8th instruction from .align
-	mtcrf  0x01,%r3
-	subfic %r7,%r7,4
-	add    %r6,%r6,%r7
-	sub    %r5,%r5,%r7
-	bf+    31,0f
-	stb    %r4,0(%r3)
-	bt     30,aligned
-0:	sth    %r4,-2(%r6)		#  16th instruction from .align
- # take care of case for size < 31
-aligned:
-	mtcrf  0x01,%r5
-	rlwimi %r4,%r4,16,0,15
-	ble    %cr5,medium
- # align to cache line boundary...
-	andi.  %r7,%r6,0x1C
-	subfic %r7,%r7,0x20
-	beq    caligned
-	mtcrf  0x01,%r7
-	add    %r6,%r6,%r7
-	sub    %r5,%r5,%r7
-	cmplwi %cr1,%r7,0x10
-	mr     %r8,%r6
-	bf     28,1f
-	stw    %r4,-4(%r8)
-	stwu   %r4,-8(%r8)
-1:	blt    %cr1,2f
-	stw    %r4,-4(%r8)	# 32nd instruction from .align
-	stw    %r4,-8(%r8)
-	stw    %r4,-12(%r8)
-	stwu   %r4,-16(%r8)
-2:	bf     29,caligned
-	stw    %r4,-4(%r8)
- # now aligned to a cache line.
-caligned:
-	cmplwi %cr1,%r4,0
-	clrrwi. %r7,%r5,5
-	mtcrf  0x01,%r5		# 40th instruction from .align
-	beq    %cr1,zloopstart	# special case for clearing memory using dcbz
-	srwi   %r0,%r7,5
-	mtctr  %r0
-	beq    medium		# we may not actually get to do a full line
-	clrlwi. %r5,%r5,27
-	add    %r6,%r6,%r7
-0:	li     %r8,-0x40
-	bdz    cloopdone	# 48th instruction from .align
-	
-cloop:	dcbz   %r8,%r6
-	stw    %r4,-4(%r6)
-	stw    %r4,-8(%r6)
-	stw    %r4,-12(%r6)
-	stw    %r4,-16(%r6)
-	nop			# let 601 fetch last 4 instructions of loop
-	stw    %r4,-20(%r6)
-	stw    %r4,-24(%r6)	# 56th instruction from .align
-	nop			# let 601 fetch first 8 instructions of loop
-	stw    %r4,-28(%r6)
-	stwu   %r4,-32(%r6)
-	bdnz   cloop
-cloopdone:
-	stw    %r4,-4(%r6)
-	stw    %r4,-8(%r6)
-	stw    %r4,-12(%r6)
-	stw    %r4,-16(%r6)	# 64th instruction from .align
-	stw    %r4,-20(%r6)
-	cmplwi %cr1,%r5,16
-	stw    %r4,-24(%r6)
-	stw    %r4,-28(%r6)
-	stwu   %r4,-32(%r6)
-	beqlr
-	add    %r6,%r6,%r7
-	b      medium_tail2	# 72nd instruction from .align
-
-	.align 5
-	nop
-# clear lines of memory in 128-byte chunks.
-zloopstart:
-	clrlwi %r5,%r5,27
-	mtcrf  0x02,%r7
-	srwi.  %r0,%r7,7
-	mtctr  %r0
-	li     %r7,0x20
-	li     %r8,-0x40
-	cmplwi %cr1,%r5,16	# 8
-	bf     26,0f	
-	dcbz   0,%r6
-	addi   %r6,%r6,0x20
-0:	li     %r9,-0x20
-	bf     25,1f
-	dcbz   0,%r6
-	dcbz   %r7,%r6
-	addi   %r6,%r6,0x40	# 16
-1:	cmplwi %cr5,%r5,0
-	beq    medium
-zloop:	
-	dcbz   0,%r6
-	dcbz   %r7,%r6
-	addi   %r6,%r6,0x80
-	dcbz   %r8,%r6
-	dcbz   %r9,%r6
-	bdnz   zloop
-	beqlr  %cr5
-	b      medium_tail2
-	
-	.align 5	
-small:
- # Memset of 4 bytes or less.
-	cmplwi %cr5,%r5,1
-	cmplwi %cr1,%r5,3
-	bltlr  %cr5
-	stb    %r4,0(%r6)
-	beqlr  %cr5
-	nop
-	stb    %r4,1(%r6)
-	bltlr  %cr1
-	stb    %r4,2(%r6)
-	beqlr  %cr1
-	nop
-	stb    %r4,3(%r6)
-	blr
-
-# memset of 0-31 bytes
-	.align 5
-medium:
-	cmplwi %cr1,%r5,16
-medium_tail2:
-	add    %r6,%r6,%r5
-medium_tail:
-	bt-    31,medium_31t
-	bt-    30,medium_30t
-medium_30f:
-	bt-    29,medium_29t
-medium_29f:
-	bge-   %cr1,medium_27t
-	bflr-  28
-	stw    %r4,-4(%r6)	# 8th instruction from .align
-	stw    %r4,-8(%r6)
-	blr
-
-medium_31t:	
-	stbu   %r4,-1(%r6)
-	bf-    30,medium_30f
-medium_30t:
-	sthu   %r4,-2(%r6)
-	bf-    29,medium_29f
-medium_29t:
-	stwu   %r4,-4(%r6)
-	blt-   %cr1,medium_27f	# 16th instruction from .align
-medium_27t:
-	stw    %r4,-4(%r6)
-	stw    %r4,-8(%r6)
-	stw    %r4,-12(%r6)
-	stwu   %r4,-16(%r6)
-medium_27f:
-	bflr-  28
-medium_28t:
-	stw    %r4,-4(%r6)
-	stw    %r4,-8(%r6)
-	blr
diff --git a/sysdeps/powerpc/mul_1.S b/sysdeps/powerpc/mul_1.S
new file mode 100644
index 0000000000..d48bd8fa19
--- /dev/null
+++ b/sysdeps/powerpc/mul_1.S
@@ -0,0 +1,46 @@
+/* Multiply a limb vector by a limb, for PowerPC.
+   Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <sysdep.h>
+
+/* mp_limb_t mpn_mul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
+                        mp_size_t s1_size, mp_limb_t s2_limb)
+   Calculate s1*s2 and put result in res_ptr; return carry.  */
+
+ENTRY(__mpn_mul_1)
+	mtctr	%r5			# CTR = loop count (r5, presumably s1_size)
+
+	lwz	%r0,0(%r4)		# first source limb
+	mullw	%r7,%r0,%r6		# r7 = low word of limb * r6
+	mulhwu	%r10,%r0,%r6		# r10 = high word (unsigned) of limb * r6
+	addi	%r3,%r3,-4		# adjust res_ptr
+	addic	%r5,%r5,0		# clear cy with dummy insn
+	bdz	1f			# single limb: skip the loop
+
+0:	lwzu	%r0,4(%r4)		# next source limb, advancing r4
+	stwu	%r7,4(%r3)		# store previous result limb, advancing r3
+	mullw	%r8,%r0,%r6		# low word of the new product
+	adde	%r7,%r8,%r10		# add previous high word plus carry
+	mulhwu	%r10,%r0,%r6		# high word of the new product
+	bdnz	0b
+
+1:	stw	%r7,4(%r3)		# store the last result limb
+	addze	%r3,%r10		# return last high word plus carry
+	blr
+END(__mpn_mul_1)
diff --git a/sysdeps/powerpc/mul_1.s b/sysdeps/powerpc/mul_1.s
deleted file mode 100644
index d6eb623bd4..0000000000
--- a/sysdeps/powerpc/mul_1.s
+++ /dev/null
@@ -1,47 +0,0 @@
- # Multiply a limb vector by a limb, for PowerPC.
- # Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB.  If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # mp_limb_t mpn_mul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
- #                      mp_size_t s1_size, mp_limb_t s2_limb)
- # Calculate s1*s2 and put result in res_ptr; return carry.
-
-	.align 2
-	.globl __mpn_mul_1
-	.type	 __mpn_mul_1,@function
-
-__mpn_mul_1:
-	mtctr	%r5
-
-	lwz	%r0,0(%r4)
-	mullw	%r7,%r0,%r6
-	mulhwu	%r10,%r0,%r6
-	addi	%r3,%r3,-4		# adjust res_ptr
-	addic	%r5,%r5,0		# clear cy with dummy insn
-	bdz	Lend
-
-Loop:	lwzu	%r0,4(%r4)
-	stwu	%r7,4(%r3)
-	mullw	%r8,%r0,%r6
-	adde	%r7,%r8,%r10
-	mulhwu	%r10,%r0,%r6
-	bdnz	Loop
-
-Lend:	stw	%r7,4(%r3)
-	addze	%r3,%r10
-	blr
diff --git a/sysdeps/powerpc/ppc-mcount.S b/sysdeps/powerpc/ppc-mcount.S
new file mode 100644
index 0000000000..06f1fcda12
--- /dev/null
+++ b/sysdeps/powerpc/ppc-mcount.S
@@ -0,0 +1,84 @@
+/* PowerPC-specific implementation of profiling support.
+   Copyright (C) 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.	*/
+
+/* This would be bad.  */
+#ifdef PROF
+#undef PROF
+#endif
+
+#include <sysdep.h>
+
+/* We do profiling as described in the SYSV ELF ABI, _mcount is called
+   with the address of a data word in r0 (that is different for every
+   routine, initialised to 0, and otherwise unused).  The caller has put
+   the address the caller will return to in the usual place on the stack,
+   4(%r1).  _mcount is responsible for ensuring that when it returns no
+   argument-passing registers are disturbed, and that the LR is set back
+   to (what the caller sees as) 4(%r1).
+
+   This is intended so that the following code can be inserted at the
+   front of any routine without changing the routine:
+
+	.data
+	.align	2
+   0:	.long	0
+	.previous
+	mflr	%r0
+	lis	%r11,0b@ha
+	stw	%r0,4(%r1)
+	addi	%r0,%r11,0b@l
+	bl	_mcount
+*/
+
+ENTRY(_mcount)
+	stwu	%r1,-48(%r1)	/* allocate a 48-byte frame */
+/* We need to save the parameter-passing registers.  */
+	stw	%r3, 12(%r1)
+	stw	%r4, 16(%r1)
+	stw	%r5, 20(%r1)
+	stw	%r6, 24(%r1)
+	mflr	%r4		/* selfpc: our return address in the profiled routine */
+	lwz	%r3, 52(%r1)	/* frompc: the word the caller stored at its 4(%r1), now at 48+4 */
+	mfcr	%r5		/* the condition register is preserved as well */
+	stw	%r7, 28(%r1)
+	stw	%r8, 32(%r1)
+	stw	%r9, 36(%r1)
+	stw	%r10,40(%r1)
+	stw	%r4, 44(%r1)	/* keep our return address across the call */
+	stw	%r5,  8(%r1)	/* keep the saved CR across the call */
+	bl	JUMPTARGET(__mcount_internal)
+ /* Restore the registers...  */
+	lwz     %r6,  8(%r1)	/* saved CR */
+	lwz	%r0, 44(%r1)	/* our return address */
+	lwz	%r3, 12(%r1)
+	mtctr	%r0		/* return through CTR so that LR can be reset below */
+	lwz	%r4, 16(%r1)
+	mtcrf	0xff,%r6	/* restore all eight CR fields */
+	lwz	%r5, 20(%r1)
+	lwz	%r6, 24(%r1)
+	lwz	%r0, 52(%r1)	/* the caller's own return address */
+	lwz	%r7, 28(%r1)
+	lwz	%r8, 32(%r1)
+	mtlr	%r0		/* LR is set back to what the caller sees as 4(%r1) */
+	lwz	%r9, 36(%r1)
+	lwz	%r10,40(%r1)
+ /* ...unwind the stack frame, and return to your usual programming.  */
+	addi	%r1,%r1,48
+	bctr
+END(_mcount)
diff --git a/sysdeps/powerpc/rshift.S b/sysdeps/powerpc/rshift.S
new file mode 100644
index 0000000000..eb1f562bed
--- /dev/null
+++ b/sysdeps/powerpc/rshift.S
@@ -0,0 +1,56 @@
+/* Shift a limb vector right, low level routine.
+   Copyright (C) 1995, 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <sysdep.h>
+
+/* INPUT PARAMETERS
+   res_ptr	r3
+   s1_ptr	r4
+   size		r5
+   cnt		r6  */
+
+ENTRY(__mpn_rshift)
+	mtctr	5		# copy size into CTR
+	addi	7,3,-4		# move adjusted res_ptr to free return reg
+	subfic	8,6,32		# r8 = 32 - cnt, the complementary shift count
+	lwz	11,0(4)		# load first s1 limb
+	slw	3,11,8		# compute function return value
+	bdz	1f		# single limb: skip the loop
+
+0:	lwzu	10,4(4)		# next limb into r10 (loop is unrolled twice)
+	srw	9,11,6		# previous limb shifted right by cnt
+	slw	12,10,8		# bits of the next limb that move down into it
+	or	9,9,12		# combine the two parts
+	stwu	9,4(7)		# store result limb, advancing r7
+	bdz	2f		# done, with the last limb held in r10
+	lwzu	11,4(4)		# second half: same steps with r10 and r11 swapped
+	srw	9,10,6
+	slw	12,11,8
+	or	9,9,12
+	stwu	9,4(7)
+	bdnz	0b
+
+1:	srw	0,11,6		# last limb is in r11: store its shifted value
+	stw	0,4(7)
+	blr
+
+2:	srw	0,10,6		# last limb is in r10: store its shifted value
+	stw	0,4(7)
+	blr
+END(__mpn_rshift)
diff --git a/sysdeps/powerpc/rshift.s b/sysdeps/powerpc/rshift.s
deleted file mode 100644
index 20f09ad86a..0000000000
--- a/sysdeps/powerpc/rshift.s
+++ /dev/null
@@ -1,59 +0,0 @@
-# PowerPC-32 __mpn_rshift --
-
-# Copyright (C) 1995 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Library General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
-# License for more details.
-
-# You should have received a copy of the GNU Library General Public License
-# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr	r3
-# s1_ptr	r4
-# size		r5
-# cnt		r6
-
-	.align 3
-	.globl __mpn_rshift
-	.type	 __mpn_rshift,@function
-__mpn_rshift:
-	mtctr	5		# copy size into CTR
-	addi	7,3,-4		# move adjusted res_ptr to free return reg
-	subfic	8,6,32
-	lwz	11,0(4)		# load first s1 limb
-	slw	3,11,8		# compute function return value
-	bdz	Lend1
-
-Loop:	lwzu	10,4(4)
-	srw	9,11,6
-	slw	12,10,8
-	or	9,9,12
-	stwu	9,4(7)
-	bdz	Lend2
-	lwzu	11,4(4)
-	srw	9,10,6
-	slw	12,11,8
-	or	9,9,12
-	stwu	9,4(7)
-	bdnz	Loop
-
-Lend1:	srw	0,11,6
-	stw	0,4(7)
-	blr
-
-Lend2:	srw	0,10,6
-	stw	0,4(7)
-	blr
diff --git a/sysdeps/powerpc/s_copysign.S b/sysdeps/powerpc/s_copysign.S
index adc7df226a..6d5ba82592 100644
--- a/sysdeps/powerpc/s_copysign.S
+++ b/sysdeps/powerpc/s_copysign.S
@@ -1,17 +1,17 @@
 /* Copy a sign bit between floating-point values.
    Copyright (C) 1997 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-  
+
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.
-  
+
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.
-  
+
    You should have received a copy of the GNU Library General Public
    License along with the GNU C Library; see the file COPYING.LIB.  If not,
    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
@@ -20,15 +20,12 @@
 /* This has been coded in assembler because GCC makes such a mess of it
    when it's coded in C.  */
 
-	.section ".text"
-	.align 2
-	.globl __copysign
-	.type __copysign,@function
-__copysign:	
+#include <sysdep.h>
+
+ENTRY(__copysign)
 /* double [f1] copysign (double [f1] x, double [f2] y);
    copysign(x,y) returns a value with the magnitude of x and
    with the sign bit of y.  */
-
 	stwu	%r1,-16(%r1)
 	stfd	%f2,8(%r1)
 	lwz	%r3,8(%r1)
@@ -39,22 +36,15 @@ __copysign:
 	blr
 0:	fnabs   %f1,%f1
 	blr
-0:
-	.size	 __copysign,0b-__copysign
-	
-	.globl copysign
-	.globl copysignf
-	.globl __copysignf
-	.weak copysign
-	.weak copysignf
-	.set copysign,__copysign
+	END (__copysign)
+
+weak_alias(__copysign,copysign)
+
 /* It turns out that it's safe to use this code even for single-precision.  */
-	.set __copysignf,__copysign
-	.set copysignf,__copysign
+weak_alias(__copysign,copysignf)
+strong_alias(__copysign,__copysignf)
+
 #ifdef NO_LONG_DOUBLE
-	.globl copysignl
-	.globl __copysignl
-	.weak copysignl
-	.set __copysignl,__copysign
-	.set copysignl,__copysign
+weak_alias(__copysign,copysignl)
+strong_alias(__copysign,__copysignl)
 #endif
diff --git a/sysdeps/powerpc/s_fabs.S b/sysdeps/powerpc/s_fabs.S
index a52733568d..3c6374b0aa 100644
--- a/sysdeps/powerpc/s_fabs.S
+++ b/sysdeps/powerpc/s_fabs.S
@@ -1,42 +1,37 @@
 /* Floating-point absolute value.  PowerPC version.
    Copyright (C) 1997 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-  
+
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.
-  
+
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.
-  
+
    You should have received a copy of the GNU Library General Public
    License along with the GNU C Library; see the file COPYING.LIB.  If not,
    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.  */
 
-	.section ".text"
-	.align 2
-	.globl __fabs
-	.type __fabs,@function
-__fabs:	
+#include <sysdep.h>
+
+ENTRY(__fabs)
 /* double [f1] fabs (double [f1] x); */
 	fabs %f1,%f1
 	blr
-0:
-	.size	 __fabs,0b-__fabs
+END(__fabs)
+
+weak_alias(__fabs,fabs)
 
-	.globl fabs,fabsf,__fabsf
-	.weak fabs,fabsf
-	.set fabs,__fabs
 /* It turns out that it's safe to use this code even for single-precision.  */
-	.set __fabsf,__fabs
-	.set fabsf,__fabs
+strong_alias(__fabs,__fabsf)
+weak_alias(__fabs,fabsf)
+
 #ifdef NO_LONG_DOUBLE
-	.globl fabsl,__fabsl
-	.weak fabsl
-	.set __fabsl,__fabs
-	.set fabsl,__fabs
+strong_alias(__fabs,__fabsl)	/* strong, like __fabsf; only fabsl is weak */
+weak_alias(__fabs,fabsl)
 #endif
diff --git a/sysdeps/powerpc/setjmp.S b/sysdeps/powerpc/setjmp.S
index ddfea7eed3..8fa863f161 100644
--- a/sysdeps/powerpc/setjmp.S
+++ b/sysdeps/powerpc/setjmp.S
@@ -62,9 +62,5 @@ ENTRY (__sigsetjmp)
 	stfd %f30,((JB_FPRS+16*2)*4)(3)
 	stw  %r31,((JB_GPRS+17)*4)(3)
 	stfd %f31,((JB_FPRS+17*2)*4)(3)
-#ifdef PIC
-	b __sigjmp_save@plt
-#else
-	b __sigjmp_save
-#endif
+	b JUMPTARGET(__sigjmp_save)
 END (__sigsetjmp)
diff --git a/sysdeps/powerpc/strchr.S b/sysdeps/powerpc/strchr.S
new file mode 100644
index 0000000000..156d4d155c
--- /dev/null
+++ b/sysdeps/powerpc/strchr.S
@@ -0,0 +1,111 @@
+/* Optimized strchr implementation for PowerPC.
+   Copyright (C) 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <sysdep.h>
+
+/* See strlen.s for comments on how this works.  */
+
+/* char * [r3] strchr (const char *s [r3] , int c [r4] )
+
+   r0:	a temporary
+   r3:	our return result.
+   r4:	byte we're looking for, spread over the whole word
+   r5:	the current word
+   r6:	the constant 0xfefefeff (-0x01010101)
+   r7:	the constant 0x7f7f7f7f
+   r8:	pointer to the current word.
+   r9:	a temporary
+   r10:	the number of bits we should ignore in the first word
+   r11:	a mask with the bits to ignore set to 0
+   r12:	a temporary  */
+ENTRY(strchr)
+	rlwimi %r4,%r4,8,16,23		/* copy the byte into bits 16-23 */
+	li   %r11,-1
+	rlwimi %r4,%r4,16,0,15		/* ...and the low halfword into the high one */
+	lis  %r6,0xfeff			/* r6 = 0xfeff0000 */
+	lis  %r7,0x7f7f
+	clrrwi %r8,%r3,2		/* r8 = s rounded down to a word boundary */
+	addi %r7,%r7,0x7f7f		/* r7 = 0x7f7f7f7f */
+	addi %r6,%r6,-0x101		/* r6 = 0xfefefeff; signed form fits the 16-bit immediate */
+	rlwinm %r10,%r3,3,27,28		/* r10 = 8 * (s & 3) */
+/* Test the first (partial?) word.  */
+	lwz  %r5,0(%r8)
+	srw  %r11,%r11,%r10
+	orc  %r5,%r5,%r11
+	add  %r0,%r6,%r5
+	nor  %r9,%r7,%r5
+	and. %r0,%r0,%r9
+	xor  %r12,%r4,%r5
+	orc  %r12,%r12,%r11
+	b    L(loopentry)
+
+/* The loop.  */
+
+L(loop):lwzu %r5,4(%r8)
+	and. %r0,%r0,%r9
+/* Test for 0.  */
+	add  %r0,%r6,%r5
+	nor  %r9,%r7,%r5
+	bne  L(foundit)
+	and. %r0,%r0,%r9
+/* Start test for the bytes we're looking for.  */
+	xor  %r12,%r4,%r5
+L(loopentry):
+	add  %r0,%r6,%r12
+	nor  %r9,%r7,%r12
+	beq  L(loop)
+/* There is a zero byte in the word, but may also be a matching byte (either
+   before or after the zero byte).  In fact, we may be looking for a
+   zero byte, in which case we return a match.  We guess that this hasn't
+   happened, though.  */
+L(missed):
+	and. %r0,%r0,%r9
+	li   %r3,0
+	beqlr
+/* It did happen. Decide which one was first...
+   I'm not sure if this is actually faster than a sequence of
+   rotates, compares, and branches (we use it anyway because it's shorter).  */
+	and  %r6,%r7,%r5
+	or   %r11,%r7,%r5
+	and  %r0,%r7,%r12
+	or   %r10,%r7,%r12
+	add  %r6,%r6,%r7
+	add  %r0,%r0,%r7
+	nor  %r5,%r11,%r6
+	nor  %r9,%r10,%r0
+	cmplw %r5,%r9
+	bgtlr
+	cntlzw %r4,%r9
+	srwi %r4,%r4,3
+	add  %r3,%r8,%r4
+	blr
+
+L(foundit):
+	and  %r0,%r7,%r12
+	or   %r10,%r7,%r12
+	add  %r0,%r0,%r7
+	nor  %r9,%r10,%r0
+	cntlzw %r4,%r9
+	subi %r8,%r8,4
+	srwi %r4,%r4,3
+	add  %r3,%r8,%r4
+	blr
+END(strchr)
+
+weak_alias(strchr,index)
diff --git a/sysdeps/powerpc/strchr.s b/sysdeps/powerpc/strchr.s
deleted file mode 100644
index c1df66f8dc..0000000000
--- a/sysdeps/powerpc/strchr.s
+++ /dev/null
@@ -1,118 +0,0 @@
- # Optimized strchr implementation for PowerPC.
- # Copyright (C) 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB.  If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # See strlen.s for comments on how this works.
-
-	.section ".text"
-	.align 2
-	.globl strchr
-	.type strchr,@function
-strchr:
- # char * [r3] strchr (const char *s [r3] , int c [r4] )
-
- # r0:	a temporary
- # r3:	our return result.
- # r4:	byte we're looking for, spread over the whole word
- # r5:	the current word
- # r6:	the constant 0xfefefeff (-0x01010101)
- # r7:	the constant 0x7f7f7f7f
- # r8:	pointer to the current word.
- # r9:	a temporary
- # r10:	the number of bits we should ignore in the first word
- # r11:	a mask with the bits to ignore set to 0
- # r12:	a temporary
-	
-	rlwimi %r4,%r4,8,16,23
-	li   %r11,-1
-	rlwimi %r4,%r4,16,0,15
-	lis  %r6,0xfeff
-	lis  %r7,0x7f7f
-	clrrwi %r8,%r3,2
-	addi %r7,%r7,0x7f7f
-	addi %r6,%r6,0xfffffeff
-	rlwinm %r10,%r3,3,27,28
- # Test the first (partial?) word.
-	lwz  %r5,0(%r8)
-	srw  %r11,%r11,%r10
-	orc  %r5,%r5,%r11
-	add  %r0,%r6,%r5
-	nor  %r9,%r7,%r5
-	and. %r0,%r0,%r9
-	xor  %r12,%r4,%r5
-	orc  %r12,%r12,%r11
-	b    loopentry
-	
- # The loop.
-
-loop:	lwzu %r5,4(%r8)
-	and. %r0,%r0,%r9
- # Test for 0
-	add  %r0,%r6,%r5
-	nor  %r9,%r7,%r5
-	bne  foundit
-	and. %r0,%r0,%r9
- # Start test for the bytes we're looking for
-	xor  %r12,%r4,%r5
-loopentry:
-	add  %r0,%r6,%r12
-	nor  %r9,%r7,%r12
-	beq  loop
- # There is a zero byte in the word, but may also be a matching byte (either
- # before or after the zero byte). In fact, we may be looking for a
- # zero byte, in which case we return a match. We guess that this hasn't
- # happened, though.
-missed:	
-	and. %r0,%r0,%r9
-	li   %r3,0
-	beqlr
- # It did happen. Decide which one was first...
- # I'm not sure if this is actually faster than a sequence of
- # rotates, compares, and branches (we use it anyway because it's shorter).
-	and  %r6,%r7,%r5
-	or   %r11,%r7,%r5
-	and  %r0,%r7,%r12
-	or   %r10,%r7,%r12
-	add  %r6,%r6,%r7
-	add  %r0,%r0,%r7
-	nor  %r5,%r11,%r6
-	nor  %r9,%r10,%r0
-	cmplw %r5,%r9
-	bgtlr
-	cntlzw %r4,%r9
-	srwi %r4,%r4,3
-	add  %r3,%r8,%r4
-	blr
-
-foundit:
-	and  %r0,%r7,%r12
-	or   %r10,%r7,%r12
-	add  %r0,%r0,%r7
-	nor  %r9,%r10,%r0
-	cntlzw %r4,%r9
-	subi %r8,%r8,4
-	srwi %r4,%r4,3
-	add  %r3,%r8,%r4
-	blr
-
-0:
-	.size	 strchr,0b-strchr
-
-	.globl index
-	.weak index
-	.set index,strchr
diff --git a/sysdeps/powerpc/strcmp.S b/sysdeps/powerpc/strcmp.S
new file mode 100644
index 0000000000..9f4d134419
--- /dev/null
+++ b/sysdeps/powerpc/strcmp.S
@@ -0,0 +1,115 @@
+/* Optimized strcmp implementation for PowerPC.
+   Copyright (C) 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <sysdep.h>
+
+/* See strlen.S for comments on how the end-of-string testing works.  */
+
+EALIGN(strcmp,4,0)
+/* int [r3] strcmp (const char *p1 [r3], const char *p2 [r4])  */
+
+/* General register assignments:
+   r0:	temporary
+   r3:	pointer to previous word in s1
+   r4:	pointer to previous word in s2
+   r5:	current word from s1
+   r6:	current word from s2
+   r7:	0xfefefeff
+   r8:	0x7f7f7f7f
+   r9:	~(word in s1 | 0x7f7f7f7f)  */
+
+/* Other register usage:
+   r0:	in the prologue, the low 2 bits of (p1 | p2)
+   r10:	at the end of the string, r5 ^ r6  */
+
+	or    %r0,%r4,%r3
+	clrlwi. %r0,%r0,30
+	lis   %r7,0xfeff
+	bne   L(unaligned)
+
+	lwz   %r5,0(%r3)
+	lwz   %r6,0(%r4)
+	lis   %r8,0x7f7f
+	addi  %r7,%r7,-0x101
+	addi  %r8,%r8,0x7f7f
+	b     1f
+
+0:	lwzu  %r5,4(%r3)
+	bne   %cr1,L(different)
+	lwzu  %r6,4(%r4)
+1:	add   %r0,%r7,%r5
+	nor   %r9,%r8,%r5
+	and.  %r0,%r0,%r9
+	cmpw  %cr1,%r5,%r6
+	beq+  0b
+L(endstring):
+/* OK. We've hit the end of the string. We need to be careful that
+   we don't compare two strings as different because of gunk beyond
+   the end of the strings...  */
+	and   %r0,%r8,%r5
+	beq   %cr1,L(equal)
+	add   %r0,%r0,%r8
+	xor.  %r10,%r5,%r6
+	andc  %r9,%r9,%r0
+	blt-  L(highbit)
+	cntlzw %r10,%r10
+	cntlzw %r9,%r9
+	addi  %r9,%r9,7
+	cmpw  %cr1,%r9,%r10
+	sub   %r3,%r5,%r6
+	bgelr+ %cr1
+L(equal):
+	li    %r3,0
+	blr
+
+L(different):
+	lwz   %r5,-4(%r3)
+	xor.  %r10,%r5,%r6
+	sub   %r3,%r5,%r6
+	bgelr+
+L(highbit):	/* The words differ in their top bit: negative iff r6 has it set.  */
+	ori   %r3,%r6,1	/* Force a nonzero result; r6 itself may be 0.  */
+	blr
+
+
+/* Oh well.  In this case, we just do a byte-by-byte comparison.  */
+	.align 4
+L(unaligned):
+	lbz   %r5,0(%r3)
+	lbz   %r6,0(%r4)
+	b     1f
+
+0:	lbzu  %r5,1(%r3)
+	bne-  4f
+	lbzu  %r6,1(%r4)
+1:	cmpwi %cr1,%r5,0
+	beq-  %cr1,3f
+	cmpw  %r5,%r6
+	bne-  3f
+	lbzu  %r5,1(%r3)
+	lbzu  %r6,1(%r4)
+	cmpwi %cr1,%r5,0
+	cmpw  %r5,%r6
+	bne+  %cr1,0b
+3:	sub   %r3,%r5,%r6
+	blr
+4:	lbz   %r5,-1(%r3)
+	sub   %r3,%r5,%r6
+	blr
+END(strcmp)
diff --git a/sysdeps/powerpc/strcmp.s b/sysdeps/powerpc/strcmp.s
deleted file mode 100644
index f901b82ab1..0000000000
--- a/sysdeps/powerpc/strcmp.s
+++ /dev/null
@@ -1,273 +0,0 @@
- # Optimized strcmp implementation for PowerPC.
- # Copyright (C) 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB.  If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # See strlen.s for comments on how the end-of-string testing works.
-
-	.section ".text"
-	.align 3
-	.globl strcmp
-	.type strcmp,@function
-strcmp:
- # int [r3] strcmp (const char *p1 [r3], const char *p2 [r4])
-
- # General register assignments:
- # r0:	temporary
- # r3:	pointer to previous word in s1
- # r4:	pointer to previous word in s2
- # r5:	current first word in s1
- # r6:	current first word in s2 (after re-alignment)
- # r7:	0xfefefeff
- # r8:	0x7f7f7f7f
- # r9:	~(word in s1 | 0x7f7f7f7f)
-	
- # Register assignments in the prologue:
- # r10:	low 2 bits of p2-p1
- # r11:	mask to orc with r5/r6
-	
-	subf. %r10,%r4,%r3
-	beq-  equal
-	andi. %r10,%r10,3
-	cmpi  %cr1,%r10,2
-	beq-  %cr1,align2
-	lis   %r7,0xfeff
-	lis   %r8,0x7f7f
-	addi  %r8,%r8,0x7f7f
-	addi  %r7,%r7,0xfffffeff
-	bgt-  %cr1,align3
-strcmp3:
-	rlwinm %r0,%r3,3,27,28
-	li    %r11,-1
-	srw   %r11,%r11,%r0
-	clrrwi %r3,%r3,2
-	clrrwi %r4,%r4,2
-	lwz   %r5,0(%r3)
-	lwz   %r6,0(%r4)
-	bne-  align1
-
- # The loop, case when both strings are aligned the same.
- # on entry, cr1.eq must be 1.
- # r10:	second word in s1
- # r11:	second word in s2 OR mask to orc with first two words.
-align0:	
-	andi. %r0,%r3,4
-	orc   %r5,%r5,%r11
-	orc   %r6,%r6,%r11
-	beq+  a0start
-	add   %r0,%r7,%r5
-	nor   %r9,%r8,%r5
-	and.  %r0,%r0,%r9
-	cmplw %cr1,%r5,%r6
-	subi  %r3,%r3,4
-	bne-  endstringeq
-	subi  %r4,%r4,4
-	bne-  %cr1,difference
-
-loopalign0:
-	lwzu  %r5,8(%r3)
-	bne-  %cr1,difference2
-	lwzu  %r6,8(%r4)
-a0start:
-	add   %r0,%r7,%r5
-	nor   %r9,%r8,%r5
-	and.  %r0,%r0,%r9
-	cmplw %cr1,%r5,%r6
-	lwz   %r10,4(%r3)
-	bne-  endstringeq
-	add   %r0,%r7,%r10
-	bne-  %cr1,difference
-	nor   %r9,%r8,%r10
-	lwz   %r11,4(%r4)
-	and.  %r0,%r0,%r9
-	cmplw %cr1,%r10,%r11
-	beq+  loopalign0
-
-	mr    %r5,%r10
-	mr    %r6,%r11
-
- # fall through to...
-
-endstringeq:
- # (like 'endstring', but an equality code is in cr1)
-	beq  %cr1,equal
-endstring:
- # OK. We've hit the end of the string. We need to be careful that
- # we don't compare two strings as different because of gunk beyond
- # the end of the strings. We do it like this...
-	and  %r0,%r8,%r5
-	add  %r0,%r0,%r8
-	xor. %r10,%r5,%r6
-	andc %r9,%r9,%r0
-	cntlzw %r10,%r10
-	cntlzw %r9,%r9
-	addi %r9,%r9,7
-	cmpw %cr1,%r9,%r10
-	blt  %cr1,equal
-	sub  %r3,%r5,%r6
-	bgelr+
-	mr   %r3,%r6
-	blr
-equal:	li   %r3,0
-	blr
-	
- # The loop, case when s2 is aligned 1 char behind s1.
- # r10:	current word in s2 (before re-alignment)
-
-align1:
-	cmpwi %cr1,%r0,0
-	orc   %r5,%r5,%r11
-	bne   %cr1,align1_123
- # When s1 is aligned to a word boundary, the startup processing is special.
-	slwi. %r6,%r6,24
-	bne+  a1entry_0
-	nor   %r9,%r8,%r5
-	b     endstring
-
-align1_123:
- # Otherwise (s1 not aligned to a word boundary):
-	mr    %r10,%r6
-	add   %r0,%r7,%r5
-	nor   %r9,%r8,%r5
-	and.  %r0,%r0,%r9
-	srwi  %r6,%r6,8
-	orc   %r6,%r6,%r11
-	cmplw %cr1,%r5,%r6
-	bne-  endstringeq
-	bne-  %cr1,difference
-
-loopalign1:
-	slwi. %r6,%r10,24
-	bne-  %cr1,a1difference
-	lwzu  %r5,4(%r3)
-	beq-  endstring1
-a1entry_0:
-	lwzu  %r10,4(%r4)
-a1entry_123:	
-	add   %r0,%r7,%r5
-	nor   %r9,%r8,%r5
-	and.  %r0,%r0,%r9
-	rlwimi %r6,%r10,24,8,31
-	cmplw %cr1,%r5,%r6
-	beq+  loopalign1
-	b     endstringeq
-
-endstring1:
-	srwi  %r3,%r5,24
-	blr
-
-a1difference:
-	lbz   %r6,-1(%r4)
-	slwi  %r6,%r6,24
-	rlwimi %r6,%r10,24,8,31
-
- # fall through to...
-		
-difference:	
- # The idea here is that we could just return '%r5 - %r6', except
- # that the result might overflow. Overflow can only happen when %r5
- # and %r6 have different signs (thus the xor), in which case we want to
- # return negative iff %r6 has its high bit set so %r5 < %r6.
- # A branch-free implementation of this is
- #	xor  %r0,%r5,%r6
- #	rlwinm %r0,%r0,1,31,31
- #	rlwnm %r5,%r5,%r0,1,31
- #	rlwnm %r6,%r6,%r0,1,31
- #	sub  %r3,%r5,%r6
- #	blr
- # but this is usually more expensive.
-	xor. %r0,%r5,%r6
-	sub  %r3,%r5,%r6
-	bgelr+
-	mr   %r3,%r6
-	blr
-
-difference2:
- # As for 'difference', but use registers r10 and r11 instead of r5 and r6.
-	xor. %r0,%r10,%r11
-	sub  %r3,%r10,%r11
-	bgelr+
-	mr   %r3,%r11
-	blr
-	
- # For the case when s2 is aligned 3 chars behind s1, we switch
- # s1 and s2...
- # r10:	used by 'align2' (see below)
- # r11:	used by 'align2' (see below)
- # r12:	saved link register
- # cr0.eq: must be left as 1.
-
-align3:	mflr %r12
-	mr   %r0,%r3
-	mr   %r3,%r4
-	mr   %r4,%r0
-	bl   strcmp3
-	mtlr %r12
-	neg  %r3,%r3
-	blr
-	
- # The loop, case when s2 and s1's alignments differ by 2
- # This is the ugly case...
- # FIXME: on a 601, the loop takes 7 cycles instead of the 6 you'd expect,
- # because there are too many branches. This loop should probably be
- # coded like the align1 case.
-	
-a2even:	lhz   %r5,0(%r3)
-	lhz   %r6,0(%r4)
-	b     a2entry
-	
-align2:
-	andi. %r0,%r3,1
-	beq+  a2even
-	subi  %r3,%r3,1
-	subi  %r4,%r4,1
-	lbz   %r5,1(%r3)
-	lbz   %r6,1(%r4)
-	cmpwi %cr0,%r5,0
-	cmpw  %cr1,%r5,%r6
-	beq-  align2end2
-	lhzu  %r5,2(%r3)
-	beq+  %cr1,a2entry1
-	lbz   %r5,-1(%r3)
-	sub   %r3,%r5,%r6
-	blr
-
-loopalign2:
-	cmpw  %cr1,%r5,%r6
-	beq-  align2end2
-	lhzu  %r5,2(%r3)
-	bne-  %cr1,align2different
-a2entry1:
-	lhzu  %r6,2(%r4)
-a2entry:	
-	cmpwi %cr5,%r5,0x00ff
-	andi. %r0,%r5,0x00ff
-	bgt+  %cr5,loopalign2
-
-align2end:
-	andi. %r3,%r6,0xff00
-	neg   %r3,%r3
-	blr
-
-align2different:
-	lhzu  %r5,-2(%r3)
-align2end2:
-	sub   %r3,%r5,%r6
-	blr
-		
-0:
-	.size	 strcmp,0b-strcmp
diff --git a/sysdeps/powerpc/strlen.S b/sysdeps/powerpc/strlen.S
new file mode 100644
index 0000000000..dc6660b8fc
--- /dev/null
+++ b/sysdeps/powerpc/strlen.S
@@ -0,0 +1,144 @@
+/* Optimized strlen implementation for PowerPC.
+   Copyright (C) 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <sysdep.h>
+
+/* The algorithm here uses the following techniques:
+
+   1) Given a word 'x', we can test to see if it contains any 0 bytes
+      by subtracting 0x01010101, and seeing if any of the high bits of each
+      byte changed from 0 to 1. This works because the least significant
+      0 byte must have had no incoming carry (otherwise it's not the least
+      significant), so it is 0x00 - 0x01 == 0xff. For all other
+      byte values, either they have the high bit set initially, or when
+      1 is subtracted you get a value in the range 0x00-0x7f, none of which
+      have their high bit set. The expression here is
+      (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
+      there were no 0x00 bytes in the word.
+
+   2) Given a word 'x', we can test to see _which_ byte was zero by
+      calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
+      This produces 0x80 in each byte that was zero, and 0x00 in all
+      the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each
+      byte, and the '| x' part ensures that bytes with the high bit set
+      produce 0x00. The addition will carry into the high bit of each byte
+      iff that byte had one of its low 7 bits set. We can then just see
+      which was the most significant bit set and divide by 8 to find how
+      many to add to the index.
+      This is from the book 'The PowerPC Compiler Writer's Guide',
+      by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
+
+   We deal with strings not aligned to a word boundary by taking the
+   first word and ensuring that bytes not part of the string
+   are treated as nonzero. To allow for memory latency, we unroll the
+   loop a few times, being careful to ensure that we do not read ahead
+   across cache line boundaries.
+
+   Questions to answer:
+   1) How long are strings passed to strlen? If they're often really long,
+   we should probably use cache management instructions and/or unroll the
+   loop more. If they're often quite short, it might be better to use
+   fact (2) in the inner loop than have to recalculate it.
+   2) How popular are bytes with the high bit set? If they are very rare,
+   on some processors it might be useful to use the simpler expression
+   ~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one
+   ALU), but this fails when any character has its high bit set.  */
+
+/* Some notes on register usage: Under the SVR4 ABI, we can use registers
+   0 and 3 through 12 (so long as we don't call any procedures) without
+   saving them. We can also use registers 14 through 31 if we save them.
+   We can't use r1 (it's the stack pointer), r2 nor r13 because the user
+   program may expect them to hold their usual value if we get sent
+   a signal. Integer parameters are passed in r3 through r10.
+   We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving
+   them, the others we must save.  */
+
+ENTRY(strlen)
+/* On entry, r3 points to the string, and it's left that way.
+   We use r6 to store 0xfefefeff, and r7 to store 0x7f7f7f7f.
+   r4 holds the address of the current word in the string; r5 holds
+   the number of padding bits we prepend to the string to make it
+   start at a word boundary. r8 holds the 'current' word.
+   r9-12 are temporaries. r0 is used as a temporary and for discarded
+   results.  */
+	clrrwi %r4,%r3,2
+	lis   %r7,0x7f7f
+	rlwinm %r5,%r3,3,27,28
+	lwz   %r8,0(%r4)
+	li    %r9,-1
+	addi  %r7,%r7,0x7f7f
+/* That's the setup done, now do the first pair of words.
+   We make an exception and use method (2) on the first two words, to reduce
+   overhead.  */
+	srw   %r9,%r9,%r5
+	and   %r0,%r7,%r8
+	or    %r10,%r7,%r8
+	add   %r0,%r0,%r7
+	nor   %r0,%r10,%r0
+	and.  %r8,%r0,%r9
+	mtcrf 0x01,%r3
+	bne   L(done0)
+	lis   %r6,0xfeff
+	addi  %r6,%r6,-0x101
+/* Are we now aligned to a doubleword boundary?  */
+	bt    29,L(loop)
+
+/* Handle second word of pair.  */
+	lwzu  %r8,4(%r4)
+	and   %r0,%r7,%r8
+	or    %r10,%r7,%r8
+	add   %r0,%r0,%r7
+	nor.  %r8,%r10,%r0
+	bne   L(done0)
+
+/* The loop.  */
+
+L(loop):
+	lwz   %r8,4(%r4)
+	lwzu  %r9,8(%r4)
+	add   %r0,%r6,%r8
+	nor   %r10,%r7,%r8
+	and.  %r0,%r0,%r10
+	add   %r11,%r6,%r9
+	nor   %r12,%r7,%r9
+	bne   L(done1)
+	and.  %r0,%r11,%r12
+	beq   L(loop)
+
+	and   %r0,%r7,%r9
+	add   %r0,%r0,%r7
+	andc  %r8,%r12,%r0
+	b     L(done0)
+
+L(done1):
+	and   %r0,%r7,%r8
+	subi  %r4,%r4,4
+	add   %r0,%r0,%r7
+	andc  %r8,%r10,%r0
+
+/* When we get to here, r4 points to the first word in the string that
+   contains a zero byte, and the most significant set bit in r8 is in that
+   byte.  */
+L(done0):
+	cntlzw %r11,%r8
+	subf  %r0,%r3,%r4
+	srwi  %r11,%r11,3
+	add   %r3,%r0,%r11
+	blr
+END(strlen)
diff --git a/sysdeps/powerpc/strlen.s b/sysdeps/powerpc/strlen.s
deleted file mode 100644
index ea809772eb..0000000000
--- a/sysdeps/powerpc/strlen.s
+++ /dev/null
@@ -1,144 +0,0 @@
- # Optimized strlen implementation for PowerPC.
- # Copyright (C) 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB.  If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # The algorithm here uses the following techniques:
- #
- # 1) Given a word 'x', we can test to see if it contains any 0 bytes
- #    by subtracting 0x01010101, and seeing if any of the high bits of each
- #    byte changed from 0 to 1. This works because the least significant
- #    0 byte must have had no incoming carry (otherwise it's not the least
- #    significant), so it is 0x00 - 0x01 == 0xff. For all other
- #    byte values, either they have the high bit set initially, or when
- #    1 is subtracted you get a value in the range 0x00-0x7f, none of which
- #    have their high bit set. The expression here is
- #    (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
- #    there were no 0x00 bytes in the word.
- #
- # 2) Given a word 'x', we can test to see _which_ byte was zero by
- #    calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
- #    This produces 0x80 in each byte that was zero, and 0x00 in all
- #    the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each
- #    byte, and the '| x' part ensures that bytes with the high bit set
- #    produce 0x00. The addition will carry into the high bit of each byte
- #    iff that byte had one of its low 7 bits set. We can then just see
- #    which was the most significant bit set and divide by 8 to find how
- #    many to add to the index.
- #    This is from the book 'The PowerPC Compiler Writer's Guide',
- #    by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
- #
- # We deal with strings not aligned to a word boundary by taking the
- # first word and ensuring that bytes not part of the string
- # are treated as nonzero. To allow for memory latency, we unroll the
- # loop a few times, being careful to ensure that we do not read ahead
- # across cache line boundaries.
- #
- # Questions to answer:
- # 1) How long are strings passed to strlen? If they're often really long,
- # we should probably use cache management instructions and/or unroll the
- # loop more. If they're often quite short, it might be better to use
- # fact (2) in the inner loop than have to recalculate it.
- # 2) How popular are bytes with the high bit set? If they are very rare,
- # on some processors it might be useful to use the simpler expression
- # ~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one
- # ALU), but this fails when any character has its high bit set.
-
- # Some notes on register usage: Under the SVR4 ABI, we can use registers
- # 0 and 3 through 12 (so long as we don't call any procedures) without
- # saving them. We can also use registers 14 through 31 if we save them.
- # We can't use r1 (it's the stack pointer), r2 nor r13 because the user
- # program may expect them to hold their usual value if we get sent
- # a signal. Integer parameters are passed in r3 through r10.
- # We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving
- # them, the others we must save.
-
-	.section ".text"
-	.align 2
-	.globl strlen
-	.type strlen,@function
-strlen:
- # On entry, r3 points to the string, and it's left that way.
- # We use r6 to store 0xfefefeff, and r7 to store 0x7f7f7f7f.
- # r4 is used to keep the current index into the string; r5 holds
- # the number of padding bits we prepend to the string to make it
- # start at a word boundary. r8 holds the 'current' word.
- # r9-12 are temporaries. r0 is used as a temporary and for discarded
- # results.
-	clrrwi %r4,%r3,2
-	lis   %r7,0x7f7f
-	rlwinm %r5,%r3,3,27,28
-	lwz   %r8,0(%r4)
-	li    %r9,-1
-	addi  %r7,%r7,0x7f7f
- # That's the setup done, now do the first pair of words.
- # We make an exception and use method (2) on the first two words, to reduce
- # overhead.
-	srw   %r9,%r9,%r5
-	and   %r0,%r7,%r8
-	or    %r10,%r7,%r8
-	add   %r0,%r0,%r7
-	nor   %r0,%r10,%r0
-	and.  %r8,%r0,%r9
-	mtcrf 0x01,%r3
-	bne   done0
-	lis   %r6,0xfeff
-	addi  %r6,%r6,-0x101
- # Are we now aligned to a doubleword boundary?
-	bt    29,loop
-
- # Handle second word of pair.
-	lwzu  %r8,4(%r4)
-	and   %r0,%r7,%r8
-	or    %r10,%r7,%r8
-	add   %r0,%r0,%r7
-	nor.  %r8,%r10,%r0
-	bne   done0
-
- # The loop.
-
-loop:	lwz   %r8,4(%r4)
-	lwzu  %r9,8(%r4)
-	add   %r0,%r6,%r8
-	nor   %r10,%r7,%r8
-	and.  %r0,%r0,%r10
-	add   %r11,%r6,%r9
-	nor   %r12,%r7,%r9
-	bne   done1
-	and.  %r0,%r11,%r12
-	beq   loop
-
-	and   %r0,%r7,%r9
-	add   %r0,%r0,%r7
-	andc  %r8,%r12,%r0
-	b     done0
-
-done1:	and   %r0,%r7,%r8
-	subi  %r4,%r4,4
-	add   %r0,%r0,%r7
-	andc  %r8,%r10,%r0
-
- # When we get to here, r4 points to the first word in the string that
- # contains a zero byte, and the most significant set bit in r8 is in that
- # byte.
-done0:	cntlzw %r11,%r8
-	subf  %r0,%r3,%r4
-	srwi  %r11,%r11,3
-	add   %r3,%r0,%r11
-	blr
-0:
-	.size	 strlen,0b-strlen
diff --git a/sysdeps/powerpc/sub_n.S b/sysdeps/powerpc/sub_n.S
new file mode 100644
index 0000000000..7af577d835
--- /dev/null
+++ b/sysdeps/powerpc/sub_n.S
@@ -0,0 +1,68 @@
+/* Subtract two limb vectors of equal, non-zero length for PowerPC.
+   Copyright (C) 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <sysdep.h>
+
+/* mp_limb_t mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr,
+                        mp_size_t size)
+   Calculate s1-s2 and put result in res_ptr; return borrow, 0 or 1.  */
+
+/* Note on optimisation: This code is optimal for the 601.  Almost every other
+   possible 2-unrolled inner loop will not be.  Also, watch out for the
+   alignment...  */
+
+EALIGN(__mpn_sub_n,3,1)
+/* Set up for loop below.  */
+	mtcrf 0x01,%r6
+	srwi. %r7,%r6,1
+	mtctr %r7
+	bt    31,2f
+
+/* Set the carry (clear the borrow).  */
+	subfc %r0,%r0,%r0
+/* Adjust pointers for loop.  */
+	addi  %r3,%r3,-4
+	addi  %r4,%r4,-4
+	addi  %r5,%r5,-4
+	b     0f
+
+2:	lwz   %r7,0(%r5)
+	lwz   %r6,0(%r4)
+	subfc %r6,%r7,%r6
+	stw   %r6,0(%r3)
+        beq   1f
+
+/* Align start of loop to an odd word boundary to guarantee that the
+   last two words can be fetched in one access (for 601).  This turns
+   out to be important.  */
+0:
+	lwz   %r9,4(%r4)
+	lwz   %r8,4(%r5)
+	lwzu  %r6,8(%r4)
+	lwzu  %r7,8(%r5)
+	subfe %r8,%r8,%r9
+	stw   %r8,4(%r3)
+	subfe %r6,%r7,%r6
+	stwu  %r6,8(%r3)
+	bdnz  0b
+/* Return the borrow. */
+1:	subfe %r3,%r3,%r3
+	neg   %r3,%r3
+	blr
+END(__mpn_sub_n)
diff --git a/sysdeps/powerpc/sub_n.s b/sysdeps/powerpc/sub_n.s
deleted file mode 100644
index 8711bf9a40..0000000000
--- a/sysdeps/powerpc/sub_n.s
+++ /dev/null
@@ -1,69 +0,0 @@
- # Subtract two limb vectors of equal, non-zero length for PowerPC.
- # Copyright (C) 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB.  If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # mp_limb_t mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr,
- #                      mp_size_t size)
- # Calculate s1-s2 and put result in res_ptr; return borrow, 0 or 1.
-
- # Note on optimisation: This code is optimal for the 601.  Almost every other
- # possible 2-unrolled inner loop will not be.  Also, watch out for the
- # alignment...
-
-	.align 3
-	.globl __mpn_sub_n
-	.type	 __mpn_sub_n,@function
-	nop
-__mpn_sub_n:
- # Set up for loop below.
-	mtcrf 0x01,%r6
-	srwi. %r7,%r6,1
-	mtctr %r7
-	bt    31,2f
-
- # Set the carry (clear the borrow).
-	subfc %r0,%r0,%r0
- # Adjust pointers for loop.
-	addi  %r3,%r3,-4
-	addi  %r4,%r4,-4
-	addi  %r5,%r5,-4
-	b     0f
-
-2:	lwz   %r7,0(%r5)
-	lwz   %r6,0(%r4)
-	subfc %r6,%r7,%r6
-	stw   %r6,0(%r3)
-        beq   1f
-
- # Align start of loop to an odd word boundary to guarantee that the
- # last two words can be fetched in one access (for 601).  This turns
- # out to be important.
-0:
-	lwz   %r9,4(%r4)
-	lwz   %r8,4(%r5)
-	lwzu  %r6,8(%r4)
-	lwzu  %r7,8(%r5)
-	subfe %r8,%r8,%r9
-	stw   %r8,4(%r3)
-	subfe %r6,%r7,%r6
-	stwu  %r6,8(%r3)
-	bdnz  0b
- # return the borrow
-1:	subfe %r3,%r3,%r3
-	neg   %r3,%r3
-	blr
diff --git a/sysdeps/powerpc/submul_1.S b/sysdeps/powerpc/submul_1.S
new file mode 100644
index 0000000000..80da8ec8c1
--- /dev/null
+++ b/sysdeps/powerpc/submul_1.S
@@ -0,0 +1,52 @@
+/* Multiply a limb vector by a limb and subtract the product, for PowerPC.
+   Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <sysdep.h>
+
+/* mp_limb_t mpn_submul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
+                           mp_size_t s1_size, mp_limb_t s2_limb)
+   Calculate res-s1*s2 and put result back in res; return carry.  */
+
+ENTRY(__mpn_submul_1)
+	mtctr	%r5
+
+	lwz	%r0,0(%r4)
+	mullw	%r7,%r0,%r6
+	mulhwu	%r10,%r0,%r6
+	lwz     %r9,0(%r3)
+	subf 	%r8,%r7,%r9
+	addc    %r7,%r7,%r8		# invert cy (r7 is junk)
+	addi	%r3,%r3,-4		# adjust res_ptr
+	bdz	1f
+
+0:	lwzu	%r0,4(%r4)
+	stwu	%r8,4(%r3)
+	mullw	%r8,%r0,%r6
+	adde	%r7,%r8,%r10
+	mulhwu	%r10,%r0,%r6
+	lwz     %r9,4(%r3)
+	addze   %r10,%r10
+	subf    %r8,%r7,%r9
+	addc    %r7,%r7,%r8		# invert cy (r7 is junk)
+	bdnz	0b
+
+1:	stw	%r8,4(%r3)
+	addze	%r3,%r10
+	blr
+END(__mpn_submul_1)
diff --git a/sysdeps/powerpc/submul_1.s b/sysdeps/powerpc/submul_1.s
deleted file mode 100644
index 999430d744..0000000000
--- a/sysdeps/powerpc/submul_1.s
+++ /dev/null
@@ -1,52 +0,0 @@
- # Multiply a limb vector by a single limb, for PowerPC.
- # Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB.  If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # mp_limb_t mpn_submul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
- #                         mp_size_t s1_size, mp_limb_t s2_limb)
- # Calculate res-s1*s2 and put result back in res; return carry.
-
-	.align 2
-	.globl __mpn_submul_1
-	.type	 __mpn_submul_1,@function
-__mpn_submul_1:
-	mtctr	%r5
-
-	lwz	%r0,0(%r4)
-	mullw	%r7,%r0,%r6
-	mulhwu	%r10,%r0,%r6
-	lwz     %r9,0(%r3)
-	subf 	%r8,%r7,%r9
-	addc    %r7,%r7,%r8		# invert cy (r7 is junk)
-	addi	%r3,%r3,-4		# adjust res_ptr
-	bdz	Lend
-
-Loop:	lwzu	%r0,4(%r4)
-	stwu	%r8,4(%r3)
-	mullw	%r8,%r0,%r6
-	adde	%r7,%r8,%r10
-	mulhwu	%r10,%r0,%r6
-	lwz     %r9,4(%r3)
-	addze   %r10,%r10
-	subf    %r8,%r7,%r9
-	addc    %r7,%r7,%r8		# invert cy (r7 is junk)
-	bdnz	Loop
-
-Lend:	stw	%r8,4(%r3)
-	addze	%r3,%r10
-	blr
diff --git a/sysdeps/powerpc/test-arith.c b/sysdeps/powerpc/test-arith.c
index c846b0d0ec..9e1be88098 100644
--- a/sysdeps/powerpc/test-arith.c
+++ b/sysdeps/powerpc/test-arith.c
@@ -226,7 +226,7 @@ check_result(int line, const char *rm, tocheck_t expected, tocheck_t actual)
   if (memcmp(&expected, &actual, sizeof(tocheck_t)) != 0)
     {
       unsigned char *ex, *ac;
-      int i;
+      size_t i;
 
       printf("%s:%d:round %s:result failed\n"
 	     " expected result 0x", __FILE__, line, rm);
@@ -323,7 +323,7 @@ check_excepts(int line, const char *rm, int expected, int actual)
     expected = expected & ~excepts_missing | FE_INVALID_SNAN;
   if ((expected & all_exceptions) != actual)
     {
-      int i;
+      size_t i;
       printf("%s:%d:round %s:exceptions failed\n"
 	     " expected exceptions ", __FILE__, line,rm);
       for (i = 0; i < sizeof(excepts)/sizeof(excepts[0]); i++)
@@ -419,7 +419,7 @@ static const optest_t optests[] = {
   {__LINE__,B_NEG, 1,P_1Z,P_1Z1, 0,0,0, R_ALL, 0, 0,P_1Z,P_1Z1 },
   {__LINE__,B_NEG, 0,P_Z,P_Z1,  0,0,0, R_ALL, 0, 1,P_Z,P_Z1 },
   {__LINE__,B_NEG, 1,P_Z,P_Z1,  0,0,0, R_ALL, 0, 0,P_Z,P_Z1 },
-  
+
   /* Absolute value.  */
   {__LINE__,B_ABS, 0,P_Z,P_Z,   0,0,0, R_ALL, 0, 0,P_Z,P_Z },
   {__LINE__,B_ABS, 1,P_Z,P_Z,   0,0,0, R_ALL, 0, 0,P_Z,P_Z },
@@ -433,7 +433,7 @@ static const optest_t optests[] = {
   {__LINE__,B_ABS, 1,P_1Z,P_1Z1, 0,0,0, R_ALL, 0, 0,P_1Z,P_1Z1 },
   {__LINE__,B_ABS, 0,P_Z,P_Z1,  0,0,0, R_ALL, 0, 0,P_Z,P_Z1 },
   {__LINE__,B_ABS, 1,P_Z,P_Z1,  0,0,0, R_ALL, 0, 0,P_Z,P_Z1 },
-  
+
   /* Square root.  */
   {__LINE__,B_SQRT, 0,P_Z,P_Z,   0,0,0, R_ALL, 0, 0,P_Z,P_Z },
   {__LINE__,B_SQRT, 1,P_Z,P_Z,   0,0,0, R_ALL, 0, 1,P_Z,P_Z },
@@ -459,7 +459,8 @@ static const optest_t optests[] = {
 static void
 check_op(void)
 {
-  int i, j;
+  size_t i;
+  int j;
   tocheck_t r, a, b, x;
   int raised;
 
@@ -497,7 +498,7 @@ static void
 fail_xr(int line, const char *rm, tocheck_t x, tocheck_t r, tocheck_t xx,
 	int xflag)
 {
-  int i;
+  size_t i;
   unsigned char *cx, *cr, *cxx;
 
   printf("%s:%d:round %s:fail\n with x=0x", __FILE__, line,rm);
@@ -539,7 +540,7 @@ check_sqrt(tocheck_t a)
 	  r0 = delta(r1,-1); r2 = delta(r1,1);
 	  switch (1 << j)
 	    {
-	    case R_NEAREST: 
+	    case R_NEAREST:
 	      x0 = r0 * r0 - a; x2 = r2 * r2 - a;
 	      ok = fabs(x0) >= fabs(x1) && fabs(x1) <= fabs(x2);
 	      break;