Prepare for radical source tree reorganization.
zack/build-layout-experiment

All top-level files and directories are moved into a temporary storage directory, REORG.TODO, except for files that will certainly still exist in their current form at top level when we're done (COPYING, COPYING.LIB, LICENSES, NEWS, README), all old ChangeLog files (which are moved to the new directory OldChangeLogs, instead), and the generated file INSTALL (which is just deleted; in the new order, there will be no generated files checked into version control).
author: Zack Weinberg <zackw@panix.com> 2017-06-08 15:39:03 -0400
committer: Zack Weinberg <zackw@panix.com> 2017-06-08 15:39:03 -0400
commit: 5046dbb4a7eba5eccfd258f92f4735c9ffc8d069 (patch)
tree: 4470480d904b65cf14ca524f96f79eca818c3eaf /sysdeps/alpha/alphaev6
parent: 199fc19d3aaaf57944ef036e15904febe877fc93 (diff)
download: glibc-zack/build-layout-experiment.tar
glibc-zack/build-layout-experiment.tar.gz
glibc-zack/build-layout-experiment.tar.bz2
glibc-zack/build-layout-experiment.zip
8 files changed, 0 insertions, 1768 deletions
diff --git a/sysdeps/alpha/alphaev6/Implies b/sysdeps/alpha/alphaev6/Implies
deleted file mode 100644
index 0e7fc170ba..0000000000
--- a/sysdeps/alpha/alphaev6/Implies
+++ /dev/null
@@ -1 +0,0 @@
-alpha/alphaev5
diff --git a/sysdeps/alpha/alphaev6/addmul_1.S b/sysdeps/alpha/alphaev6/addmul_1.S
deleted file mode 100644
index 1072ea763f..0000000000
--- a/sysdeps/alpha/alphaev6/addmul_1.S
+++ /dev/null
@@ -1,477 +0,0 @@
- # Alpha ev6 mpn_addmul_1 -- Multiply a limb vector with a limb and add
- # the result to a second limb vector.
- #
- #  Copyright (C) 2000-2017 Free Software Foundation, Inc.
- #
- #  This file is part of the GNU MP Library.
- #
- #  The GNU MP Library is free software; you can redistribute it and/or modify
- #  it under the terms of the GNU Lesser General Public License as published
- #  by the Free Software Foundation; either version 2.1 of the License, or (at
- #  your option) any later version.
- #
- #  The GNU MP Library is distributed in the hope that it will be useful, but
- #  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- #  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
- #  License for more details.
- #
- #  You should have received a copy of the GNU Lesser General Public License
- #  along with the GNU MP Library.  If not, see <http://www.gnu.org/licenses/>.
-
- #  INPUT PARAMETERS
- #  res_ptr	$16
- #  s1_ptr	$17
- #  size	$18
- #  s2_limb	$19
- #
- #  This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and
- #  exactly 3.625 cycles/limb on EV6...
- #
- # This code was written in close cooperation with ev6 pipeline expert
- # Steve Root (root@toober.hlo.dec.com).  Any errors are tege's fault, though.
- #
- #   Register usages for unrolled loop:
- #	  0-3     mul's
- #	  4-7     acc's
- #	  8-15    mul results
- #	  20,21   carry's
- #	  22,23   save for stores
- #
- #   Sustains 8 mul-adds in 29 cycles in the unrolled inner loop.
- #
- #   The stores can issue a cycle late so we have paired no-op's to 'catch'
- #   them, so that further disturbance to the schedule is damped.
- #
- #   We couldn't pair the loads, because the entangled schedule of the
- #   carry's has to happen on one side {0} of the machine. Note, the total
- #   use of U0, and the total use of L0 (after attending to the stores),
- #   which is part of the reason why....
- #
- #   This is a great schedule for the d_cache, a poor schedule for the
- #   b_cache. The lockup on U0 means that any stall can't be recovered
- #   from. Consider a ldq in L1.  say that load gets stalled because it
- #   collides with a fill from the b_Cache. On the next cycle, this load
- #   gets priority. It first looks at L0, and goes there. The instruction
- #   we intended for L0 gets to look at L1, which is NOT where we want
- #   it. It either stalls 1, because it can't go in L0, or goes there, and
- #   causes a further instruction to stall.
- #
- #   So for b_cache, we're likely going to want to put one or more cycles
- #   back into the code! And, of course, put in prefetches. For the
- #   accumulator, lds, intent to modify.  For the multiplier, you might
- #   want ldq, evict next, if you're not wanting to use it again soon. Use
- #   256 ahead of present pointer value. At a place where we have an mt
- #   followed by a bookkeeping, put the bookkeeping in upper, and the
- #   prefetch into lower.
- #
- #   Note, the usage of physical registers per cycle is smoothed off, as
- #   much as possible.
- #
- #   Note, the ldq's and stq's are at the end of the quadpacks.  note, we'd
- #   like not to have a ldq or stq precede a conditional branch in a
- #   quadpack. The conditional branch moves the retire pointer one cycle
- #   later.
- #
- #   Optimization notes:
- #   Callee-saves regs: $9 $10 $11 $12 $13 $14 $15 $26 ?$27?
- #   Reserved regs:	 $29 $30 $31
- #   Free caller-saves regs in unrolled code: $24 $25 $28
- #   We should swap some of the callee-saves regs for some of the free
- #   caller-saves regs, saving some overhead cycles.
- #   Most importantly, we should write fast code for the 0-7 case.
- #   The code we use there is for the 21164, and runs at 7 cycles/limb
- #   on the 21264.  Should not be hard, if we write specialized code for
- #   1-7 limbs (the one for 0 limbs should be straightforward).  We then just
- #   need a jump table indexed by the low 3 bits of the count argument.
-
-	.set	noreorder
-	.set	noat
-	.text
-
-	.globl	__mpn_addmul_1
-	.ent	__mpn_addmul_1
-__mpn_addmul_1:
-	.frame	$30,0,$26,0
-	.prologue 0
-
-	cmpult	$18,	8,	$1
-	beq	$1,	$Large
-
-	ldq	$2,	0($17)		# $2 = s1_limb
-	addq	$17,	8,	$17	# s1_ptr++
-	subq	$18,	1,	$18	# size--
-	mulq	$2,	$19,	$3	# $3 = prod_low
-	ldq	$5,	0($16)		# $5 = *res_ptr
-	umulh	$2,	$19,	$0	# $0 = prod_high
-	beq	$18,	$Lend0b		# jump if size was == 1
-	ldq	$2,	0($17)		# $2 = s1_limb
-	addq	$17,	8,	$17	# s1_ptr++
-	subq	$18,	1,	$18	# size--
-	addq	$5,	$3,	$3
-	cmpult	$3,	$5,	$4
-	stq	$3,	0($16)
-	addq	$16,	8,	$16	# res_ptr++
-	beq	$18,	$Lend0a		# jump if size was == 2
-
-	.align 3
-$Loop0:	mulq	$2,	$19,	$3	# $3 = prod_low
-	ldq	$5,	0($16)		# $5 = *res_ptr
-	addq	$4,	$0,	$0	# cy_limb = cy_limb + 'cy'
-	subq	$18,	1,	$18	# size--
-	umulh	$2,	$19,	$4	# $4 = cy_limb
-	ldq	$2,	0($17)		# $2 = s1_limb
-	addq	$17,	8,	$17	# s1_ptr++
-	addq	$3,	$0,	$3	# $3 = cy_limb + prod_low
-	cmpult	$3,	$0,	$0	# $0 = carry from (cy_limb + prod_low)
-	addq	$5,	$3,	$3
-	cmpult	$3,	$5,	$5
-	stq	$3,	0($16)
-	addq	$16,	8,	$16	# res_ptr++
-	addq	$5,	$0,	$0	# combine carries
-	bne	$18,	$Loop0
-$Lend0a:
-	mulq	$2,	$19,	$3	# $3 = prod_low
-	ldq	$5,	0($16)		# $5 = *res_ptr
-	addq	$4,	$0,	$0	# cy_limb = cy_limb + 'cy'
-	umulh	$2,	$19,	$4	# $4 = cy_limb
-	addq	$3,	$0,	$3	# $3 = cy_limb + prod_low
-	cmpult	$3,	$0,	$0	# $0 = carry from (cy_limb + prod_low)
-	addq	$5,	$3,	$3
-	cmpult	$3,	$5,	$5
-	stq	$3,	0($16)
-	addq	$5,	$0,	$0	# combine carries
-	addq	$4,	$0,	$0	# cy_limb = prod_high + cy
-	ret	$31,	($26),	1
-$Lend0b:
-	addq	$5,	$3,	$3
-	cmpult	$3,	$5,	$5
-	stq	$3,	0($16)
-	addq	$0,	$5,	$0
-	ret	$31,	($26),	1
-
-$Large:
-	lda	$30,	-240($30)
-	stq	$9,	8($30)
-	stq	$10,	16($30)
-	stq	$11,	24($30)
-	stq	$12,	32($30)
-	stq	$13,	40($30)
-	stq	$14,	48($30)
-	stq	$15,	56($30)
-
-	and	$18,	7,	$20	# count for the first loop, 0-7
-	srl	$18,	3,	$18	# count for unrolled loop
-	bis	$31,	$31,	$0
-	beq	$20,	$Lunroll
-	ldq	$2,	0($17)		# $2 = s1_limb
-	addq	$17,	8,	$17	# s1_ptr++
-	subq	$20,	1,	$20	# size--
-	mulq	$2,	$19,	$3	# $3 = prod_low
-	ldq	$5,	0($16)		# $5 = *res_ptr
-	umulh	$2,	$19,	$0	# $0 = prod_high
-	beq	$20,	$Lend1b		# jump if size was == 1
-	ldq	$2,	0($17)		# $2 = s1_limb
-	addq	$17,	8,	$17	# s1_ptr++
-	subq	$20,	1,	$20	# size--
-	addq	$5,	$3,	$3
-	cmpult	$3,	$5,	$4
-	stq	$3,	0($16)
-	addq	$16,	8,	$16	# res_ptr++
-	beq	$20,	$Lend1a		# jump if size was == 2
-
-	.align 3
-$Loop1:	mulq	$2,	$19,	$3	# $3 = prod_low
-	ldq	$5,	0($16)		# $5 = *res_ptr
-	addq	$4,	$0,	$0	# cy_limb = cy_limb + 'cy'
-	subq	$20,	1,	$20	# size--
-	umulh	$2,	$19,	$4	# $4 = cy_limb
-	ldq	$2,	0($17)		# $2 = s1_limb
-	addq	$17,	8,	$17	# s1_ptr++
-	addq	$3,	$0,	$3	# $3 = cy_limb + prod_low
-	cmpult	$3,	$0,	$0	# $0 = carry from (cy_limb + prod_low)
-	addq	$5,	$3,	$3
-	cmpult	$3,	$5,	$5
-	stq	$3,	0($16)
-	addq	$16,	8,	$16	# res_ptr++
-	addq	$5,	$0,	$0	# combine carries
-	bne	$20,	$Loop1
-
-$Lend1a:
-	mulq	$2,	$19,	$3	# $3 = prod_low
-	ldq	$5,	0($16)		# $5 = *res_ptr
-	addq	$4,	$0,	$0	# cy_limb = cy_limb + 'cy'
-	umulh	$2,	$19,	$4	# $4 = cy_limb
-	addq	$3,	$0,	$3	# $3 = cy_limb + prod_low
-	cmpult	$3,	$0,	$0	# $0 = carry from (cy_limb + prod_low)
-	addq	$5,	$3,	$3
-	cmpult	$3,	$5,	$5
-	stq	$3,	0($16)
-	addq	$16,	8,	$16	# res_ptr++
-	addq	$5,	$0,	$0	# combine carries
-	addq	$4,	$0,	$0	# cy_limb = prod_high + cy
-	br	$31,	$Lunroll
-$Lend1b:
-	addq	$5,	$3,	$3
-	cmpult	$3,	$5,	$5
-	stq	$3,	0($16)
-	addq	$16,	8,	$16	# res_ptr++
-	addq	$0,	$5,	$0
-
-$Lunroll:
-	lda	$17,	-16($17)	# L1 bookkeeping
-	lda	$16,	-16($16)	# L1 bookkeeping
-	bis	$0,	$31,	$12
-
- # ____ UNROLLED LOOP SOFTWARE PIPELINE STARTUP ____
-
-	ldq	$2,	16($17)		# L1
-	ldq	$3,	24($17)		# L1
-	lda	$18,	-1($18)		# L1 bookkeeping
-	ldq	$6,	16($16)		# L1
-	ldq	$7,	24($16)		# L1
-	ldq	$0,	32($17)		# L1
-	mulq	$19,	$2,	$13	# U1
-	ldq	$1,	40($17)		# L1
-	umulh	$19,	$2,	$14	# U1
-	mulq	$19,	$3,	$15	# U1
-	lda	$17,	64($17)		# L1 bookkeeping
-	ldq	$4,	32($16)		# L1
-	ldq	$5,	40($16)		# L1
-	umulh	$19,	$3,	$8	# U1
-	ldq	$2,	-16($17)	# L1
-	mulq	$19,	$0,	$9	# U1
-	ldq	$3,	-8($17)		# L1
-	umulh	$19,	$0,	$10	# U1
-	addq	$6,	$13,	$6	# L0 lo + acc
-	mulq	$19,	$1,	$11	# U1
-	cmpult	$6,	$13,	$20	# L0 lo add => carry
-	lda	$16,	64($16)		# L1 bookkeeping
-	addq	$6,	$12,	$22	# U0 hi add => answer
-	cmpult	$22,	$12,	$21	# L0 hi add => carry
-	addq	$14,	$20,	$14	# U0 hi mul + carry
-	ldq	$6,	-16($16)	# L1
-	addq	$7,	$15,	$23	# L0 lo + acc
-	addq	$14,	$21,	$14	# U0 hi mul + carry
-	ldq	$7,	-8($16)		# L1
-	umulh	$19,	$1,	$12	# U1
-	cmpult	$23,	$15,	$20	# L0 lo add => carry
-	addq	$23,	$14,	$23	# U0 hi add => answer
-	ldq	$0,	0($17)		# L1
-	mulq	$19,	$2,	$13	# U1
-	cmpult	$23,	$14,	$21	# L0 hi add => carry
-	addq	$8,	$20,	$8	# U0 hi mul + carry
-	ldq	$1,	8($17)		# L1
-	umulh	$19,	$2,	$14	# U1
-	addq	$4,	$9,	$4	# L0 lo + acc
-	stq	$22,	-48($16)	# L0
-	stq	$23,	-40($16)	# L1
-	mulq	$19,	$3,	$15	# U1
-	addq	$8,	$21,	$8	# U0 hi mul + carry
-	cmpult	$4,	$9,	$20	# L0 lo add => carry
-	addq	$4,	$8,	$22	# U0 hi add => answer
-	ble	$18,	$Lend		# U1 bookkeeping
-
- # ____ MAIN UNROLLED LOOP ____
-	.align 4
-$Loop:
-	bis	$31,	$31,	$31	# U1 mt
-	cmpult	$22,	$8,	$21	# L0 hi add => carry
-	addq	$10,	$20,	$10	# U0 hi mul + carry
-	ldq	$4,	0($16)		# L1
-
-	bis	$31,	$31,	$31	# U1 mt
-	addq	$5,	$11,	$23	# L0 lo + acc
-	addq	$10,	$21,	$10	# L0 hi mul + carry
-	ldq	$5,	8($16)		# L1
-
-	umulh	$19,	$3,	$8	# U1
-	cmpult	$23,	$11,	$20	# L0 lo add => carry
-	addq	$23,	$10,	$23	# U0 hi add => answer
-	ldq	$2,	16($17)		# L1
-
-	mulq	$19,	$0,	$9	# U1
-	cmpult	$23,	$10,	$21	# L0 hi add => carry
-	addq	$12,	$20,	$12	# U0 hi mul + carry
-	ldq	$3,	24($17)		# L1
-
-	umulh	$19,	$0,	$10	# U1
-	addq	$6,	$13,	$6	# L0 lo + acc
-	stq	$22,	-32($16)	# L0
-	stq	$23,	-24($16)	# L1
-
-	bis	$31,	$31,	$31	# L0 st slosh
-	mulq	$19,	$1,	$11	# U1
-	bis	$31,	$31,	$31	# L1 st slosh
-	addq	$12,	$21,	$12	# U0 hi mul + carry
-
-	cmpult	$6,	$13,	$20	# L0 lo add => carry
-	bis	$31,	$31,	$31	# U1 mt
-	lda	$18,	-1($18)		# L1 bookkeeping
-	addq	$6,	$12,	$22	# U0 hi add => answer
-
-	bis	$31,	$31,	$31	# U1 mt
-	cmpult	$22,	$12,	$21	# L0 hi add => carry
-	addq	$14,	$20,	$14	# U0 hi mul + carry
-	ldq	$6,	16($16)		# L1
-
-	bis	$31,	$31,	$31	# U1 mt
-	addq	$7,	$15,	$23	# L0 lo + acc
-	addq	$14,	$21,	$14	# U0 hi mul + carry
-	ldq	$7,	24($16)		# L1
-
-	umulh	$19,	$1,	$12	# U1
-	cmpult	$23,	$15,	$20	# L0 lo add => carry
-	addq	$23,	$14,	$23	# U0 hi add => answer
-	ldq	$0,	32($17)		# L1
-
-	mulq	$19,	$2,	$13	# U1
-	cmpult	$23,	$14,	$21	# L0 hi add => carry
-	addq	$8,	$20,	$8	# U0 hi mul + carry
-	ldq	$1,	40($17)		# L1
-
-	umulh	$19,	$2,	$14	# U1
-	addq	$4,	$9,	$4	# U0 lo + acc
-	stq	$22,	-16($16)	# L0
-	stq	$23,	-8($16)		# L1
-
-	bis	$31,	$31,	$31	# L0 st slosh
-	mulq	$19,	$3,	$15	# U1
-	bis	$31,	$31,	$31	# L1 st slosh
-	addq	$8,	$21,	$8	# L0 hi mul + carry
-
-	cmpult	$4,	$9,	$20	# L0 lo add => carry
-	bis	$31,	$31,	$31	# U1 mt
-	lda	$17,	64($17)		# L1 bookkeeping
-	addq	$4,	$8,	$22	# U0 hi add => answer
-
-	bis	$31,	$31,	$31	# U1 mt
-	cmpult	$22,	$8,	$21	# L0 hi add => carry
-	addq	$10,	$20,	$10	# U0 hi mul + carry
-	ldq	$4,	32($16)		# L1
-
-	bis	$31,	$31,	$31	# U1 mt
-	addq	$5,	$11,	$23	# L0 lo + acc
-	addq	$10,	$21,	$10	# L0 hi mul + carry
-	ldq	$5,	40($16)		# L1
-
-	umulh	$19,	$3,	$8	# U1
-	cmpult	$23,	$11,	$20	# L0 lo add => carry
-	addq	$23,	$10,	$23	# U0 hi add => answer
-	ldq	$2,	-16($17)	# L1
-
-	mulq	$19,	$0,	$9	# U1
-	cmpult	$23,	$10,	$21	# L0 hi add => carry
-	addq	$12,	$20,	$12	# U0 hi mul + carry
-	ldq	$3,	-8($17)		# L1
-
-	umulh	$19,	$0,	$10	# U1
-	addq	$6,	$13,	$6	# L0 lo + acc
-	stq	$22,	0($16)		# L0
-	stq	$23,	8($16)		# L1
-
-	bis	$31,	$31,	$31	# L0 st slosh
-	mulq	$19,	$1,	$11	# U1
-	bis	$31,	$31,	$31	# L1 st slosh
-	addq	$12,	$21,	$12	# U0 hi mul + carry
-
-	cmpult	$6,	$13,	$20	# L0 lo add => carry
-	bis	$31,	$31,	$31	# U1 mt
-	lda	$16,	64($16)		# L1 bookkeeping
-	addq	$6,	$12,	$22	# U0 hi add => answer
-
-	bis	$31,	$31,	$31	# U1 mt
-	cmpult	$22,	$12,	$21	# L0 hi add => carry
-	addq	$14,	$20,	$14	# U0 hi mul + carry
-	ldq	$6,	-16($16)	# L1
-
-	bis	$31,	$31,	$31	# U1 mt
-	addq	$7,	$15,	$23	# L0 lo + acc
-	addq	$14,	$21,	$14	# U0 hi mul + carry
-	ldq	$7,	-8($16)		# L1
-
-	umulh	$19,	$1,	$12	# U1
-	cmpult	$23,	$15,	$20	# L0 lo add => carry
-	addq	$23,	$14,	$23	# U0 hi add => answer
-	ldq	$0,	0($17)		# L1
-
-	mulq	$19,	$2,	$13	# U1
-	cmpult	$23,	$14,	$21	# L0 hi add => carry
-	addq	$8,	$20,	$8	# U0 hi mul + carry
-	ldq	$1,	8($17)		# L1
-
-	umulh	$19,	$2,	$14	# U1
-	addq	$4,	$9,	$4	# L0 lo + acc
-	stq	$22,	-48($16)	# L0
-	stq	$23,	-40($16)	# L1
-
-	bis	$31,	$31,	$31	# L0 st slosh
-	mulq	$19,	$3,	$15	# U1
-	bis	$31,	$31,	$31	# L1 st slosh
-	addq	$8,	$21,	$8	# U0 hi mul + carry
-
-	cmpult	$4,	$9,	$20	# L0 lo add => carry
-	addq	$4,	$8,	$22	# U0 hi add => answer
-	bis	$31,	$31,	$31	# L1 mt
-	bgt	$18,	$Loop		# U1 bookkeeping
-
-# ____ UNROLLED LOOP SOFTWARE PIPELINE FINISH ____
-$Lend:
-	cmpult	$22,	$8,	$21	# L0 hi add => carry
-	addq	$10,	$20,	$10	# U0 hi mul + carry
-	ldq	$4,	0($16)		# L1
-	addq	$5,	$11,	$23	# L0 lo + acc
-	addq	$10,	$21,	$10	# L0 hi mul + carry
-	ldq	$5,	8($16)		# L1
-	umulh	$19,	$3,	$8	# U1
-	cmpult	$23,	$11,	$20	# L0 lo add => carry
-	addq	$23,	$10,	$23	# U0 hi add => answer
-	mulq	$19,	$0,	$9	# U1
-	cmpult	$23,	$10,	$21	# L0 hi add => carry
-	addq	$12,	$20,	$12	# U0 hi mul + carry
-	umulh	$19,	$0,	$10	# U1
-	addq	$6,	$13,	$6	# L0 lo + acc
-	stq	$22,	-32($16)	# L0
-	stq	$23,	-24($16)	# L1
-	mulq	$19,	$1,	$11	# U1
-	addq	$12,	$21,	$12	# U0 hi mul + carry
-	cmpult	$6,	$13,	$20	# L0 lo add => carry
-	addq	$6,	$12,	$22	# U0 hi add => answer
-	cmpult	$22,	$12,	$21	# L0 hi add => carry
-	addq	$14,	$20,	$14	# U0 hi mul + carry
-	addq	$7,	$15,	$23	# L0 lo + acc
-	addq	$14,	$21,	$14	# U0 hi mul + carry
-	umulh	$19,	$1,	$12	# U1
-	cmpult	$23,	$15,	$20	# L0 lo add => carry
-	addq	$23,	$14,	$23	# U0 hi add => answer
-	cmpult	$23,	$14,	$21	# L0 hi add => carry
-	addq	$8,	$20,	$8	# U0 hi mul + carry
-	addq	$4,	$9,	$4	# U0 lo + acc
-	stq	$22,	-16($16)	# L0
-	stq	$23,	-8($16)		# L1
-	bis	$31,	$31,	$31	# L0 st slosh
-	addq	$8,	$21,	$8	# L0 hi mul + carry
-	cmpult	$4,	$9,	$20	# L0 lo add => carry
-	addq	$4,	$8,	$22	# U0 hi add => answer
-	cmpult	$22,	$8,	$21	# L0 hi add => carry
-	addq	$10,	$20,	$10	# U0 hi mul + carry
-	addq	$5,	$11,	$23	# L0 lo + acc
-	addq	$10,	$21,	$10	# L0 hi mul + carry
-	cmpult	$23,	$11,	$20	# L0 lo add => carry
-	addq	$23,	$10,	$23	# U0 hi add => answer
-	cmpult	$23,	$10,	$21	# L0 hi add => carry
-	addq	$12,	$20,	$12	# U0 hi mul + carry
-	stq	$22,	0($16)		# L0
-	stq	$23,	8($16)		# L1
-	addq	$12,	$21,	$0	# U0 hi mul + carry
-
-	ldq	$9,	8($30)
-	ldq	$10,	16($30)
-	ldq	$11,	24($30)
-	ldq	$12,	32($30)
-	ldq	$13,	40($30)
-	ldq	$14,	48($30)
-	ldq	$15,	56($30)
-	lda	$30,	240($30)
-	ret	$31,	($26),	1
-
-	.end	__mpn_addmul_1
diff --git a/sysdeps/alpha/alphaev6/fpu/e_sqrt.S b/sysdeps/alpha/alphaev6/fpu/e_sqrt.S
deleted file mode 100644
index 18d03ee9c9..0000000000
--- a/sysdeps/alpha/alphaev6/fpu/e_sqrt.S
+++ /dev/null
@@ -1,53 +0,0 @@
-/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <shlib-compat.h>
-
-	.arch ev6
-	.set noreorder
-	.set noat
-
-ENTRY(__ieee754_sqrt)
-#ifdef PROF
-	ldgp    gp, 0(pv)
-	lda     AT, _mcount
-	jsr     AT, (AT), _mcount
-	.prologue 1
-#else
-	.prologue 0
-#endif
-
-	.align 4
-#ifdef _IEEE_FP_INEXACT
-	sqrtt/suid $f16, $f0
-#else
-	sqrtt/sud $f16, $f0
-#endif
-	ret
-	nop
-	nop
-
-END(__ieee754_sqrt)
-
-#if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18)
-strong_alias(__ieee754_sqrt, __sqrt_finite1)
-compat_symbol(libm, __sqrt_finite1, __sqrt_finite, GLIBC_2_15)
-versioned_symbol(libm, __ieee754_sqrt, __sqrt_finite, GLIBC_2_18)
-#else
-strong_alias(__ieee754_sqrt, __sqrt_finite)
-#endif
diff --git a/sysdeps/alpha/alphaev6/fpu/e_sqrtf.S b/sysdeps/alpha/alphaev6/fpu/e_sqrtf.S
deleted file mode 100644
index c4ef9c32c6..0000000000
--- a/sysdeps/alpha/alphaev6/fpu/e_sqrtf.S
+++ /dev/null
@@ -1,53 +0,0 @@
-/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <shlib-compat.h>
-
-	.arch ev6
-	.set noreorder
-	.set noat
-
-ENTRY(__ieee754_sqrtf)
-#ifdef PROF
-	ldgp    gp, 0(pv)
-	lda     AT, _mcount
-	jsr     AT, (AT), _mcount
-	.prologue 1
-#else
-	.prologue 0
-#endif
-
-	.align 4
-#ifdef _IEEE_FP_INEXACT
-	sqrts/suid $f16, $f0
-#else
-	sqrts/sud $f16, $f0
-#endif
-	ret
-	nop
-	nop
-
-END(__ieee754_sqrtf)
-
-#if SHLIB_COMPAT (libm, GLIBC_2_15, GLIBC_2_18)
-strong_alias(__ieee754_sqrtf, __sqrtf_finite1)
-compat_symbol(libm, __sqrtf_finite1, __sqrtf_finite, GLIBC_2_15)
-versioned_symbol(libm, __ieee754_sqrtf, __sqrtf_finite, GLIBC_2_18)
-#else
-strong_alias(__ieee754_sqrtf, __sqrtf_finite)
-#endif
diff --git a/sysdeps/alpha/alphaev6/memcpy.S b/sysdeps/alpha/alphaev6/memcpy.S
deleted file mode 100644
index 170a23b5da..0000000000
--- a/sysdeps/alpha/alphaev6/memcpy.S
+++ /dev/null
@@ -1,255 +0,0 @@
-/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/*
- * Much of the information about 21264 scheduling/coding comes from:
- *	Compiler Writer's Guide for the Alpha 21264
- *	abbreviated as 'CWG' in other comments here
- *	ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
- * Scheduling notation:
- *	E	- either cluster
- *	U	- upper subcluster; U0 - subcluster U0; U1 - subcluster U1
- *	L	- lower subcluster; L0 - subcluster L0; L1 - subcluster L1
- *
- * Temp usage notes:
- *	$0		- destination address
- *	$1,$2,		- scratch
- */
-
-#include <sysdep.h>
-
-	.arch ev6
-	.set noreorder
-	.set noat
-
-ENTRY(memcpy)
-	.prologue 0
-
-	mov	$16, $0			# E : copy dest to return
-	ble	$18, $nomoredata	# U : done with the copy?
-	xor	$16, $17, $1		# E : are source and dest alignments the same?
-	and	$1, 7, $1		# E : are they the same mod 8?
-
-	bne	$1, $misaligned		# U : Nope - gotta do this the slow way
-	/* source and dest are same mod 8 address */
-	and	$16, 7, $1		# E : Are both 0mod8?
-	beq	$1, $both_0mod8		# U : Yes
-	nop				# E :
-
-	/*
-	 * source and dest are same misalignment.  move a byte at a time
-	 * until a 0mod8 alignment for both is reached.
-	 * At least one byte more to move
-	 */
-
-$head_align:
-	ldbu	$1, 0($17)		# L : grab a byte
-	subq	$18, 1, $18		# E : count--
-	addq	$17, 1, $17		# E : src++
-	stb	$1, 0($16)		# L :
-	addq	$16, 1, $16		# E : dest++
-	and	$16, 7, $1		# E : Are we at 0mod8 yet?
-	ble	$18, $nomoredata	# U : done with the copy?
-	bne	$1, $head_align		# U :
-
-$both_0mod8:
-	cmple	$18, 127, $1		# E : Can we unroll the loop?
-	bne	$1, $no_unroll		# U :
-	and	$16, 63, $1		# E : get mod64 alignment
-	beq	$1, $do_unroll		# U : no single quads to fiddle
-
-$single_head_quad:
-	ldq	$1, 0($17)		# L : get 8 bytes
-	subq	$18, 8, $18		# E : count -= 8
-	addq	$17, 8, $17		# E : src += 8
-	nop				# E :
-
-	stq	$1, 0($16)		# L : store
-	addq	$16, 8, $16		# E : dest += 8
-	and	$16, 63, $1		# E : get mod64 alignment
-	bne	$1, $single_head_quad	# U : still not fully aligned
-
-$do_unroll:
-	addq	$16, 64, $7		# E : Initial (+1 trip) wh64 address
-	cmple	$18, 127, $1		# E : Can we go through the unrolled loop?
-	bne	$1, $tail_quads		# U : Nope
-	nop				# E :
-
-$unroll_body:
-	wh64	($7)			# L1 : memory subsystem hint: 64 bytes at
-					# ($7) are about to be over-written
-	ldq	$6, 0($17)		# L0 : bytes 0..7
-	nop				# E :
-	nop				# E :
-
-	ldq	$4, 8($17)		# L : bytes 8..15
-	ldq	$5, 16($17)		# L : bytes 16..23
-	addq	$7, 64, $7		# E : Update next wh64 address
-	nop				# E :
-
-	ldq	$3, 24($17)		# L : bytes 24..31
-	addq	$16, 64, $1		# E : fallback value for wh64
-	nop				# E :
-	nop				# E :
-
-	addq	$17, 32, $17		# E : src += 32 bytes
-	stq	$6, 0($16)		# L : bytes 0..7
-	nop				# E :
-	nop				# E :
-
-	stq	$4, 8($16)		# L : bytes 8..15
-	stq	$5, 16($16)		# L : bytes 16..23
-	subq	$18, 192, $2		# E : At least two more trips to go?
-	nop				# E :
-
-	stq	$3, 24($16)		# L : bytes 24..31
-	addq	$16, 32, $16		# E : dest += 32 bytes
-	nop				# E :
-	nop				# E :
-
-	ldq	$6, 0($17)		# L : bytes 0..7
-	ldq	$4, 8($17)		# L : bytes 8..15
-	cmovlt	$2, $1, $7		# E : Latency 2, extra map slot - Use
-					# fallback wh64 address if < 2 more trips
-	nop				# E :
-
-	ldq	$5, 16($17)		# L : bytes 16..23
-	ldq	$3, 24($17)		# L : bytes 24..31
-	addq	$16, 32, $16		# E : dest += 32
-	subq	$18, 64, $18		# E : count -= 64
-
-	addq	$17, 32, $17		# E : src += 32
-	stq	$6, -32($16)		# L : bytes 0..7
-	stq	$4, -24($16)		# L : bytes 8..15
-	cmple	$18, 63, $1		# E : At least one more trip?
-
-	stq	$5, -16($16)		# L : bytes 16..23
-	stq	$3, -8($16)		# L : bytes 24..31
-	nop				# E :
-	beq	$1, $unroll_body
-
-$tail_quads:
-$no_unroll:
-	.align 4
-	subq	$18, 8, $18		# E : At least a quad left?
-	blt	$18, $less_than_8	# U : Nope
-	nop				# E :
-	nop				# E :
-
-$move_a_quad:
-	ldq	$1, 0($17)		# L : fetch 8
-	subq	$18, 8, $18		# E : count -= 8
-	addq	$17, 8, $17		# E : src += 8
-	nop				# E :
-
-	stq	$1, 0($16)		# L : store 8
-	addq	$16, 8, $16		# E : dest += 8
-	bge	$18, $move_a_quad	# U :
-	nop				# E :
-
-$less_than_8:
-	.align 4
-	addq	$18, 8, $18		# E : add back for trailing bytes
-	ble	$18, $nomoredata	# U : All-done
-	nop				# E :
-	nop				# E :
-
-	/* Trailing bytes */
-$tail_bytes:
-	subq	$18, 1, $18		# E : count--
-	ldbu	$1, 0($17)		# L : fetch a byte
-	addq	$17, 1, $17		# E : src++
-	nop				# E :
-
-	stb	$1, 0($16)		# L : store a byte
-	addq	$16, 1, $16		# E : dest++
-	bgt	$18, $tail_bytes	# U : more to be done?
-	nop				# E :
-
-	/* branching to exit takes 3 extra cycles, so replicate exit here */
-	ret	$31, ($26), 1		# L0 :
-	nop				# E :
-	nop				# E :
-	nop				# E :
-
-$misaligned:
-	mov	$0, $4			# E : dest temp
-	and	$0, 7, $1		# E : dest alignment mod8
-	beq	$1, $dest_0mod8		# U : life doesnt totally suck
-	nop
-
-$aligndest:
-	ble	$18, $nomoredata	# U :
-	ldbu	$1, 0($17)		# L : fetch a byte
-	subq	$18, 1, $18		# E : count--
-	addq	$17, 1, $17		# E : src++
-
-	stb	$1, 0($4)		# L : store it
-	addq	$4, 1, $4		# E : dest++
-	and	$4, 7, $1		# E : dest 0mod8 yet?
-	bne	$1, $aligndest		# U : go until we are aligned.
-
-	/* Source has unknown alignment, but dest is known to be 0mod8 */
-$dest_0mod8:
-	subq	$18, 8, $18		# E : At least a quad left?
-	blt	$18, $misalign_tail	# U : Nope
-	ldq_u	$3, 0($17)		# L : seed (rotating load) of 8 bytes
-	nop				# E :
-
-$mis_quad:
-	ldq_u	$16, 8($17)		# L : Fetch next 8
-	extql	$3, $17, $3		# U : masking
-	extqh	$16, $17, $1		# U : masking
-	bis	$3, $1, $1		# E : merged bytes to store
-
-	subq	$18, 8, $18		# E : count -= 8
-	addq	$17, 8, $17		# E : src += 8
-	stq	$1, 0($4)		# L : store 8 (aligned)
-	mov	$16, $3			# E : "rotate" source data
-
-	addq	$4, 8, $4		# E : dest += 8
-	bge	$18, $mis_quad		# U : More quads to move
-	nop
-	nop
-
-$misalign_tail:
-	addq	$18, 8, $18		# E : account for tail stuff
-	ble	$18, $nomoredata	# U :
-	nop
-	nop
-
-$misalign_byte:
-	ldbu	$1, 0($17)		# L : fetch 1
-	subq	$18, 1, $18		# E : count--
-	addq	$17, 1, $17		# E : src++
-	nop				# E :
-
-	stb	$1, 0($4)		# L : store
-	addq	$4, 1, $4		# E : dest++
-	bgt	$18, $misalign_byte	# U : more to go?
-	nop
-
-
-$nomoredata:
-	ret	$31, ($26), 1		# L0 :
-	nop				# E :
-	nop				# E :
-	nop				# E :
-
-END(memcpy)
-libc_hidden_builtin_def (memcpy)
diff --git a/sysdeps/alpha/alphaev6/memset.S b/sysdeps/alpha/alphaev6/memset.S
deleted file mode 100644
index 185821c7eb..0000000000
--- a/sysdeps/alpha/alphaev6/memset.S
+++ /dev/null
@@ -1,223 +0,0 @@
-/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
-   Contributed by Richard Henderson (rth@tamu.edu)
-   EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-	.arch ev6
-	.set noat
-	.set noreorder
-
-ENTRY(memset)
-#ifdef PROF
-	ldgp	gp, 0(pv)
-	lda	AT, _mcount
-	jsr	AT, (AT), _mcount
-	.prologue 1
-#else
-	.prologue 0
-#endif
-
-	/*
-	 * Serious stalling happens.  The only way to mitigate this is to
-	 * undertake a major re-write to interleave the constant materialization
-	 * with other parts of the fall-through code.  This is important, even
-	 * though it makes maintenance tougher.
-	 * Do this later.
-	 */
-	and	$17, 255, $1	# E : 00000000000000ch
-	insbl	$17, 1, $2	# U : 000000000000ch00
-	mov	$16, $0		# E : return value
-	ble	$18, $end	# U : zero length requested?
-
-	addq	$18, $16, $6	# E : max address to write to
-	or	$1, $2, $17	# E : 000000000000chch
-	insbl	$1, 2, $3	# U : 0000000000ch0000
-	insbl	$1, 3, $4	# U : 00000000ch000000
-
-	or	$3, $4, $3	# E : 00000000chch0000
-	inswl	$17, 4, $5	# U : 0000chch00000000
-	xor	$16, $6, $1	# E : will complete write be within one quadword?
-	inswl	$17, 6, $2	# U : chch000000000000
-
-	or	$17, $3, $17	# E : 00000000chchchch
-	or	$2, $5, $2	# E : chchchch00000000
-	bic	$1, 7, $1	# E : fit within a single quadword?
-	and	$16, 7, $3	# E : Target addr misalignment
-
-	or	$17, $2, $17	# E : chchchchchchchch
-	beq	$1, $within_quad # U :
-	nop			# E :
-	beq	$3, $aligned	# U : target is 0mod8
-
-	/*
-	 * Target address is misaligned, and won't fit within a quadword.
-	 */
-	ldq_u	$4, 0($16)	# L : Fetch first partial
-	mov	$16, $5		# E : Save the address
-	insql	$17, $16, $2	# U : Insert new bytes
-	subq	$3, 8, $3	# E : Invert (for addressing uses)
-
-	addq	$18, $3, $18	# E : $18 is new count ($3 is negative)
-	mskql	$4, $16, $4	# U : clear relevant parts of the quad
-	subq	$16, $3, $16	# E : $16 is new aligned destination
-	or	$2, $4, $1	# E : Final bytes
-
-	nop
-	stq_u	$1,0($5)	# L : Store result
-	nop
-	nop
-
-	.align 4
-$aligned:
-	/*
-	 * We are now guaranteed to be quad aligned, with at least
-	 * one partial quad to write.
-	 */
-
-	sra	$18, 3, $3	# U : Number of remaining quads to write
-	and	$18, 7, $18	# E : Number of trailing bytes to write
-	mov	$16, $5		# E : Save dest address
-	beq	$3, $no_quad	# U : tail stuff only
-
-	/*
-	 * It's worth the effort to unroll this and use wh64 if possible.
-	 * At this point, entry values are:
-	 * $16	Current destination address
-	 * $5	A copy of $16
-	 * $6	The max quadword address to write to
-	 * $18	Number trailer bytes
-	 * $3	Number quads to write
-	 */
-
-	and	$16, 0x3f, $2	# E : Forward work (only useful for unrolled loop)
-	subq	$3, 16, $4	# E : Only try to unroll if > 128 bytes
-	subq	$2, 0x40, $1	# E : bias counter (aligning stuff 0mod64)
-	blt	$4, $loop	# U :
-
-	/*
-	 * We know we've got at least 16 quads, minimum of one trip
-	 * through unrolled loop.  Do a quad at a time to get us 0mod64
-	 * aligned.
-	 */
-
-	nop			# E :
-	nop			# E :
-	nop			# E :
-	beq	$1, $bigalign	# U :
-
-$alignmod64:
-	stq	$17, 0($5)	# L :
-	subq	$3, 1, $3	# E : For consistency later
-	addq	$1, 8, $1	# E : Increment towards zero for alignment
-	addq	$5, 8, $4	# E : Initial wh64 address (filler instruction)
-
-	nop
-	nop
-	addq	$5, 8, $5	# E : Inc address
-	blt	$1, $alignmod64 # U :
-
-$bigalign:
-	/*
-	 * $3 - number quads left to go
-	 * $5 - target address (aligned 0mod64)
-	 * $17 - mask of stuff to store
-	 * Scratch registers available: $7, $2, $4, $1
-	 * We know that we'll be taking a minimum of one trip through.
-	 * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle
-	 * Assumes the wh64 needs to be for 2 trips through the loop in the future.
-	 * The wh64 is issued for the starting destination address for trip +2
-	 * through the loop, and if there are fewer than two trips left, the target
-	 * address will be for the current trip.
-	 */
-
-$do_wh64:
-	wh64	($4)		# L1 : memory subsystem write hint
-	subq	$3, 24, $2	# E : For determining future wh64 addresses
-	stq	$17, 0($5)	# L :
-	nop			# E :
-
-	addq	$5, 128, $4	# E : speculative target of next wh64
-	stq	$17, 8($5)	# L :
-	stq	$17, 16($5)	# L :
-	addq	$5, 64, $7	# E : Fallback address for wh64 (== next trip addr)
-
-	stq	$17, 24($5)	# L :
-	stq	$17, 32($5)	# L :
-	cmovlt	$2, $7, $4	# E : Latency 2, extra mapping cycle
-	nop
-
-	stq	$17, 40($5)	# L :
-	stq	$17, 48($5)	# L :
-	subq	$3, 16, $2	# E : Repeat the loop at least once more?
-	nop
-
-	stq	$17, 56($5)	# L :
-	addq	$5, 64, $5	# E :
-	subq	$3, 8, $3	# E :
-	bge	$2, $do_wh64	# U :
-
-	nop
-	nop
-	nop
-	beq	$3, $no_quad	# U : Might have finished already
-
-	.align 4
-	/*
-	 * Simple loop for trailing quadwords, or for small amounts
-	 * of data (where we can't use an unrolled loop and wh64)
-	 */
-$loop:
-	stq	$17, 0($5)	# L :
-	subq	$3, 1, $3	# E : Decrement number quads left
-	addq	$5, 8, $5	# E : Inc address
-	bne	$3, $loop	# U : more?
-
-$no_quad:
-	/*
-	 * Write 0..7 trailing bytes.
-	 */
-	nop			# E :
-	beq	$18, $end	# U : All done?
-	ldq	$7, 0($5)	# L :
-	mskqh	$7, $6, $2	# U : Mask final quad
-
-	insqh	$17, $6, $4	# U : New bits
-	or	$2, $4, $1	# E : Put it all together
-	stq	$1, 0($5)	# L : And back to memory
-	ret	$31,($26),1	# L0 :
-
-$within_quad:
-	ldq_u	$1, 0($16)	# L :
-	insql	$17, $16, $2	# U : New bits
-	mskql	$1, $16, $4	# U : Clear old
-	or	$2, $4, $2	# E : New result
-
-	mskql	$2, $6, $4	# U :
-	mskqh	$1, $6, $2	# U :
-	or	$2, $4, $1	# E :
-	stq_u	$1, 0($16)	# L :
-
-$end:
-	nop
-	nop
-	nop
-	ret $31,($26),1		# L0 :
-
-	END(memset)
-libc_hidden_builtin_def (memset)
diff --git a/sysdeps/alpha/alphaev6/stxcpy.S b/sysdeps/alpha/alphaev6/stxcpy.S
deleted file mode 100644
index 84f19581d1..0000000000
--- a/sysdeps/alpha/alphaev6/stxcpy.S
+++ /dev/null
@@ -1,314 +0,0 @@
-/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
-   Contributed by Richard Henderson (rth@tamu.edu)
-   EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* Copy a null-terminated string from SRC to DST.
-
-   This is an internal routine used by strcpy, stpcpy, and strcat.
-   As such, it uses special linkage conventions to make implementation
-   of these public functions more efficient.
-
-   On input:
-	t9 = return address
-	a0 = DST
-	a1 = SRC
-
-   On output:
-	t8  = bitmask (with one bit set) indicating the last byte written
-	a0  = unaligned address of the last *word* written
-
-   Furthermore, v0, a3-a5, t11, and t12 are untouched.
-*/
-
-
-#include <sysdep.h>
-
-	.arch ev6
-	.set noat
-	.set noreorder
-
-	.text
-	.type	__stxcpy, @function
-	.globl	__stxcpy
-	.usepv	__stxcpy, no
-
-	cfi_startproc
-	cfi_return_column (t9)
-
-	/* On entry to this basic block:
-	   t0 == the first destination word for masking back in
-	   t1 == the first source word.  */
-	.align 4
-stxcpy_aligned:
-	/* Create the 1st output word and detect 0's in the 1st input word.  */
-	lda	t2, -1		# E : build a mask against false zero
-	mskqh	t2, a1, t2	# U :   detection in the src word (stall)
-	mskqh	t1, a1, t3	# U : t3 = src bytes at or above the start offset
-	ornot	t1, t2, t2	# E : bytes below the start become 0xff (stall)
-
-	mskql	t0, a1, t0	# U : assemble the first output word
-	cmpbge	zero, t2, t10	# E : bits set iff null found
-	or	t0, t3, t1	# E : t1 = first output word (stall)
-	bne	t10, $a_eos	# U : null already in the first word (stall)
-
-	/* On entry to this basic block:
-	   t0 == the first destination word for masking back in
-	   t1 == a source word not containing a null.  */
-	/* Nops here to separate store quads from load quads */
-
-$a_loop:
-	stq_u	t1, 0(a0)	# L : store the completed word
-	addq	a0, 8, a0	# E : advance dst
-	nop
-	nop
-
-	ldq_u	t1, 0(a1)	# L : Latency=3 load next src word
-	addq	a1, 8, a1	# E : advance src
-	cmpbge	zero, t1, t10	# E : (3 cycle stall)
-	beq	t10, $a_loop	# U : (stall for t10)
-
-	/* Take care of the final (partial) word store.
-	   On entry to this basic block we have:
-	   t1 == the source word containing the null
-	   t10 == the cmpbge mask that found it.  */
-$a_eos:
-	negq	t10, t6		# E : find low bit set
-	and	t10, t6, t8	# E : t8 = bit of the first null byte (stall)
-	/* For the sake of the cache, don't read a destination word
-	   if we're not going to need it.  */
-	and	t8, 0x80, t6	# E : null in byte 7 means a full word (stall)
-	bne	t6, 1f		# U : (stall)
-
-	/* We're doing a partial word store and so need to combine
-	   our source and original destination words.  */
-	ldq_u	t0, 0(a0)	# L : Latency=3
-	subq	t8, 1, t6	# E : t6 = bytes below the null
-	zapnot	t1, t6, t1	# U : clear src bytes >= null (stall)
-	or	t8, t6, t10	# E : t10 = bytes up to and including null (stall)
-
-	zap	t0, t10, t0	# U : clear dst bytes <= null
-	or	t0, t1, t1	# E : (stall)
-	nop
-	nop
-
-1:	stq_u	t1, 0(a0)	# L : store the final word
-	ret	(t9)		# L0 : Latency=3
-	nop
-	nop
-
-	.align 4
-__stxcpy:
-	/* Are source and destination co-aligned?  */
-	xor	a0, a1, t0	# E : t0 = address bits that differ
-	unop			# E :
-	and	t0, 7, t0	# E : keep only the byte offset bits (stall)
-	bne	t0, $unaligned	# U : (stall)
-
-	/* We are co-aligned; take care of a partial first word.  */
-	ldq_u	t1, 0(a1)		# L : load first src word
-	and	a0, 7, t0		# E : take care not to load a word ...
-	addq	a1, 8, a1		# E :
-	beq	t0, stxcpy_aligned	# U : ... if we wont need it (stall)
-
-	ldq_u	t0, 0(a0)	# L : load first dst word for masking back in
-	br	stxcpy_aligned	# L0 : Latency=3
-	nop
-	nop
-
-
-/* The source and destination are not co-aligned.  Align the destination
-   and cope.  We have to be very careful about not reading too much and
-   causing a SEGV.  */
-
-	.align 4
-$u_head:
-	/* We know just enough now to be able to assemble the first
-	   full source word.  We can still find a zero at the end of it
-	   that prevents us from outputting the whole thing.
-
-	   On entry to this basic block:
-	   t0 == the first dest word, for masking back in, if needed else 0
-	   t1 == the low bits of the first source word
-	   t6 == bytemask that is -1 in dest word bytes */
-
-	ldq_u	t2, 8(a1)	# L : load second src word
-	addq	a1, 8, a1	# E :
-	extql	t1, a1, t1	# U : (stall on a1)
-	extqh	t2, a1, t4	# U : (stall on a1)
-
-	mskql	t0, a0, t0	# U : keep dst bytes below the start offset
-	or	t1, t4, t1	# E : first aligned src word complete
-	mskqh	t1, a0, t1	# U : (stall on t1)
-	or	t0, t1, t1	# E : first output word complete (stall on t1)
-
-	or	t1, t6, t6	# E : hide original dst bytes from the zero test
-	cmpbge	zero, t6, t10	# E : (stall)
-	lda	t6, -1		# E : for masking just below
-	bne	t10, $u_final	# U : (stall)
-
-	mskql	t6, a1, t6		# U : mask out the bits we have
-	or	t6, t2, t2		# E :   already extracted before (stall)
-	cmpbge	zero, t2, t10		# E :   testing eos (stall)
-	bne	t10, $u_late_head_exit	# U : (stall)
-
-	/* Finally, we've got all the stupid leading edge cases taken care
-	   of and we can set up to enter the main loop.  */
-
-	stq_u	t1, 0(a0)	# L : store first output word
-	addq	a0, 8, a0	# E :
-	extql	t2, a1, t0	# U : position hi-bits of lo word
-	ldq_u	t2, 8(a1)	# L : read next high-order source word
-
-	addq	a1, 8, a1	# E :
-	cmpbge	zero, t2, t10	# E : (stall for t2)
-	nop			# E :
-	bne	t10, $u_eos	# U : (stall)
-
-	/* Unaligned copy main loop.  In order to avoid reading too much,
-	   the loop is structured to detect zeros in aligned source words.
-	   This has, unfortunately, effectively pulled half of a loop
-	   iteration out into the head and half into the tail, but it does
-	   prevent nastiness from accumulating in the very thing we want
-	   to run as fast as possible.
-
-	   On entry to this basic block:
-	   t0 == the shifted high-order bits from the previous source word
-	   t2 == the unshifted current source word
-
-	   We further know that t2 does not contain a null terminator.  */
-
-	.align 3
-$u_loop:
-	extqh	t2, a1, t1	# U : extract high bits for current word
-	addq	a1, 8, a1	# E : advance src (stall)
-	extql	t2, a1, t3	# U : extract low bits for next time (stall)
-	addq	a0, 8, a0	# E : advance dst
-
-	or	t0, t1, t1	# E : current dst word now complete
-	ldq_u	t2, 0(a1)	# L : Latency=3 load high word for next time
-	stq_u	t1, -8(a0)	# L : save the current word (stall)
-	mov	t3, t0		# E : carry the leftover bits to the next trip
-
-	cmpbge	zero, t2, t10	# E : test new word for eos
-	beq	t10, $u_loop	# U : (stall)
-	nop
-	nop
-
-	/* We've found a zero somewhere in the source word we just read.
-	   If it resides in the lower half, we have one (probably partial)
-	   word to write out, and if it resides in the upper half, we
-	   have one full and one partial word left to write out.
-
-	   On entry to this basic block:
-	   t0 == the shifted high-order bits from the previous source word
-	   t2 == the unshifted current source word.  */
-$u_eos:
-	extqh	t2, a1, t1	# U : position low bits of the current word
-	or	t0, t1, t1	# E : first (partial) source word complete (stall)
-	cmpbge	zero, t1, t10	# E : is the null in this first bit? (stall)
-	bne	t10, $u_final	# U : (stall)
-
-$u_late_head_exit:
-	stq_u	t1, 0(a0)	# L : the null was in the high-order bits
-	addq	a0, 8, a0	# E :
-	extql	t2, a1, t1	# U : t1 = remaining high bits of the src word
-	cmpbge	zero, t1, t10	# E : (stall)
-
-	/* Take care of a final (probably partial) result word.
-	   On entry to this basic block:
-	   t1 == assembled source word
-	   t10 == cmpbge mask that found the null.  */
-$u_final:
-	negq	t10, t6		# E : isolate low bit set
-	and	t6, t10, t8	# E : t8 = bit of the first null byte (stall)
-	and	t8, 0x80, t6	# E : avoid dest word load if we can (stall)
-	bne	t6, 1f		# U : (stall)
-
-	ldq_u	t0, 0(a0)	# L : original dest word to merge with
-	subq	t8, 1, t6	# E : t6 = bytes below the null
-	or	t6, t8, t10	# E : t10 = bytes up to and including null (stall)
-	zapnot	t1, t6, t1	# U : kill source bytes >= null (stall)
-
-	zap	t0, t10, t0	# U : kill dest bytes <= null (2 cycle data stall)
-	or	t0, t1, t1	# E : (stall)
-	nop
-	nop
-
-1:	stq_u	t1, 0(a0)	# L : store the final word
-	ret	(t9)		# L0 : Latency=3
-	nop
-	nop
-
-	/* Unaligned copy entry point.  */
-	.align 4
-$unaligned:
-
-	ldq_u	t1, 0(a1)	# L : load first source word
-	and	a0, 7, t4	# E : find dest misalignment
-	and	a1, 7, t5	# E : find src misalignment
-	/* Conditionally load the first destination word and a bytemask
-	   with 0xff indicating that the destination byte is sacrosanct.  */
-	mov	zero, t0	# E : default: no dest word to merge
-
-	mov	zero, t6	# E : default: no sacrosanct bytes
-	beq	t4, 1f		# U : dest aligned, keep the defaults
-	ldq_u	t0, 0(a0)	# L :
-	lda	t6, -1		# E :
-
-	mskql	t6, a0, t6	# U : t6 = 0xff in dst bytes below the start
-	nop
-	nop
-	nop
-1:
-	subq	a1, t4, a1	# E : sub dest misalignment from src addr
-	/* If source misalignment is larger than dest misalignment, we need
-	   extra startup checks to avoid SEGV.  */
-	cmplt	t4, t5, t8	# E : t8 = (dest misalign < src misalign)
-	beq	t8, $u_head	# U :
-	lda	t2, -1		# E : mask out leading garbage in source
-
-	mskqh	t2, t5, t2	# U : t2 = 0xff in valid src bytes
-	ornot	t1, t2, t3	# E : (stall)
-	cmpbge	zero, t3, t10	# E : is there a zero? (stall)
-	beq	t10, $u_head	# U : (stall)
-
-	/* At this point we've found a zero in the first partial word of
-	   the source.  We need to isolate the valid source data and mask
-	   it into the original destination data.  (Incidentally, we know
-	   that we'll need at least one byte of that original dest word.) */
-
-	ldq_u	t0, 0(a0)	# L :
-	negq	t10, t6		# E : build bitmask of bytes <= zero
-	and	t6, t10, t8	# E : (stall)
-	and	a1, 7, t5	# E : t5 = src minus dest misalignment
-
-	subq	t8, 1, t6	# E :
-	or	t6, t8, t10	# E : (stall)
-	srl	t8, t5, t8	# U : adjust final null return value
-	zapnot	t2, t10, t2	# U : prepare source word; mirror changes (stall)
-
-	and	t1, t2, t1	# E : to source validity mask
-	extql	t2, a1, t2	# U : shift validity mask into dest position
-	extql	t1, a1, t1	# U : shift src into dest position (stall)
-	andnot	t0, t2, t0	# E : zero place for source to reside (stall)
-
-	or	t0, t1, t1	# E : and put it there
-	stq_u	t1, 0(a0)	# L : (stall)
-	ret	(t9)		# L0 : Latency=3
-
-	cfi_endproc
diff --git a/sysdeps/alpha/alphaev6/stxncpy.S b/sysdeps/alpha/alphaev6/stxncpy.S
deleted file mode 100644
index ad094cc1df..0000000000
--- a/sysdeps/alpha/alphaev6/stxncpy.S
+++ /dev/null
@@ -1,392 +0,0 @@
-/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
-   Contributed by Richard Henderson (rth@tamu.edu)
-   EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* Copy no more than COUNT bytes of the null-terminated string from
-   SRC to DST.
-
-   This is an internal routine used by strncpy, stpncpy, and strncat.
-   As such, it uses special linkage conventions to make implementation
-   of these public functions more efficient.
-
-   On input:
-	t9 = return address
-	a0 = DST
-	a1 = SRC
-	a2 = COUNT
-
-   Furthermore, COUNT may not be zero.
-
-   On output:
-	t0  = last word written
-	t8  = bitmask (with one bit set) indicating the last byte written
-	t10 = bitmask (with one bit set) indicating the byte position of
-	      the end of the range specified by COUNT
-	a0  = unaligned address of the last *word* written
-	a2  = the number of full words left in COUNT
-
-   Furthermore, v0, a3-a5, t11, and t12 are untouched.
-*/
-
-#include <sysdep.h>
-
-	.arch ev6
-	.set noat
-	.set noreorder
-
-	.text
-	.type	__stxncpy, @function
-	.globl	__stxncpy
-	.usepv	__stxncpy, no
-
-	cfi_startproc
-	cfi_return_column (t9)
-
-	/* On entry to this basic block:
-	   t0 == the first destination word for masking back in
-	   t1 == the first source word.  */
-	.align 4
-stxncpy_aligned:
-	/* Create the 1st output word and detect 0's in the 1st input word.  */
-	lda	t2, -1		# E : build a mask against false zero
-	mskqh	t2, a1, t2	# U :   detection in the src word (stall)
-	mskqh	t1, a1, t3	# U : t3 = src bytes at or above the start offset
-	ornot	t1, t2, t2	# E : bytes below the start become 0xff (stall)
-
-	mskql	t0, a1, t0	# U : assemble the first output word
-	cmpbge	zero, t2, t7	# E : bits set iff null found
-	or	t0, t3, t0	# E : t0 = first output word (stall)
-	beq	a2, $a_eoc	# U : no full words left, count ends here
-
-	bne	t7, $a_eos	# U : null already in the first word
-	nop
-	nop
-	nop
-
-	/* On entry to this basic block:
-	   t0 == a source word not containing a null.  */
-
-	/*
-	 * nops here to:
-	 *	separate store quads from load quads
-	 *	limit of 1 bcond/quad to permit training
-	 */
-$a_loop:
-	stq_u	t0, 0(a0)	# L : store the completed word
-	addq	a0, 8, a0	# E : advance dst
-	subq	a2, 1, a2	# E : one fewer word left in count
-	nop
-
-	ldq_u	t0, 0(a1)	# L : load next src word
-	addq	a1, 8, a1	# E : advance src
-	cmpbge	zero, t0, t7	# E : bits set iff null found
-	beq	a2, $a_eoc      # U : count ends within this word
-
-	beq	t7, $a_loop	# U : no null yet, keep copying
-	nop
-	nop
-	nop
-
-	/* Take care of the final (partial) word store.  At this point
-	   the end-of-count bit is set in t7 iff it applies.
-
-	   On entry to this basic block we have:
-	   t0 == the source word containing the null
-	   t7 == the cmpbge mask that found it.  */
-$a_eos:
-	negq	t7, t8		# E : find low bit set
-	and	t7, t8, t8	# E : t8 = bit of the last byte to write (stall)
-	/* For the sake of the cache, don't read a destination word
-	   if we're not going to need it.  */
-	and	t8, 0x80, t6	# E : last byte is byte 7 means a full word (stall)
-	bne	t6, 1f		# U : (stall)
-
-	/* We're doing a partial word store and so need to combine
-	   our source and original destination words.  */
-	ldq_u	t1, 0(a0)	# L :
-	subq	t8, 1, t6	# E : t6 = bytes below the last byte
-	or	t8, t6, t7	# E : t7 = bytes up to and including it (stall)
-	zapnot	t0, t7, t0	# U : clear src bytes > null (stall)
-
-	zap	t1, t7, t1	# U : clear dst bytes <= null
-	or	t0, t1, t0	# E : (stall)
-	nop
-	nop
-
-1:	stq_u	t0, 0(a0)	# L : store the final word
-	ret	(t9)		# L0 : Latency=3
-	nop
-	nop
-
-	/* Add the end-of-count bit to the eos detection bitmask.  */
-$a_eoc:
-	or	t10, t7, t7	# E : treat the last count byte like a null
-	br	$a_eos		# L0 : Latency=3
-	nop
-	nop
-
-	.align 4
-__stxncpy:
-	/* Are source and destination co-aligned?  */
-	lda	t2, -1		# E :
-	xor	a0, a1, t1	# E : t1 = address bits that differ
-	and	a0, 7, t0	# E : find dest misalignment
-	nop			# E :
-
-	srl	t2, 1, t2	# U : t2 = LONG_MAX
-	and	t1, 7, t1	# E : nonzero iff the misalignments differ
-	cmovlt	a2, t2, a2	# E : bound count to LONG_MAX (stall)
-	nop			# E :
-
-	addq	a2, t0, a2	# E : bias count by dest misalignment
-	subq	a2, 1, a2	# E : (stall)
-	and	a2, 7, t2	# E : t2 = byte index of last count byte (stall)
-	lda	t10, 1		# E :
-
-	srl	a2, 3, a2	# U : a2 = loop counter = (biased count - 1)/8
-	sll	t10, t2, t10	# U : t10 = bitmask of last count byte
-	nop			# E :
-	bne	t1, $unaligned	# U : (stall)
-
-	/* We are co-aligned; take care of a partial first word.  */
-	ldq_u	t1, 0(a1)	# L : load first src word
-	addq	a1, 8, a1	# E :
-	beq	t0, stxncpy_aligned # U : avoid loading dest word if not needed
-	ldq_u	t0, 0(a0)	# L : load first dst word for masking back in
-
-	br	stxncpy_aligned	# L0 : Latency=3
-	nop
-	nop
-	nop
-
-
-
-/* The source and destination are not co-aligned.  Align the destination
-   and cope.  We have to be very careful about not reading too much and
-   causing a SEGV.  */
-
-	.align 4
-$u_head:
-	/* We know just enough now to be able to assemble the first
-	   full source word.  We can still find a zero at the end of it
-	   that prevents us from outputting the whole thing.
-
-	   On entry to this basic block:
-	   t0 == the first dest word, unmasked
-	   t1 == the shifted low bits of the first source word
-	   t6 == bytemask that is -1 in dest word bytes */
-
-	ldq_u	t2, 8(a1)	# L : Latency=3 load second src word
-	addq	a1, 8, a1	# E :
-	mskql	t0, a0, t0	# U : mask trailing garbage in dst
-	extqh	t2, a1, t4	# U : (3 cycle stall on t2)
-
-	or	t1, t4, t1	# E : first aligned src word complete (stall)
-	mskqh	t1, a0, t1	# U : mask leading garbage in src (stall)
-	or	t0, t1, t0	# E : first output word complete (stall)
-	or	t0, t6, t6	# E : mask original data for zero test (stall)
-
-	cmpbge	zero, t6, t7	# E : bits set iff null found
-	beq	a2, $u_eocfin	# U : no full words left, count ends here
-	lda	t6, -1		# E :
-	nop
-
-	bne	t7, $u_final	# U : null already in the first word
-	mskql	t6, a1, t6	# U : mask out bits already seen
-	stq_u	t0, 0(a0)	# L : store first output word
-	or      t6, t2, t2	# E :
-
-	cmpbge	zero, t2, t7	# E : find nulls in second partial
-	addq	a0, 8, a0	# E : advance dst
-	subq	a2, 1, a2	# E : one fewer word left in count
-	bne	t7, $u_late_head_exit	# U :
-
-	/* Finally, we've got all the stupid leading edge cases taken care
-	   of and we can set up to enter the main loop.  */
-	extql	t2, a1, t1	# U : position hi-bits of lo word
-	beq	a2, $u_eoc	# U : count exhausted
-	ldq_u	t2, 8(a1)	# L : read next high-order source word
-	addq	a1, 8, a1	# E :
-
-	extqh	t2, a1, t0	# U : position lo-bits of hi word (stall)
-	cmpbge	zero, t2, t7	# E : test new word for eos
-	nop
-	bne	t7, $u_eos	# U :
-
-	/* Unaligned copy main loop.  In order to avoid reading too much,
-	   the loop is structured to detect zeros in aligned source words.
-	   This has, unfortunately, effectively pulled half of a loop
-	   iteration out into the head and half into the tail, but it does
-	   prevent nastiness from accumulating in the very thing we want
-	   to run as fast as possible.
-
-	   On entry to this basic block:
-	   t0 == the shifted low-order bits from the current source word
-	   t1 == the shifted high-order bits from the previous source word
-	   t2 == the unshifted current source word
-
-	   We further know that t2 does not contain a null terminator.  */
-
-	.align 4
-$u_loop:
-	or	t0, t1, t0	# E : current dst word now complete
-	subq	a2, 1, a2	# E : decrement word count
-	extql	t2, a1, t1	# U : extract high bits for next time
-	addq	a0, 8, a0	# E : advance dst
-
-	stq_u	t0, -8(a0)	# L : save the current word
-	beq	a2, $u_eoc	# U : count exhausted
-	ldq_u	t2, 8(a1)	# L : Latency=3 load high word for next time
-	addq	a1, 8, a1	# E : advance src
-
-	extqh	t2, a1, t0	# U : extract low bits (2 cycle stall)
-	cmpbge	zero, t2, t7	# E : test new word for eos
-	nop
-	beq	t7, $u_loop	# U :
-
-	/* We've found a zero somewhere in the source word we just read.
-	   If it resides in the lower half, we have one (probably partial)
-	   word to write out, and if it resides in the upper half, we
-	   have one full and one partial word left to write out.
-
-	   On entry to this basic block:
-	   t0 == the shifted low-order bits from the current source word
-	   t1 == the shifted high-order bits from the previous source word
-	   t2 == the unshifted current source word.  */
-$u_eos:
-	or	t0, t1, t0	# E : first (partial) source word complete
-	nop
-	cmpbge	zero, t0, t7	# E : is the null in this first bit? (stall)
-	bne	t7, $u_final	# U : (stall)
-
-	stq_u	t0, 0(a0)	# L : the null was in the high-order bits
-	addq	a0, 8, a0	# E : advance dst
-	subq	a2, 1, a2	# E : one fewer word left in count
-	nop
-
-$u_late_head_exit:
-	extql	t2, a1, t0	# U : t0 = remaining high bits of the src word
-	cmpbge	zero, t0, t7	# E : bits set iff null found
-	or	t7, t10, t6	# E : t6 = null mask plus end-of-count bit (stall)
-	cmoveq	a2, t6, t7	# E : use t6 if no words left; Latency=2, extra map slot (stall)
-
-	/* Take care of a final (probably partial) result word.
-	   On entry to this basic block:
-	   t0 == assembled source word
-	   t7 == cmpbge mask that found the null.  */
-$u_final:
-	negq	t7, t6		# E : isolate low bit set
-	and	t6, t7, t8	# E : t8 = bit of the last byte to write (stall)
-	and	t8, 0x80, t6	# E : avoid dest word load if we can (stall)
-	bne	t6, 1f		# U : (stall)
-
-	ldq_u	t1, 0(a0)	# L : original dest word to merge with
-	subq	t8, 1, t6	# E : t6 = bytes below the last byte
-	or	t6, t8, t7	# E : t7 = bytes up to and including it (stall)
-	zapnot	t0, t7, t0	# U : kill source bytes > null
-
-	zap	t1, t7, t1	# U : kill dest bytes <= null
-	or	t0, t1, t0	# E : (stall)
-	nop
-	nop
-
-1:	stq_u	t0, 0(a0)	# L : store the final word
-	ret	(t9)		# L0 : Latency=3
-
-        /* Got to end-of-count before end of string.
-           On entry to this basic block:
-           t1 == the shifted high-order bits from the previous source word  */
-$u_eoc:
-	and	a1, 7, t6	# E : t6 = src byte offset
-	sll	t10, t6, t6	# U : move end-of-count bit up by that offset (stall)
-	and	t6, 0xff, t6	# E : nonzero iff t1 already holds that byte (stall)
-	bne	t6, 1f		# U : if so, skip loading another src word (stall)
-
-	ldq_u	t2, 8(a1)	# L : load final src word
-	nop
-	extqh	t2, a1, t0	# U : extract low bits for last word (stall)
-	or	t1, t0, t1	# E : (stall)
-
-1:	cmpbge	zero, t1, t7	# E : bits set iff null found
-	mov	t1, t0		# E : t0 = assembled word for $u_final
-
-$u_eocfin:			# end-of-count, final word
-	or	t10, t7, t7	# E : add end-of-count bit to the eos mask
-	br	$u_final	# L0 : Latency=3
-
-	/* Unaligned copy entry point.  */
-	.align 4
-$unaligned:
-
-	ldq_u	t1, 0(a1)	# L : load first source word
-	and	a0, 7, t4	# E : find dest misalignment
-	and	a1, 7, t5	# E : find src misalignment
-	/* Conditionally load the first destination word and a bytemask
-	   with 0xff indicating that the destination byte is sacrosanct.  */
-	mov	zero, t0	# E : default: no dest word to merge
-
-	mov	zero, t6	# E : default: no sacrosanct bytes
-	beq	t4, 1f		# U : dest aligned, keep the defaults
-	ldq_u	t0, 0(a0)	# L :
-	lda	t6, -1		# E :
-
-	mskql	t6, a0, t6	# U : t6 = 0xff in dst bytes below the start
-	nop
-	nop
-1:	subq	a1, t4, a1	# E : sub dest misalignment from src addr
-
-	/* If source misalignment is larger than dest misalignment, we need
-	   extra startup checks to avoid SEGV.  */
-
-	cmplt	t4, t5, t8	# E : t8 = (dest misalign < src misalign)
-	extql	t1, a1, t1	# U : shift src into place
-	lda	t2, -1		# E : for creating masks later
-	beq	t8, $u_head	# U : (stall)
-
-	mskqh	t2, t5, t2	# U : begin src byte validity mask
-	cmpbge	zero, t1, t7	# E : is there a zero?
-	extql	t2, a1, t2	# U : shift validity mask like the src
-	or	t7, t10, t5	# E : test for end-of-count too
-
-	cmpbge	zero, t2, t3	# E : t3 = bytes that are not valid src
-	cmoveq	a2, t5, t7	# E : Latency=2, extra map slot
-	nop			# E : keep with cmoveq
-	andnot	t7, t3, t7	# E : ignore hits in invalid bytes (stall)
-
-	beq	t7, $u_head	# U :
-	/* At this point we've found a zero in the first partial word of
-	   the source.  We need to isolate the valid source data and mask
-	   it into the original destination data.  (Incidentally, we know
-	   that we'll need at least one byte of that original dest word.) */
-	ldq_u	t0, 0(a0)	# L :
-	negq	t7, t6		# E : build bitmask of bytes <= zero
-	mskqh	t1, t4, t1	# U : clear src bytes below the dest offset
-
-	and	t6, t7, t8	# E : t8 = bit of the last byte to write
-	subq	t8, 1, t6	# E : (stall)
-	or	t6, t8, t7	# E : (stall)
-	zapnot	t2, t7, t2	# U : prepare source word; mirror changes (stall)
-
-	zapnot	t1, t7, t1	# U : to source validity mask
-	andnot	t0, t2, t0	# E : zero place for source to reside
-	or	t0, t1, t0	# E : and put it there (stall both t0, t1)
-	stq_u	t0, 0(a0)	# L : (stall)
-
-	ret	(t9)		# L0 : Latency=3
-
-	cfi_endproc
author	Zack Weinberg <zackw@panix.com>	2017-06-08 15:39:03 -0400
committer	Zack Weinberg <zackw@panix.com>	2017-06-08 15:39:03 -0400
commit	5046dbb4a7eba5eccfd258f92f4735c9ffc8d069 (patch)
tree	4470480d904b65cf14ca524f96f79eca818c3eaf /sysdeps/alpha/alphaev6
parent	199fc19d3aaaf57944ef036e15904febe877fc93 (diff)
download	glibc-zack/build-layout-experiment.tar glibc-zack/build-layout-experiment.tar.gz glibc-zack/build-layout-experiment.tar.bz2 glibc-zack/build-layout-experiment.zip