aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/ia64/memset.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/ia64/memset.S')
-rw-r--r--sysdeps/ia64/memset.S95
1 files changed, 95 insertions, 0 deletions
diff --git a/sysdeps/ia64/memset.S b/sysdeps/ia64/memset.S
new file mode 100644
index 0000000000..0ebd9bc72c
--- /dev/null
+++ b/sysdeps/ia64/memset.S
@@ -0,0 +1,95 @@
+/* Optimized version of the standard memset() function.
+ This file is part of the GNU C Library.
+ Copyright (C) 2000 Free Software Foundation, Inc.
+ Contributed by Dan Pop <Dan.Pop@cern.ch>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* Return: dest
+
+ Inputs:
+ in0: dest
+ in1: value
+ in2: count
+
+ The algorithm is fairly straightforward: set byte by byte until we
+ get to a word (8-byte) aligned address, then set word by word as much
+ as possible; the remaining few bytes are set one by one. */
+
+#include <sysdep.h>
+#undef ret
+
+#define dest in0 /* register aliases used by memset below; in0 = start of the area to fill */
+#define byteval in1 /* fill value: stored with st1 and broadcast by mux1 */
+#define cnt in2 /* number of bytes still to be set; decremented as the fill proceeds */
+
+#define save_pfs loc0 /* ar.pfs value captured by alloc, written back on exit */
+#define ptr1 loc1 /* running store pointer, starts at dest */
+#define ptr2 loc2 /* second store pointer (ptr1 + 8) for the 16-byte loop */
+#define tmp loc3 /* dest & 7 */
+#define loopcnt loc4 /* iteration count staged before being moved to ar.lc */
+#define save_lc loc5 /* incoming ar.lc, written back on exit */
+#define wordval loc6 /* fill byte replicated across 8 bytes for st8 */
+
+ENTRY(memset) /* memset(dest, value, count): store the low byte of value into count bytes at dest; returns dest */
+ alloc save_pfs = ar.pfs, 3, 7, 0, 0 // frame: 3 inputs, 7 locals, 0 outputs, 0 rotating; previous ar.pfs kept in save_pfs
+ mov save_lc = ar.lc // ar.lc is overwritten by the counted loops below
+ mov ret0 = dest // return value is the original dest
+ and tmp = 7, dest // tmp = dest & 7 (offset within an 8-byte word)
+ cmp.eq p6, p0 = cnt, r0 // p6 = (cnt == 0)
+(p6) br.cond.spnt .restore_and_exit ;; // zero count: nothing to store
+ mov ptr1 = dest // ptr1 = running store pointer
+ sub loopcnt = 8, tmp // bytes up to the next 8-byte boundary (only used when tmp != 0)
+ cmp.gt p6, p0 = 16, cnt // p6 = (cnt < 16), signed compare
+(p6) br.cond.spnt .set_few;; // short fills are done entirely byte by byte
+ cmp.eq p6, p0 = tmp, r0 // p6 = (dest already 8-byte aligned)
+(p6) br.cond.sptk .dest_aligned // aligned: skip the head loop
+ sub cnt = cnt, loopcnt // account for the head bytes; at least 9 bytes remain (cnt >= 16, head <= 7)
+ adds loopcnt = -1, loopcnt;; // br.cloop iterates ar.lc + 1 times, so load count - 1
+ mov ar.lc = loopcnt;;
+.l1: // head loop: single bytes until ptr1 is 8-byte aligned
+ st1 [ptr1] = byteval, 1 // store one byte, ptr1 += 1
+ br.cloop.dptk .l1 ;;
+.dest_aligned: // ptr1 is 8-byte aligned here
+ adds ptr2 = 8, ptr1 // second store stream, 8 bytes ahead of ptr1
+ mux1 wordval = byteval, @brcst // replicate the low byte of byteval into all 8 bytes
+ shr.u loopcnt = cnt, 4 ;; // loopcnt = cnt / 16
+ cmp.eq p6, p0 = loopcnt, r0 // p6 = (no full 16-byte chunk)
+(p6) br.cond.spnt .one_more // p6 stays set so .one_more performs its st8; valid because 9 <= cnt <= 15 on this path
+ and cnt = 0xf, cnt // compute the remaining cnt
+ adds loopcnt = -1, loopcnt;; // ar.lc takes iterations - 1
+ mov ar.lc = loopcnt;;
+.l2: // main loop: 16 bytes per iteration through two pointers
+ st8 [ptr1] = wordval, 16 // low 8 bytes of the chunk, ptr1 += 16
+ st8 [ptr2] = wordval, 16 // high 8 bytes of the chunk, ptr2 += 16
+ br.cloop.dptk .l2
+ cmp.le p6, p0 = 8, cnt ;; // runs only on loop exit: p6 = (remaining cnt >= 8)
+.one_more:
+(p6) st8 [ptr1] = wordval, 8 // one extra 8-byte store when p6 is set, ptr1 += 8
+(p6) adds cnt = -8, cnt ;;
+ cmp.eq p6, p0 = cnt, r0 // p6 = (nothing left to store)
+(p6) br.cond.spnt .restore_and_exit
+.set_few: // tail / short fill: cnt is non-zero on every path that reaches here
+ adds loopcnt = -1, cnt;; // ar.lc takes iterations - 1
+ mov ar.lc = loopcnt;;
+.l3:
+ st1 [ptr1] = byteval, 1 // store one byte, ptr1 += 1
+ br.cloop.dptk .l3 ;;
+.restore_and_exit:
+ mov ar.lc = save_lc // put back the incoming loop count
+ mov ar.pfs = save_pfs // put back the ar.pfs value captured by alloc
+ br.ret.sptk.many b0 // return to the caller
+END(memset)