diff options
Diffstat (limited to 'sysdeps/ia64/memset.S')
-rw-r--r-- | sysdeps/ia64/memset.S | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/sysdeps/ia64/memset.S b/sysdeps/ia64/memset.S new file mode 100644 index 0000000000..0ebd9bc72c --- /dev/null +++ b/sysdeps/ia64/memset.S @@ -0,0 +1,95 @@ +/* Optimized version of the standard memset() function. + This file is part of the GNU C Library. + Copyright (C) 2000 Free Software Foundation, Inc. + Contributed by Dan Pop <Dan.Pop@cern.ch>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* Return: dest + + Inputs: + in0: dest + in1: value + in2: count + + The algorithm is fairly straightforward: set byte by byte until we + we get to a word aligned address, then set word by word as much as + possible; the remaining few bytes are set one by one. */ + +#include <sysdep.h> +#undef ret + +#define dest in0 +#define byteval in1 +#define cnt in2 + +#define save_pfs loc0 +#define ptr1 loc1 +#define ptr2 loc2 +#define tmp loc3 +#define loopcnt loc4 +#define save_lc loc5 +#define wordval loc6 + +ENTRY(memset) + alloc save_pfs = ar.pfs, 3, 7, 0, 0 + mov save_lc = ar.lc + mov ret0 = dest + and tmp = 7, dest + cmp.eq p6, p0 = cnt, r0 +(p6) br.cond.spnt .restore_and_exit ;; + mov ptr1 = dest + sub loopcnt = 8, tmp + cmp.gt p6, p0 = 16, cnt +(p6) br.cond.spnt .set_few;; + cmp.eq p6, p0 = tmp, r0 +(p6) br.cond.sptk .dest_aligned + sub cnt = cnt, loopcnt + adds loopcnt = -1, loopcnt;; + mov ar.lc = loopcnt;; +.l1: + st1 [ptr1] = byteval, 1 + br.cloop.dptk .l1 ;; +.dest_aligned: + adds ptr2 = 8, ptr1 + mux1 wordval = byteval, @brcst + shr.u loopcnt = cnt, 4 ;; // loopcnt = cnt / 16 + cmp.eq p6, p0 = loopcnt, r0 +(p6) br.cond.spnt .one_more + and cnt = 0xf, cnt // compute the remaining cnt + adds loopcnt = -1, loopcnt;; + mov ar.lc = loopcnt;; +.l2: + st8 [ptr1] = wordval, 16 + st8 [ptr2] = wordval, 16 + br.cloop.dptk .l2 + cmp.le p6, p0 = 8, cnt ;; +.one_more: +(p6) st8 [ptr1] = wordval, 8 +(p6) adds cnt = -8, cnt ;; + cmp.eq p6, p0 = cnt, r0 +(p6) br.cond.spnt .restore_and_exit +.set_few: + adds loopcnt = -1, cnt;; + mov ar.lc = loopcnt;; +.l3: + st1 [ptr1] = byteval, 1 + br.cloop.dptk .l3 ;; +.restore_and_exit: + mov ar.lc = save_lc + mov ar.pfs = save_pfs + br.ret.sptk.many b0 +END(memset) |