/* Optimized version of the standard bzero() function.
   This file is part of the GNU C Library.
   Copyright (C) 2000, 2001 Free Software Foundation, Inc.
   Contributed by Dan Pop <Dan.Pop@cern.ch>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

/* Return: dest

   Inputs:
        in0:    dest
        in1:    count

   The algorithm is fairly straightforward: set byte by byte until we
   get to an 8-byte aligned address, then set 16 bytes per loop
   iteration (two 8-byte stores) as much as possible, followed by at
   most one more 8-byte store; the remaining few bytes are set one by
   one.  */

#include <sysdep.h>

/* NOTE(review): presumably <sysdep.h> defines `ret' as a macro, which
   would otherwise be expanded inside `br.ret.sptk.many' at the end of
   this file -- confirm against sysdep.h.  */
#undef ret

/* Register roles.

   dest      in0   start of the area to clear (also copied to ret0)
   cnt       in1   number of bytes still to clear
   save_pfs  loc0  ar.pfs as captured by alloc
   ptr1      loc1  running store pointer (first unset byte)
   ptr2      loc2  ptr1 + 8, second store stream of the 16-byte loop
   tmp       loc3  dest & 7, misalignment of dest within 8 bytes
   loopcnt   loc4  scratch used to compute trip counts for ar.lc
   save_lc   loc5  caller value of ar.lc, restored on exit  */
#define dest            in0
#define cnt             in1

#define save_pfs        loc0
#define ptr1            loc1
#define ptr2            loc2
#define tmp             loc3
#define loopcnt         loc4
#define save_lc         loc5

ENTRY(bzero)
        .prologue
        alloc   save_pfs = ar.pfs, 2, 6, 0, 0   // 2 inputs, 6 locals, no outputs
        .save ar.lc, save_lc
        mov     save_lc = ar.lc                 // every loop below counts in ar.lc
        .body
        mov     ret0 = dest
        and     tmp = 7, dest                   // tmp = dest % 8
        cmp.eq  p6, p0 = cnt, r0
(p6)    br.cond.spnt .restore_and_exit ;;       // cnt == 0: nothing to clear

        mov     ptr1 = dest
        sub     loopcnt = 8, tmp                // bytes up to the next 8-byte boundary
        /* NOTE(review): cmp.gt is a signed compare, so a count with
           bit 63 set also takes the byte-wise path -- confirm that
           this is acceptable.  */
        cmp.gt  p6, p0 = 16, cnt
(p6)    br.cond.spnt .set_few;;                 // cnt < 16: go byte by byte

        cmp.eq  p6, p0 = tmp, r0
(p6)    br.cond.sptk .dest_aligned              // already 8-byte aligned
        sub     cnt = cnt, loopcnt              // account for the alignment bytes
        adds    loopcnt = -1, loopcnt;;         // br.cloop iterates ar.lc + 1 times
        mov     ar.lc = loopcnt;;

        /* Clear single bytes until ptr1 is 8-byte aligned.  */
.l1:
        st1     [ptr1] = r0, 1
        br.cloop.dptk .l1 ;;

        /* Here ptr1 is 8-byte aligned.  If dest was aligned on entry
           then cnt >= 16; otherwise 1..7 bytes were just consumed from
           a count of at least 16, so cnt >= 9.  */
.dest_aligned:
        adds    ptr2 = 8, ptr1
        shr.u   loopcnt = cnt, 4 ;;             // loopcnt = cnt / 16
        cmp.eq  p6, p0 = loopcnt, r0
        /* Taken only when cnt is in 9..15.  p6 is still set on arrival
           at .one_more, so exactly one 8-byte store is done there,
           which stays within the area.  */
(p6)    br.cond.spnt    .one_more
        and     cnt = 0xf, cnt                  // compute the remaining cnt
        adds    loopcnt = -1, loopcnt;;         // br.cloop iterates ar.lc + 1 times
        mov     ar.lc = loopcnt;;

        /* Clear 16 bytes per iteration through two interleaved 8-byte
           store streams: ptr1 covers offsets 0, 16, ... and ptr2
           covers offsets 8, 24, ...  */
.l2:
        st8     [ptr1] = r0, 16
        st8     [ptr2] = r0, 16
        br.cloop.dptk .l2
        cmp.le  p6, p0 = 8, cnt ;;              // on loop exit: p6 = (cnt >= 8)

        /* One optional 8-byte store, controlled by p6.  */
.one_more:
(p6)    st8     [ptr1] = r0, 8
(p6)    adds    cnt = -8, cnt ;;
        cmp.eq  p6, p0 = cnt, r0
(p6)    br.cond.spnt  .restore_and_exit         // nothing left over

        /* Clear the remaining cnt bytes one at a time.  This is also
           the whole job when the initial count is below 16.  */
.set_few:
        adds    loopcnt = -1, cnt;;             // br.cloop iterates ar.lc + 1 times
        mov     ar.lc = loopcnt;;
.l3:
        st1     [ptr1] = r0, 1
        br.cloop.dptk   .l3 ;;

.restore_and_exit:
        mov     ar.lc = save_lc                 // restore the caller loop counter
        mov     ar.pfs = save_pfs
        br.ret.sptk.many b0
END(bzero)