2 * Copyright (C) 1999-2003 Hewlett-Packard Co.
3 * Contributed by Stephane Eranian <eranian@hpl.hp.com>
5 * This file is part of ELILO, the EFI Linux boot loader.
7 * ELILO is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
12 * ELILO is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with ELILO; see the file COPYING. If not, write to the Free
19 * Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 * Please see elilo.txt for complete documentation on how
23 * to use this program.
25 * This code is derived from the Linux/ia64 source code.
30 * Optimized version of the standard memset() function
35 * in0: address of buffer
36 * in1: byte value to use for storing
37 * in2: length of the buffer
// memset (IA-64) -- body of the routine as visible in this excerpt.
// Per the header comment: in0 = buffer address, in1 = fill byte, in2 = length.
// NOTE(review): buf, val, len, cnt, tmp, buf2, saved_pfs and saved_lc are
// register aliases defined outside this excerpt; presumably buf/val/len map
// to in0/in1/in2 -- confirm against the definitions.
// NOTE(review): this view is gapped (loop bodies, some labels and whatever
// re-evaluates p6 between the predicated stores are not visible), so the
// comments below describe only what each visible line does.
60 .save ar.pfs, saved_pfs
61 alloc saved_pfs=ar.pfs,3,0,0,0 // frame: 3 inputs, no locals/outputs/rotating; cnt is sink here
62 cmp.eq p8,p0=r0,len // p8 = (len == 0): check for zero length
64 mov saved_lc=ar.lc // preserve ar.lc (slow)
69 adds tmp=-1,len // tmp = len - 1; br.ctop is repeat/until
70 tbit.nz p6,p0=buf,0 // p6 = bit 0 of buf set, i.e. odd alignment
71 (p8) br.ret.spnt.few rp // zero length: return immediately
73 cmp.lt p7,p0=16,len // p7 = (16 < len): if len > 16 then long memset
74 mux1 val=val,@brcst // broadcast the low byte of val to all 8 bytes
75 (p7) br.cond.dptk.few long_memset // long case handled at long_memset
77 mov ar.lc=tmp // initialize lc for small count (len - 1)
78 ;; // avoid RAW and WAW on ar.lc
79 1: // short-memset loop label; worst case 15 cycles, avg 8 cycles
82 ;; // avoid RAW on ar.lc
85 br.ret.sptk.few rp // end of short memset
87 // at this point we know we have more than 16 bytes to copy
88 // so we focus on alignment
// Each step below stores val and post-increments buf by the store size, then
// reduces len by the same amount, but only when p6 is set at that point.
90 (p6) st1 [buf]=val,1 // 1-byte aligned: store 1 byte, buf += 1
91 (p6) adds len=-1,len;; // len -= 1; sync because buf is modified
94 (p6) st2 [buf]=val,2 // 2-byte aligned: store 2 bytes, buf += 2
95 (p6) adds len=-2,len;; // len -= 2
98 (p6) st4 [buf]=val,4 // 4-byte aligned: store 4 bytes, buf += 4
99 (p6) adds len=-4,len;; // len -= 4
102 (p6) st8 [buf]=val,8 // 8-byte aligned: store 8 bytes, buf += 8
103 (p6) adds len=-8,len;; // len -= 8
104 shr.u cnt=len,4 // cnt = len >> 4: number of 128-bit (2x64bit) words
108 (p6) br.cond.dpnt.few .dotail // we have less than 16 bytes left
110 adds buf2=8,buf // buf2 = buf + 8: setup second base pointer
113 2: // main loop label; 16bytes/iteration
118 .dotail: // tail correction based on len only
// Tail stores, largest first; each is predicated on p6 (the instructions
// that set p6 for each step are not visible in this excerpt).
121 (p6) st8 [buf]=val,8 // at least 8 bytes
124 (p6) st4 [buf]=val,4 // at least 4 bytes
127 (p6) st2 [buf]=val,2 // at least 2 bytes
131 (p6) st1 [buf]=val // only 1 byte left (no post-increment)