3 * Copyright 2008 Free Software Foundation, Inc.
5 * This file is part of GNU Radio
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 #ifndef INCLUDED_GC_SPU_MACS_H
23 #define INCLUDED_GC_SPU_MACS_H
26 * This file contains a set of macros that are generally useful when
27 * coding in SPU assembler
29 * Note that the multi-instruction macros in here may overwrite
30 * registers 77, 78, and 79 without warning.
34 * defines for all registers
// Symbolic names for the registers that have a fixed role.
#define	lr	r0	// link register
#define	sp	r1	// stack pointer
			// r2 is environment pointer for langs that need it (ALGOL)

#define	retval	r3	// return values are passed in regs starting at r3

#define	arg1	r3	// args are passed in regs starting at r3
183 // r3 - r74 are volatile (caller saves)
// r75 - r79 are volatile (scratch regs possibly destroyed by fct prolog/epilog)
185 // r80 - r127 are non-volatile (callee-saves)
187 // scratch registers reserved for use by the macros in this file.
194 * ----------------------------------------------------------------
196 * ----------------------------------------------------------------
198 #define PROC_ENTRY(name) \
202 .type name, @function; \
206 * ----------------------------------------------------------------
207 * aliases for common operations
208 * ----------------------------------------------------------------
// MR(rt, ra): move register.  Copies ra into rt by OR-ing ra with itself.
// Issues on the even pipe, 2 cycles.
#define MR(rt, ra) \
	or	rt, ra, ra;
// LMR(rt, ra): move register.  Copies ra into rt with a quadword rotate
// by zero bytes.  Issues on the odd pipe, 4 cycles.
#define LMR(rt, ra) \
	rotqbyi	rt, ra, 0;
// RETURN(): return to the caller by branching indirect through lr.
#define RETURN() \
	bi	lr;
// HINT_RETURN(ret_label): emit a branch hint (hbr) for the branch
// instruction located at ret_label, naming lr as the target register.
#define HINT_RETURN(ret_label) \
	hbr	ret_label, lr;
// BRZ_RETURN(rt): return (branch indirect through lr) if rt is zero
#define BRZ_RETURN(rt) \
	biz	rt, lr;
// BRNZ_RETURN(rt): return (branch indirect through lr) if rt is not zero
#define BRNZ_RETURN(rt) \
	binz	rt, lr;
// BRHZ_RETURN(rt): return (branch indirect through lr) if the halfword
// tested by bihz in rt is zero
#define BRHZ_RETURN(rt) \
	bihz	rt, lr;
// BRHNZ_RETURN(rt): return (branch indirect through lr) if the halfword
// tested by bihnz in rt is not zero
#define BRHNZ_RETURN(rt) \
	bihnz	rt, lr;
237 * ----------------------------------------------------------------
238 * modulo like things for constant moduli that are powers of 2
239 * ----------------------------------------------------------------
// MODULO(rt, ra, pow2): rt = ra & (pow2 - 1)
// i.e. ra modulo pow2, where pow2 is a constant power of 2.
// The mask is encoded as the immediate operand of andi.
#define MODULO(rt, ra, pow2) \
	andi	rt, ra, (pow2)-1;
// MODULO_NEG(rt, ra, pow2): rt = pow2 - (ra & (pow2 - 1))
// pow2 is a constant power of 2.  The andi leaves ra modulo pow2 in rt;
// sfi (subtract from immediate) then forms pow2 - rt.  Note that when ra
// is already a multiple of pow2 the result is pow2, not 0.
#define MODULO_NEG(rt, ra, pow2) \
	andi	rt, ra, (pow2)-1; \
	sfi	rt, rt, (pow2);
// ROUND_DOWN(rt, ra, pow2): rt = ra & -(pow2)
// Clears the low bits of ra so that rt is ra rounded down to a multiple
// of the constant power of 2 pow2.
#define ROUND_DOWN(rt, ra, pow2) \
	andi	rt, ra, -(pow2);
// ROUND_UP(rt, ra, pow2): rt = (ra + (pow2 - 1)) & -(pow2)
// Adds pow2 - 1 and then clears the low bits, so that rt is ra rounded
// up to a multiple of the constant power of 2 pow2.
#define ROUND_UP(rt, ra, pow2) \
	ai	rt, ra, (pow2)-1; \
	andi	rt, rt, -(pow2);
261 * ----------------------------------------------------------------
262 * Splat - replicate a particular slot into all slots
264 * ----------------------------------------------------------------
// VSPLTB(rt, ra, s): replicate the byte in slot s [0,15] of ra into
// every byte slot of rt.  Clobbers _gc_t0.
//
// ilh fills every halfword of _gc_t0 with the byte pair (s, s), so the
// shuffle pattern selects byte s of ra for every result byte.
#define VSPLTB(rt, ra, s) \
	ilh	_gc_t0, (s)*0x0101; \
	shufb	rt, ra, ra, _gc_t0;
// VSPLTH(rt, ra, s): replicate the halfword in slot s [0,7] of ra into
// every halfword slot of rt.  Clobbers _gc_t0.
//
// ilh fills every halfword of _gc_t0 with the byte pair (2s, 2s+1), so
// the shuffle pattern selects those two bytes of ra for every halfword.
#define VSPLTH(rt, ra, s) \
	ilh	_gc_t0, 2*(s)*0x0101 + 0x0001; \
	shufb	rt, ra, ra, _gc_t0;
// VSPLTW(rt, ra, s): replicate the word in slot s [0,3] of ra into
// every word slot of rt.  Clobbers _gc_t0.
//
// The shuffle pattern repeats the byte indices (4s, 4s+1, 4s+2, 4s+3) in
// every word.  ilhu (immediate load halfword upper) sets the upper
// halfword of each word to (4s, 4s+1) and zeroes the lower halfword;
// iohl then ORs (4s+2, 4s+3) into the lower halfword.
#define VSPLTW(rt, ra, s) \
	ilhu	_gc_t0, 4*(s)*0x0101 + 0x0001; \
	iohl	_gc_t0, 4*(s)*0x0101 + 0x0203; \
	shufb	rt, ra, ra, _gc_t0;
// VSPLTD(rt, ra, s): replicate the doubleword in slot s [0,1] of ra into
// both doubleword slots of rt.  Clobbers _gc_t0.
//
// cdd builds the insertion mask for a doubleword at offset 8 from a
// 16-byte aligned address; used as a shufb pattern with both sources
// equal, it copies bytes 0-7 of the source into both halves of rt.
// rotqbyi takes exactly three operands (rt, ra, byte count); rotating
// left by s*8 bytes brings doubleword s into the preferred slot first.
#define VSPLTD(rt, ra, s) \
	/* sp is always 16-byte aligned */ \
	cdd	_gc_t0, 8(sp);		/* 0x10111213 14151617 00010203 04050607 */ \
	rotqbyi	rt, ra, (s) << 3;	/* rotate double into preferred slot */ \
	shufb	rt, rt, rt, _gc_t0;
291 * ----------------------------------------------------------------
292 * lots of min/max variations...
294 * On a slot by slot basis, compute the min or max
296 * U - unsigned, else signed
297 * B,H,{} - byte, halfword, word
299 * ----------------------------------------------------------------
// MIN_SELB(rt, ra, rb, rc): final select step of the min macros.
// rc is the "ra > rb" comparison mask: where its bits are set rt takes
// rb, elsewhere rt takes ra.
#define MIN_SELB(rt, ra, rb, rc) \
	selb	rt, ra, rb, rc;
// MAX_SELB(rt, ra, rb, rc): final select step of the max macros.
// rc is the "ra > rb" comparison mask: where its bits are set rt takes
// ra, elsewhere rt takes rb (the selb sources are swapped).
#define MAX_SELB(rt, ra, rb, rc) \
	selb	rt, rb, ra, rc;
// MIN(rt, ra, rb): signed word minimum, slot by slot.
// cgt marks the slots where ra > rb; selb takes rb in those slots and
// ra in the others.  Clobbers _gc_t0.
#define MIN(rt, ra, rb) \
	cgt	_gc_t0, ra, rb; \
	selb	rt, ra, rb, _gc_t0;
// MAX(rt, ra, rb): signed word maximum, slot by slot.
// cgt marks the slots where ra > rb; selb takes ra in those slots and
// rb in the others.  Clobbers _gc_t0.
#define MAX(rt, ra, rb) \
	cgt	_gc_t0, ra, rb; \
	selb	rt, rb, ra, _gc_t0;
// UMIN(rt, ra, rb): unsigned word minimum, slot by slot.
// clgt marks the slots where ra > rb (unsigned); selb takes rb in those
// slots and ra in the others.  Clobbers _gc_t0.
#define UMIN(rt, ra, rb) \
	clgt	_gc_t0, ra, rb; \
	selb	rt, ra, rb, _gc_t0;
// UMAX(rt, ra, rb): unsigned word maximum, slot by slot.
// clgt marks the slots where ra > rb (unsigned); selb takes ra in those
// slots and rb in the others.  Clobbers _gc_t0.
#define UMAX(rt, ra, rb) \
	clgt	_gc_t0, ra, rb; \
	selb	rt, rb, ra, _gc_t0;
// MINB(rt, ra, rb): signed byte minimum, slot by slot.
// cgtb marks the byte slots where ra > rb; selb takes rb in those slots
// and ra in the others.  Clobbers _gc_t0.
#define MINB(rt, ra, rb) \
	cgtb	_gc_t0, ra, rb; \
	selb	rt, ra, rb, _gc_t0;
// MAXB(rt, ra, rb): signed byte maximum, slot by slot.
// cgtb marks the byte slots where ra > rb; selb takes ra in those slots
// and rb in the others.  Clobbers _gc_t0.
#define MAXB(rt, ra, rb) \
	cgtb	_gc_t0, ra, rb; \
	selb	rt, rb, ra, _gc_t0;
// UMINB(rt, ra, rb): unsigned byte minimum, slot by slot.
// clgtb marks the byte slots where ra > rb (unsigned); selb takes rb in
// those slots and ra in the others.  Clobbers _gc_t0.
#define UMINB(rt, ra, rb) \
	clgtb	_gc_t0, ra, rb; \
	selb	rt, ra, rb, _gc_t0;
// UMAXB(rt, ra, rb): unsigned byte maximum, slot by slot.
// clgtb marks the byte slots where ra > rb (unsigned); selb takes ra in
// those slots and rb in the others.  Clobbers _gc_t0.
#define UMAXB(rt, ra, rb) \
	clgtb	_gc_t0, ra, rb; \
	selb	rt, rb, ra, _gc_t0;
// MINH(rt, ra, rb): signed halfword minimum, slot by slot.
// cgth marks the halfword slots where ra > rb; selb takes rb in those
// slots and ra in the others.  Clobbers _gc_t0.
#define MINH(rt, ra, rb) \
	cgth	_gc_t0, ra, rb; \
	selb	rt, ra, rb, _gc_t0;
// MAXH(rt, ra, rb): signed halfword maximum, slot by slot.
// cgth marks the halfword slots where ra > rb; selb takes ra in those
// slots and rb in the others.  Clobbers _gc_t0.
#define MAXH(rt, ra, rb) \
	cgth	_gc_t0, ra, rb; \
	selb	rt, rb, ra, _gc_t0;
// UMINH(rt, ra, rb): unsigned halfword minimum, slot by slot.
// clgth marks the halfword slots where ra > rb (unsigned); selb takes rb
// in those slots and ra in the others.  Clobbers _gc_t0.
#define UMINH(rt, ra, rb) \
	clgth	_gc_t0, ra, rb; \
	selb	rt, ra, rb, _gc_t0;
// UMAXH(rt, ra, rb): unsigned halfword maximum, slot by slot.
// clgth marks the halfword slots where ra > rb (unsigned); selb takes ra
// in those slots and rb in the others.  Clobbers _gc_t0.
#define UMAXH(rt, ra, rb) \
	clgth	_gc_t0, ra, rb; \
	selb	rt, rb, ra, _gc_t0;
// FMIN(rt, ra, rb): floating-point minimum, slot by slot.
// fcgt marks the slots where ra > rb; selb takes rb in those slots and
// ra in the others.  Clobbers _gc_t0.
#define FMIN(rt, ra, rb) \
	fcgt	_gc_t0, ra, rb; \
	selb	rt, ra, rb, _gc_t0;
// FMAX(rt, ra, rb): floating-point maximum, slot by slot.
// fcgt marks the slots where ra > rb; selb takes ra in those slots and
// rb in the others.  Clobbers _gc_t0.
#define FMAX(rt, ra, rb) \
	fcgt	_gc_t0, ra, rb; \
	selb	rt, rb, ra, _gc_t0;
// FMINMAG(rt, ra, rb): ignoring the sign, select slot by slot the value
// with the minimum magnitude.
// fcmgt marks the slots where |ra| > |rb|; selb takes rb in those slots
// and ra in the others.  Clobbers _gc_t0.
#define FMINMAG(rt, ra, rb) \
	fcmgt	_gc_t0, ra, rb; \
	selb	rt, ra, rb, _gc_t0;
// FMAXMAG(rt, ra, rb): ignoring the sign, select slot by slot the value
// with the maximum magnitude.
// fcmgt marks the slots where |ra| > |rb|; selb takes ra in those slots
// and rb in the others.  Clobbers _gc_t0.
#define FMAXMAG(rt, ra, rb) \
	fcmgt	_gc_t0, ra, rb; \
	selb	rt, rb, ra, _gc_t0;
380 #endif /* INCLUDED_GC_SPU_MACS_H */