2 # Copyright 2002 Free Software Foundation, Inc.
4 # This file is part of GNU Radio
6 # GNU Radio is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3, or (at your option)
11 # GNU Radio is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with GNU Radio; see the file COPYING. If not, write to
18 # the Free Software Foundation, Inc., 51 Franklin Street,
19 # Boston, MA 02110-1301, USA.
22 # SIMD MMX dot product
23 # Equivalent to the following C code:
24 # long dotprod(signed short *a,signed short *b,int cnt)
32 # a and b should also be 64-bit aligned, or speed will suffer greatly
33 # Copyright 1999, Phil Karn KA9Q
34 # May be used under the terms of the GNU public license
39 .file "short_dotprod_mmx.S"
# short_dotprod_mmx: MMX dot product of two arrays of signed 16-bit values.
# Syntax is AT&T (source operand first, destination last), 32-bit x86.
# Arguments are read relative to %ebp: b at 12(%ebp), cnt at 16(%ebp).
# Register roles, as used by the instructions below:
#   %esi = read pointer into a      %edi = read pointer into b
#   %ecx = remaining loop count     %mm0 = running sum, two 32-bit halves
#   %ebx = scratch for the low half of %mm0 during the final reduction
#   %eax = scalar sum once both halves of %mm0 have been added together
# NOTE(review): %ebx, %esi and %edi are used or written here and are
# callee-saved in the i386 System V ABI - confirm they are saved and restored.
# NOTE(review): confirm emms is executed before returning, because the MMX
# registers alias the x87 register stack.
# GLOB_SYMB, DEF_FUNC_HEAD and FUNC_TAIL are preprocessor macros defined
# outside this file; presumably they handle symbol decoration and the
# function type and size directives - confirm against their definitions.
43 .globl GLOB_SYMB(short_dotprod_mmx)
44 DEF_FUNC_HEAD(short_dotprod_mmx)
45 GLOB_SYMB(short_dotprod_mmx):
53 movl 12(%ebp),%edi # edi = b (argument at offset 12 from the frame pointer)
54 movl 16(%ebp),%ecx # ecx = cnt (argument at offset 16 from the frame pointer)
55 pxor %mm0,%mm0 # clear running sum (in two 32-bit halves)
57 # MMX dot product loop unrolled 4 times, crunching 16 terms per loop
# Each pass subtracts 4 from the count while handling 16 terms, which
# suggests cnt counts groups of four 16-bit terms - confirm with the caller.
59 .Loop1mmx: subl $4,%ecx
# Load four 16-bit values from a, then multiply pairwise with four values
# from b; pmaddwd adds adjacent products, leaving two 32-bit partial sums.
62 movq (%esi),%mm1 # mm1 = a[3],a[2],a[1],a[0]
63 pmaddwd (%edi),%mm1 # mm1 = b[3]*a[3]+b[2]*a[2],b[1]*a[1]+b[0]*a[0]
85 # MMX dot product loop, not unrolled, crunching 4 terms per loop
86 # This could be redone as Duff's Device on the unrolled loop above
# Final reduction: fold the two 32-bit halves of %mm0 into one scalar.
98 movd %mm0,%ebx # low 32-bit half of the running sum to %ebx
99 punpckhdq %mm0,%mm0 # copy the high 32-bit half of %mm0 into its low half
101 addl %ebx,%eax # eax += low half; running sum now in %eax
112 FUNC_TAIL(short_dotprod_mmx)
113 .ident "Hand coded x86 MMX assembly"