2 # Copyright 2002 Free Software Foundation, Inc.
4 # This file is part of GNU Radio
6 # GNU Radio is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2, or (at your option)
11 # GNU Radio is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with GNU Radio; see the file COPYING. If not, write to
18 # the Free Software Foundation, Inc., 51 Franklin Street,
19 # Boston, MA 02110-1301, USA.
23 # input and taps are guaranteed to be 16-byte aligned.
24 # n_4_float_blocks is != 0
28 # sse_float_dotprod (const float *input,
29 # const float *taps, unsigned n_4_float_blocks)
38 # sum0 += input[0] * taps[0];
39 # sum1 += input[1] * taps[1];
40 # sum2 += input[2] * taps[2];
41 # sum3 += input[3] * taps[3];
46 # } while (--n_4_float_blocks != 0);
49 # return sum0 + sum1 + sum2 + sum3;
# Assembler bookkeeping: record the source file name for this object.
54 .file "3dnow_float_dotprod_simple.s"
# Make sse_float_dotprod a global symbol and mark it as a function.
# NOTE(review): the symbol is named sse_float_dotprod, but the .file name,
# the .ident string and the %mm registers used below all point to a
# 3DNow!/MMX implementation -- confirm the name is intentional; do not
# rename without checking callers.
58 .globl sse_float_dotprod
59 .type sse_float_dotprod,@function
68 # The plan is to get it computing the correct answer, and
69 # then to unroll and schedule the inner loop.
# Zero mm4 and mm5 (a register XORed with itself is 0); per the comment
# further down, these two registers hold the partial sums.
71 pxor %mm4, %mm4 # mm4 = 0 0
72 pxor %mm5, %mm5 # mm5 = 0 0
90 # at this point mm4 and mm5 contain partial sums
# Record the symbol's size as the distance from its entry to .Lfe1.
# .Lfe1 is defined outside the lines visible here -- presumably the
# end-of-function label; verify.
101 .size sse_float_dotprod,.Lfe1-sse_float_dotprod
# Embed an identification string in the object file.
102 .ident "Hand coded x86 3DNow! assembly"