3 * Copyright 2002 Free Software Foundation, Inc.
5 * This file is part of GNU Radio
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
27 #include <cppunit/TestAssert.h>
28 #include <qa_float_dotprod_x86.h>
29 #include <float_dotprod_x86.h>
33 #include <sse_debug.h>
// Number of floats in one block.  The test buffers are sized as
// MAX_BLKS * FLOATS_PER_BLK floats, and the dot-product routines are given
// their length as a count of blocks.
42 #define FLOATS_PER_BLK 4
// Tolerance handed to CPPUNIT_ASSERT_DOUBLES_EQUAL by the fixed-value tests
// (t1_base and t2_base).
44 #define ERR_DELTA (1e-6)
// Fill the first n elements of buf, setting each one to
// random () - RANDOM_MAX/2.
// NOTE(review): RANDOM_MAX is not defined in the visible part of this file;
// presumably it is the upper bound of random () supplied by a project
// header, which would centre the values on zero -- confirm.
48 random_floats (float *buf, unsigned n)
50 for (unsigned i = 0; i < n; i++)
51 buf[i] = random () - RANDOM_MAX/2;
// Walk the first n elements of buf.
// NOTE(review): the loop body is not visible here; judging by the name and
// by its use in zb () it presumably stores 0 into each element -- confirm.
55 zero_floats (float *buf, unsigned n)
57 for (unsigned i = 0; i < n; i++)
// Reference dot product in plain C++, used as the expected value when
// checking the optimised float_dotprod implementations.
//
// input, taps       operands; each must hold at least
//                   4 * n_4_float_blocks floats
// n_4_float_blocks  length of the operands, counted in blocks of 4 floats;
//                   0 is allowed and yields 0
//
// Returns the sum of input[i] * taps[i] over all 4 * n_4_float_blocks
// elements.  Four accumulators are kept, one per position within a block,
// and they are only combined at the very end.
float
ref_float_dotprod (const float *input,
		   const float *taps, unsigned n_4_float_blocks)
{
  float sum0 = 0;
  float sum1 = 0;
  float sum2 = 0;
  float sum3 = 0;

  // A counted loop instead of do { ... } while (--n_4_float_blocks != 0):
  // the do/while form runs the body once before testing, so a block count
  // of 0 wraps the unsigned counter and reads far past both buffers.
  for (unsigned blk = 0; blk < n_4_float_blocks; blk++){

    sum0 += input[0] * taps[0];
    sum1 += input[1] * taps[1];
    sum2 += input[2] * taps[2];
    sum3 += input[3] * taps[3];

    // step to the next block of 4 floats
    input += 4;
    taps += 4;
  }

  return sum0 + sum1 + sum2 + sum3;
}
// Fixture set-up: obtain the taps and input buffers from calloc16Align,
// asking each time for MAX_BLKS elements of sizeof (float) * FLOATS_PER_BLK
// bytes, i.e. room for MAX_BLKS * FLOATS_PER_BLK floats per buffer.
// NOTE(review): calloc16Align is declared elsewhere; by its name it
// presumably returns zero-filled, 16-byte-aligned memory -- confirm.
87 qa_float_dotprod_x86::setUp ()
89 taps = (float *) calloc16Align (MAX_BLKS,
90 sizeof (float) * FLOATS_PER_BLK);
92 input = (float *) calloc16Align (MAX_BLKS,
93 sizeof (float) * FLOATS_PER_BLK);
// A null pointer from either allocation is treated as a failure.
// NOTE(review): the action taken on failure is not visible here.
95 if (taps == 0 || input == 0)
// Fixture tear-down, the counterpart of setUp ().
// NOTE(review): the body is not visible here; presumably it releases the
// taps and input buffers obtained in setUp () -- confirm.
100 qa_float_dotprod_x86::tearDown ()
// Reset the fixture data: hand both buffers, over their full
// MAX_BLKS * FLOATS_PER_BLK length, to zero_floats ().
110 qa_float_dotprod_x86::zb () // "zero both"
112 zero_floats (taps, MAX_BLKS * FLOATS_PER_BLK);
113 zero_floats (input, MAX_BLKS * FLOATS_PER_BLK);
// Single-block sanity checks for the dot-product routine passed in as
// float_dotprod.  Every call uses a block count of 1 and the result is
// compared against a literal expected value within ERR_DELTA.
//
// NOTE(review): the expected values below (e.g. 2.0 rather than 2.5 for the
// second case) only hold if both buffers are cleared before each case.  The
// clearing is not visible here; presumably zb () is called between the
// cases -- confirm.
121 qa_float_dotprod_x86::t1_base (float_dotprod_t float_dotprod)
124 // cerr << "Testing dump_xmm_regs\n";
127 // test basic cases, 1 block
// Nothing has been assigned yet in this function: expect 0.0.
130 CPPUNIT_ASSERT_DOUBLES_EQUAL (0.0, float_dotprod (input, taps, 1), ERR_DELTA);
// Vary the input value against a tap of 1.0, one position (0..3) at a time;
// the expected result is the input value itself.
135 input[0] = 0.5; taps[0] = 1.0;
136 CPPUNIT_ASSERT_DOUBLES_EQUAL (0.5, float_dotprod (input, taps, 1), ERR_DELTA);
139 input[1] = 2.0; taps[1] = 1.0;
140 CPPUNIT_ASSERT_DOUBLES_EQUAL (2.0, float_dotprod (input, taps, 1), ERR_DELTA);
143 input[2] = 3.0; taps[2] = 1.0;
144 CPPUNIT_ASSERT_DOUBLES_EQUAL (3.0, float_dotprod (input, taps, 1), ERR_DELTA);
147 input[3] = 4.0; taps[3] = 1.0;
148 CPPUNIT_ASSERT_DOUBLES_EQUAL (4.0, float_dotprod (input, taps, 1), ERR_DELTA);
// Now vary the tap value at position 0 against an input of 1.0; the expected
// result is the tap value itself.
153 input[0] = 1.0; taps[0] = 0.5;
154 CPPUNIT_ASSERT_DOUBLES_EQUAL (0.5, float_dotprod (input, taps, 1), ERR_DELTA);
157 input[0] = 1.0; taps[0] = 2.0;
158 CPPUNIT_ASSERT_DOUBLES_EQUAL (2.0, float_dotprod (input, taps, 1), ERR_DELTA);
161 input[0] = 1.0; taps[0] = 3.0;
162 CPPUNIT_ASSERT_DOUBLES_EQUAL (3.0, float_dotprod (input, taps, 1), ERR_DELTA);
165 input[0] = 1.0; taps[0] = 4.0;
166 CPPUNIT_ASSERT_DOUBLES_EQUAL (4.0, float_dotprod (input, taps, 1), ERR_DELTA);
// Checks with every position of a block in use, first over one block and
// then over two, for the dot-product routine passed in as float_dotprod.
173 qa_float_dotprod_x86::t2_base (float_dotprod_t float_dotprod)
// One full block: 1*2 + 3*5 + 7*11 + 13*17 = 315.
176 input[0] = 1.0; taps[0] = 2.0;
177 input[1] = 3.0; taps[1] = 5.0;
178 input[2] = 7.0; taps[2] = 11.0;
179 input[3] = 13.0; taps[3] = 17.0;
181 CPPUNIT_ASSERT_DOUBLES_EQUAL (315.0, float_dotprod (input, taps, 1), ERR_DELTA);
// Element 4 adds 19*23 = 437, so two blocks give 315 + 437 = 752.
// NOTE(review): this relies on elements 5..7 contributing nothing; the code
// that clears the buffers beforehand is not visible here -- confirm.
183 input[4] = 19.0; taps[4] = 23.0;
184 CPPUNIT_ASSERT_DOUBLES_EQUAL (752.0, float_dotprod (input, taps, 2), ERR_DELTA);
// Randomised comparison of float_dotprod against ref_float_dotprod.
// Ten rounds are run; each round refills both buffers over their full
// MAX_BLKS * FLOATS_PER_BLK length with random_floats () and compares the two
// results over MAX_BLKS blocks.
192 qa_float_dotprod_x86::t3_base (float_dotprod_t float_dotprod)
194 srandom (0); // we want reproducibility
196 for (unsigned int i = 0; i < 10; i++){
197 random_floats (input, MAX_BLKS * FLOATS_PER_BLK);
198 random_floats (taps, MAX_BLKS * FLOATS_PER_BLK);
200 // we use a sloppy error margin because on the x86 architecture,
201 // our reference implementation is using 80 bit floating point
202 // arithmetic, while the SSE version is using 32 bit floating point arithmetic.
// NOTE(review): the tolerance argument of the assertion below is not
// visible here.
205 float ref = ref_float_dotprod (input, taps, MAX_BLKS);
206 CPPUNIT_ASSERT_DOUBLES_EQUAL (ref,
207 float_dotprod (input, taps, MAX_BLKS),
// Run the t1_base cases against float_dotprod_3dnow.
// When gr_cpu::has_3dnow () reports no support, a notice is written to cerr.
// NOTE(review): the lines following the notice are not visible here;
// presumably the test returns early without calling t1_base -- confirm.
213 qa_float_dotprod_x86::t1_3dnow ()
215 if (!gr_cpu::has_3dnow ()){
216 cerr << "No 3DNow! support; not tested\n";
219 t1_base (float_dotprod_3dnow);
// Run the t2_base cases against float_dotprod_3dnow.
// When gr_cpu::has_3dnow () reports no support, a notice is written to cerr.
// NOTE(review): the lines following the notice are not visible here;
// presumably the test returns early without calling t2_base -- confirm.
223 qa_float_dotprod_x86::t2_3dnow ()
225 if (!gr_cpu::has_3dnow ()){
226 cerr << "No 3DNow! support; not tested\n";
229 t2_base (float_dotprod_3dnow);
// Run the randomised t3_base comparison against float_dotprod_3dnow.
// When gr_cpu::has_3dnow () reports no support, a notice is written to cerr.
// NOTE(review): the lines following the notice are not visible here;
// presumably the test returns early without calling t3_base -- confirm.
233 qa_float_dotprod_x86::t3_3dnow ()
235 if (!gr_cpu::has_3dnow ()){
236 cerr << "No 3DNow! support; not tested\n";
239 t3_base (float_dotprod_3dnow);
// Run the t1_base cases against float_dotprod_sse.
// When gr_cpu::has_sse () reports no support, a notice is written to cerr.
// NOTE(review): the lines following the notice are not visible here;
// presumably the test returns early without calling t1_base -- confirm.
243 qa_float_dotprod_x86::t1_sse ()
245 if (!gr_cpu::has_sse ()){
246 cerr << "No SSE support; not tested\n";
249 t1_base (float_dotprod_sse);
// Run the t2_base cases against float_dotprod_sse.
// When gr_cpu::has_sse () reports no support, a notice is written to cerr.
// NOTE(review): the lines following the notice are not visible here;
// presumably the test returns early without calling t2_base -- confirm.
253 qa_float_dotprod_x86::t2_sse ()
255 if (!gr_cpu::has_sse ()){
256 cerr << "No SSE support; not tested\n";
259 t2_base (float_dotprod_sse);
// Run the randomised t3_base comparison against float_dotprod_sse.
// When gr_cpu::has_sse () reports no support, a notice is written to cerr.
// NOTE(review): the lines following the notice are not visible here;
// presumably the test returns early without calling t3_base -- confirm.
263 qa_float_dotprod_x86::t3_sse ()
265 if (!gr_cpu::has_sse ()){
266 cerr << "No SSE support; not tested\n";
269 t3_base (float_dotprod_sse);