3 * Copyright 2002 Free Software Foundation, Inc.
5 * This file is part of GNU Radio
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
27 #include <cppunit/TestAssert.h>
28 #include <qa_complex_dotprod_x86.h>
29 #include <complex_dotprod_x86.h>
33 #include <sse_debug.h>
/// Assert that a two-element result matches an expected pair.
///
/// Checks actual[0] against expected0 and actual[1] against expected1 with
/// CPPUNIT_ASSERT_DOUBLES_EQUAL, each within the absolute tolerance delta.
///
/// The two checks are wrapped in do { ... } while (0) so the macro expands
/// to exactly one statement: it stays correct under an unbraced if/else and
/// must be invoked with a trailing semicolon. `actual` is parenthesized so
/// that any pointer- or array-valued expression can be passed. Note that
/// `actual` and `delta` are each evaluated twice.
#define assertcomplexEqual(expected0,expected1,actual,delta) \
  do { \
    CPPUNIT_ASSERT_DOUBLES_EQUAL (expected0, (actual)[0], delta); \
    CPPUNIT_ASSERT_DOUBLES_EQUAL (expected1, (actual)[1], delta); \
  } while (0)
// Elements per processing block: the reference dot product in this file
// reads taps[0]..taps[3] (four floats) and input[0]..input[1] (two shorts)
// per block, and setUp() sizes both buffers from these counts.
47 #define FLOATS_PER_BLK 4
48 #define SHORTS_PER_BLK 2
// Absolute tolerance handed to assertcomplexEqual by the hand-computed cases.
50 #define ERR_DELTA (1e-6)
// Divide random() by RANDOM_MAX, shift by -0.5 and double the result.
// NOTE(review): RANDOM_MAX is not defined in the visible part of this file;
// the stated (-1, 1) range assumes it is the largest value random() returns.
55 return 2.0 * ((float) random () / RANDOM_MAX - 0.5); // uniformly (-1, 1)
// Fill buf[0..n-1] with random whole-number floats: each element is
// uniform() scaled by 32767 and rounded with rint().
59 random_floats (float *buf, unsigned n)
61 for (unsigned i = 0; i < n; i++)
62 buf[i] = rint (uniform () * 32767);
// Visit each of the n elements of buf in order.
// NOTE(review): the loop body is not visible here; going by the name it
// presumably stores 0 into every element — confirm.
66 zero_floats (float *buf, unsigned n)
68 for (unsigned i = 0; i < n; i++)
// Fill buf[0..n-1] with random shorts: each element is uniform() scaled by
// 32767, rounded with rint() and then converted to short.
73 random_shorts (short *buf, unsigned n)
75 for (unsigned i = 0; i < n; i++)
76 buf[i] = (short) rint (uniform () * 32767);
// Visit each of the n elements of buf in order.
// NOTE(review): the loop body is not visible here; going by the name it
// presumably stores 0 into every element — confirm.
80 zero_shorts (short *buf, unsigned n)
82 for (unsigned i = 0; i < n; i++)
// Plain C++ reference dot product; t3_base() compares the kernel under
// test against its output.
//
// input              - short samples, two consumed per block
// taps               - float taps, four consumed per block
// n_2_complex_blocks - number of blocks to process; must be at least 1,
//                      because the unsigned counter is decremented before
//                      it is tested, so 0 would wrap instead of stopping
// result             - receives the two summed components in result[0..1]
87 ref_complex_dotprod (const short *input,
88 const float *taps, unsigned n_2_complex_blocks,
// Two independent accumulators, each with two components.
91 float sum0[2] = {0,0};
92 float sum1[2] = {0,0};
// input[0] is paired with taps[0], taps[1]; input[1] with taps[2], taps[3].
96 sum0[0] += input[0] * taps[0];
97 sum0[1] += input[0] * taps[1];
98 sum1[0] += input[1] * taps[2];
99 sum1[1] += input[1] * taps[3];
// NOTE(review): how input and taps move on to the next block is not
// visible here — confirm they advance before this test.
104 } while (--n_2_complex_blocks != 0);
// Fold the two accumulators together component by component.
107 result[0] = sum0[0] + sum1[0];
108 result[1] = sum0[1] + sum1[1];
// Allocate the two buffers used by every test case: MAX_BLKS blocks of
// FLOATS_PER_BLK floats for taps and MAX_BLKS blocks of SHORTS_PER_BLK
// shorts for input.
// NOTE(review): calloc16Align is presumably a calloc-style (count, size)
// allocator returning 16-byte-aligned storage — confirm against its
// declaration.
112 qa_complex_dotprod_x86::setUp ()
114 taps = (float *) calloc16Align (MAX_BLKS,
115 sizeof (float) * FLOATS_PER_BLK);
117 input = (short *) calloc16Align (MAX_BLKS,
118 sizeof (short) * SHORTS_PER_BLK);
// A null result from either allocation is detected here.
// NOTE(review): the action taken on failure is not visible — confirm.
120 if (taps == 0 || input == 0)
// Counterpart of setUp().
// NOTE(review): the body is not visible here; presumably it releases the
// taps and input buffers — confirm.
125 qa_complex_dotprod_x86::tearDown ()
// Apply zero_floats to taps and zero_shorts to input, each over the full
// element count that setUp() allocates (MAX_BLKS blocks).
135 qa_complex_dotprod_x86::zb () // "zero both"
137 zero_floats (taps, MAX_BLKS * FLOATS_PER_BLK);
138 zero_shorts (input, MAX_BLKS * SHORTS_PER_BLK);
// Single-term checks for the kernel passed in as complex_dotprod: each case
// sets one input sample and one tap pair and expects exactly that product.
// Every expected value below equals only the product set on the preceding
// line, so the buffers have to be cleared between cases.
// NOTE(review): the clearing calls and the declaration of result are not
// visible here — confirm.
146 qa_complex_dotprod_x86::t1_base (complex_dotprod_t complex_dotprod)
150 // cerr << "Testing dump_xmm_regs\n";
153 // test basic cases, 1 block
// Nothing set: both components are expected to be 0.
156 complex_dotprod (input, taps, 1, result);
157 assertcomplexEqual (0.0, 0.0, result, ERR_DELTA);
// First sample of block 0, paired with taps[0], taps[1].
162 input[0] = 1; taps[0] = 1.0; taps[1] = -1.0;
163 complex_dotprod (input, taps, 1, result);
164 //cerr << result[0] << " " << result[1] << "\n";
165 assertcomplexEqual (1.0, -1.0, result, ERR_DELTA);
// Second sample of block 0, paired with taps[2], taps[3].
168 input[1] = 2; taps[2] = 1.0; taps[3] = -1.0;
169 complex_dotprod (input, taps, 1, result);
170 assertcomplexEqual (2.0, -2.0, result, ERR_DELTA);
// First sample of block 1; the block count is raised to 2 to reach it.
173 input[2] = 3; taps[4] = 1.0; taps[5] = -1.0;
174 complex_dotprod (input, taps, 2, result);
175 assertcomplexEqual (3.0, -3.0, result, ERR_DELTA);
// Second sample of block 1.
178 input[3] = 4; taps[6] = 1.0; taps[7] = -1.0;
179 complex_dotprod (input, taps, 2, result);
180 assertcomplexEqual (4.0, -4.0, result, ERR_DELTA);
// From here on the sample stays at 1 and only the tap magnitude changes:
// 0.5, 2, 3 and 4.
185 input[0] = 1; taps[0] = 0.5; taps[1] = -0.5;
186 complex_dotprod (input, taps, 1, result);
187 assertcomplexEqual (0.5, -0.5, result, ERR_DELTA);
190 input[0] = 1; taps[0] = 2.0; taps[1] = -2.0;
191 complex_dotprod (input, taps, 1, result);
192 assertcomplexEqual (2.0, -2.0, result, ERR_DELTA);
195 input[0] = 1; taps[0] = 3.0; taps[1] = -3.0;
196 complex_dotprod (input, taps, 1, result);
197 assertcomplexEqual (3.0, -3.0, result, ERR_DELTA);
200 input[0] = 1; taps[0] = 4.0; taps[1] = -4.0;
201 complex_dotprod (input, taps, 1, result);
202 assertcomplexEqual (4.0, -4.0, result, ERR_DELTA);
// Multi-term check for the kernel passed in as complex_dotprod: several
// samples and tap pairs are set at once and the summed result is verified.
// NOTE(review): the declaration of result and any initial clearing of the
// buffers are not visible here — confirm.
209 qa_complex_dotprod_x86::t2_base (complex_dotprod_t complex_dotprod)
// Fill both samples of blocks 0 and 1; each tap pair is (+v, -v).
214 input[0] = 1; taps[0] = 2.0; taps[1] = -2.0;
215 input[1] = 3; taps[2] = 5.0; taps[3] = -5.0;
216 input[2] = 7; taps[4] = 11.0; taps[5] = -11.0;
217 input[3] = 13; taps[6] = 17.0; taps[7] = -17.0;
// 1*2 + 3*5 + 7*11 + 13*17 = 315, negated for the second component.
219 complex_dotprod (input, taps, 2, result);
220 assertcomplexEqual (315.0, -315.0, result, ERR_DELTA);
// Add the first sample of block 2: 315 + 19*23 = 752. The other half of
// that block (input[5], taps[10], taps[11]) is not set in this function.
222 input[4] = 19; taps[8] = 23.0; taps[9] = -23.0;
223 complex_dotprod (input, taps, 3, result);
224 assertcomplexEqual (752.0, -752.0, result, ERR_DELTA);
// Randomized comparison: ten rounds of random data over all MAX_BLKS
// blocks, with the kernel passed in as complex_dotprod checked against
// ref_complex_dotprod.
// NOTE(review): the declarations of ref and calc and the actual-value
// arguments of the two assertions are not visible here — confirm.
232 qa_complex_dotprod_x86::t3_base (complex_dotprod_t complex_dotprod)
234 srandom (0); // we want reproducibility
236 for (unsigned int i = 0; i < 10; i++){
237 random_shorts (input, MAX_BLKS * SHORTS_PER_BLK);
238 random_floats (taps, MAX_BLKS * FLOATS_PER_BLK);
240 // we use a sloppy error margin because on the x86 architecture,
241 // our reference implementation is using 80 bit floating point
242 // arithmetic, while the SSE version is using 32 bit floating point
// Reference result first, then the kernel under test on the same data.
246 ref_complex_dotprod (input, taps, MAX_BLKS, ref);
248 complex_dotprod (input, taps, MAX_BLKS, calc);
// The tolerance is relative: 1e-4 times the magnitude of the reference
// component being checked.
249 CPPUNIT_ASSERT_DOUBLES_EQUAL (ref[0],
251 fabs (ref[0]) * 1e-4);
252 CPPUNIT_ASSERT_DOUBLES_EQUAL (ref[1],
254 fabs (ref[1]) * 1e-4);
// t1_base cases for the complex_dotprod_3dnowext kernel. When
// gr_cpu::has_3dnowext() is false the "not tested" notice goes to cerr.
259 qa_complex_dotprod_x86::t1_3dnowext ()
261 if (!gr_cpu::has_3dnowext ()){
262 cerr << "No 3DNow!Ext support; not tested\n";
265 t1_base (complex_dotprod_3dnowext);
// t2_base cases for the complex_dotprod_3dnowext kernel. When
// gr_cpu::has_3dnowext() is false the "not tested" notice goes to cerr.
269 qa_complex_dotprod_x86::t2_3dnowext ()
271 if (!gr_cpu::has_3dnowext ()){
272 cerr << "No 3DNow!Ext support; not tested\n";
275 t2_base (complex_dotprod_3dnowext);
// t3_base cases for the complex_dotprod_3dnowext kernel. When
// gr_cpu::has_3dnowext() is false the "not tested" notice goes to cerr.
279 qa_complex_dotprod_x86::t3_3dnowext ()
281 if (!gr_cpu::has_3dnowext ()){
282 cerr << "No 3DNow!Ext support; not tested\n";
285 t3_base (complex_dotprod_3dnowext);
// t1_base cases for the complex_dotprod_3dnow kernel. When
// gr_cpu::has_3dnow() is false the "not tested" notice goes to cerr.
289 qa_complex_dotprod_x86::t1_3dnow ()
291 if (!gr_cpu::has_3dnow ()){
292 cerr << "No 3DNow! support; not tested\n";
295 t1_base (complex_dotprod_3dnow);
// t2_base cases for the complex_dotprod_3dnow kernel. When
// gr_cpu::has_3dnow() is false the "not tested" notice goes to cerr.
299 qa_complex_dotprod_x86::t2_3dnow ()
301 if (!gr_cpu::has_3dnow ()){
302 cerr << "No 3DNow! support; not tested\n";
305 t2_base (complex_dotprod_3dnow);
// t3_base cases for the complex_dotprod_3dnow kernel. When
// gr_cpu::has_3dnow() is false the "not tested" notice goes to cerr.
309 qa_complex_dotprod_x86::t3_3dnow ()
311 if (!gr_cpu::has_3dnow ()){
312 cerr << "No 3DNow! support; not tested\n";
315 t3_base (complex_dotprod_3dnow);
// t1_base cases for the complex_dotprod_sse kernel. When
// gr_cpu::has_sse() is false the "not tested" notice goes to cerr.
319 qa_complex_dotprod_x86::t1_sse ()
321 if (!gr_cpu::has_sse ()){
322 cerr << "No SSE support; not tested\n";
325 t1_base (complex_dotprod_sse);
// t2_base cases for the complex_dotprod_sse kernel. When
// gr_cpu::has_sse() is false the "not tested" notice goes to cerr.
329 qa_complex_dotprod_x86::t2_sse ()
331 if (!gr_cpu::has_sse ()){
332 cerr << "No SSE support; not tested\n";
335 t2_base (complex_dotprod_sse);
// t3_base cases for the complex_dotprod_sse kernel. When
// gr_cpu::has_sse() is false the "not tested" notice goes to cerr.
339 qa_complex_dotprod_x86::t3_sse ()
341 if (!gr_cpu::has_sse ()){
342 cerr << "No SSE support; not tested\n";
345 t3_base (complex_dotprod_sse);