3 * Copyright 2002 Free Software Foundation, Inc.
5 * This file is part of GNU Radio
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
27 #include <cppunit/TestAssert.h>
28 #include <qa_ccomplex_dotprod_x86.h>
29 #include <ccomplex_dotprod_x86.h>
33 #include <sse_debug.h>
/// Assert that a two-element complex result matches an expected
/// (real, imaginary) pair to within +/- delta.
///
/// expected0 is compared with actual[0] and expected1 with actual[1],
/// both through CPPUNIT_ASSERT_DOUBLES_EQUAL.  The two checks are wrapped
/// in do { ... } while (0) so the macro expands to exactly one statement
/// and stays correct inside an unbraced if/else or loop body.  Arguments
/// are parenthesized so that an expression such as `buf + 2` indexes as
/// intended.  `actual` and `delta` are still evaluated twice, so pass
/// side-effect-free expressions.  Callers supply the trailing semicolon.
#define assertcomplexEqual(expected0,expected1,actual,delta) \
  do { \
    CPPUNIT_ASSERT_DOUBLES_EQUAL ((expected0), (actual)[0], (delta)); \
    CPPUNIT_ASSERT_DOUBLES_EQUAL ((expected1), (actual)[1], (delta)); \
  } while (0)
// Floats per block: the reference loop consumes four elements of input and
// taps per iteration (two interleaved re/im pairs), and the fixture buffers
// are sized as MAX_BLKS * FLOATS_PER_BLK floats.
47 #define FLOATS_PER_BLK 4
// Absolute tolerance passed to assertcomplexEqual by the hand-computed cases.
49 #define ERR_DELTA (1e-6)
// Maps random () onto a value centred on zero: the ratio random () / RANDOM_MAX
// is shifted down by 0.5 and then doubled.  The 2.0 and 0.5 literals are
// doubles, so the shift and the scale are evaluated in double precision.
// NOTE(review): the enclosing function header is not visible here; this is
// presumably the body of uniform (), which random_floats () calls — confirm.
54 return 2.0 * ((float) random () / RANDOM_MAX - 0.5); // uniformly (-1, 1)
// Fill buf[0] .. buf[n-1] with pseudo-random test samples.
//   buf - destination array; must have room for at least n floats
//   n   - number of elements to write
// Each sample is uniform () scaled by 32767 and rounded to an integral value
// with rint (), so the stored floats carry no fractional part.
58 random_floats (float *buf, unsigned n)
60 for (unsigned i = 0; i < n; i++)
61 buf[i] = rint (uniform () * 32767);
// Walk the first n elements of buf.
//   buf - array to clear; must have room for at least n floats
//   n   - number of elements visited
// NOTE(review): the loop body is not visible here; going by the name and by
// its use in zb () it presumably stores 0 into buf[i] — confirm.
65 zero_floats (float *buf, unsigned n)
67 for (unsigned i = 0; i < n; i++)
// Scalar reference implementation of the complex dot product; t3_base compares
// the kernel under test against it.
//   input, taps          - float arrays read as interleaved (re, im) pairs
//   n_2_ccomplex_blocks  - iteration count; every iteration multiplies two
//                          complex pairs (four floats from each array)
//   result               - output: result[0] is the real part of the sum,
//                          result[1] the imaginary part
// NOTE(review): the loop is a do/while with a pre-decrement of an unsigned
// counter, so it always runs at least once and a count of 0 would wrap around
// instead of terminating immediately — callers must pass >= 1.
// NOTE(review): the lines that advance input/taps between iterations are not
// visible here — confirm both pointers move forward by four floats per pass.
72 ref_ccomplex_dotprod (const float *input,
73 const float *taps, unsigned n_2_ccomplex_blocks,
// Two independent accumulators: sum0 for the first complex pair of each block,
// sum1 for the second.  Index 0 holds the real part, index 1 the imaginary.
76 float sum0[2] = {0,0};
77 float sum1[2] = {0,0};
// (a + bi) * (c + di) = (ac - bd) + (ad + bc)i, applied to each pair.
81 sum0[0] += input[0] * taps[0] - input[1] * taps[1];
82 sum0[1] += input[0] * taps[1] + input[1] * taps[0];
83 sum1[0] += input[2] * taps[2] - input[3] * taps[3];
84 sum1[1] += input[2] * taps[3] + input[3] * taps[2];
89 } while (--n_2_ccomplex_blocks != 0);
// Fold the two partial sums into the caller's output pair.
92 result[0] = sum0[0] + sum1[0];
93 result[1] = sum0[1] + sum1[1];
// Fixture set-up: allocate the taps and input buffers shared by the test cases.
// Each buffer is requested from calloc16Align () as MAX_BLKS elements of
// sizeof (float) * FLOATS_PER_BLK bytes.
// NOTE(review): going by the allocator's name the memory is presumably
// zero-filled and 16-byte aligned — confirm against its declaration.
97 qa_ccomplex_dotprod_x86::setUp ()
99 taps = (float *) calloc16Align (MAX_BLKS,
100 sizeof (float) * FLOATS_PER_BLK);
102 input = (float *) calloc16Align (MAX_BLKS,
103 sizeof (float) * FLOATS_PER_BLK);
// Either allocation returning a null pointer is treated as a failure.
// NOTE(review): the statement executed on failure is not visible here.
105 if (taps == 0 || input == 0)
// Fixture tear-down, the counterpart of setUp ().
// NOTE(review): the body is not visible here; it presumably releases the taps
// and input buffers allocated in setUp () — confirm.
110 qa_ccomplex_dotprod_x86::tearDown ()
// Reset helper: passes the full MAX_BLKS * FLOATS_PER_BLK extent of both the
// taps and the input buffer to zero_floats ().
120 qa_ccomplex_dotprod_x86::zb () // "zero both"
122 zero_floats (taps, MAX_BLKS * FLOATS_PER_BLK);
123 zero_floats (input, MAX_BLKS * FLOATS_PER_BLK);
// Hand-computed single-block checks, run against whichever kernel is passed in.
//   ccomplex_dotprod - kernel under test, called as
//                      (input, taps, block count, result)
// Each case sets a few elements of input/taps, runs one block and compares the
// result with the complex product worked out by hand (tolerance ERR_DELTA).
// The expected values only hold if every element not assigned in that case is
// zero.
// NOTE(review): a reset (presumably zb ()) must therefore run before each
// case; those lines and the declaration of result are not visible here —
// confirm.
131 qa_ccomplex_dotprod_x86::t1_base (ccomplex_dotprod_t ccomplex_dotprod)
135 // cerr << "Testing dump_xmm_regs\n";
138 // test basic cases, 1 block
// Nothing assigned: the expected sum is (0, 0).
141 ccomplex_dotprod (input, taps, 1, result);
142 assertcomplexEqual (0.0, 0.0, result, ERR_DELTA);
// First pair, real input: (1 + 0i) * (1 - 1i) = 1 - 1i.
147 input[0] = 1.0; taps[0] = 1.0; taps[1] = -1.0;
148 ccomplex_dotprod (input, taps, 1, result);
149 //cerr << result[0] << " " << result[1] << "\n";
150 assertcomplexEqual (1.0, -1.0, result, ERR_DELTA);
// First pair, imaginary input: (0 + 2i) * (1 - 1i) = 2 + 2i.
153 input[1] = 2.0; taps[0] = 1.0; taps[1] = -1.0;
154 ccomplex_dotprod (input, taps, 1, result);
155 assertcomplexEqual (2.0, 2.0, result, ERR_DELTA);
// Second pair, real input: (3 + 0i) * (1 - 1i) = 3 - 3i.
158 input[2] = 3.0; taps[2] = 1.0; taps[3] = -1.0;
159 ccomplex_dotprod (input, taps, 1, result);
160 assertcomplexEqual (3.0, -3.0, result, ERR_DELTA);
// Second pair, imaginary input: (0 + 4i) * (1 - 1i) = 4 + 4i.
163 input[3] = 4.0; taps[2] = 1.0; taps[3] = -1.0;
164 ccomplex_dotprod (input, taps, 1, result);
165 assertcomplexEqual (4.0, 4.0, result, ERR_DELTA);
// Remaining cases: real input 1 against taps (k - ki) for k = 0.5, 2, 3, 4;
// the product is simply (k, -k).
170 input[0] = 1.0; taps[0] = 0.5; taps[1] = -0.5;
171 ccomplex_dotprod (input, taps, 1, result);
172 assertcomplexEqual (0.5, -0.5, result, ERR_DELTA);
175 input[0] = 1.0; taps[0] = 2.0; taps[1] = -2.0;
176 ccomplex_dotprod (input, taps, 1, result);
177 assertcomplexEqual (2.0, -2.0, result, ERR_DELTA);
180 input[0] = 1.0; taps[0] = 3.0; taps[1] = -3.0;
181 ccomplex_dotprod (input, taps, 1, result);
182 assertcomplexEqual (3.0, -3.0, result, ERR_DELTA);
185 input[0] = 1.0; taps[0] = 4.0; taps[1] = -4.0;
186 ccomplex_dotprod (input, taps, 1, result);
187 assertcomplexEqual (4.0, -4.0, result, ERR_DELTA);
// Accumulation checks: values are added pair by pair WITHOUT clearing the
// earlier ones, so each expected result is the running sum of all products
// set so far (11+13i, then +100+30i = 111+43i, then +0-874i = 111-831i).
//   ccomplex_dotprod - kernel under test, called as
//                      (input, taps, block count, result)
// NOTE(review): the initial reset and the declaration of result are not
// visible here — the first expected value assumes the buffers start zeroed.
194 qa_ccomplex_dotprod_x86::t2_base (ccomplex_dotprod_t ccomplex_dotprod)
// First pair of block 0.
199 input[0] = 1.0; input[1] = 3.0; taps[0] = 5.0; taps[1] = -2.0;
201 //1*5-3*-2 =11, 1*-2+3*5=13
203 ccomplex_dotprod (input, taps, 1, result);
204 assertcomplexEqual (11.0, 13.0, result, ERR_DELTA);
206 //7*5-13*-5 =100, 7*-5+13*5=30
// Second pair of block 0; the result now sums both pairs.
208 input[2] = 7.0; input[3] = 13.0; taps[2] = 5.0; taps[3] = -5.0;
210 ccomplex_dotprod (input, taps, 1, result);
211 assertcomplexEqual (111.0, 43.0, result, ERR_DELTA);
// Indices 4 and 5 lie in the second block, hence the block count of 2 below.
213 input[4] = 19; input[5] = -19; taps[4] = 23.0; taps[5] = -23.0;
215 //19*23--19*-23 =0, 19*-23+-19*23=-874
217 ccomplex_dotprod (input, taps, 2, result);
218 assertcomplexEqual (111.0, -831.0, result, ERR_DELTA);
// Randomised comparison of the kernel under test against
// ref_ccomplex_dotprod () over the full MAX_BLKS extent of the buffers.
//   ccomplex_dotprod - kernel under test, called as
//                      (input, taps, block count, result)
// Ten rounds are run from a fixed seed.  The tolerance is relative:
// |ref| * 1e-4 for each component, so a reference component of exactly 0
// would demand an exact match.
// NOTE(review): the declarations of ref/calc and the middle argument of each
// assertion (presumably calc[0] / calc[1]) are not visible here — confirm.
226 qa_ccomplex_dotprod_x86::t3_base (ccomplex_dotprod_t ccomplex_dotprod)
228 srandom (0); // we want reproducibility
230 for (unsigned int i = 0; i < 10; i++){
231 random_floats (input, MAX_BLKS * FLOATS_PER_BLK);
232 random_floats (taps, MAX_BLKS * FLOATS_PER_BLK);
234 // we use a sloppy error margin because on the x86 architecture,
235 // our reference implementation is using 80 bit floating point
236 // arithmetic, while the SSE version is using 32 bit floating point
240 ref_ccomplex_dotprod (input, taps, MAX_BLKS, ref);
242 ccomplex_dotprod (input, taps, MAX_BLKS, calc);
243 CPPUNIT_ASSERT_DOUBLES_EQUAL (ref[0],
245 fabs (ref[0]) * 1e-4);
246 CPPUNIT_ASSERT_DOUBLES_EQUAL (ref[1],
248 fabs (ref[1]) * 1e-4);
// Runs the t1_base cases against ccomplex_dotprod_3dnowext.  When
// gr_cpu::has_3dnowext () is false a notice is written to cerr.
// NOTE(review): the statement after the notice (presumably an early return)
// is not visible here — confirm the kernel is skipped on unsupported CPUs.
253 qa_ccomplex_dotprod_x86::t1_3dnowext ()
255 if (!gr_cpu::has_3dnowext ()){
256 cerr << "No 3DNow!Ext support; not tested\n";
259 t1_base (ccomplex_dotprod_3dnowext);
// Runs the t2_base cases against ccomplex_dotprod_3dnowext.  When
// gr_cpu::has_3dnowext () is false a notice is written to cerr.
// NOTE(review): the statement after the notice (presumably an early return)
// is not visible here — confirm the kernel is skipped on unsupported CPUs.
263 qa_ccomplex_dotprod_x86::t2_3dnowext ()
265 if (!gr_cpu::has_3dnowext ()){
266 cerr << "No 3DNow!Ext support; not tested\n";
269 t2_base (ccomplex_dotprod_3dnowext);
// Runs the t3_base cases against ccomplex_dotprod_3dnowext.  When
// gr_cpu::has_3dnowext () is false a notice is written to cerr.
// NOTE(review): the statement after the notice (presumably an early return)
// is not visible here — confirm the kernel is skipped on unsupported CPUs.
273 qa_ccomplex_dotprod_x86::t3_3dnowext ()
275 if (!gr_cpu::has_3dnowext ()){
276 cerr << "No 3DNow!Ext support; not tested\n";
279 t3_base (ccomplex_dotprod_3dnowext);
// Runs the t1_base cases against ccomplex_dotprod_3dnow.  When
// gr_cpu::has_3dnow () is false a notice is written to cerr.
// NOTE(review): the statement after the notice (presumably an early return)
// is not visible here — confirm the kernel is skipped on unsupported CPUs.
283 qa_ccomplex_dotprod_x86::t1_3dnow ()
285 if (!gr_cpu::has_3dnow ()){
286 cerr << "No 3DNow! support; not tested\n";
289 t1_base (ccomplex_dotprod_3dnow);
// Runs the t2_base cases against ccomplex_dotprod_3dnow.  When
// gr_cpu::has_3dnow () is false a notice is written to cerr.
// NOTE(review): the statement after the notice (presumably an early return)
// is not visible here — confirm the kernel is skipped on unsupported CPUs.
293 qa_ccomplex_dotprod_x86::t2_3dnow ()
295 if (!gr_cpu::has_3dnow ()){
296 cerr << "No 3DNow! support; not tested\n";
299 t2_base (ccomplex_dotprod_3dnow);
// Runs the t3_base cases against ccomplex_dotprod_3dnow.  When
// gr_cpu::has_3dnow () is false a notice is written to cerr.
// NOTE(review): the statement after the notice (presumably an early return)
// is not visible here — confirm the kernel is skipped on unsupported CPUs.
303 qa_ccomplex_dotprod_x86::t3_3dnow ()
305 if (!gr_cpu::has_3dnow ()){
306 cerr << "No 3DNow! support; not tested\n";
309 t3_base (ccomplex_dotprod_3dnow);
// Runs the t1_base cases against ccomplex_dotprod_sse.  When
// gr_cpu::has_sse () is false a notice is written to cerr.
// NOTE(review): the statement after the notice (presumably an early return)
// is not visible here — confirm the kernel is skipped on unsupported CPUs.
313 qa_ccomplex_dotprod_x86::t1_sse ()
315 if (!gr_cpu::has_sse ()){
316 cerr << "No SSE support; not tested\n";
319 t1_base (ccomplex_dotprod_sse);
// Runs the t2_base cases against ccomplex_dotprod_sse.  When
// gr_cpu::has_sse () is false a notice is written to cerr.
// NOTE(review): the statement after the notice (presumably an early return)
// is not visible here — confirm the kernel is skipped on unsupported CPUs.
323 qa_ccomplex_dotprod_x86::t2_sse ()
325 if (!gr_cpu::has_sse ()){
326 cerr << "No SSE support; not tested\n";
329 t2_base (ccomplex_dotprod_sse);
// Runs the t3_base cases against ccomplex_dotprod_sse.  When
// gr_cpu::has_sse () is false a notice is written to cerr.
// NOTE(review): the statement after the notice (presumably an early return)
// is not visible here — confirm the kernel is skipped on unsupported CPUs.
333 qa_ccomplex_dotprod_x86::t3_sse ()
335 if (!gr_cpu::has_sse ()){
336 cerr << "No SSE support; not tested\n";
339 t3_base (ccomplex_dotprod_sse);