git.gag.com Git - debian/gnuradio/blob - gnuradio-core/src/lib/filter/qa_ccomplex_dotprod_x86.cc

   1 /* -*- c++ -*- */
   2 /*
   3  * Copyright 2002 Free Software Foundation, Inc.
   4  *
   5  * This file is part of GNU Radio
   6  *
   7  * GNU Radio is free software; you can redistribute it and/or modify
   8  * it under the terms of the GNU General Public License as published by
   9  * the Free Software Foundation; either version 2, or (at your option)
  10  * any later version.
  11  *
  12  * GNU Radio is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with GNU Radio; see the file COPYING.  If not, write to
  19  * the Free Software Foundation, Inc., 51 Franklin Street,
  20  * Boston, MA 02110-1301, USA.
  21  */
  22
  23 #ifdef HAVE_CONFIG_H
  24 #include <config.h>
  25 #endif
  26
  27 #include <cppunit/TestAssert.h>
  28 #include <qa_ccomplex_dotprod_x86.h>
  29 #include <ccomplex_dotprod_x86.h>
  30 #include <string.h>
  31 #include <iostream>
  32 #include <malloc16.h>
  33 #include <sse_debug.h>
  34 #include <cmath>
  35 #include <gr_cpu.h>
  36 #include <random.h>
  37
  38 using std::cerr;
  39
  40 /// Macro for primitive value comparisons
  41 #define assertcomplexEqual(expected0,expected1,actual,delta)    \
  42   CPPUNIT_ASSERT_DOUBLES_EQUAL (expected0, actual[0], delta);   \
  43   CPPUNIT_ASSERT_DOUBLES_EQUAL (expected1, actual[1], delta);
  44
  45
  46 #define MAX_BLKS        10
  47 #define FLOATS_PER_BLK   4
  48
  49 #define ERR_DELTA       (1e-6)
  50
  51 static float
  52 uniform ()
  53 {
  54   return 2.0 * ((float) random () / RANDOM_MAX - 0.5);  // uniformly (-1, 1)
  55 }
  56
  57 static void
  58 random_floats (float *buf, unsigned n)
  59 {
  60   for (unsigned i = 0; i < n; i++)
  61     buf[i] = rint (uniform () * 32767);
  62 }
  63
  64 static void
  65 zero_floats (float *buf, unsigned n)
  66 {
  67   for (unsigned i = 0; i < n; i++)
  68     buf[i] = 0.0;
  69 }
  70
  71 void
  72 ref_ccomplex_dotprod (const float *input,
  73                       const float *taps, unsigned n_2_ccomplex_blocks,
  74                       float *result)
  75 {
  76   float sum0[2] = {0,0};
  77   float sum1[2] = {0,0};
  78
  79   do {
  80
  81     sum0[0] += input[0] * taps[0] - input[1] * taps[1];
  82     sum0[1] += input[0] * taps[1] + input[1] * taps[0];
  83     sum1[0] += input[2] * taps[2] - input[3] * taps[3];
  84     sum1[1] += input[2] * taps[3] + input[3] * taps[2];
  85
  86     input += 4;
  87     taps += 4;
  88
  89   } while (--n_2_ccomplex_blocks != 0);
  90
  91
  92   result[0] = sum0[0] + sum1[0];
  93   result[1] = sum0[1] + sum1[1];
  94 }
  95
  96 void
  97 qa_ccomplex_dotprod_x86::setUp ()
  98 {
  99   taps = (float *) calloc16Align (MAX_BLKS,
 100                                   sizeof (float) * FLOATS_PER_BLK);
 101
 102   input = (float *) calloc16Align (MAX_BLKS,
 103                                    sizeof (float) * FLOATS_PER_BLK);
 104
 105   if (taps == 0 || input == 0)
 106     abort ();
 107 }
 108
 109 void
 110 qa_ccomplex_dotprod_x86::tearDown ()
 111 {
 112   free16Align (taps);
 113   free16Align (input);
 114   taps = 0;
 115   input = 0;
 116 }
 117
 118
 119 void
 120 qa_ccomplex_dotprod_x86::zb ()  // "zero both"
 121 {
 122   zero_floats (taps, MAX_BLKS * FLOATS_PER_BLK);
 123   zero_floats (input, MAX_BLKS * FLOATS_PER_BLK);
 124 }
 125
 126 //
 127 // t1
 128 //
 129
 130 void
 131 qa_ccomplex_dotprod_x86::t1_base (ccomplex_dotprod_t ccomplex_dotprod)
 132 {
 133   float result[2];
 134
 135   // cerr << "Testing dump_xmm_regs\n";
 136   // dump_xmm_regs ();
 137
 138   // test basic cases, 1 block
 139
 140   zb ();
 141   ccomplex_dotprod (input, taps, 1, result);
 142   assertcomplexEqual (0.0, 0.0, result, ERR_DELTA);
 143
 144   // vary each input
 145
 146   zb ();
 147   input[0] = 1.0;       taps[0] = 1.0; taps[1] = -1.0;
 148   ccomplex_dotprod (input, taps, 1, result);
 149   //cerr << result[0] << " " << result[1] << "\n";
 150   assertcomplexEqual (1.0, -1.0, result, ERR_DELTA);
 151
 152   zb ();
 153   input[1] = 2.0;       taps[0] = 1.0; taps[1] = -1.0;
 154   ccomplex_dotprod (input, taps, 1, result);
 155   assertcomplexEqual (2.0, 2.0, result, ERR_DELTA);
 156
 157   zb ();
 158   input[2] = 3.0;       taps[2] = 1.0; taps[3] = -1.0;
 159   ccomplex_dotprod (input, taps, 1, result);
 160   assertcomplexEqual (3.0, -3.0, result, ERR_DELTA);
 161
 162   zb ();
 163   input[3] = 4.0;       taps[2] = 1.0; taps[3] = -1.0;
 164   ccomplex_dotprod (input, taps, 1, result);
 165   assertcomplexEqual (4.0, 4.0, result, ERR_DELTA);
 166
 167   // vary each tap
 168
 169   zb ();
 170   input[0] = 1.0;       taps[0] = 0.5; taps[1] = -0.5;
 171   ccomplex_dotprod (input, taps, 1, result);
 172   assertcomplexEqual (0.5, -0.5, result, ERR_DELTA);
 173
 174   zb ();
 175   input[0] = 1.0;       taps[0] = 2.0; taps[1] = -2.0;
 176   ccomplex_dotprod (input, taps, 1, result);
 177   assertcomplexEqual (2.0, -2.0, result, ERR_DELTA);
 178
 179   zb ();
 180   input[0] = 1.0;       taps[0] = 3.0; taps[1] = -3.0;
 181   ccomplex_dotprod (input, taps, 1, result);
 182   assertcomplexEqual (3.0, -3.0, result, ERR_DELTA);
 183
 184   zb ();
 185   input[0] = 1.0;       taps[0] = 4.0; taps[1] = -4.0;
 186   ccomplex_dotprod (input, taps, 1, result);
 187   assertcomplexEqual (4.0, -4.0, result, ERR_DELTA);
 188 }
 189
 190 //
 191 // t2
 192 //
 193 void
 194 qa_ccomplex_dotprod_x86::t2_base (ccomplex_dotprod_t ccomplex_dotprod)
 195 {
 196   float result[2];
 197
 198   zb ();
 199   input[0] =  1.0; input[1] =  3.0;     taps[0] =  5.0; taps[1] =  -2.0;
 200
 201   //1*5-3*-2 =11, 1*-2+3*5=13
 202
 203   ccomplex_dotprod (input, taps, 1, result);
 204   assertcomplexEqual (11.0, 13.0, result, ERR_DELTA);
 205
 206   //7*5-13*-5 =100, 7*-5+13*5=30
 207
 208   input[2] =  7.0; input[3] = 13.0;     taps[2] =  5.0; taps[3] =  -5.0;
 209
 210   ccomplex_dotprod (input, taps, 1, result);
 211   assertcomplexEqual (111.0, 43.0, result, ERR_DELTA);
 212
 213   input[4] = 19; input[5] = -19;        taps[4] = 23.0; taps[5] = -23.0;
 214
 215   //19*23--19*-23 =0, 19*-23+-19*23=-874
 216
 217   ccomplex_dotprod (input, taps, 2, result);
 218   assertcomplexEqual (111.0, -831.0, result, ERR_DELTA);
 219
 220 }
 221
 222 //
 223 // t3
 224 //
 225 void
 226 qa_ccomplex_dotprod_x86::t3_base (ccomplex_dotprod_t ccomplex_dotprod)
 227 {
 228   srandom (0);  // we want reproducibility
 229
 230   for (unsigned int i = 0; i < 10; i++){
 231     random_floats (input, MAX_BLKS * FLOATS_PER_BLK);
 232     random_floats (taps, MAX_BLKS * FLOATS_PER_BLK);
 233
 234     // we use a sloppy error margin because on the x86 architecture,
 235     // our reference implementation is using 80 bit floating point
 236     // arithmetic, while the SSE version is using 32 bit float point
 237     // arithmetic.
 238
 239     float ref[2];
 240     ref_ccomplex_dotprod (input, taps, MAX_BLKS, ref);
 241     float calc[2];
 242     ccomplex_dotprod (input, taps, MAX_BLKS, calc);
 243     CPPUNIT_ASSERT_DOUBLES_EQUAL (ref[0],
 244                         calc[0],
 245                         fabs (ref[0]) * 1e-4);
 246     CPPUNIT_ASSERT_DOUBLES_EQUAL (ref[1],
 247                         calc[1],
 248                         fabs (ref[1]) * 1e-4);
 249   }
 250 }
 251
 252 void
 253 qa_ccomplex_dotprod_x86::t1_3dnowext ()
 254 {
 255   if (!gr_cpu::has_3dnowext ()){
 256     cerr << "No 3DNow!Ext support; not tested\n";
 257   }
 258   else
 259     t1_base (ccomplex_dotprod_3dnowext);
 260 }
 261
 262 void
 263 qa_ccomplex_dotprod_x86::t2_3dnowext ()
 264 {
 265   if (!gr_cpu::has_3dnowext ()){
 266     cerr << "No 3DNow!Ext support; not tested\n";
 267   }
 268   else
 269     t2_base (ccomplex_dotprod_3dnowext);
 270 }
 271
 272 void
 273 qa_ccomplex_dotprod_x86::t3_3dnowext ()
 274 {
 275   if (!gr_cpu::has_3dnowext ()){
 276     cerr << "No 3DNow!Ext support; not tested\n";
 277   }
 278   else
 279     t3_base (ccomplex_dotprod_3dnowext);
 280 }
 281
 282 void
 283 qa_ccomplex_dotprod_x86::t1_3dnow ()
 284 {
 285   if (!gr_cpu::has_3dnow ()){
 286     cerr << "No 3DNow! support; not tested\n";
 287   }
 288   else
 289     t1_base (ccomplex_dotprod_3dnow);
 290 }
 291
 292 void
 293 qa_ccomplex_dotprod_x86::t2_3dnow ()
 294 {
 295   if (!gr_cpu::has_3dnow ()){
 296     cerr << "No 3DNow! support; not tested\n";
 297   }
 298   else
 299     t2_base (ccomplex_dotprod_3dnow);
 300 }
 301
 302 void
 303 qa_ccomplex_dotprod_x86::t3_3dnow ()
 304 {
 305   if (!gr_cpu::has_3dnow ()){
 306     cerr << "No 3DNow! support; not tested\n";
 307   }
 308   else
 309     t3_base (ccomplex_dotprod_3dnow);
 310 }
 311
 312 void
 313 qa_ccomplex_dotprod_x86::t1_sse ()
 314 {
 315   if (!gr_cpu::has_sse ()){
 316     cerr << "No SSE support; not tested\n";
 317   }
 318   else
 319     t1_base (ccomplex_dotprod_sse);
 320 }
 321
 322 void
 323 qa_ccomplex_dotprod_x86::t2_sse ()
 324 {
 325   if (!gr_cpu::has_sse ()){
 326     cerr << "No SSE support; not tested\n";
 327   }
 328   else
 329     t2_base (ccomplex_dotprod_sse);
 330 }
 331
 332 void
 333 qa_ccomplex_dotprod_x86::t3_sse ()
 334 {
 335   if (!gr_cpu::has_sse ()){
 336     cerr << "No SSE support; not tested\n";
 337   }
 338   else
 339     t3_base (ccomplex_dotprod_sse);
 340 }
 341