git.gag.com Git - debian/gnuradio/blob - gnuradio-core/src/lib/filter/qa_float_dotprod_x86.cc

   1 /* -*- c++ -*- */
   2 /*
   3  * Copyright 2002 Free Software Foundation, Inc.
   4  *
   5  * This file is part of GNU Radio
   6  *
   7  * GNU Radio is free software; you can redistribute it and/or modify
   8  * it under the terms of the GNU General Public License as published by
   9  * the Free Software Foundation; either version 2, or (at your option)
  10  * any later version.
  11  *
  12  * GNU Radio is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with GNU Radio; see the file COPYING.  If not, write to
  19  * the Free Software Foundation, Inc., 51 Franklin Street,
  20  * Boston, MA 02110-1301, USA.
  21  */
  22
  23 #ifdef HAVE_CONFIG_H
  24 #include <config.h>
  25 #endif
  26
  27 #include <cppunit/TestAssert.h>
  28 #include <qa_float_dotprod_x86.h>
  29 #include <float_dotprod_x86.h>
  30 #include <string.h>
  31 #include <iostream>
  32 #include <malloc16.h>
  33 #include <sse_debug.h>
  34 #include <cmath>
  35 #include <gr_cpu.h>
  36 #include <random.h>
  37
  38 using std::cerr;
  39
  40
  41 #define MAX_BLKS        10
  42 #define FLOATS_PER_BLK   4
  43
  44 #define ERR_DELTA       (1e-6)
  45
  46
  47 static void
  48 random_floats (float *buf, unsigned n)
  49 {
  50   for (unsigned i = 0; i < n; i++)
  51     buf[i] = random () - RANDOM_MAX/2;
  52 }
  53
  54 static void
  55 zero_floats (float *buf, unsigned n)
  56 {
  57   for (unsigned i = 0; i < n; i++)
  58     buf[i] = 0.0;
  59 }
  60
  61 float
  62 ref_float_dotprod (const float *input,
  63                    const float *taps, unsigned n_4_float_blocks)
  64 {
  65   float sum0 = 0;
  66   float sum1 = 0;
  67   float sum2 = 0;
  68   float sum3 = 0;
  69
  70   do {
  71
  72     sum0 += input[0] * taps[0];
  73     sum1 += input[1] * taps[1];
  74     sum2 += input[2] * taps[2];
  75     sum3 += input[3] * taps[3];
  76
  77     input += 4;
  78     taps += 4;
  79
  80   } while (--n_4_float_blocks != 0);
  81
  82
  83   return sum0 + sum1 + sum2 + sum3;
  84 }
  85
  86 void
  87 qa_float_dotprod_x86::setUp ()
  88 {
  89   taps = (float *) calloc16Align (MAX_BLKS,
  90                                   sizeof (float) * FLOATS_PER_BLK);
  91
  92   input = (float *) calloc16Align (MAX_BLKS,
  93                                    sizeof (float) * FLOATS_PER_BLK);
  94
  95   if (taps == 0 || input == 0)
  96     abort ();
  97 }
  98
  99 void
 100 qa_float_dotprod_x86::tearDown ()
 101 {
 102   free16Align (taps);
 103   free16Align (input);
 104   taps = 0;
 105   input = 0;
 106 }
 107
 108
 109 void
 110 qa_float_dotprod_x86::zb ()     // "zero both"
 111 {
 112   zero_floats (taps, MAX_BLKS * FLOATS_PER_BLK);
 113   zero_floats (input, MAX_BLKS * FLOATS_PER_BLK);
 114 }
 115
 116 //
 117 // t1
 118 //
 119
 120 void
 121 qa_float_dotprod_x86::t1_base (float_dotprod_t float_dotprod)
 122 {
 123
 124   // cerr << "Testing dump_xmm_regs\n";
 125   // dump_xmm_regs ();
 126
 127   // test basic cases, 1 block
 128
 129   zb ();
 130   CPPUNIT_ASSERT_DOUBLES_EQUAL (0.0, float_dotprod (input, taps, 1), ERR_DELTA);
 131
 132   // vary each input
 133
 134   zb ();
 135   input[0] = 0.5;       taps[0] = 1.0;
 136   CPPUNIT_ASSERT_DOUBLES_EQUAL (0.5, float_dotprod (input, taps, 1), ERR_DELTA);
 137
 138   zb ();
 139   input[1] = 2.0;       taps[1] = 1.0;
 140   CPPUNIT_ASSERT_DOUBLES_EQUAL (2.0, float_dotprod (input, taps, 1), ERR_DELTA);
 141
 142   zb ();
 143   input[2] = 3.0;       taps[2] = 1.0;
 144   CPPUNIT_ASSERT_DOUBLES_EQUAL (3.0, float_dotprod (input, taps, 1), ERR_DELTA);
 145
 146   zb ();
 147   input[3] = 4.0;       taps[3] = 1.0;
 148   CPPUNIT_ASSERT_DOUBLES_EQUAL (4.0, float_dotprod (input, taps, 1), ERR_DELTA);
 149
 150   // vary each tap
 151
 152   zb ();
 153   input[0] = 1.0;       taps[0] = 0.5;
 154   CPPUNIT_ASSERT_DOUBLES_EQUAL (0.5, float_dotprod (input, taps, 1), ERR_DELTA);
 155
 156   zb ();
 157   input[0] = 1.0;       taps[0] = 2.0;
 158   CPPUNIT_ASSERT_DOUBLES_EQUAL (2.0, float_dotprod (input, taps, 1), ERR_DELTA);
 159
 160   zb ();
 161   input[0] = 1.0;       taps[0] = 3.0;
 162   CPPUNIT_ASSERT_DOUBLES_EQUAL (3.0, float_dotprod (input, taps, 1), ERR_DELTA);
 163
 164   zb ();
 165   input[0] = 1.0;       taps[0] = 4.0;
 166   CPPUNIT_ASSERT_DOUBLES_EQUAL (4.0, float_dotprod (input, taps, 1), ERR_DELTA);
 167 }
 168
 169 //
 170 // t2
 171 //
 172 void
 173 qa_float_dotprod_x86::t2_base (float_dotprod_t float_dotprod)
 174 {
 175   zb ();
 176   input[0] =  1.0;      taps[0] =  2.0;
 177   input[1] =  3.0;      taps[1] =  5.0;
 178   input[2] =  7.0;      taps[2] = 11.0;
 179   input[3] = 13.0;      taps[3] = 17.0;
 180
 181   CPPUNIT_ASSERT_DOUBLES_EQUAL (315.0, float_dotprod (input, taps, 1), ERR_DELTA);
 182
 183   input[4] = 19.0;      taps[4] = 23.0;
 184   CPPUNIT_ASSERT_DOUBLES_EQUAL (752.0, float_dotprod (input, taps, 2), ERR_DELTA);
 185
 186 }
 187
 188 //
 189 // t3
 190 //
 191 void
 192 qa_float_dotprod_x86::t3_base (float_dotprod_t float_dotprod)
 193 {
 194   srandom (0);  // we want reproducibility
 195
 196   for (unsigned int i = 0; i < 10; i++){
 197     random_floats (input, MAX_BLKS * FLOATS_PER_BLK);
 198     random_floats (taps, MAX_BLKS * FLOATS_PER_BLK);
 199
 200     // we use a sloppy error margin because on the x86 architecture,
 201     // our reference implementation is using 80 bit floating point
 202     // arithmetic, while the SSE version is using 32 bit float point
 203     // arithmetic.
 204
 205     float ref = ref_float_dotprod (input, taps, MAX_BLKS);
 206     CPPUNIT_ASSERT_DOUBLES_EQUAL (ref,
 207                         float_dotprod (input, taps, MAX_BLKS),
 208                         fabs (ref) * 1e-4);
 209   }
 210 }
 211
 212 void
 213 qa_float_dotprod_x86::t1_3dnow ()
 214 {
 215   if (!gr_cpu::has_3dnow ()){
 216     cerr << "No 3DNow! support; not tested\n";
 217   }
 218   else
 219     t1_base (float_dotprod_3dnow);
 220 }
 221
 222 void
 223 qa_float_dotprod_x86::t2_3dnow ()
 224 {
 225   if (!gr_cpu::has_3dnow ()){
 226     cerr << "No 3DNow! support; not tested\n";
 227   }
 228   else
 229     t2_base (float_dotprod_3dnow);
 230 }
 231
 232 void
 233 qa_float_dotprod_x86::t3_3dnow ()
 234 {
 235   if (!gr_cpu::has_3dnow ()){
 236     cerr << "No 3DNow! support; not tested\n";
 237   }
 238   else
 239     t3_base (float_dotprod_3dnow);
 240 }
 241
 242 void
 243 qa_float_dotprod_x86::t1_sse ()
 244 {
 245   if (!gr_cpu::has_sse ()){
 246     cerr << "No SSE support; not tested\n";
 247   }
 248   else
 249     t1_base (float_dotprod_sse);
 250 }
 251
 252 void
 253 qa_float_dotprod_x86::t2_sse ()
 254 {
 255   if (!gr_cpu::has_sse ()){
 256     cerr << "No SSE support; not tested\n";
 257   }
 258   else
 259     t2_base (float_dotprod_sse);
 260 }
 261
 262 void
 263 qa_float_dotprod_x86::t3_sse ()
 264 {
 265   if (!gr_cpu::has_sse ()){
 266     cerr << "No SSE support; not tested\n";
 267   }
 268   else
 269     t3_base (float_dotprod_sse);
 270 }