git.gag.com Git - debian/gnuradio/blob - gnuradio-core/src/lib/filter/gr_fir_ccc_simd.cc

   1 /* -*- c++ -*- */
   2 /*
   3  * Copyright 2002,2007 Free Software Foundation, Inc.
   4  *
   5  * This file is part of GNU Radio
   6  *
   7  * GNU Radio is free software; you can redistribute it and/or modify
   8  * it under the terms of the GNU General Public License as published by
   9  * the Free Software Foundation; either version 3, or (at your option)
  10  * any later version.
  11  *
  12  * GNU Radio is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with GNU Radio; see the file COPYING.  If not, write to
  19  * the Free Software Foundation, Inc., 51 Franklin Street,
  20  * Boston, MA 02110-1301, USA.
  21  */
  22
  23 #ifdef HAVE_CONFIG_H
  24 #include <config.h>
  25 #endif
  26 #include <gr_fir_ccc_simd.h>
  27
  28 #include <assert.h>
  29 #include <malloc16.h>
  30 #include <iostream>
  31 #include <stdexcept>
  32
  33 using std::cerr;
  34 using std::endl;
  35
  36 gr_fir_ccc_simd::gr_fir_ccc_simd ()
  37   : gr_fir_ccc_generic ()
  38 {
  39   // cerr << "@@@ gr_fir_ccc_simd\n";
  40
  41   d_ccomplex_dotprod = 0;
  42
  43   d_aligned_taps[0] = 0;
  44   d_aligned_taps[1] = 0;
  45   d_aligned_taps[2] = 0;
  46   d_aligned_taps[3] = 0;
  47 }
  48
  49 gr_fir_ccc_simd::gr_fir_ccc_simd (const std::vector<gr_complex> &new_taps)
  50   : gr_fir_ccc_generic (new_taps)
  51 {
  52   // cerr << "@@@ gr_fir_ccc_simd\n";
  53
  54   d_ccomplex_dotprod = 0;
  55
  56   d_aligned_taps[0] = 0;
  57   d_aligned_taps[1] = 0;
  58   d_aligned_taps[2] = 0;
  59   d_aligned_taps[3] = 0;
  60   set_taps (new_taps);
  61 }
  62
  63 gr_fir_ccc_simd::~gr_fir_ccc_simd ()
  64 {
  65   free16Align (d_aligned_taps[0]);
  66   free16Align (d_aligned_taps[1]);
  67   free16Align (d_aligned_taps[2]);
  68   free16Align (d_aligned_taps[3]);
  69 }
  70
  71 void
  72 gr_fir_ccc_simd::set_taps (const std::vector<gr_complex> &inew_taps)
  73 {
  74   gr_fir_ccc::set_taps (inew_taps);     // call superclass
  75
  76   const std::vector<gr_complex> new_taps = gr_reverse(inew_taps);
  77   unsigned len = new_taps.size ();
  78
  79   // Make 4 copies of the coefficients, one for each data alignment
  80   // Note use of special 16-byte-aligned version of calloc()
  81
  82   for (unsigned i = 0; i < 4; i++){
  83     free16Align (d_aligned_taps[i]);    // free old value
  84
  85     // this works because the bit representation of a IEEE floating point
  86     // +zero is all zeros.  If you're using a different representation,
  87     // you'll need to explictly set the result to the appropriate 0.0 value.
  88
  89     d_aligned_taps[i] = (float *) calloc16Align (1 + (len + i - 1) / 2,
  90                                                2 * 4 * sizeof (float));
  91     if (d_aligned_taps[i] == 0){
  92       // throw something...
  93       cerr << "@@@ gr_fir_ccc_simd d_aligned_taps[" << i << "] == 0\n";
  94     }
  95
  96     for (unsigned j = 0; j < len; j++) {
  97       d_aligned_taps[i][2*(j+i)] = new_taps[j].real();
  98       d_aligned_taps[i][2*(j+i)+1] = new_taps[j].imag();
  99     }
 100   }
 101 }
 102
 103 gr_complex
 104 gr_fir_ccc_simd::filter (const gr_complex input[])
 105 {
 106   if (ntaps () == 0)
 107     return 0.0;
 108
 109   if (((intptr_t) input & 0x7) != 0)
 110     throw std::invalid_argument("gr_complex must be 8-byte aligned");
 111
 112   // Round input data address down to 16 byte boundary
 113   // NB: depending on the alignment of input[], memory
 114   // before input[] will be accessed. The contents don't matter since
 115   // they'll be multiplied by zero coefficients. I can't conceive of any
 116   // situation where this could cause a segfault since memory protection
 117   // in the x86 machines is done on much larger boundaries.
 118
 119   const gr_complex *ar = (gr_complex *)((unsigned long) input & ~15);
 120
 121   // Choose one of 4 sets of pre-shifted coefficients. al is both the
 122   // index into d_aligned_taps[] and the number of 0 words padded onto
 123   // that coefficients array for alignment purposes.
 124
 125   unsigned al = input - ar;
 126
 127   // call assembler routine to do the work, passing number of 2x4-float blocks.
 128
 129   // assert (((unsigned long) ar & 15) == 0);
 130   // assert (((unsigned long) d_aligned_taps[al] & 15) == 0);
 131
 132   // cerr << "ar: " << ar << " d_aligned_taps[ar]: " << d_aligned_taps[al]
 133   //  << " (ntaps() + al - 1)/2 + 1: " << (ntaps() + al -1) / 2 + 1 << endl;
 134
 135   float result[2];
 136
 137   d_ccomplex_dotprod ((float*)ar, d_aligned_taps[al], (ntaps() + al - 1) / 2 + 1, result);
 138
 139   // cerr << "result = " << result[0] << " " << result[1] << endl;
 140
 141   return gr_complex(result[0], result[1]);
 142 }