git.gag.com Git - debian/gnuradio/blob - gnuradio-core/src/lib/filter/gri_fft_filter_fff_sse.cc

   1 /* -*- c++ -*- */
   2 /*
   3  * Copyright 2010 Free Software Foundation, Inc.
   4  *
   5  * This file is part of GNU Radio
   6  *
   7  * GNU Radio is free software; you can redistribute it and/or modify
   8  * it under the terms of the GNU General Public License as published by
   9  * the Free Software Foundation; either version 3, or (at your option)
  10  * any later version.
  11  *
  12  * GNU Radio is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with GNU Radio; see the file COPYING.  If not, write to
  19  * the Free Software Foundation, Inc., 51 Franklin Street,
  20  * Boston, MA 02110-1301, USA.
  21  */
  22
  23 #ifdef HAVE_CONFIG_H
  24 #include "config.h"
  25 #endif
  26
  27 #include <gri_fft_filter_fff_sse.h>
  28 #include <gri_fft.h>
  29 #include <assert.h>
  30 #include <stdexcept>
  31 #include <cstdio>
  32 #include <xmmintrin.h>
  33 #include <fftw3.h>
  34
  35 gri_fft_filter_fff_sse::gri_fft_filter_fff_sse (int decimation,
  36                                                         const std::vector<float> &taps)
  37   : d_fftsize(-1), d_decimation(decimation), d_fwdfft(0), d_invfft(0)
  38 {
  39   d_xformed_taps = (gr_complex*)fftwf_malloc(1*sizeof(gr_complex));
  40   set_taps(taps);
  41 }
  42
  43 gri_fft_filter_fff_sse::~gri_fft_filter_fff_sse ()
  44 {
  45   fftwf_free(d_xformed_taps);
  46   delete d_fwdfft;
  47   delete d_invfft;
  48 }
  49
  50 /*
  51  * determines d_ntaps, d_nsamples, d_fftsize, d_xformed_taps
  52  */
  53 int
  54 gri_fft_filter_fff_sse::set_taps (const std::vector<float> &taps)
  55 {
  56   int i = 0;
  57   compute_sizes(taps.size());
  58
  59   d_tail.resize(tailsize());
  60   for (i = 0; i < tailsize(); i++)
  61     d_tail[i] = 0;
  62
  63   float *in = d_fwdfft->get_inbuf();
  64   gr_complex *out = d_fwdfft->get_outbuf();
  65
  66   float scale = 1.0 / d_fftsize;
  67
  68   // Compute forward xform of taps.
  69   // Copy taps into first ntaps slots, then pad with zeros
  70   for (i = 0; i < d_ntaps; i++)
  71     in[i] = taps[i] * scale;
  72
  73   for (; i < d_fftsize; i++)
  74     in[i] = 0;
  75
  76   d_fwdfft->execute();          // do the xform
  77
  78   // now copy output to d_xformed_taps
  79   for (i = 0; i < d_fftsize/2+1; i++)
  80     d_xformed_taps[i] = out[i];
  81
  82   return d_nsamples;
  83 }
  84
  85 // determine and set d_ntaps, d_nsamples, d_fftsize
  86
  87 void
  88 gri_fft_filter_fff_sse::compute_sizes(int ntaps)
  89 {
  90   int old_fftsize = d_fftsize;
  91   d_ntaps = ntaps;
  92   d_fftsize = (int) (2 * pow(2.0, ceil(log(ntaps) / log(2))));
  93   d_nsamples = d_fftsize - d_ntaps + 1;
  94
  95   if (0)
  96     fprintf(stderr, "gri_fft_filter_fff_sse: ntaps = %d, fftsize = %d, nsamples = %d\n",
  97             d_ntaps, d_fftsize, d_nsamples);
  98
  99   assert(d_fftsize == d_ntaps + d_nsamples -1 );
 100
 101   if (d_fftsize != old_fftsize){        // compute new plans
 102     delete d_fwdfft;
 103     delete d_invfft;
 104     d_fwdfft = new gri_fft_real_fwd(d_fftsize);
 105     d_invfft = new gri_fft_real_rev(d_fftsize);
 106     //d_xformed_taps.resize(d_fftsize/2+1);
 107
 108     fftwf_free(d_xformed_taps);
 109     d_xformed_taps = (gr_complex*)fftwf_malloc((d_fftsize/2+1)*sizeof(gr_complex));
 110   }
 111 }
 112
 113 int
 114 gri_fft_filter_fff_sse::filter (int nitems, const float *input, float *output)
 115 {
 116   int dec_ctr = 0;
 117   int j = 0;
 118   int ninput_items = nitems * d_decimation;
 119
 120   for (int i = 0; i < ninput_items; i += d_nsamples){
 121
 122     memcpy(d_fwdfft->get_inbuf(), &input[i], d_nsamples * sizeof(float));
 123
 124     for (j = d_nsamples; j < d_fftsize; j++)
 125       d_fwdfft->get_inbuf()[j] = 0;
 126
 127     d_fwdfft->execute();        // compute fwd xform
 128
 129     float *a = (float*)(d_fwdfft->get_outbuf());
 130     float *b = (float*)(&d_xformed_taps[0]);
 131     float *c = (float*)(d_invfft->get_inbuf());
 132
 133     __m128 x0, x1, x2, t0, t1, m;
 134     m = _mm_set_ps(-1, 1, -1, 1);
 135     for (j = 0; j < d_fftsize; j+=4) {  // filter in the freq domain
 136       x0 = _mm_load_ps(&a[j]);
 137       t0 = _mm_load_ps(&b[j]);
 138
 139       t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 3, 1, 1));
 140       t0 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 0, 0));
 141       t1 = _mm_mul_ps(t1, m);
 142
 143       x1 = _mm_mul_ps(x0, t0);
 144       x2 = _mm_mul_ps(x0, t1);
 145
 146       x2 = _mm_shuffle_ps(x2, x2, _MM_SHUFFLE(2, 3, 0, 1));
 147       x2 = _mm_add_ps(x1, x2);
 148
 149       _mm_store_ps(&c[j], x2);
 150     }
 151
 152     // Finish off the last one; do the complex multiply as floats
 153     j = d_fftsize/2;
 154     c[j] = (a[j] * b[j]) - (a[j+1] * b[j+1]);
 155     c[j+1] = (a[j] * b[j+1]) + (a[j+1] * b[j]);
 156
 157     d_invfft->execute();        // compute inv xform
 158
 159     // add in the overlapping tail
 160
 161     for (j = 0; j < tailsize(); j++)
 162       d_invfft->get_outbuf()[j] += d_tail[j];
 163
 164     // copy nsamples to output
 165
 166     //memcpy(out, d_invfft->get_outbuf(), d_nsamples * sizeof(float));
 167     //out += d_nsamples;
 168
 169     j = dec_ctr;
 170     while (j < d_nsamples) {
 171       *output++ = d_invfft->get_outbuf()[j];
 172       j += d_decimation;
 173     }
 174     dec_ctr = (j - d_nsamples);
 175
 176     // stash the tail
 177     memcpy(&d_tail[0], d_invfft->get_outbuf() + d_nsamples,
 178            tailsize() * sizeof(float));
 179   }
 180
 181   assert(dec_ctr == 0);
 182
 183   return nitems;
 184 }