3 * Copyright 2002,2007 Free Software Foundation, Inc.
5 * This file is part of GNU Radio
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
26 #include <gr_fir_ccf_simd.h>
36 gr_fir_ccf_simd::gr_fir_ccf_simd ()
37 : gr_fir_ccf_generic ()
39 // cerr << "@@@ gr_fir_ccf_simd\n";
41 d_fcomplex_dotprod = 0;
43 d_aligned_taps[0] = 0;
44 d_aligned_taps[1] = 0;
45 d_aligned_taps[2] = 0;
46 d_aligned_taps[3] = 0;
49 gr_fir_ccf_simd::gr_fir_ccf_simd (const std::vector<float> &new_taps)
50 : gr_fir_ccf_generic (new_taps)
52 // cerr << "@@@ gr_fir_ccf_simd\n";
54 d_fcomplex_dotprod = 0;
56 d_aligned_taps[0] = 0;
57 d_aligned_taps[1] = 0;
58 d_aligned_taps[2] = 0;
59 d_aligned_taps[3] = 0;
63 gr_fir_ccf_simd::~gr_fir_ccf_simd ()
65 free16Align (d_aligned_taps[0]);
66 free16Align (d_aligned_taps[1]);
67 free16Align (d_aligned_taps[2]);
68 free16Align (d_aligned_taps[3]);
72 gr_fir_ccf_simd::set_taps (const std::vector<float> &inew_taps)
74 gr_fir_ccf::set_taps (inew_taps); // call superclass
75 const std::vector<float> new_taps = gr_reverse(inew_taps);
77 unsigned len = new_taps.size ();
79 // Make 4 copies of the coefficients, one for each data alignment
80 // Note use of special 16-byte-aligned version of calloc()
82 for (unsigned i = 0; i < 4; i++){
83 free16Align (d_aligned_taps[i]); // free old value
85 // this works because the bit representation of a IEEE floating point
86 // +zero is all zeros. If you're using a different representation,
87 // you'll need to explictly set the result to the appropriate 0.0 value.
89 d_aligned_taps[i] = (float *) calloc16Align (1 + (len + i - 1) / 4,
91 if (d_aligned_taps[i] == 0){
93 cerr << "@@@ gr_fir_ccf_simd d_aligned_taps[" << i << "] == 0\n";
96 for (unsigned j = 0; j < len; j++)
97 d_aligned_taps[i][j+i] = new_taps[j];
102 gr_fir_ccf_simd::filter (const gr_complex input[])
107 if (((intptr_t) input & 0x7) != 0)
108 throw std::invalid_argument("gr_complex must be 8-byte aligned");
110 // Round input data address down to 16 byte boundary
111 // NB: depending on the alignment of input[], memory
112 // before input[] will be accessed. The contents don't matter since
113 // they'll be multiplied by zero coefficients. I can't conceive of any
114 // situation where this could cause a segfault since memory protection
115 // in the x86 machines is done on much larger boundaries.
117 const gr_complex *ar = (gr_complex *)((unsigned long) input & ~15);
119 // Choose one of 4 sets of pre-shifted coefficients. al is both the
120 // index into d_aligned_taps[] and the number of 0 words padded onto
121 // that coefficients array for alignment purposes.
123 unsigned al = input - ar;
125 // call assembler routine to do the work, passing number of 2x4-float blocks.
127 // assert (((unsigned long) ar & 15) == 0);
128 // assert (((unsigned long) d_aligned_taps[al] & 15) == 0);
130 // cerr << "ar: " << ar << " d_aligned_taps[ar]: " << d_aligned_taps[al]
131 // << " (ntaps() + al - 1)/2 + 1: " << (ntaps() + al -1) / 2 + 1 << endl;
135 // the trick here is to invert input and taps, and reuse FCC speedup
136 d_fcomplex_dotprod (d_aligned_taps[al], (float*)ar, (ntaps() + al - 1) / 2 + 1, result);
138 // cerr << "result = " << result[0] << " " << result[1] << endl;
140 return gr_complex(result[0], result[1]);