3 * Copyright 2002 Free Software Foundation, Inc.
5 * This file is part of GNU Radio
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
26 #include <gr_fir_ccf_simd.h>
// Default constructor.
// Delegates to the gr_fir_ccf_generic default constructor, then zeroes the
// dot-product routine member and all four aligned-tap buffer slots.
// NOTE(review): the braces of this definition are not visible in this excerpt.
35 gr_fir_ccf_simd::gr_fir_ccf_simd ()
36 : gr_fir_ccf_generic ()
38 // cerr << "@@@ gr_fir_ccf_simd\n";
// No dot-product routine is installed here; filter() calls through this
// member, so presumably something else assigns it first — TODO confirm.
40 d_fcomplex_dotprod = 0;
// Start with no tap buffers allocated; these slots are later handed to
// free16Align() by set_taps() and the destructor.
42 d_aligned_taps[0] = 0;
43 d_aligned_taps[1] = 0;
44 d_aligned_taps[2] = 0;
45 d_aligned_taps[3] = 0;
// Constructor taking an initial set of float taps.
// Passes new_taps to the gr_fir_ccf_generic constructor, then zeroes the
// dot-product routine member and all four aligned-tap buffer slots.
// NOTE(review): no statement that builds the aligned tap copies from
// new_taps (e.g. a set_taps call) is visible in this excerpt, and neither
// are the braces of this definition — confirm against the full file.
48 gr_fir_ccf_simd::gr_fir_ccf_simd (const std::vector<float> &new_taps)
49 : gr_fir_ccf_generic (new_taps)
51 // cerr << "@@@ gr_fir_ccf_simd\n";
// No dot-product routine is installed here; filter() calls through this
// member, so presumably something else assigns it first — TODO confirm.
53 d_fcomplex_dotprod = 0;
// Null every slot before anything passes them to free16Align().
55 d_aligned_taps[0] = 0;
56 d_aligned_taps[1] = 0;
57 d_aligned_taps[2] = 0;
58 d_aligned_taps[3] = 0;
// Destructor: hands each of the four aligned-tap buffer slots to
// free16Align().
// NOTE(review): slots may still hold the 0 written by the constructors;
// presumably free16Align() accepts a null pointer — confirm.
62 gr_fir_ccf_simd::~gr_fir_ccf_simd ()
64 free16Align (d_aligned_taps[0]);
65 free16Align (d_aligned_taps[1]);
66 free16Align (d_aligned_taps[2]);
67 free16Align (d_aligned_taps[3]);
// Installs a new set of float taps.
// Forwards inew_taps to gr_fir_ccf::set_taps, then rebuilds the four
// aligned-tap buffers from gr_reverse(inew_taps). Buffer i receives the
// taps starting at float index i, one buffer per possible data alignment.
// NOTE(review): the return type and braces of this definition are not
// visible in this excerpt.
71 gr_fir_ccf_simd::set_taps (const std::vector<float> &inew_taps)
73 gr_fir_ccf::set_taps (inew_taps); // call superclass
// Work from the result of gr_reverse() (presumably a reversed-order copy
// of the taps — confirm against gr_reverse's definition).
74 const std::vector<float> new_taps = gr_reverse(inew_taps);
76 unsigned len = new_taps.size ();
78 // Make 4 copies of the coefficients, one for each data alignment
79 // Note use of special 16-byte-aligned version of calloc()
81 for (unsigned i = 0; i < 4; i++){
82 free16Align (d_aligned_taps[i]); // free old value
84 // this works because the bit representation of an IEEE floating point
85 // +zero is all zeros. If you're using a different representation,
86 // you'll need to explicitly set the result to the appropriate 0.0 value.
// NOTE(review): len and i are unsigned, so (len + i - 1) wraps to a very
// large value when len == 0 and i == 0, making the requested element count
// huge — confirm that empty tap vectors are handled.
// NOTE(review): the second argument of this call is not visible in this
// excerpt.
88 d_aligned_taps[i] = (float *) calloc16Align (1 + (len + i - 1) / 4,
// Allocation failure is only reported on cerr in the visible code.
// NOTE(review): the copy loop below still appears to run afterwards and
// would write through a null pointer when len > 0 — confirm whether the
// failure path exits or throws in the full file.
90 if (d_aligned_taps[i] == 0){
92 cerr << "@@@ gr_fir_ccf_simd d_aligned_taps[" << i << "] == 0\n";
// Copy tap j to float index j + i, leaving the first i floats of buffer i
// at whatever calloc16Align() initialised them to (zero, per the comment
// above).
95 for (unsigned j = 0; j < len; j++)
96 d_aligned_taps[i][j+i] = new_taps[j];
// Produces one complex output for the samples starting at input[].
// Rounds the input address down to a 16-byte boundary, picks the aligned
// tap buffer that matches the resulting offset, calls d_fcomplex_dotprod,
// and returns a gr_complex built from result[0] and result[1].
// NOTE(review): the return type, braces, any statements before the
// alignment step, and the declaration of `result` are not visible in this
// excerpt.
101 gr_fir_ccf_simd::filter (const gr_complex input[])
107 // Round input data address down to 16 byte boundary
108 // NB: depending on the alignment of input[], memory
109 // before input[] will be accessed. The contents don't matter since
110 // they'll be multiplied by zero coefficients. I can't conceive of any
111 // situation where this could cause a segfault since memory protection
112 // in the x86 machines is done on much larger boundaries.
// Clearing the low four address bits gives the 16-byte-aligned start.
// NOTE(review): the cast through unsigned long assumes that type is as
// wide as a pointer, which does not hold on every platform (e.g. 64-bit
// Windows); a pointer-sized integer type would be safer — confirm targets.
114 const gr_complex *ar = (gr_complex *)((unsigned long) input & ~15);
116 // Choose one of 4 sets of pre-shifted coefficients. al is both the
117 // index into d_aligned_taps[] and the number of 0 words padded onto
118 // that coefficients array for alignment purposes.
// Pointer subtraction: al counts whole gr_complex elements between the
// aligned address and input.
// NOTE(review): this presumes input is aligned to a gr_complex boundary
// relative to ar — confirm how callers guarantee that.
120 unsigned al = input - ar;
122 // call assembler routine to do the work, passing number of 2x4-float blocks.
124 // assert (((unsigned long) ar & 15) == 0);
125 // assert (((unsigned long) d_aligned_taps[al] & 15) == 0);
127 // cerr << "ar: " << ar << " d_aligned_taps[ar]: " << d_aligned_taps[al]
128 // << " (ntaps() + al - 1)/2 + 1: " << (ntaps() + al -1) / 2 + 1 << endl;
132 // the trick here is to invert input and taps, and reuse FCC speedup
// The float taps go in the first argument and the aligned complex input,
// viewed as floats, in the second; the block count is
// (ntaps() + al - 1) / 2 + 1.
133 d_fcomplex_dotprod (d_aligned_taps[al], (float*)ar, (ntaps() + al - 1) / 2 + 1, result);
135 // cerr << "result = " << result[0] << " " << result[1] << endl;
// result[0] and result[1] become the two components of the returned
// gr_complex.
137 return gr_complex(result[0], result[1]);