3 * Copyright 2002,2007 Free Software Foundation, Inc.
5 * This file is part of GNU Radio
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
26 #include <gr_fir_ccc_simd.h>
36 gr_fir_ccc_simd::gr_fir_ccc_simd ()
37 : gr_fir_ccc_generic ()
39 // cerr << "@@@ gr_fir_ccc_simd\n";
41 d_ccomplex_dotprod = 0;
43 d_aligned_taps[0] = 0;
44 d_aligned_taps[1] = 0;
45 d_aligned_taps[2] = 0;
46 d_aligned_taps[3] = 0;
49 gr_fir_ccc_simd::gr_fir_ccc_simd (const std::vector<gr_complex> &new_taps)
50 : gr_fir_ccc_generic (new_taps)
52 // cerr << "@@@ gr_fir_ccc_simd\n";
54 d_ccomplex_dotprod = 0;
56 d_aligned_taps[0] = 0;
57 d_aligned_taps[1] = 0;
58 d_aligned_taps[2] = 0;
59 d_aligned_taps[3] = 0;
63 gr_fir_ccc_simd::~gr_fir_ccc_simd ()
65 free16Align (d_aligned_taps[0]);
66 free16Align (d_aligned_taps[1]);
67 free16Align (d_aligned_taps[2]);
68 free16Align (d_aligned_taps[3]);
72 gr_fir_ccc_simd::set_taps (const std::vector<gr_complex> &inew_taps)
74 gr_fir_ccc::set_taps (inew_taps); // call superclass
76 const std::vector<gr_complex> new_taps = gr_reverse(inew_taps);
77 unsigned len = new_taps.size ();
79 // Make 4 copies of the coefficients, one for each data alignment
80 // Note use of special 16-byte-aligned version of calloc()
82 for (unsigned i = 0; i < 4; i++){
83 free16Align (d_aligned_taps[i]); // free old value
85 // this works because the bit representation of a IEEE floating point
86 // +zero is all zeros. If you're using a different representation,
87 // you'll need to explictly set the result to the appropriate 0.0 value.
89 d_aligned_taps[i] = (float *) calloc16Align (1 + (len + i - 1) / 2,
90 2 * 4 * sizeof (float));
91 if (d_aligned_taps[i] == 0){
93 cerr << "@@@ gr_fir_ccc_simd d_aligned_taps[" << i << "] == 0\n";
96 for (unsigned j = 0; j < len; j++) {
97 d_aligned_taps[i][2*(j+i)] = new_taps[j].real();
98 d_aligned_taps[i][2*(j+i)+1] = new_taps[j].imag();
104 gr_fir_ccc_simd::filter (const gr_complex input[])
109 if (((intptr_t) input & 0x7) != 0)
110 throw std::invalid_argument("gr_complex must be 8-byte aligned");
112 // Round input data address down to 16 byte boundary
113 // NB: depending on the alignment of input[], memory
114 // before input[] will be accessed. The contents don't matter since
115 // they'll be multiplied by zero coefficients. I can't conceive of any
116 // situation where this could cause a segfault since memory protection
117 // in the x86 machines is done on much larger boundaries.
119 const gr_complex *ar = (gr_complex *)((unsigned long) input & ~15);
121 // Choose one of 4 sets of pre-shifted coefficients. al is both the
122 // index into d_aligned_taps[] and the number of 0 words padded onto
123 // that coefficients array for alignment purposes.
125 unsigned al = input - ar;
127 // call assembler routine to do the work, passing number of 2x4-float blocks.
129 // assert (((unsigned long) ar & 15) == 0);
130 // assert (((unsigned long) d_aligned_taps[al] & 15) == 0);
132 // cerr << "ar: " << ar << " d_aligned_taps[ar]: " << d_aligned_taps[al]
133 // << " (ntaps() + al - 1)/2 + 1: " << (ntaps() + al -1) / 2 + 1 << endl;
137 d_ccomplex_dotprod ((float*)ar, d_aligned_taps[al], (ntaps() + al - 1) / 2 + 1, result);
139 // cerr << "result = " << result[0] << " " << result[1] << endl;
141 return gr_complex(result[0], result[1]);