3 * Copyright 2002 Free Software Foundation, Inc.
5 * This file is part of GNU Radio
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
26 #include <gr_fir_fff_simd.h>
// Default constructor: delegates to the gr_fir_fff_generic default
// constructor and leaves all four aligned-tap slots null.
34 gr_fir_fff_simd::gr_fir_fff_simd ()
35 : gr_fir_fff_generic ()
37 // cerr << "@@@ gr_fir_fff_simd\n";
// Null every slot so that the free16Align() calls made on these slots by the
// destructor and by set_taps() always see a well-defined value.
// NOTE(review): this presumably relies on free16Align() accepting a null
// pointer -- confirm against its definition.
41 d_aligned_taps[0] = 0;
42 d_aligned_taps[1] = 0;
43 d_aligned_taps[2] = 0;
44 d_aligned_taps[3] = 0;
// Constructs the filter from an initial tap vector.
//
// new_taps: initial coefficients; forwarded to the gr_fir_fff_generic
//           constructor.
47 gr_fir_fff_simd::gr_fir_fff_simd (const std::vector<float> &new_taps)
48 : gr_fir_fff_generic (new_taps)
50 // cerr << "@@@ gr_fir_fff_simd\n";
// Start with every aligned-tap slot null.
// NOTE(review): no set_taps() call is visible in this view, so it cannot be
// told from here whether the aligned copies of new_taps are built during
// construction -- confirm against the complete file.
54 d_aligned_taps[0] = 0;
55 d_aligned_taps[1] = 0;
56 d_aligned_taps[2] = 0;
57 d_aligned_taps[3] = 0;
// Destructor: hands each of the four aligned-tap slots to free16Align().
// NOTE(review): slots that were never filled still hold the 0 assigned by the
// constructors, so this presumably depends on free16Align() tolerating a null
// pointer -- confirm.
61 gr_fir_fff_simd::~gr_fir_fff_simd ()
63 free16Align (d_aligned_taps[0]);
64 free16Align (d_aligned_taps[1]);
65 free16Align (d_aligned_taps[2]);
66 free16Align (d_aligned_taps[3]);
// Install a new set of filter taps.
//
// The taps are first passed to gr_fir_fff::set_taps(), then reversed with
// gr_reverse(). Four copies of the reversed taps are built, one per slot of
// d_aligned_taps[]; copy i is written starting at float index i of its
// buffer, so it is preceded by i padding entries.
//
// inew_taps: the new coefficients, in the caller's (un-reversed) order.
70 gr_fir_fff_simd::set_taps (const std::vector<float> &inew_taps)
72 gr_fir_fff::set_taps (inew_taps); // call superclass
73 const std::vector<float> new_taps = gr_reverse(inew_taps);
// The size_t returned by size() is narrowed to unsigned here.
75 unsigned len = new_taps.size ();
77 // Make 4 copies of the coefficients, one for each data alignment
78 // Note use of special 16-byte-aligned version of calloc()
80 for (unsigned i = 0; i < 4; i++){
81 free16Align (d_aligned_taps[i]); // free old value
83 // this works because the bit representation of an IEEE floating point
84 // +zero is all zeros. If you're using a different representation,
85 // you'll need to explicitly set the result to the appropriate 0.0 value.
// The first argument is a block count: 1 + (len + i - 1) / 4. The statement
// continues on a line that is not visible in this view, so the per-block
// size cannot be confirmed here (presumably four floats, i.e. 16 bytes).
// NOTE(review): the arithmetic is unsigned, so when len == 0 and i == 0 the
// term (len + i - 1) wraps to the largest unsigned value and the requested
// block count becomes enormous -- confirm whether empty tap vectors can
// reach this point.
87 d_aligned_taps[i] = (float *) calloc16Align (1 + (len + i - 1) / 4,
89 if (d_aligned_taps[i] == 0){
91 cerr << "@@@ gr_fir_fff_simd d_aligned_taps[" << i << "] == 0\n";
// NOTE(review): as far as this view shows, a failed allocation is only
// reported on cerr; nothing visible stops the copy loop below from writing
// through the null pointer -- confirm against the complete file.
// Copy the reversed taps in at offset i; indices 0 .. i-1 are not written
// here and keep whatever the calloc-style allocation put there.
94 for (unsigned j = 0; j < len; j++)
95 d_aligned_taps[i][j+i] = new_taps[j];
// Compute one filter output for the samples starting at input.
//
// The input address is rounded down to a 16-byte boundary, the pre-shifted
// tap copy matching the resulting offset is selected from d_aligned_taps[],
// and both are handed to d_float_dotprod together with a count of 4-float
// blocks.
//
// input: pointer to the first input sample.
// NOTE(review): the return statement and any early-exit guard are not visible
// in this view; presumably the dot-product result r is what gets returned --
// confirm against the complete file.
100 gr_fir_fff_simd::filter (const float input[])
106 // Round input data address down to 16 byte boundary
107 // NB: depending on the alignment of input[], memory
108 // before input[] will be accessed. The contents don't matter since
109 // they'll be multiplied by zero coefficients. I can't conceive of any
110 // situation where this could cause a segfault since memory protection
111 // in the x86 machines is done on much larger boundaries.
// Clears the low four address bits.
// NOTE(review): the round trip through unsigned long assumes that type is
// wide enough to hold a pointer; that does not hold on LLP64 targets such as
// 64-bit Windows, where uintptr_t would be the safe choice -- confirm the
// supported platforms.
113 const float *ar = (float *)((unsigned long) input & ~15);
115 // Choose one of 4 sets of pre-shifted coefficients. al is both the
116 // index into d_aligned_taps[] and the number of 0 words padded onto
117 // that coefficients array for alignment purposes.
// Pointer difference, counted in floats.
// NOTE(review): al stays within 0..3 only if input is at least 4-byte
// aligned -- presumably guaranteed by callers; confirm.
119 unsigned al = input - ar;
121 // call assembler routine to do the work, passing number of 4-float blocks.
123 // assert (((unsigned long) ar & 15) == 0);
124 // assert (((unsigned long) d_aligned_taps[al] & 15) == 0);
126 // cerr << "ar: " << ar << " d_aligned_taps[ar]: " << d_aligned_taps[al]
127 // << " (ntaps() + al - 1)/4 + 1: " << (ntaps() + al -1) / 4 + 1 << endl;
// The block count uses the same formula that sizes each buffer in set_taps(),
// with ntaps() in place of the tap-vector length.
// NOTE(review): if ntaps() can be 0 while al == 0, and the arithmetic is
// unsigned, (ntaps() + al - 1) wraps just as it does in set_taps(); also, the
// selected d_aligned_taps[al] is still null if set_taps() has never run.
// Confirm that a guard outside this view covers both cases.
129 float r = d_float_dotprod (ar, d_aligned_taps[al], (ntaps() + al - 1) / 4 + 1);
131 // cerr << "result = " << r << endl;