1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010 ARM Limited. All rights reserved.
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_sparse_f32.c
10 * Description: Floating-point sparse FIR filter processing function.
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
14 * Version 1.0.10 2011/7/15
15 * Big Endian support added and Merged M0 and M3/M4 Source code.
17 * Version 1.0.3 2010/11/29
18 * Re-organized the CMSIS folders and updated documentation.
20 * Version 1.0.2 2010/11/11
21 * Documentation updated.
23 * Version 1.0.1 2010/10/05
24 * Production release and review comments incorporated.
26 * Version 1.0.0 2010/09/20
27 * Production release and review comments incorporated
29 * Version 0.0.7 2010/06/10
30 * Misra-C changes done
31 * ------------------------------------------------------------------- */
35 * @ingroup groupFilters
39 * @defgroup FIR_Sparse Finite Impulse Response (FIR) Sparse Filters
41 * This group of functions implements sparse FIR filters.
42 * Sparse FIR filters are equivalent to standard FIR filters except that most of the coefficients are equal to zero.
43 * Sparse filters are used for simulating reflections in communications and audio applications.
45 * There are separate functions for Q7, Q15, Q31, and floating-point data types.
46 * The functions operate on blocks of input and output data and each call to the function processes
47 * <code>blockSize</code> samples through the filter. <code>pSrc</code> and
48 * <code>pDst</code> point to the input and output arrays, respectively, each containing <code>blockSize</code> values.
51 * The sparse filter instance structure contains an array of tap indices <code>pTapDelay</code> which specifies the locations of the non-zero coefficients.
52 * This is in addition to the coefficient array <code>b</code>.
53 * The implementation essentially skips the multiplications by zero and leads to an efficient realization.
55 * y[n] = b[0] * x[n-pTapDelay[0]] + b[1] * x[n-pTapDelay[1]] + b[2] * x[n-pTapDelay[2]] + ...+ b[numTaps-1] * x[n-pTapDelay[numTaps-1]]
58 * \image html FIRSparse.gif "Sparse FIR filter. b[n] represents the filter coefficients"
60 * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>;
61 * <code>pTapDelay</code> points to an array holding the delay index of each nonzero coefficient and is also of size <code>numTaps</code>;
62 * <code>pState</code> points to a state array of size <code>maxDelay + blockSize</code>, where
63 * <code>maxDelay</code> is the largest offset value that is ever used in the <code>pTapDelay</code> array.
64 * Some of the processing functions also require temporary working buffers.
66 * \par Instance Structure
67 * The coefficients and state variables for a filter are stored together in an instance data structure.
68 * A separate instance structure must be defined for each filter.
69 * Coefficient and offset arrays may be shared among several instances while state variable arrays cannot be shared.
70 * There are separate instance structure declarations for each of the 4 supported data types.
72 * \par Initialization Functions
73 * There is also an associated initialization function for each data type.
74 * The initialization function performs the following operations:
75 * - Sets the values of the internal structure fields.
76 * - Zeros out the values in the state buffer.
79 * Use of the initialization function is optional.
80 * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
81 * To place an instance structure into a const data section, the instance structure must be manually initialized.
82 * Set the values in the state buffer to zeros before static initialization.
83 * The code below statically initializes the filter instance structure for each of the 4 supported data types:
85 *arm_fir_sparse_instance_f32 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};
86 *arm_fir_sparse_instance_q31 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};
87 *arm_fir_sparse_instance_q15 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};
88 *arm_fir_sparse_instance_q7 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};
92 * \par Fixed-Point Behavior
93 * Care must be taken when using the fixed-point versions of the sparse FIR filter functions.
94 * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
95 * Refer to the function specific documentation below for usage guidelines.
99 * @addtogroup FIR_Sparse
104 * @brief Processing function for the floating-point sparse FIR filter.
105 * @param[in] *S points to an instance of the floating-point sparse FIR structure.
106 * @param[in] *pSrc points to the block of input data.
107 * @param[out] *pDst points to the block of output data.
108 * @param[in] *pScratchIn points to a temporary buffer of size blockSize.
109 * @param[in] blockSize number of input samples to process per call.
/*
 * arm_fir_sparse_f32 - processing routine for the floating-point sparse
 * FIR filter.
 *
 * Flow that can be read from the visible statements:
 *   1. The input block is handed to arm_circularWrite_f32() together with
 *      the state buffer, its length (delaySize) and &S->stateIndex.
 *   2. For each tap a read index is formed as
 *      stateIndex - blockSize - (next entry of pTapDelay), a block is
 *      fetched into the scratch buffer with arm_circularRead_f32(), and
 *      every scratch sample is multiplied by the current coefficient.
 *   3. The first tap stores its products through pOut ('='); the remaining
 *      numTaps - 1 taps add their products to what is already there ('+=').
 * The existing comments describe two variants of steps 2-3: one unrolled
 * by four with a remainder pass (Cortex-M4/M3) and one labelled Cortex-M0.
 *
 * NOTE(review): this listing is incomplete. The body uses pSrc and
 * blockSize, but their parameter declarations are not visible; no loop or
 * branch headers, braces, counter decrements or assignments to px/pOut are
 * visible either, although the comments refer to them. Restore the missing
 * lines from the upstream file before attempting to build or modify this
 * function.
 */
113 void arm_fir_sparse_f32(
114 arm_fir_sparse_instance_f32 * S,
117 float32_t * pScratchIn,
121 float32_t *pState = S->pState; /* State pointer */
122 float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
123 float32_t *px; /* Scratch buffer pointer */
124 float32_t *py = pState; /* Temporary pointers for state buffer */
125 float32_t *pb = pScratchIn; /* Temporary pointers for scratch buffer */
126 float32_t *pOut; /* Destination pointer */
127 int32_t *pTapDelay = S->pTapDelay; /* Pointer to the array containing offset of the non-zero tap values. */
/* delaySize is the buffer length passed to both circular helpers below. */
128 uint32_t delaySize = S->maxDelay + blockSize; /* state length */
129 uint16_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
130 int32_t readIndex; /* Read index of the state buffer */
131 uint32_t tapCnt, blkCnt; /* loop counters */
/* The post-increment leaves pCoeffs pointing at the second coefficient. */
132 float32_t coeff = *pCoeffs++; /* Read the first coefficient value */
136 /* BlockSize of Input samples are copied into the state buffer */
137 /* StateIndex points to the starting position to write in the state buffer */
/* The float buffers are passed through (int32_t *) casts.
 * NOTE(review): presumably the helper is declared with int32_t pointers -
 * confirm against its prototype. */
138 arm_circularWrite_f32((int32_t *) py, delaySize, &S->stateIndex, 1,
139 (int32_t *) pSrc, 1, blockSize);
142 /* Read Index, from where the state buffer should be read, is calculated. */
143 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
145 /* Wraparound of readIndex */
/* NOTE(review): the condition guarding this adjustment is not visible in
 * this listing; presumably it applies only when readIndex is negative -
 * confirm. */
148 readIndex += (int32_t) delaySize;
151 /* Working pointer for state buffer is updated */
154 /* blockSize samples are read from the state buffer */
/* NOTE(review): the call below is cut off after a trailing comma in this
 * listing; pb is passed for two consecutive pointer arguments. */
155 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1,
156 (int32_t *) pb, (int32_t *) pb, blockSize, 1,
159 /* Working pointer for the scratch buffer */
162 /* Working pointer for destination buffer */
168 /* Run the below code for Cortex-M4 and Cortex-M3 */
170 /* Loop over the blockSize. Unroll by a factor of 4.
171 * Compute 4 Multiplications at a time. */
172 blkCnt = blockSize >> 2u;
176 /* Perform Multiplications and store in destination buffer */
/* First tap: plain stores, so the output block needs no prior clearing. */
177 *pOut++ = *px++ * coeff;
178 *pOut++ = *px++ * coeff;
179 *pOut++ = *px++ * coeff;
180 *pOut++ = *px++ * coeff;
182 /* Decrement the loop counter */
186 /* If the blockSize is not a multiple of 4,
187 * compute the remaining samples */
188 blkCnt = blockSize % 0x4u;
192 /* Perform Multiplications and store in destination buffer */
193 *pOut++ = *px++ * coeff;
195 /* Decrement the loop counter */
199 /* Load the coefficient value and
200 * increment the coefficient buffer for the next set of state values */
203 /* Read Index, from where the state buffer should be read, is calculated. */
204 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
206 /* Wraparound of readIndex */
209 readIndex += (int32_t) delaySize;
212 /* Loop over the number of taps. */
/* One tap has already been consumed above, leaving numTaps - 1.
 * NOTE(review): the subtraction is unsigned, so numTaps == 0 would wrap to
 * UINT32_MAX - confirm callers never configure zero taps. */
213 tapCnt = (uint32_t) numTaps - 1u;
218 /* Working pointer for state buffer is updated */
221 /* blockSize samples are read from the state buffer */
222 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1,
223 (int32_t *) pb, (int32_t *) pb, blockSize, 1,
226 /* Working pointer for the scratch buffer */
229 /* Working pointer for destination buffer */
232 /* Loop over the blockSize. Unroll by a factor of 4.
233 * Compute 4 MACS at a time. */
234 blkCnt = blockSize >> 2u;
238 /* Perform Multiply-Accumulate */
/* Remaining taps: products are added onto the values already stored. */
239 *pOut++ += *px++ * coeff;
240 *pOut++ += *px++ * coeff;
241 *pOut++ += *px++ * coeff;
242 *pOut++ += *px++ * coeff;
244 /* Decrement the loop counter */
248 /* If the blockSize is not a multiple of 4,
249 * compute the remaining samples */
250 blkCnt = blockSize % 0x4u;
254 /* Perform Multiply-Accumulate */
255 *pOut++ += *px++ * coeff;
257 /* Decrement the loop counter */
261 /* Load the coefficient value and
262 * increment the coefficient buffer for the next set of state values */
265 /* Read Index, from where the state buffer should be read, is calculated. */
266 readIndex = ((int32_t) S->stateIndex -
267 (int32_t) blockSize) - *pTapDelay++;
269 /* Wraparound of readIndex */
272 readIndex += (int32_t) delaySize;
275 /* Decrement the tap loop counter */
/* The statements from here to the #endif repeat the store / accumulate
 * sequence one sample per statement, without the unroll-by-four groups. */
281 /* Run the below code for Cortex-M0 */
287 /* Perform Multiplications and store in destination buffer */
288 *pOut++ = *px++ * coeff;
290 /* Decrement the loop counter */
294 /* Load the coefficient value and
295 * increment the coefficient buffer for the next set of state values */
298 /* Read Index, from where the state buffer should be read, is calculated. */
299 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
301 /* Wraparound of readIndex */
304 readIndex += (int32_t) delaySize;
307 /* Loop over the number of taps. */
308 tapCnt = (uint32_t) numTaps - 1u;
313 /* Working pointer for state buffer is updated */
316 /* blockSize samples are read from the state buffer */
317 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1,
318 (int32_t *) pb, (int32_t *) pb, blockSize, 1,
321 /* Working pointer for the scratch buffer */
324 /* Working pointer for destination buffer */
331 /* Perform Multiply-Accumulate */
332 *pOut++ += *px++ * coeff;
334 /* Decrement the loop counter */
338 /* Load the coefficient value and
339 * increment the coefficient buffer for the next set of state values */
342 /* Read Index, from where the state buffer should be read, is calculated. */
/* NOTE(review): the left-hand side of this assignment is missing from this
 * listing; the matching statements above assign to readIndex. */
344 ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
346 /* Wraparound of readIndex */
349 readIndex += (int32_t) delaySize;
352 /* Decrement the tap loop counter */
356 #endif /* #ifndef ARM_MATH_CM0 */
361 * @} end of FIR_Sparse group