1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010 ARM Limited. All rights reserved.
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_decimate_f32.c
10 * Description: FIR decimation for floating-point sequences.
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
14 * Version 1.0.10 2011/7/15
15 * Big Endian support added and Merged M0 and M3/M4 Source code.
17 * Version 1.0.3 2010/11/29
18 * Re-organized the CMSIS folders and updated documentation.
20 * Version 1.0.2 2010/11/11
21 * Documentation updated.
23 * Version 1.0.1 2010/10/05
24 * Production release and review comments incorporated.
26 * Version 1.0.0 2010/09/20
27 * Production release and review comments incorporated
29 * Version 0.0.7 2010/06/10
30 * Misra-C changes done
32 * -------------------------------------------------------------------- */
37 * @ingroup groupFilters
41 * @defgroup FIR_decimate Finite Impulse Response (FIR) Decimator
43 * These functions combine an FIR filter together with a decimator.
44 * They are used in multirate systems for reducing the sample rate of a signal without introducing aliasing distortion.
45 * Conceptually, the functions are equivalent to the block diagram below:
46 * \image html FIRDecimator.gif "Components included in the FIR Decimator functions"
47 * When decimating by a factor of <code>M</code>, the signal should be prefiltered by a lowpass filter with a normalized
48 * cutoff frequency of <code>1/M</code> in order to prevent aliasing distortion.
49 * The user of the function is responsible for providing the filter coefficients.
51 * The FIR decimator functions provided in the CMSIS DSP Library combine the FIR filter and the decimator in an efficient manner.
52 * Instead of calculating all of the FIR filter outputs and discarding <code>M-1</code> out of every <code>M</code>, only the
53 * samples output by the decimator are computed.
54 * The functions operate on blocks of input and output data.
55 * <code>pSrc</code> points to an array of <code>blockSize</code> input values and
56 * <code>pDst</code> points to an array of <code>blockSize/M</code> output values.
57 * In order to have an integer number of output samples <code>blockSize</code>
58 * must always be a multiple of the decimation factor <code>M</code>.
60 * The library provides separate functions for Q15, Q31 and floating-point data types.
63 * The FIR portion of the algorithm uses the standard form filter:
65 * y[n] = b[0] * x[n] + b[1] * x[n-1] + b[2] * x[n-2] + ...+ b[numTaps-1] * x[n-numTaps+1]
67 * where, <code>b[n]</code> are the filter coefficients.
69 * The <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.
70 * Coefficients are stored in time reversed order.
73 * {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
76 * <code>pState</code> points to a state array of size <code>numTaps + blockSize - 1</code>.
77 * Samples in the state buffer are stored oldest first, where x[n] is the first sample of the current input block:
80 * {x[n-numTaps+1], x[n-numTaps+2], ..., x[n-1], x[n], x[n+1], ..., x[n+blockSize-1]}
82 * The state variables are updated after each block of data is processed; the coefficients are untouched.
84 * \par Instance Structure
85 * The coefficients and state variables for a filter are stored together in an instance data structure.
86 * A separate instance structure must be defined for each filter.
87 * Coefficient arrays may be shared among several instances while state variable array should be allocated separately.
88 * There are separate instance structure declarations for each of the 3 supported data types.
90 * \par Initialization Functions
91 * There is also an associated initialization function for each data type.
92 * The initialization function performs the following operations:
93 * - Sets the values of the internal structure fields.
94 * - Zeros out the values in the state buffer.
95 * - Checks to make sure that the size of the input is a multiple of the decimation factor.
98 * Use of the initialization function is optional.
99 * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
100 * To place an instance structure into a const data section, the instance structure must be manually initialized.
101 * The code below statically initializes each of the 3 different data type filter instance structures
103 *arm_fir_decimate_instance_f32 S = {M, numTaps, pCoeffs, pState};
104 *arm_fir_decimate_instance_q31 S = {M, numTaps, pCoeffs, pState};
105 *arm_fir_decimate_instance_q15 S = {M, numTaps, pCoeffs, pState};
107 * where <code>M</code> is the decimation factor; <code>numTaps</code> is the number of filter coefficients in the filter;
108 * <code>pCoeffs</code> is the address of the coefficient buffer;
109 * <code>pState</code> is the address of the state buffer.
110 * Be sure to set the values in the state buffer to zeros when doing static initialization.
112 * \par Fixed-Point Behavior
113 * Care must be taken when using the fixed-point versions of the FIR decimate filter functions.
114 * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
115 * Refer to the function specific documentation below for usage guidelines.
119 * @addtogroup FIR_decimate
124 * @brief Processing function for the floating-point FIR decimator.
125 * @param[in] *S points to an instance of the floating-point FIR decimator structure.
126 * @param[in] *pSrc points to the block of input data.
127 * @param[out] *pDst points to the block of output data.
128 * @param[in] blockSize number of input samples to process per call.
132 void arm_fir_decimate_f32(
133 const arm_fir_decimate_instance_f32 * S,
138 float32_t *pState = S->pState; /* State pointer */
139 float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
140 float32_t *pStateCurnt; /* Points to the current sample of the state */
141 float32_t *px, *pb; /* Temporary pointers for state and coefficient buffers */
142 float32_t sum0; /* Accumulator */
143 float32_t x0, c0; /* Temporary variables to hold state and coefficient values */
144 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
145 uint32_t i, tapCnt, blkCnt, outBlockSize = blockSize / S->M; /* Loop counters */
149 /* Run the below code for Cortex-M4 and Cortex-M3 */
151 /* S->pState buffer contains previous frame (numTaps - 1) samples */
152 /* pStateCurnt points to the location where the new input data should be written */
153 pStateCurnt = S->pState + (numTaps - 1u);
155 /* Total number of output samples to be computed */
156 blkCnt = outBlockSize;
160 /* Copy decimation factor number of new input samples into the state buffer */
165 *pStateCurnt++ = *pSrc++;
169 /* Set accumulator to zero */
172 /* Initialize state pointer */
175 /* Initialize coeff pointer */
178 /* Loop unrolling. Process 4 taps at a time. */
179 tapCnt = numTaps >> 2;
181 /* Loop over the number of taps. Unroll by a factor of 4.
182 ** Repeat until numTaps - (numTaps % 4) coefficients have been processed. */
185 /* Read the b[numTaps-1] coefficient */
188 /* Read x[n-numTaps+1] sample */
191 /* Perform the multiply-accumulate */
194 /* Read the b[numTaps-2] coefficient */
197 /* Read x[n-numTaps+2] sample */
200 /* Perform the multiply-accumulate */
203 /* Read the b[numTaps-3] coefficient */
206 /* Read x[n-numTaps+3] sample */
209 /* Perform the multiply-accumulate */
212 /* Read the b[numTaps-4] coefficient */
215 /* Read x[n-numTaps+4] sample */
218 /* Perform the multiply-accumulate */
221 /* Decrement the loop counter */
225 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
226 tapCnt = numTaps % 0x4u;
230 /* Read coefficients */
233 /* Fetch 1 state variable */
236 /* Perform the multiply-accumulate */
239 /* Decrement the loop counter */
243 /* Advance the state pointer by the decimation factor
244 * to process the next group of decimation factor number samples */
245 pState = pState + S->M;
247 /* The result is in the accumulator, store in the destination buffer. */
250 /* Decrement the loop counter */
254 /* Processing is complete.
255 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
256 ** This prepares the state buffer for the next function call. */
258 /* Points to the start of the state buffer */
259 pStateCurnt = S->pState;
261 i = (numTaps - 1u) >> 2;
266 *pStateCurnt++ = *pState++;
267 *pStateCurnt++ = *pState++;
268 *pStateCurnt++ = *pState++;
269 *pStateCurnt++ = *pState++;
271 /* Decrement the loop counter */
275 i = (numTaps - 1u) % 0x04u;
280 *pStateCurnt++ = *pState++;
282 /* Decrement the loop counter */
288 /* Run the below code for Cortex-M0 */
290 /* S->pState buffer contains previous frame (numTaps - 1) samples */
291 /* pStateCurnt points to the location where the new input data should be written */
292 pStateCurnt = S->pState + (numTaps - 1u);
294 /* Total number of output samples to be computed */
295 blkCnt = outBlockSize;
299 /* Copy decimation factor number of new input samples into the state buffer */
304 *pStateCurnt++ = *pSrc++;
308 /* Set accumulator to zero */
311 /* Initialize state pointer */
314 /* Initialize coeff pointer */
321 /* Read coefficients */
324 /* Fetch 1 state variable */
327 /* Perform the multiply-accumulate */
330 /* Decrement the loop counter */
334 /* Advance the state pointer by the decimation factor
335 * to process the next group of decimation factor number samples */
336 pState = pState + S->M;
338 /* The result is in the accumulator, store in the destination buffer. */
341 /* Decrement the loop counter */
345 /* Processing is complete.
346 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
347 ** This prepares the state buffer for the next function call. */
349 /* Points to the start of the state buffer */
350 pStateCurnt = S->pState;
352 /* Copy numTaps - 1 values */
358 *pStateCurnt++ = *pState++;
360 /* Decrement the loop counter */
364 #endif /* #ifndef ARM_MATH_CM0 */
369 * @} end of FIR_decimate group