1 /*-----------------------------------------------------------------------------
2 * Copyright (C) 2010 ARM Limited. All rights reserved.
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_interpolate_q31.c
10 * Description: Q31 FIR interpolation.
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
14 * Version 1.0.10 2011/7/15
15 * Big Endian support added and Merged M0 and M3/M4 Source code.
17 * Version 1.0.3 2010/11/29
18 * Re-organized the CMSIS folders and updated documentation.
20 * Version 1.0.2 2010/11/11
21 * Documentation updated.
23 * Version 1.0.1 2010/10/05
24 * Production release and review comments incorporated.
26 * Version 1.0.0 2010/09/20
27 * Production release and review comments incorporated
29 * Version 0.0.7 2010/06/10
30 * Misra-C changes done
31 * ---------------------------------------------------------------------------*/
36 * @ingroup groupFilters
40 * @addtogroup FIR_Interpolate
45 * @brief Processing function for the Q31 FIR interpolator.
46 * @param[in] *S points to an instance of the Q31 FIR interpolator structure.
47 * @param[in] *pSrc points to the block of input data.
48 * @param[out] *pDst points to the block of output data.
49 * @param[in] blockSize number of input samples to process per call.
52 * <b>Scaling and Overflow Behavior:</b>
54 * The function is implemented using an internal 64-bit accumulator.
55 * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
56 * Thus, if the accumulator result overflows, it wraps around rather than clipping.
57 * In order to avoid overflows completely, the input signal must be scaled down by <code>1/(numTaps/L)</code>,
58 * since <code>numTaps/L</code> additions occur per output sample.
59 * After all multiply-accumulates are performed, the 2.62 accumulator is shifted right by 31 bits and the result is cast to 1.31 format; no saturation is applied.
63 void arm_fir_interpolate_q31(
64 const arm_fir_interpolate_instance_q31 * S,
69 q31_t *pState = S->pState; /* State pointer */
70 q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
71 q31_t *pStateCurnt; /* Points to the location in the state buffer where the next input sample is written */
72 q31_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */
76 /* Run the below code for Cortex-M4 and Cortex-M3 */
78 q63_t sum0; /* 64-bit accumulator for one output sample */
79 q31_t x0, c0; /* Temporary variables to hold state and coefficient values */
80 uint32_t i, blkCnt, j; /* Loop counters */
81 uint16_t phaseLen = S->phaseLength, tapCnt; /* Length of each polyphase filter component; tap loop counter */
84 /* S->pState buffer contains previous frame (phaseLen - 1) samples */
85 /* pStateCurnt points to the location where the new input data should be written */
86 pStateCurnt = S->pState + ((q31_t) phaseLen - 1);
88 /* Total number of input samples */
91 /* Loop over the blockSize. */
94 /* Copy new input sample into the state buffer */
95 *pStateCurnt++ = *pSrc++;
97 /* Address modifier index of coefficient buffer */
100 /* Loop over the Interpolation factor. */
104 /* Set accumulator to zero */
107 /* Initialize state pointer */
110 /* Initialize coefficient pointer */
111 ptr2 = pCoeffs + (S->L - j); /* Start offset into the coefficient buffer depends on the current index j */
113 /* Loop over the polyPhase length. Unroll by a factor of 4.
114 ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
115 tapCnt = phaseLen >> 2;
119 /* Read the coefficient */
122 /* Upsampling is done by stuffing L-1 zeros between each sample.
123 * So instead of multiplying zeros with coefficients,
124 * increment the coefficient pointer by the interpolation factor. */
127 /* Read the input sample */
130 /* Perform the multiply-accumulate */
131 sum0 += (q63_t) x0 *c0;
133 /* Read the coefficient */
136 /* Increment the coefficient pointer by the interpolation factor. */
139 /* Read the input sample */
142 /* Perform the multiply-accumulate */
143 sum0 += (q63_t) x0 *c0;
145 /* Read the coefficient */
148 /* Increment the coefficient pointer by the interpolation factor. */
151 /* Read the input sample */
154 /* Perform the multiply-accumulate */
155 sum0 += (q63_t) x0 *c0;
157 /* Read the coefficient */
160 /* Increment the coefficient pointer by the interpolation factor. */
163 /* Read the input sample */
166 /* Perform the multiply-accumulate */
167 sum0 += (q63_t) x0 *c0;
169 /* Decrement the loop counter */
173 /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
174 tapCnt = phaseLen & 0x3u;
178 /* Read the coefficient */
181 /* Increment the coefficient pointer by the interpolation factor. */
184 /* Read the input sample */
187 /* Perform the multiply-accumulate */
188 sum0 += (q63_t) x0 *c0;
190 /* Decrement the loop counter */
194 /* The result is in the accumulator, store in the destination buffer. */
195 *pDst++ = (q31_t) (sum0 >> 31); /* Shift the 64-bit sum right by 31 bits and cast to q31_t */
197 /* Increment the address modifier index of coefficient buffer */
200 /* Decrement the loop counter */
204 /* Advance the state pointer by 1
205 * to process the next group of interpolation factor number samples */
208 /* Decrement the loop counter */
212 /* Processing is complete.
213 ** Now copy the last phaseLen - 1 samples to the start of the state buffer.
214 ** This prepares the state buffer for the next function call. */
216 /* Points to the start of the state buffer */
217 pStateCurnt = S->pState;
219 tapCnt = (phaseLen - 1u) >> 2u; /* Number of groups of four samples to copy */
224 *pStateCurnt++ = *pState++;
225 *pStateCurnt++ = *pState++;
226 *pStateCurnt++ = *pState++;
227 *pStateCurnt++ = *pState++;
229 /* Decrement the loop counter */
233 tapCnt = (phaseLen - 1u) % 0x04u; /* Samples left over after the groups of four */
238 *pStateCurnt++ = *pState++;
240 /* Decrement the loop counter */
246 /* Run the below code for Cortex-M0 */
248 q63_t sum; /* Accumulator */
249 q31_t x0, c0; /* Temporary variables to hold state and coefficient values */
250 uint32_t i, blkCnt; /* Loop counters */
251 uint16_t phaseLen = S->phaseLength, tapCnt; /* Length of each polyphase filter component; tap loop counter */
254 /* S->pState buffer contains previous frame (phaseLen - 1) samples */
255 /* pStateCurnt points to the location where the new input data should be written */
256 pStateCurnt = S->pState + ((q31_t) phaseLen - 1);
258 /* Total number of input samples */
261 /* Loop over the blockSize. */
264 /* Copy new input sample into the state buffer */
265 *pStateCurnt++ = *pSrc++;
267 /* Loop over the Interpolation factor. */
272 /* Set accumulator to zero */
275 /* Initialize state pointer */
278 /* Initialize coefficient pointer */
279 ptr2 = pCoeffs + (i - 1u); /* Start offset into the coefficient buffer depends on the loop counter i */
285 /* Read the coefficient */
288 /* Increment the coefficient pointer by the interpolation factor. */
291 /* Read the input sample */
294 /* Perform the multiply-accumulate */
295 sum += (q63_t) x0 *c0;
297 /* Decrement the loop counter */
301 /* The result is in the accumulator, store in the destination buffer. */
302 *pDst++ = (q31_t) (sum >> 31); /* Shift the 64-bit sum right by 31 bits and cast to q31_t */
304 /* Decrement the loop counter */
308 /* Advance the state pointer by 1
309 * to process the next group of interpolation factor number samples */
312 /* Decrement the loop counter */
316 /* Processing is complete.
317 ** Now copy the last phaseLen - 1 samples to the start of the state buffer.
318 ** This prepares the state buffer for the next function call. */
320 /* Points to the start of the state buffer */
321 pStateCurnt = S->pState;
323 tapCnt = phaseLen - 1u; /* Number of samples to copy, one per iteration */
328 *pStateCurnt++ = *pState++;
330 /* Decrement the loop counter */
334 #endif /* #ifndef ARM_MATH_CM0 */
339 * @} end of FIR_Interpolate group