1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010 ARM Limited. All rights reserved.
7 * Project: CMSIS DSP Library
8 * Title: arm_cfft_radix4_q31.c
10 * Description: This file contains the function definitions for the Radix-4 FFT & IFFT and
11 * in-place bit reversal using a bit reversal table
13 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
15 * Version 1.0.10 2011/7/15
16 * Big Endian support added and Merged M0 and M3/M4 Source code.
18 * Version 1.0.3 2010/11/29
19 * Re-organized the CMSIS folders and updated documentation.
21 * Version 1.0.2 2010/11/11
22 * Documentation updated.
24 * Version 1.0.1 2010/10/05
25 * Production release and review comments incorporated.
27 * Version 1.0.0 2010/09/20
28 * Production release and review comments incorporated.
30 * Version 0.0.5 2010/04/26
31 * incorporated review comments and updated with latest CMSIS layer
33 * Version 0.0.3 2010/03/10
35 * -------------------------------------------------------------------- */
40 * @ingroup groupTransforms
44 * @addtogroup CFFT_CIFFT
50 * @brief Processing function for the Q31 CFFT/CIFFT.
51 * @param[in] *S points to an instance of the Q31 CFFT/CIFFT structure.
52 * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
55 * \par Input and output formats:
57 * Internally the input is downscaled by a total of 2 bits per radix-4 stage to avoid saturation inside the CFFT/CIFFT process.
58 * Hence the output format is different for different FFT sizes.
59 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
61 * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"
62 * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"
66 void arm_cfft_radix4_q31(
67 const arm_cfft_radix4_instance_q31 * S,
72 /* Complex IFFT radix-4 */
73 arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle,
78 /* Complex FFT radix-4 */
79 arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle,
84 if(S->bitReverseFlag == 1u)
87 arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
93 * @} end of CFFT_CIFFT group
97 * Radix-4 FFT algorithm used is :
99 * Input real and imaginary data:
101 * x(n+N/4 ) = xb + j * yb
102 * x(n+N/2 ) = xc + j * yc
103 * x(n+3N/4) = xd + j * yd
106 * Output real and imaginary data:
107 * x(4r) = xa'+ j * ya'
108 * x(4r+1) = xb'+ j * yb'
109 * x(4r+2) = xc'+ j * yc'
110 * x(4r+3) = xd'+ j * yd'
113 * Twiddle factors for radix-4 FFT:
114 * Wn = co1 + j * (- si1)
115 * W2n = co2 + j * (- si2)
116 * W3n = co3 + j * (- si3)
118 * Butterfly implementation:
119 * xa' = xa + xb + xc + xd
120 * ya' = ya + yb + yc + yd
121 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
122 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
123 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
124 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
125 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
126 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
131 * @brief Core function for the Q31 CFFT butterfly process.
132 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
133 * @param[in] fftLen length of the FFT.
134 * @param[in] *pCoef points to twiddle coefficient buffer.
135 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
139 void arm_radix4_butterfly_q31(
143 uint32_t twidCoefModifier)
145 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
146 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
149 /* Total process is divided into three stages */
151 /* process first stage, middle stages, & last stage */
154 /* start of first stage process */
156 /* Initializations for the first stage */
166 /* Calculation of first stage */
169 /* index calculation for the input as, */
170 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
175 /* input is in 1.31(q31) format; each operand is shifted right by 4 to provide 4 guard bits */
177 /* Butterfly implementation */
179 r1 = (pSrc[(2u * i0)] >> 4u) + (pSrc[(2u * i2)] >> 4u);
181 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
184 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
186 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
189 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
191 /* xa' = xa + xb + xc + xd */
192 pSrc[2u * i0] = (r1 + t1);
193 /* (xa + xc) - (xb + xd) */
196 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
197 /* ya' = ya + yb + yc + yd */
198 pSrc[(2u * i0) + 1u] = (s1 + t2);
200 /* (ya + yc) - (yb + yd) */
204 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
206 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
208 /* index calculation for the coefficients */
210 co2 = pCoef[ia2 * 2u];
211 si2 = pCoef[(ia2 * 2u) + 1u];
213 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
214 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
215 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
217 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
218 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
219 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
221 /* (xa - xc) + (yb - yd) */
223 /* (xa - xc) - (yb - yd) */
226 /* (ya - yc) - (xb - xd) */
228 /* (ya - yc) + (xb - xd) */
231 co1 = pCoef[ia1 * 2u];
232 si1 = pCoef[(ia1 * 2u) + 1u];
234 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
235 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
236 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
238 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
239 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
240 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
242 /* index calculation for the coefficients */
244 co3 = pCoef[ia3 * 2u];
245 si3 = pCoef[(ia3 * 2u) + 1u];
247 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
248 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
249 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
251 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
252 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
253 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
255 /* Twiddle coefficients index modifier */
256 ia1 = ia1 + twidCoefModifier;
258 /* Updating input index */
263 /* end of first stage process */
265 /* data is in 5.27(q27) format */
268 /* start of Middle stages process */
271 /* each middle stage scales the data down by two bits */
273 twidCoefModifier <<= 2u;
276 for (k = fftLen / 4u; k > 4u; k >>= 2u)
278 /* Initializations for the middle stage */
283 /* Calculation of the middle stage */
284 for (j = 0u; j <= (n2 - 1u); j++)
286 /* index calculation for the coefficients */
289 co1 = pCoef[ia1 * 2u];
290 si1 = pCoef[(ia1 * 2u) + 1u];
291 co2 = pCoef[ia2 * 2u];
292 si2 = pCoef[(ia2 * 2u) + 1u];
293 co3 = pCoef[ia3 * 2u];
294 si3 = pCoef[(ia3 * 2u) + 1u];
295 /* Twiddle coefficients index modifier */
296 ia1 = ia1 + twidCoefModifier;
298 for (i0 = j; i0 < fftLen; i0 += n1)
300 /* index calculation for the input as, */
301 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
306 /* Butterfly implementation */
308 r1 = pSrc[2u * i0] + pSrc[2u * i2];
310 r2 = pSrc[2u * i0] - pSrc[2u * i2];
313 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
315 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
318 t1 = pSrc[2u * i1] + pSrc[2u * i3];
320 /* xa' = xa + xb + xc + xd */
321 pSrc[2u * i0] = (r1 + t1) >> 2u;
322 /* xa + xc -(xb + xd) */
326 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
327 /* ya' = ya + yb + yc + yd */
328 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
330 /* (ya + yc) - (yb + yd) */
334 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
336 t2 = pSrc[2u * i1] - pSrc[2u * i3];
338 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
339 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
340 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u;
342 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
343 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
344 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u;
346 /* (xa - xc) + (yb - yd) */
348 /* (xa - xc) - (yb - yd) */
351 /* (ya - yc) - (xb - xd) */
353 /* (ya - yc) + (xb - xd) */
356 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
357 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
358 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
360 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
361 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
362 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
364 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
365 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
366 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
368 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
369 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
370 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
373 twidCoefModifier <<= 2u;
376 /* End of Middle stages process */
378 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
379 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
380 /* data is in 7.25(q25) format for the 64 point as there is 1 middle stage */
381 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
384 /* start of Last stage process */
386 /* Initializations of last stage */
390 /* Calculations of last stage */
391 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
393 /* index calculation for the input as, */
394 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
399 /* Butterfly implementation */
401 r1 = pSrc[2u * i0] + pSrc[2u * i2];
403 r2 = pSrc[2u * i0] - pSrc[2u * i2];
406 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
408 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
411 t1 = pSrc[2u * i1] + pSrc[2u * i3];
412 /* xa' = xa + xb + xc + xd */
413 pSrc[2u * i0] = (r1 + t1);
414 /* (xa + xc) - (xb + xd) */
418 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
419 /* ya' = ya + yb + yc + yd */
420 pSrc[(2u * i0) + 1u] = (s1 + t2);
421 /* (ya + yc) - (yb + yd) */
425 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
427 t2 = pSrc[2u * i1] - pSrc[2u * i3];
429 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
431 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
432 pSrc[(2u * i1) + 1u] = s1;
444 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
446 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
447 pSrc[(2u * i2) + 1u] = s1;
449 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
451 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
452 pSrc[(2u * i3) + 1u] = s2;
457 /* output is in 11.21(q21) format for the 1024 point */
458 /* output is in 9.23(q23) format for the 256 point */
459 /* output is in 7.25(q25) format for the 64 point */
460 /* output is in 5.27(q27) format for the 16 point */
462 /* End of last stage process */
468 * @brief Core function for the Q31 CIFFT butterfly process.
469 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
470 * @param[in] fftLen length of the FFT.
471 * @param[in] *pCoef points to twiddle coefficient buffer.
472 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
478 * Radix-4 IFFT algorithm used is :
480 * CIFFT uses same twiddle coefficients as CFFT Function
481 * x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4]
484 * IFFT is implemented with following changes in equations from FFT
486 * Input real and imaginary data:
488 * x(n+N/4 ) = xb + j * yb
489 * x(n+N/2 ) = xc + j * yc
490 * x(n+3N/4) = xd + j * yd
493 * Output real and imaginary data:
494 * x(4r) = xa'+ j * ya'
495 * x(4r+1) = xb'+ j * yb'
496 * x(4r+2) = xc'+ j * yc'
497 * x(4r+3) = xd'+ j * yd'
500 * Twiddle factors for radix-4 IFFT:
501 * Wn = co1 + j * (si1)
502 * W2n = co2 + j * (si2)
503 * W3n = co3 + j * (si3)
505 * The real and imaginary output values for the radix-4 butterfly are
506 * xa' = xa + xb + xc + xd
507 * ya' = ya + yb + yc + yd
508 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
509 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
510 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
511 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
512 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
513 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
517 void arm_radix4_butterfly_inverse_q31(
521 uint32_t twidCoefModifier)
523 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
524 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
526 /* input is in 1.31(q31) format for all FFT sizes */
527 /* Total process is divided into three stages */
528 /* process first stage, middle stages, & last stage */
530 /* Start of first stage process */
532 /* Initializations for the first stage */
545 /* input is in 1.31(q31) format; each operand is shifted right by 4 to provide 4 guard bits */
547 /* index calculation for the input as, */
548 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
553 /* Butterfly implementation */
555 r1 = (pSrc[2u * i0] >> 4u) + (pSrc[2u * i2] >> 4u);
557 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
560 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
562 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
565 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
567 /* xa' = xa + xb + xc + xd */
568 pSrc[2u * i0] = (r1 + t1);
569 /* (xa + xc) - (xb + xd) */
572 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
573 /* ya' = ya + yb + yc + yd */
574 pSrc[(2u * i0) + 1u] = (s1 + t2);
576 /* (ya + yc) - (yb + yd) */
580 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
582 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
584 /* index calculation for the coefficients */
586 co2 = pCoef[ia2 * 2u];
587 si2 = pCoef[(ia2 * 2u) + 1u];
589 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
590 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
591 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
593 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
594 pSrc[2u * i1 + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
595 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
597 /* (xa - xc) - (yb - yd) */
599 /* (xa - xc) + (yb - yd) */
602 /* (ya - yc) + (xb - xd) */
604 /* (ya - yc) - (xb - xd) */
607 co1 = pCoef[ia1 * 2u];
608 si1 = pCoef[(ia1 * 2u) + 1u];
610 /* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1) */
611 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
612 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
614 /* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
615 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
616 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
618 /* index calculation for the coefficients */
620 co3 = pCoef[ia3 * 2u];
621 si3 = pCoef[(ia3 * 2u) + 1u];
623 /* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
624 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
625 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
627 /* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
628 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
629 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
631 /* Twiddle coefficients index modifier */
632 ia1 = ia1 + twidCoefModifier;
634 /* Updating input index */
639 /* data is in 5.27(q27) format */
640 /* each middle stage scales the data down by two bits */
643 /* Start of Middle stages process */
645 twidCoefModifier <<= 2u;
647 /* Calculation of the second stage through the last-but-one stage */
648 for (k = fftLen / 4u; k > 4u; k >>= 2u)
650 /* Initializations for the middle stage */
655 for (j = 0; j <= (n2 - 1u); j++)
657 /* index calculation for the coefficients */
660 co1 = pCoef[ia1 * 2u];
661 si1 = pCoef[(ia1 * 2u) + 1u];
662 co2 = pCoef[ia2 * 2u];
663 si2 = pCoef[(ia2 * 2u) + 1u];
664 co3 = pCoef[ia3 * 2u];
665 si3 = pCoef[(ia3 * 2u) + 1u];
666 /* Twiddle coefficients index modifier */
667 ia1 = ia1 + twidCoefModifier;
669 for (i0 = j; i0 < fftLen; i0 += n1)
671 /* index calculation for the input as, */
672 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
677 /* Butterfly implementation */
679 r1 = pSrc[2u * i0] + pSrc[2u * i2];
681 r2 = pSrc[2u * i0] - pSrc[2u * i2];
684 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
686 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
689 t1 = pSrc[2u * i1] + pSrc[2u * i3];
691 /* xa' = xa + xb + xc + xd */
692 pSrc[2u * i0] = (r1 + t1) >> 2u;
693 /* xa + xc -(xb + xd) */
696 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
697 /* ya' = ya + yb + yc + yd */
698 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
700 /* (ya + yc) - (yb + yd) */
704 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
706 t2 = pSrc[2u * i1] - pSrc[2u * i3];
708 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
709 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) -
710 ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u;
712 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
713 pSrc[(2u * i1) + 1u] =
714 (((int32_t) (((q63_t) s1 * co2) >> 32u)) +
715 ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u;
717 /* (xa - xc) - (yb - yd) */
719 /* (xa - xc) + (yb - yd) */
722 /* (ya - yc) + (xb - xd) */
724 /* (ya - yc) - (xb - xd) */
727 /* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1) */
728 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
729 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
731 /* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
732 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
733 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
735 /* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
736 pSrc[(2u * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
737 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
739 /* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
740 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
741 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
744 twidCoefModifier <<= 2u;
747 /* End of Middle stages process */
749 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
750 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
751 /* data is in 7.25(q25) format for the 64 point as there is 1 middle stage */
752 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
755 /* Start of last stage process */
758 /* Initializations of last stage */
762 /* Calculations of last stage */
763 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
765 /* index calculation for the input as, */
766 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
771 /* Butterfly implementation */
773 r1 = pSrc[2u * i0] + pSrc[2u * i2];
775 r2 = pSrc[2u * i0] - pSrc[2u * i2];
778 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
780 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
783 t1 = pSrc[2u * i1] + pSrc[2u * i3];
784 /* xa' = xa + xb + xc + xd */
785 pSrc[2u * i0] = (r1 + t1);
786 /* (xa + xc) - (xb + xd) */
790 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
791 /* ya' = ya + yb + yc + yd */
792 pSrc[(2u * i0) + 1u] = (s1 + t2);
793 /* (ya + yc) - (yb + yd) */
797 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
799 t2 = pSrc[2u * i1] - pSrc[2u * i3];
801 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
803 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
804 pSrc[(2u * i1) + 1u] = s1;
806 /* (xa - xc) - (yb-yd) */
809 /* (xa - xc) + (yb-yd) */
812 /* (ya - yc) + (xb-xd) */
815 /* (ya - yc) - (xb-xd) */
818 /* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1) */
820 /* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
821 pSrc[(2u * i2) + 1u] = s1;
823 /* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
825 /* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
826 pSrc[(2u * i3) + 1u] = s2;
830 /* output is in 11.21(q21) format for the 1024 point */
831 /* output is in 9.23(q23) format for the 256 point */
832 /* output is in 7.25(q25) format for the 64 point */
833 /* output is in 5.27(q27) format for the 16 point */
835 /* End of last stage process */
840 * @brief In-place bit reversal function.
841 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
842 * @param[in] fftLen length of the FFT.
843 * @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
844 * @param[in] *pBitRevTable points to bit reversal table.
848 void arm_bitreversal_q31(
851 uint16_t bitRevFactor,
852 uint16_t * pBitRevTable)
854 uint32_t fftLenBy2, fftLenBy2p1, i, j;
857 /* Initializations */
859 fftLenBy2 = fftLen / 2u;
860 fftLenBy2p1 = (fftLen / 2u) + 1u;
862 /* Bit Reversal Implementation */
863 for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u)
867 /* pSrc[i] <-> pSrc[j]; */
869 pSrc[2u * i] = pSrc[2u * j];
872 /* pSrc[i+1u] <-> pSrc[j+1u] */
873 in = pSrc[(2u * i) + 1u];
874 pSrc[(2u * i) + 1u] = pSrc[(2u * j) + 1u];
875 pSrc[(2u * j) + 1u] = in;
877 /* pSrc[i+fftLenBy2p1] <-> pSrc[j+fftLenBy2p1] */
878 in = pSrc[2u * (i + fftLenBy2p1)];
879 pSrc[2u * (i + fftLenBy2p1)] = pSrc[2u * (j + fftLenBy2p1)];
880 pSrc[2u * (j + fftLenBy2p1)] = in;
882 /* pSrc[i+fftLenBy2p1+1u] <-> pSrc[j+fftLenBy2p1+1u] */
883 in = pSrc[(2u * (i + fftLenBy2p1)) + 1u];
884 pSrc[(2u * (i + fftLenBy2p1)) + 1u] =
885 pSrc[(2u * (j + fftLenBy2p1)) + 1u];
886 pSrc[(2u * (j + fftLenBy2p1)) + 1u] = in;
890 /* real part: pSrc[2*(i+1)] <-> pSrc[2*(j+fftLenBy2)] */
891 in = pSrc[2u * (i + 1u)];
892 pSrc[2u * (i + 1u)] = pSrc[2u * (j + fftLenBy2)];
893 pSrc[2u * (j + fftLenBy2)] = in;
895 /* imaginary part: pSrc[2*(i+1)+1] <-> pSrc[2*(j+fftLenBy2)+1] */
896 in = pSrc[(2u * (i + 1u)) + 1u];
897 pSrc[(2u * (i + 1u)) + 1u] = pSrc[(2u * (j + fftLenBy2)) + 1u];
898 pSrc[(2u * (j + fftLenBy2)) + 1u] = in;
900 /* Reading the index for the bit reversal */
903 /* Updating the bit reversal index depending on the fft length */
904 pBitRevTable += bitRevFactor;