1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010 ARM Limited. All rights reserved.
7 * Project: CMSIS DSP Library
8 * Title: arm_cfft_radix4_q15.c
10 * Description: This file contains the definitions of the Radix-4 FFT & IFFT functions and
11 * the in-place bit reversal using the bit reversal table
13 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
15 * Version 1.0.10 2011/7/15
16 * Big Endian support added and Merged M0 and M3/M4 Source code.
18 * Version 1.0.3 2010/11/29
19 * Re-organized the CMSIS folders and updated documentation.
21 * Version 1.0.2 2010/11/11
22 * Documentation updated.
24 * Version 1.0.1 2010/10/05
25 * Production release and review comments incorporated.
27 * Version 1.0.0 2010/09/20
28 * Production release and review comments incorporated.
30 * Version 0.0.5 2010/04/26
31 * incorporated review comments and updated with latest CMSIS layer
33 * Version 0.0.3 2010/03/10
35 * -------------------------------------------------------------------- */
40 * @ingroup groupTransforms
44 * @addtogroup CFFT_CIFFT
51 * @brief Processing function for the Q15 CFFT/CIFFT.
52 * @param[in] *S points to an instance of the Q15 CFFT/CIFFT structure.
53 * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place.
56 * \par Input and output formats:
58 * Internally the input is downscaled by 2 bits (a factor of 4) for every radix-4 stage to avoid saturation inside the CFFT/CIFFT process.
59 * Hence the output format is different for different FFT sizes.
60 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
62 * \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT"
63 * \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT"
/* Q15 radix-4 complex FFT/IFFT entry point; the data in pSrc is processed in place.
 * Visible steps: one radix-4 butterfly pass (inverse or forward variant) driven by
 * S->fftLen and S->pTwiddle, followed by a bit-reversal pass when
 * S->bitReverseFlag == 1u.
 * NOTE(review): the condition that selects the IFFT or FFT branch and the trailing
 * arguments of both butterfly calls are not visible in this view -- confirm against
 * the full source. */
66 void arm_cfft_radix4_q15(
67 const arm_cfft_radix4_instance_q15 * S,
72 /* Complex IFFT radix-4: inverse butterfly over pSrc using the instance's length and twiddle table */
73 arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle,
78 /* Complex FFT radix-4: forward butterfly with the same leading arguments */
79 arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle,
/* Bit reversal is applied only when the instance flag requests it */
83 if(S->bitReverseFlag == 1u)
86 arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
92 * @} end of CFFT_CIFFT group
96 * Radix-4 FFT algorithm used is :
98 * Input real and imaginary data:
100 * x(n+N/4 ) = xb + j * yb
101 * x(n+N/2 ) = xc + j * yc
102 * x(n+3N/4) = xd + j * yd
105 * Output real and imaginary data:
106 * x(4r) = xa'+ j * ya'
107 * x(4r+1) = xb'+ j * yb'
108 * x(4r+2) = xc'+ j * yc'
109 * x(4r+3) = xd'+ j * yd'
112 * Twiddle factors for radix-4 FFT:
113 * Wn = co1 + j * (- si1)
114 * W2n = co2 + j * (- si2)
115 * W3n = co3 + j * (- si3)
117 * The real and imaginary output values for the radix-4 butterfly are
118 * xa' = xa + xb + xc + xd
119 * ya' = ya + yb + yc + yd
120 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
121 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
122 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
123 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
124 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
125 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
130 * @brief Core function for the Q15 CFFT butterfly process.
131 * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type.
132 * @param[in] fftLen length of the FFT.
133 * @param[in] *pCoef16 points to twiddle coefficient buffer.
134 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
138 void arm_radix4_butterfly_q15(
142 uint32_t twidCoefModifier)
147 /* Run the below code for Cortex-M4 and Cortex-M3 */
150 q31_t C1, C2, C3, out1, out2;
151 q31_t *pSrc, *pCoeff;
152 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
155 /* Total process is divided into three stages */
157 /* process first stage, middle stages, & last stage */
159 /* pointer initializations for SIMD calculations */
160 pSrc = (q31_t *) pSrc16;
161 pCoeff = (q31_t *) pCoef16;
163 /* Initializations for the first stage */
170 /* Index for twiddle coefficient */
173 /* Index for input read and output write */
177 /* Input is in 1.15(q15) format */
179 /* start of first stage process */
182 /* Butterfly implementation */
184 /* index calculation for the input as, */
185 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
190 /* Reading i0, i0+fftLen/2 inputs */
191 /* Read ya (real), xa(imag) input */
193 in = ((int16_t) (T & 0xFFFF)) >> 2;
194 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
195 /* Read yc (real), xc(imag) input */
197 in = ((int16_t) (S & 0xFFFF)) >> 2;
198 S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
199 /* R = packed((ya + yc), (xa + xc) ) */
201 /* S = packed((ya - yc), (xa - xc) ) */
204 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
205 /* Read yb (real), xb(imag) input */
207 in = ((int16_t) (T & 0xFFFF)) >> 2;
208 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
209 /* Read yd (real), xd(imag) input */
211 in = ((int16_t) (U & 0xFFFF)) >> 2;
212 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
213 /* T = packed((yb + yd), (xb + xd) ) */
216 /* writing the butterfly processed i0 sample */
217 /* xa' = xa + xb + xc + xd */
218 /* ya' = ya + yb + yc + yd */
219 pSrc[i0] = __SHADD16(R, T);
221 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
224 /* co2 & si2 are read from SIMD Coefficient pointer */
225 C2 = pCoeff[2u * ic];
228 #ifndef ARM_MATH_BIG_ENDIAN
230 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
231 out1 = __SMUAD(C2, R) >> 16u;
232 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
233 out2 = __SMUSDX(C2, R);
237 /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
238 out1 = __SMUSDX(R, C2) >> 16u;
239 /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
240 out2 = __SMUAD(C2, R);
242 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
244 /* Reading i0+fftLen/4 */
245 /* T = packed(yb, xb) */
247 in = ((int16_t) (T & 0xFFFF)) >> 2;
248 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
250 /* writing the butterfly processed i0 + fftLen/4 sample */
251 /* writing output(xc', yc') in little endian format */
252 pSrc[i1] = (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
254 /* Butterfly calculations */
255 /* U = packed(yd, xd) */
257 in = ((int16_t) (U & 0xFFFF)) >> 2;
258 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
259 /* T = packed(yb-yd, xb-xd) */
263 #ifndef ARM_MATH_BIG_ENDIAN
265 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
267 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
272 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
274 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
277 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
279 /* co1 & si1 are read from SIMD Coefficient pointer */
281 /* Butterfly process for the i0+fftLen/2 sample */
283 #ifndef ARM_MATH_BIG_ENDIAN
285 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
286 out1 = __SMUAD(C1, S) >> 16u;
287 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
288 out2 = __SMUSDX(C1, S);
292 /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
293 out1 = __SMUSDX(S, C1) >> 16u;
294 /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
295 out2 = __SMUAD(C1, S);
297 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
299 /* writing output(xb', yb') in little endian format */
300 pSrc[i2] = ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF);
303 /* co3 & si3 are read from SIMD Coefficient pointer */
304 C3 = pCoeff[3u * ic];
305 /* Butterfly process for the i0+3fftLen/4 sample */
307 #ifndef ARM_MATH_BIG_ENDIAN
309 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
310 out1 = __SMUAD(C3, R) >> 16u;
311 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
312 out2 = __SMUSDX(C3, R);
316 /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
317 out1 = __SMUSDX(R, C3) >> 16u;
318 /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
319 out2 = __SMUAD(C3, R);
321 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
323 /* writing output(xd', yd') in little endian format */
324 pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
326 /* Twiddle coefficients index modifier */
327 ic = ic + twidCoefModifier;
329 /* Updating input index */
333 /* data is in 4.12(q12) format */
335 /* end of first stage process */
338 /* start of middle stage process */
340 /* Twiddle coefficients index modifier */
341 twidCoefModifier <<= 2u;
343 /* Calculation of Middle stage */
344 for (k = fftLen / 4u; k > 4u; k >>= 2u)
346 /* Initializations for the middle stage */
351 for (j = 0u; j <= (n2 - 1u); j++)
353 /* index calculation for the coefficients */
355 C2 = pCoeff[2u * ic];
356 C3 = pCoeff[3u * ic];
358 /* Twiddle coefficients index modifier */
359 ic = ic + twidCoefModifier;
361 /* Butterfly implementation */
362 for (i0 = j; i0 < fftLen; i0 += n1)
364 /* index calculation for the input as, */
365 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
370 /* Reading i0, i0+fftLen/2 inputs */
371 /* Read ya (real), xa(imag) input */
374 /* Read yc (real), xc(imag) input */
377 /* R = packed( (ya + yc), (xa + xc)) */
380 /* S = packed((ya - yc), (xa - xc)) */
383 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
384 /* Read yb (real), xb(imag) input */
387 /* Read yd (real), xd(imag) input */
391 /* T = packed( (yb + yd), (xb + xd)) */
395 /* writing the butterfly processed i0 sample */
397 /* xa' = xa + xb + xc + xd */
398 /* ya' = ya + yb + yc + yd */
399 out1 = __SHADD16(R, T);
400 in = ((int16_t) (out1 & 0xFFFF)) >> 1;
401 out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF);
404 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
408 #ifndef ARM_MATH_BIG_ENDIAN
410 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
411 out1 = __SMUAD(C2, R) >> 16u;
413 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
414 out2 = __SMUSDX(C2, R);
418 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
419 out1 = __SMUSDX(R, C2) >> 16u;
421 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
422 out2 = __SMUAD(C2, R);
424 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
426 /* Reading i0+fftLen/4 */
427 /* Read yb (real), xb(imag) input */
430 /* writing the butterfly processed i0 + fftLen/4 sample */
431 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
432 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
433 pSrc[i1] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
435 /* Butterfly calculations */
437 /* Read yd (real), xd(imag) input */
440 /* T = packed(yb-yd, xb-xd) */
444 #ifndef ARM_MATH_BIG_ENDIAN
446 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
449 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
453 /* Butterfly process for the i0+fftLen/2 sample */
454 out1 = __SMUAD(C1, S) >> 16u;
455 out2 = __SMUSDX(C1, S);
459 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
462 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
466 /* Butterfly process for the i0+fftLen/2 sample */
467 out1 = __SMUSDX(S, C1) >> 16u;
468 out2 = __SMUAD(C1, S);
470 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
472 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
473 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
474 pSrc[i2] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
476 /* Butterfly process for the i0+3fftLen/4 sample */
478 #ifndef ARM_MATH_BIG_ENDIAN
480 out1 = __SMUAD(C3, R) >> 16u;
481 out2 = __SMUSDX(C3, R);
485 out1 = __SMUSDX(R, C3) >> 16u;
486 out2 = __SMUAD(C3, R);
488 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
490 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
491 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
492 pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
495 /* Twiddle coefficients index modifier */
496 twidCoefModifier <<= 2u;
498 /* end of middle stage process */
501 /* data is in 10.6(q6) format for the 1024 point */
502 /* data is in 8.8(q8) format for the 256 point */
503 /* data is in 6.10(q10) format for the 64 point */
504 /* data is in 4.12(q12) format for the 16 point */
506 /* Initializations for the last stage */
510 /* start of last stage process */
512 /* Butterfly implementation */
513 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
515 /* index calculation for the input as, */
516 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
521 /* Reading i0, i0+fftLen/2 inputs */
522 /* Read ya (real), xa(imag) input */
524 /* Read yc (real), xc(imag) input */
527 /* R = packed((ya + yc), (xa + xc)) */
529 /* S = packed((ya - yc), (xa - xc)) */
532 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
533 /* Read yb (real), xb(imag) input */
535 /* Read yd (real), xd(imag) input */
538 /* T = packed((yb + yd), (xb + xd)) */
541 /* writing the butterfly processed i0 sample */
542 /* xa' = xa + xb + xc + xd */
543 /* ya' = ya + yb + yc + yd */
544 pSrc[i0] = __SHADD16(R, T);
546 /* R = packed((ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
549 /* Read yb (real), xb(imag) input */
552 /* writing the butterfly processed i0 + fftLen/4 sample */
553 /* xc' = (xa-xb+xc-xd) */
554 /* yc' = (ya-yb+yc-yd) */
557 /* Read yd (real), xd(imag) input */
559 /* T = packed( (yb - yd), (xb - xd)) */
563 #ifndef ARM_MATH_BIG_ENDIAN
565 /* writing the butterfly processed i0 + fftLen/2 sample */
566 /* xb' = (xa+yb-xc-yd) */
567 /* yb' = (ya-xb-yc+xd) */
568 pSrc[i2] = __SHSAX(S, T);
570 /* writing the butterfly processed i0 + 3fftLen/4 sample */
571 /* xd' = (xa-yb-xc+yd) */
572 /* yd' = (ya+xb-yc-xd) */
573 pSrc[i3] = __SHASX(S, T);
577 /* writing the butterfly processed i0 + fftLen/2 sample */
578 /* xb' = (xa+yb-xc-yd) */
579 /* yb' = (ya-xb-yc+xd) */
580 pSrc[i2] = __SHASX(S, T);
582 /* writing the butterfly processed i0 + 3fftLen/4 sample */
583 /* xd' = (xa-yb-xc+yd) */
584 /* yd' = (ya+xb-yc-xd) */
585 pSrc[i3] = __SHSAX(S, T);
587 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
591 /* end of last stage process */
593 /* output is in 11.5(q5) format for the 1024 point */
594 /* output is in 9.7(q7) format for the 256 point */
595 /* output is in 7.9(q9) format for the 64 point */
596 /* output is in 5.11(q11) format for the 16 point */
601 /* Run the below code for Cortex-M0 */
603 q15_t R0, R1, S0, S1, T0, T1, U0, U1;
604 q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
605 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
607 /* Total process is divided into three stages */
609 /* process first stage, middle stages, & last stage */
611 /* Initializations for the first stage */
618 /* Index for twiddle coefficient */
621 /* Index for input read and output write */
625 /* Input is in 1.15(q15) format */
627 /* start of first stage process */
630 /* Butterfly implementation */
632 /* index calculation for the input as, */
633 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
638 /* Reading i0, i0+fftLen/2 inputs */
640 /* input is down scale by 4 to avoid overflow */
641 /* Read ya (real), xa(imag) input */
642 T0 = pSrc16[i0 * 2u] >> 2u;
643 T1 = pSrc16[(i0 * 2u) + 1u] >> 2u;
645 /* input is down scale by 4 to avoid overflow */
646 /* Read yc (real), xc(imag) input */
647 S0 = pSrc16[i2 * 2u] >> 2u;
648 S1 = pSrc16[(i2 * 2u) + 1u] >> 2u;
651 R0 = __SSAT(T0 + S0, 16u);
653 R1 = __SSAT(T1 + S1, 16u);
656 S0 = __SSAT(T0 - S0, 16);
658 S1 = __SSAT(T1 - S1, 16);
660 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
661 /* input is down scale by 4 to avoid overflow */
662 /* Read yb (real), xb(imag) input */
663 T0 = pSrc16[i1 * 2u] >> 2u;
664 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
666 /* input is down scale by 4 to avoid overflow */
667 /* Read yd (real), xd(imag) input */
668 U0 = pSrc16[i3 * 2u] >> 2u;
669 U1 = pSrc16[(i3 * 2u) + 1] >> 2u;
672 T0 = __SSAT(T0 + U0, 16u);
674 T1 = __SSAT(T1 + U1, 16u);
676 /* writing the butterfly processed i0 sample */
677 /* ya' = ya + yb + yc + yd */
678 /* xa' = xa + xb + xc + xd */
679 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
680 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
682 /* R0 = (ya + yc) - (yb + yd) */
683 /* R1 = (xa + xc) - (xb + xd) */
684 R0 = __SSAT(R0 - T0, 16u);
685 R1 = __SSAT(R1 - T1, 16u);
687 /* co2 & si2 are read from Coefficient pointer */
688 Co2 = pCoef16[2u * ic * 2u];
689 Si2 = pCoef16[(2u * ic * 2u) + 1];
691 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
692 out1 = (short) ((Co2 * R0 + Si2 * R1) >> 16u);
693 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
694 out2 = (short) ((-Si2 * R0 + Co2 * R1) >> 16u);
696 /* Reading i0+fftLen/4 */
697 /* input is down scale by 4 to avoid overflow */
698 /* T0 = yb, T1 = xb */
699 T0 = pSrc16[i1 * 2u] >> 2;
700 T1 = pSrc16[(i1 * 2u) + 1] >> 2;
702 /* writing the butterfly processed i0 + fftLen/4 sample */
703 /* writing output(xc', yc') in little endian format */
704 pSrc16[i1 * 2u] = out1;
705 pSrc16[(i1 * 2u) + 1] = out2;
707 /* Butterfly calculations */
708 /* input is down scale by 4 to avoid overflow */
709 /* U0 = yd, U1 = xd */
710 U0 = pSrc16[i3 * 2u] >> 2;
711 U1 = pSrc16[(i3 * 2u) + 1] >> 2;
713 T0 = __SSAT(T0 - U0, 16);
715 T1 = __SSAT(T1 - U1, 16);
717 /* R1 = (ya-yc) + (xb- xd), R0 = (xa-xc) - (yb-yd)) */
718 R0 = (short) __SSAT((q31_t) (S0 - T1), 16);
719 R1 = (short) __SSAT((q31_t) (S1 + T0), 16);
721 /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */
722 S0 = (short) __SSAT(((q31_t) S0 + T1), 16u);
723 S1 = (short) __SSAT(((q31_t) S1 - T0), 16u);
725 /* co1 & si1 are read from Coefficient pointer */
726 Co1 = pCoef16[ic * 2u];
727 Si1 = pCoef16[(ic * 2u) + 1];
728 /* Butterfly process for the i0+fftLen/2 sample */
729 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
730 out1 = (short) ((Si1 * S1 + Co1 * S0) >> 16);
731 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
732 out2 = (short) ((-Si1 * S0 + Co1 * S1) >> 16);
734 /* writing output(xb', yb') in little endian format */
735 pSrc16[i2 * 2u] = out1;
736 pSrc16[(i2 * 2u) + 1] = out2;
738 /* Co3 & si3 are read from Coefficient pointer */
739 Co3 = pCoef16[3u * (ic * 2u)];
740 Si3 = pCoef16[(3u * (ic * 2u)) + 1];
741 /* Butterfly process for the i0+3fftLen/4 sample */
742 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
743 out1 = (short) ((Si3 * R1 + Co3 * R0) >> 16u);
744 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
745 out2 = (short) ((-Si3 * R0 + Co3 * R1) >> 16u);
746 /* writing output(xd', yd') in little endian format */
747 pSrc16[i3 * 2u] = out1;
748 pSrc16[(i3 * 2u) + 1] = out2;
750 /* Twiddle coefficients index modifier */
751 ic = ic + twidCoefModifier;
753 /* Updating input index */
757 /* data is in 4.12(q12) format */
759 /* end of first stage process */
762 /* start of middle stage process */
764 /* Twiddle coefficients index modifier */
765 twidCoefModifier <<= 2u;
767 /* Calculation of Middle stage */
768 for (k = fftLen / 4u; k > 4u; k >>= 2u)
770 /* Initializations for the middle stage */
775 for (j = 0u; j <= (n2 - 1u); j++)
777 /* index calculation for the coefficients */
778 Co1 = pCoef16[ic * 2u];
779 Si1 = pCoef16[(ic * 2u) + 1u];
780 Co2 = pCoef16[2u * (ic * 2u)];
781 Si2 = pCoef16[(2u * (ic * 2u)) + 1u];
782 Co3 = pCoef16[3u * (ic * 2u)];
783 Si3 = pCoef16[(3u * (ic * 2u)) + 1u];
785 /* Twiddle coefficients index modifier */
786 ic = ic + twidCoefModifier;
788 /* Butterfly implementation */
789 for (i0 = j; i0 < fftLen; i0 += n1)
791 /* index calculation for the input as, */
792 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
797 /* Reading i0, i0+fftLen/2 inputs */
798 /* Read ya (real), xa(imag) input */
799 T0 = pSrc16[i0 * 2u];
800 T1 = pSrc16[(i0 * 2u) + 1u];
802 /* Read yc (real), xc(imag) input */
803 S0 = pSrc16[i2 * 2u];
804 S1 = pSrc16[(i2 * 2u) + 1u];
806 /* R0 = (ya + yc), R1 = (xa + xc) */
807 R0 = __SSAT(T0 + S0, 16);
808 R1 = __SSAT(T1 + S1, 16);
810 /* S0 = (ya - yc), S1 =(xa - xc) */
811 S0 = __SSAT(T0 - S0, 16);
812 S1 = __SSAT(T1 - S1, 16);
814 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
815 /* Read yb (real), xb(imag) input */
816 T0 = pSrc16[i1 * 2u];
817 T1 = pSrc16[(i1 * 2u) + 1u];
819 /* Read yd (real), xd(imag) input */
820 U0 = pSrc16[i3 * 2u];
821 U1 = pSrc16[(i3 * 2u) + 1u];
824 /* T0 = (yb + yd), T1 = (xb + xd) */
825 T0 = __SSAT(T0 + U0, 16);
826 T1 = __SSAT(T1 + U1, 16);
828 /* writing the butterfly processed i0 sample */
830 /* xa' = xa + xb + xc + xd */
831 /* ya' = ya + yb + yc + yd */
832 out1 = ((R0 >> 1u) + (T0 >> 1u)) >> 1u;
833 out2 = ((R1 >> 1u) + (T1 >> 1u)) >> 1u;
835 pSrc16[i0 * 2u] = out1;
836 pSrc16[(2u * i0) + 1u] = out2;
838 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
839 R0 = (R0 >> 1u) - (T0 >> 1u);
840 R1 = (R1 >> 1u) - (T1 >> 1u);
842 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
843 out1 = (short) ((Co2 * R0 + Si2 * R1) >> 16u);
845 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
846 out2 = (short) ((-Si2 * R0 + Co2 * R1) >> 16u);
848 /* Reading i0+fftLen/4 */
849 /* Read yb (real), xb(imag) input */
850 T0 = pSrc16[i1 * 2u];
851 T1 = pSrc16[(i1 * 2u) + 1u];
853 /* writing the butterfly processed i0 + fftLen/4 sample */
854 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
855 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
856 pSrc16[i1 * 2u] = out1;
857 pSrc16[(i1 * 2u) + 1u] = out2;
859 /* Butterfly calculations */
861 /* Read yd (real), xd(imag) input */
862 U0 = pSrc16[i3 * 2u];
863 U1 = pSrc16[(i3 * 2u) + 1u];
865 /* T0 = yb-yd, T1 = xb-xd */
866 T0 = __SSAT(T0 - U0, 16);
867 T1 = __SSAT(T1 - U1, 16);
869 /* R0 = (xa-xc) - (yb-yd), R1 = (ya-yc) + (xb-xd) */
870 R0 = (S0 >> 1u) - (T1 >> 1u);
871 R1 = (S1 >> 1u) + (T0 >> 1u);
873 /* S0 = (xa-xc) + (yb-yd), S1 = (ya-yc) - (xb-xd) */
874 S0 = (S0 >> 1u) + (T1 >> 1u);
875 S1 = (S1 >> 1u) - (T0 >> 1u);
877 /* Butterfly process for the i0+fftLen/2 sample */
878 out1 = (short) ((Co1 * S0 + Si1 * S1) >> 16u);
880 out2 = (short) ((-Si1 * S0 + Co1 * S1) >> 16u);
882 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
883 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
884 pSrc16[i2 * 2u] = out1;
885 pSrc16[(i2 * 2u) + 1u] = out2;
887 /* Butterfly process for the i0+3fftLen/4 sample */
888 out1 = (short) ((Si3 * R1 + Co3 * R0) >> 16u);
890 out2 = (short) ((-Si3 * R0 + Co3 * R1) >> 16u);
891 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
892 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
893 pSrc16[i3 * 2u] = out1;
894 pSrc16[(i3 * 2u) + 1u] = out2;
897 /* Twiddle coefficients index modifier */
898 twidCoefModifier <<= 2u;
900 /* end of middle stage process */
903 /* data is in 10.6(q6) format for the 1024 point */
904 /* data is in 8.8(q8) format for the 256 point */
905 /* data is in 6.10(q10) format for the 64 point */
906 /* data is in 4.12(q12) format for the 16 point */
908 /* Initializations for the last stage */
912 /* start of last stage process */
914 /* Butterfly implementation */
915 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
917 /* index calculation for the input as, */
918 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
923 /* Reading i0, i0+fftLen/2 inputs */
924 /* Read ya (real), xa(imag) input */
925 T0 = pSrc16[i0 * 2u];
926 T1 = pSrc16[(i0 * 2u) + 1u];
928 /* Read yc (real), xc(imag) input */
929 S0 = pSrc16[i2 * 2u];
930 S1 = pSrc16[(i2 * 2u) + 1u];
932 /* R0 = (ya + yc), R1 = (xa + xc) */
933 R0 = __SSAT(T0 + S0, 16u);
934 R1 = __SSAT(T1 + S1, 16u);
936 /* S0 = (ya - yc), S1 = (xa - xc) */
937 S0 = __SSAT(T0 - S0, 16u);
938 S1 = __SSAT(T1 - S1, 16u);
940 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
941 /* Read yb (real), xb(imag) input */
942 T0 = pSrc16[i1 * 2u];
943 T1 = pSrc16[(i1 * 2u) + 1u];
944 /* Read yd (real), xd(imag) input */
945 U0 = pSrc16[i3 * 2u];
946 U1 = pSrc16[(i3 * 2u) + 1u];
948 /* T0 = (yb + yd), T1 = (xb + xd)) */
949 T0 = __SSAT(T0 + U0, 16u);
950 T1 = __SSAT(T1 + U1, 16u);
952 /* writing the butterfly processed i0 sample */
953 /* xa' = xa + xb + xc + xd */
954 /* ya' = ya + yb + yc + yd */
955 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
956 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
958 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
959 R0 = (R0 >> 1u) - (T0 >> 1u);
960 R1 = (R1 >> 1u) - (T1 >> 1u);
961 /* Read yb (real), xb(imag) input */
962 T0 = pSrc16[i1 * 2u];
963 T1 = pSrc16[(i1 * 2u) + 1u];
965 /* writing the butterfly processed i0 + fftLen/4 sample */
966 /* xc' = (xa-xb+xc-xd) */
967 /* yc' = (ya-yb+yc-yd) */
968 pSrc16[i1 * 2u] = R0;
969 pSrc16[(i1 * 2u) + 1u] = R1;
971 /* Read yd (real), xd(imag) input */
972 U0 = pSrc16[i3 * 2u];
973 U1 = pSrc16[(i3 * 2u) + 1u];
974 /* T0 = (yb - yd), T1 = (xb - xd) */
975 T0 = __SSAT(T0 - U0, 16u);
976 T1 = __SSAT(T1 - U1, 16u);
978 /* writing the butterfly processed i0 + fftLen/2 sample */
979 /* xb' = (xa+yb-xc-yd) */
980 /* yb' = (ya-xb-yc+xd) */
981 pSrc16[i2 * 2u] = (S0 >> 1u) + (T1 >> 1u);
982 pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u);
984 /* writing the butterfly processed i0 + 3fftLen/4 sample */
985 /* xd' = (xa-yb-xc+yd) */
986 /* yd' = (ya+xb-yc-xd) */
987 pSrc16[i3 * 2u] = (S0 >> 1u) - (T1 >> 1u);
988 pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u);
992 /* end of last stage process */
994 /* output is in 11.5(q5) format for the 1024 point */
995 /* output is in 9.7(q7) format for the 256 point */
996 /* output is in 7.9(q9) format for the 64 point */
997 /* output is in 5.11(q11) format for the 16 point */
999 #endif /* #ifndef ARM_MATH_CM0 */
1005 * @brief Core function for the Q15 CIFFT butterfly process.
1006 * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type.
1007 * @param[in] fftLen length of the FFT.
1008 * @param[in] *pCoef16 points to twiddle coefficient buffer.
1009 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
1014 * Radix-4 IFFT algorithm used is :
1016 * CIFFT uses same twiddle coefficients as CFFT function
1017 * x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4]
1020 * IFFT is implemented with following changes in equations from FFT
1022 * Input real and imaginary data:
1023 * x(n) = xa + j * ya
1024 * x(n+N/4 ) = xb + j * yb
1025 * x(n+N/2 ) = xc + j * yc
1026 * x(n+3N/4) = xd + j * yd
1029 * Output real and imaginary data:
1030 * x(4r) = xa'+ j * ya'
1031 * x(4r+1) = xb'+ j * yb'
1032 * x(4r+2) = xc'+ j * yc'
1033 * x(4r+3) = xd'+ j * yd'
1036 * Twiddle factors for radix-4 IFFT:
1037 * Wn = co1 + j * (si1)
1038 * W2n = co2 + j * (si2)
1039 * W3n = co3 + j * (si3)
1041 * The real and imaginary output values for the radix-4 butterfly are
1042 * xa' = xa + xb + xc + xd
1043 * ya' = ya + yb + yc + yd
1044 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
1045 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
1046 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
1047 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
1048 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
1049 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
1053 void arm_radix4_butterfly_inverse_q15(
1057 uint32_t twidCoefModifier)
1060 #ifndef ARM_MATH_CM0
1062 /* Run the below code for Cortex-M4 and Cortex-M3 */
1065 q31_t C1, C2, C3, out1, out2;
1066 q31_t *pSrc, *pCoeff;
1067 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
1070 /* Total process is divided into three stages */
1072 /* process first stage, middle stages, & last stage */
1074 /* pointer initializations for SIMD calculations */
1075 pSrc = (q31_t *) pSrc16;
1076 pCoeff = (q31_t *) pCoef16;
1078 /* Initializations for the first stage */
1085 /* Index for twiddle coefficient */
1088 /* Index for input read and output write */
1093 /* Input is in 1.15(q15) format */
1095 /* Start of first stage process */
1098 /* Butterfly implementation */
1100 /* index calculation for the input as, */
1101 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1106 /* Reading i0, i0+fftLen/2 inputs */
1107 /* Read ya (real), xa(imag) input */
1109 in = ((int16_t) (T & 0xFFFF)) >> 2;
1110 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
1111 /* Read yc (real), xc(imag) input */
1113 in = ((int16_t) (S & 0xFFFF)) >> 2;
1114 S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
1116 /* R = packed((ya + yc), (xa + xc) ) */
1118 /* S = packed((ya - yc), (xa - xc) ) */
1121 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
1122 /* Read yb (real), xb(imag) input */
1124 in = ((int16_t) (T & 0xFFFF)) >> 2;
1125 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
1126 /* Read yd (real), xd(imag) input */
1128 in = ((int16_t) (U & 0xFFFF)) >> 2;
1129 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
1131 /* T = packed((yb + yd), (xb + xd) ) */
1134 /* writing the butterfly processed i0 sample */
1135 /* xa' = xa + xb + xc + xd */
1136 /* ya' = ya + yb + yc + yd */
1137 pSrc[i0] = __SHADD16(R, T);
1139 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
1141 /* co2 & si2 are read from SIMD Coefficient pointer */
1142 C2 = pCoeff[2u * ic];
1144 #ifndef ARM_MATH_BIG_ENDIAN
1146 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
1147 out1 = __SMUSD(C2, R) >> 16u;
1148 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1149 out2 = __SMUADX(C2, R);
1153 /* xc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1154 out1 = __SMUADX(C2, R) >> 16u;
1155 /* yc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
1156 out2 = __SMUSD(-C2, R);
1158 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
1160 /* Reading i0+fftLen/4 */
1161 /* T = packed(yb, xb) */
1163 in = ((int16_t) (T & 0xFFFF)) >> 2;
1164 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
1166 /* writing the butterfly processed i0 + fftLen/4 sample */
1167 /* writing output(xc', yc') in little endian format */
1168 pSrc[i1] = (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
1170 /* Butterfly calculations */
1171 /* U = packed(yd, xd) */
1173 in = ((int16_t) (U & 0xFFFF)) >> 2;
1174 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
1176 /* T = packed(yb-yd, xb-xd) */
1179 #ifndef ARM_MATH_BIG_ENDIAN
1181 /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */
1183 /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */
1188 /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */
1190 /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */
1193 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
1195 /* co1 & si1 are read from SIMD Coefficient pointer */
1197 /* Butterfly process for the i0+fftLen/2 sample */
1199 #ifndef ARM_MATH_BIG_ENDIAN
1201 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
1202 out1 = __SMUSD(C1, S) >> 16u;
1203 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
1204 out2 = __SMUADX(C1, S);
1208 /* xb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
1209 out1 = __SMUADX(C1, S) >> 16u;
1210 /* yb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
1211 out2 = __SMUSD(-C1, S);
1213 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
1215 /* writing output(xb', yb') in little endian format */
1216 pSrc[i2] = ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF);
1218 /* co3 & si3 are read from SIMD Coefficient pointer */
1219 C3 = pCoeff[3u * ic];
1220 /* Butterfly process for the i0+3fftLen/4 sample */
1222 #ifndef ARM_MATH_BIG_ENDIAN
1224 /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
1225 out1 = __SMUSD(C3, R) >> 16u;
1226 /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
1227 out2 = __SMUADX(C3, R);
1231 /* xd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
1232 out1 = __SMUADX(C3, R) >> 16u;
1233 /* yd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
1234 out2 = __SMUSD(-C3, R);
1236 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
1238 /* writing output(xd', yd') in little endian format */
1239 pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
1241 /* Twiddle coefficients index modifier */
1242 ic = ic + twidCoefModifier;
1244 /* Updating input index */
1249 /* End of first stage process */
1251 /* data is in 4.12(q12) format */
1254 /* Start of Middle stage process */
1256 /* Twiddle coefficients index modifier */
1257 twidCoefModifier <<= 2u;
1259 /* Calculation of Middle stage */
1260 for (k = fftLen / 4u; k > 4u; k >>= 2u)
1262 /* Initializations for the middle stage */
1267 for (j = 0u; j <= (n2 - 1u); j++)
1269 /* index calculation for the coefficients */
1271 C2 = pCoeff[2u * ic];
1272 C3 = pCoeff[3u * ic];
1274 /* Twiddle coefficients index modifier */
1275 ic = ic + twidCoefModifier;
1277 /* Butterfly implementation */
1278 for (i0 = j; i0 < fftLen; i0 += n1)
1280 /* index calculation for the input as, */
1281 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1286 /* Reading i0, i0+fftLen/2 inputs */
1287 /* Read ya (real), xa(imag) input */
1290 /* Read yc (real), xc(imag) input */
1294 /* R = packed( (ya + yc), (xa + xc)) */
1296 /* S = packed((ya - yc), (xa - xc)) */
1299 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
1300 /* Read yb (real), xb(imag) input */
1303 /* Read yd (real), xd(imag) input */
1307 /* T = packed( (yb + yd), (xb + xd)) */
1310 /* writing the butterfly processed i0 sample */
1311 /* xa' = xa + xb + xc + xd */
1312 /* ya' = ya + yb + yc + yd */
1313 out1 = __SHADD16(R, T);
1314 in = ((int16_t) (out1 & 0xFFFF)) >> 1;
1315 out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF);
1320 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
1321 R = __SHSUB16(R, T);
1324 #ifndef ARM_MATH_BIG_ENDIAN
1326 /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */
1327 out1 = __SMUSD(C2, R) >> 16u;
1328 /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1329 out2 = __SMUADX(C2, R);
1333 /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1334 out1 = __SMUADX(R, C2) >> 16u;
1335 /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */
1336 out2 = __SMUSD(-C2, R);
1338 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
1340 /* Reading i0+3fftLen/4 */
1341 /* Read yb (real), xb(imag) input */
1344 /* writing the butterfly processed i0 + fftLen/4 sample */
1345 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
1346 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1347 pSrc[i1] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
1349 /* Butterfly calculations */
1350 /* Read yd (real), xd(imag) input */
1353 /* T = packed(yb-yd, xb-xd) */
1357 #ifndef ARM_MATH_BIG_ENDIAN
1359 /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */
1362 /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */
1364 /* Butterfly process for the i0+fftLen/2 sample */
1365 out1 = __SMUSD(C1, S) >> 16u;
1366 out2 = __SMUADX(C1, S);
1370 /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */
1373 /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */
1375 /* Butterfly process for the i0+fftLen/2 sample */
1376 out1 = __SMUADX(S, C1) >> 16u;
1377 out2 = __SMUSD(-C1, S);
1379 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
1381 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
1382 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
1383 pSrc[i2] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
1385 /* Butterfly process for the i0+3fftLen/4 sample */
1387 #ifndef ARM_MATH_BIG_ENDIAN
1389 out1 = __SMUSD(C3, R) >> 16u;
1390 out2 = __SMUADX(C3, R);
1394 out1 = __SMUADX(C3, R) >> 16u;
1395 out2 = __SMUSD(-C3, R);
1397 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
1399 /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
1400 /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
1401 pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
1406 /* Twiddle coefficients index modifier */
1407 twidCoefModifier <<= 2u;
1409 /* End of Middle stages process */
1412 /* data is in 10.6(q6) format for the 1024 point */
1413 /* data is in 8.8(q8) format for the 256 point */
1414 /* data is in 6.10(q10) format for the 64 point */
1415 /* data is in 4.12(q12) format for the 16 point */
1417 /* start of last stage process */
1420 /* Initializations for the last stage */
1424 /* Butterfly implementation */
1425 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
1427 /* index calculation for the input as, */
1428 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1433 /* Reading i0, i0+fftLen/2 inputs */
1434 /* Read ya (real), xa(imag) input */
1436 /* Read yc (real), xc(imag) input */
1439 /* R = packed((ya + yc), (xa + xc)) */
1441 /* S = packed((ya - yc), (xa - xc)) */
1444 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
1445 /* Read yb (real), xb(imag) input */
1447 /* Read yd (real), xd(imag) input */
1450 /* T = packed((yb + yd), (xb + xd)) */
1453 /* writing the butterfly processed i0 sample */
1454 /* xa' = xa + xb + xc + xd */
1455 /* ya' = ya + yb + yc + yd */
1456 pSrc[i0] = __SHADD16(R, T);
1458 /* R = packed((ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
1459 R = __SHSUB16(R, T);
1461 /* Read yb (real), xb(imag) input */
1464 /* writing the butterfly processed i0 + fftLen/4 sample */
1465 /* xc' = (xa-xb+xc-xd) */
1466 /* yc' = (ya-yb+yc-yd) */
1469 /* Read yd (real), xd(imag) input */
1471 /* T = packed( (yb - yd), (xb - xd)) */
1475 #ifndef ARM_MATH_BIG_ENDIAN
1477 /* writing the butterfly processed i0 + fftLen/2 sample */
1478 /* xb' = (xa-yb-xc+yd) */
1479 /* yb' = (ya+xb-yc-xd) */
1480 pSrc[i2] = __SHASX(S, T);
1482 /* writing the butterfly processed i0 + 3fftLen/4 sample */
1483 /* xd' = (xa+yb-xc-yd) */
1484 /* yd' = (ya-xb-yc+xd) */
1485 pSrc[i3] = __SHSAX(S, T);
1490 /* writing the butterfly processed i0 + fftLen/2 sample */
1491 /* xb' = (xa-yb-xc+yd) */
1492 /* yb' = (ya+xb-yc-xd) */
1493 pSrc[i2] = __SHSAX(S, T);
1495 /* writing the butterfly processed i0 + 3fftLen/4 sample */
1496 /* xd' = (xa+yb-xc-yd) */
1497 /* yd' = (ya-xb-yc+xd) */
1498 pSrc[i3] = __SHASX(S, T);
1500 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
1503 /* end of last stage process */
1505 /* output is in 11.5(q5) format for the 1024 point */
1506 /* output is in 9.7(q7) format for the 256 point */
1507 /* output is in 7.9(q9) format for the 64 point */
1508 /* output is in 5.11(q11) format for the 16 point */
1513 /* Run the below code for Cortex-M0 */
1515 q15_t R0, R1, S0, S1, T0, T1, U0, U1;
1516 q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
1517 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
1519 /* Total process is divided into three stages */
1521 /* process first stage, middle stages, & last stage */
1523 /* Initializations for the first stage */
1530 /* Index for twiddle coefficient */
1533 /* Index for input read and output write */
1538 /* Input is in 1.15(q15) format */
1540 /* Start of first stage process */
1543 /* Butterfly implementation */
1545 /* index calculation for the input as, */
1546 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
1551 /* Reading i0, i0+fftLen/2 inputs */
1552 /* input is down scale by 4 to avoid overflow */
1553 /* Read ya (real), xa(imag) input */
1554 T0 = pSrc16[i0 * 2u] >> 2u;
1555 T1 = pSrc16[(i0 * 2u) + 1u] >> 2u;
1556 /* input is down scale by 4 to avoid overflow */
1557 /* Read yc (real), xc(imag) input */
1558 S0 = pSrc16[i2 * 2u] >> 2u;
1559 S1 = pSrc16[(i2 * 2u) + 1u] >> 2u;
1561 /* R0 = (ya + yc), R1 = (xa + xc) */
1562 R0 = __SSAT(T0 + S0, 16u);
1563 R1 = __SSAT(T1 + S1, 16u);
1564 /* S0 = (ya - yc), S1 = (xa - xc) */
1565 S0 = __SSAT(T0 - S0, 16u);
1566 S1 = __SSAT(T1 - S1, 16u);
1568 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
1569 /* input is down scale by 4 to avoid overflow */
1570 /* Read yb (real), xb(imag) input */
1571 T0 = pSrc16[i1 * 2u] >> 2u;
1572 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
1573 /* Read yd (real), xd(imag) input */
1574 /* input is down scale by 4 to avoid overflow */
1575 U0 = pSrc16[i3 * 2u] >> 2u;
1576 U1 = pSrc16[(i3 * 2u) + 1u] >> 2u;
1578 /* T0 = (yb + yd), T1 = (xb + xd) */
1579 T0 = __SSAT(T0 + U0, 16u);
1580 T1 = __SSAT(T1 + U1, 16u);
1582 /* writing the butterfly processed i0 sample */
1583 /* xa' = xa + xb + xc + xd */
1584 /* ya' = ya + yb + yc + yd */
1585 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
1586 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
1588 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc)- (xb + xd) */
1589 R0 = __SSAT(R0 - T0, 16u);
1590 R1 = __SSAT(R1 - T1, 16u);
1591 /* co2 & si2 are read from Coefficient pointer */
1592 Co2 = pCoef16[2u * ic * 2u];
1593 Si2 = pCoef16[(2u * ic * 2u) + 1u];
1594 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
1595 out1 = (short) ((Co2 * R0 - Si2 * R1) >> 16u);
1596 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1597 out2 = (short) ((Si2 * R0 + Co2 * R1) >> 16u);
1599 /* Reading i0+fftLen/4 */
1600 /* input is down scale by 4 to avoid overflow */
1601 /* T0 = yb, T1 = xb */
1602 T0 = pSrc16[i1 * 2u] >> 2u;
1603 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
1605 /* writing the butterfly processed i0 + fftLen/4 sample */
1606 /* writing output(xc', yc') in little endian format */
1607 pSrc16[i1 * 2u] = out1;
1608 pSrc16[(i1 * 2u) + 1u] = out2;
1610 /* Butterfly calculations */
1611 /* input is down scale by 4 to avoid overflow */
1612 /* U0 = yd, U1 = xd */
1613 U0 = pSrc16[i3 * 2u] >> 2u;
1614 U1 = pSrc16[(i3 * 2u) + 1u] >> 2u;
1616 /* T0 = yb-yd, T1 = xb-xd */
1617 T0 = __SSAT(T0 - U0, 16u);
1618 T1 = __SSAT(T1 - U1, 16u);
1619 /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */
1620 R0 = (short) __SSAT((q31_t) (S0 + T1), 16);
1621 R1 = (short) __SSAT((q31_t) (S1 - T0), 16);
1622 /* S0 = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */
1623 S0 = (short) __SSAT((q31_t) (S0 - T1), 16);
1624 S1 = (short) __SSAT((q31_t) (S1 + T0), 16);
1626 /* co1 & si1 are read from Coefficient pointer */
1627 Co1 = pCoef16[ic * 2u];
1628 Si1 = pCoef16[(ic * 2u) + 1u];
1629 /* Butterfly process for the i0+fftLen/2 sample */
1630 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
1631 out1 = (short) ((Co1 * S0 - Si1 * S1) >> 16u);
1632 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
1633 out2 = (short) ((Si1 * S0 + Co1 * S1) >> 16u);
1634 /* writing output(xb', yb') in little endian format */
1635 pSrc16[i2 * 2u] = out1;
1636 pSrc16[(i2 * 2u) + 1u] = out2;
1638 /* Co3 & si3 are read from Coefficient pointer */
1639 Co3 = pCoef16[3u * ic * 2u];
1640 Si3 = pCoef16[(3u * ic * 2u) + 1u];
1641 /* Butterfly process for the i0+3fftLen/4 sample */
1642 /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
1643 out1 = (short) ((Co3 * R0 - Si3 * R1) >> 16u);
1644 /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
1645 out2 = (short) ((Si3 * R0 + Co3 * R1) >> 16u);
1646 /* writing output(xd', yd') in little endian format */
1647 pSrc16[i3 * 2u] = out1;
1648 pSrc16[(i3 * 2u) + 1u] = out2;
1650 /* Twiddle coefficients index modifier */
1651 ic = ic + twidCoefModifier;
1653 /* Updating input index */
1658 /* End of first stage process */
1660 /* data is in 4.11(q11) format */
1663 /* Start of Middle stage process */
1665 /* Twiddle coefficients index modifier */
1666 twidCoefModifier <<= 2u;
1668 /* Calculation of Middle stage */
1669 for (k = fftLen / 4u; k > 4u; k >>= 2u)
1671 /* Initializations for the middle stage */
1676 for (j = 0u; j <= (n2 - 1u); j++)
1678 /* index calculation for the coefficients */
1679 Co1 = pCoef16[ic * 2u];
1680 Si1 = pCoef16[(ic * 2u) + 1u];
1681 Co2 = pCoef16[2u * ic * 2u];
1682 Si2 = pCoef16[2u * ic * 2u + 1u];
1683 Co3 = pCoef16[3u * ic * 2u];
1684 Si3 = pCoef16[(3u * ic * 2u) + 1u];
1686 /* Twiddle coefficients index modifier */
1687 ic = ic + twidCoefModifier;
1689 /* Butterfly implementation */
1690 for (i0 = j; i0 < fftLen; i0 += n1)
1692 /* index calculation for the input as, */
1693 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
1698 /* Reading i0, i0+fftLen/2 inputs */
1699 /* Read ya (real), xa(imag) input */
1700 T0 = pSrc16[i0 * 2u];
1701 T1 = pSrc16[(i0 * 2u) + 1u];
1703 /* Read yc (real), xc(imag) input */
1704 S0 = pSrc16[i2 * 2u];
1705 S1 = pSrc16[(i2 * 2u) + 1u];
1708 /* R0 = (ya + yc), R1 = (xa + xc) */
1709 R0 = __SSAT(T0 + S0, 16u);
1710 R1 = __SSAT(T1 + S1, 16u);
1711 /* S0 = (ya - yc), S1 = (xa - xc) */
1712 S0 = __SSAT(T0 - S0, 16u);
1713 S1 = __SSAT(T1 - S1, 16u);
1715 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
1716 /* Read yb (real), xb(imag) input */
1717 T0 = pSrc16[i1 * 2u];
1718 T1 = pSrc16[(i1 * 2u) + 1u];
1720 /* Read yd (real), xd(imag) input */
1721 U0 = pSrc16[i3 * 2u];
1722 U1 = pSrc16[(i3 * 2u) + 1u];
1724 /* T0 = (yb + yd), T1 = (xb + xd) */
1725 T0 = __SSAT(T0 + U0, 16u);
1726 T1 = __SSAT(T1 + U1, 16u);
1728 /* writing the butterfly processed i0 sample */
1729 /* xa' = xa + xb + xc + xd */
1730 /* ya' = ya + yb + yc + yd */
1731 pSrc16[i0 * 2u] = ((R0 >> 1u) + (T0 >> 1u)) >> 1u;
1732 pSrc16[(i0 * 2u) + 1u] = ((R1 >> 1u) + (T1 >> 1u)) >> 1u;
1734 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
1735 R0 = (R0 >> 1u) - (T0 >> 1u);
1736 R1 = (R1 >> 1u) - (T1 >> 1u);
1738 /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */
1739 out1 = (short) ((Co2 * R0 - Si2 * R1) >> 16);
1740 /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1741 out2 = (short) ((Si2 * R0 + Co2 * R1) >> 16);
1743 /* Reading i0+fftLen/4 */
1744 /* Read yb (real), xb(imag) input */
1745 T0 = pSrc16[i1 * 2u];
1746 T1 = pSrc16[(i1 * 2u) + 1u];
1748 /* writing the butterfly processed i0 + fftLen/4 sample */
1749 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
1750 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1751 pSrc16[i1 * 2u] = out1;
1752 pSrc16[(i1 * 2u) + 1u] = out2;
1754 /* Butterfly calculations */
1755 /* Read yd (real), xd(imag) input */
1756 U0 = pSrc16[i3 * 2u];
1757 U1 = pSrc16[(i3 * 2u) + 1u];
1759 /* T0 = yb-yd, T1 = xb-xd */
1760 T0 = __SSAT(T0 - U0, 16u);
1761 T1 = __SSAT(T1 - U1, 16u);
1763 /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */
1764 R0 = (S0 >> 1u) + (T1 >> 1u);
1765 R1 = (S1 >> 1u) - (T0 >> 1u);
1767 /* S0 = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */
1768 S0 = (S0 >> 1u) - (T1 >> 1u);
1769 S1 = (S1 >> 1u) + (T0 >> 1u);
1771 /* Butterfly process for the i0+fftLen/2 sample */
1772 out1 = (short) ((Co1 * S0 - Si1 * S1) >> 16u);
1773 out2 = (short) ((Si1 * S0 + Co1 * S1) >> 16u);
1774 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
1775 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
1776 pSrc16[i2 * 2u] = out1;
1777 pSrc16[(i2 * 2u) + 1u] = out2;
1779 /* Butterfly process for the i0+3fftLen/4 sample */
1780 out1 = (short) ((Co3 * R0 - Si3 * R1) >> 16u);
1782 out2 = (short) ((Si3 * R0 + Co3 * R1) >> 16u);
1783 /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
1784 /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
1785 pSrc16[i3 * 2u] = out1;
1786 pSrc16[(i3 * 2u) + 1u] = out2;
1791 /* Twiddle coefficients index modifier */
1792 twidCoefModifier <<= 2u;
1794 /* End of Middle stages process */
1797 /* data is in 10.6(q6) format for the 1024 point */
1798 /* data is in 8.8(q8) format for the 256 point */
1799 /* data is in 6.10(q10) format for the 64 point */
1800 /* data is in 4.12(q12) format for the 16 point */
1802 /* start of last stage process */
1805 /* Initializations for the last stage */
1809 /* Butterfly implementation */
1810 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
1812 /* index calculation for the input as, */
1813 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
1818 /* Reading i0, i0+fftLen/2 inputs */
1819 /* Read ya (real), xa(imag) input */
1820 T0 = pSrc16[i0 * 2u];
1821 T1 = pSrc16[(i0 * 2u) + 1u];
1822 /* Read yc (real), xc(imag) input */
1823 S0 = pSrc16[i2 * 2u];
1824 S1 = pSrc16[(i2 * 2u) + 1u];
1826 /* R0 = (ya + yc), R1 = (xa + xc) */
1827 R0 = __SSAT(T0 + S0, 16u);
1828 R1 = __SSAT(T1 + S1, 16u);
1829 /* S0 = (ya - yc), S1 = (xa - xc) */
1830 S0 = __SSAT(T0 - S0, 16u);
1831 S1 = __SSAT(T1 - S1, 16u);
1833 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
1834 /* Read yb (real), xb(imag) input */
1835 T0 = pSrc16[i1 * 2u];
1836 T1 = pSrc16[(i1 * 2u) + 1u];
1837 /* Read yd (real), xd(imag) input */
1838 U0 = pSrc16[i3 * 2u];
1839 U1 = pSrc16[(i3 * 2u) + 1u];
1841 /* T0 = (yb + yd), T1 = (xb + xd) */
1842 T0 = __SSAT(T0 + U0, 16u);
1843 T1 = __SSAT(T1 + U1, 16u);
1845 /* writing the butterfly processed i0 sample */
1846 /* xa' = xa + xb + xc + xd */
1847 /* ya' = ya + yb + yc + yd */
1848 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
1849 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
1851 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
1852 R0 = (R0 >> 1u) - (T0 >> 1u);
1853 R1 = (R1 >> 1u) - (T1 >> 1u);
1855 /* Read yb (real), xb(imag) input */
1856 T0 = pSrc16[i1 * 2u];
1857 T1 = pSrc16[(i1 * 2u) + 1u];
1859 /* writing the butterfly processed i0 + fftLen/4 sample */
1860 /* xc' = (xa-xb+xc-xd) */
1861 /* yc' = (ya-yb+yc-yd) */
1862 pSrc16[i1 * 2u] = R0;
1863 pSrc16[(i1 * 2u) + 1u] = R1;
1865 /* Read yd (real), xd(imag) input */
1866 U0 = pSrc16[i3 * 2u];
1867 U1 = pSrc16[(i3 * 2u) + 1u];
1868 /* T0 = (yb - yd), T1 = (xb - xd) */
1869 T0 = __SSAT(T0 - U0, 16u);
1870 T1 = __SSAT(T1 - U1, 16u);
1872 /* writing the butterfly processed i0 + fftLen/2 sample */
1873 /* xb' = (xa-yb-xc+yd) */
1874 /* yb' = (ya+xb-yc-xd) */
1875 pSrc16[i2 * 2u] = (S0 >> 1u) - (T1 >> 1u);
1876 pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u);
1879 /* writing the butterfly processed i0 + 3fftLen/4 sample */
1880 /* xd' = (xa+yb-xc-yd) */
1881 /* yd' = (ya-xb-yc+xd) */
1882 pSrc16[i3 * 2u] = (S0 >> 1u) + (T1 >> 1u);
1883 pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u);
1885 /* end of last stage process */
1887 /* output is in 11.5(q5) format for the 1024 point */
1888 /* output is in 9.7(q7) format for the 256 point */
1889 /* output is in 7.9(q9) format for the 64 point */
1890 /* output is in 5.11(q11) format for the 16 point */
1892 #endif /* #ifndef ARM_MATH_CM0 */
1898 * @brief In-place bit reversal function.
1899 * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type.
1900 * @param[in] fftLen length of the FFT.
1901 * @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
1902 * @param[in] *pBitRevTab points to bit reversal table.
1906 void arm_bitreversal_q15(
1909 uint16_t bitRevFactor,
1910 uint16_t * pBitRevTab)
1912 q31_t *pSrc = (q31_t *) pSrc16;
1914 uint32_t fftLenBy2, fftLenBy2p1;
1917 /* Initializations */
1919 fftLenBy2 = fftLen / 2u;
1920 fftLenBy2p1 = (fftLen / 2u) + 1u;
1922 /* Bit Reversal Implementation */
1923 for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u)
1927 /* pSrc[i] <-> pSrc[j]; */
1928 /* pSrc[i+1u] <-> pSrc[j+1u] */
1933 /* pSrc[i + fftLenBy2p1] <-> pSrc[j + fftLenBy2p1]; */
1934 /* pSrc[i + fftLenBy2p1+1u] <-> pSrc[j + fftLenBy2p1+1u] */
1935 in = pSrc[i + fftLenBy2p1];
1936 pSrc[i + fftLenBy2p1] = pSrc[j + fftLenBy2p1];
1937 pSrc[j + fftLenBy2p1] = in;
1940 /* pSrc[i+1u] <-> pSrc[j+fftLenBy2]; */
1941 /* pSrc[i+2] <-> pSrc[j+fftLenBy2+1u] */
1943 pSrc[i + 1u] = pSrc[j + fftLenBy2];
1944 pSrc[j + fftLenBy2] = in;
1946 /* Reading the index for the bit reversal */
1949 /* Updating the bit reversal index depending on the fft length */
1950 pBitRevTab += bitRevFactor;