git.gag.com Git - fw/stlink/blob - exampleF4/CMSIS/DSP_Lib/Source/TransformFunctions/arm_cfft_radix4_q15.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        15. July 2011
   5 * $Revision:    V1.0.10
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_cfft_radix4_q15.c
   9 *
  10 * Description:  This file has function definition of Radix-4 FFT & IFFT function and
  11 *                               In-place bit reversal using bit reversal table
  12 *
  13 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  14 *
  15 * Version 1.0.10 2011/7/15
  16 *    Big Endian support added and Merged M0 and M3/M4 Source code.
  17 *
  18 * Version 1.0.3 2010/11/29
  19 *    Re-organized the CMSIS folders and updated documentation.
  20 *
  21 * Version 1.0.2 2010/11/11
  22 *    Documentation updated.
  23 *
  24 * Version 1.0.1 2010/10/05
  25 *    Production release and review comments incorporated.
  26 *
  27 * Version 1.0.0 2010/09/20
  28 *    Production release and review comments incorporated.
  29 *
  30 * Version 0.0.5  2010/04/26
  31 *        incorporated review comments and updated with latest CMSIS layer
  32 *
  33 * Version 0.0.3  2010/03/10
  34 *    Initial version
  35 * -------------------------------------------------------------------- */
  36
  37 #include "arm_math.h"
  38
  39 /**
  40  * @ingroup groupTransforms
  41  */
  42
  43 /**
  44  * @addtogroup CFFT_CIFFT
  45  * @{
  46  */
  47
  48
  49 /**
  50  * @details
  51  * @brief Processing function for the Q15 CFFT/CIFFT.
  52  * @param[in]      *S    points to an instance of the Q15 CFFT/CIFFT structure.
  53  * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place.
  54  * @return none.
  55  *
  56  * \par Input and output formats:
  57  * \par
  58  * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
  59  * Hence the output format is different for different FFT sizes.
  60  * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
  61  * \par
  62  * \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT"
  63  * \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT"
  64  */
  65
  66 void arm_cfft_radix4_q15(
  67   const arm_cfft_radix4_instance_q15 * S,
  68   q15_t * pSrc)
  69 {
  70   if(S->ifftFlag == 1u)
  71   {
  72     /*  Complex IFFT radix-4  */
  73     arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle,
  74                                      S->twidCoefModifier);
  75   }
  76   else
  77   {
  78     /*  Complex FFT radix-4  */
  79     arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle,
  80                              S->twidCoefModifier);
  81   }
  82
  83   if(S->bitReverseFlag == 1u)
  84   {
  85     /*  Bit Reversal */
  86     arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
  87   }
  88
  89 }
  90
  91 /**
  92  * @} end of CFFT_CIFFT group
  93  */
  94
  95 /*
  96 * Radix-4 FFT algorithm used is :
  97 *
  98 * Input real and imaginary data:
  99 * x(n) = xa + j * ya
 100 * x(n+N/4 ) = xb + j * yb
 101 * x(n+N/2 ) = xc + j * yc
  102 * x(n+3N/4) = xd + j * yd
 103 *
 104 *
 105 * Output real and imaginary data:
 106 * x(4r) = xa'+ j * ya'
 107 * x(4r+1) = xb'+ j * yb'
 108 * x(4r+2) = xc'+ j * yc'
 109 * x(4r+3) = xd'+ j * yd'
 110 *
 111 *
 112 * Twiddle factors for radix-4 FFT:
 113 * Wn = co1 + j * (- si1)
 114 * W2n = co2 + j * (- si2)
 115 * W3n = co3 + j * (- si3)
  116 *
 117 * The real and imaginary output values for the radix-4 butterfly are
 118 * xa' = xa + xb + xc + xd
 119 * ya' = ya + yb + yc + yd
 120 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
 121 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
 122 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
 123 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
 124 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
 125 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
 126 *
 127 */
 128
 129 /**
 130  * @brief  Core function for the Q15 CFFT butterfly process.
 131  * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type.
 132  * @param[in]      fftLen           length of the FFT.
 133  * @param[in]      *pCoef16         points to twiddle coefficient buffer.
 134  * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
 135  * @return none.
 136  */
 137
  138 void arm_radix4_butterfly_q15(
  139   q15_t * pSrc16,
  140   uint32_t fftLen,
  141   q15_t * pCoef16,
  142   uint32_t twidCoefModifier)
  143 {
        /*
         * Overview (review notes):
         *  - Called by arm_cfft_radix4_q15 for the forward transform; all work is
         *    done in place on pSrc16.
         *  - ARM_MATH_CM0 selects between two implementations at compile time:
         *    a packed path that handles each complex sample as one 32-bit word,
         *    and a scalar path that indexes the q15 buffer element by element.
         *  - Both paths run the same three phases: a first stage whose inputs are
         *    shifted right by 2 as they are loaded, zero or more middle stages,
         *    and a last stage that performs no twiddle multiplication.
         *  - NOTE(review): the first-stage do/while tests --j after the body, so
         *    fftLen < 4 (n2 == 0) would wrap j around. Callers presumably pass
         *    only supported power-of-4 lengths - confirm.
         */
  144
  145 #ifndef ARM_MATH_CM0
  146
  147   /* Run the below code for Cortex-M4 and Cortex-M3 */
  148
  149   q31_t R, S, T, U;
  150   q31_t C1, C2, C3, out1, out2;
  151   q31_t *pSrc, *pCoeff;
  152   uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
  153   q15_t in;
  154
  155   /* Total process is divided into three stages */
  156
  157   /* process first stage, middle stages, & last stage */
  158
  159   /*  pointer initializations for SIMD calculations */
        /* From here on each q31_t element is one complex sample or twiddle
         * (two packed q15 halves), so indices count complex values.
         * NOTE(review): the casts assume both buffers are suitably aligned for
         * 32-bit access - confirm against the callers. */
  160   pSrc = (q31_t *) pSrc16;
  161   pCoeff = (q31_t *) pCoef16;
  162
  163   /*  Initializations for the first stage */
  164   n2 = fftLen;
  165   n1 = n2;
        /* n1 is not read again until it has been reassigned at the start of a
         * middle stage or of the last stage. */
  166
  167   /* n2 = fftLen/4 */
  168   n2 >>= 2u;
  169
  170   /* Index for twiddle coefficient */
  171   ic = 0u;
  172
  173   /* Index for input read and output write */
  174   i0 = 0u;
  175   j = n2;
  176
  177   /* Input is in 1.15(q15) format */
  178
  179   /*  start of first stage process */
  180   do
  181   {
  182     /*  Butterfly implementation */
  183
  184     /*  index calculation for the input as, */
  185     /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  186     i1 = i0 + n2;
  187     i2 = i1 + n2;
  188     i3 = i2 + n2;
  189
  190     /*  Reading i0, i0+fftLen/2 inputs */
  191     /* Read ya (real), xa(imag) input */
          /* Every first-stage load is followed by the same two-line scaling:
           * the low half is reinterpreted as a signed 16-bit value and shifted
           * right by 2 on its own, while the high half is obtained by shifting
           * the whole word and masking off the bits that fell into the low half. */
  192     T = pSrc[i0];
  193     in = ((int16_t) (T & 0xFFFF)) >> 2;
  194     T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  195     /* Read yc (real), xc(imag) input */
  196     S = pSrc[i2];
  197     in = ((int16_t) (S & 0xFFFF)) >> 2;
  198     S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  199     /* R = packed((ya + yc), (xa + xc) ) */
  200     R = __QADD16(T, S);
  201     /* S = packed((ya - yc), (xa - xc) ) */
  202     S = __QSUB16(T, S);
  203
  204     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  205     /* Read yb (real), xb(imag) input */
  206     T = pSrc[i1];
  207     in = ((int16_t) (T & 0xFFFF)) >> 2;
  208     T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  209     /* Read yd (real), xd(imag) input */
  210     U = pSrc[i3];
  211     in = ((int16_t) (U & 0xFFFF)) >> 2;
  212     U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  213     /* T = packed((yb + yd), (xb + xd) ) */
  214     T = __QADD16(T, U);
  215
  216     /*  writing the butterfly processed i0 sample */
  217     /* xa' = xa + xb + xc + xd */
  218     /* ya' = ya + yb + yc + yd */
  219     pSrc[i0] = __SHADD16(R, T);
  220
  221     /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
  222     R = __QSUB16(R, T);
  223
  224     /* co2 & si2 are read from SIMD Coefficient pointer */
          /* The three twiddle words of a butterfly are fetched at ic, 2*ic and
           * 3*ic; ic advances by twidCoefModifier at the end of each iteration. */
  225     C2 = pCoeff[2u * ic];
  226
  227
  228 #ifndef ARM_MATH_BIG_ENDIAN
  229
  230     /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  231     out1 = __SMUAD(C2, R) >> 16u;
  232     /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  233     out2 = __SMUSDX(C2, R);
  234
  235 #else
  236
  237     /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  238     out1 = __SMUSDX(R, C2) >> 16u;
  239     /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  240     out2 = __SMUAD(C2, R);
  241
  242 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
  243
  244     /*  Reading i0+fftLen/4 */
  245     /* T = packed(yb, xb) */
          /* T was overwritten by the (b + d) sum above, so sample i1 is loaded
           * and scaled again here, before pSrc[i1] is overwritten just below. */
  246     T = pSrc[i1];
  247     in = ((int16_t) (T & 0xFFFF)) >> 2;
  248     T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  249
  250     /* writing the butterfly processed i0 + fftLen/4 sample */
  251     /* writing output(xc', yc') in little endian format */
          /* Packing convention used for every twiddled output: out1 was shifted
           * down by 16 so its low half is kept, out2 was left unshifted so its
           * high half is kept. */
  252     pSrc[i1] = (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
  253
  254     /*  Butterfly calculations */
  255     /* U = packed(yd, xd) */
          /* Nothing has written pSrc[i3] or U since the first load of this
           * sample, so this reload recomputes the value U already holds. */
  256     U = pSrc[i3];
  257     in = ((int16_t) (U & 0xFFFF)) >> 2;
  258     U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  259     /* T = packed(yb-yd, xb-xd) */
  260     T = __QSUB16(T, U);
  261
  262
  263 #ifndef ARM_MATH_BIG_ENDIAN
  264
  265     /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
  266     R = __QASX(S, T);
  267     /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
  268     S = __QSAX(S, T);
  269
  270 #else
  271
  272     /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
  273     R = __QSAX(S, T);
  274     /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
  275     S = __QASX(S, T);
  276
  277 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
  278
  279     /* co1 & si1 are read from SIMD Coefficient pointer */
  280     C1 = pCoeff[ic];
  281     /*  Butterfly process for the i0+fftLen/2 sample */
  282
  283 #ifndef ARM_MATH_BIG_ENDIAN
  284
  285     /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  286     out1 = __SMUAD(C1, S) >> 16u;
  287     /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  288     out2 = __SMUSDX(C1, S);
  289
  290 #else
  291
  292     /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  293     out1 = __SMUSDX(S, C1) >> 16u;
  294     /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  295     out2 = __SMUAD(C1, S);
  296
  297 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
  298
  299     /* writing output(xb', yb') in little endian format */
  300     pSrc[i2] = ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF);
  301
  302
  303     /* co3 & si3 are read from SIMD Coefficient pointer */
  304     C3 = pCoeff[3u * ic];
  305     /*  Butterfly process for the i0+3fftLen/4 sample */
  306
  307 #ifndef ARM_MATH_BIG_ENDIAN
  308
  309     /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
  310     out1 = __SMUAD(C3, R) >> 16u;
  311     /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
  312     out2 = __SMUSDX(C3, R);
  313
  314 #else
  315
  316     /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
  317     out1 = __SMUSDX(R, C3) >> 16u;
  318     /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
  319     out2 = __SMUAD(C3, R);
  320
  321 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
  322
  323     /* writing output(xd', yd') in little endian format */
  324     pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
  325
  326     /*  Twiddle coefficients index modifier */
  327     ic = ic + twidCoefModifier;
  328
  329     /*  Updating input index */
  330     i0 = i0 + 1u;
  331
  332   } while(--j);
  333   /* data is in 4.11(q11) format */
  334
  335   /* end of first stage process */
  336
  337
  338   /* start of middle stage process */
  339
  340   /*  Twiddle coefficients index modifier */
  341   twidCoefModifier <<= 2u;
  342
  343   /*  Calculation of Middle stage */
        /* One iteration of the k loop per middle stage: n1 takes the previous
         * leg spacing (it is the stride between butterfly groups in this stage),
         * n2 becomes the new leg spacing, and the twiddle step is multiplied by
         * 4 again after each stage. The loop body is skipped when fftLen/4 <= 4. */
  344   for (k = fftLen / 4u; k > 4u; k >>= 2u)
  345   {
  346     /*  Initializations for the middle stage */
  347     n1 = n2;
  348     n2 >>= 2u;
  349     ic = 0u;
  350
          /* j is the butterfly position inside a group; the three twiddles read
           * for that position are reused for every group (i0 = j, j + n1, ...). */
  351     for (j = 0u; j <= (n2 - 1u); j++)
  352     {
  353       /*  index calculation for the coefficients */
  354       C1 = pCoeff[ic];
  355       C2 = pCoeff[2u * ic];
  356       C3 = pCoeff[3u * ic];
  357
  358       /*  Twiddle coefficients index modifier */
  359       ic = ic + twidCoefModifier;
  360
  361       /*  Butterfly implementation */
  362       for (i0 = j; i0 < fftLen; i0 += n1)
  363       {
  364         /*  index calculation for the input as, */
  365         /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
              /* In this stage the legs are n2 apart: i1 = i0 + n2, i2 = i0 + 2*n2,
               * i3 = i0 + 3*n2. Inputs are loaded without the >> 2 pre-scaling
               * used in the first stage. */
  366         i1 = i0 + n2;
  367         i2 = i1 + n2;
  368         i3 = i2 + n2;
  369
  370         /*  Reading i0, i0+fftLen/2 inputs */
  371         /* Read ya (real), xa(imag) input */
  372         T = pSrc[i0];
  373
  374         /* Read yc (real), xc(imag) input */
  375         S = pSrc[i2];
  376
  377         /* R = packed( (ya + yc), (xa + xc)) */
  378         R = __QADD16(T, S);
  379
  380         /* S = packed((ya - yc), (xa - xc)) */
  381         S = __QSUB16(T, S);
  382
  383         /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  384         /* Read yb (real), xb(imag) input */
  385         T = pSrc[i1];
  386
  387         /* Read yd (real), xd(imag) input */
  388         U = pSrc[i3];
  389
  390
  391         /* T = packed( (yb + yd), (xb + xd)) */
  392         T = __QADD16(T, U);
  393
  394
  395         /*  writing the butterfly processed i0 sample */
  396
  397         /* xa' = xa + xb + xc + xd */
  398         /* ya' = ya + yb + yc + yd */
              /* After __SHADD16 each half is shifted right by one more bit: the
               * low half through the signed 16-bit temporary, the high half by
               * shifting the whole word and masking. */
  399         out1 = __SHADD16(R, T);
  400         in = ((int16_t) (out1 & 0xFFFF)) >> 1;
  401         out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF);
  402         pSrc[i0] = out1;
  403
  404         /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
  405         R = __SHSUB16(R, T);
  406
  407
  408 #ifndef ARM_MATH_BIG_ENDIAN
  409
  410         /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
  411         out1 = __SMUAD(C2, R) >> 16u;
  412
  413         /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  414         out2 = __SMUSDX(C2, R);
  415
  416 #else
  417
  418         /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  419         out1 = __SMUSDX(R, C2) >> 16u;
  420
  421         /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
  422         out2 = __SMUAD(C2, R);
  423
  424 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
  425
  426         /*  Reading i0+3fftLen/4 */
  427         /* Read yb (real), xb(imag) input */
              /* NOTE(review): the heading above mentions i0+3fftLen/4, but the
               * load below reads pSrc[i1] (i0 + n2). It must happen before the
               * store to pSrc[i1] that follows, because T was overwritten by the
               * (b + d) sum. */
  428         T = pSrc[i1];
  429
  430         /*  writing the butterfly processed i0 + fftLen/4 sample */
  431         /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  432         /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  433         pSrc[i1] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
  434
  435         /*  Butterfly calculations */
  436
  437         /* Read yd (real), xd(imag) input */
  438         U = pSrc[i3];
  439
  440         /* T = packed(yb-yd, xb-xd) */
  441         T = __QSUB16(T, U);
  442
  443
  444 #ifndef ARM_MATH_BIG_ENDIAN
  445
  446         /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
  447         R = __SHASX(S, T);
  448
  449         /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
  450         S = __SHSAX(S, T);
  451
  452
  453         /*  Butterfly process for the i0+fftLen/2 sample */
  454         out1 = __SMUAD(C1, S) >> 16u;
  455         out2 = __SMUSDX(C1, S);
  456
  457 #else
  458
  459         /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
  460         R = __SHSAX(S, T);
  461
  462         /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
  463         S = __SHASX(S, T);
  464
  465
  466         /*  Butterfly process for the i0+fftLen/2 sample */
  467         out1 = __SMUSDX(S, C1) >> 16u;
  468         out2 = __SMUAD(C1, S);
  469
  470 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
  471
  472         /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  473         /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  474         pSrc[i2] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
  475
  476         /*  Butterfly process for the i0+3fftLen/4 sample */
  477
  478 #ifndef ARM_MATH_BIG_ENDIAN
  479
  480         out1 = __SMUAD(C3, R) >> 16u;
  481         out2 = __SMUSDX(C3, R);
  482
  483 #else
  484
  485         out1 = __SMUSDX(R, C3) >> 16u;
  486         out2 = __SMUAD(C3, R);
  487
  488 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
  489
  490         /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
  491         /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
  492         pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
  493       }
  494     }
  495     /*  Twiddle coefficients index modifier */
  496     twidCoefModifier <<= 2u;
  497   }
  498   /* end of middle stage process */
  499
  500
  501   /* data is in 10.6(q6) format for the 1024 point */
  502   /* data is in 8.8(q8) format for the 256 point */
  503   /* data is in 6.10(q10) format for the 64 point */
  504   /* data is in 4.12(q12) format for the 16 point */
  505
  506   /*  Initializations for the last stage */
  507   n1 = n2;
  508   n2 >>= 2u;
  509
  510   /* start of last stage process */
  511
  512   /*  Butterfly implementation */
        /* Last stage: one butterfly per group of n1 samples, legs n2 apart, and
         * no twiddle multiplication - the packed add/subtract results are stored
         * directly. For a power-of-4 fftLen >= 16 the shifts above leave
         * n1 == 4 and n2 == 1 at this point. */
  513   for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
  514   {
  515     /*  index calculation for the input as, */
  516     /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  517     i1 = i0 + n2;
  518     i2 = i1 + n2;
  519     i3 = i2 + n2;
  520
  521     /*  Reading i0, i0+fftLen/2 inputs */
  522     /* Read ya (real), xa(imag) input */
  523     T = pSrc[i0];
  524     /* Read yc (real), xc(imag) input */
  525     S = pSrc[i2];
  526
  527     /* R = packed((ya + yc), (xa + xc)) */
  528     R = __QADD16(T, S);
  529     /* S = packed((ya - yc), (xa - xc)) */
  530     S = __QSUB16(T, S);
  531
  532     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  533     /* Read yb (real), xb(imag) input */
  534     T = pSrc[i1];
  535     /* Read yd (real), xd(imag) input */
  536     U = pSrc[i3];
  537
  538     /* T = packed((yb + yd), (xb + xd)) */
  539     T = __QADD16(T, U);
  540
  541     /*  writing the butterfly processed i0 sample */
  542     /* xa' = xa + xb + xc + xd */
  543     /* ya' = ya + yb + yc + yd */
  544     pSrc[i0] = __SHADD16(R, T);
  545
  546     /* R = packed((ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
  547     R = __SHSUB16(R, T);
  548
  549     /* Read yb (real), xb(imag) input */
          /* Reload of sample i1 (T held the sum); must precede the store below. */
  550     T = pSrc[i1];
  551
  552     /*  writing the butterfly processed i0 + fftLen/4 sample */
  553     /* xc' = (xa-xb+xc-xd) */
  554     /* yc' = (ya-yb+yc-yd) */
  555     pSrc[i1] = R;
  556
  557     /* Read yd (real), xd(imag) input */
  558     U = pSrc[i3];
  559     /* T = packed( (yb - yd), (xb - xd))  */
  560     T = __QSUB16(T, U);
  561
  562
  563 #ifndef ARM_MATH_BIG_ENDIAN
  564
  565     /*  writing the butterfly processed i0 + fftLen/2 sample */
  566     /* xb' = (xa+yb-xc-yd) */
  567     /* yb' = (ya-xb-yc+xd) */
  568     pSrc[i2] = __SHSAX(S, T);
  569
  570     /*  writing the butterfly processed i0 + 3fftLen/4 sample */
  571     /* xd' = (xa-yb-xc+yd) */
  572     /* yd' = (ya+xb-yc-xd) */
  573     pSrc[i3] = __SHASX(S, T);
  574
  575 #else
  576
  577     /*  writing the butterfly processed i0 + fftLen/2 sample */
  578     /* xb' = (xa+yb-xc-yd) */
  579     /* yb' = (ya-xb-yc+xd) */
  580     pSrc[i2] = __SHASX(S, T);
  581
  582     /*  writing the butterfly processed i0 + 3fftLen/4 sample */
  583     /* xd' = (xa-yb-xc+yd) */
  584     /* yd' = (ya+xb-yc-xd) */
  585     pSrc[i3] = __SHSAX(S, T);
  586
  587 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
  588
  589   }
  590
  591   /* end of last stage process */
  592
  593   /* output is in 11.5(q5) format for the 1024 point */
  594   /* output is in 9.7(q7) format for the 256 point   */
  595   /* output is in 7.9(q9) format for the 64 point  */
  596   /* output is in 5.11(q11) format for the 16 point  */
  597
  598
  599 #else
  600
  601   /* Run the below code for Cortex-M0 */
        /* Scalar variant of the same three phases: complex sample i occupies
         * pSrc16[2*i] and pSrc16[2*i + 1], and twiddle i occupies pCoef16[2*i]
         * and pCoef16[2*i + 1]. No packed 32-bit accesses are made here. */
  602
  603   q15_t R0, R1, S0, S1, T0, T1, U0, U1;
  604   q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
  605   uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
  606
  607   /* Total process is divided into three stages */
  608
  609   /* process first stage, middle stages, & last stage */
  610
  611   /*  Initializations for the first stage */
  612   n2 = fftLen;
  613   n1 = n2;
  614
  615   /* n2 = fftLen/4 */
  616   n2 >>= 2u;
  617
  618   /* Index for twiddle coefficient */
  619   ic = 0u;
  620
  621   /* Index for input read and output write */
  622   i0 = 0u;
  623   j = n2;
  624
  625   /* Input is in 1.15(q15) format */
  626
  627   /*  start of first stage process */
  628   do
  629   {
  630     /*  Butterfly implementation */
  631
  632     /*  index calculation for the input as, */
  633     /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
  634     i1 = i0 + n2;
  635     i2 = i1 + n2;
  636     i3 = i2 + n2;
  637
  638     /*  Reading i0, i0+fftLen/2 inputs */
  639
  640     /* input is down scale by 4 to avoid overflow */
  641     /* Read ya (real), xa(imag) input */
  642     T0 = pSrc16[i0 * 2u] >> 2u;
  643     T1 = pSrc16[(i0 * 2u) + 1u] >> 2u;
  644
  645     /* input is down scale by 4 to avoid overflow */
  646     /* Read yc (real), xc(imag) input */
  647     S0 = pSrc16[i2 * 2u] >> 2u;
  648     S1 = pSrc16[(i2 * 2u) + 1u] >> 2u;
  649
  650     /* R0 = (ya + yc) */
  651     R0 = __SSAT(T0 + S0, 16u);
  652     /* R1 = (xa + xc) */
  653     R1 = __SSAT(T1 + S1, 16u);
  654
  655     /* S0 = (ya - yc) */
  656     S0 = __SSAT(T0 - S0, 16);
  657     /* S1 = (xa - xc) */
  658     S1 = __SSAT(T1 - S1, 16);
  659
  660     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  661     /* input is down scale by 4 to avoid overflow */
  662     /* Read yb (real), xb(imag) input */
  663     T0 = pSrc16[i1 * 2u] >> 2u;
  664     T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
  665
  666     /* input is down scale by 4 to avoid overflow */
  667     /* Read yd (real), xd(imag) input */
  668     U0 = pSrc16[i3 * 2u] >> 2u;
  669     U1 = pSrc16[(i3 * 2u) + 1] >> 2u;
  670
  671     /* T0 = (yb + yd) */
  672     T0 = __SSAT(T0 + U0, 16u);
  673     /* T1 = (xb + xd) */
  674     T1 = __SSAT(T1 + U1, 16u);
  675
  676     /*  writing the butterfly processed i0 sample */
  677     /* ya' = ya + yb + yc + yd */
  678     /* xa' = xa + xb + xc + xd */
          /* Each term is halved before the addition. */
  679     pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
  680     pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
  681
  682     /* R0 = (ya + yc) - (yb + yd) */
  683     /* R1 = (xa + xc) - (xb + xd) */
  684     R0 = __SSAT(R0 - T0, 16u);
  685     R1 = __SSAT(R1 - T1, 16u);
  686
  687     /* co2 & si2 are read from Coefficient pointer */
  688     Co2 = pCoef16[2u * ic * 2u];
  689     Si2 = pCoef16[(2u * ic * 2u) + 1];
  690
          /* The sum of products is shifted right by 16 before being narrowed
           * to short; the same pattern is used for every twiddled output. */
  691     /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  692     out1 = (short) ((Co2 * R0 + Si2 * R1) >> 16u);
  693     /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  694     out2 = (short) ((-Si2 * R0 + Co2 * R1) >> 16u);
  695
  696     /*  Reading i0+fftLen/4 */
  697     /* input is down scale by 4 to avoid overflow */
  698     /* T0 = yb, T1 =  xb */
          /* T0/T1 were overwritten by the sums above, so sample i1 is reloaded
           * here before it is overwritten with the output just below. */
  699     T0 = pSrc16[i1 * 2u] >> 2;
  700     T1 = pSrc16[(i1 * 2u) + 1] >> 2;
  701
  702     /* writing the butterfly processed i0 + fftLen/4 sample */
  703     /* writing output(xc', yc') in little endian format */
  704     pSrc16[i1 * 2u] = out1;
  705     pSrc16[(i1 * 2u) + 1] = out2;
  706
  707     /*  Butterfly calculations */
  708     /* input is down scale by 4 to avoid overflow */
  709     /* U0 = yd, U1 = xd */
  710     U0 = pSrc16[i3 * 2u] >> 2;
  711     U1 = pSrc16[(i3 * 2u) + 1] >> 2;
  712     /* T0 = yb-yd */
  713     T0 = __SSAT(T0 - U0, 16);
  714     /* T1 = xb-xd */
  715     T1 = __SSAT(T1 - U1, 16);
  716
  717     /* R1 = (ya-yc) + (xb- xd),  R0 = (xa-xc) - (yb-yd)) */
  718     R0 = (short) __SSAT((q31_t) (S0 - T1), 16);
  719     R1 = (short) __SSAT((q31_t) (S1 + T0), 16);
  720
  721     /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */
  722     S0 = (short) __SSAT(((q31_t) S0 + T1), 16u);
  723     S1 = (short) __SSAT(((q31_t) S1 - T0), 16u);
  724
  725     /* co1 & si1 are read from Coefficient pointer */
  726     Co1 = pCoef16[ic * 2u];
  727     Si1 = pCoef16[(ic * 2u) + 1];
  728     /*  Butterfly process for the i0+fftLen/2 sample */
  729     /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  730     out1 = (short) ((Si1 * S1 + Co1 * S0) >> 16);
  731     /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  732     out2 = (short) ((-Si1 * S0 + Co1 * S1) >> 16);
  733
  734     /* writing output(xb', yb') in little endian format */
  735     pSrc16[i2 * 2u] = out1;
  736     pSrc16[(i2 * 2u) + 1] = out2;
  737
  738     /* Co3 & si3 are read from Coefficient pointer */
  739     Co3 = pCoef16[3u * (ic * 2u)];
  740     Si3 = pCoef16[(3u * (ic * 2u)) + 1];
  741     /*  Butterfly process for the i0+3fftLen/4 sample */
  742     /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
  743     out1 = (short) ((Si3 * R1 + Co3 * R0) >> 16u);
  744     /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
  745     out2 = (short) ((-Si3 * R0 + Co3 * R1) >> 16u);
  746     /* writing output(xd', yd') in little endian format */
  747     pSrc16[i3 * 2u] = out1;
  748     pSrc16[(i3 * 2u) + 1] = out2;
  749
  750     /*  Twiddle coefficients index modifier */
  751     ic = ic + twidCoefModifier;
  752
  753     /*  Updating input index */
  754     i0 = i0 + 1u;
  755
  756   } while(--j);
  757   /* data is in 4.11(q11) format */
  758
  759   /* end of first stage process */
  760
  761
  762   /* start of middle stage process */
  763
  764   /*  Twiddle coefficients index modifier */
  765   twidCoefModifier <<= 2u;
  766
  767   /*  Calculation of Middle stage */
        /* Same stage structure as the packed path: n1 is the group stride, n2 the
         * leg spacing, and the twiddle step grows by 4x after each stage. */
  768   for (k = fftLen / 4u; k > 4u; k >>= 2u)
  769   {
  770     /*  Initializations for the middle stage */
  771     n1 = n2;
  772     n2 >>= 2u;
  773     ic = 0u;
  774
  775     for (j = 0u; j <= (n2 - 1u); j++)
  776     {
  777       /*  index calculation for the coefficients */
  778       Co1 = pCoef16[ic * 2u];
  779       Si1 = pCoef16[(ic * 2u) + 1u];
  780       Co2 = pCoef16[2u * (ic * 2u)];
  781       Si2 = pCoef16[(2u * (ic * 2u)) + 1u];
  782       Co3 = pCoef16[3u * (ic * 2u)];
  783       Si3 = pCoef16[(3u * (ic * 2u)) + 1u];
  784
  785       /*  Twiddle coefficients index modifier */
  786       ic = ic + twidCoefModifier;
  787
  788       /*  Butterfly implementation */
  789       for (i0 = j; i0 < fftLen; i0 += n1)
  790       {
  791         /*  index calculation for the input as, */
  792         /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
              /* In this stage the legs are n2 apart (i1 = i0 + n2, and so on). */
  793         i1 = i0 + n2;
  794         i2 = i1 + n2;
  795         i3 = i2 + n2;
  796
  797         /*  Reading i0, i0+fftLen/2 inputs */
  798         /* Read ya (real), xa(imag) input */
  799         T0 = pSrc16[i0 * 2u];
  800         T1 = pSrc16[(i0 * 2u) + 1u];
  801
  802         /* Read yc (real), xc(imag) input */
  803         S0 = pSrc16[i2 * 2u];
  804         S1 = pSrc16[(i2 * 2u) + 1u];
  805
  806         /* R0 = (ya + yc), R1 = (xa + xc) */
  807         R0 = __SSAT(T0 + S0, 16);
  808         R1 = __SSAT(T1 + S1, 16);
  809
  810         /* S0 = (ya - yc), S1 =(xa - xc) */
  811         S0 = __SSAT(T0 - S0, 16);
  812         S1 = __SSAT(T1 - S1, 16);
  813
  814         /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  815         /* Read yb (real), xb(imag) input */
  816         T0 = pSrc16[i1 * 2u];
  817         T1 = pSrc16[(i1 * 2u) + 1u];
  818
  819         /* Read yd (real), xd(imag) input */
  820         U0 = pSrc16[i3 * 2u];
  821         U1 = pSrc16[(i3 * 2u) + 1u];
  822
  823
  824         /* T0 = (yb + yd), T1 = (xb + xd) */
  825         T0 = __SSAT(T0 + U0, 16);
  826         T1 = __SSAT(T1 + U1, 16);
  827
  828         /*  writing the butterfly processed i0 sample */
  829
  830         /* xa' = xa + xb + xc + xd */
  831         /* ya' = ya + yb + yc + yd */
              /* Halve each term, add, then halve the sum once more. */
  832         out1 = ((R0 >> 1u) + (T0 >> 1u)) >> 1u;
  833         out2 = ((R1 >> 1u) + (T1 >> 1u)) >> 1u;
  834
  835         pSrc16[i0 * 2u] = out1;
  836         pSrc16[(2u * i0) + 1u] = out2;
  837
  838         /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
  839         R0 = (R0 >> 1u) - (T0 >> 1u);
  840         R1 = (R1 >> 1u) - (T1 >> 1u);
  841
  842         /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
  843         out1 = (short) ((Co2 * R0 + Si2 * R1) >> 16u);
  844
  845         /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  846         out2 = (short) ((-Si2 * R0 + Co2 * R1) >> 16u);
  847
  848         /*  Reading i0+3fftLen/4 */
  849         /* Read yb (real), xb(imag) input */
              /* NOTE(review): the heading above mentions i0+3fftLen/4, but the
               * loads below read sample i1 (i0 + n2), before it is overwritten. */
  850         T0 = pSrc16[i1 * 2u];
  851         T1 = pSrc16[(i1 * 2u) + 1u];
  852
  853         /*  writing the butterfly processed i0 + fftLen/4 sample */
  854         /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  855         /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  856         pSrc16[i1 * 2u] = out1;
  857         pSrc16[(i1 * 2u) + 1u] = out2;
  858
  859         /*  Butterfly calculations */
  860
  861         /* Read yd (real), xd(imag) input */
  862         U0 = pSrc16[i3 * 2u];
  863         U1 = pSrc16[(i3 * 2u) + 1u];
  864
  865         /* T0 = yb-yd, T1 = xb-xd */
  866         T0 = __SSAT(T0 - U0, 16);
  867         T1 = __SSAT(T1 - U1, 16);
  868
  869         /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */
  870         R0 = (S0 >> 1u) - (T1 >> 1u);
  871         R1 = (S1 >> 1u) + (T0 >> 1u);
  872
  873         /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */
  874         S0 = (S0 >> 1u) + (T1 >> 1u);
  875         S1 = (S1 >> 1u) - (T0 >> 1u);
  876
  877         /*  Butterfly process for the i0+fftLen/2 sample */
  878         out1 = (short) ((Co1 * S0 + Si1 * S1) >> 16u);
  879
  880         out2 = (short) ((-Si1 * S0 + Co1 * S1) >> 16u);
  881
  882         /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  883         /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  884         pSrc16[i2 * 2u] = out1;
  885         pSrc16[(i2 * 2u) + 1u] = out2;
  886
  887         /*  Butterfly process for the i0+3fftLen/4 sample */
  888         out1 = (short) ((Si3 * R1 + Co3 * R0) >> 16u);
  889
  890         out2 = (short) ((-Si3 * R0 + Co3 * R1) >> 16u);
  891         /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
  892         /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
  893         pSrc16[i3 * 2u] = out1;
  894         pSrc16[(i3 * 2u) + 1u] = out2;
  895       }
  896     }
  897     /*  Twiddle coefficients index modifier */
  898     twidCoefModifier <<= 2u;
  899   }
  900   /* end of middle stage process */
  901
  902
  903   /* data is in 10.6(q6) format for the 1024 point */
  904   /* data is in 8.8(q8) format for the 256 point */
  905   /* data is in 6.10(q10) format for the 64 point */
  906   /* data is in 4.12(q12) format for the 16 point */
  907
  908   /*  Initializations for the last stage */
  909   n1 = n2;
  910   n2 >>= 2u;
  911
  912   /* start of last stage process */
  913
  914   /*  Butterfly implementation */
        /* Last stage: no twiddle multiplication; every output is a halved sum or
         * difference written straight back to the buffer. */
  915   for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
  916   {
  917     /*  index calculation for the input as, */
  918     /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
  919     i1 = i0 + n2;
  920     i2 = i1 + n2;
  921     i3 = i2 + n2;
  922
  923     /*  Reading i0, i0+fftLen/2 inputs */
  924     /* Read ya (real), xa(imag) input */
  925     T0 = pSrc16[i0 * 2u];
  926     T1 = pSrc16[(i0 * 2u) + 1u];
  927
  928     /* Read yc (real), xc(imag) input */
  929     S0 = pSrc16[i2 * 2u];
  930     S1 = pSrc16[(i2 * 2u) + 1u];
  931
  932     /* R0 = (ya + yc), R1 = (xa + xc) */
  933     R0 = __SSAT(T0 + S0, 16u);
  934     R1 = __SSAT(T1 + S1, 16u);
  935
  936     /* S0 = (ya - yc), S1 = (xa - xc) */
  937     S0 = __SSAT(T0 - S0, 16u);
  938     S1 = __SSAT(T1 - S1, 16u);
  939
  940     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  941     /* Read yb (real), xb(imag) input */
  942     T0 = pSrc16[i1 * 2u];
  943     T1 = pSrc16[(i1 * 2u) + 1u];
  944     /* Read yd (real), xd(imag) input */
  945     U0 = pSrc16[i3 * 2u];
  946     U1 = pSrc16[(i3 * 2u) + 1u];
  947
  948     /* T0 = (yb + yd), T1 = (xb + xd)) */
  949     T0 = __SSAT(T0 + U0, 16u);
  950     T1 = __SSAT(T1 + U1, 16u);
  951
  952     /*  writing the butterfly processed i0 sample */
  953     /* xa' = xa + xb + xc + xd */
  954     /* ya' = ya + yb + yc + yd */
  955     pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
  956     pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
  957
  958     /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
  959     R0 = (R0 >> 1u) - (T0 >> 1u);
  960     R1 = (R1 >> 1u) - (T1 >> 1u);
  961     /* Read yb (real), xb(imag) input */
          /* Reload of sample i1 (T0/T1 held the sums); must precede the stores
           * to sample i1 below. */
  962     T0 = pSrc16[i1 * 2u];
  963     T1 = pSrc16[(i1 * 2u) + 1u];
  964
  965     /*  writing the butterfly processed i0 + fftLen/4 sample */
  966     /* xc' = (xa-xb+xc-xd) */
  967     /* yc' = (ya-yb+yc-yd) */
  968     pSrc16[i1 * 2u] = R0;
  969     pSrc16[(i1 * 2u) + 1u] = R1;
  970
  971     /* Read yd (real), xd(imag) input */
  972     U0 = pSrc16[i3 * 2u];
  973     U1 = pSrc16[(i3 * 2u) + 1u];
  974     /* T0 = (yb - yd), T1 = (xb - xd)  */
  975     T0 = __SSAT(T0 - U0, 16u);
  976     T1 = __SSAT(T1 - U1, 16u);
  977
  978     /*  writing the butterfly processed i0 + fftLen/2 sample */
  979     /* xb' = (xa+yb-xc-yd) */
  980     /* yb' = (ya-xb-yc+xd) */
  981     pSrc16[i2 * 2u] = (S0 >> 1u) + (T1 >> 1u);
  982     pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u);
  983
  984     /*  writing the butterfly processed i0 + 3fftLen/4 sample */
  985     /* xd' = (xa-yb-xc+yd) */
  986     /* yd' = (ya+xb-yc-xd) */
  987     pSrc16[i3 * 2u] = (S0 >> 1u) - (T1 >> 1u);
  988     pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u);
  989
  990   }
  991
  992   /* end of last stage process */
  993
  994   /* output is in 11.5(q5) format for the 1024 point */
  995   /* output is in 9.7(q7) format for the 256 point   */
  996   /* output is in 7.9(q9) format for the 64 point  */
  997   /* output is in 5.11(q11) format for the 16 point  */
  998
  999 #endif /* #ifndef ARM_MATH_CM0 */
 1000
 1001 }
1002
1003
1004 /**
1005  * @brief  Core function for the Q15 CIFFT butterfly process.
1006  * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type.
1007  * @param[in]      fftLen           length of the FFT.
1008  * @param[in]      *pCoef16         points to twiddle coefficient buffer.
1009  * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
1010  * @return none.
1011  */
1012
1013 /*
1014 * Radix-4 IFFT algorithm used is :
1015 *
1016 * CIFFT uses same twiddle coefficients as CFFT function
1017 *  x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4]
1018 *
1019 *
1020 * IFFT is implemented with following changes in equations from FFT
1021 *
1022 * Input real and imaginary data:
1023 * x(n) = xa + j * ya
1024 * x(n+N/4 ) = xb + j * yb
1025 * x(n+N/2 ) = xc + j * yc
1026 * x(n+3N/4) = xd + j * yd
1027 *
1028 *
1029 * Output real and imaginary data:
1030 * x(4r) = xa'+ j * ya'
1031 * x(4r+1) = xb'+ j * yb'
1032 * x(4r+2) = xc'+ j * yc'
1033 * x(4r+3) = xd'+ j * yd'
1034 *
1035 *
1036 * Twiddle factors for radix-4 IFFT:
1037 * Wn = co1 + j * (si1)
1038 * W2n = co2 + j * (si2)
1039 * W3n = co3 + j * (si3)
1040
1041 * The real and imaginary output values for the radix-4 butterfly are
1042 * xa' = xa + xb + xc + xd
1043 * ya' = ya + yb + yc + yd
1044 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
1045 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
1046 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
1047 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
1048 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
1049 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
1050 *
1051 */
1052
1053 void arm_radix4_butterfly_inverse_q15(
1054   q15_t * pSrc16,
1055   uint32_t fftLen,
1056   q15_t * pCoef16,
1057   uint32_t twidCoefModifier)
1058 {
1059
1060 #ifndef ARM_MATH_CM0
1061
1062   /* Run the below code for Cortex-M4 and Cortex-M3 */
1063
1064   q31_t R, S, T, U;
1065   q31_t C1, C2, C3, out1, out2;
1066   q31_t *pSrc, *pCoeff;
1067   uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
1068   q15_t in;
1069
1070   /* Total process is divided into three stages */
1071
1072   /* process first stage, middle stages, & last stage */
1073
1074   /*  pointer initializations for SIMD calculations */
1075   pSrc = (q31_t *) pSrc16;
1076   pCoeff = (q31_t *) pCoef16;
1077
1078   /*  Initializations for the first stage */
1079   n2 = fftLen;
1080   n1 = n2;
1081
1082   /* n2 = fftLen/4 */
1083   n2 >>= 2u;
1084
1085   /* Index for twiddle coefficient */
1086   ic = 0u;
1087
1088   /* Index for input read and output write */
1089   i0 = 0u;
1090
1091   j = n2;
1092
1093   /* Input is in 1.15(q15) format */
1094
1095   /*  Start of first stage process */
1096   do
1097   {
1098     /*  Butterfly implementation */
1099
1100     /*  index calculation for the input as, */
1101     /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1102     i1 = i0 + n2;
1103     i2 = i1 + n2;
1104     i3 = i2 + n2;
1105
1106     /*  Reading i0, i0+fftLen/2 inputs */
1107     /* Read ya (real), xa(imag) input */
1108     T = pSrc[i0];
1109     in = ((int16_t) (T & 0xFFFF)) >> 2;
1110     T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
1111     /* Read yc (real), xc(imag) input */
1112     S = pSrc[i2];
1113     in = ((int16_t) (S & 0xFFFF)) >> 2;
1114     S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
1115
1116     /* R = packed((ya + yc), (xa + xc) ) */
1117     R = __QADD16(T, S);
1118     /* S = packed((ya - yc), (xa - xc) ) */
1119     S = __QSUB16(T, S);
1120
1121     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
1122     /* Read yb (real), xb(imag) input */
1123     T = pSrc[i1];
1124     in = ((int16_t) (T & 0xFFFF)) >> 2;
1125     T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
1126     /* Read yd (real), xd(imag) input */
1127     U = pSrc[i3];
1128     in = ((int16_t) (U & 0xFFFF)) >> 2;
1129     U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
1130
1131     /* T = packed((yb + yd), (xb + xd) ) */
1132     T = __QADD16(T, U);
1133
1134     /*  writing the butterfly processed i0 sample */
1135     /* xa' = xa + xb + xc + xd */
1136     /* ya' = ya + yb + yc + yd */
1137     pSrc[i0] = __SHADD16(R, T);
1138
1139     /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
1140     R = __QSUB16(R, T);
1141     /* co2 & si2 are read from SIMD Coefficient pointer */
1142     C2 = pCoeff[2u * ic];
1143
1144 #ifndef ARM_MATH_BIG_ENDIAN
1145
1146     /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
1147     out1 = __SMUSD(C2, R) >> 16u;
1148     /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1149     out2 = __SMUADX(C2, R);
1150
1151 #else
1152
1153     /* xc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1154     out1 = __SMUADX(C2, R) >> 16u;
1155     /* yc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
1156     out2 = __SMUSD(-C2, R);
1157
1158 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
1159
1160     /*  Reading i0+fftLen/4 */
1161     /* T = packed(yb, xb) */
1162     T = pSrc[i1];
1163     in = ((int16_t) (T & 0xFFFF)) >> 2;
1164     T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
1165
1166     /* writing the butterfly processed i0 + fftLen/4 sample */
1167     /* writing output(xc', yc') in little endian format */
1168     pSrc[i1] = (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
1169
1170     /*  Butterfly calculations */
1171     /* U = packed(yd, xd) */
1172     U = pSrc[i3];
1173     in = ((int16_t) (U & 0xFFFF)) >> 2;
1174     U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
1175
1176     /* T = packed(yb-yd, xb-xd) */
1177     T = __QSUB16(T, U);
1178
1179 #ifndef ARM_MATH_BIG_ENDIAN
1180
1181     /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */
1182     R = __QSAX(S, T);
1183     /* S = packed((ya-yc) + (xb- xd),  (xa-xc) - (yb-yd)) */
1184     S = __QASX(S, T);
1185
1186 #else
1187
1188     /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */
1189     R = __QASX(S, T);
1190     /* S = packed((ya-yc) + (xb- xd),  (xa-xc) - (yb-yd)) */
1191     S = __QSAX(S, T);
1192
1193 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
1194
1195     /* co1 & si1 are read from SIMD Coefficient pointer */
1196     C1 = pCoeff[ic];
1197     /*  Butterfly process for the i0+fftLen/2 sample */
1198
1199 #ifndef ARM_MATH_BIG_ENDIAN
1200
1201     /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
1202     out1 = __SMUSD(C1, S) >> 16u;
1203     /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
1204     out2 = __SMUADX(C1, S);
1205
1206 #else
1207
1208     /* xb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
1209     out1 = __SMUADX(C1, S) >> 16u;
1210     /* yb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
1211     out2 = __SMUSD(-C1, S);
1212
1213 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
1214
1215     /* writing output(xb', yb') in little endian format */
1216     pSrc[i2] = ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF);
1217
1218     /* co3 & si3 are read from SIMD Coefficient pointer */
1219     C3 = pCoeff[3u * ic];
1220     /*  Butterfly process for the i0+3fftLen/4 sample */
1221
1222 #ifndef ARM_MATH_BIG_ENDIAN
1223
1224     /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
1225     out1 = __SMUSD(C3, R) >> 16u;
1226     /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
1227     out2 = __SMUADX(C3, R);
1228
1229 #else
1230
1231     /* xd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
1232     out1 = __SMUADX(C3, R) >> 16u;
1233     /* yd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
1234     out2 = __SMUSD(-C3, R);
1235
1236 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
1237
1238     /* writing output(xd', yd') in little endian format */
1239     pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
1240
1241     /*  Twiddle coefficients index modifier */
1242     ic = ic + twidCoefModifier;
1243
1244     /*  Updating input index */
1245     i0 = i0 + 1u;
1246
1247   } while(--j);
1248
1249   /*  End of first stage process */
1250
1251   /* data is in 4.11(q11) format */
1252
1253
1254   /*  Start of Middle stage process */
1255
1256   /*  Twiddle coefficients index modifier */
1257   twidCoefModifier <<= 2u;
1258
1259   /*  Calculation of Middle stage */
1260   for (k = fftLen / 4u; k > 4u; k >>= 2u)
1261   {
1262     /*  Initializations for the middle stage */
1263     n1 = n2;
1264     n2 >>= 2u;
1265     ic = 0u;
1266
1267     for (j = 0u; j <= (n2 - 1u); j++)
1268     {
1269       /*  index calculation for the coefficients */
1270       C1 = pCoeff[ic];
1271       C2 = pCoeff[2u * ic];
1272       C3 = pCoeff[3u * ic];
1273
1274       /*  Twiddle coefficients index modifier */
1275       ic = ic + twidCoefModifier;
1276
1277       /*  Butterfly implementation */
1278       for (i0 = j; i0 < fftLen; i0 += n1)
1279       {
1280         /*  index calculation for the input as, */
1281         /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1282         i1 = i0 + n2;
1283         i2 = i1 + n2;
1284         i3 = i2 + n2;
1285
1286         /*  Reading i0, i0+fftLen/2 inputs */
1287         /* Read ya (real), xa(imag) input */
1288         T = pSrc[i0];
1289
1290         /* Read yc (real), xc(imag) input */
1291         S = pSrc[i2];
1292
1293
1294         /* R = packed( (ya + yc), (xa + xc)) */
1295         R = __QADD16(T, S);
1296         /* S = packed((ya - yc), (xa - xc)) */
1297         S = __QSUB16(T, S);
1298
1299         /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
1300         /* Read yb (real), xb(imag) input */
1301         T = pSrc[i1];
1302
1303         /* Read yd (real), xd(imag) input */
1304         U = pSrc[i3];
1305
1306
1307         /* T = packed( (yb + yd), (xb + xd)) */
1308         T = __QADD16(T, U);
1309
1310         /*  writing the butterfly processed i0 sample */
1311         /* xa' = xa + xb + xc + xd */
1312         /* ya' = ya + yb + yc + yd */
1313         out1 = __SHADD16(R, T);
1314         in = ((int16_t) (out1 & 0xFFFF)) >> 1;
1315         out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF);
1316         pSrc[i0] = out1;
1317
1318
1319
1320         /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
1321         R = __SHSUB16(R, T);
1322
1323
1324 #ifndef ARM_MATH_BIG_ENDIAN
1325
1326         /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */
1327         out1 = __SMUSD(C2, R) >> 16u;
1328         /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1329         out2 = __SMUADX(C2, R);
1330
1331 #else
1332
1333         /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1334         out1 = __SMUADX(R, C2) >> 16u;
1335         /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */
1336         out2 = __SMUSD(-C2, R);
1337
1338 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
1339
1340         /*  Reading i0+3fftLen/4 */
1341         /* Read yb (real), xb(imag) input */
1342         T = pSrc[i1];
1343
1344         /*  writing the butterfly processed i0 + fftLen/4 sample */
1345         /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
1346         /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1347         pSrc[i1] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
1348
1349         /*  Butterfly calculations */
1350         /* Read yd (real), xd(imag) input */
1351         U = pSrc[i3];
1352
1353         /* T = packed(yb-yd, xb-xd) */
1354         T = __QSUB16(T, U);
1355
1356
1357 #ifndef ARM_MATH_BIG_ENDIAN
1358
1359         /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */
1360         R = __SHSAX(S, T);
1361
1362         /* S = packed((ya-yc) + (xb- xd),  (xa-xc) - (yb-yd)) */
1363         S = __SHASX(S, T);
1364         /*  Butterfly process for the i0+fftLen/2 sample */
1365         out1 = __SMUSD(C1, S) >> 16u;
1366         out2 = __SMUADX(C1, S);
1367
1368 #else
1369
1370         /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */
1371         R = __SHASX(S, T);
1372
1373         /* S = packed((ya-yc) + (xb- xd),  (xa-xc) - (yb-yd)) */
1374         S = __SHSAX(S, T);
1375         /*  Butterfly process for the i0+fftLen/2 sample */
1376         out1 = __SMUADX(S, C1) >> 16u;
1377         out2 = __SMUSD(-C1, S);
1378
1379 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
1380
1381         /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
1382         /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
1383         pSrc[i2] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
1384
1385         /*  Butterfly process for the i0+3fftLen/4 sample */
1386
1387 #ifndef ARM_MATH_BIG_ENDIAN
1388
1389         out1 = __SMUSD(C3, R) >> 16u;
1390         out2 = __SMUADX(C3, R);
1391
1392 #else
1393
1394         out1 = __SMUADX(C3, R) >> 16u;
1395         out2 = __SMUSD(-C3, R);
1396
1397 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
1398
1399         /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
1400         /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
1401         pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
1402
1403
1404       }
1405     }
1406     /*  Twiddle coefficients index modifier */
1407     twidCoefModifier <<= 2u;
1408   }
1409   /*  End of Middle stages process */
1410
1411
1412   /* data is in 10.6(q6) format for the 1024 point */
1413   /* data is in 8.8(q8) format for the 256 point   */
1414   /* data is in 6.10(q10) format for the 64 point  */
1415   /* data is in 4.12(q12) format for the 16 point  */
1416
1417   /* start of last stage process */
1418
1419
1420   /*  Initializations for the last stage */
1421   n1 = n2;
1422   n2 >>= 2u;
1423
1424   /*  Butterfly implementation */
1425   for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
1426   {
1427     /*  index calculation for the input as, */
1428     /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1429     i1 = i0 + n2;
1430     i2 = i1 + n2;
1431     i3 = i2 + n2;
1432
1433     /*  Reading i0, i0+fftLen/2 inputs */
1434     /* Read ya (real), xa(imag) input */
1435     T = pSrc[i0];
1436     /* Read yc (real), xc(imag) input */
1437     S = pSrc[i2];
1438
1439     /* R = packed((ya + yc), (xa + xc)) */
1440     R = __QADD16(T, S);
1441     /* S = packed((ya - yc), (xa - xc)) */
1442     S = __QSUB16(T, S);
1443
1444     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
1445     /* Read yb (real), xb(imag) input */
1446     T = pSrc[i1];
1447     /* Read yd (real), xd(imag) input */
1448     U = pSrc[i3];
1449
1450     /* T = packed((yb + yd), (xb + xd)) */
1451     T = __QADD16(T, U);
1452
1453     /*  writing the butterfly processed i0 sample */
1454     /* xa' = xa + xb + xc + xd */
1455     /* ya' = ya + yb + yc + yd */
1456     pSrc[i0] = __SHADD16(R, T);
1457
1458     /* R = packed((ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
1459     R = __SHSUB16(R, T);
1460
1461     /* Read yb (real), xb(imag) input */
1462     T = pSrc[i1];
1463
1464     /*  writing the butterfly processed i0 + fftLen/4 sample */
1465     /* xc' = (xa-xb+xc-xd) */
1466     /* yc' = (ya-yb+yc-yd) */
1467     pSrc[i1] = R;
1468
1469     /* Read yd (real), xd(imag) input */
1470     U = pSrc[i3];
1471     /* T = packed( (yb - yd), (xb - xd))  */
1472     T = __QSUB16(T, U);
1473
1474
1475 #ifndef ARM_MATH_BIG_ENDIAN
1476
1477     /*  writing the butterfly processed i0 + fftLen/2 sample */
1478     /* xb' = (xa-yb-xc+yd) */
1479     /* yb' = (ya+xb-yc-xd) */
1480     pSrc[i2] = __SHASX(S, T);
1481
1482     /*  writing the butterfly processed i0 + 3fftLen/4 sample */
1483     /* xd' = (xa+yb-xc-yd) */
1484     /* yd' = (ya-xb-yc+xd) */
1485     pSrc[i3] = __SHSAX(S, T);
1486
1487
1488 #else
1489
1490     /*  writing the butterfly processed i0 + fftLen/2 sample */
1491     /* xb' = (xa-yb-xc+yd) */
1492     /* yb' = (ya+xb-yc-xd) */
1493     pSrc[i2] = __SHSAX(S, T);
1494
1495     /*  writing the butterfly processed i0 + 3fftLen/4 sample */
1496     /* xd' = (xa+yb-xc-yd) */
1497     /* yd' = (ya-xb-yc+xd) */
1498     pSrc[i3] = __SHASX(S, T);
1499
1500 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
1501
1502   }
1503   /* end of last stage  process */
1504
1505   /* output is in 11.5(q5) format for the 1024 point */
1506   /* output is in 9.7(q7) format for the 256 point   */
1507   /* output is in 7.9(q9) format for the 64 point  */
1508   /* output is in 5.11(q11) format for the 16 point  */
1509
1510
1511 #else
1512
1513   /* Run the below code for Cortex-M0 */
1514
1515   q15_t R0, R1, S0, S1, T0, T1, U0, U1;
1516   q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
1517   uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
1518
1519   /* Total process is divided into three stages */
1520
1521   /* process first stage, middle stages, & last stage */
1522
1523   /*  Initializations for the first stage */
1524   n2 = fftLen;
1525   n1 = n2;
1526
1527   /* n2 = fftLen/4 */
1528   n2 >>= 2u;
1529
1530   /* Index for twiddle coefficient */
1531   ic = 0u;
1532
1533   /* Index for input read and output write */
1534   i0 = 0u;
1535
1536   j = n2;
1537
1538   /* Input is in 1.15(q15) format */
1539
1540   /*  Start of first stage process */
1541   do
1542   {
1543     /*  Butterfly implementation */
1544
1545     /*  index calculation for the input as, */
1546     /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
1547     i1 = i0 + n2;
1548     i2 = i1 + n2;
1549     i3 = i2 + n2;
1550
1551     /*  Reading i0, i0+fftLen/2 inputs */
1552     /* input is down scale by 4 to avoid overflow */
1553     /* Read ya (real), xa(imag) input */
1554     T0 = pSrc16[i0 * 2u] >> 2u;
1555     T1 = pSrc16[(i0 * 2u) + 1u] >> 2u;
1556     /* input is down scale by 4 to avoid overflow */
1557     /* Read yc (real), xc(imag) input */
1558     S0 = pSrc16[i2 * 2u] >> 2u;
1559     S1 = pSrc16[(i2 * 2u) + 1u] >> 2u;
1560
1561     /* R0 = (ya + yc), R1 = (xa + xc) */
1562     R0 = __SSAT(T0 + S0, 16u);
1563     R1 = __SSAT(T1 + S1, 16u);
1564     /* S0 = (ya - yc), S1 = (xa - xc) */
1565     S0 = __SSAT(T0 - S0, 16u);
1566     S1 = __SSAT(T1 - S1, 16u);
1567
1568     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
1569     /* input is down scale by 4 to avoid overflow */
1570     /* Read yb (real), xb(imag) input */
1571     T0 = pSrc16[i1 * 2u] >> 2u;
1572     T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
1573     /* Read yd (real), xd(imag) input */
1574     /* input is down scale by 4 to avoid overflow */
1575     U0 = pSrc16[i3 * 2u] >> 2u;
1576     U1 = pSrc16[(i3 * 2u) + 1u] >> 2u;
1577
1578     /* T0 = (yb + yd), T1 = (xb + xd) */
1579     T0 = __SSAT(T0 + U0, 16u);
1580     T1 = __SSAT(T1 + U1, 16u);
1581
1582     /*  writing the butterfly processed i0 sample */
1583     /* xa' = xa + xb + xc + xd */
1584     /* ya' = ya + yb + yc + yd */
1585     pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
1586     pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
1587
1588     /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc)- (xb + xd) */
1589     R0 = __SSAT(R0 - T0, 16u);
1590     R1 = __SSAT(R1 - T1, 16u);
1591     /* co2 & si2 are read from Coefficient pointer */
1592     Co2 = pCoef16[2u * ic * 2u];
1593     Si2 = pCoef16[(2u * ic * 2u) + 1u];
1594     /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
1595     out1 = (short) ((Co2 * R0 - Si2 * R1) >> 16u);
1596     /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1597     out2 = (short) ((Si2 * R0 + Co2 * R1) >> 16u);
1598
1599     /*  Reading i0+fftLen/4 */
1600     /* input is down scale by 4 to avoid overflow */
1601     /* T0 = yb, T1 = xb */
1602     T0 = pSrc16[i1 * 2u] >> 2u;
1603     T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
1604
1605     /* writing the butterfly processed i0 + fftLen/4 sample */
1606     /* writing output(xc', yc') in little endian format */
1607     pSrc16[i1 * 2u] = out1;
1608     pSrc16[(i1 * 2u) + 1u] = out2;
1609
1610     /*  Butterfly calculations */
1611     /* input is down scale by 4 to avoid overflow */
1612     /* U0 = yd, U1 = xd) */
1613     U0 = pSrc16[i3 * 2u] >> 2u;
1614     U1 = pSrc16[(i3 * 2u) + 1u] >> 2u;
1615
1616     /* T0 = yb-yd, T1 = xb-xd) */
1617     T0 = __SSAT(T0 - U0, 16u);
1618     T1 = __SSAT(T1 - U1, 16u);
1619     /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */
1620     R0 = (short) __SSAT((q31_t) (S0 + T1), 16);
1621     R1 = (short) __SSAT((q31_t) (S1 - T0), 16);
1622     /* S = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */
1623     S0 = (short) __SSAT((q31_t) (S0 - T1), 16);
1624     S1 = (short) __SSAT((q31_t) (S1 + T0), 16);
1625
1626     /* co1 & si1 are read from Coefficient pointer */
1627     Co1 = pCoef16[ic * 2u];
1628     Si1 = pCoef16[(ic * 2u) + 1u];
1629     /*  Butterfly process for the i0+fftLen/2 sample */
1630     /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
1631     out1 = (short) ((Co1 * S0 - Si1 * S1) >> 16u);
1632     /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
1633     out2 = (short) ((Si1 * S0 + Co1 * S1) >> 16u);
1634     /* writing output(xb', yb') in little endian format */
1635     pSrc16[i2 * 2u] = out1;
1636     pSrc16[(i2 * 2u) + 1u] = out2;
1637
1638     /* Co3 & si3 are read from Coefficient pointer */
1639     Co3 = pCoef16[3u * ic * 2u];
1640     Si3 = pCoef16[(3u * ic * 2u) + 1u];
1641     /*  Butterfly process for the i0+3fftLen/4 sample */
1642     /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
1643     out1 = (short) ((Co3 * R0 - Si3 * R1) >> 16u);
1644     /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
1645     out2 = (short) ((Si3 * R0 + Co3 * R1) >> 16u);
1646     /* writing output(xd', yd') in little endian format */
1647     pSrc16[i3 * 2u] = out1;
1648     pSrc16[(i3 * 2u) + 1u] = out2;
1649
1650     /*  Twiddle coefficients index modifier */
1651     ic = ic + twidCoefModifier;
1652
1653     /*  Updating input index */
1654     i0 = i0 + 1u;
1655
1656   } while(--j);
1657
1658   /*  End of first stage process */
1659
1660   /* data is in 4.11(q11) format */
1661
1662
1663   /*  Start of Middle stage process */
1664
1665   /*  Twiddle coefficients index modifier */
1666   twidCoefModifier <<= 2u;
1667
1668   /*  Calculation of Middle stage */
1669   for (k = fftLen / 4u; k > 4u; k >>= 2u)
1670   {
1671     /*  Initializations for the middle stage */
1672     n1 = n2;
1673     n2 >>= 2u;
1674     ic = 0u;
1675
1676     for (j = 0u; j <= (n2 - 1u); j++)
1677     {
1678       /*  index calculation for the coefficients */
1679       Co1 = pCoef16[ic * 2u];
1680       Si1 = pCoef16[(ic * 2u) + 1u];
1681       Co2 = pCoef16[2u * ic * 2u];
1682       Si2 = pCoef16[2u * ic * 2u + 1u];
1683       Co3 = pCoef16[3u * ic * 2u];
1684       Si3 = pCoef16[(3u * ic * 2u) + 1u];
1685
1686       /*  Twiddle coefficients index modifier */
1687       ic = ic + twidCoefModifier;
1688
1689       /*  Butterfly implementation */
1690       for (i0 = j; i0 < fftLen; i0 += n1)
1691       {
1692         /*  index calculation for the input as, */
1693         /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
1694         i1 = i0 + n2;
1695         i2 = i1 + n2;
1696         i3 = i2 + n2;
1697
1698         /*  Reading i0, i0+fftLen/2 inputs */
1699         /* Read ya (real), xa(imag) input */
1700         T0 = pSrc16[i0 * 2u];
1701         T1 = pSrc16[(i0 * 2u) + 1u];
1702
1703         /* Read yc (real), xc(imag) input */
1704         S0 = pSrc16[i2 * 2u];
1705         S1 = pSrc16[(i2 * 2u) + 1u];
1706
1707
1708         /* R0 = (ya + yc), R1 = (xa + xc) */
1709         R0 = __SSAT(T0 + S0, 16u);
1710         R1 = __SSAT(T1 + S1, 16u);
1711         /* S0 = (ya - yc), S1 = (xa - xc) */
1712         S0 = __SSAT(T0 - S0, 16u);
1713         S1 = __SSAT(T1 - S1, 16u);
1714
1715         /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
1716         /* Read yb (real), xb(imag) input */
1717         T0 = pSrc16[i1 * 2u];
1718         T1 = pSrc16[(i1 * 2u) + 1u];
1719
1720         /* Read yd (real), xd(imag) input */
1721         U0 = pSrc16[i3 * 2u];
1722         U1 = pSrc16[(i3 * 2u) + 1u];
1723
1724         /* T0 = (yb + yd), T1 = (xb + xd) */
1725         T0 = __SSAT(T0 + U0, 16u);
1726         T1 = __SSAT(T1 + U1, 16u);
1727
1728         /*  writing the butterfly processed i0 sample */
1729         /* xa' = xa + xb + xc + xd */
1730         /* ya' = ya + yb + yc + yd */
1731         pSrc16[i0 * 2u] = ((R0 >> 1u) + (T0 >> 1u)) >> 1u;
1732         pSrc16[(i0 * 2u) + 1u] = ((R1 >> 1u) + (T1 >> 1u)) >> 1u;
1733
1734         /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
1735         R0 = (R0 >> 1u) - (T0 >> 1u);
1736         R1 = (R1 >> 1u) - (T1 >> 1u);
1737
1738         /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */
1739         out1 = (short) ((Co2 * R0 - Si2 * R1) >> 16);
1740         /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1741         out2 = (short) ((Si2 * R0 + Co2 * R1) >> 16);
1742
1743         /*  Reading i0+3fftLen/4 */
1744         /* Read yb (real), xb(imag) input */
1745         T0 = pSrc16[i1 * 2u];
1746         T1 = pSrc16[(i1 * 2u) + 1u];
1747
1748         /*  writing the butterfly processed i0 + fftLen/4 sample */
1749         /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
1750         /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
1751         pSrc16[i1 * 2u] = out1;
1752         pSrc16[(i1 * 2u) + 1u] = out2;
1753
1754         /*  Butterfly calculations */
1755         /* Read yd (real), xd(imag) input */
1756         U0 = pSrc16[i3 * 2u];
1757         U1 = pSrc16[(i3 * 2u) + 1u];
1758
1759         /* T0 = yb-yd, T1 = xb-xd) */
1760         T0 = __SSAT(T0 - U0, 16u);
1761         T1 = __SSAT(T1 - U1, 16u);
1762
1763         /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */
1764         R0 = (S0 >> 1u) + (T1 >> 1u);
1765         R1 = (S1 >> 1u) - (T0 >> 1u);
1766
1767         /* S1 = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */
1768         S0 = (S0 >> 1u) - (T1 >> 1u);
1769         S1 = (S1 >> 1u) + (T0 >> 1u);
1770
1771         /*  Butterfly process for the i0+fftLen/2 sample */
1772         out1 = (short) ((Co1 * S0 - Si1 * S1) >> 16u);
1773         out2 = (short) ((Si1 * S0 + Co1 * S1) >> 16u);
1774         /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
1775         /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
1776         pSrc16[i2 * 2u] = out1;
1777         pSrc16[(i2 * 2u) + 1u] = out2;
1778
1779         /*  Butterfly process for the i0+3fftLen/4 sample */
1780         out1 = (short) ((Co3 * R0 - Si3 * R1) >> 16u);
1781
1782         out2 = (short) ((Si3 * R0 + Co3 * R1) >> 16u);
1783         /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
1784         /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
1785         pSrc16[i3 * 2u] = out1;
1786         pSrc16[(i3 * 2u) + 1u] = out2;
1787
1788
1789       }
1790     }
1791     /*  Twiddle coefficients index modifier */
1792     twidCoefModifier <<= 2u;
1793   }
1794   /*  End of Middle stages process */
1795
1796
1797   /* data is in 10.6(q6) format for the 1024 point */
1798   /* data is in 8.8(q8) format for the 256 point   */
1799   /* data is in 6.10(q10) format for the 64 point  */
1800   /* data is in 4.12(q12) format for the 16 point  */
1801
1802   /* start of last stage process */
1803
1804
1805   /*  Initializations for the last stage */
1806   n1 = n2;
1807   n2 >>= 2u;
1808
1809   /*  Butterfly implementation */
1810   for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
1811   {
1812     /*  index calculation for the input as, */
1813     /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
1814     i1 = i0 + n2;
1815     i2 = i1 + n2;
1816     i3 = i2 + n2;
1817
1818     /*  Reading i0, i0+fftLen/2 inputs */
1819     /* Read ya (real), xa(imag) input */
1820     T0 = pSrc16[i0 * 2u];
1821     T1 = pSrc16[(i0 * 2u) + 1u];
1822     /* Read yc (real), xc(imag) input */
1823     S0 = pSrc16[i2 * 2u];
1824     S1 = pSrc16[(i2 * 2u) + 1u];
1825
1826     /* R0 = (ya + yc), R1 = (xa + xc) */
1827     R0 = __SSAT(T0 + S0, 16u);
1828     R1 = __SSAT(T1 + S1, 16u);
1829     /* S0 = (ya - yc), S1 = (xa - xc) */
1830     S0 = __SSAT(T0 - S0, 16u);
1831     S1 = __SSAT(T1 - S1, 16u);
1832
1833     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
1834     /* Read yb (real), xb(imag) input */
1835     T0 = pSrc16[i1 * 2u];
1836     T1 = pSrc16[(i1 * 2u) + 1u];
1837     /* Read yd (real), xd(imag) input */
1838     U0 = pSrc16[i3 * 2u];
1839     U1 = pSrc16[(i3 * 2u) + 1u];
1840
1841     /* T0 = (yb + yd), T1 = (xb + xd) */
1842     T0 = __SSAT(T0 + U0, 16u);
1843     T1 = __SSAT(T1 + U1, 16u);
1844
1845     /*  writing the butterfly processed i0 sample */
1846     /* xa' = xa + xb + xc + xd */
1847     /* ya' = ya + yb + yc + yd */
1848     pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
1849     pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
1850
1851     /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
1852     R0 = (R0 >> 1u) - (T0 >> 1u);
1853     R1 = (R1 >> 1u) - (T1 >> 1u);
1854
1855     /* Read yb (real), xb(imag) input */
1856     T0 = pSrc16[i1 * 2u];
1857     T1 = pSrc16[(i1 * 2u) + 1u];
1858
1859     /*  writing the butterfly processed i0 + fftLen/4 sample */
1860     /* xc' = (xa-xb+xc-xd) */
1861     /* yc' = (ya-yb+yc-yd) */
1862     pSrc16[i1 * 2u] = R0;
1863     pSrc16[(i1 * 2u) + 1u] = R1;
1864
1865     /* Read yd (real), xd(imag) input */
1866     U0 = pSrc16[i3 * 2u];
1867     U1 = pSrc16[(i3 * 2u) + 1u];
1868     /* T0 = (yb - yd), T1 = (xb - xd) */
1869     T0 = __SSAT(T0 - U0, 16u);
1870     T1 = __SSAT(T1 - U1, 16u);
1871
1872     /*  writing the butterfly processed i0 + fftLen/2 sample */
1873     /* xb' = (xa-yb-xc+yd) */
1874     /* yb' = (ya+xb-yc-xd) */
1875     pSrc16[i2 * 2u] = (S0 >> 1u) - (T1 >> 1u);
1876     pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u);
1877
1878
1879     /*  writing the butterfly processed i0 + 3fftLen/4 sample */
1880     /* xd' = (xa+yb-xc-yd) */
1881     /* yd' = (ya-xb-yc+xd) */
1882     pSrc16[i3 * 2u] = (S0 >> 1u) + (T1 >> 1u);
1883     pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u);
1884   }
1885   /* end of last stage  process */
1886
1887   /* output is in 11.5(q5) format for the 1024 point */
1888   /* output is in 9.7(q7) format for the 256 point   */
1889   /* output is in 7.9(q9) format for the 64 point  */
1890   /* output is in 5.11(q11) format for the 16 point  */
1891
1892 #endif /* #ifndef ARM_MATH_CM0 */
1893
1894 }
1895
1896
1897 /*
1898    * @brief  In-place bit reversal function.
1899    * @param[in, out] *pSrc        points to the in-place buffer of Q15 data type.
1900    * @param[in]      fftLen       length of the FFT.
1901    * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
1902    * @param[in]      *pBitRevTab  points to bit reversal table.
1903    * @return none.
1904  */
1905
1906 void arm_bitreversal_q15(
1907   q15_t * pSrc16,
1908   uint32_t fftLen,
1909   uint16_t bitRevFactor,
1910   uint16_t * pBitRevTab)
1911 {
1912   q31_t *pSrc = (q31_t *) pSrc16;
1913   q31_t in;
1914   uint32_t fftLenBy2, fftLenBy2p1;
1915   uint32_t i, j;
1916
1917   /*  Initializations */
1918   j = 0u;
1919   fftLenBy2 = fftLen / 2u;
1920   fftLenBy2p1 = (fftLen / 2u) + 1u;
1921
1922   /* Bit Reversal Implementation */
1923   for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u)
1924   {
1925     if(i < j)
1926     {
1927       /*  pSrc[i] <-> pSrc[j]; */
1928       /*  pSrc[i+1u] <-> pSrc[j+1u] */
1929       in = pSrc[i];
1930       pSrc[i] = pSrc[j];
1931       pSrc[j] = in;
1932
1933       /*  pSrc[i + fftLenBy2p1] <-> pSrc[j + fftLenBy2p1];  */
1934       /*  pSrc[i + fftLenBy2p1+1u] <-> pSrc[j + fftLenBy2p1+1u] */
1935       in = pSrc[i + fftLenBy2p1];
1936       pSrc[i + fftLenBy2p1] = pSrc[j + fftLenBy2p1];
1937       pSrc[j + fftLenBy2p1] = in;
1938     }
1939
1940     /*  pSrc[i+1u] <-> pSrc[j+fftLenBy2];         */
1941     /*  pSrc[i+2] <-> pSrc[j+fftLenBy2+1u]  */
1942     in = pSrc[i + 1u];
1943     pSrc[i + 1u] = pSrc[j + fftLenBy2];
1944     pSrc[j + fftLenBy2] = in;
1945
1946     /*  Reading the index for the bit reversal */
1947     j = *pBitRevTab;
1948
1949     /*  Updating the bit reversal index depending on the fft length  */
1950     pBitRevTab += bitRevFactor;
1951   }
1952 }