git.gag.com Git - fw/stlink/blob - exampleF4/CMSIS/DSP_Lib/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        15. July 2011
   5 * $Revision:    V1.0.10
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:                arm_cmplx_dot_prod_f32.c
   9 *
  10 * Description:  Floating-point complex dot product
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Version 1.0.10 2011/7/15
  15 *    Big Endian support added and Merged M0 and M3/M4 Source code.
  16 *
  17 * Version 1.0.3 2010/11/29
  18 *    Re-organized the CMSIS folders and updated documentation.
  19 *
  20 * Version 1.0.2 2010/11/11
  21 *    Documentation updated.
  22 *
  23 * Version 1.0.1 2010/10/05
  24 *    Production release and review comments incorporated.
  25 *
  26 * Version 1.0.0 2010/09/20
  27 *    Production release and review comments incorporated.
  28 * ---------------------------------------------------------------------------- */
  29
  30 #include "arm_math.h"
  31
  32 /**
  33  * @ingroup groupCmplxMath
  34  */
  35
  36 /**
  37  * @defgroup cmplx_dot_prod Complex Dot Product
  38  *
  39  * Computes the dot product of two complex vectors.
  40  * The vectors are multiplied element-by-element and then summed.
  41  *
  42  * The <code>pSrcA</code> points to the first complex input vector and
  43  * <code>pSrcB</code> points to the second complex input vector.
  44  * <code>numSamples</code> specifies the number of complex samples
  45  * and the data in each array is stored in an interleaved fashion
  46  * (real, imag, real, imag, ...).
  47  * Each array has a total of <code>2*numSamples</code> values.
  48  *
  49  * The algorithm used is:
  50  * <pre>
  51  * realResult=0;
  52  * imagResult=0;
  53  * for(n=0; n<numSamples; n++) {
  54  *     realResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+0] - pSrcA[(2*n)+1]*pSrcB[(2*n)+1];
  55  *     imagResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+1] + pSrcA[(2*n)+1]*pSrcB[(2*n)+0];
  56  * }
  57  * </pre>
  58  *
  59  * There are separate functions for floating-point, Q15, and Q31 data types.
  60  */
  61
  62 /**
  63  * @addtogroup cmplx_dot_prod
  64  * @{
  65  */
  66
  67 /**
  68  * @brief  Floating-point complex dot product
  69  * @param  *pSrcA points to the first input vector
  70  * @param  *pSrcB points to the second input vector
  71  * @param  numSamples number of complex samples in each vector
  72  * @param  *realResult real part of the result returned here
  73  * @param  *imagResult imaginary part of the result returned here
  74  * @return none.
  75  */
  76
  77 void arm_cmplx_dot_prod_f32(
  78   float32_t * pSrcA,
  79   float32_t * pSrcB,
  80   uint32_t numSamples,
  81   float32_t * realResult,
  82   float32_t * imagResult)
  83 {
  84   float32_t real_sum = 0.0f, imag_sum = 0.0f;    /* Temporary result storage */
  85
  86 #ifndef ARM_MATH_CM0
  87
  88   /* Run the below code for Cortex-M4 and Cortex-M3 */
  89   uint32_t blkCnt;                               /* loop counter */
  90
  91   /*loop Unrolling */
  92   blkCnt = numSamples >> 2u;
  93
  94   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
  95    ** a second loop below computes the remaining 1 to 3 samples. */
  96   while(blkCnt > 0u)
  97   {
  98     /* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
  99     real_sum += (*pSrcA++) * (*pSrcB++);
 100     /* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
 101     imag_sum += (*pSrcA++) * (*pSrcB++);
 102
 103     real_sum += (*pSrcA++) * (*pSrcB++);
 104     imag_sum += (*pSrcA++) * (*pSrcB++);
 105
 106     real_sum += (*pSrcA++) * (*pSrcB++);
 107     imag_sum += (*pSrcA++) * (*pSrcB++);
 108
 109     real_sum += (*pSrcA++) * (*pSrcB++);
 110     imag_sum += (*pSrcA++) * (*pSrcB++);
 111
 112     /* Decrement the loop counter */
 113     blkCnt--;
 114   }
 115
 116   /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
 117    ** No loop unrolling is used. */
 118   blkCnt = numSamples % 0x4u;
 119
 120   while(blkCnt > 0u)
 121   {
 122     /* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
 123     real_sum += (*pSrcA++) * (*pSrcB++);
 124     /* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
 125     imag_sum += (*pSrcA++) * (*pSrcB++);
 126
 127
 128     /* Decrement the loop counter */
 129     blkCnt--;
 130   }
 131
 132 #else
 133
 134   /* Run the below code for Cortex-M0 */
 135
 136   while(numSamples > 0u)
 137   {
 138     /* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
 139     real_sum += (*pSrcA++) * (*pSrcB++);
 140     /* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
 141     imag_sum += (*pSrcA++) * (*pSrcB++);
 142
 143
 144     /* Decrement the loop counter */
 145     numSamples--;
 146   }
 147
 148 #endif /* #ifndef ARM_MATH_CM0 */
 149
 150   /* Store the real and imaginary results in the destination buffers */
 151   *realResult = real_sum;
 152   *imagResult = imag_sum;
 153 }
 154
 155 /**
 156  * @} end of cmplx_dot_prod group
 157  */