1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010 ARM Limited. All rights reserved.
7 * Project: CMSIS DSP Library
8 * Title: arm_cmplx_dot_prod_q15.c
10 * Description: Processing function for the Q15 Complex Dot product
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
14 * Version 1.0.10 2011/7/15
15 * Big Endian support added and Merged M0 and M3/M4 Source code.
17 * Version 1.0.3 2010/11/29
18 * Re-organized the CMSIS folders and updated documentation.
20 * Version 1.0.2 2010/11/11
21 * Documentation updated.
23 * Version 1.0.1 2010/10/05
24 * Production release and review comments incorporated.
26 * Version 1.0.0 2010/09/20
27 * Production release and review comments incorporated.
28 * -------------------------------------------------------------------- */
33 * @ingroup groupCmplxMath
37 * @addtogroup cmplx_dot_prod
42 * @brief Q15 complex dot product
43 * @param *pSrcA points to the first input vector
44 * @param *pSrcB points to the second input vector
45 * @param numSamples number of complex samples in each vector
46 * @param *realResult real part of the result returned here
47 * @param *imagResult imaginary part of the result returned here
50 * <b>Scaling and Overflow Behavior:</b>
52 * The function is implemented using an internal 64-bit accumulator.
53 * The intermediate 1.15 by 1.15 multiplications are performed with full precision and yield a 2.30 result.
54 * These are accumulated in a 64-bit accumulator with 34.30 precision.
55 * As a final step, the accumulators are converted to 8.24 format.
56 * The return results <code>realResult</code> and <code>imagResult</code> are in 8.24 format.
59 void arm_cmplx_dot_prod_q15(
66 q63_t real_sum = 0, imag_sum = 0; /* Temporary result storage */
70 /* Run the below code for Cortex-M4 and Cortex-M3 */
71 uint32_t blkCnt; /* loop counter */
75 blkCnt = numSamples >> 2u;
77 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
78 ** a second loop below computes the remaining 1 to 3 samples. */
81 /* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
82 real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
84 /* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
85 imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
87 real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
88 imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
90 real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
91 imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
93 real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
94 imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
96 /* Decrement the loop counter */
100 /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
101 ** No loop unrolling is used. */
102 blkCnt = numSamples % 0x4u;
106 /* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
107 real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
108 /* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
109 imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
111 /* Decrement the loop counter */
117 /* Run the below code for Cortex-M0 */
119 while(numSamples > 0u)
121 /* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
122 real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
123 /* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
124 imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
126 /* Decrement the loop counter */
130 #endif /* #ifndef ARM_MATH_CM0 */
132 /* Store the real and imaginary results in 8.24 format */
133 /* Convert real data in 34.30 to 8.24 by 6 right shifts */
134 *realResult = (q31_t) (real_sum) >> 6;
135 /* Convert imaginary data in 34.30 to 8.24 by 6 right shifts */
136 *imagResult = (q31_t) (imag_sum) >> 6;
140 * @} end of cmplx_dot_prod group