1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010 ARM Limited. All rights reserved.
7 * Project: CMSIS DSP Library
8 * Title: arm_cmplx_mult_cmplx_q31.c
10 * Description: Q31 complex-by-complex multiplication
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
14 * Version 1.0.10 2011/7/15
15 * Big Endian support added and Merged M0 and M3/M4 Source code.
17 * Version 1.0.3 2010/11/29
18 * Re-organized the CMSIS folders and updated documentation.
20 * Version 1.0.2 2010/11/11
21 * Documentation updated.
23 * Version 1.0.1 2010/10/05
24 * Production release and review comments incorporated.
26 * Version 1.0.0 2010/09/20
27 * Production release and review comments incorporated.
28 * -------------------------------------------------------------------- */
33 * @ingroup groupCmplxMath
37 * @addtogroup CmplxByCmplxMult
43 * @brief Q31 complex-by-complex multiplication
44 * @param[in] *pSrcA points to the first input vector
45 * @param[in] *pSrcB points to the second input vector
46 * @param[out] *pDst points to the output vector
47 * @param[in] numSamples number of complex samples in each vector
50 * <b>Scaling and Overflow Behavior:</b>
52 * The function implements 1.31 by 1.31 multiplications, and the final output is converted into 3.29 format.
53 * Input down scaling is not required.
56 void arm_cmplx_mult_cmplx_q31(
62 q31_t a, b, c, d; /* Temporary variables to store real and imaginary values */
63 uint32_t blkCnt; /* loop counters */
67 /* Run the below code for Cortex-M4 and Cortex-M3 */
70 blkCnt = numSamples >> 2u;
72 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
73 ** A second loop below computes the remaining 1 to 3 samples. */
76 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
77 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
83 /* store the real result in 3.29 format in the destination buffer. */
84 *pDst++ = (q31_t) ((((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33));
85 /* store the imag result in 3.29 format in the destination buffer. */
86 *pDst++ = (q31_t) ((((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33));
93 /* store the real result in 3.29 format in the destination buffer. */
94 *pDst++ = (q31_t) ((((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33));
95 /* store the imag result in 3.29 format in the destination buffer. */
96 *pDst++ = (q31_t) ((((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33));
103 /* store the real result in 3.29 format in the destination buffer. */
104 *pDst++ = (q31_t) ((((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33));
105 /* store the imag result in 3.29 format in the destination buffer. */
106 *pDst++ = (q31_t) ((((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33));
113 /* store the real result in 3.29 format in the destination buffer. */
114 *pDst++ = (q31_t) ((((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33));
115 /* store the imag result in 3.29 format in the destination buffer. */
116 *pDst++ = (q31_t) ((((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33));
118 /* Decrement the loop counter */
122 /* If numSamples is not a multiple of 4, compute any remaining output samples here.
123 ** No loop unrolling is used. */
124 blkCnt = numSamples % 0x4u;
128 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
129 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
135 /* store the real result in 3.29 format in the destination buffer. */
136 *pDst++ = (q31_t) ((((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33));
137 /* store the imag result in 3.29 format in the destination buffer. */
138 *pDst++ = (q31_t) ((((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33));
140 /* Decrement the loop counter */
146 /* Run the below code for Cortex-M0 */
149 blkCnt = numSamples >> 1u;
151 /* First part of the processing with loop unrolling. Compute 2 outputs at a time.
152 ** A second loop below computes the remaining sample, if any. */
155 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
156 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
162 /* store the real result in 3.29 format in the destination buffer. */
163 *pDst++ = (q31_t) ((((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33));
164 /* store the imag result in 3.29 format in the destination buffer. */
165 *pDst++ = (q31_t) ((((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33));
172 /* store the real result in 3.29 format in the destination buffer. */
173 *pDst++ = (q31_t) ((((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33));
174 /* store the imag result in 3.29 format in the destination buffer. */
175 *pDst++ = (q31_t) ((((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33));
177 /* Decrement the loop counter */
181 /* If numSamples is not a multiple of 2, compute any remaining output samples here.
182 ** No loop unrolling is used. */
183 blkCnt = numSamples % 0x2u;
187 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
188 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
194 /* store the real result in 3.29 format in the destination buffer. */
195 *pDst++ = (q31_t) ((((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33));
196 /* store the imag result in 3.29 format in the destination buffer. */
197 *pDst++ = (q31_t) ((((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33));
199 /* Decrement the loop counter */
203 #endif /* #ifndef ARM_MATH_CM0 */
208 * @} end of CmplxByCmplxMult group