CMSIS DSP Software Library: arm_mult

Go to the documentation of this file.
00001 /* ----------------------------------------------------------------------   
00002 * Copyright (C) 2010 ARM Limited. All rights reserved.   
00003 *   
00004 * $Date:        15. July 2011  
00005 * $Revision:    V1.0.10  
00006 *   
00007 * Project:      CMSIS DSP Library   
00008 * Title:        arm_mult_q31.c   
00009 *   
00010 * Description:  Q31 vector multiplication.   
00011 *   
00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00013 *  
00014 * Version 1.0.10 2011/7/15 
00015 *    Added big-endian support and merged the M0 and M3/M4 source code.  
00016 *   
00017 * Version 1.0.3 2010/11/29  
00018 *    Re-organized the CMSIS folders and updated documentation.   
00019 *    
00020 * Version 1.0.2 2010/11/11   
00021 *    Documentation updated.    
00022 *   
00023 * Version 1.0.1 2010/10/05    
00024 *    Production release and review comments incorporated.   
00025 *   
00026 * Version 1.0.0 2010/09/20    
00027 *    Production release and review comments incorporated.   
00028 *   
00029 * Version 0.0.5  2010/04/26    
00030 *    Incorporated review comments and updated with latest CMSIS layer.   
00031 *   
00032 * Version 0.0.3  2010/03/10    
00033 *    Initial version   
00034 * -------------------------------------------------------------------- */
00035 
00036 #include "arm_math.h"
00037 
00061 void arm_mult_q31(
00062   q31_t * pSrcA,
00063   q31_t * pSrcB,
00064   q31_t * pDst,
00065   uint32_t blockSize)
00066 {
00067   uint32_t blkCnt;                               /* loop counters */
00068 
00069 #ifndef ARM_MATH_CM0
00070 
00071 /* Run the below code for Cortex-M4 and Cortex-M3 */
00072   /* loop Unrolling */
00073   blkCnt = blockSize >> 2u;
00074 
00075   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.   
00076    ** a second loop below computes the remaining 1 to 3 samples. */
00077   while(blkCnt > 0u)
00078   {
00079     /* C = A * B */
00080     /* Multiply the inputs and then store the results in the destination buffer. */
00081     *pDst++ =
00082       (q31_t) clip_q63_to_q31(((q63_t) (*pSrcA++) * (*pSrcB++)) >> 31);
00083     *pDst++ =
00084       (q31_t) clip_q63_to_q31(((q63_t) (*pSrcA++) * (*pSrcB++)) >> 31);
00085     *pDst++ =
00086       (q31_t) clip_q63_to_q31(((q63_t) (*pSrcA++) * (*pSrcB++)) >> 31);
00087     *pDst++ =
00088       (q31_t) clip_q63_to_q31(((q63_t) (*pSrcA++) * (*pSrcB++)) >> 31);
00089 
00090     /* Decrement the blockSize loop counter */
00091     blkCnt--;
00092   }
00093 
00094   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.   
00095    ** No loop unrolling is used. */
00096   blkCnt = blockSize % 0x4u;
00097 
00098 #else
00099 
00100   /* Run the below code for Cortex-M0 */
00101 
00102   /* Initialize blkCnt with number of samples */
00103   blkCnt = blockSize;
00104 
00105 #endif /* #ifndef ARM_MATH_CM0 */
00106 
00107   while(blkCnt > 0u)
00108   {
00109     /* C = A * B */
00110     /* Multiply the inputs and then store the results in the destination buffer. */
00111     *pDst++ =
00112       (q31_t) clip_q63_to_q31(((q63_t) (*pSrcA++) * (*pSrcB++)) >> 31);
00113 
00114     /* Decrement the blockSize loop counter */
00115     blkCnt--;
00116   }
00117 }
00118