CMSIS DSP Software Library: arm_dot_prod

Go to the documentation of this file.
00001 /* ----------------------------------------------------------------------   
00002 * Copyright (C) 2010 ARM Limited. All rights reserved.   
00003 *   
00004 * $Date:        15. July 2011  
00005 * $Revision:    V1.0.10  
00006 *   
00007 * Project:      CMSIS DSP Library   
00008 * Title:        arm_dot_prod_q31.c   
00009 *   
00010 * Description:  Q31 dot product.   
00011 *   
00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00013 *  
00014 * Version 1.0.10 2011/7/15 
00015 *    Big Endian support added and Merged M0 and M3/M4 Source code.  
00016 *   
00017 * Version 1.0.3 2010/11/29  
00018 *    Re-organized the CMSIS folders and updated documentation.   
00019 *    
00020 * Version 1.0.2 2010/11/11   
00021 *    Documentation updated.    
00022 *   
00023 * Version 1.0.1 2010/10/05    
00024 *    Production release and review comments incorporated.   
00025 *   
00026 * Version 1.0.0 2010/09/20    
00027 *    Production release and review comments incorporated.   
00028 *   
00029 * Version 0.0.7  2010/06/10    
00030 *    Misra-C changes done   
00031 * -------------------------------------------------------------------- */
00032 
00033 #include "arm_math.h"
00034 
/**
 * @brief Dot product of two Q31 vectors.
 *
 * @param[in]  pSrcA      points to the first input vector
 * @param[in]  pSrcB      points to the second input vector
 * @param[in]  blockSize  number of samples to process from each vector
 * @param[out] result     location that receives the accumulated 64-bit sum
 *
 * Scaling and overflow behaviour:
 * each pair of samples is multiplied in 64-bit arithmetic, the product is
 * truncated by discarding its 14 least-significant bits, and the truncated
 * value is added to a 64-bit accumulator.  The truncation is applied to every
 * product individually, before the addition.  No saturation or overflow check
 * is performed on the accumulator.  The final sum is written to *result in
 * 16.48 format.  When blockSize is 0, *result is set to 0.
 *
 * NOTE: the truncation right-shifts a signed 64-bit value; for negative
 * products this relies on the compiler implementing an arithmetic shift,
 * which ISO C leaves implementation-defined.
 */
void arm_dot_prod_q31(
  q31_t * pSrcA,
  q31_t * pSrcB,
  uint32_t blockSize,
  q63_t * result)
{
  q63_t sum = 0;                                 /* 64-bit accumulator */
  uint32_t blkCnt;                               /* Loop counter */

#ifndef ARM_MATH_CM0

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  /* Loop unrolling: number of complete groups of four samples */
  blkCnt = blockSize >> 2u;

  /* First part of the processing with loop unrolling.  Four products are
   ** accumulated per iteration; the loop further below handles the
   ** remaining 0 to 3 samples. */
  while(blkCnt > 0u)
  {
    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
    /* Widen to 64 bits before multiplying, drop the low 14 bits of the
     ** product, then add it to the accumulator. */
    sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14u;
    sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14u;
    sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14u;
    sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14u;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* If blockSize is not a multiple of 4, the leftover samples are processed
   ** below without loop unrolling. */
  blkCnt = blockSize % 0x4u;

#else

  /* Run the below code for Cortex-M0 */

  /* No unrolling: every sample is handled by the loop below */
  blkCnt = blockSize;

#endif /* #ifndef ARM_MATH_CM0 */

  while(blkCnt > 0u)
  {
    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
    /* Same per-product truncation and accumulation as in the unrolled loop. */
    sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14u;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Store the result in the destination buffer in 16.48 format */
  *result = sum;
}
00121