CMSIS DSP Software Library: arm_rms

Go to the documentation of this file.
00001 /* ----------------------------------------------------------------------   
00002 * Copyright (C) 2010 ARM Limited. All rights reserved.   
00003 *   
00004 * $Date:        15. July 2011  
00005 * $Revision:    V1.0.10  
00006 *   
00007 * Project:      CMSIS DSP Library   
00008 * Title:        arm_rms_q15.c   
00009 *   
00010 * Description:  Root Mean Square of the elements of a Q15 vector. 
00011 *   
00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00013 *  
00014 * Version 1.0.10 2011/7/15 
00015 *    Big Endian support added and Merged M0 and M3/M4 Source code.  
00016 *   
00017 * Version 1.0.3 2010/11/29  
00018 *    Re-organized the CMSIS folders and updated documentation.   
00019 *    
00020 * Version 1.0.2 2010/11/11   
00021 *    Documentation updated.    
00022 *   
00023 * Version 1.0.1 2010/10/05    
00024 *    Production release and review comments incorporated.   
00025 *   
00026 * Version 1.0.0 2010/09/20    
00027 *    Production release and review comments incorporated.   
00028 * ---------------------------------------------------------------------------- */
00029 
00030 #include "arm_math.h"
00031 
00059 void arm_rms_q15(
00060   q15_t * pSrc,
00061   uint32_t blockSize,
00062   q15_t * pResult)
00063 {
00064   q63_t sum = 0;                                 /* accumulator */
00065 
00066 #ifndef ARM_MATH_CM0
00067 
00068   /* Run the below code for Cortex-M4 and Cortex-M3 */
00069 
00070   q31_t in;                                      /* temporary variable to store the input value */
00071   q15_t in1;                                     /* temporary variable to store the input value */
00072   uint32_t blkCnt;                               /* loop counter */
00073 
00074   /* loop Unrolling */
00075   blkCnt = blockSize >> 2u;
00076 
00077   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.   
00078    ** a second loop below computes the remaining 1 to 3 samples. */
00079   while(blkCnt > 0u)
00080   {
00081     /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
00082     /* Compute sum of the squares and then store the results in a temporary variable, sum */
00083     in = *__SIMD32(pSrc)++;
00084     sum = __SMLALD(in, in, sum);
00085     in = *__SIMD32(pSrc)++;
00086     sum = __SMLALD(in, in, sum);
00087 
00088     /* Decrement the loop counter */
00089     blkCnt--;
00090   }
00091 
00092   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.   
00093    ** No loop unrolling is used. */
00094   blkCnt = blockSize % 0x4u;
00095 
00096   while(blkCnt > 0u)
00097   {
00098     /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
00099     /* Compute sum of the squares and then store the results in a temporary variable, sum */
00100     in1 = *pSrc++;
00101     sum = __SMLALD(in1, in1, sum);
00102 
00103     /* Decrement the loop counter */
00104     blkCnt--;
00105   }
00106 
00107   /* Truncating and saturating the accumulator to 1.15 format */
00108   sum = __SSAT((q31_t) (sum >> 15), 16);
00109 
00110   in1 = (q15_t) (sum / blockSize);
00111 
00112   /* Store the result in the destination */
00113   arm_sqrt_q15(in1, pResult);
00114 
00115 #else
00116 
00117   /* Run the below code for Cortex-M0 */
00118 
00119   q15_t in;                                      /* temporary variable to store the input value */
00120   uint32_t blkCnt;                               /* loop counter */
00121 
00122   /* Loop over blockSize number of values */
00123   blkCnt = blockSize;
00124 
00125   while(blkCnt > 0u)
00126   {
00127     /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
00128     /* Compute sum of the squares and then store the results in a temporary variable, sum */
00129     in = *pSrc++;
00130     sum += ((q31_t) in * in);
00131 
00132     /* Decrement the loop counter */
00133     blkCnt--;
00134   }
00135 
00136   /* Truncating and saturating the accumulator to 1.15 format */
00137   sum = __SSAT((q31_t) (sum >> 15), 16);
00138 
00139   in = (q15_t) (sum / blockSize);
00140 
00141   /* Store the result in the destination */
00142   arm_sqrt_q15(in, pResult);
00143 
00144 #endif /* #ifndef ARM_MATH_CM0 */
00145 
00146 }
00147