CMSIS DSP Software Library: arm_sub

Go to the documentation of this file.
00001 /* ----------------------------------------------------------------------   
00002 * Copyright (C) 2010 ARM Limited. All rights reserved.   
00003 *   
00004 * $Date:        15. July 2011  
00005 * $Revision:    V1.0.10  
00006 *   
00007 * Project:      CMSIS DSP Library   
00008 * Title:        arm_sub_q15.c   
00009 *   
00010 * Description:  Q15 vector subtraction.   
00011 *   
00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00013 *  
00014 * Version 1.0.10 2011/7/15 
00015 *    Big Endian support added and Merged M0 and M3/M4 Source code.  
00016 *   
00017 * Version 1.0.3 2010/11/29  
00018 *    Re-organized the CMSIS folders and updated documentation.   
00019 *    
00020 * Version 1.0.2 2010/11/11   
00021 *    Documentation updated.    
00022 *   
00023 * Version 1.0.1 2010/10/05    
00024 *    Production release and review comments incorporated.   
00025 *   
00026 * Version 1.0.0 2010/09/20    
00027 *    Production release and review comments incorporated.   
00028 *   
00029 * Version 0.0.7  2010/06/10    
00030 *    Misra-C changes done   
00031 * -------------------------------------------------------------------- */
00032 
00033 #include "arm_math.h"
00034 
00058 void arm_sub_q15(
00059   q15_t * pSrcA,
00060   q15_t * pSrcB,
00061   q15_t * pDst,
00062   uint32_t blockSize)
00063 {
00064   uint32_t blkCnt;                               /* loop counter */
00065 
00066 
00067 #ifndef ARM_MATH_CM0
00068 
00069 /* Run the below code for Cortex-M4 and Cortex-M3 */
00070   /*loop Unrolling */
00071   blkCnt = blockSize >> 2u;
00072 
00073   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.   
00074    ** a second loop below computes the remaining 1 to 3 samples. */
00075   while(blkCnt > 0u)
00076   {
00077     /* C = A - B */
00078     /* Subtract and then store the results in the destination buffer two samples at a time. */
00079     *__SIMD32(pDst)++ = __QSUB16(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
00080     *__SIMD32(pDst)++ = __QSUB16(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
00081 
00082     /* Decrement the loop counter */
00083     blkCnt--;
00084   }
00085 
00086   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.   
00087    ** No loop unrolling is used. */
00088   blkCnt = blockSize % 0x4u;
00089 
00090   while(blkCnt > 0u)
00091   {
00092     /* C = A - B */
00093     /* Subtract and then store the result in the destination buffer. */
00094     *pDst++ = (q15_t) __QSUB16(*pSrcA++, *pSrcB++);
00095 
00096     /* Decrement the loop counter */
00097     blkCnt--;
00098   }
00099 
00100 #else
00101 
00102   /* Run the below code for Cortex-M0 */
00103 
00104   /* Initialize blkCnt with number of samples */
00105   blkCnt = blockSize;
00106 
00107   while(blkCnt > 0u)
00108   {
00109     /* C = A - B */
00110     /* Subtract and then store the result in the destination buffer. */
00111     *pDst++ = (q15_t) __SSAT(((q31_t) * pSrcA++ - *pSrcB++), 16);
00112 
00113     /* Decrement the loop counter */
00114     blkCnt--;
00115   }
00116 
00117 #endif /* #ifndef ARM_MATH_CM0 */
00118 
00119 
00120 }
00121