CMSIS DSP Software Library: arm_fir

Go to the documentation of this file.
00001 /* ----------------------------------------------------------------------   
00002 * Copyright (C) 2010 ARM Limited. All rights reserved.   
00003 *   
00004 * $Date:        15. July 2011  
00005 * $Revision:    V1.0.10  
00006 *   
00007 * Project:      CMSIS DSP Library   
00008 * Title:        arm_fir_q7.c   
00009 *   
00010 * Description:  Q7 FIR filter processing function.   
00011 *   
00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00013 *  
00014 * Version 1.0.10 2011/7/15 
00015 *    Big Endian support added and Merged M0 and M3/M4 Source code.  
00016 *   
00017 * Version 1.0.3 2010/11/29  
00018 *    Re-organized the CMSIS folders and updated documentation.   
00019 *    
00020 * Version 1.0.2 2010/11/11   
00021 *    Documentation updated.    
00022 *   
00023 * Version 1.0.1 2010/10/05    
00024 *    Production release and review comments incorporated.   
00025 *   
00026 * Version 1.0.0 2010/09/20    
00027 *    Production release and review comments incorporated.   
00028 *   
00029 * Version 0.0.5  2010/04/26    
00030 *    incorporated review comments and updated with latest CMSIS layer   
00031 *   
00032 * Version 0.0.3  2010/03/10    
00033 *    Initial version   
00034 * -------------------------------------------------------------------- */
00035 
00036 #include "arm_math.h"
00037 
/**
 * Q7 FIR filter processing function.
 *
 * For each of the blockSize input samples read from pSrc, the sample is
 * appended to the instance state buffer, the sum of products of numTaps
 * consecutive state samples with the numTaps coefficients is formed, and
 * the sum is shifted right by 7, saturated to 8 bits with __SSAT and
 * written to pDst.
 *
 * Coefficient pCoeffs[0] multiplies the oldest sample of the window and
 * pCoeffs[numTaps-1] multiplies the newest one.
 *
 * S          points to the filter instance; numTaps, pState and pCoeffs are read.
 *            New samples are written to pState[numTaps-1] onwards, so the state
 *            buffer must hold at least numTaps + blockSize - 1 samples.
 *            On return the last numTaps - 1 samples have been moved to the
 *            start of the state buffer for the next call.
 * pSrc       points to blockSize input samples.
 * pDst       points to room for blockSize output samples.
 * blockSize  number of samples to process.
 *
 * Without ARM_MATH_CM0 defined, four outputs are computed per outer iteration
 * and the taps are unrolled by four; with ARM_MATH_CM0 defined, one output is
 * computed per iteration with no unrolling.
 *
 * NOTE(review): numTaps == 0 is not handled: (numTaps - 1u) wraps around and
 * the do/while in the non-CM0 remainder loop always runs at least once.
 * Presumably callers guarantee numTaps >= 1 - confirm.
 */
00064 void arm_fir_q7(
00065   const arm_fir_instance_q7 * S,
00066   q7_t * pSrc,
00067   q7_t * pDst,
00068   uint32_t blockSize)
00069 {
00070 
00071 #ifndef ARM_MATH_CM0
00072 
00073   /* Run the below code for Cortex-M4 and Cortex-M3 */
00074 
00075   q7_t *pState = S->pState;                      /* State pointer */
00076   q7_t *pCoeffs = S->pCoeffs;                    /* Coefficient pointer */
00077   q7_t *pStateCurnt;                             /* Points to the current sample of the state */
00078   q7_t x0, x1, x2, x3;                           /* Temporary variables to hold state */
00079   q7_t c0;                                       /* Temporary variable to hold coefficient value */
00080   q7_t *px;                                      /* Temporary pointer for state */
00081   q7_t *pb;                                      /* Temporary pointer for coefficient buffer */
00082   q31_t acc0, acc1, acc2, acc3;                  /* Accumulators */
00083   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
00084   uint32_t i, tapCnt, blkCnt;                    /* Loop counters */
00085 
00086   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
00087   /* pStateCurnt points to the location where the new input data should be written */
00088   pStateCurnt = &(S->pState[(numTaps - 1u)]);
00089 
00090   /* Apply loop unrolling and compute 4 output values simultaneously.   
00091    * With c[] = pCoeffs and s[] = the state window starting at pState,   
00092    * the variables acc0 ... acc3 hold the output values being computed:   
00093    *    acc0 =  c[0] * s[0] + c[1] * s[1] + ... + c[numTaps-1] * s[numTaps-1]   
00094    *    acc1 =  c[0] * s[1] + c[1] * s[2] + ... + c[numTaps-1] * s[numTaps]   
00095    *    acc2 =  c[0] * s[2] + c[1] * s[3] + ... + c[numTaps-1] * s[numTaps+1]   
00096    *    acc3 =  c[0] * s[3] + c[1] * s[4] + ... + c[numTaps-1] * s[numTaps+2]   
00097    */
00098   blkCnt = blockSize >> 2;
00099 
00100   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.   
00101    ** a second loop below computes the remaining 1 to 3 samples. */
00102   while(blkCnt > 0u)
00103   {
00104     /* Copy four new input samples into the state buffer */
00105     *pStateCurnt++ = *pSrc++;
00106     *pStateCurnt++ = *pSrc++;
00107     *pStateCurnt++ = *pSrc++;
00108     *pStateCurnt++ = *pSrc++;
00109 
00110     /* Set all accumulators to zero */
00111     acc0 = 0;
00112     acc1 = 0;
00113     acc2 = 0;
00114     acc3 = 0;
00115 
00116     /* Initialize state pointer */
00117     px = pState;
00118 
00119     /* Initialize coefficient pointer */
00120     pb = pCoeffs;
00121 
00122     /* Preload the first three state samples of the window (s[0], s[1], s[2]).   
00123      *  The fourth sample needed by acc3 is read inside the tap loop. */
00124     x0 = *(px++);
00125     x1 = *(px++);
00126     x2 = *(px++);
00127 
00128     /* Loop unrolling.  Process 4 taps at a time. */
00129     tapCnt = numTaps >> 2;
00130     i = tapCnt;
00131 
00132     while(i > 0u)
00133     {
00134       /* Read the 1st coefficient of this group of four */
00135       c0 = *(pb++);
00136 
00137       /* Read the next state sample into x3 */
00138       x3 = *(px++);
00139 
00140       /* acc0 +=  c0 * x0 */
00141       acc0 += ((q15_t) x0 * c0);
00142 
00143       /* acc1 +=  c0 * x1 */
00144       acc1 += ((q15_t) x1 * c0);
00145 
00146       /* acc2 +=  c0 * x2 */
00147       acc2 += ((q15_t) x2 * c0);
00148 
00149       /* acc3 +=  c0 * x3 */
00150       acc3 += ((q15_t) x3 * c0);
00151 
00152       /* Read the 2nd coefficient of this group */
00153       c0 = *(pb++);
00154 
00155       /* Read the next state sample; x0 is no longer needed and is reused */
00156       x0 = *(px++);
00157 
00158       /* Perform the multiply-accumulates (window shifted by one: x1, x2, x3, x0) */
00159       acc0 += ((q15_t) x1 * c0);
00160       acc1 += ((q15_t) x2 * c0);
00161       acc2 += ((q15_t) x3 * c0);
00162       acc3 += ((q15_t) x0 * c0);
00163 
00164       /* Read the 3rd coefficient of this group */
00165       c0 = *(pb++);
00166 
00167       /* Read the next state sample; x1 is reused */
00168       x1 = *(px++);
00169 
00170       /* Perform the multiply-accumulates (window: x2, x3, x0, x1) */
00171       acc0 += ((q15_t) x2 * c0);
00172       acc1 += ((q15_t) x3 * c0);
00173       acc2 += ((q15_t) x0 * c0);
00174       acc3 += ((q15_t) x1 * c0);
00175       /* Read the 4th coefficient of this group */
00176       c0 = *(pb++);
00177 
00178       /* Read the next state sample; x2 is reused */
00179       x2 = *(px++);
00180 
00181       /* Perform the multiply-accumulates (window: x3, x0, x1, x2) */
00182       acc0 += ((q15_t) x3 * c0);
00183       acc1 += ((q15_t) x0 * c0);
00184       acc2 += ((q15_t) x1 * c0);
00185       acc3 += ((q15_t) x2 * c0);
00186       i--;
00187     }
00188 
00189     /* If the filter length is not a multiple of 4, compute the remaining filter taps.
             x0, x1, x2 already hold the next three state samples at this point. */
00190 
00191     i = numTaps - (tapCnt * 4u);
00192     while(i > 0u)
00193     {
00194       /* Read coefficients */
00195       c0 = *(pb++);
00196 
00197       /* Fetch 1 state variable */
00198       x3 = *(px++);
00199 
00200       /* Perform the multiply-accumulates */
00201       acc0 += ((q15_t) x0 * c0);
00202       acc1 += ((q15_t) x1 * c0);
00203       acc2 += ((q15_t) x2 * c0);
00204       acc3 += ((q15_t) x3 * c0);
00205 
00206       /* Reuse the present sample states for next sample */
00207       x0 = x1;
00208       x1 = x2;
00209       x2 = x3;
00210 
00211       /* Decrement the loop counter */
00212       i--;
00213     }
00214 
00215     /* Advance the state pointer by 4 to process the next group of 4 samples */
00216     pState = pState + 4;
00217 
00218     /* The results in the 4 accumulators are in 2.14 format.  Convert to 1.7   
00219      ** (shift right by 7, saturate to 8 bits), then store the 4 outputs in the destination buffer. */
00220     acc0 = __SSAT((acc0 >> 7u), 8);
00221     *pDst++ = acc0;
00222     acc1 = __SSAT((acc1 >> 7u), 8);
00223     *pDst++ = acc1;
00224     acc2 = __SSAT((acc2 >> 7u), 8);
00225     *pDst++ = acc2;
00226     acc3 = __SSAT((acc3 >> 7u), 8);
00227     *pDst++ = acc3;
00228 
00229     /* Decrement the samples loop counter */
00230     blkCnt--;
00231   }
00232 
00233 
00234   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.   
00235    ** No loop unrolling is used. */
00236   blkCnt = blockSize % 4u;
00237 
00238   while(blkCnt > 0u)
00239   {
00240     /* Copy one sample at a time into state buffer */
00241     *pStateCurnt++ = *pSrc++;
00242 
00243     /* Set the accumulator to zero */
00244     acc0 = 0;
00245 
00246     /* Initialize state pointer */
00247     px = pState;
00248 
00249     /* Initialize Coefficient pointer */
00250     pb = (pCoeffs);
00251 
00252     i = numTaps;
00253 
00254     /* Perform the multiply-accumulates.  The do/while body always runs at least once. */
00255     do
00256     {
00257       acc0 += (q15_t) * (px++) * (*(pb++));
00258       i--;
00259     } while(i > 0u);
00260 
00261     /* The result is in 2.14 format.  Convert to 1.7   
00262      ** Then store the output in the destination buffer. */
00263     *pDst++ = __SSAT((acc0 >> 7u), 8);
00264 
00265     /* Advance state pointer by 1 for the next sample */
00266     pState = pState + 1;
00267 
00268     /* Decrement the samples loop counter */
00269     blkCnt--;
00270   }
00271 
00272   /* Processing is complete.   
00273    ** Now copy the last numTaps - 1 samples to the start of the state buffer.   
00274    ** This prepares the state buffer for the next function call. */
00275 
00276   /* Points to the start of the state buffer */
00277   pStateCurnt = S->pState;
00278 
00279   tapCnt = (numTaps - 1u) >> 2u;
00280 
00281   /* Copy data, four samples per iteration */
00282   while(tapCnt > 0u)
00283   {
00284     *pStateCurnt++ = *pState++;
00285     *pStateCurnt++ = *pState++;
00286     *pStateCurnt++ = *pState++;
00287     *pStateCurnt++ = *pState++;
00288 
00289     /* Decrement the loop counter */
00290     tapCnt--;
00291   }
00292 
00293   /* Calculate remaining number of copies */
00294   tapCnt = (numTaps - 1u) % 0x4u;
00295 
00296   /* Copy the remaining q7_t data */
00297   while(tapCnt > 0u)
00298   {
00299     *pStateCurnt++ = *pState++;
00300 
00301     /* Decrement the loop counter */
00302     tapCnt--;
00303   }
00304 
00305 #else
00306 
00307 /* Run the below code for Cortex-M0 */
00308 
00309   uint32_t numTaps = S->numTaps;                 /* Number of taps in the filter */
00310   uint32_t i, blkCnt;                            /* Loop counters */
00311   q7_t *pState = S->pState;                      /* State pointer */
00312   q7_t *pCoeffs = S->pCoeffs;                    /* Coefficient pointer */
00313   q7_t *px, *pb;                                 /* Temporary pointers to state and coeff */
00314   q31_t acc = 0;                                 /* Accumulator */
00315   q7_t *pStateCurnt;                             /* Points to the current sample of the state */
00316 
00317 
00318   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
00319   /* pStateCurnt points to the location where the new input data should be written */
00320   pStateCurnt = S->pState + (numTaps - 1u);
00321 
00322   /* Initialize blkCnt with blockSize */
00323   blkCnt = blockSize;
00324 
00325   /* Compute one output sample per iteration for all blockSize samples (no unrolling) */
00326   while(blkCnt > 0u)
00327   {
00328     /* Copy one sample at a time into state buffer */
00329     *pStateCurnt++ = *pSrc++;
00330 
00331     /* Set accumulator to zero */
00332     acc = 0;
00333 
00334     /* Initialize state pointer of type q7 */
00335     px = pState;
00336 
00337     /* Initialize coeff pointer of type q7 */
00338     pb = pCoeffs;
00339 
00340 
00341     i = numTaps;
00342 
00343     while(i > 0u)
00344     {
00345       /* acc += next state sample * next coefficient; pCoeffs[0] pairs with the oldest sample of the window */
00346       acc += (q15_t) * px++ * *pb++;
00347       i--;
00348     }
00349 
00350     /* Shift right by 7, saturate to 8 bits and store the 1.7 format filter output in destination buffer */
00351     *pDst++ = (q7_t) __SSAT((acc >> 7), 8);
00352 
00353     /* Advance the state pointer by 1 to process the next sample */
00354     pState = pState + 1;
00355 
00356     /* Decrement the loop counter */
00357     blkCnt--;
00358   }
00359 
00360   /* Processing is complete.        
00361    ** Now copy the last numTaps - 1 samples to the start of the state buffer.      
00362    ** This prepares the state buffer for the next function call. */
00363 
00364 
00365   /* Points to the start of the state buffer */
00366   pStateCurnt = S->pState;
00367 
00368 
00369   /* Copy numTaps - 1 values */
00370   i = (numTaps - 1u);
00371 
00372   /* Copy q7_t data */
00373   while(i > 0u)
00374   {
00375     *pStateCurnt++ = *pState++;
00376     i--;
00377   }
00378 
00379 #endif /*   #ifndef ARM_MATH_CM0 */
00380 
00381 }
00382