CMSIS DSP Software Library: arm_biquad_cascade_df1

Go to the documentation of this file.
00001 /* ----------------------------------------------------------------------   
00002 * Copyright (C) 2010 ARM Limited. All rights reserved.   
00003 *   
00004 * $Date:        15. July 2011  
00005 * $Revision:    V1.0.10  
00006 *   
00007 * Project:      CMSIS DSP Library   
00008 * Title:        arm_biquad_cascade_df1_q15.c   
00009 *   
00010 * Description:  Processing function for the   
00011 *               Q15 Biquad cascade DirectFormI(DF1) filter.   
00012 *   
00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00014 *  
00015 * Version 1.0.10 2011/7/15 
00016 *    Big Endian support added and Merged M0 and M3/M4 Source code.  
00017 *   
00018 * Version 1.0.3 2010/11/29  
00019 *    Re-organized the CMSIS folders and updated documentation.   
00020 *    
00021 * Version 1.0.2 2010/11/11   
00022 *    Documentation updated.    
00023 *   
00024 * Version 1.0.1 2010/10/05    
00025 *    Production release and review comments incorporated.   
00026 *   
00027 * Version 1.0.0 2010/09/20    
00028 *    Production release and review comments incorporated.   
00029 *   
00030 * Version 0.0.5  2010/04/26    
00031 *    incorporated review comments and updated with latest CMSIS layer   
00032 *   
00033 * Version 0.0.3  2010/03/10    
00034 *    Initial version   
00035 * -------------------------------------------------------------------- */
00036 
00037 #include "arm_math.h"
00038 
00070 void arm_biquad_cascade_df1_q15(
00071   const arm_biquad_casd_df1_inst_q15 * S,
00072   q15_t * pSrc,
00073   q15_t * pDst,
00074   uint32_t blockSize)
00075 {
00076 
00077 
00078 #ifndef ARM_MATH_CM0
00079 
00080   /* Run the below code for Cortex-M4 and Cortex-M3 */
00081 
00082   q15_t *pIn = pSrc;                             /*  Source pointer                               */
00083   q15_t *pOut = pDst;                            /*  Destination pointer                          */
00084   q31_t in;                                      /*  Temporary variable to hold input value       */
00085   q31_t out;                                     /*  Temporary variable to hold output value      */
00086   q31_t b0;                                      /*  Temporary variable to hold bo value          */
00087   q31_t b1, a1;                                  /*  Filter coefficients                          */
00088   q31_t state_in, state_out;                     /*  Filter state variables                       */
00089   q63_t acc;                                     /*  Accumulator                                  */
00090   int32_t shift = (15 - (int32_t) S->postShift); /*  Post shift                                   */
00091   q15_t *pState = S->pState;                     /*  State pointer                                */
00092   q15_t *pCoeffs = S->pCoeffs;                   /*  Coefficient pointer                          */
00093   q31_t *pState_q31;                             /*  32-bit state pointer for SIMD implementation */
00094   uint32_t sample, stage = (uint32_t) S->numStages;     /*  Stage loop counter                           */
00095 
00096   do
00097   {
00098     /* Initialize state pointer of type q31 */
00099     pState_q31 = (q31_t *) (pState);
00100 
00101     /* Read the b0 and 0 coefficients using SIMD  */
00102     b0 = *__SIMD32(pCoeffs)++;
00103 
00104     /* Read the b1 and b2 coefficients using SIMD */
00105     b1 = *__SIMD32(pCoeffs)++;
00106 
00107     /* Read the a1 and a2 coefficients using SIMD */
00108     a1 = *__SIMD32(pCoeffs)++;
00109 
00110     /* Read the input state values from the state buffer:  x[n-1], x[n-2] */
00111     state_in = (q31_t) (*pState_q31++);
00112 
00113     /* Read the output state values from the state buffer:  y[n-1], y[n-2] */
00114     state_out = (q31_t) (*pState_q31);
00115 
00116     /* Apply loop unrolling and compute 2 output values simultaneously. */
00117     /*      The variable acc hold output values that are being computed:   
00118      *   
00119      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]   
00120      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]   
00121      */
00122     sample = blockSize >> 1u;
00123 
00124     /* First part of the processing with loop unrolling.  Compute 2 outputs at a time.   
00125      ** a second loop below computes the remaining 1 sample. */
00126     while(sample > 0u)
00127     {
00128 
00129       /* Read the input */
00130       in = *__SIMD32(pIn)++;
00131 
00132       /* out =  b0 * x[n] + 0 * 0 */
00133       out = __SMUAD(b0, in);
00134 
00135       /* acc +=  b1 * x[n-1] +  b2 * x[n-2] + out */
00136       acc = __SMLALD(b1, state_in, out);
00137       /* acc +=  a1 * y[n-1] +  a2 * y[n-2] */
00138       acc = __SMLALD(a1, state_out, acc);
00139 
00140       /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
00141       out = __SSAT((acc >> shift), 16);
00142 
00143       /* Every time after the output is computed state should be updated. */
00144       /* The states should be updated as:  */
00145       /* Xn2 = Xn1    */
00146       /* Xn1 = Xn     */
00147       /* Yn2 = Yn1    */
00148       /* Yn1 = acc   */
00149       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
00150       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
00151 
00152 #ifndef  ARM_MATH_BIG_ENDIAN
00153 
00154       state_in = __PKHBT(in, state_in, 16);
00155       state_out = __PKHBT(out, state_out, 16);
00156 
00157 #else
00158 
00159       state_in = __PKHBT(state_in >> 16, (in >> 16), 16);
00160       state_out = __PKHBT(state_out >> 16, (out), 16);
00161 
00162 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
00163 
00164       /* out =  b0 * x[n] + 0 * 0 */
00165       out = __SMUADX(b0, in);
00166       /* acc +=  b1 * x[n-1] +  b2 * x[n-2] + out */
00167       acc = __SMLALD(b1, state_in, out);
00168       /* acc +=  a1 * y[n-1] + a2 * y[n-2] */
00169       acc = __SMLALD(a1, state_out, acc);
00170 
00171       /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
00172       out = __SSAT((acc >> shift), 16);
00173 
00174       /* Store the output in the destination buffer. */
00175 
00176 #ifndef  ARM_MATH_BIG_ENDIAN
00177 
00178       *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16);
00179 
00180 #else
00181 
00182       *__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16);
00183 
00184 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
00185 
00186       /* Every time after the output is computed state should be updated. */
00187       /* The states should be updated as:  */
00188       /* Xn2 = Xn1    */
00189       /* Xn1 = Xn     */
00190       /* Yn2 = Yn1    */
00191       /* Yn1 = acc   */
00192       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
00193       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
00194 #ifndef  ARM_MATH_BIG_ENDIAN
00195 
00196       state_in = __PKHBT(in >> 16, state_in, 16);
00197       state_out = __PKHBT(out, state_out, 16);
00198 
00199 #else
00200 
00201       state_in = __PKHBT(state_in >> 16, in, 16);
00202       state_out = __PKHBT(state_out >> 16, out, 16);
00203 
00204 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
00205 
00206 
00207       /* Decrement the loop counter */
00208       sample--;
00209 
00210     }
00211 
00212     /* If the blockSize is not a multiple of 2, compute any remaining output samples here.   
00213      ** No loop unrolling is used. */
00214 
00215     if((blockSize & 0x1u) != 0u)
00216     {
00217       /* Read the input */
00218       in = *pIn++;
00219 
00220       /* out =  b0 * x[n] + 0 * 0 */
00221 
00222 #ifndef  ARM_MATH_BIG_ENDIAN
00223 
00224       out = __SMUAD(b0, in);
00225 
00226 #else
00227 
00228       out = __SMUADX(b0, in);
00229 
00230 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
00231 
00232       /* acc =  b1 * x[n-1] + b2 * x[n-2] + out */
00233       acc = __SMLALD(b1, state_in, out);
00234       /* acc +=  a1 * y[n-1] + a2 * y[n-2] */
00235       acc = __SMLALD(a1, state_out, acc);
00236 
00237       /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
00238       out = __SSAT((acc >> shift), 16);
00239 
00240       /* Store the output in the destination buffer. */
00241       *pOut++ = (q15_t) out;
00242 
00243       /* Every time after the output is computed state should be updated. */
00244       /* The states should be updated as:  */
00245       /* Xn2 = Xn1    */
00246       /* Xn1 = Xn     */
00247       /* Yn2 = Yn1    */
00248       /* Yn1 = acc   */
00249       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
00250       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
00251 
00252 #ifndef  ARM_MATH_BIG_ENDIAN
00253 
00254       state_in = __PKHBT(in, state_in, 16);
00255       state_out = __PKHBT(out, state_out, 16);
00256 
00257 #else
00258 
00259       state_in = __PKHBT(state_in >> 16, in, 16);
00260       state_out = __PKHBT(state_out >> 16, out, 16);
00261 
00262 #endif /*   #ifndef  ARM_MATH_BIG_ENDIAN    */
00263 
00264     }
00265 
00266     /*  The first stage goes from the input wire to the output wire.  */
00267     /*  Subsequent numStages occur in-place in the output wire  */
00268     pIn = pDst;
00269 
00270     /* Reset the output pointer */
00271     pOut = pDst;
00272 
00273     /*  Store the updated state variables back into the state array */
00274     *__SIMD32(pState)++ = state_in;
00275     *__SIMD32(pState)++ = state_out;
00276 
00277 
00278     /* Decrement the loop counter */
00279     stage--;
00280 
00281   } while(stage > 0u);
00282 
00283 #else
00284 
00285   /* Run the below code for Cortex-M0 */
00286 
00287   q15_t *pIn = pSrc;                             /*  Source pointer                               */
00288   q15_t *pOut = pDst;                            /*  Destination pointer                          */
00289   q15_t b0, b1, b2, a1, a2;                      /*  Filter coefficients           */
00290   q15_t Xn1, Xn2, Yn1, Yn2;                      /*  Filter state variables        */
00291   q15_t Xn;                                      /*  temporary input               */
00292   q63_t acc;                                     /*  Accumulator                                  */
00293   int32_t shift = (15 - (int32_t) S->postShift); /*  Post shift                                   */
00294   q15_t *pState = S->pState;                     /*  State pointer                                */
00295   q15_t *pCoeffs = S->pCoeffs;                   /*  Coefficient pointer                          */
00296   uint32_t sample, stage = (uint32_t) S->numStages;     /*  Stage loop counter                           */
00297 
00298   do
00299   {
00300     /* Reading the coefficients */
00301     b0 = *pCoeffs++;
00302     b1 = *pCoeffs++;
00303     b2 = *pCoeffs++;
00304     a1 = *pCoeffs++;
00305     a2 = *pCoeffs++;
00306 
00307     /* Reading the state values */
00308     Xn1 = pState[0];
00309     Xn2 = pState[1];
00310     Yn1 = pState[2];
00311     Yn2 = pState[3];
00312 
00313     /*      The variables acc holds the output value that is computed:        
00314      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]        
00315      */
00316 
00317     sample = blockSize;
00318 
00319     while(sample > 0u)
00320     {
00321       /* Read the input */
00322       Xn = *pIn++;
00323 
00324       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00325       /* acc =  b0 * x[n] */
00326       acc = (q31_t) b0 *Xn;
00327 
00328       /* acc +=  b1 * x[n-1] */
00329       acc += (q31_t) b1 *Xn1;
00330       /* acc +=  b[2] * x[n-2] */
00331       acc += (q31_t) b2 *Xn2;
00332       /* acc +=  a1 * y[n-1] */
00333       acc += (q31_t) a1 *Yn1;
00334       /* acc +=  a2 * y[n-2] */
00335       acc += (q31_t) a2 *Yn2;
00336 
00337       /* The result is converted to 1.31  */
00338       acc = __SSAT((acc >> shift), 16);
00339 
00340       /* Every time after the output is computed state should be updated. */
00341       /* The states should be updated as:  */
00342       /* Xn2 = Xn1    */
00343       /* Xn1 = Xn     */
00344       /* Yn2 = Yn1    */
00345       /* Yn1 = acc    */
00346       Xn2 = Xn1;
00347       Xn1 = Xn;
00348       Yn2 = Yn1;
00349       Yn1 = (q15_t) acc;
00350 
00351       /* Store the output in the destination buffer. */
00352       *pOut++ = (q15_t) acc;
00353 
00354       /* decrement the loop counter */
00355       sample--;
00356     }
00357 
00358     /*  The first stage goes from the input buffer to the output buffer. */
00359     /*  Subsequent stages occur in-place in the output buffer */
00360     pIn = pDst;
00361 
00362     /* Reset to destination pointer */
00363     pOut = pDst;
00364 
00365     /*  Store the updated state variables back into the pState array */
00366     *pState++ = Xn1;
00367     *pState++ = Xn2;
00368     *pState++ = Yn1;
00369     *pState++ = Yn2;
00370 
00371   } while(--stage);
00372 
00373 #endif /*     #ifndef ARM_MATH_CM0 */
00374 
00375 }
00376 
00377