git.gag.com Git - fw/stlink/blob - exampleF4/CMSIS/DSP_Lib/Source/FilteringFunctions/arm_biquad_cascade_df1_f32.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        15. July 2011
   5 * $Revision:    V1.0.10
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_biquad_cascade_df1_f32.c
   9 *
  10 * Description:  Processing function for the
  11 *               floating-point Biquad cascade DirectFormI(DF1) filter.
  12 *
  13 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  14 *
  15 * Version 1.0.10 2011/7/15
  16 *    Big Endian support added and Merged M0 and M3/M4 Source code.
  17 *
  18 * Version 1.0.3 2010/11/29
  19 *    Re-organized the CMSIS folders and updated documentation.
  20 *
  21 * Version 1.0.2 2010/11/11
  22 *    Documentation updated.
  23 *
  24 * Version 1.0.1 2010/10/05
  25 *    Production release and review comments incorporated.
  26 *
  27 * Version 1.0.0 2010/09/20
  28 *    Production release and review comments incorporated.
  29 *
  30 * Version 0.0.5  2010/04/26
  31 *        incorporated review comments and updated with latest CMSIS layer
  32 *
  33 * Version 0.0.3  2010/03/10
  34 *    Initial version
  35 * -------------------------------------------------------------------- */
  36
  37 #include "arm_math.h"
  38
  39 /**
  40  * @ingroup groupFilters
  41  */
  42
  43 /**
  44  * @defgroup BiquadCascadeDF1 Biquad Cascade IIR Filters Using Direct Form I Structure
  45  *
  46  * This set of functions implements arbitrary order recursive (IIR) filters.
  47  * The filters are implemented as a cascade of second order Biquad sections.
  48  * The functions support Q15, Q31 and floating-point data types.
  49  * Fast versions of the Q15 and Q31 functions are also supported on Cortex-M4 and Cortex-M3.
  50  *
  51  * The functions operate on blocks of input and output data and each call to the function
  52  * processes <code>blockSize</code> samples through the filter.
  53  * <code>pSrc</code> points to the array of input data and
  54  * <code>pDst</code> points to the array of output data.
  55  * Both arrays contain <code>blockSize</code> values.
  56  *
  57  * \par Algorithm
  58  * Each Biquad stage implements a second order filter using the difference equation:
  59  * <pre>
  60  *     y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
  61  * </pre>
  62  * A Direct Form I algorithm is used with 5 coefficients and 4 state variables per stage.
  63  * \image html Biquad.gif "Single Biquad filter stage"
  64  * Coefficients <code>b0, b1 and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.
  65  * Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.
  66  * Pay careful attention to the sign of the feedback coefficients.
  67  * Some design tools use the difference equation
  68  * <pre>
  69  *     y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] - a1 * y[n-1] - a2 * y[n-2]
  70  * </pre>
  71  * In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.
  72  *
  73  * \par
  74  * Higher order filters are realized as a cascade of second order sections.
  75  * <code>numStages</code> refers to the number of second order stages used.
  76  * For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.
  77  * \image html BiquadCascade.gif "8th order filter using a cascade of Biquad stages"
  78  * A 9th order filter would be realized with <code>numStages=5</code> second order stages with the coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).
  79  *
  80  * \par
  81  * <code>pState</code> points to the array of state variables.
  82  * Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
  83  * The state variables are arranged in the <code>pState</code> array as:
  84  * <pre>
  85  *     {x[n-1], x[n-2], y[n-1], y[n-2]}
  86  * </pre>
  87  *
  88  * \par
  89  * The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
  90  * The state array has a total length of <code>4*numStages</code> values.
  91  * The state variables are updated after each block of data is processed; the coefficients are untouched.
  92  *
  93  * \par Instance Structure
  94  * The coefficients and state variables for a filter are stored together in an instance data structure.
  95  * A separate instance structure must be defined for each filter.
  96  * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
  97  * There are separate instance structure declarations for each of the 3 supported data types.
  98  *
  99  * \par Init Functions
 100  * There is also an associated initialization function for each data type.
 101  * The initialization function performs following operations:
 102  * - Sets the values of the internal structure fields.
 103  * - Zeros out the values in the state buffer.
 104  *
 105  * \par
 106  * Use of the initialization function is optional.
 107  * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
 108  * To place an instance structure into a const data section, the instance structure must be manually initialized.
 109  * Set the values in the state buffer to zeros before static initialization.
 110  * The code below statically initializes each of the 3 different data type filter instance structures
 111  * <pre>
 112  *     arm_biquad_casd_df1_inst_f32 S1 = {numStages, pState, pCoeffs};
 113  *     arm_biquad_casd_df1_inst_q15 S2 = {numStages, pState, pCoeffs, postShift};
 114  *     arm_biquad_casd_df1_inst_q31 S3 = {numStages, pState, pCoeffs, postShift};
 115  * </pre>
 116  * where <code>numStages</code> is the number of Biquad stages in the filter; <code>pState</code> is the address of the state buffer;
 117  * <code>pCoeffs</code> is the address of the coefficient buffer; <code>postShift</code> is the shift to be applied.
 118  *
 119  * \par Fixed-Point Behavior
 120  * Care must be taken when using the Q15 and Q31 versions of the Biquad Cascade filter functions.
 121  * Following issues must be considered:
 122  * - Scaling of coefficients
 123  * - Filter gain
 124  * - Overflow and saturation
 125  *
 126  * \par
 127  * <b>Scaling of coefficients: </b>
 128  * Filter coefficients are represented as fractional values and
 129  * coefficients are restricted to lie in the range <code>[-1 +1)</code>.
 130  * The fixed-point functions have an additional scaling parameter <code>postShift</code>
 131  * which allows the filter coefficients to exceed the range <code>[-1 +1)</code>.
 132  * At the output of the filter's accumulator is a shift register which shifts the result by <code>postShift</code> bits.
 133  * \image html BiquadPostshift.gif "Fixed-point Biquad with shift by postShift bits after accumulator"
 134  * This essentially scales the filter coefficients by <code>2^postShift</code>.
 135  * For example, to realize the coefficients
 136  * <pre>
 137  *    {1.5, -0.8, 1.2, 1.6, -0.9}
 138  * </pre>
 139  * set the pCoeffs array to:
 140  * <pre>
 141  *    {0.75, -0.4, 0.6, 0.8, -0.45}
 142  * </pre>
 143  * and set <code>postShift=1</code>
 144  *
 145  * \par
 146  * <b>Filter gain: </b>
 147  * The frequency response of a Biquad filter is a function of its coefficients.
 148  * It is possible for the gain through the filter to exceed 1.0 meaning that the filter increases the amplitude of certain frequencies.
 149  * This means that an input signal with amplitude < 1.0 may result in an output > 1.0 and these are saturated or overflowed based on the implementation of the filter.
 150  * To avoid this behavior the filter needs to be scaled down such that its peak gain < 1.0 or the input signal must be scaled down so that the combination of input and filter never overflows.
 151  *
 152  * \par
 153  * <b>Overflow and saturation: </b>
 154  * For Q15 and Q31 versions, it is described separately as part of the function specific documentation below.
 155  */
 156
 157 /**
 158  * @addtogroup BiquadCascadeDF1
 159  * @{
 160  */
 161
 162 /**
 163  * @param[in]  *S         points to an instance of the floating-point Biquad cascade structure.
 164  * @param[in]  *pSrc      points to the block of input data.
 165  * @param[out] *pDst      points to the block of output data.
 166  * @param[in]  blockSize  number of samples to process per call.
 167  * @return     none.
 168  *
 169  */
 170
 171 void arm_biquad_cascade_df1_f32(
 172   const arm_biquad_casd_df1_inst_f32 * S,
 173   float32_t * pSrc,
 174   float32_t * pDst,
 175   uint32_t blockSize)
 176 {
 177   float32_t *pIn = pSrc;                         /*  source pointer            */
 178   float32_t *pOut = pDst;                        /*  destination pointer       */
 179   float32_t *pState = S->pState;                 /*  pState pointer            */
 180   float32_t *pCoeffs = S->pCoeffs;               /*  coefficient pointer       */
 181   float32_t acc;                                 /*  Simulates the accumulator */
 182   float32_t b0, b1, b2, a1, a2;                  /*  Filter coefficients       */
 183   float32_t Xn1, Xn2, Yn1, Yn2;                  /*  Filter pState variables   */
 184   float32_t Xn;                                  /*  temporary input           */
 185   uint32_t sample, stage = S->numStages;         /*  loop counters             */
 186
 187
 188 #ifndef ARM_MATH_CM0
 189
 190   /* Run the below code for Cortex-M4 and Cortex-M3 */
 191
 192   do
 193   {
 194     /* Reading the coefficients */
 195     b0 = *pCoeffs++;
 196     b1 = *pCoeffs++;
 197     b2 = *pCoeffs++;
 198     a1 = *pCoeffs++;
 199     a2 = *pCoeffs++;
 200
 201     /* Reading the pState values */
 202     Xn1 = pState[0];
 203     Xn2 = pState[1];
 204     Yn1 = pState[2];
 205     Yn2 = pState[3];
 206
 207     /* Apply loop unrolling and compute 4 output values simultaneously. */
 208     /*      The variable acc hold output values that are being computed:
 209      *
 210      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1]   + a2 * y[n-2]
 211      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1]   + a2 * y[n-2]
 212      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1]   + a2 * y[n-2]
 213      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1]   + a2 * y[n-2]
 214      */
 215
 216     sample = blockSize >> 2u;
 217
 218     /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
 219      ** a second loop below computes the remaining 1 to 3 samples. */
 220     while(sample > 0u)
 221     {
 222       /* Read the first input */
 223       Xn = *pIn++;
 224
 225       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 226       Yn2 = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2);
 227
 228       /* Store the result in the accumulator in the destination buffer. */
 229       *pOut++ = Yn2;
 230
 231       /* Every time after the output is computed state should be updated. */
 232       /* The states should be updated as:  */
 233       /* Xn2 = Xn1    */
 234       /* Xn1 = Xn     */
 235       /* Yn2 = Yn1    */
 236       /* Yn1 = acc   */
 237
 238       /* Read the second input */
 239       Xn2 = *pIn++;
 240
 241       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 242       Yn1 = (b0 * Xn2) + (b1 * Xn) + (b2 * Xn1) + (a1 * Yn2) + (a2 * Yn1);
 243
 244       /* Store the result in the accumulator in the destination buffer. */
 245       *pOut++ = Yn1;
 246
 247       /* Every time after the output is computed state should be updated. */
 248       /* The states should be updated as:  */
 249       /* Xn2 = Xn1    */
 250       /* Xn1 = Xn     */
 251       /* Yn2 = Yn1    */
 252       /* Yn1 = acc   */
 253
 254       /* Read the third input */
 255       Xn1 = *pIn++;
 256
 257       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 258       Yn2 = (b0 * Xn1) + (b1 * Xn2) + (b2 * Xn) + (a1 * Yn1) + (a2 * Yn2);
 259
 260       /* Store the result in the accumulator in the destination buffer. */
 261       *pOut++ = Yn2;
 262
 263       /* Every time after the output is computed state should be updated. */
 264       /* The states should be updated as: */
 265       /* Xn2 = Xn1    */
 266       /* Xn1 = Xn     */
 267       /* Yn2 = Yn1    */
 268       /* Yn1 = acc   */
 269
 270       /* Read the forth input */
 271       Xn = *pIn++;
 272
 273       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 274       Yn1 = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn2) + (a2 * Yn1);
 275
 276       /* Store the result in the accumulator in the destination buffer. */
 277       *pOut++ = Yn1;
 278
 279       /* Every time after the output is computed state should be updated. */
 280       /* The states should be updated as:  */
 281       /* Xn2 = Xn1    */
 282       /* Xn1 = Xn     */
 283       /* Yn2 = Yn1    */
 284       /* Yn1 = acc   */
 285       Xn2 = Xn1;
 286       Xn1 = Xn;
 287
 288       /* decrement the loop counter */
 289       sample--;
 290
 291     }
 292
 293     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
 294      ** No loop unrolling is used. */
 295     sample = blockSize & 0x3u;
 296
 297     while(sample > 0u)
 298     {
 299       /* Read the input */
 300       Xn = *pIn++;
 301
 302       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 303       acc = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2);
 304
 305       /* Store the result in the accumulator in the destination buffer. */
 306       *pOut++ = acc;
 307
 308       /* Every time after the output is computed state should be updated. */
 309       /* The states should be updated as:    */
 310       /* Xn2 = Xn1    */
 311       /* Xn1 = Xn     */
 312       /* Yn2 = Yn1    */
 313       /* Yn1 = acc   */
 314       Xn2 = Xn1;
 315       Xn1 = Xn;
 316       Yn2 = Yn1;
 317       Yn1 = acc;
 318
 319       /* decrement the loop counter */
 320       sample--;
 321
 322     }
 323
 324     /*  Store the updated state variables back into the pState array */
 325     *pState++ = Xn1;
 326     *pState++ = Xn2;
 327     *pState++ = Yn1;
 328     *pState++ = Yn2;
 329
 330     /*  The first stage goes from the input buffer to the output buffer. */
 331     /*  Subsequent numStages  occur in-place in the output buffer */
 332     pIn = pDst;
 333
 334     /* Reset the output pointer */
 335     pOut = pDst;
 336
 337     /* decrement the loop counter */
 338     stage--;
 339
 340   } while(stage > 0u);
 341
 342 #else
 343
 344   /* Run the below code for Cortex-M0 */
 345
 346   do
 347   {
 348     /* Reading the coefficients */
 349     b0 = *pCoeffs++;
 350     b1 = *pCoeffs++;
 351     b2 = *pCoeffs++;
 352     a1 = *pCoeffs++;
 353     a2 = *pCoeffs++;
 354
 355     /* Reading the pState values */
 356     Xn1 = pState[0];
 357     Xn2 = pState[1];
 358     Yn1 = pState[2];
 359     Yn2 = pState[3];
 360
 361     /*      The variables acc holds the output value that is computed:
 362      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1]   + a2 * y[n-2]
 363      */
 364
 365     sample = blockSize;
 366
 367     while(sample > 0u)
 368     {
 369       /* Read the input */
 370       Xn = *pIn++;
 371
 372       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 373       acc = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2);
 374
 375       /* Store the result in the accumulator in the destination buffer. */
 376       *pOut++ = acc;
 377
 378       /* Every time after the output is computed state should be updated. */
 379       /* The states should be updated as:    */
 380       /* Xn2 = Xn1    */
 381       /* Xn1 = Xn     */
 382       /* Yn2 = Yn1    */
 383       /* Yn1 = acc   */
 384       Xn2 = Xn1;
 385       Xn1 = Xn;
 386       Yn2 = Yn1;
 387       Yn1 = acc;
 388
 389       /* decrement the loop counter */
 390       sample--;
 391     }
 392
 393     /*  Store the updated state variables back into the pState array */
 394     *pState++ = Xn1;
 395     *pState++ = Xn2;
 396     *pState++ = Yn1;
 397     *pState++ = Yn2;
 398
 399     /*  The first stage goes from the input buffer to the output buffer. */
 400     /*  Subsequent numStages  occur in-place in the output buffer */
 401     pIn = pDst;
 402
 403     /* Reset the output pointer */
 404     pOut = pDst;
 405
 406     /* decrement the loop counter */
 407     stage--;
 408
 409   } while(stage > 0u);
 410
 411 #endif /*   #ifndef ARM_MATH_CM0         */
 412
 413 }
 414
 415
 416   /**
 417    * @} end of BiquadCascadeDF1 group
 418    */