git.gag.com Git - fw/stlink/blob - exampleF4/CMSIS/DSP_Lib/Source/FilteringFunctions/arm_biquad_cascade_df1_32x64_q31.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        15. July 2011
   5 * $Revision:    V1.0.10
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_biquad_cascade_df1_32x64_q31.c
   9 *
  10 * Description:  High precision Q31 Biquad cascade filter processing function
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Version 1.0.10 2011/7/15
  15 *    Big Endian support added and Merged M0 and M3/M4 Source code.
  16 *
  17 * Version 1.0.3 2010/11/29
  18 *    Re-organized the CMSIS folders and updated documentation.
  19 *
  20 * Version 1.0.2 2010/11/11
  21 *    Documentation updated.
  22 *
  23 * Version 1.0.1 2010/10/05
  24 *    Production release and review comments incorporated.
  25 *
  26 * Version 1.0.0 2010/09/20
  27 *    Production release and review comments incorporated.
  28 *
  29 * Version 0.0.7  2010/06/10
  30 *    Misra-C changes done
  31 * -------------------------------------------------------------------- */
  32
  33 #include "arm_math.h"
  34
  35 /**
  36  * @ingroup groupFilters
  37  */
  38
  39 /**
  40  * @defgroup BiquadCascadeDF1_32x64 High Precision Q31 Biquad Cascade Filter
  41  *
  42  * This function implements a high precision Biquad cascade filter which operates on
  43  * Q31 data values.  The filter coefficients are in 1.31 format and the state variables
  44  * are in 1.63 format.  The double precision state variables reduce quantization noise
  45  * in the filter and provide a cleaner output.
  46  * These filters are particularly useful when implementing filters in which the
  47  * singularities are close to the unit circle.  This is common for low pass or high
  48  * pass filters with very low cutoff frequencies.
  49  *
  50  * The function operates on blocks of input and output data
  51  * and each call to the function processes <code>blockSize</code> samples through
  52  * the filter. <code>pSrc</code> and <code>pDst</code> point to input and output arrays
  53  * containing <code>blockSize</code> Q31 values.
  54  *
  55  * \par Algorithm
  56  * Each Biquad stage implements a second order filter using the difference equation:
  57  * <pre>
  58  *     y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
  59  * </pre>
  60  * A Direct Form I algorithm is used with 5 coefficients and 4 state variables per stage.
  61  * \image html Biquad.gif "Single Biquad filter stage"
  62  * Coefficients <code>b0, b1, and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.
  63  * Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.
  64  * Pay careful attention to the sign of the feedback coefficients.
  65  * Some design tools use the difference equation
  66  * <pre>
  67  *     y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] - a1 * y[n-1] - a2 * y[n-2]
  68  * </pre>
  69  * In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.
  70  *
  71  * \par
  72  * Higher order filters are realized as a cascade of second order sections.
  73  * <code>numStages</code> refers to the number of second order stages used.
  74  * For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.
  75  * \image html BiquadCascade.gif "8th order filter using a cascade of Biquad stages"
  76  * A 9th order filter would be realized with <code>numStages=5</code> second order stages with the coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).
  77  *
  78  * \par
  79  * <code>pState</code> points to the state variable array.
  80  * Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>, and each state variable is stored in 1.63 format to improve precision.
  81  * The state variables are arranged in the array as:
  82  * <pre>
  83  *     {x[n-1], x[n-2], y[n-1], y[n-2]}
  84  * </pre>
  85  *
  86  * \par
  87  * The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
  88  * The state array has a total length of <code>4*numStages</code> values of data in 1.63 format.
  89  * The state variables are updated after each block of data is processed; the coefficients are untouched.
  90  *
  91  * \par Instance Structure
  92  * The coefficients and state variables for a filter are stored together in an instance data structure.
  93  * A separate instance structure must be defined for each filter.
  94  * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
  95  *
  96  * \par Init Function
  97  * There is also an associated initialization function which performs the following operations:
  98  * - Sets the values of the internal structure fields.
  99  * - Zeros out the values in the state buffer.
 100  * \par
 101  * Use of the initialization function is optional.
 102  * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
 103  * To place an instance structure into a const data section, the instance structure must be manually initialized.
 104  * Set the values in the state buffer to zeros before static initialization.
 105  * For example, to statically initialize the filter instance structure use
 106  * <pre>
 107  *     arm_biquad_cas_df1_32x64_ins_q31 S1 = {numStages, pState, pCoeffs, postShift};
 108  * </pre>
 109  * where <code>numStages</code> is the number of Biquad stages in the filter; <code>pState</code> is the address of the state buffer;
 110  * <code>pCoeffs</code> is the address of the coefficient buffer; <code>postShift</code> is the shift to be applied, which is described in detail below.
 111  * \par Fixed-Point Behavior
 112  * Care must be taken when using the Biquad Cascade 32x64 filter function.
 113  * The following issues must be considered:
 114  * - Scaling of coefficients
 115  * - Filter gain
 116  * - Overflow and saturation
 117  *
 118  * \par
 119  * Filter coefficients are represented as fractional values and
 120  * restricted to lie in the range <code>[-1 +1)</code>.
 121  * The processing function has an additional scaling parameter <code>postShift</code>
 122  * which allows the filter coefficients to exceed the range <code>[-1 +1)</code>.
 123  * At the output of the filter's accumulator is a shift register which shifts the result by <code>postShift</code> bits.
 124  * \image html BiquadPostshift.gif "Fixed-point Biquad with shift by postShift bits after accumulator"
 125  * This essentially scales the filter coefficients by <code>2^postShift</code>.
 126  * For example, to realize the coefficients
 127  * <pre>
 128  *    {1.5, -0.8, 1.2, 1.6, -0.9}
 129  * </pre>
 130  * set the Coefficient array to:
 131  * <pre>
 132  *    {0.75, -0.4, 0.6, 0.8, -0.45}
 133  * </pre>
 134  * and set <code>postShift=1</code>
 135  *
 136  * \par
 137  * The second thing to keep in mind is the gain through the filter.
 138  * The frequency response of a Biquad filter is a function of its coefficients.
 139  * It is possible for the gain through the filter to exceed 1.0 meaning that the filter increases the amplitude of certain frequencies.
 140  * This means that an input signal with amplitude < 1.0 may result in an output > 1.0, which is then saturated or wraps around (overflows) depending on the implementation of the filter.
 141  * To avoid this behavior the filter needs to be scaled down such that its peak gain < 1.0 or the input signal must be scaled down so that the combination of input and filter never overflows.
 142  *
 143  * \par
 144  * The third item to consider is the overflow and saturation behavior of the fixed-point Q31 version.
 145  * This is described in the function specific documentation below.
 146  */
 147
 148 /**
 149  * @addtogroup BiquadCascadeDF1_32x64
 150  * @{
 151  */
 152
 153 /**
 154  * @details
 155
 156  * @param[in]  *S points to an instance of the high precision Q31 Biquad cascade filter.
 157  * @param[in]  *pSrc points to the block of input data.
 158  * @param[out] *pDst points to the block of output data.
 159  * @param[in]  blockSize number of samples to process.
 160  * @return none.
 161  *
 162  * \par
 163  * The function is implemented using an internal 64-bit accumulator.
 164  * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
 165  * Thus, if the accumulator result overflows it wraps around rather than clip.
 166  * In order to avoid overflows completely the input signal must be scaled down by 2 bits and lie in the range [-0.25 +0.25).
 167  * After all 5 multiply-accumulates are performed, the 2.62 accumulator is shifted by <code>postShift</code> bits and the result truncated to
 168  * 1.31 format by discarding the low 32 bits.
 169  *
 170  * \par
 171  * Two related functions are provided in the CMSIS DSP library.
 172  * <code>arm_biquad_cascade_df1_q31()</code> implements a Biquad cascade with 32-bit coefficients and state variables with a Q63 accumulator.
 173  * <code>arm_biquad_cascade_df1_fast_q31()</code> implements a Biquad cascade with 32-bit coefficients and state variables with a Q31 accumulator.
 174  */
 175
 176 void arm_biquad_cas_df1_32x64_q31(
 177   const arm_biquad_cas_df1_32x64_ins_q31 * S,
 178   q31_t * pSrc,
 179   q31_t * pDst,
 180   uint32_t blockSize)
 181 {
 182   q31_t *pIn = pSrc;                             /*  input pointer initialization  */
 183   q31_t *pOut = pDst;                            /*  output pointer initialization */
 184   q63_t *pState = S->pState;                     /*  state pointer initialization  */
 185   q31_t *pCoeffs = S->pCoeffs;                   /*  coeff pointer initialization  */
 186   q63_t acc;                                     /*  accumulator                   */
 187   q63_t Xn1, Xn2, Yn1, Yn2;                      /*  Filter state variables        */
 188   q31_t b0, b1, b2, a1, a2;                      /*  Filter coefficients           */
 189   q63_t Xn;                                      /*  temporary input               */
 190   int32_t shift = (int32_t) S->postShift + 1;    /*  Shift to be applied to the output */
 191   uint32_t sample, stage = S->numStages;         /*  loop counters                     */
 192
 193
 194 #ifndef ARM_MATH_CM0
 195
 196   /* Run the below code for Cortex-M4 and Cortex-M3 */
 197
 198   do
 199   {
 200     /* Reading the coefficients */
 201     b0 = *pCoeffs++;
 202     b1 = *pCoeffs++;
 203     b2 = *pCoeffs++;
 204     a1 = *pCoeffs++;
 205     a2 = *pCoeffs++;
 206
 207     /* Reading the state values */
 208     Xn1 = pState[0];
 209     Xn2 = pState[1];
 210     Yn1 = pState[2];
 211     Yn2 = pState[3];
 212
 213     /* Apply loop unrolling and compute 4 output values simultaneously. */
 214     /* The variable acc hold output value that is being computed and
 215      * stored in the destination buffer
 216      * acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
 217      */
 218
 219     sample = blockSize >> 2u;
 220
 221     /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
 222      ** a second loop below computes the remaining 1 to 3 samples. */
 223     while(sample > 0u)
 224     {
 225       /* Read the input */
 226       Xn = *pIn++;
 227
 228       /* The value is shifted to the MSB to perform 32x64 multiplication */
 229       Xn = Xn << 32;
 230
 231       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 232
 233       /* acc =  b0 * x[n] */
 234       acc = mult32x64(Xn, b0);
 235       /* acc +=  b1 * x[n-1] */
 236       acc += mult32x64(Xn1, b1);
 237       /* acc +=  b[2] * x[n-2] */
 238       acc += mult32x64(Xn2, b2);
 239       /* acc +=  a1 * y[n-1] */
 240       acc += mult32x64(Yn1, a1);
 241       /* acc +=  a2 * y[n-2] */
 242       acc += mult32x64(Yn2, a2);
 243
 244       /* The result is converted to 1.63 , Yn2 variable is reused */
 245       Yn2 = acc << shift;
 246
 247       /* Store the output in the destination buffer in 1.31 format. */
 248       *pOut++ = (q31_t) (acc >> (32 - shift));
 249
 250       /* Read the second input into Xn2, to reuse the value */
 251       Xn2 = *pIn++;
 252
 253       /* The value is shifted to the MSB to perform 32x64 multiplication */
 254       Xn2 = Xn2 << 32;
 255
 256       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 257
 258       /* acc =  b0 * x[n] */
 259       acc = mult32x64(Xn2, b0);
 260       /* acc +=  b1 * x[n-1] */
 261       acc += mult32x64(Xn, b1);
 262       /* acc +=  b[2] * x[n-2] */
 263       acc += mult32x64(Xn1, b2);
 264       /* acc +=  a1 * y[n-1] */
 265       acc += mult32x64(Yn2, a1);
 266       /* acc +=  a2 * y[n-2] */
 267       acc += mult32x64(Yn1, a2);
 268
 269       /* The result is converted to 1.63, Yn1 variable is reused */
 270       Yn1 = acc << shift;
 271
 272       /* The result is converted to 1.31 */
 273       /* Store the output in the destination buffer. */
 274       *pOut++ = (q31_t) (acc >> (32 - shift));
 275
 276       /* Read the third input into Xn1, to reuse the value */
 277       Xn1 = *pIn++;
 278
 279       /* The value is shifted to the MSB to perform 32x64 multiplication */
 280       Xn1 = Xn1 << 32;
 281
 282       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 283       /* acc =  b0 * x[n] */
 284       acc = mult32x64(Xn1, b0);
 285       /* acc +=  b1 * x[n-1] */
 286       acc += mult32x64(Xn2, b1);
 287       /* acc +=  b[2] * x[n-2] */
 288       acc += mult32x64(Xn, b2);
 289       /* acc +=  a1 * y[n-1] */
 290       acc += mult32x64(Yn1, a1);
 291       /* acc +=  a2 * y[n-2] */
 292       acc += mult32x64(Yn2, a2);
 293
 294       /* The result is converted to 1.63, Yn2 variable is reused  */
 295       Yn2 = acc << shift;
 296
 297       /* Store the output in the destination buffer in 1.31 format. */
 298       *pOut++ = (q31_t) (acc >> (32 - shift));
 299
 300       /* Read the fourth input into Xn, to reuse the value */
 301       Xn = *pIn++;
 302
 303       /* The value is shifted to the MSB to perform 32x64 multiplication */
 304       Xn = Xn << 32;
 305
 306       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 307       /* acc =  b0 * x[n] */
 308       acc = mult32x64(Xn, b0);
 309       /* acc +=  b1 * x[n-1] */
 310       acc += mult32x64(Xn1, b1);
 311       /* acc +=  b[2] * x[n-2] */
 312       acc += mult32x64(Xn2, b2);
 313       /* acc +=  a1 * y[n-1] */
 314       acc += mult32x64(Yn2, a1);
 315       /* acc +=  a2 * y[n-2] */
 316       acc += mult32x64(Yn1, a2);
 317
 318       /* The result is converted to 1.63, Yn1 variable is reused  */
 319       Yn1 = acc << shift;
 320
 321       /* Every time after the output is computed state should be updated. */
 322       /* The states should be updated as:  */
 323       /* Xn2 = Xn1    */
 324       /* Xn1 = Xn     */
 325       /* Yn2 = Yn1    */
 326       /* Yn1 = acc    */
 327       Xn2 = Xn1;
 328       Xn1 = Xn;
 329
 330       /* Store the output in the destination buffer in 1.31 format. */
 331       *pOut++ = (q31_t) (acc >> (32 - shift));
 332
 333       /* decrement the loop counter */
 334       sample--;
 335     }
 336
 337     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
 338      ** No loop unrolling is used. */
 339     sample = (blockSize & 0x3u);
 340
 341     while(sample > 0u)
 342     {
 343       /* Read the input */
 344       Xn = *pIn++;
 345
 346       /* The value is shifted to the MSB to perform 32x64 multiplication */
 347       Xn = Xn << 32;
 348
 349       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 350       /* acc =  b0 * x[n] */
 351       acc = mult32x64(Xn, b0);
 352       /* acc +=  b1 * x[n-1] */
 353       acc += mult32x64(Xn1, b1);
 354       /* acc +=  b[2] * x[n-2] */
 355       acc += mult32x64(Xn2, b2);
 356       /* acc +=  a1 * y[n-1] */
 357       acc += mult32x64(Yn1, a1);
 358       /* acc +=  a2 * y[n-2] */
 359       acc += mult32x64(Yn2, a2);
 360
 361       /* Every time after the output is computed state should be updated. */
 362       /* The states should be updated as:  */
 363       /* Xn2 = Xn1    */
 364       /* Xn1 = Xn     */
 365       /* Yn2 = Yn1    */
 366       /* Yn1 = acc    */
 367       Xn2 = Xn1;
 368       Xn1 = Xn;
 369       Yn2 = Yn1;
 370       Yn1 = acc << shift;
 371
 372       /* Store the output in the destination buffer in 1.31 format. */
 373       *pOut++ = (q31_t) (acc >> (32 - shift));
 374
 375       /* decrement the loop counter */
 376       sample--;
 377     }
 378
 379     /*  The first stage output is given as input to the second stage. */
 380     pIn = pDst;
 381
 382     /* Reset to destination buffer working pointer */
 383     pOut = pDst;
 384
 385     /*  Store the updated state variables back into the pState array */
 386     *pState++ = Xn1;
 387     *pState++ = Xn2;
 388     *pState++ = Yn1;
 389     *pState++ = Yn2;
 390
 391   } while(--stage);
 392
 393 #else
 394
 395   /* Run the below code for Cortex-M0 */
 396
 397   do
 398   {
 399     /* Reading the coefficients */
 400     b0 = *pCoeffs++;
 401     b1 = *pCoeffs++;
 402     b2 = *pCoeffs++;
 403     a1 = *pCoeffs++;
 404     a2 = *pCoeffs++;
 405
 406     /* Reading the state values */
 407     Xn1 = pState[0];
 408     Xn2 = pState[1];
 409     Yn1 = pState[2];
 410     Yn2 = pState[3];
 411
 412     /* The variable acc hold output value that is being computed and
 413      * stored in the destination buffer
 414      * acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
 415      */
 416
 417     sample = blockSize;
 418
 419     while(sample > 0u)
 420     {
 421       /* Read the input */
 422       Xn = *pIn++;
 423
 424       /* The value is shifted to the MSB to perform 32x64 multiplication */
 425       Xn = Xn << 32;
 426
 427       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 428       /* acc =  b0 * x[n] */
 429       acc = mult32x64(Xn, b0);
 430       /* acc +=  b1 * x[n-1] */
 431       acc += mult32x64(Xn1, b1);
 432       /* acc +=  b[2] * x[n-2] */
 433       acc += mult32x64(Xn2, b2);
 434       /* acc +=  a1 * y[n-1] */
 435       acc += mult32x64(Yn1, a1);
 436       /* acc +=  a2 * y[n-2] */
 437       acc += mult32x64(Yn2, a2);
 438
 439       /* Every time after the output is computed state should be updated. */
 440       /* The states should be updated as:  */
 441       /* Xn2 = Xn1    */
 442       /* Xn1 = Xn     */
 443       /* Yn2 = Yn1    */
 444       /* Yn1 = acc    */
 445       Xn2 = Xn1;
 446       Xn1 = Xn;
 447       Yn2 = Yn1;
 448       Yn1 = acc << shift;
 449
 450       /* Store the output in the destination buffer in 1.31 format. */
 451       *pOut++ = (q31_t) (acc >> (32 - shift));
 452
 453       /* decrement the loop counter */
 454       sample--;
 455     }
 456
 457     /*  The first stage output is given as input to the second stage. */
 458     pIn = pDst;
 459
 460     /* Reset to destination buffer working pointer */
 461     pOut = pDst;
 462
 463     /*  Store the updated state variables back into the pState array */
 464     *pState++ = Xn1;
 465     *pState++ = Xn2;
 466     *pState++ = Yn1;
 467     *pState++ = Yn2;
 468
 469   } while(--stage);
 470
 471 #endif /*    #ifndef ARM_MATH_CM0     */
 472 }
 473
 474   /**
 475    * @} end of BiquadCascadeDF1_32x64 group
 476    */