git.gag.com Git - fw/stlink/blob - exampleF4/CMSIS/DSP_Lib/Source/FilteringFunctions/arm_biquad_cascade_df1_fast_q31.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        15. July 2011
   5 * $Revision:    V1.0.10
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_biquad_cascade_df1_fast_q31.c
   9 *
  10 * Description:  Processing function for the
  11 *                               Q31 Fast Biquad cascade DirectFormI(DF1) filter.
  12 *
  13 * Target Processor: Cortex-M4/Cortex-M3
  14 *
  15 * Version 1.0.10 2011/7/15
  16 *    Big Endian support added and Merged M0 and M3/M4 Source code.
  17 *
  18 * Version 1.0.3 2010/11/29
  19 *    Re-organized the CMSIS folders and updated documentation.
  20 *
  21 * Version 1.0.2 2010/11/11
  22 *    Documentation updated.
  23 *
  24 * Version 1.0.1 2010/10/05
  25 *    Production release and review comments incorporated.
  26 *
  27 * Version 1.0.0 2010/09/20
  28 *    Production release and review comments incorporated.
  29 *
  30 * Version 0.0.9  2010/08/27
  31 *    Initial version
  32 *
  33 * -------------------------------------------------------------------- */
  34
  35 #include "arm_math.h"
  36
  37 /**
  38  * @ingroup groupFilters
  39  */
  40
  41 /**
  42  * @addtogroup BiquadCascadeDF1
  43  * @{
  44  */
  45
  46 /**
  47  * @details
  48  *
  49  * @param[in]  *S        points to an instance of the Q31 Biquad cascade structure.
  50  * @param[in]  *pSrc     points to the block of input data.
  51  * @param[out] *pDst     points to the block of output data.
  52  * @param[in]  blockSize number of samples to process per call.
  53  * @return         none.
  54  *
  55  * <b>Scaling and Overflow Behavior:</b>
  56  * \par
  57  * This function is optimized for speed at the expense of fixed-point precision and overflow protection.
  58  * The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format.
  59  * These intermediate results are added to a 2.30 accumulator.
  60  * Finally, the accumulator is saturated and converted to a 1.31 result.
  61  * The fast version has the same overflow behavior as the standard version and provides less precision since it discards the low 32 bits of each multiplication result.
  62  * In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25). Use the intialization function
  63  * arm_biquad_cascade_df1_init_q31() to initialize filter structure.
  64  *
  65  * \par
  66  * Refer to the function <code>arm_biquad_cascade_df1_q31()</code> for a slower implementation of this function which uses 64-bit accumulation to provide higher precision.  Both the slow and the fast versions use the same instance structure.
  67  * Use the function <code>arm_biquad_cascade_df1_init_q31()</code> to initialize the filter structure.
  68  */
  69
  70 void arm_biquad_cascade_df1_fast_q31(
  71   const arm_biquad_casd_df1_inst_q31 * S,
  72   q31_t * pSrc,
  73   q31_t * pDst,
  74   uint32_t blockSize)
  75 {
  76   q31_t *pIn = pSrc;                             /*  input pointer initialization  */
  77   q31_t *pOut = pDst;                            /*  output pointer initialization */
  78   q31_t *pState = S->pState;                     /*  pState pointer initialization */
  79   q31_t *pCoeffs = S->pCoeffs;                   /*  coeff pointer initialization  */
  80   q31_t acc;                                     /*  accumulator                   */
  81   q31_t Xn1, Xn2, Yn1, Yn2;                      /*  Filter state variables        */
  82   q31_t b0, b1, b2, a1, a2;                      /*  Filter coefficients           */
  83   q31_t Xn;                                      /*  temporary input               */
  84   int32_t shift = (int32_t) S->postShift + 1;    /*  Shift to be applied to the output */
  85   uint32_t sample, stage = S->numStages;         /*  loop counters                     */
  86
  87
  88   do
  89   {
  90     /* Reading the coefficients */
  91     b0 = *pCoeffs++;
  92     b1 = *pCoeffs++;
  93     b2 = *pCoeffs++;
  94     a1 = *pCoeffs++;
  95     a2 = *pCoeffs++;
  96
  97     /* Reading the state values */
  98     Xn1 = pState[0];
  99     Xn2 = pState[1];
 100     Yn1 = pState[2];
 101     Yn2 = pState[3];
 102
 103     /* Apply loop unrolling and compute 4 output values simultaneously. */
 104     /*      The variables acc ... acc3 hold output values that are being computed:
 105      *
 106      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
 107      */
 108
 109     sample = blockSize >> 2u;
 110
 111     /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
 112      ** a second loop below computes the remaining 1 to 3 samples. */
 113     while(sample > 0u)
 114     {
 115       /* Read the input */
 116       Xn = *pIn++;
 117
 118       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 119       /* acc =  b0 * x[n] */
 120       acc = (q31_t) (((q63_t) b0 * Xn) >> 32);
 121       /* acc +=  b1 * x[n-1] */
 122       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32);
 123       /* acc +=  b[2] * x[n-2] */
 124       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);
 125       /* acc +=  a1 * y[n-1] */
 126       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);
 127       /* acc +=  a2 * y[n-2] */
 128       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);
 129
 130       /* The result is converted to 1.31 , Yn2 variable is reused */
 131       Yn2 = acc << shift;
 132
 133       /* Store the output in the destination buffer. */
 134       *pOut++ = Yn2;
 135
 136       /* Read the second input */
 137       Xn2 = *pIn++;
 138
 139       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 140       /* acc =  b0 * x[n] */
 141       acc = (q31_t) (((q63_t) b0 * (Xn2)) >> 32);
 142       /* acc +=  b1 * x[n-1] */
 143       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn))) >> 32);
 144       /* acc +=  b[2] * x[n-2] */
 145       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn1))) >> 32);
 146       /* acc +=  a1 * y[n-1] */
 147       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32);
 148       /* acc +=  a2 * y[n-2] */
 149       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32);
 150
 151       /* The result is converted to 1.31, Yn1 variable is reused  */
 152       Yn1 = acc << shift;
 153
 154       /* Store the output in the destination buffer. */
 155       *pOut++ = Yn1;
 156
 157       /* Read the third input  */
 158       Xn1 = *pIn++;
 159
 160       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 161       /* acc =  b0 * x[n] */
 162       acc = (q31_t) (((q63_t) b0 * (Xn1)) >> 32);
 163       /* acc +=  b1 * x[n-1] */
 164       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn2))) >> 32);
 165       /* acc +=  b[2] * x[n-2] */
 166       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn))) >> 32);
 167       /* acc +=  a1 * y[n-1] */
 168       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);
 169       /* acc +=  a2 * y[n-2] */
 170       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);
 171
 172       /* The result is converted to 1.31, Yn2 variable is reused  */
 173       Yn2 = acc << shift;
 174
 175       /* Store the output in the destination buffer. */
 176       *pOut++ = Yn2;
 177
 178       /* Read the forth input */
 179       Xn = *pIn++;
 180
 181       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 182       /* acc =  b0 * x[n] */
 183       acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32);
 184       /* acc +=  b1 * x[n-1] */
 185       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32);
 186       /* acc +=  b[2] * x[n-2] */
 187       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);
 188       /* acc +=  a1 * y[n-1] */
 189       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32);
 190       /* acc +=  a2 * y[n-2] */
 191       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32);
 192
 193       /* The result is converted to 1.31, Yn1 variable is reused  */
 194       Yn1 = acc << shift;
 195
 196       /* Every time after the output is computed state should be updated. */
 197       /* The states should be updated as:  */
 198       /* Xn2 = Xn1    */
 199       /* Xn1 = Xn     */
 200       /* Yn2 = Yn1    */
 201       /* Yn1 = acc    */
 202       Xn2 = Xn1;
 203       Xn1 = Xn;
 204
 205       /* Store the output in the destination buffer. */
 206       *pOut++ = Yn1;
 207
 208       /* decrement the loop counter */
 209       sample--;
 210     }
 211
 212     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
 213      ** No loop unrolling is used. */
 214     sample = (blockSize & 0x3u);
 215
 216     while(sample > 0u)
 217     {
 218       /* Read the input */
 219       Xn = *pIn++;
 220
 221       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
 222       /* acc =  b0 * x[n] */
 223       acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32);
 224       /* acc +=  b1 * x[n-1] */
 225       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32);
 226       /* acc +=  b[2] * x[n-2] */
 227       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);
 228       /* acc +=  a1 * y[n-1] */
 229       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);
 230       /* acc +=  a2 * y[n-2] */
 231       acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);
 232       /* The result is converted to 1.31  */
 233       acc = acc << shift;
 234
 235       /* Every time after the output is computed state should be updated. */
 236       /* The states should be updated as:  */
 237       /* Xn2 = Xn1    */
 238       /* Xn1 = Xn     */
 239       /* Yn2 = Yn1    */
 240       /* Yn1 = acc    */
 241       Xn2 = Xn1;
 242       Xn1 = Xn;
 243       Yn2 = Yn1;
 244       Yn1 = acc;
 245
 246       /* Store the output in the destination buffer. */
 247       *pOut++ = acc;
 248
 249       /* decrement the loop counter */
 250       sample--;
 251     }
 252
 253     /*  The first stage goes from the input buffer to the output buffer. */
 254     /*  Subsequent stages occur in-place in the output buffer */
 255     pIn = pDst;
 256
 257     /* Reset to destination pointer */
 258     pOut = pDst;
 259
 260     /*  Store the updated state variables back into the pState array */
 261     *pState++ = Xn1;
 262     *pState++ = Xn2;
 263     *pState++ = Yn1;
 264     *pState++ = Yn2;
 265
 266   } while(--stage);
 267 }
 268
 269 /**
 270   * @} end of BiquadCascadeDF1 group
 271   */