git.gag.com Git - fw/stlink/blob - exampleF4/CMSIS/DSP_Lib/Source/FilteringFunctions/arm_fir_f32.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        15. July 2011
   5 * $Revision:    V1.0.10
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_fir_f32.c
   9 *
  10 * Description:  Floating-point FIR filter processing function.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Version 1.0.10 2011/7/15
  15 *    Big Endian support added and Merged M0 and M3/M4 Source code.
  16 *
  17 * Version 1.0.3 2010/11/29
  18 *    Re-organized the CMSIS folders and updated documentation.
  19 *
  20 * Version 1.0.2 2010/11/11
  21 *    Documentation updated.
  22 *
  23 * Version 1.0.1 2010/10/05
  24 *    Production release and review comments incorporated.
  25 *
  26 * Version 1.0.0 2010/09/20
  27 *    Production release and review comments incorporated.
  28 *
  29 * Version 0.0.5  2010/04/26
  30 *        incorporated review comments and updated with latest CMSIS layer
  31 *
  32 * Version 0.0.3  2010/03/10
  33 *    Initial version
  34 * -------------------------------------------------------------------- */
  35
  36 #include "arm_math.h"
  37
  38 /**
  39  * @ingroup groupFilters
  40  */
  41
  42 /**
  43  * @defgroup FIR Finite Impulse Response (FIR) Filters
  44  *
  45  * This set of functions implements Finite Impulse Response (FIR) filters
  46  * for Q7, Q15, Q31, and floating-point data types.
  47  * Fast versions of Q15 and Q31 are also provided on Cortex-M4 and Cortex-M3.
  48  * The functions operate on blocks of input and output data and each call to the function processes
  49  * <code>blockSize</code> samples through the filter.  <code>pSrc</code> and
  50  * <code>pDst</code> point to input and output arrays containing <code>blockSize</code> values.
  51  *
  52  * \par Algorithm:
  53  * The FIR filter algorithm is based upon a sequence of multiply-accumulate (MAC) operations.
  54  * Each filter coefficient <code>b[n]</code> is multiplied by a state variable which equals a previous input sample <code>x[n]</code>.
  55  * <pre>
  56  *    y[n] = b[0] * x[n] + b[1] * x[n-1] + b[2] * x[n-2] + ...+ b[numTaps-1] * x[n-numTaps+1]
  57  * </pre>
  58  * \par
  59  * \image html FIR.gif "Finite Impulse Response filter"
  60  * \par
  61  * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.
  62  * Coefficients are stored in time reversed order.
  63  * \par
  64  * <pre>
  65  *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
  66  * </pre>
  67  * \par
  68  * <code>pState</code> points to a state array of size <code>numTaps + blockSize - 1</code>.
  69  * Samples in the state buffer are stored in the following order.
  70  * \par
  71  * <pre>
  72  *    {x[n-numTaps+1], x[n-numTaps+2], ..., x[n-1], x[n], x[n+1], ..., x[n+blockSize-1]}
  73  * </pre>
  74  * \par
  75  * Note that the length of the state buffer exceeds the length of the coefficient array by <code>blockSize-1</code>.
  76  * The increased state buffer length allows circular addressing, which is traditionally used in the FIR filters,
  77  * to be avoided and yields a significant speed improvement.
  78  * The state variables are updated after each block of data is processed; the coefficients are untouched.
  79  * \par Instance Structure
  80  * The coefficients and state variables for a filter are stored together in an instance data structure.
  81  * A separate instance structure must be defined for each filter.
  82  * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
  83  * There are separate instance structure declarations for each of the 4 supported data types.
  84  *
  85  * \par Initialization Functions
  86  * There is also an associated initialization function for each data type.
  87  * The initialization function performs the following operations:
  88  * - Sets the values of the internal structure fields.
  89  * - Zeros out the values in the state buffer.
  90  *
  91  * \par
  92  * Use of the initialization function is optional.
  93  * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
  94  * To place an instance structure into a const data section, the instance structure must be manually initialized.
  95  * Set the values in the state buffer to zeros before static initialization.
  96  * The code below statically initializes each of the 4 different data type filter instance structures
  97  * <pre>
  98  *arm_fir_instance_f32 S = {numTaps, pState, pCoeffs};
  99  *arm_fir_instance_q31 S = {numTaps, pState, pCoeffs};
 100  *arm_fir_instance_q15 S = {numTaps, pState, pCoeffs};
 101  *arm_fir_instance_q7 S =  {numTaps, pState, pCoeffs};
 102  * </pre>
 103  *
 104  * where <code>numTaps</code> is the number of filter coefficients in the filter; <code>pState</code> is the address of the state buffer;
 105  * <code>pCoeffs</code> is the address of the coefficient buffer.
 106  *
 107  * \par Fixed-Point Behavior
 108  * Care must be taken when using the fixed-point versions of the FIR filter functions.
 109  * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
 110  * Refer to the function specific documentation below for usage guidelines.
 111  */
 112
 113 /**
 114  * @addtogroup FIR
 115  * @{
 116  */
 117
 118 /**
 119  *
 120  * @param[in]  *S points to an instance of the floating-point FIR filter structure.
 121  * @param[in]  *pSrc points to the block of input data.
 122  * @param[out] *pDst points to the block of output data.
 123  * @param[in]  blockSize number of samples to process per call.
 124  * @return     none.
 125  *
 126  */
 127
 128 void arm_fir_f32(
 129   const arm_fir_instance_f32 * S,
 130   float32_t * pSrc,
 131   float32_t * pDst,
 132   uint32_t blockSize)
 133 {
 134
 135   float32_t *pState = S->pState;                 /* State pointer */
 136   float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
 137   float32_t *pStateCurnt;                        /* Points to the current sample of the state */
 138   float32_t *px, *pb;                            /* Temporary pointers for state and coefficient buffers */
 139   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
 140   uint32_t i, tapCnt, blkCnt;                    /* Loop counters */
 141
 142
 143 #ifndef ARM_MATH_CM0
 144
 145   /* Run the below code for Cortex-M4 and Cortex-M3 */
 146
 147   float32_t acc0, acc1, acc2, acc3;              /* Accumulators */
 148   float32_t x0, x1, x2, x3, c0;                  /* Temporary variables to hold state and coefficient values */
 149
 150
 151   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
 152   /* pStateCurnt points to the location where the new input data should be written */
 153   pStateCurnt = &(S->pState[(numTaps - 1u)]);
 154
 155   /* Apply loop unrolling and compute 4 output values simultaneously.
 156    * The variables acc0 ... acc3 hold output values that are being computed:
 157    *
 158    *    acc0 =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]
 159    *    acc1 =  b[numTaps-1] * x[n-numTaps] +   b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]
 160    *    acc2 =  b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] +   b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]
 161    *    acc3 =  b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps]   +...+ b[0] * x[3]
 162    */
 163   blkCnt = blockSize >> 2;
 164
 165   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
 166    ** a second loop below computes the remaining 1 to 3 samples. */
 167   while(blkCnt > 0u)
 168   {
 169     /* Copy four new input samples into the state buffer */
 170     *pStateCurnt++ = *pSrc++;
 171     *pStateCurnt++ = *pSrc++;
 172     *pStateCurnt++ = *pSrc++;
 173     *pStateCurnt++ = *pSrc++;
 174
 175     /* Set all accumulators to zero */
 176     acc0 = 0.0f;
 177     acc1 = 0.0f;
 178     acc2 = 0.0f;
 179     acc3 = 0.0f;
 180
 181     /* Initialize state pointer */
 182     px = pState;
 183
 184     /* Initialize coeff pointer */
 185     pb = (pCoeffs);
 186
 187     /* Read the first three samples from the state buffer:  x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2] */
 188     x0 = *px++;
 189     x1 = *px++;
 190     x2 = *px++;
 191
 192     /* Loop unrolling.  Process 4 taps at a time. */
 193     tapCnt = numTaps >> 2u;
 194
 195     /* Loop over the number of taps.  Unroll by a factor of 4.
 196      ** Repeat until we've computed numTaps-4 coefficients. */
 197     while(tapCnt > 0u)
 198     {
 199       /* Read the b[numTaps-1] coefficient */
 200       c0 = *(pb++);
 201
 202       /* Read x[n-numTaps-3] sample */
 203       x3 = *(px++);
 204
 205       /* acc0 +=  b[numTaps-1] * x[n-numTaps] */
 206       acc0 += x0 * c0;
 207
 208       /* acc1 +=  b[numTaps-1] * x[n-numTaps-1] */
 209       acc1 += x1 * c0;
 210
 211       /* acc2 +=  b[numTaps-1] * x[n-numTaps-2] */
 212       acc2 += x2 * c0;
 213
 214       /* acc3 +=  b[numTaps-1] * x[n-numTaps-3] */
 215       acc3 += x3 * c0;
 216
 217       /* Read the b[numTaps-2] coefficient */
 218       c0 = *(pb++);
 219
 220       /* Read x[n-numTaps-4] sample */
 221       x0 = *(px++);
 222
 223       /* Perform the multiply-accumulate */
 224       acc0 += x1 * c0;
 225       acc1 += x2 * c0;
 226       acc2 += x3 * c0;
 227       acc3 += x0 * c0;
 228
 229       /* Read the b[numTaps-3] coefficient */
 230       c0 = *(pb++);
 231
 232       /* Read x[n-numTaps-5] sample */
 233       x1 = *(px++);
 234
 235       /* Perform the multiply-accumulates */
 236       acc0 += x2 * c0;
 237       acc1 += x3 * c0;
 238       acc2 += x0 * c0;
 239       acc3 += x1 * c0;
 240
 241       /* Read the b[numTaps-4] coefficient */
 242       c0 = *(pb++);
 243
 244       /* Read x[n-numTaps-6] sample */
 245       x2 = *(px++);
 246
 247       /* Perform the multiply-accumulates */
 248       acc0 += x3 * c0;
 249       acc1 += x0 * c0;
 250       acc2 += x1 * c0;
 251       acc3 += x2 * c0;
 252
 253       tapCnt--;
 254     }
 255
 256     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 257     tapCnt = numTaps % 0x4u;
 258
 259     while(tapCnt > 0u)
 260     {
 261       /* Read coefficients */
 262       c0 = *(pb++);
 263
 264       /* Fetch 1 state variable */
 265       x3 = *(px++);
 266
 267       /* Perform the multiply-accumulates */
 268       acc0 += x0 * c0;
 269       acc1 += x1 * c0;
 270       acc2 += x2 * c0;
 271       acc3 += x3 * c0;
 272
 273       /* Reuse the present sample states for next sample */
 274       x0 = x1;
 275       x1 = x2;
 276       x2 = x3;
 277
 278       /* Decrement the loop counter */
 279       tapCnt--;
 280     }
 281
 282     /* Advance the state pointer by 4 to process the next group of 4 samples */
 283     pState = pState + 4;
 284
 285     /* The results in the 4 accumulators, store in the destination buffer. */
 286     *pDst++ = acc0;
 287     *pDst++ = acc1;
 288     *pDst++ = acc2;
 289     *pDst++ = acc3;
 290
 291     blkCnt--;
 292   }
 293
 294   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
 295    ** No loop unrolling is used. */
 296   blkCnt = blockSize % 0x4u;
 297
 298   while(blkCnt > 0u)
 299   {
 300     /* Copy one sample at a time into state buffer */
 301     *pStateCurnt++ = *pSrc++;
 302
 303     /* Set the accumulator to zero */
 304     acc0 = 0.0f;
 305
 306     /* Initialize state pointer */
 307     px = pState;
 308
 309     /* Initialize Coefficient pointer */
 310     pb = (pCoeffs);
 311
 312     i = numTaps;
 313
 314     /* Perform the multiply-accumulates */
 315     do
 316     {
 317       acc0 += *px++ * *pb++;
 318       i--;
 319
 320     } while(i > 0u);
 321
 322     /* The result is store in the destination buffer. */
 323     *pDst++ = acc0;
 324
 325     /* Advance state pointer by 1 for the next sample */
 326     pState = pState + 1;
 327
 328     blkCnt--;
 329   }
 330
 331   /* Processing is complete.
 332    ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
 333    ** This prepares the state buffer for the next function call. */
 334
 335   /* Points to the start of the state buffer */
 336   pStateCurnt = S->pState;
 337
 338   tapCnt = (numTaps - 1u) >> 2u;
 339
 340   /* copy data */
 341   while(tapCnt > 0u)
 342   {
 343     *pStateCurnt++ = *pState++;
 344     *pStateCurnt++ = *pState++;
 345     *pStateCurnt++ = *pState++;
 346     *pStateCurnt++ = *pState++;
 347
 348     /* Decrement the loop counter */
 349     tapCnt--;
 350   }
 351
 352   /* Calculate remaining number of copies */
 353   tapCnt = (numTaps - 1u) % 0x4u;
 354
 355   /* Copy the remaining q31_t data */
 356   while(tapCnt > 0u)
 357   {
 358     *pStateCurnt++ = *pState++;
 359
 360     /* Decrement the loop counter */
 361     tapCnt--;
 362   }
 363
 364 #else
 365
 366   /* Run the below code for Cortex-M0 */
 367
 368   float32_t acc;
 369
 370   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
 371   /* pStateCurnt points to the location where the new input data should be written */
 372   pStateCurnt = &(S->pState[(numTaps - 1u)]);
 373
 374   /* Initialize blkCnt with blockSize */
 375   blkCnt = blockSize;
 376
 377   while(blkCnt > 0u)
 378   {
 379     /* Copy one sample at a time into state buffer */
 380     *pStateCurnt++ = *pSrc++;
 381
 382     /* Set the accumulator to zero */
 383     acc = 0.0f;
 384
 385     /* Initialize state pointer */
 386     px = pState;
 387
 388     /* Initialize Coefficient pointer */
 389     pb = pCoeffs;
 390
 391     i = numTaps;
 392
 393     /* Perform the multiply-accumulates */
 394     do
 395     {
 396       /* acc =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */
 397       acc += *px++ * *pb++;
 398       i--;
 399
 400     } while(i > 0u);
 401
 402     /* The result is store in the destination buffer. */
 403     *pDst++ = acc;
 404
 405     /* Advance state pointer by 1 for the next sample */
 406     pState = pState + 1;
 407
 408     blkCnt--;
 409   }
 410
 411   /* Processing is complete.
 412    ** Now copy the last numTaps - 1 samples to the starting of the state buffer.
 413    ** This prepares the state buffer for the next function call. */
 414
 415   /* Points to the start of the state buffer */
 416   pStateCurnt = S->pState;
 417
 418   /* Copy numTaps number of values */
 419   tapCnt = numTaps - 1u;
 420
 421   /* Copy data */
 422   while(tapCnt > 0u)
 423   {
 424     *pStateCurnt++ = *pState++;
 425
 426     /* Decrement the loop counter */
 427     tapCnt--;
 428   }
 429
 430 #endif /*   #ifndef ARM_MATH_CM0 */
 431
 432 }
 433
 434 /**
 435  * @} end of FIR group
 436  */