git.gag.com Git - fw/stlink/blob - exampleF4/CMSIS/DSP_Lib/Source/FilteringFunctions/arm_fir_sparse_f32.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        15. July 2011
   5 * $Revision:    V1.0.10
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_fir_sparse_f32.c
   9 *
  10 * Description:  Floating-point sparse FIR filter processing function.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Version 1.0.10 2011/7/15
  15 *    Big Endian support added and Merged M0 and M3/M4 Source code.
  16 *
  17 * Version 1.0.3 2010/11/29
  18 *    Re-organized the CMSIS folders and updated documentation.
  19 *
  20 * Version 1.0.2 2010/11/11
  21 *    Documentation updated.
  22 *
  23 * Version 1.0.1 2010/10/05
  24 *    Production release and review comments incorporated.
  25 *
  26 * Version 1.0.0 2010/09/20
  27 *    Production release and review comments incorporated
  28 *
  29 * Version 0.0.7  2010/06/10
  30 *    Misra-C changes done
  31 * ------------------------------------------------------------------- */
  32 #include "arm_math.h"
  33
  34 /**
  35  * @ingroup groupFilters
  36  */
  37
  38 /**
  39  * @defgroup FIR_Sparse Finite Impulse Response (FIR) Sparse Filters
  40  *
  41  * This group of functions implements sparse FIR filters.
  42  * Sparse FIR filters are equivalent to standard FIR filters except that most of the coefficients are equal to zero.
  43  * Sparse filters are used for simulating reflections in communications and audio applications.
  44  *
  45  * There are separate functions for Q7, Q15, Q31, and floating-point data types.
  46  * The functions operate on blocks  of input and output data and each call to the function processes
  47  * <code>blockSize</code> samples through the filter.  <code>pSrc</code> and
  48  * <code>pDst</code> point to the input and output arrays, respectively, each containing <code>blockSize</code> values.
  49  *
  50  * \par Algorithm:
  51  * The sparse filter instance structure contains an array of tap indices <code>pTapDelay</code> which specifies the locations of the non-zero coefficients.
  52  * This is in addition to the coefficient array <code>b</code>.
  53  * The implementation essentially skips the multiplications by zero and leads to an efficient realization.
  54  * <pre>
  55  *     y[n] = b[0] * x[n-pTapDelay[0]] + b[1] * x[n-pTapDelay[1]] + b[2] * x[n-pTapDelay[2]] + ...+ b[numTaps-1] * x[n-pTapDelay[numTaps-1]]
  56  * </pre>
  57  * \par
  58  * \image html FIRSparse.gif "Sparse FIR filter.  b[n] represents the filter coefficients"
  59  * \par
  60  * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>;
  61  * <code>pTapDelay</code> points to an array of nonzero indices and is also of size <code>numTaps</code>;
  62  * <code>pState</code> points to a state array of size <code>maxDelay + blockSize</code>, where
  63  * <code>maxDelay</code> is the largest offset value that is ever used in the <code>pTapDelay</code> array.
  64  * Some of the processing functions also require temporary working buffers.
  65  *
  66  * \par Instance Structure
  67  * The coefficients and state variables for a filter are stored together in an instance data structure.
  68  * A separate instance structure must be defined for each filter.
  69  * Coefficient and offset arrays may be shared among several instances while state variable arrays cannot be shared.
  70  * There are separate instance structure declarations for each of the 4 supported data types.
  71  *
  72  * \par Initialization Functions
  73  * There is also an associated initialization function for each data type.
  74  * The initialization function performs the following operations:
  75  * - Sets the values of the internal structure fields.
  76  * - Zeros out the values in the state buffer.
  77  *
  78  * \par
  79  * Use of the initialization function is optional.
  80  * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
  81  * To place an instance structure into a const data section, the instance structure must be manually initialized.
  82  * Set the values in the state buffer to zeros before static initialization.
  83  * The code below statically initializes each of the 4 different data type filter instance structures
  84  * <pre>
  85  *arm_fir_sparse_instance_f32 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};
  86  *arm_fir_sparse_instance_q31 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};
  87  *arm_fir_sparse_instance_q15 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};
  88  *arm_fir_sparse_instance_q7 S =  {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};
  89  * </pre>
  90  * \par
  91  *
  92  * \par Fixed-Point Behavior
  93  * Care must be taken when using the fixed-point versions of the sparse FIR filter functions.
  94  * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
  95  * Refer to the function specific documentation below for usage guidelines.
  96  */
  97
  98 /**
  99  * @addtogroup FIR_Sparse
 100  * @{
 101  */
 102
 103 /**
 104  * @brief Processing function for the floating-point sparse FIR filter.
 105  * @param[in]  *S          points to an instance of the floating-point sparse FIR structure.
 106  * @param[in]  *pSrc       points to the block of input data.
 107  * @param[out] *pDst       points to the block of output data
 108  * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
 109  * @param[in]  blockSize   number of input samples to process per call.
 110  * @return none.
 111  */
 112
 113 void arm_fir_sparse_f32(
 114   arm_fir_sparse_instance_f32 * S,
 115   float32_t * pSrc,
 116   float32_t * pDst,
 117   float32_t * pScratchIn,
 118   uint32_t blockSize)
 119 {
 120
 121   float32_t *pState = S->pState;                 /* State pointer */
 122   float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
 123   float32_t *px;                                 /* Scratch buffer pointer */
 124   float32_t *py = pState;                        /* Temporary pointers for state buffer */
 125   float32_t *pb = pScratchIn;                    /* Temporary pointers for scratch buffer */
 126   float32_t *pOut;                               /* Destination pointer */
 127   int32_t *pTapDelay = S->pTapDelay;             /* Pointer to the array containing offset of the non-zero tap values. */
 128   uint32_t delaySize = S->maxDelay + blockSize;  /* state length */
 129   uint16_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter  */
 130   int32_t readIndex;                             /* Read index of the state buffer */
 131   uint32_t tapCnt, blkCnt;                       /* loop counters */
 132   float32_t coeff = *pCoeffs++;                  /* Read the first coefficient value */
 133
 134
 135
 136   /* BlockSize of Input samples are copied into the state buffer */
 137   /* StateIndex points to the starting position to write in the state buffer */
 138   arm_circularWrite_f32((int32_t *) py, delaySize, &S->stateIndex, 1,
 139                         (int32_t *) pSrc, 1, blockSize);
 140
 141
 142   /* Read Index, from where the state buffer should be read, is calculated. */
 143   readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
 144
 145   /* Wraparound of readIndex */
 146   if(readIndex < 0)
 147   {
 148     readIndex += (int32_t) delaySize;
 149   }
 150
 151   /* Working pointer for state buffer is updated */
 152   py = pState;
 153
 154   /* blockSize samples are read from the state buffer */
 155   arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1,
 156                        (int32_t *) pb, (int32_t *) pb, blockSize, 1,
 157                        blockSize);
 158
 159   /* Working pointer for the scratch buffer */
 160   px = pb;
 161
 162   /* Working pointer for destination buffer */
 163   pOut = pDst;
 164
 165
 166 #ifndef ARM_MATH_CM0
 167
 168   /* Run the below code for Cortex-M4 and Cortex-M3 */
 169
 170   /* Loop over the blockSize. Unroll by a factor of 4.
 171    * Compute 4 Multiplications at a time. */
 172   blkCnt = blockSize >> 2u;
 173
 174   while(blkCnt > 0u)
 175   {
 176     /* Perform Multiplications and store in destination buffer */
 177     *pOut++ = *px++ * coeff;
 178     *pOut++ = *px++ * coeff;
 179     *pOut++ = *px++ * coeff;
 180     *pOut++ = *px++ * coeff;
 181
 182     /* Decrement the loop counter */
 183     blkCnt--;
 184   }
 185
 186   /* If the blockSize is not a multiple of 4,
 187    * compute the remaining samples */
 188   blkCnt = blockSize % 0x4u;
 189
 190   while(blkCnt > 0u)
 191   {
 192     /* Perform Multiplications and store in destination buffer */
 193     *pOut++ = *px++ * coeff;
 194
 195     /* Decrement the loop counter */
 196     blkCnt--;
 197   }
 198
 199   /* Load the coefficient value and
 200    * increment the coefficient buffer for the next set of state values */
 201   coeff = *pCoeffs++;
 202
 203   /* Read Index, from where the state buffer should be read, is calculated. */
 204   readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
 205
 206   /* Wraparound of readIndex */
 207   if(readIndex < 0)
 208   {
 209     readIndex += (int32_t) delaySize;
 210   }
 211
 212   /* Loop over the number of taps. */
 213   tapCnt = (uint32_t) numTaps - 1u;
 214
 215   while(tapCnt > 0u)
 216   {
 217
 218     /* Working pointer for state buffer is updated */
 219     py = pState;
 220
 221     /* blockSize samples are read from the state buffer */
 222     arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1,
 223                          (int32_t *) pb, (int32_t *) pb, blockSize, 1,
 224                          blockSize);
 225
 226     /* Working pointer for the scratch buffer */
 227     px = pb;
 228
 229     /* Working pointer for destination buffer */
 230     pOut = pDst;
 231
 232     /* Loop over the blockSize. Unroll by a factor of 4.
 233      * Compute 4 MACS at a time. */
 234     blkCnt = blockSize >> 2u;
 235
 236     while(blkCnt > 0u)
 237     {
 238       /* Perform Multiply-Accumulate */
 239       *pOut++ += *px++ * coeff;
 240       *pOut++ += *px++ * coeff;
 241       *pOut++ += *px++ * coeff;
 242       *pOut++ += *px++ * coeff;
 243
 244       /* Decrement the loop counter */
 245       blkCnt--;
 246     }
 247
 248     /* If the blockSize is not a multiple of 4,
 249      * compute the remaining samples */
 250     blkCnt = blockSize % 0x4u;
 251
 252     while(blkCnt > 0u)
 253     {
 254       /* Perform Multiply-Accumulate */
 255       *pOut++ += *px++ * coeff;
 256
 257       /* Decrement the loop counter */
 258       blkCnt--;
 259     }
 260
 261     /* Load the coefficient value and
 262      * increment the coefficient buffer for the next set of state values */
 263     coeff = *pCoeffs++;
 264
 265     /* Read Index, from where the state buffer should be read, is calculated. */
 266     readIndex = ((int32_t) S->stateIndex -
 267                  (int32_t) blockSize) - *pTapDelay++;
 268
 269     /* Wraparound of readIndex */
 270     if(readIndex < 0)
 271     {
 272       readIndex += (int32_t) delaySize;
 273     }
 274
 275     /* Decrement the tap loop counter */
 276     tapCnt--;
 277   }
 278
 279 #else
 280
 281 /* Run the below code for Cortex-M0 */
 282
 283   blkCnt = blockSize;
 284
 285   while(blkCnt > 0u)
 286   {
 287     /* Perform Multiplications and store in destination buffer */
 288     *pOut++ = *px++ * coeff;
 289
 290     /* Decrement the loop counter */
 291     blkCnt--;
 292   }
 293
 294   /* Load the coefficient value and
 295    * increment the coefficient buffer for the next set of state values */
 296   coeff = *pCoeffs++;
 297
 298   /* Read Index, from where the state buffer should be read, is calculated. */
 299   readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
 300
 301   /* Wraparound of readIndex */
 302   if(readIndex < 0)
 303   {
 304     readIndex += (int32_t) delaySize;
 305   }
 306
 307   /* Loop over the number of taps. */
 308   tapCnt = (uint32_t) numTaps - 1u;
 309
 310   while(tapCnt > 0u)
 311   {
 312
 313     /* Working pointer for state buffer is updated */
 314     py = pState;
 315
 316     /* blockSize samples are read from the state buffer */
 317     arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1,
 318                          (int32_t *) pb, (int32_t *) pb, blockSize, 1,
 319                          blockSize);
 320
 321     /* Working pointer for the scratch buffer */
 322     px = pb;
 323
 324     /* Working pointer for destination buffer */
 325     pOut = pDst;
 326
 327     blkCnt = blockSize;
 328
 329     while(blkCnt > 0u)
 330     {
 331       /* Perform Multiply-Accumulate */
 332       *pOut++ += *px++ * coeff;
 333
 334       /* Decrement the loop counter */
 335       blkCnt--;
 336     }
 337
 338     /* Load the coefficient value and
 339      * increment the coefficient buffer for the next set of state values */
 340     coeff = *pCoeffs++;
 341
 342     /* Read Index, from where the state buffer should be read, is calculated. */
 343     readIndex =
 344       ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
 345
 346     /* Wraparound of readIndex */
 347     if(readIndex < 0)
 348     {
 349       readIndex += (int32_t) delaySize;
 350     }
 351
 352     /* Decrement the tap loop counter */
 353     tapCnt--;
 354   }
 355
 356 #endif /*   #ifndef ARM_MATH_CM0        */
 357
 358 }
 359
 360 /**
 361  * @} end of FIR_Sparse group
 362  */