git.gag.com Git - fw/stlink/blob - exampleF4/CMSIS/DSP_Lib/Source/FilteringFunctions/arm_fir_decimate_q15.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        15. July 2011
   5 * $Revision:    V1.0.10
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_fir_decimate_q15.c
   9 *
  10 * Description:  Q15 FIR Decimator.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Version 1.0.10 2011/7/15
  15 *    Big Endian support added and Merged M0 and M3/M4 Source code.
  16 *
  17 * Version 1.0.3 2010/11/29
  18 *    Re-organized the CMSIS folders and updated documentation.
  19 *
  20 * Version 1.0.2 2010/11/11
  21 *    Documentation updated.
  22 *
  23 * Version 1.0.1 2010/10/05
  24 *    Production release and review comments incorporated.
  25 *
  26 * Version 1.0.0 2010/09/20
  27 *    Production release and review comments incorporated
  28 *
  29 * Version 0.0.7  2010/06/10
  30 *    Misra-C changes done
  31 * -------------------------------------------------------------------- */
  32
  33 #include "arm_math.h"
  34
  35 /**
  36  * @ingroup groupFilters
  37  */
  38
  39 /**
  40  * @addtogroup FIR_decimate
  41  * @{
  42  */
  43
  44 /**
  45  * @brief Processing function for the Q15 FIR decimator.
  46  * @param[in] *S points to an instance of the Q15 FIR decimator structure.
  47  * @param[in] *pSrc points to the block of input data.
  48  * @param[out] *pDst points to the location where the output result is written.
  49  * @param[in] blockSize number of input samples to process per call.
  50  * @return none.
  51  *
  52  * <b>Scaling and Overflow Behavior:</b>
  53  * \par
  54  * The function is implemented using a 64-bit internal accumulator.
  55  * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
  56  * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
  57  * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
  58  * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
  59  * Lastly, the accumulator is saturated to yield a result in 1.15 format.
  60  *
  61  * \par
  62  * Refer to the function <code>arm_fir_decimate_fast_q15()</code> for a faster but less precise implementation of this function for Cortex-M3 and Cortex-M4.
  63  */
  64
  65 void arm_fir_decimate_q15(
  66   const arm_fir_decimate_instance_q15 * S,
  67   q15_t * pSrc,
  68   q15_t * pDst,
  69   uint32_t blockSize)
  70 {
  71   q15_t *pState = S->pState;                     /* State pointer */
  72   q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
  73   q15_t *pStateCurnt;                            /* Points to the current sample of the state */
  74   q15_t *px;                                     /* Temporary pointer for state buffer */
  75   q15_t *pb;                                     /* Temporary pointer coefficient buffer */
  76   q31_t x0, c0;                                  /* Temporary variables to hold state and coefficient values */
  77   q63_t sum0;                                    /* Accumulators */
  78   uint32_t numTaps = S->numTaps;                 /* Number of taps */
  79   uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M;  /* Loop counters */
  80
  81 #ifndef ARM_MATH_CM0
  82
  83   /* Run the below code for Cortex-M4 and Cortex-M3 */
  84
  85   /* S->pState buffer contains previous frame (numTaps - 1) samples */
  86   /* pStateCurnt points to the location where the new input data should be written */
  87   pStateCurnt = S->pState + (numTaps - 1u);
  88
  89   /* Total number of output samples to be computed */
  90   blkCnt = outBlockSize;
  91
  92   while(blkCnt > 0u)
  93   {
  94     /* Copy decimation factor number of new input samples into the state buffer */
  95     i = S->M;
  96
  97     do
  98     {
  99       *pStateCurnt++ = *pSrc++;
 100
 101     } while(--i);
 102
 103     /*Set sum to zero */
 104     sum0 = 0;
 105
 106     /* Initialize state pointer */
 107     px = pState;
 108
 109     /* Initialize coeff pointer */
 110     pb = pCoeffs;
 111
 112     /* Loop unrolling.  Process 4 taps at a time. */
 113     tapCnt = numTaps >> 2;
 114
 115     /* Loop over the number of taps.  Unroll by a factor of 4.
 116      ** Repeat until we've computed numTaps-4 coefficients. */
 117     while(tapCnt > 0u)
 118     {
 119       /* Read the Read b[numTaps-1] and b[numTaps-2]  coefficients */
 120       c0 = *__SIMD32(pb)++;
 121
 122       /* Read x[n-numTaps-1] and x[n-numTaps-2]sample */
 123       x0 = *__SIMD32(px)++;
 124
 125       /* Perform the multiply-accumulate */
 126       sum0 = __SMLALD(x0, c0, sum0);
 127
 128       /* Read the b[numTaps-3] and b[numTaps-4] coefficient */
 129       c0 = *__SIMD32(pb)++;
 130
 131       /* Read x[n-numTaps-2] and x[n-numTaps-3] sample */
 132       x0 = *__SIMD32(px)++;
 133
 134       /* Perform the multiply-accumulate */
 135       sum0 = __SMLALD(x0, c0, sum0);
 136
 137       /* Decrement the loop counter */
 138       tapCnt--;
 139     }
 140
 141     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 142     tapCnt = numTaps % 0x4u;
 143
 144     while(tapCnt > 0u)
 145     {
 146       /* Read coefficients */
 147       c0 = *pb++;
 148
 149       /* Fetch 1 state variable */
 150       x0 = *px++;
 151
 152       /* Perform the multiply-accumulate */
 153       sum0 = __SMLALD(x0, c0, sum0);
 154
 155       /* Decrement the loop counter */
 156       tapCnt--;
 157     }
 158
 159     /* Advance the state pointer by the decimation factor
 160      * to process the next group of decimation factor number samples */
 161     pState = pState + S->M;
 162
 163     /* Store filter output, smlad returns the values in 2.14 format */
 164     /* so downsacle by 15 to get output in 1.15 */
 165     *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
 166
 167     /* Decrement the loop counter */
 168     blkCnt--;
 169   }
 170
 171   /* Processing is complete.
 172    ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
 173    ** This prepares the state buffer for the next function call. */
 174
 175   /* Points to the start of the state buffer */
 176   pStateCurnt = S->pState;
 177
 178   i = (numTaps - 1u) >> 2u;
 179
 180   /* copy data */
 181   while(i > 0u)
 182   {
 183     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
 184     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
 185
 186     /* Decrement the loop counter */
 187     i--;
 188   }
 189
 190   i = (numTaps - 1u) % 0x04u;
 191
 192   /* copy data */
 193   while(i > 0u)
 194   {
 195     *pStateCurnt++ = *pState++;
 196
 197     /* Decrement the loop counter */
 198     i--;
 199   }
 200
 201 #else
 202
 203 /* Run the below code for Cortex-M0 */
 204
 205   /* S->pState buffer contains previous frame (numTaps - 1) samples */
 206   /* pStateCurnt points to the location where the new input data should be written */
 207   pStateCurnt = S->pState + (numTaps - 1u);
 208
 209   /* Total number of output samples to be computed */
 210   blkCnt = outBlockSize;
 211
 212   while(blkCnt > 0u)
 213   {
 214     /* Copy decimation factor number of new input samples into the state buffer */
 215     i = S->M;
 216
 217     do
 218     {
 219       *pStateCurnt++ = *pSrc++;
 220
 221     } while(--i);
 222
 223     /*Set sum to zero */
 224     sum0 = 0;
 225
 226     /* Initialize state pointer */
 227     px = pState;
 228
 229     /* Initialize coeff pointer */
 230     pb = pCoeffs;
 231
 232     tapCnt = numTaps;
 233
 234     while(tapCnt > 0u)
 235     {
 236       /* Read coefficients */
 237       c0 = *pb++;
 238
 239       /* Fetch 1 state variable */
 240       x0 = *px++;
 241
 242       /* Perform the multiply-accumulate */
 243       sum0 += (q31_t) x0 *c0;
 244
 245       /* Decrement the loop counter */
 246       tapCnt--;
 247     }
 248
 249     /* Advance the state pointer by the decimation factor
 250      * to process the next group of decimation factor number samples */
 251     pState = pState + S->M;
 252
 253     /*Store filter output , smlad will return the values in 2.14 format */
 254     /* so downsacle by 15 to get output in 1.15 */
 255     *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
 256
 257     /* Decrement the loop counter */
 258     blkCnt--;
 259   }
 260
 261   /* Processing is complete.
 262    ** Now copy the last numTaps - 1 samples to the start of the state buffer.
 263    ** This prepares the state buffer for the next function call. */
 264
 265   /* Points to the start of the state buffer */
 266   pStateCurnt = S->pState;
 267
 268   i = numTaps - 1u;
 269
 270   /* copy data */
 271   while(i > 0u)
 272   {
 273     *pStateCurnt++ = *pState++;
 274
 275     /* Decrement the loop counter */
 276     i--;
 277   }
 278
 279 #endif /*   #ifndef ARM_MATH_CM0 */
 280
 281 }
 282
 283 /**
 284  * @} end of FIR_decimate group
 285  */