git.gag.com Git - fw/stlink/blob - exampleF4/CMSIS/DSP_Lib/Source/FilteringFunctions/arm_iir_lattice_q31.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        15. July 2011
   5 * $Revision:    V1.0.10
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_iir_lattice_q31.c
   9 *
  10 * Description:  Q31 IIR lattice filter processing function.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Version 1.0.10 2011/7/15
  15 *    Big Endian support added and Merged M0 and M3/M4 Source code.
  16 *
  17 * Version 1.0.3 2010/11/29
  18 *    Re-organized the CMSIS folders and updated documentation.
  19 *
  20 * Version 1.0.2 2010/11/11
  21 *    Documentation updated.
  22 *
  23 * Version 1.0.1 2010/10/05
  24 *    Production release and review comments incorporated.
  25 *
  26 * Version 1.0.0 2010/09/20
  27 *    Production release and review comments incorporated
  28 *
  29 * Version 0.0.7  2010/06/10
  30 *    Misra-C changes done
  31 * -------------------------------------------------------------------- */
  32
  33 #include "arm_math.h"
  34
  35 /**
  36  * @ingroup groupFilters
  37  */
  38
  39 /**
  40  * @addtogroup IIR_Lattice
  41  * @{
  42  */
  43
  44 /**
  45  * @brief Processing function for the Q31 IIR lattice filter.
  46  * @param[in] *S points to an instance of the Q31 IIR lattice structure.
  47  * @param[in] *pSrc points to the block of input data.
  48  * @param[out] *pDst points to the block of output data.
  49  * @param[in] blockSize number of samples to process.
  50  * @return none.
  51  *
  52  * @details
  53  * <b>Scaling and Overflow Behavior:</b>
  54  * \par
  55  * The function is implemented using an internal 64-bit accumulator.
  56  * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
  57  * Thus, if the accumulator result overflows it wraps around rather than clip.
  58  * In order to avoid overflows completely the input signal must be scaled down by 2*log2(numStages) bits.
  59  * After all multiply-accumulates are performed, the 2.62 accumulator is saturated to 1.32 format and then truncated to 1.31 format.
  60  */
  61
  62 void arm_iir_lattice_q31(
  63   const arm_iir_lattice_instance_q31 * S,
  64   q31_t * pSrc,
  65   q31_t * pDst,
  66   uint32_t blockSize)
  67 {
  68   q31_t fcurr, fnext = 0, gcurr = 0, gnext;      /* Temporary variables for lattice stages */
  69   q63_t acc;                                     /* Accumlator */
  70   uint32_t blkCnt, tapCnt;                       /* Temporary variables for counts */
  71   q31_t *px1, *px2, *pk, *pv;                    /* Temporary pointers for state and coef */
  72   uint32_t numStages = S->numStages;             /* number of stages */
  73   q31_t *pState;                                 /* State pointer */
  74   q31_t *pStateCurnt;                            /* State current pointer */
  75
  76   blkCnt = blockSize;
  77
  78   pState = &S->pState[0];
  79
  80
  81 #ifndef ARM_MATH_CM0
  82
  83   /* Run the below code for Cortex-M4 and Cortex-M3 */
  84
  85   /* Sample processing */
  86   while(blkCnt > 0u)
  87   {
  88     /* Read Sample from input buffer */
  89     /* fN(n) = x(n) */
  90     fcurr = *pSrc++;
  91
  92     /* Initialize state read pointer */
  93     px1 = pState;
  94     /* Initialize state write pointer */
  95     px2 = pState;
  96     /* Set accumulator to zero */
  97     acc = 0;
  98     /* Initialize Ladder coeff pointer */
  99     pv = &S->pvCoeffs[0];
 100     /* Initialize Reflection coeff pointer */
 101     pk = &S->pkCoeffs[0];
 102
 103
 104     /* Process sample for first tap */
 105     gcurr = *px1++;
 106     /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
 107     fnext = __QSUB(fcurr, (q31_t) (((q63_t) gcurr * (*pk)) >> 31));
 108     /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
 109     gnext = __QADD(gcurr, (q31_t) (((q63_t) fnext * (*pk++)) >> 31));
 110     /* write gN-1(n-1) into state for next sample processing */
 111     *px2++ = gnext;
 112     /* y(n) += gN(n) * vN  */
 113     acc += ((q63_t) gnext * *pv++);
 114
 115     /* Update f values for next coefficient processing */
 116     fcurr = fnext;
 117
 118     /* Loop unrolling.  Process 4 taps at a time. */
 119     tapCnt = (numStages - 1u) >> 2;
 120
 121     while(tapCnt > 0u)
 122     {
 123
 124       /* Process sample for 2nd, 6th .. taps */
 125       /* Read gN-2(n-1) from state buffer */
 126       gcurr = *px1++;
 127       /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */
 128       fnext = __QSUB(fcurr, (q31_t) (((q63_t) gcurr * (*pk)) >> 31));
 129       /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */
 130       gnext = __QADD(gcurr, (q31_t) (((q63_t) fnext * (*pk++)) >> 31));
 131       /* y(n) += gN-1(n) * vN-1  */
 132       /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */
 133       acc += ((q63_t) gnext * *pv++);
 134       /* write gN-1(n) into state for next sample processing */
 135       *px2++ = gnext;
 136
 137       /* Process sample for 3nd, 7th ...taps */
 138       /* Read gN-3(n-1) from state buffer */
 139       gcurr = *px1++;
 140       /* Process sample for 3rd, 7th .. taps */
 141       /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */
 142       fcurr = __QSUB(fnext, (q31_t) (((q63_t) gcurr * (*pk)) >> 31));
 143       /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */
 144       gnext = __QADD(gcurr, (q31_t) (((q63_t) fcurr * (*pk++)) >> 31));
 145       /* y(n) += gN-2(n) * vN-2  */
 146       /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */
 147       acc += ((q63_t) gnext * *pv++);
 148       /* write gN-2(n) into state for next sample processing */
 149       *px2++ = gnext;
 150
 151
 152       /* Process sample for 4th, 8th ...taps */
 153       /* Read gN-4(n-1) from state buffer */
 154       gcurr = *px1++;
 155       /* Process sample for 4th, 8th .. taps */
 156       /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */
 157       fnext = __QSUB(fcurr, (q31_t) (((q63_t) gcurr * (*pk)) >> 31));
 158       /* gN-3(n) = kN-3 * fN-4(n) + gN-4(n-1) */
 159       gnext = __QADD(gcurr, (q31_t) (((q63_t) fnext * (*pk++)) >> 31));
 160       /* y(n) += gN-3(n) * vN-3  */
 161       /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... */
 162       acc += ((q63_t) gnext * *pv++);
 163       /* write gN-3(n) into state for next sample processing */
 164       *px2++ = gnext;
 165
 166
 167       /* Process sample for 5th, 9th ...taps */
 168       /* Read gN-5(n-1) from state buffer */
 169       gcurr = *px1++;
 170       /* Process sample for 5th, 9th .. taps */
 171       /* fN-5(n) = fN-4(n) - kN-4 * gN-1(n-1) */
 172       fcurr = __QSUB(fnext, (q31_t) (((q63_t) gcurr * (*pk)) >> 31));
 173       /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */
 174       gnext = __QADD(gcurr, (q31_t) (((q63_t) fcurr * (*pk++)) >> 31));
 175       /* y(n) += gN-4(n) * vN-4  */
 176       /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */
 177       acc += ((q63_t) gnext * *pv++);
 178       /* write gN-4(n) into state for next sample processing */
 179       *px2++ = gnext;
 180
 181       tapCnt--;
 182
 183     }
 184
 185     fnext = fcurr;
 186
 187     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 188     tapCnt = (numStages - 1u) % 0x4u;
 189
 190     while(tapCnt > 0u)
 191     {
 192       gcurr = *px1++;
 193       /* Process sample for last taps */
 194       fnext = __QSUB(fcurr, (q31_t) (((q63_t) gcurr * (*pk)) >> 31));
 195       gnext = __QADD(gcurr, (q31_t) (((q63_t) fnext * (*pk++)) >> 31));
 196       /* Output samples for last taps */
 197       acc += ((q63_t) gnext * *pv++);
 198       *px2++ = gnext;
 199       fcurr = fnext;
 200
 201       tapCnt--;
 202
 203     }
 204
 205     /* y(n) += g0(n) * v0 */
 206     acc += (q63_t) fnext *(
 207   *pv++);
 208
 209     *px2++ = fnext;
 210
 211     /* write out into pDst */
 212     *pDst++ = (q31_t) (acc >> 31u);
 213
 214     /* Advance the state pointer by 4 to process the next group of 4 samples */
 215     pState = pState + 1u;
 216     blkCnt--;
 217
 218   }
 219
 220   /* Processing is complete. Now copy last S->numStages samples to start of the buffer
 221      for the preperation of next frame process */
 222
 223   /* Points to the start of the state buffer */
 224   pStateCurnt = &S->pState[0];
 225   pState = &S->pState[blockSize];
 226
 227   tapCnt = numStages >> 2u;
 228
 229   /* copy data */
 230   while(tapCnt > 0u)
 231   {
 232     *pStateCurnt++ = *pState++;
 233     *pStateCurnt++ = *pState++;
 234     *pStateCurnt++ = *pState++;
 235     *pStateCurnt++ = *pState++;
 236
 237     /* Decrement the loop counter */
 238     tapCnt--;
 239
 240   }
 241
 242   /* Calculate remaining number of copies */
 243   tapCnt = (numStages) % 0x4u;
 244
 245   /* Copy the remaining q31_t data */
 246   while(tapCnt > 0u)
 247   {
 248     *pStateCurnt++ = *pState++;
 249
 250     /* Decrement the loop counter */
 251     tapCnt--;
 252   };
 253
 254 #else
 255
 256   /* Run the below code for Cortex-M0 */
 257   /* Sample processing */
 258   while(blkCnt > 0u)
 259   {
 260     /* Read Sample from input buffer */
 261     /* fN(n) = x(n) */
 262     fcurr = *pSrc++;
 263
 264     /* Initialize state read pointer */
 265     px1 = pState;
 266     /* Initialize state write pointer */
 267     px2 = pState;
 268     /* Set accumulator to zero */
 269     acc = 0;
 270     /* Initialize Ladder coeff pointer */
 271     pv = &S->pvCoeffs[0];
 272     /* Initialize Reflection coeff pointer */
 273     pk = &S->pkCoeffs[0];
 274
 275     tapCnt = numStages;
 276
 277     while(tapCnt > 0u)
 278     {
 279       gcurr = *px1++;
 280       /* Process sample */
 281       /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
 282       fnext =
 283         clip_q63_to_q31(((q63_t) fcurr -
 284                          ((q31_t) (((q63_t) gcurr * (*pk)) >> 31))));
 285       /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
 286       gnext =
 287         clip_q63_to_q31(((q63_t) gcurr +
 288                          ((q31_t) (((q63_t) fnext * (*pk++)) >> 31))));
 289       /* Output samples */
 290       /* y(n) += gN(n) * vN  */
 291       acc += ((q63_t) gnext * *pv++);
 292       /* write gN-1(n-1) into state for next sample processing */
 293       *px2++ = gnext;
 294       /* Update f values for next coefficient processing */
 295       fcurr = fnext;
 296
 297       tapCnt--;
 298     }
 299
 300     /* y(n) += g0(n) * v0 */
 301     acc += (q63_t) fnext *(
 302   *pv++);
 303
 304     *px2++ = fnext;
 305
 306     /* write out into pDst */
 307     *pDst++ = (q31_t) (acc >> 31u);
 308
 309     /* Advance the state pointer by 1 to process the next group of samples */
 310     pState = pState + 1u;
 311     blkCnt--;
 312
 313   }
 314
 315   /* Processing is complete. Now copy last S->numStages samples to start of the buffer
 316      for the preperation of next frame process */
 317
 318   /* Points to the start of the state buffer */
 319   pStateCurnt = &S->pState[0];
 320   pState = &S->pState[blockSize];
 321
 322   tapCnt = numStages;
 323
 324   /* Copy the remaining q31_t data */
 325   while(tapCnt > 0u)
 326   {
 327     *pStateCurnt++ = *pState++;
 328
 329     /* Decrement the loop counter */
 330     tapCnt--;
 331   }
 332
 333 #endif /*   #ifndef ARM_MATH_CM0 */
 334
 335 }
 336
 337
 338
 339
 340 /**
 341  * @} end of IIR_Lattice group
 342  */