git.gag.com Git - fw/stlink/blob - exampleF4/CMSIS/DSP_Lib/Source/FilteringFunctions/arm_lms_norm_q15.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        15. July 2011
   5 * $Revision:    V1.0.10
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_lms_norm_q15.c
   9 *
  10 * Description:  Q15 NLMS filter.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Version 1.0.10 2011/7/15
  15 *    Big Endian support added and Merged M0 and M3/M4 Source code.
  16 *
  17 * Version 1.0.3 2010/11/29
  18 *    Re-organized the CMSIS folders and updated documentation.
  19 *
  20 * Version 1.0.2 2010/11/11
  21 *    Documentation updated.
  22 *
  23 * Version 1.0.1 2010/10/05
  24 *    Production release and review comments incorporated.
  25 *
  26 * Version 1.0.0 2010/09/20
  27 *    Production release and review comments incorporated
  28 *
  29 * Version 0.0.7  2010/06/10
  30 *    Misra-C changes done
  31 * -------------------------------------------------------------------- */
  32
  33 #include "arm_math.h"
  34
  35 /**
  36  * @ingroup groupFilters
  37  */
  38
  39 /**
  40  * @addtogroup LMS_NORM
  41  * @{
  42  */
  43
  44 /**
  45 * @brief Processing function for Q15 normalized LMS filter.
  46 * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.
  47 * @param[in] *pSrc points to the block of input data.
  48 * @param[in] *pRef points to the block of reference data.
  49 * @param[out] *pOut points to the block of output data.
  50 * @param[out] *pErr points to the block of error data.
  51 * @param[in] blockSize number of samples to process.
  52 * @return none.
  53 *
  54 * <b>Scaling and Overflow Behavior:</b>
  55 * \par
  56 * The function is implemented using a 64-bit internal accumulator.
  57 * Both coefficients and state variables are represented in 1.15 format and
  58 * multiplications yield a 2.30 result. The 2.30 intermediate results are
  59 * accumulated in a 64-bit accumulator in 34.30 format.
  60 * There is no risk of internal overflow with this approach and the full
  61 * precision of intermediate multiplications is preserved. After all additions
  62 * have been performed, the accumulator is truncated to 34.15 format by
  63 * discarding low 15 bits. Lastly, the accumulator is saturated to yield a
  64 * result in 1.15 format.
  65 *
  66 * \par
  67 *       In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted.
  68 *
  69  */
  70
  71 void arm_lms_norm_q15(
  72   arm_lms_norm_instance_q15 * S,
  73   q15_t * pSrc,
  74   q15_t * pRef,
  75   q15_t * pOut,
  76   q15_t * pErr,
  77   uint32_t blockSize)
  78 {
  79   q15_t *pState = S->pState;                     /* State pointer */
  80   q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
  81   q15_t *pStateCurnt;                            /* Points to the current sample of the state */
  82   q15_t *px, *pb;                                /* Temporary pointers for state and coefficient buffers */
  83   q15_t mu = S->mu;                              /* Adaptive factor */
  84   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
  85   uint32_t tapCnt, blkCnt;                       /* Loop counters */
  86   q31_t energy;                                  /* Energy of the input */
  87   q63_t acc;                                     /* Accumulator */
  88   q15_t e = 0, d = 0;                            /* error, reference data sample */
  89   q15_t w = 0, in;                               /* weight factor and state */
  90   q15_t x0;                                      /* temporary variable to hold input sample */
  91   uint32_t shift = (uint32_t) S->postShift + 1u; /* Shift to be applied to the output */
  92   q15_t errorXmu, oneByEnergy;                   /* Temporary variables to store error and mu product and reciprocal of energy */
  93   q15_t postShift;                               /* Post shift to be applied to weight after reciprocal calculation */
  94   q31_t coef;                                    /* Teporary variable for coefficient */
  95
  96   energy = S->energy;
  97   x0 = S->x0;
  98
  99   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
 100   /* pStateCurnt points to the location where the new input data should be written */
 101   pStateCurnt = &(S->pState[(numTaps - 1u)]);
 102
 103   /* Loop over blockSize number of values */
 104   blkCnt = blockSize;
 105
 106
 107 #ifndef ARM_MATH_CM0
 108
 109   /* Run the below code for Cortex-M4 and Cortex-M3 */
 110
 111   while(blkCnt > 0u)
 112   {
 113     /* Copy the new input sample into the state buffer */
 114     *pStateCurnt++ = *pSrc;
 115
 116     /* Initialize pState pointer */
 117     px = pState;
 118
 119     /* Initialize coeff pointer */
 120     pb = (pCoeffs);
 121
 122     /* Read the sample from input buffer */
 123     in = *pSrc++;
 124
 125     /* Update the energy calculation */
 126     energy -= (((q31_t) x0 * (x0)) >> 15);
 127     energy += (((q31_t) in * (in)) >> 15);
 128
 129     /* Set the accumulator to zero */
 130     acc = 0;
 131
 132     /* Loop unrolling.  Process 4 taps at a time. */
 133     tapCnt = numTaps >> 2;
 134
 135     while(tapCnt > 0u)
 136     {
 137
 138       /* Perform the multiply-accumulate */
 139       acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
 140       acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
 141
 142       /* Decrement the loop counter */
 143       tapCnt--;
 144     }
 145
 146     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 147     tapCnt = numTaps % 0x4u;
 148
 149     while(tapCnt > 0u)
 150     {
 151       /* Perform the multiply-accumulate */
 152       acc += (((q31_t) * px++ * (*pb++)));
 153
 154       /* Decrement the loop counter */
 155       tapCnt--;
 156     }
 157
 158     /* Converting the result to 1.15 format */
 159     acc = __SSAT((acc >> (16u - shift)), 16u);
 160
 161     /* Store the result from accumulator into the destination buffer. */
 162     *pOut++ = (q15_t) acc;
 163
 164     /* Compute and store error */
 165     d = *pRef++;
 166     e = d - (q15_t) acc;
 167     *pErr++ = e;
 168
 169     /* Calculation of 1/energy */
 170     postShift = arm_recip_q15((q15_t) energy + DELTA_Q15,
 171                               &oneByEnergy, S->recipTable);
 172
 173     /* Calculation of e * mu value */
 174     errorXmu = (q15_t) (((q31_t) e * mu) >> 15);
 175
 176     /* Calculation of (e * mu) * (1/energy) value */
 177     acc = (((q31_t) errorXmu * oneByEnergy) >> (15 - postShift));
 178
 179     /* Weighting factor for the normalized version */
 180     w = (q15_t) __SSAT((q31_t) acc, 16);
 181
 182     /* Initialize pState pointer */
 183     px = pState;
 184
 185     /* Initialize coeff pointer */
 186     pb = (pCoeffs);
 187
 188     /* Loop unrolling.  Process 4 taps at a time. */
 189     tapCnt = numTaps >> 2;
 190
 191     /* Update filter coefficients */
 192     while(tapCnt > 0u)
 193     {
 194       coef = *pb + (((q31_t) w * (*px++)) >> 15);
 195       *pb++ = (q15_t) __SSAT((coef), 16);
 196       coef = *pb + (((q31_t) w * (*px++)) >> 15);
 197       *pb++ = (q15_t) __SSAT((coef), 16);
 198       coef = *pb + (((q31_t) w * (*px++)) >> 15);
 199       *pb++ = (q15_t) __SSAT((coef), 16);
 200       coef = *pb + (((q31_t) w * (*px++)) >> 15);
 201       *pb++ = (q15_t) __SSAT((coef), 16);
 202
 203       /* Decrement the loop counter */
 204       tapCnt--;
 205     }
 206
 207     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 208     tapCnt = numTaps % 0x4u;
 209
 210     while(tapCnt > 0u)
 211     {
 212       /* Perform the multiply-accumulate */
 213       coef = *pb + (((q31_t) w * (*px++)) >> 15);
 214       *pb++ = (q15_t) __SSAT((coef), 16);
 215
 216       /* Decrement the loop counter */
 217       tapCnt--;
 218     }
 219
 220     /* Read the sample from state buffer */
 221     x0 = *pState;
 222
 223     /* Advance state pointer by 1 for the next sample */
 224     pState = pState + 1u;
 225
 226     /* Decrement the loop counter */
 227     blkCnt--;
 228   }
 229
 230   /* Save energy and x0 values for the next frame */
 231   S->energy = (q15_t) energy;
 232   S->x0 = x0;
 233
 234   /* Processing is complete. Now copy the last numTaps - 1 samples to the
 235      satrt of the state buffer. This prepares the state buffer for the
 236      next function call. */
 237
 238   /* Points to the start of the pState buffer */
 239   pStateCurnt = S->pState;
 240
 241   /* Calculation of count for copying integer writes */
 242   tapCnt = (numTaps - 1u) >> 2;
 243
 244   while(tapCnt > 0u)
 245   {
 246
 247     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
 248     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
 249
 250     tapCnt--;
 251
 252   }
 253
 254   /* Calculation of count for remaining q15_t data */
 255   tapCnt = (numTaps - 1u) % 0x4u;
 256
 257   /* copy data */
 258   while(tapCnt > 0u)
 259   {
 260     *pStateCurnt++ = *pState++;
 261
 262     /* Decrement the loop counter */
 263     tapCnt--;
 264   }
 265
 266 #else
 267
 268   /* Run the below code for Cortex-M0 */
 269
 270   while(blkCnt > 0u)
 271   {
 272     /* Copy the new input sample into the state buffer */
 273     *pStateCurnt++ = *pSrc;
 274
 275     /* Initialize pState pointer */
 276     px = pState;
 277
 278     /* Initialize pCoeffs pointer */
 279     pb = pCoeffs;
 280
 281     /* Read the sample from input buffer */
 282     in = *pSrc++;
 283
 284     /* Update the energy calculation */
 285     energy -= (((q31_t) x0 * (x0)) >> 15);
 286     energy += (((q31_t) in * (in)) >> 15);
 287
 288     /* Set the accumulator to zero */
 289     acc = 0;
 290
 291     /* Loop over numTaps number of values */
 292     tapCnt = numTaps;
 293
 294     while(tapCnt > 0u)
 295     {
 296       /* Perform the multiply-accumulate */
 297       acc += (((q31_t) * px++ * (*pb++)));
 298
 299       /* Decrement the loop counter */
 300       tapCnt--;
 301     }
 302
 303     /* Converting the result to 1.15 format */
 304     acc = __SSAT((acc >> (16u - shift)), 16u);
 305
 306     /* Store the result from accumulator into the destination buffer. */
 307     *pOut++ = (q15_t) acc;
 308
 309     /* Compute and store error */
 310     d = *pRef++;
 311     e = d - (q15_t) acc;
 312     *pErr++ = e;
 313
 314     /* Calculation of 1/energy */
 315     postShift = arm_recip_q15((q15_t) energy + DELTA_Q15,
 316                               &oneByEnergy, S->recipTable);
 317
 318     /* Calculation of e * mu value */
 319     errorXmu = (q15_t) (((q31_t) e * mu) >> 15);
 320
 321     /* Calculation of (e * mu) * (1/energy) value */
 322     acc = (((q31_t) errorXmu * oneByEnergy) >> (15 - postShift));
 323
 324     /* Weighting factor for the normalized version */
 325     w = (q15_t) __SSAT((q31_t) acc, 16);
 326
 327     /* Initialize pState pointer */
 328     px = pState;
 329
 330     /* Initialize coeff pointer */
 331     pb = (pCoeffs);
 332
 333     /* Loop over numTaps number of values */
 334     tapCnt = numTaps;
 335
 336     while(tapCnt > 0u)
 337     {
 338       /* Perform the multiply-accumulate */
 339       coef = *pb + (((q31_t) w * (*px++)) >> 15);
 340       *pb++ = (q15_t) __SSAT((coef), 16);
 341
 342       /* Decrement the loop counter */
 343       tapCnt--;
 344     }
 345
 346     /* Read the sample from state buffer */
 347     x0 = *pState;
 348
 349     /* Advance state pointer by 1 for the next sample */
 350     pState = pState + 1u;
 351
 352     /* Decrement the loop counter */
 353     blkCnt--;
 354   }
 355
 356   /* Save energy and x0 values for the next frame */
 357   S->energy = (q15_t) energy;
 358   S->x0 = x0;
 359
 360   /* Processing is complete. Now copy the last numTaps - 1 samples to the
 361      satrt of the state buffer. This prepares the state buffer for the
 362      next function call. */
 363
 364   /* Points to the start of the pState buffer */
 365   pStateCurnt = S->pState;
 366
 367   /* copy (numTaps - 1u) data */
 368   tapCnt = (numTaps - 1u);
 369
 370   /* copy data */
 371   while(tapCnt > 0u)
 372   {
 373     *pStateCurnt++ = *pState++;
 374
 375     /* Decrement the loop counter */
 376     tapCnt--;
 377   }
 378
 379 #endif /*   #ifndef ARM_MATH_CM0 */
 380
 381 }
 382
 383
 384 /**
 385    * @} end of LMS_NORM group
 386    */