git.gag.com Git - fw/stlink/blob - exampleF4/CMSIS/DSP_Lib/Source/FilteringFunctions/arm_lms_norm_q31.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        15. July 2011
   5 * $Revision:    V1.0.10
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_lms_norm_q31.c
   9 *
  10 * Description:  Processing function for the Q31 NLMS filter.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Version 1.0.10 2011/7/15
  15 *    Big Endian support added and Merged M0 and M3/M4 Source code.
  16 *
  17 * Version 1.0.3 2010/11/29
  18 *    Re-organized the CMSIS folders and updated documentation.
  19 *
  20 * Version 1.0.2 2010/11/11
  21 *    Documentation updated.
  22 *
  23 * Version 1.0.1 2010/10/05
  24 *    Production release and review comments incorporated.
  25 *
  26 * Version 1.0.0 2010/09/20
  27 *    Production release and review comments incorporated
  28 *
  29 * Version 0.0.7  2010/06/10
  30 *    Misra-C changes done
  31 * -------------------------------------------------------------------- */
  32
  33 #include "arm_math.h"
  34
  35 /**
  36  * @ingroup groupFilters
  37  */
  38
  39 /**
  40  * @addtogroup LMS_NORM
  41  * @{
  42  */
  43
  44 /**
  45 * @brief Processing function for Q31 normalized LMS filter.
  46 * @param[in] *S points to an instance of the Q31 normalized LMS filter structure.
  47 * @param[in] *pSrc points to the block of input data.
  48 * @param[in] *pRef points to the block of reference data.
  49 * @param[out] *pOut points to the block of output data.
  50 * @param[out] *pErr points to the block of error data.
  51 * @param[in] blockSize number of samples to process.
  52 * @return none.
  53 *
  54 * <b>Scaling and Overflow Behavior:</b>
  55 * \par
  56 * The function is implemented using an internal 64-bit accumulator.
  57 * The accumulator has a 2.62 format and maintains full precision of the intermediate
  58 * multiplication results but provides only a single guard bit.
  59 * Thus, if the accumulator result overflows it wraps around rather than clip.
  60 * In order to avoid overflows completely the input signal must be scaled down by
  61 * log2(numTaps) bits. The reference signal should not be scaled down.
  62 * After all multiply-accumulates are performed, the 2.62 accumulator is shifted
  63 * and saturated to 1.31 format to yield the final result.
  64 * The output signal and error signal are in 1.31 format.
  65 *
  66 * \par
  67 *       In this filter, filter coefficients are updated for each sample and the
  68 * updation of filter cofficients are saturted.
  69 *
  70 */
  71
  72 void arm_lms_norm_q31(
  73   arm_lms_norm_instance_q31 * S,
  74   q31_t * pSrc,
  75   q31_t * pRef,
  76   q31_t * pOut,
  77   q31_t * pErr,
  78   uint32_t blockSize)
  79 {
  80   q31_t *pState = S->pState;                     /* State pointer */
  81   q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
  82   q31_t *pStateCurnt;                            /* Points to the current sample of the state */
  83   q31_t *px, *pb;                                /* Temporary pointers for state and coefficient buffers */
  84   q31_t mu = S->mu;                              /* Adaptive factor */
  85   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
  86   uint32_t tapCnt, blkCnt;                       /* Loop counters */
  87   q63_t energy;                                  /* Energy of the input */
  88   q63_t acc;                                     /* Accumulator */
  89   q31_t e = 0, d = 0;                            /* error, reference data sample */
  90   q31_t w = 0, in;                               /* weight factor and state */
  91   q31_t x0;                                      /* temporary variable to hold input sample */
  92   uint32_t shift = 32u - ((uint32_t) S->postShift + 1u);        /* Shift to be applied to the output */
  93   q31_t errorXmu, oneByEnergy;                   /* Temporary variables to store error and mu product and reciprocal of energy */
  94   q31_t postShift;                               /* Post shift to be applied to weight after reciprocal calculation */
  95   q31_t coef;                                    /* Temporary variable for coef */
  96
  97   energy = S->energy;
  98   x0 = S->x0;
  99
 100   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
 101   /* pStateCurnt points to the location where the new input data should be written */
 102   pStateCurnt = &(S->pState[(numTaps - 1u)]);
 103
 104   /* Loop over blockSize number of values */
 105   blkCnt = blockSize;
 106
 107
 108 #ifndef ARM_MATH_CM0
 109
 110   /* Run the below code for Cortex-M4 and Cortex-M3 */
 111
 112   while(blkCnt > 0u)
 113   {
 114
 115     /* Copy the new input sample into the state buffer */
 116     *pStateCurnt++ = *pSrc;
 117
 118     /* Initialize pState pointer */
 119     px = pState;
 120
 121     /* Initialize coeff pointer */
 122     pb = (pCoeffs);
 123
 124     /* Read the sample from input buffer */
 125     in = *pSrc++;
 126
 127     /* Update the energy calculation */
 128     energy = (q31_t) ((((q63_t) energy << 32) -
 129                        (((q63_t) x0 * x0) << 1)) >> 32);
 130     energy = (q31_t) (((((q63_t) in * in) << 1) + (energy << 32)) >> 32);
 131
 132     /* Set the accumulator to zero */
 133     acc = 0;
 134
 135     /* Loop unrolling.  Process 4 taps at a time. */
 136     tapCnt = numTaps >> 2;
 137
 138     while(tapCnt > 0u)
 139     {
 140       /* Perform the multiply-accumulate */
 141       acc += ((q63_t) (*px++)) * (*pb++);
 142       acc += ((q63_t) (*px++)) * (*pb++);
 143       acc += ((q63_t) (*px++)) * (*pb++);
 144       acc += ((q63_t) (*px++)) * (*pb++);
 145
 146       /* Decrement the loop counter */
 147       tapCnt--;
 148     }
 149
 150     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 151     tapCnt = numTaps % 0x4u;
 152
 153     while(tapCnt > 0u)
 154     {
 155       /* Perform the multiply-accumulate */
 156       acc += ((q63_t) (*px++)) * (*pb++);
 157
 158       /* Decrement the loop counter */
 159       tapCnt--;
 160     }
 161
 162     /* Converting the result to 1.31 format */
 163     acc = (q31_t) (acc >> shift);
 164
 165     /* Store the result from accumulator into the destination buffer. */
 166     *pOut++ = (q31_t) acc;
 167
 168     /* Compute and store error */
 169     d = *pRef++;
 170     e = d - (q31_t) acc;
 171     *pErr++ = e;
 172
 173     /* Calculates the reciprocal of energy */
 174     postShift = arm_recip_q31(energy + DELTA_Q31,
 175                               &oneByEnergy, &S->recipTable[0]);
 176
 177     /* Calculation of product of (e * mu) */
 178     errorXmu = (q31_t) (((q63_t) e * mu) >> 31);
 179
 180     /* Weighting factor for the normalized version */
 181     w = clip_q63_to_q31(((q63_t) errorXmu * oneByEnergy) >> (31 - postShift));
 182
 183     /* Initialize pState pointer */
 184     px = pState;
 185
 186     /* Initialize coeff pointer */
 187     pb = (pCoeffs);
 188
 189     /* Loop unrolling.  Process 4 taps at a time. */
 190     tapCnt = numTaps >> 2;
 191
 192     /* Update filter coefficients */
 193     while(tapCnt > 0u)
 194     {
 195       /* Perform the multiply-accumulate */
 196
 197       /* coef is in 2.30 format */
 198       coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
 199       /* get coef in 1.31 format by left shifting */
 200       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 201       /* update coefficient buffer to next coefficient */
 202       pb++;
 203
 204       coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
 205       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 206       pb++;
 207
 208       coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
 209       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 210       pb++;
 211
 212       coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
 213       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 214       pb++;
 215
 216       /* Decrement the loop counter */
 217       tapCnt--;
 218     }
 219
 220     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 221     tapCnt = numTaps % 0x4u;
 222
 223     while(tapCnt > 0u)
 224     {
 225       /* Perform the multiply-accumulate */
 226       coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
 227       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 228       pb++;
 229
 230       /* Decrement the loop counter */
 231       tapCnt--;
 232     }
 233
 234     /* Read the sample from state buffer */
 235     x0 = *pState;
 236
 237     /* Advance state pointer by 1 for the next sample */
 238     pState = pState + 1;
 239
 240     /* Decrement the loop counter */
 241     blkCnt--;
 242   }
 243
 244   /* Save energy and x0 values for the next frame */
 245   S->energy = (q31_t) energy;
 246   S->x0 = x0;
 247
 248   /* Processing is complete. Now copy the last numTaps - 1 samples to the
 249      satrt of the state buffer. This prepares the state buffer for the
 250      next function call. */
 251
 252   /* Points to the start of the pState buffer */
 253   pStateCurnt = S->pState;
 254
 255   /* Loop unrolling for (numTaps - 1u) samples copy */
 256   tapCnt = (numTaps - 1u) >> 2u;
 257
 258   /* copy data */
 259   while(tapCnt > 0u)
 260   {
 261     *pStateCurnt++ = *pState++;
 262     *pStateCurnt++ = *pState++;
 263     *pStateCurnt++ = *pState++;
 264     *pStateCurnt++ = *pState++;
 265
 266     /* Decrement the loop counter */
 267     tapCnt--;
 268   }
 269
 270   /* Calculate remaining number of copies */
 271   tapCnt = (numTaps - 1u) % 0x4u;
 272
 273   /* Copy the remaining q31_t data */
 274   while(tapCnt > 0u)
 275   {
 276     *pStateCurnt++ = *pState++;
 277
 278     /* Decrement the loop counter */
 279     tapCnt--;
 280   }
 281
 282 #else
 283
 284   /* Run the below code for Cortex-M0 */
 285
 286   while(blkCnt > 0u)
 287   {
 288
 289     /* Copy the new input sample into the state buffer */
 290     *pStateCurnt++ = *pSrc;
 291
 292     /* Initialize pState pointer */
 293     px = pState;
 294
 295     /* Initialize pCoeffs pointer */
 296     pb = pCoeffs;
 297
 298     /* Read the sample from input buffer */
 299     in = *pSrc++;
 300
 301     /* Update the energy calculation */
 302     energy =
 303       (q31_t) ((((q63_t) energy << 32) - (((q63_t) x0 * x0) << 1)) >> 32);
 304     energy = (q31_t) (((((q63_t) in * in) << 1) + (energy << 32)) >> 32);
 305
 306     /* Set the accumulator to zero */
 307     acc = 0;
 308
 309     /* Loop over numTaps number of values */
 310     tapCnt = numTaps;
 311
 312     while(tapCnt > 0u)
 313     {
 314       /* Perform the multiply-accumulate */
 315       acc += ((q63_t) (*px++)) * (*pb++);
 316
 317       /* Decrement the loop counter */
 318       tapCnt--;
 319     }
 320
 321     /* Converting the result to 1.31 format */
 322     acc = (q31_t) (acc >> shift);
 323
 324     /* Store the result from accumulator into the destination buffer. */
 325     *pOut++ = (q31_t) acc;
 326
 327     /* Compute and store error */
 328     d = *pRef++;
 329     e = d - (q31_t) acc;
 330     *pErr++ = e;
 331
 332     /* Calculates the reciprocal of energy */
 333     postShift =
 334       arm_recip_q31(energy + DELTA_Q31, &oneByEnergy, &S->recipTable[0]);
 335
 336     /* Calculation of product of (e * mu) */
 337     errorXmu = (q31_t) (((q63_t) e * mu) >> 31);
 338
 339     /* Weighting factor for the normalized version */
 340     w = clip_q63_to_q31(((q63_t) errorXmu * oneByEnergy) >> (31 - postShift));
 341
 342     /* Initialize pState pointer */
 343     px = pState;
 344
 345     /* Initialize coeff pointer */
 346     pb = (pCoeffs);
 347
 348     /* Loop over numTaps number of values */
 349     tapCnt = numTaps;
 350
 351     while(tapCnt > 0u)
 352     {
 353       /* Perform the multiply-accumulate */
 354       /* coef is in 2.30 format */
 355       coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
 356       /* get coef in 1.31 format by left shifting */
 357       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 358       /* update coefficient buffer to next coefficient */
 359       pb++;
 360
 361       /* Decrement the loop counter */
 362       tapCnt--;
 363     }
 364
 365     /* Read the sample from state buffer */
 366     x0 = *pState;
 367
 368     /* Advance state pointer by 1 for the next sample */
 369     pState = pState + 1;
 370
 371     /* Decrement the loop counter */
 372     blkCnt--;
 373   }
 374
 375   /* Save energy and x0 values for the next frame */
 376   S->energy = (q31_t) energy;
 377   S->x0 = x0;
 378
 379   /* Processing is complete. Now copy the last numTaps - 1 samples to the
 380      start of the state buffer. This prepares the state buffer for the
 381      next function call. */
 382
 383   /* Points to the start of the pState buffer */
 384   pStateCurnt = S->pState;
 385
 386   /* Loop for (numTaps - 1u) samples copy */
 387   tapCnt = (numTaps - 1u);
 388
 389   /* Copy the remaining q31_t data */
 390   while(tapCnt > 0u)
 391   {
 392     *pStateCurnt++ = *pState++;
 393
 394     /* Decrement the loop counter */
 395     tapCnt--;
 396   }
 397
 398 #endif /*   #ifndef ARM_MATH_CM0 */
 399
 400 }
 401
 402 /**
 403  * @} end of LMS_NORM group
 404  */