git.gag.com Git - fw/stlink/blob - exampleF4/CMSIS/DSP_Lib/Source/FilteringFunctions/arm_lms_q31.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        15. July 2011
   5 * $Revision:    V1.0.10
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_lms_q31.c
   9 *
  10 * Description:  Processing function for the Q31 LMS filter.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Version 1.0.10 2011/7/15
  15 *    Big Endian support added and Merged M0 and M3/M4 Source code.
  16 *
  17 * Version 1.0.3 2010/11/29
  18 *    Re-organized the CMSIS folders and updated documentation.
  19 *
  20 * Version 1.0.2 2010/11/11
  21 *    Documentation updated.
  22 *
  23 * Version 1.0.1 2010/10/05
  24 *    Production release and review comments incorporated.
  25 *
  26 * Version 1.0.0 2010/09/20
  27 *    Production release and review comments incorporated
  28 *
  29 * Version 0.0.7  2010/06/10
  30 *    Misra-C changes done
  31 * -------------------------------------------------------------------- */
  32
  33 #include "arm_math.h"
  34 /**
  35  * @ingroup groupFilters
  36  */
  37
  38 /**
  39  * @addtogroup LMS
  40  * @{
  41  */
  42
  43  /**
  44  * @brief Processing function for Q31 LMS filter.
  45  * @param[in]  *S points to an instance of the Q15 LMS filter structure.
  46  * @param[in]  *pSrc points to the block of input data.
  47  * @param[in]  *pRef points to the block of reference data.
  48  * @param[out] *pOut points to the block of output data.
  49  * @param[out] *pErr points to the block of error data.
  50  * @param[in]  blockSize number of samples to process.
  51  * @return     none.
  52  *
  53  * \par Scaling and Overflow Behavior:
  54  * The function is implemented using an internal 64-bit accumulator.
  55  * The accumulator has a 2.62 format and maintains full precision of the intermediate
  56  * multiplication results but provides only a single guard bit.
  57  * Thus, if the accumulator result overflows it wraps around rather than clips.
  58  * In order to avoid overflows completely the input signal must be scaled down by
  59  * log2(numTaps) bits.
  60  * The reference signal should not be scaled down.
  61  * After all multiply-accumulates are performed, the 2.62 accumulator is shifted
  62  * and saturated to 1.31 format to yield the final result.
  63  * The output signal and error signal are in 1.31 format.
  64  *
  65  * \par
  66  *      In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted.
  67  */
  68
  69 void arm_lms_q31(
  70   const arm_lms_instance_q31 * S,
  71   q31_t * pSrc,
  72   q31_t * pRef,
  73   q31_t * pOut,
  74   q31_t * pErr,
  75   uint32_t blockSize)
  76 {
  77   q31_t *pState = S->pState;                     /* State pointer */
  78   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
  79   q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
  80   q31_t *pStateCurnt;                            /* Points to the current sample of the state */
  81   q31_t mu = S->mu;                              /* Adaptive factor */
  82   q31_t *px;                                     /* Temporary pointer for state */
  83   q31_t *pb;                                     /* Temporary pointer for coefficient buffer */
  84   uint32_t tapCnt, blkCnt;                       /* Loop counters */
  85   q63_t acc;                                     /* Accumulator */
  86   q31_t e = 0;                                   /* error of data sample */
  87   q31_t alpha;                                   /* Intermediate constant for taps update */
  88   uint8_t shift = (uint8_t) (32u - (S->postShift + 1u));        /* Shift to be applied to the output */
  89   q31_t coef;                                    /* Temporary variable for coef */
  90
  91   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
  92   /* pStateCurnt points to the location where the new input data should be written */
  93   pStateCurnt = &(S->pState[(numTaps - 1u)]);
  94
  95   /* Initializing blkCnt with blockSize */
  96   blkCnt = blockSize;
  97
  98
  99 #ifndef ARM_MATH_CM0
 100
 101   /* Run the below code for Cortex-M4 and Cortex-M3 */
 102
 103   while(blkCnt > 0u)
 104   {
 105     /* Copy the new input sample into the state buffer */
 106     *pStateCurnt++ = *pSrc++;
 107
 108     /* Initialize state pointer */
 109     px = pState;
 110
 111     /* Initialize coefficient pointer */
 112     pb = pCoeffs;
 113
 114     /* Set the accumulator to zero */
 115     acc = 0;
 116
 117     /* Loop unrolling.  Process 4 taps at a time. */
 118     tapCnt = numTaps >> 2;
 119
 120     while(tapCnt > 0u)
 121     {
 122       /* Perform the multiply-accumulate */
 123       /* acc +=  b[N] * x[n-N] */
 124       acc += ((q63_t) (*px++)) * (*pb++);
 125
 126       /* acc +=  b[N-1] * x[n-N-1] */
 127       acc += ((q63_t) (*px++)) * (*pb++);
 128
 129       /* acc +=  b[N-2] * x[n-N-2] */
 130       acc += ((q63_t) (*px++)) * (*pb++);
 131
 132       /* acc +=  b[N-3] * x[n-N-3] */
 133       acc += ((q63_t) (*px++)) * (*pb++);
 134
 135       /* Decrement the loop counter */
 136       tapCnt--;
 137     }
 138
 139     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 140     tapCnt = numTaps % 0x4u;
 141
 142     while(tapCnt > 0u)
 143     {
 144       /* Perform the multiply-accumulate */
 145       acc += ((q63_t) (*px++)) * (*pb++);
 146
 147       /* Decrement the loop counter */
 148       tapCnt--;
 149     }
 150
 151     /* Converting the result to 1.31 format */
 152     /* Store the result from accumulator into the destination buffer. */
 153     acc = (q31_t) (acc >> shift);
 154
 155     *pOut++ = (q31_t) acc;
 156
 157     /* Compute and store error */
 158     e = *pRef++ - (q31_t) acc;
 159
 160     *pErr++ = (q31_t) e;
 161
 162     /* Compute alpha i.e. intermediate constant for taps update */
 163     alpha = (q31_t) (((q63_t) e * mu) >> 31);
 164
 165     /* Initialize state pointer */
 166     /* Advance state pointer by 1 for the next sample */
 167     px = pState++;
 168
 169     /* Initialize coefficient pointer */
 170     pb = pCoeffs;
 171
 172     /* Loop unrolling.  Process 4 taps at a time. */
 173     tapCnt = numTaps >> 2;
 174
 175     /* Update filter coefficients */
 176     while(tapCnt > 0u)
 177     {
 178       /* coef is in 2.30 format */
 179       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
 180       /* get coef in 1.31 format by left shifting */
 181       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 182       /* update coefficient buffer to next coefficient */
 183       pb++;
 184
 185       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
 186       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 187       pb++;
 188
 189       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
 190       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 191       pb++;
 192
 193       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
 194       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 195       pb++;
 196
 197       /* Decrement the loop counter */
 198       tapCnt--;
 199     }
 200
 201     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 202     tapCnt = numTaps % 0x4u;
 203
 204     while(tapCnt > 0u)
 205     {
 206       /* Perform the multiply-accumulate */
 207       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
 208       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 209       pb++;
 210
 211       /* Decrement the loop counter */
 212       tapCnt--;
 213     }
 214
 215     /* Decrement the loop counter */
 216     blkCnt--;
 217   }
 218
 219   /* Processing is complete. Now copy the last numTaps - 1 samples to the
 220      satrt of the state buffer. This prepares the state buffer for the
 221      next function call. */
 222
 223   /* Points to the start of the pState buffer */
 224   pStateCurnt = S->pState;
 225
 226   /* Loop unrolling for (numTaps - 1u) samples copy */
 227   tapCnt = (numTaps - 1u) >> 2u;
 228
 229   /* copy data */
 230   while(tapCnt > 0u)
 231   {
 232     *pStateCurnt++ = *pState++;
 233     *pStateCurnt++ = *pState++;
 234     *pStateCurnt++ = *pState++;
 235     *pStateCurnt++ = *pState++;
 236
 237     /* Decrement the loop counter */
 238     tapCnt--;
 239   }
 240
 241   /* Calculate remaining number of copies */
 242   tapCnt = (numTaps - 1u) % 0x4u;
 243
 244   /* Copy the remaining q31_t data */
 245   while(tapCnt > 0u)
 246   {
 247     *pStateCurnt++ = *pState++;
 248
 249     /* Decrement the loop counter */
 250     tapCnt--;
 251   }
 252
 253 #else
 254
 255   /* Run the below code for Cortex-M0 */
 256
 257   while(blkCnt > 0u)
 258   {
 259     /* Copy the new input sample into the state buffer */
 260     *pStateCurnt++ = *pSrc++;
 261
 262     /* Initialize pState pointer */
 263     px = pState;
 264
 265     /* Initialize pCoeffs pointer */
 266     pb = pCoeffs;
 267
 268     /* Set the accumulator to zero */
 269     acc = 0;
 270
 271     /* Loop over numTaps number of values */
 272     tapCnt = numTaps;
 273
 274     while(tapCnt > 0u)
 275     {
 276       /* Perform the multiply-accumulate */
 277       acc += ((q63_t) (*px++)) * (*pb++);
 278
 279       /* Decrement the loop counter */
 280       tapCnt--;
 281     }
 282
 283     /* Converting the result to 1.31 format */
 284     /* Store the result from accumulator into the destination buffer. */
 285     acc = (q31_t) (acc >> shift);
 286
 287     *pOut++ = (q31_t) acc;
 288
 289     /* Compute and store error */
 290     e = *pRef++ - (q31_t) acc;
 291
 292     *pErr++ = (q31_t) e;
 293
 294     /* Weighting factor for the LMS version */
 295     alpha = (q31_t) (((q63_t) e * mu) >> 31);
 296
 297     /* Initialize pState pointer */
 298     /* Advance state pointer by 1 for the next sample */
 299     px = pState++;
 300
 301     /* Initialize pCoeffs pointer */
 302     pb = pCoeffs;
 303
 304     /* Loop over numTaps number of values */
 305     tapCnt = numTaps;
 306
 307     while(tapCnt > 0u)
 308     {
 309       /* Perform the multiply-accumulate */
 310       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
 311       *pb += (coef << 1u);
 312       pb++;
 313
 314       /* Decrement the loop counter */
 315       tapCnt--;
 316     }
 317
 318     /* Decrement the loop counter */
 319     blkCnt--;
 320   }
 321
 322   /* Processing is complete. Now copy the last numTaps - 1 samples to the
 323      start of the state buffer. This prepares the state buffer for the
 324      next function call. */
 325
 326   /* Points to the start of the pState buffer */
 327   pStateCurnt = S->pState;
 328
 329   /*  Copy (numTaps - 1u) samples  */
 330   tapCnt = (numTaps - 1u);
 331
 332   /* Copy the data */
 333   while(tapCnt > 0u)
 334   {
 335     *pStateCurnt++ = *pState++;
 336
 337     /* Decrement the loop counter */
 338     tapCnt--;
 339   }
 340
 341 #endif /*   #ifndef ARM_MATH_CM0 */
 342
 343 }
 344
 345 /**
 346    * @} end of LMS group
 347    */