git.gag.com Git - fw/stlink/blob - exampleF4/CMSIS/DSP_Lib/Source/FilteringFunctions/arm_lms_q15.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        15. July 2011
   5 * $Revision:    V1.0.10
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_lms_q15.c
   9 *
  10 * Description:  Processing function for the Q15 LMS filter.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Version 1.0.10 2011/7/15
  15 *    Big Endian support added and Merged M0 and M3/M4 Source code.
  16 *
  17 * Version 1.0.3 2010/11/29
  18 *    Re-organized the CMSIS folders and updated documentation.
  19 *
  20 * Version 1.0.2 2010/11/11
  21 *    Documentation updated.
  22 *
  23 * Version 1.0.1 2010/10/05
  24 *    Production release and review comments incorporated.
  25 *
  26 * Version 1.0.0 2010/09/20
  27 *    Production release and review comments incorporated
  28 *
  29 * Version 0.0.7  2010/06/10
  30 *    Misra-C changes done
  31 * -------------------------------------------------------------------- */
  32
  33 #include "arm_math.h"
  34 /**
  35  * @ingroup groupFilters
  36  */
  37
  38 /**
  39  * @addtogroup LMS
  40  * @{
  41  */
  42
  43  /**
  44  * @brief Processing function for Q15 LMS filter.
  45  * @param[in] *S points to an instance of the Q15 LMS filter structure.
  46  * @param[in] *pSrc points to the block of input data.
  47  * @param[in] *pRef points to the block of reference data.
  48  * @param[out] *pOut points to the block of output data.
  49  * @param[out] *pErr points to the block of error data.
  50  * @param[in] blockSize number of samples to process.
  51  * @return none.
  52  *
  53  * \par Scaling and Overflow Behavior:
  54  * The function is implemented using a 64-bit internal accumulator.
  55  * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
  56  * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
  57  * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
  58  * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
  59  * Lastly, the accumulator is saturated to yield a result in 1.15 format.
  60  *
  61  * \par
  62  *      In this filter, the filter coefficients are updated for each sample and the coefficient updates are saturated.
  63  *
  64  */
  65
  66 void arm_lms_q15(
  67   const arm_lms_instance_q15 * S,
  68   q15_t * pSrc,
  69   q15_t * pRef,
  70   q15_t * pOut,
  71   q15_t * pErr,
  72   uint32_t blockSize)
  73 {
  74   q15_t *pState = S->pState;                     /* State pointer */
  75   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
  76   q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
  77   q15_t *pStateCurnt;                            /* Points to the current sample of the state */
  78   q15_t mu = S->mu;                              /* Adaptive factor */
  79   q15_t *px;                                     /* Temporary pointer for state */
  80   q15_t *pb;                                     /* Temporary pointer for coefficient buffer */
  81   uint32_t tapCnt, blkCnt;                       /* Loop counters */
  82   q63_t acc;                                     /* Accumulator */
  83   q15_t e = 0;                                   /* error of data sample */
  84   q15_t alpha;                                   /* Intermediate constant for taps update */
  85   uint32_t shift = S->postShift + 1u;            /* Shift to be applied to the output */
  86
  87
  88 #ifndef ARM_MATH_CM0
  89
  90   /* Run the below code for Cortex-M4 and Cortex-M3 */
  91
  92   q31_t coef;                                    /* Teporary variable for coefficient */
  93
  94   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
  95   /* pStateCurnt points to the location where the new input data should be written */
  96   pStateCurnt = &(S->pState[(numTaps - 1u)]);
  97
  98   /* Initializing blkCnt with blockSize */
  99   blkCnt = blockSize;
 100
 101   while(blkCnt > 0u)
 102   {
 103     /* Copy the new input sample into the state buffer */
 104     *pStateCurnt++ = *pSrc++;
 105
 106     /* Initialize state pointer */
 107     px = pState;
 108
 109     /* Initialize coefficient pointer */
 110     pb = pCoeffs;
 111
 112     /* Set the accumulator to zero */
 113     acc = 0;
 114
 115     /* Loop unrolling.  Process 4 taps at a time. */
 116     tapCnt = numTaps >> 2u;
 117
 118     while(tapCnt > 0u)
 119     {
 120       /* acc +=  b[N] * x[n-N] + b[N-1] * x[n-N-1] */
 121       /* Perform the multiply-accumulate */
 122       acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
 123       acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
 124
 125       /* Decrement the loop counter */
 126       tapCnt--;
 127     }
 128
 129     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 130     tapCnt = numTaps % 0x4u;
 131
 132     while(tapCnt > 0u)
 133     {
 134       /* Perform the multiply-accumulate */
 135       acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
 136
 137       /* Decrement the loop counter */
 138       tapCnt--;
 139     }
 140
 141     /* Converting the result to 1.15 format and saturate the output */
 142     acc = __SSAT((acc >> (16 - shift)), 16);
 143
 144     /* Store the result from accumulator into the destination buffer. */
 145     *pOut++ = (q15_t) acc;
 146
 147     /* Compute and store error */
 148     e = *pRef++ - (q15_t) acc;
 149
 150     *pErr++ = (q15_t) e;
 151
 152     /* Compute alpha i.e. intermediate constant for taps update */
 153     alpha = (q15_t) (((q31_t) e * (mu)) >> 15);
 154
 155     /* Initialize state pointer */
 156     /* Advance state pointer by 1 for the next sample */
 157     px = pState++;
 158
 159     /* Initialize coefficient pointer */
 160     pb = pCoeffs;
 161
 162     /* Loop unrolling.  Process 4 taps at a time. */
 163     tapCnt = numTaps >> 2u;
 164
 165     /* Update filter coefficients */
 166     while(tapCnt > 0u)
 167     {
 168       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
 169       *pb++ = (q15_t) __SSAT((coef), 16);
 170       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
 171       *pb++ = (q15_t) __SSAT((coef), 16);
 172       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
 173       *pb++ = (q15_t) __SSAT((coef), 16);
 174       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
 175       *pb++ = (q15_t) __SSAT((coef), 16);
 176
 177       /* Decrement the loop counter */
 178       tapCnt--;
 179     }
 180
 181     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 182     tapCnt = numTaps % 0x4u;
 183
 184     while(tapCnt > 0u)
 185     {
 186       /* Perform the multiply-accumulate */
 187       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
 188       *pb++ = (q15_t) __SSAT((coef), 16);
 189
 190       /* Decrement the loop counter */
 191       tapCnt--;
 192     }
 193
 194     /* Decrement the loop counter */
 195     blkCnt--;
 196
 197   }
 198
 199   /* Processing is complete. Now copy the last numTaps - 1 samples to the
 200      satrt of the state buffer. This prepares the state buffer for the
 201      next function call. */
 202
 203   /* Points to the start of the pState buffer */
 204   pStateCurnt = S->pState;
 205
 206   /* Calculation of count for copying integer writes */
 207   tapCnt = (numTaps - 1u) >> 2;
 208
 209   while(tapCnt > 0u)
 210   {
 211
 212     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
 213     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
 214
 215     tapCnt--;
 216
 217   }
 218
 219   /* Calculation of count for remaining q15_t data */
 220   tapCnt = (numTaps - 1u) % 0x4u;
 221
 222   /* copy data */
 223   while(tapCnt > 0u)
 224   {
 225     *pStateCurnt++ = *pState++;
 226
 227     /* Decrement the loop counter */
 228     tapCnt--;
 229   }
 230
 231 #else
 232
 233   /* Run the below code for Cortex-M0 */
 234
 235   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
 236   /* pStateCurnt points to the location where the new input data should be written */
 237   pStateCurnt = &(S->pState[(numTaps - 1u)]);
 238
 239   /* Loop over blockSize number of values */
 240   blkCnt = blockSize;
 241
 242   while(blkCnt > 0u)
 243   {
 244     /* Copy the new input sample into the state buffer */
 245     *pStateCurnt++ = *pSrc++;
 246
 247     /* Initialize pState pointer */
 248     px = pState;
 249
 250     /* Initialize pCoeffs pointer */
 251     pb = pCoeffs;
 252
 253     /* Set the accumulator to zero */
 254     acc = 0;
 255
 256     /* Loop over numTaps number of values */
 257     tapCnt = numTaps;
 258
 259     while(tapCnt > 0u)
 260     {
 261       /* Perform the multiply-accumulate */
 262       acc += (q63_t) ((q31_t) (*px++) * (*pb++));
 263
 264       /* Decrement the loop counter */
 265       tapCnt--;
 266     }
 267
 268     /* Converting the result to 1.15 format and saturate the output */
 269     acc = __SSAT((acc >> (16 - shift)), 16);
 270
 271     /* Store the result from accumulator into the destination buffer. */
 272     *pOut++ = (q15_t) acc;
 273
 274     /* Compute and store error */
 275     e = *pRef++ - (q15_t) acc;
 276
 277     *pErr++ = (q15_t) e;
 278
 279     /* Compute alpha i.e. intermediate constant for taps update */
 280     alpha = (q15_t) (((q31_t) e * (mu)) >> 15);
 281
 282     /* Initialize pState pointer */
 283     /* Advance state pointer by 1 for the next sample */
 284     px = pState++;
 285
 286     /* Initialize pCoeffs pointer */
 287     pb = pCoeffs;
 288
 289     /* Loop over numTaps number of values */
 290     tapCnt = numTaps;
 291
 292     while(tapCnt > 0u)
 293     {
 294       /* Perform the multiply-accumulate */
 295       *pb++ += (q15_t) (((q31_t) alpha * (*px++)) >> 15);
 296
 297       /* Decrement the loop counter */
 298       tapCnt--;
 299     }
 300
 301     /* Decrement the loop counter */
 302     blkCnt--;
 303
 304   }
 305
 306   /* Processing is complete. Now copy the last numTaps - 1 samples to the
 307      start of the state buffer. This prepares the state buffer for the
 308      next function call. */
 309
 310   /* Points to the start of the pState buffer */
 311   pStateCurnt = S->pState;
 312
 313   /*  Copy (numTaps - 1u) samples  */
 314   tapCnt = (numTaps - 1u);
 315
 316   /* Copy the data */
 317   while(tapCnt > 0u)
 318   {
 319     *pStateCurnt++ = *pState++;
 320
 321     /* Decrement the loop counter */
 322     tapCnt--;
 323   }
 324
 325 #endif /*   #ifndef ARM_MATH_CM0 */
 326
 327 }
 328
 329 /**
 330    * @} end of LMS group
 331    */