00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include "arm_math.h"
00034
00054 void arm_fir_lattice_q15(
00055 const arm_fir_lattice_instance_q15 * S,
00056 q15_t * pSrc,
00057 q15_t * pDst,
00058 uint32_t blockSize)
00059 {
00060 q15_t *pState;
00061 q15_t *pCoeffs = S->pCoeffs;
00062 q15_t *px;
00063 q15_t *pk;
00064
00065
00066 #ifndef ARM_MATH_CM0
00067
00068
00069
00070 q31_t fcurnt1, fnext1, gcurnt1 = 0, gnext1;
00071 q31_t fcurnt2, fnext2, gnext2;
00072 q31_t fcurnt3, fnext3, gnext3;
00073 q31_t fcurnt4, fnext4, gnext4;
00074 uint32_t numStages = S->numStages;
00075 uint32_t blkCnt, stageCnt;
00076
00077 pState = &S->pState[0];
00078
00079 blkCnt = blockSize >> 2u;
00080
00081
00082
00083 while(blkCnt > 0u)
00084 {
00085
00086
00087
00088 fcurnt1 = *pSrc++;
00089 fcurnt2 = *pSrc++;
00090
00091
00092 pk = (pCoeffs);
00093
00094
00095 px = pState;
00096
00097
00098 gcurnt1 = *px;
00099
00100
00101
00102 fnext1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fcurnt1;
00103 fnext1 = __SSAT(fnext1, 16);
00104
00105
00106 gnext1 = (q31_t) ((fcurnt1 * (*pk)) >> 15u) + gcurnt1;
00107 gnext1 = __SSAT(gnext1, 16);
00108
00109
00110
00111 fnext2 = (q31_t) ((fcurnt1 * (*pk)) >> 15u) + fcurnt2;
00112 fnext2 = __SSAT(fnext2, 16);
00113
00114 gnext2 = (q31_t) ((fcurnt2 * (*pk)) >> 15u) + fcurnt1;
00115 gnext2 = __SSAT(gnext2, 16);
00116
00117
00118
00119
00120 fcurnt3 = *pSrc++;
00121 fcurnt4 = *pSrc++;
00122
00123
00124
00125 *px++ = (q15_t) fcurnt4;
00126
00127
00128 fnext3 = (q31_t) ((fcurnt2 * (*pk)) >> 15u) + fcurnt3;
00129 fnext3 = __SSAT(fnext3, 16);
00130 gnext3 = (q31_t) ((fcurnt3 * (*pk)) >> 15u) + fcurnt2;
00131 gnext3 = __SSAT(gnext3, 16);
00132
00133
00134 fnext4 = (q31_t) ((fcurnt3 * (*pk)) >> 15u) + fcurnt4;
00135 fnext4 = __SSAT(fnext4, 16);
00136 gnext4 = (q31_t) ((fcurnt4 * (*pk++)) >> 15u) + fcurnt3;
00137 gnext4 = __SSAT(gnext4, 16);
00138
00139
00140 fcurnt1 = fnext1;
00141 fcurnt2 = fnext2;
00142 fcurnt3 = fnext3;
00143 fcurnt4 = fnext4;
00144
00145
00146
00147 stageCnt = (numStages - 1u) >> 2;
00148
00149
00150
00151
00152
00153
00154 while(stageCnt > 0u)
00155 {
00156
00157 gcurnt1 = *px;
00158
00159
00160 *px++ = (q15_t) gnext4;
00161
00162
00163
00164
00165 fnext1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fcurnt1;
00166 fnext1 = __SSAT(fnext1, 16);
00167
00168
00169
00170
00171 fnext2 = (q31_t) ((gnext1 * (*pk)) >> 15u) + fcurnt2;
00172 fnext2 = __SSAT(fnext2, 16);
00173
00174 fnext3 = (q31_t) ((gnext2 * (*pk)) >> 15u) + fcurnt3;
00175 fnext3 = __SSAT(fnext3, 16);
00176
00177
00178 fnext4 = (q31_t) ((gnext3 * (*pk)) >> 15u) + fcurnt4;
00179 fnext4 = __SSAT(fnext4, 16);
00180
00181
00182
00183 gnext4 = (q31_t) ((fcurnt4 * (*pk)) >> 15u) + gnext3;
00184 gnext4 = __SSAT(gnext4, 16);
00185 gnext3 = (q31_t) ((fcurnt3 * (*pk)) >> 15u) + gnext2;
00186 gnext3 = __SSAT(gnext3, 16);
00187
00188 gnext2 = (q31_t) ((fcurnt2 * (*pk)) >> 15u) + gnext1;
00189 gnext2 = __SSAT(gnext2, 16);
00190
00191 gnext1 = (q31_t) ((fcurnt1 * (*pk++)) >> 15u) + gcurnt1;
00192 gnext1 = __SSAT(gnext1, 16);
00193
00194
00195
00196 gcurnt1 = *px;
00197
00198
00199 *px++ = (q15_t) gnext4;
00200
00201
00202
00203
00204 fcurnt1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fnext1;
00205 fcurnt1 = __SSAT(fcurnt1, 16);
00206
00207
00208 fcurnt2 = (q31_t) ((gnext1 * (*pk)) >> 15u) + fnext2;
00209 fcurnt2 = __SSAT(fcurnt2, 16);
00210
00211
00212 fcurnt3 = (q31_t) ((gnext2 * (*pk)) >> 15u) + fnext3;
00213 fcurnt3 = __SSAT(fcurnt3, 16);
00214
00215
00216 fcurnt4 = (q31_t) ((gnext3 * (*pk)) >> 15u) + fnext4;
00217 fcurnt4 = __SSAT(fcurnt4, 16);
00218
00219
00220
00221 gnext4 = (q31_t) ((fnext4 * (*pk)) >> 15u) + gnext3;
00222 gnext4 = __SSAT(gnext4, 16);
00223
00224 gnext3 = (q31_t) ((fnext3 * (*pk)) >> 15u) + gnext2;
00225 gnext3 = __SSAT(gnext3, 16);
00226
00227 gnext2 = (q31_t) ((fnext2 * (*pk)) >> 15u) + gnext1;
00228 gnext2 = __SSAT(gnext2, 16);
00229
00230 gnext1 = (q31_t) ((fnext1 * (*pk++)) >> 15u) + gcurnt1;
00231 gnext1 = __SSAT(gnext1, 16);
00232
00233
00234 gcurnt1 = *px;
00235
00236
00237 *px++ = (q15_t) gnext4;
00238
00239
00240
00241
00242 fnext1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fcurnt1;
00243 fnext1 = __SSAT(fnext1, 16);
00244
00245
00246
00247 fnext2 = (q31_t) ((gnext1 * (*pk)) >> 15u) + fcurnt2;
00248 fnext2 = __SSAT(fnext2, 16);
00249
00250
00251 fnext3 = (q31_t) ((gnext2 * (*pk)) >> 15u) + fcurnt3;
00252 fnext3 = __SSAT(fnext3, 16);
00253
00254
00255 fnext4 = (q31_t) ((gnext3 * (*pk)) >> 15u) + fcurnt4;
00256 fnext4 = __SSAT(fnext4, 16);
00257
00258
00259
00260 gnext4 = (q31_t) ((fcurnt4 * (*pk)) >> 15u) + gnext3;
00261 gnext4 = __SSAT(gnext4, 16);
00262
00263 gnext3 = (q31_t) ((fcurnt3 * (*pk)) >> 15u) + gnext2;
00264 gnext3 = __SSAT(gnext3, 16);
00265
00266 gnext2 = (q31_t) ((fcurnt2 * (*pk)) >> 15u) + gnext1;
00267 gnext2 = __SSAT(gnext2, 16);
00268 gnext1 = (q31_t) ((fcurnt1 * (*pk++)) >> 15u) + gcurnt1;
00269 gnext1 = __SSAT(gnext1, 16);
00270
00271
00272
00273 gcurnt1 = *px;
00274
00275
00276 *px++ = (q15_t) gnext4;
00277
00278
00279
00280
00281 fcurnt1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fnext1;
00282 fcurnt1 = __SSAT(fcurnt1, 16);
00283
00284
00285 fcurnt2 = (q31_t) ((gnext1 * (*pk)) >> 15u) + fnext2;
00286 fcurnt2 = __SSAT(fcurnt2, 16);
00287
00288
00289 fcurnt3 = (q31_t) ((gnext2 * (*pk)) >> 15u) + fnext3;
00290 fcurnt3 = __SSAT(fcurnt3, 16);
00291
00292
00293 fcurnt4 = (q31_t) ((gnext3 * (*pk)) >> 15u) + fnext4;
00294 fcurnt4 = __SSAT(fcurnt4, 16);
00295
00296
00297
00298 gnext4 = (q31_t) ((fnext4 * (*pk)) >> 15u) + gnext3;
00299 gnext4 = __SSAT(gnext4, 16);
00300 gnext3 = (q31_t) ((fnext3 * (*pk)) >> 15u) + gnext2;
00301 gnext3 = __SSAT(gnext3, 16);
00302 gnext2 = (q31_t) ((fnext2 * (*pk)) >> 15u) + gnext1;
00303 gnext2 = __SSAT(gnext2, 16);
00304 gnext1 = (q31_t) ((fnext1 * (*pk++)) >> 15u) + gcurnt1;
00305 gnext1 = __SSAT(gnext1, 16);
00306
00307 stageCnt--;
00308 }
00309
00310
00311 stageCnt = (numStages - 1u) % 0x4u;
00312
00313 while(stageCnt > 0u)
00314 {
00315 gcurnt1 = *px;
00316
00317
00318 *px++ = (q15_t) gnext4;
00319
00320
00321 fnext1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fcurnt1;
00322 fnext1 = __SSAT(fnext1, 16);
00323 fnext2 = (q31_t) ((gnext1 * (*pk)) >> 15u) + fcurnt2;
00324 fnext2 = __SSAT(fnext2, 16);
00325
00326 fnext3 = (q31_t) ((gnext2 * (*pk)) >> 15u) + fcurnt3;
00327 fnext3 = __SSAT(fnext3, 16);
00328
00329 fnext4 = (q31_t) ((gnext3 * (*pk)) >> 15u) + fcurnt4;
00330 fnext4 = __SSAT(fnext4, 16);
00331
00332
00333 gnext4 = (q31_t) ((fcurnt4 * (*pk)) >> 15u) + gnext3;
00334 gnext4 = __SSAT(gnext4, 16);
00335 gnext3 = (q31_t) ((fcurnt3 * (*pk)) >> 15u) + gnext2;
00336 gnext3 = __SSAT(gnext3, 16);
00337 gnext2 = (q31_t) ((fcurnt2 * (*pk)) >> 15u) + gnext1;
00338 gnext2 = __SSAT(gnext2, 16);
00339 gnext1 = (q31_t) ((fcurnt1 * (*pk++)) >> 15u) + gcurnt1;
00340 gnext1 = __SSAT(gnext1, 16);
00341
00342
00343 fcurnt1 = fnext1;
00344 fcurnt2 = fnext2;
00345 fcurnt3 = fnext3;
00346 fcurnt4 = fnext4;
00347
00348 stageCnt--;
00349
00350 }
00351
00352
00353
00354
00355 #ifndef ARM_MATH_BIG_ENDIAN
00356
00357 *__SIMD32(pDst)++ = __PKHBT(fcurnt1, fcurnt2, 16);
00358 *__SIMD32(pDst)++ = __PKHBT(fcurnt3, fcurnt4, 16);
00359
00360 #else
00361
00362 *__SIMD32(pDst)++ = __PKHBT(fcurnt2, fcurnt1, 16);
00363 *__SIMD32(pDst)++ = __PKHBT(fcurnt4, fcurnt3, 16);
00364
00365 #endif
00366
00367 blkCnt--;
00368 }
00369
00370
00371
00372 blkCnt = blockSize % 0x4u;
00373
00374 while(blkCnt > 0u)
00375 {
00376
00377 fcurnt1 = *pSrc++;
00378
00379
00380 pk = (pCoeffs);
00381
00382
00383 px = pState;
00384
00385
00386 gcurnt1 = *px;
00387
00388
00389
00390 fnext1 = (((q31_t) gcurnt1 * (*pk)) >> 15u) + fcurnt1;
00391 fnext1 = __SSAT(fnext1, 16);
00392
00393
00394
00395 gnext1 = (((q31_t) fcurnt1 * (*pk++)) >> 15u) + gcurnt1;
00396 gnext1 = __SSAT(gnext1, 16);
00397
00398
00399 *px++ = (q15_t) fcurnt1;
00400
00401
00402
00403 fcurnt1 = fnext1;
00404
00405 stageCnt = (numStages - 1u);
00406
00407
00408 while(stageCnt > 0u)
00409 {
00410
00411 gcurnt1 = *px;
00412
00413
00414 *px++ = (q15_t) gnext1;
00415
00416
00417
00418 fnext1 = (((q31_t) gcurnt1 * (*pk)) >> 15u) + fcurnt1;
00419 fnext1 = __SSAT(fnext1, 16);
00420
00421
00422 gnext1 = (((q31_t) fcurnt1 * (*pk++)) >> 15u) + gcurnt1;
00423 gnext1 = __SSAT(gnext1, 16);
00424
00425
00426
00427
00428 fcurnt1 = fnext1;
00429
00430 stageCnt--;
00431
00432 }
00433
00434
00435 *pDst++ = __SSAT(fcurnt1, 16);
00436
00437
00438 blkCnt--;
00439
00440 }
00441
00442 #else
00443
00444
00445
00446 q31_t fcurnt, fnext, gcurnt, gnext;
00447 uint32_t numStages = S->numStages;
00448 uint32_t blkCnt, stageCnt;
00449
00450 pState = &S->pState[0];
00451
00452 blkCnt = blockSize;
00453
00454 while(blkCnt > 0u)
00455 {
00456
00457 fcurnt = *pSrc++;
00458
00459
00460 pk = (pCoeffs);
00461
00462
00463 px = pState;
00464
00465
00466 gcurnt = *px;
00467
00468
00469
00470 fnext = ((gcurnt * (*pk)) >> 15u) + fcurnt;
00471 fnext = __SSAT(fnext, 16);
00472
00473
00474
00475 gnext = ((fcurnt * (*pk++)) >> 15u) + gcurnt;
00476 gnext = __SSAT(gnext, 16);
00477
00478
00479 *px++ = (q15_t) fcurnt;
00480
00481
00482
00483 fcurnt = fnext;
00484
00485 stageCnt = (numStages - 1u);
00486
00487
00488 while(stageCnt > 0u)
00489 {
00490
00491 gcurnt = *px;
00492
00493
00494 *px++ = (q15_t) gnext;
00495
00496
00497
00498 fnext = ((gcurnt * (*pk)) >> 15u) + fcurnt;
00499 fnext = __SSAT(fnext, 16);
00500
00501
00502 gnext = ((fcurnt * (*pk++)) >> 15u) + gcurnt;
00503 gnext = __SSAT(gnext, 16);
00504
00505
00506
00507
00508 fcurnt = fnext;
00509
00510 stageCnt--;
00511
00512 }
00513
00514
00515 *pDst++ = __SSAT(fcurnt, 16);
00516
00517
00518 blkCnt--;
00519
00520 }
00521
00522 #endif
00523
00524 }
00525