00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include "arm_math.h"
00033
00060 void arm_fir_sparse_q15(
00061 arm_fir_sparse_instance_q15 * S,
00062 q15_t * pSrc,
00063 q15_t * pDst,
00064 q15_t * pScratchIn,
00065 q31_t * pScratchOut,
00066 uint32_t blockSize)
00067 {
00068
00069 q15_t *pState = S->pState;
00070 q15_t *pIn = pSrc;
00071 q15_t *pOut = pDst;
00072 q15_t *pCoeffs = S->pCoeffs;
00073 q15_t *px;
00074 q15_t *pb = pScratchIn;
00075 q15_t *py = pState;
00076 int32_t *pTapDelay = S->pTapDelay;
00077 uint32_t delaySize = S->maxDelay + blockSize;
00078 uint16_t numTaps = S->numTaps;
00079 int32_t readIndex;
00080 uint32_t tapCnt, blkCnt;
00081 q15_t coeff = *pCoeffs++;
00082 q31_t *pScr2 = pScratchOut;
00083
00084
00085 #ifndef ARM_MATH_CM0
00086
00087
00088
00089 q31_t in1, in2;
00090
00091
00092
00093
00094 arm_circularWrite_q15(py, delaySize, &S->stateIndex, 1, pIn, 1, blockSize);
00095
00096
00097 tapCnt = numTaps;
00098
00099
00100 readIndex = (S->stateIndex - blockSize) - *pTapDelay++;
00101
00102
00103 if(readIndex < 0)
00104 {
00105 readIndex += (int32_t) delaySize;
00106 }
00107
00108
00109 py = pState;
00110
00111
00112 arm_circularRead_q15(py, delaySize, &readIndex, 1,
00113 pb, pb, blockSize, 1, blockSize);
00114
00115
00116 px = pb;
00117
00118
00119 pScratchOut = pScr2;
00120
00121
00122
00123 blkCnt = blockSize >> 2;
00124
00125 while(blkCnt > 0u)
00126 {
00127
00128 *pScratchOut++ = ((q31_t) * px++ * coeff);
00129 *pScratchOut++ = ((q31_t) * px++ * coeff);
00130 *pScratchOut++ = ((q31_t) * px++ * coeff);
00131 *pScratchOut++ = ((q31_t) * px++ * coeff);
00132
00133
00134 blkCnt--;
00135 }
00136
00137
00138
00139 blkCnt = blockSize % 0x4u;
00140
00141 while(blkCnt > 0u)
00142 {
00143
00144 *pScratchOut++ = ((q31_t) * px++ * coeff);
00145
00146
00147 blkCnt--;
00148 }
00149
00150
00151
00152 coeff = *pCoeffs++;
00153
00154
00155 readIndex = (S->stateIndex - blockSize) - *pTapDelay++;
00156
00157
00158 if(readIndex < 0)
00159 {
00160 readIndex += (int32_t) delaySize;
00161 }
00162
00163
00164 tapCnt = (uint32_t) numTaps - 1u;
00165
00166 while(tapCnt > 0u)
00167 {
00168
00169 py = pState;
00170
00171
00172 arm_circularRead_q15(py, delaySize, &readIndex, 1,
00173 pb, pb, blockSize, 1, blockSize);
00174
00175
00176 px = pb;
00177
00178
00179 pScratchOut = pScr2;
00180
00181
00182
00183 blkCnt = blockSize >> 2;
00184
00185 while(blkCnt > 0u)
00186 {
00187
00188 *pScratchOut++ += (q31_t) * px++ * coeff;
00189 *pScratchOut++ += (q31_t) * px++ * coeff;
00190 *pScratchOut++ += (q31_t) * px++ * coeff;
00191 *pScratchOut++ += (q31_t) * px++ * coeff;
00192
00193
00194 blkCnt--;
00195 }
00196
00197
00198
00199 blkCnt = blockSize % 0x4u;
00200
00201 while(blkCnt > 0u)
00202 {
00203
00204 *pScratchOut++ += (q31_t) * px++ * coeff;
00205
00206
00207 blkCnt--;
00208 }
00209
00210
00211
00212 coeff = *pCoeffs++;
00213
00214
00215 readIndex = (S->stateIndex - blockSize) - *pTapDelay++;
00216
00217
00218 if(readIndex < 0)
00219 {
00220 readIndex += (int32_t) delaySize;
00221 }
00222
00223
00224 tapCnt--;
00225 }
00226
00227
00228
00229
00230 blkCnt = blockSize >> 2;
00231
00232 while(blkCnt > 0u)
00233 {
00234 in1 = *pScr2++;
00235 in2 = *pScr2++;
00236
00237 #ifndef ARM_MATH_BIG_ENDIAN
00238
00239 *__SIMD32(pOut)++ =
00240 __PKHBT((q15_t) __SSAT(in1 >> 15, 16), (q15_t) __SSAT(in2 >> 15, 16),
00241 16);
00242
00243 #else
00244 *__SIMD32(pOut)++ =
00245 __PKHBT((q15_t) __SSAT(in2 >> 15, 16), (q15_t) __SSAT(in1 >> 15, 16),
00246 16);
00247
00248 #endif
00249
00250 in1 = *pScr2++;
00251
00252 in2 = *pScr2++;
00253
00254 #ifndef ARM_MATH_BIG_ENDIAN
00255
00256 *__SIMD32(pOut)++ =
00257 __PKHBT((q15_t) __SSAT(in1 >> 15, 16), (q15_t) __SSAT(in2 >> 15, 16),
00258 16);
00259
00260 #else
00261
00262 *__SIMD32(pOut)++ =
00263 __PKHBT((q15_t) __SSAT(in2 >> 15, 16), (q15_t) __SSAT(in1 >> 15, 16),
00264 16);
00265
00266 #endif
00267
00268
00269 blkCnt--;
00270
00271 }
00272
00273
00274
00275 blkCnt = blockSize % 0x4u;
00276
00277 while(blkCnt > 0u)
00278 {
00279 *pOut++ = (q15_t) __SSAT(*pScr2++ >> 15, 16);
00280 blkCnt--;
00281 }
00282
00283 #else
00284
00285
00286
00287
00288
00289 arm_circularWrite_q15(py, delaySize, &S->stateIndex, 1, pIn, 1, blockSize);
00290
00291
00292 tapCnt = numTaps;
00293
00294
00295 readIndex = (S->stateIndex - blockSize) - *pTapDelay++;
00296
00297
00298 if(readIndex < 0)
00299 {
00300 readIndex += (int32_t) delaySize;
00301 }
00302
00303
00304 py = pState;
00305
00306
00307 arm_circularRead_q15(py, delaySize, &readIndex, 1,
00308 pb, pb, blockSize, 1, blockSize);
00309
00310
00311 px = pb;
00312
00313
00314 pScratchOut = pScr2;
00315
00316 blkCnt = blockSize;
00317
00318 while(blkCnt > 0u)
00319 {
00320
00321 *pScratchOut++ = ((q31_t) * px++ * coeff);
00322
00323
00324 blkCnt--;
00325 }
00326
00327
00328
00329 coeff = *pCoeffs++;
00330
00331
00332 readIndex = (S->stateIndex - blockSize) - *pTapDelay++;
00333
00334
00335 if(readIndex < 0)
00336 {
00337 readIndex += (int32_t) delaySize;
00338 }
00339
00340
00341 tapCnt = (uint32_t) numTaps - 1u;
00342
00343 while(tapCnt > 0u)
00344 {
00345
00346 py = pState;
00347
00348
00349 arm_circularRead_q15(py, delaySize, &readIndex, 1,
00350 pb, pb, blockSize, 1, blockSize);
00351
00352
00353 px = pb;
00354
00355
00356 pScratchOut = pScr2;
00357
00358 blkCnt = blockSize;
00359
00360 while(blkCnt > 0u)
00361 {
00362
00363 *pScratchOut++ += (q31_t) * px++ * coeff;
00364
00365
00366 blkCnt--;
00367 }
00368
00369
00370
00371 coeff = *pCoeffs++;
00372
00373
00374 readIndex = (S->stateIndex - blockSize) - *pTapDelay++;
00375
00376
00377 if(readIndex < 0)
00378 {
00379 readIndex += (int32_t) delaySize;
00380 }
00381
00382
00383 tapCnt--;
00384 }
00385
00386
00387
00388
00389 blkCnt = blockSize;
00390
00391 while(blkCnt > 0u)
00392 {
00393 *pOut++ = (q15_t) __SSAT(*pScr2++ >> 15, 16);
00394 blkCnt--;
00395 }
00396
00397 #endif
00398
00399 }
00400