00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 15. July 2011 00005 * $Revision: V1.0.10 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_biquad_cascade_df1_q15.c 00009 * 00010 * Description: Processing function for the 00011 * Q15 Biquad cascade DirectFormI(DF1) filter. 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00014 * 00015 * Version 1.0.10 2011/7/15 00016 * Big Endian support added and Merged M0 and M3/M4 Source code. 00017 * 00018 * Version 1.0.3 2010/11/29 00019 * Re-organized the CMSIS folders and updated documentation. 00020 * 00021 * Version 1.0.2 2010/11/11 00022 * Documentation updated. 00023 * 00024 * Version 1.0.1 2010/10/05 00025 * Production release and review comments incorporated. 00026 * 00027 * Version 1.0.0 2010/09/20 00028 * Production release and review comments incorporated. 00029 * 00030 * Version 0.0.5 2010/04/26 00031 * incorporated review comments and updated with latest CMSIS layer 00032 * 00033 * Version 0.0.3 2010/03/10 00034 * Initial version 00035 * -------------------------------------------------------------------- */ 00036 00037 #include "arm_math.h" 00038 00070 void arm_biquad_cascade_df1_q15( 00071 const arm_biquad_casd_df1_inst_q15 * S, 00072 q15_t * pSrc, 00073 q15_t * pDst, 00074 uint32_t blockSize) 00075 { 00076 00077 00078 #ifndef ARM_MATH_CM0 00079 00080 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00081 00082 q15_t *pIn = pSrc; /* Source pointer */ 00083 q15_t *pOut = pDst; /* Destination pointer */ 00084 q31_t in; /* Temporary variable to hold input value */ 00085 q31_t out; /* Temporary variable to hold output value */ 00086 q31_t b0; /* Temporary variable to hold bo value */ 00087 q31_t b1, a1; /* Filter coefficients */ 00088 q31_t state_in, state_out; /* Filter state variables */ 00089 q63_t acc; /* Accumulator */ 00090 int32_t shift = (15 - (int32_t) S->postShift); /* Post shift */ 00091 q15_t *pState = S->pState; /* State pointer */ 00092 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00093 q31_t *pState_q31; /* 32-bit state pointer for SIMD implementation */ 00094 uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */ 00095 00096 do 00097 { 00098 /* Initialize state pointer of type q31 */ 00099 pState_q31 = (q31_t *) (pState); 00100 00101 /* Read the b0 and 0 coefficients using SIMD */ 00102 b0 = *__SIMD32(pCoeffs)++; 00103 00104 /* Read the b1 and b2 coefficients using SIMD */ 00105 b1 = *__SIMD32(pCoeffs)++; 00106 00107 /* Read the a1 and a2 coefficients using SIMD */ 00108 a1 = *__SIMD32(pCoeffs)++; 00109 00110 /* Read the input state values from the state buffer: x[n-1], x[n-2] */ 00111 state_in = (q31_t) (*pState_q31++); 00112 00113 /* Read the output state values from the state buffer: y[n-1], y[n-2] */ 00114 state_out = (q31_t) (*pState_q31); 00115 00116 /* Apply loop unrolling and compute 2 output values simultaneously. */ 00117 /* The variable acc hold output values that are being computed: 00118 * 00119 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00120 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00121 */ 00122 sample = blockSize >> 1u; 00123 00124 /* First part of the processing with loop unrolling. Compute 2 outputs at a time. 00125 ** a second loop below computes the remaining 1 sample. */ 00126 while(sample > 0u) 00127 { 00128 00129 /* Read the input */ 00130 in = *__SIMD32(pIn)++; 00131 00132 /* out = b0 * x[n] + 0 * 0 */ 00133 out = __SMUAD(b0, in); 00134 00135 /* acc += b1 * x[n-1] + b2 * x[n-2] + out */ 00136 acc = __SMLALD(b1, state_in, out); 00137 /* acc += a1 * y[n-1] + a2 * y[n-2] */ 00138 acc = __SMLALD(a1, state_out, acc); 00139 00140 /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */ 00141 out = __SSAT((acc >> shift), 16); 00142 00143 /* Every time after the output is computed state should be updated. */ 00144 /* The states should be updated as: */ 00145 /* Xn2 = Xn1 */ 00146 /* Xn1 = Xn */ 00147 /* Yn2 = Yn1 */ 00148 /* Yn1 = acc */ 00149 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00150 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00151 00152 #ifndef ARM_MATH_BIG_ENDIAN 00153 00154 state_in = __PKHBT(in, state_in, 16); 00155 state_out = __PKHBT(out, state_out, 16); 00156 00157 #else 00158 00159 state_in = __PKHBT(state_in >> 16, (in >> 16), 16); 00160 state_out = __PKHBT(state_out >> 16, (out), 16); 00161 00162 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00163 00164 /* out = b0 * x[n] + 0 * 0 */ 00165 out = __SMUADX(b0, in); 00166 /* acc += b1 * x[n-1] + b2 * x[n-2] + out */ 00167 acc = __SMLALD(b1, state_in, out); 00168 /* acc += a1 * y[n-1] + a2 * y[n-2] */ 00169 acc = __SMLALD(a1, state_out, acc); 00170 00171 /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */ 00172 out = __SSAT((acc >> shift), 16); 00173 00174 /* Store the output in the destination buffer. */ 00175 00176 #ifndef ARM_MATH_BIG_ENDIAN 00177 00178 *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16); 00179 00180 #else 00181 00182 *__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16); 00183 00184 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00185 00186 /* Every time after the output is computed state should be updated. */ 00187 /* The states should be updated as: */ 00188 /* Xn2 = Xn1 */ 00189 /* Xn1 = Xn */ 00190 /* Yn2 = Yn1 */ 00191 /* Yn1 = acc */ 00192 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00193 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00194 #ifndef ARM_MATH_BIG_ENDIAN 00195 00196 state_in = __PKHBT(in >> 16, state_in, 16); 00197 state_out = __PKHBT(out, state_out, 16); 00198 00199 #else 00200 00201 state_in = __PKHBT(state_in >> 16, in, 16); 00202 state_out = __PKHBT(state_out >> 16, out, 16); 00203 00204 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00205 00206 00207 /* Decrement the loop counter */ 00208 sample--; 00209 00210 } 00211 00212 /* If the blockSize is not a multiple of 2, compute any remaining output samples here. 00213 ** No loop unrolling is used. */ 00214 00215 if((blockSize & 0x1u) != 0u) 00216 { 00217 /* Read the input */ 00218 in = *pIn++; 00219 00220 /* out = b0 * x[n] + 0 * 0 */ 00221 00222 #ifndef ARM_MATH_BIG_ENDIAN 00223 00224 out = __SMUAD(b0, in); 00225 00226 #else 00227 00228 out = __SMUADX(b0, in); 00229 00230 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00231 00232 /* acc = b1 * x[n-1] + b2 * x[n-2] + out */ 00233 acc = __SMLALD(b1, state_in, out); 00234 /* acc += a1 * y[n-1] + a2 * y[n-2] */ 00235 acc = __SMLALD(a1, state_out, acc); 00236 00237 /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */ 00238 out = __SSAT((acc >> shift), 16); 00239 00240 /* Store the output in the destination buffer. */ 00241 *pOut++ = (q15_t) out; 00242 00243 /* Every time after the output is computed state should be updated. */ 00244 /* The states should be updated as: */ 00245 /* Xn2 = Xn1 */ 00246 /* Xn1 = Xn */ 00247 /* Yn2 = Yn1 */ 00248 /* Yn1 = acc */ 00249 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00250 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00251 00252 #ifndef ARM_MATH_BIG_ENDIAN 00253 00254 state_in = __PKHBT(in, state_in, 16); 00255 state_out = __PKHBT(out, state_out, 16); 00256 00257 #else 00258 00259 state_in = __PKHBT(state_in >> 16, in, 16); 00260 state_out = __PKHBT(state_out >> 16, out, 16); 00261 00262 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00263 00264 } 00265 00266 /* The first stage goes from the input wire to the output wire. */ 00267 /* Subsequent numStages occur in-place in the output wire */ 00268 pIn = pDst; 00269 00270 /* Reset the output pointer */ 00271 pOut = pDst; 00272 00273 /* Store the updated state variables back into the state array */ 00274 *__SIMD32(pState)++ = state_in; 00275 *__SIMD32(pState)++ = state_out; 00276 00277 00278 /* Decrement the loop counter */ 00279 stage--; 00280 00281 } while(stage > 0u); 00282 00283 #else 00284 00285 /* Run the below code for Cortex-M0 */ 00286 00287 q15_t *pIn = pSrc; /* Source pointer */ 00288 q15_t *pOut = pDst; /* Destination pointer */ 00289 q15_t b0, b1, b2, a1, a2; /* Filter coefficients */ 00290 q15_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */ 00291 q15_t Xn; /* temporary input */ 00292 q63_t acc; /* Accumulator */ 00293 int32_t shift = (15 - (int32_t) S->postShift); /* Post shift */ 00294 q15_t *pState = S->pState; /* State pointer */ 00295 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00296 uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */ 00297 00298 do 00299 { 00300 /* Reading the coefficients */ 00301 b0 = *pCoeffs++; 00302 b1 = *pCoeffs++; 00303 b2 = *pCoeffs++; 00304 a1 = *pCoeffs++; 00305 a2 = *pCoeffs++; 00306 00307 /* Reading the state values */ 00308 Xn1 = pState[0]; 00309 Xn2 = pState[1]; 00310 Yn1 = pState[2]; 00311 Yn2 = pState[3]; 00312 00313 /* The variables acc holds the output value that is computed: 00314 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00315 */ 00316 00317 sample = blockSize; 00318 00319 while(sample > 0u) 00320 { 00321 /* Read the input */ 00322 Xn = *pIn++; 00323 00324 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00325 /* acc = b0 * x[n] */ 00326 acc = (q31_t) b0 *Xn; 00327 00328 /* acc += b1 * x[n-1] */ 00329 acc += (q31_t) b1 *Xn1; 00330 /* acc += b[2] * x[n-2] */ 00331 acc += (q31_t) b2 *Xn2; 00332 /* acc += a1 * y[n-1] */ 00333 acc += (q31_t) a1 *Yn1; 00334 /* acc += a2 * y[n-2] */ 00335 acc += (q31_t) a2 *Yn2; 00336 00337 /* The result is converted to 1.31 */ 00338 acc = __SSAT((acc >> shift), 16); 00339 00340 /* Every time after the output is computed state should be updated. */ 00341 /* The states should be updated as: */ 00342 /* Xn2 = Xn1 */ 00343 /* Xn1 = Xn */ 00344 /* Yn2 = Yn1 */ 00345 /* Yn1 = acc */ 00346 Xn2 = Xn1; 00347 Xn1 = Xn; 00348 Yn2 = Yn1; 00349 Yn1 = (q15_t) acc; 00350 00351 /* Store the output in the destination buffer. */ 00352 *pOut++ = (q15_t) acc; 00353 00354 /* decrement the loop counter */ 00355 sample--; 00356 } 00357 00358 /* The first stage goes from the input buffer to the output buffer. */ 00359 /* Subsequent stages occur in-place in the output buffer */ 00360 pIn = pDst; 00361 00362 /* Reset to destination pointer */ 00363 pOut = pDst; 00364 00365 /* Store the updated state variables back into the pState array */ 00366 *pState++ = Xn1; 00367 *pState++ = Xn2; 00368 *pState++ = Yn1; 00369 *pState++ = Yn2; 00370 00371 } while(--stage); 00372 00373 #endif /* #ifndef ARM_MATH_CM0 */ 00374 00375 } 00376 00377