CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details
Dependents: K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more
arm_fir_lattice_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_lattice_q15.c 00009 * 00010 * Description: Q15 FIR lattice filter processing function. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.3 2010/11/29 00015 * Re-organized the CMSIS folders and updated documentation. 00016 * 00017 * Version 1.0.2 2010/11/11 00018 * Documentation updated. 00019 * 00020 * Version 1.0.1 2010/10/05 00021 * Production release and review comments incorporated. 00022 * 00023 * Version 1.0.0 2010/09/20 00024 * Production release and review comments incorporated 00025 * 00026 * Version 0.0.7 2010/06/10 00027 * Misra-C changes done 00028 * -------------------------------------------------------------------- */ 00029 00030 #include "arm_math.h" 00031 00032 /** 00033 * @ingroup groupFilters 00034 */ 00035 00036 /** 00037 * @addtogroup FIR_Lattice 00038 * @{ 00039 */ 00040 00041 00042 /** 00043 * @brief Processing function for the Q15 FIR lattice filter. 00044 * @param[in] *S points to an instance of the Q15 FIR lattice structure. 00045 * @param[in] *pSrc points to the block of input data. 00046 * @param[out] *pDst points to the block of output data 00047 * @param[in] blockSize number of samples to process. 00048 * @return none. 
00049 */ 00050 00051 void arm_fir_lattice_q15( 00052 const arm_fir_lattice_instance_q15 * S, 00053 q15_t * pSrc, 00054 q15_t * pDst, 00055 uint32_t blockSize) 00056 { 00057 q15_t *pState; /* State pointer */ 00058 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00059 q15_t *px; /* temporary state pointer */ 00060 q15_t *pk; /* temporary coefficient pointer */ 00061 q31_t fcurnt1, fnext1, gcurnt1 = 0, gnext1; /* temporary variables for first sample in loop unrolling */ 00062 q31_t fcurnt2, fnext2, gnext2; /* temporary variables for second sample in loop unrolling */ 00063 q31_t fcurnt3, fnext3, gnext3; /* temporary variables for third sample in loop unrolling */ 00064 q31_t fcurnt4, fnext4, gnext4; /* temporary variables for fourth sample in loop unrolling */ 00065 uint32_t numStages = S->numStages; /* Number of stages in the filter */ 00066 uint32_t blkCnt, stageCnt; /* temporary variables for counts */ 00067 00068 pState = &S->pState[0]; 00069 00070 blkCnt = blockSize >> 2u; 00071 00072 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00073 ** a second loop below computes the remaining 1 to 3 samples. 
*/ 00074 while(blkCnt > 0u) 00075 { 00076 00077 /* Read two samples from input buffer */ 00078 /* f0(n) = x(n) */ 00079 fcurnt1 = *pSrc++; 00080 fcurnt2 = *pSrc++; 00081 00082 /* Initialize coeff pointer */ 00083 pk = (pCoeffs); 00084 00085 /* Initialize state pointer */ 00086 px = pState; 00087 00088 /* Read g0(n-1) from state */ 00089 gcurnt1 = *px; 00090 00091 /* Process first sample for first tap */ 00092 /* f1(n) = f0(n) + K1 * g0(n-1) */ 00093 fnext1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fcurnt1; 00094 fnext1 = __SSAT(fnext1, 16); 00095 00096 /* g1(n) = f0(n) * K1 + g0(n-1) */ 00097 gnext1 = (q31_t) ((fcurnt1 * (*pk)) >> 15u) + gcurnt1; 00098 gnext1 = __SSAT(gnext1, 16); 00099 00100 /* Process second sample for first tap */ 00101 /* for sample 2 processing */ 00102 fnext2 = (q31_t) ((fcurnt1 * (*pk)) >> 15u) + fcurnt2; 00103 fnext2 = __SSAT(fnext2, 16); 00104 00105 gnext2 = (q31_t) ((fcurnt2 * (*pk)) >> 15u) + fcurnt1; 00106 gnext2 = __SSAT(gnext2, 16); 00107 00108 00109 /* Read next two samples from input buffer */ 00110 /* f0(n+2) = x(n+2) */ 00111 fcurnt3 = *pSrc++; 00112 fcurnt4 = *pSrc++; 00113 00114 /* Copy only last input samples into the state buffer 00115 which is used for next four samples processing */ 00116 *px++ = (q15_t) fcurnt4; 00117 00118 /* Process third sample for first tap */ 00119 fnext3 = (q31_t) ((fcurnt2 * (*pk)) >> 15u) + fcurnt3; 00120 fnext3 = __SSAT(fnext3, 16); 00121 gnext3 = (q31_t) ((fcurnt3 * (*pk)) >> 15u) + fcurnt2; 00122 gnext3 = __SSAT(gnext3, 16); 00123 00124 /* Process fourth sample for first tap */ 00125 fnext4 = (q31_t) ((fcurnt3 * (*pk)) >> 15u) + fcurnt4; 00126 fnext4 = __SSAT(fnext4, 16); 00127 gnext4 = (q31_t) ((fcurnt4 * (*pk++)) >> 15u) + fcurnt3; 00128 gnext4 = __SSAT(gnext4, 16); 00129 00130 /* Update of f values for next coefficient set processing */ 00131 fcurnt1 = fnext1; 00132 fcurnt2 = fnext2; 00133 fcurnt3 = fnext3; 00134 fcurnt4 = fnext4; 00135 00136 00137 /* Loop unrolling. Process 4 taps at a time . 
*/ 00138 stageCnt = (numStages - 1u) >> 2; 00139 00140 00141 /* Loop over the number of taps. Unroll by a factor of 4. 00142 ** Repeat until we've computed numStages-3 coefficients. */ 00143 00144 /* Process 2nd, 3rd, 4th and 5th taps ... here */ 00145 while(stageCnt > 0u) 00146 { 00147 /* Read g1(n-1), g3(n-1) .... from state */ 00148 gcurnt1 = *px; 00149 00150 /* save g1(n) in state buffer */ 00151 *px++ = (q15_t) gnext4; 00152 00153 /* Process first sample for 2nd, 6th .. tap */ 00154 /* Sample processing for K2, K6.... */ 00155 /* f1(n) = f0(n) + K1 * g0(n-1) */ 00156 fnext1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fcurnt1; 00157 fnext1 = __SSAT(fnext1, 16); 00158 00159 00160 /* Process second sample for 2nd, 6th .. tap */ 00161 /* for sample 2 processing */ 00162 fnext2 = (q31_t) ((gnext1 * (*pk)) >> 15u) + fcurnt2; 00163 fnext2 = __SSAT(fnext2, 16); 00164 /* Process third sample for 2nd, 6th .. tap */ 00165 fnext3 = (q31_t) ((gnext2 * (*pk)) >> 15u) + fcurnt3; 00166 fnext3 = __SSAT(fnext3, 16); 00167 /* Process fourth sample for 2nd, 6th .. tap */ 00168 /* fnext4 = fcurnt4 + (*pk) * gnext3; */ 00169 fnext4 = (q31_t) ((gnext3 * (*pk)) >> 15u) + fcurnt4; 00170 fnext4 = __SSAT(fnext4, 16); 00171 00172 /* g1(n) = f0(n) * K1 + g0(n-1) */ 00173 /* Calculation of state values for next stage */ 00174 gnext4 = (q31_t) ((fcurnt4 * (*pk)) >> 15u) + gnext3; 00175 gnext4 = __SSAT(gnext4, 16); 00176 gnext3 = (q31_t) ((fcurnt3 * (*pk)) >> 15u) + gnext2; 00177 gnext3 = __SSAT(gnext3, 16); 00178 00179 gnext2 = (q31_t) ((fcurnt2 * (*pk)) >> 15u) + gnext1; 00180 gnext2 = __SSAT(gnext2, 16); 00181 00182 gnext1 = (q31_t) ((fcurnt1 * (*pk++)) >> 15u) + gcurnt1; 00183 gnext1 = __SSAT(gnext1, 16); 00184 00185 00186 /* Read g2(n-1), g4(n-1) .... from state */ 00187 gcurnt1 = *px; 00188 00189 /* save g1(n) in state buffer */ 00190 *px++ = (q15_t) gnext4; 00191 00192 /* Sample processing for K3, K7.... */ 00193 /* Process first sample for 3rd, 7th .. 
tap */ 00194 /* f3(n) = f2(n) + K3 * g2(n-1) */ 00195 fcurnt1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fnext1; 00196 fcurnt1 = __SSAT(fcurnt1, 16); 00197 00198 /* Process second sample for 3rd, 7th .. tap */ 00199 fcurnt2 = (q31_t) ((gnext1 * (*pk)) >> 15u) + fnext2; 00200 fcurnt2 = __SSAT(fcurnt2, 16); 00201 00202 /* Process third sample for 3rd, 7th .. tap */ 00203 fcurnt3 = (q31_t) ((gnext2 * (*pk)) >> 15u) + fnext3; 00204 fcurnt3 = __SSAT(fcurnt3, 16); 00205 00206 /* Process fourth sample for 3rd, 7th .. tap */ 00207 fcurnt4 = (q31_t) ((gnext3 * (*pk)) >> 15u) + fnext4; 00208 fcurnt4 = __SSAT(fcurnt4, 16); 00209 00210 /* Calculation of state values for next stage */ 00211 /* g3(n) = f2(n) * K3 + g2(n-1) */ 00212 gnext4 = (q31_t) ((fnext4 * (*pk)) >> 15u) + gnext3; 00213 gnext4 = __SSAT(gnext4, 16); 00214 00215 gnext3 = (q31_t) ((fnext3 * (*pk)) >> 15u) + gnext2; 00216 gnext3 = __SSAT(gnext3, 16); 00217 00218 gnext2 = (q31_t) ((fnext2 * (*pk)) >> 15u) + gnext1; 00219 gnext2 = __SSAT(gnext2, 16); 00220 00221 gnext1 = (q31_t) ((fnext1 * (*pk++)) >> 15u) + gcurnt1; 00222 gnext1 = __SSAT(gnext1, 16); 00223 00224 /* Read g1(n-1), g3(n-1) .... from state */ 00225 gcurnt1 = *px; 00226 00227 /* save g1(n) in state buffer */ 00228 *px++ = (q15_t) gnext4; 00229 00230 /* Sample processing for K4, K8.... */ 00231 /* Process first sample for 4th, 8th .. tap */ 00232 /* f4(n) = f3(n) + K4 * g3(n-1) */ 00233 fnext1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fcurnt1; 00234 fnext1 = __SSAT(fnext1, 16); 00235 00236 /* Process second sample for 4th, 8th .. tap */ 00237 /* for sample 2 processing */ 00238 fnext2 = (q31_t) ((gnext1 * (*pk)) >> 15u) + fcurnt2; 00239 fnext2 = __SSAT(fnext2, 16); 00240 00241 /* Process third sample for 4th, 8th .. tap */ 00242 fnext3 = (q31_t) ((gnext2 * (*pk)) >> 15u) + fcurnt3; 00243 fnext3 = __SSAT(fnext3, 16); 00244 00245 /* Process fourth sample for 4th, 8th .. 
tap */ 00246 fnext4 = (q31_t) ((gnext3 * (*pk)) >> 15u) + fcurnt4; 00247 fnext4 = __SSAT(fnext4, 16); 00248 00249 /* g4(n) = f3(n) * K4 + g3(n-1) */ 00250 /* Calculation of state values for next stage */ 00251 gnext4 = (q31_t) ((fcurnt4 * (*pk)) >> 15u) + gnext3; 00252 gnext4 = __SSAT(gnext4, 16); 00253 00254 gnext3 = (q31_t) ((fcurnt3 * (*pk)) >> 15u) + gnext2; 00255 gnext3 = __SSAT(gnext3, 16); 00256 00257 gnext2 = (q31_t) ((fcurnt2 * (*pk)) >> 15u) + gnext1; 00258 gnext2 = __SSAT(gnext2, 16); 00259 gnext1 = (q31_t) ((fcurnt1 * (*pk++)) >> 15u) + gcurnt1; 00260 gnext1 = __SSAT(gnext1, 16); 00261 00262 00263 /* Read g2(n-1), g4(n-1) .... from state */ 00264 gcurnt1 = *px; 00265 00266 /* save g4(n) in state buffer */ 00267 *px++ = (q15_t) gnext4; 00268 00269 /* Sample processing for K5, K9.... */ 00270 /* Process first sample for 5th, 9th .. tap */ 00271 /* f5(n) = f4(n) + K5 * g4(n-1) */ 00272 fcurnt1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fnext1; 00273 fcurnt1 = __SSAT(fcurnt1, 16); 00274 00275 /* Process second sample for 5th, 9th .. tap */ 00276 fcurnt2 = (q31_t) ((gnext1 * (*pk)) >> 15u) + fnext2; 00277 fcurnt2 = __SSAT(fcurnt2, 16); 00278 00279 /* Process third sample for 5th, 9th .. tap */ 00280 fcurnt3 = (q31_t) ((gnext2 * (*pk)) >> 15u) + fnext3; 00281 fcurnt3 = __SSAT(fcurnt3, 16); 00282 00283 /* Process fourth sample for 5th, 9th .. 
tap */ 00284 fcurnt4 = (q31_t) ((gnext3 * (*pk)) >> 15u) + fnext4; 00285 fcurnt4 = __SSAT(fcurnt4, 16); 00286 00287 /* Calculation of state values for next stage */ 00288 /* g5(n) = f4(n) * K5 + g4(n-1) */ 00289 gnext4 = (q31_t) ((fnext4 * (*pk)) >> 15u) + gnext3; 00290 gnext4 = __SSAT(gnext4, 16); 00291 gnext3 = (q31_t) ((fnext3 * (*pk)) >> 15u) + gnext2; 00292 gnext3 = __SSAT(gnext3, 16); 00293 gnext2 = (q31_t) ((fnext2 * (*pk)) >> 15u) + gnext1; 00294 gnext2 = __SSAT(gnext2, 16); 00295 gnext1 = (q31_t) ((fnext1 * (*pk++)) >> 15u) + gcurnt1; 00296 gnext1 = __SSAT(gnext1, 16); 00297 00298 stageCnt--; 00299 } 00300 00301 /* If the (filter length -1) is not a multiple of 4, compute the remaining filter taps */ 00302 stageCnt = (numStages - 1u) % 0x4u; 00303 00304 while(stageCnt > 0u) 00305 { 00306 gcurnt1 = *px; 00307 00308 /* save g value in state buffer */ 00309 *px++ = (q15_t) gnext4; 00310 00311 /* Process four samples for last three taps here */ 00312 fnext1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fcurnt1; 00313 fnext1 = __SSAT(fnext1, 16); 00314 fnext2 = (q31_t) ((gnext1 * (*pk)) >> 15u) + fcurnt2; 00315 fnext2 = __SSAT(fnext2, 16); 00316 00317 fnext3 = (q31_t) ((gnext2 * (*pk)) >> 15u) + fcurnt3; 00318 fnext3 = __SSAT(fnext3, 16); 00319 00320 fnext4 = (q31_t) ((gnext3 * (*pk)) >> 15u) + fcurnt4; 00321 fnext4 = __SSAT(fnext4, 16); 00322 00323 /* g1(n) = f0(n) * K1 + g0(n-1) */ 00324 gnext4 = (q31_t) ((fcurnt4 * (*pk)) >> 15u) + gnext3; 00325 gnext4 = __SSAT(gnext4, 16); 00326 gnext3 = (q31_t) ((fcurnt3 * (*pk)) >> 15u) + gnext2; 00327 gnext3 = __SSAT(gnext3, 16); 00328 gnext2 = (q31_t) ((fcurnt2 * (*pk)) >> 15u) + gnext1; 00329 gnext2 = __SSAT(gnext2, 16); 00330 gnext1 = (q31_t) ((fcurnt1 * (*pk++)) >> 15u) + gcurnt1; 00331 gnext1 = __SSAT(gnext1, 16); 00332 00333 /* Update of f values for next coefficient set processing */ 00334 fcurnt1 = fnext1; 00335 fcurnt2 = fnext2; 00336 fcurnt3 = fnext3; 00337 fcurnt4 = fnext4; 00338 00339 stageCnt--; 00340 00341 } 00342 
00343 /* The results in the 4 accumulators, store in the destination buffer. */ 00344 /* y(n) = fN(n) */ 00345 *__SIMD32(pDst)++ = __PKHBT(fcurnt1, fcurnt2, 16); 00346 *__SIMD32(pDst)++ = __PKHBT(fcurnt3, fcurnt4, 16); 00347 00348 blkCnt--; 00349 } 00350 00351 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00352 ** No loop unrolling is used. */ 00353 blkCnt = blockSize % 0x4u; 00354 00355 while(blkCnt > 0u) 00356 { 00357 /* f0(n) = x(n) */ 00358 fcurnt1 = *pSrc++; 00359 00360 /* Initialize coeff pointer */ 00361 pk = (pCoeffs); 00362 00363 /* Initialize state pointer */ 00364 px = pState; 00365 00366 /* read g2(n) from state buffer */ 00367 gcurnt1 = *px; 00368 00369 /* for sample 1 processing */ 00370 /* f1(n) = f0(n) + K1 * g0(n-1) */ 00371 fnext1 = (((q31_t) gcurnt1 * (*pk)) >> 15u) + fcurnt1; 00372 fnext1 = __SSAT(fnext1, 16); 00373 00374 00375 /* g1(n) = f0(n) * K1 + g0(n-1) */ 00376 gnext1 = (((q31_t) fcurnt1 * (*pk++)) >> 15u) + gcurnt1; 00377 gnext1 = __SSAT(gnext1, 16); 00378 00379 /* save g1(n) in state buffer */ 00380 *px++ = (q15_t) fcurnt1; 00381 00382 /* f1(n) is saved in fcurnt1 00383 for next stage processing */ 00384 fcurnt1 = fnext1; 00385 00386 stageCnt = (numStages - 1u); 00387 00388 /* stage loop */ 00389 while(stageCnt > 0u) 00390 { 00391 /* read g2(n) from state buffer */ 00392 gcurnt1 = *px; 00393 00394 /* save g1(n) in state buffer */ 00395 *px++ = (q15_t) gnext1; 00396 00397 /* Sample processing for K2, K3.... 
*/ 00398 /* f2(n) = f1(n) + K2 * g1(n-1) */ 00399 fnext1 = (((q31_t) gcurnt1 * (*pk)) >> 15u) + fcurnt1; 00400 fnext1 = __SSAT(fnext1, 16); 00401 00402 /* g2(n) = f1(n) * K2 + g1(n-1) */ 00403 gnext1 = (((q31_t) fcurnt1 * (*pk++)) >> 15u) + gcurnt1; 00404 gnext1 = __SSAT(gnext1, 16); 00405 00406 00407 /* f1(n) is saved in fcurnt1 00408 for next stage processing */ 00409 fcurnt1 = fnext1; 00410 00411 stageCnt--; 00412 00413 } 00414 00415 /* y(n) = fN(n) */ 00416 *pDst++ = __SSAT(fcurnt1, 16); 00417 00418 00419 blkCnt--; 00420 00421 } 00422 } 00423 00424 /** 00425 * @} end of FIR_Lattice group 00426 */
Generated on Tue Jul 12 2022 14:13:53 by Doxygen 1.7.2