Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-os by
arm_iir_lattice_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_iir_lattice_q15.c 00009 * 00010 * Description: Q15 IIR lattice filter processing function. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @ingroup groupFilters 00045 */ 00046 00047 /** 00048 * @addtogroup IIR_Lattice 00049 * @{ 00050 */ 00051 00052 /** 00053 * @brief Processing function for the Q15 IIR lattice filter. 00054 * @param[in] *S points to an instance of the Q15 IIR lattice structure. 00055 * @param[in] *pSrc points to the block of input data. 00056 * @param[out] *pDst points to the block of output data. 00057 * @param[in] blockSize number of samples to process. 00058 * @return none. 00059 * 00060 * @details 00061 * <b>Scaling and Overflow Behavior:</b> 00062 * \par 00063 * The function is implemented using a 64-bit internal accumulator. 00064 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result. 00065 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format. 00066 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved. 00067 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits. 00068 * Lastly, the accumulator is saturated to yield a result in 1.15 format. 
00069 */ 00070 00071 void arm_iir_lattice_q15( 00072 const arm_iir_lattice_instance_q15 * S, 00073 q15_t * pSrc, 00074 q15_t * pDst, 00075 uint32_t blockSize) 00076 { 00077 00078 00079 #ifndef ARM_MATH_CM0_FAMILY 00080 00081 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00082 00083 q31_t fcurr, fnext, gcurr = 0, gnext; /* Temporary variables for lattice stages */ 00084 q15_t gnext1, gnext2; /* Temporary variables for lattice stages */ 00085 uint32_t stgCnt; /* Temporary variables for counts */ 00086 q63_t acc; /* Accumlator */ 00087 uint32_t blkCnt, tapCnt; /* Temporary variables for counts */ 00088 q15_t *px1, *px2, *pk, *pv; /* temporary pointers for state and coef */ 00089 uint32_t numStages = S->numStages; /* number of stages */ 00090 q15_t *pState; /* State pointer */ 00091 q15_t *pStateCurnt; /* State current pointer */ 00092 q15_t out; /* Temporary variable for output */ 00093 q31_t v; /* Temporary variable for ladder coefficient */ 00094 #ifdef UNALIGNED_SUPPORT_DISABLE 00095 q15_t v1, v2; 00096 #endif 00097 00098 00099 blkCnt = blockSize; 00100 00101 pState = &S->pState[0]; 00102 00103 /* Sample processing */ 00104 while(blkCnt > 0u) 00105 { 00106 /* Read Sample from input buffer */ 00107 /* fN(n) = x(n) */ 00108 fcurr = *pSrc++; 00109 00110 /* Initialize state read pointer */ 00111 px1 = pState; 00112 /* Initialize state write pointer */ 00113 px2 = pState; 00114 /* Set accumulator to zero */ 00115 acc = 0; 00116 /* Initialize Ladder coeff pointer */ 00117 pv = &S->pvCoeffs[0]; 00118 /* Initialize Reflection coeff pointer */ 00119 pk = &S->pkCoeffs[0]; 00120 00121 00122 /* Process sample for first tap */ 00123 gcurr = *px1++; 00124 /* fN-1(n) = fN(n) - kN * gN-1(n-1) */ 00125 fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15); 00126 fnext = __SSAT(fnext, 16); 00127 /* gN(n) = kN * fN-1(n) + gN-1(n-1) */ 00128 gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr; 00129 gnext = __SSAT(gnext, 16); 00130 /* write gN(n) into state for next sample processing 
*/ 00131 *px2++ = (q15_t) gnext; 00132 /* y(n) += gN(n) * vN */ 00133 acc += (q31_t) ((gnext * (*pv++))); 00134 00135 00136 /* Update f values for next coefficient processing */ 00137 fcurr = fnext; 00138 00139 /* Loop unrolling. Process 4 taps at a time. */ 00140 tapCnt = (numStages - 1u) >> 2; 00141 00142 while(tapCnt > 0u) 00143 { 00144 00145 /* Process sample for 2nd, 6th ...taps */ 00146 /* Read gN-2(n-1) from state buffer */ 00147 gcurr = *px1++; 00148 /* Process sample for 2nd, 6th .. taps */ 00149 /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */ 00150 fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15); 00151 fnext = __SSAT(fnext, 16); 00152 /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */ 00153 gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr; 00154 gnext1 = (q15_t) __SSAT(gnext, 16); 00155 /* write gN-1(n) into state */ 00156 *px2++ = (q15_t) gnext1; 00157 00158 00159 /* Process sample for 3nd, 7th ...taps */ 00160 /* Read gN-3(n-1) from state */ 00161 gcurr = *px1++; 00162 /* Process sample for 3rd, 7th .. 
taps */ 00163 /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */ 00164 fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15); 00165 fcurr = __SSAT(fcurr, 16); 00166 /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */ 00167 gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr; 00168 gnext2 = (q15_t) __SSAT(gnext, 16); 00169 /* write gN-2(n) into state */ 00170 *px2++ = (q15_t) gnext2; 00171 00172 /* Read vN-1 and vN-2 at a time */ 00173 #ifndef UNALIGNED_SUPPORT_DISABLE 00174 00175 v = *__SIMD32(pv)++; 00176 00177 #else 00178 00179 v1 = *pv++; 00180 v2 = *pv++; 00181 00182 #ifndef ARM_MATH_BIG_ENDIAN 00183 00184 v = __PKHBT(v1, v2, 16); 00185 00186 #else 00187 00188 v = __PKHBT(v2, v1, 16); 00189 00190 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00191 00192 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */ 00193 00194 00195 /* Pack gN-1(n) and gN-2(n) */ 00196 00197 #ifndef ARM_MATH_BIG_ENDIAN 00198 00199 gnext = __PKHBT(gnext1, gnext2, 16); 00200 00201 #else 00202 00203 gnext = __PKHBT(gnext2, gnext1, 16); 00204 00205 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00206 00207 /* y(n) += gN-1(n) * vN-1 */ 00208 /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */ 00209 /* y(n) += gN-2(n) * vN-2 */ 00210 /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */ 00211 acc = __SMLALD(gnext, v, acc); 00212 00213 00214 /* Process sample for 4th, 8th ...taps */ 00215 /* Read gN-4(n-1) from state */ 00216 gcurr = *px1++; 00217 /* Process sample for 4th, 8th .. taps */ 00218 /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */ 00219 fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15); 00220 fnext = __SSAT(fnext, 16); 00221 /* gN-3(n) = kN-3 * fN-1(n) + gN-1(n-1) */ 00222 gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr; 00223 gnext1 = (q15_t) __SSAT(gnext, 16); 00224 /* write gN-3(n) for the next sample process */ 00225 *px2++ = (q15_t) gnext1; 00226 00227 00228 /* Process sample for 5th, 9th ...taps */ 00229 /* Read gN-5(n-1) from state */ 00230 gcurr = *px1++; 00231 /* Process sample for 5th, 9th .. 
taps */ 00232 /* fN-5(n) = fN-4(n) - kN-4 * gN-5(n-1) */ 00233 fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15); 00234 fcurr = __SSAT(fcurr, 16); 00235 /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */ 00236 gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr; 00237 gnext2 = (q15_t) __SSAT(gnext, 16); 00238 /* write gN-4(n) for the next sample process */ 00239 *px2++ = (q15_t) gnext2; 00240 00241 /* Read vN-3 and vN-4 at a time */ 00242 #ifndef UNALIGNED_SUPPORT_DISABLE 00243 00244 v = *__SIMD32(pv)++; 00245 00246 #else 00247 00248 v1 = *pv++; 00249 v2 = *pv++; 00250 00251 #ifndef ARM_MATH_BIG_ENDIAN 00252 00253 v = __PKHBT(v1, v2, 16); 00254 00255 #else 00256 00257 v = __PKHBT(v2, v1, 16); 00258 00259 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00260 00261 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */ 00262 00263 00264 /* Pack gN-3(n) and gN-4(n) */ 00265 #ifndef ARM_MATH_BIG_ENDIAN 00266 00267 gnext = __PKHBT(gnext1, gnext2, 16); 00268 00269 #else 00270 00271 gnext = __PKHBT(gnext2, gnext1, 16); 00272 00273 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00274 00275 /* y(n) += gN-4(n) * vN-4 */ 00276 /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */ 00277 /* y(n) += gN-3(n) * vN-3 */ 00278 /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... 
*/ 00279 acc = __SMLALD(gnext, v, acc); 00280 00281 tapCnt--; 00282 00283 } 00284 00285 fnext = fcurr; 00286 00287 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00288 tapCnt = (numStages - 1u) % 0x4u; 00289 00290 while(tapCnt > 0u) 00291 { 00292 gcurr = *px1++; 00293 /* Process sample for last taps */ 00294 fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15); 00295 fnext = __SSAT(fnext, 16); 00296 gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr; 00297 gnext = __SSAT(gnext, 16); 00298 /* Output samples for last taps */ 00299 acc += (q31_t) (((q31_t) gnext * (*pv++))); 00300 *px2++ = (q15_t) gnext; 00301 fcurr = fnext; 00302 00303 tapCnt--; 00304 } 00305 00306 /* y(n) += g0(n) * v0 */ 00307 acc += (q31_t) (((q31_t) fnext * (*pv++))); 00308 00309 out = (q15_t) __SSAT(acc >> 15, 16); 00310 *px2++ = (q15_t) fnext; 00311 00312 /* write out into pDst */ 00313 *pDst++ = out; 00314 00315 /* Advance the state pointer by 4 to process the next group of 4 samples */ 00316 pState = pState + 1u; 00317 blkCnt--; 00318 00319 } 00320 00321 /* Processing is complete. 
Now copy last S->numStages samples to start of the buffer 00322 for the preperation of next frame process */ 00323 /* Points to the start of the state buffer */ 00324 pStateCurnt = &S->pState[0]; 00325 pState = &S->pState[blockSize]; 00326 00327 stgCnt = (numStages >> 2u); 00328 00329 /* copy data */ 00330 while(stgCnt > 0u) 00331 { 00332 #ifndef UNALIGNED_SUPPORT_DISABLE 00333 00334 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00335 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00336 00337 #else 00338 00339 *pStateCurnt++ = *pState++; 00340 *pStateCurnt++ = *pState++; 00341 *pStateCurnt++ = *pState++; 00342 *pStateCurnt++ = *pState++; 00343 00344 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */ 00345 00346 /* Decrement the loop counter */ 00347 stgCnt--; 00348 00349 } 00350 00351 /* Calculation of count for remaining q15_t data */ 00352 stgCnt = (numStages) % 0x4u; 00353 00354 /* copy data */ 00355 while(stgCnt > 0u) 00356 { 00357 *pStateCurnt++ = *pState++; 00358 00359 /* Decrement the loop counter */ 00360 stgCnt--; 00361 } 00362 00363 #else 00364 00365 /* Run the below code for Cortex-M0 */ 00366 00367 q31_t fcurr, fnext = 0, gcurr = 0, gnext; /* Temporary variables for lattice stages */ 00368 uint32_t stgCnt; /* Temporary variables for counts */ 00369 q63_t acc; /* Accumlator */ 00370 uint32_t blkCnt, tapCnt; /* Temporary variables for counts */ 00371 q15_t *px1, *px2, *pk, *pv; /* temporary pointers for state and coef */ 00372 uint32_t numStages = S->numStages; /* number of stages */ 00373 q15_t *pState; /* State pointer */ 00374 q15_t *pStateCurnt; /* State current pointer */ 00375 q15_t out; /* Temporary variable for output */ 00376 00377 00378 blkCnt = blockSize; 00379 00380 pState = &S->pState[0]; 00381 00382 /* Sample processing */ 00383 while(blkCnt > 0u) 00384 { 00385 /* Read Sample from input buffer */ 00386 /* fN(n) = x(n) */ 00387 fcurr = *pSrc++; 00388 00389 /* Initialize state read pointer */ 00390 px1 = pState; 00391 /* Initialize state write 
pointer */ 00392 px2 = pState; 00393 /* Set accumulator to zero */ 00394 acc = 0; 00395 /* Initialize Ladder coeff pointer */ 00396 pv = &S->pvCoeffs[0]; 00397 /* Initialize Reflection coeff pointer */ 00398 pk = &S->pkCoeffs[0]; 00399 00400 tapCnt = numStages; 00401 00402 while(tapCnt > 0u) 00403 { 00404 gcurr = *px1++; 00405 /* Process sample */ 00406 /* fN-1(n) = fN(n) - kN * gN-1(n-1) */ 00407 fnext = fcurr - ((gcurr * (*pk)) >> 15); 00408 fnext = __SSAT(fnext, 16); 00409 /* gN(n) = kN * fN-1(n) + gN-1(n-1) */ 00410 gnext = ((fnext * (*pk++)) >> 15) + gcurr; 00411 gnext = __SSAT(gnext, 16); 00412 /* Output samples */ 00413 /* y(n) += gN(n) * vN */ 00414 acc += (q31_t) ((gnext * (*pv++))); 00415 /* write gN(n) into state for next sample processing */ 00416 *px2++ = (q15_t) gnext; 00417 /* Update f values for next coefficient processing */ 00418 fcurr = fnext; 00419 00420 tapCnt--; 00421 } 00422 00423 /* y(n) += g0(n) * v0 */ 00424 acc += (q31_t) ((fnext * (*pv++))); 00425 00426 out = (q15_t) __SSAT(acc >> 15, 16); 00427 *px2++ = (q15_t) fnext; 00428 00429 /* write out into pDst */ 00430 *pDst++ = out; 00431 00432 /* Advance the state pointer by 1 to process the next group of samples */ 00433 pState = pState + 1u; 00434 blkCnt--; 00435 00436 } 00437 00438 /* Processing is complete. Now copy last S->numStages samples to start of the buffer 00439 for the preperation of next frame process */ 00440 /* Points to the start of the state buffer */ 00441 pStateCurnt = &S->pState[0]; 00442 pState = &S->pState[blockSize]; 00443 00444 stgCnt = numStages; 00445 00446 /* copy data */ 00447 while(stgCnt > 0u) 00448 { 00449 *pStateCurnt++ = *pState++; 00450 00451 /* Decrement the loop counter */ 00452 stgCnt--; 00453 } 00454 00455 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00456 00457 } 00458 00459 00460 00461 00462 /** 00463 * @} end of IIR_Lattice group 00464 */
Generated on Tue Jul 12 2022 13:15:25 by
