CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details
Dependents: K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more
arm_iir_lattice_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_iir_lattice_q15.c 00009 * 00010 * Description: Q15 IIR lattice filter processing function. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.3 2010/11/29 00015 * Re-organized the CMSIS folders and updated documentation. 00016 * 00017 * Version 1.0.2 2010/11/11 00018 * Documentation updated. 00019 * 00020 * Version 1.0.1 2010/10/05 00021 * Production release and review comments incorporated. 00022 * 00023 * Version 1.0.0 2010/09/20 00024 * Production release and review comments incorporated 00025 * 00026 * Version 0.0.7 2010/06/10 00027 * Misra-C changes done 00028 * -------------------------------------------------------------------- */ 00029 00030 #include "arm_math.h" 00031 00032 /** 00033 * @ingroup groupFilters 00034 */ 00035 00036 /** 00037 * @addtogroup IIR_Lattice 00038 * @{ 00039 */ 00040 00041 /** 00042 * @brief Processing function for the Q15 IIR lattice filter. 00043 * @param[in] *S points to an instance of the Q15 IIR lattice structure. 00044 * @param[in] *pSrc points to the block of input data. 00045 * @param[out] *pDst points to the block of output data. 00046 * @param[in] blockSize number of samples to process. 00047 * @return none. 00048 * 00049 * @details 00050 * <b>Scaling and Overflow Behavior:</b> 00051 * \par 00052 * The function is implemented using a 64-bit internal accumulator. 00053 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result. 00054 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format. 00055 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved. 
00056 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits. 00057 * Lastly, the accumulator is saturated to yield a result in 1.15 format. 00058 */ 00059 00060 void arm_iir_lattice_q15( 00061 const arm_iir_lattice_instance_q15 * S, 00062 q15_t * pSrc, 00063 q15_t * pDst, 00064 uint32_t blockSize) 00065 { 00066 q31_t fcurr, fnext, gcurr = 0, gnext; /* Temporary variables for lattice stages */ 00067 q15_t gnext1, gnext2; /* Temporary variables for lattice stages */ 00068 uint32_t stgCnt; /* Temporary variables for counts */ 00069 q63_t acc; /* Accumlator */ 00070 uint32_t blkCnt, tapCnt; /* Temporary variables for counts */ 00071 q15_t *px1, *px2, *pk, *pv; /* temporary pointers for state and coef */ 00072 uint32_t numStages = S->numStages; /* number of stages */ 00073 q15_t *pState; /* State pointer */ 00074 q15_t *pStateCurnt; /* State current pointer */ 00075 q15_t out; /* Temporary variable for output */ 00076 q31_t v; /* Temporary variable for ladder coefficient */ 00077 00078 00079 blkCnt = blockSize; 00080 00081 pState = &S->pState[0]; 00082 00083 /* Sample processing */ 00084 while(blkCnt > 0u) 00085 { 00086 /* Read Sample from input buffer */ 00087 /* fN(n) = x(n) */ 00088 fcurr = *pSrc++; 00089 00090 /* Initialize state read pointer */ 00091 px1 = pState; 00092 /* Initialize state write pointer */ 00093 px2 = pState; 00094 /* Set accumulator to zero */ 00095 acc = 0; 00096 /* Initialize Ladder coeff pointer */ 00097 pv = &S->pvCoeffs[0]; 00098 /* Initialize Reflection coeff pointer */ 00099 pk = &S->pkCoeffs[0]; 00100 00101 00102 /* Process sample for first tap */ 00103 gcurr = *px1++; 00104 /* fN-1(n) = fN(n) - kN * gN-1(n-1) */ 00105 fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15); 00106 fnext = __SSAT(fnext, 16); 00107 /* gN(n) = kN * fN-1(n) + gN-1(n-1) */ 00108 gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr; 00109 gnext = __SSAT(gnext, 16); 00110 /* write gN(n) into state for next sample 
processing */ 00111 *px2++ = (q15_t) gnext; 00112 /* y(n) += gN(n) * vN */ 00113 acc += (q31_t) ((gnext * (*pv++))); 00114 00115 00116 /* Update f values for next coefficient processing */ 00117 fcurr = fnext; 00118 00119 /* Loop unrolling. Process 4 taps at a time. */ 00120 tapCnt = (numStages - 1u) >> 2; 00121 00122 while(tapCnt > 0u) 00123 { 00124 00125 /* Process sample for 2nd, 6th ...taps */ 00126 /* Read gN-2(n-1) from state buffer */ 00127 gcurr = *px1++; 00128 /* Process sample for 2nd, 6th .. taps */ 00129 /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */ 00130 fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15); 00131 fnext = __SSAT(fnext, 16); 00132 /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */ 00133 gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr; 00134 gnext1 = (q15_t) __SSAT(gnext, 16); 00135 /* write gN-1(n) into state */ 00136 *px2++ = (q15_t) gnext1; 00137 00138 00139 /* Process sample for 3nd, 7th ...taps */ 00140 /* Read gN-3(n-1) from state */ 00141 gcurr = *px1++; 00142 /* Process sample for 3rd, 7th .. taps */ 00143 /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */ 00144 fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15); 00145 fcurr = __SSAT(fcurr, 16); 00146 /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */ 00147 gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr; 00148 gnext2 = (q15_t) __SSAT(gnext, 16); 00149 /* write gN-2(n) into state */ 00150 *px2++ = (q15_t) gnext2; 00151 00152 /* Read vN-1 and vN-2 at a time */ 00153 v = *__SIMD32(pv)++; 00154 00155 00156 /* Pack gN-1(n) and gN-2(n) */ 00157 gnext = __PKHBT(gnext1, gnext2, 16); 00158 /* y(n) += gN-1(n) * vN-1 */ 00159 /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */ 00160 /* y(n) += gN-2(n) * vN-2 */ 00161 /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */ 00162 acc = __SMLALD(gnext, v, acc); 00163 00164 00165 /* Process sample for 4th, 8th ...taps */ 00166 /* Read gN-4(n-1) from state */ 00167 gcurr = *px1++; 00168 /* Process sample for 4th, 8th .. 
taps */ 00169 /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */ 00170 fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15); 00171 fnext = __SSAT(fnext, 16); 00172 /* gN-3(n) = kN-3 * fN-1(n) + gN-1(n-1) */ 00173 gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr; 00174 gnext1 = (q15_t) __SSAT(gnext, 16); 00175 /* write gN-3(n) for the next sample process */ 00176 *px2++ = (q15_t) gnext1; 00177 00178 00179 /* Process sample for 5th, 9th ...taps */ 00180 /* Read gN-5(n-1) from state */ 00181 gcurr = *px1++; 00182 /* Process sample for 5th, 9th .. taps */ 00183 /* fN-5(n) = fN-4(n) - kN-4 * gN-5(n-1) */ 00184 fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15); 00185 fcurr = __SSAT(fcurr, 16); 00186 /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */ 00187 gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr; 00188 gnext2 = (q15_t) __SSAT(gnext, 16); 00189 /* write gN-4(n) for the next sample process */ 00190 *px2++ = (q15_t) gnext2; 00191 00192 /* Read vN-3 and vN-4 at a time */ 00193 v = *__SIMD32(pv)++; 00194 00195 /* Pack gN-3(n) and gN-4(n) */ 00196 gnext = __PKHBT(gnext1, gnext2, 16); 00197 /* y(n) += gN-4(n) * vN-4 */ 00198 /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */ 00199 /* y(n) += gN-3(n) * vN-3 */ 00200 /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... 
*/ 00201 acc = __SMLALD(gnext, v, acc); 00202 00203 tapCnt--; 00204 00205 } 00206 00207 fnext = fcurr; 00208 00209 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00210 tapCnt = (numStages - 1u) % 0x4u; 00211 00212 while(tapCnt > 0u) 00213 { 00214 gcurr = *px1++; 00215 /* Process sample for last taps */ 00216 fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15); 00217 fnext = __SSAT(fnext, 16); 00218 gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr; 00219 gnext = __SSAT(gnext, 16); 00220 /* Output samples for last taps */ 00221 acc += (q31_t) (((q31_t) gnext * (*pv++))); 00222 *px2++ = (q15_t) gnext; 00223 fcurr = fnext; 00224 00225 tapCnt--; 00226 } 00227 00228 /* y(n) += g0(n) * v0 */ 00229 acc += (q31_t) (((q31_t) fnext * (*pv++))); 00230 00231 out = (q15_t) __SSAT(acc >> 15, 16); 00232 *px2++ = (q15_t) fnext; 00233 00234 /* write out into pDst */ 00235 *pDst++ = out; 00236 00237 /* Advance the state pointer by 4 to process the next group of 4 samples */ 00238 pState = pState + 1u; 00239 blkCnt--; 00240 00241 } 00242 00243 /* Processing is complete. Now copy last S->numStages samples to start of the buffer 00244 for the preperation of next frame process */ 00245 /* Points to the start of the state buffer */ 00246 pStateCurnt = &S->pState[0]; 00247 pState = &S->pState[blockSize]; 00248 00249 stgCnt = (numStages >> 2u); 00250 00251 /* copy data */ 00252 while(stgCnt > 0u) 00253 { 00254 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00255 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00256 00257 /* Decrement the loop counter */ 00258 stgCnt--; 00259 00260 } 00261 00262 /* Calculation of count for remaining q15_t data */ 00263 stgCnt = (numStages) % 0x4u; 00264 00265 /* copy data */ 00266 while(stgCnt > 0u) 00267 { 00268 *pStateCurnt++ = *pState++; 00269 00270 /* Decrement the loop counter */ 00271 stgCnt--; 00272 } 00273 00274 } 00275 00276 00277 00278 00279 /** 00280 * @} end of IIR_Lattice group 00281 */
Generated on Tue Jul 12 2022 14:13:53 by Doxygen 1.7.2