CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details

Dependents:   K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_iir_lattice_q15.c Source File

arm_iir_lattice_q15.c

00001 /* ----------------------------------------------------------------------  
00002 * Copyright (C) 2010 ARM Limited. All rights reserved.  
00003 *  
00004 * $Date:        29. November 2010  
00005 * $Revision:    V1.0.3  
00006 *  
00007 * Project:      CMSIS DSP Library  
00008 * Title:        arm_iir_lattice_q15.c  
00009 *  
00010 * Description:  Q15 IIR lattice filter processing function.  
00011 *  
00012 * Target Processor: Cortex-M4/Cortex-M3
00013 *  
00014 * Version 1.0.3 2010/11/29 
00015 *    Re-organized the CMSIS folders and updated documentation.  
00016 *   
00017 * Version 1.0.2 2010/11/11  
00018 *    Documentation updated.   
00019 *  
00020 * Version 1.0.1 2010/10/05   
00021 *    Production release and review comments incorporated.  
00022 *  
00023 * Version 1.0.0 2010/09/20   
00024 *    Production release and review comments incorporated  
00025 *  
00026 * Version 0.0.7  2010/06/10   
00027 *    Misra-C changes done  
00028 * -------------------------------------------------------------------- */ 
00029  
00030 #include "arm_math.h" 
00031  
00032 /**  
00033  * @ingroup groupFilters  
00034  */ 
00035  
00036 /**  
00037  * @addtogroup IIR_Lattice  
00038  * @{  
00039  */ 
00040  
00041 /**  
00042  * @brief Processing function for the Q15 IIR lattice filter.  
00043  * @param[in] *S points to an instance of the Q15 IIR lattice structure. Its numStages, pkCoeffs, pvCoeffs and pState members are read, and the buffer behind pState is updated.  
00044  * @param[in] *pSrc points to the block of input data; blockSize samples are read.  
00045  * @param[out] *pDst points to the block of output data; blockSize samples are written.  
00046  * @param[in] blockSize number of samples to process.  
00047  * @return none.  
00048  *  
00049  * @details  
00050  * <b>Scaling and Overflow Behavior:</b>  
00051  * \par  
00052  * The function is implemented using a 64-bit internal accumulator.  
00053  * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.  
00054  * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.  
00055  * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.  
00056  * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.  
00057  * Lastly, the accumulator is saturated to yield a result in 1.15 format.  
00058  */ 
00059  
00060 void arm_iir_lattice_q15( 
00061   const arm_iir_lattice_instance_q15 * S, 
00062   q15_t * pSrc, 
00063   q15_t * pDst, 
00064   uint32_t blockSize) 
00065 { 
00066   q31_t fcurr, fnext, gcurr = 0, gnext;          /* Forward (f) and backward (g) lattice values for the current and next stage */ 
00067   q15_t gnext1, gnext2;                          /* g values of two adjacent stages, kept as q15_t so they can be packed for __SMLALD */ 
00068   uint32_t stgCnt;                               /* Loop counter for the final state copy */ 
00069   q63_t acc;                                     /* Accumulator for the ladder (output) sum */ 
00070   uint32_t blkCnt, tapCnt;                       /* Loop counters: samples left, stages left */ 
00071   q15_t *px1, *px2, *pk, *pv;                    /* State read/write pointers, reflection (k) and ladder (v) coefficient pointers */ 
00072   uint32_t numStages = S->numStages;             /* Number of lattice stages */ 
00073   q15_t *pState;                                 /* Start of the state window used by the current sample */ 
00074   q15_t *pStateCurnt;                            /* Destination pointer for the final state copy */ 
00075   q15_t out;                                     /* Temporary variable for output */ 
00076   q31_t v;                                       /* Two ladder coefficients loaded as one 32-bit word */ 
00077   /* State layout: sample i reads pState[i .. i + numStages - 1] and writes pState[i .. i + numStages], 
00078      so elements 0 .. blockSize + numStages - 1 of S->pState are touched during one call. */ 
00079   blkCnt = blockSize; 
00080  
00081   pState = &S->pState[0]; 
00082  
00083   /* Process one input sample per iteration */ 
00084   while(blkCnt > 0u) 
00085   { 
00086     /* Read Sample from input buffer */ 
00087     /* fN(n) = x(n) */ 
00088     fcurr = *pSrc++; 
00089  
00090     /* Initialize state read pointer */ 
00091     px1 = pState; 
00092     /* Initialize state write pointer (same location: the state is updated in place) */ 
00093     px2 = pState; 
00094     /* Set accumulator to zero */ 
00095     acc = 0; 
00096     /* Initialize Ladder coeff pointer */ 
00097     pv = &S->pvCoeffs[0]; 
00098     /* Initialize Reflection coeff pointer */ 
00099     pk = &S->pkCoeffs[0]; 
00100  
00101     /* Per sample, numStages reflection coefficients (pk) and numStages + 1 ladder coefficients (pv) are consumed, assuming each __SIMD32 read below advances pv by two elements. */ 
00102     /* Process sample for first tap */ 
00103     gcurr = *px1++; 
00104     /* fN-1(n) = fN(n) - kN * gN-1(n-1) */ 
00105     fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15); 
00106     fnext = __SSAT(fnext, 16); 
00107     /* gN(n) = kN * fN-1(n) + gN-1(n-1) */ 
00108     gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr; 
00109     gnext = __SSAT(gnext, 16); 
00110     /* write gN(n) into state for next sample processing */ 
00111     *px2++ = (q15_t) gnext; 
00112     /* y(n) += gN(n) * vN  */ 
00113     acc += (q31_t) ((gnext * (*pv++))); 
00114  
00115  
00116     /* Update f values for next coefficient processing */ 
00117     fcurr = fnext; 
00118  
00119     /* Loop unrolling.  Process 4 stages per iteration; the first stage was handled above, so numStages - 1 remain. */ 
00120     tapCnt = (numStages - 1u) >> 2; 
00121     /* NOTE(review): numStages is unsigned, so numStages - 1u wraps if numStages is 0; at least one stage appears to be assumed - confirm with callers. */ 
00122     while(tapCnt > 0u) 
00123     { 
00124  
00125       /* Process sample for 2nd, 6th ...taps */ 
00126       /* Read gN-2(n-1) from state buffer */ 
00127       gcurr = *px1++; 
00128       /* Process sample for 2nd, 6th .. taps */ 
00129       /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */ 
00130       fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15); 
00131       fnext = __SSAT(fnext, 16); 
00132       /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */ 
00133       gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr; 
00134       gnext1 = (q15_t) __SSAT(gnext, 16); 
00135       /* write gN-1(n) into state */ 
00136       *px2++ = (q15_t) gnext1; 
00137  
00138  
00139       /* Process sample for 3rd, 7th ...taps */ 
00140       /* Read gN-3(n-1) from state */ 
00141       gcurr = *px1++; 
00142       /* Process sample for 3rd, 7th .. taps */ 
00143       /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */ 
00144       fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15); 
00145       fcurr = __SSAT(fcurr, 16); 
00146       /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */ 
00147       gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr; 
00148       gnext2 = (q15_t) __SSAT(gnext, 16); 
00149       /* write gN-2(n) into state */ 
00150       *px2++ = (q15_t) gnext2; 
00151  
00152       /* Read vN-1 and vN-2 at a time */ 
00153       v = *__SIMD32(pv)++; 
00154       /* NOTE(review): the pairing of gnext1/gnext2 with the two halves of v depends on how __PKHBT and the 32-bit load order the halfwords (both defined outside this file) - confirm for big-endian targets. */ 
00155  
00156       /* Pack gN-1(n) and gN-2(n) */ 
00157       gnext = __PKHBT(gnext1, gnext2, 16); 
00158       /* y(n) += gN-1(n) * vN-1  */ 
00159       /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */ 
00160       /* y(n) += gN-2(n) * vN-2  */ 
00161       /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */ 
00162       acc = __SMLALD(gnext, v, acc); 
00163  
00164  
00165       /* Process sample for 4th, 8th ...taps */ 
00166       /* Read gN-4(n-1) from state */ 
00167       gcurr = *px1++; 
00168       /* Process sample for 4th, 8th .. taps */ 
00169       /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */ 
00170       fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15); 
00171       fnext = __SSAT(fnext, 16); 
00172       /* gN-3(n) = kN-3 * fN-4(n) + gN-4(n-1) */ 
00173       gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr; 
00174       gnext1 = (q15_t) __SSAT(gnext, 16); 
00175       /* write gN-3(n) for the next sample process */ 
00176       *px2++ = (q15_t) gnext1; 
00177  
00178  
00179       /* Process sample for 5th, 9th ...taps */ 
00180       /* Read gN-5(n-1) from state */ 
00181       gcurr = *px1++; 
00182       /* Process sample for 5th, 9th .. taps */ 
00183       /* fN-5(n) = fN-4(n) - kN-4 * gN-5(n-1) */ 
00184       fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15); 
00185       fcurr = __SSAT(fcurr, 16); 
00186       /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */ 
00187       gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr; 
00188       gnext2 = (q15_t) __SSAT(gnext, 16); 
00189       /* write gN-4(n) for the next sample process */ 
00190       *px2++ = (q15_t) gnext2; 
00191  
00192       /* Read vN-3 and vN-4 at a time */ 
00193       v = *__SIMD32(pv)++; 
00194  
00195       /* Pack gN-3(n) and gN-4(n) */ 
00196       gnext = __PKHBT(gnext1, gnext2, 16); 
00197       /* y(n) += gN-4(n) * vN-4  */ 
00198       /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */ 
00199       /* y(n) += gN-3(n) * vN-3  */ 
00200       /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... */ 
00201       acc = __SMLALD(gnext, v, acc); 
00202  
00203       tapCnt--; 
00204  
00205     } 
00206     /* Keep the latest forward value in fnext in case the remainder loop below does not run */ 
00207     fnext = fcurr; 
00208  
00209     /* Process the (numStages - 1) % 4 stages left over after the unrolled loop */ 
00210     tapCnt = (numStages - 1u) % 0x4u; 
00211  
00212     while(tapCnt > 0u) 
00213     { 
00214       gcurr = *px1++; 
00215       /* Process sample for last taps */ 
00216       fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15); 
00217       fnext = __SSAT(fnext, 16); 
00218       gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr; 
00219       gnext = __SSAT(gnext, 16); 
00220       /* Output samples for last taps */ 
00221       acc += (q31_t) (((q31_t) gnext * (*pv++))); 
00222       *px2++ = (q15_t) gnext; 
00223       fcurr = fnext; 
00224  
00225       tapCnt--; 
00226     } 
00227  
00228     /* y(n) += g0(n) * v0, using the final forward value fnext as g0(n) */ 
00229     acc += (q31_t) (((q31_t) fnext * (*pv++))); 
00230     /* Discard the low 15 bits of the accumulator, then reduce the result to a q15_t output sample */ 
00231     out = (q15_t) __SSAT(acc >> 15, 16); 
00232     *px2++ = (q15_t) fnext;      /* store the final forward value as the last state element written for this sample */ 
00233  
00234     /* write out into pDst */ 
00235     *pDst++ = out; 
00236  
00237     /* Advance the state pointer by one element for the next input sample */ 
00238     pState = pState + 1u; 
00239     blkCnt--; 
00240  
00241   } 
00242  
00243   /* Processing is complete. Now copy last S->numStages samples to start of the buffer  
00244      in preparation for processing the next frame */ 
00245   /* pStateCurnt points to the start of the state buffer, pState to the element at index blockSize */ 
00246   pStateCurnt = &S->pState[0]; 
00247   pState = &S->pState[blockSize]; 
00248  
00249   stgCnt = (numStages >> 2u); 
00250  
00251   /* Copy four q15_t values per iteration, as two 32-bit accesses */ 
00252   while(stgCnt > 0u) 
00253   { 
00254     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
00255     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
00256  
00257     /* Decrement the loop counter */ 
00258     stgCnt--; 
00259  
00260   } 
00261  
00262   /* Calculation of count for remaining q15_t data */ 
00263   stgCnt = (numStages) % 0x4u; 
00264  
00265   /* Copy the remaining numStages % 4 values one q15_t at a time */ 
00266   while(stgCnt > 0u) 
00267   { 
00268     *pStateCurnt++ = *pState++; 
00269  
00270     /* Decrement the loop counter */ 
00271     stgCnt--; 
00272   } 
00273  
00274 } 
00275  
00276  
00277  
00278  
00279 /**  
00280  * @} end of IIR_Lattice group  
00281  */