CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details

Dependents:   K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_biquad_cascade_df1_fast_q15.c Source File

arm_biquad_cascade_df1_fast_q15.c

00001 /* ----------------------------------------------------------------------  
00002 * Copyright (C) 2010 ARM Limited. All rights reserved.  
00003 *  
00004 * $Date:        29. November 2010  
00005 * $Revision:    V1.0.3  
00006 *  
00007 * Project:      CMSIS DSP Library  
00008 * Title:        arm_biquad_cascade_df1_fast_q15.c  
00009 *  
00010 * Description:  Fast processing function for the  
00011 *               Q15 Biquad cascade filter.  
00012 *  
00013 * Target Processor: Cortex-M4/Cortex-M3
00014 *  
00015 * Version 1.0.3 2010/11/29 
00016 *    Re-organized the CMSIS folders and updated documentation.  
00017 *   
00018 * Version 1.0.2 2010/11/11  
00019 *    Documentation updated.   
00020 *  
00021 * Version 1.0.1 2010/10/05   
00022 *    Production release and review comments incorporated.  
00023 *  
00024 * Version 1.0.0 2010/09/20   
00025 *    Production release and review comments incorporated.  
00026 *  
00027 * Version 0.0.9  2010/08/16   
00028 *    Initial version  
00029 *  
00030 *  
00031 * -------------------------------------------------------------------- */ 
00032  
00033 #include "arm_math.h" 
00034  
00035 /**  
00036  * @ingroup groupFilters  
00037  */ 
00038  
00039 /**  
00040  * @addtogroup BiquadCascadeDF1  
00041  * @{  
00042  */ 
00043  
00044 /**  
00045  * @details Fast processing function for the Q15 Biquad cascade filter.  
00046  * @param[in]  *S points to an instance of the Q15 Biquad cascade structure; numStages, pState, pCoeffs and postShift are read, and the buffer at pState is updated with the new filter state.  
00047  * @param[in]  *pSrc points to the block of input data.  
00048  * @param[out] *pDst points to the block of output data; every stage after the first reads its input from this buffer and filters it in place.  
00049  * @param[in]  blockSize number of samples to process per call.  
00050  * @return none.  
00051  *  
00052  * <b>Scaling and Overflow Behavior:</b>  
00053  * \par  
00054  * This fast version uses a 32-bit accumulator with 2.30 format.  
00055  * The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit.  
00056  * Thus, if the accumulator result overflows it wraps around and distorts the result.  
00057  * In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25).  
00058  * The 2.30 accumulator is then shifted right by <code>(15 - postShift)</code> bits and the result saturated to 16 bits to give the 1.15 output.  
00059  *  
00060  * \par  
00061  * Refer to the function <code>arm_biquad_cascade_df1_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion.  Both the slow and the fast versions use the same instance structure.  
00062  * Use the function <code>arm_biquad_cascade_df1_init_q15()</code> to initialize the filter structure.  
00063  *  
00064  */ 
00065  
00066 void arm_biquad_cascade_df1_fast_q15( 
00067   const arm_biquad_casd_df1_inst_q15 * S, 
00068   q15_t * pSrc, 
00069   q15_t * pDst, 
00070   uint32_t blockSize) 
00071 { 
00072   q15_t *pIn = pSrc;                             /*  Input pointer of the current stage           */ 
00073   q15_t *pOut = pDst;                            /*  Output pointer of the current stage          */ 
00074   q31_t in;                                      /*  Input word: two packed samples, or one sample in the odd-sample tail */ 
00075   q31_t out;                                     /*  b0 * x[n] product, then the saturated output */ 
00076   q15_t b0;                                      /*  Filter coefficient b0                        */ 
00077   q31_t b1, a1;                                  /*  Packed coefficient pairs: b1 holds b1 and b2, a1 holds a1 and a2 */ 
00078   q31_t state_in, state_out;                     /*  Packed state: x[n-1], x[n-2] and y[n-1], y[n-2] */ 
00079   q31_t acc0;                                    /*  Accumulator                                  */ 
00080   int32_t shift = (int32_t) (15 - S->postShift); /*  Right shift applied to the accumulator       */ 
00081   q15_t *pState = S->pState;                     /*  State pointer                                */ 
00082   q15_t *pCoeffs = S->pCoeffs;                   /*  Coefficient pointer                          */ 
00083   q31_t *pState_q31;                             /*  32-bit state pointer for SIMD implementation */ 
00084   uint32_t sample, stage = S->numStages;         /*  Sample-pair counter and stage loop counter   */ 
00085  
00086  
00087  
00088   do 
00089   { 
00090     /* View the current stage's state buffer as 32-bit words */ 
00091     pState_q31 = (q31_t *) (pState); 
00092  
00093     /* Read the 32-bit word holding b0 and 0; b0 is declared q15_t, so the value is narrowed on assignment */ 
00094     b0 = *__SIMD32(pCoeffs)++; 
00095  
00096     /* Read the b1 and b2 coefficients as one packed 32-bit word */ 
00097     b1 = *__SIMD32(pCoeffs)++; 
00098  
00099     /* Read the a1 and a2 coefficients as one packed 32-bit word */ 
00100     a1 = *__SIMD32(pCoeffs)++; 
00101  
00102     /* Read the input state values from the state buffer:  x[n-1], x[n-2] */ 
00103     state_in = (q31_t) (*pState_q31++); 
00104  
00105     /* Read the output state values from the state buffer:  y[n-1], y[n-2] */ 
00106     state_out = (q31_t) (*pState_q31); 
00107  
00108     /* Apply loop unrolling and compute 2 output values per iteration. */ 
00109     /*      acc0 is reused for both outputs; the first uses the low halfword of the input word, the second the high halfword:  
00110      *  
00111      *    acc0 =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]    (low halfword sample)  
00112      *    acc0 =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]    (high halfword sample, after the state update)  
00113      */ 
00114     sample = blockSize >> 1u; 
00115  
00116     /* First part of the processing with loop unrolling.  Compute 2 outputs at a time.  
00117      ** The if-block below computes the 1 remaining sample when blockSize is odd. */ 
00118     while(sample > 0u) 
00119     { 
00120  
00121       /* Read two input samples as one 32-bit word */ 
00122       in = *__SIMD32(pIn)++; 
00123  
00124       /* out =  b0 * x[n], where x[n] is the low halfword of the input word */ 
00125       out = (q31_t) b0 * ((q15_t) in); 
00126       /* acc0 =  b1 * x[n-1] + b2 * x[n-2] + out */ 
00127       acc0 = __SMLAD(b1, state_in, out); 
00128       /* acc0 +=  a1 * y[n-1] + a2 * y[n-2] */ 
00129       acc0 = __SMLAD(a1, state_out, acc0); 
00130  
00131       /* Shift the accumulator right by (15 - postShift) and saturate the result to 16 bits */ 
00132       out = __SSAT((acc0 >> shift), 16); 
00133  
00134       /* After each output the packed state words are advanced by one sample. */ 
00135       /* The states are updated as:  */ 
00136       /* Xn2 = Xn1    */ 
00137       /* Xn1 = Xn     */ 
00138       /* Yn2 = Yn1    */ 
00139       /* Yn1 = out    (the shifted and saturated accumulator) */ 
00140       /* state_in:  low halfword = low halfword of in (x[n]), high halfword = old low halfword (x[n-1]) */ 
00141       /* state_out: low halfword = out (y[n]),               high halfword = old low halfword (y[n-1]) */ 
00142       state_in = __PKHBT(in, state_in, 16); 
00143       state_out = __PKHBT(out, state_out, 16); 
00144  
00145       /* out =  b0 * x[n], where x[n] is now the high halfword of the input word */ 
00146       out = (q31_t) b0 *((q15_t)(in >> 16)); 
00147       /* acc0 =  b1 * x[n-1] + b2 * x[n-2] + out */ 
00148       acc0 = __SMLAD(b1, state_in, out); 
00149       /* acc0 +=  a1 * y[n-1] + a2 * y[n-2] */ 
00150       acc0 = __SMLAD(a1, state_out, acc0); 
00151  
00152       /* Shift the accumulator right by (15 - postShift) and saturate the result to 16 bits */ 
00153       out = __SSAT((acc0 >> shift), 16); 
00154  
00155       /* Store both outputs as one 32-bit word: the previous output (low halfword of state_out) in the low halfword, the current output in the high halfword. */ 
00156       *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16); 
00157  
00158       /* After each output the packed state words are advanced by one sample. */ 
00159       /* The states are updated as:  */ 
00160       /* Xn2 = Xn1    */ 
00161       /* Xn1 = Xn     */ 
00162       /* Yn2 = Yn1    */ 
00163       /* Yn1 = out    (the shifted and saturated accumulator) */ 
00164       /* state_in:  low halfword = high halfword of in (x[n]), high halfword = old low halfword (x[n-1]) */ 
00165       /* state_out: low halfword = out (y[n]),                high halfword = old low halfword (y[n-1]) */ 
00166       state_in = __PKHBT(in >> 16, state_in, 16); 
00167       state_out = __PKHBT(out, state_out, 16); 
00168  
00169       /* Decrement the sample-pair counter */ 
00170       sample--; 
00171  
00172     } 
00173  
00174     /* If blockSize is odd, compute the one remaining output sample here.  
00175      ** No loop unrolling is used. */ 
00176  
00177     if((blockSize & 0x1u) != 0u) 
00178     { 
00179       /* Read the single remaining input sample */ 
00180       in = *pIn++; 
00181  
00182       /* out =  b0 * x[n] */ 
00183       out = (q31_t) in *b0; 
00184       /* acc0 =  b1 * x[n-1] + b2 * x[n-2] + out */ 
00185       acc0 = __SMLAD(b1, state_in, out); 
00186       /* acc0 +=  a1 * y[n-1] + a2 * y[n-2] */ 
00187       acc0 = __SMLAD(a1, state_out, acc0); 
00188  
00189       /* Shift the accumulator right by (15 - postShift) and saturate the result to 16 bits */ 
00190       out = __SSAT((acc0 >> shift), 16); 
00191  
00192       /* Store the single output sample in the destination buffer. */ 
00193       *pOut++ = (q15_t) out; 
00194  
00195       /* After the output is computed the packed state words are advanced by one sample. */ 
00196       /* The states are updated as:  */ 
00197       /* Xn2 = Xn1    */ 
00198       /* Xn1 = Xn     */ 
00199       /* Yn2 = Yn1    */ 
00200       /* Yn1 = out    (the shifted and saturated accumulator) */ 
00201       /* state_in:  low halfword = x[n],       high halfword = old low halfword (x[n-1]) */ 
00202       /* state_out: low halfword = out (y[n]), high halfword = old low halfword (y[n-1]) */ 
00203       state_in = __PKHBT(in, state_in, 16); 
00204       state_out = __PKHBT(out, state_out, 16); 
00205  
00206     } 
00207  
00208     /*  The first stage goes from the input buffer to the output buffer.  */ 
00209     /*  Subsequent (numStages - 1) stages occur in-place in the output buffer  */ 
00210     pIn = pDst; 
00211  
00212     /* Reset the output pointer to the start of the output buffer */ 
00213     pOut = pDst; 
00214  
00215     /*  Store the updated state words back into the state array and advance pState to the next stage's state; each pack keeps the low halfword low and the high halfword high */ 
00216     *__SIMD32(pState)++ = __PKHBT(state_in, (state_in >> 16), 16); 
00217     *__SIMD32(pState)++ = __PKHBT(state_out, (state_out >> 16), 16); 
00218  
00219     /* Decrement the stage counter. NOTE(review): this do/while runs the body before the test below, so numStages == 0 is not guarded against (the unsigned counter would wrap) - confirm callers always pass numStages >= 1 */ 
00220     stage--; 
00221  
00222   } while(stage > 0u); 
00223 } 
00224  
00225  
00226 /**  
00227  * @} end of BiquadCascadeDF1 group  
00228  */