CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details
Dependents: K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more
arm_biquad_cascade_df1_fast_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_biquad_cascade_df1_fast_q15.c 00009 * 00010 * Description: Fast processing function for the 00011 * Q15 Biquad cascade filter. 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3 00014 * 00015 * Version 1.0.3 2010/11/29 00016 * Re-organized the CMSIS folders and updated documentation. 00017 * 00018 * Version 1.0.2 2010/11/11 00019 * Documentation updated. 00020 * 00021 * Version 1.0.1 2010/10/05 00022 * Production release and review comments incorporated. 00023 * 00024 * Version 1.0.0 2010/09/20 00025 * Production release and review comments incorporated. 00026 * 00027 * Version 0.0.9 2010/08/16 00028 * Initial version 00029 * 00030 * 00031 * -------------------------------------------------------------------- */ 00032 00033 #include "arm_math.h" 00034 00035 /** 00036 * @ingroup groupFilters 00037 */ 00038 00039 /** 00040 * @addtogroup BiquadCascadeDF1 00041 * @{ 00042 */ 00043 00044 /** 00045 * @details 00046 * @param[in] *S points to an instance of the Q15 Biquad cascade structure. 00047 * @param[in] *pSrc points to the block of input data. 00048 * @param[out] *pDst points to the block of output data. 00049 * @param[in] blockSize number of samples to process per call. 00050 * @return none. 00051 * 00052 * <b>Scaling and Overflow Behavior:</b> 00053 * \par 00054 * This fast version uses a 32-bit accumulator with 2.30 format. 00055 * The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit. 00056 * Thus, if the accumulator result overflows it wraps around and distorts the result. 00057 * In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25). 
00058 * The 2.30 accumulator is then shifted by <code>postShift</code> bits and the result truncated to 1.15 format by discarding the low 16 bits. 00059 * 00060 * \par 00061 * Refer to the function <code>arm_biquad_cascade_df1_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion. Both the slow and the fast versions use the same instance structure. 00062 * Use the function <code>arm_biquad_cascade_df1_init_q15()</code> to initialize the filter structure. 00063 * 00064 */ 00065 00066 void arm_biquad_cascade_df1_fast_q15( 00067 const arm_biquad_casd_df1_inst_q15 * S, 00068 q15_t * pSrc, 00069 q15_t * pDst, 00070 uint32_t blockSize) 00071 { 00072 q15_t *pIn = pSrc; /* Source pointer */ 00073 q15_t *pOut = pDst; /* Destination pointer */ 00074 q31_t in; /* Temporary variable to hold input value */ 00075 q31_t out; /* Temporary variable to hold output value */ 00076 q15_t b0; 00077 q31_t b1, a1; /* Filter coefficients */ 00078 q31_t state_in, state_out; /* Filter state variables */ 00079 q31_t acc0; /* Accumulator */ 00080 int32_t shift = (int32_t) (15 - S->postShift); /* Post shift */ 00081 q15_t *pState = S->pState; /* State pointer */ 00082 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00083 q31_t *pState_q31; /* 32-bit state pointer for SIMD implementation */ 00084 uint32_t sample, stage = S->numStages; /* Stage loop counter */ 00085 00086 00087 00088 do 00089 { 00090 /* Initialize state pointer of type q31 */ 00091 pState_q31 = (q31_t *) (pState); 00092 00093 /* Read the b0 and 0 coefficients using SIMD */ 00094 b0 = *__SIMD32(pCoeffs)++; 00095 00096 /* Read the b1 and b2 coefficients using SIMD */ 00097 b1 = *__SIMD32(pCoeffs)++; 00098 00099 /* Read the a1 and a2 coefficients using SIMD */ 00100 a1 = *__SIMD32(pCoeffs)++; 00101 00102 /* Read the input state values from the state buffer: x[n-1], x[n-2] */ 00103 state_in = (q31_t) (*pState_q31++); 00104 00105 /* Read the output state values 
from the state buffer: y[n-1], y[n-2] */ 00106 state_out = (q31_t) (*pState_q31); 00107 00108 /* Apply loop unrolling and compute 2 output values simultaneously. */ 00109 /* The variables acc0 ... acc3 hold output values that are being computed: 00110 * 00111 * acc0 = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00112 * acc0 = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00113 */ 00114 sample = blockSize >> 1u; 00115 00116 /* First part of the processing with loop unrolling. Compute 2 outputs at a time. 00117 ** a second loop below computes the remaining 1 sample. */ 00118 while(sample > 0u) 00119 { 00120 00121 /* Read the input */ 00122 in = *__SIMD32(pIn)++; 00123 00124 /* out = b0 * x[n] + 0 * 0 */ 00125 out = (q31_t) b0 * ((q15_t) in); 00126 /* acc0 = b1 * x[n-1] + acc0 += b2 * x[n-2] + out */ 00127 acc0 = __SMLAD(b1, state_in, out); 00128 /* acc0 += a1 * y[n-1] + acc0 += a2 * y[n-2] */ 00129 acc0 = __SMLAD(a1, state_out, acc0); 00130 00131 /* The result is converted from 3.29 to 1.31 and then saturation is applied */ 00132 out = __SSAT((acc0 >> shift), 16); 00133 00134 /* Every time after the output is computed state should be updated. 
*/ 00135 /* The states should be updated as: */ 00136 /* Xn2 = Xn1 */ 00137 /* Xn1 = Xn */ 00138 /* Yn2 = Yn1 */ 00139 /* Yn1 = acc0 */ 00140 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00141 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00142 state_in = __PKHBT(in, state_in, 16); 00143 state_out = __PKHBT(out, state_out, 16); 00144 00145 /* out = b0 * x[n] + 0 * 0 */ 00146 out = (q31_t) b0 *((q15_t)(in >> 16)); 00147 /* acc0 = b1 * x[n-1] + acc0 += b2 * x[n-2] + out */ 00148 acc0 = __SMLAD(b1, state_in, out); 00149 /* acc0 += a1 * y[n-1] + acc0 += a2 * y[n-2] */ 00150 acc0 = __SMLAD(a1, state_out, acc0); 00151 00152 /* The result is converted from 3.29 to 1.31 and then saturation is applied */ 00153 out = __SSAT((acc0 >> shift), 16); 00154 00155 /* Store the output in the destination buffer. */ 00156 *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16); 00157 00158 /* Every time after the output is computed state should be updated. */ 00159 /* The states should be updated as: */ 00160 /* Xn2 = Xn1 */ 00161 /* Xn1 = Xn */ 00162 /* Yn2 = Yn1 */ 00163 /* Yn1 = acc0 */ 00164 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00165 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00166 state_in = __PKHBT(in >> 16, state_in, 16); 00167 state_out = __PKHBT(out, state_out, 16); 00168 00169 /* Decrement the loop counter */ 00170 sample--; 00171 00172 } 00173 00174 /* If the blockSize is not a multiple of 2, compute any remaining output samples here. 00175 ** No loop unrolling is used. 
*/ 00176 00177 if((blockSize & 0x1u) != 0u) 00178 { 00179 /* Read the input */ 00180 in = *pIn++; 00181 00182 /* out = b0 * x[n] + 0 * 0 */ 00183 out = (q31_t) in *b0; 00184 /* acc0 = b1 * x[n-1] + acc0 += b2 * x[n-2] + out */ 00185 acc0 = __SMLAD(b1, state_in, out); 00186 /* acc0 += a1 * y[n-1] + acc0 += a2 * y[n-2] */ 00187 acc0 = __SMLAD(a1, state_out, acc0); 00188 00189 /* The result is converted from 3.29 to 1.31 and then saturation is applied */ 00190 out = __SSAT((acc0 >> shift), 16); 00191 00192 /* Store the output in the destination buffer. */ 00193 *pOut++ = (q15_t) out; 00194 00195 /* Every time after the output is computed state should be updated. */ 00196 /* The states should be updated as: */ 00197 /* Xn2 = Xn1 */ 00198 /* Xn1 = Xn */ 00199 /* Yn2 = Yn1 */ 00200 /* Yn1 = acc0 */ 00201 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00202 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00203 state_in = __PKHBT(in, state_in, 16); 00204 state_out = __PKHBT(out, state_out, 16); 00205 00206 } 00207 00208 /* The first stage goes from the input buffer to the output buffer. */ 00209 /* Subsequent (numStages - 1) occur in-place in the output buffer */ 00210 pIn = pDst; 00211 00212 /* Reset the output pointer */ 00213 pOut = pDst; 00214 00215 /* Store the updated state variables back into the state array */ 00216 *__SIMD32(pState)++ = __PKHBT(state_in, (state_in >> 16), 16); 00217 *__SIMD32(pState)++ = __PKHBT(state_out, (state_out >> 16), 16); 00218 00219 /* Decrement the loop counter */ 00220 stage--; 00221 00222 } while(stage > 0u); 00223 } 00224 00225 00226 /** 00227 * @} end of BiquadCascadeDF1 group 00228 */
Generated on Tue Jul 12 2022 14:13:52 by Doxygen 1.7.2