Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of dsp by
arm_biquad_cascade_df1_fast_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_biquad_cascade_df1_fast_q15.c 00009 * 00010 * Description: Fast processing function for the 00011 * Q15 Biquad cascade filter. 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3 00014 * 00015 * Version 1.0.3 2010/11/29 00016 * Re-organized the CMSIS folders and updated documentation. 00017 * 00018 * Version 1.0.2 2010/11/11 00019 * Documentation updated. 00020 * 00021 * Version 1.0.1 2010/10/05 00022 * Production release and review comments incorporated. 00023 * 00024 * Version 1.0.0 2010/09/20 00025 * Production release and review comments incorporated. 00026 * 00027 * Version 0.0.9 2010/08/16 00028 * Initial version 00029 * 00030 * 00031 * -------------------------------------------------------------------- */ 00032 00033 #include "arm_math.h" 00034 00035 /** 00036 * @ingroup groupFilters 00037 */ 00038 00039 /** 00040 * @addtogroup BiquadCascadeDF1 00041 * @{ 00042 */ 00043 00044 /** 00045 * @details 00046 * @param[in] *S points to an instance of the Q15 Biquad cascade structure. 00047 * @param[in] *pSrc points to the block of input data. 00048 * @param[out] *pDst points to the block of output data. 00049 * @param[in] blockSize number of samples to process per call. 00050 * @return none. 00051 * 00052 * <b>Scaling and Overflow Behavior:</b> 00053 * \par 00054 * This fast version uses a 32-bit accumulator with 2.30 format. 00055 * The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit. 00056 * Thus, if the accumulator result overflows it wraps around and distorts the result. 00057 * In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25). 00058 * The 2.30 accumulator is then shifted by <code>postShift</code> bits and the result truncated to 1.15 format by discarding the low 16 bits. 00059 * 00060 * \par 00061 * Refer to the function <code>arm_biquad_cascade_df1_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion. Both the slow and the fast versions use the same instance structure. 00062 * Use the function <code>arm_biquad_cascade_df1_init_q15()</code> to initialize the filter structure. 00063 * 00064 */ 00065 00066 void arm_biquad_cascade_df1_fast_q15( 00067 const arm_biquad_casd_df1_inst_q15 * S, 00068 q15_t * pSrc, 00069 q15_t * pDst, 00070 uint32_t blockSize) 00071 { 00072 q15_t *pIn = pSrc; /* Source pointer */ 00073 q15_t *pOut = pDst; /* Destination pointer */ 00074 q31_t in; /* Temporary variable to hold input value */ 00075 q31_t out; /* Temporary variable to hold output value */ 00076 q15_t b0; 00077 q31_t b1, a1; /* Filter coefficients */ 00078 q31_t state_in, state_out; /* Filter state variables */ 00079 q31_t acc0; /* Accumulator */ 00080 int32_t shift = (int32_t) (15 - S->postShift); /* Post shift */ 00081 q15_t *pState = S->pState; /* State pointer */ 00082 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00083 q31_t *pState_q31; /* 32-bit state pointer for SIMD implementation */ 00084 uint32_t sample, stage = S->numStages; /* Stage loop counter */ 00085 00086 00087 00088 do 00089 { 00090 /* Initialize state pointer of type q31 */ 00091 pState_q31 = (q31_t *) (pState); 00092 00093 /* Read the b0 and 0 coefficients using SIMD */ 00094 b0 = *__SIMD32(pCoeffs)++; 00095 00096 /* Read the b1 and b2 coefficients using SIMD */ 00097 b1 = *__SIMD32(pCoeffs)++; 00098 00099 /* Read the a1 and a2 coefficients using SIMD */ 00100 a1 = *__SIMD32(pCoeffs)++; 00101 00102 /* Read the input state values from the state buffer: x[n-1], x[n-2] */ 00103 state_in = (q31_t) (*pState_q31++); 00104 00105 /* Read the output state values from the state buffer: y[n-1], y[n-2] */ 00106 state_out = (q31_t) (*pState_q31); 00107 00108 /* Apply loop unrolling and compute 2 output values simultaneously. */ 00109 /* The variables acc0 ... acc3 hold output values that are being computed: 00110 * 00111 * acc0 = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00112 * acc0 = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00113 */ 00114 sample = blockSize >> 1u; 00115 00116 /* First part of the processing with loop unrolling. Compute 2 outputs at a time. 00117 ** a second loop below computes the remaining 1 sample. */ 00118 while(sample > 0u) 00119 { 00120 00121 /* Read the input */ 00122 in = *__SIMD32(pIn)++; 00123 00124 /* out = b0 * x[n] + 0 * 0 */ 00125 out = (q31_t) b0 * ((q15_t) in); 00126 /* acc0 = b1 * x[n-1] + acc0 += b2 * x[n-2] + out */ 00127 acc0 = __SMLAD(b1, state_in, out); 00128 /* acc0 += a1 * y[n-1] + acc0 += a2 * y[n-2] */ 00129 acc0 = __SMLAD(a1, state_out, acc0); 00130 00131 /* The result is converted from 3.29 to 1.31 and then saturation is applied */ 00132 out = __SSAT((acc0 >> shift), 16); 00133 00134 /* Every time after the output is computed state should be updated. */ 00135 /* The states should be updated as: */ 00136 /* Xn2 = Xn1 */ 00137 /* Xn1 = Xn */ 00138 /* Yn2 = Yn1 */ 00139 /* Yn1 = acc0 */ 00140 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00141 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00142 state_in = __PKHBT(in, state_in, 16); 00143 state_out = __PKHBT(out, state_out, 16); 00144 00145 /* out = b0 * x[n] + 0 * 0 */ 00146 out = (q31_t) b0 *((q15_t)(in >> 16)); 00147 /* acc0 = b1 * x[n-1] + acc0 += b2 * x[n-2] + out */ 00148 acc0 = __SMLAD(b1, state_in, out); 00149 /* acc0 += a1 * y[n-1] + acc0 += a2 * y[n-2] */ 00150 acc0 = __SMLAD(a1, state_out, acc0); 00151 00152 /* The result is converted from 3.29 to 1.31 and then saturation is applied */ 00153 out = __SSAT((acc0 >> shift), 16); 00154 00155 /* Store the output in the destination buffer. */ 00156 *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16); 00157 00158 /* Every time after the output is computed state should be updated. */ 00159 /* The states should be updated as: */ 00160 /* Xn2 = Xn1 */ 00161 /* Xn1 = Xn */ 00162 /* Yn2 = Yn1 */ 00163 /* Yn1 = acc0 */ 00164 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00165 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00166 state_in = __PKHBT(in >> 16, state_in, 16); 00167 state_out = __PKHBT(out, state_out, 16); 00168 00169 /* Decrement the loop counter */ 00170 sample--; 00171 00172 } 00173 00174 /* If the blockSize is not a multiple of 2, compute any remaining output samples here. 00175 ** No loop unrolling is used. */ 00176 00177 if((blockSize & 0x1u) != 0u) 00178 { 00179 /* Read the input */ 00180 in = *pIn++; 00181 00182 /* out = b0 * x[n] + 0 * 0 */ 00183 out = (q31_t) in *b0; 00184 /* acc0 = b1 * x[n-1] + acc0 += b2 * x[n-2] + out */ 00185 acc0 = __SMLAD(b1, state_in, out); 00186 /* acc0 += a1 * y[n-1] + acc0 += a2 * y[n-2] */ 00187 acc0 = __SMLAD(a1, state_out, acc0); 00188 00189 /* The result is converted from 3.29 to 1.31 and then saturation is applied */ 00190 out = __SSAT((acc0 >> shift), 16); 00191 00192 /* Store the output in the destination buffer. */ 00193 *pOut++ = (q15_t) out; 00194 00195 /* Every time after the output is computed state should be updated. */ 00196 /* The states should be updated as: */ 00197 /* Xn2 = Xn1 */ 00198 /* Xn1 = Xn */ 00199 /* Yn2 = Yn1 */ 00200 /* Yn1 = acc0 */ 00201 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00202 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00203 state_in = __PKHBT(in, state_in, 16); 00204 state_out = __PKHBT(out, state_out, 16); 00205 00206 } 00207 00208 /* The first stage goes from the input buffer to the output buffer. */ 00209 /* Subsequent (numStages - 1) occur in-place in the output buffer */ 00210 pIn = pDst; 00211 00212 /* Reset the output pointer */ 00213 pOut = pDst; 00214 00215 /* Store the updated state variables back into the state array */ 00216 *__SIMD32(pState)++ = __PKHBT(state_in, (state_in >> 16), 16); 00217 *__SIMD32(pState)++ = __PKHBT(state_out, (state_out >> 16), 16); 00218 00219 /* Decrement the loop counter */ 00220 stage--; 00221 00222 } while(stage > 0u); 00223 } 00224 00225 00226 /** 00227 * @} end of BiquadCascadeDF1 group 00228 */
Generated on Tue Jul 12 2022 19:55:42 by
1.7.2
