Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of dsp by
arm_biquad_cascade_df1_fast_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_biquad_cascade_df1_fast_q31.c 00009 * 00010 * Description: Processing function for the 00011 * Q31 Fast Biquad cascade DirectFormI(DF1) filter. 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3 00014 * 00015 * Version 1.0.3 2010/11/29 00016 * Re-organized the CMSIS folders and updated documentation. 00017 * 00018 * Version 1.0.2 2010/11/11 00019 * Documentation updated. 00020 * 00021 * Version 1.0.1 2010/10/05 00022 * Production release and review comments incorporated. 00023 * 00024 * Version 1.0.0 2010/09/20 00025 * Production release and review comments incorporated. 00026 * 00027 * Version 0.0.9 2010/08/27 00028 * Initial version 00029 * 00030 * -------------------------------------------------------------------- */ 00031 00032 #include "arm_math.h" 00033 00034 /** 00035 * @ingroup groupFilters 00036 */ 00037 00038 /** 00039 * @addtogroup BiquadCascadeDF1 00040 * @{ 00041 */ 00042 00043 /** 00044 * @details 00045 * 00046 * @param[in] *S points to an instance of the Q31 Biquad cascade structure. 00047 * @param[in] *pSrc points to the block of input data. 00048 * @param[out] *pDst points to the block of output data. 00049 * @param[in] blockSize number of samples to process per call. 00050 * @return none. 00051 * 00052 * <b>Scaling and Overflow Behavior:</b> 00053 * \par 00054 * This function is optimized for speed at the expense of fixed-point precision and overflow protection. 00055 * The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format. 00056 * These intermediate results are added to a 2.30 accumulator. 00057 * Finally, the accumulator is saturated and converted to a 1.31 result. 00058 * The fast version has the same overflow behavior as the standard version and provides less precision since it discards the low 32 bits of each multiplication result. 00059 * In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25). Use the intialization function 00060 * arm_biquad_cascade_df1_init_q31() to initialize filter structure. 00061 * 00062 * \par 00063 * Refer to the function <code>arm_biquad_cascade_df1_q31()</code> for a slower implementation of this function which uses 64-bit accumulation to provide higher precision. Both the slow and the fast versions use the same instance structure. 00064 * Use the function <code>arm_biquad_cascade_df1_init_q31()</code> to initialize the filter structure. 00065 */ 00066 00067 void arm_biquad_cascade_df1_fast_q31( 00068 const arm_biquad_casd_df1_inst_q31 * S, 00069 q31_t * pSrc, 00070 q31_t * pDst, 00071 uint32_t blockSize) 00072 { 00073 q31_t *pIn = pSrc; /* input pointer initialization */ 00074 q31_t *pOut = pDst; /* output pointer initialization */ 00075 q31_t *pState = S->pState; /* pState pointer initialization */ 00076 q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */ 00077 q31_t acc; /* accumulator */ 00078 q31_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */ 00079 q31_t b0, b1, b2, a1, a2; /* Filter coefficients */ 00080 q31_t Xn; /* temporary input */ 00081 int32_t shift = (int32_t) S->postShift + 1; /* Shift to be applied to the output */ 00082 uint32_t sample, stage = S->numStages; /* loop counters */ 00083 00084 00085 do 00086 { 00087 /* Reading the coefficients */ 00088 b0 = *pCoeffs++; 00089 b1 = *pCoeffs++; 00090 b2 = *pCoeffs++; 00091 a1 = *pCoeffs++; 00092 a2 = *pCoeffs++; 00093 00094 /* Reading the state values */ 00095 Xn1 = pState[0]; 00096 Xn2 = pState[1]; 00097 Yn1 = pState[2]; 00098 Yn2 = pState[3]; 00099 00100 /* Apply loop unrolling and compute 4 output values simultaneously. */ 00101 /* The variables acc ... acc3 hold output values that are being computed: 00102 * 00103 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00104 */ 00105 00106 sample = blockSize >> 2u; 00107 00108 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00109 ** a second loop below computes the remaining 1 to 3 samples. */ 00110 while(sample > 0u) 00111 { 00112 /* Read the input */ 00113 Xn = *pIn++; 00114 00115 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00116 /* acc = b0 * x[n] */ 00117 acc = (q31_t) (((q63_t) b0 * Xn) >> 32); 00118 /* acc += b1 * x[n-1] */ 00119 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32); 00120 /* acc += b[2] * x[n-2] */ 00121 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32); 00122 /* acc += a1 * y[n-1] */ 00123 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32); 00124 /* acc += a2 * y[n-2] */ 00125 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32); 00126 00127 /* The result is converted to 1.31 , Yn2 variable is reused */ 00128 Yn2 = acc << shift; 00129 00130 /* Store the output in the destination buffer. */ 00131 *pOut++ = Yn2; 00132 00133 /* Read the second input */ 00134 Xn2 = *pIn++; 00135 00136 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00137 /* acc = b0 * x[n] */ 00138 acc = (q31_t) (((q63_t) b0 * (Xn2)) >> 32); 00139 /* acc += b1 * x[n-1] */ 00140 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn))) >> 32); 00141 /* acc += b[2] * x[n-2] */ 00142 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn1))) >> 32); 00143 /* acc += a1 * y[n-1] */ 00144 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32); 00145 /* acc += a2 * y[n-2] */ 00146 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32); 00147 00148 /* The result is converted to 1.31, Yn1 variable is reused */ 00149 Yn1 = acc << shift; 00150 00151 /* Store the output in the destination buffer. */ 00152 *pOut++ = Yn1; 00153 00154 /* Read the third input */ 00155 Xn1 = *pIn++; 00156 00157 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00158 /* acc = b0 * x[n] */ 00159 acc = (q31_t) (((q63_t) b0 * (Xn1)) >> 32); 00160 /* acc += b1 * x[n-1] */ 00161 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn2))) >> 32); 00162 /* acc += b[2] * x[n-2] */ 00163 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn))) >> 32); 00164 /* acc += a1 * y[n-1] */ 00165 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32); 00166 /* acc += a2 * y[n-2] */ 00167 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32); 00168 00169 /* The result is converted to 1.31, Yn2 variable is reused */ 00170 Yn2 = acc << shift; 00171 00172 /* Store the output in the destination buffer. */ 00173 *pOut++ = Yn2; 00174 00175 /* Read the forth input */ 00176 Xn = *pIn++; 00177 00178 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00179 /* acc = b0 * x[n] */ 00180 acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32); 00181 /* acc += b1 * x[n-1] */ 00182 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32); 00183 /* acc += b[2] * x[n-2] */ 00184 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32); 00185 /* acc += a1 * y[n-1] */ 00186 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32); 00187 /* acc += a2 * y[n-2] */ 00188 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32); 00189 00190 /* The result is converted to 1.31, Yn1 variable is reused */ 00191 Yn1 = acc << shift; 00192 00193 /* Every time after the output is computed state should be updated. */ 00194 /* The states should be updated as: */ 00195 /* Xn2 = Xn1 */ 00196 /* Xn1 = Xn */ 00197 /* Yn2 = Yn1 */ 00198 /* Yn1 = acc */ 00199 Xn2 = Xn1; 00200 Xn1 = Xn; 00201 00202 /* Store the output in the destination buffer. */ 00203 *pOut++ = Yn1; 00204 00205 /* decrement the loop counter */ 00206 sample--; 00207 } 00208 00209 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00210 ** No loop unrolling is used. */ 00211 sample = (blockSize & 0x3u); 00212 00213 while(sample > 0u) 00214 { 00215 /* Read the input */ 00216 Xn = *pIn++; 00217 00218 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00219 /* acc = b0 * x[n] */ 00220 acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32); 00221 /* acc += b1 * x[n-1] */ 00222 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32); 00223 /* acc += b[2] * x[n-2] */ 00224 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32); 00225 /* acc += a1 * y[n-1] */ 00226 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32); 00227 /* acc += a2 * y[n-2] */ 00228 acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32); 00229 /* The result is converted to 1.31 */ 00230 acc = acc << shift; 00231 00232 /* Every time after the output is computed state should be updated. */ 00233 /* The states should be updated as: */ 00234 /* Xn2 = Xn1 */ 00235 /* Xn1 = Xn */ 00236 /* Yn2 = Yn1 */ 00237 /* Yn1 = acc */ 00238 Xn2 = Xn1; 00239 Xn1 = Xn; 00240 Yn2 = Yn1; 00241 Yn1 = acc; 00242 00243 /* Store the output in the destination buffer. */ 00244 *pOut++ = acc; 00245 00246 /* decrement the loop counter */ 00247 sample--; 00248 } 00249 00250 /* The first stage goes from the input buffer to the output buffer. */ 00251 /* Subsequent stages occur in-place in the output buffer */ 00252 pIn = pDst; 00253 00254 /* Reset to destination pointer */ 00255 pOut = pDst; 00256 00257 /* Store the updated state variables back into the pState array */ 00258 *pState++ = Xn1; 00259 *pState++ = Xn2; 00260 *pState++ = Yn1; 00261 *pState++ = Yn2; 00262 00263 } while(--stage); 00264 } 00265 00266 /** 00267 * @} end of BiquadCascadeDF1 group 00268 */
Generated on Tue Jul 12 2022 19:55:42 by
1.7.2
