Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-dsp by
arm_biquad_cascade_df1_fast_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 17. January 2013 00005 * $Revision: V1.4.1 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_biquad_cascade_df1_fast_q31.c 00009 * 00010 * Description: Processing function for the 00011 * Q31 Fast Biquad cascade DirectFormI(DF1) filter. 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3 00014 * 00015 * Redistribution and use in source and binary forms, with or without 00016 * modification, are permitted provided that the following conditions 00017 * are met: 00018 * - Redistributions of source code must retain the above copyright 00019 * notice, this list of conditions and the following disclaimer. 00020 * - Redistributions in binary form must reproduce the above copyright 00021 * notice, this list of conditions and the following disclaimer in 00022 * the documentation and/or other materials provided with the 00023 * distribution. 00024 * - Neither the name of ARM LIMITED nor the names of its contributors 00025 * may be used to endorse or promote products derived from this 00026 * software without specific prior written permission. 00027 * 00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00039 * POSSIBILITY OF SUCH DAMAGE. 00040 * -------------------------------------------------------------------- */ 00041 00042 #include "arm_math.h" 00043 00044 /** 00045 * @ingroup groupFilters 00046 */ 00047 00048 /** 00049 * @addtogroup BiquadCascadeDF1 00050 * @{ 00051 */ 00052 00053 /** 00054 * @details 00055 * 00056 * @param[in] *S points to an instance of the Q31 Biquad cascade structure. 00057 * @param[in] *pSrc points to the block of input data. 00058 * @param[out] *pDst points to the block of output data. 00059 * @param[in] blockSize number of samples to process per call. 00060 * @return none. 00061 * 00062 * <b>Scaling and Overflow Behavior:</b> 00063 * \par 00064 * This function is optimized for speed at the expense of fixed-point precision and overflow protection. 00065 * The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format. 00066 * These intermediate results are added to a 2.30 accumulator. 00067 * Finally, the accumulator is saturated and converted to a 1.31 result. 00068 * The fast version has the same overflow behavior as the standard version and provides less precision since it discards the low 32 bits of each multiplication result. 00069 * In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25). Use the intialization function 00070 * arm_biquad_cascade_df1_init_q31() to initialize filter structure. 00071 * 00072 * \par 00073 * Refer to the function <code>arm_biquad_cascade_df1_q31()</code> for a slower implementation of this function which uses 64-bit accumulation to provide higher precision. Both the slow and the fast versions use the same instance structure. 00074 * Use the function <code>arm_biquad_cascade_df1_init_q31()</code> to initialize the filter structure. 00075 */ 00076 00077 void arm_biquad_cascade_df1_fast_q31( 00078 const arm_biquad_casd_df1_inst_q31 * S, 00079 q31_t * pSrc, 00080 q31_t * pDst, 00081 uint32_t blockSize) 00082 { 00083 q31_t acc = 0; /* accumulator */ 00084 q31_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */ 00085 q31_t b0, b1, b2, a1, a2; /* Filter coefficients */ 00086 q31_t *pIn = pSrc; /* input pointer initialization */ 00087 q31_t *pOut = pDst; /* output pointer initialization */ 00088 q31_t *pState = S->pState; /* pState pointer initialization */ 00089 q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */ 00090 q31_t Xn; /* temporary input */ 00091 int32_t shift = (int32_t) S->postShift + 1; /* Shift to be applied to the output */ 00092 uint32_t sample, stage = S->numStages; /* loop counters */ 00093 00094 00095 do 00096 { 00097 /* Reading the coefficients */ 00098 b0 = *pCoeffs++; 00099 b1 = *pCoeffs++; 00100 b2 = *pCoeffs++; 00101 a1 = *pCoeffs++; 00102 a2 = *pCoeffs++; 00103 00104 /* Reading the state values */ 00105 Xn1 = pState[0]; 00106 Xn2 = pState[1]; 00107 Yn1 = pState[2]; 00108 Yn2 = pState[3]; 00109 00110 /* Apply loop unrolling and compute 4 output values simultaneously. */ 00111 /* The variables acc ... acc3 hold output values that are being computed: 00112 * 00113 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00114 */ 00115 00116 sample = blockSize >> 2u; 00117 00118 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00119 ** a second loop below computes the remaining 1 to 3 samples. */ 00120 while(sample > 0u) 00121 { 00122 /* Read the input */ 00123 Xn = *pIn; 00124 00125 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00126 /* acc = b0 * x[n] */ 00127 //acc = (q31_t) (((q63_t) b1 * Xn1) >> 32); 00128 mult_32x32_keep32_R(acc, b1, Xn1); 00129 /* acc += b1 * x[n-1] */ 00130 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b0 * (Xn))) >> 32); 00131 multAcc_32x32_keep32_R(acc, b0, Xn); 00132 /* acc += b[2] * x[n-2] */ 00133 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32); 00134 multAcc_32x32_keep32_R(acc, b2, Xn2); 00135 /* acc += a1 * y[n-1] */ 00136 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32); 00137 multAcc_32x32_keep32_R(acc, a1, Yn1); 00138 /* acc += a2 * y[n-2] */ 00139 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32); 00140 multAcc_32x32_keep32_R(acc, a2, Yn2); 00141 00142 /* The result is converted to 1.31 , Yn2 variable is reused */ 00143 Yn2 = acc << shift; 00144 00145 /* Read the second input */ 00146 Xn2 = *(pIn + 1u); 00147 00148 /* Store the output in the destination buffer. */ 00149 *pOut = Yn2; 00150 00151 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00152 /* acc = b0 * x[n] */ 00153 //acc = (q31_t) (((q63_t) b0 * (Xn2)) >> 32); 00154 mult_32x32_keep32_R(acc, b0, Xn2); 00155 /* acc += b1 * x[n-1] */ 00156 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn))) >> 32); 00157 multAcc_32x32_keep32_R(acc, b1, Xn); 00158 /* acc += b[2] * x[n-2] */ 00159 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn1))) >> 32); 00160 multAcc_32x32_keep32_R(acc, b2, Xn1); 00161 /* acc += a1 * y[n-1] */ 00162 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32); 00163 multAcc_32x32_keep32_R(acc, a1, Yn2); 00164 /* acc += a2 * y[n-2] */ 00165 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32); 00166 multAcc_32x32_keep32_R(acc, a2, Yn1); 00167 00168 /* The result is converted to 1.31, Yn1 variable is reused */ 00169 Yn1 = acc << shift; 00170 00171 /* Read the third input */ 00172 Xn1 = *(pIn + 2u); 00173 00174 /* Store the output in the destination buffer. */ 00175 *(pOut + 1u) = Yn1; 00176 00177 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00178 /* acc = b0 * x[n] */ 00179 //acc = (q31_t) (((q63_t) b0 * (Xn1)) >> 32); 00180 mult_32x32_keep32_R(acc, b0, Xn1); 00181 /* acc += b1 * x[n-1] */ 00182 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn2))) >> 32); 00183 multAcc_32x32_keep32_R(acc, b1, Xn2); 00184 /* acc += b[2] * x[n-2] */ 00185 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn))) >> 32); 00186 multAcc_32x32_keep32_R(acc, b2, Xn); 00187 /* acc += a1 * y[n-1] */ 00188 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32); 00189 multAcc_32x32_keep32_R(acc, a1, Yn1); 00190 /* acc += a2 * y[n-2] */ 00191 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32); 00192 multAcc_32x32_keep32_R(acc, a2, Yn2); 00193 00194 /* The result is converted to 1.31, Yn2 variable is reused */ 00195 Yn2 = acc << shift; 00196 00197 /* Read the forth input */ 00198 Xn = *(pIn + 3u); 00199 00200 /* Store the output in the destination buffer. */ 00201 *(pOut + 2u) = Yn2; 00202 pIn += 4u; 00203 00204 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00205 /* acc = b0 * x[n] */ 00206 //acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32); 00207 mult_32x32_keep32_R(acc, b0, Xn); 00208 /* acc += b1 * x[n-1] */ 00209 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32); 00210 multAcc_32x32_keep32_R(acc, b1, Xn1); 00211 /* acc += b[2] * x[n-2] */ 00212 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32); 00213 multAcc_32x32_keep32_R(acc, b2, Xn2); 00214 /* acc += a1 * y[n-1] */ 00215 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32); 00216 multAcc_32x32_keep32_R(acc, a1, Yn2); 00217 /* acc += a2 * y[n-2] */ 00218 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32); 00219 multAcc_32x32_keep32_R(acc, a2, Yn1); 00220 00221 /* Every time after the output is computed state should be updated. */ 00222 /* The states should be updated as: */ 00223 /* Xn2 = Xn1 */ 00224 Xn2 = Xn1; 00225 00226 /* The result is converted to 1.31, Yn1 variable is reused */ 00227 Yn1 = acc << shift; 00228 00229 /* Xn1 = Xn */ 00230 Xn1 = Xn; 00231 00232 /* Store the output in the destination buffer. */ 00233 *(pOut + 3u) = Yn1; 00234 pOut += 4u; 00235 00236 /* decrement the loop counter */ 00237 sample--; 00238 } 00239 00240 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00241 ** No loop unrolling is used. */ 00242 sample = (blockSize & 0x3u); 00243 00244 while(sample > 0u) 00245 { 00246 /* Read the input */ 00247 Xn = *pIn++; 00248 00249 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00250 /* acc = b0 * x[n] */ 00251 //acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32); 00252 mult_32x32_keep32_R(acc, b0, Xn); 00253 /* acc += b1 * x[n-1] */ 00254 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32); 00255 multAcc_32x32_keep32_R(acc, b1, Xn1); 00256 /* acc += b[2] * x[n-2] */ 00257 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32); 00258 multAcc_32x32_keep32_R(acc, b2, Xn2); 00259 /* acc += a1 * y[n-1] */ 00260 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32); 00261 multAcc_32x32_keep32_R(acc, a1, Yn1); 00262 /* acc += a2 * y[n-2] */ 00263 //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32); 00264 multAcc_32x32_keep32_R(acc, a2, Yn2); 00265 00266 /* The result is converted to 1.31 */ 00267 acc = acc << shift; 00268 00269 /* Every time after the output is computed state should be updated. */ 00270 /* The states should be updated as: */ 00271 /* Xn2 = Xn1 */ 00272 /* Xn1 = Xn */ 00273 /* Yn2 = Yn1 */ 00274 /* Yn1 = acc */ 00275 Xn2 = Xn1; 00276 Xn1 = Xn; 00277 Yn2 = Yn1; 00278 Yn1 = acc; 00279 00280 /* Store the output in the destination buffer. */ 00281 *pOut++ = acc; 00282 00283 /* decrement the loop counter */ 00284 sample--; 00285 } 00286 00287 /* The first stage goes from the input buffer to the output buffer. */ 00288 /* Subsequent stages occur in-place in the output buffer */ 00289 pIn = pDst; 00290 00291 /* Reset to destination pointer */ 00292 pOut = pDst; 00293 00294 /* Store the updated state variables back into the pState array */ 00295 *pState++ = Xn1; 00296 *pState++ = Xn2; 00297 *pState++ = Yn1; 00298 *pState++ = Yn2; 00299 00300 } while(--stage); 00301 } 00302 00303 /** 00304 * @} end of BiquadCascadeDF1 group 00305 */
Generated on Tue Jul 12 2022 18:44:08 by
