Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-dsp by
arm_biquad_cascade_df1_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 17. January 2013 00005 * $Revision: V1.4.1 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_biquad_cascade_df1_q31.c 00009 * 00010 * Description: Processing function for the 00011 * Q31 Biquad cascade filter 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00014 * 00015 * Redistribution and use in source and binary forms, with or without 00016 * modification, are permitted provided that the following conditions 00017 * are met: 00018 * - Redistributions of source code must retain the above copyright 00019 * notice, this list of conditions and the following disclaimer. 00020 * - Redistributions in binary form must reproduce the above copyright 00021 * notice, this list of conditions and the following disclaimer in 00022 * the documentation and/or other materials provided with the 00023 * distribution. 00024 * - Neither the name of ARM LIMITED nor the names of its contributors 00025 * may be used to endorse or promote products derived from this 00026 * software without specific prior written permission. 00027 * 00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00039 * POSSIBILITY OF SUCH DAMAGE. 00040 * -------------------------------------------------------------------- */ 00041 00042 #include "arm_math.h" 00043 00044 /** 00045 * @ingroup groupFilters 00046 */ 00047 00048 /** 00049 * @addtogroup BiquadCascadeDF1 00050 * @{ 00051 */ 00052 00053 /** 00054 * @brief Processing function for the Q31 Biquad cascade filter. 00055 * @param[in] *S points to an instance of the Q31 Biquad cascade structure. 00056 * @param[in] *pSrc points to the block of input data. 00057 * @param[out] *pDst points to the block of output data. 00058 * @param[in] blockSize number of samples to process per call. 00059 * @return none. 00060 * 00061 * <b>Scaling and Overflow Behavior:</b> 00062 * \par 00063 * The function is implemented using an internal 64-bit accumulator. 00064 * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit. 00065 * Thus, if the accumulator result overflows it wraps around rather than clip. 00066 * In order to avoid overflows completely the input signal must be scaled down by 2 bits and lie in the range [-0.25 +0.25). 00067 * After all 5 multiply-accumulates are performed, the 2.62 accumulator is shifted by <code>postShift</code> bits and the result truncated to 00068 * 1.31 format by discarding the low 32 bits. 00069 * 00070 * \par 00071 * Refer to the function <code>arm_biquad_cascade_df1_fast_q31()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4. 00072 */ 00073 00074 void arm_biquad_cascade_df1_q31( 00075 const arm_biquad_casd_df1_inst_q31 * S, 00076 q31_t * pSrc, 00077 q31_t * pDst, 00078 uint32_t blockSize) 00079 { 00080 q63_t acc; /* accumulator */ 00081 uint32_t uShift = ((uint32_t) S->postShift + 1u); 00082 uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */ 00083 q31_t *pIn = pSrc; /* input pointer initialization */ 00084 q31_t *pOut = pDst; /* output pointer initialization */ 00085 q31_t *pState = S->pState; /* pState pointer initialization */ 00086 q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */ 00087 q31_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */ 00088 q31_t b0, b1, b2, a1, a2; /* Filter coefficients */ 00089 q31_t Xn; /* temporary input */ 00090 uint32_t sample, stage = S->numStages; /* loop counters */ 00091 00092 00093 #ifndef ARM_MATH_CM0_FAMILY 00094 00095 q31_t acc_l, acc_h; /* temporary output variables */ 00096 00097 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00098 00099 do 00100 { 00101 /* Reading the coefficients */ 00102 b0 = *pCoeffs++; 00103 b1 = *pCoeffs++; 00104 b2 = *pCoeffs++; 00105 a1 = *pCoeffs++; 00106 a2 = *pCoeffs++; 00107 00108 /* Reading the state values */ 00109 Xn1 = pState[0]; 00110 Xn2 = pState[1]; 00111 Yn1 = pState[2]; 00112 Yn2 = pState[3]; 00113 00114 /* Apply loop unrolling and compute 4 output values simultaneously. */ 00115 /* The variable acc hold output values that are being computed: 00116 * 00117 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00118 */ 00119 00120 sample = blockSize >> 2u; 00121 00122 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00123 ** a second loop below computes the remaining 1 to 3 samples. */ 00124 while(sample > 0u) 00125 { 00126 /* Read the input */ 00127 Xn = *pIn++; 00128 00129 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00130 00131 /* acc = b0 * x[n] */ 00132 acc = (q63_t) b0 *Xn; 00133 /* acc += b1 * x[n-1] */ 00134 acc += (q63_t) b1 *Xn1; 00135 /* acc += b[2] * x[n-2] */ 00136 acc += (q63_t) b2 *Xn2; 00137 /* acc += a1 * y[n-1] */ 00138 acc += (q63_t) a1 *Yn1; 00139 /* acc += a2 * y[n-2] */ 00140 acc += (q63_t) a2 *Yn2; 00141 00142 /* The result is converted to 1.31 , Yn2 variable is reused */ 00143 00144 /* Calc lower part of acc */ 00145 acc_l = acc & 0xffffffff; 00146 00147 /* Calc upper part of acc */ 00148 acc_h = (acc >> 32) & 0xffffffff; 00149 00150 /* Apply shift for lower part of acc and upper part of acc */ 00151 Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift; 00152 00153 /* Store the output in the destination buffer. */ 00154 *pOut++ = Yn2; 00155 00156 /* Read the second input */ 00157 Xn2 = *pIn++; 00158 00159 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00160 00161 /* acc = b0 * x[n] */ 00162 acc = (q63_t) b0 *Xn2; 00163 /* acc += b1 * x[n-1] */ 00164 acc += (q63_t) b1 *Xn; 00165 /* acc += b[2] * x[n-2] */ 00166 acc += (q63_t) b2 *Xn1; 00167 /* acc += a1 * y[n-1] */ 00168 acc += (q63_t) a1 *Yn2; 00169 /* acc += a2 * y[n-2] */ 00170 acc += (q63_t) a2 *Yn1; 00171 00172 00173 /* The result is converted to 1.31, Yn1 variable is reused */ 00174 00175 /* Calc lower part of acc */ 00176 acc_l = acc & 0xffffffff; 00177 00178 /* Calc upper part of acc */ 00179 acc_h = (acc >> 32) & 0xffffffff; 00180 00181 00182 /* Apply shift for lower part of acc and upper part of acc */ 00183 Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift; 00184 00185 /* Store the output in the destination buffer. */ 00186 *pOut++ = Yn1; 00187 00188 /* Read the third input */ 00189 Xn1 = *pIn++; 00190 00191 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00192 00193 /* acc = b0 * x[n] */ 00194 acc = (q63_t) b0 *Xn1; 00195 /* acc += b1 * x[n-1] */ 00196 acc += (q63_t) b1 *Xn2; 00197 /* acc += b[2] * x[n-2] */ 00198 acc += (q63_t) b2 *Xn; 00199 /* acc += a1 * y[n-1] */ 00200 acc += (q63_t) a1 *Yn1; 00201 /* acc += a2 * y[n-2] */ 00202 acc += (q63_t) a2 *Yn2; 00203 00204 /* The result is converted to 1.31, Yn2 variable is reused */ 00205 /* Calc lower part of acc */ 00206 acc_l = acc & 0xffffffff; 00207 00208 /* Calc upper part of acc */ 00209 acc_h = (acc >> 32) & 0xffffffff; 00210 00211 00212 /* Apply shift for lower part of acc and upper part of acc */ 00213 Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift; 00214 00215 /* Store the output in the destination buffer. */ 00216 *pOut++ = Yn2; 00217 00218 /* Read the forth input */ 00219 Xn = *pIn++; 00220 00221 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00222 00223 /* acc = b0 * x[n] */ 00224 acc = (q63_t) b0 *Xn; 00225 /* acc += b1 * x[n-1] */ 00226 acc += (q63_t) b1 *Xn1; 00227 /* acc += b[2] * x[n-2] */ 00228 acc += (q63_t) b2 *Xn2; 00229 /* acc += a1 * y[n-1] */ 00230 acc += (q63_t) a1 *Yn2; 00231 /* acc += a2 * y[n-2] */ 00232 acc += (q63_t) a2 *Yn1; 00233 00234 /* The result is converted to 1.31, Yn1 variable is reused */ 00235 /* Calc lower part of acc */ 00236 acc_l = acc & 0xffffffff; 00237 00238 /* Calc upper part of acc */ 00239 acc_h = (acc >> 32) & 0xffffffff; 00240 00241 /* Apply shift for lower part of acc and upper part of acc */ 00242 Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift; 00243 00244 /* Every time after the output is computed state should be updated. */ 00245 /* The states should be updated as: */ 00246 /* Xn2 = Xn1 */ 00247 /* Xn1 = Xn */ 00248 /* Yn2 = Yn1 */ 00249 /* Yn1 = acc */ 00250 Xn2 = Xn1; 00251 Xn1 = Xn; 00252 00253 /* Store the output in the destination buffer. */ 00254 *pOut++ = Yn1; 00255 00256 /* decrement the loop counter */ 00257 sample--; 00258 } 00259 00260 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00261 ** No loop unrolling is used. */ 00262 sample = (blockSize & 0x3u); 00263 00264 while(sample > 0u) 00265 { 00266 /* Read the input */ 00267 Xn = *pIn++; 00268 00269 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00270 00271 /* acc = b0 * x[n] */ 00272 acc = (q63_t) b0 *Xn; 00273 /* acc += b1 * x[n-1] */ 00274 acc += (q63_t) b1 *Xn1; 00275 /* acc += b[2] * x[n-2] */ 00276 acc += (q63_t) b2 *Xn2; 00277 /* acc += a1 * y[n-1] */ 00278 acc += (q63_t) a1 *Yn1; 00279 /* acc += a2 * y[n-2] */ 00280 acc += (q63_t) a2 *Yn2; 00281 00282 /* The result is converted to 1.31 */ 00283 acc = acc >> lShift; 00284 00285 /* Every time after the output is computed state should be updated. */ 00286 /* The states should be updated as: */ 00287 /* Xn2 = Xn1 */ 00288 /* Xn1 = Xn */ 00289 /* Yn2 = Yn1 */ 00290 /* Yn1 = acc */ 00291 Xn2 = Xn1; 00292 Xn1 = Xn; 00293 Yn2 = Yn1; 00294 Yn1 = (q31_t) acc; 00295 00296 /* Store the output in the destination buffer. */ 00297 *pOut++ = (q31_t) acc; 00298 00299 /* decrement the loop counter */ 00300 sample--; 00301 } 00302 00303 /* The first stage goes from the input buffer to the output buffer. */ 00304 /* Subsequent stages occur in-place in the output buffer */ 00305 pIn = pDst; 00306 00307 /* Reset to destination pointer */ 00308 pOut = pDst; 00309 00310 /* Store the updated state variables back into the pState array */ 00311 *pState++ = Xn1; 00312 *pState++ = Xn2; 00313 *pState++ = Yn1; 00314 *pState++ = Yn2; 00315 00316 } while(--stage); 00317 00318 #else 00319 00320 /* Run the below code for Cortex-M0 */ 00321 00322 do 00323 { 00324 /* Reading the coefficients */ 00325 b0 = *pCoeffs++; 00326 b1 = *pCoeffs++; 00327 b2 = *pCoeffs++; 00328 a1 = *pCoeffs++; 00329 a2 = *pCoeffs++; 00330 00331 /* Reading the state values */ 00332 Xn1 = pState[0]; 00333 Xn2 = pState[1]; 00334 Yn1 = pState[2]; 00335 Yn2 = pState[3]; 00336 00337 /* The variables acc holds the output value that is computed: 00338 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00339 */ 00340 00341 sample = blockSize; 00342 00343 while(sample > 0u) 00344 { 00345 /* Read the input */ 00346 Xn = *pIn++; 00347 00348 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00349 /* acc = b0 * x[n] */ 00350 acc = (q63_t) b0 *Xn; 00351 00352 /* acc += b1 * x[n-1] */ 00353 acc += (q63_t) b1 *Xn1; 00354 /* acc += b[2] * x[n-2] */ 00355 acc += (q63_t) b2 *Xn2; 00356 /* acc += a1 * y[n-1] */ 00357 acc += (q63_t) a1 *Yn1; 00358 /* acc += a2 * y[n-2] */ 00359 acc += (q63_t) a2 *Yn2; 00360 00361 /* The result is converted to 1.31 */ 00362 acc = acc >> lShift; 00363 00364 /* Every time after the output is computed state should be updated. */ 00365 /* The states should be updated as: */ 00366 /* Xn2 = Xn1 */ 00367 /* Xn1 = Xn */ 00368 /* Yn2 = Yn1 */ 00369 /* Yn1 = acc */ 00370 Xn2 = Xn1; 00371 Xn1 = Xn; 00372 Yn2 = Yn1; 00373 Yn1 = (q31_t) acc; 00374 00375 /* Store the output in the destination buffer. */ 00376 *pOut++ = (q31_t) acc; 00377 00378 /* decrement the loop counter */ 00379 sample--; 00380 } 00381 00382 /* The first stage goes from the input buffer to the output buffer. */ 00383 /* Subsequent stages occur in-place in the output buffer */ 00384 pIn = pDst; 00385 00386 /* Reset to destination pointer */ 00387 pOut = pDst; 00388 00389 /* Store the updated state variables back into the pState array */ 00390 *pState++ = Xn1; 00391 *pState++ = Xn2; 00392 *pState++ = Yn1; 00393 *pState++ = Yn2; 00394 00395 } while(--stage); 00396 00397 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00398 } 00399 00400 /** 00401 * @} end of BiquadCascadeDF1 group 00402 */
Generated on Tue Jul 12 2022 18:44:08 by
