V4.0.1 of the ARM CMSIS DSP libraries. Note that arm_bitreversal2.s, arm_cfft_f32.c and arm_rfft_fast_f32.c had to be removed. arm_bitreversal2.s will not assemble with the online tools. So, the fast f32 FFT functions are not yet available. All the other FFT functions are available.
Dependents: MPU9150_Example fir_f32 fir_f32 MPU9150_nucleo_noni2cdev ... more
arm_biquad_cascade_df1_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 12. March 2014 00005 * $Revision: V1.4.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_biquad_cascade_df1_q31.c 00009 * 00010 * Description: Processing function for the 00011 * Q31 Biquad cascade filter 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00014 * 00015 * Redistribution and use in source and binary forms, with or without 00016 * modification, are permitted provided that the following conditions 00017 * are met: 00018 * - Redistributions of source code must retain the above copyright 00019 * notice, this list of conditions and the following disclaimer. 00020 * - Redistributions in binary form must reproduce the above copyright 00021 * notice, this list of conditions and the following disclaimer in 00022 * the documentation and/or other materials provided with the 00023 * distribution. 00024 * - Neither the name of ARM LIMITED nor the names of its contributors 00025 * may be used to endorse or promote products derived from this 00026 * software without specific prior written permission. 00027 * 00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00039 * POSSIBILITY OF SUCH DAMAGE. 00040 * -------------------------------------------------------------------- */ 00041 00042 #include "arm_math.h" 00043 00044 /** 00045 * @ingroup groupFilters 00046 */ 00047 00048 /** 00049 * @addtogroup BiquadCascadeDF1 00050 * @{ 00051 */ 00052 00053 /** 00054 * @brief Processing function for the Q31 Biquad cascade filter. 00055 * @param[in] *S points to an instance of the Q31 Biquad cascade structure. 00056 * @param[in] *pSrc points to the block of input data. 00057 * @param[out] *pDst points to the block of output data. 00058 * @param[in] blockSize number of samples to process per call. 00059 * @return none. 00060 * 00061 * <b>Scaling and Overflow Behavior:</b> 00062 * \par 00063 * The function is implemented using an internal 64-bit accumulator. 00064 * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit. 00065 * Thus, if the accumulator result overflows it wraps around rather than clip. 00066 * In order to avoid overflows completely the input signal must be scaled down by 2 bits and lie in the range [-0.25 +0.25). 00067 * After all 5 multiply-accumulates are performed, the 2.62 accumulator is shifted by <code>postShift</code> bits and the result truncated to 00068 * 1.31 format by discarding the low 32 bits. 00069 * 00070 * \par 00071 * Refer to the function <code>arm_biquad_cascade_df1_fast_q31()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4. 00072 */ 00073 00074 void arm_biquad_cascade_df1_q31( 00075 const arm_biquad_casd_df1_inst_q31 * S, 00076 q31_t * pSrc, 00077 q31_t * pDst, 00078 uint32_t blockSize) 00079 { 00080 q63_t acc; /* accumulator */ 00081 uint32_t uShift = ((uint32_t) S->postShift + 1u); 00082 uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */ 00083 q31_t *pIn = pSrc; /* input pointer initialization */ 00084 q31_t *pOut = pDst; /* output pointer initialization */ 00085 q31_t *pState = S->pState; /* pState pointer initialization */ 00086 q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */ 00087 q31_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */ 00088 q31_t b0, b1, b2, a1, a2; /* Filter coefficients */ 00089 q31_t Xn; /* temporary input */ 00090 uint32_t sample, stage = S->numStages; /* loop counters */ 00091 00092 00093 #ifndef ARM_MATH_CM0_FAMILY_FAMILY 00094 00095 q31_t acc_l, acc_h; /* temporary output variables */ 00096 00097 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00098 00099 do 00100 { 00101 /* Reading the coefficients */ 00102 b0 = *pCoeffs++; 00103 b1 = *pCoeffs++; 00104 b2 = *pCoeffs++; 00105 a1 = *pCoeffs++; 00106 a2 = *pCoeffs++; 00107 00108 /* Reading the state values */ 00109 Xn1 = pState[0]; 00110 Xn2 = pState[1]; 00111 Yn1 = pState[2]; 00112 Yn2 = pState[3]; 00113 00114 /* Apply loop unrolling and compute 4 output values simultaneously. */ 00115 /* The variable acc hold output values that are being computed: 00116 * 00117 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00118 */ 00119 00120 sample = blockSize >> 2u; 00121 00122 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00123 ** a second loop below computes the remaining 1 to 3 samples. */ 00124 while(sample > 0u) 00125 { 00126 /* Read the input */ 00127 Xn = *pIn++; 00128 00129 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00130 00131 /* acc = b0 * x[n] */ 00132 acc = (q63_t) b0 *Xn; 00133 /* acc += b1 * x[n-1] */ 00134 acc += (q63_t) b1 *Xn1; 00135 /* acc += b[2] * x[n-2] */ 00136 acc += (q63_t) b2 *Xn2; 00137 /* acc += a1 * y[n-1] */ 00138 acc += (q63_t) a1 *Yn1; 00139 /* acc += a2 * y[n-2] */ 00140 acc += (q63_t) a2 *Yn2; 00141 00142 /* The result is converted to 1.31 , Yn2 variable is reused */ 00143 00144 /* Calc lower part of acc */ 00145 acc_l = acc & 0xffffffff; 00146 00147 /* Calc upper part of acc */ 00148 acc_h = (acc >> 32) & 0xffffffff; 00149 00150 /* Apply shift for lower part of acc and upper part of acc */ 00151 Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift; 00152 00153 /* Store the output in the destination buffer. */ 00154 *pOut++ = Yn2; 00155 00156 /* Read the second input */ 00157 Xn2 = *pIn++; 00158 00159 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00160 00161 /* acc = b0 * x[n] */ 00162 acc = (q63_t) b0 *Xn2; 00163 /* acc += b1 * x[n-1] */ 00164 acc += (q63_t) b1 *Xn; 00165 /* acc += b[2] * x[n-2] */ 00166 acc += (q63_t) b2 *Xn1; 00167 /* acc += a1 * y[n-1] */ 00168 acc += (q63_t) a1 *Yn2; 00169 /* acc += a2 * y[n-2] */ 00170 acc += (q63_t) a2 *Yn1; 00171 00172 00173 /* The result is converted to 1.31, Yn1 variable is reused */ 00174 00175 /* Calc lower part of acc */ 00176 acc_l = acc & 0xffffffff; 00177 00178 /* Calc upper part of acc */ 00179 acc_h = (acc >> 32) & 0xffffffff; 00180 00181 00182 /* Apply shift for lower part of acc and upper part of acc */ 00183 Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift; 00184 00185 /* Store the output in the destination buffer. */ 00186 *pOut++ = Yn1; 00187 00188 /* Read the third input */ 00189 Xn1 = *pIn++; 00190 00191 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00192 00193 /* acc = b0 * x[n] */ 00194 acc = (q63_t) b0 *Xn1; 00195 /* acc += b1 * x[n-1] */ 00196 acc += (q63_t) b1 *Xn2; 00197 /* acc += b[2] * x[n-2] */ 00198 acc += (q63_t) b2 *Xn; 00199 /* acc += a1 * y[n-1] */ 00200 acc += (q63_t) a1 *Yn1; 00201 /* acc += a2 * y[n-2] */ 00202 acc += (q63_t) a2 *Yn2; 00203 00204 /* The result is converted to 1.31, Yn2 variable is reused */ 00205 /* Calc lower part of acc */ 00206 acc_l = acc & 0xffffffff; 00207 00208 /* Calc upper part of acc */ 00209 acc_h = (acc >> 32) & 0xffffffff; 00210 00211 00212 /* Apply shift for lower part of acc and upper part of acc */ 00213 Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift; 00214 00215 /* Store the output in the destination buffer. */ 00216 *pOut++ = Yn2; 00217 00218 /* Read the forth input */ 00219 Xn = *pIn++; 00220 00221 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00222 00223 /* acc = b0 * x[n] */ 00224 acc = (q63_t) b0 *Xn; 00225 /* acc += b1 * x[n-1] */ 00226 acc += (q63_t) b1 *Xn1; 00227 /* acc += b[2] * x[n-2] */ 00228 acc += (q63_t) b2 *Xn2; 00229 /* acc += a1 * y[n-1] */ 00230 acc += (q63_t) a1 *Yn2; 00231 /* acc += a2 * y[n-2] */ 00232 acc += (q63_t) a2 *Yn1; 00233 00234 /* The result is converted to 1.31, Yn1 variable is reused */ 00235 /* Calc lower part of acc */ 00236 acc_l = acc & 0xffffffff; 00237 00238 /* Calc upper part of acc */ 00239 acc_h = (acc >> 32) & 0xffffffff; 00240 00241 /* Apply shift for lower part of acc and upper part of acc */ 00242 Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift; 00243 00244 /* Every time after the output is computed state should be updated. */ 00245 /* The states should be updated as: */ 00246 /* Xn2 = Xn1 */ 00247 /* Xn1 = Xn */ 00248 /* Yn2 = Yn1 */ 00249 /* Yn1 = acc */ 00250 Xn2 = Xn1; 00251 Xn1 = Xn; 00252 00253 /* Store the output in the destination buffer. */ 00254 *pOut++ = Yn1; 00255 00256 /* decrement the loop counter */ 00257 sample--; 00258 } 00259 00260 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00261 ** No loop unrolling is used. */ 00262 sample = (blockSize & 0x3u); 00263 00264 while(sample > 0u) 00265 { 00266 /* Read the input */ 00267 Xn = *pIn++; 00268 00269 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00270 00271 /* acc = b0 * x[n] */ 00272 acc = (q63_t) b0 *Xn; 00273 /* acc += b1 * x[n-1] */ 00274 acc += (q63_t) b1 *Xn1; 00275 /* acc += b[2] * x[n-2] */ 00276 acc += (q63_t) b2 *Xn2; 00277 /* acc += a1 * y[n-1] */ 00278 acc += (q63_t) a1 *Yn1; 00279 /* acc += a2 * y[n-2] */ 00280 acc += (q63_t) a2 *Yn2; 00281 00282 /* The result is converted to 1.31 */ 00283 acc = acc >> lShift; 00284 00285 /* Every time after the output is computed state should be updated. */ 00286 /* The states should be updated as: */ 00287 /* Xn2 = Xn1 */ 00288 /* Xn1 = Xn */ 00289 /* Yn2 = Yn1 */ 00290 /* Yn1 = acc */ 00291 Xn2 = Xn1; 00292 Xn1 = Xn; 00293 Yn2 = Yn1; 00294 Yn1 = (q31_t) acc; 00295 00296 /* Store the output in the destination buffer. */ 00297 *pOut++ = (q31_t) acc; 00298 00299 /* decrement the loop counter */ 00300 sample--; 00301 } 00302 00303 /* The first stage goes from the input buffer to the output buffer. */ 00304 /* Subsequent stages occur in-place in the output buffer */ 00305 pIn = pDst; 00306 00307 /* Reset to destination pointer */ 00308 pOut = pDst; 00309 00310 /* Store the updated state variables back into the pState array */ 00311 *pState++ = Xn1; 00312 *pState++ = Xn2; 00313 *pState++ = Yn1; 00314 *pState++ = Yn2; 00315 00316 } while(--stage); 00317 00318 #else 00319 00320 /* Run the below code for Cortex-M0 */ 00321 00322 do 00323 { 00324 /* Reading the coefficients */ 00325 b0 = *pCoeffs++; 00326 b1 = *pCoeffs++; 00327 b2 = *pCoeffs++; 00328 a1 = *pCoeffs++; 00329 a2 = *pCoeffs++; 00330 00331 /* Reading the state values */ 00332 Xn1 = pState[0]; 00333 Xn2 = pState[1]; 00334 Yn1 = pState[2]; 00335 Yn2 = pState[3]; 00336 00337 /* The variables acc holds the output value that is computed: 00338 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00339 */ 00340 00341 sample = blockSize; 00342 00343 while(sample > 0u) 00344 { 00345 /* Read the input */ 00346 Xn = *pIn++; 00347 00348 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00349 /* acc = b0 * x[n] */ 00350 acc = (q63_t) b0 *Xn; 00351 00352 /* acc += b1 * x[n-1] */ 00353 acc += (q63_t) b1 *Xn1; 00354 /* acc += b[2] * x[n-2] */ 00355 acc += (q63_t) b2 *Xn2; 00356 /* acc += a1 * y[n-1] */ 00357 acc += (q63_t) a1 *Yn1; 00358 /* acc += a2 * y[n-2] */ 00359 acc += (q63_t) a2 *Yn2; 00360 00361 /* The result is converted to 1.31 */ 00362 acc = acc >> lShift; 00363 00364 /* Every time after the output is computed state should be updated. */ 00365 /* The states should be updated as: */ 00366 /* Xn2 = Xn1 */ 00367 /* Xn1 = Xn */ 00368 /* Yn2 = Yn1 */ 00369 /* Yn1 = acc */ 00370 Xn2 = Xn1; 00371 Xn1 = Xn; 00372 Yn2 = Yn1; 00373 Yn1 = (q31_t) acc; 00374 00375 /* Store the output in the destination buffer. */ 00376 *pOut++ = (q31_t) acc; 00377 00378 /* decrement the loop counter */ 00379 sample--; 00380 } 00381 00382 /* The first stage goes from the input buffer to the output buffer. */ 00383 /* Subsequent stages occur in-place in the output buffer */ 00384 pIn = pDst; 00385 00386 /* Reset to destination pointer */ 00387 pOut = pDst; 00388 00389 /* Store the updated state variables back into the pState array */ 00390 *pState++ = Xn1; 00391 *pState++ = Xn2; 00392 *pState++ = Yn1; 00393 *pState++ = Yn2; 00394 00395 } while(--stage); 00396 00397 #endif /* #ifndef ARM_MATH_CM0_FAMILY_FAMILY */ 00398 } 00399 00400 00401 00402 00403 /** 00404 * @} end of BiquadCascadeDF1 group 00405 */
Generated on Tue Jul 12 2022 19:48:43 by 1.7.2