CMSIS DSP library
Dependents: performance_timer Surfboard_ gps2rtty Capstone ... more
arm_biquad_cascade_df1_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_biquad_cascade_df1_q15.c 00009 * 00010 * Description: Processing function for the 00011 * Q15 Biquad cascade DirectFormI(DF1) filter. 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00014 * 00015 * Redistribution and use in source and binary forms, with or without 00016 * modification, are permitted provided that the following conditions 00017 * are met: 00018 * - Redistributions of source code must retain the above copyright 00019 * notice, this list of conditions and the following disclaimer. 00020 * - Redistributions in binary form must reproduce the above copyright 00021 * notice, this list of conditions and the following disclaimer in 00022 * the documentation and/or other materials provided with the 00023 * distribution. 00024 * - Neither the name of ARM LIMITED nor the names of its contributors 00025 * may be used to endorse or promote products derived from this 00026 * software without specific prior written permission. 00027 * 00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00039 * POSSIBILITY OF SUCH DAMAGE. 00040 * -------------------------------------------------------------------- */ 00041 00042 #include "arm_math.h" 00043 00044 /** 00045 * @ingroup groupFilters 00046 */ 00047 00048 /** 00049 * @addtogroup BiquadCascadeDF1 00050 * @{ 00051 */ 00052 00053 /** 00054 * @brief Processing function for the Q15 Biquad cascade filter. 00055 * @param[in] *S points to an instance of the Q15 Biquad cascade structure. 00056 * @param[in] *pSrc points to the block of input data. 00057 * @param[out] *pDst points to the location where the output result is written. 00058 * @param[in] blockSize number of samples to process per call. 00059 * @return none. 00060 * 00061 * 00062 * <b>Scaling and Overflow Behavior:</b> 00063 * \par 00064 * The function is implemented using a 64-bit internal accumulator. 00065 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result. 00066 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format. 00067 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved. 00068 * The accumulator is then shifted by <code>postShift</code> bits to truncate the result to 1.15 format by discarding the low 16 bits. 00069 * Finally, the result is saturated to 1.15 format. 00070 * 00071 * \par 00072 * Refer to the function <code>arm_biquad_cascade_df1_fast_q15()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4. 00073 */ 00074 00075 void arm_biquad_cascade_df1_q15( 00076 const arm_biquad_casd_df1_inst_q15 * S, 00077 q15_t * pSrc, 00078 q15_t * pDst, 00079 uint32_t blockSize) 00080 { 00081 00082 00083 #ifndef ARM_MATH_CM0_FAMILY 00084 00085 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00086 00087 q15_t *pIn = pSrc; /* Source pointer */ 00088 q15_t *pOut = pDst; /* Destination pointer */ 00089 q31_t in; /* Temporary variable to hold input value */ 00090 q31_t out; /* Temporary variable to hold output value */ 00091 q31_t b0; /* Temporary variable to hold bo value */ 00092 q31_t b1, a1; /* Filter coefficients */ 00093 q31_t state_in, state_out; /* Filter state variables */ 00094 q31_t acc_l, acc_h; 00095 q63_t acc; /* Accumulator */ 00096 int32_t lShift = (15 - (int32_t) S->postShift); /* Post shift */ 00097 q15_t *pState = S->pState; /* State pointer */ 00098 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00099 uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */ 00100 int32_t uShift = (32 - lShift); 00101 00102 do 00103 { 00104 /* Read the b0 and 0 coefficients using SIMD */ 00105 b0 = *__SIMD32(pCoeffs)++; 00106 00107 /* Read the b1 and b2 coefficients using SIMD */ 00108 b1 = *__SIMD32(pCoeffs)++; 00109 00110 /* Read the a1 and a2 coefficients using SIMD */ 00111 a1 = *__SIMD32(pCoeffs)++; 00112 00113 /* Read the input state values from the state buffer: x[n-1], x[n-2] */ 00114 state_in = *__SIMD32(pState)++; 00115 00116 /* Read the output state values from the state buffer: y[n-1], y[n-2] */ 00117 state_out = *__SIMD32(pState)--; 00118 00119 /* Apply loop unrolling and compute 2 output values simultaneously. */ 00120 /* The variable acc hold output values that are being computed: 00121 * 00122 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00123 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00124 */ 00125 sample = blockSize >> 1u; 00126 00127 /* First part of the processing with loop unrolling. Compute 2 outputs at a time. 00128 ** a second loop below computes the remaining 1 sample. */ 00129 while(sample > 0u) 00130 { 00131 00132 /* Read the input */ 00133 in = *__SIMD32(pIn)++; 00134 00135 /* out = b0 * x[n] + 0 * 0 */ 00136 out = __SMUAD(b0, in); 00137 00138 /* acc += b1 * x[n-1] + b2 * x[n-2] + out */ 00139 acc = __SMLALD(b1, state_in, out); 00140 /* acc += a1 * y[n-1] + a2 * y[n-2] */ 00141 acc = __SMLALD(a1, state_out, acc); 00142 00143 /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */ 00144 /* Calc lower part of acc */ 00145 acc_l = acc & 0xffffffff; 00146 00147 /* Calc upper part of acc */ 00148 acc_h = (acc >> 32) & 0xffffffff; 00149 00150 /* Apply shift for lower part of acc and upper part of acc */ 00151 out = (uint32_t) acc_l >> lShift | acc_h << uShift; 00152 00153 out = __SSAT(out, 16); 00154 00155 /* Every time after the output is computed state should be updated. */ 00156 /* The states should be updated as: */ 00157 /* Xn2 = Xn1 */ 00158 /* Xn1 = Xn */ 00159 /* Yn2 = Yn1 */ 00160 /* Yn1 = acc */ 00161 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00162 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00163 00164 #ifndef ARM_MATH_BIG_ENDIAN 00165 00166 state_in = __PKHBT(in, state_in, 16); 00167 state_out = __PKHBT(out, state_out, 16); 00168 00169 #else 00170 00171 state_in = __PKHBT(state_in >> 16, (in >> 16), 16); 00172 state_out = __PKHBT(state_out >> 16, (out), 16); 00173 00174 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00175 00176 /* out = b0 * x[n] + 0 * 0 */ 00177 out = __SMUADX(b0, in); 00178 /* acc += b1 * x[n-1] + b2 * x[n-2] + out */ 00179 acc = __SMLALD(b1, state_in, out); 00180 /* acc += a1 * y[n-1] + a2 * y[n-2] */ 00181 acc = __SMLALD(a1, state_out, acc); 00182 00183 /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */ 00184 /* Calc lower part of acc */ 00185 acc_l = acc & 0xffffffff; 00186 00187 /* Calc upper part of acc */ 00188 acc_h = (acc >> 32) & 0xffffffff; 00189 00190 /* Apply shift for lower part of acc and upper part of acc */ 00191 out = (uint32_t) acc_l >> lShift | acc_h << uShift; 00192 00193 out = __SSAT(out, 16); 00194 00195 /* Store the output in the destination buffer. */ 00196 00197 #ifndef ARM_MATH_BIG_ENDIAN 00198 00199 *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16); 00200 00201 #else 00202 00203 *__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16); 00204 00205 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00206 00207 /* Every time after the output is computed state should be updated. */ 00208 /* The states should be updated as: */ 00209 /* Xn2 = Xn1 */ 00210 /* Xn1 = Xn */ 00211 /* Yn2 = Yn1 */ 00212 /* Yn1 = acc */ 00213 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00214 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00215 #ifndef ARM_MATH_BIG_ENDIAN 00216 00217 state_in = __PKHBT(in >> 16, state_in, 16); 00218 state_out = __PKHBT(out, state_out, 16); 00219 00220 #else 00221 00222 state_in = __PKHBT(state_in >> 16, in, 16); 00223 state_out = __PKHBT(state_out >> 16, out, 16); 00224 00225 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00226 00227 00228 /* Decrement the loop counter */ 00229 sample--; 00230 00231 } 00232 00233 /* If the blockSize is not a multiple of 2, compute any remaining output samples here. 00234 ** No loop unrolling is used. */ 00235 00236 if((blockSize & 0x1u) != 0u) 00237 { 00238 /* Read the input */ 00239 in = *pIn++; 00240 00241 /* out = b0 * x[n] + 0 * 0 */ 00242 00243 #ifndef ARM_MATH_BIG_ENDIAN 00244 00245 out = __SMUAD(b0, in); 00246 00247 #else 00248 00249 out = __SMUADX(b0, in); 00250 00251 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00252 00253 /* acc = b1 * x[n-1] + b2 * x[n-2] + out */ 00254 acc = __SMLALD(b1, state_in, out); 00255 /* acc += a1 * y[n-1] + a2 * y[n-2] */ 00256 acc = __SMLALD(a1, state_out, acc); 00257 00258 /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */ 00259 /* Calc lower part of acc */ 00260 acc_l = acc & 0xffffffff; 00261 00262 /* Calc upper part of acc */ 00263 acc_h = (acc >> 32) & 0xffffffff; 00264 00265 /* Apply shift for lower part of acc and upper part of acc */ 00266 out = (uint32_t) acc_l >> lShift | acc_h << uShift; 00267 00268 out = __SSAT(out, 16); 00269 00270 /* Store the output in the destination buffer. */ 00271 *pOut++ = (q15_t) out; 00272 00273 /* Every time after the output is computed state should be updated. */ 00274 /* The states should be updated as: */ 00275 /* Xn2 = Xn1 */ 00276 /* Xn1 = Xn */ 00277 /* Yn2 = Yn1 */ 00278 /* Yn1 = acc */ 00279 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00280 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00281 00282 #ifndef ARM_MATH_BIG_ENDIAN 00283 00284 state_in = __PKHBT(in, state_in, 16); 00285 state_out = __PKHBT(out, state_out, 16); 00286 00287 #else 00288 00289 state_in = __PKHBT(state_in >> 16, in, 16); 00290 state_out = __PKHBT(state_out >> 16, out, 16); 00291 00292 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00293 00294 } 00295 00296 /* The first stage goes from the input wire to the output wire. */ 00297 /* Subsequent numStages occur in-place in the output wire */ 00298 pIn = pDst; 00299 00300 /* Reset the output pointer */ 00301 pOut = pDst; 00302 00303 /* Store the updated state variables back into the state array */ 00304 *__SIMD32(pState)++ = state_in; 00305 *__SIMD32(pState)++ = state_out; 00306 00307 00308 /* Decrement the loop counter */ 00309 stage--; 00310 00311 } while(stage > 0u); 00312 00313 #else 00314 00315 /* Run the below code for Cortex-M0 */ 00316 00317 q15_t *pIn = pSrc; /* Source pointer */ 00318 q15_t *pOut = pDst; /* Destination pointer */ 00319 q15_t b0, b1, b2, a1, a2; /* Filter coefficients */ 00320 q15_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */ 00321 q15_t Xn; /* temporary input */ 00322 q63_t acc; /* Accumulator */ 00323 int32_t shift = (15 - (int32_t) S->postShift); /* Post shift */ 00324 q15_t *pState = S->pState; /* State pointer */ 00325 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00326 uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */ 00327 00328 do 00329 { 00330 /* Reading the coefficients */ 00331 b0 = *pCoeffs++; 00332 pCoeffs++; // skip the 0 coefficient 00333 b1 = *pCoeffs++; 00334 b2 = *pCoeffs++; 00335 a1 = *pCoeffs++; 00336 a2 = *pCoeffs++; 00337 00338 /* Reading the state values */ 00339 Xn1 = pState[0]; 00340 Xn2 = pState[1]; 00341 Yn1 = pState[2]; 00342 Yn2 = pState[3]; 00343 00344 /* The variables acc holds the output value that is computed: 00345 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00346 */ 00347 00348 sample = blockSize; 00349 00350 while(sample > 0u) 00351 { 00352 /* Read the input */ 00353 Xn = *pIn++; 00354 00355 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00356 /* acc = b0 * x[n] */ 00357 acc = (q31_t) b0 *Xn; 00358 00359 /* acc += b1 * x[n-1] */ 00360 acc += (q31_t) b1 *Xn1; 00361 /* acc += b[2] * x[n-2] */ 00362 acc += (q31_t) b2 *Xn2; 00363 /* acc += a1 * y[n-1] */ 00364 acc += (q31_t) a1 *Yn1; 00365 /* acc += a2 * y[n-2] */ 00366 acc += (q31_t) a2 *Yn2; 00367 00368 /* The result is converted to 1.31 */ 00369 acc = __SSAT((acc >> shift), 16); 00370 00371 /* Every time after the output is computed state should be updated. */ 00372 /* The states should be updated as: */ 00373 /* Xn2 = Xn1 */ 00374 /* Xn1 = Xn */ 00375 /* Yn2 = Yn1 */ 00376 /* Yn1 = acc */ 00377 Xn2 = Xn1; 00378 Xn1 = Xn; 00379 Yn2 = Yn1; 00380 Yn1 = (q15_t) acc; 00381 00382 /* Store the output in the destination buffer. */ 00383 *pOut++ = (q15_t) acc; 00384 00385 /* decrement the loop counter */ 00386 sample--; 00387 } 00388 00389 /* The first stage goes from the input buffer to the output buffer. */ 00390 /* Subsequent stages occur in-place in the output buffer */ 00391 pIn = pDst; 00392 00393 /* Reset to destination pointer */ 00394 pOut = pDst; 00395 00396 /* Store the updated state variables back into the pState array */ 00397 *pState++ = Xn1; 00398 *pState++ = Xn2; 00399 *pState++ = Yn1; 00400 *pState++ = Yn2; 00401 00402 } while(--stage); 00403 00404 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00405 00406 } 00407 00408 00409 /** 00410 * @} end of BiquadCascadeDF1 group 00411 */
Generated on Tue Jul 12 2022 11:59:15 by 1.7.2