Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
arm_biquad_cascade_df1_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Project: CMSIS DSP Library 00003 * Title: arm_biquad_cascade_df1_q15.c 00004 * Description: Processing function for the Q15 Biquad cascade DirectFormI(DF1) filter 00005 * 00006 * $Date: 27. January 2017 00007 * $Revision: V.1.5.1 00008 * 00009 * Target Processor: Cortex-M cores 00010 * -------------------------------------------------------------------- */ 00011 /* 00012 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. 00013 * 00014 * SPDX-License-Identifier: Apache-2.0 00015 * 00016 * Licensed under the Apache License, Version 2.0 (the License); you may 00017 * not use this file except in compliance with the License. 00018 * You may obtain a copy of the License at 00019 * 00020 * www.apache.org/licenses/LICENSE-2.0 00021 * 00022 * Unless required by applicable law or agreed to in writing, software 00023 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 00024 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00025 * See the License for the specific language governing permissions and 00026 * limitations under the License. 00027 */ 00028 00029 #include "arm_math.h" 00030 00031 /** 00032 * @ingroup groupFilters 00033 */ 00034 00035 /** 00036 * @addtogroup BiquadCascadeDF1 00037 * @{ 00038 */ 00039 00040 /** 00041 * @brief Processing function for the Q15 Biquad cascade filter. 00042 * @param[in] *S points to an instance of the Q15 Biquad cascade structure. 00043 * @param[in] *pSrc points to the block of input data. 00044 * @param[out] *pDst points to the location where the output result is written. 00045 * @param[in] blockSize number of samples to process per call. 00046 * @return none. 00047 * 00048 * 00049 * <b>Scaling and Overflow Behavior:</b> 00050 * \par 00051 * The function is implemented using a 64-bit internal accumulator. 00052 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result. 00053 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format. 00054 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved. 00055 * The accumulator is then shifted by <code>postShift</code> bits to truncate the result to 1.15 format by discarding the low 16 bits. 00056 * Finally, the result is saturated to 1.15 format. 00057 * 00058 * \par 00059 * Refer to the function <code>arm_biquad_cascade_df1_fast_q15()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4. 00060 */ 00061 00062 void arm_biquad_cascade_df1_q15( 00063 const arm_biquad_casd_df1_inst_q15 * S, 00064 q15_t * pSrc, 00065 q15_t * pDst, 00066 uint32_t blockSize) 00067 { 00068 00069 00070 #if defined (ARM_MATH_DSP) 00071 00072 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00073 00074 q15_t *pIn = pSrc; /* Source pointer */ 00075 q15_t *pOut = pDst; /* Destination pointer */ 00076 q31_t in; /* Temporary variable to hold input value */ 00077 q31_t out; /* Temporary variable to hold output value */ 00078 q31_t b0; /* Temporary variable to hold bo value */ 00079 q31_t b1, a1; /* Filter coefficients */ 00080 q31_t state_in, state_out; /* Filter state variables */ 00081 q31_t acc_l, acc_h; 00082 q63_t acc; /* Accumulator */ 00083 int32_t lShift = (15 - (int32_t) S->postShift); /* Post shift */ 00084 q15_t *pState = S->pState; /* State pointer */ 00085 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00086 uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */ 00087 int32_t uShift = (32 - lShift); 00088 00089 do 00090 { 00091 /* Read the b0 and 0 coefficients using SIMD */ 00092 b0 = *__SIMD32(pCoeffs)++; 00093 00094 /* Read the b1 and b2 coefficients using SIMD */ 00095 b1 = *__SIMD32(pCoeffs)++; 00096 00097 /* Read the a1 and a2 coefficients using SIMD */ 00098 a1 = *__SIMD32(pCoeffs)++; 00099 00100 /* Read the input state values from the state buffer: x[n-1], x[n-2] */ 00101 state_in = *__SIMD32(pState)++; 00102 00103 /* Read the output state values from the state buffer: y[n-1], y[n-2] */ 00104 state_out = *__SIMD32(pState)--; 00105 00106 /* Apply loop unrolling and compute 2 output values simultaneously. */ 00107 /* The variable acc hold output values that are being computed: 00108 * 00109 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00110 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00111 */ 00112 sample = blockSize >> 1U; 00113 00114 /* First part of the processing with loop unrolling. Compute 2 outputs at a time. 00115 ** a second loop below computes the remaining 1 sample. */ 00116 while (sample > 0U) 00117 { 00118 00119 /* Read the input */ 00120 in = *__SIMD32(pIn)++; 00121 00122 /* out = b0 * x[n] + 0 * 0 */ 00123 out = __SMUAD(b0, in); 00124 00125 /* acc += b1 * x[n-1] + b2 * x[n-2] + out */ 00126 acc = __SMLALD(b1, state_in, out); 00127 /* acc += a1 * y[n-1] + a2 * y[n-2] */ 00128 acc = __SMLALD(a1, state_out, acc); 00129 00130 /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */ 00131 /* Calc lower part of acc */ 00132 acc_l = acc & 0xffffffff; 00133 00134 /* Calc upper part of acc */ 00135 acc_h = (acc >> 32) & 0xffffffff; 00136 00137 /* Apply shift for lower part of acc and upper part of acc */ 00138 out = (uint32_t) acc_l >> lShift | acc_h << uShift; 00139 00140 out = __SSAT(out, 16); 00141 00142 /* Every time after the output is computed state should be updated. */ 00143 /* The states should be updated as: */ 00144 /* Xn2 = Xn1 */ 00145 /* Xn1 = Xn */ 00146 /* Yn2 = Yn1 */ 00147 /* Yn1 = acc */ 00148 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00149 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00150 00151 #ifndef ARM_MATH_BIG_ENDIAN 00152 00153 state_in = __PKHBT(in, state_in, 16); 00154 state_out = __PKHBT(out, state_out, 16); 00155 00156 #else 00157 00158 state_in = __PKHBT(state_in >> 16, (in >> 16), 16); 00159 state_out = __PKHBT(state_out >> 16, (out), 16); 00160 00161 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00162 00163 /* out = b0 * x[n] + 0 * 0 */ 00164 out = __SMUADX(b0, in); 00165 /* acc += b1 * x[n-1] + b2 * x[n-2] + out */ 00166 acc = __SMLALD(b1, state_in, out); 00167 /* acc += a1 * y[n-1] + a2 * y[n-2] */ 00168 acc = __SMLALD(a1, state_out, acc); 00169 00170 /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */ 00171 /* Calc lower part of acc */ 00172 acc_l = acc & 0xffffffff; 00173 00174 /* Calc upper part of acc */ 00175 acc_h = (acc >> 32) & 0xffffffff; 00176 00177 /* Apply shift for lower part of acc and upper part of acc */ 00178 out = (uint32_t) acc_l >> lShift | acc_h << uShift; 00179 00180 out = __SSAT(out, 16); 00181 00182 /* Store the output in the destination buffer. */ 00183 00184 #ifndef ARM_MATH_BIG_ENDIAN 00185 00186 *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16); 00187 00188 #else 00189 00190 *__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16); 00191 00192 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00193 00194 /* Every time after the output is computed state should be updated. */ 00195 /* The states should be updated as: */ 00196 /* Xn2 = Xn1 */ 00197 /* Xn1 = Xn */ 00198 /* Yn2 = Yn1 */ 00199 /* Yn1 = acc */ 00200 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00201 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00202 #ifndef ARM_MATH_BIG_ENDIAN 00203 00204 state_in = __PKHBT(in >> 16, state_in, 16); 00205 state_out = __PKHBT(out, state_out, 16); 00206 00207 #else 00208 00209 state_in = __PKHBT(state_in >> 16, in, 16); 00210 state_out = __PKHBT(state_out >> 16, out, 16); 00211 00212 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00213 00214 00215 /* Decrement the loop counter */ 00216 sample--; 00217 00218 } 00219 00220 /* If the blockSize is not a multiple of 2, compute any remaining output samples here. 00221 ** No loop unrolling is used. */ 00222 00223 if ((blockSize & 0x1U) != 0U) 00224 { 00225 /* Read the input */ 00226 in = *pIn++; 00227 00228 /* out = b0 * x[n] + 0 * 0 */ 00229 00230 #ifndef ARM_MATH_BIG_ENDIAN 00231 00232 out = __SMUAD(b0, in); 00233 00234 #else 00235 00236 out = __SMUADX(b0, in); 00237 00238 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00239 00240 /* acc = b1 * x[n-1] + b2 * x[n-2] + out */ 00241 acc = __SMLALD(b1, state_in, out); 00242 /* acc += a1 * y[n-1] + a2 * y[n-2] */ 00243 acc = __SMLALD(a1, state_out, acc); 00244 00245 /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */ 00246 /* Calc lower part of acc */ 00247 acc_l = acc & 0xffffffff; 00248 00249 /* Calc upper part of acc */ 00250 acc_h = (acc >> 32) & 0xffffffff; 00251 00252 /* Apply shift for lower part of acc and upper part of acc */ 00253 out = (uint32_t) acc_l >> lShift | acc_h << uShift; 00254 00255 out = __SSAT(out, 16); 00256 00257 /* Store the output in the destination buffer. */ 00258 *pOut++ = (q15_t) out; 00259 00260 /* Every time after the output is computed state should be updated. */ 00261 /* The states should be updated as: */ 00262 /* Xn2 = Xn1 */ 00263 /* Xn1 = Xn */ 00264 /* Yn2 = Yn1 */ 00265 /* Yn1 = acc */ 00266 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00267 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00268 00269 #ifndef ARM_MATH_BIG_ENDIAN 00270 00271 state_in = __PKHBT(in, state_in, 16); 00272 state_out = __PKHBT(out, state_out, 16); 00273 00274 #else 00275 00276 state_in = __PKHBT(state_in >> 16, in, 16); 00277 state_out = __PKHBT(state_out >> 16, out, 16); 00278 00279 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00280 00281 } 00282 00283 /* The first stage goes from the input wire to the output wire. */ 00284 /* Subsequent numStages occur in-place in the output wire */ 00285 pIn = pDst; 00286 00287 /* Reset the output pointer */ 00288 pOut = pDst; 00289 00290 /* Store the updated state variables back into the state array */ 00291 *__SIMD32(pState)++ = state_in; 00292 *__SIMD32(pState)++ = state_out; 00293 00294 00295 /* Decrement the loop counter */ 00296 stage--; 00297 00298 } while (stage > 0U); 00299 00300 #else 00301 00302 /* Run the below code for Cortex-M0 */ 00303 00304 q15_t *pIn = pSrc; /* Source pointer */ 00305 q15_t *pOut = pDst; /* Destination pointer */ 00306 q15_t b0, b1, b2, a1, a2; /* Filter coefficients */ 00307 q15_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */ 00308 q15_t Xn; /* temporary input */ 00309 q63_t acc; /* Accumulator */ 00310 int32_t shift = (15 - (int32_t) S->postShift); /* Post shift */ 00311 q15_t *pState = S->pState; /* State pointer */ 00312 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00313 uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */ 00314 00315 do 00316 { 00317 /* Reading the coefficients */ 00318 b0 = *pCoeffs++; 00319 pCoeffs++; // skip the 0 coefficient 00320 b1 = *pCoeffs++; 00321 b2 = *pCoeffs++; 00322 a1 = *pCoeffs++; 00323 a2 = *pCoeffs++; 00324 00325 /* Reading the state values */ 00326 Xn1 = pState[0]; 00327 Xn2 = pState[1]; 00328 Yn1 = pState[2]; 00329 Yn2 = pState[3]; 00330 00331 /* The variables acc holds the output value that is computed: 00332 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00333 */ 00334 00335 sample = blockSize; 00336 00337 while (sample > 0U) 00338 { 00339 /* Read the input */ 00340 Xn = *pIn++; 00341 00342 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00343 /* acc = b0 * x[n] */ 00344 acc = (q31_t) b0 *Xn; 00345 00346 /* acc += b1 * x[n-1] */ 00347 acc += (q31_t) b1 *Xn1; 00348 /* acc += b[2] * x[n-2] */ 00349 acc += (q31_t) b2 *Xn2; 00350 /* acc += a1 * y[n-1] */ 00351 acc += (q31_t) a1 *Yn1; 00352 /* acc += a2 * y[n-2] */ 00353 acc += (q31_t) a2 *Yn2; 00354 00355 /* The result is converted to 1.31 */ 00356 acc = __SSAT((acc >> shift), 16); 00357 00358 /* Every time after the output is computed state should be updated. */ 00359 /* The states should be updated as: */ 00360 /* Xn2 = Xn1 */ 00361 /* Xn1 = Xn */ 00362 /* Yn2 = Yn1 */ 00363 /* Yn1 = acc */ 00364 Xn2 = Xn1; 00365 Xn1 = Xn; 00366 Yn2 = Yn1; 00367 Yn1 = (q15_t) acc; 00368 00369 /* Store the output in the destination buffer. */ 00370 *pOut++ = (q15_t) acc; 00371 00372 /* decrement the loop counter */ 00373 sample--; 00374 } 00375 00376 /* The first stage goes from the input buffer to the output buffer. */ 00377 /* Subsequent stages occur in-place in the output buffer */ 00378 pIn = pDst; 00379 00380 /* Reset to destination pointer */ 00381 pOut = pDst; 00382 00383 /* Store the updated state variables back into the pState array */ 00384 *pState++ = Xn1; 00385 *pState++ = Xn2; 00386 *pState++ = Yn1; 00387 *pState++ = Yn2; 00388 00389 } while (--stage); 00390 00391 #endif /* #if defined (ARM_MATH_DSP) */ 00392 00393 } 00394 00395 00396 /** 00397 * @} end of BiquadCascadeDF1 group 00398 */ 00399
Generated on Tue Jul 12 2022 16:46:22 by
 1.7.2
 1.7.2