Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive, or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-os by
arm_fir_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_q15.c 00009 * 00010 * Description: Q15 FIR filter processing function. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @ingroup groupFilters 00045 */ 00046 00047 /** 00048 * @addtogroup FIR 00049 * @{ 00050 */ 00051 00052 /** 00053 * @brief Processing function for the Q15 FIR filter. 00054 * @param[in] *S points to an instance of the Q15 FIR structure. 00055 * @param[in] *pSrc points to the block of input data. 00056 * @param[out] *pDst points to the block of output data. 00057 * @param[in] blockSize number of samples to process per call. 00058 * @return none. 00059 * 00060 * 00061 * \par Restrictions 00062 * If the silicon does not support unaligned memory access enable the macro UNALIGNED_SUPPORT_DISABLE 00063 * In this case input, output, state buffers should be aligned by 32-bit 00064 * 00065 * <b>Scaling and Overflow Behavior:</b> 00066 * \par 00067 * The function is implemented using a 64-bit internal accumulator. 00068 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result. 00069 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format. 00070 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved. 00071 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits. 
00072 * Lastly, the accumulator is saturated to yield a result in 1.15 format. 00073 * 00074 * \par 00075 * Refer to the function <code>arm_fir_fast_q15()</code> for a faster but less precise implementation of this function. 00076 */ 00077 00078 #ifndef ARM_MATH_CM0_FAMILY 00079 00080 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00081 00082 #ifndef UNALIGNED_SUPPORT_DISABLE 00083 00084 00085 void arm_fir_q15( 00086 const arm_fir_instance_q15 * S, 00087 q15_t * pSrc, 00088 q15_t * pDst, 00089 uint32_t blockSize) 00090 { 00091 q15_t *pState = S->pState; /* State pointer */ 00092 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00093 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00094 q15_t *px1; /* Temporary q15 pointer for state buffer */ 00095 q15_t *pb; /* Temporary pointer for coefficient buffer */ 00096 q31_t x0, x1, x2, x3, c0; /* Temporary variables to hold SIMD state and coefficient values */ 00097 q63_t acc0, acc1, acc2, acc3; /* Accumulators */ 00098 uint32_t numTaps = S->numTaps; /* Number of taps in the filter */ 00099 uint32_t tapCnt, blkCnt; /* Loop counters */ 00100 00101 00102 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00103 /* pStateCurnt points to the location where the new input data should be written */ 00104 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00105 00106 /* Apply loop unrolling and compute 4 output values simultaneously. 00107 * The variables acc0 ... 
acc3 hold output values that are being computed: 00108 * 00109 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] 00110 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1] 00111 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2] 00112 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3] 00113 */ 00114 00115 blkCnt = blockSize >> 2; 00116 00117 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00118 ** a second loop below computes the remaining 1 to 3 samples. */ 00119 while(blkCnt > 0u) 00120 { 00121 /* Copy four new input samples into the state buffer. 00122 ** Use 32-bit SIMD to move the 16-bit data. Only requires two copies. */ 00123 *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++; 00124 *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++; 00125 00126 /* Set all accumulators to zero */ 00127 acc0 = 0; 00128 acc1 = 0; 00129 acc2 = 0; 00130 acc3 = 0; 00131 00132 /* Initialize state pointer of type q15 */ 00133 px1 = pState; 00134 00135 /* Initialize coeff pointer of type q31 */ 00136 pb = pCoeffs; 00137 00138 /* Read the first two samples from the state buffer: x[n-N], x[n-N-1] */ 00139 x0 = _SIMD32_OFFSET(px1); 00140 00141 /* Read the third and forth samples from the state buffer: x[n-N-1], x[n-N-2] */ 00142 x1 = _SIMD32_OFFSET(px1 + 1u); 00143 00144 px1 += 2u; 00145 00146 /* Loop over the number of taps. Unroll by a factor of 4. 00147 ** Repeat until we've computed numTaps-4 coefficients. 
*/ 00148 tapCnt = numTaps >> 2; 00149 00150 while(tapCnt > 0u) 00151 { 00152 /* Read the first two coefficients using SIMD: b[N] and b[N-1] coefficients */ 00153 c0 = *__SIMD32(pb)++; 00154 00155 /* acc0 += b[N] * x[n-N] + b[N-1] * x[n-N-1] */ 00156 acc0 = __SMLALD(x0, c0, acc0); 00157 00158 /* acc1 += b[N] * x[n-N-1] + b[N-1] * x[n-N-2] */ 00159 acc1 = __SMLALD(x1, c0, acc1); 00160 00161 /* Read state x[n-N-2], x[n-N-3] */ 00162 x2 = _SIMD32_OFFSET(px1); 00163 00164 /* Read state x[n-N-3], x[n-N-4] */ 00165 x3 = _SIMD32_OFFSET(px1 + 1u); 00166 00167 /* acc2 += b[N] * x[n-N-2] + b[N-1] * x[n-N-3] */ 00168 acc2 = __SMLALD(x2, c0, acc2); 00169 00170 /* acc3 += b[N] * x[n-N-3] + b[N-1] * x[n-N-4] */ 00171 acc3 = __SMLALD(x3, c0, acc3); 00172 00173 /* Read coefficients b[N-2], b[N-3] */ 00174 c0 = *__SIMD32(pb)++; 00175 00176 /* acc0 += b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */ 00177 acc0 = __SMLALD(x2, c0, acc0); 00178 00179 /* acc1 += b[N-2] * x[n-N-3] + b[N-3] * x[n-N-4] */ 00180 acc1 = __SMLALD(x3, c0, acc1); 00181 00182 /* Read state x[n-N-4], x[n-N-5] */ 00183 x0 = _SIMD32_OFFSET(px1 + 2u); 00184 00185 /* Read state x[n-N-5], x[n-N-6] */ 00186 x1 = _SIMD32_OFFSET(px1 + 3u); 00187 00188 /* acc2 += b[N-2] * x[n-N-4] + b[N-3] * x[n-N-5] */ 00189 acc2 = __SMLALD(x0, c0, acc2); 00190 00191 /* acc3 += b[N-2] * x[n-N-5] + b[N-3] * x[n-N-6] */ 00192 acc3 = __SMLALD(x1, c0, acc3); 00193 00194 px1 += 4u; 00195 00196 tapCnt--; 00197 00198 } 00199 00200 00201 /* If the filter length is not a multiple of 4, compute the remaining filter taps. 00202 ** This is always be 2 taps since the filter length is even. 
*/ 00203 if((numTaps & 0x3u) != 0u) 00204 { 00205 /* Read 2 coefficients */ 00206 c0 = *__SIMD32(pb)++; 00207 00208 /* Fetch 4 state variables */ 00209 x2 = _SIMD32_OFFSET(px1); 00210 00211 x3 = _SIMD32_OFFSET(px1 + 1u); 00212 00213 /* Perform the multiply-accumulates */ 00214 acc0 = __SMLALD(x0, c0, acc0); 00215 00216 px1 += 2u; 00217 00218 acc1 = __SMLALD(x1, c0, acc1); 00219 acc2 = __SMLALD(x2, c0, acc2); 00220 acc3 = __SMLALD(x3, c0, acc3); 00221 } 00222 00223 /* The results in the 4 accumulators are in 2.30 format. Convert to 1.15 with saturation. 00224 ** Then store the 4 outputs in the destination buffer. */ 00225 00226 #ifndef ARM_MATH_BIG_ENDIAN 00227 00228 *__SIMD32(pDst)++ = 00229 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16); 00230 *__SIMD32(pDst)++ = 00231 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16); 00232 00233 #else 00234 00235 *__SIMD32(pDst)++ = 00236 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16); 00237 *__SIMD32(pDst)++ = 00238 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16); 00239 00240 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00241 00242 00243 00244 /* Advance the state pointer by 4 to process the next group of 4 samples */ 00245 pState = pState + 4; 00246 00247 /* Decrement the loop counter */ 00248 blkCnt--; 00249 } 00250 00251 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00252 ** No loop unrolling is used. 
*/ 00253 blkCnt = blockSize % 0x4u; 00254 while(blkCnt > 0u) 00255 { 00256 /* Copy two samples into state buffer */ 00257 *pStateCurnt++ = *pSrc++; 00258 00259 /* Set the accumulator to zero */ 00260 acc0 = 0; 00261 00262 /* Initialize state pointer of type q15 */ 00263 px1 = pState; 00264 00265 /* Initialize coeff pointer of type q31 */ 00266 pb = pCoeffs; 00267 00268 tapCnt = numTaps >> 1; 00269 00270 do 00271 { 00272 00273 c0 = *__SIMD32(pb)++; 00274 x0 = *__SIMD32(px1)++; 00275 00276 acc0 = __SMLALD(x0, c0, acc0); 00277 tapCnt--; 00278 } 00279 while(tapCnt > 0u); 00280 00281 /* The result is in 2.30 format. Convert to 1.15 with saturation. 00282 ** Then store the output in the destination buffer. */ 00283 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16)); 00284 00285 /* Advance state pointer by 1 for the next sample */ 00286 pState = pState + 1; 00287 00288 /* Decrement the loop counter */ 00289 blkCnt--; 00290 } 00291 00292 /* Processing is complete. 00293 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer. 00294 ** This prepares the state buffer for the next function call. 
*/ 00295 00296 /* Points to the start of the state buffer */ 00297 pStateCurnt = S->pState; 00298 00299 /* Calculation of count for copying integer writes */ 00300 tapCnt = (numTaps - 1u) >> 2; 00301 00302 while(tapCnt > 0u) 00303 { 00304 00305 /* Copy state values to start of state buffer */ 00306 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00307 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00308 00309 tapCnt--; 00310 00311 } 00312 00313 /* Calculation of count for remaining q15_t data */ 00314 tapCnt = (numTaps - 1u) % 0x4u; 00315 00316 /* copy remaining data */ 00317 while(tapCnt > 0u) 00318 { 00319 *pStateCurnt++ = *pState++; 00320 00321 /* Decrement the loop counter */ 00322 tapCnt--; 00323 } 00324 } 00325 00326 #else /* UNALIGNED_SUPPORT_DISABLE */ 00327 00328 void arm_fir_q15( 00329 const arm_fir_instance_q15 * S, 00330 q15_t * pSrc, 00331 q15_t * pDst, 00332 uint32_t blockSize) 00333 { 00334 q15_t *pState = S->pState; /* State pointer */ 00335 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00336 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00337 q63_t acc0, acc1, acc2, acc3; /* Accumulators */ 00338 q15_t *pb; /* Temporary pointer for coefficient buffer */ 00339 q15_t *px; /* Temporary q31 pointer for SIMD state buffer accesses */ 00340 q31_t x0, x1, x2, c0; /* Temporary variables to hold SIMD state and coefficient values */ 00341 uint32_t numTaps = S->numTaps; /* Number of taps in the filter */ 00342 uint32_t tapCnt, blkCnt; /* Loop counters */ 00343 00344 00345 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00346 /* pStateCurnt points to the location where the new input data should be written */ 00347 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00348 00349 /* Apply loop unrolling and compute 4 output values simultaneously. 00350 * The variables acc0 ... 
acc3 hold output values that are being computed: 00351 * 00352 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] 00353 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1] 00354 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2] 00355 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3] 00356 */ 00357 00358 blkCnt = blockSize >> 2; 00359 00360 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00361 ** a second loop below computes the remaining 1 to 3 samples. */ 00362 while(blkCnt > 0u) 00363 { 00364 /* Copy four new input samples into the state buffer. 00365 ** Use 32-bit SIMD to move the 16-bit data. Only requires two copies. */ 00366 *pStateCurnt++ = *pSrc++; 00367 *pStateCurnt++ = *pSrc++; 00368 *pStateCurnt++ = *pSrc++; 00369 *pStateCurnt++ = *pSrc++; 00370 00371 00372 /* Set all accumulators to zero */ 00373 acc0 = 0; 00374 acc1 = 0; 00375 acc2 = 0; 00376 acc3 = 0; 00377 00378 /* Typecast q15_t pointer to q31_t pointer for state reading in q31_t */ 00379 px = pState; 00380 00381 /* Typecast q15_t pointer to q31_t pointer for coefficient reading in q31_t */ 00382 pb = pCoeffs; 00383 00384 /* Read the first two samples from the state buffer: x[n-N], x[n-N-1] */ 00385 x0 = *__SIMD32(px)++; 00386 00387 /* Read the third and forth samples from the state buffer: x[n-N-2], x[n-N-3] */ 00388 x2 = *__SIMD32(px)++; 00389 00390 /* Loop over the number of taps. Unroll by a factor of 4. 00391 ** Repeat until we've computed numTaps-(numTaps%4) coefficients. 
*/ 00392 tapCnt = numTaps >> 2; 00393 00394 while(tapCnt > 0) 00395 { 00396 /* Read the first two coefficients using SIMD: b[N] and b[N-1] coefficients */ 00397 c0 = *__SIMD32(pb)++; 00398 00399 /* acc0 += b[N] * x[n-N] + b[N-1] * x[n-N-1] */ 00400 acc0 = __SMLALD(x0, c0, acc0); 00401 00402 /* acc2 += b[N] * x[n-N-2] + b[N-1] * x[n-N-3] */ 00403 acc2 = __SMLALD(x2, c0, acc2); 00404 00405 /* pack x[n-N-1] and x[n-N-2] */ 00406 #ifndef ARM_MATH_BIG_ENDIAN 00407 x1 = __PKHBT(x2, x0, 0); 00408 #else 00409 x1 = __PKHBT(x0, x2, 0); 00410 #endif 00411 00412 /* Read state x[n-N-4], x[n-N-5] */ 00413 x0 = _SIMD32_OFFSET(px); 00414 00415 /* acc1 += b[N] * x[n-N-1] + b[N-1] * x[n-N-2] */ 00416 acc1 = __SMLALDX(x1, c0, acc1); 00417 00418 /* pack x[n-N-3] and x[n-N-4] */ 00419 #ifndef ARM_MATH_BIG_ENDIAN 00420 x1 = __PKHBT(x0, x2, 0); 00421 #else 00422 x1 = __PKHBT(x2, x0, 0); 00423 #endif 00424 00425 /* acc3 += b[N] * x[n-N-3] + b[N-1] * x[n-N-4] */ 00426 acc3 = __SMLALDX(x1, c0, acc3); 00427 00428 /* Read coefficients b[N-2], b[N-3] */ 00429 c0 = *__SIMD32(pb)++; 00430 00431 /* acc0 += b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */ 00432 acc0 = __SMLALD(x2, c0, acc0); 00433 00434 /* Read state x[n-N-6], x[n-N-7] with offset */ 00435 x2 = _SIMD32_OFFSET(px + 2u); 00436 00437 /* acc2 += b[N-2] * x[n-N-4] + b[N-3] * x[n-N-5] */ 00438 acc2 = __SMLALD(x0, c0, acc2); 00439 00440 /* acc1 += b[N-2] * x[n-N-3] + b[N-3] * x[n-N-4] */ 00441 acc1 = __SMLALDX(x1, c0, acc1); 00442 00443 /* pack x[n-N-5] and x[n-N-6] */ 00444 #ifndef ARM_MATH_BIG_ENDIAN 00445 x1 = __PKHBT(x2, x0, 0); 00446 #else 00447 x1 = __PKHBT(x0, x2, 0); 00448 #endif 00449 00450 /* acc3 += b[N-2] * x[n-N-5] + b[N-3] * x[n-N-6] */ 00451 acc3 = __SMLALDX(x1, c0, acc3); 00452 00453 /* Update state pointer for next state reading */ 00454 px += 4u; 00455 00456 /* Decrement tap count */ 00457 tapCnt--; 00458 00459 } 00460 00461 /* If the filter length is not a multiple of 4, compute the remaining filter taps. 
00462 ** This is always be 2 taps since the filter length is even. */ 00463 if((numTaps & 0x3u) != 0u) 00464 { 00465 00466 /* Read last two coefficients */ 00467 c0 = *__SIMD32(pb)++; 00468 00469 /* Perform the multiply-accumulates */ 00470 acc0 = __SMLALD(x0, c0, acc0); 00471 acc2 = __SMLALD(x2, c0, acc2); 00472 00473 /* pack state variables */ 00474 #ifndef ARM_MATH_BIG_ENDIAN 00475 x1 = __PKHBT(x2, x0, 0); 00476 #else 00477 x1 = __PKHBT(x0, x2, 0); 00478 #endif 00479 00480 /* Read last state variables */ 00481 x0 = *__SIMD32(px); 00482 00483 /* Perform the multiply-accumulates */ 00484 acc1 = __SMLALDX(x1, c0, acc1); 00485 00486 /* pack state variables */ 00487 #ifndef ARM_MATH_BIG_ENDIAN 00488 x1 = __PKHBT(x0, x2, 0); 00489 #else 00490 x1 = __PKHBT(x2, x0, 0); 00491 #endif 00492 00493 /* Perform the multiply-accumulates */ 00494 acc3 = __SMLALDX(x1, c0, acc3); 00495 } 00496 00497 /* The results in the 4 accumulators are in 2.30 format. Convert to 1.15 with saturation. 00498 ** Then store the 4 outputs in the destination buffer. */ 00499 00500 #ifndef ARM_MATH_BIG_ENDIAN 00501 00502 *__SIMD32(pDst)++ = 00503 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16); 00504 00505 *__SIMD32(pDst)++ = 00506 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16); 00507 00508 #else 00509 00510 *__SIMD32(pDst)++ = 00511 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16); 00512 00513 *__SIMD32(pDst)++ = 00514 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16); 00515 00516 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00517 00518 /* Advance the state pointer by 4 to process the next group of 4 samples */ 00519 pState = pState + 4; 00520 00521 /* Decrement the loop counter */ 00522 blkCnt--; 00523 } 00524 00525 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00526 ** No loop unrolling is used. 
*/ 00527 blkCnt = blockSize % 0x4u; 00528 while(blkCnt > 0u) 00529 { 00530 /* Copy two samples into state buffer */ 00531 *pStateCurnt++ = *pSrc++; 00532 00533 /* Set the accumulator to zero */ 00534 acc0 = 0; 00535 00536 /* Use SIMD to hold states and coefficients */ 00537 px = pState; 00538 pb = pCoeffs; 00539 00540 tapCnt = numTaps >> 1u; 00541 00542 do 00543 { 00544 acc0 += (q31_t) * px++ * *pb++; 00545 acc0 += (q31_t) * px++ * *pb++; 00546 tapCnt--; 00547 } 00548 while(tapCnt > 0u); 00549 00550 /* The result is in 2.30 format. Convert to 1.15 with saturation. 00551 ** Then store the output in the destination buffer. */ 00552 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16)); 00553 00554 /* Advance state pointer by 1 for the next sample */ 00555 pState = pState + 1u; 00556 00557 /* Decrement the loop counter */ 00558 blkCnt--; 00559 } 00560 00561 /* Processing is complete. 00562 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer. 00563 ** This prepares the state buffer for the next function call. 
*/ 00564 00565 /* Points to the start of the state buffer */ 00566 pStateCurnt = S->pState; 00567 00568 /* Calculation of count for copying integer writes */ 00569 tapCnt = (numTaps - 1u) >> 2; 00570 00571 while(tapCnt > 0u) 00572 { 00573 *pStateCurnt++ = *pState++; 00574 *pStateCurnt++ = *pState++; 00575 *pStateCurnt++ = *pState++; 00576 *pStateCurnt++ = *pState++; 00577 00578 tapCnt--; 00579 00580 } 00581 00582 /* Calculation of count for remaining q15_t data */ 00583 tapCnt = (numTaps - 1u) % 0x4u; 00584 00585 /* copy remaining data */ 00586 while(tapCnt > 0u) 00587 { 00588 *pStateCurnt++ = *pState++; 00589 00590 /* Decrement the loop counter */ 00591 tapCnt--; 00592 } 00593 } 00594 00595 00596 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */ 00597 00598 #else /* ARM_MATH_CM0_FAMILY */ 00599 00600 00601 /* Run the below code for Cortex-M0 */ 00602 00603 void arm_fir_q15( 00604 const arm_fir_instance_q15 * S, 00605 q15_t * pSrc, 00606 q15_t * pDst, 00607 uint32_t blockSize) 00608 { 00609 q15_t *pState = S->pState; /* State pointer */ 00610 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00611 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00612 00613 00614 00615 q15_t *px; /* Temporary pointer for state buffer */ 00616 q15_t *pb; /* Temporary pointer for coefficient buffer */ 00617 q63_t acc; /* Accumulator */ 00618 uint32_t numTaps = S->numTaps; /* Number of nTaps in the filter */ 00619 uint32_t tapCnt, blkCnt; /* Loop counters */ 00620 00621 /* S->pState buffer contains previous frame (numTaps - 1) samples */ 00622 /* pStateCurnt points to the location where the new input data should be written */ 00623 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00624 00625 /* Initialize blkCnt with blockSize */ 00626 blkCnt = blockSize; 00627 00628 while(blkCnt > 0u) 00629 { 00630 /* Copy one sample at a time into state buffer */ 00631 *pStateCurnt++ = *pSrc++; 00632 00633 /* Set the accumulator to zero */ 00634 acc = 0; 00635 00636 /* Initialize 
state pointer */ 00637 px = pState; 00638 00639 /* Initialize Coefficient pointer */ 00640 pb = pCoeffs; 00641 00642 tapCnt = numTaps; 00643 00644 /* Perform the multiply-accumulates */ 00645 do 00646 { 00647 /* acc = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */ 00648 acc += (q31_t) * px++ * *pb++; 00649 tapCnt--; 00650 } while(tapCnt > 0u); 00651 00652 /* The result is in 2.30 format. Convert to 1.15 00653 ** Then store the output in the destination buffer. */ 00654 *pDst++ = (q15_t) __SSAT((acc >> 15u), 16); 00655 00656 /* Advance state pointer by 1 for the next sample */ 00657 pState = pState + 1; 00658 00659 /* Decrement the samples loop counter */ 00660 blkCnt--; 00661 } 00662 00663 /* Processing is complete. 00664 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer. 00665 ** This prepares the state buffer for the next function call. */ 00666 00667 /* Points to the start of the state buffer */ 00668 pStateCurnt = S->pState; 00669 00670 /* Copy numTaps number of values */ 00671 tapCnt = (numTaps - 1u); 00672 00673 /* copy data */ 00674 while(tapCnt > 0u) 00675 { 00676 *pStateCurnt++ = *pState++; 00677 00678 /* Decrement the loop counter */ 00679 tapCnt--; 00680 } 00681 00682 } 00683 00684 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00685 00686 00687 00688 00689 /** 00690 * @} end of FIR group 00691 */
Generated on Tue Jul 12 2022 13:15:24 by
