CMSIS DSP library
Dependents: KL25Z_FFT_Demo Hat_Board_v5_1 KL25Z_FFT_Demo_tony KL25Z_FFT_Demo_tony ... more
Fork of mbed-dsp by
arm_fir_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 17. January 2013 00005 * $Revision: V1.4.1 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_q15.c 00009 * 00010 * Description: Q15 FIR filter processing function. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @ingroup groupFilters 00045 */ 00046 00047 /** 00048 * @addtogroup FIR 00049 * @{ 00050 */ 00051 00052 /** 00053 * @brief Processing function for the Q15 FIR filter. 00054 * @param[in] *S points to an instance of the Q15 FIR structure. 00055 * @param[in] *pSrc points to the block of input data. 00056 * @param[out] *pDst points to the block of output data. 00057 * @param[in] blockSize number of samples to process per call. 00058 * @return none. 00059 * 00060 * 00061 * \par Restrictions 00062 * If the silicon does not support unaligned memory access enable the macro UNALIGNED_SUPPORT_DISABLE 00063 * In this case input, output, state buffers should be aligned by 32-bit 00064 * 00065 * <b>Scaling and Overflow Behavior:</b> 00066 * \par 00067 * The function is implemented using a 64-bit internal accumulator. 00068 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result. 00069 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format. 00070 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved. 00071 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits. 00072 * Lastly, the accumulator is saturated to yield a result in 1.15 format. 00073 * 00074 * \par 00075 * Refer to the function <code>arm_fir_fast_q15()</code> for a faster but less precise implementation of this function. 00076 */ 00077 00078 #ifndef ARM_MATH_CM0_FAMILY 00079 00080 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00081 00082 #ifndef UNALIGNED_SUPPORT_DISABLE 00083 00084 00085 void arm_fir_q15( 00086 const arm_fir_instance_q15 * S, 00087 q15_t * pSrc, 00088 q15_t * pDst, 00089 uint32_t blockSize) 00090 { 00091 q15_t *pState = S->pState; /* State pointer */ 00092 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00093 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00094 q15_t *px1; /* Temporary q15 pointer for state buffer */ 00095 q15_t *pb; /* Temporary pointer for coefficient buffer */ 00096 q31_t x0, x1, x2, x3, c0; /* Temporary variables to hold SIMD state and coefficient values */ 00097 q63_t acc0, acc1, acc2, acc3; /* Accumulators */ 00098 uint32_t numTaps = S->numTaps; /* Number of taps in the filter */ 00099 uint32_t tapCnt, blkCnt; /* Loop counters */ 00100 00101 00102 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00103 /* pStateCurnt points to the location where the new input data should be written */ 00104 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00105 00106 /* Apply loop unrolling and compute 4 output values simultaneously. 00107 * The variables acc0 ... acc3 hold output values that are being computed: 00108 * 00109 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] 00110 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1] 00111 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2] 00112 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3] 00113 */ 00114 00115 blkCnt = blockSize >> 2; 00116 00117 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00118 ** a second loop below computes the remaining 1 to 3 samples. */ 00119 while(blkCnt > 0u) 00120 { 00121 /* Copy four new input samples into the state buffer. 00122 ** Use 32-bit SIMD to move the 16-bit data. Only requires two copies. */ 00123 *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++; 00124 *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++; 00125 00126 /* Set all accumulators to zero */ 00127 acc0 = 0; 00128 acc1 = 0; 00129 acc2 = 0; 00130 acc3 = 0; 00131 00132 /* Initialize state pointer of type q15 */ 00133 px1 = pState; 00134 00135 /* Initialize coeff pointer of type q31 */ 00136 pb = pCoeffs; 00137 00138 /* Read the first two samples from the state buffer: x[n-N], x[n-N-1] */ 00139 x0 = _SIMD32_OFFSET(px1); 00140 00141 /* Read the third and forth samples from the state buffer: x[n-N-1], x[n-N-2] */ 00142 x1 = _SIMD32_OFFSET(px1 + 1u); 00143 00144 px1 += 2u; 00145 00146 /* Loop over the number of taps. Unroll by a factor of 4. 00147 ** Repeat until we've computed numTaps-4 coefficients. */ 00148 tapCnt = numTaps >> 2; 00149 00150 while(tapCnt > 0u) 00151 { 00152 /* Read the first two coefficients using SIMD: b[N] and b[N-1] coefficients */ 00153 c0 = *__SIMD32(pb)++; 00154 00155 /* acc0 += b[N] * x[n-N] + b[N-1] * x[n-N-1] */ 00156 acc0 = __SMLALD(x0, c0, acc0); 00157 00158 /* acc1 += b[N] * x[n-N-1] + b[N-1] * x[n-N-2] */ 00159 acc1 = __SMLALD(x1, c0, acc1); 00160 00161 /* Read state x[n-N-2], x[n-N-3] */ 00162 x2 = _SIMD32_OFFSET(px1); 00163 00164 /* Read state x[n-N-3], x[n-N-4] */ 00165 x3 = _SIMD32_OFFSET(px1 + 1u); 00166 00167 /* acc2 += b[N] * x[n-N-2] + b[N-1] * x[n-N-3] */ 00168 acc2 = __SMLALD(x2, c0, acc2); 00169 00170 /* acc3 += b[N] * x[n-N-3] + b[N-1] * x[n-N-4] */ 00171 acc3 = __SMLALD(x3, c0, acc3); 00172 00173 /* Read coefficients b[N-2], b[N-3] */ 00174 c0 = *__SIMD32(pb)++; 00175 00176 /* acc0 += b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */ 00177 acc0 = __SMLALD(x2, c0, acc0); 00178 00179 /* acc1 += b[N-2] * x[n-N-3] + b[N-3] * x[n-N-4] */ 00180 acc1 = __SMLALD(x3, c0, acc1); 00181 00182 /* Read state x[n-N-4], x[n-N-5] */ 00183 x0 = _SIMD32_OFFSET(px1 + 2u); 00184 00185 /* Read state x[n-N-5], x[n-N-6] */ 00186 x1 = _SIMD32_OFFSET(px1 + 3u); 00187 00188 /* acc2 += b[N-2] * x[n-N-4] + b[N-3] * x[n-N-5] */ 00189 acc2 = __SMLALD(x0, c0, acc2); 00190 00191 /* acc3 += b[N-2] * x[n-N-5] + b[N-3] * x[n-N-6] */ 00192 acc3 = __SMLALD(x1, c0, acc3); 00193 00194 px1 += 4u; 00195 00196 tapCnt--; 00197 00198 } 00199 00200 00201 /* If the filter length is not a multiple of 4, compute the remaining filter taps. 00202 ** This is always be 2 taps since the filter length is even. */ 00203 if((numTaps & 0x3u) != 0u) 00204 { 00205 /* Read 2 coefficients */ 00206 c0 = *__SIMD32(pb)++; 00207 00208 /* Fetch 4 state variables */ 00209 x2 = _SIMD32_OFFSET(px1); 00210 00211 x3 = _SIMD32_OFFSET(px1 + 1u); 00212 00213 /* Perform the multiply-accumulates */ 00214 acc0 = __SMLALD(x0, c0, acc0); 00215 00216 px1 += 2u; 00217 00218 acc1 = __SMLALD(x1, c0, acc1); 00219 acc2 = __SMLALD(x2, c0, acc2); 00220 acc3 = __SMLALD(x3, c0, acc3); 00221 } 00222 00223 /* The results in the 4 accumulators are in 2.30 format. Convert to 1.15 with saturation. 00224 ** Then store the 4 outputs in the destination buffer. */ 00225 00226 #ifndef ARM_MATH_BIG_ENDIAN 00227 00228 *__SIMD32(pDst)++ = 00229 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16); 00230 *__SIMD32(pDst)++ = 00231 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16); 00232 00233 #else 00234 00235 *__SIMD32(pDst)++ = 00236 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16); 00237 *__SIMD32(pDst)++ = 00238 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16); 00239 00240 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00241 00242 00243 00244 /* Advance the state pointer by 4 to process the next group of 4 samples */ 00245 pState = pState + 4; 00246 00247 /* Decrement the loop counter */ 00248 blkCnt--; 00249 } 00250 00251 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00252 ** No loop unrolling is used. */ 00253 blkCnt = blockSize % 0x4u; 00254 while(blkCnt > 0u) 00255 { 00256 /* Copy two samples into state buffer */ 00257 *pStateCurnt++ = *pSrc++; 00258 00259 /* Set the accumulator to zero */ 00260 acc0 = 0; 00261 00262 /* Initialize state pointer of type q15 */ 00263 px1 = pState; 00264 00265 /* Initialize coeff pointer of type q31 */ 00266 pb = pCoeffs; 00267 00268 tapCnt = numTaps >> 1; 00269 00270 do 00271 { 00272 00273 c0 = *__SIMD32(pb)++; 00274 x0 = *__SIMD32(px1)++; 00275 00276 acc0 = __SMLALD(x0, c0, acc0); 00277 tapCnt--; 00278 } 00279 while(tapCnt > 0u); 00280 00281 /* The result is in 2.30 format. Convert to 1.15 with saturation. 00282 ** Then store the output in the destination buffer. */ 00283 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16)); 00284 00285 /* Advance state pointer by 1 for the next sample */ 00286 pState = pState + 1; 00287 00288 /* Decrement the loop counter */ 00289 blkCnt--; 00290 } 00291 00292 /* Processing is complete. 00293 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer. 00294 ** This prepares the state buffer for the next function call. */ 00295 00296 /* Points to the start of the state buffer */ 00297 pStateCurnt = S->pState; 00298 00299 /* Calculation of count for copying integer writes */ 00300 tapCnt = (numTaps - 1u) >> 2; 00301 00302 while(tapCnt > 0u) 00303 { 00304 00305 /* Copy state values to start of state buffer */ 00306 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00307 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00308 00309 tapCnt--; 00310 00311 } 00312 00313 /* Calculation of count for remaining q15_t data */ 00314 tapCnt = (numTaps - 1u) % 0x4u; 00315 00316 /* copy remaining data */ 00317 while(tapCnt > 0u) 00318 { 00319 *pStateCurnt++ = *pState++; 00320 00321 /* Decrement the loop counter */ 00322 tapCnt--; 00323 } 00324 } 00325 00326 #else /* UNALIGNED_SUPPORT_DISABLE */ 00327 00328 void arm_fir_q15( 00329 const arm_fir_instance_q15 * S, 00330 q15_t * pSrc, 00331 q15_t * pDst, 00332 uint32_t blockSize) 00333 { 00334 q15_t *pState = S->pState; /* State pointer */ 00335 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00336 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00337 q63_t acc0, acc1, acc2, acc3; /* Accumulators */ 00338 q15_t *pb; /* Temporary pointer for coefficient buffer */ 00339 q15_t *px; /* Temporary q31 pointer for SIMD state buffer accesses */ 00340 q31_t x0, x1, x2, c0; /* Temporary variables to hold SIMD state and coefficient values */ 00341 uint32_t numTaps = S->numTaps; /* Number of taps in the filter */ 00342 uint32_t tapCnt, blkCnt; /* Loop counters */ 00343 00344 00345 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00346 /* pStateCurnt points to the location where the new input data should be written */ 00347 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00348 00349 /* Apply loop unrolling and compute 4 output values simultaneously. 00350 * The variables acc0 ... acc3 hold output values that are being computed: 00351 * 00352 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] 00353 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1] 00354 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2] 00355 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3] 00356 */ 00357 00358 blkCnt = blockSize >> 2; 00359 00360 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00361 ** a second loop below computes the remaining 1 to 3 samples. */ 00362 while(blkCnt > 0u) 00363 { 00364 /* Copy four new input samples into the state buffer. 00365 ** Use 32-bit SIMD to move the 16-bit data. Only requires two copies. */ 00366 *pStateCurnt++ = *pSrc++; 00367 *pStateCurnt++ = *pSrc++; 00368 *pStateCurnt++ = *pSrc++; 00369 *pStateCurnt++ = *pSrc++; 00370 00371 00372 /* Set all accumulators to zero */ 00373 acc0 = 0; 00374 acc1 = 0; 00375 acc2 = 0; 00376 acc3 = 0; 00377 00378 /* Typecast q15_t pointer to q31_t pointer for state reading in q31_t */ 00379 px = pState; 00380 00381 /* Typecast q15_t pointer to q31_t pointer for coefficient reading in q31_t */ 00382 pb = pCoeffs; 00383 00384 /* Read the first two samples from the state buffer: x[n-N], x[n-N-1] */ 00385 x0 = *__SIMD32(px)++; 00386 00387 /* Read the third and forth samples from the state buffer: x[n-N-2], x[n-N-3] */ 00388 x2 = *__SIMD32(px)++; 00389 00390 /* Loop over the number of taps. Unroll by a factor of 4. 00391 ** Repeat until we've computed numTaps-(numTaps%4) coefficients. */ 00392 tapCnt = numTaps >> 2; 00393 00394 while(tapCnt > 0) 00395 { 00396 /* Read the first two coefficients using SIMD: b[N] and b[N-1] coefficients */ 00397 c0 = *__SIMD32(pb)++; 00398 00399 /* acc0 += b[N] * x[n-N] + b[N-1] * x[n-N-1] */ 00400 acc0 = __SMLALD(x0, c0, acc0); 00401 00402 /* acc2 += b[N] * x[n-N-2] + b[N-1] * x[n-N-3] */ 00403 acc2 = __SMLALD(x2, c0, acc2); 00404 00405 /* pack x[n-N-1] and x[n-N-2] */ 00406 #ifndef ARM_MATH_BIG_ENDIAN 00407 x1 = __PKHBT(x2, x0, 0); 00408 #else 00409 x1 = __PKHBT(x0, x2, 0); 00410 #endif 00411 00412 /* Read state x[n-N-4], x[n-N-5] */ 00413 x0 = _SIMD32_OFFSET(px); 00414 00415 /* acc1 += b[N] * x[n-N-1] + b[N-1] * x[n-N-2] */ 00416 acc1 = __SMLALDX(x1, c0, acc1); 00417 00418 /* pack x[n-N-3] and x[n-N-4] */ 00419 #ifndef ARM_MATH_BIG_ENDIAN 00420 x1 = __PKHBT(x0, x2, 0); 00421 #else 00422 x1 = __PKHBT(x2, x0, 0); 00423 #endif 00424 00425 /* acc3 += b[N] * x[n-N-3] + b[N-1] * x[n-N-4] */ 00426 acc3 = __SMLALDX(x1, c0, acc3); 00427 00428 /* Read coefficients b[N-2], b[N-3] */ 00429 c0 = *__SIMD32(pb)++; 00430 00431 /* acc0 += b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */ 00432 acc0 = __SMLALD(x2, c0, acc0); 00433 00434 /* Read state x[n-N-6], x[n-N-7] with offset */ 00435 x2 = _SIMD32_OFFSET(px + 2u); 00436 00437 /* acc2 += b[N-2] * x[n-N-4] + b[N-3] * x[n-N-5] */ 00438 acc2 = __SMLALD(x0, c0, acc2); 00439 00440 /* acc1 += b[N-2] * x[n-N-3] + b[N-3] * x[n-N-4] */ 00441 acc1 = __SMLALDX(x1, c0, acc1); 00442 00443 /* pack x[n-N-5] and x[n-N-6] */ 00444 #ifndef ARM_MATH_BIG_ENDIAN 00445 x1 = __PKHBT(x2, x0, 0); 00446 #else 00447 x1 = __PKHBT(x0, x2, 0); 00448 #endif 00449 00450 /* acc3 += b[N-2] * x[n-N-5] + b[N-3] * x[n-N-6] */ 00451 acc3 = __SMLALDX(x1, c0, acc3); 00452 00453 /* Update state pointer for next state reading */ 00454 px += 4u; 00455 00456 /* Decrement tap count */ 00457 tapCnt--; 00458 00459 } 00460 00461 /* If the filter length is not a multiple of 4, compute the remaining filter taps. 00462 ** This is always be 2 taps since the filter length is even. */ 00463 if((numTaps & 0x3u) != 0u) 00464 { 00465 00466 /* Read last two coefficients */ 00467 c0 = *__SIMD32(pb)++; 00468 00469 /* Perform the multiply-accumulates */ 00470 acc0 = __SMLALD(x0, c0, acc0); 00471 acc2 = __SMLALD(x2, c0, acc2); 00472 00473 /* pack state variables */ 00474 #ifndef ARM_MATH_BIG_ENDIAN 00475 x1 = __PKHBT(x2, x0, 0); 00476 #else 00477 x1 = __PKHBT(x0, x2, 0); 00478 #endif 00479 00480 /* Read last state variables */ 00481 x0 = *__SIMD32(px); 00482 00483 /* Perform the multiply-accumulates */ 00484 acc1 = __SMLALDX(x1, c0, acc1); 00485 00486 /* pack state variables */ 00487 #ifndef ARM_MATH_BIG_ENDIAN 00488 x1 = __PKHBT(x0, x2, 0); 00489 #else 00490 x1 = __PKHBT(x2, x0, 0); 00491 #endif 00492 00493 /* Perform the multiply-accumulates */ 00494 acc3 = __SMLALDX(x1, c0, acc3); 00495 } 00496 00497 /* The results in the 4 accumulators are in 2.30 format. Convert to 1.15 with saturation. 00498 ** Then store the 4 outputs in the destination buffer. */ 00499 00500 #ifndef ARM_MATH_BIG_ENDIAN 00501 00502 *__SIMD32(pDst)++ = 00503 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16); 00504 00505 *__SIMD32(pDst)++ = 00506 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16); 00507 00508 #else 00509 00510 *__SIMD32(pDst)++ = 00511 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16); 00512 00513 *__SIMD32(pDst)++ = 00514 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16); 00515 00516 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00517 00518 /* Advance the state pointer by 4 to process the next group of 4 samples */ 00519 pState = pState + 4; 00520 00521 /* Decrement the loop counter */ 00522 blkCnt--; 00523 } 00524 00525 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00526 ** No loop unrolling is used. */ 00527 blkCnt = blockSize % 0x4u; 00528 while(blkCnt > 0u) 00529 { 00530 /* Copy two samples into state buffer */ 00531 *pStateCurnt++ = *pSrc++; 00532 00533 /* Set the accumulator to zero */ 00534 acc0 = 0; 00535 00536 /* Use SIMD to hold states and coefficients */ 00537 px = pState; 00538 pb = pCoeffs; 00539 00540 tapCnt = numTaps >> 1u; 00541 00542 do 00543 { 00544 acc0 += (q31_t) * px++ * *pb++; 00545 acc0 += (q31_t) * px++ * *pb++; 00546 tapCnt--; 00547 } 00548 while(tapCnt > 0u); 00549 00550 /* The result is in 2.30 format. Convert to 1.15 with saturation. 00551 ** Then store the output in the destination buffer. */ 00552 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16)); 00553 00554 /* Advance state pointer by 1 for the next sample */ 00555 pState = pState + 1u; 00556 00557 /* Decrement the loop counter */ 00558 blkCnt--; 00559 } 00560 00561 /* Processing is complete. 00562 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer. 00563 ** This prepares the state buffer for the next function call. */ 00564 00565 /* Points to the start of the state buffer */ 00566 pStateCurnt = S->pState; 00567 00568 /* Calculation of count for copying integer writes */ 00569 tapCnt = (numTaps - 1u) >> 2; 00570 00571 while(tapCnt > 0u) 00572 { 00573 *pStateCurnt++ = *pState++; 00574 *pStateCurnt++ = *pState++; 00575 *pStateCurnt++ = *pState++; 00576 *pStateCurnt++ = *pState++; 00577 00578 tapCnt--; 00579 00580 } 00581 00582 /* Calculation of count for remaining q15_t data */ 00583 tapCnt = (numTaps - 1u) % 0x4u; 00584 00585 /* copy remaining data */ 00586 while(tapCnt > 0u) 00587 { 00588 *pStateCurnt++ = *pState++; 00589 00590 /* Decrement the loop counter */ 00591 tapCnt--; 00592 } 00593 } 00594 00595 00596 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */ 00597 00598 #else /* ARM_MATH_CM0_FAMILY */ 00599 00600 00601 /* Run the below code for Cortex-M0 */ 00602 00603 void arm_fir_q15( 00604 const arm_fir_instance_q15 * S, 00605 q15_t * pSrc, 00606 q15_t * pDst, 00607 uint32_t blockSize) 00608 { 00609 q15_t *pState = S->pState; /* State pointer */ 00610 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00611 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00612 00613 00614 00615 q15_t *px; /* Temporary pointer for state buffer */ 00616 q15_t *pb; /* Temporary pointer for coefficient buffer */ 00617 q63_t acc; /* Accumulator */ 00618 uint32_t numTaps = S->numTaps; /* Number of nTaps in the filter */ 00619 uint32_t tapCnt, blkCnt; /* Loop counters */ 00620 00621 /* S->pState buffer contains previous frame (numTaps - 1) samples */ 00622 /* pStateCurnt points to the location where the new input data should be written */ 00623 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00624 00625 /* Initialize blkCnt with blockSize */ 00626 blkCnt = blockSize; 00627 00628 while(blkCnt > 0u) 00629 { 00630 /* Copy one sample at a time into state buffer */ 00631 *pStateCurnt++ = *pSrc++; 00632 00633 /* Set the accumulator to zero */ 00634 acc = 0; 00635 00636 /* Initialize state pointer */ 00637 px = pState; 00638 00639 /* Initialize Coefficient pointer */ 00640 pb = pCoeffs; 00641 00642 tapCnt = numTaps; 00643 00644 /* Perform the multiply-accumulates */ 00645 do 00646 { 00647 /* acc = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */ 00648 acc += (q31_t) * px++ * *pb++; 00649 tapCnt--; 00650 } while(tapCnt > 0u); 00651 00652 /* The result is in 2.30 format. Convert to 1.15 00653 ** Then store the output in the destination buffer. */ 00654 *pDst++ = (q15_t) __SSAT((acc >> 15u), 16); 00655 00656 /* Advance state pointer by 1 for the next sample */ 00657 pState = pState + 1; 00658 00659 /* Decrement the samples loop counter */ 00660 blkCnt--; 00661 } 00662 00663 /* Processing is complete. 00664 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer. 00665 ** This prepares the state buffer for the next function call. */ 00666 00667 /* Points to the start of the state buffer */ 00668 pStateCurnt = S->pState; 00669 00670 /* Copy numTaps number of values */ 00671 tapCnt = (numTaps - 1u); 00672 00673 /* copy data */ 00674 while(tapCnt > 0u) 00675 { 00676 *pStateCurnt++ = *pState++; 00677 00678 /* Decrement the loop counter */ 00679 tapCnt--; 00680 } 00681 00682 } 00683 00684 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00685 00686 00687 00688 00689 /** 00690 * @} end of FIR group 00691 */
Generated on Tue Jul 12 2022 12:36:55 by 1.7.2