CMSIS DSP library
Dependents: performance_timer Surfboard_ gps2rtty Capstone ... more
arm_fir_f32.c
/* ----------------------------------------------------------------------
 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
 *
 * $Date:        19. March 2015
 * $Revision:    V.1.4.5
 *
 * Project:      CMSIS DSP Library
 * Title:        arm_fir_f32.c
 *
 * Description:  Floating-point FIR filter processing function.
 *
 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   - Neither the name of ARM LIMITED nor the names of its contributors
 *     may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * -------------------------------------------------------------------- */

#include "arm_math.h"

/**
 * @ingroup groupFilters
 */

/**
 * @defgroup FIR Finite Impulse Response (FIR) Filters
 *
 * This set of functions implements Finite Impulse Response (FIR) filters
 * for Q7, Q15, Q31, and floating-point data types.  Fast versions of Q15 and Q31 are also provided.
 * The functions operate on blocks of input and output data and each call to the function processes
 * <code>blockSize</code> samples through the filter.  <code>pSrc</code> and
 * <code>pDst</code> point to input and output arrays containing <code>blockSize</code> values.
 *
 * \par Algorithm:
 * The FIR filter algorithm is based upon a sequence of multiply-accumulate (MAC) operations.
 * Each filter coefficient <code>b[n]</code> is multiplied by a state variable which equals a previous input sample <code>x[n]</code>.
 * <pre>
 *    y[n] = b[0] * x[n] + b[1] * x[n-1] + b[2] * x[n-2] + ...+ b[numTaps-1] * x[n-numTaps+1]
 * </pre>
 * \par
 * \image html FIR.gif "Finite Impulse Response filter"
 * \par
 * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.
 * Coefficients are stored in time reversed order.
 * \par
 * <pre>
 *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
 * </pre>
 * \par
 * <code>pState</code> points to a state array of size <code>numTaps + blockSize - 1</code>.
 * Samples in the state buffer are stored oldest first: the <code>numTaps-1</code> samples
 * retained from the previous call are followed by the <code>blockSize</code> new input samples,
 * where <code>x[n]</code> is the first sample of the current block.
 * \par
 * <pre>
 *    {x[n-numTaps+1], x[n-numTaps+2], ..., x[n-1], x[n], x[n+1], ..., x[n+blockSize-1]}
 * </pre>
 * \par
 * Note that the length of the state buffer exceeds the length of the coefficient array by <code>blockSize-1</code>.
 * The increased state buffer length allows circular addressing, which is traditionally used in the FIR filters,
 * to be avoided and yields a significant speed improvement.
 * The state variables are updated after each block of data is processed; the coefficients are untouched.
 * \par Instance Structure
 * The coefficients and state variables for a filter are stored together in an instance data structure.
 * A separate instance structure must be defined for each filter.
 * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
 * There are separate instance structure declarations for each of the 4 supported data types.
 *
 * \par Initialization Functions
 * There is also an associated initialization function for each data type.
 * The initialization function performs the following operations:
 * - Sets the values of the internal structure fields.
 * - Zeros out the values in the state buffer.
 * To do this manually without calling the init function, assign the following fields of the instance structure:
 * numTaps, pCoeffs, pState. Also set all of the values in pState to zero.
 *
 * \par
 * Use of the initialization function is optional.
 * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
 * To place an instance structure into a const data section, the instance structure must be manually initialized.
 * Set the values in the state buffer to zeros before static initialization.
 * The code below statically initializes each of the 4 different data type filter instance structures
 * <pre>
 *arm_fir_instance_f32 S = {numTaps, pState, pCoeffs};
 *arm_fir_instance_q31 S = {numTaps, pState, pCoeffs};
 *arm_fir_instance_q15 S = {numTaps, pState, pCoeffs};
 *arm_fir_instance_q7 S =  {numTaps, pState, pCoeffs};
 * </pre>
 *
 * where <code>numTaps</code> is the number of filter coefficients in the filter; <code>pState</code> is the address of the state buffer;
 * <code>pCoeffs</code> is the address of the coefficient buffer.
 *
 * \par Fixed-Point Behavior
 * Care must be taken when using the fixed-point versions of the FIR filter functions.
 * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
 * Refer to the function specific documentation below for usage guidelines.
 */

/**
 * @addtogroup FIR
 * @{
 */

/**
 *
 * @param[in]  *S points to an instance of the floating-point FIR filter structure.
 * @param[in]  *pSrc points to the block of input data.
 * @param[out] *pDst points to the block of output data.
 * @param[in]  blockSize number of samples to process per call.
 * @return     none.
00131 * 00132 */ 00133 00134 #if defined(ARM_MATH_CM7) 00135 00136 void arm_fir_f32( 00137 const arm_fir_instance_f32 * S, 00138 float32_t * pSrc, 00139 float32_t * pDst, 00140 uint32_t blockSize) 00141 { 00142 float32_t *pState = S->pState; /* State pointer */ 00143 float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00144 float32_t *pStateCurnt; /* Points to the current sample of the state */ 00145 float32_t *px, *pb; /* Temporary pointers for state and coefficient buffers */ 00146 float32_t acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7; /* Accumulators */ 00147 float32_t x0, x1, x2, x3, x4, x5, x6, x7, c0; /* Temporary variables to hold state and coefficient values */ 00148 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00149 uint32_t i, tapCnt, blkCnt; /* Loop counters */ 00150 00151 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00152 /* pStateCurnt points to the location where the new input data should be written */ 00153 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00154 00155 /* Apply loop unrolling and compute 8 output values simultaneously. 00156 * The variables acc0 ... acc7 hold output values that are being computed: 00157 * 00158 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] 00159 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1] 00160 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2] 00161 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3] 00162 */ 00163 blkCnt = blockSize >> 3; 00164 00165 /* First part of the processing with loop unrolling. Compute 8 outputs at a time. 00166 ** a second loop below computes the remaining 1 to 7 samples. 
*/ 00167 while(blkCnt > 0u) 00168 { 00169 /* Copy four new input samples into the state buffer */ 00170 *pStateCurnt++ = *pSrc++; 00171 *pStateCurnt++ = *pSrc++; 00172 *pStateCurnt++ = *pSrc++; 00173 *pStateCurnt++ = *pSrc++; 00174 00175 /* Set all accumulators to zero */ 00176 acc0 = 0.0f; 00177 acc1 = 0.0f; 00178 acc2 = 0.0f; 00179 acc3 = 0.0f; 00180 acc4 = 0.0f; 00181 acc5 = 0.0f; 00182 acc6 = 0.0f; 00183 acc7 = 0.0f; 00184 00185 /* Initialize state pointer */ 00186 px = pState; 00187 00188 /* Initialize coeff pointer */ 00189 pb = (pCoeffs); 00190 00191 /* This is separated from the others to avoid 00192 * a call to __aeabi_memmove which would be slower 00193 */ 00194 *pStateCurnt++ = *pSrc++; 00195 *pStateCurnt++ = *pSrc++; 00196 *pStateCurnt++ = *pSrc++; 00197 *pStateCurnt++ = *pSrc++; 00198 00199 /* Read the first seven samples from the state buffer: x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2] */ 00200 x0 = *px++; 00201 x1 = *px++; 00202 x2 = *px++; 00203 x3 = *px++; 00204 x4 = *px++; 00205 x5 = *px++; 00206 x6 = *px++; 00207 00208 /* Loop unrolling. Process 8 taps at a time. */ 00209 tapCnt = numTaps >> 3u; 00210 00211 /* Loop over the number of taps. Unroll by a factor of 8. 00212 ** Repeat until we've computed numTaps-8 coefficients. 
*/ 00213 while(tapCnt > 0u) 00214 { 00215 /* Read the b[numTaps-1] coefficient */ 00216 c0 = *(pb++); 00217 00218 /* Read x[n-numTaps-3] sample */ 00219 x7 = *(px++); 00220 00221 /* acc0 += b[numTaps-1] * x[n-numTaps] */ 00222 acc0 += x0 * c0; 00223 00224 /* acc1 += b[numTaps-1] * x[n-numTaps-1] */ 00225 acc1 += x1 * c0; 00226 00227 /* acc2 += b[numTaps-1] * x[n-numTaps-2] */ 00228 acc2 += x2 * c0; 00229 00230 /* acc3 += b[numTaps-1] * x[n-numTaps-3] */ 00231 acc3 += x3 * c0; 00232 00233 /* acc4 += b[numTaps-1] * x[n-numTaps-4] */ 00234 acc4 += x4 * c0; 00235 00236 /* acc1 += b[numTaps-1] * x[n-numTaps-5] */ 00237 acc5 += x5 * c0; 00238 00239 /* acc2 += b[numTaps-1] * x[n-numTaps-6] */ 00240 acc6 += x6 * c0; 00241 00242 /* acc3 += b[numTaps-1] * x[n-numTaps-7] */ 00243 acc7 += x7 * c0; 00244 00245 /* Read the b[numTaps-2] coefficient */ 00246 c0 = *(pb++); 00247 00248 /* Read x[n-numTaps-4] sample */ 00249 x0 = *(px++); 00250 00251 /* Perform the multiply-accumulate */ 00252 acc0 += x1 * c0; 00253 acc1 += x2 * c0; 00254 acc2 += x3 * c0; 00255 acc3 += x4 * c0; 00256 acc4 += x5 * c0; 00257 acc5 += x6 * c0; 00258 acc6 += x7 * c0; 00259 acc7 += x0 * c0; 00260 00261 /* Read the b[numTaps-3] coefficient */ 00262 c0 = *(pb++); 00263 00264 /* Read x[n-numTaps-5] sample */ 00265 x1 = *(px++); 00266 00267 /* Perform the multiply-accumulates */ 00268 acc0 += x2 * c0; 00269 acc1 += x3 * c0; 00270 acc2 += x4 * c0; 00271 acc3 += x5 * c0; 00272 acc4 += x6 * c0; 00273 acc5 += x7 * c0; 00274 acc6 += x0 * c0; 00275 acc7 += x1 * c0; 00276 00277 /* Read the b[numTaps-4] coefficient */ 00278 c0 = *(pb++); 00279 00280 /* Read x[n-numTaps-6] sample */ 00281 x2 = *(px++); 00282 00283 /* Perform the multiply-accumulates */ 00284 acc0 += x3 * c0; 00285 acc1 += x4 * c0; 00286 acc2 += x5 * c0; 00287 acc3 += x6 * c0; 00288 acc4 += x7 * c0; 00289 acc5 += x0 * c0; 00290 acc6 += x1 * c0; 00291 acc7 += x2 * c0; 00292 00293 /* Read the b[numTaps-4] coefficient */ 00294 c0 = *(pb++); 00295 00296 /* 
Read x[n-numTaps-6] sample */ 00297 x3 = *(px++); 00298 /* Perform the multiply-accumulates */ 00299 acc0 += x4 * c0; 00300 acc1 += x5 * c0; 00301 acc2 += x6 * c0; 00302 acc3 += x7 * c0; 00303 acc4 += x0 * c0; 00304 acc5 += x1 * c0; 00305 acc6 += x2 * c0; 00306 acc7 += x3 * c0; 00307 00308 /* Read the b[numTaps-4] coefficient */ 00309 c0 = *(pb++); 00310 00311 /* Read x[n-numTaps-6] sample */ 00312 x4 = *(px++); 00313 00314 /* Perform the multiply-accumulates */ 00315 acc0 += x5 * c0; 00316 acc1 += x6 * c0; 00317 acc2 += x7 * c0; 00318 acc3 += x0 * c0; 00319 acc4 += x1 * c0; 00320 acc5 += x2 * c0; 00321 acc6 += x3 * c0; 00322 acc7 += x4 * c0; 00323 00324 /* Read the b[numTaps-4] coefficient */ 00325 c0 = *(pb++); 00326 00327 /* Read x[n-numTaps-6] sample */ 00328 x5 = *(px++); 00329 00330 /* Perform the multiply-accumulates */ 00331 acc0 += x6 * c0; 00332 acc1 += x7 * c0; 00333 acc2 += x0 * c0; 00334 acc3 += x1 * c0; 00335 acc4 += x2 * c0; 00336 acc5 += x3 * c0; 00337 acc6 += x4 * c0; 00338 acc7 += x5 * c0; 00339 00340 /* Read the b[numTaps-4] coefficient */ 00341 c0 = *(pb++); 00342 00343 /* Read x[n-numTaps-6] sample */ 00344 x6 = *(px++); 00345 00346 /* Perform the multiply-accumulates */ 00347 acc0 += x7 * c0; 00348 acc1 += x0 * c0; 00349 acc2 += x1 * c0; 00350 acc3 += x2 * c0; 00351 acc4 += x3 * c0; 00352 acc5 += x4 * c0; 00353 acc6 += x5 * c0; 00354 acc7 += x6 * c0; 00355 00356 tapCnt--; 00357 } 00358 00359 /* If the filter length is not a multiple of 8, compute the remaining filter taps */ 00360 tapCnt = numTaps % 0x8u; 00361 00362 while(tapCnt > 0u) 00363 { 00364 /* Read coefficients */ 00365 c0 = *(pb++); 00366 00367 /* Fetch 1 state variable */ 00368 x7 = *(px++); 00369 00370 /* Perform the multiply-accumulates */ 00371 acc0 += x0 * c0; 00372 acc1 += x1 * c0; 00373 acc2 += x2 * c0; 00374 acc3 += x3 * c0; 00375 acc4 += x4 * c0; 00376 acc5 += x5 * c0; 00377 acc6 += x6 * c0; 00378 acc7 += x7 * c0; 00379 00380 /* Reuse the present sample states for next 
sample */ 00381 x0 = x1; 00382 x1 = x2; 00383 x2 = x3; 00384 x3 = x4; 00385 x4 = x5; 00386 x5 = x6; 00387 x6 = x7; 00388 00389 /* Decrement the loop counter */ 00390 tapCnt--; 00391 } 00392 00393 /* Advance the state pointer by 8 to process the next group of 8 samples */ 00394 pState = pState + 8; 00395 00396 /* The results in the 8 accumulators, store in the destination buffer. */ 00397 *pDst++ = acc0; 00398 *pDst++ = acc1; 00399 *pDst++ = acc2; 00400 *pDst++ = acc3; 00401 *pDst++ = acc4; 00402 *pDst++ = acc5; 00403 *pDst++ = acc6; 00404 *pDst++ = acc7; 00405 00406 blkCnt--; 00407 } 00408 00409 /* If the blockSize is not a multiple of 8, compute any remaining output samples here. 00410 ** No loop unrolling is used. */ 00411 blkCnt = blockSize % 0x8u; 00412 00413 while(blkCnt > 0u) 00414 { 00415 /* Copy one sample at a time into state buffer */ 00416 *pStateCurnt++ = *pSrc++; 00417 00418 /* Set the accumulator to zero */ 00419 acc0 = 0.0f; 00420 00421 /* Initialize state pointer */ 00422 px = pState; 00423 00424 /* Initialize Coefficient pointer */ 00425 pb = (pCoeffs); 00426 00427 i = numTaps; 00428 00429 /* Perform the multiply-accumulates */ 00430 do 00431 { 00432 acc0 += *px++ * *pb++; 00433 i--; 00434 00435 } while(i > 0u); 00436 00437 /* The result is store in the destination buffer. */ 00438 *pDst++ = acc0; 00439 00440 /* Advance state pointer by 1 for the next sample */ 00441 pState = pState + 1; 00442 00443 blkCnt--; 00444 } 00445 00446 /* Processing is complete. 00447 ** Now copy the last numTaps - 1 samples to the start of the state buffer. 00448 ** This prepares the state buffer for the next function call. 
*/ 00449 00450 /* Points to the start of the state buffer */ 00451 pStateCurnt = S->pState; 00452 00453 tapCnt = (numTaps - 1u) >> 2u; 00454 00455 /* copy data */ 00456 while(tapCnt > 0u) 00457 { 00458 *pStateCurnt++ = *pState++; 00459 *pStateCurnt++ = *pState++; 00460 *pStateCurnt++ = *pState++; 00461 *pStateCurnt++ = *pState++; 00462 00463 /* Decrement the loop counter */ 00464 tapCnt--; 00465 } 00466 00467 /* Calculate remaining number of copies */ 00468 tapCnt = (numTaps - 1u) % 0x4u; 00469 00470 /* Copy the remaining q31_t data */ 00471 while(tapCnt > 0u) 00472 { 00473 *pStateCurnt++ = *pState++; 00474 00475 /* Decrement the loop counter */ 00476 tapCnt--; 00477 } 00478 } 00479 00480 #elif defined(ARM_MATH_CM0_FAMILY) 00481 00482 void arm_fir_f32( 00483 const arm_fir_instance_f32 * S, 00484 float32_t * pSrc, 00485 float32_t * pDst, 00486 uint32_t blockSize) 00487 { 00488 float32_t *pState = S->pState; /* State pointer */ 00489 float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00490 float32_t *pStateCurnt; /* Points to the current sample of the state */ 00491 float32_t *px, *pb; /* Temporary pointers for state and coefficient buffers */ 00492 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00493 uint32_t i, tapCnt, blkCnt; /* Loop counters */ 00494 00495 /* Run the below code for Cortex-M0 */ 00496 00497 float32_t acc; 00498 00499 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00500 /* pStateCurnt points to the location where the new input data should be written */ 00501 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00502 00503 /* Initialize blkCnt with blockSize */ 00504 blkCnt = blockSize; 00505 00506 while(blkCnt > 0u) 00507 { 00508 /* Copy one sample at a time into state buffer */ 00509 *pStateCurnt++ = *pSrc++; 00510 00511 /* Set the accumulator to zero */ 00512 acc = 0.0f; 00513 00514 /* Initialize state pointer */ 00515 px = pState; 00516 00517 /* Initialize Coefficient 
pointer */ 00518 pb = pCoeffs; 00519 00520 i = numTaps; 00521 00522 /* Perform the multiply-accumulates */ 00523 do 00524 { 00525 /* acc = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */ 00526 acc += *px++ * *pb++; 00527 i--; 00528 00529 } while(i > 0u); 00530 00531 /* The result is store in the destination buffer. */ 00532 *pDst++ = acc; 00533 00534 /* Advance state pointer by 1 for the next sample */ 00535 pState = pState + 1; 00536 00537 blkCnt--; 00538 } 00539 00540 /* Processing is complete. 00541 ** Now copy the last numTaps - 1 samples to the starting of the state buffer. 00542 ** This prepares the state buffer for the next function call. */ 00543 00544 /* Points to the start of the state buffer */ 00545 pStateCurnt = S->pState; 00546 00547 /* Copy numTaps number of values */ 00548 tapCnt = numTaps - 1u; 00549 00550 /* Copy data */ 00551 while(tapCnt > 0u) 00552 { 00553 *pStateCurnt++ = *pState++; 00554 00555 /* Decrement the loop counter */ 00556 tapCnt--; 00557 } 00558 00559 } 00560 00561 #else 00562 00563 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00564 00565 void arm_fir_f32( 00566 const arm_fir_instance_f32 * S, 00567 float32_t * pSrc, 00568 float32_t * pDst, 00569 uint32_t blockSize) 00570 { 00571 float32_t *pState = S->pState; /* State pointer */ 00572 float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00573 float32_t *pStateCurnt; /* Points to the current sample of the state */ 00574 float32_t *px, *pb; /* Temporary pointers for state and coefficient buffers */ 00575 float32_t acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7; /* Accumulators */ 00576 float32_t x0, x1, x2, x3, x4, x5, x6, x7, c0; /* Temporary variables to hold state and coefficient values */ 00577 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00578 uint32_t i, tapCnt, blkCnt; /* Loop counters */ 00579 float32_t p0,p1,p2,p3,p4,p5,p6,p7; /* Temporary product values */ 
00580 00581 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00582 /* pStateCurnt points to the location where the new input data should be written */ 00583 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00584 00585 /* Apply loop unrolling and compute 8 output values simultaneously. 00586 * The variables acc0 ... acc7 hold output values that are being computed: 00587 * 00588 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] 00589 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1] 00590 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2] 00591 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3] 00592 */ 00593 blkCnt = blockSize >> 3; 00594 00595 /* First part of the processing with loop unrolling. Compute 8 outputs at a time. 00596 ** a second loop below computes the remaining 1 to 7 samples. 
*/ 00597 while(blkCnt > 0u) 00598 { 00599 /* Copy four new input samples into the state buffer */ 00600 *pStateCurnt++ = *pSrc++; 00601 *pStateCurnt++ = *pSrc++; 00602 *pStateCurnt++ = *pSrc++; 00603 *pStateCurnt++ = *pSrc++; 00604 00605 /* Set all accumulators to zero */ 00606 acc0 = 0.0f; 00607 acc1 = 0.0f; 00608 acc2 = 0.0f; 00609 acc3 = 0.0f; 00610 acc4 = 0.0f; 00611 acc5 = 0.0f; 00612 acc6 = 0.0f; 00613 acc7 = 0.0f; 00614 00615 /* Initialize state pointer */ 00616 px = pState; 00617 00618 /* Initialize coeff pointer */ 00619 pb = (pCoeffs); 00620 00621 /* This is separated from the others to avoid 00622 * a call to __aeabi_memmove which would be slower 00623 */ 00624 *pStateCurnt++ = *pSrc++; 00625 *pStateCurnt++ = *pSrc++; 00626 *pStateCurnt++ = *pSrc++; 00627 *pStateCurnt++ = *pSrc++; 00628 00629 /* Read the first seven samples from the state buffer: x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2] */ 00630 x0 = *px++; 00631 x1 = *px++; 00632 x2 = *px++; 00633 x3 = *px++; 00634 x4 = *px++; 00635 x5 = *px++; 00636 x6 = *px++; 00637 00638 /* Loop unrolling. Process 8 taps at a time. */ 00639 tapCnt = numTaps >> 3u; 00640 00641 /* Loop over the number of taps. Unroll by a factor of 8. 00642 ** Repeat until we've computed numTaps-8 coefficients. 
*/ 00643 while(tapCnt > 0u) 00644 { 00645 /* Read the b[numTaps-1] coefficient */ 00646 c0 = *(pb++); 00647 00648 /* Read x[n-numTaps-3] sample */ 00649 x7 = *(px++); 00650 00651 /* acc0 += b[numTaps-1] * x[n-numTaps] */ 00652 p0 = x0 * c0; 00653 00654 /* acc1 += b[numTaps-1] * x[n-numTaps-1] */ 00655 p1 = x1 * c0; 00656 00657 /* acc2 += b[numTaps-1] * x[n-numTaps-2] */ 00658 p2 = x2 * c0; 00659 00660 /* acc3 += b[numTaps-1] * x[n-numTaps-3] */ 00661 p3 = x3 * c0; 00662 00663 /* acc4 += b[numTaps-1] * x[n-numTaps-4] */ 00664 p4 = x4 * c0; 00665 00666 /* acc1 += b[numTaps-1] * x[n-numTaps-5] */ 00667 p5 = x5 * c0; 00668 00669 /* acc2 += b[numTaps-1] * x[n-numTaps-6] */ 00670 p6 = x6 * c0; 00671 00672 /* acc3 += b[numTaps-1] * x[n-numTaps-7] */ 00673 p7 = x7 * c0; 00674 00675 /* Read the b[numTaps-2] coefficient */ 00676 c0 = *(pb++); 00677 00678 /* Read x[n-numTaps-4] sample */ 00679 x0 = *(px++); 00680 00681 acc0 += p0; 00682 acc1 += p1; 00683 acc2 += p2; 00684 acc3 += p3; 00685 acc4 += p4; 00686 acc5 += p5; 00687 acc6 += p6; 00688 acc7 += p7; 00689 00690 00691 /* Perform the multiply-accumulate */ 00692 p0 = x1 * c0; 00693 p1 = x2 * c0; 00694 p2 = x3 * c0; 00695 p3 = x4 * c0; 00696 p4 = x5 * c0; 00697 p5 = x6 * c0; 00698 p6 = x7 * c0; 00699 p7 = x0 * c0; 00700 00701 /* Read the b[numTaps-3] coefficient */ 00702 c0 = *(pb++); 00703 00704 /* Read x[n-numTaps-5] sample */ 00705 x1 = *(px++); 00706 00707 acc0 += p0; 00708 acc1 += p1; 00709 acc2 += p2; 00710 acc3 += p3; 00711 acc4 += p4; 00712 acc5 += p5; 00713 acc6 += p6; 00714 acc7 += p7; 00715 00716 /* Perform the multiply-accumulates */ 00717 p0 = x2 * c0; 00718 p1 = x3 * c0; 00719 p2 = x4 * c0; 00720 p3 = x5 * c0; 00721 p4 = x6 * c0; 00722 p5 = x7 * c0; 00723 p6 = x0 * c0; 00724 p7 = x1 * c0; 00725 00726 /* Read the b[numTaps-4] coefficient */ 00727 c0 = *(pb++); 00728 00729 /* Read x[n-numTaps-6] sample */ 00730 x2 = *(px++); 00731 00732 acc0 += p0; 00733 acc1 += p1; 00734 acc2 += p2; 00735 acc3 += p3; 00736 acc4 
+= p4; 00737 acc5 += p5; 00738 acc6 += p6; 00739 acc7 += p7; 00740 00741 /* Perform the multiply-accumulates */ 00742 p0 = x3 * c0; 00743 p1 = x4 * c0; 00744 p2 = x5 * c0; 00745 p3 = x6 * c0; 00746 p4 = x7 * c0; 00747 p5 = x0 * c0; 00748 p6 = x1 * c0; 00749 p7 = x2 * c0; 00750 00751 /* Read the b[numTaps-4] coefficient */ 00752 c0 = *(pb++); 00753 00754 /* Read x[n-numTaps-6] sample */ 00755 x3 = *(px++); 00756 00757 acc0 += p0; 00758 acc1 += p1; 00759 acc2 += p2; 00760 acc3 += p3; 00761 acc4 += p4; 00762 acc5 += p5; 00763 acc6 += p6; 00764 acc7 += p7; 00765 00766 /* Perform the multiply-accumulates */ 00767 p0 = x4 * c0; 00768 p1 = x5 * c0; 00769 p2 = x6 * c0; 00770 p3 = x7 * c0; 00771 p4 = x0 * c0; 00772 p5 = x1 * c0; 00773 p6 = x2 * c0; 00774 p7 = x3 * c0; 00775 00776 /* Read the b[numTaps-4] coefficient */ 00777 c0 = *(pb++); 00778 00779 /* Read x[n-numTaps-6] sample */ 00780 x4 = *(px++); 00781 00782 acc0 += p0; 00783 acc1 += p1; 00784 acc2 += p2; 00785 acc3 += p3; 00786 acc4 += p4; 00787 acc5 += p5; 00788 acc6 += p6; 00789 acc7 += p7; 00790 00791 /* Perform the multiply-accumulates */ 00792 p0 = x5 * c0; 00793 p1 = x6 * c0; 00794 p2 = x7 * c0; 00795 p3 = x0 * c0; 00796 p4 = x1 * c0; 00797 p5 = x2 * c0; 00798 p6 = x3 * c0; 00799 p7 = x4 * c0; 00800 00801 /* Read the b[numTaps-4] coefficient */ 00802 c0 = *(pb++); 00803 00804 /* Read x[n-numTaps-6] sample */ 00805 x5 = *(px++); 00806 00807 acc0 += p0; 00808 acc1 += p1; 00809 acc2 += p2; 00810 acc3 += p3; 00811 acc4 += p4; 00812 acc5 += p5; 00813 acc6 += p6; 00814 acc7 += p7; 00815 00816 /* Perform the multiply-accumulates */ 00817 p0 = x6 * c0; 00818 p1 = x7 * c0; 00819 p2 = x0 * c0; 00820 p3 = x1 * c0; 00821 p4 = x2 * c0; 00822 p5 = x3 * c0; 00823 p6 = x4 * c0; 00824 p7 = x5 * c0; 00825 00826 /* Read the b[numTaps-4] coefficient */ 00827 c0 = *(pb++); 00828 00829 /* Read x[n-numTaps-6] sample */ 00830 x6 = *(px++); 00831 00832 acc0 += p0; 00833 acc1 += p1; 00834 acc2 += p2; 00835 acc3 += p3; 00836 acc4 += p4; 
00837 acc5 += p5; 00838 acc6 += p6; 00839 acc7 += p7; 00840 00841 /* Perform the multiply-accumulates */ 00842 p0 = x7 * c0; 00843 p1 = x0 * c0; 00844 p2 = x1 * c0; 00845 p3 = x2 * c0; 00846 p4 = x3 * c0; 00847 p5 = x4 * c0; 00848 p6 = x5 * c0; 00849 p7 = x6 * c0; 00850 00851 tapCnt--; 00852 00853 acc0 += p0; 00854 acc1 += p1; 00855 acc2 += p2; 00856 acc3 += p3; 00857 acc4 += p4; 00858 acc5 += p5; 00859 acc6 += p6; 00860 acc7 += p7; 00861 } 00862 00863 /* If the filter length is not a multiple of 8, compute the remaining filter taps */ 00864 tapCnt = numTaps % 0x8u; 00865 00866 while(tapCnt > 0u) 00867 { 00868 /* Read coefficients */ 00869 c0 = *(pb++); 00870 00871 /* Fetch 1 state variable */ 00872 x7 = *(px++); 00873 00874 /* Perform the multiply-accumulates */ 00875 p0 = x0 * c0; 00876 p1 = x1 * c0; 00877 p2 = x2 * c0; 00878 p3 = x3 * c0; 00879 p4 = x4 * c0; 00880 p5 = x5 * c0; 00881 p6 = x6 * c0; 00882 p7 = x7 * c0; 00883 00884 /* Reuse the present sample states for next sample */ 00885 x0 = x1; 00886 x1 = x2; 00887 x2 = x3; 00888 x3 = x4; 00889 x4 = x5; 00890 x5 = x6; 00891 x6 = x7; 00892 00893 acc0 += p0; 00894 acc1 += p1; 00895 acc2 += p2; 00896 acc3 += p3; 00897 acc4 += p4; 00898 acc5 += p5; 00899 acc6 += p6; 00900 acc7 += p7; 00901 00902 /* Decrement the loop counter */ 00903 tapCnt--; 00904 } 00905 00906 /* Advance the state pointer by 8 to process the next group of 8 samples */ 00907 pState = pState + 8; 00908 00909 /* The results in the 8 accumulators, store in the destination buffer. */ 00910 *pDst++ = acc0; 00911 *pDst++ = acc1; 00912 *pDst++ = acc2; 00913 *pDst++ = acc3; 00914 *pDst++ = acc4; 00915 *pDst++ = acc5; 00916 *pDst++ = acc6; 00917 *pDst++ = acc7; 00918 00919 blkCnt--; 00920 } 00921 00922 /* If the blockSize is not a multiple of 8, compute any remaining output samples here. 00923 ** No loop unrolling is used. 
*/ 00924 blkCnt = blockSize % 0x8u; 00925 00926 while(blkCnt > 0u) 00927 { 00928 /* Copy one sample at a time into state buffer */ 00929 *pStateCurnt++ = *pSrc++; 00930 00931 /* Set the accumulator to zero */ 00932 acc0 = 0.0f; 00933 00934 /* Initialize state pointer */ 00935 px = pState; 00936 00937 /* Initialize Coefficient pointer */ 00938 pb = (pCoeffs); 00939 00940 i = numTaps; 00941 00942 /* Perform the multiply-accumulates */ 00943 do 00944 { 00945 acc0 += *px++ * *pb++; 00946 i--; 00947 00948 } while(i > 0u); 00949 00950 /* The result is store in the destination buffer. */ 00951 *pDst++ = acc0; 00952 00953 /* Advance state pointer by 1 for the next sample */ 00954 pState = pState + 1; 00955 00956 blkCnt--; 00957 } 00958 00959 /* Processing is complete. 00960 ** Now copy the last numTaps - 1 samples to the start of the state buffer. 00961 ** This prepares the state buffer for the next function call. */ 00962 00963 /* Points to the start of the state buffer */ 00964 pStateCurnt = S->pState; 00965 00966 tapCnt = (numTaps - 1u) >> 2u; 00967 00968 /* copy data */ 00969 while(tapCnt > 0u) 00970 { 00971 *pStateCurnt++ = *pState++; 00972 *pStateCurnt++ = *pState++; 00973 *pStateCurnt++ = *pState++; 00974 *pStateCurnt++ = *pState++; 00975 00976 /* Decrement the loop counter */ 00977 tapCnt--; 00978 } 00979 00980 /* Calculate remaining number of copies */ 00981 tapCnt = (numTaps - 1u) % 0x4u; 00982 00983 /* Copy the remaining q31_t data */ 00984 while(tapCnt > 0u) 00985 { 00986 *pStateCurnt++ = *pState++; 00987 00988 /* Decrement the loop counter */ 00989 tapCnt--; 00990 } 00991 } 00992 00993 #endif 00994 00995 /** 00996 * @} end of FIR group 00997 */
Generated on Tue Jul 12 2022 11:59:17 by 1.7.2