Added CMSIS5 DSP and NN folder. Needs some work.
Embed:
(wiki syntax)
Show/hide line numbers
arm_fir_f32.c
00001 /* ---------------------------------------------------------------------- 00002 * Project: CMSIS DSP Library 00003 * Title: arm_fir_f32.c 00004 * Description: Floating-point FIR filter processing function 00005 * 00006 * $Date: 27. January 2017 00007 * $Revision: V.1.5.1 00008 * 00009 * Target Processor: Cortex-M cores 00010 * -------------------------------------------------------------------- */ 00011 /* 00012 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. 00013 * 00014 * SPDX-License-Identifier: Apache-2.0 00015 * 00016 * Licensed under the Apache License, Version 2.0 (the License); you may 00017 * not use this file except in compliance with the License. 00018 * You may obtain a copy of the License at 00019 * 00020 * www.apache.org/licenses/LICENSE-2.0 00021 * 00022 * Unless required by applicable law or agreed to in writing, software 00023 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 00024 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00025 * See the License for the specific language governing permissions and 00026 * limitations under the License. 00027 */ 00028 00029 #include "arm_math.h" 00030 00031 /** 00032 * @ingroup groupFilters 00033 */ 00034 00035 /** 00036 * @defgroup FIR Finite Impulse Response (FIR) Filters 00037 * 00038 * This set of functions implements Finite Impulse Response (FIR) filters 00039 * for Q7, Q15, Q31, and floating-point data types. Fast versions of Q15 and Q31 are also provided. 00040 * The functions operate on blocks of input and output data and each call to the function processes 00041 * <code>blockSize</code> samples through the filter. <code>pSrc</code> and 00042 * <code>pDst</code> points to input and output arrays containing <code>blockSize</code> values. 00043 * 00044 * \par Algorithm: 00045 * The FIR filter algorithm is based upon a sequence of multiply-accumulate (MAC) operations. 
00046 * Each filter coefficient <code>b[n]</code> is multiplied by a state variable which equals a previous input sample <code>x[n]</code>. 00047 * <pre> 00048 * y[n] = b[0] * x[n] + b[1] * x[n-1] + b[2] * x[n-2] + ...+ b[numTaps-1] * x[n-numTaps+1] 00049 * </pre> 00050 * \par 00051 * \image html FIR.gif "Finite Impulse Response filter" 00052 * \par 00053 * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>. 00054 * Coefficients are stored in time reversed order. 00055 * \par 00056 * <pre> 00057 * {b[numTaps-1], b[numTaps-2], b[N-2], ..., b[1], b[0]} 00058 * </pre> 00059 * \par 00060 * <code>pState</code> points to a state array of size <code>numTaps + blockSize - 1</code>. 00061 * Samples in the state buffer are stored in the following order. 00062 * \par 00063 * <pre> 00064 * {x[n-numTaps+1], x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2]....x[0], x[1], ..., x[blockSize-1]} 00065 * </pre> 00066 * \par 00067 * Note that the length of the state buffer exceeds the length of the coefficient array by <code>blockSize-1</code>. 00068 * The increased state buffer length allows circular addressing, which is traditionally used in the FIR filters, 00069 * to be avoided and yields a significant speed improvement. 00070 * The state variables are updated after each block of data is processed; the coefficients are untouched. 00071 * \par Instance Structure 00072 * The coefficients and state variables for a filter are stored together in an instance data structure. 00073 * A separate instance structure must be defined for each filter. 00074 * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared. 00075 * There are separate instance structure declarations for each of the 4 supported data types. 00076 * 00077 * \par Initialization Functions 00078 * There is also an associated initialization function for each data type. 
00079 * The initialization function performs the following operations: 00080 * - Sets the values of the internal structure fields. 00081 * - Zeros out the values in the state buffer. 00082 * To do this manually without calling the init function, assign the follow subfields of the instance structure: 00083 * numTaps, pCoeffs, pState. Also set all of the values in pState to zero. 00084 * 00085 * \par 00086 * Use of the initialization function is optional. 00087 * However, if the initialization function is used, then the instance structure cannot be placed into a const data section. 00088 * To place an instance structure into a const data section, the instance structure must be manually initialized. 00089 * Set the values in the state buffer to zeros before static initialization. 00090 * The code below statically initializes each of the 4 different data type filter instance structures 00091 * <pre> 00092 *arm_fir_instance_f32 S = {numTaps, pState, pCoeffs}; 00093 *arm_fir_instance_q31 S = {numTaps, pState, pCoeffs}; 00094 *arm_fir_instance_q15 S = {numTaps, pState, pCoeffs}; 00095 *arm_fir_instance_q7 S = {numTaps, pState, pCoeffs}; 00096 * </pre> 00097 * 00098 * where <code>numTaps</code> is the number of filter coefficients in the filter; <code>pState</code> is the address of the state buffer; 00099 * <code>pCoeffs</code> is the address of the coefficient buffer. 00100 * 00101 * \par Fixed-Point Behavior 00102 * Care must be taken when using the fixed-point versions of the FIR filter functions. 00103 * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered. 00104 * Refer to the function specific documentation below for usage guidelines. 00105 */ 00106 00107 /** 00108 * @addtogroup FIR 00109 * @{ 00110 */ 00111 00112 /** 00113 * 00114 * @param[in] *S points to an instance of the floating-point FIR filter structure. 00115 * @param[in] *pSrc points to the block of input data. 
00116 * @param[out] *pDst points to the block of output data. 00117 * @param[in] blockSize number of samples to process per call. 00118 * @return none. 00119 * 00120 */ 00121 00122 #if defined(ARM_MATH_CM7) 00123 00124 void arm_fir_f32( 00125 const arm_fir_instance_f32 * S, 00126 float32_t * pSrc, 00127 float32_t * pDst, 00128 uint32_t blockSize) 00129 { 00130 float32_t *pState = S->pState; /* State pointer */ 00131 float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00132 float32_t *pStateCurnt; /* Points to the current sample of the state */ 00133 float32_t *px, *pb; /* Temporary pointers for state and coefficient buffers */ 00134 float32_t acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7; /* Accumulators */ 00135 float32_t x0, x1, x2, x3, x4, x5, x6, x7, c0; /* Temporary variables to hold state and coefficient values */ 00136 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00137 uint32_t i, tapCnt, blkCnt; /* Loop counters */ 00138 00139 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00140 /* pStateCurnt points to the location where the new input data should be written */ 00141 pStateCurnt = &(S->pState[(numTaps - 1U)]); 00142 00143 /* Apply loop unrolling and compute 8 output values simultaneously. 00144 * The variables acc0 ... acc7 hold output values that are being computed: 00145 * 00146 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] 00147 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1] 00148 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2] 00149 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3] 00150 */ 00151 blkCnt = blockSize >> 3; 00152 00153 /* First part of the processing with loop unrolling. 
Compute 8 outputs at a time. 00154 ** a second loop below computes the remaining 1 to 7 samples. */ 00155 while (blkCnt > 0U) 00156 { 00157 /* Copy four new input samples into the state buffer */ 00158 *pStateCurnt++ = *pSrc++; 00159 *pStateCurnt++ = *pSrc++; 00160 *pStateCurnt++ = *pSrc++; 00161 *pStateCurnt++ = *pSrc++; 00162 00163 /* Set all accumulators to zero */ 00164 acc0 = 0.0f; 00165 acc1 = 0.0f; 00166 acc2 = 0.0f; 00167 acc3 = 0.0f; 00168 acc4 = 0.0f; 00169 acc5 = 0.0f; 00170 acc6 = 0.0f; 00171 acc7 = 0.0f; 00172 00173 /* Initialize state pointer */ 00174 px = pState; 00175 00176 /* Initialize coeff pointer */ 00177 pb = (pCoeffs); 00178 00179 /* This is separated from the others to avoid 00180 * a call to __aeabi_memmove which would be slower 00181 */ 00182 *pStateCurnt++ = *pSrc++; 00183 *pStateCurnt++ = *pSrc++; 00184 *pStateCurnt++ = *pSrc++; 00185 *pStateCurnt++ = *pSrc++; 00186 00187 /* Read the first seven samples from the state buffer: x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2] */ 00188 x0 = *px++; 00189 x1 = *px++; 00190 x2 = *px++; 00191 x3 = *px++; 00192 x4 = *px++; 00193 x5 = *px++; 00194 x6 = *px++; 00195 00196 /* Loop unrolling. Process 8 taps at a time. */ 00197 tapCnt = numTaps >> 3U; 00198 00199 /* Loop over the number of taps. Unroll by a factor of 8. 00200 ** Repeat until we've computed numTaps-8 coefficients. 
*/ 00201 while (tapCnt > 0U) 00202 { 00203 /* Read the b[numTaps-1] coefficient */ 00204 c0 = *(pb++); 00205 00206 /* Read x[n-numTaps-3] sample */ 00207 x7 = *(px++); 00208 00209 /* acc0 += b[numTaps-1] * x[n-numTaps] */ 00210 acc0 += x0 * c0; 00211 00212 /* acc1 += b[numTaps-1] * x[n-numTaps-1] */ 00213 acc1 += x1 * c0; 00214 00215 /* acc2 += b[numTaps-1] * x[n-numTaps-2] */ 00216 acc2 += x2 * c0; 00217 00218 /* acc3 += b[numTaps-1] * x[n-numTaps-3] */ 00219 acc3 += x3 * c0; 00220 00221 /* acc4 += b[numTaps-1] * x[n-numTaps-4] */ 00222 acc4 += x4 * c0; 00223 00224 /* acc1 += b[numTaps-1] * x[n-numTaps-5] */ 00225 acc5 += x5 * c0; 00226 00227 /* acc2 += b[numTaps-1] * x[n-numTaps-6] */ 00228 acc6 += x6 * c0; 00229 00230 /* acc3 += b[numTaps-1] * x[n-numTaps-7] */ 00231 acc7 += x7 * c0; 00232 00233 /* Read the b[numTaps-2] coefficient */ 00234 c0 = *(pb++); 00235 00236 /* Read x[n-numTaps-4] sample */ 00237 x0 = *(px++); 00238 00239 /* Perform the multiply-accumulate */ 00240 acc0 += x1 * c0; 00241 acc1 += x2 * c0; 00242 acc2 += x3 * c0; 00243 acc3 += x4 * c0; 00244 acc4 += x5 * c0; 00245 acc5 += x6 * c0; 00246 acc6 += x7 * c0; 00247 acc7 += x0 * c0; 00248 00249 /* Read the b[numTaps-3] coefficient */ 00250 c0 = *(pb++); 00251 00252 /* Read x[n-numTaps-5] sample */ 00253 x1 = *(px++); 00254 00255 /* Perform the multiply-accumulates */ 00256 acc0 += x2 * c0; 00257 acc1 += x3 * c0; 00258 acc2 += x4 * c0; 00259 acc3 += x5 * c0; 00260 acc4 += x6 * c0; 00261 acc5 += x7 * c0; 00262 acc6 += x0 * c0; 00263 acc7 += x1 * c0; 00264 00265 /* Read the b[numTaps-4] coefficient */ 00266 c0 = *(pb++); 00267 00268 /* Read x[n-numTaps-6] sample */ 00269 x2 = *(px++); 00270 00271 /* Perform the multiply-accumulates */ 00272 acc0 += x3 * c0; 00273 acc1 += x4 * c0; 00274 acc2 += x5 * c0; 00275 acc3 += x6 * c0; 00276 acc4 += x7 * c0; 00277 acc5 += x0 * c0; 00278 acc6 += x1 * c0; 00279 acc7 += x2 * c0; 00280 00281 /* Read the b[numTaps-4] coefficient */ 00282 c0 = *(pb++); 00283 00284 /* 
Read x[n-numTaps-6] sample */ 00285 x3 = *(px++); 00286 /* Perform the multiply-accumulates */ 00287 acc0 += x4 * c0; 00288 acc1 += x5 * c0; 00289 acc2 += x6 * c0; 00290 acc3 += x7 * c0; 00291 acc4 += x0 * c0; 00292 acc5 += x1 * c0; 00293 acc6 += x2 * c0; 00294 acc7 += x3 * c0; 00295 00296 /* Read the b[numTaps-4] coefficient */ 00297 c0 = *(pb++); 00298 00299 /* Read x[n-numTaps-6] sample */ 00300 x4 = *(px++); 00301 00302 /* Perform the multiply-accumulates */ 00303 acc0 += x5 * c0; 00304 acc1 += x6 * c0; 00305 acc2 += x7 * c0; 00306 acc3 += x0 * c0; 00307 acc4 += x1 * c0; 00308 acc5 += x2 * c0; 00309 acc6 += x3 * c0; 00310 acc7 += x4 * c0; 00311 00312 /* Read the b[numTaps-4] coefficient */ 00313 c0 = *(pb++); 00314 00315 /* Read x[n-numTaps-6] sample */ 00316 x5 = *(px++); 00317 00318 /* Perform the multiply-accumulates */ 00319 acc0 += x6 * c0; 00320 acc1 += x7 * c0; 00321 acc2 += x0 * c0; 00322 acc3 += x1 * c0; 00323 acc4 += x2 * c0; 00324 acc5 += x3 * c0; 00325 acc6 += x4 * c0; 00326 acc7 += x5 * c0; 00327 00328 /* Read the b[numTaps-4] coefficient */ 00329 c0 = *(pb++); 00330 00331 /* Read x[n-numTaps-6] sample */ 00332 x6 = *(px++); 00333 00334 /* Perform the multiply-accumulates */ 00335 acc0 += x7 * c0; 00336 acc1 += x0 * c0; 00337 acc2 += x1 * c0; 00338 acc3 += x2 * c0; 00339 acc4 += x3 * c0; 00340 acc5 += x4 * c0; 00341 acc6 += x5 * c0; 00342 acc7 += x6 * c0; 00343 00344 tapCnt--; 00345 } 00346 00347 /* If the filter length is not a multiple of 8, compute the remaining filter taps */ 00348 tapCnt = numTaps % 0x8U; 00349 00350 while (tapCnt > 0U) 00351 { 00352 /* Read coefficients */ 00353 c0 = *(pb++); 00354 00355 /* Fetch 1 state variable */ 00356 x7 = *(px++); 00357 00358 /* Perform the multiply-accumulates */ 00359 acc0 += x0 * c0; 00360 acc1 += x1 * c0; 00361 acc2 += x2 * c0; 00362 acc3 += x3 * c0; 00363 acc4 += x4 * c0; 00364 acc5 += x5 * c0; 00365 acc6 += x6 * c0; 00366 acc7 += x7 * c0; 00367 00368 /* Reuse the present sample states for next 
sample */ 00369 x0 = x1; 00370 x1 = x2; 00371 x2 = x3; 00372 x3 = x4; 00373 x4 = x5; 00374 x5 = x6; 00375 x6 = x7; 00376 00377 /* Decrement the loop counter */ 00378 tapCnt--; 00379 } 00380 00381 /* Advance the state pointer by 8 to process the next group of 8 samples */ 00382 pState = pState + 8; 00383 00384 /* The results in the 8 accumulators, store in the destination buffer. */ 00385 *pDst++ = acc0; 00386 *pDst++ = acc1; 00387 *pDst++ = acc2; 00388 *pDst++ = acc3; 00389 *pDst++ = acc4; 00390 *pDst++ = acc5; 00391 *pDst++ = acc6; 00392 *pDst++ = acc7; 00393 00394 blkCnt--; 00395 } 00396 00397 /* If the blockSize is not a multiple of 8, compute any remaining output samples here. 00398 ** No loop unrolling is used. */ 00399 blkCnt = blockSize % 0x8U; 00400 00401 while (blkCnt > 0U) 00402 { 00403 /* Copy one sample at a time into state buffer */ 00404 *pStateCurnt++ = *pSrc++; 00405 00406 /* Set the accumulator to zero */ 00407 acc0 = 0.0f; 00408 00409 /* Initialize state pointer */ 00410 px = pState; 00411 00412 /* Initialize Coefficient pointer */ 00413 pb = (pCoeffs); 00414 00415 i = numTaps; 00416 00417 /* Perform the multiply-accumulates */ 00418 do 00419 { 00420 acc0 += *px++ * *pb++; 00421 i--; 00422 00423 } while (i > 0U); 00424 00425 /* The result is store in the destination buffer. */ 00426 *pDst++ = acc0; 00427 00428 /* Advance state pointer by 1 for the next sample */ 00429 pState = pState + 1; 00430 00431 blkCnt--; 00432 } 00433 00434 /* Processing is complete. 00435 ** Now copy the last numTaps - 1 samples to the start of the state buffer. 00436 ** This prepares the state buffer for the next function call. 
*/ 00437 00438 /* Points to the start of the state buffer */ 00439 pStateCurnt = S->pState; 00440 00441 tapCnt = (numTaps - 1U) >> 2U; 00442 00443 /* copy data */ 00444 while (tapCnt > 0U) 00445 { 00446 *pStateCurnt++ = *pState++; 00447 *pStateCurnt++ = *pState++; 00448 *pStateCurnt++ = *pState++; 00449 *pStateCurnt++ = *pState++; 00450 00451 /* Decrement the loop counter */ 00452 tapCnt--; 00453 } 00454 00455 /* Calculate remaining number of copies */ 00456 tapCnt = (numTaps - 1U) % 0x4U; 00457 00458 /* Copy the remaining q31_t data */ 00459 while (tapCnt > 0U) 00460 { 00461 *pStateCurnt++ = *pState++; 00462 00463 /* Decrement the loop counter */ 00464 tapCnt--; 00465 } 00466 } 00467 00468 #elif defined(ARM_MATH_CM0_FAMILY) 00469 00470 void arm_fir_f32( 00471 const arm_fir_instance_f32 * S, 00472 float32_t * pSrc, 00473 float32_t * pDst, 00474 uint32_t blockSize) 00475 { 00476 float32_t *pState = S->pState; /* State pointer */ 00477 float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00478 float32_t *pStateCurnt; /* Points to the current sample of the state */ 00479 float32_t *px, *pb; /* Temporary pointers for state and coefficient buffers */ 00480 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00481 uint32_t i, tapCnt, blkCnt; /* Loop counters */ 00482 00483 /* Run the below code for Cortex-M0 */ 00484 00485 float32_t acc; 00486 00487 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00488 /* pStateCurnt points to the location where the new input data should be written */ 00489 pStateCurnt = &(S->pState[(numTaps - 1U)]); 00490 00491 /* Initialize blkCnt with blockSize */ 00492 blkCnt = blockSize; 00493 00494 while (blkCnt > 0U) 00495 { 00496 /* Copy one sample at a time into state buffer */ 00497 *pStateCurnt++ = *pSrc++; 00498 00499 /* Set the accumulator to zero */ 00500 acc = 0.0f; 00501 00502 /* Initialize state pointer */ 00503 px = pState; 00504 00505 /* Initialize Coefficient 
pointer */ 00506 pb = pCoeffs; 00507 00508 i = numTaps; 00509 00510 /* Perform the multiply-accumulates */ 00511 do 00512 { 00513 /* acc = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */ 00514 acc += *px++ * *pb++; 00515 i--; 00516 00517 } while (i > 0U); 00518 00519 /* The result is store in the destination buffer. */ 00520 *pDst++ = acc; 00521 00522 /* Advance state pointer by 1 for the next sample */ 00523 pState = pState + 1; 00524 00525 blkCnt--; 00526 } 00527 00528 /* Processing is complete. 00529 ** Now copy the last numTaps - 1 samples to the starting of the state buffer. 00530 ** This prepares the state buffer for the next function call. */ 00531 00532 /* Points to the start of the state buffer */ 00533 pStateCurnt = S->pState; 00534 00535 /* Copy numTaps number of values */ 00536 tapCnt = numTaps - 1U; 00537 00538 /* Copy data */ 00539 while (tapCnt > 0U) 00540 { 00541 *pStateCurnt++ = *pState++; 00542 00543 /* Decrement the loop counter */ 00544 tapCnt--; 00545 } 00546 00547 } 00548 00549 #else 00550 00551 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00552 00553 void arm_fir_f32( 00554 const arm_fir_instance_f32 * S, 00555 float32_t * pSrc, 00556 float32_t * pDst, 00557 uint32_t blockSize) 00558 { 00559 float32_t *pState = S->pState; /* State pointer */ 00560 float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00561 float32_t *pStateCurnt; /* Points to the current sample of the state */ 00562 float32_t *px, *pb; /* Temporary pointers for state and coefficient buffers */ 00563 float32_t acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7; /* Accumulators */ 00564 float32_t x0, x1, x2, x3, x4, x5, x6, x7, c0; /* Temporary variables to hold state and coefficient values */ 00565 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00566 uint32_t i, tapCnt, blkCnt; /* Loop counters */ 00567 float32_t p0,p1,p2,p3,p4,p5,p6,p7; /* Temporary product values */ 
00568 00569 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00570 /* pStateCurnt points to the location where the new input data should be written */ 00571 pStateCurnt = &(S->pState[(numTaps - 1U)]); 00572 00573 /* Apply loop unrolling and compute 8 output values simultaneously. 00574 * The variables acc0 ... acc7 hold output values that are being computed: 00575 * 00576 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] 00577 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1] 00578 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2] 00579 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3] 00580 */ 00581 blkCnt = blockSize >> 3; 00582 00583 /* First part of the processing with loop unrolling. Compute 8 outputs at a time. 00584 ** a second loop below computes the remaining 1 to 7 samples. 
*/ 00585 while (blkCnt > 0U) 00586 { 00587 /* Copy four new input samples into the state buffer */ 00588 *pStateCurnt++ = *pSrc++; 00589 *pStateCurnt++ = *pSrc++; 00590 *pStateCurnt++ = *pSrc++; 00591 *pStateCurnt++ = *pSrc++; 00592 00593 /* Set all accumulators to zero */ 00594 acc0 = 0.0f; 00595 acc1 = 0.0f; 00596 acc2 = 0.0f; 00597 acc3 = 0.0f; 00598 acc4 = 0.0f; 00599 acc5 = 0.0f; 00600 acc6 = 0.0f; 00601 acc7 = 0.0f; 00602 00603 /* Initialize state pointer */ 00604 px = pState; 00605 00606 /* Initialize coeff pointer */ 00607 pb = (pCoeffs); 00608 00609 /* This is separated from the others to avoid 00610 * a call to __aeabi_memmove which would be slower 00611 */ 00612 *pStateCurnt++ = *pSrc++; 00613 *pStateCurnt++ = *pSrc++; 00614 *pStateCurnt++ = *pSrc++; 00615 *pStateCurnt++ = *pSrc++; 00616 00617 /* Read the first seven samples from the state buffer: x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2] */ 00618 x0 = *px++; 00619 x1 = *px++; 00620 x2 = *px++; 00621 x3 = *px++; 00622 x4 = *px++; 00623 x5 = *px++; 00624 x6 = *px++; 00625 00626 /* Loop unrolling. Process 8 taps at a time. */ 00627 tapCnt = numTaps >> 3U; 00628 00629 /* Loop over the number of taps. Unroll by a factor of 8. 00630 ** Repeat until we've computed numTaps-8 coefficients. 
*/ 00631 while (tapCnt > 0U) 00632 { 00633 /* Read the b[numTaps-1] coefficient */ 00634 c0 = *(pb++); 00635 00636 /* Read x[n-numTaps-3] sample */ 00637 x7 = *(px++); 00638 00639 /* acc0 += b[numTaps-1] * x[n-numTaps] */ 00640 p0 = x0 * c0; 00641 00642 /* acc1 += b[numTaps-1] * x[n-numTaps-1] */ 00643 p1 = x1 * c0; 00644 00645 /* acc2 += b[numTaps-1] * x[n-numTaps-2] */ 00646 p2 = x2 * c0; 00647 00648 /* acc3 += b[numTaps-1] * x[n-numTaps-3] */ 00649 p3 = x3 * c0; 00650 00651 /* acc4 += b[numTaps-1] * x[n-numTaps-4] */ 00652 p4 = x4 * c0; 00653 00654 /* acc1 += b[numTaps-1] * x[n-numTaps-5] */ 00655 p5 = x5 * c0; 00656 00657 /* acc2 += b[numTaps-1] * x[n-numTaps-6] */ 00658 p6 = x6 * c0; 00659 00660 /* acc3 += b[numTaps-1] * x[n-numTaps-7] */ 00661 p7 = x7 * c0; 00662 00663 /* Read the b[numTaps-2] coefficient */ 00664 c0 = *(pb++); 00665 00666 /* Read x[n-numTaps-4] sample */ 00667 x0 = *(px++); 00668 00669 acc0 += p0; 00670 acc1 += p1; 00671 acc2 += p2; 00672 acc3 += p3; 00673 acc4 += p4; 00674 acc5 += p5; 00675 acc6 += p6; 00676 acc7 += p7; 00677 00678 00679 /* Perform the multiply-accumulate */ 00680 p0 = x1 * c0; 00681 p1 = x2 * c0; 00682 p2 = x3 * c0; 00683 p3 = x4 * c0; 00684 p4 = x5 * c0; 00685 p5 = x6 * c0; 00686 p6 = x7 * c0; 00687 p7 = x0 * c0; 00688 00689 /* Read the b[numTaps-3] coefficient */ 00690 c0 = *(pb++); 00691 00692 /* Read x[n-numTaps-5] sample */ 00693 x1 = *(px++); 00694 00695 acc0 += p0; 00696 acc1 += p1; 00697 acc2 += p2; 00698 acc3 += p3; 00699 acc4 += p4; 00700 acc5 += p5; 00701 acc6 += p6; 00702 acc7 += p7; 00703 00704 /* Perform the multiply-accumulates */ 00705 p0 = x2 * c0; 00706 p1 = x3 * c0; 00707 p2 = x4 * c0; 00708 p3 = x5 * c0; 00709 p4 = x6 * c0; 00710 p5 = x7 * c0; 00711 p6 = x0 * c0; 00712 p7 = x1 * c0; 00713 00714 /* Read the b[numTaps-4] coefficient */ 00715 c0 = *(pb++); 00716 00717 /* Read x[n-numTaps-6] sample */ 00718 x2 = *(px++); 00719 00720 acc0 += p0; 00721 acc1 += p1; 00722 acc2 += p2; 00723 acc3 += p3; 00724 
acc4 += p4; 00725 acc5 += p5; 00726 acc6 += p6; 00727 acc7 += p7; 00728 00729 /* Perform the multiply-accumulates */ 00730 p0 = x3 * c0; 00731 p1 = x4 * c0; 00732 p2 = x5 * c0; 00733 p3 = x6 * c0; 00734 p4 = x7 * c0; 00735 p5 = x0 * c0; 00736 p6 = x1 * c0; 00737 p7 = x2 * c0; 00738 00739 /* Read the b[numTaps-4] coefficient */ 00740 c0 = *(pb++); 00741 00742 /* Read x[n-numTaps-6] sample */ 00743 x3 = *(px++); 00744 00745 acc0 += p0; 00746 acc1 += p1; 00747 acc2 += p2; 00748 acc3 += p3; 00749 acc4 += p4; 00750 acc5 += p5; 00751 acc6 += p6; 00752 acc7 += p7; 00753 00754 /* Perform the multiply-accumulates */ 00755 p0 = x4 * c0; 00756 p1 = x5 * c0; 00757 p2 = x6 * c0; 00758 p3 = x7 * c0; 00759 p4 = x0 * c0; 00760 p5 = x1 * c0; 00761 p6 = x2 * c0; 00762 p7 = x3 * c0; 00763 00764 /* Read the b[numTaps-4] coefficient */ 00765 c0 = *(pb++); 00766 00767 /* Read x[n-numTaps-6] sample */ 00768 x4 = *(px++); 00769 00770 acc0 += p0; 00771 acc1 += p1; 00772 acc2 += p2; 00773 acc3 += p3; 00774 acc4 += p4; 00775 acc5 += p5; 00776 acc6 += p6; 00777 acc7 += p7; 00778 00779 /* Perform the multiply-accumulates */ 00780 p0 = x5 * c0; 00781 p1 = x6 * c0; 00782 p2 = x7 * c0; 00783 p3 = x0 * c0; 00784 p4 = x1 * c0; 00785 p5 = x2 * c0; 00786 p6 = x3 * c0; 00787 p7 = x4 * c0; 00788 00789 /* Read the b[numTaps-4] coefficient */ 00790 c0 = *(pb++); 00791 00792 /* Read x[n-numTaps-6] sample */ 00793 x5 = *(px++); 00794 00795 acc0 += p0; 00796 acc1 += p1; 00797 acc2 += p2; 00798 acc3 += p3; 00799 acc4 += p4; 00800 acc5 += p5; 00801 acc6 += p6; 00802 acc7 += p7; 00803 00804 /* Perform the multiply-accumulates */ 00805 p0 = x6 * c0; 00806 p1 = x7 * c0; 00807 p2 = x0 * c0; 00808 p3 = x1 * c0; 00809 p4 = x2 * c0; 00810 p5 = x3 * c0; 00811 p6 = x4 * c0; 00812 p7 = x5 * c0; 00813 00814 /* Read the b[numTaps-4] coefficient */ 00815 c0 = *(pb++); 00816 00817 /* Read x[n-numTaps-6] sample */ 00818 x6 = *(px++); 00819 00820 acc0 += p0; 00821 acc1 += p1; 00822 acc2 += p2; 00823 acc3 += p3; 00824 acc4 += 
p4; 00825 acc5 += p5; 00826 acc6 += p6; 00827 acc7 += p7; 00828 00829 /* Perform the multiply-accumulates */ 00830 p0 = x7 * c0; 00831 p1 = x0 * c0; 00832 p2 = x1 * c0; 00833 p3 = x2 * c0; 00834 p4 = x3 * c0; 00835 p5 = x4 * c0; 00836 p6 = x5 * c0; 00837 p7 = x6 * c0; 00838 00839 tapCnt--; 00840 00841 acc0 += p0; 00842 acc1 += p1; 00843 acc2 += p2; 00844 acc3 += p3; 00845 acc4 += p4; 00846 acc5 += p5; 00847 acc6 += p6; 00848 acc7 += p7; 00849 } 00850 00851 /* If the filter length is not a multiple of 8, compute the remaining filter taps */ 00852 tapCnt = numTaps % 0x8U; 00853 00854 while (tapCnt > 0U) 00855 { 00856 /* Read coefficients */ 00857 c0 = *(pb++); 00858 00859 /* Fetch 1 state variable */ 00860 x7 = *(px++); 00861 00862 /* Perform the multiply-accumulates */ 00863 p0 = x0 * c0; 00864 p1 = x1 * c0; 00865 p2 = x2 * c0; 00866 p3 = x3 * c0; 00867 p4 = x4 * c0; 00868 p5 = x5 * c0; 00869 p6 = x6 * c0; 00870 p7 = x7 * c0; 00871 00872 /* Reuse the present sample states for next sample */ 00873 x0 = x1; 00874 x1 = x2; 00875 x2 = x3; 00876 x3 = x4; 00877 x4 = x5; 00878 x5 = x6; 00879 x6 = x7; 00880 00881 acc0 += p0; 00882 acc1 += p1; 00883 acc2 += p2; 00884 acc3 += p3; 00885 acc4 += p4; 00886 acc5 += p5; 00887 acc6 += p6; 00888 acc7 += p7; 00889 00890 /* Decrement the loop counter */ 00891 tapCnt--; 00892 } 00893 00894 /* Advance the state pointer by 8 to process the next group of 8 samples */ 00895 pState = pState + 8; 00896 00897 /* The results in the 8 accumulators, store in the destination buffer. */ 00898 *pDst++ = acc0; 00899 *pDst++ = acc1; 00900 *pDst++ = acc2; 00901 *pDst++ = acc3; 00902 *pDst++ = acc4; 00903 *pDst++ = acc5; 00904 *pDst++ = acc6; 00905 *pDst++ = acc7; 00906 00907 blkCnt--; 00908 } 00909 00910 /* If the blockSize is not a multiple of 8, compute any remaining output samples here. 00911 ** No loop unrolling is used. 
*/ 00912 blkCnt = blockSize % 0x8U; 00913 00914 while (blkCnt > 0U) 00915 { 00916 /* Copy one sample at a time into state buffer */ 00917 *pStateCurnt++ = *pSrc++; 00918 00919 /* Set the accumulator to zero */ 00920 acc0 = 0.0f; 00921 00922 /* Initialize state pointer */ 00923 px = pState; 00924 00925 /* Initialize Coefficient pointer */ 00926 pb = (pCoeffs); 00927 00928 i = numTaps; 00929 00930 /* Perform the multiply-accumulates */ 00931 do 00932 { 00933 acc0 += *px++ * *pb++; 00934 i--; 00935 00936 } while (i > 0U); 00937 00938 /* The result is store in the destination buffer. */ 00939 *pDst++ = acc0; 00940 00941 /* Advance state pointer by 1 for the next sample */ 00942 pState = pState + 1; 00943 00944 blkCnt--; 00945 } 00946 00947 /* Processing is complete. 00948 ** Now copy the last numTaps - 1 samples to the start of the state buffer. 00949 ** This prepares the state buffer for the next function call. */ 00950 00951 /* Points to the start of the state buffer */ 00952 pStateCurnt = S->pState; 00953 00954 tapCnt = (numTaps - 1U) >> 2U; 00955 00956 /* copy data */ 00957 while (tapCnt > 0U) 00958 { 00959 *pStateCurnt++ = *pState++; 00960 *pStateCurnt++ = *pState++; 00961 *pStateCurnt++ = *pState++; 00962 *pStateCurnt++ = *pState++; 00963 00964 /* Decrement the loop counter */ 00965 tapCnt--; 00966 } 00967 00968 /* Calculate remaining number of copies */ 00969 tapCnt = (numTaps - 1U) % 0x4U; 00970 00971 /* Copy the remaining q31_t data */ 00972 while (tapCnt > 0U) 00973 { 00974 *pStateCurnt++ = *pState++; 00975 00976 /* Decrement the loop counter */ 00977 tapCnt--; 00978 } 00979 } 00980 00981 #endif 00982 00983 /** 00984 * @} end of FIR group 00985 */ 00986
Generated on Tue Jul 12 2022 16:47:27 by 1.7.2