CMSIS DSP library
Dependents: performance_timer Surfboard_ gps2rtty Capstone ... more
arm_fir_interpolate_f32.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_interpolate_f32.c 00009 * 00010 * Description: FIR interpolation for floating-point sequences. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @defgroup FIR_Interpolate Finite Impulse Response (FIR) Interpolator 00045 * 00046 * These functions combine an upsampler (zero stuffer) and an FIR filter. 00047 * They are used in multirate systems for increasing the sample rate of a signal without introducing high frequency images. 00048 * Conceptually, the functions are equivalent to the block diagram below: 00049 * \image html FIRInterpolator.gif "Components included in the FIR Interpolator functions" 00050 * After upsampling by a factor of <code>L</code>, the signal should be filtered by a lowpass filter with a normalized 00051 * cutoff frequency of <code>1/L</code> in order to eliminate high frequency copies of the spectrum. 00052 * The user of the function is responsible for providing the filter coefficients. 00053 * 00054 * The FIR interpolator functions provided in the CMSIS DSP Library combine the upsampler and FIR filter in an efficient manner. 00055 * The upsampler inserts <code>L-1</code> zeros between each sample. 00056 * Instead of multiplying by these zero values, the FIR filter is designed to skip them. 00057 * This leads to an efficient implementation without any wasted effort. 00058 * The functions operate on blocks of input and output data. 00059 * <code>pSrc</code> points to an array of <code>blockSize</code> input values and 00060 * <code>pDst</code> points to an array of <code>blockSize*L</code> output values. 00061 * 00062 * The library provides separate functions for Q15, Q31, and floating-point data types. 00063 * 00064 * \par Algorithm: 00065 * The functions use a polyphase filter structure: 00066 * <pre> 00067 * y[n] = b[0] * x[n] + b[L] * x[n-1] + ... + b[L*(phaseLength-1)] * x[n-phaseLength+1] 00068 * y[n+1] = b[1] * x[n] + b[L+1] * x[n-1] + ... + b[L*(phaseLength-1)+1] * x[n-phaseLength+1] 00069 * ... 00070 * y[n+(L-1)] = b[L-1] * x[n] + b[2*L-1] * x[n-1] + ....+ b[L*(phaseLength-1)+(L-1)] * x[n-phaseLength+1] 00071 * </pre> 00072 * This approach is more efficient than straightforward upsample-then-filter algorithms. 00073 * With this method the computation is reduced by a factor of <code>1/L</code> when compared to using a standard FIR filter. 00074 * \par 00075 * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>. 00076 * <code>numTaps</code> must be a multiple of the interpolation factor <code>L</code> and this is checked by the 00077 * initialization functions. 00078 * Internally, the function divides the FIR filter's impulse response into shorter filters of length 00079 * <code>phaseLength=numTaps/L</code>. 00080 * Coefficients are stored in time reversed order. 00081 * \par 00082 * <pre> 00083 * {b[numTaps-1], b[numTaps-2], b[N-2], ..., b[1], b[0]} 00084 * </pre> 00085 * \par 00086 * <code>pState</code> points to a state array of size <code>blockSize + phaseLength - 1</code>. 00087 * Samples in the state buffer are stored in the order: 00088 * \par 00089 * <pre> 00090 * {x[n-phaseLength+1], x[n-phaseLength], x[n-phaseLength-1], x[n-phaseLength-2]....x[0], x[1], ..., x[blockSize-1]} 00091 * </pre> 00092 * The state variables are updated after each block of data is processed, the coefficients are untouched. 00093 * 00094 * \par Instance Structure 00095 * The coefficients and state variables for a filter are stored together in an instance data structure. 00096 * A separate instance structure must be defined for each filter. 00097 * Coefficient arrays may be shared among several instances while state variable array should be allocated separately. 00098 * There are separate instance structure declarations for each of the 3 supported data types. 00099 * 00100 * \par Initialization Functions 00101 * There is also an associated initialization function for each data type. 00102 * The initialization function performs the following operations: 00103 * - Sets the values of the internal structure fields. 00104 * - Zeros out the values in the state buffer. 00105 * - Checks to make sure that the length of the filter is a multiple of the interpolation factor. 00106 * To do this manually without calling the init function, assign the follow subfields of the instance structure: 00107 * L (interpolation factor), pCoeffs, phaseLength (numTaps / L), pState. Also set all of the values in pState to zero. 00108 * 00109 * \par 00110 * Use of the initialization function is optional. 00111 * However, if the initialization function is used, then the instance structure cannot be placed into a const data section. 00112 * To place an instance structure into a const data section, the instance structure must be manually initialized. 00113 * The code below statically initializes each of the 3 different data type filter instance structures 00114 * <pre> 00115 * arm_fir_interpolate_instance_f32 S = {L, phaseLength, pCoeffs, pState}; 00116 * arm_fir_interpolate_instance_q31 S = {L, phaseLength, pCoeffs, pState}; 00117 * arm_fir_interpolate_instance_q15 S = {L, phaseLength, pCoeffs, pState}; 00118 * </pre> 00119 * where <code>L</code> is the interpolation factor; <code>phaseLength=numTaps/L</code> is the 00120 * length of each of the shorter FIR filters used internally, 00121 * <code>pCoeffs</code> is the address of the coefficient buffer; 00122 * <code>pState</code> is the address of the state buffer. 00123 * Be sure to set the values in the state buffer to zeros when doing static initialization. 00124 * 00125 * \par Fixed-Point Behavior 00126 * Care must be taken when using the fixed-point versions of the FIR interpolate filter functions. 00127 * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered. 00128 * Refer to the function specific documentation below for usage guidelines. 00129 */ 00130 00131 /** 00132 * @addtogroup FIR_Interpolate 00133 * @{ 00134 */ 00135 00136 /** 00137 * @brief Processing function for the floating-point FIR interpolator. 00138 * @param[in] *S points to an instance of the floating-point FIR interpolator structure. 00139 * @param[in] *pSrc points to the block of input data. 00140 * @param[out] *pDst points to the block of output data. 00141 * @param[in] blockSize number of input samples to process per call. 00142 * @return none. 00143 */ 00144 #ifndef ARM_MATH_CM0_FAMILY 00145 00146 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00147 00148 void arm_fir_interpolate_f32( 00149 const arm_fir_interpolate_instance_f32 * S, 00150 float32_t * pSrc, 00151 float32_t * pDst, 00152 uint32_t blockSize) 00153 { 00154 float32_t *pState = S->pState; /* State pointer */ 00155 float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00156 float32_t *pStateCurnt; /* Points to the current sample of the state */ 00157 float32_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */ 00158 float32_t sum0; /* Accumulators */ 00159 float32_t x0, c0; /* Temporary variables to hold state and coefficient values */ 00160 uint32_t i, blkCnt, j; /* Loop counters */ 00161 uint16_t phaseLen = S->phaseLength, tapCnt; /* Length of each polyphase filter component */ 00162 float32_t acc0, acc1, acc2, acc3; 00163 float32_t x1, x2, x3; 00164 uint32_t blkCntN4; 00165 float32_t c1, c2, c3; 00166 00167 /* S->pState buffer contains previous frame (phaseLen - 1) samples */ 00168 /* pStateCurnt points to the location where the new input data should be written */ 00169 pStateCurnt = S->pState + (phaseLen - 1u); 00170 00171 /* Initialise blkCnt */ 00172 blkCnt = blockSize / 4; 00173 blkCntN4 = blockSize - (4 * blkCnt); 00174 00175 /* Samples loop unrolled by 4 */ 00176 while(blkCnt > 0u) 00177 { 00178 /* Copy new input sample into the state buffer */ 00179 *pStateCurnt++ = *pSrc++; 00180 *pStateCurnt++ = *pSrc++; 00181 *pStateCurnt++ = *pSrc++; 00182 *pStateCurnt++ = *pSrc++; 00183 00184 /* Address modifier index of coefficient buffer */ 00185 j = 1u; 00186 00187 /* Loop over the Interpolation factor. */ 00188 i = (S->L); 00189 00190 while(i > 0u) 00191 { 00192 /* Set accumulator to zero */ 00193 acc0 = 0.0f; 00194 acc1 = 0.0f; 00195 acc2 = 0.0f; 00196 acc3 = 0.0f; 00197 00198 /* Initialize state pointer */ 00199 ptr1 = pState; 00200 00201 /* Initialize coefficient pointer */ 00202 ptr2 = pCoeffs + (S->L - j); 00203 00204 /* Loop over the polyPhase length. Unroll by a factor of 4. 00205 ** Repeat until we've computed numTaps-(4*S->L) coefficients. */ 00206 tapCnt = phaseLen >> 2u; 00207 00208 x0 = *(ptr1++); 00209 x1 = *(ptr1++); 00210 x2 = *(ptr1++); 00211 00212 while(tapCnt > 0u) 00213 { 00214 00215 /* Read the input sample */ 00216 x3 = *(ptr1++); 00217 00218 /* Read the coefficient */ 00219 c0 = *(ptr2); 00220 00221 /* Perform the multiply-accumulate */ 00222 acc0 += x0 * c0; 00223 acc1 += x1 * c0; 00224 acc2 += x2 * c0; 00225 acc3 += x3 * c0; 00226 00227 /* Read the coefficient */ 00228 c1 = *(ptr2 + S->L); 00229 00230 /* Read the input sample */ 00231 x0 = *(ptr1++); 00232 00233 /* Perform the multiply-accumulate */ 00234 acc0 += x1 * c1; 00235 acc1 += x2 * c1; 00236 acc2 += x3 * c1; 00237 acc3 += x0 * c1; 00238 00239 /* Read the coefficient */ 00240 c2 = *(ptr2 + S->L * 2); 00241 00242 /* Read the input sample */ 00243 x1 = *(ptr1++); 00244 00245 /* Perform the multiply-accumulate */ 00246 acc0 += x2 * c2; 00247 acc1 += x3 * c2; 00248 acc2 += x0 * c2; 00249 acc3 += x1 * c2; 00250 00251 /* Read the coefficient */ 00252 c3 = *(ptr2 + S->L * 3); 00253 00254 /* Read the input sample */ 00255 x2 = *(ptr1++); 00256 00257 /* Perform the multiply-accumulate */ 00258 acc0 += x3 * c3; 00259 acc1 += x0 * c3; 00260 acc2 += x1 * c3; 00261 acc3 += x2 * c3; 00262 00263 00264 /* Upsampling is done by stuffing L-1 zeros between each sample. 00265 * So instead of multiplying zeros with coefficients, 00266 * Increment the coefficient pointer by interpolation factor times. */ 00267 ptr2 += 4 * S->L; 00268 00269 /* Decrement the loop counter */ 00270 tapCnt--; 00271 } 00272 00273 /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */ 00274 tapCnt = phaseLen % 0x4u; 00275 00276 while(tapCnt > 0u) 00277 { 00278 00279 /* Read the input sample */ 00280 x3 = *(ptr1++); 00281 00282 /* Read the coefficient */ 00283 c0 = *(ptr2); 00284 00285 /* Perform the multiply-accumulate */ 00286 acc0 += x0 * c0; 00287 acc1 += x1 * c0; 00288 acc2 += x2 * c0; 00289 acc3 += x3 * c0; 00290 00291 /* Increment the coefficient pointer by interpolation factor times. */ 00292 ptr2 += S->L; 00293 00294 /* update states for next sample processing */ 00295 x0 = x1; 00296 x1 = x2; 00297 x2 = x3; 00298 00299 /* Decrement the loop counter */ 00300 tapCnt--; 00301 } 00302 00303 /* The result is in the accumulator, store in the destination buffer. */ 00304 *pDst = acc0; 00305 *(pDst + S->L) = acc1; 00306 *(pDst + 2 * S->L) = acc2; 00307 *(pDst + 3 * S->L) = acc3; 00308 00309 pDst++; 00310 00311 /* Increment the address modifier index of coefficient buffer */ 00312 j++; 00313 00314 /* Decrement the loop counter */ 00315 i--; 00316 } 00317 00318 /* Advance the state pointer by 1 00319 * to process the next group of interpolation factor number samples */ 00320 pState = pState + 4; 00321 00322 pDst += S->L * 3; 00323 00324 /* Decrement the loop counter */ 00325 blkCnt--; 00326 } 00327 00328 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00329 ** No loop unrolling is used. */ 00330 00331 while(blkCntN4 > 0u) 00332 { 00333 /* Copy new input sample into the state buffer */ 00334 *pStateCurnt++ = *pSrc++; 00335 00336 /* Address modifier index of coefficient buffer */ 00337 j = 1u; 00338 00339 /* Loop over the Interpolation factor. */ 00340 i = S->L; 00341 while(i > 0u) 00342 { 00343 /* Set accumulator to zero */ 00344 sum0 = 0.0f; 00345 00346 /* Initialize state pointer */ 00347 ptr1 = pState; 00348 00349 /* Initialize coefficient pointer */ 00350 ptr2 = pCoeffs + (S->L - j); 00351 00352 /* Loop over the polyPhase length. Unroll by a factor of 4. 00353 ** Repeat until we've computed numTaps-(4*S->L) coefficients. */ 00354 tapCnt = phaseLen >> 2u; 00355 while(tapCnt > 0u) 00356 { 00357 00358 /* Read the coefficient */ 00359 c0 = *(ptr2); 00360 00361 /* Upsampling is done by stuffing L-1 zeros between each sample. 00362 * So instead of multiplying zeros with coefficients, 00363 * Increment the coefficient pointer by interpolation factor times. */ 00364 ptr2 += S->L; 00365 00366 /* Read the input sample */ 00367 x0 = *(ptr1++); 00368 00369 /* Perform the multiply-accumulate */ 00370 sum0 += x0 * c0; 00371 00372 /* Read the coefficient */ 00373 c0 = *(ptr2); 00374 00375 /* Increment the coefficient pointer by interpolation factor times. */ 00376 ptr2 += S->L; 00377 00378 /* Read the input sample */ 00379 x0 = *(ptr1++); 00380 00381 /* Perform the multiply-accumulate */ 00382 sum0 += x0 * c0; 00383 00384 /* Read the coefficient */ 00385 c0 = *(ptr2); 00386 00387 /* Increment the coefficient pointer by interpolation factor times. */ 00388 ptr2 += S->L; 00389 00390 /* Read the input sample */ 00391 x0 = *(ptr1++); 00392 00393 /* Perform the multiply-accumulate */ 00394 sum0 += x0 * c0; 00395 00396 /* Read the coefficient */ 00397 c0 = *(ptr2); 00398 00399 /* Increment the coefficient pointer by interpolation factor times. */ 00400 ptr2 += S->L; 00401 00402 /* Read the input sample */ 00403 x0 = *(ptr1++); 00404 00405 /* Perform the multiply-accumulate */ 00406 sum0 += x0 * c0; 00407 00408 /* Decrement the loop counter */ 00409 tapCnt--; 00410 } 00411 00412 /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */ 00413 tapCnt = phaseLen % 0x4u; 00414 00415 while(tapCnt > 0u) 00416 { 00417 /* Perform the multiply-accumulate */ 00418 sum0 += *(ptr1++) * (*ptr2); 00419 00420 /* Increment the coefficient pointer by interpolation factor times. */ 00421 ptr2 += S->L; 00422 00423 /* Decrement the loop counter */ 00424 tapCnt--; 00425 } 00426 00427 /* The result is in the accumulator, store in the destination buffer. */ 00428 *pDst++ = sum0; 00429 00430 /* Increment the address modifier index of coefficient buffer */ 00431 j++; 00432 00433 /* Decrement the loop counter */ 00434 i--; 00435 } 00436 00437 /* Advance the state pointer by 1 00438 * to process the next group of interpolation factor number samples */ 00439 pState = pState + 1; 00440 00441 /* Decrement the loop counter */ 00442 blkCntN4--; 00443 } 00444 00445 /* Processing is complete. 00446 ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer. 00447 ** This prepares the state buffer for the next function call. */ 00448 00449 /* Points to the start of the state buffer */ 00450 pStateCurnt = S->pState; 00451 00452 tapCnt = (phaseLen - 1u) >> 2u; 00453 00454 /* copy data */ 00455 while(tapCnt > 0u) 00456 { 00457 *pStateCurnt++ = *pState++; 00458 *pStateCurnt++ = *pState++; 00459 *pStateCurnt++ = *pState++; 00460 *pStateCurnt++ = *pState++; 00461 00462 /* Decrement the loop counter */ 00463 tapCnt--; 00464 } 00465 00466 tapCnt = (phaseLen - 1u) % 0x04u; 00467 00468 /* copy data */ 00469 while(tapCnt > 0u) 00470 { 00471 *pStateCurnt++ = *pState++; 00472 00473 /* Decrement the loop counter */ 00474 tapCnt--; 00475 } 00476 } 00477 00478 #else 00479 00480 /* Run the below code for Cortex-M0 */ 00481 00482 void arm_fir_interpolate_f32( 00483 const arm_fir_interpolate_instance_f32 * S, 00484 float32_t * pSrc, 00485 float32_t * pDst, 00486 uint32_t blockSize) 00487 { 00488 float32_t *pState = S->pState; /* State pointer */ 00489 float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00490 float32_t *pStateCurnt; /* Points to the current sample of the state */ 00491 float32_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */ 00492 00493 00494 float32_t sum; /* Accumulator */ 00495 uint32_t i, blkCnt; /* Loop counters */ 00496 uint16_t phaseLen = S->phaseLength, tapCnt; /* Length of each polyphase filter component */ 00497 00498 00499 /* S->pState buffer contains previous frame (phaseLen - 1) samples */ 00500 /* pStateCurnt points to the location where the new input data should be written */ 00501 pStateCurnt = S->pState + (phaseLen - 1u); 00502 00503 /* Total number of intput samples */ 00504 blkCnt = blockSize; 00505 00506 /* Loop over the blockSize. */ 00507 while(blkCnt > 0u) 00508 { 00509 /* Copy new input sample into the state buffer */ 00510 *pStateCurnt++ = *pSrc++; 00511 00512 /* Loop over the Interpolation factor. */ 00513 i = S->L; 00514 00515 while(i > 0u) 00516 { 00517 /* Set accumulator to zero */ 00518 sum = 0.0f; 00519 00520 /* Initialize state pointer */ 00521 ptr1 = pState; 00522 00523 /* Initialize coefficient pointer */ 00524 ptr2 = pCoeffs + (i - 1u); 00525 00526 /* Loop over the polyPhase length */ 00527 tapCnt = phaseLen; 00528 00529 while(tapCnt > 0u) 00530 { 00531 /* Perform the multiply-accumulate */ 00532 sum += *ptr1++ * *ptr2; 00533 00534 /* Increment the coefficient pointer by interpolation factor times. */ 00535 ptr2 += S->L; 00536 00537 /* Decrement the loop counter */ 00538 tapCnt--; 00539 } 00540 00541 /* The result is in the accumulator, store in the destination buffer. */ 00542 *pDst++ = sum; 00543 00544 /* Decrement the loop counter */ 00545 i--; 00546 } 00547 00548 /* Advance the state pointer by 1 00549 * to process the next group of interpolation factor number samples */ 00550 pState = pState + 1; 00551 00552 /* Decrement the loop counter */ 00553 blkCnt--; 00554 } 00555 00556 /* Processing is complete. 00557 ** Now copy the last phaseLen - 1 samples to the start of the state buffer. 00558 ** This prepares the state buffer for the next function call. */ 00559 00560 /* Points to the start of the state buffer */ 00561 pStateCurnt = S->pState; 00562 00563 tapCnt = phaseLen - 1u; 00564 00565 while(tapCnt > 0u) 00566 { 00567 *pStateCurnt++ = *pState++; 00568 00569 /* Decrement the loop counter */ 00570 tapCnt--; 00571 } 00572 00573 } 00574 00575 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00576 00577 00578 00579 /** 00580 * @} end of FIR_Interpolate group 00581 */
Generated on Tue Jul 12 2022 11:59:17 by 1.7.2