CMSIS DSP library

Dependents:   performance_timer Surfboard_ gps2rtty Capstone ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_fir_interpolate_f32.c Source File

arm_fir_interpolate_f32.c

00001 /* ----------------------------------------------------------------------    
00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.    
00003 *    
00004 * $Date:        19. March 2015
00005 * $Revision:    V.1.4.5
00006 *    
00007 * Project:      CMSIS DSP Library    
00008 * Title:        arm_fir_interpolate_f32.c    
00009 *    
00010 * Description:  FIR interpolation for floating-point sequences.    
00011 *    
00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00013 *  
00014 * Redistribution and use in source and binary forms, with or without 
00015 * modification, are permitted provided that the following conditions
00016 * are met:
00017 *   - Redistributions of source code must retain the above copyright
00018 *     notice, this list of conditions and the following disclaimer.
00019 *   - Redistributions in binary form must reproduce the above copyright
00020 *     notice, this list of conditions and the following disclaimer in
00021 *     the documentation and/or other materials provided with the 
00022 *     distribution.
00023 *   - Neither the name of ARM LIMITED nor the names of its contributors
00024 *     may be used to endorse or promote products derived from this
00025 *     software without specific prior written permission.
00026 *
00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00038 * POSSIBILITY OF SUCH DAMAGE.   
00039 * -------------------------------------------------------------------- */
00040 
00041 #include "arm_math.h"
00042 
00043 /**    
00044  * @defgroup FIR_Interpolate Finite Impulse Response (FIR) Interpolator    
00045  *    
00046  * These functions combine an upsampler (zero stuffer) and an FIR filter.    
00047  * They are used in multirate systems for increasing the sample rate of a signal without introducing high frequency images.    
00048  * Conceptually, the functions are equivalent to the block diagram below:    
00049  * \image html FIRInterpolator.gif "Components included in the FIR Interpolator functions"    
00050  * After upsampling by a factor of <code>L</code>, the signal should be filtered by a lowpass filter with a normalized    
00051  * cutoff frequency of <code>1/L</code> in order to eliminate high frequency copies of the spectrum.    
00052  * The user of the function is responsible for providing the filter coefficients.    
00053  *    
00054  * The FIR interpolator functions provided in the CMSIS DSP Library combine the upsampler and FIR filter in an efficient manner.    
00055  * The upsampler inserts <code>L-1</code> zeros between each sample.    
00056  * Instead of multiplying by these zero values, the FIR filter is designed to skip them.    
00057  * This leads to an efficient implementation without any wasted effort.    
00058  * The functions operate on blocks of input and output data.    
00059  * <code>pSrc</code> points to an array of <code>blockSize</code> input values and    
00060  * <code>pDst</code> points to an array of <code>blockSize*L</code> output values.    
00061  *    
00062  * The library provides separate functions for Q15, Q31, and floating-point data types.    
00063  *    
00064  * \par Algorithm:    
00065  * The functions use a polyphase filter structure:    
00066  * <pre>    
00067  *    y[n] = b[0] * x[n] + b[L]   * x[n-1] + ... + b[L*(phaseLength-1)] * x[n-phaseLength+1]    
00068  *    y[n+1] = b[1] * x[n] + b[L+1] * x[n-1] + ... + b[L*(phaseLength-1)+1] * x[n-phaseLength+1]    
00069  *    ...    
00070  *    y[n+(L-1)] = b[L-1] * x[n] + b[2*L-1] * x[n-1] + ....+ b[L*(phaseLength-1)+(L-1)] * x[n-phaseLength+1]    
00071  * </pre>    
00072  * This approach is more efficient than straightforward upsample-then-filter algorithms.    
00073  * With this method the computation is reduced by a factor of <code>1/L</code> when compared to using a standard FIR filter.    
00074  * \par    
00075  * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.    
00076  * <code>numTaps</code> must be a multiple of the interpolation factor <code>L</code> and this is checked by the    
00077  * initialization functions.    
00078  * Internally, the function divides the FIR filter's impulse response into shorter filters of length    
00079  * <code>phaseLength=numTaps/L</code>.    
00080  * Coefficients are stored in time reversed order.    
00081  * \par    
00082  * <pre>    
00083  *    {b[numTaps-1], b[numTaps-2], b[N-2], ..., b[1], b[0]}    
00084  * </pre>    
00085  * \par    
00086  * <code>pState</code> points to a state array of size <code>blockSize + phaseLength - 1</code>.    
00087  * Samples in the state buffer are stored in the order:    
00088  * \par    
00089  * <pre>    
00090  *    {x[n-phaseLength+1], x[n-phaseLength], x[n-phaseLength-1], x[n-phaseLength-2]....x[0], x[1], ..., x[blockSize-1]}    
00091  * </pre>    
00092  * The state variables are updated after each block of data is processed, the coefficients are untouched.    
00093  *    
00094  * \par Instance Structure    
00095  * The coefficients and state variables for a filter are stored together in an instance data structure.    
00096  * A separate instance structure must be defined for each filter.    
00097  * Coefficient arrays may be shared among several instances while state variable array should be allocated separately.    
00098  * There are separate instance structure declarations for each of the 3 supported data types.    
00099  *    
00100  * \par Initialization Functions    
00101  * There is also an associated initialization function for each data type.    
00102  * The initialization function performs the following operations:    
00103  * - Sets the values of the internal structure fields.    
00104  * - Zeros out the values in the state buffer.    
00105  * - Checks to make sure that the length of the filter is a multiple of the interpolation factor.    
00106  * To do this manually without calling the init function, assign the follow subfields of the instance structure:
00107  * L (interpolation factor), pCoeffs, phaseLength (numTaps / L), pState. Also set all of the values in pState to zero. 
00108  *    
00109  * \par    
00110  * Use of the initialization function is optional.    
00111  * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.    
00112  * To place an instance structure into a const data section, the instance structure must be manually initialized.    
00113  * The code below statically initializes each of the 3 different data type filter instance structures    
00114  * <pre>    
00115  * arm_fir_interpolate_instance_f32 S = {L, phaseLength, pCoeffs, pState};    
00116  * arm_fir_interpolate_instance_q31 S = {L, phaseLength, pCoeffs, pState};    
00117  * arm_fir_interpolate_instance_q15 S = {L, phaseLength, pCoeffs, pState};    
00118  * </pre>    
00119  * where <code>L</code> is the interpolation factor; <code>phaseLength=numTaps/L</code> is the    
00120  * length of each of the shorter FIR filters used internally,    
00121  * <code>pCoeffs</code> is the address of the coefficient buffer;    
00122  * <code>pState</code> is the address of the state buffer.    
00123  * Be sure to set the values in the state buffer to zeros when doing static initialization.    
00124  *    
00125  * \par Fixed-Point Behavior    
00126  * Care must be taken when using the fixed-point versions of the FIR interpolate filter functions.    
00127  * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.    
00128  * Refer to the function specific documentation below for usage guidelines.    
00129  */
00130 
00131 /**    
00132  * @addtogroup FIR_Interpolate    
00133  * @{    
00134  */
00135 
00136 /**    
00137  * @brief Processing function for the floating-point FIR interpolator.    
00138  * @param[in] *S        points to an instance of the floating-point FIR interpolator structure.    
00139  * @param[in] *pSrc     points to the block of input data.    
00140  * @param[out] *pDst    points to the block of output data.    
00141  * @param[in] blockSize number of input samples to process per call.    
00142  * @return none.    
00143  */
00144 #ifndef ARM_MATH_CM0_FAMILY
00145 
00146   /* Run the below code for Cortex-M4 and Cortex-M3 */
00147 
00148 void arm_fir_interpolate_f32(
00149   const arm_fir_interpolate_instance_f32 * S,
00150   float32_t * pSrc,
00151   float32_t * pDst,
00152   uint32_t blockSize)
00153 {
00154   float32_t *pState = S->pState;                 /* State pointer */
00155   float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
00156   float32_t *pStateCurnt;                        /* Points to the current sample of the state */
00157   float32_t *ptr1, *ptr2;                        /* Temporary pointers for state and coefficient buffers */
00158   float32_t sum0;                                /* Accumulators */
00159   float32_t x0, c0;                              /* Temporary variables to hold state and coefficient values */
00160   uint32_t i, blkCnt, j;                         /* Loop counters */
00161   uint16_t phaseLen = S->phaseLength, tapCnt;    /* Length of each polyphase filter component */
00162   float32_t acc0, acc1, acc2, acc3;
00163   float32_t x1, x2, x3;
00164   uint32_t blkCntN4;
00165   float32_t c1, c2, c3;
00166 
00167   /* S->pState buffer contains previous frame (phaseLen - 1) samples */
00168   /* pStateCurnt points to the location where the new input data should be written */
00169   pStateCurnt = S->pState + (phaseLen - 1u);
00170 
00171   /* Initialise  blkCnt */
00172   blkCnt = blockSize / 4;
00173   blkCntN4 = blockSize - (4 * blkCnt);
00174 
00175   /* Samples loop unrolled by 4 */
00176   while(blkCnt > 0u)
00177   {
00178     /* Copy new input sample into the state buffer */
00179     *pStateCurnt++ = *pSrc++;
00180     *pStateCurnt++ = *pSrc++;
00181     *pStateCurnt++ = *pSrc++;
00182     *pStateCurnt++ = *pSrc++;
00183 
00184     /* Address modifier index of coefficient buffer */
00185     j = 1u;
00186 
00187     /* Loop over the Interpolation factor. */
00188     i = (S->L);
00189 
00190     while(i > 0u)
00191     {
00192       /* Set accumulator to zero */
00193       acc0 = 0.0f;
00194       acc1 = 0.0f;
00195       acc2 = 0.0f;
00196       acc3 = 0.0f;
00197 
00198       /* Initialize state pointer */
00199       ptr1 = pState;
00200 
00201       /* Initialize coefficient pointer */
00202       ptr2 = pCoeffs + (S->L - j);
00203 
00204       /* Loop over the polyPhase length. Unroll by a factor of 4.        
00205        ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
00206       tapCnt = phaseLen >> 2u;
00207 
00208       x0 = *(ptr1++);
00209       x1 = *(ptr1++);
00210       x2 = *(ptr1++);
00211 
00212       while(tapCnt > 0u)
00213       {
00214 
00215         /* Read the input sample */
00216         x3 = *(ptr1++);
00217 
00218         /* Read the coefficient */
00219         c0 = *(ptr2);
00220 
00221         /* Perform the multiply-accumulate */
00222         acc0 += x0 * c0;
00223         acc1 += x1 * c0;
00224         acc2 += x2 * c0;
00225         acc3 += x3 * c0;
00226 
00227         /* Read the coefficient */
00228         c1 = *(ptr2 + S->L);
00229 
00230         /* Read the input sample */
00231         x0 = *(ptr1++);
00232 
00233         /* Perform the multiply-accumulate */
00234         acc0 += x1 * c1;
00235         acc1 += x2 * c1;
00236         acc2 += x3 * c1;
00237         acc3 += x0 * c1;
00238 
00239         /* Read the coefficient */
00240         c2 = *(ptr2 + S->L * 2);
00241 
00242         /* Read the input sample */
00243         x1 = *(ptr1++);
00244 
00245         /* Perform the multiply-accumulate */
00246         acc0 += x2 * c2;
00247         acc1 += x3 * c2;
00248         acc2 += x0 * c2;
00249         acc3 += x1 * c2;
00250 
00251         /* Read the coefficient */
00252         c3 = *(ptr2 + S->L * 3);
00253 
00254         /* Read the input sample */
00255         x2 = *(ptr1++);
00256 
00257         /* Perform the multiply-accumulate */
00258         acc0 += x3 * c3;
00259         acc1 += x0 * c3;
00260         acc2 += x1 * c3;
00261         acc3 += x2 * c3;
00262 
00263 
00264         /* Upsampling is done by stuffing L-1 zeros between each sample.        
00265          * So instead of multiplying zeros with coefficients,        
00266          * Increment the coefficient pointer by interpolation factor times. */
00267         ptr2 += 4 * S->L;
00268 
00269         /* Decrement the loop counter */
00270         tapCnt--;
00271       }
00272 
00273       /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
00274       tapCnt = phaseLen % 0x4u;
00275 
00276       while(tapCnt > 0u)
00277       {
00278 
00279         /* Read the input sample */
00280         x3 = *(ptr1++);
00281 
00282         /* Read the coefficient */
00283         c0 = *(ptr2);
00284 
00285         /* Perform the multiply-accumulate */
00286         acc0 += x0 * c0;
00287         acc1 += x1 * c0;
00288         acc2 += x2 * c0;
00289         acc3 += x3 * c0;
00290 
00291         /* Increment the coefficient pointer by interpolation factor times. */
00292         ptr2 += S->L;
00293 
00294         /* update states for next sample processing */
00295         x0 = x1;
00296         x1 = x2;
00297         x2 = x3;
00298 
00299         /* Decrement the loop counter */
00300         tapCnt--;
00301       }
00302 
00303       /* The result is in the accumulator, store in the destination buffer. */
00304       *pDst = acc0;
00305       *(pDst + S->L) = acc1;
00306       *(pDst + 2 * S->L) = acc2;
00307       *(pDst + 3 * S->L) = acc3;
00308 
00309       pDst++;
00310 
00311       /* Increment the address modifier index of coefficient buffer */
00312       j++;
00313 
00314       /* Decrement the loop counter */
00315       i--;
00316     }
00317 
00318     /* Advance the state pointer by 1        
00319      * to process the next group of interpolation factor number samples */
00320     pState = pState + 4;
00321 
00322     pDst += S->L * 3;
00323 
00324     /* Decrement the loop counter */
00325     blkCnt--;
00326   }
00327 
00328   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.        
00329    ** No loop unrolling is used. */
00330 
00331   while(blkCntN4 > 0u)
00332   {
00333     /* Copy new input sample into the state buffer */
00334     *pStateCurnt++ = *pSrc++;
00335 
00336     /* Address modifier index of coefficient buffer */
00337     j = 1u;
00338 
00339     /* Loop over the Interpolation factor. */
00340     i = S->L;
00341     while(i > 0u)
00342     {
00343       /* Set accumulator to zero */
00344       sum0 = 0.0f;
00345 
00346       /* Initialize state pointer */
00347       ptr1 = pState;
00348 
00349       /* Initialize coefficient pointer */
00350       ptr2 = pCoeffs + (S->L - j);
00351 
00352       /* Loop over the polyPhase length. Unroll by a factor of 4.        
00353        ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
00354       tapCnt = phaseLen >> 2u;
00355       while(tapCnt > 0u)
00356       {
00357 
00358         /* Read the coefficient */
00359         c0 = *(ptr2);
00360 
00361         /* Upsampling is done by stuffing L-1 zeros between each sample.        
00362          * So instead of multiplying zeros with coefficients,        
00363          * Increment the coefficient pointer by interpolation factor times. */
00364         ptr2 += S->L;
00365 
00366         /* Read the input sample */
00367         x0 = *(ptr1++);
00368 
00369         /* Perform the multiply-accumulate */
00370         sum0 += x0 * c0;
00371 
00372         /* Read the coefficient */
00373         c0 = *(ptr2);
00374 
00375         /* Increment the coefficient pointer by interpolation factor times. */
00376         ptr2 += S->L;
00377 
00378         /* Read the input sample */
00379         x0 = *(ptr1++);
00380 
00381         /* Perform the multiply-accumulate */
00382         sum0 += x0 * c0;
00383 
00384         /* Read the coefficient */
00385         c0 = *(ptr2);
00386 
00387         /* Increment the coefficient pointer by interpolation factor times. */
00388         ptr2 += S->L;
00389 
00390         /* Read the input sample */
00391         x0 = *(ptr1++);
00392 
00393         /* Perform the multiply-accumulate */
00394         sum0 += x0 * c0;
00395 
00396         /* Read the coefficient */
00397         c0 = *(ptr2);
00398 
00399         /* Increment the coefficient pointer by interpolation factor times. */
00400         ptr2 += S->L;
00401 
00402         /* Read the input sample */
00403         x0 = *(ptr1++);
00404 
00405         /* Perform the multiply-accumulate */
00406         sum0 += x0 * c0;
00407 
00408         /* Decrement the loop counter */
00409         tapCnt--;
00410       }
00411 
00412       /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
00413       tapCnt = phaseLen % 0x4u;
00414 
00415       while(tapCnt > 0u)
00416       {
00417         /* Perform the multiply-accumulate */
00418         sum0 += *(ptr1++) * (*ptr2);
00419 
00420         /* Increment the coefficient pointer by interpolation factor times. */
00421         ptr2 += S->L;
00422 
00423         /* Decrement the loop counter */
00424         tapCnt--;
00425       }
00426 
00427       /* The result is in the accumulator, store in the destination buffer. */
00428       *pDst++ = sum0;
00429 
00430       /* Increment the address modifier index of coefficient buffer */
00431       j++;
00432 
00433       /* Decrement the loop counter */
00434       i--;
00435     }
00436 
00437     /* Advance the state pointer by 1        
00438      * to process the next group of interpolation factor number samples */
00439     pState = pState + 1;
00440 
00441     /* Decrement the loop counter */
00442     blkCntN4--;
00443   }
00444 
00445   /* Processing is complete.        
00446    ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer.        
00447    ** This prepares the state buffer for the next function call. */
00448 
00449   /* Points to the start of the state buffer */
00450   pStateCurnt = S->pState;
00451 
00452   tapCnt = (phaseLen - 1u) >> 2u;
00453 
00454   /* copy data */
00455   while(tapCnt > 0u)
00456   {
00457     *pStateCurnt++ = *pState++;
00458     *pStateCurnt++ = *pState++;
00459     *pStateCurnt++ = *pState++;
00460     *pStateCurnt++ = *pState++;
00461 
00462     /* Decrement the loop counter */
00463     tapCnt--;
00464   }
00465 
00466   tapCnt = (phaseLen - 1u) % 0x04u;
00467 
00468   /* copy data */
00469   while(tapCnt > 0u)
00470   {
00471     *pStateCurnt++ = *pState++;
00472 
00473     /* Decrement the loop counter */
00474     tapCnt--;
00475   }
00476 }
00477 
00478 #else
00479 
00480   /* Run the below code for Cortex-M0 */
00481 
00482 void arm_fir_interpolate_f32(
00483   const arm_fir_interpolate_instance_f32 * S,
00484   float32_t * pSrc,
00485   float32_t * pDst,
00486   uint32_t blockSize)
00487 {
00488   float32_t *pState = S->pState;                 /* State pointer */
00489   float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
00490   float32_t *pStateCurnt;                        /* Points to the current sample of the state */
00491   float32_t *ptr1, *ptr2;                        /* Temporary pointers for state and coefficient buffers */
00492 
00493 
00494   float32_t sum;                                 /* Accumulator */
00495   uint32_t i, blkCnt;                            /* Loop counters */
00496   uint16_t phaseLen = S->phaseLength, tapCnt;    /* Length of each polyphase filter component */
00497 
00498 
00499   /* S->pState buffer contains previous frame (phaseLen - 1) samples */
00500   /* pStateCurnt points to the location where the new input data should be written */
00501   pStateCurnt = S->pState + (phaseLen - 1u);
00502 
00503   /* Total number of intput samples */
00504   blkCnt = blockSize;
00505 
00506   /* Loop over the blockSize. */
00507   while(blkCnt > 0u)
00508   {
00509     /* Copy new input sample into the state buffer */
00510     *pStateCurnt++ = *pSrc++;
00511 
00512     /* Loop over the Interpolation factor. */
00513     i = S->L;
00514 
00515     while(i > 0u)
00516     {
00517       /* Set accumulator to zero */
00518       sum = 0.0f;
00519 
00520       /* Initialize state pointer */
00521       ptr1 = pState;
00522 
00523       /* Initialize coefficient pointer */
00524       ptr2 = pCoeffs + (i - 1u);
00525 
00526       /* Loop over the polyPhase length */
00527       tapCnt = phaseLen;
00528 
00529       while(tapCnt > 0u)
00530       {
00531         /* Perform the multiply-accumulate */
00532         sum += *ptr1++ * *ptr2;
00533 
00534         /* Increment the coefficient pointer by interpolation factor times. */
00535         ptr2 += S->L;
00536 
00537         /* Decrement the loop counter */
00538         tapCnt--;
00539       }
00540 
00541       /* The result is in the accumulator, store in the destination buffer. */
00542       *pDst++ = sum;
00543 
00544       /* Decrement the loop counter */
00545       i--;
00546     }
00547 
00548     /* Advance the state pointer by 1           
00549      * to process the next group of interpolation factor number samples */
00550     pState = pState + 1;
00551 
00552     /* Decrement the loop counter */
00553     blkCnt--;
00554   }
00555 
00556   /* Processing is complete.         
00557    ** Now copy the last phaseLen - 1 samples to the start of the state buffer.       
00558    ** This prepares the state buffer for the next function call. */
00559 
00560   /* Points to the start of the state buffer */
00561   pStateCurnt = S->pState;
00562 
00563   tapCnt = phaseLen - 1u;
00564 
00565   while(tapCnt > 0u)
00566   {
00567     *pStateCurnt++ = *pState++;
00568 
00569     /* Decrement the loop counter */
00570     tapCnt--;
00571   }
00572 
00573 }
00574 
00575 #endif /*   #ifndef ARM_MATH_CM0_FAMILY */
00576 
00577 
00578 
00579  /**    
00580   * @} end of FIR_Interpolate group    
00581   */