Robert Lopez / CMSIS5
Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_fir_interpolate_f32.c Source File

arm_fir_interpolate_f32.c

00001 /* ----------------------------------------------------------------------
00002  * Project:      CMSIS DSP Library
00003  * Title:        arm_fir_interpolate_f32.c
00004  * Description:  Floating-point FIR interpolation sequences
00005  *
00006  * $Date:        27. January 2017
00007  * $Revision:    V.1.5.1
00008  *
00009  * Target Processor: Cortex-M cores
00010  * -------------------------------------------------------------------- */
00011 /*
00012  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
00013  *
00014  * SPDX-License-Identifier: Apache-2.0
00015  *
00016  * Licensed under the Apache License, Version 2.0 (the License); you may
00017  * not use this file except in compliance with the License.
00018  * You may obtain a copy of the License at
00019  *
00020  * www.apache.org/licenses/LICENSE-2.0
00021  *
00022  * Unless required by applicable law or agreed to in writing, software
00023  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
00024  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00025  * See the License for the specific language governing permissions and
00026  * limitations under the License.
00027  */
00028 
00029 #include "arm_math.h"
00030 
00031 /**
00032  * @defgroup FIR_Interpolate Finite Impulse Response (FIR) Interpolator
00033  *
00034  * These functions combine an upsampler (zero stuffer) and an FIR filter.
00035  * They are used in multirate systems for increasing the sample rate of a signal without introducing high frequency images.
00036  * Conceptually, the functions are equivalent to the block diagram below:
00037  * \image html FIRInterpolator.gif "Components included in the FIR Interpolator functions"
00038  * After upsampling by a factor of <code>L</code>, the signal should be filtered by a lowpass filter with a normalized
00039  * cutoff frequency of <code>1/L</code> in order to eliminate high frequency copies of the spectrum.
00040  * The user of the function is responsible for providing the filter coefficients.
00041  *
00042  * The FIR interpolator functions provided in the CMSIS DSP Library combine the upsampler and FIR filter in an efficient manner.
00043  * The upsampler inserts <code>L-1</code> zeros between each sample.
00044  * Instead of multiplying by these zero values, the FIR filter is designed to skip them.
00045  * This leads to an efficient implementation without any wasted effort.
00046  * The functions operate on blocks of input and output data.
00047  * <code>pSrc</code> points to an array of <code>blockSize</code> input values and
00048  * <code>pDst</code> points to an array of <code>blockSize*L</code> output values.
00049  *
00050  * The library provides separate functions for Q15, Q31, and floating-point data types.
00051  *
00052  * \par Algorithm:
00053  * The functions use a polyphase filter structure:
00054  * <pre>
00055  *    y[n] = b[0] * x[n] + b[L]   * x[n-1] + ... + b[L*(phaseLength-1)] * x[n-phaseLength+1]
00056  *    y[n+1] = b[1] * x[n] + b[L+1] * x[n-1] + ... + b[L*(phaseLength-1)+1] * x[n-phaseLength+1]
00057  *    ...
00058  *    y[n+(L-1)] = b[L-1] * x[n] + b[2*L-1] * x[n-1] + ....+ b[L*(phaseLength-1)+(L-1)] * x[n-phaseLength+1]
00059  * </pre>
00060  * This approach is more efficient than straightforward upsample-then-filter algorithms.
00061  * With this method the computation is reduced by a factor of <code>1/L</code> when compared to using a standard FIR filter.
00062  * \par
00063  * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.
00064  * <code>numTaps</code> must be a multiple of the interpolation factor <code>L</code> and this is checked by the
00065  * initialization functions.
00066  * Internally, the function divides the FIR filter's impulse response into shorter filters of length
00067  * <code>phaseLength=numTaps/L</code>.
00068  * Coefficients are stored in time reversed order.
00069  * \par
00070  * <pre>
00071  *    {b[numTaps-1], b[numTaps-2], b[N-2], ..., b[1], b[0]}
00072  * </pre>
00073  * \par
00074  * <code>pState</code> points to a state array of size <code>blockSize + phaseLength - 1</code>.
00075  * Samples in the state buffer are stored in the order:
00076  * \par
00077  * <pre>
00078  *    {x[n-phaseLength+1], x[n-phaseLength], x[n-phaseLength-1], x[n-phaseLength-2]....x[0], x[1], ..., x[blockSize-1]}
00079  * </pre>
00080  * The state variables are updated after each block of data is processed, the coefficients are untouched.
00081  *
00082  * \par Instance Structure
00083  * The coefficients and state variables for a filter are stored together in an instance data structure.
00084  * A separate instance structure must be defined for each filter.
00085  * Coefficient arrays may be shared among several instances while state variable array should be allocated separately.
00086  * There are separate instance structure declarations for each of the 3 supported data types.
00087  *
00088  * \par Initialization Functions
00089  * There is also an associated initialization function for each data type.
00090  * The initialization function performs the following operations:
00091  * - Sets the values of the internal structure fields.
00092  * - Zeros out the values in the state buffer.
00093  * - Checks to make sure that the length of the filter is a multiple of the interpolation factor.
00094  * To do this manually without calling the init function, assign the follow subfields of the instance structure:
00095  * L (interpolation factor), pCoeffs, phaseLength (numTaps / L), pState. Also set all of the values in pState to zero.
00096  *
00097  * \par
00098  * Use of the initialization function is optional.
00099  * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
00100  * To place an instance structure into a const data section, the instance structure must be manually initialized.
00101  * The code below statically initializes each of the 3 different data type filter instance structures
00102  * <pre>
00103  * arm_fir_interpolate_instance_f32 S = {L, phaseLength, pCoeffs, pState};
00104  * arm_fir_interpolate_instance_q31 S = {L, phaseLength, pCoeffs, pState};
00105  * arm_fir_interpolate_instance_q15 S = {L, phaseLength, pCoeffs, pState};
00106  * </pre>
00107  * where <code>L</code> is the interpolation factor; <code>phaseLength=numTaps/L</code> is the
00108  * length of each of the shorter FIR filters used internally,
00109  * <code>pCoeffs</code> is the address of the coefficient buffer;
00110  * <code>pState</code> is the address of the state buffer.
00111  * Be sure to set the values in the state buffer to zeros when doing static initialization.
00112  *
00113  * \par Fixed-Point Behavior
00114  * Care must be taken when using the fixed-point versions of the FIR interpolate filter functions.
00115  * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
00116  * Refer to the function specific documentation below for usage guidelines.
00117  */
00118 
00119 /**
00120  * @addtogroup FIR_Interpolate
00121  * @{
00122  */
00123 
00124 /**
00125  * @brief Processing function for the floating-point FIR interpolator.
00126  * @param[in] *S        points to an instance of the floating-point FIR interpolator structure.
00127  * @param[in] *pSrc     points to the block of input data.
00128  * @param[out] *pDst    points to the block of output data.
00129  * @param[in] blockSize number of input samples to process per call.
00130  * @return none.
00131  */
00132 #if defined (ARM_MATH_DSP)
00133 
00134   /* Run the below code for Cortex-M4 and Cortex-M3 */
00135 
00136 void arm_fir_interpolate_f32(
00137   const arm_fir_interpolate_instance_f32 * S,
00138   float32_t * pSrc,
00139   float32_t * pDst,
00140   uint32_t blockSize)
00141 {
00142   float32_t *pState = S->pState;                 /* State pointer */
00143   float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
00144   float32_t *pStateCurnt;                        /* Points to the current sample of the state */
00145   float32_t *ptr1, *ptr2;                        /* Temporary pointers for state and coefficient buffers */
00146   float32_t sum0;                                /* Accumulators */
00147   float32_t x0, c0;                              /* Temporary variables to hold state and coefficient values */
00148   uint32_t i, blkCnt, j;                         /* Loop counters */
00149   uint16_t phaseLen = S->phaseLength, tapCnt;    /* Length of each polyphase filter component */
00150   float32_t acc0, acc1, acc2, acc3;
00151   float32_t x1, x2, x3;
00152   uint32_t blkCntN4;
00153   float32_t c1, c2, c3;
00154 
00155   /* S->pState buffer contains previous frame (phaseLen - 1) samples */
00156   /* pStateCurnt points to the location where the new input data should be written */
00157   pStateCurnt = S->pState + (phaseLen - 1U);
00158 
00159   /* Initialise  blkCnt */
00160   blkCnt = blockSize / 4;
00161   blkCntN4 = blockSize - (4 * blkCnt);
00162 
00163   /* Samples loop unrolled by 4 */
00164   while (blkCnt > 0U)
00165   {
00166     /* Copy new input sample into the state buffer */
00167     *pStateCurnt++ = *pSrc++;
00168     *pStateCurnt++ = *pSrc++;
00169     *pStateCurnt++ = *pSrc++;
00170     *pStateCurnt++ = *pSrc++;
00171 
00172     /* Address modifier index of coefficient buffer */
00173     j = 1U;
00174 
00175     /* Loop over the Interpolation factor. */
00176     i = (S->L);
00177 
00178     while (i > 0U)
00179     {
00180       /* Set accumulator to zero */
00181       acc0 = 0.0f;
00182       acc1 = 0.0f;
00183       acc2 = 0.0f;
00184       acc3 = 0.0f;
00185 
00186       /* Initialize state pointer */
00187       ptr1 = pState;
00188 
00189       /* Initialize coefficient pointer */
00190       ptr2 = pCoeffs + (S->L - j);
00191 
00192       /* Loop over the polyPhase length. Unroll by a factor of 4.
00193        ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
00194       tapCnt = phaseLen >> 2U;
00195 
00196       x0 = *(ptr1++);
00197       x1 = *(ptr1++);
00198       x2 = *(ptr1++);
00199 
00200       while (tapCnt > 0U)
00201       {
00202 
00203         /* Read the input sample */
00204         x3 = *(ptr1++);
00205 
00206         /* Read the coefficient */
00207         c0 = *(ptr2);
00208 
00209         /* Perform the multiply-accumulate */
00210         acc0 += x0 * c0;
00211         acc1 += x1 * c0;
00212         acc2 += x2 * c0;
00213         acc3 += x3 * c0;
00214 
00215         /* Read the coefficient */
00216         c1 = *(ptr2 + S->L);
00217 
00218         /* Read the input sample */
00219         x0 = *(ptr1++);
00220 
00221         /* Perform the multiply-accumulate */
00222         acc0 += x1 * c1;
00223         acc1 += x2 * c1;
00224         acc2 += x3 * c1;
00225         acc3 += x0 * c1;
00226 
00227         /* Read the coefficient */
00228         c2 = *(ptr2 + S->L * 2);
00229 
00230         /* Read the input sample */
00231         x1 = *(ptr1++);
00232 
00233         /* Perform the multiply-accumulate */
00234         acc0 += x2 * c2;
00235         acc1 += x3 * c2;
00236         acc2 += x0 * c2;
00237         acc3 += x1 * c2;
00238 
00239         /* Read the coefficient */
00240         c3 = *(ptr2 + S->L * 3);
00241 
00242         /* Read the input sample */
00243         x2 = *(ptr1++);
00244 
00245         /* Perform the multiply-accumulate */
00246         acc0 += x3 * c3;
00247         acc1 += x0 * c3;
00248         acc2 += x1 * c3;
00249         acc3 += x2 * c3;
00250 
00251 
00252         /* Upsampling is done by stuffing L-1 zeros between each sample.
00253          * So instead of multiplying zeros with coefficients,
00254          * Increment the coefficient pointer by interpolation factor times. */
00255         ptr2 += 4 * S->L;
00256 
00257         /* Decrement the loop counter */
00258         tapCnt--;
00259       }
00260 
00261       /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
00262       tapCnt = phaseLen % 0x4U;
00263 
00264       while (tapCnt > 0U)
00265       {
00266 
00267         /* Read the input sample */
00268         x3 = *(ptr1++);
00269 
00270         /* Read the coefficient */
00271         c0 = *(ptr2);
00272 
00273         /* Perform the multiply-accumulate */
00274         acc0 += x0 * c0;
00275         acc1 += x1 * c0;
00276         acc2 += x2 * c0;
00277         acc3 += x3 * c0;
00278 
00279         /* Increment the coefficient pointer by interpolation factor times. */
00280         ptr2 += S->L;
00281 
00282         /* update states for next sample processing */
00283         x0 = x1;
00284         x1 = x2;
00285         x2 = x3;
00286 
00287         /* Decrement the loop counter */
00288         tapCnt--;
00289       }
00290 
00291       /* The result is in the accumulator, store in the destination buffer. */
00292       *pDst = acc0;
00293       *(pDst + S->L) = acc1;
00294       *(pDst + 2 * S->L) = acc2;
00295       *(pDst + 3 * S->L) = acc3;
00296 
00297       pDst++;
00298 
00299       /* Increment the address modifier index of coefficient buffer */
00300       j++;
00301 
00302       /* Decrement the loop counter */
00303       i--;
00304     }
00305 
00306     /* Advance the state pointer by 1
00307      * to process the next group of interpolation factor number samples */
00308     pState = pState + 4;
00309 
00310     pDst += S->L * 3;
00311 
00312     /* Decrement the loop counter */
00313     blkCnt--;
00314   }
00315 
00316   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
00317    ** No loop unrolling is used. */
00318 
00319   while (blkCntN4 > 0U)
00320   {
00321     /* Copy new input sample into the state buffer */
00322     *pStateCurnt++ = *pSrc++;
00323 
00324     /* Address modifier index of coefficient buffer */
00325     j = 1U;
00326 
00327     /* Loop over the Interpolation factor. */
00328     i = S->L;
00329     while (i > 0U)
00330     {
00331       /* Set accumulator to zero */
00332       sum0 = 0.0f;
00333 
00334       /* Initialize state pointer */
00335       ptr1 = pState;
00336 
00337       /* Initialize coefficient pointer */
00338       ptr2 = pCoeffs + (S->L - j);
00339 
00340       /* Loop over the polyPhase length. Unroll by a factor of 4.
00341        ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
00342       tapCnt = phaseLen >> 2U;
00343       while (tapCnt > 0U)
00344       {
00345 
00346         /* Read the coefficient */
00347         c0 = *(ptr2);
00348 
00349         /* Upsampling is done by stuffing L-1 zeros between each sample.
00350          * So instead of multiplying zeros with coefficients,
00351          * Increment the coefficient pointer by interpolation factor times. */
00352         ptr2 += S->L;
00353 
00354         /* Read the input sample */
00355         x0 = *(ptr1++);
00356 
00357         /* Perform the multiply-accumulate */
00358         sum0 += x0 * c0;
00359 
00360         /* Read the coefficient */
00361         c0 = *(ptr2);
00362 
00363         /* Increment the coefficient pointer by interpolation factor times. */
00364         ptr2 += S->L;
00365 
00366         /* Read the input sample */
00367         x0 = *(ptr1++);
00368 
00369         /* Perform the multiply-accumulate */
00370         sum0 += x0 * c0;
00371 
00372         /* Read the coefficient */
00373         c0 = *(ptr2);
00374 
00375         /* Increment the coefficient pointer by interpolation factor times. */
00376         ptr2 += S->L;
00377 
00378         /* Read the input sample */
00379         x0 = *(ptr1++);
00380 
00381         /* Perform the multiply-accumulate */
00382         sum0 += x0 * c0;
00383 
00384         /* Read the coefficient */
00385         c0 = *(ptr2);
00386 
00387         /* Increment the coefficient pointer by interpolation factor times. */
00388         ptr2 += S->L;
00389 
00390         /* Read the input sample */
00391         x0 = *(ptr1++);
00392 
00393         /* Perform the multiply-accumulate */
00394         sum0 += x0 * c0;
00395 
00396         /* Decrement the loop counter */
00397         tapCnt--;
00398       }
00399 
00400       /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
00401       tapCnt = phaseLen % 0x4U;
00402 
00403       while (tapCnt > 0U)
00404       {
00405         /* Perform the multiply-accumulate */
00406         sum0 += *(ptr1++) * (*ptr2);
00407 
00408         /* Increment the coefficient pointer by interpolation factor times. */
00409         ptr2 += S->L;
00410 
00411         /* Decrement the loop counter */
00412         tapCnt--;
00413       }
00414 
00415       /* The result is in the accumulator, store in the destination buffer. */
00416       *pDst++ = sum0;
00417 
00418       /* Increment the address modifier index of coefficient buffer */
00419       j++;
00420 
00421       /* Decrement the loop counter */
00422       i--;
00423     }
00424 
00425     /* Advance the state pointer by 1
00426      * to process the next group of interpolation factor number samples */
00427     pState = pState + 1;
00428 
00429     /* Decrement the loop counter */
00430     blkCntN4--;
00431   }
00432 
00433   /* Processing is complete.
00434    ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer.
00435    ** This prepares the state buffer for the next function call. */
00436 
00437   /* Points to the start of the state buffer */
00438   pStateCurnt = S->pState;
00439 
00440   tapCnt = (phaseLen - 1U) >> 2U;
00441 
00442   /* copy data */
00443   while (tapCnt > 0U)
00444   {
00445     *pStateCurnt++ = *pState++;
00446     *pStateCurnt++ = *pState++;
00447     *pStateCurnt++ = *pState++;
00448     *pStateCurnt++ = *pState++;
00449 
00450     /* Decrement the loop counter */
00451     tapCnt--;
00452   }
00453 
00454   tapCnt = (phaseLen - 1U) % 0x04U;
00455 
00456   /* copy data */
00457   while (tapCnt > 0U)
00458   {
00459     *pStateCurnt++ = *pState++;
00460 
00461     /* Decrement the loop counter */
00462     tapCnt--;
00463   }
00464 }
00465 
00466 #else
00467 
00468   /* Run the below code for Cortex-M0 */
00469 
00470 void arm_fir_interpolate_f32(
00471   const arm_fir_interpolate_instance_f32 * S,
00472   float32_t * pSrc,
00473   float32_t * pDst,
00474   uint32_t blockSize)
00475 {
00476   float32_t *pState = S->pState;                 /* State pointer */
00477   float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
00478   float32_t *pStateCurnt;                        /* Points to the current sample of the state */
00479   float32_t *ptr1, *ptr2;                        /* Temporary pointers for state and coefficient buffers */
00480 
00481 
00482   float32_t sum;                                 /* Accumulator */
00483   uint32_t i, blkCnt;                            /* Loop counters */
00484   uint16_t phaseLen = S->phaseLength, tapCnt;    /* Length of each polyphase filter component */
00485 
00486 
00487   /* S->pState buffer contains previous frame (phaseLen - 1) samples */
00488   /* pStateCurnt points to the location where the new input data should be written */
00489   pStateCurnt = S->pState + (phaseLen - 1U);
00490 
00491   /* Total number of intput samples */
00492   blkCnt = blockSize;
00493 
00494   /* Loop over the blockSize. */
00495   while (blkCnt > 0U)
00496   {
00497     /* Copy new input sample into the state buffer */
00498     *pStateCurnt++ = *pSrc++;
00499 
00500     /* Loop over the Interpolation factor. */
00501     i = S->L;
00502 
00503     while (i > 0U)
00504     {
00505       /* Set accumulator to zero */
00506       sum = 0.0f;
00507 
00508       /* Initialize state pointer */
00509       ptr1 = pState;
00510 
00511       /* Initialize coefficient pointer */
00512       ptr2 = pCoeffs + (i - 1U);
00513 
00514       /* Loop over the polyPhase length */
00515       tapCnt = phaseLen;
00516 
00517       while (tapCnt > 0U)
00518       {
00519         /* Perform the multiply-accumulate */
00520         sum += *ptr1++ * *ptr2;
00521 
00522         /* Increment the coefficient pointer by interpolation factor times. */
00523         ptr2 += S->L;
00524 
00525         /* Decrement the loop counter */
00526         tapCnt--;
00527       }
00528 
00529       /* The result is in the accumulator, store in the destination buffer. */
00530       *pDst++ = sum;
00531 
00532       /* Decrement the loop counter */
00533       i--;
00534     }
00535 
00536     /* Advance the state pointer by 1
00537      * to process the next group of interpolation factor number samples */
00538     pState = pState + 1;
00539 
00540     /* Decrement the loop counter */
00541     blkCnt--;
00542   }
00543 
00544   /* Processing is complete.
00545    ** Now copy the last phaseLen - 1 samples to the start of the state buffer.
00546    ** This prepares the state buffer for the next function call. */
00547 
00548   /* Points to the start of the state buffer */
00549   pStateCurnt = S->pState;
00550 
00551   tapCnt = phaseLen - 1U;
00552 
00553   while (tapCnt > 0U)
00554   {
00555     *pStateCurnt++ = *pState++;
00556 
00557     /* Decrement the loop counter */
00558     tapCnt--;
00559   }
00560 
00561 }
00562 
00563 #endif /*   #if defined (ARM_MATH_DSP) */
00564 
00565 
00566 
00567  /**
00568   * @} end of FIR_Interpolate group
00569   */
00570