CMSIS DSP library

Dependents:   performance_timer Surfboard_ gps2rtty Capstone ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_fir_interpolate_q31.c Source File

arm_fir_interpolate_q31.c

00001 /*-----------------------------------------------------------------------------    
00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.    
00003 *    
00004 * $Date:        19. March 2015
00005 * $Revision:    V.1.4.5
00006 *    
00007 * Project:      CMSIS DSP Library    
00008 * Title:        arm_fir_interpolate_q31.c    
00009 *    
00010 * Description:  Q31 FIR interpolation.    
00011 *    
00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00013 *  
00014 * Redistribution and use in source and binary forms, with or without 
00015 * modification, are permitted provided that the following conditions
00016 * are met:
00017 *   - Redistributions of source code must retain the above copyright
00018 *     notice, this list of conditions and the following disclaimer.
00019 *   - Redistributions in binary form must reproduce the above copyright
00020 *     notice, this list of conditions and the following disclaimer in
00021 *     the documentation and/or other materials provided with the 
00022 *     distribution.
00023 *   - Neither the name of ARM LIMITED nor the names of its contributors
00024 *     may be used to endorse or promote products derived from this
00025 *     software without specific prior written permission.
00026 *
00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00038 * POSSIBILITY OF SUCH DAMAGE.  
00039 * ---------------------------------------------------------------------------*/
00040 
00041 #include "arm_math.h"
00042 
00043 /**    
00044  * @ingroup groupFilters    
00045  */
00046 
00047 /**    
00048  * @addtogroup FIR_Interpolate    
00049  * @{    
00050  */
00051 
00052 /**    
00053  * @brief Processing function for the Q31 FIR interpolator.    
00054  * @param[in] *S        points to an instance of the Q31 FIR interpolator structure.    
00055  * @param[in] *pSrc     points to the block of input data.    
00056  * @param[out] *pDst    points to the block of output data.    
00057  * @param[in] blockSize number of input samples to process per call.    
00058  * @return none.    
00059  *    
00060  * <b>Scaling and Overflow Behavior:</b>    
00061  * \par    
00062  * The function is implemented using an internal 64-bit accumulator.    
00063  * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.    
00064  * Thus, if the accumulator result overflows it wraps around rather than clip.    
00065  * In order to avoid overflows completely the input signal must be scaled down by <code>1/(numTaps/L)</code>.    
00066  * since <code>numTaps/L</code> additions occur per output sample.    
00067  * After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.    
00068  */
00069 
00070 #ifndef ARM_MATH_CM0_FAMILY
00071 
00072   /* Run the below code for Cortex-M4 and Cortex-M3 */
00073 
00074 void arm_fir_interpolate_q31(
00075   const arm_fir_interpolate_instance_q31 * S,
00076   q31_t * pSrc,
00077   q31_t * pDst,
00078   uint32_t blockSize)
00079 {
00080   q31_t *pState = S->pState;                     /* State pointer */
00081   q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
00082   q31_t *pStateCurnt;                            /* Points to the current sample of the state */
00083   q31_t *ptr1, *ptr2;                            /* Temporary pointers for state and coefficient buffers */
00084   q63_t sum0;                                    /* Accumulators */
00085   q31_t x0, c0;                                  /* Temporary variables to hold state and coefficient values */
00086   uint32_t i, blkCnt, j;                         /* Loop counters */
00087   uint16_t phaseLen = S->phaseLength, tapCnt;    /* Length of each polyphase filter component */
00088 
00089   uint32_t blkCntN2;
00090   q63_t acc0, acc1;
00091   q31_t x1;
00092 
00093   /* S->pState buffer contains previous frame (phaseLen - 1) samples */
00094   /* pStateCurnt points to the location where the new input data should be written */
00095   pStateCurnt = S->pState + ((q31_t) phaseLen - 1);
00096 
00097   /* Initialise  blkCnt */
00098   blkCnt = blockSize / 2;
00099   blkCntN2 = blockSize - (2 * blkCnt);
00100 
00101   /* Samples loop unrolled by 2 */
00102   while(blkCnt > 0u)
00103   {
00104     /* Copy new input sample into the state buffer */
00105     *pStateCurnt++ = *pSrc++;
00106     *pStateCurnt++ = *pSrc++;
00107 
00108     /* Address modifier index of coefficient buffer */
00109     j = 1u;
00110 
00111     /* Loop over the Interpolation factor. */
00112     i = (S->L);
00113 
00114     while(i > 0u)
00115     {
00116       /* Set accumulator to zero */
00117       acc0 = 0;
00118       acc1 = 0;
00119 
00120       /* Initialize state pointer */
00121       ptr1 = pState;
00122 
00123       /* Initialize coefficient pointer */
00124       ptr2 = pCoeffs + (S->L - j);
00125 
00126       /* Loop over the polyPhase length. Unroll by a factor of 4.        
00127        ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
00128       tapCnt = phaseLen >> 2u;
00129 
00130       x0 = *(ptr1++);
00131 
00132       while(tapCnt > 0u)
00133       {
00134 
00135         /* Read the input sample */
00136         x1 = *(ptr1++);
00137 
00138         /* Read the coefficient */
00139         c0 = *(ptr2);
00140 
00141         /* Perform the multiply-accumulate */
00142         acc0 += (q63_t) x0 *c0;
00143         acc1 += (q63_t) x1 *c0;
00144 
00145 
00146         /* Read the coefficient */
00147         c0 = *(ptr2 + S->L);
00148 
00149         /* Read the input sample */
00150         x0 = *(ptr1++);
00151 
00152         /* Perform the multiply-accumulate */
00153         acc0 += (q63_t) x1 *c0;
00154         acc1 += (q63_t) x0 *c0;
00155 
00156 
00157         /* Read the coefficient */
00158         c0 = *(ptr2 + S->L * 2);
00159 
00160         /* Read the input sample */
00161         x1 = *(ptr1++);
00162 
00163         /* Perform the multiply-accumulate */
00164         acc0 += (q63_t) x0 *c0;
00165         acc1 += (q63_t) x1 *c0;
00166 
00167         /* Read the coefficient */
00168         c0 = *(ptr2 + S->L * 3);
00169 
00170         /* Read the input sample */
00171         x0 = *(ptr1++);
00172 
00173         /* Perform the multiply-accumulate */
00174         acc0 += (q63_t) x1 *c0;
00175         acc1 += (q63_t) x0 *c0;
00176 
00177 
00178         /* Upsampling is done by stuffing L-1 zeros between each sample.        
00179          * So instead of multiplying zeros with coefficients,        
00180          * Increment the coefficient pointer by interpolation factor times. */
00181         ptr2 += 4 * S->L;
00182 
00183         /* Decrement the loop counter */
00184         tapCnt--;
00185       }
00186 
00187       /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
00188       tapCnt = phaseLen % 0x4u;
00189 
00190       while(tapCnt > 0u)
00191       {
00192 
00193         /* Read the input sample */
00194         x1 = *(ptr1++);
00195 
00196         /* Read the coefficient */
00197         c0 = *(ptr2);
00198 
00199         /* Perform the multiply-accumulate */
00200         acc0 += (q63_t) x0 *c0;
00201         acc1 += (q63_t) x1 *c0;
00202 
00203         /* Increment the coefficient pointer by interpolation factor times. */
00204         ptr2 += S->L;
00205 
00206         /* update states for next sample processing */
00207         x0 = x1;
00208 
00209         /* Decrement the loop counter */
00210         tapCnt--;
00211       }
00212 
00213       /* The result is in the accumulator, store in the destination buffer. */
00214       *pDst = (q31_t) (acc0 >> 31);
00215       *(pDst + S->L) = (q31_t) (acc1 >> 31);
00216 
00217 
00218       pDst++;
00219 
00220       /* Increment the address modifier index of coefficient buffer */
00221       j++;
00222 
00223       /* Decrement the loop counter */
00224       i--;
00225     }
00226 
00227     /* Advance the state pointer by 1        
00228      * to process the next group of interpolation factor number samples */
00229     pState = pState + 2;
00230 
00231     pDst += S->L;
00232 
00233     /* Decrement the loop counter */
00234     blkCnt--;
00235   }
00236 
00237   /* If the blockSize is not a multiple of 2, compute any remaining output samples here.        
00238    ** No loop unrolling is used. */
00239   blkCnt = blkCntN2;
00240 
00241   /* Loop over the blockSize. */
00242   while(blkCnt > 0u)
00243   {
00244     /* Copy new input sample into the state buffer */
00245     *pStateCurnt++ = *pSrc++;
00246 
00247     /* Address modifier index of coefficient buffer */
00248     j = 1u;
00249 
00250     /* Loop over the Interpolation factor. */
00251     i = S->L;
00252     while(i > 0u)
00253     {
00254       /* Set accumulator to zero */
00255       sum0 = 0;
00256 
00257       /* Initialize state pointer */
00258       ptr1 = pState;
00259 
00260       /* Initialize coefficient pointer */
00261       ptr2 = pCoeffs + (S->L - j);
00262 
00263       /* Loop over the polyPhase length. Unroll by a factor of 4.        
00264        ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
00265       tapCnt = phaseLen >> 2;
00266       while(tapCnt > 0u)
00267       {
00268 
00269         /* Read the coefficient */
00270         c0 = *(ptr2);
00271 
00272         /* Upsampling is done by stuffing L-1 zeros between each sample.        
00273          * So instead of multiplying zeros with coefficients,        
00274          * Increment the coefficient pointer by interpolation factor times. */
00275         ptr2 += S->L;
00276 
00277         /* Read the input sample */
00278         x0 = *(ptr1++);
00279 
00280         /* Perform the multiply-accumulate */
00281         sum0 += (q63_t) x0 *c0;
00282 
00283         /* Read the coefficient */
00284         c0 = *(ptr2);
00285 
00286         /* Increment the coefficient pointer by interpolation factor times. */
00287         ptr2 += S->L;
00288 
00289         /* Read the input sample */
00290         x0 = *(ptr1++);
00291 
00292         /* Perform the multiply-accumulate */
00293         sum0 += (q63_t) x0 *c0;
00294 
00295         /* Read the coefficient */
00296         c0 = *(ptr2);
00297 
00298         /* Increment the coefficient pointer by interpolation factor times. */
00299         ptr2 += S->L;
00300 
00301         /* Read the input sample */
00302         x0 = *(ptr1++);
00303 
00304         /* Perform the multiply-accumulate */
00305         sum0 += (q63_t) x0 *c0;
00306 
00307         /* Read the coefficient */
00308         c0 = *(ptr2);
00309 
00310         /* Increment the coefficient pointer by interpolation factor times. */
00311         ptr2 += S->L;
00312 
00313         /* Read the input sample */
00314         x0 = *(ptr1++);
00315 
00316         /* Perform the multiply-accumulate */
00317         sum0 += (q63_t) x0 *c0;
00318 
00319         /* Decrement the loop counter */
00320         tapCnt--;
00321       }
00322 
00323       /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
00324       tapCnt = phaseLen & 0x3u;
00325 
00326       while(tapCnt > 0u)
00327       {
00328         /* Read the coefficient */
00329         c0 = *(ptr2);
00330 
00331         /* Increment the coefficient pointer by interpolation factor times. */
00332         ptr2 += S->L;
00333 
00334         /* Read the input sample */
00335         x0 = *(ptr1++);
00336 
00337         /* Perform the multiply-accumulate */
00338         sum0 += (q63_t) x0 *c0;
00339 
00340         /* Decrement the loop counter */
00341         tapCnt--;
00342       }
00343 
00344       /* The result is in the accumulator, store in the destination buffer. */
00345       *pDst++ = (q31_t) (sum0 >> 31);
00346 
00347       /* Increment the address modifier index of coefficient buffer */
00348       j++;
00349 
00350       /* Decrement the loop counter */
00351       i--;
00352     }
00353 
00354     /* Advance the state pointer by 1        
00355      * to process the next group of interpolation factor number samples */
00356     pState = pState + 1;
00357 
00358     /* Decrement the loop counter */
00359     blkCnt--;
00360   }
00361 
00362   /* Processing is complete.        
00363    ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer.        
00364    ** This prepares the state buffer for the next function call. */
00365 
00366   /* Points to the start of the state buffer */
00367   pStateCurnt = S->pState;
00368 
00369   tapCnt = (phaseLen - 1u) >> 2u;
00370 
00371   /* copy data */
00372   while(tapCnt > 0u)
00373   {
00374     *pStateCurnt++ = *pState++;
00375     *pStateCurnt++ = *pState++;
00376     *pStateCurnt++ = *pState++;
00377     *pStateCurnt++ = *pState++;
00378 
00379     /* Decrement the loop counter */
00380     tapCnt--;
00381   }
00382 
00383   tapCnt = (phaseLen - 1u) % 0x04u;
00384 
00385   /* copy data */
00386   while(tapCnt > 0u)
00387   {
00388     *pStateCurnt++ = *pState++;
00389 
00390     /* Decrement the loop counter */
00391     tapCnt--;
00392   }
00393 
00394 }
00395 
00396 
00397 #else
00398 
00399 void arm_fir_interpolate_q31(
00400   const arm_fir_interpolate_instance_q31 * S,
00401   q31_t * pSrc,
00402   q31_t * pDst,
00403   uint32_t blockSize)
00404 {
00405   q31_t *pState = S->pState;                     /* State pointer */
00406   q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
00407   q31_t *pStateCurnt;                            /* Points to the current sample of the state */
00408   q31_t *ptr1, *ptr2;                            /* Temporary pointers for state and coefficient buffers */
00409 
00410   /* Run the below code for Cortex-M0 */
00411 
00412   q63_t sum;                                     /* Accumulator */
00413   q31_t x0, c0;                                  /* Temporary variables to hold state and coefficient values */
00414   uint32_t i, blkCnt;                            /* Loop counters */
00415   uint16_t phaseLen = S->phaseLength, tapCnt;    /* Length of each polyphase filter component */
00416 
00417 
00418   /* S->pState buffer contains previous frame (phaseLen - 1) samples */
00419   /* pStateCurnt points to the location where the new input data should be written */
00420   pStateCurnt = S->pState + ((q31_t) phaseLen - 1);
00421 
00422   /* Total number of intput samples */
00423   blkCnt = blockSize;
00424 
00425   /* Loop over the blockSize. */
00426   while(blkCnt > 0u)
00427   {
00428     /* Copy new input sample into the state buffer */
00429     *pStateCurnt++ = *pSrc++;
00430 
00431     /* Loop over the Interpolation factor. */
00432     i = S->L;
00433 
00434     while(i > 0u)
00435     {
00436       /* Set accumulator to zero */
00437       sum = 0;
00438 
00439       /* Initialize state pointer */
00440       ptr1 = pState;
00441 
00442       /* Initialize coefficient pointer */
00443       ptr2 = pCoeffs + (i - 1u);
00444 
00445       tapCnt = phaseLen;
00446 
00447       while(tapCnt > 0u)
00448       {
00449         /* Read the coefficient */
00450         c0 = *(ptr2);
00451 
00452         /* Increment the coefficient pointer by interpolation factor times. */
00453         ptr2 += S->L;
00454 
00455         /* Read the input sample */
00456         x0 = *ptr1++;
00457 
00458         /* Perform the multiply-accumulate */
00459         sum += (q63_t) x0 *c0;
00460 
00461         /* Decrement the loop counter */
00462         tapCnt--;
00463       }
00464 
00465       /* The result is in the accumulator, store in the destination buffer. */
00466       *pDst++ = (q31_t) (sum >> 31);
00467 
00468       /* Decrement the loop counter */
00469       i--;
00470     }
00471 
00472     /* Advance the state pointer by 1           
00473      * to process the next group of interpolation factor number samples */
00474     pState = pState + 1;
00475 
00476     /* Decrement the loop counter */
00477     blkCnt--;
00478   }
00479 
00480   /* Processing is complete.         
00481    ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer.       
00482    ** This prepares the state buffer for the next function call. */
00483 
00484   /* Points to the start of the state buffer */
00485   pStateCurnt = S->pState;
00486 
00487   tapCnt = phaseLen - 1u;
00488 
00489   /* copy data */
00490   while(tapCnt > 0u)
00491   {
00492     *pStateCurnt++ = *pState++;
00493 
00494     /* Decrement the loop counter */
00495     tapCnt--;
00496   }
00497 
00498 }
00499 
00500 #endif /*   #ifndef ARM_MATH_CM0_FAMILY */
00501 
00502  /**    
00503   * @} end of FIR_Interpolate group    
00504   */