CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details

Dependents:   K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_fir_decimate_fast_q31.c Source File

arm_fir_decimate_fast_q31.c

00001 /* ----------------------------------------------------------------------  
00002 * Copyright (C) 2010 ARM Limited. All rights reserved.  
00003 *  
00004 * $Date:        29. November 2010  
00005 * $Revision:    V1.0.3  
00006 *  
00007 * Project:      CMSIS DSP Library  
00008 * Title:        arm_fir_decimate_fast_q31.c  
00009 *  
00010 * Description:  Fast Q31 FIR Decimator.  
00011 *  
00012 * Target Processor: Cortex-M4/Cortex-M3
00013 *  
00014 * Version 1.0.3 2010/11/29 
00015 *    Re-organized the CMSIS folders and updated documentation.  
00016 *   
00017 * Version 1.0.2 2010/11/11  
00018 *    Documentation updated.   
00019 *  
00020 * Version 1.0.1 2010/10/05   
00021 *    Production release and review comments incorporated.  
00022 *  
00023 * Version 1.0.0 2010/09/20   
00024 *    Production release and review comments incorporated.  
00025 * -------------------------------------------------------------------- */ 
00026  
00027 #include "arm_math.h" 
00028  
00029 /**  
00030  * @ingroup groupFilters  
00031  */ 
00032  
00033 /**  
00034  * @addtogroup FIR_decimate  
00035  * @{  
00036  */ 
00037  
00038 /**  
00039  * @brief Processing function for the Q31 FIR decimator (fast variant).  
00040  * @param[in] *S points to an instance of the Q31 FIR decimator structure.  
00041  * @param[in] *pSrc points to the block of input data.  
00042  * @param[out] *pDst points to the block of output data  
00043  * @param[in] blockSize number of input samples to process per call.  
00044  * @return none  
00045  *  
00046  * <b>Scaling and Overflow Behavior:</b>  
00047  *  
00048  * \par  
00049  * This function is optimized for speed at the expense of fixed-point precision and overflow protection.  
00050  * The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format.  
00051  * These intermediate results are added to a 2.30 accumulator.  
00052  * Finally, the accumulator is saturated and converted to a 1.31 result.  
00053  * The fast version has the same overflow behavior as the standard version and provides less precision since it discards the low 32 bits of each multiplication result.  
00054  * In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits (where log2 is read as log to the base 2).  
00055  *  
00056  * \par  
00057  * Refer to the function <code>arm_fir_decimate_q31()</code> for a slower implementation of this function which uses a 64-bit accumulator to provide higher precision.  
00058  * Both the slow and the fast versions use the same instance structure.  
00059  * Use the function <code>arm_fir_decimate_init_q31()</code> to initialize the filter structure.  
00060  */ 
00061  
00062 void arm_fir_decimate_fast_q31( 
00063   arm_fir_decimate_instance_q31 * S, 
00064   q31_t * pSrc, 
00065   q31_t * pDst, 
00066   uint32_t blockSize) 
00067 { 
00068   q31_t *pState = S->pState;                     /* State pointer */ 
00069   q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */ 
00070   q31_t *pStateCurnt;                            /* Points to the current sample of the state */ 
00071   q31_t x0, c0;                                  /* Temporary variables to hold state and coefficient values */ 
00072   q31_t *px;                                     /* Temporary pointers for state buffer */ 
00073   q31_t *pb;                                     /* Temporary pointers for coefficient buffer */ 
00074   q63_t sum0;                                    /* Accumulator */ 
00075   uint32_t numTaps = S->numTaps;                 /* Number of taps */ 
00076   uint32_t i, tapCnt, blkCnt, outBlockSize = blockSize / S->M;  /* Loop counters */ 
00077  
00078  
00079   /* S->pState buffer contains previous frame (numTaps - 1) samples */ 
00080   /* pStateCurnt points to the location where the new input data should be written */ 
00081   pStateCurnt = S->pState + (numTaps - 1u); 
00082  
00083   /* Total number of output samples to be computed */ 
00084   blkCnt = outBlockSize; 
00085  
00086   while(blkCnt > 0u) 
00087   { 
00088     /* Copy decimation factor number of new input samples into the state buffer */ 
00089     i = S->M; 
00090  
00091     do 
00092     { 
00093       *pStateCurnt++ = *pSrc++; 
00094  
00095     } while(--i); 
00096  
00097     /* Set accumulator to zero */ 
00098     sum0 = 0; 
00099  
00100     /* Initialize state pointer */ 
00101     px = pState; 
00102  
00103     /* Initialize coeff pointer */ 
00104     pb = pCoeffs; 
00105  
00106     /* Loop unrolling.  Process 4 taps at a time. */ 
00107     tapCnt = numTaps >> 2; 
00108  
00109     /* Loop over the number of taps.  Unroll by a factor of 4.  
00110      ** Repeat until we've computed numTaps-4 coefficients. */ 
00111     while(tapCnt > 0u) 
00112     { 
00113       /* Read the b[numTaps-1] coefficient */ 
00114       c0 = *(pb++); 
00115  
00116       /* Read x[n-numTaps-1] sample */ 
00117       x0 = *(px++); 
00118  
00119       /* Perform the multiply-accumulate */ 
00120       sum0 = (q31_t) ((((q63_t) x0 * c0) + (sum0 << 32)) >> 32); 
00121  
00122       /* Read the b[numTaps-2] coefficient */ 
00123       c0 = *(pb++); 
00124  
00125       /* Read x[n-numTaps-2] sample */ 
00126       x0 = *(px++); 
00127  
00128       /* Perform the multiply-accumulate */ 
00129       sum0 = (q31_t) ((((q63_t) x0 * c0) + (sum0 << 32)) >> 32); 
00130  
00131       /* Read the b[numTaps-3] coefficient */ 
00132       c0 = *(pb++); 
00133  
00134       /* Read x[n-numTaps-3] sample */ 
00135       x0 = *(px++); 
00136  
00137       /* Perform the multiply-accumulate */ 
00138       sum0 = (q31_t) ((((q63_t) x0 * c0) + (sum0 << 32)) >> 32); 
00139  
00140       /* Read the b[numTaps-4] coefficient */ 
00141       c0 = *(pb++); 
00142  
00143       /* Read x[n-numTaps-4] sample */ 
00144       x0 = *(px++); 
00145  
00146       /* Perform the multiply-accumulate */ 
00147       sum0 = (q31_t) ((((q63_t) x0 * c0) + (sum0 << 32)) >> 32); 
00148  
00149       /* Decrement the loop counter */ 
00150       tapCnt--; 
00151     } 
00152  
00153     /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 
00154     tapCnt = numTaps % 0x4u; 
00155  
00156     while(tapCnt > 0u) 
00157     { 
00158       /* Read coefficients */ 
00159       c0 = *(pb++); 
00160  
00161       /* Fetch 1 state variable */ 
00162       x0 = *(px++); 
00163  
00164       /* Perform the multiply-accumulate */ 
00165       sum0 = (q31_t) ((((q63_t) x0 * c0) + (sum0 << 32)) >> 32); 
00166  
00167       /* Decrement the loop counter */ 
00168       tapCnt--; 
00169     } 
00170  
00171     /* Advance the state pointer by the decimation factor  
00172      * to process the next group of decimation factor number samples */ 
00173     pState = pState + S->M; 
00174  
00175     /* The result is in the accumulator, store in the destination buffer. */ 
00176     *pDst++ = (q31_t) (sum0 << 1); 
00177  
00178     /* Decrement the loop counter */ 
00179     blkCnt--; 
00180   } 
00181  
00182   /* Processing is complete.  
00183    ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.  
00184    ** This prepares the state buffer for the next function call. */ 
00185  
00186   /* Points to the start of the state buffer */ 
00187   pStateCurnt = S->pState; 
00188  
00189   i = (numTaps - 1u) >> 2u; 
00190  
00191   /* copy data */ 
00192   while(i > 0u) 
00193   { 
00194     *pStateCurnt++ = *pState++; 
00195     *pStateCurnt++ = *pState++; 
00196     *pStateCurnt++ = *pState++; 
00197     *pStateCurnt++ = *pState++; 
00198  
00199     /* Decrement the loop counter */ 
00200     i--; 
00201   } 
00202  
00203   i = (numTaps - 1u) % 0x04u; 
00204  
00205   /* copy data */ 
00206   while(i > 0u) 
00207   { 
00208     *pStateCurnt++ = *pState++; 
00209  
00210     /* Decrement the loop counter */ 
00211     i--; 
00212   } 
00213 } 
00214  
00215 /**  
00216  * @} end of FIR_decimate group  
00217  */