Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of dsp by
arm_fir_decimate_fast_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_decimate_fast_q31.c 00009 * 00010 * Description: Fast Q31 FIR Decimator. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.3 2010/11/29 00015 * Re-organized the CMSIS folders and updated documentation. 00016 * 00017 * Version 1.0.2 2010/11/11 00018 * Documentation updated. 00019 * 00020 * Version 1.0.1 2010/10/05 00021 * Production release and review comments incorporated. 00022 * 00023 * Version 1.0.0 2010/09/20 00024 * Production release and review comments incorporated. 00025 * -------------------------------------------------------------------- */ 00026 00027 #include "arm_math.h" 00028 00029 /** 00030 * @ingroup groupFilters 00031 */ 00032 00033 /** 00034 * @addtogroup FIR_decimate 00035 * @{ 00036 */ 00037 00038 /** 00039 * @brief Processing function for the Q31 FIR decimator (fast variant). 00040 * @param[in] *S points to an instance of the Q31 FIR decimator structure. 00041 * @param[in] *pSrc points to the block of input data. 00042 * @param[out] *pDst points to the block of output data 00043 * @param[in] blockSize number of input samples to process per call. 00044 * @return none 00045 * 00046 * <b>Scaling and Overflow Behavior:</b> 00047 * 00048 * \par 00049 * This function is optimized for speed at the expense of fixed-point precision and overflow protection. 00050 * The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format. 00051 * These intermediate results are added to a 2.30 accumulator. 00052 * Finally, the accumulator is saturated and converted to a 1.31 result. 00053 * The fast version has the same overflow behavior as the standard version and provides less precision since it discards the low 32 bits of each multiplication result. 
00054 * In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits (where log2 is read as log to the base 2). 00055 * 00056 * \par 00057 * Refer to the function <code>arm_fir_decimate_q31()</code> for a slower implementation of this function which uses a 64-bit accumulator to provide higher precision. 00058 * Both the slow and the fast versions use the same instance structure. 00059 * Use the function <code>arm_fir_decimate_init_q31()</code> to initialize the filter structure. 00060 */ 00061 00062 void arm_fir_decimate_fast_q31( 00063 arm_fir_decimate_instance_q31 * S, 00064 q31_t * pSrc, 00065 q31_t * pDst, 00066 uint32_t blockSize) 00067 { 00068 q31_t *pState = S->pState; /* State pointer */ 00069 q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00070 q31_t *pStateCurnt; /* Points to the current sample of the state */ 00071 q31_t x0, c0; /* Temporary variables to hold state and coefficient values */ 00072 q31_t *px; /* Temporary pointers for state buffer */ 00073 q31_t *pb; /* Temporary pointers for coefficient buffer */ 00074 q63_t sum0; /* Accumulator */ 00075 uint32_t numTaps = S->numTaps; /* Number of taps */ 00076 uint32_t i, tapCnt, blkCnt, outBlockSize = blockSize / S->M; /* Loop counters */ 00077 00078 00079 /* S->pState buffer contains previous frame (numTaps - 1) samples */ 00080 /* pStateCurnt points to the location where the new input data should be written */ 00081 pStateCurnt = S->pState + (numTaps - 1u); 00082 00083 /* Total number of output samples to be computed */ 00084 blkCnt = outBlockSize; 00085 00086 while(blkCnt > 0u) 00087 { 00088 /* Copy decimation factor number of new input samples into the state buffer */ 00089 i = S->M; 00090 00091 do 00092 { 00093 *pStateCurnt++ = *pSrc++; 00094 00095 } while(--i); 00096 00097 /* Set accumulator to zero */ 00098 sum0 = 0; 00099 00100 /* Initialize state pointer */ 00101 px = pState; 00102 00103 /* Initialize coeff pointer */ 00104 pb = pCoeffs; 00105 00106 
/* Loop unrolling. Process 4 taps at a time. */ 00107 tapCnt = numTaps >> 2; 00108 00109 /* Loop over the number of taps. Unroll by a factor of 4. 00110 ** Repeat until we've computed numTaps-4 coefficients. */ 00111 while(tapCnt > 0u) 00112 { 00113 /* Read the b[numTaps-1] coefficient */ 00114 c0 = *(pb++); 00115 00116 /* Read x[n-numTaps-1] sample */ 00117 x0 = *(px++); 00118 00119 /* Perform the multiply-accumulate */ 00120 sum0 = (q31_t) ((((q63_t) x0 * c0) + (sum0 << 32)) >> 32); 00121 00122 /* Read the b[numTaps-2] coefficient */ 00123 c0 = *(pb++); 00124 00125 /* Read x[n-numTaps-2] sample */ 00126 x0 = *(px++); 00127 00128 /* Perform the multiply-accumulate */ 00129 sum0 = (q31_t) ((((q63_t) x0 * c0) + (sum0 << 32)) >> 32); 00130 00131 /* Read the b[numTaps-3] coefficient */ 00132 c0 = *(pb++); 00133 00134 /* Read x[n-numTaps-3] sample */ 00135 x0 = *(px++); 00136 00137 /* Perform the multiply-accumulate */ 00138 sum0 = (q31_t) ((((q63_t) x0 * c0) + (sum0 << 32)) >> 32); 00139 00140 /* Read the b[numTaps-4] coefficient */ 00141 c0 = *(pb++); 00142 00143 /* Read x[n-numTaps-4] sample */ 00144 x0 = *(px++); 00145 00146 /* Perform the multiply-accumulate */ 00147 sum0 = (q31_t) ((((q63_t) x0 * c0) + (sum0 << 32)) >> 32); 00148 00149 /* Decrement the loop counter */ 00150 tapCnt--; 00151 } 00152 00153 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00154 tapCnt = numTaps % 0x4u; 00155 00156 while(tapCnt > 0u) 00157 { 00158 /* Read coefficients */ 00159 c0 = *(pb++); 00160 00161 /* Fetch 1 state variable */ 00162 x0 = *(px++); 00163 00164 /* Perform the multiply-accumulate */ 00165 sum0 = (q31_t) ((((q63_t) x0 * c0) + (sum0 << 32)) >> 32); 00166 00167 /* Decrement the loop counter */ 00168 tapCnt--; 00169 } 00170 00171 /* Advance the state pointer by the decimation factor 00172 * to process the next group of decimation factor number samples */ 00173 pState = pState + S->M; 00174 00175 /* The result is in the accumulator, 
store in the destination buffer. */ 00176 *pDst++ = (q31_t) (sum0 << 1); 00177 00178 /* Decrement the loop counter */ 00179 blkCnt--; 00180 } 00181 00182 /* Processing is complete. 00183 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer. 00184 ** This prepares the state buffer for the next function call. */ 00185 00186 /* Points to the start of the state buffer */ 00187 pStateCurnt = S->pState; 00188 00189 i = (numTaps - 1u) >> 2u; 00190 00191 /* copy data */ 00192 while(i > 0u) 00193 { 00194 *pStateCurnt++ = *pState++; 00195 *pStateCurnt++ = *pState++; 00196 *pStateCurnt++ = *pState++; 00197 *pStateCurnt++ = *pState++; 00198 00199 /* Decrement the loop counter */ 00200 i--; 00201 } 00202 00203 i = (numTaps - 1u) % 0x04u; 00204 00205 /* copy data */ 00206 while(i > 0u) 00207 { 00208 *pStateCurnt++ = *pState++; 00209 00210 /* Decrement the loop counter */ 00211 i--; 00212 } 00213 } 00214 00215 /** 00216 * @} end of FIR_decimate group 00217 */
Generated on Tue Jul 12 2022 19:55:43 by
 1.7.2
 1.7.2 
    