Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of dsp by
arm_fir_decimate_fast_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_decimate_fast_q15.c 00009 * 00010 * Description: Fast Q15 FIR Decimator. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.3 2010/11/29 00015 * Re-organized the CMSIS folders and updated documentation. 00016 * 00017 * Version 1.0.2 2010/11/11 00018 * Documentation updated. 00019 * 00020 * Version 1.0.1 2010/10/05 00021 * Production release and review comments incorporated. 00022 * 00023 * Version 1.0.0 2010/09/20 00024 * Production release and review comments incorporated. 00025 * -------------------------------------------------------------------- */ 00026 00027 #include "arm_math.h" 00028 00029 /** 00030 * @ingroup groupFilters 00031 */ 00032 00033 /** 00034 * @addtogroup FIR_decimate 00035 * @{ 00036 */ 00037 00038 /** 00039 * @brief Processing function for the Q15 FIR decimator (fast variant). 00040 * @param[in] *S points to an instance of the Q15 FIR decimator structure. 00041 * @param[in] *pSrc points to the block of input data. 00042 * @param[out] *pDst points to the block of output data 00043 * @param[in] blockSize number of input samples to process per call. 00044 * @return none 00045 * 00046 * <b>Scaling and Overflow Behavior:</b> 00047 * \par 00048 * This fast version uses a 32-bit accumulator with 2.30 format. 00049 * The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit. 00050 * Thus, if the accumulator result overflows it wraps around and distorts the result. 00051 * In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits (log2 is read as log to the base 2). 
00052 * The 2.30 accumulator is then truncated to 2.15 format and saturated to yield the 1.15 result. 00053 * 00054 * \par 00055 * Refer to the function <code>arm_fir_decimate_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion. 00056 * Both the slow and the fast versions use the same instance structure. 00057 * Use the function <code>arm_fir_decimate_init_q15()</code> to initialize the filter structure. 00058 */ 00059 00060 void arm_fir_decimate_fast_q15( 00061 const arm_fir_decimate_instance_q15 * S, 00062 q15_t * pSrc, 00063 q15_t * pDst, 00064 uint32_t blockSize) 00065 { 00066 q15_t *pState = S->pState; /* State pointer */ 00067 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00068 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00069 q15_t *px; /* Temporary pointer for state buffer */ 00070 q15_t *pb; /* Temporary pointer coefficient buffer */ 00071 q31_t x0, c0; /* Temporary variables to hold state and coefficient values */ 00072 q31_t sum0; /* Accumulators */ 00073 uint32_t numTaps = S->numTaps; /* Number of taps */ 00074 uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M; /* Loop counters */ 00075 00076 00077 /* S->pState buffer contains previous frame (numTaps - 1) samples */ 00078 /* pStateCurnt points to the location where the new input data should be written */ 00079 pStateCurnt = S->pState + (numTaps - 1u); 00080 00081 /* Total number of output samples to be computed */ 00082 blkCnt = outBlockSize; 00083 00084 while(blkCnt > 0u) 00085 { 00086 /* Copy decimation factor number of new input samples into the state buffer */ 00087 i = S->M; 00088 00089 do 00090 { 00091 *pStateCurnt++ = *pSrc++; 00092 00093 } while(--i); 00094 00095 /*Set sum to zero */ 00096 sum0 = 0; 00097 00098 /* Initialize state pointer */ 00099 px = pState; 00100 00101 /* Initialize coeff pointer */ 00102 pb = pCoeffs; 00103 00104 /* Loop unrolling. Process 4 taps at a time. 
*/ 00105 tapCnt = numTaps >> 2; 00106 00107 /* Loop over the number of taps. Unroll by a factor of 4. 00108 ** Repeat until we've computed numTaps-4 coefficients. */ 00109 while(tapCnt > 0u) 00110 { 00111 /* Read the Read b[numTaps-1] and b[numTaps-2] coefficients */ 00112 c0 = *__SIMD32(pb)++; 00113 00114 /* Read x[n-numTaps-1] and x[n-numTaps-2]sample */ 00115 x0 = *__SIMD32(px)++; 00116 00117 /* Perform the multiply-accumulate */ 00118 sum0 = __SMLAD(x0, c0, sum0); 00119 00120 /* Read the b[numTaps-3] and b[numTaps-4] coefficient */ 00121 c0 = *__SIMD32(pb)++; 00122 00123 /* Read x[n-numTaps-2] and x[n-numTaps-3] sample */ 00124 x0 = *__SIMD32(px)++; 00125 00126 /* Perform the multiply-accumulate */ 00127 sum0 = __SMLAD(x0, c0, sum0); 00128 00129 /* Decrement the loop counter */ 00130 tapCnt--; 00131 } 00132 00133 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00134 tapCnt = numTaps % 0x4u; 00135 00136 while(tapCnt > 0u) 00137 { 00138 /* Read coefficients */ 00139 c0 = *pb++; 00140 00141 /* Fetch 1 state variable */ 00142 x0 = *px++; 00143 00144 /* Perform the multiply-accumulate */ 00145 sum0 = __SMLAD(x0, c0, sum0); 00146 00147 /* Decrement the loop counter */ 00148 tapCnt--; 00149 } 00150 00151 /* Advance the state pointer by the decimation factor 00152 * to process the next group of decimation factor number samples */ 00153 pState = pState + S->M; 00154 00155 /* Store filter output , smlad returns the values in 2.14 format */ 00156 /* so downsacle by 15 to get output in 1.15 */ 00157 *pDst++ = (q15_t) ((sum0 >> 15)); 00158 00159 /* Decrement the loop counter */ 00160 blkCnt--; 00161 } 00162 00163 /* Processing is complete. 00164 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer. 00165 ** This prepares the state buffer for the next function call. 
*/ 00166 00167 /* Points to the start of the state buffer */ 00168 pStateCurnt = S->pState; 00169 00170 i = (numTaps - 1u) >> 2u; 00171 00172 /* copy data */ 00173 while(i > 0u) 00174 { 00175 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00176 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00177 00178 /* Decrement the loop counter */ 00179 i--; 00180 } 00181 00182 i = (numTaps - 1u) % 0x04u; 00183 00184 /* copy data */ 00185 while(i > 0u) 00186 { 00187 *pStateCurnt++ = *pState++; 00188 00189 /* Decrement the loop counter */ 00190 i--; 00191 } 00192 } 00193 00194 /** 00195 * @} end of FIR_decimate group 00196 */
Generated on Tue Jul 12 2022 19:55:43 by
1.7.2
