Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-dsp by
arm_fir_decimate_fast_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 17. January 2013 00005 * $Revision: V1.4.1 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_decimate_fast_q31.c 00009 * 00010 * Description: Fast Q31 FIR Decimator. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @ingroup groupFilters 00045 */ 00046 00047 /** 00048 * @addtogroup FIR_decimate 00049 * @{ 00050 */ 00051 00052 /** 00053 * @brief Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4. 00054 * @param[in] *S points to an instance of the Q31 FIR decimator structure. 00055 * @param[in] *pSrc points to the block of input data. 00056 * @param[out] *pDst points to the block of output data 00057 * @param[in] blockSize number of input samples to process per call. 00058 * @return none 00059 * 00060 * <b>Scaling and Overflow Behavior:</b> 00061 * 00062 * \par 00063 * This function is optimized for speed at the expense of fixed-point precision and overflow protection. 00064 * The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format. 00065 * These intermediate results are added to a 2.30 accumulator. 00066 * Finally, the accumulator is saturated and converted to a 1.31 result. 00067 * The fast version has the same overflow behavior as the standard version and provides less precision since it discards the low 32 bits of each multiplication result. 00068 * In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits (where log2 is read as log to the base 2). 
00069 * 00070 * \par 00071 * Refer to the function <code>arm_fir_decimate_q31()</code> for a slower implementation of this function which uses a 64-bit accumulator to provide higher precision. 00072 * Both the slow and the fast versions use the same instance structure. 00073 * Use the function <code>arm_fir_decimate_init_q31()</code> to initialize the filter structure. 00074 */ 00075 00076 void arm_fir_decimate_fast_q31( 00077 arm_fir_decimate_instance_q31 * S, 00078 q31_t * pSrc, 00079 q31_t * pDst, 00080 uint32_t blockSize) 00081 { 00082 q31_t *pState = S->pState; /* State pointer */ 00083 q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00084 q31_t *pStateCurnt; /* Points to the current sample of the state */ 00085 q31_t x0, c0; /* Temporary variables to hold state and coefficient values */ 00086 q31_t *px; /* Temporary pointers for state buffer */ 00087 q31_t *pb; /* Temporary pointers for coefficient buffer */ 00088 q31_t sum0; /* Accumulator */ 00089 uint32_t numTaps = S->numTaps; /* Number of taps */ 00090 uint32_t i, tapCnt, blkCnt, outBlockSize = blockSize / S->M; /* Loop counters */ 00091 uint32_t blkCntN2; 00092 q31_t x1; 00093 q31_t acc0, acc1; 00094 q31_t *px0, *px1; 00095 00096 /* S->pState buffer contains previous frame (numTaps - 1) samples */ 00097 /* pStateCurnt points to the location where the new input data should be written */ 00098 pStateCurnt = S->pState + (numTaps - 1u); 00099 00100 /* Total number of output samples to be computed */ 00101 00102 blkCnt = outBlockSize / 2; 00103 blkCntN2 = outBlockSize - (2 * blkCnt); 00104 00105 while(blkCnt > 0u) 00106 { 00107 /* Copy decimation factor number of new input samples into the state buffer */ 00108 i = 2 * S->M; 00109 00110 do 00111 { 00112 *pStateCurnt++ = *pSrc++; 00113 00114 } while(--i); 00115 00116 /* Set accumulator to zero */ 00117 acc0 = 0; 00118 acc1 = 0; 00119 00120 /* Initialize state pointer */ 00121 px0 = pState; 00122 px1 = pState + S->M; 00123 00124 /* Initialize coeff 
pointer */ 00125 pb = pCoeffs; 00126 00127 /* Loop unrolling. Process 4 taps at a time. */ 00128 tapCnt = numTaps >> 2; 00129 00130 /* Loop over the number of taps. Unroll by a factor of 4. 00131 ** Repeat until we've computed numTaps-4 coefficients. */ 00132 while(tapCnt > 0u) 00133 { 00134 /* Read the b[numTaps-1] coefficient */ 00135 c0 = *(pb); 00136 00137 /* Read x[n-numTaps-1] for sample 0 sample 1 */ 00138 x0 = *(px0); 00139 x1 = *(px1); 00140 00141 /* Perform the multiply-accumulate */ 00142 acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32); 00143 acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32); 00144 00145 /* Read the b[numTaps-2] coefficient */ 00146 c0 = *(pb + 1u); 00147 00148 /* Read x[n-numTaps-2] for sample 0 sample 1 */ 00149 x0 = *(px0 + 1u); 00150 x1 = *(px1 + 1u); 00151 00152 /* Perform the multiply-accumulate */ 00153 acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32); 00154 acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32); 00155 00156 /* Read the b[numTaps-3] coefficient */ 00157 c0 = *(pb + 2u); 00158 00159 /* Read x[n-numTaps-3] for sample 0 sample 1 */ 00160 x0 = *(px0 + 2u); 00161 x1 = *(px1 + 2u); 00162 pb += 4u; 00163 00164 /* Perform the multiply-accumulate */ 00165 acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32); 00166 acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32); 00167 00168 /* Read the b[numTaps-4] coefficient */ 00169 c0 = *(pb - 1u); 00170 00171 /* Read x[n-numTaps-4] for sample 0 sample 1 */ 00172 x0 = *(px0 + 3u); 00173 x1 = *(px1 + 3u); 00174 00175 00176 /* Perform the multiply-accumulate */ 00177 acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32); 00178 acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32); 00179 00180 /* update state pointers */ 00181 px0 += 4u; 00182 px1 += 4u; 00183 00184 /* Decrement the loop counter */ 00185 tapCnt--; 00186 } 00187 00188 /* If the filter length is not a 
multiple of 4, compute the remaining filter taps */ 00189 tapCnt = numTaps % 0x4u; 00190 00191 while(tapCnt > 0u) 00192 { 00193 /* Read coefficients */ 00194 c0 = *(pb++); 00195 00196 /* Fetch 1 state variable */ 00197 x0 = *(px0++); 00198 x1 = *(px1++); 00199 00200 /* Perform the multiply-accumulate */ 00201 acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32); 00202 acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32); 00203 00204 /* Decrement the loop counter */ 00205 tapCnt--; 00206 } 00207 00208 /* Advance the state pointer by the decimation factor 00209 * to process the next group of decimation factor number samples */ 00210 pState = pState + S->M * 2; 00211 00212 /* The result is in the accumulator, store in the destination buffer. */ 00213 *pDst++ = (q31_t) (acc0 << 1); 00214 *pDst++ = (q31_t) (acc1 << 1); 00215 00216 /* Decrement the loop counter */ 00217 blkCnt--; 00218 } 00219 00220 while(blkCntN2 > 0u) 00221 { 00222 /* Copy decimation factor number of new input samples into the state buffer */ 00223 i = S->M; 00224 00225 do 00226 { 00227 *pStateCurnt++ = *pSrc++; 00228 00229 } while(--i); 00230 00231 /* Set accumulator to zero */ 00232 sum0 = 0; 00233 00234 /* Initialize state pointer */ 00235 px = pState; 00236 00237 /* Initialize coeff pointer */ 00238 pb = pCoeffs; 00239 00240 /* Loop unrolling. Process 4 taps at a time. */ 00241 tapCnt = numTaps >> 2; 00242 00243 /* Loop over the number of taps. Unroll by a factor of 4. 00244 ** Repeat until we've computed numTaps-4 coefficients. 
*/ 00245 while(tapCnt > 0u) 00246 { 00247 /* Read the b[numTaps-1] coefficient */ 00248 c0 = *(pb++); 00249 00250 /* Read x[n-numTaps-1] sample */ 00251 x0 = *(px++); 00252 00253 /* Perform the multiply-accumulate */ 00254 sum0 = (q31_t) ((((q63_t) sum0 << 32) + ((q63_t) x0 * c0)) >> 32); 00255 00256 /* Read the b[numTaps-2] coefficient */ 00257 c0 = *(pb++); 00258 00259 /* Read x[n-numTaps-2] sample */ 00260 x0 = *(px++); 00261 00262 /* Perform the multiply-accumulate */ 00263 sum0 = (q31_t) ((((q63_t) sum0 << 32) + ((q63_t) x0 * c0)) >> 32); 00264 00265 /* Read the b[numTaps-3] coefficient */ 00266 c0 = *(pb++); 00267 00268 /* Read x[n-numTaps-3] sample */ 00269 x0 = *(px++); 00270 00271 /* Perform the multiply-accumulate */ 00272 sum0 = (q31_t) ((((q63_t) sum0 << 32) + ((q63_t) x0 * c0)) >> 32); 00273 00274 /* Read the b[numTaps-4] coefficient */ 00275 c0 = *(pb++); 00276 00277 /* Read x[n-numTaps-4] sample */ 00278 x0 = *(px++); 00279 00280 /* Perform the multiply-accumulate */ 00281 sum0 = (q31_t) ((((q63_t) sum0 << 32) + ((q63_t) x0 * c0)) >> 32); 00282 00283 /* Decrement the loop counter */ 00284 tapCnt--; 00285 } 00286 00287 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00288 tapCnt = numTaps % 0x4u; 00289 00290 while(tapCnt > 0u) 00291 { 00292 /* Read coefficients */ 00293 c0 = *(pb++); 00294 00295 /* Fetch 1 state variable */ 00296 x0 = *(px++); 00297 00298 /* Perform the multiply-accumulate */ 00299 sum0 = (q31_t) ((((q63_t) sum0 << 32) + ((q63_t) x0 * c0)) >> 32); 00300 00301 /* Decrement the loop counter */ 00302 tapCnt--; 00303 } 00304 00305 /* Advance the state pointer by the decimation factor 00306 * to process the next group of decimation factor number samples */ 00307 pState = pState + S->M; 00308 00309 /* The result is in the accumulator, store in the destination buffer. 
*/ 00310 *pDst++ = (q31_t) (sum0 << 1); 00311 00312 /* Decrement the loop counter */ 00313 blkCntN2--; 00314 } 00315 00316 /* Processing is complete. 00317 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer. 00318 ** This prepares the state buffer for the next function call. */ 00319 00320 /* Points to the start of the state buffer */ 00321 pStateCurnt = S->pState; 00322 00323 i = (numTaps - 1u) >> 2u; 00324 00325 /* copy data */ 00326 while(i > 0u) 00327 { 00328 *pStateCurnt++ = *pState++; 00329 *pStateCurnt++ = *pState++; 00330 *pStateCurnt++ = *pState++; 00331 *pStateCurnt++ = *pState++; 00332 00333 /* Decrement the loop counter */ 00334 i--; 00335 } 00336 00337 i = (numTaps - 1u) % 0x04u; 00338 00339 /* copy data */ 00340 while(i > 0u) 00341 { 00342 *pStateCurnt++ = *pState++; 00343 00344 /* Decrement the loop counter */ 00345 i--; 00346 } 00347 } 00348 00349 /** 00350 * @} end of FIR_decimate group 00351 */
Generated on Tue Jul 12 2022 18:44:09 by
