Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-os by
arm_fir_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_q31.c 00009 * 00010 * Description: Q31 FIR filter processing function. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @ingroup groupFilters 00045 */ 00046 00047 /** 00048 * @addtogroup FIR 00049 * @{ 00050 */ 00051 00052 /** 00053 * @param[in] *S points to an instance of the Q31 FIR filter structure. 00054 * @param[in] *pSrc points to the block of input data. 00055 * @param[out] *pDst points to the block of output data. 00056 * @param[in] blockSize number of samples to process per call. 00057 * @return none. 00058 * 00059 * @details 00060 * <b>Scaling and Overflow Behavior:</b> 00061 * \par 00062 * The function is implemented using an internal 64-bit accumulator. 00063 * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit. 00064 * Thus, if the accumulator result overflows it wraps around rather than clip. 00065 * In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits. 00066 * After all multiply-accumulates are performed, the 2.62 accumulator is right shifted by 31 bits and saturated to 1.31 format to yield the final result. 00067 * 00068 * \par 00069 * Refer to the function <code>arm_fir_fast_q31()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4. 00070 */ 00071 00072 void arm_fir_q31( 00073 const arm_fir_instance_q31 * S, 00074 q31_t * pSrc, 00075 q31_t * pDst, 00076 uint32_t blockSize) 00077 { 00078 q31_t *pState = S->pState; /* State pointer */ 00079 q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00080 q31_t *pStateCurnt; /* Points to the current sample of the state */ 00081 00082 00083 #ifndef ARM_MATH_CM0_FAMILY 00084 00085 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00086 00087 q31_t x0, x1, x2; /* Temporary variables to hold state */ 00088 q31_t c0; /* Temporary variable to hold coefficient value */ 00089 q31_t *px; /* Temporary pointer for state */ 00090 q31_t *pb; /* Temporary pointer for coefficient buffer */ 00091 q63_t acc0, acc1, acc2; /* Accumulators */ 00092 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00093 uint32_t i, tapCnt, blkCnt, tapCntN3; /* Loop counters */ 00094 00095 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00096 /* pStateCurnt points to the location where the new input data should be written */ 00097 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00098 00099 /* Apply loop unrolling and compute 4 output values simultaneously. 00100 * The variables acc0 ... acc3 hold output values that are being computed: 00101 * 00102 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] 00103 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1] 00104 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2] 00105 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3] 00106 */ 00107 blkCnt = blockSize / 3; 00108 blockSize = blockSize - (3 * blkCnt); 00109 00110 tapCnt = numTaps / 3; 00111 tapCntN3 = numTaps - (3 * tapCnt); 00112 00113 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00114 ** a second loop below computes the remaining 1 to 3 samples. */ 00115 while(blkCnt > 0u) 00116 { 00117 /* Copy three new input samples into the state buffer */ 00118 *pStateCurnt++ = *pSrc++; 00119 *pStateCurnt++ = *pSrc++; 00120 *pStateCurnt++ = *pSrc++; 00121 00122 /* Set all accumulators to zero */ 00123 acc0 = 0; 00124 acc1 = 0; 00125 acc2 = 0; 00126 00127 /* Initialize state pointer */ 00128 px = pState; 00129 00130 /* Initialize coefficient pointer */ 00131 pb = pCoeffs; 00132 00133 /* Read the first two samples from the state buffer: 00134 * x[n-numTaps], x[n-numTaps-1] */ 00135 x0 = *(px++); 00136 x1 = *(px++); 00137 00138 /* Loop unrolling. Process 3 taps at a time. */ 00139 i = tapCnt; 00140 00141 while(i > 0u) 00142 { 00143 /* Read the b[numTaps] coefficient */ 00144 c0 = *pb; 00145 00146 /* Read x[n-numTaps-2] sample */ 00147 x2 = *(px++); 00148 00149 /* Perform the multiply-accumulates */ 00150 acc0 += ((q63_t) x0 * c0); 00151 acc1 += ((q63_t) x1 * c0); 00152 acc2 += ((q63_t) x2 * c0); 00153 00154 /* Read the coefficient and state */ 00155 c0 = *(pb + 1u); 00156 x0 = *(px++); 00157 00158 /* Perform the multiply-accumulates */ 00159 acc0 += ((q63_t) x1 * c0); 00160 acc1 += ((q63_t) x2 * c0); 00161 acc2 += ((q63_t) x0 * c0); 00162 00163 /* Read the coefficient and state */ 00164 c0 = *(pb + 2u); 00165 x1 = *(px++); 00166 00167 /* update coefficient pointer */ 00168 pb += 3u; 00169 00170 /* Perform the multiply-accumulates */ 00171 acc0 += ((q63_t) x2 * c0); 00172 acc1 += ((q63_t) x0 * c0); 00173 acc2 += ((q63_t) x1 * c0); 00174 00175 /* Decrement the loop counter */ 00176 i--; 00177 } 00178 00179 /* If the filter length is not a multiple of 3, compute the remaining filter taps */ 00180 00181 i = tapCntN3; 00182 00183 while(i > 0u) 00184 { 00185 /* Read coefficients */ 00186 c0 = *(pb++); 00187 00188 /* Fetch 1 state variable */ 00189 x2 = *(px++); 00190 00191 /* Perform the multiply-accumulates */ 00192 acc0 += ((q63_t) x0 * c0); 00193 acc1 += ((q63_t) x1 * c0); 00194 acc2 += ((q63_t) x2 * c0); 00195 00196 /* Reuse the present sample states for next sample */ 00197 x0 = x1; 00198 x1 = x2; 00199 00200 /* Decrement the loop counter */ 00201 i--; 00202 } 00203 00204 /* Advance the state pointer by 3 to process the next group of 3 samples */ 00205 pState = pState + 3; 00206 00207 /* The results in the 3 accumulators are in 2.30 format. Convert to 1.31 00208 ** Then store the 3 outputs in the destination buffer. */ 00209 *pDst++ = (q31_t) (acc0 >> 31u); 00210 *pDst++ = (q31_t) (acc1 >> 31u); 00211 *pDst++ = (q31_t) (acc2 >> 31u); 00212 00213 /* Decrement the samples loop counter */ 00214 blkCnt--; 00215 } 00216 00217 /* If the blockSize is not a multiple of 3, compute any remaining output samples here. 00218 ** No loop unrolling is used. */ 00219 00220 while(blockSize > 0u) 00221 { 00222 /* Copy one sample at a time into state buffer */ 00223 *pStateCurnt++ = *pSrc++; 00224 00225 /* Set the accumulator to zero */ 00226 acc0 = 0; 00227 00228 /* Initialize state pointer */ 00229 px = pState; 00230 00231 /* Initialize Coefficient pointer */ 00232 pb = (pCoeffs); 00233 00234 i = numTaps; 00235 00236 /* Perform the multiply-accumulates */ 00237 do 00238 { 00239 acc0 += (q63_t) * (px++) * (*(pb++)); 00240 i--; 00241 } while(i > 0u); 00242 00243 /* The result is in 2.62 format. Convert to 1.31 00244 ** Then store the output in the destination buffer. */ 00245 *pDst++ = (q31_t) (acc0 >> 31u); 00246 00247 /* Advance state pointer by 1 for the next sample */ 00248 pState = pState + 1; 00249 00250 /* Decrement the samples loop counter */ 00251 blockSize--; 00252 } 00253 00254 /* Processing is complete. 00255 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer. 00256 ** This prepares the state buffer for the next function call. */ 00257 00258 /* Points to the start of the state buffer */ 00259 pStateCurnt = S->pState; 00260 00261 tapCnt = (numTaps - 1u) >> 2u; 00262 00263 /* copy data */ 00264 while(tapCnt > 0u) 00265 { 00266 *pStateCurnt++ = *pState++; 00267 *pStateCurnt++ = *pState++; 00268 *pStateCurnt++ = *pState++; 00269 *pStateCurnt++ = *pState++; 00270 00271 /* Decrement the loop counter */ 00272 tapCnt--; 00273 } 00274 00275 /* Calculate remaining number of copies */ 00276 tapCnt = (numTaps - 1u) % 0x4u; 00277 00278 /* Copy the remaining q31_t data */ 00279 while(tapCnt > 0u) 00280 { 00281 *pStateCurnt++ = *pState++; 00282 00283 /* Decrement the loop counter */ 00284 tapCnt--; 00285 } 00286 00287 #else 00288 00289 /* Run the below code for Cortex-M0 */ 00290 00291 q31_t *px; /* Temporary pointer for state */ 00292 q31_t *pb; /* Temporary pointer for coefficient buffer */ 00293 q63_t acc; /* Accumulator */ 00294 uint32_t numTaps = S->numTaps; /* Length of the filter */ 00295 uint32_t i, tapCnt, blkCnt; /* Loop counters */ 00296 00297 /* S->pState buffer contains previous frame (numTaps - 1) samples */ 00298 /* pStateCurnt points to the location where the new input data should be written */ 00299 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00300 00301 /* Initialize blkCnt with blockSize */ 00302 blkCnt = blockSize; 00303 00304 while(blkCnt > 0u) 00305 { 00306 /* Copy one sample at a time into state buffer */ 00307 *pStateCurnt++ = *pSrc++; 00308 00309 /* Set the accumulator to zero */ 00310 acc = 0; 00311 00312 /* Initialize state pointer */ 00313 px = pState; 00314 00315 /* Initialize Coefficient pointer */ 00316 pb = pCoeffs; 00317 00318 i = numTaps; 00319 00320 /* Perform the multiply-accumulates */ 00321 do 00322 { 00323 /* acc = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */ 00324 acc += (q63_t) * px++ * *pb++; 00325 i--; 00326 } while(i > 0u); 00327 00328 /* The result is in 2.62 format. Convert to 1.31 00329 ** Then store the output in the destination buffer. */ 00330 *pDst++ = (q31_t) (acc >> 31u); 00331 00332 /* Advance state pointer by 1 for the next sample */ 00333 pState = pState + 1; 00334 00335 /* Decrement the samples loop counter */ 00336 blkCnt--; 00337 } 00338 00339 /* Processing is complete. 00340 ** Now copy the last numTaps - 1 samples to the starting of the state buffer. 00341 ** This prepares the state buffer for the next function call. */ 00342 00343 /* Points to the start of the state buffer */ 00344 pStateCurnt = S->pState; 00345 00346 /* Copy numTaps number of values */ 00347 tapCnt = numTaps - 1u; 00348 00349 /* Copy the data */ 00350 while(tapCnt > 0u) 00351 { 00352 *pStateCurnt++ = *pState++; 00353 00354 /* Decrement the loop counter */ 00355 tapCnt--; 00356 } 00357 00358 00359 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00360 00361 } 00362 00363 /** 00364 * @} end of FIR group 00365 */
Generated on Tue Jul 12 2022 13:15:24 by
