Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-dsp by
arm_fir_q7.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 17. January 2013 00005 * $Revision: V1.4.1 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_q7.c 00009 * 00010 * Description: Q7 FIR filter processing function. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @ingroup groupFilters 00045 */ 00046 00047 /** 00048 * @addtogroup FIR 00049 * @{ 00050 */ 00051 00052 /** 00053 * @param[in] *S points to an instance of the Q7 FIR filter structure. 00054 * @param[in] *pSrc points to the block of input data. 00055 * @param[out] *pDst points to the block of output data. 00056 * @param[in] blockSize number of samples to process per call. 00057 * @return none. 00058 * 00059 * <b>Scaling and Overflow Behavior:</b> 00060 * \par 00061 * The function is implemented using a 32-bit internal accumulator. 00062 * Both coefficients and state variables are represented in 1.7 format and multiplications yield a 2.14 result. 00063 * The 2.14 intermediate results are accumulated in a 32-bit accumulator in 18.14 format. 00064 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved. 00065 * The accumulator is converted to 18.7 format by discarding the low 7 bits. 00066 * Finally, the result is truncated to 1.7 format. 
00067 */ 00068 00069 void arm_fir_q7( 00070 const arm_fir_instance_q7 * S, 00071 q7_t * pSrc, 00072 q7_t * pDst, 00073 uint32_t blockSize) 00074 { 00075 00076 #ifndef ARM_MATH_CM0_FAMILY 00077 00078 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00079 00080 q7_t *pState = S->pState; /* State pointer */ 00081 q7_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00082 q7_t *pStateCurnt; /* Points to the current sample of the state */ 00083 q7_t x0, x1, x2, x3; /* Temporary variables to hold state */ 00084 q7_t c0; /* Temporary variable to hold coefficient value */ 00085 q7_t *px; /* Temporary pointer for state */ 00086 q7_t *pb; /* Temporary pointer for coefficient buffer */ 00087 q31_t acc0, acc1, acc2, acc3; /* Accumulators */ 00088 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00089 uint32_t i, tapCnt, blkCnt; /* Loop counters */ 00090 00091 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00092 /* pStateCurnt points to the location where the new input data should be written */ 00093 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00094 00095 /* Apply loop unrolling and compute 4 output values simultaneously. 00096 * The variables acc0 ... acc3 hold output values that are being computed: 00097 * 00098 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] 00099 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1] 00100 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2] 00101 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3] 00102 */ 00103 blkCnt = blockSize >> 2; 00104 00105 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00106 ** a second loop below computes the remaining 1 to 3 samples. 
*/ 00107 while(blkCnt > 0u) 00108 { 00109 /* Copy four new input samples into the state buffer */ 00110 *pStateCurnt++ = *pSrc++; 00111 *pStateCurnt++ = *pSrc++; 00112 *pStateCurnt++ = *pSrc++; 00113 *pStateCurnt++ = *pSrc++; 00114 00115 /* Set all accumulators to zero */ 00116 acc0 = 0; 00117 acc1 = 0; 00118 acc2 = 0; 00119 acc3 = 0; 00120 00121 /* Initialize state pointer */ 00122 px = pState; 00123 00124 /* Initialize coefficient pointer */ 00125 pb = pCoeffs; 00126 00127 /* Read the first three samples from the state buffer: 00128 * x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2] */ 00129 x0 = *(px++); 00130 x1 = *(px++); 00131 x2 = *(px++); 00132 00133 /* Loop unrolling. Process 4 taps at a time. */ 00134 tapCnt = numTaps >> 2; 00135 i = tapCnt; 00136 00137 while(i > 0u) 00138 { 00139 /* Read the b[numTaps] coefficient */ 00140 c0 = *(pb++); 00141 00142 /* Read x[n-numTaps-3] sample */ 00143 x3 = *(px++); 00144 00145 /* acc0 += b[numTaps] * x[n-numTaps] */ 00146 acc0 += ((q15_t) x0 * c0); 00147 00148 /* acc1 += b[numTaps] * x[n-numTaps-1] */ 00149 acc1 += ((q15_t) x1 * c0); 00150 00151 /* acc2 += b[numTaps] * x[n-numTaps-2] */ 00152 acc2 += ((q15_t) x2 * c0); 00153 00154 /* acc3 += b[numTaps] * x[n-numTaps-3] */ 00155 acc3 += ((q15_t) x3 * c0); 00156 00157 /* Read the b[numTaps-1] coefficient */ 00158 c0 = *(pb++); 00159 00160 /* Read x[n-numTaps-4] sample */ 00161 x0 = *(px++); 00162 00163 /* Perform the multiply-accumulates */ 00164 acc0 += ((q15_t) x1 * c0); 00165 acc1 += ((q15_t) x2 * c0); 00166 acc2 += ((q15_t) x3 * c0); 00167 acc3 += ((q15_t) x0 * c0); 00168 00169 /* Read the b[numTaps-2] coefficient */ 00170 c0 = *(pb++); 00171 00172 /* Read x[n-numTaps-5] sample */ 00173 x1 = *(px++); 00174 00175 /* Perform the multiply-accumulates */ 00176 acc0 += ((q15_t) x2 * c0); 00177 acc1 += ((q15_t) x3 * c0); 00178 acc2 += ((q15_t) x0 * c0); 00179 acc3 += ((q15_t) x1 * c0); 00180 /* Read the b[numTaps-3] coefficients */ 00181 c0 = *(pb++); 00182 00183 /* Read 
x[n-numTaps-6] sample */ 00184 x2 = *(px++); 00185 00186 /* Perform the multiply-accumulates */ 00187 acc0 += ((q15_t) x3 * c0); 00188 acc1 += ((q15_t) x0 * c0); 00189 acc2 += ((q15_t) x1 * c0); 00190 acc3 += ((q15_t) x2 * c0); 00191 i--; 00192 } 00193 00194 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00195 00196 i = numTaps - (tapCnt * 4u); 00197 while(i > 0u) 00198 { 00199 /* Read coefficients */ 00200 c0 = *(pb++); 00201 00202 /* Fetch 1 state variable */ 00203 x3 = *(px++); 00204 00205 /* Perform the multiply-accumulates */ 00206 acc0 += ((q15_t) x0 * c0); 00207 acc1 += ((q15_t) x1 * c0); 00208 acc2 += ((q15_t) x2 * c0); 00209 acc3 += ((q15_t) x3 * c0); 00210 00211 /* Reuse the present sample states for next sample */ 00212 x0 = x1; 00213 x1 = x2; 00214 x2 = x3; 00215 00216 /* Decrement the loop counter */ 00217 i--; 00218 } 00219 00220 /* Advance the state pointer by 4 to process the next group of 4 samples */ 00221 pState = pState + 4; 00222 00223 /* The results in the 4 accumulators are in 2.62 format. Convert to 1.31 00224 ** Then store the 4 outputs in the destination buffer. */ 00225 acc0 = __SSAT((acc0 >> 7u), 8); 00226 *pDst++ = acc0; 00227 acc1 = __SSAT((acc1 >> 7u), 8); 00228 *pDst++ = acc1; 00229 acc2 = __SSAT((acc2 >> 7u), 8); 00230 *pDst++ = acc2; 00231 acc3 = __SSAT((acc3 >> 7u), 8); 00232 *pDst++ = acc3; 00233 00234 /* Decrement the samples loop counter */ 00235 blkCnt--; 00236 } 00237 00238 00239 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00240 ** No loop unrolling is used. 
*/ 00241 blkCnt = blockSize % 4u; 00242 00243 while(blkCnt > 0u) 00244 { 00245 /* Copy one sample at a time into state buffer */ 00246 *pStateCurnt++ = *pSrc++; 00247 00248 /* Set the accumulator to zero */ 00249 acc0 = 0; 00250 00251 /* Initialize state pointer */ 00252 px = pState; 00253 00254 /* Initialize Coefficient pointer */ 00255 pb = (pCoeffs); 00256 00257 i = numTaps; 00258 00259 /* Perform the multiply-accumulates */ 00260 do 00261 { 00262 acc0 += (q15_t) * (px++) * (*(pb++)); 00263 i--; 00264 } while(i > 0u); 00265 00266 /* The result is in 2.14 format. Convert to 1.7 00267 ** Then store the output in the destination buffer. */ 00268 *pDst++ = __SSAT((acc0 >> 7u), 8); 00269 00270 /* Advance state pointer by 1 for the next sample */ 00271 pState = pState + 1; 00272 00273 /* Decrement the samples loop counter */ 00274 blkCnt--; 00275 } 00276 00277 /* Processing is complete. 00278 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer. 00279 ** This prepares the state buffer for the next function call. 
*/ 00280 00281 /* Points to the start of the state buffer */ 00282 pStateCurnt = S->pState; 00283 00284 tapCnt = (numTaps - 1u) >> 2u; 00285 00286 /* copy data */ 00287 while(tapCnt > 0u) 00288 { 00289 *pStateCurnt++ = *pState++; 00290 *pStateCurnt++ = *pState++; 00291 *pStateCurnt++ = *pState++; 00292 *pStateCurnt++ = *pState++; 00293 00294 /* Decrement the loop counter */ 00295 tapCnt--; 00296 } 00297 00298 /* Calculate remaining number of copies */ 00299 tapCnt = (numTaps - 1u) % 0x4u; 00300 00301 /* Copy the remaining q31_t data */ 00302 while(tapCnt > 0u) 00303 { 00304 *pStateCurnt++ = *pState++; 00305 00306 /* Decrement the loop counter */ 00307 tapCnt--; 00308 } 00309 00310 #else 00311 00312 /* Run the below code for Cortex-M0 */ 00313 00314 uint32_t numTaps = S->numTaps; /* Number of taps in the filter */ 00315 uint32_t i, blkCnt; /* Loop counters */ 00316 q7_t *pState = S->pState; /* State pointer */ 00317 q7_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00318 q7_t *px, *pb; /* Temporary pointers to state and coeff */ 00319 q31_t acc = 0; /* Accumlator */ 00320 q7_t *pStateCurnt; /* Points to the current sample of the state */ 00321 00322 00323 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00324 /* pStateCurnt points to the location where the new input data should be written */ 00325 pStateCurnt = S->pState + (numTaps - 1u); 00326 00327 /* Initialize blkCnt with blockSize */ 00328 blkCnt = blockSize; 00329 00330 /* Perform filtering upto BlockSize - BlockSize%4 */ 00331 while(blkCnt > 0u) 00332 { 00333 /* Copy one sample at a time into state buffer */ 00334 *pStateCurnt++ = *pSrc++; 00335 00336 /* Set accumulator to zero */ 00337 acc = 0; 00338 00339 /* Initialize state pointer of type q7 */ 00340 px = pState; 00341 00342 /* Initialize coeff pointer of type q7 */ 00343 pb = pCoeffs; 00344 00345 00346 i = numTaps; 00347 00348 while(i > 0u) 00349 { 00350 /* acc = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * 
x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */ 00351 acc += (q15_t) * px++ * *pb++; 00352 i--; 00353 } 00354 00355 /* Store the 1.7 format filter output in destination buffer */ 00356 *pDst++ = (q7_t) __SSAT((acc >> 7), 8); 00357 00358 /* Advance the state pointer by 1 to process the next sample */ 00359 pState = pState + 1; 00360 00361 /* Decrement the loop counter */ 00362 blkCnt--; 00363 } 00364 00365 /* Processing is complete. 00366 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer. 00367 ** This prepares the state buffer for the next function call. */ 00368 00369 00370 /* Points to the start of the state buffer */ 00371 pStateCurnt = S->pState; 00372 00373 00374 /* Copy numTaps number of values */ 00375 i = (numTaps - 1u); 00376 00377 /* Copy q7_t data */ 00378 while(i > 0u) 00379 { 00380 *pStateCurnt++ = *pState++; 00381 i--; 00382 } 00383 00384 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00385 00386 } 00387 00388 /** 00389 * @} end of FIR group 00390 */
Generated on Tue Jul 12 2022 18:44:09 by
1.7.2
