Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-dsp by
arm_fir_fast_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 17. January 2013 00005 * $Revision: V1.4.1 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_fast_q15.c 00009 * 00010 * Description: Q15 Fast FIR filter processing function. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @ingroup groupFilters 00045 */ 00046 00047 /** 00048 * @addtogroup FIR 00049 * @{ 00050 */ 00051 00052 /** 00053 * @param[in] *S points to an instance of the Q15 FIR filter structure. 00054 * @param[in] *pSrc points to the block of input data. 00055 * @param[out] *pDst points to the block of output data. 00056 * @param[in] blockSize number of samples to process per call. 00057 * @return none. 00058 * 00059 * <b>Scaling and Overflow Behavior:</b> 00060 * \par 00061 * This fast version uses a 32-bit accumulator with 2.30 format. 00062 * The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit. 00063 * Thus, if the accumulator result overflows it wraps around and distorts the result. 00064 * In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits. 00065 * The 2.30 accumulator is then truncated to 2.15 format and saturated to yield the 1.15 result. 00066 * 00067 * \par 00068 * Refer to the function <code>arm_fir_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion. Both the slow and the fast versions use the same instance structure. 00069 * Use the function <code>arm_fir_init_q15()</code> to initialize the filter structure. 00070 */ 00071 00072 void arm_fir_fast_q15( 00073 const arm_fir_instance_q15 * S, 00074 q15_t * pSrc, 00075 q15_t * pDst, 00076 uint32_t blockSize) 00077 { 00078 q15_t *pState = S->pState; /* State pointer */ 00079 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00080 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00081 q31_t acc0, acc1, acc2, acc3; /* Accumulators */ 00082 q15_t *pb; /* Temporary pointer for coefficient buffer */ 00083 q15_t *px; /* Temporary q31 pointer for SIMD state buffer accesses */ 00084 q31_t x0, x1, x2, c0; /* Temporary variables to hold SIMD state and coefficient values */ 00085 uint32_t numTaps = S->numTaps; /* Number of taps in the filter */ 00086 uint32_t tapCnt, blkCnt; /* Loop counters */ 00087 00088 00089 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00090 /* pStateCurnt points to the location where the new input data should be written */ 00091 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00092 00093 /* Apply loop unrolling and compute 4 output values simultaneously. 00094 * The variables acc0 ... acc3 hold output values that are being computed: 00095 * 00096 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] 00097 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1] 00098 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2] 00099 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3] 00100 */ 00101 00102 blkCnt = blockSize >> 2; 00103 00104 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00105 ** a second loop below computes the remaining 1 to 3 samples. */ 00106 while(blkCnt > 0u) 00107 { 00108 /* Copy four new input samples into the state buffer. 00109 ** Use 32-bit SIMD to move the 16-bit data. Only requires two copies. */ 00110 *pStateCurnt++ = *pSrc++; 00111 *pStateCurnt++ = *pSrc++; 00112 *pStateCurnt++ = *pSrc++; 00113 *pStateCurnt++ = *pSrc++; 00114 00115 00116 /* Set all accumulators to zero */ 00117 acc0 = 0; 00118 acc1 = 0; 00119 acc2 = 0; 00120 acc3 = 0; 00121 00122 /* Typecast q15_t pointer to q31_t pointer for state reading in q31_t */ 00123 px = pState; 00124 00125 /* Typecast q15_t pointer to q31_t pointer for coefficient reading in q31_t */ 00126 pb = pCoeffs; 00127 00128 /* Read the first two samples from the state buffer: x[n-N], x[n-N-1] */ 00129 x0 = *__SIMD32(px)++; 00130 00131 /* Read the third and forth samples from the state buffer: x[n-N-2], x[n-N-3] */ 00132 x2 = *__SIMD32(px)++; 00133 00134 /* Loop over the number of taps. Unroll by a factor of 4. 00135 ** Repeat until we've computed numTaps-(numTaps%4) coefficients. */ 00136 tapCnt = numTaps >> 2; 00137 00138 while(tapCnt > 0) 00139 { 00140 /* Read the first two coefficients using SIMD: b[N] and b[N-1] coefficients */ 00141 c0 = *__SIMD32(pb)++; 00142 00143 /* acc0 += b[N] * x[n-N] + b[N-1] * x[n-N-1] */ 00144 acc0 = __SMLAD(x0, c0, acc0); 00145 00146 /* acc2 += b[N] * x[n-N-2] + b[N-1] * x[n-N-3] */ 00147 acc2 = __SMLAD(x2, c0, acc2); 00148 00149 /* pack x[n-N-1] and x[n-N-2] */ 00150 #ifndef ARM_MATH_BIG_ENDIAN 00151 x1 = __PKHBT(x2, x0, 0); 00152 #else 00153 x1 = __PKHBT(x0, x2, 0); 00154 #endif 00155 00156 /* Read state x[n-N-4], x[n-N-5] */ 00157 x0 = _SIMD32_OFFSET(px); 00158 00159 /* acc1 += b[N] * x[n-N-1] + b[N-1] * x[n-N-2] */ 00160 acc1 = __SMLADX(x1, c0, acc1); 00161 00162 /* pack x[n-N-3] and x[n-N-4] */ 00163 #ifndef ARM_MATH_BIG_ENDIAN 00164 x1 = __PKHBT(x0, x2, 0); 00165 #else 00166 x1 = __PKHBT(x2, x0, 0); 00167 #endif 00168 00169 /* acc3 += b[N] * x[n-N-3] + b[N-1] * x[n-N-4] */ 00170 acc3 = __SMLADX(x1, c0, acc3); 00171 00172 /* Read coefficients b[N-2], b[N-3] */ 00173 c0 = *__SIMD32(pb)++; 00174 00175 /* acc0 += b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */ 00176 acc0 = __SMLAD(x2, c0, acc0); 00177 00178 /* Read state x[n-N-6], x[n-N-7] with offset */ 00179 x2 = _SIMD32_OFFSET(px + 2u); 00180 00181 /* acc2 += b[N-2] * x[n-N-4] + b[N-3] * x[n-N-5] */ 00182 acc2 = __SMLAD(x0, c0, acc2); 00183 00184 /* acc1 += b[N-2] * x[n-N-3] + b[N-3] * x[n-N-4] */ 00185 acc1 = __SMLADX(x1, c0, acc1); 00186 00187 /* pack x[n-N-5] and x[n-N-6] */ 00188 #ifndef ARM_MATH_BIG_ENDIAN 00189 x1 = __PKHBT(x2, x0, 0); 00190 #else 00191 x1 = __PKHBT(x0, x2, 0); 00192 #endif 00193 00194 /* acc3 += b[N-2] * x[n-N-5] + b[N-3] * x[n-N-6] */ 00195 acc3 = __SMLADX(x1, c0, acc3); 00196 00197 /* Update state pointer for next state reading */ 00198 px += 4u; 00199 00200 /* Decrement tap count */ 00201 tapCnt--; 00202 00203 } 00204 00205 /* If the filter length is not a multiple of 4, compute the remaining filter taps. 00206 ** This is always be 2 taps since the filter length is even. */ 00207 if((numTaps & 0x3u) != 0u) 00208 { 00209 00210 /* Read last two coefficients */ 00211 c0 = *__SIMD32(pb)++; 00212 00213 /* Perform the multiply-accumulates */ 00214 acc0 = __SMLAD(x0, c0, acc0); 00215 acc2 = __SMLAD(x2, c0, acc2); 00216 00217 /* pack state variables */ 00218 #ifndef ARM_MATH_BIG_ENDIAN 00219 x1 = __PKHBT(x2, x0, 0); 00220 #else 00221 x1 = __PKHBT(x0, x2, 0); 00222 #endif 00223 00224 /* Read last state variables */ 00225 x0 = *__SIMD32(px); 00226 00227 /* Perform the multiply-accumulates */ 00228 acc1 = __SMLADX(x1, c0, acc1); 00229 00230 /* pack state variables */ 00231 #ifndef ARM_MATH_BIG_ENDIAN 00232 x1 = __PKHBT(x0, x2, 0); 00233 #else 00234 x1 = __PKHBT(x2, x0, 0); 00235 #endif 00236 00237 /* Perform the multiply-accumulates */ 00238 acc3 = __SMLADX(x1, c0, acc3); 00239 } 00240 00241 /* The results in the 4 accumulators are in 2.30 format. Convert to 1.15 with saturation. 00242 ** Then store the 4 outputs in the destination buffer. */ 00243 00244 #ifndef ARM_MATH_BIG_ENDIAN 00245 00246 *__SIMD32(pDst)++ = 00247 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16); 00248 00249 *__SIMD32(pDst)++ = 00250 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16); 00251 00252 #else 00253 00254 *__SIMD32(pDst)++ = 00255 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16); 00256 00257 *__SIMD32(pDst)++ = 00258 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16); 00259 00260 00261 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00262 00263 /* Advance the state pointer by 4 to process the next group of 4 samples */ 00264 pState = pState + 4u; 00265 00266 /* Decrement the loop counter */ 00267 blkCnt--; 00268 } 00269 00270 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00271 ** No loop unrolling is used. */ 00272 blkCnt = blockSize % 0x4u; 00273 while(blkCnt > 0u) 00274 { 00275 /* Copy two samples into state buffer */ 00276 *pStateCurnt++ = *pSrc++; 00277 00278 /* Set the accumulator to zero */ 00279 acc0 = 0; 00280 00281 /* Use SIMD to hold states and coefficients */ 00282 px = pState; 00283 pb = pCoeffs; 00284 00285 tapCnt = numTaps >> 1u; 00286 00287 do 00288 { 00289 00290 acc0 += (q31_t) * px++ * *pb++; 00291 acc0 += (q31_t) * px++ * *pb++; 00292 00293 tapCnt--; 00294 } 00295 while(tapCnt > 0u); 00296 00297 /* The result is in 2.30 format. Convert to 1.15 with saturation. 00298 ** Then store the output in the destination buffer. */ 00299 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16)); 00300 00301 /* Advance state pointer by 1 for the next sample */ 00302 pState = pState + 1u; 00303 00304 /* Decrement the loop counter */ 00305 blkCnt--; 00306 } 00307 00308 /* Processing is complete. 00309 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer. 00310 ** This prepares the state buffer for the next function call. */ 00311 00312 /* Points to the start of the state buffer */ 00313 pStateCurnt = S->pState; 00314 00315 /* Calculation of count for copying integer writes */ 00316 tapCnt = (numTaps - 1u) >> 2; 00317 00318 while(tapCnt > 0u) 00319 { 00320 *pStateCurnt++ = *pState++; 00321 *pStateCurnt++ = *pState++; 00322 *pStateCurnt++ = *pState++; 00323 *pStateCurnt++ = *pState++; 00324 00325 tapCnt--; 00326 00327 } 00328 00329 /* Calculation of count for remaining q15_t data */ 00330 tapCnt = (numTaps - 1u) % 0x4u; 00331 00332 /* copy remaining data */ 00333 while(tapCnt > 0u) 00334 { 00335 *pStateCurnt++ = *pState++; 00336 00337 /* Decrement the loop counter */ 00338 tapCnt--; 00339 } 00340 00341 } 00342 00343 /** 00344 * @} end of FIR group 00345 */
Generated on Tue Jul 12 2022 18:44:09 by
1.7.2
