CMSIS DSP library
Dependents: KL25Z_FFT_Demo Hat_Board_v5_1 KL25Z_FFT_Demo_tony KL25Z_FFT_Demo_tony ... more
Fork of mbed-dsp by
arm_fir_sparse_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 17. January 2013 00005 * $Revision: V1.4.1 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_sparse_q31.c 00009 * 00010 * Description: Q31 sparse FIR filter processing function. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * ------------------------------------------------------------------- */ 00040 #include "arm_math.h" 00041 00042 00043 /** 00044 * @addtogroup FIR_Sparse 00045 * @{ 00046 */ 00047 00048 /** 00049 * @brief Processing function for the Q31 sparse FIR filter. 00050 * @param[in] *S points to an instance of the Q31 sparse FIR structure. 00051 * @param[in] *pSrc points to the block of input data. 00052 * @param[out] *pDst points to the block of output data 00053 * @param[in] *pScratchIn points to a temporary buffer of size blockSize. 00054 * @param[in] blockSize number of input samples to process per call. 00055 * @return none. 00056 * 00057 * <b>Scaling and Overflow Behavior:</b> 00058 * \par 00059 * The function is implemented using an internal 32-bit accumulator. 00060 * The 1.31 x 1.31 multiplications are truncated to 2.30 format. 00061 * This leads to loss of precision on the intermediate multiplications and provides only a single guard bit. 00062 * If the accumulator result overflows, it wraps around rather than saturate. 00063 * In order to avoid overflows the input signal or coefficients must be scaled down by log2(numTaps) bits. 
00064 */ 00065 00066 void arm_fir_sparse_q31( 00067 arm_fir_sparse_instance_q31 * S, 00068 q31_t * pSrc, 00069 q31_t * pDst, 00070 q31_t * pScratchIn, 00071 uint32_t blockSize) 00072 { 00073 00074 q31_t *pState = S->pState; /* State pointer */ 00075 q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00076 q31_t *px; /* Scratch buffer pointer */ 00077 q31_t *py = pState; /* Temporary pointers for state buffer */ 00078 q31_t *pb = pScratchIn; /* Temporary pointers for scratch buffer */ 00079 q31_t *pOut; /* Destination pointer */ 00080 q63_t out; /* Temporary output variable */ 00081 int32_t *pTapDelay = S->pTapDelay; /* Pointer to the array containing offset of the non-zero tap values. */ 00082 uint32_t delaySize = S->maxDelay + blockSize; /* state length */ 00083 uint16_t numTaps = S->numTaps; /* Filter order */ 00084 int32_t readIndex; /* Read index of the state buffer */ 00085 uint32_t tapCnt, blkCnt; /* loop counters */ 00086 q31_t coeff = *pCoeffs++; /* Read the first coefficient value */ 00087 q31_t in; 00088 00089 00090 /* BlockSize of Input samples are copied into the state buffer */ 00091 /* StateIndex points to the starting position to write in the state buffer */ 00092 arm_circularWrite_f32((int32_t *) py, delaySize, &S->stateIndex, 1, 00093 (int32_t *) pSrc, 1, blockSize); 00094 00095 /* Read Index, from where the state buffer should be read, is calculated. 
*/ 00096 readIndex = (int32_t) (S->stateIndex - blockSize) - *pTapDelay++; 00097 00098 /* Wraparound of readIndex */ 00099 if(readIndex < 0) 00100 { 00101 readIndex += (int32_t) delaySize; 00102 } 00103 00104 /* Working pointer for state buffer is updated */ 00105 py = pState; 00106 00107 /* blockSize samples are read from the state buffer */ 00108 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1, 00109 (int32_t *) pb, (int32_t *) pb, blockSize, 1, 00110 blockSize); 00111 00112 /* Working pointer for the scratch buffer of state values */ 00113 px = pb; 00114 00115 /* Working pointer for scratch buffer of output values */ 00116 pOut = pDst; 00117 00118 00119 #ifndef ARM_MATH_CM0_FAMILY 00120 00121 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00122 00123 /* Loop over the blockSize. Unroll by a factor of 4. 00124 * Compute 4 Multiplications at a time. */ 00125 blkCnt = blockSize >> 2; 00126 00127 while(blkCnt > 0u) 00128 { 00129 /* Perform Multiplications and store in the destination buffer */ 00130 *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32); 00131 *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32); 00132 *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32); 00133 *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32); 00134 00135 /* Decrement the loop counter */ 00136 blkCnt--; 00137 } 00138 00139 /* If the blockSize is not a multiple of 4, 00140 * compute the remaining samples */ 00141 blkCnt = blockSize % 0x4u; 00142 00143 while(blkCnt > 0u) 00144 { 00145 /* Perform Multiplications and store in the destination buffer */ 00146 *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32); 00147 00148 /* Decrement the loop counter */ 00149 blkCnt--; 00150 } 00151 00152 /* Load the coefficient value and 00153 * increment the coefficient buffer for the next set of state values */ 00154 coeff = *pCoeffs++; 00155 00156 /* Read Index, from where the state buffer should be read, is calculated. 
*/ 00157 readIndex = (int32_t) (S->stateIndex - blockSize) - *pTapDelay++; 00158 00159 /* Wraparound of readIndex */ 00160 if(readIndex < 0) 00161 { 00162 readIndex += (int32_t) delaySize; 00163 } 00164 00165 /* Loop over the number of taps. */ 00166 tapCnt = (uint32_t) numTaps - 1u; 00167 00168 while(tapCnt > 0u) 00169 { 00170 /* Working pointer for state buffer is updated */ 00171 py = pState; 00172 00173 /* blockSize samples are read from the state buffer */ 00174 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1, 00175 (int32_t *) pb, (int32_t *) pb, blockSize, 1, 00176 blockSize); 00177 00178 /* Working pointer for the scratch buffer of state values */ 00179 px = pb; 00180 00181 /* Working pointer for scratch buffer of output values */ 00182 pOut = pDst; 00183 00184 /* Loop over the blockSize. Unroll by a factor of 4. 00185 * Compute 4 MACS at a time. */ 00186 blkCnt = blockSize >> 2; 00187 00188 while(blkCnt > 0u) 00189 { 00190 out = *pOut; 00191 out += ((q63_t) * px++ * coeff) >> 32; 00192 *pOut++ = (q31_t) (out); 00193 00194 out = *pOut; 00195 out += ((q63_t) * px++ * coeff) >> 32; 00196 *pOut++ = (q31_t) (out); 00197 00198 out = *pOut; 00199 out += ((q63_t) * px++ * coeff) >> 32; 00200 *pOut++ = (q31_t) (out); 00201 00202 out = *pOut; 00203 out += ((q63_t) * px++ * coeff) >> 32; 00204 *pOut++ = (q31_t) (out); 00205 00206 /* Decrement the loop counter */ 00207 blkCnt--; 00208 } 00209 00210 /* If the blockSize is not a multiple of 4, 00211 * compute the remaining samples */ 00212 blkCnt = blockSize % 0x4u; 00213 00214 while(blkCnt > 0u) 00215 { 00216 /* Perform Multiply-Accumulate */ 00217 out = *pOut; 00218 out += ((q63_t) * px++ * coeff) >> 32; 00219 *pOut++ = (q31_t) (out); 00220 00221 /* Decrement the loop counter */ 00222 blkCnt--; 00223 } 00224 00225 /* Load the coefficient value and 00226 * increment the coefficient buffer for the next set of state values */ 00227 coeff = *pCoeffs++; 00228 00229 /* Read Index, from where the state buffer 
should be read, is calculated. */ 00230 readIndex = (int32_t) (S->stateIndex - blockSize) - *pTapDelay++; 00231 00232 /* Wraparound of readIndex */ 00233 if(readIndex < 0) 00234 { 00235 readIndex += (int32_t) delaySize; 00236 } 00237 00238 /* Decrement the tap loop counter */ 00239 tapCnt--; 00240 } 00241 00242 /* Working output pointer is updated */ 00243 pOut = pDst; 00244 00245 /* Output is converted into 1.31 format. */ 00246 /* Loop over the blockSize. Unroll by a factor of 4. 00247 * process 4 output samples at a time. */ 00248 blkCnt = blockSize >> 2; 00249 00250 while(blkCnt > 0u) 00251 { 00252 in = *pOut << 1; 00253 *pOut++ = in; 00254 in = *pOut << 1; 00255 *pOut++ = in; 00256 in = *pOut << 1; 00257 *pOut++ = in; 00258 in = *pOut << 1; 00259 *pOut++ = in; 00260 00261 /* Decrement the loop counter */ 00262 blkCnt--; 00263 } 00264 00265 /* If the blockSize is not a multiple of 4, 00266 * process the remaining output samples */ 00267 blkCnt = blockSize % 0x4u; 00268 00269 while(blkCnt > 0u) 00270 { 00271 in = *pOut << 1; 00272 *pOut++ = in; 00273 00274 /* Decrement the loop counter */ 00275 blkCnt--; 00276 } 00277 00278 #else 00279 00280 /* Run the below code for Cortex-M0 */ 00281 blkCnt = blockSize; 00282 00283 while(blkCnt > 0u) 00284 { 00285 /* Perform Multiplications and store in the destination buffer */ 00286 *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32); 00287 00288 /* Decrement the loop counter */ 00289 blkCnt--; 00290 } 00291 00292 /* Load the coefficient value and 00293 * increment the coefficient buffer for the next set of state values */ 00294 coeff = *pCoeffs++; 00295 00296 /* Read Index, from where the state buffer should be read, is calculated. */ 00297 readIndex = (int32_t) (S->stateIndex - blockSize) - *pTapDelay++; 00298 00299 /* Wraparound of readIndex */ 00300 if(readIndex < 0) 00301 { 00302 readIndex += (int32_t) delaySize; 00303 } 00304 00305 /* Loop over the number of taps. 
*/ 00306 tapCnt = (uint32_t) numTaps - 1u; 00307 00308 while(tapCnt > 0u) 00309 { 00310 /* Working pointer for state buffer is updated */ 00311 py = pState; 00312 00313 /* blockSize samples are read from the state buffer */ 00314 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1, 00315 (int32_t *) pb, (int32_t *) pb, blockSize, 1, 00316 blockSize); 00317 00318 /* Working pointer for the scratch buffer of state values */ 00319 px = pb; 00320 00321 /* Working pointer for scratch buffer of output values */ 00322 pOut = pDst; 00323 00324 blkCnt = blockSize; 00325 00326 while(blkCnt > 0u) 00327 { 00328 /* Perform Multiply-Accumulate */ 00329 out = *pOut; 00330 out += ((q63_t) * px++ * coeff) >> 32; 00331 *pOut++ = (q31_t) (out); 00332 00333 /* Decrement the loop counter */ 00334 blkCnt--; 00335 } 00336 00337 /* Load the coefficient value and 00338 * increment the coefficient buffer for the next set of state values */ 00339 coeff = *pCoeffs++; 00340 00341 /* Read Index, from where the state buffer should be read, is calculated. */ 00342 readIndex = (int32_t) (S->stateIndex - blockSize) - *pTapDelay++; 00343 00344 /* Wraparound of readIndex */ 00345 if(readIndex < 0) 00346 { 00347 readIndex += (int32_t) delaySize; 00348 } 00349 00350 /* Decrement the tap loop counter */ 00351 tapCnt--; 00352 } 00353 00354 /* Working output pointer is updated */ 00355 pOut = pDst; 00356 00357 /* Output is converted into 1.31 format. */ 00358 blkCnt = blockSize; 00359 00360 while(blkCnt > 0u) 00361 { 00362 in = *pOut << 1; 00363 *pOut++ = in; 00364 00365 /* Decrement the loop counter */ 00366 blkCnt--; 00367 } 00368 00369 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00370 00371 } 00372 00373 /** 00374 * @} end of FIR_Sparse group 00375 */
Generated on Tue Jul 12 2022 12:36:55 by Doxygen 1.7.2