CMSIS DSP library
Dependents: performance_timer Surfboard_ gps2rtty Capstone ... more
arm_fir_sparse_f32.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_sparse_f32.c 00009 * 00010 * Description: Floating-point sparse FIR filter processing function. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * ------------------------------------------------------------------- */ 00040 #include "arm_math.h" 00041 00042 /** 00043 * @ingroup groupFilters 00044 */ 00045 00046 /** 00047 * @defgroup FIR_Sparse Finite Impulse Response (FIR) Sparse Filters 00048 * 00049 * This group of functions implements sparse FIR filters. 00050 * Sparse FIR filters are equivalent to standard FIR filters except that most of the coefficients are equal to zero. 00051 * Sparse filters are used for simulating reflections in communications and audio applications. 00052 * 00053 * There are separate functions for Q7, Q15, Q31, and floating-point data types. 00054 * The functions operate on blocks of input and output data and each call to the function processes 00055 * <code>blockSize</code> samples through the filter. <code>pSrc</code> and 00056 * <code>pDst</code> point to input and output arrays respectively containing <code>blockSize</code> values. 00057 * 00058 * \par Algorithm: 00059 * The sparse filter instance structure contains an array of tap indices <code>pTapDelay</code> which specifies the locations of the non-zero coefficients. 00060 * This is in addition to the coefficient array <code>b</code>. 00061 * The implementation essentially skips the multiplications by zero and leads to an efficient realization. 
00062 * <pre> 00063 * y[n] = b[0] * x[n-pTapDelay[0]] + b[1] * x[n-pTapDelay[1]] + b[2] * x[n-pTapDelay[2]] + ...+ b[numTaps-1] * x[n-pTapDelay[numTaps-1]] 00064 * </pre> 00065 * \par 00066 * \image html FIRSparse.gif "Sparse FIR filter. b[n] represents the filter coefficients" 00067 * \par 00068 * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>; 00069 * <code>pTapDelay</code> points to an array of nonzero indices and is also of size <code>numTaps</code>; 00070 * <code>pState</code> points to a state array of size <code>maxDelay + blockSize</code>, where 00071 * <code>maxDelay</code> is the largest offset value that is ever used in the <code>pTapDelay</code> array. 00072 * Some of the processing functions also require temporary working buffers. 00073 * 00074 * \par Instance Structure 00075 * The coefficients and state variables for a filter are stored together in an instance data structure. 00076 * A separate instance structure must be defined for each filter. 00077 * Coefficient and offset arrays may be shared among several instances while state variable arrays cannot be shared. 00078 * There are separate instance structure declarations for each of the 4 supported data types. 00079 * 00080 * \par Initialization Functions 00081 * There is also an associated initialization function for each data type. 00082 * The initialization function performs the following operations: 00083 * - Sets the values of the internal structure fields. 00084 * - Zeros out the values in the state buffer. 00085 * To do this manually without calling the init function, assign the following subfields of the instance structure: 00086 * numTaps, pCoeffs, pTapDelay, maxDelay, stateIndex, pState. Also set all of the values in pState to zero. 00087 * 00088 * \par 00089 * Use of the initialization function is optional. 00090 * However, if the initialization function is used, then the instance structure cannot be placed into a const data section. 
00091 * To place an instance structure into a const data section, the instance structure must be manually initialized. 00092 * Set the values in the state buffer to zeros before static initialization. 00093 * The code below statically initializes each of the 4 different data type filter instance structures 00094 * <pre> 00095 *arm_fir_sparse_instance_f32 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay}; 00096 *arm_fir_sparse_instance_q31 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay}; 00097 *arm_fir_sparse_instance_q15 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay}; 00098 *arm_fir_sparse_instance_q7 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay}; 00099 * </pre> 00100 * \par 00101 * 00102 * \par Fixed-Point Behavior 00103 * Care must be taken when using the fixed-point versions of the sparse FIR filter functions. 00104 * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered. 00105 * Refer to the function specific documentation below for usage guidelines. 00106 */ 00107 00108 /** 00109 * @addtogroup FIR_Sparse 00110 * @{ 00111 */ 00112 00113 /** 00114 * @brief Processing function for the floating-point sparse FIR filter. 00115 * @param[in] *S points to an instance of the floating-point sparse FIR structure. 00116 * @param[in] *pSrc points to the block of input data. 00117 * @param[out] *pDst points to the block of output data 00118 * @param[in] *pScratchIn points to a temporary buffer of size blockSize. 00119 * @param[in] blockSize number of input samples to process per call. 00120 * @return none. 
00121 */ 00122 00123 void arm_fir_sparse_f32( 00124 arm_fir_sparse_instance_f32 * S, 00125 float32_t * pSrc, 00126 float32_t * pDst, 00127 float32_t * pScratchIn, 00128 uint32_t blockSize) 00129 { 00130 00131 float32_t *pState = S->pState; /* State pointer */ 00132 float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00133 float32_t *px; /* Scratch buffer pointer */ 00134 float32_t *py = pState; /* Temporary pointers for state buffer */ 00135 float32_t *pb = pScratchIn; /* Temporary pointers for scratch buffer */ 00136 float32_t *pOut; /* Destination pointer */ 00137 int32_t *pTapDelay = S->pTapDelay; /* Pointer to the array containing offset of the non-zero tap values. */ 00138 uint32_t delaySize = S->maxDelay + blockSize; /* state length */ 00139 uint16_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00140 int32_t readIndex; /* Read index of the state buffer */ 00141 uint32_t tapCnt, blkCnt; /* loop counters */ 00142 float32_t coeff = *pCoeffs++; /* Read the first coefficient value */ 00143 00144 00145 00146 /* BlockSize of Input samples are copied into the state buffer */ 00147 /* StateIndex points to the starting position to write in the state buffer */ 00148 arm_circularWrite_f32((int32_t *) py, delaySize, &S->stateIndex, 1, 00149 (int32_t *) pSrc, 1, blockSize); 00150 00151 00152 /* Read Index, from where the state buffer should be read, is calculated. 
*/ 00153 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++; 00154 00155 /* Wraparound of readIndex */ 00156 if(readIndex < 0) 00157 { 00158 readIndex += (int32_t) delaySize; 00159 } 00160 00161 /* Working pointer for state buffer is updated */ 00162 py = pState; 00163 00164 /* blockSize samples are read from the state buffer */ 00165 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1, 00166 (int32_t *) pb, (int32_t *) pb, blockSize, 1, 00167 blockSize); 00168 00169 /* Working pointer for the scratch buffer */ 00170 px = pb; 00171 00172 /* Working pointer for destination buffer */ 00173 pOut = pDst; 00174 00175 00176 #ifndef ARM_MATH_CM0_FAMILY 00177 00178 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00179 00180 /* Loop over the blockSize. Unroll by a factor of 4. 00181 * Compute 4 Multiplications at a time. */ 00182 blkCnt = blockSize >> 2u; 00183 00184 while(blkCnt > 0u) 00185 { 00186 /* Perform Multiplications and store in destination buffer */ 00187 *pOut++ = *px++ * coeff; 00188 *pOut++ = *px++ * coeff; 00189 *pOut++ = *px++ * coeff; 00190 *pOut++ = *px++ * coeff; 00191 00192 /* Decrement the loop counter */ 00193 blkCnt--; 00194 } 00195 00196 /* If the blockSize is not a multiple of 4, 00197 * compute the remaining samples */ 00198 blkCnt = blockSize % 0x4u; 00199 00200 while(blkCnt > 0u) 00201 { 00202 /* Perform Multiplications and store in destination buffer */ 00203 *pOut++ = *px++ * coeff; 00204 00205 /* Decrement the loop counter */ 00206 blkCnt--; 00207 } 00208 00209 /* Load the coefficient value and 00210 * increment the coefficient buffer for the next set of state values */ 00211 coeff = *pCoeffs++; 00212 00213 /* Read Index, from where the state buffer should be read, is calculated. 
*/ 00214 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++; 00215 00216 /* Wraparound of readIndex */ 00217 if(readIndex < 0) 00218 { 00219 readIndex += (int32_t) delaySize; 00220 } 00221 00222 /* Loop over the number of taps. */ 00223 tapCnt = (uint32_t) numTaps - 2u; 00224 00225 while(tapCnt > 0u) 00226 { 00227 00228 /* Working pointer for state buffer is updated */ 00229 py = pState; 00230 00231 /* blockSize samples are read from the state buffer */ 00232 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1, 00233 (int32_t *) pb, (int32_t *) pb, blockSize, 1, 00234 blockSize); 00235 00236 /* Working pointer for the scratch buffer */ 00237 px = pb; 00238 00239 /* Working pointer for destination buffer */ 00240 pOut = pDst; 00241 00242 /* Loop over the blockSize. Unroll by a factor of 4. 00243 * Compute 4 MACS at a time. */ 00244 blkCnt = blockSize >> 2u; 00245 00246 while(blkCnt > 0u) 00247 { 00248 /* Perform Multiply-Accumulate */ 00249 *pOut++ += *px++ * coeff; 00250 *pOut++ += *px++ * coeff; 00251 *pOut++ += *px++ * coeff; 00252 *pOut++ += *px++ * coeff; 00253 00254 /* Decrement the loop counter */ 00255 blkCnt--; 00256 } 00257 00258 /* If the blockSize is not a multiple of 4, 00259 * compute the remaining samples */ 00260 blkCnt = blockSize % 0x4u; 00261 00262 while(blkCnt > 0u) 00263 { 00264 /* Perform Multiply-Accumulate */ 00265 *pOut++ += *px++ * coeff; 00266 00267 /* Decrement the loop counter */ 00268 blkCnt--; 00269 } 00270 00271 /* Load the coefficient value and 00272 * increment the coefficient buffer for the next set of state values */ 00273 coeff = *pCoeffs++; 00274 00275 /* Read Index, from where the state buffer should be read, is calculated. 
*/ 00276 readIndex = ((int32_t) S->stateIndex - 00277 (int32_t) blockSize) - *pTapDelay++; 00278 00279 /* Wraparound of readIndex */ 00280 if(readIndex < 0) 00281 { 00282 readIndex += (int32_t) delaySize; 00283 } 00284 00285 /* Decrement the tap loop counter */ 00286 tapCnt--; 00287 } 00288 00289 /* Compute last tap without the final read of pTapDelay */ 00290 00291 /* Working pointer for state buffer is updated */ 00292 py = pState; 00293 00294 /* blockSize samples are read from the state buffer */ 00295 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1, 00296 (int32_t *) pb, (int32_t *) pb, blockSize, 1, 00297 blockSize); 00298 00299 /* Working pointer for the scratch buffer */ 00300 px = pb; 00301 00302 /* Working pointer for destination buffer */ 00303 pOut = pDst; 00304 00305 /* Loop over the blockSize. Unroll by a factor of 4. 00306 * Compute 4 MACS at a time. */ 00307 blkCnt = blockSize >> 2u; 00308 00309 while(blkCnt > 0u) 00310 { 00311 /* Perform Multiply-Accumulate */ 00312 *pOut++ += *px++ * coeff; 00313 *pOut++ += *px++ * coeff; 00314 *pOut++ += *px++ * coeff; 00315 *pOut++ += *px++ * coeff; 00316 00317 /* Decrement the loop counter */ 00318 blkCnt--; 00319 } 00320 00321 /* If the blockSize is not a multiple of 4, 00322 * compute the remaining samples */ 00323 blkCnt = blockSize % 0x4u; 00324 00325 while(blkCnt > 0u) 00326 { 00327 /* Perform Multiply-Accumulate */ 00328 *pOut++ += *px++ * coeff; 00329 00330 /* Decrement the loop counter */ 00331 blkCnt--; 00332 } 00333 00334 #else 00335 00336 /* Run the below code for Cortex-M0 */ 00337 00338 blkCnt = blockSize; 00339 00340 while(blkCnt > 0u) 00341 { 00342 /* Perform Multiplications and store in destination buffer */ 00343 *pOut++ = *px++ * coeff; 00344 00345 /* Decrement the loop counter */ 00346 blkCnt--; 00347 } 00348 00349 /* Load the coefficient value and 00350 * increment the coefficient buffer for the next set of state values */ 00351 coeff = *pCoeffs++; 00352 00353 /* Read Index, 
from where the state buffer should be read, is calculated. */ 00354 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++; 00355 00356 /* Wraparound of readIndex */ 00357 if(readIndex < 0) 00358 { 00359 readIndex += (int32_t) delaySize; 00360 } 00361 00362 /* Loop over the number of taps. */ 00363 tapCnt = (uint32_t) numTaps - 2u; 00364 00365 while(tapCnt > 0u) 00366 { 00367 00368 /* Working pointer for state buffer is updated */ 00369 py = pState; 00370 00371 /* blockSize samples are read from the state buffer */ 00372 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1, 00373 (int32_t *) pb, (int32_t *) pb, blockSize, 1, 00374 blockSize); 00375 00376 /* Working pointer for the scratch buffer */ 00377 px = pb; 00378 00379 /* Working pointer for destination buffer */ 00380 pOut = pDst; 00381 00382 blkCnt = blockSize; 00383 00384 while(blkCnt > 0u) 00385 { 00386 /* Perform Multiply-Accumulate */ 00387 *pOut++ += *px++ * coeff; 00388 00389 /* Decrement the loop counter */ 00390 blkCnt--; 00391 } 00392 00393 /* Load the coefficient value and 00394 * increment the coefficient buffer for the next set of state values */ 00395 coeff = *pCoeffs++; 00396 00397 /* Read Index, from where the state buffer should be read, is calculated. 
*/ 00398 readIndex = 00399 ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++; 00400 00401 /* Wraparound of readIndex */ 00402 if(readIndex < 0) 00403 { 00404 readIndex += (int32_t) delaySize; 00405 } 00406 00407 /* Decrement the tap loop counter */ 00408 tapCnt--; 00409 } 00410 00411 /* Compute last tap without the final read of pTapDelay */ 00412 00413 /* Working pointer for state buffer is updated */ 00414 py = pState; 00415 00416 /* blockSize samples are read from the state buffer */ 00417 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1, 00418 (int32_t *) pb, (int32_t *) pb, blockSize, 1, 00419 blockSize); 00420 00421 /* Working pointer for the scratch buffer */ 00422 px = pb; 00423 00424 /* Working pointer for destination buffer */ 00425 pOut = pDst; 00426 00427 blkCnt = blockSize; 00428 00429 while(blkCnt > 0u) 00430 { 00431 /* Perform Multiply-Accumulate */ 00432 *pOut++ += *px++ * coeff; 00433 00434 /* Decrement the loop counter */ 00435 blkCnt--; 00436 } 00437 00438 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00439 00440 } 00441 00442 /** 00443 * @} end of FIR_Sparse group 00444 */
Generated on Tue Jul 12 2022 11:59:17 by 1.7.2