CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details
Dependents: K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more
arm_fir_sparse_f32.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_sparse_f32.c 00009 * 00010 * Description: Floating-point sparse FIR filter processing function. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.3 2010/11/29 00015 * Re-organized the CMSIS folders and updated documentation. 00016 * 00017 * Version 1.0.2 2010/11/11 00018 * Documentation updated. 00019 * 00020 * Version 1.0.1 2010/10/05 00021 * Production release and review comments incorporated. 00022 * 00023 * Version 1.0.0 2010/09/20 00024 * Production release and review comments incorporated 00025 * 00026 * Version 0.0.7 2010/06/10 00027 * Misra-C changes done 00028 * ------------------------------------------------------------------- */ 00029 #include "arm_math.h" 00030 00031 /** 00032 * @ingroup groupFilters 00033 */ 00034 00035 /** 00036 * @defgroup FIR_Sparse Finite Impulse Response (FIR) Sparse Filters 00037 * 00038 * This group of functions implements sparse FIR filters. 00039 * Sparse FIR filters are equivalent to standard FIR filters except that most of the coefficients are equal to zero. 00040 * Sparse filters are used for simulating reflections in communications and audio applications. 00041 * 00042 * There are separate functions for Q7, Q15, Q31, and floating-point data types. 00043 * The functions operate on blocks of input and output data and each call to the function processes 00044 * <code>blockSize</code> samples through the filter. <code>pSrc</code> and 00045 * <code>pDst</code> points to input and output arrays respectively containing <code>blockSize</code> values. 00046 * 00047 * \par Algorithm: 00048 * The sparse filter instant structure contains an array of tap indices <code>pTapDelay</code> which specifies the locations of the non-zero coefficients. 00049 * This is in addition to the coefficient array <code>b</code>. 00050 * The implementation essentially skips the multiplications by zero and leads to an efficient realization. 00051 * <pre> 00052 * y[n] = b[0] * x[n-pTapDelay[0]] + b[1] * x[n-pTapDelay[1]] + b[2] * x[n-pTapDelay[2]] + ...+ b[numTaps-1] * x[n-pTapDelay[numTaps-1]] 00053 * </pre> 00054 * \par 00055 * \image html FIRSparse.gif "Sparse FIR filter. b[n] represents the filter coefficients" 00056 * \par 00057 * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>; 00058 * <code>pTapDelay</code> points to an array of nonzero indices and is also of size <code>numTaps</code>; 00059 * <code>pState</code> points to a state array of size <code>maxDelay + blockSize</code>, where 00060 * <code>maxDelay</code> is the largest offset value that is ever used in the <code>pTapDelay</code> array. 00061 * Some of the processing functions also require temporary working buffers. 00062 * 00063 * \par Instance Structure 00064 * The coefficients and state variables for a filter are stored together in an instance data structure. 00065 * A separate instance structure must be defined for each filter. 00066 * Coefficient and offset arrays may be shared among several instances while state variable arrays cannot be shared. 00067 * There are separate instance structure declarations for each of the 4 supported data types. 00068 * 00069 * \par Initialization Functions 00070 * There is also an associated initialization function for each data type. 00071 * The initialization function performs the following operations: 00072 * - Sets the values of the internal structure fields. 00073 * - Zeros out the values in the state buffer. 00074 * 00075 * \par 00076 * Use of the initialization function is optional. 00077 * However, if the initialization function is used, then the instance structure cannot be placed into a const data section. 00078 * To place an instance structure into a const data section, the instance structure must be manually initialized. 00079 * Set the values in the state buffer to zeros before static initialization. 00080 * The code below statically initializes each of the 4 different data type filter instance structures 00081 * <pre> 00082 *arm_fir_sparse_instance_f32 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay}; 00083 *arm_fir_sparse_instance_q31 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay}; 00084 *arm_fir_sparse_instance_q15 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay}; 00085 *arm_fir_sparse_instance_q7 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay}; 00086 * </pre> 00087 * \par 00088 * 00089 * \par Fixed-Point Behavior 00090 * Care must be taken when using the fixed-point versions of the sparse FIR filter functions. 00091 * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered. 00092 * Refer to the function specific documentation below for usage guidelines. 00093 */ 00094 00095 /** 00096 * @addtogroup FIR_Sparse 00097 * @{ 00098 */ 00099 00100 /** 00101 * @brief Processing function for the floating-point sparse FIR filter. 00102 * @param[in] *S points to an instance of the floating-point sparse FIR structure. 00103 * @param[in] *pSrc points to the block of input data. 00104 * @param[out] *pDst points to the block of output data 00105 * @param[in] *pScratchIn points to a temporary buffer of size blockSize. 00106 * @param[in] blockSize number of input samples to process per call. 00107 * @return none. 00108 */ 00109 00110 void arm_fir_sparse_f32( 00111 arm_fir_sparse_instance_f32 * S, 00112 float32_t * pSrc, 00113 float32_t * pDst, 00114 float32_t * pScratchIn, 00115 uint32_t blockSize) 00116 { 00117 00118 float32_t *pState = S->pState; /* State pointer */ 00119 float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00120 float32_t *px; /* Scratch buffer pointer */ 00121 float32_t *py = pState; /* Temporary pointers for state buffer */ 00122 float32_t *pb = pScratchIn; /* Temporary pointers for scratch buffer */ 00123 float32_t *pOut; /* Destination pointer */ 00124 int32_t *pTapDelay = S->pTapDelay; /* Pointer to the array containing offset of the non-zero tap values. */ 00125 uint32_t delaySize = S->maxDelay + blockSize; /* state length */ 00126 uint16_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00127 int32_t readIndex; /* Read index of the state buffer */ 00128 uint32_t tapCnt, blkCnt; /* loop counters */ 00129 float32_t coeff = *pCoeffs++; /* Read the first coefficient value */ 00130 00131 00132 00133 /* BlockSize of Input samples are copied into the state buffer */ 00134 /* StateIndex points to the starting position to write in the state buffer */ 00135 arm_circularWrite_f32((int32_t *) py, delaySize, &S->stateIndex, 1, 00136 (int32_t *) pSrc, 1, blockSize); 00137 00138 00139 /* Read Index, from where the state buffer should be read, is calculated. */ 00140 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++; 00141 00142 /* Wraparound of readIndex */ 00143 if(readIndex < 0) 00144 { 00145 readIndex += (int32_t) delaySize; 00146 } 00147 00148 /* Working pointer for state buffer is updated */ 00149 py = pState; 00150 00151 /* blockSize samples are read from the state buffer */ 00152 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1, 00153 (int32_t *) pb, (int32_t *) pb, blockSize, 1, 00154 blockSize); 00155 00156 /* Working pointer for the scratch buffer */ 00157 px = pb; 00158 00159 /* Working pointer for destination buffer */ 00160 pOut = pDst; 00161 00162 /* Loop over the blockSize. Unroll by a factor of 4. 00163 * Compute 4 Multiplications at a time. */ 00164 blkCnt = blockSize >> 2u; 00165 00166 while(blkCnt > 0u) 00167 { 00168 /* Perform Multiplications and store in destination buffer */ 00169 *pOut++ = *px++ * coeff; 00170 *pOut++ = *px++ * coeff; 00171 *pOut++ = *px++ * coeff; 00172 *pOut++ = *px++ * coeff; 00173 00174 /* Decrement the loop counter */ 00175 blkCnt--; 00176 } 00177 00178 /* If the blockSize is not a multiple of 4, 00179 * compute the remaining samples */ 00180 blkCnt = blockSize % 0x4u; 00181 00182 while(blkCnt > 0u) 00183 { 00184 /* Perform Multiplications and store in destination buffer */ 00185 *pOut++ = *px++ * coeff; 00186 00187 /* Decrement the loop counter */ 00188 blkCnt--; 00189 } 00190 00191 /* Load the coefficient value and 00192 * increment the coefficient buffer for the next set of state values */ 00193 coeff = *pCoeffs++; 00194 00195 /* Read Index, from where the state buffer should be read, is calculated. */ 00196 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++; 00197 00198 /* Wraparound of readIndex */ 00199 if(readIndex < 0) 00200 { 00201 readIndex += (int32_t) delaySize; 00202 } 00203 00204 /* Loop over the number of taps. */ 00205 tapCnt = (uint32_t) numTaps - 1u; 00206 00207 while(tapCnt > 0u) 00208 { 00209 00210 /* Working pointer for state buffer is updated */ 00211 py = pState; 00212 00213 /* blockSize samples are read from the state buffer */ 00214 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1, 00215 (int32_t *) pb, (int32_t *) pb, blockSize, 1, 00216 blockSize); 00217 00218 /* Working pointer for the scratch buffer */ 00219 px = pb; 00220 00221 /* Working pointer for destination buffer */ 00222 pOut = pDst; 00223 00224 /* Loop over the blockSize. Unroll by a factor of 4. 00225 * Compute 4 MACS at a time. */ 00226 blkCnt = blockSize >> 2u; 00227 00228 while(blkCnt > 0u) 00229 { 00230 /* Perform Multiply-Accumulate */ 00231 *pOut++ += *px++ * coeff; 00232 *pOut++ += *px++ * coeff; 00233 *pOut++ += *px++ * coeff; 00234 *pOut++ += *px++ * coeff; 00235 00236 /* Decrement the loop counter */ 00237 blkCnt--; 00238 } 00239 00240 /* If the blockSize is not a multiple of 4, 00241 * compute the remaining samples */ 00242 blkCnt = blockSize % 0x4u; 00243 00244 while(blkCnt > 0u) 00245 { 00246 /* Perform Multiply-Accumulate */ 00247 *pOut++ += *px++ * coeff; 00248 00249 /* Decrement the loop counter */ 00250 blkCnt--; 00251 } 00252 00253 /* Load the coefficient value and 00254 * increment the coefficient buffer for the next set of state values */ 00255 coeff = *pCoeffs++; 00256 00257 /* Read Index, from where the state buffer should be read, is calculated. */ 00258 readIndex = ((int32_t) S->stateIndex - 00259 (int32_t) blockSize) - *pTapDelay++; 00260 00261 /* Wraparound of readIndex */ 00262 if(readIndex < 0) 00263 { 00264 readIndex += (int32_t) delaySize; 00265 } 00266 00267 /* Decrement the tap loop counter */ 00268 tapCnt--; 00269 } 00270 00271 } 00272 00273 /** 00274 * @} end of FIR_Sparse group 00275 */
Generated on Tue Jul 12 2022 14:13:53 by 1.7.2