CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details
Dependents: K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more
arm_fir_sparse_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_sparse_q31.c 00009 * 00010 * Description: Q31 sparse FIR filter processing function. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.3 2010/11/29 00015 * Re-organized the CMSIS folders and updated documentation. 00016 * 00017 * Version 1.0.2 2010/11/11 00018 * Documentation updated. 00019 * 00020 * Version 1.0.1 2010/10/05 00021 * Production release and review comments incorporated. 00022 * 00023 * Version 1.0.0 2010/09/20 00024 * Production release and review comments incorporated 00025 * 00026 * Version 0.0.7 2010/06/10 00027 * Misra-C changes done 00028 * ------------------------------------------------------------------- */ 00029 #include "arm_math.h" 00030 00031 00032 /** 00033 * @addtogroup FIR_Sparse 00034 * @{ 00035 */ 00036 00037 /** 00038 * @brief Processing function for the Q31 sparse FIR filter. 00039 * @param[in] *S points to an instance of the Q31 sparse FIR structure. 00040 * @param[in] *pSrc points to the block of input data. 00041 * @param[out] *pDst points to the block of output data 00042 * @param[in] *pScratchIn points to a temporary buffer of size blockSize. 00043 * @param[in] blockSize number of input samples to process per call. 00044 * @return none. 00045 * 00046 * <b>Scaling and Overflow Behavior:</b> 00047 * \par 00048 * The function is implemented using an internal 32-bit accumulator. 00049 * The 1.31 x 1.31 multiplications are truncated to 2.30 format. 00050 * This leads to loss of precision on the intermediate multiplications and provides only a single guard bit. 00051 * If the accumulator result overflows, it wraps around rather than saturate. 
00052 * In order to avoid overflows the input signal or coefficients must be scaled down by log2(numTaps) bits. 00053 */ 00054 00055 void arm_fir_sparse_q31( 00056 arm_fir_sparse_instance_q31 * S, 00057 q31_t * pSrc, 00058 q31_t * pDst, 00059 q31_t * pScratchIn, 00060 uint32_t blockSize) 00061 { 00062 00063 q31_t *pState = S->pState; /* State pointer */ 00064 q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00065 q31_t *px; /* Scratch buffer pointer */ 00066 q31_t *py = pState; /* Temporary pointers for state buffer */ 00067 q31_t *pb = pScratchIn; /* Temporary pointers for scratch buffer */ 00068 q31_t *pOut; /* Destination pointer */ 00069 q63_t out; /* Temporary output variable */ 00070 int32_t *pTapDelay = S->pTapDelay; /* Pointer to the array containing offset of the non-zero tap values. */ 00071 uint32_t delaySize = S->maxDelay + blockSize; /* state length */ 00072 uint16_t numTaps = S->numTaps; /* Filter order */ 00073 int32_t readIndex; /* Read index of the state buffer */ 00074 uint32_t tapCnt, blkCnt; /* loop counters */ 00075 q31_t coeff = *pCoeffs++; /* Read the first coefficient value */ 00076 q31_t in; 00077 00078 00079 /* BlockSize of Input samples are copied into the state buffer */ 00080 /* StateIndex points to the starting position to write in the state buffer */ 00081 arm_circularWrite_f32((int32_t *) py, delaySize, &S->stateIndex, 1, 00082 (int32_t *) pSrc, 1, blockSize); 00083 00084 /* Read Index, from where the state buffer should be read, is calculated. 
*/ 00085 readIndex = (int32_t) (S->stateIndex - blockSize) - *pTapDelay++; 00086 00087 /* Wraparound of readIndex */ 00088 if(readIndex < 0) 00089 { 00090 readIndex += (int32_t) delaySize; 00091 } 00092 00093 /* Working pointer for state buffer is updated */ 00094 py = pState; 00095 00096 /* blockSize samples are read from the state buffer */ 00097 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1, 00098 (int32_t *) pb, (int32_t *) pb, blockSize, 1, 00099 blockSize); 00100 00101 /* Working pointer for the scratch buffer of state values */ 00102 px = pb; 00103 00104 /* Working pointer for scratch buffer of output values */ 00105 pOut = pDst; 00106 00107 /* Loop over the blockSize. Unroll by a factor of 4. 00108 * Compute 4 Multiplications at a time. */ 00109 blkCnt = blockSize >> 2; 00110 00111 while(blkCnt > 0u) 00112 { 00113 /* Perform Multiplications and store in the destination buffer */ 00114 *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32); 00115 *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32); 00116 *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32); 00117 *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32); 00118 00119 /* Decrement the loop counter */ 00120 blkCnt--; 00121 } 00122 00123 /* If the blockSize is not a multiple of 4, 00124 * compute the remaining samples */ 00125 blkCnt = blockSize % 0x4u; 00126 00127 while(blkCnt > 0u) 00128 { 00129 /* Perform Multiplications and store in the destination buffer */ 00130 *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32); 00131 00132 /* Decrement the loop counter */ 00133 blkCnt--; 00134 } 00135 00136 /* Load the coefficient value and 00137 * increment the coefficient buffer for the next set of state values */ 00138 coeff = *pCoeffs++; 00139 00140 /* Read Index, from where the state buffer should be read, is calculated. 
*/ 00141 readIndex = (int32_t) (S->stateIndex - blockSize) - *pTapDelay++; 00142 00143 /* Wraparound of readIndex */ 00144 if(readIndex < 0) 00145 { 00146 readIndex += (int32_t) delaySize; 00147 } 00148 00149 /* Loop over the number of taps. */ 00150 tapCnt = (uint32_t) numTaps - 1u; 00151 00152 while(tapCnt > 0u) 00153 { 00154 /* Working pointer for state buffer is updated */ 00155 py = pState; 00156 00157 /* blockSize samples are read from the state buffer */ 00158 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1, 00159 (int32_t *) pb, (int32_t *) pb, blockSize, 1, 00160 blockSize); 00161 00162 /* Working pointer for the scratch buffer of state values */ 00163 px = pb; 00164 00165 /* Working pointer for scratch buffer of output values */ 00166 pOut = pDst; 00167 00168 /* Loop over the blockSize. Unroll by a factor of 4. 00169 * Compute 4 MACS at a time. */ 00170 blkCnt = blockSize >> 2; 00171 00172 while(blkCnt > 0u) 00173 { 00174 out = *pOut; 00175 out += ((q63_t) * px++ * coeff) >> 32; 00176 *pOut++ = (q31_t) (out); 00177 00178 out = *pOut; 00179 out += ((q63_t) * px++ * coeff) >> 32; 00180 *pOut++ = (q31_t) (out); 00181 00182 out = *pOut; 00183 out += ((q63_t) * px++ * coeff) >> 32; 00184 *pOut++ = (q31_t) (out); 00185 00186 out = *pOut; 00187 out += ((q63_t) * px++ * coeff) >> 32; 00188 *pOut++ = (q31_t) (out); 00189 00190 /* Decrement the loop counter */ 00191 blkCnt--; 00192 } 00193 00194 /* If the blockSize is not a multiple of 4, 00195 * compute the remaining samples */ 00196 blkCnt = blockSize % 0x4u; 00197 00198 while(blkCnt > 0u) 00199 { 00200 /* Perform Multiply-Accumulate */ 00201 out = *pOut; 00202 out += ((q63_t) * px++ * coeff) >> 32; 00203 *pOut++ = (q31_t) (out); 00204 00205 /* Decrement the loop counter */ 00206 blkCnt--; 00207 } 00208 00209 /* Load the coefficient value and 00210 * increment the coefficient buffer for the next set of state values */ 00211 coeff = *pCoeffs++; 00212 00213 /* Read Index, from where the state buffer 
should be read, is calculated. */ 00214 readIndex = (int32_t) (S->stateIndex - blockSize) - *pTapDelay++; 00215 00216 /* Wraparound of readIndex */ 00217 if(readIndex < 0) 00218 { 00219 readIndex += (int32_t) delaySize; 00220 } 00221 00222 /* Decrement the tap loop counter */ 00223 tapCnt--; 00224 } 00225 00226 /* Working output pointer is updated */ 00227 pOut = pDst; 00228 00229 /* Output is converted into 1.15 format. */ 00230 /* Loop over the blockSize. Unroll by a factor of 4. 00231 * process 4 output samples at a time. */ 00232 blkCnt = blockSize >> 2; 00233 00234 while(blkCnt > 0u) 00235 { 00236 in = *pOut << 1; 00237 *pOut++ = in; 00238 in = *pOut << 1; 00239 *pOut++ = in; 00240 in = *pOut << 1; 00241 *pOut++ = in; 00242 in = *pOut << 1; 00243 *pOut++ = in; 00244 00245 /* Decrement the loop counter */ 00246 blkCnt--; 00247 } 00248 00249 /* If the blockSize is not a multiple of 4, 00250 * process the remaining output samples */ 00251 blkCnt = blockSize % 0x4u; 00252 00253 while(blkCnt > 0u) 00254 { 00255 in = *pOut << 1; 00256 *pOut++ = in; 00257 00258 /* Decrement the loop counter */ 00259 blkCnt--; 00260 } 00261 } 00262 00263 /** 00264 * @} end of FIR_Sparse group 00265 */
Generated on Tue Jul 12 2022 14:13:53 by Doxygen 1.7.2