Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of dsp by
arm_fir_sparse_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_sparse_q15.c 00009 * 00010 * Description: Q15 sparse FIR filter processing function. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.3 2010/11/29 00015 * Re-organized the CMSIS folders and updated documentation. 00016 * 00017 * Version 1.0.2 2010/11/11 00018 * Documentation updated. 00019 * 00020 * Version 1.0.1 2010/10/05 00021 * Production release and review comments incorporated. 00022 * 00023 * Version 1.0.0 2010/09/20 00024 * Production release and review comments incorporated 00025 * 00026 * Version 0.0.7 2010/06/10 00027 * Misra-C changes done 00028 * ------------------------------------------------------------------- */ 00029 #include "arm_math.h" 00030 00031 /** 00032 * @addtogroup FIR_Sparse 00033 * @{ 00034 */ 00035 00036 /** 00037 * @brief Processing function for the Q15 sparse FIR filter. 00038 * @param[in] *S points to an instance of the Q15 sparse FIR structure. 00039 * @param[in] *pSrc points to the block of input data. 00040 * @param[out] *pDst points to the block of output data 00041 * @param[in] *pScratchIn points to a temporary buffer of size blockSize. 00042 * @param[in] *pScratchOut points to a temporary buffer of size blockSize. 00043 * @param[in] blockSize number of input samples to process per call. 00044 * @return none. 00045 * 00046 * <b>Scaling and Overflow Behavior:</b> 00047 * \par 00048 * The function is implemented using an internal 32-bit accumulator. 00049 * The 1.15 x 1.15 multiplications yield a 2.30 result and these are added to a 2.30 accumulator. 00050 * Thus the full precision of the multiplications is maintained but there is only a single guard bit in the accumulator. 
00051 * If the accumulator result overflows it will wrap around rather than saturate. 00052 * After all multiply-accumulates are performed, the 2.30 accumulator is truncated to 2.15 format and then saturated to 1.15 format. 00053 * In order to avoid overflows the input signal or coefficients must be scaled down by log2(numTaps) bits. 00054 */ 00055 00056 00057 void arm_fir_sparse_q15( 00058 arm_fir_sparse_instance_q15 * S, 00059 q15_t * pSrc, 00060 q15_t * pDst, 00061 q15_t * pScratchIn, 00062 q31_t * pScratchOut, 00063 uint32_t blockSize) 00064 { 00065 00066 q15_t *pState = S->pState; /* State pointer */ 00067 q15_t *pIn = (q15_t *) pSrc; /* Working pointer for input */ 00068 q15_t *pOut = pDst; /* Working pointer for output */ 00069 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00070 q15_t *px; /* Temporary pointers for scratch buffer */ 00071 q15_t *pb = pScratchIn; /* Temporary pointers for scratch buffer */ 00072 q15_t *py = pState; /* Temporary pointers for state buffer */ 00073 int32_t *pTapDelay = S->pTapDelay; /* Pointer to the array containing offset of the non-zero tap values. */ 00074 uint32_t delaySize = S->maxDelay + blockSize; /* state length */ 00075 uint16_t numTaps = S->numTaps; /* Filter order */ 00076 int32_t readIndex; /* Read index of the state buffer */ 00077 uint32_t tapCnt, blkCnt; /* loop counters */ 00078 q15_t coeff = *pCoeffs++; /* Read the first coefficient value */ 00079 q31_t *pScr2 = pScratchOut; /* Working pointer for pScratchOut */ 00080 q31_t in1, in2; /* Temporary variables */ 00081 00082 00083 00084 /* BlockSize of Input samples are copied into the state buffer */ 00085 /* StateIndex points to the starting position to write in the state buffer */ 00086 arm_circularWrite_q15(py, delaySize, &S->stateIndex, 1, pIn, 1, blockSize); 00087 00088 /* Loop over the number of taps. */ 00089 tapCnt = numTaps; 00090 00091 /* Read Index, from where the state buffer should be read, is calculated. 
*/ 00092 readIndex = (S->stateIndex - blockSize) - *pTapDelay++; 00093 00094 /* Wraparound of readIndex */ 00095 if(readIndex < 0) 00096 { 00097 readIndex += (int32_t) delaySize; 00098 } 00099 00100 /* Working pointer for state buffer is updated */ 00101 py = pState; 00102 00103 /* blockSize samples are read from the state buffer */ 00104 arm_circularRead_q15(py, delaySize, &readIndex, 1, 00105 pb, pb, blockSize, 1, blockSize); 00106 00107 /* Working pointer for the scratch buffer of state values */ 00108 px = pb; 00109 00110 /* Working pointer for scratch buffer of output values */ 00111 pScratchOut = pScr2; 00112 00113 /* Loop over the blockSize. Unroll by a factor of 4. 00114 * Compute 4 multiplications at a time. */ 00115 blkCnt = blockSize >> 2; 00116 00117 while(blkCnt > 0u) 00118 { 00119 /* Perform multiplication and store in the scratch buffer */ 00120 *pScratchOut++ = ((q31_t) * px++ * coeff); 00121 *pScratchOut++ = ((q31_t) * px++ * coeff); 00122 *pScratchOut++ = ((q31_t) * px++ * coeff); 00123 *pScratchOut++ = ((q31_t) * px++ * coeff); 00124 00125 /* Decrement the loop counter */ 00126 blkCnt--; 00127 } 00128 00129 /* If the blockSize is not a multiple of 4, 00130 * compute the remaining samples */ 00131 blkCnt = blockSize % 0x4u; 00132 00133 while(blkCnt > 0u) 00134 { 00135 /* Perform multiplication and store in the scratch buffer */ 00136 *pScratchOut++ = ((q31_t) * px++ * coeff); 00137 00138 /* Decrement the loop counter */ 00139 blkCnt--; 00140 } 00141 00142 /* Load the coefficient value and 00143 * increment the coefficient buffer for the next set of state values */ 00144 coeff = *pCoeffs++; 00145 00146 /* Read Index, from where the state buffer should be read, is calculated. */ 00147 readIndex = (S->stateIndex - blockSize) - *pTapDelay++; 00148 00149 /* Wraparound of readIndex */ 00150 if(readIndex < 0) 00151 { 00152 readIndex += (int32_t) delaySize; 00153 } 00154 00155 /* Loop over the number of taps. 
*/ 00156 tapCnt = (uint32_t) numTaps - 1u; 00157 00158 while(tapCnt > 0u) 00159 { 00160 /* Working pointer for state buffer is updated */ 00161 py = pState; 00162 00163 /* blockSize samples are read from the state buffer */ 00164 arm_circularRead_q15(py, delaySize, &readIndex, 1, 00165 pb, pb, blockSize, 1, blockSize); 00166 00167 /* Working pointer for the scratch buffer of state values */ 00168 px = pb; 00169 00170 /* Working pointer for scratch buffer of output values */ 00171 pScratchOut = pScr2; 00172 00173 /* Loop over the blockSize. Unroll by a factor of 4. 00174 * Compute 4 MACS at a time. */ 00175 blkCnt = blockSize >> 2; 00176 00177 while(blkCnt > 0u) 00178 { 00179 /* Perform Multiply-Accumulate */ 00180 *pScratchOut++ += (q31_t) * px++ * coeff; 00181 *pScratchOut++ += (q31_t) * px++ * coeff; 00182 *pScratchOut++ += (q31_t) * px++ * coeff; 00183 *pScratchOut++ += (q31_t) * px++ * coeff; 00184 00185 /* Decrement the loop counter */ 00186 blkCnt--; 00187 } 00188 00189 /* If the blockSize is not a multiple of 4, 00190 * compute the remaining samples */ 00191 blkCnt = blockSize % 0x4u; 00192 00193 while(blkCnt > 0u) 00194 { 00195 /* Perform Multiply-Accumulate */ 00196 *pScratchOut++ += (q31_t) * px++ * coeff; 00197 00198 /* Decrement the loop counter */ 00199 blkCnt--; 00200 } 00201 00202 /* Load the coefficient value and 00203 * increment the coefficient buffer for the next set of state values */ 00204 coeff = *pCoeffs++; 00205 00206 /* Read Index, from where the state buffer should be read, is calculated. */ 00207 readIndex = (S->stateIndex - blockSize) - *pTapDelay++; 00208 00209 /* Wraparound of readIndex */ 00210 if(readIndex < 0) 00211 { 00212 readIndex += (int32_t) delaySize; 00213 } 00214 00215 /* Decrement the tap loop counter */ 00216 tapCnt--; 00217 } 00218 00219 /* All the output values are in pScratchOut buffer. 00220 Convert them into 1.15 format, saturate and store in the destination buffer. */ 00221 /* Loop over the blockSize. 
*/ 00222 blkCnt = blockSize >> 2; 00223 00224 while(blkCnt > 0u) 00225 { 00226 in1 = *pScr2++; 00227 in2 = *pScr2++; 00228 *__SIMD32(pOut)++ = 00229 __PKHBT((q15_t) __SSAT(in1 >> 15, 16), (q15_t) __SSAT(in2 >> 15, 16), 00230 16); 00231 00232 in1 = *pScr2++; 00233 in2 = *pScr2++; 00234 *__SIMD32(pOut)++ = 00235 __PKHBT((q15_t) __SSAT(in1 >> 15, 16), (q15_t) __SSAT(in2 >> 15, 16), 00236 16); 00237 00238 blkCnt--; 00239 00240 } 00241 00242 /* If the blockSize is not a multiple of 4, 00243 remaining samples are processed in the below loop */ 00244 blkCnt = blockSize % 0x4u; 00245 00246 while(blkCnt > 0u) 00247 { 00248 *pOut++ = (q15_t) __SSAT(*pScr2++ >> 15, 16); 00249 blkCnt--; 00250 } 00251 } 00252 00253 /** 00254 * @} end of FIR_Sparse group 00255 */
Generated on Tue Jul 12 2022 19:55:43 by
1.7.2
