Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-os by
arm_fir_sparse_q7.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_sparse_q7.c 00009 * 00010 * Description: Q7 sparse FIR filter processing function. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * ------------------------------------------------------------------- */ 00040 #include "arm_math.h" 00041 00042 00043 /** 00044 * @ingroup groupFilters 00045 */ 00046 00047 /** 00048 * @addtogroup FIR_Sparse 00049 * @{ 00050 */ 00051 00052 00053 /** 00054 * @brief Processing function for the Q7 sparse FIR filter. 00055 * @param[in] *S points to an instance of the Q7 sparse FIR structure. 00056 * @param[in] *pSrc points to the block of input data. 00057 * @param[out] *pDst points to the block of output data 00058 * @param[in] *pScratchIn points to a temporary buffer of size blockSize. 00059 * @param[in] *pScratchOut points to a temporary buffer of size blockSize. 00060 * @param[in] blockSize number of input samples to process per call. 00061 * @return none. 00062 * 00063 * <b>Scaling and Overflow Behavior:</b> 00064 * \par 00065 * The function is implemented using a 32-bit internal accumulator. 00066 * Both coefficients and state variables are represented in 1.7 format and multiplications yield a 2.14 result. 00067 * The 2.14 intermediate results are accumulated in a 32-bit accumulator in 18.14 format. 00068 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved. 00069 * The accumulator is then converted to 18.7 format by discarding the low 7 bits. 00070 * Finally, the result is truncated to 1.7 format. 
00071 */ 00072 00073 void arm_fir_sparse_q7( 00074 arm_fir_sparse_instance_q7 * S, 00075 q7_t * pSrc, 00076 q7_t * pDst, 00077 q7_t * pScratchIn, 00078 q31_t * pScratchOut, 00079 uint32_t blockSize) 00080 { 00081 00082 q7_t *pState = S->pState; /* State pointer */ 00083 q7_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00084 q7_t *px; /* Scratch buffer pointer */ 00085 q7_t *py = pState; /* Temporary pointers for state buffer */ 00086 q7_t *pb = pScratchIn; /* Temporary pointers for scratch buffer */ 00087 q7_t *pOut = pDst; /* Destination pointer */ 00088 int32_t *pTapDelay = S->pTapDelay; /* Pointer to the array containing offset of the non-zero tap values. */ 00089 uint32_t delaySize = S->maxDelay + blockSize; /* state length */ 00090 uint16_t numTaps = S->numTaps; /* Filter order */ 00091 int32_t readIndex; /* Read index of the state buffer */ 00092 uint32_t tapCnt, blkCnt; /* loop counters */ 00093 q7_t coeff = *pCoeffs++; /* Read the coefficient value */ 00094 q31_t *pScr2 = pScratchOut; /* Working pointer for scratch buffer of output values */ 00095 q31_t in; 00096 00097 00098 #ifndef ARM_MATH_CM0_FAMILY 00099 00100 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00101 00102 q7_t in1, in2, in3, in4; 00103 00104 /* BlockSize of Input samples are copied into the state buffer */ 00105 /* StateIndex points to the starting position to write in the state buffer */ 00106 arm_circularWrite_q7(py, (int32_t) delaySize, &S->stateIndex, 1, pSrc, 1, 00107 blockSize); 00108 00109 /* Loop over the number of taps. */ 00110 tapCnt = numTaps; 00111 00112 /* Read Index, from where the state buffer should be read, is calculated. 
*/ 00113 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++; 00114 00115 /* Wraparound of readIndex */ 00116 if(readIndex < 0) 00117 { 00118 readIndex += (int32_t) delaySize; 00119 } 00120 00121 /* Working pointer for state buffer is updated */ 00122 py = pState; 00123 00124 /* blockSize samples are read from the state buffer */ 00125 arm_circularRead_q7(py, (int32_t) delaySize, &readIndex, 1, pb, pb, 00126 (int32_t) blockSize, 1, blockSize); 00127 00128 /* Working pointer for the scratch buffer of state values */ 00129 px = pb; 00130 00131 /* Working pointer for scratch buffer of output values */ 00132 pScratchOut = pScr2; 00133 00134 /* Loop over the blockSize. Unroll by a factor of 4. 00135 * Compute 4 multiplications at a time. */ 00136 blkCnt = blockSize >> 2; 00137 00138 while(blkCnt > 0u) 00139 { 00140 /* Perform multiplication and store in the scratch buffer */ 00141 *pScratchOut++ = ((q31_t) * px++ * coeff); 00142 *pScratchOut++ = ((q31_t) * px++ * coeff); 00143 *pScratchOut++ = ((q31_t) * px++ * coeff); 00144 *pScratchOut++ = ((q31_t) * px++ * coeff); 00145 00146 /* Decrement the loop counter */ 00147 blkCnt--; 00148 } 00149 00150 /* If the blockSize is not a multiple of 4, 00151 * compute the remaining samples */ 00152 blkCnt = blockSize % 0x4u; 00153 00154 while(blkCnt > 0u) 00155 { 00156 /* Perform multiplication and store in the scratch buffer */ 00157 *pScratchOut++ = ((q31_t) * px++ * coeff); 00158 00159 /* Decrement the loop counter */ 00160 blkCnt--; 00161 } 00162 00163 /* Load the coefficient value and 00164 * increment the coefficient buffer for the next set of state values */ 00165 coeff = *pCoeffs++; 00166 00167 /* Read Index, from where the state buffer should be read, is calculated. 
*/ 00168 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++; 00169 00170 /* Wraparound of readIndex */ 00171 if(readIndex < 0) 00172 { 00173 readIndex += (int32_t) delaySize; 00174 } 00175 00176 /* Loop over the number of taps. */ 00177 tapCnt = (uint32_t) numTaps - 2u; 00178 00179 while(tapCnt > 0u) 00180 { 00181 /* Working pointer for state buffer is updated */ 00182 py = pState; 00183 00184 /* blockSize samples are read from the state buffer */ 00185 arm_circularRead_q7(py, (int32_t) delaySize, &readIndex, 1, pb, pb, 00186 (int32_t) blockSize, 1, blockSize); 00187 00188 /* Working pointer for the scratch buffer of state values */ 00189 px = pb; 00190 00191 /* Working pointer for scratch buffer of output values */ 00192 pScratchOut = pScr2; 00193 00194 /* Loop over the blockSize. Unroll by a factor of 4. 00195 * Compute 4 MACS at a time. */ 00196 blkCnt = blockSize >> 2; 00197 00198 while(blkCnt > 0u) 00199 { 00200 /* Perform Multiply-Accumulate */ 00201 in = *pScratchOut + ((q31_t) * px++ * coeff); 00202 *pScratchOut++ = in; 00203 in = *pScratchOut + ((q31_t) * px++ * coeff); 00204 *pScratchOut++ = in; 00205 in = *pScratchOut + ((q31_t) * px++ * coeff); 00206 *pScratchOut++ = in; 00207 in = *pScratchOut + ((q31_t) * px++ * coeff); 00208 *pScratchOut++ = in; 00209 00210 /* Decrement the loop counter */ 00211 blkCnt--; 00212 } 00213 00214 /* If the blockSize is not a multiple of 4, 00215 * compute the remaining samples */ 00216 blkCnt = blockSize % 0x4u; 00217 00218 while(blkCnt > 0u) 00219 { 00220 /* Perform Multiply-Accumulate */ 00221 in = *pScratchOut + ((q31_t) * px++ * coeff); 00222 *pScratchOut++ = in; 00223 00224 /* Decrement the loop counter */ 00225 blkCnt--; 00226 } 00227 00228 /* Load the coefficient value and 00229 * increment the coefficient buffer for the next set of state values */ 00230 coeff = *pCoeffs++; 00231 00232 /* Read Index, from where the state buffer should be read, is calculated. 
*/ 00233 readIndex = ((int32_t) S->stateIndex - 00234 (int32_t) blockSize) - *pTapDelay++; 00235 00236 /* Wraparound of readIndex */ 00237 if(readIndex < 0) 00238 { 00239 readIndex += (int32_t) delaySize; 00240 } 00241 00242 /* Decrement the tap loop counter */ 00243 tapCnt--; 00244 } 00245 00246 /* Compute last tap without the final read of pTapDelay */ 00247 00248 /* Working pointer for state buffer is updated */ 00249 py = pState; 00250 00251 /* blockSize samples are read from the state buffer */ 00252 arm_circularRead_q7(py, (int32_t) delaySize, &readIndex, 1, pb, pb, 00253 (int32_t) blockSize, 1, blockSize); 00254 00255 /* Working pointer for the scratch buffer of state values */ 00256 px = pb; 00257 00258 /* Working pointer for scratch buffer of output values */ 00259 pScratchOut = pScr2; 00260 00261 /* Loop over the blockSize. Unroll by a factor of 4. 00262 * Compute 4 MACS at a time. */ 00263 blkCnt = blockSize >> 2; 00264 00265 while(blkCnt > 0u) 00266 { 00267 /* Perform Multiply-Accumulate */ 00268 in = *pScratchOut + ((q31_t) * px++ * coeff); 00269 *pScratchOut++ = in; 00270 in = *pScratchOut + ((q31_t) * px++ * coeff); 00271 *pScratchOut++ = in; 00272 in = *pScratchOut + ((q31_t) * px++ * coeff); 00273 *pScratchOut++ = in; 00274 in = *pScratchOut + ((q31_t) * px++ * coeff); 00275 *pScratchOut++ = in; 00276 00277 /* Decrement the loop counter */ 00278 blkCnt--; 00279 } 00280 00281 /* If the blockSize is not a multiple of 4, 00282 * compute the remaining samples */ 00283 blkCnt = blockSize % 0x4u; 00284 00285 while(blkCnt > 0u) 00286 { 00287 /* Perform Multiply-Accumulate */ 00288 in = *pScratchOut + ((q31_t) * px++ * coeff); 00289 *pScratchOut++ = in; 00290 00291 /* Decrement the loop counter */ 00292 blkCnt--; 00293 } 00294 00295 /* All the output values are in pScratchOut buffer. 00296 Convert them into 1.15 format, saturate and store in the destination buffer. */ 00297 /* Loop over the blockSize. 
*/ 00298 blkCnt = blockSize >> 2; 00299 00300 while(blkCnt > 0u) 00301 { 00302 in1 = (q7_t) __SSAT(*pScr2++ >> 7, 8); 00303 in2 = (q7_t) __SSAT(*pScr2++ >> 7, 8); 00304 in3 = (q7_t) __SSAT(*pScr2++ >> 7, 8); 00305 in4 = (q7_t) __SSAT(*pScr2++ >> 7, 8); 00306 00307 *__SIMD32(pOut)++ = __PACKq7(in1, in2, in3, in4); 00308 00309 /* Decrement the blockSize loop counter */ 00310 blkCnt--; 00311 } 00312 00313 /* If the blockSize is not a multiple of 4, 00314 remaining samples are processed in the below loop */ 00315 blkCnt = blockSize % 0x4u; 00316 00317 while(blkCnt > 0u) 00318 { 00319 *pOut++ = (q7_t) __SSAT(*pScr2++ >> 7, 8); 00320 00321 /* Decrement the blockSize loop counter */ 00322 blkCnt--; 00323 } 00324 00325 #else 00326 00327 /* Run the below code for Cortex-M0 */ 00328 00329 /* BlockSize of Input samples are copied into the state buffer */ 00330 /* StateIndex points to the starting position to write in the state buffer */ 00331 arm_circularWrite_q7(py, (int32_t) delaySize, &S->stateIndex, 1, pSrc, 1, 00332 blockSize); 00333 00334 /* Loop over the number of taps. */ 00335 tapCnt = numTaps; 00336 00337 /* Read Index, from where the state buffer should be read, is calculated. 
*/ 00338 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++; 00339 00340 /* Wraparound of readIndex */ 00341 if(readIndex < 0) 00342 { 00343 readIndex += (int32_t) delaySize; 00344 } 00345 00346 /* Working pointer for state buffer is updated */ 00347 py = pState; 00348 00349 /* blockSize samples are read from the state buffer */ 00350 arm_circularRead_q7(py, (int32_t) delaySize, &readIndex, 1, pb, pb, 00351 (int32_t) blockSize, 1, blockSize); 00352 00353 /* Working pointer for the scratch buffer of state values */ 00354 px = pb; 00355 00356 /* Working pointer for scratch buffer of output values */ 00357 pScratchOut = pScr2; 00358 00359 /* Loop over the blockSize */ 00360 blkCnt = blockSize; 00361 00362 while(blkCnt > 0u) 00363 { 00364 /* Perform multiplication and store in the scratch buffer */ 00365 *pScratchOut++ = ((q31_t) * px++ * coeff); 00366 00367 /* Decrement the loop counter */ 00368 blkCnt--; 00369 } 00370 00371 /* Load the coefficient value and 00372 * increment the coefficient buffer for the next set of state values */ 00373 coeff = *pCoeffs++; 00374 00375 /* Read Index, from where the state buffer should be read, is calculated. */ 00376 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++; 00377 00378 /* Wraparound of readIndex */ 00379 if(readIndex < 0) 00380 { 00381 readIndex += (int32_t) delaySize; 00382 } 00383 00384 /* Loop over the number of taps. 
*/ 00385 tapCnt = (uint32_t) numTaps - 2u; 00386 00387 while(tapCnt > 0u) 00388 { 00389 /* Working pointer for state buffer is updated */ 00390 py = pState; 00391 00392 /* blockSize samples are read from the state buffer */ 00393 arm_circularRead_q7(py, (int32_t) delaySize, &readIndex, 1, pb, pb, 00394 (int32_t) blockSize, 1, blockSize); 00395 00396 /* Working pointer for the scratch buffer of state values */ 00397 px = pb; 00398 00399 /* Working pointer for scratch buffer of output values */ 00400 pScratchOut = pScr2; 00401 00402 /* Loop over the blockSize */ 00403 blkCnt = blockSize; 00404 00405 while(blkCnt > 0u) 00406 { 00407 /* Perform Multiply-Accumulate */ 00408 in = *pScratchOut + ((q31_t) * px++ * coeff); 00409 *pScratchOut++ = in; 00410 00411 /* Decrement the loop counter */ 00412 blkCnt--; 00413 } 00414 00415 /* Load the coefficient value and 00416 * increment the coefficient buffer for the next set of state values */ 00417 coeff = *pCoeffs++; 00418 00419 /* Read Index, from where the state buffer should be read, is calculated. 
*/ 00420 readIndex = 00421 ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++; 00422 00423 /* Wraparound of readIndex */ 00424 if(readIndex < 0) 00425 { 00426 readIndex += (int32_t) delaySize; 00427 } 00428 00429 /* Decrement the tap loop counter */ 00430 tapCnt--; 00431 } 00432 00433 /* Compute last tap without the final read of pTapDelay */ 00434 00435 /* Working pointer for state buffer is updated */ 00436 py = pState; 00437 00438 /* blockSize samples are read from the state buffer */ 00439 arm_circularRead_q7(py, (int32_t) delaySize, &readIndex, 1, pb, pb, 00440 (int32_t) blockSize, 1, blockSize); 00441 00442 /* Working pointer for the scratch buffer of state values */ 00443 px = pb; 00444 00445 /* Working pointer for scratch buffer of output values */ 00446 pScratchOut = pScr2; 00447 00448 /* Loop over the blockSize */ 00449 blkCnt = blockSize; 00450 00451 while(blkCnt > 0u) 00452 { 00453 /* Perform Multiply-Accumulate */ 00454 in = *pScratchOut + ((q31_t) * px++ * coeff); 00455 *pScratchOut++ = in; 00456 00457 /* Decrement the loop counter */ 00458 blkCnt--; 00459 } 00460 00461 /* All the output values are in pScratchOut buffer. 00462 Convert them into 1.15 format, saturate and store in the destination buffer. */ 00463 /* Loop over the blockSize. */ 00464 blkCnt = blockSize; 00465 00466 while(blkCnt > 0u) 00467 { 00468 *pOut++ = (q7_t) __SSAT(*pScr2++ >> 7, 8); 00469 00470 /* Decrement the blockSize loop counter */ 00471 blkCnt--; 00472 } 00473 00474 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00475 00476 } 00477 00478 /** 00479 * @} end of FIR_Sparse group 00480 */
Generated on Tue Jul 12 2022 13:15:24 by
