Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-os by
arm_fir_interpolate_q15.c
00001 /*----------------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_interpolate_q15.c 00009 * 00010 * Description: Q15 FIR interpolation. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * ---------------------------------------------------------------------------*/ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @ingroup groupFilters 00045 */ 00046 00047 /** 00048 * @addtogroup FIR_Interpolate 00049 * @{ 00050 */ 00051 00052 /** 00053 * @brief Processing function for the Q15 FIR interpolator. 00054 * @param[in] *S points to an instance of the Q15 FIR interpolator structure. 00055 * @param[in] *pSrc points to the block of input data. 00056 * @param[out] *pDst points to the block of output data. 00057 * @param[in] blockSize number of input samples to process per call. 00058 * @return none. 00059 * 00060 * <b>Scaling and Overflow Behavior:</b> 00061 * \par 00062 * The function is implemented using a 64-bit internal accumulator. 00063 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result. 00064 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format. 00065 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved. 00066 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits. 00067 * Lastly, the accumulator is saturated to yield a result in 1.15 format. 00068 */ 00069 00070 #ifndef ARM_MATH_CM0_FAMILY 00071 00072 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00073 00074 void arm_fir_interpolate_q15( 00075 const arm_fir_interpolate_instance_q15 * S, 00076 q15_t * pSrc, 00077 q15_t * pDst, 00078 uint32_t blockSize) 00079 { 00080 q15_t *pState = S->pState; /* State pointer */ 00081 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00082 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00083 q15_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */ 00084 q63_t sum0; /* Accumulators */ 00085 q15_t x0, c0; /* Temporary variables to hold state and coefficient values */ 00086 uint32_t i, blkCnt, j, tapCnt; /* Loop counters */ 00087 uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */ 00088 uint32_t blkCntN2; 00089 q63_t acc0, acc1; 00090 q15_t x1; 00091 00092 /* S->pState buffer contains previous frame (phaseLen - 1) samples */ 00093 /* pStateCurnt points to the location where the new input data should be written */ 00094 pStateCurnt = S->pState + ((q31_t) phaseLen - 1); 00095 00096 /* Initialise blkCnt */ 00097 blkCnt = blockSize / 2; 00098 blkCntN2 = blockSize - (2 * blkCnt); 00099 00100 /* Samples loop unrolled by 2 */ 00101 while(blkCnt > 0u) 00102 { 00103 /* Copy new input sample into the state buffer */ 00104 *pStateCurnt++ = *pSrc++; 00105 *pStateCurnt++ = *pSrc++; 00106 00107 /* Address modifier index of coefficient buffer */ 00108 j = 1u; 00109 00110 /* Loop over the Interpolation factor. */ 00111 i = (S->L); 00112 00113 while(i > 0u) 00114 { 00115 /* Set accumulator to zero */ 00116 acc0 = 0; 00117 acc1 = 0; 00118 00119 /* Initialize state pointer */ 00120 ptr1 = pState; 00121 00122 /* Initialize coefficient pointer */ 00123 ptr2 = pCoeffs + (S->L - j); 00124 00125 /* Loop over the polyPhase length. Unroll by a factor of 4. 00126 ** Repeat until we've computed numTaps-(4*S->L) coefficients. */ 00127 tapCnt = phaseLen >> 2u; 00128 00129 x0 = *(ptr1++); 00130 00131 while(tapCnt > 0u) 00132 { 00133 00134 /* Read the input sample */ 00135 x1 = *(ptr1++); 00136 00137 /* Read the coefficient */ 00138 c0 = *(ptr2); 00139 00140 /* Perform the multiply-accumulate */ 00141 acc0 += (q63_t) x0 *c0; 00142 acc1 += (q63_t) x1 *c0; 00143 00144 00145 /* Read the coefficient */ 00146 c0 = *(ptr2 + S->L); 00147 00148 /* Read the input sample */ 00149 x0 = *(ptr1++); 00150 00151 /* Perform the multiply-accumulate */ 00152 acc0 += (q63_t) x1 *c0; 00153 acc1 += (q63_t) x0 *c0; 00154 00155 00156 /* Read the coefficient */ 00157 c0 = *(ptr2 + S->L * 2); 00158 00159 /* Read the input sample */ 00160 x1 = *(ptr1++); 00161 00162 /* Perform the multiply-accumulate */ 00163 acc0 += (q63_t) x0 *c0; 00164 acc1 += (q63_t) x1 *c0; 00165 00166 /* Read the coefficient */ 00167 c0 = *(ptr2 + S->L * 3); 00168 00169 /* Read the input sample */ 00170 x0 = *(ptr1++); 00171 00172 /* Perform the multiply-accumulate */ 00173 acc0 += (q63_t) x1 *c0; 00174 acc1 += (q63_t) x0 *c0; 00175 00176 00177 /* Upsampling is done by stuffing L-1 zeros between each sample. 00178 * So instead of multiplying zeros with coefficients, 00179 * Increment the coefficient pointer by interpolation factor times. */ 00180 ptr2 += 4 * S->L; 00181 00182 /* Decrement the loop counter */ 00183 tapCnt--; 00184 } 00185 00186 /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */ 00187 tapCnt = phaseLen % 0x4u; 00188 00189 while(tapCnt > 0u) 00190 { 00191 00192 /* Read the input sample */ 00193 x1 = *(ptr1++); 00194 00195 /* Read the coefficient */ 00196 c0 = *(ptr2); 00197 00198 /* Perform the multiply-accumulate */ 00199 acc0 += (q63_t) x0 *c0; 00200 acc1 += (q63_t) x1 *c0; 00201 00202 /* Increment the coefficient pointer by interpolation factor times. */ 00203 ptr2 += S->L; 00204 00205 /* update states for next sample processing */ 00206 x0 = x1; 00207 00208 /* Decrement the loop counter */ 00209 tapCnt--; 00210 } 00211 00212 /* The result is in the accumulator, store in the destination buffer. */ 00213 *pDst = (q15_t) (__SSAT((acc0 >> 15), 16)); 00214 *(pDst + S->L) = (q15_t) (__SSAT((acc1 >> 15), 16)); 00215 00216 pDst++; 00217 00218 /* Increment the address modifier index of coefficient buffer */ 00219 j++; 00220 00221 /* Decrement the loop counter */ 00222 i--; 00223 } 00224 00225 /* Advance the state pointer by 1 00226 * to process the next group of interpolation factor number samples */ 00227 pState = pState + 2; 00228 00229 pDst += S->L; 00230 00231 /* Decrement the loop counter */ 00232 blkCnt--; 00233 } 00234 00235 /* If the blockSize is not a multiple of 2, compute any remaining output samples here. 00236 ** No loop unrolling is used. */ 00237 blkCnt = blkCntN2; 00238 00239 /* Loop over the blockSize. */ 00240 while(blkCnt > 0u) 00241 { 00242 /* Copy new input sample into the state buffer */ 00243 *pStateCurnt++ = *pSrc++; 00244 00245 /* Address modifier index of coefficient buffer */ 00246 j = 1u; 00247 00248 /* Loop over the Interpolation factor. */ 00249 i = S->L; 00250 while(i > 0u) 00251 { 00252 /* Set accumulator to zero */ 00253 sum0 = 0; 00254 00255 /* Initialize state pointer */ 00256 ptr1 = pState; 00257 00258 /* Initialize coefficient pointer */ 00259 ptr2 = pCoeffs + (S->L - j); 00260 00261 /* Loop over the polyPhase length. Unroll by a factor of 4. 00262 ** Repeat until we've computed numTaps-(4*S->L) coefficients. */ 00263 tapCnt = phaseLen >> 2; 00264 while(tapCnt > 0u) 00265 { 00266 00267 /* Read the coefficient */ 00268 c0 = *(ptr2); 00269 00270 /* Upsampling is done by stuffing L-1 zeros between each sample. 00271 * So instead of multiplying zeros with coefficients, 00272 * Increment the coefficient pointer by interpolation factor times. */ 00273 ptr2 += S->L; 00274 00275 /* Read the input sample */ 00276 x0 = *(ptr1++); 00277 00278 /* Perform the multiply-accumulate */ 00279 sum0 += (q63_t) x0 *c0; 00280 00281 /* Read the coefficient */ 00282 c0 = *(ptr2); 00283 00284 /* Increment the coefficient pointer by interpolation factor times. */ 00285 ptr2 += S->L; 00286 00287 /* Read the input sample */ 00288 x0 = *(ptr1++); 00289 00290 /* Perform the multiply-accumulate */ 00291 sum0 += (q63_t) x0 *c0; 00292 00293 /* Read the coefficient */ 00294 c0 = *(ptr2); 00295 00296 /* Increment the coefficient pointer by interpolation factor times. */ 00297 ptr2 += S->L; 00298 00299 /* Read the input sample */ 00300 x0 = *(ptr1++); 00301 00302 /* Perform the multiply-accumulate */ 00303 sum0 += (q63_t) x0 *c0; 00304 00305 /* Read the coefficient */ 00306 c0 = *(ptr2); 00307 00308 /* Increment the coefficient pointer by interpolation factor times. */ 00309 ptr2 += S->L; 00310 00311 /* Read the input sample */ 00312 x0 = *(ptr1++); 00313 00314 /* Perform the multiply-accumulate */ 00315 sum0 += (q63_t) x0 *c0; 00316 00317 /* Decrement the loop counter */ 00318 tapCnt--; 00319 } 00320 00321 /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */ 00322 tapCnt = phaseLen & 0x3u; 00323 00324 while(tapCnt > 0u) 00325 { 00326 /* Read the coefficient */ 00327 c0 = *(ptr2); 00328 00329 /* Increment the coefficient pointer by interpolation factor times. */ 00330 ptr2 += S->L; 00331 00332 /* Read the input sample */ 00333 x0 = *(ptr1++); 00334 00335 /* Perform the multiply-accumulate */ 00336 sum0 += (q63_t) x0 *c0; 00337 00338 /* Decrement the loop counter */ 00339 tapCnt--; 00340 } 00341 00342 /* The result is in the accumulator, store in the destination buffer. */ 00343 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16)); 00344 00345 j++; 00346 00347 /* Decrement the loop counter */ 00348 i--; 00349 } 00350 00351 /* Advance the state pointer by 1 00352 * to process the next group of interpolation factor number samples */ 00353 pState = pState + 1; 00354 00355 /* Decrement the loop counter */ 00356 blkCnt--; 00357 } 00358 00359 00360 /* Processing is complete. 00361 ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer. 00362 ** This prepares the state buffer for the next function call. */ 00363 00364 /* Points to the start of the state buffer */ 00365 pStateCurnt = S->pState; 00366 00367 i = ((uint32_t) phaseLen - 1u) >> 2u; 00368 00369 /* copy data */ 00370 while(i > 0u) 00371 { 00372 #ifndef UNALIGNED_SUPPORT_DISABLE 00373 00374 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00375 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00376 00377 #else 00378 00379 *pStateCurnt++ = *pState++; 00380 *pStateCurnt++ = *pState++; 00381 *pStateCurnt++ = *pState++; 00382 *pStateCurnt++ = *pState++; 00383 00384 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */ 00385 00386 /* Decrement the loop counter */ 00387 i--; 00388 } 00389 00390 i = ((uint32_t) phaseLen - 1u) % 0x04u; 00391 00392 while(i > 0u) 00393 { 00394 *pStateCurnt++ = *pState++; 00395 00396 /* Decrement the loop counter */ 00397 i--; 00398 } 00399 } 00400 00401 #else 00402 00403 /* Run the below code for Cortex-M0 */ 00404 00405 void arm_fir_interpolate_q15( 00406 const arm_fir_interpolate_instance_q15 * S, 00407 q15_t * pSrc, 00408 q15_t * pDst, 00409 uint32_t blockSize) 00410 { 00411 q15_t *pState = S->pState; /* State pointer */ 00412 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00413 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00414 q15_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */ 00415 q63_t sum; /* Accumulator */ 00416 q15_t x0, c0; /* Temporary variables to hold state and coefficient values */ 00417 uint32_t i, blkCnt, tapCnt; /* Loop counters */ 00418 uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */ 00419 00420 00421 /* S->pState buffer contains previous frame (phaseLen - 1) samples */ 00422 /* pStateCurnt points to the location where the new input data should be written */ 00423 pStateCurnt = S->pState + (phaseLen - 1u); 00424 00425 /* Total number of intput samples */ 00426 blkCnt = blockSize; 00427 00428 /* Loop over the blockSize. */ 00429 while(blkCnt > 0u) 00430 { 00431 /* Copy new input sample into the state buffer */ 00432 *pStateCurnt++ = *pSrc++; 00433 00434 /* Loop over the Interpolation factor. */ 00435 i = S->L; 00436 00437 while(i > 0u) 00438 { 00439 /* Set accumulator to zero */ 00440 sum = 0; 00441 00442 /* Initialize state pointer */ 00443 ptr1 = pState; 00444 00445 /* Initialize coefficient pointer */ 00446 ptr2 = pCoeffs + (i - 1u); 00447 00448 /* Loop over the polyPhase length */ 00449 tapCnt = (uint32_t) phaseLen; 00450 00451 while(tapCnt > 0u) 00452 { 00453 /* Read the coefficient */ 00454 c0 = *ptr2; 00455 00456 /* Increment the coefficient pointer by interpolation factor times. */ 00457 ptr2 += S->L; 00458 00459 /* Read the input sample */ 00460 x0 = *ptr1++; 00461 00462 /* Perform the multiply-accumulate */ 00463 sum += ((q31_t) x0 * c0); 00464 00465 /* Decrement the loop counter */ 00466 tapCnt--; 00467 } 00468 00469 /* Store the result after converting to 1.15 format in the destination buffer */ 00470 *pDst++ = (q15_t) (__SSAT((sum >> 15), 16)); 00471 00472 /* Decrement the loop counter */ 00473 i--; 00474 } 00475 00476 /* Advance the state pointer by 1 00477 * to process the next group of interpolation factor number samples */ 00478 pState = pState + 1; 00479 00480 /* Decrement the loop counter */ 00481 blkCnt--; 00482 } 00483 00484 /* Processing is complete. 00485 ** Now copy the last phaseLen - 1 samples to the start of the state buffer. 00486 ** This prepares the state buffer for the next function call. */ 00487 00488 /* Points to the start of the state buffer */ 00489 pStateCurnt = S->pState; 00490 00491 i = (uint32_t) phaseLen - 1u; 00492 00493 while(i > 0u) 00494 { 00495 *pStateCurnt++ = *pState++; 00496 00497 /* Decrement the loop counter */ 00498 i--; 00499 } 00500 00501 } 00502 00503 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00504 00505 00506 /** 00507 * @} end of FIR_Interpolate group 00508 */
Generated on Tue Jul 12 2022 13:15:24 by
