CMSIS DSP library
Dependents: KL25Z_FFT_Demo Hat_Board_v5_1 KL25Z_FFT_Demo_tony KL25Z_FFT_Demo_tony ... more
Fork of mbed-dsp by
arm_fir_interpolate_q15.c
00001 /*----------------------------------------------------------------------------- 00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 17. January 2013 00005 * $Revision: V1.4.1 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_interpolate_q15.c 00009 * 00010 * Description: Q15 FIR interpolation. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * ---------------------------------------------------------------------------*/ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @ingroup groupFilters 00045 */ 00046 00047 /** 00048 * @addtogroup FIR_Interpolate 00049 * @{ 00050 */ 00051 00052 /** 00053 * @brief Processing function for the Q15 FIR interpolator. 00054 * @param[in] *S points to an instance of the Q15 FIR interpolator structure. 00055 * @param[in] *pSrc points to the block of input data. 00056 * @param[out] *pDst points to the block of output data. 00057 * @param[in] blockSize number of input samples to process per call. 00058 * @return none. 00059 * 00060 * <b>Scaling and Overflow Behavior:</b> 00061 * \par 00062 * The function is implemented using a 64-bit internal accumulator. 00063 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result. 00064 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format. 00065 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved. 00066 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits. 00067 * Lastly, the accumulator is saturated to yield a result in 1.15 format. 00068 */ 00069 00070 #ifndef ARM_MATH_CM0_FAMILY 00071 00072 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00073 00074 void arm_fir_interpolate_q15( 00075 const arm_fir_interpolate_instance_q15 * S, 00076 q15_t * pSrc, 00077 q15_t * pDst, 00078 uint32_t blockSize) 00079 { 00080 q15_t *pState = S->pState; /* State pointer */ 00081 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00082 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00083 q15_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */ 00084 q63_t sum0; /* Accumulators */ 00085 q15_t x0, c0; /* Temporary variables to hold state and coefficient values */ 00086 uint32_t i, blkCnt, j, tapCnt; /* Loop counters */ 00087 uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */ 00088 uint32_t blkCntN2; 00089 q63_t acc0, acc1; 00090 q15_t x1; 00091 00092 /* S->pState buffer contains previous frame (phaseLen - 1) samples */ 00093 /* pStateCurnt points to the location where the new input data should be written */ 00094 pStateCurnt = S->pState + ((q31_t) phaseLen - 1); 00095 00096 /* Initialise blkCnt */ 00097 blkCnt = blockSize / 2; 00098 blkCntN2 = blockSize - (2 * blkCnt); 00099 00100 /* Samples loop unrolled by 2 */ 00101 while(blkCnt > 0u) 00102 { 00103 /* Copy new input sample into the state buffer */ 00104 *pStateCurnt++ = *pSrc++; 00105 *pStateCurnt++ = *pSrc++; 00106 00107 /* Address modifier index of coefficient buffer */ 00108 j = 1u; 00109 00110 /* Loop over the Interpolation factor. */ 00111 i = (S->L); 00112 00113 while(i > 0u) 00114 { 00115 /* Set accumulator to zero */ 00116 acc0 = 0; 00117 acc1 = 0; 00118 00119 /* Initialize state pointer */ 00120 ptr1 = pState; 00121 00122 /* Initialize coefficient pointer */ 00123 ptr2 = pCoeffs + (S->L - j); 00124 00125 /* Loop over the polyPhase length. Unroll by a factor of 4. 00126 ** Repeat until we've computed numTaps-(4*S->L) coefficients. */ 00127 tapCnt = phaseLen >> 2u; 00128 00129 x0 = *(ptr1++); 00130 00131 while(tapCnt > 0u) 00132 { 00133 00134 /* Read the input sample */ 00135 x1 = *(ptr1++); 00136 00137 /* Read the coefficient */ 00138 c0 = *(ptr2); 00139 00140 /* Perform the multiply-accumulate */ 00141 acc0 += (q63_t) x0 *c0; 00142 acc1 += (q63_t) x1 *c0; 00143 00144 00145 /* Read the coefficient */ 00146 c0 = *(ptr2 + S->L); 00147 00148 /* Read the input sample */ 00149 x0 = *(ptr1++); 00150 00151 /* Perform the multiply-accumulate */ 00152 acc0 += (q63_t) x1 *c0; 00153 acc1 += (q63_t) x0 *c0; 00154 00155 00156 /* Read the coefficient */ 00157 c0 = *(ptr2 + S->L * 2); 00158 00159 /* Read the input sample */ 00160 x1 = *(ptr1++); 00161 00162 /* Perform the multiply-accumulate */ 00163 acc0 += (q63_t) x0 *c0; 00164 acc1 += (q63_t) x1 *c0; 00165 00166 /* Read the coefficient */ 00167 c0 = *(ptr2 + S->L * 3); 00168 00169 /* Read the input sample */ 00170 x0 = *(ptr1++); 00171 00172 /* Perform the multiply-accumulate */ 00173 acc0 += (q63_t) x1 *c0; 00174 acc1 += (q63_t) x0 *c0; 00175 00176 00177 /* Upsampling is done by stuffing L-1 zeros between each sample. 00178 * So instead of multiplying zeros with coefficients, 00179 * Increment the coefficient pointer by interpolation factor times. */ 00180 ptr2 += 4 * S->L; 00181 00182 /* Decrement the loop counter */ 00183 tapCnt--; 00184 } 00185 00186 /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */ 00187 tapCnt = phaseLen % 0x4u; 00188 00189 while(tapCnt > 0u) 00190 { 00191 00192 /* Read the input sample */ 00193 x1 = *(ptr1++); 00194 00195 /* Read the coefficient */ 00196 c0 = *(ptr2); 00197 00198 /* Perform the multiply-accumulate */ 00199 acc0 += (q63_t) x0 *c0; 00200 acc1 += (q63_t) x1 *c0; 00201 00202 /* Increment the coefficient pointer by interpolation factor times. */ 00203 ptr2 += S->L; 00204 00205 /* update states for next sample processing */ 00206 x0 = x1; 00207 00208 /* Decrement the loop counter */ 00209 tapCnt--; 00210 } 00211 00212 /* The result is in the accumulator, store in the destination buffer. */ 00213 *pDst = (q15_t) (__SSAT((acc0 >> 15), 16)); 00214 *(pDst + S->L) = (q15_t) (__SSAT((acc1 >> 15), 16)); 00215 00216 pDst++; 00217 00218 /* Increment the address modifier index of coefficient buffer */ 00219 j++; 00220 00221 /* Decrement the loop counter */ 00222 i--; 00223 } 00224 00225 /* Advance the state pointer by 1 00226 * to process the next group of interpolation factor number samples */ 00227 pState = pState + 2; 00228 00229 pDst += S->L; 00230 00231 /* Decrement the loop counter */ 00232 blkCnt--; 00233 } 00234 00235 /* If the blockSize is not a multiple of 2, compute any remaining output samples here. 00236 ** No loop unrolling is used. */ 00237 blkCnt = blkCntN2; 00238 00239 /* Loop over the blockSize. */ 00240 while(blkCnt > 0u) 00241 { 00242 /* Copy new input sample into the state buffer */ 00243 *pStateCurnt++ = *pSrc++; 00244 00245 /* Address modifier index of coefficient buffer */ 00246 j = 1u; 00247 00248 /* Loop over the Interpolation factor. */ 00249 i = S->L; 00250 while(i > 0u) 00251 { 00252 /* Set accumulator to zero */ 00253 sum0 = 0; 00254 00255 /* Initialize state pointer */ 00256 ptr1 = pState; 00257 00258 /* Initialize coefficient pointer */ 00259 ptr2 = pCoeffs + (S->L - j); 00260 00261 /* Loop over the polyPhase length. Unroll by a factor of 4. 00262 ** Repeat until we've computed numTaps-(4*S->L) coefficients. */ 00263 tapCnt = phaseLen >> 2; 00264 while(tapCnt > 0u) 00265 { 00266 00267 /* Read the coefficient */ 00268 c0 = *(ptr2); 00269 00270 /* Upsampling is done by stuffing L-1 zeros between each sample. 00271 * So instead of multiplying zeros with coefficients, 00272 * Increment the coefficient pointer by interpolation factor times. */ 00273 ptr2 += S->L; 00274 00275 /* Read the input sample */ 00276 x0 = *(ptr1++); 00277 00278 /* Perform the multiply-accumulate */ 00279 sum0 += (q63_t) x0 *c0; 00280 00281 /* Read the coefficient */ 00282 c0 = *(ptr2); 00283 00284 /* Increment the coefficient pointer by interpolation factor times. */ 00285 ptr2 += S->L; 00286 00287 /* Read the input sample */ 00288 x0 = *(ptr1++); 00289 00290 /* Perform the multiply-accumulate */ 00291 sum0 += (q63_t) x0 *c0; 00292 00293 /* Read the coefficient */ 00294 c0 = *(ptr2); 00295 00296 /* Increment the coefficient pointer by interpolation factor times. */ 00297 ptr2 += S->L; 00298 00299 /* Read the input sample */ 00300 x0 = *(ptr1++); 00301 00302 /* Perform the multiply-accumulate */ 00303 sum0 += (q63_t) x0 *c0; 00304 00305 /* Read the coefficient */ 00306 c0 = *(ptr2); 00307 00308 /* Increment the coefficient pointer by interpolation factor times. */ 00309 ptr2 += S->L; 00310 00311 /* Read the input sample */ 00312 x0 = *(ptr1++); 00313 00314 /* Perform the multiply-accumulate */ 00315 sum0 += (q63_t) x0 *c0; 00316 00317 /* Decrement the loop counter */ 00318 tapCnt--; 00319 } 00320 00321 /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */ 00322 tapCnt = phaseLen & 0x3u; 00323 00324 while(tapCnt > 0u) 00325 { 00326 /* Read the coefficient */ 00327 c0 = *(ptr2); 00328 00329 /* Increment the coefficient pointer by interpolation factor times. */ 00330 ptr2 += S->L; 00331 00332 /* Read the input sample */ 00333 x0 = *(ptr1++); 00334 00335 /* Perform the multiply-accumulate */ 00336 sum0 += (q63_t) x0 *c0; 00337 00338 /* Decrement the loop counter */ 00339 tapCnt--; 00340 } 00341 00342 /* The result is in the accumulator, store in the destination buffer. */ 00343 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16)); 00344 00345 j++; 00346 00347 /* Decrement the loop counter */ 00348 i--; 00349 } 00350 00351 /* Advance the state pointer by 1 00352 * to process the next group of interpolation factor number samples */ 00353 pState = pState + 1; 00354 00355 /* Decrement the loop counter */ 00356 blkCnt--; 00357 } 00358 00359 00360 /* Processing is complete. 00361 ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer. 00362 ** This prepares the state buffer for the next function call. */ 00363 00364 /* Points to the start of the state buffer */ 00365 pStateCurnt = S->pState; 00366 00367 i = ((uint32_t) phaseLen - 1u) >> 2u; 00368 00369 /* copy data */ 00370 while(i > 0u) 00371 { 00372 #ifndef UNALIGNED_SUPPORT_DISABLE 00373 00374 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00375 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00376 00377 #else 00378 00379 *pStateCurnt++ = *pState++; 00380 *pStateCurnt++ = *pState++; 00381 *pStateCurnt++ = *pState++; 00382 *pStateCurnt++ = *pState++; 00383 00384 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */ 00385 00386 /* Decrement the loop counter */ 00387 i--; 00388 } 00389 00390 i = ((uint32_t) phaseLen - 1u) % 0x04u; 00391 00392 while(i > 0u) 00393 { 00394 *pStateCurnt++ = *pState++; 00395 00396 /* Decrement the loop counter */ 00397 i--; 00398 } 00399 } 00400 00401 #else 00402 00403 /* Run the below code for Cortex-M0 */ 00404 00405 void arm_fir_interpolate_q15( 00406 const arm_fir_interpolate_instance_q15 * S, 00407 q15_t * pSrc, 00408 q15_t * pDst, 00409 uint32_t blockSize) 00410 { 00411 q15_t *pState = S->pState; /* State pointer */ 00412 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00413 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00414 q15_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */ 00415 q63_t sum; /* Accumulator */ 00416 q15_t x0, c0; /* Temporary variables to hold state and coefficient values */ 00417 uint32_t i, blkCnt, tapCnt; /* Loop counters */ 00418 uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */ 00419 00420 00421 /* S->pState buffer contains previous frame (phaseLen - 1) samples */ 00422 /* pStateCurnt points to the location where the new input data should be written */ 00423 pStateCurnt = S->pState + (phaseLen - 1u); 00424 00425 /* Total number of intput samples */ 00426 blkCnt = blockSize; 00427 00428 /* Loop over the blockSize. */ 00429 while(blkCnt > 0u) 00430 { 00431 /* Copy new input sample into the state buffer */ 00432 *pStateCurnt++ = *pSrc++; 00433 00434 /* Loop over the Interpolation factor. */ 00435 i = S->L; 00436 00437 while(i > 0u) 00438 { 00439 /* Set accumulator to zero */ 00440 sum = 0; 00441 00442 /* Initialize state pointer */ 00443 ptr1 = pState; 00444 00445 /* Initialize coefficient pointer */ 00446 ptr2 = pCoeffs + (i - 1u); 00447 00448 /* Loop over the polyPhase length */ 00449 tapCnt = (uint32_t) phaseLen; 00450 00451 while(tapCnt > 0u) 00452 { 00453 /* Read the coefficient */ 00454 c0 = *ptr2; 00455 00456 /* Increment the coefficient pointer by interpolation factor times. */ 00457 ptr2 += S->L; 00458 00459 /* Read the input sample */ 00460 x0 = *ptr1++; 00461 00462 /* Perform the multiply-accumulate */ 00463 sum += ((q31_t) x0 * c0); 00464 00465 /* Decrement the loop counter */ 00466 tapCnt--; 00467 } 00468 00469 /* Store the result after converting to 1.15 format in the destination buffer */ 00470 *pDst++ = (q15_t) (__SSAT((sum >> 15), 16)); 00471 00472 /* Decrement the loop counter */ 00473 i--; 00474 } 00475 00476 /* Advance the state pointer by 1 00477 * to process the next group of interpolation factor number samples */ 00478 pState = pState + 1; 00479 00480 /* Decrement the loop counter */ 00481 blkCnt--; 00482 } 00483 00484 /* Processing is complete. 00485 ** Now copy the last phaseLen - 1 samples to the start of the state buffer. 00486 ** This prepares the state buffer for the next function call. */ 00487 00488 /* Points to the start of the state buffer */ 00489 pStateCurnt = S->pState; 00490 00491 i = (uint32_t) phaseLen - 1u; 00492 00493 while(i > 0u) 00494 { 00495 *pStateCurnt++ = *pState++; 00496 00497 /* Decrement the loop counter */ 00498 i--; 00499 } 00500 00501 } 00502 00503 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00504 00505 00506 /** 00507 * @} end of FIR_Interpolate group 00508 */
Generated on Tue Jul 12 2022 12:36:55 by 1.7.2