CMSIS DSP library
Dependents: KL25Z_FFT_Demo Hat_Board_v5_1 KL25Z_FFT_Demo_tony KL25Z_FFT_Demo_tony ... more
Fork of mbed-dsp by
arm_conv_partial_f32.c
00001 /* ---------------------------------------------------------------------------- 00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 17. January 2013 00005 * $Revision: V1.4.1 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_conv_partial_f32.c 00009 * 00010 * Description: Partial convolution of floating-point sequences. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @ingroup groupFilters 00045 */ 00046 00047 /** 00048 * @defgroup PartialConv Partial Convolution 00049 * 00050 * Partial Convolution is equivalent to Convolution except that a subset of the output samples is generated. 00051 * Each function has two additional arguments. 00052 * <code>firstIndex</code> specifies the starting index of the subset of output samples. 00053 * <code>numPoints</code> is the number of output samples to compute. 00054 * The function computes the output in the range 00055 * <code>[firstIndex, ..., firstIndex+numPoints-1]</code>. 00056 * The output array <code>pDst</code> contains <code>numPoints</code> values. 00057 * 00058 * The allowable range of output indices is [0 srcALen+srcBLen-2]. 00059 * If the requested subset does not fall in this range then the functions return ARM_MATH_ARGUMENT_ERROR. 00060 * Otherwise the functions return ARM_MATH_SUCCESS. 00061 * \note Refer arm_conv_f32() for details on fixed point behavior. 00062 * 00063 * 00064 * <b>Fast Versions</b> 00065 * 00066 * \par 00067 * Fast versions are supported for Q31 and Q15 of partial convolution. Cycles for Fast versions are less compared to Q31 and Q15 of partial conv and the design requires 00068 * the input signals should be scaled down to avoid intermediate overflows. 00069 * 00070 * 00071 * <b>Opt Versions</b> 00072 * 00073 * \par 00074 * Opt versions are supported for Q15 and Q7. Design uses internal scratch buffer for getting good optimisation. 00075 * These versions are optimised in cycles and consumes more memory(Scratch memory) compared to Q15 and Q7 versions of partial convolution 00076 */ 00077 00078 /** 00079 * @addtogroup PartialConv 00080 * @{ 00081 */ 00082 00083 /** 00084 * @brief Partial convolution of floating-point sequences. 00085 * @param[in] *pSrcA points to the first input sequence. 00086 * @param[in] srcALen length of the first input sequence. 00087 * @param[in] *pSrcB points to the second input sequence. 00088 * @param[in] srcBLen length of the second input sequence. 00089 * @param[out] *pDst points to the location where the output result is written. 00090 * @param[in] firstIndex is the first output sample to start with. 00091 * @param[in] numPoints is the number of output points to be computed. 00092 * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. 00093 */ 00094 00095 arm_status arm_conv_partial_f32( 00096 float32_t * pSrcA, 00097 uint32_t srcALen, 00098 float32_t * pSrcB, 00099 uint32_t srcBLen, 00100 float32_t * pDst, 00101 uint32_t firstIndex, 00102 uint32_t numPoints) 00103 { 00104 00105 00106 #ifndef ARM_MATH_CM0_FAMILY 00107 00108 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00109 00110 float32_t *pIn1 = pSrcA; /* inputA pointer */ 00111 float32_t *pIn2 = pSrcB; /* inputB pointer */ 00112 float32_t *pOut = pDst; /* output pointer */ 00113 float32_t *px; /* Intermediate inputA pointer */ 00114 float32_t *py; /* Intermediate inputB pointer */ 00115 float32_t *pSrc1, *pSrc2; /* Intermediate pointers */ 00116 float32_t sum, acc0, acc1, acc2, acc3; /* Accumulator */ 00117 float32_t x0, x1, x2, x3, c0; /* Temporary variables to hold state and coefficient values */ 00118 uint32_t j, k, count = 0u, blkCnt, check; 00119 int32_t blockSize1, blockSize2, blockSize3; /* loop counters */ 00120 arm_status status; /* status of Partial convolution */ 00121 00122 00123 /* Check for range of output samples to be calculated */ 00124 if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u)))) 00125 { 00126 /* Set status as ARM_MATH_ARGUMENT_ERROR */ 00127 status = ARM_MATH_ARGUMENT_ERROR; 00128 } 00129 else 00130 { 00131 00132 /* The algorithm implementation is based on the lengths of the inputs. */ 00133 /* srcB is always made to slide across srcA. */ 00134 /* So srcBLen is always considered as shorter or equal to srcALen */ 00135 if(srcALen >= srcBLen) 00136 { 00137 /* Initialization of inputA pointer */ 00138 pIn1 = pSrcA; 00139 00140 /* Initialization of inputB pointer */ 00141 pIn2 = pSrcB; 00142 } 00143 else 00144 { 00145 /* Initialization of inputA pointer */ 00146 pIn1 = pSrcB; 00147 00148 /* Initialization of inputB pointer */ 00149 pIn2 = pSrcA; 00150 00151 /* srcBLen is always considered as shorter or equal to srcALen */ 00152 j = srcBLen; 00153 srcBLen = srcALen; 00154 srcALen = j; 00155 } 00156 00157 /* Conditions to check which loopCounter holds 00158 * the first and last indices of the output samples to be calculated. */ 00159 check = firstIndex + numPoints; 00160 blockSize3 = (int32_t) check - (int32_t) srcALen; 00161 blockSize3 = (blockSize3 > 0) ? blockSize3 : 0; 00162 blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex; 00163 blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1u)) ? blockSize1 : 00164 (int32_t) numPoints) : 0; 00165 blockSize2 = ((int32_t) check - blockSize3) - 00166 (blockSize1 + (int32_t) firstIndex); 00167 blockSize2 = (blockSize2 > 0) ? blockSize2 : 0; 00168 00169 /* conv(x,y) at n = x[n] * y[0] + x[n-1] * y[1] + x[n-2] * y[2] + ...+ x[n-N+1] * y[N -1] */ 00170 /* The function is internally 00171 * divided into three stages according to the number of multiplications that has to be 00172 * taken place between inputA samples and inputB samples. In the first stage of the 00173 * algorithm, the multiplications increase by one for every iteration. 00174 * In the second stage of the algorithm, srcBLen number of multiplications are done. 00175 * In the third stage of the algorithm, the multiplications decrease by one 00176 * for every iteration. */ 00177 00178 /* Set the output pointer to point to the firstIndex 00179 * of the output sample to be calculated. */ 00180 pOut = pDst + firstIndex; 00181 00182 /* -------------------------- 00183 * Initializations of stage1 00184 * -------------------------*/ 00185 00186 /* sum = x[0] * y[0] 00187 * sum = x[0] * y[1] + x[1] * y[0] 00188 * .... 00189 * sum = x[0] * y[srcBlen - 1] + x[1] * y[srcBlen - 2] +...+ x[srcBLen - 1] * y[0] 00190 */ 00191 00192 /* In this stage the MAC operations are increased by 1 for every iteration. 00193 The count variable holds the number of MAC operations performed. 00194 Since the partial convolution starts from from firstIndex 00195 Number of Macs to be performed is firstIndex + 1 */ 00196 count = 1u + firstIndex; 00197 00198 /* Working pointer of inputA */ 00199 px = pIn1; 00200 00201 /* Working pointer of inputB */ 00202 pSrc1 = pIn2 + firstIndex; 00203 py = pSrc1; 00204 00205 /* ------------------------ 00206 * Stage1 process 00207 * ----------------------*/ 00208 00209 /* The first stage starts here */ 00210 while(blockSize1 > 0) 00211 { 00212 /* Accumulator is made zero for every iteration */ 00213 sum = 0.0f; 00214 00215 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00216 k = count >> 2u; 00217 00218 /* First part of the processing with loop unrolling. Compute 4 MACs at a time. 00219 ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 00220 while(k > 0u) 00221 { 00222 /* x[0] * y[srcBLen - 1] */ 00223 sum += *px++ * *py--; 00224 00225 /* x[1] * y[srcBLen - 2] */ 00226 sum += *px++ * *py--; 00227 00228 /* x[2] * y[srcBLen - 3] */ 00229 sum += *px++ * *py--; 00230 00231 /* x[3] * y[srcBLen - 4] */ 00232 sum += *px++ * *py--; 00233 00234 /* Decrement the loop counter */ 00235 k--; 00236 } 00237 00238 /* If the count is not a multiple of 4, compute any remaining MACs here. 00239 ** No loop unrolling is used. */ 00240 k = count % 0x4u; 00241 00242 while(k > 0u) 00243 { 00244 /* Perform the multiply-accumulates */ 00245 sum += *px++ * *py--; 00246 00247 /* Decrement the loop counter */ 00248 k--; 00249 } 00250 00251 /* Store the result in the accumulator in the destination buffer. */ 00252 *pOut++ = sum; 00253 00254 /* Update the inputA and inputB pointers for next MAC calculation */ 00255 py = ++pSrc1; 00256 px = pIn1; 00257 00258 /* Increment the MAC count */ 00259 count++; 00260 00261 /* Decrement the loop counter */ 00262 blockSize1--; 00263 } 00264 00265 /* -------------------------- 00266 * Initializations of stage2 00267 * ------------------------*/ 00268 00269 /* sum = x[0] * y[srcBLen-1] + x[1] * y[srcBLen-2] +...+ x[srcBLen-1] * y[0] 00270 * sum = x[1] * y[srcBLen-1] + x[2] * y[srcBLen-2] +...+ x[srcBLen] * y[0] 00271 * .... 00272 * sum = x[srcALen-srcBLen-2] * y[srcBLen-1] + x[srcALen] * y[srcBLen-2] +...+ x[srcALen-1] * y[0] 00273 */ 00274 00275 /* Working pointer of inputA */ 00276 px = pIn1; 00277 00278 /* Working pointer of inputB */ 00279 pSrc2 = pIn2 + (srcBLen - 1u); 00280 py = pSrc2; 00281 00282 /* count is index by which the pointer pIn1 to be incremented */ 00283 count = 0u; 00284 00285 /* ------------------- 00286 * Stage2 process 00287 * ------------------*/ 00288 00289 /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed. 00290 * So, to loop unroll over blockSize2, 00291 * srcBLen should be greater than or equal to 4 */ 00292 if(srcBLen >= 4u) 00293 { 00294 /* Loop unroll over blockSize2, by 4 */ 00295 blkCnt = ((uint32_t) blockSize2 >> 2u); 00296 00297 while(blkCnt > 0u) 00298 { 00299 /* Set all accumulators to zero */ 00300 acc0 = 0.0f; 00301 acc1 = 0.0f; 00302 acc2 = 0.0f; 00303 acc3 = 0.0f; 00304 00305 /* read x[0], x[1], x[2] samples */ 00306 x0 = *(px++); 00307 x1 = *(px++); 00308 x2 = *(px++); 00309 00310 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00311 k = srcBLen >> 2u; 00312 00313 /* First part of the processing with loop unrolling. Compute 4 MACs at a time. 00314 ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 00315 do 00316 { 00317 /* Read y[srcBLen - 1] sample */ 00318 c0 = *(py--); 00319 00320 /* Read x[3] sample */ 00321 x3 = *(px++); 00322 00323 /* Perform the multiply-accumulate */ 00324 /* acc0 += x[0] * y[srcBLen - 1] */ 00325 acc0 += x0 * c0; 00326 00327 /* acc1 += x[1] * y[srcBLen - 1] */ 00328 acc1 += x1 * c0; 00329 00330 /* acc2 += x[2] * y[srcBLen - 1] */ 00331 acc2 += x2 * c0; 00332 00333 /* acc3 += x[3] * y[srcBLen - 1] */ 00334 acc3 += x3 * c0; 00335 00336 /* Read y[srcBLen - 2] sample */ 00337 c0 = *(py--); 00338 00339 /* Read x[4] sample */ 00340 x0 = *(px++); 00341 00342 /* Perform the multiply-accumulate */ 00343 /* acc0 += x[1] * y[srcBLen - 2] */ 00344 acc0 += x1 * c0; 00345 /* acc1 += x[2] * y[srcBLen - 2] */ 00346 acc1 += x2 * c0; 00347 /* acc2 += x[3] * y[srcBLen - 2] */ 00348 acc2 += x3 * c0; 00349 /* acc3 += x[4] * y[srcBLen - 2] */ 00350 acc3 += x0 * c0; 00351 00352 /* Read y[srcBLen - 3] sample */ 00353 c0 = *(py--); 00354 00355 /* Read x[5] sample */ 00356 x1 = *(px++); 00357 00358 /* Perform the multiply-accumulates */ 00359 /* acc0 += x[2] * y[srcBLen - 3] */ 00360 acc0 += x2 * c0; 00361 /* acc1 += x[3] * y[srcBLen - 2] */ 00362 acc1 += x3 * c0; 00363 /* acc2 += x[4] * y[srcBLen - 2] */ 00364 acc2 += x0 * c0; 00365 /* acc3 += x[5] * y[srcBLen - 2] */ 00366 acc3 += x1 * c0; 00367 00368 /* Read y[srcBLen - 4] sample */ 00369 c0 = *(py--); 00370 00371 /* Read x[6] sample */ 00372 x2 = *(px++); 00373 00374 /* Perform the multiply-accumulates */ 00375 /* acc0 += x[3] * y[srcBLen - 4] */ 00376 acc0 += x3 * c0; 00377 /* acc1 += x[4] * y[srcBLen - 4] */ 00378 acc1 += x0 * c0; 00379 /* acc2 += x[5] * y[srcBLen - 4] */ 00380 acc2 += x1 * c0; 00381 /* acc3 += x[6] * y[srcBLen - 4] */ 00382 acc3 += x2 * c0; 00383 00384 00385 } while(--k); 00386 00387 /* If the srcBLen is not a multiple of 4, compute any remaining MACs here. 00388 ** No loop unrolling is used. */ 00389 k = srcBLen % 0x4u; 00390 00391 while(k > 0u) 00392 { 00393 /* Read y[srcBLen - 5] sample */ 00394 c0 = *(py--); 00395 00396 /* Read x[7] sample */ 00397 x3 = *(px++); 00398 00399 /* Perform the multiply-accumulates */ 00400 /* acc0 += x[4] * y[srcBLen - 5] */ 00401 acc0 += x0 * c0; 00402 /* acc1 += x[5] * y[srcBLen - 5] */ 00403 acc1 += x1 * c0; 00404 /* acc2 += x[6] * y[srcBLen - 5] */ 00405 acc2 += x2 * c0; 00406 /* acc3 += x[7] * y[srcBLen - 5] */ 00407 acc3 += x3 * c0; 00408 00409 /* Reuse the present samples for the next MAC */ 00410 x0 = x1; 00411 x1 = x2; 00412 x2 = x3; 00413 00414 /* Decrement the loop counter */ 00415 k--; 00416 } 00417 00418 /* Store the result in the accumulator in the destination buffer. */ 00419 *pOut++ = acc0; 00420 *pOut++ = acc1; 00421 *pOut++ = acc2; 00422 *pOut++ = acc3; 00423 00424 /* Increment the pointer pIn1 index, count by 1 */ 00425 count += 4u; 00426 00427 /* Update the inputA and inputB pointers for next MAC calculation */ 00428 px = pIn1 + count; 00429 py = pSrc2; 00430 00431 /* Decrement the loop counter */ 00432 blkCnt--; 00433 } 00434 00435 /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here. 00436 ** No loop unrolling is used. */ 00437 blkCnt = (uint32_t) blockSize2 % 0x4u; 00438 00439 while(blkCnt > 0u) 00440 { 00441 /* Accumulator is made zero for every iteration */ 00442 sum = 0.0f; 00443 00444 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00445 k = srcBLen >> 2u; 00446 00447 /* First part of the processing with loop unrolling. Compute 4 MACs at a time. 00448 ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 00449 while(k > 0u) 00450 { 00451 /* Perform the multiply-accumulates */ 00452 sum += *px++ * *py--; 00453 sum += *px++ * *py--; 00454 sum += *px++ * *py--; 00455 sum += *px++ * *py--; 00456 00457 /* Decrement the loop counter */ 00458 k--; 00459 } 00460 00461 /* If the srcBLen is not a multiple of 4, compute any remaining MACs here. 00462 ** No loop unrolling is used. */ 00463 k = srcBLen % 0x4u; 00464 00465 while(k > 0u) 00466 { 00467 /* Perform the multiply-accumulate */ 00468 sum += *px++ * *py--; 00469 00470 /* Decrement the loop counter */ 00471 k--; 00472 } 00473 00474 /* Store the result in the accumulator in the destination buffer. */ 00475 *pOut++ = sum; 00476 00477 /* Increment the MAC count */ 00478 count++; 00479 00480 /* Update the inputA and inputB pointers for next MAC calculation */ 00481 px = pIn1 + count; 00482 py = pSrc2; 00483 00484 /* Decrement the loop counter */ 00485 blkCnt--; 00486 } 00487 } 00488 else 00489 { 00490 /* If the srcBLen is not a multiple of 4, 00491 * the blockSize2 loop cannot be unrolled by 4 */ 00492 blkCnt = (uint32_t) blockSize2; 00493 00494 while(blkCnt > 0u) 00495 { 00496 /* Accumulator is made zero for every iteration */ 00497 sum = 0.0f; 00498 00499 /* srcBLen number of MACS should be performed */ 00500 k = srcBLen; 00501 00502 while(k > 0u) 00503 { 00504 /* Perform the multiply-accumulate */ 00505 sum += *px++ * *py--; 00506 00507 /* Decrement the loop counter */ 00508 k--; 00509 } 00510 00511 /* Store the result in the accumulator in the destination buffer. */ 00512 *pOut++ = sum; 00513 00514 /* Increment the MAC count */ 00515 count++; 00516 00517 /* Update the inputA and inputB pointers for next MAC calculation */ 00518 px = pIn1 + count; 00519 py = pSrc2; 00520 00521 /* Decrement the loop counter */ 00522 blkCnt--; 00523 } 00524 } 00525 00526 00527 /* -------------------------- 00528 * Initializations of stage3 00529 * -------------------------*/ 00530 00531 /* sum += x[srcALen-srcBLen+1] * y[srcBLen-1] + x[srcALen-srcBLen+2] * y[srcBLen-2] +...+ x[srcALen-1] * y[1] 00532 * sum += x[srcALen-srcBLen+2] * y[srcBLen-1] + x[srcALen-srcBLen+3] * y[srcBLen-2] +...+ x[srcALen-1] * y[2] 00533 * .... 00534 * sum += x[srcALen-2] * y[srcBLen-1] + x[srcALen-1] * y[srcBLen-2] 00535 * sum += x[srcALen-1] * y[srcBLen-1] 00536 */ 00537 00538 /* In this stage the MAC operations are decreased by 1 for every iteration. 00539 The count variable holds the number of MAC operations performed */ 00540 count = srcBLen - 1u; 00541 00542 /* Working pointer of inputA */ 00543 pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u); 00544 px = pSrc1; 00545 00546 /* Working pointer of inputB */ 00547 pSrc2 = pIn2 + (srcBLen - 1u); 00548 py = pSrc2; 00549 00550 while(blockSize3 > 0) 00551 { 00552 /* Accumulator is made zero for every iteration */ 00553 sum = 0.0f; 00554 00555 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00556 k = count >> 2u; 00557 00558 /* First part of the processing with loop unrolling. Compute 4 MACs at a time. 00559 ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 00560 while(k > 0u) 00561 { 00562 /* sum += x[srcALen - srcBLen + 1] * y[srcBLen - 1] */ 00563 sum += *px++ * *py--; 00564 00565 /* sum += x[srcALen - srcBLen + 2] * y[srcBLen - 2] */ 00566 sum += *px++ * *py--; 00567 00568 /* sum += x[srcALen - srcBLen + 3] * y[srcBLen - 3] */ 00569 sum += *px++ * *py--; 00570 00571 /* sum += x[srcALen - srcBLen + 4] * y[srcBLen - 4] */ 00572 sum += *px++ * *py--; 00573 00574 /* Decrement the loop counter */ 00575 k--; 00576 } 00577 00578 /* If the count is not a multiple of 4, compute any remaining MACs here. 00579 ** No loop unrolling is used. */ 00580 k = count % 0x4u; 00581 00582 while(k > 0u) 00583 { 00584 /* Perform the multiply-accumulates */ 00585 /* sum += x[srcALen-1] * y[srcBLen-1] */ 00586 sum += *px++ * *py--; 00587 00588 /* Decrement the loop counter */ 00589 k--; 00590 } 00591 00592 /* Store the result in the accumulator in the destination buffer. */ 00593 *pOut++ = sum; 00594 00595 /* Update the inputA and inputB pointers for next MAC calculation */ 00596 px = ++pSrc1; 00597 py = pSrc2; 00598 00599 /* Decrement the MAC count */ 00600 count--; 00601 00602 /* Decrement the loop counter */ 00603 blockSize3--; 00604 00605 } 00606 00607 /* set status as ARM_MATH_SUCCESS */ 00608 status = ARM_MATH_SUCCESS; 00609 } 00610 00611 /* Return to application */ 00612 return (status); 00613 00614 #else 00615 00616 /* Run the below code for Cortex-M0 */ 00617 00618 float32_t *pIn1 = pSrcA; /* inputA pointer */ 00619 float32_t *pIn2 = pSrcB; /* inputB pointer */ 00620 float32_t sum; /* Accumulator */ 00621 uint32_t i, j; /* loop counters */ 00622 arm_status status; /* status of Partial convolution */ 00623 00624 /* Check for range of output samples to be calculated */ 00625 if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u)))) 00626 { 00627 /* Set status as ARM_ARGUMENT_ERROR */ 00628 status = ARM_MATH_ARGUMENT_ERROR; 00629 } 00630 else 00631 { 00632 /* Loop to calculate convolution for output length number of values */ 00633 for (i = firstIndex; i <= (firstIndex + numPoints - 1); i++) 00634 { 00635 /* Initialize sum with zero to carry on MAC operations */ 00636 sum = 0.0f; 00637 00638 /* Loop to perform MAC operations according to convolution equation */ 00639 for (j = 0u; j <= i; j++) 00640 { 00641 /* Check the array limitations for inputs */ 00642 if((((i - j) < srcBLen) && (j < srcALen))) 00643 { 00644 /* z[i] += x[i-j] * y[j] */ 00645 sum += pIn1[j] * pIn2[i - j]; 00646 } 00647 } 00648 /* Store the output in the destination buffer */ 00649 pDst[i] = sum; 00650 } 00651 /* set status as ARM_SUCCESS as there are no argument errors */ 00652 status = ARM_MATH_SUCCESS; 00653 } 00654 return (status); 00655 00656 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00657 00658 } 00659 00660 /** 00661 * @} end of PartialConv group 00662 */
Generated on Tue Jul 12 2022 12:36:54 by 1.7.2