Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
arm_conv_partial_f32.c
00001 /* ---------------------------------------------------------------------- 00002 * Project: CMSIS DSP Library 00003 * Title: arm_conv_partial_f32.c 00004 * Description: Partial convolution of floating-point sequences 00005 * 00006 * $Date: 27. January 2017 00007 * $Revision: V.1.5.1 00008 * 00009 * Target Processor: Cortex-M cores 00010 * -------------------------------------------------------------------- */ 00011 /* 00012 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. 00013 * 00014 * SPDX-License-Identifier: Apache-2.0 00015 * 00016 * Licensed under the Apache License, Version 2.0 (the License); you may 00017 * not use this file except in compliance with the License. 00018 * You may obtain a copy of the License at 00019 * 00020 * www.apache.org/licenses/LICENSE-2.0 00021 * 00022 * Unless required by applicable law or agreed to in writing, software 00023 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 00024 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00025 * See the License for the specific language governing permissions and 00026 * limitations under the License. 00027 */ 00028 00029 #include "arm_math.h" 00030 00031 /** 00032 * @ingroup groupFilters 00033 */ 00034 00035 /** 00036 * @defgroup PartialConv Partial Convolution 00037 * 00038 * Partial Convolution is equivalent to Convolution except that a subset of the output samples is generated. 00039 * Each function has two additional arguments. 00040 * <code>firstIndex</code> specifies the starting index of the subset of output samples. 00041 * <code>numPoints</code> is the number of output samples to compute. 00042 * The function computes the output in the range 00043 * <code>[firstIndex, ..., firstIndex+numPoints-1]</code>. 00044 * The output array <code>pDst</code> contains <code>numPoints</code> values. 00045 * 00046 * The allowable range of output indices is [0 srcALen+srcBLen-2]. 
00047 * If the requested subset does not fall in this range then the functions return ARM_MATH_ARGUMENT_ERROR. 00048 * Otherwise the functions return ARM_MATH_SUCCESS. 00049 * \note Refer arm_conv_f32() for details on fixed point behavior. 00050 * 00051 * 00052 * <b>Fast Versions</b> 00053 * 00054 * \par 00055 * Fast versions are supported for Q31 and Q15 of partial convolution. Cycles for Fast versions are less compared to Q31 and Q15 of partial conv and the design requires 00056 * the input signals should be scaled down to avoid intermediate overflows. 00057 * 00058 * 00059 * <b>Opt Versions</b> 00060 * 00061 * \par 00062 * Opt versions are supported for Q15 and Q7. Design uses internal scratch buffer for getting good optimisation. 00063 * These versions are optimised in cycles and consumes more memory(Scratch memory) compared to Q15 and Q7 versions of partial convolution 00064 */ 00065 00066 /** 00067 * @addtogroup PartialConv 00068 * @{ 00069 */ 00070 00071 /** 00072 * @brief Partial convolution of floating-point sequences. 00073 * @param[in] *pSrcA points to the first input sequence. 00074 * @param[in] srcALen length of the first input sequence. 00075 * @param[in] *pSrcB points to the second input sequence. 00076 * @param[in] srcBLen length of the second input sequence. 00077 * @param[out] *pDst points to the location where the output result is written. 00078 * @param[in] firstIndex is the first output sample to start with. 00079 * @param[in] numPoints is the number of output points to be computed. 00080 * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. 
00081 */ 00082 00083 arm_status arm_conv_partial_f32( 00084 float32_t * pSrcA, 00085 uint32_t srcALen, 00086 float32_t * pSrcB, 00087 uint32_t srcBLen, 00088 float32_t * pDst, 00089 uint32_t firstIndex, 00090 uint32_t numPoints) 00091 { 00092 00093 00094 #if defined (ARM_MATH_DSP) 00095 00096 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00097 00098 float32_t *pIn1 = pSrcA; /* inputA pointer */ 00099 float32_t *pIn2 = pSrcB; /* inputB pointer */ 00100 float32_t *pOut = pDst; /* output pointer */ 00101 float32_t *px; /* Intermediate inputA pointer */ 00102 float32_t *py; /* Intermediate inputB pointer */ 00103 float32_t *pSrc1, *pSrc2; /* Intermediate pointers */ 00104 float32_t sum, acc0, acc1, acc2, acc3; /* Accumulator */ 00105 float32_t x0, x1, x2, x3, c0; /* Temporary variables to hold state and coefficient values */ 00106 uint32_t j, k, count = 0U, blkCnt, check; 00107 int32_t blockSize1, blockSize2, blockSize3; /* loop counters */ 00108 arm_status status; /* status of Partial convolution */ 00109 00110 00111 /* Check for range of output samples to be calculated */ 00112 if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U)))) 00113 { 00114 /* Set status as ARM_MATH_ARGUMENT_ERROR */ 00115 status = ARM_MATH_ARGUMENT_ERROR; 00116 } 00117 else 00118 { 00119 00120 /* The algorithm implementation is based on the lengths of the inputs. */ 00121 /* srcB is always made to slide across srcA. 
*/ 00122 /* So srcBLen is always considered as shorter or equal to srcALen */ 00123 if (srcALen >= srcBLen) 00124 { 00125 /* Initialization of inputA pointer */ 00126 pIn1 = pSrcA; 00127 00128 /* Initialization of inputB pointer */ 00129 pIn2 = pSrcB; 00130 } 00131 else 00132 { 00133 /* Initialization of inputA pointer */ 00134 pIn1 = pSrcB; 00135 00136 /* Initialization of inputB pointer */ 00137 pIn2 = pSrcA; 00138 00139 /* srcBLen is always considered as shorter or equal to srcALen */ 00140 j = srcBLen; 00141 srcBLen = srcALen; 00142 srcALen = j; 00143 } 00144 00145 /* Conditions to check which loopCounter holds 00146 * the first and last indices of the output samples to be calculated. */ 00147 check = firstIndex + numPoints; 00148 blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0; 00149 blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3; 00150 blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex; 00151 blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : 00152 (int32_t) numPoints) : 0; 00153 blockSize2 = ((int32_t) check - blockSize3) - 00154 (blockSize1 + (int32_t) firstIndex); 00155 blockSize2 = (blockSize2 > 0) ? blockSize2 : 0; 00156 00157 /* conv(x,y) at n = x[n] * y[0] + x[n-1] * y[1] + x[n-2] * y[2] + ...+ x[n-N+1] * y[N -1] */ 00158 /* The function is internally 00159 * divided into three stages according to the number of multiplications that has to be 00160 * taken place between inputA samples and inputB samples. In the first stage of the 00161 * algorithm, the multiplications increase by one for every iteration. 00162 * In the second stage of the algorithm, srcBLen number of multiplications are done. 00163 * In the third stage of the algorithm, the multiplications decrease by one 00164 * for every iteration. 
*/ 00165 00166 /* Set the output pointer to point to the firstIndex 00167 * of the output sample to be calculated. */ 00168 pOut = pDst + firstIndex; 00169 00170 /* -------------------------- 00171 * Initializations of stage1 00172 * -------------------------*/ 00173 00174 /* sum = x[0] * y[0] 00175 * sum = x[0] * y[1] + x[1] * y[0] 00176 * .... 00177 * sum = x[0] * y[srcBlen - 1] + x[1] * y[srcBlen - 2] +...+ x[srcBLen - 1] * y[0] 00178 */ 00179 00180 /* In this stage the MAC operations are increased by 1 for every iteration. 00181 The count variable holds the number of MAC operations performed. 00182 Since the partial convolution starts from from firstIndex 00183 Number of Macs to be performed is firstIndex + 1 */ 00184 count = 1U + firstIndex; 00185 00186 /* Working pointer of inputA */ 00187 px = pIn1; 00188 00189 /* Working pointer of inputB */ 00190 pSrc1 = pIn2 + firstIndex; 00191 py = pSrc1; 00192 00193 /* ------------------------ 00194 * Stage1 process 00195 * ----------------------*/ 00196 00197 /* The first stage starts here */ 00198 while (blockSize1 > 0) 00199 { 00200 /* Accumulator is made zero for every iteration */ 00201 sum = 0.0f; 00202 00203 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00204 k = count >> 2U; 00205 00206 /* First part of the processing with loop unrolling. Compute 4 MACs at a time. 00207 ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 00208 while (k > 0U) 00209 { 00210 /* x[0] * y[srcBLen - 1] */ 00211 sum += *px++ * *py--; 00212 00213 /* x[1] * y[srcBLen - 2] */ 00214 sum += *px++ * *py--; 00215 00216 /* x[2] * y[srcBLen - 3] */ 00217 sum += *px++ * *py--; 00218 00219 /* x[3] * y[srcBLen - 4] */ 00220 sum += *px++ * *py--; 00221 00222 /* Decrement the loop counter */ 00223 k--; 00224 } 00225 00226 /* If the count is not a multiple of 4, compute any remaining MACs here. 00227 ** No loop unrolling is used. 
*/ 00228 k = count % 0x4U; 00229 00230 while (k > 0U) 00231 { 00232 /* Perform the multiply-accumulates */ 00233 sum += *px++ * *py--; 00234 00235 /* Decrement the loop counter */ 00236 k--; 00237 } 00238 00239 /* Store the result in the accumulator in the destination buffer. */ 00240 *pOut++ = sum; 00241 00242 /* Update the inputA and inputB pointers for next MAC calculation */ 00243 py = ++pSrc1; 00244 px = pIn1; 00245 00246 /* Increment the MAC count */ 00247 count++; 00248 00249 /* Decrement the loop counter */ 00250 blockSize1--; 00251 } 00252 00253 /* -------------------------- 00254 * Initializations of stage2 00255 * ------------------------*/ 00256 00257 /* sum = x[0] * y[srcBLen-1] + x[1] * y[srcBLen-2] +...+ x[srcBLen-1] * y[0] 00258 * sum = x[1] * y[srcBLen-1] + x[2] * y[srcBLen-2] +...+ x[srcBLen] * y[0] 00259 * .... 00260 * sum = x[srcALen-srcBLen-2] * y[srcBLen-1] + x[srcALen] * y[srcBLen-2] +...+ x[srcALen-1] * y[0] 00261 */ 00262 00263 /* Working pointer of inputA */ 00264 if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) 00265 { 00266 px = pIn1 + firstIndex - srcBLen + 1; 00267 } 00268 else 00269 { 00270 px = pIn1; 00271 } 00272 00273 /* Working pointer of inputB */ 00274 pSrc2 = pIn2 + (srcBLen - 1U); 00275 py = pSrc2; 00276 00277 /* count is index by which the pointer pIn1 to be incremented */ 00278 count = 0U; 00279 00280 /* ------------------- 00281 * Stage2 process 00282 * ------------------*/ 00283 00284 /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed. 
00285 * So, to loop unroll over blockSize2, 00286 * srcBLen should be greater than or equal to 4 */ 00287 if (srcBLen >= 4U) 00288 { 00289 /* Loop unroll over blockSize2, by 4 */ 00290 blkCnt = ((uint32_t) blockSize2 >> 2U); 00291 00292 while (blkCnt > 0U) 00293 { 00294 /* Set all accumulators to zero */ 00295 acc0 = 0.0f; 00296 acc1 = 0.0f; 00297 acc2 = 0.0f; 00298 acc3 = 0.0f; 00299 00300 /* read x[0], x[1], x[2] samples */ 00301 x0 = *(px++); 00302 x1 = *(px++); 00303 x2 = *(px++); 00304 00305 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00306 k = srcBLen >> 2U; 00307 00308 /* First part of the processing with loop unrolling. Compute 4 MACs at a time. 00309 ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 00310 do 00311 { 00312 /* Read y[srcBLen - 1] sample */ 00313 c0 = *(py--); 00314 00315 /* Read x[3] sample */ 00316 x3 = *(px++); 00317 00318 /* Perform the multiply-accumulate */ 00319 /* acc0 += x[0] * y[srcBLen - 1] */ 00320 acc0 += x0 * c0; 00321 00322 /* acc1 += x[1] * y[srcBLen - 1] */ 00323 acc1 += x1 * c0; 00324 00325 /* acc2 += x[2] * y[srcBLen - 1] */ 00326 acc2 += x2 * c0; 00327 00328 /* acc3 += x[3] * y[srcBLen - 1] */ 00329 acc3 += x3 * c0; 00330 00331 /* Read y[srcBLen - 2] sample */ 00332 c0 = *(py--); 00333 00334 /* Read x[4] sample */ 00335 x0 = *(px++); 00336 00337 /* Perform the multiply-accumulate */ 00338 /* acc0 += x[1] * y[srcBLen - 2] */ 00339 acc0 += x1 * c0; 00340 /* acc1 += x[2] * y[srcBLen - 2] */ 00341 acc1 += x2 * c0; 00342 /* acc2 += x[3] * y[srcBLen - 2] */ 00343 acc2 += x3 * c0; 00344 /* acc3 += x[4] * y[srcBLen - 2] */ 00345 acc3 += x0 * c0; 00346 00347 /* Read y[srcBLen - 3] sample */ 00348 c0 = *(py--); 00349 00350 /* Read x[5] sample */ 00351 x1 = *(px++); 00352 00353 /* Perform the multiply-accumulates */ 00354 /* acc0 += x[2] * y[srcBLen - 3] */ 00355 acc0 += x2 * c0; 00356 /* acc1 += x[3] * y[srcBLen - 2] */ 00357 acc1 += x3 * c0; 00358 /* acc2 += x[4] * y[srcBLen - 2] */ 00359 
acc2 += x0 * c0; 00360 /* acc3 += x[5] * y[srcBLen - 2] */ 00361 acc3 += x1 * c0; 00362 00363 /* Read y[srcBLen - 4] sample */ 00364 c0 = *(py--); 00365 00366 /* Read x[6] sample */ 00367 x2 = *(px++); 00368 00369 /* Perform the multiply-accumulates */ 00370 /* acc0 += x[3] * y[srcBLen - 4] */ 00371 acc0 += x3 * c0; 00372 /* acc1 += x[4] * y[srcBLen - 4] */ 00373 acc1 += x0 * c0; 00374 /* acc2 += x[5] * y[srcBLen - 4] */ 00375 acc2 += x1 * c0; 00376 /* acc3 += x[6] * y[srcBLen - 4] */ 00377 acc3 += x2 * c0; 00378 00379 00380 } while (--k); 00381 00382 /* If the srcBLen is not a multiple of 4, compute any remaining MACs here. 00383 ** No loop unrolling is used. */ 00384 k = srcBLen % 0x4U; 00385 00386 while (k > 0U) 00387 { 00388 /* Read y[srcBLen - 5] sample */ 00389 c0 = *(py--); 00390 00391 /* Read x[7] sample */ 00392 x3 = *(px++); 00393 00394 /* Perform the multiply-accumulates */ 00395 /* acc0 += x[4] * y[srcBLen - 5] */ 00396 acc0 += x0 * c0; 00397 /* acc1 += x[5] * y[srcBLen - 5] */ 00398 acc1 += x1 * c0; 00399 /* acc2 += x[6] * y[srcBLen - 5] */ 00400 acc2 += x2 * c0; 00401 /* acc3 += x[7] * y[srcBLen - 5] */ 00402 acc3 += x3 * c0; 00403 00404 /* Reuse the present samples for the next MAC */ 00405 x0 = x1; 00406 x1 = x2; 00407 x2 = x3; 00408 00409 /* Decrement the loop counter */ 00410 k--; 00411 } 00412 00413 /* Store the result in the accumulator in the destination buffer. 
*/ 00414 *pOut++ = acc0; 00415 *pOut++ = acc1; 00416 *pOut++ = acc2; 00417 *pOut++ = acc3; 00418 00419 /* Increment the pointer pIn1 index, count by 1 */ 00420 count += 4U; 00421 00422 /* Update the inputA and inputB pointers for next MAC calculation */ 00423 if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) 00424 { 00425 px = pIn1 + firstIndex - srcBLen + 1 + count; 00426 } 00427 else 00428 { 00429 px = pIn1 + count; 00430 } 00431 py = pSrc2; 00432 00433 /* Decrement the loop counter */ 00434 blkCnt--; 00435 } 00436 00437 /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here. 00438 ** No loop unrolling is used. */ 00439 blkCnt = (uint32_t) blockSize2 % 0x4U; 00440 00441 while (blkCnt > 0U) 00442 { 00443 /* Accumulator is made zero for every iteration */ 00444 sum = 0.0f; 00445 00446 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00447 k = srcBLen >> 2U; 00448 00449 /* First part of the processing with loop unrolling. Compute 4 MACs at a time. 00450 ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 00451 while (k > 0U) 00452 { 00453 /* Perform the multiply-accumulates */ 00454 sum += *px++ * *py--; 00455 sum += *px++ * *py--; 00456 sum += *px++ * *py--; 00457 sum += *px++ * *py--; 00458 00459 /* Decrement the loop counter */ 00460 k--; 00461 } 00462 00463 /* If the srcBLen is not a multiple of 4, compute any remaining MACs here. 00464 ** No loop unrolling is used. */ 00465 k = srcBLen % 0x4U; 00466 00467 while (k > 0U) 00468 { 00469 /* Perform the multiply-accumulate */ 00470 sum += *px++ * *py--; 00471 00472 /* Decrement the loop counter */ 00473 k--; 00474 } 00475 00476 /* Store the result in the accumulator in the destination buffer. 
*/ 00477 *pOut++ = sum; 00478 00479 /* Increment the MAC count */ 00480 count++; 00481 00482 /* Update the inputA and inputB pointers for next MAC calculation */ 00483 if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) 00484 { 00485 px = pIn1 + firstIndex - srcBLen + 1 + count; 00486 } 00487 else 00488 { 00489 px = pIn1 + count; 00490 } 00491 py = pSrc2; 00492 00493 /* Decrement the loop counter */ 00494 blkCnt--; 00495 } 00496 } 00497 else 00498 { 00499 /* If the srcBLen is not a multiple of 4, 00500 * the blockSize2 loop cannot be unrolled by 4 */ 00501 blkCnt = (uint32_t) blockSize2; 00502 00503 while (blkCnt > 0U) 00504 { 00505 /* Accumulator is made zero for every iteration */ 00506 sum = 0.0f; 00507 00508 /* srcBLen number of MACS should be performed */ 00509 k = srcBLen; 00510 00511 while (k > 0U) 00512 { 00513 /* Perform the multiply-accumulate */ 00514 sum += *px++ * *py--; 00515 00516 /* Decrement the loop counter */ 00517 k--; 00518 } 00519 00520 /* Store the result in the accumulator in the destination buffer. */ 00521 *pOut++ = sum; 00522 00523 /* Increment the MAC count */ 00524 count++; 00525 00526 /* Update the inputA and inputB pointers for next MAC calculation */ 00527 if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) 00528 { 00529 px = pIn1 + firstIndex - srcBLen + 1 + count; 00530 } 00531 else 00532 { 00533 px = pIn1 + count; 00534 } 00535 py = pSrc2; 00536 00537 /* Decrement the loop counter */ 00538 blkCnt--; 00539 } 00540 } 00541 00542 00543 /* -------------------------- 00544 * Initializations of stage3 00545 * -------------------------*/ 00546 00547 /* sum += x[srcALen-srcBLen+1] * y[srcBLen-1] + x[srcALen-srcBLen+2] * y[srcBLen-2] +...+ x[srcALen-1] * y[1] 00548 * sum += x[srcALen-srcBLen+2] * y[srcBLen-1] + x[srcALen-srcBLen+3] * y[srcBLen-2] +...+ x[srcALen-1] * y[2] 00549 * .... 
00550 * sum += x[srcALen-2] * y[srcBLen-1] + x[srcALen-1] * y[srcBLen-2] 00551 * sum += x[srcALen-1] * y[srcBLen-1] 00552 */ 00553 00554 /* In this stage the MAC operations are decreased by 1 for every iteration. 00555 The count variable holds the number of MAC operations performed */ 00556 count = srcBLen - 1U; 00557 00558 /* Working pointer of inputA */ 00559 pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U); 00560 px = pSrc1; 00561 00562 /* Working pointer of inputB */ 00563 pSrc2 = pIn2 + (srcBLen - 1U); 00564 py = pSrc2; 00565 00566 while (blockSize3 > 0) 00567 { 00568 /* Accumulator is made zero for every iteration */ 00569 sum = 0.0f; 00570 00571 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00572 k = count >> 2U; 00573 00574 /* First part of the processing with loop unrolling. Compute 4 MACs at a time. 00575 ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 00576 while (k > 0U) 00577 { 00578 /* sum += x[srcALen - srcBLen + 1] * y[srcBLen - 1] */ 00579 sum += *px++ * *py--; 00580 00581 /* sum += x[srcALen - srcBLen + 2] * y[srcBLen - 2] */ 00582 sum += *px++ * *py--; 00583 00584 /* sum += x[srcALen - srcBLen + 3] * y[srcBLen - 3] */ 00585 sum += *px++ * *py--; 00586 00587 /* sum += x[srcALen - srcBLen + 4] * y[srcBLen - 4] */ 00588 sum += *px++ * *py--; 00589 00590 /* Decrement the loop counter */ 00591 k--; 00592 } 00593 00594 /* If the count is not a multiple of 4, compute any remaining MACs here. 00595 ** No loop unrolling is used. */ 00596 k = count % 0x4U; 00597 00598 while (k > 0U) 00599 { 00600 /* Perform the multiply-accumulates */ 00601 /* sum += x[srcALen-1] * y[srcBLen-1] */ 00602 sum += *px++ * *py--; 00603 00604 /* Decrement the loop counter */ 00605 k--; 00606 } 00607 00608 /* Store the result in the accumulator in the destination buffer. 
*/ 00609 *pOut++ = sum; 00610 00611 /* Update the inputA and inputB pointers for next MAC calculation */ 00612 px = ++pSrc1; 00613 py = pSrc2; 00614 00615 /* Decrement the MAC count */ 00616 count--; 00617 00618 /* Decrement the loop counter */ 00619 blockSize3--; 00620 00621 } 00622 00623 /* set status as ARM_MATH_SUCCESS */ 00624 status = ARM_MATH_SUCCESS; 00625 } 00626 00627 /* Return to application */ 00628 return (status); 00629 00630 #else 00631 00632 /* Run the below code for Cortex-M0 */ 00633 00634 float32_t *pIn1 = pSrcA; /* inputA pointer */ 00635 float32_t *pIn2 = pSrcB; /* inputB pointer */ 00636 float32_t sum; /* Accumulator */ 00637 uint32_t i, j; /* loop counters */ 00638 arm_status status; /* status of Partial convolution */ 00639 00640 /* Check for range of output samples to be calculated */ 00641 if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U)))) 00642 { 00643 /* Set status as ARM_ARGUMENT_ERROR */ 00644 status = ARM_MATH_ARGUMENT_ERROR; 00645 } 00646 else 00647 { 00648 /* Loop to calculate convolution for output length number of values */ 00649 for (i = firstIndex; i <= (firstIndex + numPoints - 1); i++) 00650 { 00651 /* Initialize sum with zero to carry on MAC operations */ 00652 sum = 0.0f; 00653 00654 /* Loop to perform MAC operations according to convolution equation */ 00655 for (j = 0U; j <= i; j++) 00656 { 00657 /* Check the array limitations for inputs */ 00658 if ((((i - j) < srcBLen) && (j < srcALen))) 00659 { 00660 /* z[i] += x[i-j] * y[j] */ 00661 sum += pIn1[j] * pIn2[i - j]; 00662 } 00663 } 00664 /* Store the output in the destination buffer */ 00665 pDst[i] = sum; 00666 } 00667 /* set status as ARM_SUCCESS as there are no argument errors */ 00668 status = ARM_MATH_SUCCESS; 00669 } 00670 return (status); 00671 00672 #endif /* #if defined (ARM_MATH_DSP) */ 00673 00674 } 00675 00676 /** 00677 * @} end of PartialConv group 00678 */ 00679
Generated on Tue Jul 12 2022 16:46:23 by
