CMSIS DSP library
Dependents: performance_timer Surfboard_ gps2rtty Capstone ... more
arm_conv_f32.c
/* ----------------------------------------------------------------------------
 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
 *
 * $Date:        19. March 2015
 * $Revision:    V.1.4.5
 *
 * Project:      CMSIS DSP Library
 * Title:        arm_conv_f32.c
 *
 * Description:  Convolution of floating-point sequences.
 *
 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   - Neither the name of ARM LIMITED nor the names of its contributors
 *     may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * -------------------------------------------------------------------------- */

#include "arm_math.h"

/**
 * @ingroup groupFilters
 */

/**
 * @defgroup Conv Convolution
 *
 * Convolution is a mathematical operation that operates on two finite length vectors to generate a finite length output vector.
 * Convolution is similar to correlation and is frequently used in filtering and data analysis.
 * The CMSIS DSP library contains functions for convolving Q7, Q15, Q31, and floating-point data types.
 * The library also provides fast versions of the Q15 and Q31 functions on Cortex-M4 and Cortex-M3.
 *
 * \par Algorithm
 * Let <code>a[n]</code> and <code>b[n]</code> be sequences of length <code>srcALen</code> and <code>srcBLen</code> samples respectively.
 * Then the convolution
 *
 * <pre>
 *                   c[n] = a[n] * b[n]
 * </pre>
 *
 * \par
 * is defined as
 * \image html ConvolutionEquation.gif
 * <pre>
 *                   c[n] = a[0] * b[n] + a[1] * b[n-1] + ... + a[n] * b[0]
 * </pre>
 * where samples outside the bounds of <code>a[n]</code> and <code>b[n]</code> are taken to be zero.
 * \par
 * Note that <code>c[n]</code> is of length <code>srcALen + srcBLen - 1</code> and is defined over the interval <code>n=0, 1, 2, ..., srcALen + srcBLen - 2</code>.
 * <code>pSrcA</code> points to the first input vector of length <code>srcALen</code> and
 * <code>pSrcB</code> points to the second input vector of length <code>srcBLen</code>.
 * The output result is written to <code>pDst</code> and the calling function must allocate <code>srcALen+srcBLen-1</code> words for the result.
 *
 * \par
 * Conceptually, when two signals <code>a[n]</code> and <code>b[n]</code> are convolved,
 * the signal <code>b[n]</code> slides over <code>a[n]</code>.
 * For each offset \c n, the overlapping portions of a[n] and b[n] are multiplied and summed together.
 *
 * \par
 * Note that convolution is a commutative operation:
 *
 * <pre>
 *                   a[n] * b[n] = b[n] * a[n].
 * </pre>
 *
 * \par
 * This means that switching the A and B arguments to the convolution functions has no effect.
 *
 * <b>Fixed-Point Behavior</b>
 *
 * \par
 * Convolution requires summing up a large number of intermediate products.
 * As such, the Q7, Q15, and Q31 functions run a risk of overflow and saturation.
 * Refer to the function specific documentation below for further details of the particular algorithm used.
 *
 *
 * <b>Fast Versions</b>
 *
 * \par
 * Fast versions are provided for Q31 and Q15. They take fewer cycles than the standard Q31 and Q15 convolution functions,
 * but they require the input signals to be scaled down to avoid intermediate overflows.
 *
 *
 * <b>Opt Versions</b>
 *
 * \par
 * Opt versions are provided for Q15 and Q7. Their design uses an internal scratch buffer to achieve better optimisation.
 * These versions take fewer cycles but consume more memory (the scratch memory) than the standard Q15 and Q7 versions.
 */

/**
 * @addtogroup Conv
 * @{
 */

/**
 * @brief Convolution of floating-point sequences.
 * @param[in] *pSrcA points to the first input sequence.
 * @param[in] srcALen length of the first input sequence.
 * @param[in] *pSrcB points to the second input sequence.
/**
 * @brief Convolution of floating-point sequences.
 * @param[in]  *pSrcA  points to the first input sequence.
 * @param[in]  srcALen length of the first input sequence.
 * @param[in]  *pSrcB  points to the second input sequence.
 * @param[in]  srcBLen length of the second input sequence.
 * @param[out] *pDst   points to the location where the output result is written.  Length srcALen+srcBLen-1.
 * @return none.
 *
 * \par Empty inputs
 * If <code>srcALen</code> or <code>srcBLen</code> is zero the function returns
 * immediately and writes nothing to <code>pDst</code>.  Without this check the
 * unsigned expressions <code>srcBLen - 1</code> and
 * <code>srcALen + srcBLen - 1</code> wrap around and the loops below run far
 * past the end of the buffers.
 */

void arm_conv_f32(
  float32_t * pSrcA,
  uint32_t srcALen,
  float32_t * pSrcB,
  uint32_t srcBLen,
  float32_t * pDst)
{

#ifndef ARM_MATH_CM0_FAMILY

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  float32_t *pIn1;                               /* longer input sequence ("x") */
  float32_t *pIn2;                               /* shorter input sequence ("y") */
  float32_t *pOut = pDst;                        /* output pointer */
  float32_t *px;                                 /* working pointer into x, walks forwards */
  float32_t *py;                                 /* working pointer into y, walks backwards */
  float32_t *pSrc1, *pSrc2;                      /* per-output start positions for px / py */
  float32_t sum, acc0, acc1, acc2, acc3;         /* accumulators */
  float32_t x0, x1, x2, x3, c0;                  /* cached x samples and current y coefficient */
  uint32_t j, k, count, blkCnt, blockSize1, blockSize2, blockSize3;     /* loop counters */

  /* An empty input produces no output samples.  Bail out before the
   * block sizes are computed: (srcBLen - 1u) would wrap to 0xFFFFFFFF. */
  if((srcALen == 0u) || (srcBLen == 0u))
  {
    return;
  }

  /* The shorter sequence is always made to slide across the longer one, */
  /* so from here on srcBLen <= srcALen holds. */
  if(srcALen >= srcBLen)
  {
    pIn1 = pSrcA;
    pIn2 = pSrcB;
  }
  else
  {
    /* Swap the roles of A and B (convolution is commutative) */
    pIn1 = pSrcB;
    pIn2 = pSrcA;

    j = srcBLen;
    srcBLen = srcALen;
    srcALen = j;
  }

  /* conv(x,y) at n = x[n] * y[0] + x[n-1] * y[1] + x[n-2] * y[2] + ...+ x[n-N+1] * y[N -1] */
  /* The function is internally divided into three stages according to the
   * number of multiplications per output sample:
   *   stage 1: the number of MACs grows by one for every output sample,
   *   stage 2: exactly srcBLen MACs are done for every output sample,
   *   stage 3: the number of MACs shrinks by one for every output sample. */

  /* Number of output samples produced by each stage */
  blockSize1 = srcBLen - 1u;
  blockSize2 = srcALen - (srcBLen - 1u);
  blockSize3 = blockSize1;

  /* --------------------------
   * Initializations of stage1
   * -------------------------*/

  /* sum = x[0] * y[0]
   * sum = x[0] * y[1] + x[1] * y[0]
   * ....
   * sum = x[0] * y[srcBLen - 2] + x[1] * y[srcBLen - 3] +...+ x[srcBLen - 2] * y[0]
   */

  /* count holds the number of MAC operations for the current output sample */
  count = 1u;

  /* Working pointer of inputA */
  px = pIn1;

  /* Working pointer of inputB */
  py = pIn2;


  /* ------------------------
   * Stage1 process
   * ----------------------*/

  while(blockSize1 > 0u)
  {
    /* Accumulator is made zero for every iteration */
    sum = 0.0f;

    /* Loop unrolling: compute 4 MACs per pass. */
    k = count >> 2u;

    while(k > 0u)
    {
      sum += *px++ * *py--;
      sum += *px++ * *py--;
      sum += *px++ * *py--;
      sum += *px++ * *py--;

      /* Decrement the loop counter */
      k--;
    }

    /* If count is not a multiple of 4, compute the remaining 1 to 3 MACs here. */
    k = count % 0x4u;

    while(k > 0u)
    {
      sum += *px++ * *py--;

      /* Decrement the loop counter */
      k--;
    }

    /* Store the result in the destination buffer. */
    *pOut++ = sum;

    /* Next output: y starts one sample further on, x restarts at x[0] */
    py = pIn2 + count;
    px = pIn1;

    /* Increment the MAC count */
    count++;

    /* Decrement the loop counter */
    blockSize1--;
  }

  /* --------------------------
   * Initializations of stage2
   * ------------------------*/

  /* sum = x[0] * y[srcBLen-1] + x[1] * y[srcBLen-2] +...+ x[srcBLen-1] * y[0]
   * sum = x[1] * y[srcBLen-1] + x[2] * y[srcBLen-2] +...+ x[srcBLen] * y[0]
   * ....
   * sum = x[srcALen-srcBLen] * y[srcBLen-1] + x[srcALen-srcBLen+1] * y[srcBLen-2] +...+ x[srcALen-1] * y[0]
   */

  /* Working pointer of inputA */
  px = pIn1;

  /* Working pointer of inputB: every output of stages 2 and 3 starts at the last y sample */
  pSrc2 = pIn2 + (srcBLen - 1u);
  py = pSrc2;

  /* count is the index by which pIn1 is offset for the current output sample */
  count = 0u;

  /* -------------------
   * Stage2 process
   * ------------------*/

  /* The 4-output unrolled loop runs its inner do/while at least once with
   * k = srcBLen / 4, so it can only be used when srcBLen >= 4. */
  if(srcBLen >= 4u)
  {
    /* Loop unroll over blockSize2, by 4 */
    blkCnt = blockSize2 >> 2u;

    while(blkCnt > 0u)
    {
      /* Set all accumulators to zero */
      acc0 = 0.0f;
      acc1 = 0.0f;
      acc2 = 0.0f;
      acc3 = 0.0f;

      /* read x[0], x[1], x[2] samples */
      x0 = *(px++);
      x1 = *(px++);
      x2 = *(px++);

      /* Number of 4-coefficient passes */
      k = srcBLen >> 2u;

      /* Each pass consumes 4 y coefficients and 4 new x samples and updates
       * all four accumulators; x0..x3 rotate so no sample is loaded twice. */
      do
      {
        /* Read y[srcBLen - 1] sample */
        c0 = *(py--);

        /* Read x[3] sample */
        x3 = *(px);

        /* acc0 += x[0] * y[srcBLen - 1] */
        acc0 += x0 * c0;
        /* acc1 += x[1] * y[srcBLen - 1] */
        acc1 += x1 * c0;
        /* acc2 += x[2] * y[srcBLen - 1] */
        acc2 += x2 * c0;
        /* acc3 += x[3] * y[srcBLen - 1] */
        acc3 += x3 * c0;

        /* Read y[srcBLen - 2] sample */
        c0 = *(py--);

        /* Read x[4] sample */
        x0 = *(px + 1u);

        /* acc0 += x[1] * y[srcBLen - 2] */
        acc0 += x1 * c0;
        /* acc1 += x[2] * y[srcBLen - 2] */
        acc1 += x2 * c0;
        /* acc2 += x[3] * y[srcBLen - 2] */
        acc2 += x3 * c0;
        /* acc3 += x[4] * y[srcBLen - 2] */
        acc3 += x0 * c0;

        /* Read y[srcBLen - 3] sample */
        c0 = *(py--);

        /* Read x[5] sample */
        x1 = *(px + 2u);

        /* acc0 += x[2] * y[srcBLen - 3] */
        acc0 += x2 * c0;
        /* acc1 += x[3] * y[srcBLen - 3] */
        acc1 += x3 * c0;
        /* acc2 += x[4] * y[srcBLen - 3] */
        acc2 += x0 * c0;
        /* acc3 += x[5] * y[srcBLen - 3] */
        acc3 += x1 * c0;

        /* Read y[srcBLen - 4] sample */
        c0 = *(py--);

        /* Read x[6] sample */
        x2 = *(px + 3u);
        px += 4u;

        /* acc0 += x[3] * y[srcBLen - 4] */
        acc0 += x3 * c0;
        /* acc1 += x[4] * y[srcBLen - 4] */
        acc1 += x0 * c0;
        /* acc2 += x[5] * y[srcBLen - 4] */
        acc2 += x1 * c0;
        /* acc3 += x[6] * y[srcBLen - 4] */
        acc3 += x2 * c0;


      } while(--k);

      /* If srcBLen is not a multiple of 4, handle the remaining 1 to 3 coefficients here. */
      k = srcBLen % 0x4u;

      while(k > 0u)
      {
        /* Read the next y coefficient */
        c0 = *(py--);

        /* Read the next x sample */
        x3 = *(px++);

        /* One MAC for each of the four outputs */
        acc0 += x0 * c0;
        acc1 += x1 * c0;
        acc2 += x2 * c0;
        acc3 += x3 * c0;

        /* Reuse the present samples for the next MAC */
        x0 = x1;
        x1 = x2;
        x2 = x3;

        /* Decrement the loop counter */
        k--;
      }

      /* Store the results in the destination buffer. */
      *pOut++ = acc0;
      *pOut++ = acc1;
      *pOut++ = acc2;
      *pOut++ = acc3;

      /* Four outputs were produced: advance the x offset by 4 */
      count += 4u;

      /* Update the inputA and inputB pointers for next MAC calculation */
      px = pIn1 + count;
      py = pSrc2;


      /* Decrement the loop counter */
      blkCnt--;
    }


    /* If blockSize2 is not a multiple of 4, compute the remaining output samples
     ** one at a time. */
    blkCnt = blockSize2 % 0x4u;

    while(blkCnt > 0u)
    {
      /* Accumulator is made zero for every iteration */
      sum = 0.0f;

      /* Loop unrolling: compute 4 MACs per pass. */
      k = srcBLen >> 2u;

      while(k > 0u)
      {
        sum += *px++ * *py--;
        sum += *px++ * *py--;
        sum += *px++ * *py--;
        sum += *px++ * *py--;

        /* Decrement the loop counter */
        k--;
      }

      /* If srcBLen is not a multiple of 4, compute the remaining 1 to 3 MACs here. */
      k = srcBLen % 0x4u;

      while(k > 0u)
      {
        sum += *px++ * *py--;

        /* Decrement the loop counter */
        k--;
      }

      /* Store the result in the destination buffer. */
      *pOut++ = sum;

      /* Advance the x offset by one output sample */
      count++;

      /* Update the inputA and inputB pointers for next MAC calculation */
      px = pIn1 + count;
      py = pSrc2;

      /* Decrement the loop counter */
      blkCnt--;
    }
  }
  else
  {
    /* srcBLen is less than 4, so the 4-output unrolled loop above cannot be
     * used; compute every stage-2 output with a plain MAC loop. */
    blkCnt = blockSize2;

    while(blkCnt > 0u)
    {
      /* Accumulator is made zero for every iteration */
      sum = 0.0f;

      /* srcBLen number of MACS should be performed */
      k = srcBLen;

      while(k > 0u)
      {
        sum += *px++ * *py--;

        /* Decrement the loop counter */
        k--;
      }

      /* Store the result in the destination buffer. */
      *pOut++ = sum;

      /* Advance the x offset by one output sample */
      count++;

      /* Update the inputA and inputB pointers for next MAC calculation */
      px = pIn1 + count;
      py = pSrc2;

      /* Decrement the loop counter */
      blkCnt--;
    }
  }


  /* --------------------------
   * Initializations of stage3
   * -------------------------*/

  /* sum += x[srcALen-srcBLen+1] * y[srcBLen-1] + x[srcALen-srcBLen+2] * y[srcBLen-2] +...+ x[srcALen-1] * y[1]
   * sum += x[srcALen-srcBLen+2] * y[srcBLen-1] + x[srcALen-srcBLen+3] * y[srcBLen-2] +...+ x[srcALen-1] * y[2]
   * ....
   * sum +=  x[srcALen-2] * y[srcBLen-1] + x[srcALen-1] * y[srcBLen-2]
   * sum +=  x[srcALen-1] * y[srcBLen-1]
   */

  /* In this stage the MAC operations are decreased by 1 for every iteration.
     blockSize3 is both the number of outputs left and the MAC count of the current output. */

  /* Working pointer of inputA */
  pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
  px = pSrc1;

  /* Working pointer of inputB: pSrc2 still points at y[srcBLen - 1] */
  py = pSrc2;

  /* -------------------
   * Stage3 process
   * ------------------*/

  while(blockSize3 > 0u)
  {
    /* Accumulator is made zero for every iteration */
    sum = 0.0f;

    /* Loop unrolling: compute 4 MACs per pass. */
    k = blockSize3 >> 2u;

    while(k > 0u)
    {
      sum += *px++ * *py--;
      sum += *px++ * *py--;
      sum += *px++ * *py--;
      sum += *px++ * *py--;

      /* Decrement the loop counter */
      k--;
    }

    /* If blockSize3 is not a multiple of 4, compute the remaining 1 to 3 MACs here. */
    k = blockSize3 % 0x4u;

    while(k > 0u)
    {
      sum += *px++ * *py--;

      /* Decrement the loop counter */
      k--;
    }

    /* Store the result in the destination buffer. */
    *pOut++ = sum;

    /* Next output starts one x sample later; y restarts at its last sample */
    px = ++pSrc1;
    py = pSrc2;

    /* Decrement the loop counter */
    blockSize3--;
  }

#else

  /* Run the below code for Cortex-M0 */

  float32_t *pIn1 = pSrcA;                       /* inputA pointer */
  float32_t *pIn2 = pSrcB;                       /* inputB pointer */
  float32_t sum;                                 /* Accumulator */
  uint32_t i, j;                                 /* loop counters */

  /* An empty input produces no output samples.  This also keeps the loop
   * bound (srcALen + srcBLen) - 1u from wrapping when both lengths are 0. */
  if((srcALen == 0u) || (srcBLen == 0u))
  {
    return;
  }

  /* One pass per output sample */
  for (i = 0u; i < ((srcALen + srcBLen) - 1u); i++)
  {
    /* Initialize sum with zero to carry out MAC operations */
    sum = 0.0f;

    /* Loop to perform MAC operations according to convolution equation */
    for (j = 0u; j <= i; j++)
    {
      /* Only accumulate terms whose indices fall inside both sequences */
      if(((i - j) < srcBLen) && (j < srcALen))
      {
        /* z[i] += x[j] * y[i-j] */
        sum += pIn1[j] * pIn2[i - j];
      }
    }
    /* Store the output in the destination buffer */
    pDst[i] = sum;
  }

#endif /*   #ifndef ARM_MATH_CM0_FAMILY */

}

/**
 * @} end of Conv group
 */
Generated on Tue Jul 12 2022 11:59:16 by 1.7.2