Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
arm_conv_partial_opt_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Project: CMSIS DSP Library 00003 * Title: arm_conv_partial_opt_q15.c 00004 * Description: Partial convolution of Q15 sequences 00005 * 00006 * $Date: 27. January 2017 00007 * $Revision: V.1.5.1 00008 * 00009 * Target Processor: Cortex-M cores 00010 * -------------------------------------------------------------------- */ 00011 /* 00012 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. 00013 * 00014 * SPDX-License-Identifier: Apache-2.0 00015 * 00016 * Licensed under the Apache License, Version 2.0 (the License); you may 00017 * not use this file except in compliance with the License. 00018 * You may obtain a copy of the License at 00019 * 00020 * www.apache.org/licenses/LICENSE-2.0 00021 * 00022 * Unless required by applicable law or agreed to in writing, software 00023 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 00024 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00025 * See the License for the specific language governing permissions and 00026 * limitations under the License. 00027 */ 00028 00029 #include "arm_math.h" 00030 00031 /** 00032 * @ingroup groupFilters 00033 */ 00034 00035 /** 00036 * @addtogroup PartialConv 00037 * @{ 00038 */ 00039 00040 /** 00041 * @brief Partial convolution of Q15 sequences. 00042 * @param[in] *pSrcA points to the first input sequence. 00043 * @param[in] srcALen length of the first input sequence. 00044 * @param[in] *pSrcB points to the second input sequence. 00045 * @param[in] srcBLen length of the second input sequence. 00046 * @param[out] *pDst points to the location where the output result is written. 00047 * @param[in] firstIndex is the first output sample to start with. 00048 * @param[in] numPoints is the number of output points to be computed. 00049 * @param[in] *pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. 
00050 * @param[in] *pScratch2 points to scratch buffer of size min(srcALen, srcBLen). 00051 * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. 00052 * 00053 * \par Restrictions 00054 * If the silicon does not support unaligned memory access enable the macro UNALIGNED_SUPPORT_DISABLE 00055 * In this case input, output, state buffers should be aligned by 32-bit 00056 * 00057 * Refer to <code>arm_conv_partial_fast_q15()</code> for a faster but less precise version of this function for Cortex-M3 and Cortex-M4. 00058 * 00059 * 00060 */ 00061 00062 #ifndef UNALIGNED_SUPPORT_DISABLE 00063 00064 arm_status arm_conv_partial_opt_q15( 00065 q15_t * pSrcA, 00066 uint32_t srcALen, 00067 q15_t * pSrcB, 00068 uint32_t srcBLen, 00069 q15_t * pDst, 00070 uint32_t firstIndex, 00071 uint32_t numPoints, 00072 q15_t * pScratch1, 00073 q15_t * pScratch2) 00074 { 00075 00076 q15_t *pOut = pDst; /* output pointer */ 00077 q15_t *pScr1 = pScratch1; /* Temporary pointer for scratch1 */ 00078 q15_t *pScr2 = pScratch2; /* Temporary pointer for scratch1 */ 00079 q63_t acc0, acc1, acc2, acc3; /* Accumulator */ 00080 q31_t x1, x2, x3; /* Temporary variables to hold state and coefficient values */ 00081 q31_t y1, y2; /* State variables */ 00082 q15_t *pIn1; /* inputA pointer */ 00083 q15_t *pIn2; /* inputB pointer */ 00084 q15_t *px; /* Intermediate inputA pointer */ 00085 q15_t *py; /* Intermediate inputB pointer */ 00086 uint32_t j, k, blkCnt; /* loop counter */ 00087 arm_status status; /* Status variable */ 00088 uint32_t tapCnt; /* loop count */ 00089 00090 /* Check for range of output samples to be calculated */ 00091 if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U)))) 00092 { 00093 /* Set status as ARM_MATH_ARGUMENT_ERROR */ 00094 status = ARM_MATH_ARGUMENT_ERROR; 00095 } 00096 else 00097 { 00098 00099 /* The algorithm implementation is based on the lengths of 
the inputs. */ 00100 /* srcB is always made to slide across srcA. */ 00101 /* So srcBLen is always considered as shorter or equal to srcALen */ 00102 if (srcALen >= srcBLen) 00103 { 00104 /* Initialization of inputA pointer */ 00105 pIn1 = pSrcA; 00106 00107 /* Initialization of inputB pointer */ 00108 pIn2 = pSrcB; 00109 } 00110 else 00111 { 00112 /* Initialization of inputA pointer */ 00113 pIn1 = pSrcB; 00114 00115 /* Initialization of inputB pointer */ 00116 pIn2 = pSrcA; 00117 00118 /* srcBLen is always considered as shorter or equal to srcALen */ 00119 j = srcBLen; 00120 srcBLen = srcALen; 00121 srcALen = j; 00122 } 00123 00124 /* Temporary pointer for scratch2 */ 00125 py = pScratch2; 00126 00127 /* pointer to take end of scratch2 buffer */ 00128 pScr2 = pScratch2 + srcBLen - 1; 00129 00130 /* points to smaller length sequence */ 00131 px = pIn2; 00132 00133 /* Apply loop unrolling and do 4 Copies simultaneously. */ 00134 k = srcBLen >> 2U; 00135 00136 /* First part of the processing with loop unrolling copies 4 data points at a time. 00137 ** a second loop below copies for the remaining 1 to 3 samples. */ 00138 while (k > 0U) 00139 { 00140 /* copy second buffer in reversal manner */ 00141 *pScr2-- = *px++; 00142 *pScr2-- = *px++; 00143 *pScr2-- = *px++; 00144 *pScr2-- = *px++; 00145 00146 /* Decrement the loop counter */ 00147 k--; 00148 } 00149 00150 /* If the count is not a multiple of 4, copy remaining samples here. 00151 ** No loop unrolling is used. 
*/ 00152 k = srcBLen % 0x4U; 00153 00154 while (k > 0U) 00155 { 00156 /* copy second buffer in reversal manner for remaining samples */ 00157 *pScr2-- = *px++; 00158 00159 /* Decrement the loop counter */ 00160 k--; 00161 } 00162 00163 /* Initialze temporary scratch pointer */ 00164 pScr1 = pScratch1; 00165 00166 /* Fill (srcBLen - 1U) zeros in scratch buffer */ 00167 arm_fill_q15(0, pScr1, (srcBLen - 1U)); 00168 00169 /* Update temporary scratch pointer */ 00170 pScr1 += (srcBLen - 1U); 00171 00172 /* Copy bigger length sequence(srcALen) samples in scratch1 buffer */ 00173 00174 /* Copy (srcALen) samples in scratch buffer */ 00175 arm_copy_q15(pIn1, pScr1, srcALen); 00176 00177 /* Update pointers */ 00178 pScr1 += srcALen; 00179 00180 /* Fill (srcBLen - 1U) zeros at end of scratch buffer */ 00181 arm_fill_q15(0, pScr1, (srcBLen - 1U)); 00182 00183 /* Update pointer */ 00184 pScr1 += (srcBLen - 1U); 00185 00186 /* Initialization of pIn2 pointer */ 00187 pIn2 = py; 00188 00189 pScratch1 += firstIndex; 00190 00191 pOut = pDst + firstIndex; 00192 00193 /* Actual convolution process starts here */ 00194 blkCnt = (numPoints) >> 2; 00195 00196 while (blkCnt > 0) 00197 { 00198 /* Initialze temporary scratch pointer as scratch1 */ 00199 pScr1 = pScratch1; 00200 00201 /* Clear Accumlators */ 00202 acc0 = 0; 00203 acc1 = 0; 00204 acc2 = 0; 00205 acc3 = 0; 00206 00207 /* Read two samples from scratch1 buffer */ 00208 x1 = *__SIMD32(pScr1)++; 00209 00210 /* Read next two samples from scratch1 buffer */ 00211 x2 = *__SIMD32(pScr1)++; 00212 00213 tapCnt = (srcBLen) >> 2U; 00214 00215 while (tapCnt > 0U) 00216 { 00217 00218 /* Read four samples from smaller buffer */ 00219 y1 = _SIMD32_OFFSET(pIn2); 00220 y2 = _SIMD32_OFFSET(pIn2 + 2U); 00221 00222 /* multiply and accumlate */ 00223 acc0 = __SMLALD(x1, y1, acc0); 00224 acc2 = __SMLALD(x2, y1, acc2); 00225 00226 /* pack input data */ 00227 #ifndef ARM_MATH_BIG_ENDIAN 00228 x3 = __PKHBT(x2, x1, 0); 00229 #else 00230 x3 = 
__PKHBT(x1, x2, 0); 00231 #endif 00232 00233 /* multiply and accumlate */ 00234 acc1 = __SMLALDX(x3, y1, acc1); 00235 00236 /* Read next two samples from scratch1 buffer */ 00237 x1 = _SIMD32_OFFSET(pScr1); 00238 00239 /* multiply and accumlate */ 00240 acc0 = __SMLALD(x2, y2, acc0); 00241 acc2 = __SMLALD(x1, y2, acc2); 00242 00243 /* pack input data */ 00244 #ifndef ARM_MATH_BIG_ENDIAN 00245 x3 = __PKHBT(x1, x2, 0); 00246 #else 00247 x3 = __PKHBT(x2, x1, 0); 00248 #endif 00249 00250 acc3 = __SMLALDX(x3, y1, acc3); 00251 acc1 = __SMLALDX(x3, y2, acc1); 00252 00253 x2 = _SIMD32_OFFSET(pScr1 + 2U); 00254 00255 #ifndef ARM_MATH_BIG_ENDIAN 00256 x3 = __PKHBT(x2, x1, 0); 00257 #else 00258 x3 = __PKHBT(x1, x2, 0); 00259 #endif 00260 00261 acc3 = __SMLALDX(x3, y2, acc3); 00262 00263 /* update scratch pointers */ 00264 pIn2 += 4U; 00265 pScr1 += 4U; 00266 00267 00268 /* Decrement the loop counter */ 00269 tapCnt--; 00270 } 00271 00272 /* Update scratch pointer for remaining samples of smaller length sequence */ 00273 pScr1 -= 4U; 00274 00275 /* apply same above for remaining samples of smaller length sequence */ 00276 tapCnt = (srcBLen) & 3U; 00277 00278 while (tapCnt > 0U) 00279 { 00280 /* accumlate the results */ 00281 acc0 += (*pScr1++ * *pIn2); 00282 acc1 += (*pScr1++ * *pIn2); 00283 acc2 += (*pScr1++ * *pIn2); 00284 acc3 += (*pScr1++ * *pIn2++); 00285 00286 pScr1 -= 3U; 00287 00288 /* Decrement the loop counter */ 00289 tapCnt--; 00290 } 00291 00292 blkCnt--; 00293 00294 00295 /* Store the results in the accumulators in the destination buffer. 
*/ 00296 00297 #ifndef ARM_MATH_BIG_ENDIAN 00298 00299 *__SIMD32(pOut)++ = 00300 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16); 00301 *__SIMD32(pOut)++ = 00302 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16); 00303 00304 #else 00305 00306 *__SIMD32(pOut)++ = 00307 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16); 00308 *__SIMD32(pOut)++ = 00309 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16); 00310 00311 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00312 00313 /* Initialization of inputB pointer */ 00314 pIn2 = py; 00315 00316 pScratch1 += 4U; 00317 00318 } 00319 00320 00321 blkCnt = numPoints & 0x3; 00322 00323 /* Calculate convolution for remaining samples of Bigger length sequence */ 00324 while (blkCnt > 0) 00325 { 00326 /* Initialze temporary scratch pointer as scratch1 */ 00327 pScr1 = pScratch1; 00328 00329 /* Clear Accumlators */ 00330 acc0 = 0; 00331 00332 tapCnt = (srcBLen) >> 1U; 00333 00334 while (tapCnt > 0U) 00335 { 00336 00337 /* Read next two samples from scratch1 buffer */ 00338 x1 = *__SIMD32(pScr1)++; 00339 00340 /* Read two samples from smaller buffer */ 00341 y1 = *__SIMD32(pIn2)++; 00342 00343 acc0 = __SMLALD(x1, y1, acc0); 00344 00345 /* Decrement the loop counter */ 00346 tapCnt--; 00347 } 00348 00349 tapCnt = (srcBLen) & 1U; 00350 00351 /* apply same above for remaining samples of smaller length sequence */ 00352 while (tapCnt > 0U) 00353 { 00354 00355 /* accumlate the results */ 00356 acc0 += (*pScr1++ * *pIn2++); 00357 00358 /* Decrement the loop counter */ 00359 tapCnt--; 00360 } 00361 00362 blkCnt--; 00363 00364 /* Store the result in the accumulator in the destination buffer. 
*/ 00365 *pOut++ = (q15_t) (__SSAT((acc0 >> 15), 16)); 00366 00367 /* Initialization of inputB pointer */ 00368 pIn2 = py; 00369 00370 pScratch1 += 1U; 00371 00372 } 00373 00374 /* set status as ARM_MATH_SUCCESS */ 00375 status = ARM_MATH_SUCCESS; 00376 00377 } 00378 00379 /* Return to application */ 00380 return (status); 00381 } 00382 00383 #else 00384 00385 arm_status arm_conv_partial_opt_q15( 00386 q15_t * pSrcA, 00387 uint32_t srcALen, 00388 q15_t * pSrcB, 00389 uint32_t srcBLen, 00390 q15_t * pDst, 00391 uint32_t firstIndex, 00392 uint32_t numPoints, 00393 q15_t * pScratch1, 00394 q15_t * pScratch2) 00395 { 00396 00397 q15_t *pOut = pDst; /* output pointer */ 00398 q15_t *pScr1 = pScratch1; /* Temporary pointer for scratch1 */ 00399 q15_t *pScr2 = pScratch2; /* Temporary pointer for scratch1 */ 00400 q63_t acc0, acc1, acc2, acc3; /* Accumulator */ 00401 q15_t *pIn1; /* inputA pointer */ 00402 q15_t *pIn2; /* inputB pointer */ 00403 q15_t *px; /* Intermediate inputA pointer */ 00404 q15_t *py; /* Intermediate inputB pointer */ 00405 uint32_t j, k, blkCnt; /* loop counter */ 00406 arm_status status; /* Status variable */ 00407 uint32_t tapCnt; /* loop count */ 00408 q15_t x10, x11, x20, x21; /* Temporary variables to hold srcA buffer */ 00409 q15_t y10, y11; /* Temporary variables to hold srcB buffer */ 00410 00411 00412 /* Check for range of output samples to be calculated */ 00413 if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U)))) 00414 { 00415 /* Set status as ARM_MATH_ARGUMENT_ERROR */ 00416 status = ARM_MATH_ARGUMENT_ERROR; 00417 } 00418 else 00419 { 00420 00421 /* The algorithm implementation is based on the lengths of the inputs. */ 00422 /* srcB is always made to slide across srcA. 
*/ 00423 /* So srcBLen is always considered as shorter or equal to srcALen */ 00424 if (srcALen >= srcBLen) 00425 { 00426 /* Initialization of inputA pointer */ 00427 pIn1 = pSrcA; 00428 00429 /* Initialization of inputB pointer */ 00430 pIn2 = pSrcB; 00431 } 00432 else 00433 { 00434 /* Initialization of inputA pointer */ 00435 pIn1 = pSrcB; 00436 00437 /* Initialization of inputB pointer */ 00438 pIn2 = pSrcA; 00439 00440 /* srcBLen is always considered as shorter or equal to srcALen */ 00441 j = srcBLen; 00442 srcBLen = srcALen; 00443 srcALen = j; 00444 } 00445 00446 /* Temporary pointer for scratch2 */ 00447 py = pScratch2; 00448 00449 /* pointer to take end of scratch2 buffer */ 00450 pScr2 = pScratch2 + srcBLen - 1; 00451 00452 /* points to smaller length sequence */ 00453 px = pIn2; 00454 00455 /* Apply loop unrolling and do 4 Copies simultaneously. */ 00456 k = srcBLen >> 2U; 00457 00458 /* First part of the processing with loop unrolling copies 4 data points at a time. 00459 ** a second loop below copies for the remaining 1 to 3 samples. */ 00460 while (k > 0U) 00461 { 00462 /* copy second buffer in reversal manner */ 00463 *pScr2-- = *px++; 00464 *pScr2-- = *px++; 00465 *pScr2-- = *px++; 00466 *pScr2-- = *px++; 00467 00468 /* Decrement the loop counter */ 00469 k--; 00470 } 00471 00472 /* If the count is not a multiple of 4, copy remaining samples here. 00473 ** No loop unrolling is used. 
*/ 00474 k = srcBLen % 0x4U; 00475 00476 while (k > 0U) 00477 { 00478 /* copy second buffer in reversal manner for remaining samples */ 00479 *pScr2-- = *px++; 00480 00481 /* Decrement the loop counter */ 00482 k--; 00483 } 00484 00485 /* Initialze temporary scratch pointer */ 00486 pScr1 = pScratch1; 00487 00488 /* Fill (srcBLen - 1U) zeros in scratch buffer */ 00489 arm_fill_q15(0, pScr1, (srcBLen - 1U)); 00490 00491 /* Update temporary scratch pointer */ 00492 pScr1 += (srcBLen - 1U); 00493 00494 /* Copy bigger length sequence(srcALen) samples in scratch1 buffer */ 00495 00496 00497 /* Apply loop unrolling and do 4 Copies simultaneously. */ 00498 k = srcALen >> 2U; 00499 00500 /* First part of the processing with loop unrolling copies 4 data points at a time. 00501 ** a second loop below copies for the remaining 1 to 3 samples. */ 00502 while (k > 0U) 00503 { 00504 /* copy second buffer in reversal manner */ 00505 *pScr1++ = *pIn1++; 00506 *pScr1++ = *pIn1++; 00507 *pScr1++ = *pIn1++; 00508 *pScr1++ = *pIn1++; 00509 00510 /* Decrement the loop counter */ 00511 k--; 00512 } 00513 00514 /* If the count is not a multiple of 4, copy remaining samples here. 00515 ** No loop unrolling is used. */ 00516 k = srcALen % 0x4U; 00517 00518 while (k > 0U) 00519 { 00520 /* copy second buffer in reversal manner for remaining samples */ 00521 *pScr1++ = *pIn1++; 00522 00523 /* Decrement the loop counter */ 00524 k--; 00525 } 00526 00527 00528 /* Apply loop unrolling and do 4 Copies simultaneously. */ 00529 k = (srcBLen - 1U) >> 2U; 00530 00531 /* First part of the processing with loop unrolling copies 4 data points at a time. 00532 ** a second loop below copies for the remaining 1 to 3 samples. 
*/ 00533 while (k > 0U) 00534 { 00535 /* copy second buffer in reversal manner */ 00536 *pScr1++ = 0; 00537 *pScr1++ = 0; 00538 *pScr1++ = 0; 00539 *pScr1++ = 0; 00540 00541 /* Decrement the loop counter */ 00542 k--; 00543 } 00544 00545 /* If the count is not a multiple of 4, copy remaining samples here. 00546 ** No loop unrolling is used. */ 00547 k = (srcBLen - 1U) % 0x4U; 00548 00549 while (k > 0U) 00550 { 00551 /* copy second buffer in reversal manner for remaining samples */ 00552 *pScr1++ = 0; 00553 00554 /* Decrement the loop counter */ 00555 k--; 00556 } 00557 00558 00559 /* Initialization of pIn2 pointer */ 00560 pIn2 = py; 00561 00562 pScratch1 += firstIndex; 00563 00564 pOut = pDst + firstIndex; 00565 00566 /* Actual convolution process starts here */ 00567 blkCnt = (numPoints) >> 2; 00568 00569 while (blkCnt > 0) 00570 { 00571 /* Initialze temporary scratch pointer as scratch1 */ 00572 pScr1 = pScratch1; 00573 00574 /* Clear Accumlators */ 00575 acc0 = 0; 00576 acc1 = 0; 00577 acc2 = 0; 00578 acc3 = 0; 00579 00580 /* Read two samples from scratch1 buffer */ 00581 x10 = *pScr1++; 00582 x11 = *pScr1++; 00583 00584 /* Read next two samples from scratch1 buffer */ 00585 x20 = *pScr1++; 00586 x21 = *pScr1++; 00587 00588 tapCnt = (srcBLen) >> 2U; 00589 00590 while (tapCnt > 0U) 00591 { 00592 00593 /* Read two samples from smaller buffer */ 00594 y10 = *pIn2; 00595 y11 = *(pIn2 + 1U); 00596 00597 /* multiply and accumlate */ 00598 acc0 += (q63_t) x10 *y10; 00599 acc0 += (q63_t) x11 *y11; 00600 acc2 += (q63_t) x20 *y10; 00601 acc2 += (q63_t) x21 *y11; 00602 00603 /* multiply and accumlate */ 00604 acc1 += (q63_t) x11 *y10; 00605 acc1 += (q63_t) x20 *y11; 00606 00607 /* Read next two samples from scratch1 buffer */ 00608 x10 = *pScr1; 00609 x11 = *(pScr1 + 1U); 00610 00611 /* multiply and accumlate */ 00612 acc3 += (q63_t) x21 *y10; 00613 acc3 += (q63_t) x10 *y11; 00614 00615 /* Read next two samples from scratch2 buffer */ 00616 y10 = *(pIn2 + 2U); 00617 y11 = 
*(pIn2 + 3U); 00618 00619 /* multiply and accumlate */ 00620 acc0 += (q63_t) x20 *y10; 00621 acc0 += (q63_t) x21 *y11; 00622 acc2 += (q63_t) x10 *y10; 00623 acc2 += (q63_t) x11 *y11; 00624 acc1 += (q63_t) x21 *y10; 00625 acc1 += (q63_t) x10 *y11; 00626 00627 /* Read next two samples from scratch1 buffer */ 00628 x20 = *(pScr1 + 2); 00629 x21 = *(pScr1 + 3); 00630 00631 /* multiply and accumlate */ 00632 acc3 += (q63_t) x11 *y10; 00633 acc3 += (q63_t) x20 *y11; 00634 00635 /* update scratch pointers */ 00636 pIn2 += 4U; 00637 pScr1 += 4U; 00638 00639 /* Decrement the loop counter */ 00640 tapCnt--; 00641 } 00642 00643 /* Update scratch pointer for remaining samples of smaller length sequence */ 00644 pScr1 -= 4U; 00645 00646 /* apply same above for remaining samples of smaller length sequence */ 00647 tapCnt = (srcBLen) & 3U; 00648 00649 while (tapCnt > 0U) 00650 { 00651 /* accumlate the results */ 00652 acc0 += (*pScr1++ * *pIn2); 00653 acc1 += (*pScr1++ * *pIn2); 00654 acc2 += (*pScr1++ * *pIn2); 00655 acc3 += (*pScr1++ * *pIn2++); 00656 00657 pScr1 -= 3U; 00658 00659 /* Decrement the loop counter */ 00660 tapCnt--; 00661 } 00662 00663 blkCnt--; 00664 00665 00666 /* Store the results in the accumulators in the destination buffer. 
*/ 00667 *pOut++ = __SSAT((acc0 >> 15), 16); 00668 *pOut++ = __SSAT((acc1 >> 15), 16); 00669 *pOut++ = __SSAT((acc2 >> 15), 16); 00670 *pOut++ = __SSAT((acc3 >> 15), 16); 00671 00672 00673 /* Initialization of inputB pointer */ 00674 pIn2 = py; 00675 00676 pScratch1 += 4U; 00677 00678 } 00679 00680 00681 blkCnt = numPoints & 0x3; 00682 00683 /* Calculate convolution for remaining samples of Bigger length sequence */ 00684 while (blkCnt > 0) 00685 { 00686 /* Initialze temporary scratch pointer as scratch1 */ 00687 pScr1 = pScratch1; 00688 00689 /* Clear Accumlators */ 00690 acc0 = 0; 00691 00692 tapCnt = (srcBLen) >> 1U; 00693 00694 while (tapCnt > 0U) 00695 { 00696 00697 /* Read next two samples from scratch1 buffer */ 00698 x10 = *pScr1++; 00699 x11 = *pScr1++; 00700 00701 /* Read two samples from smaller buffer */ 00702 y10 = *pIn2++; 00703 y11 = *pIn2++; 00704 00705 /* multiply and accumlate */ 00706 acc0 += (q63_t) x10 *y10; 00707 acc0 += (q63_t) x11 *y11; 00708 00709 /* Decrement the loop counter */ 00710 tapCnt--; 00711 } 00712 00713 tapCnt = (srcBLen) & 1U; 00714 00715 /* apply same above for remaining samples of smaller length sequence */ 00716 while (tapCnt > 0U) 00717 { 00718 00719 /* accumlate the results */ 00720 acc0 += (*pScr1++ * *pIn2++); 00721 00722 /* Decrement the loop counter */ 00723 tapCnt--; 00724 } 00725 00726 blkCnt--; 00727 00728 /* Store the result in the accumulator in the destination buffer. */ 00729 *pOut++ = (q15_t) (__SSAT((acc0 >> 15), 16)); 00730 00731 00732 /* Initialization of inputB pointer */ 00733 pIn2 = py; 00734 00735 pScratch1 += 1U; 00736 00737 } 00738 00739 /* set status as ARM_MATH_SUCCESS */ 00740 status = ARM_MATH_SUCCESS; 00741 00742 } 00743 00744 /* Return to application */ 00745 return (status); 00746 } 00747 00748 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */ 00749 00750 00751 /** 00752 * @} end of PartialConv group 00753 */ 00754
Generated on Tue Jul 12 2022 16:46:23 by
