CMSIS_DSP_5 - The CMSIS DSP 5 library

Users » xorjoep » Code » CMSIS_DSP_5

The CMSIS DSP 5 library

Dependents: Nucleo-Heart-Rate ejercicioVrms2 PROYECTOFINAL ejercicioVrms ... more

functions/FilteringFunctions/arm_conv_partial_f32.c@3:4098b9d3d571, 2018-06-21 (annotated)

Committer:: xorjoep
Date:: Thu Jun 21 11:56:27 2018 +0000
Revision:: 3:4098b9d3d571
Parent:: 1:24714b45cd1b

headers is a folder not a library

Who changed what in which revision?

User	Revision	Line number	New contents of line
xorjoep	1:24714b45cd1b	1	/* ----------------------------------------------------------------------
xorjoep	1:24714b45cd1b	2	* Project: CMSIS DSP Library
xorjoep	1:24714b45cd1b	3	* Title: arm_conv_partial_f32.c
xorjoep	1:24714b45cd1b	4	* Description: Partial convolution of floating-point sequences
xorjoep	1:24714b45cd1b	5	*
xorjoep	1:24714b45cd1b	6	* $Date: 27. January 2017
xorjoep	1:24714b45cd1b	7	* $Revision: V.1.5.1
xorjoep	1:24714b45cd1b	8	*
xorjoep	1:24714b45cd1b	9	* Target Processor: Cortex-M cores
xorjoep	1:24714b45cd1b	10	* -------------------------------------------------------------------- */
xorjoep	1:24714b45cd1b	11	/*
xorjoep	1:24714b45cd1b	12	* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
xorjoep	1:24714b45cd1b	13	*
xorjoep	1:24714b45cd1b	14	* SPDX-License-Identifier: Apache-2.0
xorjoep	1:24714b45cd1b	15	*
xorjoep	1:24714b45cd1b	16	* Licensed under the Apache License, Version 2.0 (the License); you may
xorjoep	1:24714b45cd1b	17	* not use this file except in compliance with the License.
xorjoep	1:24714b45cd1b	18	* You may obtain a copy of the License at
xorjoep	1:24714b45cd1b	19	*
xorjoep	1:24714b45cd1b	20	* www.apache.org/licenses/LICENSE-2.0
xorjoep	1:24714b45cd1b	21	*
xorjoep	1:24714b45cd1b	22	* Unless required by applicable law or agreed to in writing, software
xorjoep	1:24714b45cd1b	23	* distributed under the License is distributed on an AS IS BASIS, WITHOUT
xorjoep	1:24714b45cd1b	24	* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
xorjoep	1:24714b45cd1b	25	* See the License for the specific language governing permissions and
xorjoep	1:24714b45cd1b	26	* limitations under the License.
xorjoep	1:24714b45cd1b	27	*/
xorjoep	1:24714b45cd1b	28
xorjoep	1:24714b45cd1b	29	#include "arm_math.h"
xorjoep	1:24714b45cd1b	30
xorjoep	1:24714b45cd1b	31	/**
xorjoep	1:24714b45cd1b	32	* @ingroup groupFilters
xorjoep	1:24714b45cd1b	33	*/
xorjoep	1:24714b45cd1b	34
xorjoep	1:24714b45cd1b	35	/**
xorjoep	1:24714b45cd1b	36	* @defgroup PartialConv Partial Convolution
xorjoep	1:24714b45cd1b	37	*
xorjoep	1:24714b45cd1b	38	* Partial Convolution is equivalent to Convolution except that a subset of the output samples is generated.
xorjoep	1:24714b45cd1b	39	* Each function has two additional arguments.
xorjoep	1:24714b45cd1b	40	* <code>firstIndex</code> specifies the starting index of the subset of output samples.
xorjoep	1:24714b45cd1b	41	* <code>numPoints</code> is the number of output samples to compute.
xorjoep	1:24714b45cd1b	42	* The function computes the output in the range
xorjoep	1:24714b45cd1b	43	* <code>[firstIndex, ..., firstIndex+numPoints-1]</code>.
xorjoep	1:24714b45cd1b	44	* The output array <code>pDst</code> contains <code>numPoints</code> values.
xorjoep	1:24714b45cd1b	45	*
xorjoep	1:24714b45cd1b	46	* The allowable range of output indices is [0 srcALen+srcBLen-2].
xorjoep	1:24714b45cd1b	47	* If the requested subset does not fall in this range then the functions return ARM_MATH_ARGUMENT_ERROR.
xorjoep	1:24714b45cd1b	48	* Otherwise the functions return ARM_MATH_SUCCESS.
xorjoep	1:24714b45cd1b	49	* \note Refer to arm_conv_f32() for details on fixed point behavior.
xorjoep	1:24714b45cd1b	50	*
xorjoep	1:24714b45cd1b	51	*
xorjoep	1:24714b45cd1b	52	* <b>Fast Versions</b>
xorjoep	1:24714b45cd1b	53	*
xorjoep	1:24714b45cd1b	54	* \par
xorjoep	1:24714b45cd1b	55	* Fast versions of partial convolution are supported for Q31 and Q15. The Fast versions take fewer cycles than the standard Q31 and Q15 versions of partial convolution, but the design requires
xorjoep	1:24714b45cd1b	56	* the input signals to be scaled down to avoid intermediate overflows.
xorjoep	1:24714b45cd1b	57	*
xorjoep	1:24714b45cd1b	58	*
xorjoep	1:24714b45cd1b	59	* <b>Opt Versions</b>
xorjoep	1:24714b45cd1b	60	*
xorjoep	1:24714b45cd1b	61	* \par
xorjoep	1:24714b45cd1b	62	* Opt versions are supported for Q15 and Q7. The design uses an internal scratch buffer to achieve good optimisation.
xorjoep	1:24714b45cd1b	63	* These versions are optimised for cycles and consume more memory (scratch memory) compared to the Q15 and Q7 versions of partial convolution.
xorjoep	1:24714b45cd1b	64	*/
xorjoep	1:24714b45cd1b	65
xorjoep	1:24714b45cd1b	66	/**
xorjoep	1:24714b45cd1b	67	* @addtogroup PartialConv
xorjoep	1:24714b45cd1b	68	* @{
xorjoep	1:24714b45cd1b	69	*/
xorjoep	1:24714b45cd1b	70
xorjoep	1:24714b45cd1b	71	/**
xorjoep	1:24714b45cd1b	72	* @brief Partial convolution of floating-point sequences.
xorjoep	1:24714b45cd1b	73	* @param[in] *pSrcA points to the first input sequence.
xorjoep	1:24714b45cd1b	74	* @param[in] srcALen length of the first input sequence.
xorjoep	1:24714b45cd1b	75	* @param[in] *pSrcB points to the second input sequence.
xorjoep	1:24714b45cd1b	76	* @param[in] srcBLen length of the second input sequence.
xorjoep	1:24714b45cd1b	77	* @param[out] *pDst points to the location where the output result is written.
xorjoep	1:24714b45cd1b	78	* @param[in] firstIndex is the first output sample to start with.
xorjoep	1:24714b45cd1b	79	* @param[in] numPoints is the number of output points to be computed.
xorjoep	1:24714b45cd1b	80	* @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
xorjoep	1:24714b45cd1b	81	*/
xorjoep	1:24714b45cd1b	82
xorjoep	1:24714b45cd1b	83	arm_status arm_conv_partial_f32(
xorjoep	1:24714b45cd1b	84	float32_t * pSrcA,
xorjoep	1:24714b45cd1b	85	uint32_t srcALen,
xorjoep	1:24714b45cd1b	86	float32_t * pSrcB,
xorjoep	1:24714b45cd1b	87	uint32_t srcBLen,
xorjoep	1:24714b45cd1b	88	float32_t * pDst,
xorjoep	1:24714b45cd1b	89	uint32_t firstIndex,
xorjoep	1:24714b45cd1b	90	uint32_t numPoints)
xorjoep	1:24714b45cd1b	91	{
xorjoep	1:24714b45cd1b	92
xorjoep	1:24714b45cd1b	93
xorjoep	1:24714b45cd1b	94	#if defined (ARM_MATH_DSP)
xorjoep	1:24714b45cd1b	95
xorjoep	1:24714b45cd1b	96	/* Run the below code for Cortex-M4 and Cortex-M3 */
xorjoep	1:24714b45cd1b	97
xorjoep	1:24714b45cd1b	98	float32_t pIn1 = pSrcA; / inputA pointer */
xorjoep	1:24714b45cd1b	99	float32_t pIn2 = pSrcB; / inputB pointer */
xorjoep	1:24714b45cd1b	100	float32_t pOut = pDst; / output pointer */
xorjoep	1:24714b45cd1b	101	float32_t px; / Intermediate inputA pointer */
xorjoep	1:24714b45cd1b	102	float32_t py; / Intermediate inputB pointer */
xorjoep	1:24714b45cd1b	103	float32_t pSrc1, pSrc2; /* Intermediate pointers */
xorjoep	1:24714b45cd1b	104	float32_t sum, acc0, acc1, acc2, acc3; /* Accumulator */
xorjoep	1:24714b45cd1b	105	float32_t x0, x1, x2, x3, c0; /* Temporary variables to hold state and coefficient values */
xorjoep	1:24714b45cd1b	106	uint32_t j, k, count = 0U, blkCnt, check;
xorjoep	1:24714b45cd1b	107	int32_t blockSize1, blockSize2, blockSize3; /* loop counters */
xorjoep	1:24714b45cd1b	108	arm_status status; /* status of Partial convolution */
xorjoep	1:24714b45cd1b	109
xorjoep	1:24714b45cd1b	110
xorjoep	1:24714b45cd1b	111	/* Check for range of output samples to be calculated */
xorjoep	1:24714b45cd1b	112	if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U))))
xorjoep	1:24714b45cd1b	113	{
xorjoep	1:24714b45cd1b	114	/* Set status as ARM_MATH_ARGUMENT_ERROR */
xorjoep	1:24714b45cd1b	115	status = ARM_MATH_ARGUMENT_ERROR;
xorjoep	1:24714b45cd1b	116	}
xorjoep	1:24714b45cd1b	117	else
xorjoep	1:24714b45cd1b	118	{
xorjoep	1:24714b45cd1b	119
xorjoep	1:24714b45cd1b	120	/* The algorithm implementation is based on the lengths of the inputs. */
xorjoep	1:24714b45cd1b	121	/* srcB is always made to slide across srcA. */
xorjoep	1:24714b45cd1b	122	/* So srcBLen is always considered as shorter or equal to srcALen */
xorjoep	1:24714b45cd1b	123	if (srcALen >= srcBLen)
xorjoep	1:24714b45cd1b	124	{
xorjoep	1:24714b45cd1b	125	/* Initialization of inputA pointer */
xorjoep	1:24714b45cd1b	126	pIn1 = pSrcA;
xorjoep	1:24714b45cd1b	127
xorjoep	1:24714b45cd1b	128	/* Initialization of inputB pointer */
xorjoep	1:24714b45cd1b	129	pIn2 = pSrcB;
xorjoep	1:24714b45cd1b	130	}
xorjoep	1:24714b45cd1b	131	else
xorjoep	1:24714b45cd1b	132	{
xorjoep	1:24714b45cd1b	133	/* Initialization of inputA pointer */
xorjoep	1:24714b45cd1b	134	pIn1 = pSrcB;
xorjoep	1:24714b45cd1b	135
xorjoep	1:24714b45cd1b	136	/* Initialization of inputB pointer */
xorjoep	1:24714b45cd1b	137	pIn2 = pSrcA;
xorjoep	1:24714b45cd1b	138
xorjoep	1:24714b45cd1b	139	/* srcBLen is always considered as shorter or equal to srcALen */
xorjoep	1:24714b45cd1b	140	j = srcBLen;
xorjoep	1:24714b45cd1b	141	srcBLen = srcALen;
xorjoep	1:24714b45cd1b	142	srcALen = j;
xorjoep	1:24714b45cd1b	143	}
xorjoep	1:24714b45cd1b	144
xorjoep	1:24714b45cd1b	145	/* Conditions to check which loopCounter holds
xorjoep	1:24714b45cd1b	146	* the first and last indices of the output samples to be calculated. */
xorjoep	1:24714b45cd1b	147	check = firstIndex + numPoints;
xorjoep	1:24714b45cd1b	148	blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
xorjoep	1:24714b45cd1b	149	blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
xorjoep	1:24714b45cd1b	150	blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
xorjoep	1:24714b45cd1b	151	blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :
xorjoep	1:24714b45cd1b	152	(int32_t) numPoints) : 0;
xorjoep	1:24714b45cd1b	153	blockSize2 = ((int32_t) check - blockSize3) -
xorjoep	1:24714b45cd1b	154	(blockSize1 + (int32_t) firstIndex);
xorjoep	1:24714b45cd1b	155	blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
xorjoep	1:24714b45cd1b	156
xorjoep	1:24714b45cd1b	157	/* conv(x,y) at n = x[n] * y[0] + x[n-1] * y[1] + x[n-2] * y[2] + ...+ x[n-N+1] * y[N -1] */
xorjoep	1:24714b45cd1b	158	/* The function is internally
xorjoep	1:24714b45cd1b	159	* divided into three stages according to the number of multiplications that has to be
xorjoep	1:24714b45cd1b	160	* taken place between inputA samples and inputB samples. In the first stage of the
xorjoep	1:24714b45cd1b	161	* algorithm, the multiplications increase by one for every iteration.
xorjoep	1:24714b45cd1b	162	* In the second stage of the algorithm, srcBLen number of multiplications are done.
xorjoep	1:24714b45cd1b	163	* In the third stage of the algorithm, the multiplications decrease by one
xorjoep	1:24714b45cd1b	164	* for every iteration. */
xorjoep	1:24714b45cd1b	165
xorjoep	1:24714b45cd1b	166	/* Set the output pointer to point to the firstIndex
xorjoep	1:24714b45cd1b	167	* of the output sample to be calculated. */
xorjoep	1:24714b45cd1b	168	pOut = pDst + firstIndex;
xorjoep	1:24714b45cd1b	169
xorjoep	1:24714b45cd1b	170	/* --------------------------
xorjoep	1:24714b45cd1b	171	* Initializations of stage1
xorjoep	1:24714b45cd1b	172	* -------------------------*/
xorjoep	1:24714b45cd1b	173
xorjoep	1:24714b45cd1b	174	/* sum = x[0] * y[0]
xorjoep	1:24714b45cd1b	175	* sum = x[0] * y[1] + x[1] * y[0]
xorjoep	1:24714b45cd1b	176	* ....
xorjoep	1:24714b45cd1b	177	* sum = x[0] * y[srcBlen - 1] + x[1] * y[srcBlen - 2] +...+ x[srcBLen - 1] * y[0]
xorjoep	1:24714b45cd1b	178	*/
xorjoep	1:24714b45cd1b	179
xorjoep	1:24714b45cd1b	180	/* In this stage the MAC operations are increased by 1 for every iteration.
xorjoep	1:24714b45cd1b	181	The count variable holds the number of MAC operations performed.
xorjoep	1:24714b45cd1b	182	Since the partial convolution starts from firstIndex,
xorjoep	1:24714b45cd1b	183	the number of MACs to be performed is firstIndex + 1 */
xorjoep	1:24714b45cd1b	184	count = 1U + firstIndex;
xorjoep	1:24714b45cd1b	185
xorjoep	1:24714b45cd1b	186	/* Working pointer of inputA */
xorjoep	1:24714b45cd1b	187	px = pIn1;
xorjoep	1:24714b45cd1b	188
xorjoep	1:24714b45cd1b	189	/* Working pointer of inputB */
xorjoep	1:24714b45cd1b	190	pSrc1 = pIn2 + firstIndex;
xorjoep	1:24714b45cd1b	191	py = pSrc1;
xorjoep	1:24714b45cd1b	192
xorjoep	1:24714b45cd1b	193	/* ------------------------
xorjoep	1:24714b45cd1b	194	* Stage1 process
xorjoep	1:24714b45cd1b	195	* ----------------------*/
xorjoep	1:24714b45cd1b	196
xorjoep	1:24714b45cd1b	197	/* The first stage starts here */
xorjoep	1:24714b45cd1b	198	while (blockSize1 > 0)
xorjoep	1:24714b45cd1b	199	{
xorjoep	1:24714b45cd1b	200	/* Accumulator is made zero for every iteration */
xorjoep	1:24714b45cd1b	201	sum = 0.0f;
xorjoep	1:24714b45cd1b	202
xorjoep	1:24714b45cd1b	203	/* Apply loop unrolling and compute 4 MACs simultaneously. */
xorjoep	1:24714b45cd1b	204	k = count >> 2U;
xorjoep	1:24714b45cd1b	205
xorjoep	1:24714b45cd1b	206	/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
xorjoep	1:24714b45cd1b	207	** a second loop below computes MACs for the remaining 1 to 3 samples. */
xorjoep	1:24714b45cd1b	208	while (k > 0U)
xorjoep	1:24714b45cd1b	209	{
xorjoep	1:24714b45cd1b	210	/* x[0] * y[srcBLen - 1] */
xorjoep	1:24714b45cd1b	211	sum += px++ *py--;
xorjoep	1:24714b45cd1b	212
xorjoep	1:24714b45cd1b	213	/* x[1] * y[srcBLen - 2] */
xorjoep	1:24714b45cd1b	214	sum += px++ *py--;
xorjoep	1:24714b45cd1b	215
xorjoep	1:24714b45cd1b	216	/* x[2] * y[srcBLen - 3] */
xorjoep	1:24714b45cd1b	217	sum += px++ *py--;
xorjoep	1:24714b45cd1b	218
xorjoep	1:24714b45cd1b	219	/* x[3] * y[srcBLen - 4] */
xorjoep	1:24714b45cd1b	220	sum += px++ *py--;
xorjoep	1:24714b45cd1b	221
xorjoep	1:24714b45cd1b	222	/* Decrement the loop counter */
xorjoep	1:24714b45cd1b	223	k--;
xorjoep	1:24714b45cd1b	224	}
xorjoep	1:24714b45cd1b	225
xorjoep	1:24714b45cd1b	226	/* If the count is not a multiple of 4, compute any remaining MACs here.
xorjoep	1:24714b45cd1b	227	** No loop unrolling is used. */
xorjoep	1:24714b45cd1b	228	k = count % 0x4U;
xorjoep	1:24714b45cd1b	229
xorjoep	1:24714b45cd1b	230	while (k > 0U)
xorjoep	1:24714b45cd1b	231	{
xorjoep	1:24714b45cd1b	232	/* Perform the multiply-accumulates */
xorjoep	1:24714b45cd1b	233	sum += px++ *py--;
xorjoep	1:24714b45cd1b	234
xorjoep	1:24714b45cd1b	235	/* Decrement the loop counter */
xorjoep	1:24714b45cd1b	236	k--;
xorjoep	1:24714b45cd1b	237	}
xorjoep	1:24714b45cd1b	238
xorjoep	1:24714b45cd1b	239	/* Store the result in the accumulator in the destination buffer. */
xorjoep	1:24714b45cd1b	240	*pOut++ = sum;
xorjoep	1:24714b45cd1b	241
xorjoep	1:24714b45cd1b	242	/* Update the inputA and inputB pointers for next MAC calculation */
xorjoep	1:24714b45cd1b	243	py = ++pSrc1;
xorjoep	1:24714b45cd1b	244	px = pIn1;
xorjoep	1:24714b45cd1b	245
xorjoep	1:24714b45cd1b	246	/* Increment the MAC count */
xorjoep	1:24714b45cd1b	247	count++;
xorjoep	1:24714b45cd1b	248
xorjoep	1:24714b45cd1b	249	/* Decrement the loop counter */
xorjoep	1:24714b45cd1b	250	blockSize1--;
xorjoep	1:24714b45cd1b	251	}
xorjoep	1:24714b45cd1b	252
xorjoep	1:24714b45cd1b	253	/* --------------------------
xorjoep	1:24714b45cd1b	254	* Initializations of stage2
xorjoep	1:24714b45cd1b	255	* ------------------------*/
xorjoep	1:24714b45cd1b	256
xorjoep	1:24714b45cd1b	257	/* sum = x[0] * y[srcBLen-1] + x[1] * y[srcBLen-2] +...+ x[srcBLen-1] * y[0]
xorjoep	1:24714b45cd1b	258	* sum = x[1] * y[srcBLen-1] + x[2] * y[srcBLen-2] +...+ x[srcBLen] * y[0]
xorjoep	1:24714b45cd1b	259	* ....
xorjoep	1:24714b45cd1b	260	* sum = x[srcALen-srcBLen] * y[srcBLen-1] + x[srcALen-srcBLen+1] * y[srcBLen-2] +...+ x[srcALen-1] * y[0]
xorjoep	1:24714b45cd1b	261	*/
xorjoep	1:24714b45cd1b	262
xorjoep	1:24714b45cd1b	263	/* Working pointer of inputA */
xorjoep	1:24714b45cd1b	264	if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
xorjoep	1:24714b45cd1b	265	{
xorjoep	1:24714b45cd1b	266	px = pIn1 + firstIndex - srcBLen + 1;
xorjoep	1:24714b45cd1b	267	}
xorjoep	1:24714b45cd1b	268	else
xorjoep	1:24714b45cd1b	269	{
xorjoep	1:24714b45cd1b	270	px = pIn1;
xorjoep	1:24714b45cd1b	271	}
xorjoep	1:24714b45cd1b	272
xorjoep	1:24714b45cd1b	273	/* Working pointer of inputB */
xorjoep	1:24714b45cd1b	274	pSrc2 = pIn2 + (srcBLen - 1U);
xorjoep	1:24714b45cd1b	275	py = pSrc2;
xorjoep	1:24714b45cd1b	276
xorjoep	1:24714b45cd1b	277	/* count is the index by which the pointer pIn1 is to be incremented */
xorjoep	1:24714b45cd1b	278	count = 0U;
xorjoep	1:24714b45cd1b	279
xorjoep	1:24714b45cd1b	280	/* -------------------
xorjoep	1:24714b45cd1b	281	* Stage2 process
xorjoep	1:24714b45cd1b	282	* ------------------*/
xorjoep	1:24714b45cd1b	283
xorjoep	1:24714b45cd1b	284	/* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
xorjoep	1:24714b45cd1b	285	* So, to loop unroll over blockSize2,
xorjoep	1:24714b45cd1b	286	* srcBLen should be greater than or equal to 4 */
xorjoep	1:24714b45cd1b	287	if (srcBLen >= 4U)
xorjoep	1:24714b45cd1b	288	{
xorjoep	1:24714b45cd1b	289	/* Loop unroll over blockSize2, by 4 */
xorjoep	1:24714b45cd1b	290	blkCnt = ((uint32_t) blockSize2 >> 2U);
xorjoep	1:24714b45cd1b	291
xorjoep	1:24714b45cd1b	292	while (blkCnt > 0U)
xorjoep	1:24714b45cd1b	293	{
xorjoep	1:24714b45cd1b	294	/* Set all accumulators to zero */
xorjoep	1:24714b45cd1b	295	acc0 = 0.0f;
xorjoep	1:24714b45cd1b	296	acc1 = 0.0f;
xorjoep	1:24714b45cd1b	297	acc2 = 0.0f;
xorjoep	1:24714b45cd1b	298	acc3 = 0.0f;
xorjoep	1:24714b45cd1b	299
xorjoep	1:24714b45cd1b	300	/* read x[0], x[1], x[2] samples */
xorjoep	1:24714b45cd1b	301	x0 = *(px++);
xorjoep	1:24714b45cd1b	302	x1 = *(px++);
xorjoep	1:24714b45cd1b	303	x2 = *(px++);
xorjoep	1:24714b45cd1b	304
xorjoep	1:24714b45cd1b	305	/* Apply loop unrolling and compute 4 MACs simultaneously. */
xorjoep	1:24714b45cd1b	306	k = srcBLen >> 2U;
xorjoep	1:24714b45cd1b	307
xorjoep	1:24714b45cd1b	308	/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
xorjoep	1:24714b45cd1b	309	** a second loop below computes MACs for the remaining 1 to 3 samples. */
xorjoep	1:24714b45cd1b	310	do
xorjoep	1:24714b45cd1b	311	{
xorjoep	1:24714b45cd1b	312	/* Read y[srcBLen - 1] sample */
xorjoep	1:24714b45cd1b	313	c0 = *(py--);
xorjoep	1:24714b45cd1b	314
xorjoep	1:24714b45cd1b	315	/* Read x[3] sample */
xorjoep	1:24714b45cd1b	316	x3 = *(px++);
xorjoep	1:24714b45cd1b	317
xorjoep	1:24714b45cd1b	318	/* Perform the multiply-accumulate */
xorjoep	1:24714b45cd1b	319	/* acc0 += x[0] * y[srcBLen - 1] */
xorjoep	1:24714b45cd1b	320	acc0 += x0 * c0;
xorjoep	1:24714b45cd1b	321
xorjoep	1:24714b45cd1b	322	/* acc1 += x[1] * y[srcBLen - 1] */
xorjoep	1:24714b45cd1b	323	acc1 += x1 * c0;
xorjoep	1:24714b45cd1b	324
xorjoep	1:24714b45cd1b	325	/* acc2 += x[2] * y[srcBLen - 1] */
xorjoep	1:24714b45cd1b	326	acc2 += x2 * c0;
xorjoep	1:24714b45cd1b	327
xorjoep	1:24714b45cd1b	328	/* acc3 += x[3] * y[srcBLen - 1] */
xorjoep	1:24714b45cd1b	329	acc3 += x3 * c0;
xorjoep	1:24714b45cd1b	330
xorjoep	1:24714b45cd1b	331	/* Read y[srcBLen - 2] sample */
xorjoep	1:24714b45cd1b	332	c0 = *(py--);
xorjoep	1:24714b45cd1b	333
xorjoep	1:24714b45cd1b	334	/* Read x[4] sample */
xorjoep	1:24714b45cd1b	335	x0 = *(px++);
xorjoep	1:24714b45cd1b	336
xorjoep	1:24714b45cd1b	337	/* Perform the multiply-accumulate */
xorjoep	1:24714b45cd1b	338	/* acc0 += x[1] * y[srcBLen - 2] */
xorjoep	1:24714b45cd1b	339	acc0 += x1 * c0;
xorjoep	1:24714b45cd1b	340	/* acc1 += x[2] * y[srcBLen - 2] */
xorjoep	1:24714b45cd1b	341	acc1 += x2 * c0;
xorjoep	1:24714b45cd1b	342	/* acc2 += x[3] * y[srcBLen - 2] */
xorjoep	1:24714b45cd1b	343	acc2 += x3 * c0;
xorjoep	1:24714b45cd1b	344	/* acc3 += x[4] * y[srcBLen - 2] */
xorjoep	1:24714b45cd1b	345	acc3 += x0 * c0;
xorjoep	1:24714b45cd1b	346
xorjoep	1:24714b45cd1b	347	/* Read y[srcBLen - 3] sample */
xorjoep	1:24714b45cd1b	348	c0 = *(py--);
xorjoep	1:24714b45cd1b	349
xorjoep	1:24714b45cd1b	350	/* Read x[5] sample */
xorjoep	1:24714b45cd1b	351	x1 = *(px++);
xorjoep	1:24714b45cd1b	352
xorjoep	1:24714b45cd1b	353	/* Perform the multiply-accumulates */
xorjoep	1:24714b45cd1b	354	/* acc0 += x[2] * y[srcBLen - 3] */
xorjoep	1:24714b45cd1b	355	acc0 += x2 * c0;
xorjoep	1:24714b45cd1b	356	/* acc1 += x[3] * y[srcBLen - 3] */
xorjoep	1:24714b45cd1b	357	acc1 += x3 * c0;
xorjoep	1:24714b45cd1b	358	/* acc2 += x[4] * y[srcBLen - 3] */
xorjoep	1:24714b45cd1b	359	acc2 += x0 * c0;
xorjoep	1:24714b45cd1b	360	/* acc3 += x[5] * y[srcBLen - 3] */
xorjoep	1:24714b45cd1b	361	acc3 += x1 * c0;
xorjoep	1:24714b45cd1b	362
xorjoep	1:24714b45cd1b	363	/* Read y[srcBLen - 4] sample */
xorjoep	1:24714b45cd1b	364	c0 = *(py--);
xorjoep	1:24714b45cd1b	365
xorjoep	1:24714b45cd1b	366	/* Read x[6] sample */
xorjoep	1:24714b45cd1b	367	x2 = *(px++);
xorjoep	1:24714b45cd1b	368
xorjoep	1:24714b45cd1b	369	/* Perform the multiply-accumulates */
xorjoep	1:24714b45cd1b	370	/* acc0 += x[3] * y[srcBLen - 4] */
xorjoep	1:24714b45cd1b	371	acc0 += x3 * c0;
xorjoep	1:24714b45cd1b	372	/* acc1 += x[4] * y[srcBLen - 4] */
xorjoep	1:24714b45cd1b	373	acc1 += x0 * c0;
xorjoep	1:24714b45cd1b	374	/* acc2 += x[5] * y[srcBLen - 4] */
xorjoep	1:24714b45cd1b	375	acc2 += x1 * c0;
xorjoep	1:24714b45cd1b	376	/* acc3 += x[6] * y[srcBLen - 4] */
xorjoep	1:24714b45cd1b	377	acc3 += x2 * c0;
xorjoep	1:24714b45cd1b	378
xorjoep	1:24714b45cd1b	379
xorjoep	1:24714b45cd1b	380	} while (--k);
xorjoep	1:24714b45cd1b	381
xorjoep	1:24714b45cd1b	382	/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
xorjoep	1:24714b45cd1b	383	** No loop unrolling is used. */
xorjoep	1:24714b45cd1b	384	k = srcBLen % 0x4U;
xorjoep	1:24714b45cd1b	385
xorjoep	1:24714b45cd1b	386	while (k > 0U)
xorjoep	1:24714b45cd1b	387	{
xorjoep	1:24714b45cd1b	388	/* Read y[srcBLen - 5] sample */
xorjoep	1:24714b45cd1b	389	c0 = *(py--);
xorjoep	1:24714b45cd1b	390
xorjoep	1:24714b45cd1b	391	/* Read x[7] sample */
xorjoep	1:24714b45cd1b	392	x3 = *(px++);
xorjoep	1:24714b45cd1b	393
xorjoep	1:24714b45cd1b	394	/* Perform the multiply-accumulates */
xorjoep	1:24714b45cd1b	395	/* acc0 += x[4] * y[srcBLen - 5] */
xorjoep	1:24714b45cd1b	396	acc0 += x0 * c0;
xorjoep	1:24714b45cd1b	397	/* acc1 += x[5] * y[srcBLen - 5] */
xorjoep	1:24714b45cd1b	398	acc1 += x1 * c0;
xorjoep	1:24714b45cd1b	399	/* acc2 += x[6] * y[srcBLen - 5] */
xorjoep	1:24714b45cd1b	400	acc2 += x2 * c0;
xorjoep	1:24714b45cd1b	401	/* acc3 += x[7] * y[srcBLen - 5] */
xorjoep	1:24714b45cd1b	402	acc3 += x3 * c0;
xorjoep	1:24714b45cd1b	403
xorjoep	1:24714b45cd1b	404	/* Reuse the present samples for the next MAC */
xorjoep	1:24714b45cd1b	405	x0 = x1;
xorjoep	1:24714b45cd1b	406	x1 = x2;
xorjoep	1:24714b45cd1b	407	x2 = x3;
xorjoep	1:24714b45cd1b	408
xorjoep	1:24714b45cd1b	409	/* Decrement the loop counter */
xorjoep	1:24714b45cd1b	410	k--;
xorjoep	1:24714b45cd1b	411	}
xorjoep	1:24714b45cd1b	412
xorjoep	1:24714b45cd1b	413	/* Store the result in the accumulator in the destination buffer. */
xorjoep	1:24714b45cd1b	414	*pOut++ = acc0;
xorjoep	1:24714b45cd1b	415	*pOut++ = acc1;
xorjoep	1:24714b45cd1b	416	*pOut++ = acc2;
xorjoep	1:24714b45cd1b	417	*pOut++ = acc3;
xorjoep	1:24714b45cd1b	418
xorjoep	1:24714b45cd1b	419	/* Increment the pointer pIn1 index, count, by 4 */
xorjoep	1:24714b45cd1b	420	count += 4U;
xorjoep	1:24714b45cd1b	421
xorjoep	1:24714b45cd1b	422	/* Update the inputA and inputB pointers for next MAC calculation */
xorjoep	1:24714b45cd1b	423	if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
xorjoep	1:24714b45cd1b	424	{
xorjoep	1:24714b45cd1b	425	px = pIn1 + firstIndex - srcBLen + 1 + count;
xorjoep	1:24714b45cd1b	426	}
xorjoep	1:24714b45cd1b	427	else
xorjoep	1:24714b45cd1b	428	{
xorjoep	1:24714b45cd1b	429	px = pIn1 + count;
xorjoep	1:24714b45cd1b	430	}
xorjoep	1:24714b45cd1b	431	py = pSrc2;
xorjoep	1:24714b45cd1b	432
xorjoep	1:24714b45cd1b	433	/* Decrement the loop counter */
xorjoep	1:24714b45cd1b	434	blkCnt--;
xorjoep	1:24714b45cd1b	435	}
xorjoep	1:24714b45cd1b	436
xorjoep	1:24714b45cd1b	437	/* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
xorjoep	1:24714b45cd1b	438	** No loop unrolling is used. */
xorjoep	1:24714b45cd1b	439	blkCnt = (uint32_t) blockSize2 % 0x4U;
xorjoep	1:24714b45cd1b	440
xorjoep	1:24714b45cd1b	441	while (blkCnt > 0U)
xorjoep	1:24714b45cd1b	442	{
xorjoep	1:24714b45cd1b	443	/* Accumulator is made zero for every iteration */
xorjoep	1:24714b45cd1b	444	sum = 0.0f;
xorjoep	1:24714b45cd1b	445
xorjoep	1:24714b45cd1b	446	/* Apply loop unrolling and compute 4 MACs simultaneously. */
xorjoep	1:24714b45cd1b	447	k = srcBLen >> 2U;
xorjoep	1:24714b45cd1b	448
xorjoep	1:24714b45cd1b	449	/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
xorjoep	1:24714b45cd1b	450	** a second loop below computes MACs for the remaining 1 to 3 samples. */
xorjoep	1:24714b45cd1b	451	while (k > 0U)
xorjoep	1:24714b45cd1b	452	{
xorjoep	1:24714b45cd1b	453	/* Perform the multiply-accumulates */
xorjoep	1:24714b45cd1b	454	sum += px++ *py--;
xorjoep	1:24714b45cd1b	455	sum += px++ *py--;
xorjoep	1:24714b45cd1b	456	sum += px++ *py--;
xorjoep	1:24714b45cd1b	457	sum += px++ *py--;
xorjoep	1:24714b45cd1b	458
xorjoep	1:24714b45cd1b	459	/* Decrement the loop counter */
xorjoep	1:24714b45cd1b	460	k--;
xorjoep	1:24714b45cd1b	461	}
xorjoep	1:24714b45cd1b	462
xorjoep	1:24714b45cd1b	463	/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
xorjoep	1:24714b45cd1b	464	** No loop unrolling is used. */
xorjoep	1:24714b45cd1b	465	k = srcBLen % 0x4U;
xorjoep	1:24714b45cd1b	466
xorjoep	1:24714b45cd1b	467	while (k > 0U)
xorjoep	1:24714b45cd1b	468	{
xorjoep	1:24714b45cd1b	469	/* Perform the multiply-accumulate */
xorjoep	1:24714b45cd1b	470	sum += px++ *py--;
xorjoep	1:24714b45cd1b	471
xorjoep	1:24714b45cd1b	472	/* Decrement the loop counter */
xorjoep	1:24714b45cd1b	473	k--;
xorjoep	1:24714b45cd1b	474	}
xorjoep	1:24714b45cd1b	475
xorjoep	1:24714b45cd1b	476	/* Store the result in the accumulator in the destination buffer. */
xorjoep	1:24714b45cd1b	477	*pOut++ = sum;
xorjoep	1:24714b45cd1b	478
xorjoep	1:24714b45cd1b	479	/* Increment the MAC count */
xorjoep	1:24714b45cd1b	480	count++;
xorjoep	1:24714b45cd1b	481
xorjoep	1:24714b45cd1b	482	/* Update the inputA and inputB pointers for next MAC calculation */
xorjoep	1:24714b45cd1b	483	if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
xorjoep	1:24714b45cd1b	484	{
xorjoep	1:24714b45cd1b	485	px = pIn1 + firstIndex - srcBLen + 1 + count;
xorjoep	1:24714b45cd1b	486	}
xorjoep	1:24714b45cd1b	487	else
xorjoep	1:24714b45cd1b	488	{
xorjoep	1:24714b45cd1b	489	px = pIn1 + count;
xorjoep	1:24714b45cd1b	490	}
xorjoep	1:24714b45cd1b	491	py = pSrc2;
xorjoep	1:24714b45cd1b	492
xorjoep	1:24714b45cd1b	493	/* Decrement the loop counter */
xorjoep	1:24714b45cd1b	494	blkCnt--;
xorjoep	1:24714b45cd1b	495	}
xorjoep	1:24714b45cd1b	496	}
xorjoep	1:24714b45cd1b	497	else
xorjoep	1:24714b45cd1b	498	{
xorjoep	1:24714b45cd1b	499	/* If srcBLen is less than 4,
xorjoep	1:24714b45cd1b	500	* the blockSize2 loop cannot be unrolled by 4 */
xorjoep	1:24714b45cd1b	501	blkCnt = (uint32_t) blockSize2;
xorjoep	1:24714b45cd1b	502
xorjoep	1:24714b45cd1b	503	while (blkCnt > 0U)
xorjoep	1:24714b45cd1b	504	{
xorjoep	1:24714b45cd1b	505	/* Accumulator is made zero for every iteration */
xorjoep	1:24714b45cd1b	506	sum = 0.0f;
xorjoep	1:24714b45cd1b	507
xorjoep	1:24714b45cd1b	508	/* srcBLen number of MACS should be performed */
xorjoep	1:24714b45cd1b	509	k = srcBLen;
xorjoep	1:24714b45cd1b	510
xorjoep	1:24714b45cd1b	511	while (k > 0U)
xorjoep	1:24714b45cd1b	512	{
xorjoep	1:24714b45cd1b	513	/* Perform the multiply-accumulate */
xorjoep	1:24714b45cd1b	514	sum += px++ *py--;
xorjoep	1:24714b45cd1b	515
xorjoep	1:24714b45cd1b	516	/* Decrement the loop counter */
xorjoep	1:24714b45cd1b	517	k--;
xorjoep	1:24714b45cd1b	518	}
xorjoep	1:24714b45cd1b	519
xorjoep	1:24714b45cd1b	520	/* Store the result in the accumulator in the destination buffer. */
xorjoep	1:24714b45cd1b	521	*pOut++ = sum;
xorjoep	1:24714b45cd1b	522
xorjoep	1:24714b45cd1b	523	/* Increment the MAC count */
xorjoep	1:24714b45cd1b	524	count++;
xorjoep	1:24714b45cd1b	525
xorjoep	1:24714b45cd1b	526	/* Update the inputA and inputB pointers for next MAC calculation */
xorjoep	1:24714b45cd1b	527	if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
xorjoep	1:24714b45cd1b	528	{
xorjoep	1:24714b45cd1b	529	px = pIn1 + firstIndex - srcBLen + 1 + count;
xorjoep	1:24714b45cd1b	530	}
xorjoep	1:24714b45cd1b	531	else
xorjoep	1:24714b45cd1b	532	{
xorjoep	1:24714b45cd1b	533	px = pIn1 + count;
xorjoep	1:24714b45cd1b	534	}
xorjoep	1:24714b45cd1b	535	py = pSrc2;
xorjoep	1:24714b45cd1b	536
xorjoep	1:24714b45cd1b	537	/* Decrement the loop counter */
xorjoep	1:24714b45cd1b	538	blkCnt--;
xorjoep	1:24714b45cd1b	539	}
xorjoep	1:24714b45cd1b	540	}
xorjoep	1:24714b45cd1b	541
xorjoep	1:24714b45cd1b	542
xorjoep	1:24714b45cd1b	543	/* --------------------------
xorjoep	1:24714b45cd1b	544	* Initializations of stage3
xorjoep	1:24714b45cd1b	545	* -------------------------*/
xorjoep	1:24714b45cd1b	546
xorjoep	1:24714b45cd1b	547	/* sum += x[srcALen-srcBLen+1] * y[srcBLen-1] + x[srcALen-srcBLen+2] * y[srcBLen-2] +...+ x[srcALen-1] * y[1]
xorjoep	1:24714b45cd1b	548	* sum += x[srcALen-srcBLen+2] * y[srcBLen-1] + x[srcALen-srcBLen+3] * y[srcBLen-2] +...+ x[srcALen-1] * y[2]
xorjoep	1:24714b45cd1b	549	* ....
xorjoep	1:24714b45cd1b	550	* sum += x[srcALen-2] * y[srcBLen-1] + x[srcALen-1] * y[srcBLen-2]
xorjoep	1:24714b45cd1b	551	* sum += x[srcALen-1] * y[srcBLen-1]
xorjoep	1:24714b45cd1b	552	*/
xorjoep	1:24714b45cd1b	553
xorjoep	1:24714b45cd1b	554	/* In this stage the MAC operations are decreased by 1 for every iteration.
xorjoep	1:24714b45cd1b	555	The count variable holds the number of MAC operations performed */
xorjoep	1:24714b45cd1b	556	count = srcBLen - 1U;
xorjoep	1:24714b45cd1b	557
xorjoep	1:24714b45cd1b	558	/* Working pointer of inputA */
xorjoep	1:24714b45cd1b	559	pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
xorjoep	1:24714b45cd1b	560	px = pSrc1;
xorjoep	1:24714b45cd1b	561
xorjoep	1:24714b45cd1b	562	/* Working pointer of inputB */
xorjoep	1:24714b45cd1b	563	pSrc2 = pIn2 + (srcBLen - 1U);
xorjoep	1:24714b45cd1b	564	py = pSrc2;
xorjoep	1:24714b45cd1b	565
xorjoep	1:24714b45cd1b	566	while (blockSize3 > 0)
xorjoep	1:24714b45cd1b	567	{
xorjoep	1:24714b45cd1b	568	/* Accumulator is made zero for every iteration */
xorjoep	1:24714b45cd1b	569	sum = 0.0f;
xorjoep	1:24714b45cd1b	570
xorjoep	1:24714b45cd1b	571	/* Apply loop unrolling and compute 4 MACs simultaneously. */
xorjoep	1:24714b45cd1b	572	k = count >> 2U;
xorjoep	1:24714b45cd1b	573
xorjoep	1:24714b45cd1b	574	/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
xorjoep	1:24714b45cd1b	575	** a second loop below computes MACs for the remaining 1 to 3 samples. */
xorjoep	1:24714b45cd1b	576	while (k > 0U)
xorjoep	1:24714b45cd1b	577	{
xorjoep	1:24714b45cd1b	578	/* sum += x[srcALen - srcBLen + 1] * y[srcBLen - 1] */
xorjoep	1:24714b45cd1b	579	sum += px++ *py--;
xorjoep	1:24714b45cd1b	580
xorjoep	1:24714b45cd1b	581	/* sum += x[srcALen - srcBLen + 2] * y[srcBLen - 2] */
xorjoep	1:24714b45cd1b	582	sum += px++ *py--;
xorjoep	1:24714b45cd1b	583
xorjoep	1:24714b45cd1b	584	/* sum += x[srcALen - srcBLen + 3] * y[srcBLen - 3] */
xorjoep	1:24714b45cd1b	585	sum += px++ *py--;
xorjoep	1:24714b45cd1b	586
xorjoep	1:24714b45cd1b	587	/* sum += x[srcALen - srcBLen + 4] * y[srcBLen - 4] */
xorjoep	1:24714b45cd1b	588	sum += px++ *py--;
xorjoep	1:24714b45cd1b	589
xorjoep	1:24714b45cd1b	590	/* Decrement the loop counter */
xorjoep	1:24714b45cd1b	591	k--;
xorjoep	1:24714b45cd1b	592	}
xorjoep	1:24714b45cd1b	593
xorjoep	1:24714b45cd1b	594	/* If the count is not a multiple of 4, compute any remaining MACs here.
xorjoep	1:24714b45cd1b	595	** No loop unrolling is used. */
xorjoep	1:24714b45cd1b	596	k = count % 0x4U;
xorjoep	1:24714b45cd1b	597
xorjoep	1:24714b45cd1b	598	while (k > 0U)
xorjoep	1:24714b45cd1b	599	{
xorjoep	1:24714b45cd1b	600	/* Perform the multiply-accumulates */
xorjoep	1:24714b45cd1b	601	/* sum += x[srcALen-1] * y[srcBLen-1] */
xorjoep	1:24714b45cd1b	602	sum += px++ *py--;
xorjoep	1:24714b45cd1b	603
xorjoep	1:24714b45cd1b	604	/* Decrement the loop counter */
xorjoep	1:24714b45cd1b	605	k--;
xorjoep	1:24714b45cd1b	606	}
xorjoep	1:24714b45cd1b	607
xorjoep	1:24714b45cd1b	608	/* Store the result in the accumulator in the destination buffer. */
xorjoep	1:24714b45cd1b	609	*pOut++ = sum;
xorjoep	1:24714b45cd1b	610
xorjoep	1:24714b45cd1b	611	/* Update the inputA and inputB pointers for next MAC calculation */
xorjoep	1:24714b45cd1b	612	px = ++pSrc1;
xorjoep	1:24714b45cd1b	613	py = pSrc2;
xorjoep	1:24714b45cd1b	614
xorjoep	1:24714b45cd1b	615	/* Decrement the MAC count */
xorjoep	1:24714b45cd1b	616	count--;
xorjoep	1:24714b45cd1b	617
xorjoep	1:24714b45cd1b	618	/* Decrement the loop counter */
xorjoep	1:24714b45cd1b	619	blockSize3--;
xorjoep	1:24714b45cd1b	620
xorjoep	1:24714b45cd1b	621	}
xorjoep	1:24714b45cd1b	622
xorjoep	1:24714b45cd1b	623	/* set status as ARM_MATH_SUCCESS */
xorjoep	1:24714b45cd1b	624	status = ARM_MATH_SUCCESS;
xorjoep	1:24714b45cd1b	625	}
xorjoep	1:24714b45cd1b	626
xorjoep	1:24714b45cd1b	627	/* Return to application */
xorjoep	1:24714b45cd1b	628	return (status);
xorjoep	1:24714b45cd1b	629
xorjoep	1:24714b45cd1b	630	#else
xorjoep	1:24714b45cd1b	631
xorjoep	1:24714b45cd1b	632	/* Run the below code for Cortex-M0 */
xorjoep	1:24714b45cd1b	633
xorjoep	1:24714b45cd1b	634	float32_t pIn1 = pSrcA; / inputA pointer */
xorjoep	1:24714b45cd1b	635	float32_t pIn2 = pSrcB; / inputB pointer */
xorjoep	1:24714b45cd1b	636	float32_t sum; /* Accumulator */
xorjoep	1:24714b45cd1b	637	uint32_t i, j; /* loop counters */
xorjoep	1:24714b45cd1b	638	arm_status status; /* status of Partial convolution */
xorjoep	1:24714b45cd1b	639
xorjoep	1:24714b45cd1b	640	/* Check for range of output samples to be calculated */
xorjoep	1:24714b45cd1b	641	if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U))))
xorjoep	1:24714b45cd1b	642	{
xorjoep	1:24714b45cd1b	643	/* Set status as ARM_MATH_ARGUMENT_ERROR */
xorjoep	1:24714b45cd1b	644	status = ARM_MATH_ARGUMENT_ERROR;
xorjoep	1:24714b45cd1b	645	}
xorjoep	1:24714b45cd1b	646	else
xorjoep	1:24714b45cd1b	647	{
xorjoep	1:24714b45cd1b	648	/* Loop to calculate convolution for output length number of values */
xorjoep	1:24714b45cd1b	649	for (i = firstIndex; i <= (firstIndex + numPoints - 1); i++)
xorjoep	1:24714b45cd1b	650	{
xorjoep	1:24714b45cd1b	651	/* Initialize sum with zero to carry on MAC operations */
xorjoep	1:24714b45cd1b	652	sum = 0.0f;
xorjoep	1:24714b45cd1b	653
xorjoep	1:24714b45cd1b	654	/* Loop to perform MAC operations according to convolution equation */
xorjoep	1:24714b45cd1b	655	for (j = 0U; j <= i; j++)
xorjoep	1:24714b45cd1b	656	{
xorjoep	1:24714b45cd1b	657	/* Check the array limitations for inputs */
xorjoep	1:24714b45cd1b	658	if ((((i - j) < srcBLen) && (j < srcALen)))
xorjoep	1:24714b45cd1b	659	{
xorjoep	1:24714b45cd1b	660	/* z[i] += x[j] * y[i-j] */
xorjoep	1:24714b45cd1b	661	sum += pIn1[j] * pIn2[i - j];
xorjoep	1:24714b45cd1b	662	}
xorjoep	1:24714b45cd1b	663	}
xorjoep	1:24714b45cd1b	664	/* Store the output in the destination buffer */
xorjoep	1:24714b45cd1b	665	pDst[i] = sum;
xorjoep	1:24714b45cd1b	666	}
xorjoep	1:24714b45cd1b	667	/* Set status as ARM_MATH_SUCCESS as there are no argument errors */
xorjoep	1:24714b45cd1b	668	status = ARM_MATH_SUCCESS;
xorjoep	1:24714b45cd1b	669	}
xorjoep	1:24714b45cd1b	670	return (status);
xorjoep	1:24714b45cd1b	671
xorjoep	1:24714b45cd1b	672	#endif /* #if defined (ARM_MATH_DSP) */
xorjoep	1:24714b45cd1b	673
xorjoep	1:24714b45cd1b	674	}
xorjoep	1:24714b45cd1b	675
xorjoep	1:24714b45cd1b	676	/**
xorjoep	1:24714b45cd1b	677	* @} end of PartialConv group
xorjoep	1:24714b45cd1b	678	*/

Repository toolbox

Export to desktop IDE

Repository details

Type:	Library
Created:	20 Jun 2018
Imports:	227
Forks:	0
Commits:	4
Dependents:	10
Dependencies:	0
Followers:	6

functions/FilteringFunctions/arm_conv_partial_f32.c@3:4098b9d3d571, 2018-06-21 (annotated)

Who changed what in which revision?

Repository toolbox

Repository details

Important Information for this Arm website

Access Warning