CMSIS_DSP_5 - The CMSIS DSP 5 library

Users » xorjoep » Code » CMSIS_DSP_5

The CMSIS DSP 5 library

Dependents: Nucleo-Heart-Rate ejercicioVrms2 PROYECTOFINAL ejercicioVrms ... more

functions/BasicMathFunctions/arm_dot_prod_q7.c@3:4098b9d3d571, 2018-06-21 (annotated)

Committer:: xorjoep
Date:: Thu Jun 21 11:56:27 2018 +0000
Revision:: 3:4098b9d3d571
Parent:: 1:24714b45cd1b

headers is a folder not a library

Who changed what in which revision?

User	Revision	Line number	New contents of line
xorjoep	1:24714b45cd1b	1	/* ----------------------------------------------------------------------
xorjoep	1:24714b45cd1b	2	* Project: CMSIS DSP Library
xorjoep	1:24714b45cd1b	3	* Title: arm_dot_prod_q7.c
xorjoep	1:24714b45cd1b	4	* Description: Q7 dot product
xorjoep	1:24714b45cd1b	5	*
xorjoep	1:24714b45cd1b	6	* $Date: 27. January 2017
xorjoep	1:24714b45cd1b	7	* $Revision: V.1.5.1
xorjoep	1:24714b45cd1b	8	*
xorjoep	1:24714b45cd1b	9	* Target Processor: Cortex-M cores
xorjoep	1:24714b45cd1b	10	* -------------------------------------------------------------------- */
xorjoep	1:24714b45cd1b	11	/*
xorjoep	1:24714b45cd1b	12	* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
xorjoep	1:24714b45cd1b	13	*
xorjoep	1:24714b45cd1b	14	* SPDX-License-Identifier: Apache-2.0
xorjoep	1:24714b45cd1b	15	*
xorjoep	1:24714b45cd1b	16	* Licensed under the Apache License, Version 2.0 (the License); you may
xorjoep	1:24714b45cd1b	17	* not use this file except in compliance with the License.
xorjoep	1:24714b45cd1b	18	* You may obtain a copy of the License at
xorjoep	1:24714b45cd1b	19	*
xorjoep	1:24714b45cd1b	20	* www.apache.org/licenses/LICENSE-2.0
xorjoep	1:24714b45cd1b	21	*
xorjoep	1:24714b45cd1b	22	* Unless required by applicable law or agreed to in writing, software
xorjoep	1:24714b45cd1b	23	* distributed under the License is distributed on an AS IS BASIS, WITHOUT
xorjoep	1:24714b45cd1b	24	* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
xorjoep	1:24714b45cd1b	25	* See the License for the specific language governing permissions and
xorjoep	1:24714b45cd1b	26	* limitations under the License.
xorjoep	1:24714b45cd1b	27	*/
xorjoep	1:24714b45cd1b	28
xorjoep	1:24714b45cd1b	29	#include "arm_math.h"
xorjoep	1:24714b45cd1b	30
xorjoep	1:24714b45cd1b	31	/**
xorjoep	1:24714b45cd1b	32	* @ingroup groupMath
xorjoep	1:24714b45cd1b	33	*/
xorjoep	1:24714b45cd1b	34
xorjoep	1:24714b45cd1b	35	/**
xorjoep	1:24714b45cd1b	36	* @addtogroup dot_prod
xorjoep	1:24714b45cd1b	37	* @{
xorjoep	1:24714b45cd1b	38	*/
xorjoep	1:24714b45cd1b	39
xorjoep	1:24714b45cd1b	40	/**
xorjoep	1:24714b45cd1b	41	* @brief Dot product of Q7 vectors.
xorjoep	1:24714b45cd1b	42	* @param[in] *pSrcA points to the first input vector
xorjoep	1:24714b45cd1b	43	* @param[in] *pSrcB points to the second input vector
xorjoep	1:24714b45cd1b	44	* @param[in] blockSize number of samples in each vector
xorjoep	1:24714b45cd1b	45	* @param[out] *result output result returned here
xorjoep	1:24714b45cd1b	46	* @return none.
xorjoep	1:24714b45cd1b	47	*
xorjoep	1:24714b45cd1b	48	* <b>Scaling and Overflow Behavior:</b>
xorjoep	1:24714b45cd1b	49	* \par
xorjoep	1:24714b45cd1b	50	* The intermediate multiplications are in 1.7 x 1.7 = 2.14 format and these
xorjoep	1:24714b45cd1b	51	* results are added to an accumulator in 18.14 format.
xorjoep	1:24714b45cd1b	52	* Nonsaturating additions are used and there is no danger of wrap around as long as
xorjoep	1:24714b45cd1b	53	* the vectors are less than 2^18 elements long.
xorjoep	1:24714b45cd1b	54	* The return result is in 18.14 format.
xorjoep	1:24714b45cd1b	55	*/
xorjoep	1:24714b45cd1b	56
xorjoep	1:24714b45cd1b	57	void arm_dot_prod_q7(
xorjoep	1:24714b45cd1b	58	q7_t * pSrcA,
xorjoep	1:24714b45cd1b	59	q7_t * pSrcB,
xorjoep	1:24714b45cd1b	60	uint32_t blockSize,
xorjoep	1:24714b45cd1b	61	q31_t * result)
xorjoep	1:24714b45cd1b	62	{
xorjoep	1:24714b45cd1b	63	uint32_t blkCnt; /* loop counter */
xorjoep	1:24714b45cd1b	64
xorjoep	1:24714b45cd1b	65	q31_t sum = 0; /* Temporary variables to store output */
xorjoep	1:24714b45cd1b	66
xorjoep	1:24714b45cd1b	67	#if defined (ARM_MATH_DSP)
xorjoep	1:24714b45cd1b	68
xorjoep	1:24714b45cd1b	69	/* Run the below code for Cortex-M4 and Cortex-M3 */
xorjoep	1:24714b45cd1b	70
xorjoep	1:24714b45cd1b	71	q31_t input1, input2; /* Temporary variables to store input */
xorjoep	1:24714b45cd1b	72	q31_t inA1, inA2, inB1, inB2; /* Temporary variables to store input */
xorjoep	1:24714b45cd1b	73
xorjoep	1:24714b45cd1b	74
xorjoep	1:24714b45cd1b	75
xorjoep	1:24714b45cd1b	76	/* loop Unrolling */
xorjoep	1:24714b45cd1b	77	blkCnt = blockSize >> 2U;
xorjoep	1:24714b45cd1b	78
xorjoep	1:24714b45cd1b	79	/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
xorjoep	1:24714b45cd1b	80	** a second loop below computes the remaining 1 to 3 samples. */
xorjoep	1:24714b45cd1b	81	while (blkCnt > 0U)
xorjoep	1:24714b45cd1b	82	{
xorjoep	1:24714b45cd1b	83	/* read 4 samples at a time from sourceA */
xorjoep	1:24714b45cd1b	84	input1 = *__SIMD32(pSrcA)++;
xorjoep	1:24714b45cd1b	85	/* read 4 samples at a time from sourceB */
xorjoep	1:24714b45cd1b	86	input2 = *__SIMD32(pSrcB)++;
xorjoep	1:24714b45cd1b	87
xorjoep	1:24714b45cd1b	88	/* extract two q7_t samples to q15_t samples */
xorjoep	1:24714b45cd1b	89	inA1 = __SXTB16(__ROR(input1, 8));
xorjoep	1:24714b45cd1b	90	/* extract remaining two samples */
xorjoep	1:24714b45cd1b	91	inA2 = __SXTB16(input1);
xorjoep	1:24714b45cd1b	92	/* extract two q7_t samples to q15_t samples */
xorjoep	1:24714b45cd1b	93	inB1 = __SXTB16(__ROR(input2, 8));
xorjoep	1:24714b45cd1b	94	/* extract remaining two samples */
xorjoep	1:24714b45cd1b	95	inB2 = __SXTB16(input2);
xorjoep	1:24714b45cd1b	96
xorjoep	1:24714b45cd1b	97	/* multiply and accumulate two samples at a time */
xorjoep	1:24714b45cd1b	98	sum = __SMLAD(inA1, inB1, sum);
xorjoep	1:24714b45cd1b	99	sum = __SMLAD(inA2, inB2, sum);
xorjoep	1:24714b45cd1b	100
xorjoep	1:24714b45cd1b	101	/* Decrement the loop counter */
xorjoep	1:24714b45cd1b	102	blkCnt--;
xorjoep	1:24714b45cd1b	103	}
xorjoep	1:24714b45cd1b	104
xorjoep	1:24714b45cd1b	105	/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
xorjoep	1:24714b45cd1b	106	** No loop unrolling is used. */
xorjoep	1:24714b45cd1b	107	blkCnt = blockSize % 0x4U;
xorjoep	1:24714b45cd1b	108
xorjoep	1:24714b45cd1b	109	while (blkCnt > 0U)
xorjoep	1:24714b45cd1b	110	{
xorjoep	1:24714b45cd1b	111	/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
xorjoep	1:24714b45cd1b	112	/* Dot product and then store the results in a temporary buffer. */
xorjoep	1:24714b45cd1b	113	sum = __SMLAD(*pSrcA++, *pSrcB++, sum);
xorjoep	1:24714b45cd1b	114
xorjoep	1:24714b45cd1b	115	/* Decrement the loop counter */
xorjoep	1:24714b45cd1b	116	blkCnt--;
xorjoep	1:24714b45cd1b	117	}
xorjoep	1:24714b45cd1b	118
xorjoep	1:24714b45cd1b	119	#else
xorjoep	1:24714b45cd1b	120
xorjoep	1:24714b45cd1b	121	/* Run the below code for Cortex-M0 */
xorjoep	1:24714b45cd1b	122
xorjoep	1:24714b45cd1b	123
xorjoep	1:24714b45cd1b	124
xorjoep	1:24714b45cd1b	125	/* Initialize blkCnt with number of samples */
xorjoep	1:24714b45cd1b	126	blkCnt = blockSize;
xorjoep	1:24714b45cd1b	127
xorjoep	1:24714b45cd1b	128	while (blkCnt > 0U)
xorjoep	1:24714b45cd1b	129	{
xorjoep	1:24714b45cd1b	130	/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
xorjoep	1:24714b45cd1b	131	/* Dot product and then store the results in a temporary buffer. */
xorjoep	1:24714b45cd1b	132	sum += (q31_t) ((q15_t) * pSrcA++ * *pSrcB++);
xorjoep	1:24714b45cd1b	133
xorjoep	1:24714b45cd1b	134	/* Decrement the loop counter */
xorjoep	1:24714b45cd1b	135	blkCnt--;
xorjoep	1:24714b45cd1b	136	}
xorjoep	1:24714b45cd1b	137
xorjoep	1:24714b45cd1b	138	#endif /* #if defined (ARM_MATH_DSP) */
xorjoep	1:24714b45cd1b	139
xorjoep	1:24714b45cd1b	140
xorjoep	1:24714b45cd1b	141	/* Store the result in the destination buffer in 18.14 format */
xorjoep	1:24714b45cd1b	142	*result = sum;
xorjoep	1:24714b45cd1b	143	}
xorjoep	1:24714b45cd1b	144
xorjoep	1:24714b45cd1b	145	/**
xorjoep	1:24714b45cd1b	146	* @} end of dot_prod group
xorjoep	1:24714b45cd1b	147	*/

Repository toolbox

Export to desktop IDE

Repository details

Type:	Library
Created:	20 Jun 2018
Imports:	227
Forks:	0
Commits:	4
Dependents:	10
Dependencies:	0
Followers:	6

functions/BasicMathFunctions/arm_dot_prod_q7.c@3:4098b9d3d571, 2018-06-21 (annotated)

Who changed what in which revision?

Repository toolbox

Repository details

Important Information for this Arm website

Access Warning