dsp | Mbed

Users » nprobably » Code » dsp

src/Cortex-M4-M3/FilteringFunctions/arm_fir_q15.c@3:ad02f4ea1fbe, 2017-06-29 (annotated)

Committer:: nprobably
Date:: Thu Jun 29 06:56:55 2017 +0000
Revision:: 3:ad02f4ea1fbe
Parent:: 0:1014af42efd9

A hack to make it compile with mbed 5

Who changed what in which revision?

User	Revision	Line number	New contents of line
simon	0:1014af42efd9	1	/* ----------------------------------------------------------------------
simon	0:1014af42efd9	2	* Copyright (C) 2010 ARM Limited. All rights reserved.
simon	0:1014af42efd9	3	*
simon	0:1014af42efd9	4	* $Date: 29. November 2010
simon	0:1014af42efd9	5	* $Revision: V1.0.3
simon	0:1014af42efd9	6	*
simon	0:1014af42efd9	7	* Project: CMSIS DSP Library
simon	0:1014af42efd9	8	* Title: arm_fir_q15.c
simon	0:1014af42efd9	9	*
simon	0:1014af42efd9	10	* Description: Q15 FIR filter processing function.
simon	0:1014af42efd9	11	*
simon	0:1014af42efd9	12	* Target Processor: Cortex-M4/Cortex-M3
simon	0:1014af42efd9	13	*
simon	0:1014af42efd9	14	* Version 1.0.3 2010/11/29
simon	0:1014af42efd9	15	* Re-organized the CMSIS folders and updated documentation.
simon	0:1014af42efd9	16	*
simon	0:1014af42efd9	17	* Version 1.0.2 2010/11/11
simon	0:1014af42efd9	18	* Documentation updated.
simon	0:1014af42efd9	19	*
simon	0:1014af42efd9	20	* Version 1.0.1 2010/10/05
simon	0:1014af42efd9	21	* Production release and review comments incorporated.
simon	0:1014af42efd9	22	*
simon	0:1014af42efd9	23	* Version 1.0.0 2010/09/20
simon	0:1014af42efd9	24	* Production release and review comments incorporated.
simon	0:1014af42efd9	25	*
simon	0:1014af42efd9	26	* Version 0.0.5 2010/04/26
simon	0:1014af42efd9	27	* incorporated review comments and updated with latest CMSIS layer
simon	0:1014af42efd9	28	*
simon	0:1014af42efd9	29	* Version 0.0.3 2010/03/10
simon	0:1014af42efd9	30	* Initial version
simon	0:1014af42efd9	31	* -------------------------------------------------------------------- */
simon	0:1014af42efd9	32
simon	0:1014af42efd9	33	#include "arm_math.h"
simon	0:1014af42efd9	34
simon	0:1014af42efd9	35	/**
simon	0:1014af42efd9	36	* @ingroup groupFilters
simon	0:1014af42efd9	37	*/
simon	0:1014af42efd9	38
simon	0:1014af42efd9	39	/**
simon	0:1014af42efd9	40	* @addtogroup FIR
simon	0:1014af42efd9	41	* @{
simon	0:1014af42efd9	42	*/
simon	0:1014af42efd9	43
simon	0:1014af42efd9	44	/**
simon	0:1014af42efd9	45	* @brief Processing function for the Q15 FIR filter.
simon	0:1014af42efd9	46	* @param[in] *S points to an instance of the Q15 FIR structure.
simon	0:1014af42efd9	47	* @param[in] *pSrc points to the block of input data.
simon	0:1014af42efd9	48	* @param[out] *pDst points to the block of output data.
simon	0:1014af42efd9	49	* @param[in] blockSize number of samples to process per call.
simon	0:1014af42efd9	50	* @return none.
simon	0:1014af42efd9	51	*
simon	0:1014af42efd9	52	* <b>Scaling and Overflow Behavior:</b>
simon	0:1014af42efd9	53	* \par
simon	0:1014af42efd9	54	* The function is implemented using a 64-bit internal accumulator.
simon	0:1014af42efd9	55	* Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
simon	0:1014af42efd9	56	* The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
simon	0:1014af42efd9	57	* There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
simon	0:1014af42efd9	58	* After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
simon	0:1014af42efd9	59	* Lastly, the accumulator is saturated to yield a result in 1.15 format.
simon	0:1014af42efd9	60	*
simon	0:1014af42efd9	61	* \par
simon	0:1014af42efd9	62	* Refer to the function <code>arm_fir_fast_q15()</code> for a faster but less precise implementation of this function.
simon	0:1014af42efd9	63	*/
simon	0:1014af42efd9	64
simon	0:1014af42efd9	65	void arm_fir_q15(
simon	0:1014af42efd9	66	const arm_fir_instance_q15 * S,
simon	0:1014af42efd9	67	q15_t * pSrc,
simon	0:1014af42efd9	68	q15_t * pDst,
simon	0:1014af42efd9	69	uint32_t blockSize)
simon	0:1014af42efd9	70	{
simon	0:1014af42efd9	71	q15_t pState = S->pState; / State pointer */
simon	0:1014af42efd9	72	q15_t pCoeffs = S->pCoeffs; / Coefficient pointer */
simon	0:1014af42efd9	73	q15_t pStateCurnt; / Points to the current sample of the state */
simon	0:1014af42efd9	74	q15_t px1; / Temporary q15 pointer for state buffer */
simon	0:1014af42efd9	75	q31_t pb; / Temporary pointer for coefficient buffer */
simon	0:1014af42efd9	76	q31_t px2; / Temporary q31 pointer for SIMD state buffer accesses */
simon	0:1014af42efd9	77	q31_t x0, x1, x2, x3, c0; /* Temporary variables to hold SIMD state and coefficient values */
simon	0:1014af42efd9	78	q63_t acc0, acc1, acc2, acc3; /* Accumulators */
simon	0:1014af42efd9	79	uint32_t numTaps = S->numTaps; /* Number of taps in the filter */
simon	0:1014af42efd9	80	uint32_t tapCnt, blkCnt; /* Loop counters */
simon	0:1014af42efd9	81
simon	0:1014af42efd9	82	/* S->pState points to state array which contains previous frame (numTaps - 1) samples */
simon	0:1014af42efd9	83	/* pStateCurnt points to the location where the new input data should be written */
simon	0:1014af42efd9	84	pStateCurnt = &(S->pState[(numTaps - 1u)]);
simon	0:1014af42efd9	85
simon	0:1014af42efd9	86	/* Apply loop unrolling and compute 4 output values simultaneously.
simon	0:1014af42efd9	87	* The variables acc0 ... acc3 hold output values that are being computed:
simon	0:1014af42efd9	88	*
simon	0:1014af42efd9	89	* acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]
simon	0:1014af42efd9	90	* acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]
simon	0:1014af42efd9	91	* acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]
simon	0:1014af42efd9	92	* acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3]
simon	0:1014af42efd9	93	*/
simon	0:1014af42efd9	94	blkCnt = blockSize >> 2;
simon	0:1014af42efd9	95
simon	0:1014af42efd9	96	/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
simon	0:1014af42efd9	97	** a second loop below computes the remaining 1 to 3 samples. */
simon	0:1014af42efd9	98	while(blkCnt > 0u)
simon	0:1014af42efd9	99	{
simon	0:1014af42efd9	100	/* Copy four new input samples into the state buffer.
simon	0:1014af42efd9	101	** Use 32-bit SIMD to move the 16-bit data. Only requires two copies. */
simon	0:1014af42efd9	102	__SIMD32(pStateCurnt)++ = __SIMD32(pSrc)++;
simon	0:1014af42efd9	103	__SIMD32(pStateCurnt)++ = __SIMD32(pSrc)++;
simon	0:1014af42efd9	104
simon	0:1014af42efd9	105	/* Set all accumulators to zero */
simon	0:1014af42efd9	106	acc0 = 0;
simon	0:1014af42efd9	107	acc1 = 0;
simon	0:1014af42efd9	108	acc2 = 0;
simon	0:1014af42efd9	109	acc3 = 0;
simon	0:1014af42efd9	110
simon	0:1014af42efd9	111	/* Initialize state pointer of type q15 */
simon	0:1014af42efd9	112	px1 = pState;
simon	0:1014af42efd9	113
simon	0:1014af42efd9	114	/* Initialize coeff pointer of type q31 */
simon	0:1014af42efd9	115	pb = (q31_t *) (pCoeffs);
simon	0:1014af42efd9	116
simon	0:1014af42efd9	117	/* Read the first two samples from the state buffer: x[n-N], x[n-N-1] */
simon	0:1014af42efd9	118	x0 = (q31_t ) (px1++);
simon	0:1014af42efd9	119
simon	0:1014af42efd9	120	/* Read the third and forth samples from the state buffer: x[n-N-1], x[n-N-2] */
simon	0:1014af42efd9	121	x1 = (q31_t ) (px1++);
simon	0:1014af42efd9	122
simon	0:1014af42efd9	123	/* Loop over the number of taps. Unroll by a factor of 4.
simon	0:1014af42efd9	124	** Repeat until we've computed numTaps-4 coefficients. */
simon	0:1014af42efd9	125	tapCnt = numTaps >> 2;
simon	0:1014af42efd9	126	do
simon	0:1014af42efd9	127	{
simon	0:1014af42efd9	128	/* Read the first two coefficients using SIMD: b[N] and b[N-1] coefficients */
simon	0:1014af42efd9	129	c0 = *(pb++);
simon	0:1014af42efd9	130
simon	0:1014af42efd9	131	/* acc0 += b[N] * x[n-N] + b[N-1] * x[n-N-1] */
simon	0:1014af42efd9	132	acc0 = __SMLALD(x0, c0, acc0);
simon	0:1014af42efd9	133
simon	0:1014af42efd9	134	/* acc1 += b[N] * x[n-N-1] + b[N-1] * x[n-N-2] */
simon	0:1014af42efd9	135	acc1 = __SMLALD(x1, c0, acc1);
simon	0:1014af42efd9	136
simon	0:1014af42efd9	137	/* Read state x[n-N-2], x[n-N-3] */
simon	0:1014af42efd9	138	x2 = (q31_t ) (px1++);
simon	0:1014af42efd9	139
simon	0:1014af42efd9	140	/* Read state x[n-N-3], x[n-N-4] */
simon	0:1014af42efd9	141	x3 = (q31_t ) (px1++);
simon	0:1014af42efd9	142
simon	0:1014af42efd9	143	/* acc2 += b[N] * x[n-N-2] + b[N-1] * x[n-N-3] */
simon	0:1014af42efd9	144	acc2 = __SMLALD(x2, c0, acc2);
simon	0:1014af42efd9	145
simon	0:1014af42efd9	146	/* acc3 += b[N] * x[n-N-3] + b[N-1] * x[n-N-4] */
simon	0:1014af42efd9	147	acc3 = __SMLALD(x3, c0, acc3);
simon	0:1014af42efd9	148
simon	0:1014af42efd9	149	/* Read coefficients b[N-2], b[N-3] */
simon	0:1014af42efd9	150	c0 = *(pb++);
simon	0:1014af42efd9	151
simon	0:1014af42efd9	152	/* acc0 += b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */
simon	0:1014af42efd9	153	acc0 = __SMLALD(x2, c0, acc0);
simon	0:1014af42efd9	154
simon	0:1014af42efd9	155	/* acc1 += b[N-2] * x[n-N-3] + b[N-3] * x[n-N-4] */
simon	0:1014af42efd9	156	acc1 = __SMLALD(x3, c0, acc1);
simon	0:1014af42efd9	157
simon	0:1014af42efd9	158	/* Read state x[n-N-4], x[n-N-5] */
simon	0:1014af42efd9	159	x0 = (q31_t ) (px1++);
simon	0:1014af42efd9	160
simon	0:1014af42efd9	161	/* Read state x[n-N-5], x[n-N-6] */
simon	0:1014af42efd9	162	x1 = (q31_t ) (px1++);
simon	0:1014af42efd9	163
simon	0:1014af42efd9	164	/* acc2 += b[N-2] * x[n-N-4] + b[N-3] * x[n-N-5] */
simon	0:1014af42efd9	165	acc2 = __SMLALD(x0, c0, acc2);
simon	0:1014af42efd9	166
simon	0:1014af42efd9	167	/* acc3 += b[N-2] * x[n-N-5] + b[N-3] * x[n-N-6] */
simon	0:1014af42efd9	168	acc3 = __SMLALD(x1, c0, acc3);
simon	0:1014af42efd9	169	tapCnt--;
simon	0:1014af42efd9	170
simon	0:1014af42efd9	171	}
simon	0:1014af42efd9	172	while(tapCnt > 0u);
simon	0:1014af42efd9	173
simon	0:1014af42efd9	174	/* If the filter length is not a multiple of 4, compute the remaining filter taps.
simon	0:1014af42efd9	175	** This is always be 2 taps since the filter length is even. */
simon	0:1014af42efd9	176	if((numTaps & 0x3u) != 0u)
simon	0:1014af42efd9	177	{
simon	0:1014af42efd9	178	/* Read 2 coefficients */
simon	0:1014af42efd9	179	c0 = *(pb++);
simon	0:1014af42efd9	180	/* Fetch 4 state variables */
simon	0:1014af42efd9	181	x2 = (q31_t ) (px1++);
simon	0:1014af42efd9	182	x3 = (q31_t ) (px1++);
simon	0:1014af42efd9	183
simon	0:1014af42efd9	184	/* Perform the multiply-accumulates */
simon	0:1014af42efd9	185	acc0 = __SMLALD(x0, c0, acc0);
simon	0:1014af42efd9	186	acc1 = __SMLALD(x1, c0, acc1);
simon	0:1014af42efd9	187	acc2 = __SMLALD(x2, c0, acc2);
simon	0:1014af42efd9	188	acc3 = __SMLALD(x3, c0, acc3);
simon	0:1014af42efd9	189	}
simon	0:1014af42efd9	190
simon	0:1014af42efd9	191	/* The results in the 4 accumulators are in 2.30 format. Convert to 1.15 with saturation.
simon	0:1014af42efd9	192	** Then store the 4 outputs in the destination buffer. */
simon	0:1014af42efd9	193	*__SIMD32(pDst)++ =
simon	0:1014af42efd9	194	__PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
simon	0:1014af42efd9	195	*__SIMD32(pDst)++ =
simon	0:1014af42efd9	196	__PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
simon	0:1014af42efd9	197
simon	0:1014af42efd9	198
simon	0:1014af42efd9	199	/* Advance the state pointer by 4 to process the next group of 4 samples */
simon	0:1014af42efd9	200	pState = pState + 4;
simon	0:1014af42efd9	201
simon	0:1014af42efd9	202	/* Decrement the loop counter */
simon	0:1014af42efd9	203	blkCnt--;
simon	0:1014af42efd9	204	}
simon	0:1014af42efd9	205
simon	0:1014af42efd9	206	/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
simon	0:1014af42efd9	207	** No loop unrolling is used. */
simon	0:1014af42efd9	208	blkCnt = blockSize % 0x4u;
simon	0:1014af42efd9	209	while(blkCnt > 0u)
simon	0:1014af42efd9	210	{
simon	0:1014af42efd9	211	/* Copy two samples into state buffer */
simon	0:1014af42efd9	212	pStateCurnt++ = pSrc++;
simon	0:1014af42efd9	213
simon	0:1014af42efd9	214	/* Set the accumulator to zero */
simon	0:1014af42efd9	215	acc0 = 0;
simon	0:1014af42efd9	216
simon	0:1014af42efd9	217	/* Use SIMD to hold states and coefficients */
simon	0:1014af42efd9	218	px2 = (q31_t *) pState;
simon	0:1014af42efd9	219	pb = (q31_t *) (pCoeffs);
simon	0:1014af42efd9	220	tapCnt = numTaps >> 1;
simon	0:1014af42efd9	221
simon	0:1014af42efd9	222	do
simon	0:1014af42efd9	223	{
simon	0:1014af42efd9	224	acc0 = __SMLALD(px2++, (pb++), acc0);
simon	0:1014af42efd9	225	tapCnt--;
simon	0:1014af42efd9	226	}
simon	0:1014af42efd9	227	while(tapCnt > 0u);
simon	0:1014af42efd9	228
simon	0:1014af42efd9	229	/* The result is in 2.30 format. Convert to 1.15 with saturation.
simon	0:1014af42efd9	230	** Then store the output in the destination buffer. */
simon	0:1014af42efd9	231	*pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
simon	0:1014af42efd9	232
simon	0:1014af42efd9	233	/* Advance state pointer by 1 for the next sample */
simon	0:1014af42efd9	234	pState = pState + 1;
simon	0:1014af42efd9	235
simon	0:1014af42efd9	236	/* Decrement the loop counter */
simon	0:1014af42efd9	237	blkCnt--;
simon	0:1014af42efd9	238	}
simon	0:1014af42efd9	239
simon	0:1014af42efd9	240	/* Processing is complete.
simon	0:1014af42efd9	241	** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
simon	0:1014af42efd9	242	** This prepares the state buffer for the next function call. */
simon	0:1014af42efd9	243
simon	0:1014af42efd9	244	/* Points to the start of the state buffer */
simon	0:1014af42efd9	245	pStateCurnt = S->pState;
simon	0:1014af42efd9	246
simon	0:1014af42efd9	247	/* Calculation of count for copying integer writes */
simon	0:1014af42efd9	248	tapCnt = (numTaps - 1u) >> 2;
simon	0:1014af42efd9	249
simon	0:1014af42efd9	250	while(tapCnt > 0u)
simon	0:1014af42efd9	251	{
simon	0:1014af42efd9	252	__SIMD32(pStateCurnt)++ = __SIMD32(pState)++;
simon	0:1014af42efd9	253	__SIMD32(pStateCurnt)++ = __SIMD32(pState)++;
simon	0:1014af42efd9	254
simon	0:1014af42efd9	255	tapCnt--;
simon	0:1014af42efd9	256
simon	0:1014af42efd9	257	}
simon	0:1014af42efd9	258
simon	0:1014af42efd9	259	/* Calculation of count for remaining q15_t data */
simon	0:1014af42efd9	260	tapCnt = (numTaps - 1u) % 0x4u;
simon	0:1014af42efd9	261
simon	0:1014af42efd9	262	/* copy remaining data */
simon	0:1014af42efd9	263	while(tapCnt > 0u)
simon	0:1014af42efd9	264	{
simon	0:1014af42efd9	265	pStateCurnt++ = pState++;
simon	0:1014af42efd9	266
simon	0:1014af42efd9	267	/* Decrement the loop counter */
simon	0:1014af42efd9	268	tapCnt--;
simon	0:1014af42efd9	269	}
simon	0:1014af42efd9	270	}
simon	0:1014af42efd9	271
simon	0:1014af42efd9	272	/**
simon	0:1014af42efd9	273	* @} end of FIR group
simon	0:1014af42efd9	274	*/

Repository toolbox

Repository details

Type:	Library
Created:	29 Jun 2017
Imports:	0
Forks:	0
Commits:	4
Dependents:	0
Dependencies:	0
Followers:	1

Important changes to repositories hosted on mbed.com

src/Cortex-M4-M3/FilteringFunctions/arm_fir_q15.c@3:ad02f4ea1fbe, 2017-06-29 (annotated)

Who changed what in which revision?

Repository toolbox

Repository details

Important Information for this Arm website

Important changes to repositories hosted on mbed.com

src/Cortex-M4-M3/FilteringFunctions/arm_fir_q15.c@3:ad02f4ea1fbe, 2017-06-29 (annotated)

Who changed what in which revision?

Repository toolbox

Repository details

Important Information for this Arm website

Access Warning