V4.0.1 of the ARM CMSIS DSP libraries. Note that arm_bitreversal2.s, arm_cfft_f32.c and arm_rfft_fast_f32.c had to be removed. arm_bitreversal2.s will not assemble with the online tools. So, the fast f32 FFT functions are not yet available. All the other FFT functions are available.

Dependents:   MPU9150_Example fir_f32 fir_f32 MPU9150_nucleo_noni2cdev ... more

Committer:
emh203
Date:
Mon Jul 28 15:03:15 2014 +0000
Revision:
0:3d9c67d97d6f
1st working commit. Had to remove arm_bitreversal2.s, arm_cfft_f32.c and arm_rfft_fast_f32.c. The .s file will not assemble. For now I removed these functions so we could at least have a library for the other functions.

Who changed what in which revision?

User | Revision | Line number | New contents of line
emh203 0:3d9c67d97d6f 1 /* ----------------------------------------------------------------------
emh203 0:3d9c67d97d6f 2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
emh203 0:3d9c67d97d6f 3 *
emh203 0:3d9c67d97d6f 4 * $Date: 12. March 2014
emh203 0:3d9c67d97d6f 5 * $Revision: V1.4.3
emh203 0:3d9c67d97d6f 6 *
emh203 0:3d9c67d97d6f 7 * Project: CMSIS DSP Library
emh203 0:3d9c67d97d6f 8 * Title: arm_fir_q15.c
emh203 0:3d9c67d97d6f 9 *
emh203 0:3d9c67d97d6f 10 * Description: Q15 FIR filter processing function.
emh203 0:3d9c67d97d6f 11 *
emh203 0:3d9c67d97d6f 12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
emh203 0:3d9c67d97d6f 13 *
emh203 0:3d9c67d97d6f 14 * Redistribution and use in source and binary forms, with or without
emh203 0:3d9c67d97d6f 15 * modification, are permitted provided that the following conditions
emh203 0:3d9c67d97d6f 16 * are met:
emh203 0:3d9c67d97d6f 17 * - Redistributions of source code must retain the above copyright
emh203 0:3d9c67d97d6f 18 * notice, this list of conditions and the following disclaimer.
emh203 0:3d9c67d97d6f 19 * - Redistributions in binary form must reproduce the above copyright
emh203 0:3d9c67d97d6f 20 * notice, this list of conditions and the following disclaimer in
emh203 0:3d9c67d97d6f 21 * the documentation and/or other materials provided with the
emh203 0:3d9c67d97d6f 22 * distribution.
emh203 0:3d9c67d97d6f 23 * - Neither the name of ARM LIMITED nor the names of its contributors
emh203 0:3d9c67d97d6f 24 * may be used to endorse or promote products derived from this
emh203 0:3d9c67d97d6f 25 * software without specific prior written permission.
emh203 0:3d9c67d97d6f 26 *
emh203 0:3d9c67d97d6f 27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
emh203 0:3d9c67d97d6f 28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
emh203 0:3d9c67d97d6f 29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
emh203 0:3d9c67d97d6f 30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
emh203 0:3d9c67d97d6f 31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
emh203 0:3d9c67d97d6f 32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
emh203 0:3d9c67d97d6f 33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
emh203 0:3d9c67d97d6f 34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
emh203 0:3d9c67d97d6f 35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
emh203 0:3d9c67d97d6f 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
emh203 0:3d9c67d97d6f 37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
emh203 0:3d9c67d97d6f 38 * POSSIBILITY OF SUCH DAMAGE.
emh203 0:3d9c67d97d6f 39 * -------------------------------------------------------------------- */
emh203 0:3d9c67d97d6f 40
emh203 0:3d9c67d97d6f 41 #include "arm_math.h"
emh203 0:3d9c67d97d6f 42
emh203 0:3d9c67d97d6f 43 /**
emh203 0:3d9c67d97d6f 44 * @ingroup groupFilters
emh203 0:3d9c67d97d6f 45 */
emh203 0:3d9c67d97d6f 46
emh203 0:3d9c67d97d6f 47 /**
emh203 0:3d9c67d97d6f 48 * @addtogroup FIR
emh203 0:3d9c67d97d6f 49 * @{
emh203 0:3d9c67d97d6f 50 */
emh203 0:3d9c67d97d6f 51
emh203 0:3d9c67d97d6f 52 /**
emh203 0:3d9c67d97d6f 53 * @brief Processing function for the Q15 FIR filter.
emh203 0:3d9c67d97d6f 54 * @param[in] *S points to an instance of the Q15 FIR structure.
emh203 0:3d9c67d97d6f 55 * @param[in] *pSrc points to the block of input data.
emh203 0:3d9c67d97d6f 56 * @param[out] *pDst points to the block of output data.
emh203 0:3d9c67d97d6f 57 * @param[in] blockSize number of samples to process per call.
emh203 0:3d9c67d97d6f 58 * @return none.
emh203 0:3d9c67d97d6f 59 *
emh203 0:3d9c67d97d6f 60 *
emh203 0:3d9c67d97d6f 61 * \par Restrictions
emh203 0:3d9c67d97d6f 62 * If the silicon does not support unaligned memory access, enable the macro UNALIGNED_SUPPORT_DISABLE.
emh203 0:3d9c67d97d6f 63 * In this case the input, output and state buffers should be 32-bit aligned.
emh203 0:3d9c67d97d6f 64 *
emh203 0:3d9c67d97d6f 65 * <b>Scaling and Overflow Behavior:</b>
emh203 0:3d9c67d97d6f 66 * \par
emh203 0:3d9c67d97d6f 67 * The function is implemented using a 64-bit internal accumulator.
emh203 0:3d9c67d97d6f 68 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
emh203 0:3d9c67d97d6f 69 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
emh203 0:3d9c67d97d6f 70 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
emh203 0:3d9c67d97d6f 71 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
emh203 0:3d9c67d97d6f 72 * Lastly, the accumulator is saturated to yield a result in 1.15 format.
emh203 0:3d9c67d97d6f 73 *
emh203 0:3d9c67d97d6f 74 * \par
emh203 0:3d9c67d97d6f 75 * Refer to the function <code>arm_fir_fast_q15()</code> for a faster but less precise implementation of this function.
emh203 0:3d9c67d97d6f 76 */
emh203 0:3d9c67d97d6f 77
emh203 0:3d9c67d97d6f 78 #ifndef ARM_MATH_CM0_FAMILY
emh203 0:3d9c67d97d6f 79
emh203 0:3d9c67d97d6f 80 /* Run the below code for Cortex-M4 and Cortex-M3 */
emh203 0:3d9c67d97d6f 81
emh203 0:3d9c67d97d6f 82 #ifndef UNALIGNED_SUPPORT_DISABLE
emh203 0:3d9c67d97d6f 83
emh203 0:3d9c67d97d6f 84
emh203 0:3d9c67d97d6f 85 void arm_fir_q15( /* Q15 FIR processing; this definition is compiled when neither ARM_MATH_CM0_FAMILY nor UNALIGNED_SUPPORT_DISABLE is defined */
emh203 0:3d9c67d97d6f 86 const arm_fir_instance_q15 * S, /* filter instance; pState, pCoeffs and numTaps are read from it */
emh203 0:3d9c67d97d6f 87 q15_t * pSrc, /* input block; blockSize samples are read */
emh203 0:3d9c67d97d6f 88 q15_t * pDst, /* output block; blockSize samples are written */
emh203 0:3d9c67d97d6f 89 uint32_t blockSize) /* number of samples to process in this call */
emh203 0:3d9c67d97d6f 90 {
emh203 0:3d9c67d97d6f 91 q15_t *pState = S->pState; /* State pointer */
emh203 0:3d9c67d97d6f 92 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
emh203 0:3d9c67d97d6f 93 q15_t *pStateCurnt; /* Points to the current sample of the state */
emh203 0:3d9c67d97d6f 94 q15_t *px1; /* Temporary q15 pointer for state buffer */
emh203 0:3d9c67d97d6f 95 q15_t *pb; /* Temporary pointer for coefficient buffer */
emh203 0:3d9c67d97d6f 96 q31_t x0, x1, x2, x3, c0; /* Temporary variables to hold SIMD state and coefficient values */
emh203 0:3d9c67d97d6f 97 q63_t acc0, acc1, acc2, acc3; /* Accumulators */
emh203 0:3d9c67d97d6f 98 uint32_t numTaps = S->numTaps; /* Number of taps in the filter */
emh203 0:3d9c67d97d6f 99 uint32_t tapCnt, blkCnt; /* Loop counters */
emh203 0:3d9c67d97d6f 100
emh203 0:3d9c67d97d6f 101
emh203 0:3d9c67d97d6f 102 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
emh203 0:3d9c67d97d6f 103 /* pStateCurnt points to the location where the new input data should be written */
emh203 0:3d9c67d97d6f 104 pStateCurnt = &(S->pState[(numTaps - 1u)]);
emh203 0:3d9c67d97d6f 105
emh203 0:3d9c67d97d6f 106 /* Apply loop unrolling and compute 4 output values simultaneously.
emh203 0:3d9c67d97d6f 107 * The variables acc0 ... acc3 hold output values that are being computed:
emh203 0:3d9c67d97d6f 108 *
emh203 0:3d9c67d97d6f 109 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]
emh203 0:3d9c67d97d6f 110 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]
emh203 0:3d9c67d97d6f 111 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]
emh203 0:3d9c67d97d6f 112 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3]
emh203 0:3d9c67d97d6f 113 */
emh203 0:3d9c67d97d6f 114 /* NOTE(review): reading the loops below, acc<j> accumulates pCoeffs[k] * pState[j + k] over k, relative to the current pState. This assumes __SMLALD is a dual 16x16 multiply-accumulate and _SIMD32_OFFSET reads two adjacent q15 values (both are defined outside this file) - confirm. */
emh203 0:3d9c67d97d6f 115 blkCnt = blockSize >> 2;
emh203 0:3d9c67d97d6f 116
emh203 0:3d9c67d97d6f 117 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
emh203 0:3d9c67d97d6f 118 ** a second loop below computes the remaining 1 to 3 samples. */
emh203 0:3d9c67d97d6f 119 while(blkCnt > 0u)
emh203 0:3d9c67d97d6f 120 {
emh203 0:3d9c67d97d6f 121 /* Copy four new input samples into the state buffer.
emh203 0:3d9c67d97d6f 122 ** Use 32-bit SIMD to move the 16-bit data. Only requires two copies. */
emh203 0:3d9c67d97d6f 123 *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++;
emh203 0:3d9c67d97d6f 124 *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++;
emh203 0:3d9c67d97d6f 125
emh203 0:3d9c67d97d6f 126 /* Set all accumulators to zero */
emh203 0:3d9c67d97d6f 127 acc0 = 0;
emh203 0:3d9c67d97d6f 128 acc1 = 0;
emh203 0:3d9c67d97d6f 129 acc2 = 0;
emh203 0:3d9c67d97d6f 130 acc3 = 0;
emh203 0:3d9c67d97d6f 131
emh203 0:3d9c67d97d6f 132 /* Initialize state pointer of type q15 */
emh203 0:3d9c67d97d6f 133 px1 = pState;
emh203 0:3d9c67d97d6f 134
emh203 0:3d9c67d97d6f 135 /* Initialize coefficient pointer (q15_t *; coefficients are fetched in pairs through __SIMD32) */
emh203 0:3d9c67d97d6f 136 pb = pCoeffs;
emh203 0:3d9c67d97d6f 137
emh203 0:3d9c67d97d6f 138 /* Read the first two samples from the state buffer: x[n-N], x[n-N-1] */
emh203 0:3d9c67d97d6f 139 x0 = _SIMD32_OFFSET(px1);
emh203 0:3d9c67d97d6f 140
emh203 0:3d9c67d97d6f 141 /* Read the second and third samples from the state buffer (one q15 element further on): x[n-N-1], x[n-N-2] */
emh203 0:3d9c67d97d6f 142 x1 = _SIMD32_OFFSET(px1 + 1u);
emh203 0:3d9c67d97d6f 143
emh203 0:3d9c67d97d6f 144 px1 += 2u;
emh203 0:3d9c67d97d6f 145
emh203 0:3d9c67d97d6f 146 /* Loop over the number of taps. Unroll by a factor of 4.
emh203 0:3d9c67d97d6f 147 ** Repeat until we've computed numTaps-(numTaps%4) coefficients. */
emh203 0:3d9c67d97d6f 148 tapCnt = numTaps >> 2;
emh203 0:3d9c67d97d6f 149
emh203 0:3d9c67d97d6f 150 while(tapCnt > 0u)
emh203 0:3d9c67d97d6f 151 {
emh203 0:3d9c67d97d6f 152 /* Read the first two coefficients using SIMD: b[N] and b[N-1] coefficients */
emh203 0:3d9c67d97d6f 153 c0 = *__SIMD32(pb)++;
emh203 0:3d9c67d97d6f 154
emh203 0:3d9c67d97d6f 155 /* acc0 += b[N] * x[n-N] + b[N-1] * x[n-N-1] */
emh203 0:3d9c67d97d6f 156 acc0 = __SMLALD(x0, c0, acc0);
emh203 0:3d9c67d97d6f 157
emh203 0:3d9c67d97d6f 158 /* acc1 += b[N] * x[n-N-1] + b[N-1] * x[n-N-2] */
emh203 0:3d9c67d97d6f 159 acc1 = __SMLALD(x1, c0, acc1);
emh203 0:3d9c67d97d6f 160
emh203 0:3d9c67d97d6f 161 /* Read state x[n-N-2], x[n-N-3] */
emh203 0:3d9c67d97d6f 162 x2 = _SIMD32_OFFSET(px1);
emh203 0:3d9c67d97d6f 163
emh203 0:3d9c67d97d6f 164 /* Read state x[n-N-3], x[n-N-4] */
emh203 0:3d9c67d97d6f 165 x3 = _SIMD32_OFFSET(px1 + 1u);
emh203 0:3d9c67d97d6f 166
emh203 0:3d9c67d97d6f 167 /* acc2 += b[N] * x[n-N-2] + b[N-1] * x[n-N-3] */
emh203 0:3d9c67d97d6f 168 acc2 = __SMLALD(x2, c0, acc2);
emh203 0:3d9c67d97d6f 169
emh203 0:3d9c67d97d6f 170 /* acc3 += b[N] * x[n-N-3] + b[N-1] * x[n-N-4] */
emh203 0:3d9c67d97d6f 171 acc3 = __SMLALD(x3, c0, acc3);
emh203 0:3d9c67d97d6f 172
emh203 0:3d9c67d97d6f 173 /* Read coefficients b[N-2], b[N-3] */
emh203 0:3d9c67d97d6f 174 c0 = *__SIMD32(pb)++;
emh203 0:3d9c67d97d6f 175
emh203 0:3d9c67d97d6f 176 /* acc0 += b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */
emh203 0:3d9c67d97d6f 177 acc0 = __SMLALD(x2, c0, acc0);
emh203 0:3d9c67d97d6f 178
emh203 0:3d9c67d97d6f 179 /* acc1 += b[N-2] * x[n-N-3] + b[N-3] * x[n-N-4] */
emh203 0:3d9c67d97d6f 180 acc1 = __SMLALD(x3, c0, acc1);
emh203 0:3d9c67d97d6f 181
emh203 0:3d9c67d97d6f 182 /* Read state x[n-N-4], x[n-N-5] */
emh203 0:3d9c67d97d6f 183 x0 = _SIMD32_OFFSET(px1 + 2u);
emh203 0:3d9c67d97d6f 184
emh203 0:3d9c67d97d6f 185 /* Read state x[n-N-5], x[n-N-6] */
emh203 0:3d9c67d97d6f 186 x1 = _SIMD32_OFFSET(px1 + 3u);
emh203 0:3d9c67d97d6f 187
emh203 0:3d9c67d97d6f 188 /* acc2 += b[N-2] * x[n-N-4] + b[N-3] * x[n-N-5] */
emh203 0:3d9c67d97d6f 189 acc2 = __SMLALD(x0, c0, acc2);
emh203 0:3d9c67d97d6f 190
emh203 0:3d9c67d97d6f 191 /* acc3 += b[N-2] * x[n-N-5] + b[N-3] * x[n-N-6] */
emh203 0:3d9c67d97d6f 192 acc3 = __SMLALD(x1, c0, acc3);
emh203 0:3d9c67d97d6f 193
emh203 0:3d9c67d97d6f 194 px1 += 4u;
emh203 0:3d9c67d97d6f 195
emh203 0:3d9c67d97d6f 196 tapCnt--;
emh203 0:3d9c67d97d6f 197
emh203 0:3d9c67d97d6f 198 }
emh203 0:3d9c67d97d6f 199
emh203 0:3d9c67d97d6f 200
emh203 0:3d9c67d97d6f 201 /* If the filter length is not a multiple of 4, compute the remaining filter taps.
emh203 0:3d9c67d97d6f 202 ** This is always 2 taps since the filter length is assumed to be even. */
emh203 0:3d9c67d97d6f 203 if((numTaps & 0x3u) != 0u)
emh203 0:3d9c67d97d6f 204 {
emh203 0:3d9c67d97d6f 205 /* Read 2 coefficients */
emh203 0:3d9c67d97d6f 206 c0 = *__SIMD32(pb)++;
emh203 0:3d9c67d97d6f 207
emh203 0:3d9c67d97d6f 208 /* Fetch 4 state variables */
emh203 0:3d9c67d97d6f 209 x2 = _SIMD32_OFFSET(px1);
emh203 0:3d9c67d97d6f 210
emh203 0:3d9c67d97d6f 211 x3 = _SIMD32_OFFSET(px1 + 1u);
emh203 0:3d9c67d97d6f 212
emh203 0:3d9c67d97d6f 213 /* Perform the multiply-accumulates */
emh203 0:3d9c67d97d6f 214 acc0 = __SMLALD(x0, c0, acc0);
emh203 0:3d9c67d97d6f 215
emh203 0:3d9c67d97d6f 216 px1 += 2u;
emh203 0:3d9c67d97d6f 217
emh203 0:3d9c67d97d6f 218 acc1 = __SMLALD(x1, c0, acc1);
emh203 0:3d9c67d97d6f 219 acc2 = __SMLALD(x2, c0, acc2);
emh203 0:3d9c67d97d6f 220 acc3 = __SMLALD(x3, c0, acc3);
emh203 0:3d9c67d97d6f 221 }
emh203 0:3d9c67d97d6f 222
emh203 0:3d9c67d97d6f 223 /* The results in the 4 accumulators are in 34.30 format. Convert to 1.15 with saturation.
emh203 0:3d9c67d97d6f 224 ** Then store the 4 outputs in the destination buffer. */
emh203 0:3d9c67d97d6f 225
emh203 0:3d9c67d97d6f 226 #ifndef ARM_MATH_BIG_ENDIAN
emh203 0:3d9c67d97d6f 227
emh203 0:3d9c67d97d6f 228 *__SIMD32(pDst)++ =
emh203 0:3d9c67d97d6f 229 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
emh203 0:3d9c67d97d6f 230 *__SIMD32(pDst)++ =
emh203 0:3d9c67d97d6f 231 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
emh203 0:3d9c67d97d6f 232
emh203 0:3d9c67d97d6f 233 #else
emh203 0:3d9c67d97d6f 234
emh203 0:3d9c67d97d6f 235 *__SIMD32(pDst)++ =
emh203 0:3d9c67d97d6f 236 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
emh203 0:3d9c67d97d6f 237 *__SIMD32(pDst)++ =
emh203 0:3d9c67d97d6f 238 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
emh203 0:3d9c67d97d6f 239
emh203 0:3d9c67d97d6f 240 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emh203 0:3d9c67d97d6f 241
emh203 0:3d9c67d97d6f 242
emh203 0:3d9c67d97d6f 243
emh203 0:3d9c67d97d6f 244 /* Advance the state pointer by 4 to process the next group of 4 samples */
emh203 0:3d9c67d97d6f 245 pState = pState + 4;
emh203 0:3d9c67d97d6f 246
emh203 0:3d9c67d97d6f 247 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 248 blkCnt--;
emh203 0:3d9c67d97d6f 249 }
emh203 0:3d9c67d97d6f 250
emh203 0:3d9c67d97d6f 251 /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
emh203 0:3d9c67d97d6f 252 ** No loop unrolling is used. */
emh203 0:3d9c67d97d6f 253 blkCnt = blockSize % 0x4u;
emh203 0:3d9c67d97d6f 254 while(blkCnt > 0u)
emh203 0:3d9c67d97d6f 255 {
emh203 0:3d9c67d97d6f 256 /* Copy one new input sample into the state buffer */
emh203 0:3d9c67d97d6f 257 *pStateCurnt++ = *pSrc++;
emh203 0:3d9c67d97d6f 258
emh203 0:3d9c67d97d6f 259 /* Set the accumulator to zero */
emh203 0:3d9c67d97d6f 260 acc0 = 0;
emh203 0:3d9c67d97d6f 261
emh203 0:3d9c67d97d6f 262 /* Initialize state pointer of type q15 */
emh203 0:3d9c67d97d6f 263 px1 = pState;
emh203 0:3d9c67d97d6f 264
emh203 0:3d9c67d97d6f 265 /* Initialize coefficient pointer (q15_t *; coefficients are fetched in pairs through __SIMD32) */
emh203 0:3d9c67d97d6f 266 pb = pCoeffs;
emh203 0:3d9c67d97d6f 267
emh203 0:3d9c67d97d6f 268 tapCnt = numTaps >> 1;
emh203 0:3d9c67d97d6f 269
emh203 0:3d9c67d97d6f 270 do
emh203 0:3d9c67d97d6f 271 {
emh203 0:3d9c67d97d6f 272 /* Two taps per pass. The body runs before the test, so this relies on numTaps >= 2 (tapCnt would wrap if it started at 0). */
emh203 0:3d9c67d97d6f 273 c0 = *__SIMD32(pb)++;
emh203 0:3d9c67d97d6f 274 x0 = *__SIMD32(px1)++;
emh203 0:3d9c67d97d6f 275
emh203 0:3d9c67d97d6f 276 acc0 = __SMLALD(x0, c0, acc0);
emh203 0:3d9c67d97d6f 277 tapCnt--;
emh203 0:3d9c67d97d6f 278 }
emh203 0:3d9c67d97d6f 279 while(tapCnt > 0u);
emh203 0:3d9c67d97d6f 280
emh203 0:3d9c67d97d6f 281 /* The result is in 34.30 format. Convert to 1.15 with saturation.
emh203 0:3d9c67d97d6f 282 ** Then store the output in the destination buffer. */
emh203 0:3d9c67d97d6f 283 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
emh203 0:3d9c67d97d6f 284
emh203 0:3d9c67d97d6f 285 /* Advance state pointer by 1 for the next sample */
emh203 0:3d9c67d97d6f 286 pState = pState + 1;
emh203 0:3d9c67d97d6f 287
emh203 0:3d9c67d97d6f 288 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 289 blkCnt--;
emh203 0:3d9c67d97d6f 290 }
emh203 0:3d9c67d97d6f 291
emh203 0:3d9c67d97d6f 292 /* Processing is complete.
emh203 0:3d9c67d97d6f 293 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
emh203 0:3d9c67d97d6f 294 ** This prepares the state buffer for the next function call. */
emh203 0:3d9c67d97d6f 295
emh203 0:3d9c67d97d6f 296 /* Points to the start of the state buffer */
emh203 0:3d9c67d97d6f 297 pStateCurnt = S->pState;
emh203 0:3d9c67d97d6f 298
emh203 0:3d9c67d97d6f 299 /* Number of groups of four samples; each group is copied with two 32-bit writes */
emh203 0:3d9c67d97d6f 300 tapCnt = (numTaps - 1u) >> 2;
emh203 0:3d9c67d97d6f 301
emh203 0:3d9c67d97d6f 302 while(tapCnt > 0u)
emh203 0:3d9c67d97d6f 303 {
emh203 0:3d9c67d97d6f 304
emh203 0:3d9c67d97d6f 305 /* Copy state values to start of state buffer */
emh203 0:3d9c67d97d6f 306 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
emh203 0:3d9c67d97d6f 307 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
emh203 0:3d9c67d97d6f 308
emh203 0:3d9c67d97d6f 309 tapCnt--;
emh203 0:3d9c67d97d6f 310
emh203 0:3d9c67d97d6f 311 }
emh203 0:3d9c67d97d6f 312
emh203 0:3d9c67d97d6f 313 /* Calculation of count for remaining q15_t data */
emh203 0:3d9c67d97d6f 314 tapCnt = (numTaps - 1u) % 0x4u;
emh203 0:3d9c67d97d6f 315
emh203 0:3d9c67d97d6f 316 /* copy remaining data */
emh203 0:3d9c67d97d6f 317 while(tapCnt > 0u)
emh203 0:3d9c67d97d6f 318 {
emh203 0:3d9c67d97d6f 319 *pStateCurnt++ = *pState++;
emh203 0:3d9c67d97d6f 320
emh203 0:3d9c67d97d6f 321 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 322 tapCnt--;
emh203 0:3d9c67d97d6f 323 }
emh203 0:3d9c67d97d6f 324 }
emh203 0:3d9c67d97d6f 325
emh203 0:3d9c67d97d6f 326 #else /* UNALIGNED_SUPPORT_DISABLE */
emh203 0:3d9c67d97d6f 327
emh203 0:3d9c67d97d6f 328 void arm_fir_q15( /* Q15 FIR processing; this definition is compiled when ARM_MATH_CM0_FAMILY is not defined and UNALIGNED_SUPPORT_DISABLE is defined */
emh203 0:3d9c67d97d6f 329 const arm_fir_instance_q15 * S, /* filter instance; pState, pCoeffs and numTaps are read from it */
emh203 0:3d9c67d97d6f 330 q15_t * pSrc, /* input block; blockSize samples are read */
emh203 0:3d9c67d97d6f 331 q15_t * pDst, /* output block; blockSize samples are written */
emh203 0:3d9c67d97d6f 332 uint32_t blockSize) /* number of samples to process in this call */
emh203 0:3d9c67d97d6f 333 {
emh203 0:3d9c67d97d6f 334 q15_t *pState = S->pState; /* State pointer */
emh203 0:3d9c67d97d6f 335 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
emh203 0:3d9c67d97d6f 336 q15_t *pStateCurnt; /* Points to the current sample of the state */
emh203 0:3d9c67d97d6f 337 q63_t acc0, acc1, acc2, acc3; /* Accumulators */
emh203 0:3d9c67d97d6f 338 q15_t *pb; /* Temporary pointer for coefficient buffer */
emh203 0:3d9c67d97d6f 339 q15_t *px; /* Temporary q15 pointer for state buffer (read through __SIMD32 / _SIMD32_OFFSET below) */
emh203 0:3d9c67d97d6f 340 q31_t x0, x1, x2, c0; /* Temporary variables to hold SIMD state and coefficient values */
emh203 0:3d9c67d97d6f 341 uint32_t numTaps = S->numTaps; /* Number of taps in the filter */
emh203 0:3d9c67d97d6f 342 uint32_t tapCnt, blkCnt; /* Loop counters */
emh203 0:3d9c67d97d6f 343
emh203 0:3d9c67d97d6f 344
emh203 0:3d9c67d97d6f 345 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
emh203 0:3d9c67d97d6f 346 /* pStateCurnt points to the location where the new input data should be written */
emh203 0:3d9c67d97d6f 347 pStateCurnt = &(S->pState[(numTaps - 1u)]);
emh203 0:3d9c67d97d6f 348
emh203 0:3d9c67d97d6f 349 /* Apply loop unrolling and compute 4 output values simultaneously.
emh203 0:3d9c67d97d6f 350 * The variables acc0 ... acc3 hold output values that are being computed:
emh203 0:3d9c67d97d6f 351 *
emh203 0:3d9c67d97d6f 352 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]
emh203 0:3d9c67d97d6f 353 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]
emh203 0:3d9c67d97d6f 354 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]
emh203 0:3d9c67d97d6f 355 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3]
emh203 0:3d9c67d97d6f 356 */
emh203 0:3d9c67d97d6f 357 /* NOTE(review): here the state is loaded only at even q15 offsets (x0, x2); the odd-offset pairs for acc1/acc3 are assembled in x1 with __PKHBT and multiplied with __SMLALDX. Exact semantics of those intrinsics are defined outside this file - confirm. */
emh203 0:3d9c67d97d6f 358 blkCnt = blockSize >> 2;
emh203 0:3d9c67d97d6f 359
emh203 0:3d9c67d97d6f 360 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
emh203 0:3d9c67d97d6f 361 ** a second loop below computes the remaining 1 to 3 samples. */
emh203 0:3d9c67d97d6f 362 while(blkCnt > 0u)
emh203 0:3d9c67d97d6f 363 {
emh203 0:3d9c67d97d6f 364 /* Copy four new input samples into the state buffer.
emh203 0:3d9c67d97d6f 365 ** One 16-bit copy per sample; no 32-bit accesses are used for this copy. */
emh203 0:3d9c67d97d6f 366 *pStateCurnt++ = *pSrc++;
emh203 0:3d9c67d97d6f 367 *pStateCurnt++ = *pSrc++;
emh203 0:3d9c67d97d6f 368 *pStateCurnt++ = *pSrc++;
emh203 0:3d9c67d97d6f 369 *pStateCurnt++ = *pSrc++;
emh203 0:3d9c67d97d6f 370
emh203 0:3d9c67d97d6f 371
emh203 0:3d9c67d97d6f 372 /* Set all accumulators to zero */
emh203 0:3d9c67d97d6f 373 acc0 = 0;
emh203 0:3d9c67d97d6f 374 acc1 = 0;
emh203 0:3d9c67d97d6f 375 acc2 = 0;
emh203 0:3d9c67d97d6f 376 acc3 = 0;
emh203 0:3d9c67d97d6f 377
emh203 0:3d9c67d97d6f 378 /* Start reading the state buffer at pState (px stays a q15_t pointer; pair reads go through the __SIMD32 macros) */
emh203 0:3d9c67d97d6f 379 px = pState;
emh203 0:3d9c67d97d6f 380
emh203 0:3d9c67d97d6f 381 /* Start reading the coefficients at pCoeffs (pb stays a q15_t pointer; pair reads go through __SIMD32) */
emh203 0:3d9c67d97d6f 382 pb = pCoeffs;
emh203 0:3d9c67d97d6f 383
emh203 0:3d9c67d97d6f 384 /* Read the first two samples from the state buffer: x[n-N], x[n-N-1] */
emh203 0:3d9c67d97d6f 385 x0 = *__SIMD32(px)++;
emh203 0:3d9c67d97d6f 386
emh203 0:3d9c67d97d6f 387 /* Read the third and fourth samples from the state buffer: x[n-N-2], x[n-N-3] */
emh203 0:3d9c67d97d6f 388 x2 = *__SIMD32(px)++;
emh203 0:3d9c67d97d6f 389
emh203 0:3d9c67d97d6f 390 /* Loop over the number of taps. Unroll by a factor of 4.
emh203 0:3d9c67d97d6f 391 ** Repeat until we've computed numTaps-(numTaps%4) coefficients. */
emh203 0:3d9c67d97d6f 392 tapCnt = numTaps >> 2;
emh203 0:3d9c67d97d6f 393
emh203 0:3d9c67d97d6f 394 while(tapCnt > 0)
emh203 0:3d9c67d97d6f 395 {
emh203 0:3d9c67d97d6f 396 /* Read the first two coefficients using SIMD: b[N] and b[N-1] coefficients */
emh203 0:3d9c67d97d6f 397 c0 = *__SIMD32(pb)++;
emh203 0:3d9c67d97d6f 398
emh203 0:3d9c67d97d6f 399 /* acc0 += b[N] * x[n-N] + b[N-1] * x[n-N-1] */
emh203 0:3d9c67d97d6f 400 acc0 = __SMLALD(x0, c0, acc0);
emh203 0:3d9c67d97d6f 401
emh203 0:3d9c67d97d6f 402 /* acc2 += b[N] * x[n-N-2] + b[N-1] * x[n-N-3] */
emh203 0:3d9c67d97d6f 403 acc2 = __SMLALD(x2, c0, acc2);
emh203 0:3d9c67d97d6f 404
emh203 0:3d9c67d97d6f 405 /* pack x[n-N-1] and x[n-N-2] */
emh203 0:3d9c67d97d6f 406 #ifndef ARM_MATH_BIG_ENDIAN
emh203 0:3d9c67d97d6f 407 x1 = __PKHBT(x2, x0, 0);
emh203 0:3d9c67d97d6f 408 #else
emh203 0:3d9c67d97d6f 409 x1 = __PKHBT(x0, x2, 0);
emh203 0:3d9c67d97d6f 410 #endif
emh203 0:3d9c67d97d6f 411
emh203 0:3d9c67d97d6f 412 /* Read state x[n-N-4], x[n-N-5] */
emh203 0:3d9c67d97d6f 413 x0 = _SIMD32_OFFSET(px);
emh203 0:3d9c67d97d6f 414
emh203 0:3d9c67d97d6f 415 /* acc1 += b[N] * x[n-N-1] + b[N-1] * x[n-N-2] */
emh203 0:3d9c67d97d6f 416 acc1 = __SMLALDX(x1, c0, acc1);
emh203 0:3d9c67d97d6f 417
emh203 0:3d9c67d97d6f 418 /* pack x[n-N-3] and x[n-N-4] */
emh203 0:3d9c67d97d6f 419 #ifndef ARM_MATH_BIG_ENDIAN
emh203 0:3d9c67d97d6f 420 x1 = __PKHBT(x0, x2, 0);
emh203 0:3d9c67d97d6f 421 #else
emh203 0:3d9c67d97d6f 422 x1 = __PKHBT(x2, x0, 0);
emh203 0:3d9c67d97d6f 423 #endif
emh203 0:3d9c67d97d6f 424
emh203 0:3d9c67d97d6f 425 /* acc3 += b[N] * x[n-N-3] + b[N-1] * x[n-N-4] */
emh203 0:3d9c67d97d6f 426 acc3 = __SMLALDX(x1, c0, acc3);
emh203 0:3d9c67d97d6f 427
emh203 0:3d9c67d97d6f 428 /* Read coefficients b[N-2], b[N-3] */
emh203 0:3d9c67d97d6f 429 c0 = *__SIMD32(pb)++;
emh203 0:3d9c67d97d6f 430
emh203 0:3d9c67d97d6f 431 /* acc0 += b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */
emh203 0:3d9c67d97d6f 432 acc0 = __SMLALD(x2, c0, acc0);
emh203 0:3d9c67d97d6f 433
emh203 0:3d9c67d97d6f 434 /* Read state x[n-N-6], x[n-N-7] with offset */
emh203 0:3d9c67d97d6f 435 x2 = _SIMD32_OFFSET(px + 2u);
emh203 0:3d9c67d97d6f 436
emh203 0:3d9c67d97d6f 437 /* acc2 += b[N-2] * x[n-N-4] + b[N-3] * x[n-N-5] */
emh203 0:3d9c67d97d6f 438 acc2 = __SMLALD(x0, c0, acc2);
emh203 0:3d9c67d97d6f 439
emh203 0:3d9c67d97d6f 440 /* acc1 += b[N-2] * x[n-N-3] + b[N-3] * x[n-N-4] */
emh203 0:3d9c67d97d6f 441 acc1 = __SMLALDX(x1, c0, acc1);
emh203 0:3d9c67d97d6f 442
emh203 0:3d9c67d97d6f 443 /* pack x[n-N-5] and x[n-N-6] */
emh203 0:3d9c67d97d6f 444 #ifndef ARM_MATH_BIG_ENDIAN
emh203 0:3d9c67d97d6f 445 x1 = __PKHBT(x2, x0, 0);
emh203 0:3d9c67d97d6f 446 #else
emh203 0:3d9c67d97d6f 447 x1 = __PKHBT(x0, x2, 0);
emh203 0:3d9c67d97d6f 448 #endif
emh203 0:3d9c67d97d6f 449
emh203 0:3d9c67d97d6f 450 /* acc3 += b[N-2] * x[n-N-5] + b[N-3] * x[n-N-6] */
emh203 0:3d9c67d97d6f 451 acc3 = __SMLALDX(x1, c0, acc3);
emh203 0:3d9c67d97d6f 452
emh203 0:3d9c67d97d6f 453 /* Update state pointer for next state reading */
emh203 0:3d9c67d97d6f 454 px += 4u;
emh203 0:3d9c67d97d6f 455
emh203 0:3d9c67d97d6f 456 /* Decrement tap count */
emh203 0:3d9c67d97d6f 457 tapCnt--;
emh203 0:3d9c67d97d6f 458
emh203 0:3d9c67d97d6f 459 }
emh203 0:3d9c67d97d6f 460
emh203 0:3d9c67d97d6f 461 /* If the filter length is not a multiple of 4, compute the remaining filter taps.
emh203 0:3d9c67d97d6f 462 ** This is always 2 taps since the filter length is assumed to be even. */
emh203 0:3d9c67d97d6f 463 if((numTaps & 0x3u) != 0u)
emh203 0:3d9c67d97d6f 464 {
emh203 0:3d9c67d97d6f 465
emh203 0:3d9c67d97d6f 466 /* Read last two coefficients */
emh203 0:3d9c67d97d6f 467 c0 = *__SIMD32(pb)++;
emh203 0:3d9c67d97d6f 468
emh203 0:3d9c67d97d6f 469 /* Perform the multiply-accumulates */
emh203 0:3d9c67d97d6f 470 acc0 = __SMLALD(x0, c0, acc0);
emh203 0:3d9c67d97d6f 471 acc2 = __SMLALD(x2, c0, acc2);
emh203 0:3d9c67d97d6f 472
emh203 0:3d9c67d97d6f 473 /* pack state variables */
emh203 0:3d9c67d97d6f 474 #ifndef ARM_MATH_BIG_ENDIAN
emh203 0:3d9c67d97d6f 475 x1 = __PKHBT(x2, x0, 0);
emh203 0:3d9c67d97d6f 476 #else
emh203 0:3d9c67d97d6f 477 x1 = __PKHBT(x0, x2, 0);
emh203 0:3d9c67d97d6f 478 #endif
emh203 0:3d9c67d97d6f 479
emh203 0:3d9c67d97d6f 480 /* Read last state variables */
emh203 0:3d9c67d97d6f 481 x0 = *__SIMD32(px);
emh203 0:3d9c67d97d6f 482
emh203 0:3d9c67d97d6f 483 /* Perform the multiply-accumulates */
emh203 0:3d9c67d97d6f 484 acc1 = __SMLALDX(x1, c0, acc1);
emh203 0:3d9c67d97d6f 485
emh203 0:3d9c67d97d6f 486 /* pack state variables */
emh203 0:3d9c67d97d6f 487 #ifndef ARM_MATH_BIG_ENDIAN
emh203 0:3d9c67d97d6f 488 x1 = __PKHBT(x0, x2, 0);
emh203 0:3d9c67d97d6f 489 #else
emh203 0:3d9c67d97d6f 490 x1 = __PKHBT(x2, x0, 0);
emh203 0:3d9c67d97d6f 491 #endif
emh203 0:3d9c67d97d6f 492
emh203 0:3d9c67d97d6f 493 /* Perform the multiply-accumulates */
emh203 0:3d9c67d97d6f 494 acc3 = __SMLALDX(x1, c0, acc3);
emh203 0:3d9c67d97d6f 495 }
emh203 0:3d9c67d97d6f 496
emh203 0:3d9c67d97d6f 497 /* The results in the 4 accumulators are in 34.30 format. Convert to 1.15 with saturation.
emh203 0:3d9c67d97d6f 498 ** Then store the 4 outputs in the destination buffer. */
emh203 0:3d9c67d97d6f 499
emh203 0:3d9c67d97d6f 500 #ifndef ARM_MATH_BIG_ENDIAN
emh203 0:3d9c67d97d6f 501
emh203 0:3d9c67d97d6f 502 *__SIMD32(pDst)++ =
emh203 0:3d9c67d97d6f 503 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
emh203 0:3d9c67d97d6f 504
emh203 0:3d9c67d97d6f 505 *__SIMD32(pDst)++ =
emh203 0:3d9c67d97d6f 506 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
emh203 0:3d9c67d97d6f 507
emh203 0:3d9c67d97d6f 508 #else
emh203 0:3d9c67d97d6f 509
emh203 0:3d9c67d97d6f 510 *__SIMD32(pDst)++ =
emh203 0:3d9c67d97d6f 511 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
emh203 0:3d9c67d97d6f 512
emh203 0:3d9c67d97d6f 513 *__SIMD32(pDst)++ =
emh203 0:3d9c67d97d6f 514 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
emh203 0:3d9c67d97d6f 515
emh203 0:3d9c67d97d6f 516 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emh203 0:3d9c67d97d6f 517
emh203 0:3d9c67d97d6f 518 /* Advance the state pointer by 4 to process the next group of 4 samples */
emh203 0:3d9c67d97d6f 519 pState = pState + 4;
emh203 0:3d9c67d97d6f 520
emh203 0:3d9c67d97d6f 521 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 522 blkCnt--;
emh203 0:3d9c67d97d6f 523 }
emh203 0:3d9c67d97d6f 524
emh203 0:3d9c67d97d6f 525 /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
emh203 0:3d9c67d97d6f 526 ** No loop unrolling is used. */
emh203 0:3d9c67d97d6f 527 blkCnt = blockSize % 0x4u;
emh203 0:3d9c67d97d6f 528 while(blkCnt > 0u)
emh203 0:3d9c67d97d6f 529 {
emh203 0:3d9c67d97d6f 530 /* Copy one new input sample into the state buffer */
emh203 0:3d9c67d97d6f 531 *pStateCurnt++ = *pSrc++;
emh203 0:3d9c67d97d6f 532
emh203 0:3d9c67d97d6f 533 /* Set the accumulator to zero */
emh203 0:3d9c67d97d6f 534 acc0 = 0;
emh203 0:3d9c67d97d6f 535
emh203 0:3d9c67d97d6f 536 /* Point at the current state window and the coefficients (the loop below uses scalar multiply-accumulates) */
emh203 0:3d9c67d97d6f 537 px = pState;
emh203 0:3d9c67d97d6f 538 pb = pCoeffs;
emh203 0:3d9c67d97d6f 539
emh203 0:3d9c67d97d6f 540 tapCnt = numTaps >> 1u;
emh203 0:3d9c67d97d6f 541
emh203 0:3d9c67d97d6f 542 do
emh203 0:3d9c67d97d6f 543 { /* Two taps per pass. The body runs before the test, so this relies on numTaps >= 2 (tapCnt would wrap if it started at 0). */
emh203 0:3d9c67d97d6f 544 acc0 += (q31_t) * px++ * *pb++;
emh203 0:3d9c67d97d6f 545 acc0 += (q31_t) * px++ * *pb++;
emh203 0:3d9c67d97d6f 546 tapCnt--;
emh203 0:3d9c67d97d6f 547 }
emh203 0:3d9c67d97d6f 548 while(tapCnt > 0u);
emh203 0:3d9c67d97d6f 549
emh203 0:3d9c67d97d6f 550 /* The result is in 34.30 format. Convert to 1.15 with saturation.
emh203 0:3d9c67d97d6f 551 ** Then store the output in the destination buffer. */
emh203 0:3d9c67d97d6f 552 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
emh203 0:3d9c67d97d6f 553
emh203 0:3d9c67d97d6f 554 /* Advance state pointer by 1 for the next sample */
emh203 0:3d9c67d97d6f 555 pState = pState + 1u;
emh203 0:3d9c67d97d6f 556
emh203 0:3d9c67d97d6f 557 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 558 blkCnt--;
emh203 0:3d9c67d97d6f 559 }
emh203 0:3d9c67d97d6f 560
emh203 0:3d9c67d97d6f 561 /* Processing is complete.
emh203 0:3d9c67d97d6f 562 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
emh203 0:3d9c67d97d6f 563 ** This prepares the state buffer for the next function call. */
emh203 0:3d9c67d97d6f 564
emh203 0:3d9c67d97d6f 565 /* Points to the start of the state buffer */
emh203 0:3d9c67d97d6f 566 pStateCurnt = S->pState;
emh203 0:3d9c67d97d6f 567
emh203 0:3d9c67d97d6f 568 /* Number of groups of four samples to copy (one 16-bit copy per sample) */
emh203 0:3d9c67d97d6f 569 tapCnt = (numTaps - 1u) >> 2;
emh203 0:3d9c67d97d6f 570
emh203 0:3d9c67d97d6f 571 while(tapCnt > 0u)
emh203 0:3d9c67d97d6f 572 {
emh203 0:3d9c67d97d6f 573 *pStateCurnt++ = *pState++;
emh203 0:3d9c67d97d6f 574 *pStateCurnt++ = *pState++;
emh203 0:3d9c67d97d6f 575 *pStateCurnt++ = *pState++;
emh203 0:3d9c67d97d6f 576 *pStateCurnt++ = *pState++;
emh203 0:3d9c67d97d6f 577
emh203 0:3d9c67d97d6f 578 tapCnt--;
emh203 0:3d9c67d97d6f 579
emh203 0:3d9c67d97d6f 580 }
emh203 0:3d9c67d97d6f 581
emh203 0:3d9c67d97d6f 582 /* Calculation of count for remaining q15_t data */
emh203 0:3d9c67d97d6f 583 tapCnt = (numTaps - 1u) % 0x4u;
emh203 0:3d9c67d97d6f 584
emh203 0:3d9c67d97d6f 585 /* copy remaining data */
emh203 0:3d9c67d97d6f 586 while(tapCnt > 0u)
emh203 0:3d9c67d97d6f 587 {
emh203 0:3d9c67d97d6f 588 *pStateCurnt++ = *pState++;
emh203 0:3d9c67d97d6f 589
emh203 0:3d9c67d97d6f 590 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 591 tapCnt--;
emh203 0:3d9c67d97d6f 592 }
emh203 0:3d9c67d97d6f 593 }
emh203 0:3d9c67d97d6f 594
emh203 0:3d9c67d97d6f 595
emh203 0:3d9c67d97d6f 596 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
emh203 0:3d9c67d97d6f 597
emh203 0:3d9c67d97d6f 598 #else /* ARM_MATH_CM0_FAMILY */
emh203 0:3d9c67d97d6f 599
emh203 0:3d9c67d97d6f 600
emh203 0:3d9c67d97d6f 601 /* Run the below code for Cortex-M0 */
emh203 0:3d9c67d97d6f 602
emh203 0:3d9c67d97d6f 603 void arm_fir_q15(
emh203 0:3d9c67d97d6f 604 const arm_fir_instance_q15 * S,
emh203 0:3d9c67d97d6f 605 q15_t * pSrc,
emh203 0:3d9c67d97d6f 606 q15_t * pDst,
emh203 0:3d9c67d97d6f 607 uint32_t blockSize)
emh203 0:3d9c67d97d6f 608 {
emh203 0:3d9c67d97d6f 609 q15_t *pState = S->pState; /* State pointer */
emh203 0:3d9c67d97d6f 610 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
emh203 0:3d9c67d97d6f 611 q15_t *pStateCurnt; /* Points to the current sample of the state */
emh203 0:3d9c67d97d6f 612
emh203 0:3d9c67d97d6f 613
emh203 0:3d9c67d97d6f 614
emh203 0:3d9c67d97d6f 615 q15_t *px; /* Temporary pointer for state buffer */
emh203 0:3d9c67d97d6f 616 q15_t *pb; /* Temporary pointer for coefficient buffer */
emh203 0:3d9c67d97d6f 617 q63_t acc; /* Accumulator */
emh203 0:3d9c67d97d6f 618 uint32_t numTaps = S->numTaps; /* Number of nTaps in the filter */
emh203 0:3d9c67d97d6f 619 uint32_t tapCnt, blkCnt; /* Loop counters */
emh203 0:3d9c67d97d6f 620
emh203 0:3d9c67d97d6f 621 /* S->pState buffer contains previous frame (numTaps - 1) samples */
emh203 0:3d9c67d97d6f 622 /* pStateCurnt points to the location where the new input data should be written */
emh203 0:3d9c67d97d6f 623 pStateCurnt = &(S->pState[(numTaps - 1u)]);
emh203 0:3d9c67d97d6f 624
emh203 0:3d9c67d97d6f 625 /* Initialize blkCnt with blockSize */
emh203 0:3d9c67d97d6f 626 blkCnt = blockSize;
emh203 0:3d9c67d97d6f 627
emh203 0:3d9c67d97d6f 628 while(blkCnt > 0u)
emh203 0:3d9c67d97d6f 629 {
emh203 0:3d9c67d97d6f 630 /* Copy one sample at a time into state buffer */
emh203 0:3d9c67d97d6f 631 *pStateCurnt++ = *pSrc++;
emh203 0:3d9c67d97d6f 632
emh203 0:3d9c67d97d6f 633 /* Set the accumulator to zero */
emh203 0:3d9c67d97d6f 634 acc = 0;
emh203 0:3d9c67d97d6f 635
emh203 0:3d9c67d97d6f 636 /* Initialize state pointer */
emh203 0:3d9c67d97d6f 637 px = pState;
emh203 0:3d9c67d97d6f 638
emh203 0:3d9c67d97d6f 639 /* Initialize Coefficient pointer */
emh203 0:3d9c67d97d6f 640 pb = pCoeffs;
emh203 0:3d9c67d97d6f 641
emh203 0:3d9c67d97d6f 642 tapCnt = numTaps;
emh203 0:3d9c67d97d6f 643
emh203 0:3d9c67d97d6f 644 /* Perform the multiply-accumulates */
emh203 0:3d9c67d97d6f 645 do
emh203 0:3d9c67d97d6f 646 {
emh203 0:3d9c67d97d6f 647 /* acc = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */
emh203 0:3d9c67d97d6f 648 acc += (q31_t) * px++ * *pb++;
emh203 0:3d9c67d97d6f 649 tapCnt--;
emh203 0:3d9c67d97d6f 650 } while(tapCnt > 0u);
emh203 0:3d9c67d97d6f 651
emh203 0:3d9c67d97d6f 652 /* The result is in 2.30 format. Convert to 1.15
emh203 0:3d9c67d97d6f 653 ** Then store the output in the destination buffer. */
emh203 0:3d9c67d97d6f 654 *pDst++ = (q15_t) __SSAT((acc >> 15u), 16);
emh203 0:3d9c67d97d6f 655
emh203 0:3d9c67d97d6f 656 /* Advance state pointer by 1 for the next sample */
emh203 0:3d9c67d97d6f 657 pState = pState + 1;
emh203 0:3d9c67d97d6f 658
emh203 0:3d9c67d97d6f 659 /* Decrement the samples loop counter */
emh203 0:3d9c67d97d6f 660 blkCnt--;
emh203 0:3d9c67d97d6f 661 }
emh203 0:3d9c67d97d6f 662
emh203 0:3d9c67d97d6f 663 /* Processing is complete.
emh203 0:3d9c67d97d6f 664 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
emh203 0:3d9c67d97d6f 665 ** This prepares the state buffer for the next function call. */
emh203 0:3d9c67d97d6f 666
emh203 0:3d9c67d97d6f 667 /* Points to the start of the state buffer */
emh203 0:3d9c67d97d6f 668 pStateCurnt = S->pState;
emh203 0:3d9c67d97d6f 669
emh203 0:3d9c67d97d6f 670 /* Copy numTaps number of values */
emh203 0:3d9c67d97d6f 671 tapCnt = (numTaps - 1u);
emh203 0:3d9c67d97d6f 672
emh203 0:3d9c67d97d6f 673 /* copy data */
emh203 0:3d9c67d97d6f 674 while(tapCnt > 0u)
emh203 0:3d9c67d97d6f 675 {
emh203 0:3d9c67d97d6f 676 *pStateCurnt++ = *pState++;
emh203 0:3d9c67d97d6f 677
emh203 0:3d9c67d97d6f 678 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 679 tapCnt--;
emh203 0:3d9c67d97d6f 680 }
emh203 0:3d9c67d97d6f 681
emh203 0:3d9c67d97d6f 682 }
emh203 0:3d9c67d97d6f 683
emh203 0:3d9c67d97d6f 684 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
emh203 0:3d9c67d97d6f 685
emh203 0:3d9c67d97d6f 686
emh203 0:3d9c67d97d6f 687
emh203 0:3d9c67d97d6f 688
emh203 0:3d9c67d97d6f 689 /**
emh203 0:3d9c67d97d6f 690 * @} end of FIR group
emh203 0:3d9c67d97d6f 691 */