CMSIS DSP library

Dependents:   KL25Z_FFT_Demo Hat_Board_v5_1 KL25Z_FFT_Demo_tony KL25Z_FFT_Demo_tony ... more

Fork of mbed-dsp by mbed official

Committer:
mbed_official
Date:
Fri Nov 08 13:45:10 2013 +0000
Revision:
3:7a284390b0ce
Parent:
2:da51fb522205
Synchronized with git revision e69956aba2f68a2a26ac26b051f8d349deaa1ce8

Who changed what in which revision?

User | Revision | Line number | New contents of line
emilmont 1:fdd22bb7aa52 1 /* ----------------------------------------------------------------------
mbed_official 3:7a284390b0ce 2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
emilmont 1:fdd22bb7aa52 3 *
mbed_official 3:7a284390b0ce 4 * $Date: 17. January 2013
mbed_official 3:7a284390b0ce 5 * $Revision: V1.4.1
emilmont 1:fdd22bb7aa52 6 *
emilmont 2:da51fb522205 7 * Project: CMSIS DSP Library
emilmont 1:fdd22bb7aa52 8 * Title: arm_fir_q15.c
emilmont 1:fdd22bb7aa52 9 *
emilmont 1:fdd22bb7aa52 10 * Description: Q15 FIR filter processing function.
emilmont 1:fdd22bb7aa52 11 *
emilmont 1:fdd22bb7aa52 12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
emilmont 1:fdd22bb7aa52 13 *
mbed_official 3:7a284390b0ce 14 * Redistribution and use in source and binary forms, with or without
mbed_official 3:7a284390b0ce 15 * modification, are permitted provided that the following conditions
mbed_official 3:7a284390b0ce 16 * are met:
mbed_official 3:7a284390b0ce 17 * - Redistributions of source code must retain the above copyright
mbed_official 3:7a284390b0ce 18 * notice, this list of conditions and the following disclaimer.
mbed_official 3:7a284390b0ce 19 * - Redistributions in binary form must reproduce the above copyright
mbed_official 3:7a284390b0ce 20 * notice, this list of conditions and the following disclaimer in
mbed_official 3:7a284390b0ce 21 * the documentation and/or other materials provided with the
mbed_official 3:7a284390b0ce 22 * distribution.
mbed_official 3:7a284390b0ce 23 * - Neither the name of ARM LIMITED nor the names of its contributors
mbed_official 3:7a284390b0ce 24 * may be used to endorse or promote products derived from this
mbed_official 3:7a284390b0ce 25 * software without specific prior written permission.
mbed_official 3:7a284390b0ce 26 *
mbed_official 3:7a284390b0ce 27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
mbed_official 3:7a284390b0ce 28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
mbed_official 3:7a284390b0ce 29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
mbed_official 3:7a284390b0ce 30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
mbed_official 3:7a284390b0ce 31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
mbed_official 3:7a284390b0ce 32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
mbed_official 3:7a284390b0ce 33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
mbed_official 3:7a284390b0ce 34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
mbed_official 3:7a284390b0ce 35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
mbed_official 3:7a284390b0ce 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
mbed_official 3:7a284390b0ce 37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
mbed_official 3:7a284390b0ce 38 * POSSIBILITY OF SUCH DAMAGE.
emilmont 1:fdd22bb7aa52 39 * -------------------------------------------------------------------- */
emilmont 1:fdd22bb7aa52 40
emilmont 1:fdd22bb7aa52 41 #include "arm_math.h"
emilmont 1:fdd22bb7aa52 42
emilmont 1:fdd22bb7aa52 43 /**
emilmont 1:fdd22bb7aa52 44 * @ingroup groupFilters
emilmont 1:fdd22bb7aa52 45 */
emilmont 1:fdd22bb7aa52 46
emilmont 1:fdd22bb7aa52 47 /**
emilmont 1:fdd22bb7aa52 48 * @addtogroup FIR
emilmont 1:fdd22bb7aa52 49 * @{
emilmont 1:fdd22bb7aa52 50 */
emilmont 1:fdd22bb7aa52 51
emilmont 1:fdd22bb7aa52 52 /**
emilmont 1:fdd22bb7aa52 53 * @brief Processing function for the Q15 FIR filter.
emilmont 1:fdd22bb7aa52 54 * @param[in] *S points to an instance of the Q15 FIR structure.
emilmont 1:fdd22bb7aa52 55 * @param[in] *pSrc points to the block of input data.
emilmont 1:fdd22bb7aa52 56 * @param[out] *pDst points to the block of output data.
emilmont 1:fdd22bb7aa52 57 * @param[in] blockSize number of samples to process per call.
emilmont 1:fdd22bb7aa52 58 * @return none.
emilmont 1:fdd22bb7aa52 59 *
emilmont 1:fdd22bb7aa52 60 *
emilmont 1:fdd22bb7aa52 61 * \par Restrictions
emilmont 1:fdd22bb7aa52 62 * If the silicon does not support unaligned memory access, enable the macro UNALIGNED_SUPPORT_DISABLE.
emilmont 2:da51fb522205 63 * In this case the input, output, and state buffers should be 32-bit aligned.
emilmont 1:fdd22bb7aa52 64 *
emilmont 1:fdd22bb7aa52 65 * <b>Scaling and Overflow Behavior:</b>
emilmont 1:fdd22bb7aa52 66 * \par
emilmont 1:fdd22bb7aa52 67 * The function is implemented using a 64-bit internal accumulator.
emilmont 1:fdd22bb7aa52 68 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
emilmont 1:fdd22bb7aa52 69 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
emilmont 1:fdd22bb7aa52 70 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
emilmont 1:fdd22bb7aa52 71 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
emilmont 1:fdd22bb7aa52 72 * Lastly, the accumulator is saturated to yield a result in 1.15 format.
emilmont 1:fdd22bb7aa52 73 *
emilmont 1:fdd22bb7aa52 74 * \par
emilmont 1:fdd22bb7aa52 75 * Refer to the function <code>arm_fir_fast_q15()</code> for a faster but less precise implementation of this function.
emilmont 1:fdd22bb7aa52 76 */
emilmont 1:fdd22bb7aa52 77
mbed_official 3:7a284390b0ce 78 #ifndef ARM_MATH_CM0_FAMILY
emilmont 1:fdd22bb7aa52 79
emilmont 1:fdd22bb7aa52 80 /* Run the below code for Cortex-M4 and Cortex-M3 */
emilmont 1:fdd22bb7aa52 81
emilmont 1:fdd22bb7aa52 82 #ifndef UNALIGNED_SUPPORT_DISABLE
emilmont 1:fdd22bb7aa52 83
emilmont 1:fdd22bb7aa52 84
/* Q15 FIR processing function: variant compiled when neither ARM_MATH_CM0_FAMILY
 * nor UNALIGNED_SUPPORT_DISABLE is defined.
 *
 * Structure of the body:
 *   1. For each group of 4 input samples: append them to the state buffer, then
 *      compute 4 outputs (acc0..acc3) in parallel, consuming coefficients two at a
 *      time. State pairs are read at every q15 offset (px1 and px1 + 1u), i.e. also
 *      at offsets that are not a multiple of two q15 samples from pState.
 *   2. For the blockSize % 4 leftover samples: one output per pass.
 *   3. Copy the last numTaps - 1 state samples back to the start of S->pState.
 *
 * Coefficients are always consumed in pairs, so the tap loops only cover every
 * coefficient when numTaps is even (see the remainder comment in the body).
 * NOTE(review): __SIMD32, _SIMD32_OFFSET, __SMLALD, __PKHBT and __SSAT are defined
 * outside this file (presumably arm_math.h / compiler intrinsics) - confirm their
 * exact semantics there.
 */
emilmont 1:fdd22bb7aa52 85 void arm_fir_q15(
emilmont 1:fdd22bb7aa52 86 const arm_fir_instance_q15 * S,
emilmont 1:fdd22bb7aa52 87 q15_t * pSrc,
emilmont 1:fdd22bb7aa52 88 q15_t * pDst,
emilmont 1:fdd22bb7aa52 89 uint32_t blockSize)
emilmont 1:fdd22bb7aa52 90 {
emilmont 1:fdd22bb7aa52 91 q15_t *pState = S->pState; /* Start of the state window used for the output(s) currently being computed */
emilmont 1:fdd22bb7aa52 92 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
emilmont 1:fdd22bb7aa52 93 q15_t *pStateCurnt; /* Write position in the state buffer for incoming samples */
emilmont 1:fdd22bb7aa52 94 q15_t *px1; /* Temporary q15 pointer for state buffer */
emilmont 1:fdd22bb7aa52 95 q15_t *pb; /* Temporary pointer for coefficient buffer */
emilmont 1:fdd22bb7aa52 96 q31_t x0, x1, x2, x3, c0; /* Temporary variables to hold SIMD state and coefficient values */
emilmont 1:fdd22bb7aa52 97 q63_t acc0, acc1, acc2, acc3; /* Accumulators */
emilmont 1:fdd22bb7aa52 98 uint32_t numTaps = S->numTaps; /* Number of taps in the filter */
emilmont 1:fdd22bb7aa52 99 uint32_t tapCnt, blkCnt; /* Loop counters */
emilmont 1:fdd22bb7aa52 100
emilmont 1:fdd22bb7aa52 101
emilmont 1:fdd22bb7aa52 102 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
emilmont 1:fdd22bb7aa52 103 /* pStateCurnt points to the location where the new input data should be written */
emilmont 1:fdd22bb7aa52 104 pStateCurnt = &(S->pState[(numTaps - 1u)]);
emilmont 1:fdd22bb7aa52 105
emilmont 1:fdd22bb7aa52 106 /* Apply loop unrolling and compute 4 output values simultaneously.
emilmont 1:fdd22bb7aa52 107 * The variables acc0 ... acc3 hold output values that are being computed:
emilmont 1:fdd22bb7aa52 108 *
emilmont 1:fdd22bb7aa52 109 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]
emilmont 1:fdd22bb7aa52 110 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]
emilmont 1:fdd22bb7aa52 111 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]
emilmont 1:fdd22bb7aa52 112 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3]
emilmont 1:fdd22bb7aa52 113 */
emilmont 1:fdd22bb7aa52 114
emilmont 1:fdd22bb7aa52 115 blkCnt = blockSize >> 2;
emilmont 1:fdd22bb7aa52 116
emilmont 1:fdd22bb7aa52 117 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
emilmont 1:fdd22bb7aa52 118 ** a second loop below computes the remaining 1 to 3 samples. */
emilmont 1:fdd22bb7aa52 119 while(blkCnt > 0u)
emilmont 1:fdd22bb7aa52 120 {
emilmont 1:fdd22bb7aa52 121 /* Copy four new input samples into the state buffer.
emilmont 1:fdd22bb7aa52 122 ** Use 32-bit SIMD to move the 16-bit data. Only requires two copies. */
emilmont 1:fdd22bb7aa52 123 *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++;
emilmont 1:fdd22bb7aa52 124 *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++;
emilmont 1:fdd22bb7aa52 125
emilmont 1:fdd22bb7aa52 126 /* Set all accumulators to zero */
emilmont 1:fdd22bb7aa52 127 acc0 = 0;
emilmont 1:fdd22bb7aa52 128 acc1 = 0;
emilmont 1:fdd22bb7aa52 129 acc2 = 0;
emilmont 1:fdd22bb7aa52 130 acc3 = 0;
emilmont 1:fdd22bb7aa52 131
emilmont 1:fdd22bb7aa52 132 /* Initialize state pointer of type q15 */
emilmont 1:fdd22bb7aa52 133 px1 = pState;
emilmont 1:fdd22bb7aa52 134
emilmont 1:fdd22bb7aa52 135 /* Initialize coefficient pointer (q15_t; coefficients are read two at a time via __SIMD32) */
emilmont 1:fdd22bb7aa52 136 pb = pCoeffs;
emilmont 1:fdd22bb7aa52 137
emilmont 1:fdd22bb7aa52 138 /* Read the first two samples from the state buffer: x[n-N], x[n-N-1] */
emilmont 1:fdd22bb7aa52 139 x0 = _SIMD32_OFFSET(px1);
emilmont 1:fdd22bb7aa52 140
emilmont 1:fdd22bb7aa52 141 /* Read the second and third samples from the state buffer (one q15 sample past px1): x[n-N-1], x[n-N-2] */
emilmont 1:fdd22bb7aa52 142 x1 = _SIMD32_OFFSET(px1 + 1u);
emilmont 1:fdd22bb7aa52 143
emilmont 1:fdd22bb7aa52 144 px1 += 2u;
emilmont 1:fdd22bb7aa52 145
emilmont 1:fdd22bb7aa52 146 /* Loop over the number of taps. Unroll by a factor of 4.
emilmont 1:fdd22bb7aa52 147 ** Repeat until numTaps - (numTaps % 4) coefficients have been processed. */
emilmont 1:fdd22bb7aa52 148 tapCnt = numTaps >> 2;
emilmont 1:fdd22bb7aa52 149
emilmont 1:fdd22bb7aa52 150 while(tapCnt > 0u)
emilmont 1:fdd22bb7aa52 151 {
emilmont 1:fdd22bb7aa52 152 /* Read the first two coefficients using SIMD: b[N] and b[N-1] coefficients */
emilmont 1:fdd22bb7aa52 153 c0 = *__SIMD32(pb)++;
emilmont 1:fdd22bb7aa52 154
emilmont 1:fdd22bb7aa52 155 /* acc0 += b[N] * x[n-N] + b[N-1] * x[n-N-1] */
emilmont 1:fdd22bb7aa52 156 acc0 = __SMLALD(x0, c0, acc0);
emilmont 1:fdd22bb7aa52 157
emilmont 1:fdd22bb7aa52 158 /* acc1 += b[N] * x[n-N-1] + b[N-1] * x[n-N-2] */
emilmont 1:fdd22bb7aa52 159 acc1 = __SMLALD(x1, c0, acc1);
emilmont 1:fdd22bb7aa52 160
emilmont 1:fdd22bb7aa52 161 /* Read state x[n-N-2], x[n-N-3] */
emilmont 1:fdd22bb7aa52 162 x2 = _SIMD32_OFFSET(px1);
emilmont 1:fdd22bb7aa52 163
emilmont 1:fdd22bb7aa52 164 /* Read state x[n-N-3], x[n-N-4] */
emilmont 1:fdd22bb7aa52 165 x3 = _SIMD32_OFFSET(px1 + 1u);
emilmont 1:fdd22bb7aa52 166
emilmont 1:fdd22bb7aa52 167 /* acc2 += b[N] * x[n-N-2] + b[N-1] * x[n-N-3] */
emilmont 1:fdd22bb7aa52 168 acc2 = __SMLALD(x2, c0, acc2);
emilmont 1:fdd22bb7aa52 169
emilmont 1:fdd22bb7aa52 170 /* acc3 += b[N] * x[n-N-3] + b[N-1] * x[n-N-4] */
emilmont 1:fdd22bb7aa52 171 acc3 = __SMLALD(x3, c0, acc3);
emilmont 1:fdd22bb7aa52 172
emilmont 1:fdd22bb7aa52 173 /* Read coefficients b[N-2], b[N-3] */
emilmont 1:fdd22bb7aa52 174 c0 = *__SIMD32(pb)++;
emilmont 1:fdd22bb7aa52 175
emilmont 1:fdd22bb7aa52 176 /* acc0 += b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */
emilmont 1:fdd22bb7aa52 177 acc0 = __SMLALD(x2, c0, acc0);
emilmont 1:fdd22bb7aa52 178
emilmont 1:fdd22bb7aa52 179 /* acc1 += b[N-2] * x[n-N-3] + b[N-3] * x[n-N-4] */
emilmont 1:fdd22bb7aa52 180 acc1 = __SMLALD(x3, c0, acc1);
emilmont 1:fdd22bb7aa52 181
emilmont 1:fdd22bb7aa52 182 /* Read state x[n-N-4], x[n-N-5]; x0 is reloaded here so it is ready for the next iteration (or the remainder step) */
emilmont 1:fdd22bb7aa52 183 x0 = _SIMD32_OFFSET(px1 + 2u);
emilmont 1:fdd22bb7aa52 184
emilmont 1:fdd22bb7aa52 185 /* Read state x[n-N-5], x[n-N-6]; x1 is likewise carried over */
emilmont 1:fdd22bb7aa52 186 x1 = _SIMD32_OFFSET(px1 + 3u);
emilmont 1:fdd22bb7aa52 187
emilmont 1:fdd22bb7aa52 188 /* acc2 += b[N-2] * x[n-N-4] + b[N-3] * x[n-N-5] */
emilmont 1:fdd22bb7aa52 189 acc2 = __SMLALD(x0, c0, acc2);
emilmont 1:fdd22bb7aa52 190
emilmont 1:fdd22bb7aa52 191 /* acc3 += b[N-2] * x[n-N-5] + b[N-3] * x[n-N-6] */
emilmont 1:fdd22bb7aa52 192 acc3 = __SMLALD(x1, c0, acc3);
emilmont 1:fdd22bb7aa52 193
emilmont 1:fdd22bb7aa52 194 px1 += 4u;
emilmont 1:fdd22bb7aa52 195
emilmont 1:fdd22bb7aa52 196 tapCnt--;
emilmont 1:fdd22bb7aa52 197
emilmont 1:fdd22bb7aa52 198 }
emilmont 1:fdd22bb7aa52 199
emilmont 1:fdd22bb7aa52 200
emilmont 1:fdd22bb7aa52 201 /* If the filter length is not a multiple of 4, compute the remaining filter taps.
emilmont 1:fdd22bb7aa52 202 ** This is always 2 taps, since the filter length is assumed to be even. */
emilmont 1:fdd22bb7aa52 203 if((numTaps & 0x3u) != 0u)
emilmont 1:fdd22bb7aa52 204 {
emilmont 1:fdd22bb7aa52 205 /* Read 2 coefficients */
emilmont 1:fdd22bb7aa52 206 c0 = *__SIMD32(pb)++;
emilmont 1:fdd22bb7aa52 207
emilmont 1:fdd22bb7aa52 208 /* Fetch 4 state variables */
emilmont 1:fdd22bb7aa52 209 x2 = _SIMD32_OFFSET(px1);
emilmont 1:fdd22bb7aa52 210
emilmont 1:fdd22bb7aa52 211 x3 = _SIMD32_OFFSET(px1 + 1u);
emilmont 1:fdd22bb7aa52 212
emilmont 1:fdd22bb7aa52 213 /* Perform the multiply-accumulates; x0 and x1 still hold the values loaded last (in the loop above, or before it if the loop did not run) */
emilmont 1:fdd22bb7aa52 214 acc0 = __SMLALD(x0, c0, acc0);
emilmont 1:fdd22bb7aa52 215
emilmont 1:fdd22bb7aa52 216 px1 += 2u;
emilmont 1:fdd22bb7aa52 217
emilmont 1:fdd22bb7aa52 218 acc1 = __SMLALD(x1, c0, acc1);
emilmont 1:fdd22bb7aa52 219 acc2 = __SMLALD(x2, c0, acc2);
emilmont 1:fdd22bb7aa52 220 acc3 = __SMLALD(x3, c0, acc3);
emilmont 1:fdd22bb7aa52 221 }
emilmont 1:fdd22bb7aa52 222
emilmont 1:fdd22bb7aa52 223 /* The results in the 4 accumulators are in 34.30 format. Shift right by 15 and saturate to 1.15.
emilmont 1:fdd22bb7aa52 224 ** Then store the 4 outputs in the destination buffer, two per 32-bit write (operand order is swapped for big-endian). */
emilmont 1:fdd22bb7aa52 225
emilmont 1:fdd22bb7aa52 226 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 227
emilmont 1:fdd22bb7aa52 228 *__SIMD32(pDst)++ =
emilmont 1:fdd22bb7aa52 229 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
emilmont 1:fdd22bb7aa52 230 *__SIMD32(pDst)++ =
emilmont 1:fdd22bb7aa52 231 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
emilmont 1:fdd22bb7aa52 232
emilmont 1:fdd22bb7aa52 233 #else
emilmont 1:fdd22bb7aa52 234
emilmont 1:fdd22bb7aa52 235 *__SIMD32(pDst)++ =
emilmont 1:fdd22bb7aa52 236 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
emilmont 1:fdd22bb7aa52 237 *__SIMD32(pDst)++ =
emilmont 1:fdd22bb7aa52 238 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
emilmont 1:fdd22bb7aa52 239
emilmont 1:fdd22bb7aa52 240 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 241
emilmont 1:fdd22bb7aa52 242
emilmont 1:fdd22bb7aa52 243
emilmont 1:fdd22bb7aa52 244 /* Advance the state pointer by 4 to process the next group of 4 samples */
emilmont 1:fdd22bb7aa52 245 pState = pState + 4;
emilmont 1:fdd22bb7aa52 246
emilmont 1:fdd22bb7aa52 247 /* Decrement the loop counter */
emilmont 1:fdd22bb7aa52 248 blkCnt--;
emilmont 1:fdd22bb7aa52 249 }
emilmont 1:fdd22bb7aa52 250
emilmont 1:fdd22bb7aa52 251 /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
emilmont 1:fdd22bb7aa52 252 ** No loop unrolling is used. */
emilmont 1:fdd22bb7aa52 253 blkCnt = blockSize % 0x4u;
emilmont 1:fdd22bb7aa52 254 while(blkCnt > 0u)
emilmont 1:fdd22bb7aa52 255 {
emilmont 1:fdd22bb7aa52 256 /* Copy one sample into the state buffer */
emilmont 1:fdd22bb7aa52 257 *pStateCurnt++ = *pSrc++;
emilmont 1:fdd22bb7aa52 258
emilmont 1:fdd22bb7aa52 259 /* Set the accumulator to zero */
emilmont 1:fdd22bb7aa52 260 acc0 = 0;
emilmont 1:fdd22bb7aa52 261
emilmont 1:fdd22bb7aa52 262 /* Initialize state pointer of type q15 */
emilmont 1:fdd22bb7aa52 263 px1 = pState;
emilmont 1:fdd22bb7aa52 264
emilmont 1:fdd22bb7aa52 265 /* Initialize coefficient pointer (q15_t; coefficients are read two at a time via __SIMD32) */
emilmont 1:fdd22bb7aa52 266 pb = pCoeffs;
emilmont 1:fdd22bb7aa52 267
emilmont 1:fdd22bb7aa52 268 tapCnt = numTaps >> 1; /* two taps are consumed per iteration */
emilmont 1:fdd22bb7aa52 269
emilmont 1:fdd22bb7aa52 270 do
emilmont 1:fdd22bb7aa52 271 {
emilmont 1:fdd22bb7aa52 272
emilmont 1:fdd22bb7aa52 273 c0 = *__SIMD32(pb)++;
emilmont 1:fdd22bb7aa52 274 x0 = *__SIMD32(px1)++;
emilmont 1:fdd22bb7aa52 275
emilmont 1:fdd22bb7aa52 276 acc0 = __SMLALD(x0, c0, acc0);
emilmont 1:fdd22bb7aa52 277 tapCnt--;
emilmont 1:fdd22bb7aa52 278 }
emilmont 1:fdd22bb7aa52 279 while(tapCnt > 0u);
emilmont 1:fdd22bb7aa52 280
emilmont 1:fdd22bb7aa52 281 /* The result is in 34.30 format. Shift right by 15 and saturate to 1.15.
emilmont 1:fdd22bb7aa52 282 ** Then store the output in the destination buffer. */
emilmont 1:fdd22bb7aa52 283 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
emilmont 1:fdd22bb7aa52 284
emilmont 1:fdd22bb7aa52 285 /* Advance state pointer by 1 for the next sample */
emilmont 1:fdd22bb7aa52 286 pState = pState + 1;
emilmont 1:fdd22bb7aa52 287
emilmont 1:fdd22bb7aa52 288 /* Decrement the loop counter */
emilmont 1:fdd22bb7aa52 289 blkCnt--;
emilmont 1:fdd22bb7aa52 290 }
emilmont 1:fdd22bb7aa52 291
emilmont 1:fdd22bb7aa52 292 /* Processing is complete.
emilmont 1:fdd22bb7aa52 293 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
emilmont 1:fdd22bb7aa52 294 ** This prepares the state buffer for the next function call. */
emilmont 1:fdd22bb7aa52 295
emilmont 1:fdd22bb7aa52 296 /* Points to the start of the state buffer */
emilmont 1:fdd22bb7aa52 297 pStateCurnt = S->pState;
emilmont 1:fdd22bb7aa52 298
emilmont 1:fdd22bb7aa52 299 /* Number of groups of 4 samples to copy; each group is moved with two 32-bit writes */
emilmont 1:fdd22bb7aa52 300 tapCnt = (numTaps - 1u) >> 2;
emilmont 1:fdd22bb7aa52 301
emilmont 1:fdd22bb7aa52 302 while(tapCnt > 0u)
emilmont 1:fdd22bb7aa52 303 {
emilmont 1:fdd22bb7aa52 304
emilmont 1:fdd22bb7aa52 305 /* Copy state values to start of state buffer */
emilmont 1:fdd22bb7aa52 306 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
emilmont 1:fdd22bb7aa52 307 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
emilmont 1:fdd22bb7aa52 308
emilmont 1:fdd22bb7aa52 309 tapCnt--;
emilmont 1:fdd22bb7aa52 310
emilmont 1:fdd22bb7aa52 311 }
emilmont 1:fdd22bb7aa52 312
emilmont 1:fdd22bb7aa52 313 /* Calculation of count for remaining q15_t data */
emilmont 1:fdd22bb7aa52 314 tapCnt = (numTaps - 1u) % 0x4u;
emilmont 1:fdd22bb7aa52 315
emilmont 1:fdd22bb7aa52 316 /* copy remaining data */
emilmont 1:fdd22bb7aa52 317 while(tapCnt > 0u)
emilmont 1:fdd22bb7aa52 318 {
emilmont 1:fdd22bb7aa52 319 *pStateCurnt++ = *pState++;
emilmont 1:fdd22bb7aa52 320
emilmont 1:fdd22bb7aa52 321 /* Decrement the loop counter */
emilmont 1:fdd22bb7aa52 322 tapCnt--;
emilmont 1:fdd22bb7aa52 323 }
emilmont 1:fdd22bb7aa52 324 }
emilmont 1:fdd22bb7aa52 325
emilmont 1:fdd22bb7aa52 326 #else /* UNALIGNED_SUPPORT_DISABLE */
emilmont 1:fdd22bb7aa52 327
emilmont 1:fdd22bb7aa52 328 void arm_fir_q15(
emilmont 1:fdd22bb7aa52 329 const arm_fir_instance_q15 * S,
emilmont 1:fdd22bb7aa52 330 q15_t * pSrc,
emilmont 1:fdd22bb7aa52 331 q15_t * pDst,
emilmont 1:fdd22bb7aa52 332 uint32_t blockSize)
emilmont 1:fdd22bb7aa52 333 {
emilmont 1:fdd22bb7aa52 334 q15_t *pState = S->pState; /* State pointer */
emilmont 1:fdd22bb7aa52 335 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
emilmont 1:fdd22bb7aa52 336 q15_t *pStateCurnt; /* Points to the current sample of the state */
emilmont 1:fdd22bb7aa52 337 q63_t acc0, acc1, acc2, acc3; /* Accumulators */
emilmont 1:fdd22bb7aa52 338 q15_t *pb; /* Temporary pointer for coefficient buffer */
emilmont 1:fdd22bb7aa52 339 q15_t *px; /* Temporary q31 pointer for SIMD state buffer accesses */
emilmont 1:fdd22bb7aa52 340 q31_t x0, x1, x2, c0; /* Temporary variables to hold SIMD state and coefficient values */
emilmont 1:fdd22bb7aa52 341 uint32_t numTaps = S->numTaps; /* Number of taps in the filter */
emilmont 1:fdd22bb7aa52 342 uint32_t tapCnt, blkCnt; /* Loop counters */
emilmont 1:fdd22bb7aa52 343
emilmont 1:fdd22bb7aa52 344
emilmont 1:fdd22bb7aa52 345 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
emilmont 1:fdd22bb7aa52 346 /* pStateCurnt points to the location where the new input data should be written */
emilmont 1:fdd22bb7aa52 347 pStateCurnt = &(S->pState[(numTaps - 1u)]);
emilmont 1:fdd22bb7aa52 348
emilmont 1:fdd22bb7aa52 349 /* Apply loop unrolling and compute 4 output values simultaneously.
emilmont 1:fdd22bb7aa52 350 * The variables acc0 ... acc3 hold output values that are being computed:
emilmont 1:fdd22bb7aa52 351 *
emilmont 1:fdd22bb7aa52 352 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]
emilmont 1:fdd22bb7aa52 353 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]
emilmont 1:fdd22bb7aa52 354 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]
emilmont 1:fdd22bb7aa52 355 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3]
emilmont 1:fdd22bb7aa52 356 */
emilmont 1:fdd22bb7aa52 357
emilmont 1:fdd22bb7aa52 358 blkCnt = blockSize >> 2;
emilmont 1:fdd22bb7aa52 359
emilmont 1:fdd22bb7aa52 360 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
emilmont 1:fdd22bb7aa52 361 ** a second loop below computes the remaining 1 to 3 samples. */
emilmont 1:fdd22bb7aa52 362 while(blkCnt > 0u)
emilmont 1:fdd22bb7aa52 363 {
emilmont 1:fdd22bb7aa52 364 /* Copy four new input samples into the state buffer.
emilmont 1:fdd22bb7aa52 365 ** Use 32-bit SIMD to move the 16-bit data. Only requires two copies. */
emilmont 1:fdd22bb7aa52 366 *pStateCurnt++ = *pSrc++;
emilmont 1:fdd22bb7aa52 367 *pStateCurnt++ = *pSrc++;
emilmont 1:fdd22bb7aa52 368 *pStateCurnt++ = *pSrc++;
emilmont 1:fdd22bb7aa52 369 *pStateCurnt++ = *pSrc++;
emilmont 1:fdd22bb7aa52 370
emilmont 1:fdd22bb7aa52 371
emilmont 1:fdd22bb7aa52 372 /* Set all accumulators to zero */
emilmont 1:fdd22bb7aa52 373 acc0 = 0;
emilmont 1:fdd22bb7aa52 374 acc1 = 0;
emilmont 1:fdd22bb7aa52 375 acc2 = 0;
emilmont 1:fdd22bb7aa52 376 acc3 = 0;
emilmont 1:fdd22bb7aa52 377
emilmont 1:fdd22bb7aa52 378 /* Typecast q15_t pointer to q31_t pointer for state reading in q31_t */
emilmont 1:fdd22bb7aa52 379 px = pState;
emilmont 1:fdd22bb7aa52 380
emilmont 1:fdd22bb7aa52 381 /* Typecast q15_t pointer to q31_t pointer for coefficient reading in q31_t */
emilmont 1:fdd22bb7aa52 382 pb = pCoeffs;
emilmont 1:fdd22bb7aa52 383
emilmont 1:fdd22bb7aa52 384 /* Read the first two samples from the state buffer: x[n-N], x[n-N-1] */
emilmont 1:fdd22bb7aa52 385 x0 = *__SIMD32(px)++;
emilmont 1:fdd22bb7aa52 386
emilmont 1:fdd22bb7aa52 387 /* Read the third and forth samples from the state buffer: x[n-N-2], x[n-N-3] */
emilmont 1:fdd22bb7aa52 388 x2 = *__SIMD32(px)++;
emilmont 1:fdd22bb7aa52 389
emilmont 1:fdd22bb7aa52 390 /* Loop over the number of taps. Unroll by a factor of 4.
emilmont 1:fdd22bb7aa52 391 ** Repeat until we've computed numTaps-(numTaps%4) coefficients. */
emilmont 1:fdd22bb7aa52 392 tapCnt = numTaps >> 2;
emilmont 1:fdd22bb7aa52 393
emilmont 1:fdd22bb7aa52 394 while(tapCnt > 0)
emilmont 1:fdd22bb7aa52 395 {
emilmont 1:fdd22bb7aa52 396 /* Read the first two coefficients using SIMD: b[N] and b[N-1] coefficients */
emilmont 1:fdd22bb7aa52 397 c0 = *__SIMD32(pb)++;
emilmont 1:fdd22bb7aa52 398
emilmont 1:fdd22bb7aa52 399 /* acc0 += b[N] * x[n-N] + b[N-1] * x[n-N-1] */
emilmont 1:fdd22bb7aa52 400 acc0 = __SMLALD(x0, c0, acc0);
emilmont 1:fdd22bb7aa52 401
emilmont 1:fdd22bb7aa52 402 /* acc2 += b[N] * x[n-N-2] + b[N-1] * x[n-N-3] */
emilmont 1:fdd22bb7aa52 403 acc2 = __SMLALD(x2, c0, acc2);
emilmont 1:fdd22bb7aa52 404
emilmont 1:fdd22bb7aa52 405 /* pack x[n-N-1] and x[n-N-2] */
emilmont 1:fdd22bb7aa52 406 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 407 x1 = __PKHBT(x2, x0, 0);
emilmont 1:fdd22bb7aa52 408 #else
emilmont 1:fdd22bb7aa52 409 x1 = __PKHBT(x0, x2, 0);
emilmont 1:fdd22bb7aa52 410 #endif
emilmont 1:fdd22bb7aa52 411
emilmont 1:fdd22bb7aa52 412 /* Read state x[n-N-4], x[n-N-5] */
emilmont 1:fdd22bb7aa52 413 x0 = _SIMD32_OFFSET(px);
emilmont 1:fdd22bb7aa52 414
emilmont 1:fdd22bb7aa52 415 /* acc1 += b[N] * x[n-N-1] + b[N-1] * x[n-N-2] */
emilmont 1:fdd22bb7aa52 416 acc1 = __SMLALDX(x1, c0, acc1);
emilmont 1:fdd22bb7aa52 417
emilmont 1:fdd22bb7aa52 418 /* pack x[n-N-3] and x[n-N-4] */
emilmont 1:fdd22bb7aa52 419 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 420 x1 = __PKHBT(x0, x2, 0);
emilmont 1:fdd22bb7aa52 421 #else
emilmont 1:fdd22bb7aa52 422 x1 = __PKHBT(x2, x0, 0);
emilmont 1:fdd22bb7aa52 423 #endif
emilmont 1:fdd22bb7aa52 424
emilmont 1:fdd22bb7aa52 425 /* acc3 += b[N] * x[n-N-3] + b[N-1] * x[n-N-4] */
emilmont 1:fdd22bb7aa52 426 acc3 = __SMLALDX(x1, c0, acc3);
emilmont 1:fdd22bb7aa52 427
emilmont 1:fdd22bb7aa52 428 /* Read coefficients b[N-2], b[N-3] */
emilmont 1:fdd22bb7aa52 429 c0 = *__SIMD32(pb)++;
emilmont 1:fdd22bb7aa52 430
emilmont 1:fdd22bb7aa52 431 /* acc0 += b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */
emilmont 1:fdd22bb7aa52 432 acc0 = __SMLALD(x2, c0, acc0);
emilmont 1:fdd22bb7aa52 433
emilmont 1:fdd22bb7aa52 434 /* Read state x[n-N-6], x[n-N-7] with offset */
emilmont 1:fdd22bb7aa52 435 x2 = _SIMD32_OFFSET(px + 2u);
emilmont 1:fdd22bb7aa52 436
emilmont 1:fdd22bb7aa52 437 /* acc2 += b[N-2] * x[n-N-4] + b[N-3] * x[n-N-5] */
emilmont 1:fdd22bb7aa52 438 acc2 = __SMLALD(x0, c0, acc2);
emilmont 1:fdd22bb7aa52 439
emilmont 1:fdd22bb7aa52 440 /* acc1 += b[N-2] * x[n-N-3] + b[N-3] * x[n-N-4] */
emilmont 1:fdd22bb7aa52 441 acc1 = __SMLALDX(x1, c0, acc1);
emilmont 1:fdd22bb7aa52 442
emilmont 1:fdd22bb7aa52 443 /* pack x[n-N-5] and x[n-N-6] */
emilmont 1:fdd22bb7aa52 444 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 445 x1 = __PKHBT(x2, x0, 0);
emilmont 1:fdd22bb7aa52 446 #else
emilmont 1:fdd22bb7aa52 447 x1 = __PKHBT(x0, x2, 0);
emilmont 1:fdd22bb7aa52 448 #endif
emilmont 1:fdd22bb7aa52 449
emilmont 1:fdd22bb7aa52 450 /* acc3 += b[N-2] * x[n-N-5] + b[N-3] * x[n-N-6] */
emilmont 1:fdd22bb7aa52 451 acc3 = __SMLALDX(x1, c0, acc3);
emilmont 1:fdd22bb7aa52 452
emilmont 1:fdd22bb7aa52 453 /* Update state pointer for next state reading */
emilmont 1:fdd22bb7aa52 454 px += 4u;
emilmont 1:fdd22bb7aa52 455
emilmont 1:fdd22bb7aa52 456 /* Decrement tap count */
emilmont 1:fdd22bb7aa52 457 tapCnt--;
emilmont 1:fdd22bb7aa52 458
emilmont 1:fdd22bb7aa52 459 }
emilmont 1:fdd22bb7aa52 460
emilmont 1:fdd22bb7aa52 461 /* If the filter length is not a multiple of 4, compute the remaining filter taps.
emilmont 1:fdd22bb7aa52 462 ** This is always be 2 taps since the filter length is even. */
emilmont 1:fdd22bb7aa52 463 if((numTaps & 0x3u) != 0u)
emilmont 1:fdd22bb7aa52 464 {
emilmont 1:fdd22bb7aa52 465
emilmont 1:fdd22bb7aa52 466 /* Read last two coefficients */
emilmont 1:fdd22bb7aa52 467 c0 = *__SIMD32(pb)++;
emilmont 1:fdd22bb7aa52 468
emilmont 1:fdd22bb7aa52 469 /* Perform the multiply-accumulates */
emilmont 1:fdd22bb7aa52 470 acc0 = __SMLALD(x0, c0, acc0);
emilmont 1:fdd22bb7aa52 471 acc2 = __SMLALD(x2, c0, acc2);
emilmont 1:fdd22bb7aa52 472
emilmont 1:fdd22bb7aa52 473 /* pack state variables */
emilmont 1:fdd22bb7aa52 474 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 475 x1 = __PKHBT(x2, x0, 0);
emilmont 1:fdd22bb7aa52 476 #else
emilmont 1:fdd22bb7aa52 477 x1 = __PKHBT(x0, x2, 0);
emilmont 1:fdd22bb7aa52 478 #endif
emilmont 1:fdd22bb7aa52 479
emilmont 1:fdd22bb7aa52 480 /* Read last state variables */
emilmont 1:fdd22bb7aa52 481 x0 = *__SIMD32(px);
emilmont 1:fdd22bb7aa52 482
emilmont 1:fdd22bb7aa52 483 /* Perform the multiply-accumulates */
emilmont 1:fdd22bb7aa52 484 acc1 = __SMLALDX(x1, c0, acc1);
emilmont 1:fdd22bb7aa52 485
emilmont 1:fdd22bb7aa52 486 /* pack state variables */
emilmont 1:fdd22bb7aa52 487 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 488 x1 = __PKHBT(x0, x2, 0);
emilmont 1:fdd22bb7aa52 489 #else
emilmont 1:fdd22bb7aa52 490 x1 = __PKHBT(x2, x0, 0);
emilmont 1:fdd22bb7aa52 491 #endif
emilmont 1:fdd22bb7aa52 492
emilmont 1:fdd22bb7aa52 493 /* Perform the multiply-accumulates */
emilmont 1:fdd22bb7aa52 494 acc3 = __SMLALDX(x1, c0, acc3);
emilmont 1:fdd22bb7aa52 495 }
emilmont 1:fdd22bb7aa52 496
emilmont 1:fdd22bb7aa52 497 /* The results in the 4 accumulators are in 2.30 format. Convert to 1.15 with saturation.
emilmont 1:fdd22bb7aa52 498 ** Then store the 4 outputs in the destination buffer. */
emilmont 1:fdd22bb7aa52 499
emilmont 1:fdd22bb7aa52 500 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 501
emilmont 1:fdd22bb7aa52 502 *__SIMD32(pDst)++ =
emilmont 1:fdd22bb7aa52 503 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
emilmont 1:fdd22bb7aa52 504
emilmont 1:fdd22bb7aa52 505 *__SIMD32(pDst)++ =
emilmont 1:fdd22bb7aa52 506 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
emilmont 1:fdd22bb7aa52 507
emilmont 1:fdd22bb7aa52 508 #else
emilmont 1:fdd22bb7aa52 509
emilmont 1:fdd22bb7aa52 510 *__SIMD32(pDst)++ =
emilmont 1:fdd22bb7aa52 511 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
emilmont 1:fdd22bb7aa52 512
emilmont 1:fdd22bb7aa52 513 *__SIMD32(pDst)++ =
emilmont 1:fdd22bb7aa52 514 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
emilmont 1:fdd22bb7aa52 515
emilmont 1:fdd22bb7aa52 516 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 517
emilmont 1:fdd22bb7aa52 518 /* Advance the state pointer by 4 to process the next group of 4 samples */
emilmont 1:fdd22bb7aa52 519 pState = pState + 4;
emilmont 1:fdd22bb7aa52 520
emilmont 1:fdd22bb7aa52 521 /* Decrement the loop counter */
emilmont 1:fdd22bb7aa52 522 blkCnt--;
emilmont 1:fdd22bb7aa52 523 }
emilmont 1:fdd22bb7aa52 524
emilmont 1:fdd22bb7aa52 525 /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
emilmont 1:fdd22bb7aa52 526 ** No loop unrolling is used. */
emilmont 1:fdd22bb7aa52 527 blkCnt = blockSize % 0x4u;
emilmont 1:fdd22bb7aa52 528 while(blkCnt > 0u)
emilmont 1:fdd22bb7aa52 529 {
emilmont 1:fdd22bb7aa52 530 /* Copy two samples into state buffer */
emilmont 1:fdd22bb7aa52 531 *pStateCurnt++ = *pSrc++;
emilmont 1:fdd22bb7aa52 532
emilmont 1:fdd22bb7aa52 533 /* Set the accumulator to zero */
emilmont 1:fdd22bb7aa52 534 acc0 = 0;
emilmont 1:fdd22bb7aa52 535
emilmont 1:fdd22bb7aa52 536 /* Use SIMD to hold states and coefficients */
emilmont 1:fdd22bb7aa52 537 px = pState;
emilmont 1:fdd22bb7aa52 538 pb = pCoeffs;
emilmont 1:fdd22bb7aa52 539
emilmont 1:fdd22bb7aa52 540 tapCnt = numTaps >> 1u;
emilmont 1:fdd22bb7aa52 541
emilmont 1:fdd22bb7aa52 542 do
emilmont 1:fdd22bb7aa52 543 {
emilmont 1:fdd22bb7aa52 544 acc0 += (q31_t) * px++ * *pb++;
emilmont 2:da51fb522205 545 acc0 += (q31_t) * px++ * *pb++;
emilmont 1:fdd22bb7aa52 546 tapCnt--;
emilmont 1:fdd22bb7aa52 547 }
emilmont 1:fdd22bb7aa52 548 while(tapCnt > 0u);
emilmont 1:fdd22bb7aa52 549
emilmont 1:fdd22bb7aa52 550 /* The result is in 2.30 format. Convert to 1.15 with saturation.
emilmont 1:fdd22bb7aa52 551 ** Then store the output in the destination buffer. */
emilmont 1:fdd22bb7aa52 552 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
emilmont 1:fdd22bb7aa52 553
emilmont 1:fdd22bb7aa52 554 /* Advance state pointer by 1 for the next sample */
emilmont 1:fdd22bb7aa52 555 pState = pState + 1u;
emilmont 1:fdd22bb7aa52 556
emilmont 1:fdd22bb7aa52 557 /* Decrement the loop counter */
emilmont 1:fdd22bb7aa52 558 blkCnt--;
emilmont 1:fdd22bb7aa52 559 }
emilmont 1:fdd22bb7aa52 560
emilmont 1:fdd22bb7aa52 561 /* Processing is complete.
emilmont 1:fdd22bb7aa52 562 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
emilmont 1:fdd22bb7aa52 563 ** This prepares the state buffer for the next function call. */
emilmont 1:fdd22bb7aa52 564
emilmont 1:fdd22bb7aa52 565 /* Points to the start of the state buffer */
emilmont 1:fdd22bb7aa52 566 pStateCurnt = S->pState;
emilmont 1:fdd22bb7aa52 567
emilmont 1:fdd22bb7aa52 568 /* Calculation of count for copying integer writes */
emilmont 1:fdd22bb7aa52 569 tapCnt = (numTaps - 1u) >> 2;
emilmont 1:fdd22bb7aa52 570
emilmont 1:fdd22bb7aa52 571 while(tapCnt > 0u)
emilmont 1:fdd22bb7aa52 572 {
emilmont 1:fdd22bb7aa52 573 *pStateCurnt++ = *pState++;
emilmont 1:fdd22bb7aa52 574 *pStateCurnt++ = *pState++;
emilmont 1:fdd22bb7aa52 575 *pStateCurnt++ = *pState++;
emilmont 1:fdd22bb7aa52 576 *pStateCurnt++ = *pState++;
emilmont 1:fdd22bb7aa52 577
emilmont 1:fdd22bb7aa52 578 tapCnt--;
emilmont 1:fdd22bb7aa52 579
emilmont 1:fdd22bb7aa52 580 }
emilmont 1:fdd22bb7aa52 581
emilmont 1:fdd22bb7aa52 582 /* Calculation of count for remaining q15_t data */
emilmont 1:fdd22bb7aa52 583 tapCnt = (numTaps - 1u) % 0x4u;
emilmont 1:fdd22bb7aa52 584
emilmont 1:fdd22bb7aa52 585 /* copy remaining data */
emilmont 1:fdd22bb7aa52 586 while(tapCnt > 0u)
emilmont 1:fdd22bb7aa52 587 {
emilmont 1:fdd22bb7aa52 588 *pStateCurnt++ = *pState++;
emilmont 1:fdd22bb7aa52 589
emilmont 1:fdd22bb7aa52 590 /* Decrement the loop counter */
emilmont 1:fdd22bb7aa52 591 tapCnt--;
emilmont 1:fdd22bb7aa52 592 }
emilmont 1:fdd22bb7aa52 593 }
emilmont 1:fdd22bb7aa52 594
emilmont 1:fdd22bb7aa52 595
emilmont 1:fdd22bb7aa52 596 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
emilmont 1:fdd22bb7aa52 597
mbed_official 3:7a284390b0ce 598 #else /* ARM_MATH_CM0_FAMILY */
emilmont 1:fdd22bb7aa52 599
emilmont 1:fdd22bb7aa52 600
emilmont 1:fdd22bb7aa52 601 /* Run the below code for Cortex-M0 */
emilmont 1:fdd22bb7aa52 602
emilmont 1:fdd22bb7aa52 603 void arm_fir_q15(
emilmont 1:fdd22bb7aa52 604 const arm_fir_instance_q15 * S,
emilmont 1:fdd22bb7aa52 605 q15_t * pSrc,
emilmont 1:fdd22bb7aa52 606 q15_t * pDst,
emilmont 1:fdd22bb7aa52 607 uint32_t blockSize)
emilmont 1:fdd22bb7aa52 608 {
emilmont 1:fdd22bb7aa52 609 q15_t *pState = S->pState; /* State pointer */
emilmont 1:fdd22bb7aa52 610 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
emilmont 1:fdd22bb7aa52 611 q15_t *pStateCurnt; /* Points to the current sample of the state */
emilmont 1:fdd22bb7aa52 612
emilmont 1:fdd22bb7aa52 613
emilmont 1:fdd22bb7aa52 614
emilmont 1:fdd22bb7aa52 615 q15_t *px; /* Temporary pointer for state buffer */
emilmont 1:fdd22bb7aa52 616 q15_t *pb; /* Temporary pointer for coefficient buffer */
emilmont 1:fdd22bb7aa52 617 q63_t acc; /* Accumulator */
emilmont 1:fdd22bb7aa52 618 uint32_t numTaps = S->numTaps; /* Number of nTaps in the filter */
emilmont 1:fdd22bb7aa52 619 uint32_t tapCnt, blkCnt; /* Loop counters */
emilmont 1:fdd22bb7aa52 620
emilmont 1:fdd22bb7aa52 621 /* S->pState buffer contains previous frame (numTaps - 1) samples */
emilmont 1:fdd22bb7aa52 622 /* pStateCurnt points to the location where the new input data should be written */
emilmont 1:fdd22bb7aa52 623 pStateCurnt = &(S->pState[(numTaps - 1u)]);
emilmont 1:fdd22bb7aa52 624
emilmont 1:fdd22bb7aa52 625 /* Initialize blkCnt with blockSize */
emilmont 1:fdd22bb7aa52 626 blkCnt = blockSize;
emilmont 1:fdd22bb7aa52 627
emilmont 1:fdd22bb7aa52 628 while(blkCnt > 0u)
emilmont 1:fdd22bb7aa52 629 {
emilmont 1:fdd22bb7aa52 630 /* Copy one sample at a time into state buffer */
emilmont 1:fdd22bb7aa52 631 *pStateCurnt++ = *pSrc++;
emilmont 1:fdd22bb7aa52 632
emilmont 1:fdd22bb7aa52 633 /* Set the accumulator to zero */
emilmont 1:fdd22bb7aa52 634 acc = 0;
emilmont 1:fdd22bb7aa52 635
emilmont 1:fdd22bb7aa52 636 /* Initialize state pointer */
emilmont 1:fdd22bb7aa52 637 px = pState;
emilmont 1:fdd22bb7aa52 638
emilmont 1:fdd22bb7aa52 639 /* Initialize Coefficient pointer */
emilmont 1:fdd22bb7aa52 640 pb = pCoeffs;
emilmont 1:fdd22bb7aa52 641
emilmont 1:fdd22bb7aa52 642 tapCnt = numTaps;
emilmont 1:fdd22bb7aa52 643
emilmont 1:fdd22bb7aa52 644 /* Perform the multiply-accumulates */
emilmont 1:fdd22bb7aa52 645 do
emilmont 1:fdd22bb7aa52 646 {
emilmont 1:fdd22bb7aa52 647 /* acc = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */
emilmont 1:fdd22bb7aa52 648 acc += (q31_t) * px++ * *pb++;
emilmont 1:fdd22bb7aa52 649 tapCnt--;
emilmont 1:fdd22bb7aa52 650 } while(tapCnt > 0u);
emilmont 1:fdd22bb7aa52 651
emilmont 1:fdd22bb7aa52 652 /* The result is in 2.30 format. Convert to 1.15
emilmont 1:fdd22bb7aa52 653 ** Then store the output in the destination buffer. */
emilmont 1:fdd22bb7aa52 654 *pDst++ = (q15_t) __SSAT((acc >> 15u), 16);
emilmont 1:fdd22bb7aa52 655
emilmont 1:fdd22bb7aa52 656 /* Advance state pointer by 1 for the next sample */
emilmont 1:fdd22bb7aa52 657 pState = pState + 1;
emilmont 1:fdd22bb7aa52 658
emilmont 1:fdd22bb7aa52 659 /* Decrement the samples loop counter */
emilmont 1:fdd22bb7aa52 660 blkCnt--;
emilmont 1:fdd22bb7aa52 661 }
emilmont 1:fdd22bb7aa52 662
emilmont 1:fdd22bb7aa52 663 /* Processing is complete.
emilmont 1:fdd22bb7aa52 664 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
emilmont 1:fdd22bb7aa52 665 ** This prepares the state buffer for the next function call. */
emilmont 1:fdd22bb7aa52 666
emilmont 1:fdd22bb7aa52 667 /* Points to the start of the state buffer */
emilmont 1:fdd22bb7aa52 668 pStateCurnt = S->pState;
emilmont 1:fdd22bb7aa52 669
emilmont 1:fdd22bb7aa52 670 /* Copy numTaps number of values */
emilmont 1:fdd22bb7aa52 671 tapCnt = (numTaps - 1u);
emilmont 1:fdd22bb7aa52 672
emilmont 1:fdd22bb7aa52 673 /* copy data */
emilmont 1:fdd22bb7aa52 674 while(tapCnt > 0u)
emilmont 1:fdd22bb7aa52 675 {
emilmont 1:fdd22bb7aa52 676 *pStateCurnt++ = *pState++;
emilmont 1:fdd22bb7aa52 677
emilmont 1:fdd22bb7aa52 678 /* Decrement the loop counter */
emilmont 1:fdd22bb7aa52 679 tapCnt--;
emilmont 1:fdd22bb7aa52 680 }
emilmont 1:fdd22bb7aa52 681
emilmont 1:fdd22bb7aa52 682 }
emilmont 1:fdd22bb7aa52 683
mbed_official 3:7a284390b0ce 684 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
emilmont 1:fdd22bb7aa52 685
emilmont 1:fdd22bb7aa52 686
emilmont 1:fdd22bb7aa52 687
emilmont 1:fdd22bb7aa52 688
emilmont 1:fdd22bb7aa52 689 /**
emilmont 1:fdd22bb7aa52 690 * @} end of FIR group
emilmont 1:fdd22bb7aa52 691 */