CMSIS DSP library

Dependents:   performance_timer Surfboard_ gps2rtty Capstone ... more

Legacy Warning

This is an mbed 2 library. To learn more about mbed OS 5, visit the docs.

Committer:
emilmont
Date:
Wed Nov 28 12:30:09 2012 +0000
Revision:
1:fdd22bb7aa52
Child:
2:da51fb522205
DSP library code

Who changed what in which revision?

User | Revision | Line number | New contents of line
emilmont 1:fdd22bb7aa52 1 /* ----------------------------------------------------------------------
emilmont 1:fdd22bb7aa52 2 * Copyright (C) 2010 ARM Limited. All rights reserved.
emilmont 1:fdd22bb7aa52 3 *
emilmont 1:fdd22bb7aa52 4 * $Date: 15. February 2012
emilmont 1:fdd22bb7aa52 5 * $Revision: V1.1.0
emilmont 1:fdd22bb7aa52 6 *
emilmont 1:fdd22bb7aa52 7 * Project: CMSIS DSP Library
emilmont 1:fdd22bb7aa52 8 * Title: arm_fir_q15.c
emilmont 1:fdd22bb7aa52 9 *
emilmont 1:fdd22bb7aa52 10 * Description: Q15 FIR filter processing function.
emilmont 1:fdd22bb7aa52 11 *
emilmont 1:fdd22bb7aa52 12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
emilmont 1:fdd22bb7aa52 13 *
emilmont 1:fdd22bb7aa52 14 * Version 1.1.0 2012/02/15
emilmont 1:fdd22bb7aa52 15 * Updated with more optimizations, bug fixes and minor API changes.
emilmont 1:fdd22bb7aa52 16 *
emilmont 1:fdd22bb7aa52 17 * Version 1.0.10 2011/7/15
emilmont 1:fdd22bb7aa52 18 * Big Endian support added and Merged M0 and M3/M4 Source code.
emilmont 1:fdd22bb7aa52 19 *
emilmont 1:fdd22bb7aa52 20 * Version 1.0.3 2010/11/29
emilmont 1:fdd22bb7aa52 21 * Re-organized the CMSIS folders and updated documentation.
emilmont 1:fdd22bb7aa52 22 *
emilmont 1:fdd22bb7aa52 23 * Version 1.0.2 2010/11/11
emilmont 1:fdd22bb7aa52 24 * Documentation updated.
emilmont 1:fdd22bb7aa52 25 *
emilmont 1:fdd22bb7aa52 26 * Version 1.0.1 2010/10/05
emilmont 1:fdd22bb7aa52 27 * Production release and review comments incorporated.
emilmont 1:fdd22bb7aa52 28 *
emilmont 1:fdd22bb7aa52 29 * Version 1.0.0 2010/09/20
emilmont 1:fdd22bb7aa52 30 * Production release and review comments incorporated.
emilmont 1:fdd22bb7aa52 31 *
emilmont 1:fdd22bb7aa52 32 * Version 0.0.5 2010/04/26
emilmont 1:fdd22bb7aa52 33 * incorporated review comments and updated with latest CMSIS layer
emilmont 1:fdd22bb7aa52 34 *
emilmont 1:fdd22bb7aa52 35 * Version 0.0.3 2010/03/10
emilmont 1:fdd22bb7aa52 36 * Initial version
emilmont 1:fdd22bb7aa52 37 * -------------------------------------------------------------------- */
emilmont 1:fdd22bb7aa52 38
emilmont 1:fdd22bb7aa52 39 #include "arm_math.h"
emilmont 1:fdd22bb7aa52 40
emilmont 1:fdd22bb7aa52 41 /**
emilmont 1:fdd22bb7aa52 42 * @ingroup groupFilters
emilmont 1:fdd22bb7aa52 43 */
emilmont 1:fdd22bb7aa52 44
emilmont 1:fdd22bb7aa52 45 /**
emilmont 1:fdd22bb7aa52 46 * @addtogroup FIR
emilmont 1:fdd22bb7aa52 47 * @{
emilmont 1:fdd22bb7aa52 48 */
emilmont 1:fdd22bb7aa52 49
emilmont 1:fdd22bb7aa52 50 /**
emilmont 1:fdd22bb7aa52 51 * @brief Processing function for the Q15 FIR filter.
emilmont 1:fdd22bb7aa52 52 * @param[in] *S points to an instance of the Q15 FIR structure.
emilmont 1:fdd22bb7aa52 53 * @param[in] *pSrc points to the block of input data.
emilmont 1:fdd22bb7aa52 54 * @param[out] *pDst points to the block of output data.
emilmont 1:fdd22bb7aa52 55 * @param[in] blockSize number of samples to process per call.
emilmont 1:fdd22bb7aa52 56 * @return none.
emilmont 1:fdd22bb7aa52 57 *
emilmont 1:fdd22bb7aa52 58 *
emilmont 1:fdd22bb7aa52 59 * \par Restrictions
emilmont 1:fdd22bb7aa52 60 * If the silicon does not support unaligned memory access, enable the macro UNALIGNED_SUPPORT_DISABLE.
emilmont 1:fdd22bb7aa52 61 * In this case the input, output, and state buffers should be aligned to a 32-bit boundary.
emilmont 1:fdd22bb7aa52 62 *
emilmont 1:fdd22bb7aa52 63 * <b>Scaling and Overflow Behavior:</b>
emilmont 1:fdd22bb7aa52 64 * \par
emilmont 1:fdd22bb7aa52 65 * The function is implemented using a 64-bit internal accumulator.
emilmont 1:fdd22bb7aa52 66 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
emilmont 1:fdd22bb7aa52 67 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
emilmont 1:fdd22bb7aa52 68 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
emilmont 1:fdd22bb7aa52 69 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
emilmont 1:fdd22bb7aa52 70 * Lastly, the accumulator is saturated to yield a result in 1.15 format.
emilmont 1:fdd22bb7aa52 71 *
emilmont 1:fdd22bb7aa52 72 * \par
emilmont 1:fdd22bb7aa52 73 * Refer to the function <code>arm_fir_fast_q15()</code> for a faster but less precise implementation of this function.
emilmont 1:fdd22bb7aa52 74 */
emilmont 1:fdd22bb7aa52 75
emilmont 1:fdd22bb7aa52 76 #ifndef ARM_MATH_CM0
emilmont 1:fdd22bb7aa52 77
emilmont 1:fdd22bb7aa52 78 /* Run the below code for Cortex-M4 and Cortex-M3 */
emilmont 1:fdd22bb7aa52 79
emilmont 1:fdd22bb7aa52 80 #ifndef UNALIGNED_SUPPORT_DISABLE
emilmont 1:fdd22bb7aa52 81
emilmont 1:fdd22bb7aa52 82
emilmont 1:fdd22bb7aa52 83 void arm_fir_q15(
emilmont 1:fdd22bb7aa52 84 const arm_fir_instance_q15 * S,
emilmont 1:fdd22bb7aa52 85 q15_t * pSrc,
emilmont 1:fdd22bb7aa52 86 q15_t * pDst,
emilmont 1:fdd22bb7aa52 87 uint32_t blockSize)
emilmont 1:fdd22bb7aa52 88 {
emilmont 1:fdd22bb7aa52 89 q15_t *pState = S->pState; /* State pointer; advanced as output samples are produced */
emilmont 1:fdd22bb7aa52 90 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
emilmont 1:fdd22bb7aa52 91 q15_t *pStateCurnt; /* Points to the state location where the next input sample is written */
emilmont 1:fdd22bb7aa52 92 q15_t *px1; /* Temporary q15 pointer for state buffer */
emilmont 1:fdd22bb7aa52 93 q15_t *pb; /* Temporary q15 pointer for coefficient buffer */
emilmont 1:fdd22bb7aa52 94 q31_t x0, x1, x2, x3, c0; /* Temporary variables, each holding a pair of q15 state or coefficient values */
emilmont 1:fdd22bb7aa52 95 q63_t acc0, acc1, acc2, acc3; /* 64-bit accumulators, one per output sample */
emilmont 1:fdd22bb7aa52 96 uint32_t numTaps = S->numTaps; /* Number of taps in the filter */
emilmont 1:fdd22bb7aa52 97 uint32_t tapCnt, blkCnt; /* Loop counters */
emilmont 1:fdd22bb7aa52 98
emilmont 1:fdd22bb7aa52 99
emilmont 1:fdd22bb7aa52 100 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
emilmont 1:fdd22bb7aa52 101 /* pStateCurnt points to the location where the new input data should be written */
emilmont 1:fdd22bb7aa52 102 pStateCurnt = &(S->pState[(numTaps - 1u)]);
emilmont 1:fdd22bb7aa52 103
emilmont 1:fdd22bb7aa52 104 /* Apply loop unrolling and compute 4 output values simultaneously.
emilmont 1:fdd22bb7aa52 105 * The variables acc0 ... acc3 hold output values that are being computed:
emilmont 1:fdd22bb7aa52 106 *
emilmont 1:fdd22bb7aa52 107 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]
emilmont 1:fdd22bb7aa52 108 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]
emilmont 1:fdd22bb7aa52 109 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]
emilmont 1:fdd22bb7aa52 110 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3]
emilmont 1:fdd22bb7aa52 111 */
emilmont 1:fdd22bb7aa52 112
emilmont 1:fdd22bb7aa52 113 blkCnt = blockSize >> 2;
emilmont 1:fdd22bb7aa52 114
emilmont 1:fdd22bb7aa52 115 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
emilmont 1:fdd22bb7aa52 116 ** A second loop below computes the remaining 1 to 3 samples. */
emilmont 1:fdd22bb7aa52 117 while(blkCnt > 0u)
emilmont 1:fdd22bb7aa52 118 {
emilmont 1:fdd22bb7aa52 119 /* Copy four new input samples into the state buffer.
emilmont 1:fdd22bb7aa52 120 ** Use 32-bit SIMD to move the 16-bit data. Only requires two copies. */
emilmont 1:fdd22bb7aa52 121 *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++;
emilmont 1:fdd22bb7aa52 122 *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++;
emilmont 1:fdd22bb7aa52 123
emilmont 1:fdd22bb7aa52 124 /* Set all accumulators to zero */
emilmont 1:fdd22bb7aa52 125 acc0 = 0;
emilmont 1:fdd22bb7aa52 126 acc1 = 0;
emilmont 1:fdd22bb7aa52 127 acc2 = 0;
emilmont 1:fdd22bb7aa52 128 acc3 = 0;
emilmont 1:fdd22bb7aa52 129
emilmont 1:fdd22bb7aa52 130 /* Initialize state pointer of type q15 */
emilmont 1:fdd22bb7aa52 131 px1 = pState;
emilmont 1:fdd22bb7aa52 132
emilmont 1:fdd22bb7aa52 133 /* Initialize coefficient pointer of type q15 */
emilmont 1:fdd22bb7aa52 134 pb = pCoeffs;
emilmont 1:fdd22bb7aa52 135
emilmont 1:fdd22bb7aa52 136 /* Read the first two samples from the state buffer: x[n-N], x[n-N-1] */
emilmont 1:fdd22bb7aa52 137 x0 = _SIMD32_OFFSET(px1);
emilmont 1:fdd22bb7aa52 138
emilmont 1:fdd22bb7aa52 139 /* Read the second and third samples from the state buffer (read starts at px1 + 1): x[n-N-1], x[n-N-2] */
emilmont 1:fdd22bb7aa52 140 x1 = _SIMD32_OFFSET(px1 + 1u);
emilmont 1:fdd22bb7aa52 141
emilmont 1:fdd22bb7aa52 142 px1 += 2u;
emilmont 1:fdd22bb7aa52 143
emilmont 1:fdd22bb7aa52 144 /* Loop over the number of taps. Unroll by a factor of 4.
emilmont 1:fdd22bb7aa52 145 ** Repeat until we've computed numTaps-(numTaps%4) coefficients. */
emilmont 1:fdd22bb7aa52 146 tapCnt = numTaps >> 2;
emilmont 1:fdd22bb7aa52 147
emilmont 1:fdd22bb7aa52 148 while(tapCnt > 0u)
emilmont 1:fdd22bb7aa52 149 {
emilmont 1:fdd22bb7aa52 150 /* Read the first two coefficients using SIMD: b[N] and b[N-1] coefficients */
emilmont 1:fdd22bb7aa52 151 c0 = *__SIMD32(pb)++;
emilmont 1:fdd22bb7aa52 152
emilmont 1:fdd22bb7aa52 153 /* acc0 += b[N] * x[n-N] + b[N-1] * x[n-N-1] */
emilmont 1:fdd22bb7aa52 154 acc0 = __SMLALD(x0, c0, acc0);
emilmont 1:fdd22bb7aa52 155
emilmont 1:fdd22bb7aa52 156 /* acc1 += b[N] * x[n-N-1] + b[N-1] * x[n-N-2] */
emilmont 1:fdd22bb7aa52 157 acc1 = __SMLALD(x1, c0, acc1);
emilmont 1:fdd22bb7aa52 158
emilmont 1:fdd22bb7aa52 159 /* Read state x[n-N-2], x[n-N-3] */
emilmont 1:fdd22bb7aa52 160 x2 = _SIMD32_OFFSET(px1);
emilmont 1:fdd22bb7aa52 161
emilmont 1:fdd22bb7aa52 162 /* Read state x[n-N-3], x[n-N-4] */
emilmont 1:fdd22bb7aa52 163 x3 = _SIMD32_OFFSET(px1 + 1u);
emilmont 1:fdd22bb7aa52 164
emilmont 1:fdd22bb7aa52 165 /* acc2 += b[N] * x[n-N-2] + b[N-1] * x[n-N-3] */
emilmont 1:fdd22bb7aa52 166 acc2 = __SMLALD(x2, c0, acc2);
emilmont 1:fdd22bb7aa52 167
emilmont 1:fdd22bb7aa52 168 /* acc3 += b[N] * x[n-N-3] + b[N-1] * x[n-N-4] */
emilmont 1:fdd22bb7aa52 169 acc3 = __SMLALD(x3, c0, acc3);
emilmont 1:fdd22bb7aa52 170
emilmont 1:fdd22bb7aa52 171 /* Read coefficients b[N-2], b[N-3] */
emilmont 1:fdd22bb7aa52 172 c0 = *__SIMD32(pb)++;
emilmont 1:fdd22bb7aa52 173
emilmont 1:fdd22bb7aa52 174 /* acc0 += b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */
emilmont 1:fdd22bb7aa52 175 acc0 = __SMLALD(x2, c0, acc0);
emilmont 1:fdd22bb7aa52 176
emilmont 1:fdd22bb7aa52 177 /* acc1 += b[N-2] * x[n-N-3] + b[N-3] * x[n-N-4] */
emilmont 1:fdd22bb7aa52 178 acc1 = __SMLALD(x3, c0, acc1);
emilmont 1:fdd22bb7aa52 179
emilmont 1:fdd22bb7aa52 180 /* Read state x[n-N-4], x[n-N-5] */
emilmont 1:fdd22bb7aa52 181 x0 = _SIMD32_OFFSET(px1 + 2u);
emilmont 1:fdd22bb7aa52 182
emilmont 1:fdd22bb7aa52 183 /* Read state x[n-N-5], x[n-N-6] */
emilmont 1:fdd22bb7aa52 184 x1 = _SIMD32_OFFSET(px1 + 3u);
emilmont 1:fdd22bb7aa52 185
emilmont 1:fdd22bb7aa52 186 /* acc2 += b[N-2] * x[n-N-4] + b[N-3] * x[n-N-5] */
emilmont 1:fdd22bb7aa52 187 acc2 = __SMLALD(x0, c0, acc2);
emilmont 1:fdd22bb7aa52 188
emilmont 1:fdd22bb7aa52 189 /* acc3 += b[N-2] * x[n-N-5] + b[N-3] * x[n-N-6] */
emilmont 1:fdd22bb7aa52 190 acc3 = __SMLALD(x1, c0, acc3);
emilmont 1:fdd22bb7aa52 191
emilmont 1:fdd22bb7aa52 192 px1 += 4u;
emilmont 1:fdd22bb7aa52 193
emilmont 1:fdd22bb7aa52 194 tapCnt--;
emilmont 1:fdd22bb7aa52 195
emilmont 1:fdd22bb7aa52 196 }
emilmont 1:fdd22bb7aa52 197
emilmont 1:fdd22bb7aa52 198
emilmont 1:fdd22bb7aa52 199 /* If the filter length is not a multiple of 4, compute the remaining filter taps.
emilmont 1:fdd22bb7aa52 200 ** This is always 2 taps since the filter length is even. */
emilmont 1:fdd22bb7aa52 201 if((numTaps & 0x3u) != 0u)
emilmont 1:fdd22bb7aa52 202 {
emilmont 1:fdd22bb7aa52 203 /* Read 2 coefficients */
emilmont 1:fdd22bb7aa52 204 c0 = *__SIMD32(pb)++;
emilmont 1:fdd22bb7aa52 205
emilmont 1:fdd22bb7aa52 206 /* Fetch the state pairs for acc2 and acc3; x0 and x1 still hold the pairs for acc0 and acc1 */
emilmont 1:fdd22bb7aa52 207 x2 = _SIMD32_OFFSET(px1);
emilmont 1:fdd22bb7aa52 208
emilmont 1:fdd22bb7aa52 209 x3 = _SIMD32_OFFSET(px1 + 1u);
emilmont 1:fdd22bb7aa52 210
emilmont 1:fdd22bb7aa52 211 /* Perform the multiply-accumulates */
emilmont 1:fdd22bb7aa52 212 acc0 = __SMLALD(x0, c0, acc0);
emilmont 1:fdd22bb7aa52 213
emilmont 1:fdd22bb7aa52 214 px1 += 2u;
emilmont 1:fdd22bb7aa52 215
emilmont 1:fdd22bb7aa52 216 acc1 = __SMLALD(x1, c0, acc1);
emilmont 1:fdd22bb7aa52 217 acc2 = __SMLALD(x2, c0, acc2);
emilmont 1:fdd22bb7aa52 218 acc3 = __SMLALD(x3, c0, acc3);
emilmont 1:fdd22bb7aa52 219 }
emilmont 1:fdd22bb7aa52 220
emilmont 1:fdd22bb7aa52 221 /* The results in the 4 accumulators are in 34.30 format. Drop the low 15 bits and saturate to 1.15.
emilmont 1:fdd22bb7aa52 222 ** Then store the 4 outputs in the destination buffer, packed two per 32-bit write. */
emilmont 1:fdd22bb7aa52 223
emilmont 1:fdd22bb7aa52 224 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 225
emilmont 1:fdd22bb7aa52 226 *__SIMD32(pDst)++ =
emilmont 1:fdd22bb7aa52 227 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
emilmont 1:fdd22bb7aa52 228 *__SIMD32(pDst)++ =
emilmont 1:fdd22bb7aa52 229 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
emilmont 1:fdd22bb7aa52 230
emilmont 1:fdd22bb7aa52 231 #else
emilmont 1:fdd22bb7aa52 232
emilmont 1:fdd22bb7aa52 233 *__SIMD32(pDst)++ =
emilmont 1:fdd22bb7aa52 234 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
emilmont 1:fdd22bb7aa52 235 *__SIMD32(pDst)++ =
emilmont 1:fdd22bb7aa52 236 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
emilmont 1:fdd22bb7aa52 237
emilmont 1:fdd22bb7aa52 238 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 239
emilmont 1:fdd22bb7aa52 240
emilmont 1:fdd22bb7aa52 241
emilmont 1:fdd22bb7aa52 242 /* Advance the state pointer by 4 to process the next group of 4 samples */
emilmont 1:fdd22bb7aa52 243 pState = pState + 4;
emilmont 1:fdd22bb7aa52 244
emilmont 1:fdd22bb7aa52 245 /* Decrement the loop counter */
emilmont 1:fdd22bb7aa52 246 blkCnt--;
emilmont 1:fdd22bb7aa52 247 }
emilmont 1:fdd22bb7aa52 248
emilmont 1:fdd22bb7aa52 249 /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
emilmont 1:fdd22bb7aa52 250 ** No loop unrolling is used. */
emilmont 1:fdd22bb7aa52 251 blkCnt = blockSize % 0x4u;
emilmont 1:fdd22bb7aa52 252 while(blkCnt > 0u)
emilmont 1:fdd22bb7aa52 253 {
emilmont 1:fdd22bb7aa52 254 /* Copy one new input sample into the state buffer */
emilmont 1:fdd22bb7aa52 255 *pStateCurnt++ = *pSrc++;
emilmont 1:fdd22bb7aa52 256
emilmont 1:fdd22bb7aa52 257 /* Set the accumulator to zero */
emilmont 1:fdd22bb7aa52 258 acc0 = 0;
emilmont 1:fdd22bb7aa52 259
emilmont 1:fdd22bb7aa52 260 /* Initialize state pointer of type q15 */
emilmont 1:fdd22bb7aa52 261 px1 = pState;
emilmont 1:fdd22bb7aa52 262
emilmont 1:fdd22bb7aa52 263 /* Initialize coefficient pointer of type q15 */
emilmont 1:fdd22bb7aa52 264 pb = pCoeffs;
emilmont 1:fdd22bb7aa52 265
emilmont 1:fdd22bb7aa52 266 tapCnt = numTaps >> 1;
emilmont 1:fdd22bb7aa52 267
emilmont 1:fdd22bb7aa52 268 do
emilmont 1:fdd22bb7aa52 269 {
emilmont 1:fdd22bb7aa52 270
emilmont 1:fdd22bb7aa52 271 c0 = *__SIMD32(pb)++;
emilmont 1:fdd22bb7aa52 272 x0 = *__SIMD32(px1)++;
emilmont 1:fdd22bb7aa52 273
emilmont 1:fdd22bb7aa52 274 acc0 = __SMLALD(x0, c0, acc0);
emilmont 1:fdd22bb7aa52 275 tapCnt--;
emilmont 1:fdd22bb7aa52 276 }
emilmont 1:fdd22bb7aa52 277 while(tapCnt > 0u);
emilmont 1:fdd22bb7aa52 278
emilmont 1:fdd22bb7aa52 279 /* The result is in 34.30 format. Drop the low 15 bits and saturate to 1.15.
emilmont 1:fdd22bb7aa52 280 ** Then store the output in the destination buffer. */
emilmont 1:fdd22bb7aa52 281 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
emilmont 1:fdd22bb7aa52 282
emilmont 1:fdd22bb7aa52 283 /* Advance state pointer by 1 for the next sample */
emilmont 1:fdd22bb7aa52 284 pState = pState + 1;
emilmont 1:fdd22bb7aa52 285
emilmont 1:fdd22bb7aa52 286 /* Decrement the loop counter */
emilmont 1:fdd22bb7aa52 287 blkCnt--;
emilmont 1:fdd22bb7aa52 288 }
emilmont 1:fdd22bb7aa52 289
emilmont 1:fdd22bb7aa52 290 /* Processing is complete.
emilmont 1:fdd22bb7aa52 291 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
emilmont 1:fdd22bb7aa52 292 ** This prepares the state buffer for the next function call. */
emilmont 1:fdd22bb7aa52 293
emilmont 1:fdd22bb7aa52 294 /* Points to the start of the state buffer */
emilmont 1:fdd22bb7aa52 295 pStateCurnt = S->pState;
emilmont 1:fdd22bb7aa52 296
emilmont 1:fdd22bb7aa52 297 /* Number of 4-sample groups to copy; each group is moved with two 32-bit writes */
emilmont 1:fdd22bb7aa52 298 tapCnt = (numTaps - 1u) >> 2;
emilmont 1:fdd22bb7aa52 299
emilmont 1:fdd22bb7aa52 300 while(tapCnt > 0u)
emilmont 1:fdd22bb7aa52 301 {
emilmont 1:fdd22bb7aa52 302
emilmont 1:fdd22bb7aa52 303 /* Copy state values to start of state buffer */
emilmont 1:fdd22bb7aa52 304 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
emilmont 1:fdd22bb7aa52 305 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
emilmont 1:fdd22bb7aa52 306
emilmont 1:fdd22bb7aa52 307 tapCnt--;
emilmont 1:fdd22bb7aa52 308
emilmont 1:fdd22bb7aa52 309 }
emilmont 1:fdd22bb7aa52 310
emilmont 1:fdd22bb7aa52 311 /* Calculation of count for remaining q15_t data */
emilmont 1:fdd22bb7aa52 312 tapCnt = (numTaps - 1u) % 0x4u;
emilmont 1:fdd22bb7aa52 313
emilmont 1:fdd22bb7aa52 314 /* Copy the remaining samples one at a time */
emilmont 1:fdd22bb7aa52 315 while(tapCnt > 0u)
emilmont 1:fdd22bb7aa52 316 {
emilmont 1:fdd22bb7aa52 317 *pStateCurnt++ = *pState++;
emilmont 1:fdd22bb7aa52 318
emilmont 1:fdd22bb7aa52 319 /* Decrement the loop counter */
emilmont 1:fdd22bb7aa52 320 tapCnt--;
emilmont 1:fdd22bb7aa52 321 }
emilmont 1:fdd22bb7aa52 322 }
emilmont 1:fdd22bb7aa52 323
emilmont 1:fdd22bb7aa52 324 #else /* UNALIGNED_SUPPORT_DISABLE */
emilmont 1:fdd22bb7aa52 325
emilmont 1:fdd22bb7aa52 326 void arm_fir_q15(
emilmont 1:fdd22bb7aa52 327 const arm_fir_instance_q15 * S,
emilmont 1:fdd22bb7aa52 328 q15_t * pSrc,
emilmont 1:fdd22bb7aa52 329 q15_t * pDst,
emilmont 1:fdd22bb7aa52 330 uint32_t blockSize)
emilmont 1:fdd22bb7aa52 331 {
emilmont 1:fdd22bb7aa52 332 q15_t *pState = S->pState; /* State pointer; advanced as output samples are produced */
emilmont 1:fdd22bb7aa52 333 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
emilmont 1:fdd22bb7aa52 334 q15_t *pStateCurnt; /* Points to the state location where the next input sample is written */
emilmont 1:fdd22bb7aa52 335 q63_t acc0, acc1, acc2, acc3; /* 64-bit accumulators, one per output sample */
emilmont 1:fdd22bb7aa52 336 q15_t *pb; /* Temporary q15 pointer for coefficient buffer */
emilmont 1:fdd22bb7aa52 337 q15_t *px; /* Temporary q15 pointer for state buffer accesses */
emilmont 1:fdd22bb7aa52 338 q31_t x0, x1, x2, c0; /* Temporary variables, each holding a pair of q15 state or coefficient values */
emilmont 1:fdd22bb7aa52 339 uint32_t numTaps = S->numTaps; /* Number of taps in the filter */
emilmont 1:fdd22bb7aa52 340 uint32_t tapCnt, blkCnt; /* Loop counters */
emilmont 1:fdd22bb7aa52 341
emilmont 1:fdd22bb7aa52 342
emilmont 1:fdd22bb7aa52 343 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
emilmont 1:fdd22bb7aa52 344 /* pStateCurnt points to the location where the new input data should be written */
emilmont 1:fdd22bb7aa52 345 pStateCurnt = &(S->pState[(numTaps - 1u)]);
emilmont 1:fdd22bb7aa52 346
emilmont 1:fdd22bb7aa52 347 /* Apply loop unrolling and compute 4 output values simultaneously.
emilmont 1:fdd22bb7aa52 348 * The variables acc0 ... acc3 hold output values that are being computed:
emilmont 1:fdd22bb7aa52 349 *
emilmont 1:fdd22bb7aa52 350 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]
emilmont 1:fdd22bb7aa52 351 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]
emilmont 1:fdd22bb7aa52 352 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]
emilmont 1:fdd22bb7aa52 353 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3]
emilmont 1:fdd22bb7aa52 354 */
emilmont 1:fdd22bb7aa52 355
emilmont 1:fdd22bb7aa52 356 blkCnt = blockSize >> 2;
emilmont 1:fdd22bb7aa52 357
emilmont 1:fdd22bb7aa52 358 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
emilmont 1:fdd22bb7aa52 359 ** A second loop below computes the remaining 1 to 3 samples. */
emilmont 1:fdd22bb7aa52 360 while(blkCnt > 0u)
emilmont 1:fdd22bb7aa52 361 {
emilmont 1:fdd22bb7aa52 362 /* Copy four new input samples into the state buffer.
emilmont 1:fdd22bb7aa52 363 ** The samples are copied one q15 value at a time; no 32-bit SIMD copy is used in this variant. */
emilmont 1:fdd22bb7aa52 364 *pStateCurnt++ = *pSrc++;
emilmont 1:fdd22bb7aa52 365 *pStateCurnt++ = *pSrc++;
emilmont 1:fdd22bb7aa52 366 *pStateCurnt++ = *pSrc++;
emilmont 1:fdd22bb7aa52 367 *pStateCurnt++ = *pSrc++;
emilmont 1:fdd22bb7aa52 368
emilmont 1:fdd22bb7aa52 369
emilmont 1:fdd22bb7aa52 370 /* Set all accumulators to zero */
emilmont 1:fdd22bb7aa52 371 acc0 = 0;
emilmont 1:fdd22bb7aa52 372 acc1 = 0;
emilmont 1:fdd22bb7aa52 373 acc2 = 0;
emilmont 1:fdd22bb7aa52 374 acc3 = 0;
emilmont 1:fdd22bb7aa52 375
emilmont 1:fdd22bb7aa52 376 /* Initialize the q15 state pointer; the state is read below two samples at a time */
emilmont 1:fdd22bb7aa52 377 px = pState;
emilmont 1:fdd22bb7aa52 378
emilmont 1:fdd22bb7aa52 379 /* Initialize the q15 coefficient pointer; coefficients are read below two at a time */
emilmont 1:fdd22bb7aa52 380 pb = pCoeffs;
emilmont 1:fdd22bb7aa52 381
emilmont 1:fdd22bb7aa52 382 /* Read the first two samples from the state buffer: x[n-N], x[n-N-1] */
emilmont 1:fdd22bb7aa52 383 x0 = *__SIMD32(px)++;
emilmont 1:fdd22bb7aa52 384
emilmont 1:fdd22bb7aa52 385 /* Read the third and fourth samples from the state buffer: x[n-N-2], x[n-N-3] */
emilmont 1:fdd22bb7aa52 386 x2 = *__SIMD32(px)++;
emilmont 1:fdd22bb7aa52 387
emilmont 1:fdd22bb7aa52 388 /* Loop over the number of taps. Unroll by a factor of 4.
emilmont 1:fdd22bb7aa52 389 ** Repeat until we've computed numTaps-(numTaps%4) coefficients. */
emilmont 1:fdd22bb7aa52 390 tapCnt = numTaps >> 2;
emilmont 1:fdd22bb7aa52 391
emilmont 1:fdd22bb7aa52 392 while(tapCnt > 0)
emilmont 1:fdd22bb7aa52 393 {
emilmont 1:fdd22bb7aa52 394 /* Read the first two coefficients using SIMD: b[N] and b[N-1] coefficients */
emilmont 1:fdd22bb7aa52 395 c0 = *__SIMD32(pb)++;
emilmont 1:fdd22bb7aa52 396
emilmont 1:fdd22bb7aa52 397 /* acc0 += b[N] * x[n-N] + b[N-1] * x[n-N-1] */
emilmont 1:fdd22bb7aa52 398 acc0 = __SMLALD(x0, c0, acc0);
emilmont 1:fdd22bb7aa52 399
emilmont 1:fdd22bb7aa52 400 /* acc2 += b[N] * x[n-N-2] + b[N-1] * x[n-N-3] */
emilmont 1:fdd22bb7aa52 401 acc2 = __SMLALD(x2, c0, acc2);
emilmont 1:fdd22bb7aa52 402
emilmont 1:fdd22bb7aa52 403 /* pack x[n-N-1] and x[n-N-2] from the already-loaded pairs instead of reading at an odd sample offset */
emilmont 1:fdd22bb7aa52 404 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 405 x1 = __PKHBT(x2, x0, 0);
emilmont 1:fdd22bb7aa52 406 #else
emilmont 1:fdd22bb7aa52 407 x1 = __PKHBT(x0, x2, 0);
emilmont 1:fdd22bb7aa52 408 #endif
emilmont 1:fdd22bb7aa52 409
emilmont 1:fdd22bb7aa52 410 /* Read state x[n-N-4], x[n-N-5] */
emilmont 1:fdd22bb7aa52 411 x0 = _SIMD32_OFFSET(px);
emilmont 1:fdd22bb7aa52 412
emilmont 1:fdd22bb7aa52 413 /* acc1 += b[N] * x[n-N-1] + b[N-1] * x[n-N-2] */
emilmont 1:fdd22bb7aa52 414 acc1 = __SMLALDX(x1, c0, acc1);
emilmont 1:fdd22bb7aa52 415
emilmont 1:fdd22bb7aa52 416 /* pack x[n-N-3] and x[n-N-4] */
emilmont 1:fdd22bb7aa52 417 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 418 x1 = __PKHBT(x0, x2, 0);
emilmont 1:fdd22bb7aa52 419 #else
emilmont 1:fdd22bb7aa52 420 x1 = __PKHBT(x2, x0, 0);
emilmont 1:fdd22bb7aa52 421 #endif
emilmont 1:fdd22bb7aa52 422
emilmont 1:fdd22bb7aa52 423 /* acc3 += b[N] * x[n-N-3] + b[N-1] * x[n-N-4] */
emilmont 1:fdd22bb7aa52 424 acc3 = __SMLALDX(x1, c0, acc3);
emilmont 1:fdd22bb7aa52 425
emilmont 1:fdd22bb7aa52 426 /* Read coefficients b[N-2], b[N-3] */
emilmont 1:fdd22bb7aa52 427 c0 = *__SIMD32(pb)++;
emilmont 1:fdd22bb7aa52 428
emilmont 1:fdd22bb7aa52 429 /* acc0 += b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */
emilmont 1:fdd22bb7aa52 430 acc0 = __SMLALD(x2, c0, acc0);
emilmont 1:fdd22bb7aa52 431
emilmont 1:fdd22bb7aa52 432 /* Read state x[n-N-6], x[n-N-7] with offset */
emilmont 1:fdd22bb7aa52 433 x2 = _SIMD32_OFFSET(px + 2u);
emilmont 1:fdd22bb7aa52 434
emilmont 1:fdd22bb7aa52 435 /* acc2 += b[N-2] * x[n-N-4] + b[N-3] * x[n-N-5] */
emilmont 1:fdd22bb7aa52 436 acc2 = __SMLALD(x0, c0, acc2);
emilmont 1:fdd22bb7aa52 437
emilmont 1:fdd22bb7aa52 438 /* acc1 += b[N-2] * x[n-N-3] + b[N-3] * x[n-N-4] */
emilmont 1:fdd22bb7aa52 439 acc1 = __SMLALDX(x1, c0, acc1);
emilmont 1:fdd22bb7aa52 440
emilmont 1:fdd22bb7aa52 441 /* pack x[n-N-5] and x[n-N-6] */
emilmont 1:fdd22bb7aa52 442 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 443 x1 = __PKHBT(x2, x0, 0);
emilmont 1:fdd22bb7aa52 444 #else
emilmont 1:fdd22bb7aa52 445 x1 = __PKHBT(x0, x2, 0);
emilmont 1:fdd22bb7aa52 446 #endif
emilmont 1:fdd22bb7aa52 447
emilmont 1:fdd22bb7aa52 448 /* acc3 += b[N-2] * x[n-N-5] + b[N-3] * x[n-N-6] */
emilmont 1:fdd22bb7aa52 449 acc3 = __SMLALDX(x1, c0, acc3);
emilmont 1:fdd22bb7aa52 450
emilmont 1:fdd22bb7aa52 451 /* Update state pointer for next state reading */
emilmont 1:fdd22bb7aa52 452 px += 4u;
emilmont 1:fdd22bb7aa52 453
emilmont 1:fdd22bb7aa52 454 /* Decrement tap count */
emilmont 1:fdd22bb7aa52 455 tapCnt--;
emilmont 1:fdd22bb7aa52 456
emilmont 1:fdd22bb7aa52 457 }
emilmont 1:fdd22bb7aa52 458
emilmont 1:fdd22bb7aa52 459 /* If the filter length is not a multiple of 4, compute the remaining filter taps.
emilmont 1:fdd22bb7aa52 460 ** This is always 2 taps since the filter length is even. */
emilmont 1:fdd22bb7aa52 461 if((numTaps & 0x3u) != 0u)
emilmont 1:fdd22bb7aa52 462 {
emilmont 1:fdd22bb7aa52 463
emilmont 1:fdd22bb7aa52 464 /* Read last two coefficients */
emilmont 1:fdd22bb7aa52 465 c0 = *__SIMD32(pb)++;
emilmont 1:fdd22bb7aa52 466
emilmont 1:fdd22bb7aa52 467 /* Perform the multiply-accumulates */
emilmont 1:fdd22bb7aa52 468 acc0 = __SMLALD(x0, c0, acc0);
emilmont 1:fdd22bb7aa52 469 acc2 = __SMLALD(x2, c0, acc2);
emilmont 1:fdd22bb7aa52 470
emilmont 1:fdd22bb7aa52 471 /* pack state variables */
emilmont 1:fdd22bb7aa52 472 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 473 x1 = __PKHBT(x2, x0, 0);
emilmont 1:fdd22bb7aa52 474 #else
emilmont 1:fdd22bb7aa52 475 x1 = __PKHBT(x0, x2, 0);
emilmont 1:fdd22bb7aa52 476 #endif
emilmont 1:fdd22bb7aa52 477
emilmont 1:fdd22bb7aa52 478 /* Read last state variables */
emilmont 1:fdd22bb7aa52 479 x0 = *__SIMD32(px);
emilmont 1:fdd22bb7aa52 480
emilmont 1:fdd22bb7aa52 481 /* Perform the multiply-accumulates */
emilmont 1:fdd22bb7aa52 482 acc1 = __SMLALDX(x1, c0, acc1);
emilmont 1:fdd22bb7aa52 483
emilmont 1:fdd22bb7aa52 484 /* pack state variables */
emilmont 1:fdd22bb7aa52 485 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 486 x1 = __PKHBT(x0, x2, 0);
emilmont 1:fdd22bb7aa52 487 #else
emilmont 1:fdd22bb7aa52 488 x1 = __PKHBT(x2, x0, 0);
emilmont 1:fdd22bb7aa52 489 #endif
emilmont 1:fdd22bb7aa52 490
emilmont 1:fdd22bb7aa52 491 /* Perform the multiply-accumulates */
emilmont 1:fdd22bb7aa52 492 acc3 = __SMLALDX(x1, c0, acc3);
emilmont 1:fdd22bb7aa52 493 }
emilmont 1:fdd22bb7aa52 494
emilmont 1:fdd22bb7aa52 495 /* The results in the 4 accumulators are in 34.30 format. Drop the low 15 bits and saturate to 1.15.
emilmont 1:fdd22bb7aa52 496 ** Then store the 4 outputs in the destination buffer, packed two per 32-bit write. */
emilmont 1:fdd22bb7aa52 497
emilmont 1:fdd22bb7aa52 498 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 499
emilmont 1:fdd22bb7aa52 500 *__SIMD32(pDst)++ =
emilmont 1:fdd22bb7aa52 501 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
emilmont 1:fdd22bb7aa52 502
emilmont 1:fdd22bb7aa52 503 *__SIMD32(pDst)++ =
emilmont 1:fdd22bb7aa52 504 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
emilmont 1:fdd22bb7aa52 505
emilmont 1:fdd22bb7aa52 506 #else
emilmont 1:fdd22bb7aa52 507
emilmont 1:fdd22bb7aa52 508 *__SIMD32(pDst)++ =
emilmont 1:fdd22bb7aa52 509 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
emilmont 1:fdd22bb7aa52 510
emilmont 1:fdd22bb7aa52 511 *__SIMD32(pDst)++ =
emilmont 1:fdd22bb7aa52 512 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
emilmont 1:fdd22bb7aa52 513
emilmont 1:fdd22bb7aa52 514 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 515
emilmont 1:fdd22bb7aa52 516 /* Advance the state pointer by 4 to process the next group of 4 samples */
emilmont 1:fdd22bb7aa52 517 pState = pState + 4;
emilmont 1:fdd22bb7aa52 518
emilmont 1:fdd22bb7aa52 519 /* Decrement the loop counter */
emilmont 1:fdd22bb7aa52 520 blkCnt--;
emilmont 1:fdd22bb7aa52 521 }
emilmont 1:fdd22bb7aa52 522
emilmont 1:fdd22bb7aa52 523 /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
emilmont 1:fdd22bb7aa52 524 ** No loop unrolling is used. */
emilmont 1:fdd22bb7aa52 525 blkCnt = blockSize % 0x4u;
emilmont 1:fdd22bb7aa52 526 while(blkCnt > 0u)
emilmont 1:fdd22bb7aa52 527 {
emilmont 1:fdd22bb7aa52 528 /* Copy one new input sample into the state buffer */
emilmont 1:fdd22bb7aa52 529 *pStateCurnt++ = *pSrc++;
emilmont 1:fdd22bb7aa52 530
emilmont 1:fdd22bb7aa52 531 /* Set the accumulator to zero */
emilmont 1:fdd22bb7aa52 532 acc0 = 0;
emilmont 1:fdd22bb7aa52 533
emilmont 1:fdd22bb7aa52 534 /* Initialize state and coefficient pointers; the loop below uses scalar q15 reads */
emilmont 1:fdd22bb7aa52 535 px = pState;
emilmont 1:fdd22bb7aa52 536 pb = pCoeffs;
emilmont 1:fdd22bb7aa52 537
emilmont 1:fdd22bb7aa52 538 tapCnt = numTaps >> 1u;
emilmont 1:fdd22bb7aa52 539
emilmont 1:fdd22bb7aa52 540 do
emilmont 1:fdd22bb7aa52 541 {
emilmont 1:fdd22bb7aa52 542 acc0 += (q31_t) * px++ * *pb++;
emilmont 1:fdd22bb7aa52 543 acc0 += (q31_t) * px++ * *pb++;
emilmont 1:fdd22bb7aa52 544 tapCnt--;
emilmont 1:fdd22bb7aa52 545 }
emilmont 1:fdd22bb7aa52 546 while(tapCnt > 0u);
emilmont 1:fdd22bb7aa52 547
emilmont 1:fdd22bb7aa52 548 /* The result is in 34.30 format. Drop the low 15 bits and saturate to 1.15.
emilmont 1:fdd22bb7aa52 549 ** Then store the output in the destination buffer. */
emilmont 1:fdd22bb7aa52 550 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
emilmont 1:fdd22bb7aa52 551
emilmont 1:fdd22bb7aa52 552 /* Advance state pointer by 1 for the next sample */
emilmont 1:fdd22bb7aa52 553 pState = pState + 1u;
emilmont 1:fdd22bb7aa52 554
emilmont 1:fdd22bb7aa52 555 /* Decrement the loop counter */
emilmont 1:fdd22bb7aa52 556 blkCnt--;
emilmont 1:fdd22bb7aa52 557 }
emilmont 1:fdd22bb7aa52 558
emilmont 1:fdd22bb7aa52 559 /* Processing is complete.
emilmont 1:fdd22bb7aa52 560 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
emilmont 1:fdd22bb7aa52 561 ** This prepares the state buffer for the next function call. */
emilmont 1:fdd22bb7aa52 562
emilmont 1:fdd22bb7aa52 563 /* Points to the start of the state buffer */
emilmont 1:fdd22bb7aa52 564 pStateCurnt = S->pState;
emilmont 1:fdd22bb7aa52 565
emilmont 1:fdd22bb7aa52 566 /* Number of 4-sample groups to copy; each group is moved one q15 sample at a time */
emilmont 1:fdd22bb7aa52 567 tapCnt = (numTaps - 1u) >> 2;
emilmont 1:fdd22bb7aa52 568
emilmont 1:fdd22bb7aa52 569 while(tapCnt > 0u)
emilmont 1:fdd22bb7aa52 570 {
emilmont 1:fdd22bb7aa52 571 *pStateCurnt++ = *pState++;
emilmont 1:fdd22bb7aa52 572 *pStateCurnt++ = *pState++;
emilmont 1:fdd22bb7aa52 573 *pStateCurnt++ = *pState++;
emilmont 1:fdd22bb7aa52 574 *pStateCurnt++ = *pState++;
emilmont 1:fdd22bb7aa52 575
emilmont 1:fdd22bb7aa52 576 tapCnt--;
emilmont 1:fdd22bb7aa52 577
emilmont 1:fdd22bb7aa52 578 }
emilmont 1:fdd22bb7aa52 579
emilmont 1:fdd22bb7aa52 580 /* Calculation of count for remaining q15_t data */
emilmont 1:fdd22bb7aa52 581 tapCnt = (numTaps - 1u) % 0x4u;
emilmont 1:fdd22bb7aa52 582
emilmont 1:fdd22bb7aa52 583 /* Copy the remaining samples one at a time */
emilmont 1:fdd22bb7aa52 584 while(tapCnt > 0u)
emilmont 1:fdd22bb7aa52 585 {
emilmont 1:fdd22bb7aa52 586 *pStateCurnt++ = *pState++;
emilmont 1:fdd22bb7aa52 587
emilmont 1:fdd22bb7aa52 588 /* Decrement the loop counter */
emilmont 1:fdd22bb7aa52 589 tapCnt--;
emilmont 1:fdd22bb7aa52 590 }
emilmont 1:fdd22bb7aa52 591 }
emilmont 1:fdd22bb7aa52 592
emilmont 1:fdd22bb7aa52 593
emilmont 1:fdd22bb7aa52 594 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
emilmont 1:fdd22bb7aa52 595
emilmont 1:fdd22bb7aa52 596 #else /* ARM_MATH_CM0 */
emilmont 1:fdd22bb7aa52 597
emilmont 1:fdd22bb7aa52 598
emilmont 1:fdd22bb7aa52 599 /* Run the below code for Cortex-M0 */
emilmont 1:fdd22bb7aa52 600
emilmont 1:fdd22bb7aa52 601 void arm_fir_q15(
emilmont 1:fdd22bb7aa52 602 const arm_fir_instance_q15 * S,
emilmont 1:fdd22bb7aa52 603 q15_t * pSrc,
emilmont 1:fdd22bb7aa52 604 q15_t * pDst,
emilmont 1:fdd22bb7aa52 605 uint32_t blockSize)
emilmont 1:fdd22bb7aa52 606 {
emilmont 1:fdd22bb7aa52 607 q15_t *pState = S->pState; /* State pointer */
emilmont 1:fdd22bb7aa52 608 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
emilmont 1:fdd22bb7aa52 609 q15_t *pStateCurnt; /* Points to the current sample of the state */
emilmont 1:fdd22bb7aa52 610
emilmont 1:fdd22bb7aa52 611
emilmont 1:fdd22bb7aa52 612
emilmont 1:fdd22bb7aa52 613 q15_t *px; /* Temporary pointer for state buffer */
emilmont 1:fdd22bb7aa52 614 q15_t *pb; /* Temporary pointer for coefficient buffer */
emilmont 1:fdd22bb7aa52 615 q63_t acc; /* Accumulator */
emilmont 1:fdd22bb7aa52 616 uint32_t numTaps = S->numTaps; /* Number of nTaps in the filter */
emilmont 1:fdd22bb7aa52 617 uint32_t tapCnt, blkCnt; /* Loop counters */
emilmont 1:fdd22bb7aa52 618
emilmont 1:fdd22bb7aa52 619 /* S->pState buffer contains previous frame (numTaps - 1) samples */
emilmont 1:fdd22bb7aa52 620 /* pStateCurnt points to the location where the new input data should be written */
emilmont 1:fdd22bb7aa52 621 pStateCurnt = &(S->pState[(numTaps - 1u)]);
emilmont 1:fdd22bb7aa52 622
emilmont 1:fdd22bb7aa52 623 /* Initialize blkCnt with blockSize */
emilmont 1:fdd22bb7aa52 624 blkCnt = blockSize;
emilmont 1:fdd22bb7aa52 625
emilmont 1:fdd22bb7aa52 626 while(blkCnt > 0u)
emilmont 1:fdd22bb7aa52 627 {
emilmont 1:fdd22bb7aa52 628 /* Copy one sample at a time into state buffer */
emilmont 1:fdd22bb7aa52 629 *pStateCurnt++ = *pSrc++;
emilmont 1:fdd22bb7aa52 630
emilmont 1:fdd22bb7aa52 631 /* Set the accumulator to zero */
emilmont 1:fdd22bb7aa52 632 acc = 0;
emilmont 1:fdd22bb7aa52 633
emilmont 1:fdd22bb7aa52 634 /* Initialize state pointer */
emilmont 1:fdd22bb7aa52 635 px = pState;
emilmont 1:fdd22bb7aa52 636
emilmont 1:fdd22bb7aa52 637 /* Initialize Coefficient pointer */
emilmont 1:fdd22bb7aa52 638 pb = pCoeffs;
emilmont 1:fdd22bb7aa52 639
emilmont 1:fdd22bb7aa52 640 tapCnt = numTaps;
emilmont 1:fdd22bb7aa52 641
emilmont 1:fdd22bb7aa52 642 /* Perform the multiply-accumulates */
emilmont 1:fdd22bb7aa52 643 do
emilmont 1:fdd22bb7aa52 644 {
emilmont 1:fdd22bb7aa52 645 /* acc = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */
emilmont 1:fdd22bb7aa52 646 acc += (q31_t) * px++ * *pb++;
emilmont 1:fdd22bb7aa52 647 tapCnt--;
emilmont 1:fdd22bb7aa52 648 } while(tapCnt > 0u);
emilmont 1:fdd22bb7aa52 649
emilmont 1:fdd22bb7aa52 650 /* The result is in 2.30 format. Convert to 1.15
emilmont 1:fdd22bb7aa52 651 ** Then store the output in the destination buffer. */
emilmont 1:fdd22bb7aa52 652 *pDst++ = (q15_t) __SSAT((acc >> 15u), 16);
emilmont 1:fdd22bb7aa52 653
emilmont 1:fdd22bb7aa52 654 /* Advance state pointer by 1 for the next sample */
emilmont 1:fdd22bb7aa52 655 pState = pState + 1;
emilmont 1:fdd22bb7aa52 656
emilmont 1:fdd22bb7aa52 657 /* Decrement the samples loop counter */
emilmont 1:fdd22bb7aa52 658 blkCnt--;
emilmont 1:fdd22bb7aa52 659 }
emilmont 1:fdd22bb7aa52 660
emilmont 1:fdd22bb7aa52 661 /* Processing is complete.
emilmont 1:fdd22bb7aa52 662 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
emilmont 1:fdd22bb7aa52 663 ** This prepares the state buffer for the next function call. */
emilmont 1:fdd22bb7aa52 664
emilmont 1:fdd22bb7aa52 665 /* Points to the start of the state buffer */
emilmont 1:fdd22bb7aa52 666 pStateCurnt = S->pState;
emilmont 1:fdd22bb7aa52 667
emilmont 1:fdd22bb7aa52 668 /* Copy numTaps number of values */
emilmont 1:fdd22bb7aa52 669 tapCnt = (numTaps - 1u);
emilmont 1:fdd22bb7aa52 670
emilmont 1:fdd22bb7aa52 671 /* copy data */
emilmont 1:fdd22bb7aa52 672 while(tapCnt > 0u)
emilmont 1:fdd22bb7aa52 673 {
emilmont 1:fdd22bb7aa52 674 *pStateCurnt++ = *pState++;
emilmont 1:fdd22bb7aa52 675
emilmont 1:fdd22bb7aa52 676 /* Decrement the loop counter */
emilmont 1:fdd22bb7aa52 677 tapCnt--;
emilmont 1:fdd22bb7aa52 678 }
emilmont 1:fdd22bb7aa52 679
emilmont 1:fdd22bb7aa52 680 }
emilmont 1:fdd22bb7aa52 681
emilmont 1:fdd22bb7aa52 682 #endif /* #ifndef ARM_MATH_CM0 */
emilmont 1:fdd22bb7aa52 683
emilmont 1:fdd22bb7aa52 684
emilmont 1:fdd22bb7aa52 685
emilmont 1:fdd22bb7aa52 686
emilmont 1:fdd22bb7aa52 687 /**
emilmont 1:fdd22bb7aa52 688 * @} end of FIR group
emilmont 1:fdd22bb7aa52 689 */