V4.0.1 of the ARM CMSIS DSP libraries. Note that arm_bitreversal2.s, arm_cfft_f32.c and arm_rfft_fast_f32.c had to be removed. arm_bitreversal2.s will not assemble with the online tools. So, the fast f32 FFT functions are not yet available. All the other FFT functions are available.

Dependents:   MPU9150_Example fir_f32 fir_f32 MPU9150_nucleo_noni2cdev ... more

Committer:
emh203
Date:
Mon Jul 28 15:03:15 2014 +0000
Revision:
0:3d9c67d97d6f
First working commit. Had to remove arm_bitreversal2.s, arm_cfft_f32.c and arm_rfft_fast_f32.c because the .s file will not assemble. For now these functions are removed so that we at least have a library for the other functions.

Who changed what in which revision?

User | Revision | Line number | New contents of line
emh203 0:3d9c67d97d6f 1 /* ----------------------------------------------------------------------
emh203 0:3d9c67d97d6f 2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
emh203 0:3d9c67d97d6f 3 *
emh203 0:3d9c67d97d6f 4 * $Date: 12. March 2014
emh203 0:3d9c67d97d6f 5 * $Revision: V1.4.3
emh203 0:3d9c67d97d6f 6 *
emh203 0:3d9c67d97d6f 7 * Project: CMSIS DSP Library
emh203 0:3d9c67d97d6f 8 * Title: arm_fir_decimate_fast_q15.c
emh203 0:3d9c67d97d6f 9 *
emh203 0:3d9c67d97d6f 10 * Description: Fast Q15 FIR Decimator.
emh203 0:3d9c67d97d6f 11 *
emh203 0:3d9c67d97d6f 12 * Target Processor: Cortex-M4/Cortex-M3
emh203 0:3d9c67d97d6f 13 *
emh203 0:3d9c67d97d6f 14 * Redistribution and use in source and binary forms, with or without
emh203 0:3d9c67d97d6f 15 * modification, are permitted provided that the following conditions
emh203 0:3d9c67d97d6f 16 * are met:
emh203 0:3d9c67d97d6f 17 * - Redistributions of source code must retain the above copyright
emh203 0:3d9c67d97d6f 18 * notice, this list of conditions and the following disclaimer.
emh203 0:3d9c67d97d6f 19 * - Redistributions in binary form must reproduce the above copyright
emh203 0:3d9c67d97d6f 20 * notice, this list of conditions and the following disclaimer in
emh203 0:3d9c67d97d6f 21 * the documentation and/or other materials provided with the
emh203 0:3d9c67d97d6f 22 * distribution.
emh203 0:3d9c67d97d6f 23 * - Neither the name of ARM LIMITED nor the names of its contributors
emh203 0:3d9c67d97d6f 24 * may be used to endorse or promote products derived from this
emh203 0:3d9c67d97d6f 25 * software without specific prior written permission.
emh203 0:3d9c67d97d6f 26 *
emh203 0:3d9c67d97d6f 27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
emh203 0:3d9c67d97d6f 28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
emh203 0:3d9c67d97d6f 29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
emh203 0:3d9c67d97d6f 30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
emh203 0:3d9c67d97d6f 31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
emh203 0:3d9c67d97d6f 32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
emh203 0:3d9c67d97d6f 33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
emh203 0:3d9c67d97d6f 34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
emh203 0:3d9c67d97d6f 35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
emh203 0:3d9c67d97d6f 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
emh203 0:3d9c67d97d6f 37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
emh203 0:3d9c67d97d6f 38 * POSSIBILITY OF SUCH DAMAGE.
emh203 0:3d9c67d97d6f 39 * -------------------------------------------------------------------- */
emh203 0:3d9c67d97d6f 40
emh203 0:3d9c67d97d6f 41 #include "arm_math.h"
emh203 0:3d9c67d97d6f 42
emh203 0:3d9c67d97d6f 43 /**
emh203 0:3d9c67d97d6f 44 * @ingroup groupFilters
emh203 0:3d9c67d97d6f 45 */
emh203 0:3d9c67d97d6f 46
emh203 0:3d9c67d97d6f 47 /**
emh203 0:3d9c67d97d6f 48 * @addtogroup FIR_decimate
emh203 0:3d9c67d97d6f 49 * @{
emh203 0:3d9c67d97d6f 50 */
emh203 0:3d9c67d97d6f 51
emh203 0:3d9c67d97d6f 52 /**
emh203 0:3d9c67d97d6f 53 * @brief Processing function for the Q15 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
emh203 0:3d9c67d97d6f 54 * @param[in] *S points to an instance of the Q15 FIR decimator structure.
emh203 0:3d9c67d97d6f 55 * @param[in] *pSrc points to the block of input data.
emh203 0:3d9c67d97d6f 56 * @param[out] *pDst points to the block of output data
emh203 0:3d9c67d97d6f 57 * @param[in] blockSize number of input samples to process per call.
emh203 0:3d9c67d97d6f 58 * @return none
emh203 0:3d9c67d97d6f 59 *
emh203 0:3d9c67d97d6f 60 * \par Restrictions
emh203 0:3d9c67d97d6f 61 * If the silicon does not support unaligned memory access enable the macro UNALIGNED_SUPPORT_DISABLE
emh203 0:3d9c67d97d6f 62 * In this case input, output, state buffers should be aligned by 32-bit
emh203 0:3d9c67d97d6f 63 *
emh203 0:3d9c67d97d6f 64 * <b>Scaling and Overflow Behavior:</b>
emh203 0:3d9c67d97d6f 65 * \par
emh203 0:3d9c67d97d6f 66 * This fast version uses a 32-bit accumulator with 2.30 format.
emh203 0:3d9c67d97d6f 67 * The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit.
emh203 0:3d9c67d97d6f 68 * Thus, if the accumulator result overflows it wraps around and distorts the result.
emh203 0:3d9c67d97d6f 69 * In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits (log2 is read as log to the base 2).
emh203 0:3d9c67d97d6f 70 * The 2.30 accumulator is then truncated to 2.15 format and saturated to yield the 1.15 result.
emh203 0:3d9c67d97d6f 71 *
emh203 0:3d9c67d97d6f 72 * \par
emh203 0:3d9c67d97d6f 73 * Refer to the function <code>arm_fir_decimate_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion.
emh203 0:3d9c67d97d6f 74 * Both the slow and the fast versions use the same instance structure.
emh203 0:3d9c67d97d6f 75 * Use the function <code>arm_fir_decimate_init_q15()</code> to initialize the filter structure.
emh203 0:3d9c67d97d6f 76 */
emh203 0:3d9c67d97d6f 77
emh203 0:3d9c67d97d6f 78 #ifndef UNALIGNED_SUPPORT_DISABLE
emh203 0:3d9c67d97d6f 79
emh203 0:3d9c67d97d6f 80 void arm_fir_decimate_fast_q15(
emh203 0:3d9c67d97d6f 81 const arm_fir_decimate_instance_q15 * S,
emh203 0:3d9c67d97d6f 82 q15_t * pSrc,
emh203 0:3d9c67d97d6f 83 q15_t * pDst,
emh203 0:3d9c67d97d6f 84 uint32_t blockSize)
emh203 0:3d9c67d97d6f 85 {
emh203 0:3d9c67d97d6f 86 q15_t *pState = S->pState; /* State pointer */
emh203 0:3d9c67d97d6f 87 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
emh203 0:3d9c67d97d6f 88 q15_t *pStateCurnt; /* Points to the current sample of the state */
emh203 0:3d9c67d97d6f 89 q15_t *px; /* Temporary pointer for state buffer */
emh203 0:3d9c67d97d6f 90 q15_t *pb; /* Temporary pointer coefficient buffer */
emh203 0:3d9c67d97d6f 91 q31_t x0, x1, c0, c1; /* Temporary variables to hold state and coefficient values */
emh203 0:3d9c67d97d6f 92 q31_t sum0; /* Accumulators */
emh203 0:3d9c67d97d6f 93 q31_t acc0, acc1;
emh203 0:3d9c67d97d6f 94 q15_t *px0, *px1;
emh203 0:3d9c67d97d6f 95 uint32_t blkCntN3;
emh203 0:3d9c67d97d6f 96 uint32_t numTaps = S->numTaps; /* Number of taps */
emh203 0:3d9c67d97d6f 97 uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M; /* Loop counters */
emh203 0:3d9c67d97d6f 98
emh203 0:3d9c67d97d6f 99
emh203 0:3d9c67d97d6f 100 /* S->pState buffer contains previous frame (numTaps - 1) samples */
emh203 0:3d9c67d97d6f 101 /* pStateCurnt points to the location where the new input data should be written */
emh203 0:3d9c67d97d6f 102 pStateCurnt = S->pState + (numTaps - 1u);
emh203 0:3d9c67d97d6f 103
emh203 0:3d9c67d97d6f 104
emh203 0:3d9c67d97d6f 105 /* Total number of output samples to be computed */
emh203 0:3d9c67d97d6f 106 blkCnt = outBlockSize / 2;
emh203 0:3d9c67d97d6f 107 blkCntN3 = outBlockSize - (2 * blkCnt);
emh203 0:3d9c67d97d6f 108
emh203 0:3d9c67d97d6f 109
emh203 0:3d9c67d97d6f 110 while(blkCnt > 0u)
emh203 0:3d9c67d97d6f 111 {
emh203 0:3d9c67d97d6f 112 /* Copy decimation factor number of new input samples into the state buffer */
emh203 0:3d9c67d97d6f 113 i = 2 * S->M;
emh203 0:3d9c67d97d6f 114
emh203 0:3d9c67d97d6f 115 do
emh203 0:3d9c67d97d6f 116 {
emh203 0:3d9c67d97d6f 117 *pStateCurnt++ = *pSrc++;
emh203 0:3d9c67d97d6f 118
emh203 0:3d9c67d97d6f 119 } while(--i);
emh203 0:3d9c67d97d6f 120
emh203 0:3d9c67d97d6f 121 /* Set accumulator to zero */
emh203 0:3d9c67d97d6f 122 acc0 = 0;
emh203 0:3d9c67d97d6f 123 acc1 = 0;
emh203 0:3d9c67d97d6f 124
emh203 0:3d9c67d97d6f 125 /* Initialize state pointer */
emh203 0:3d9c67d97d6f 126 px0 = pState;
emh203 0:3d9c67d97d6f 127
emh203 0:3d9c67d97d6f 128 px1 = pState + S->M;
emh203 0:3d9c67d97d6f 129
emh203 0:3d9c67d97d6f 130
emh203 0:3d9c67d97d6f 131 /* Initialize coeff pointer */
emh203 0:3d9c67d97d6f 132 pb = pCoeffs;
emh203 0:3d9c67d97d6f 133
emh203 0:3d9c67d97d6f 134 /* Loop unrolling. Process 4 taps at a time. */
emh203 0:3d9c67d97d6f 135 tapCnt = numTaps >> 2;
emh203 0:3d9c67d97d6f 136
emh203 0:3d9c67d97d6f 137 /* Loop over the number of taps. Unroll by a factor of 4.
emh203 0:3d9c67d97d6f 138 ** Repeat until we've computed numTaps-4 coefficients. */
emh203 0:3d9c67d97d6f 139 while(tapCnt > 0u)
emh203 0:3d9c67d97d6f 140 {
emh203 0:3d9c67d97d6f 141 /* Read the Read b[numTaps-1] and b[numTaps-2] coefficients */
emh203 0:3d9c67d97d6f 142 c0 = *__SIMD32(pb)++;
emh203 0:3d9c67d97d6f 143
emh203 0:3d9c67d97d6f 144 /* Read x[n-numTaps-1] and x[n-numTaps-2]sample */
emh203 0:3d9c67d97d6f 145 x0 = *__SIMD32(px0)++;
emh203 0:3d9c67d97d6f 146
emh203 0:3d9c67d97d6f 147 x1 = *__SIMD32(px1)++;
emh203 0:3d9c67d97d6f 148
emh203 0:3d9c67d97d6f 149 /* Perform the multiply-accumulate */
emh203 0:3d9c67d97d6f 150 acc0 = __SMLAD(x0, c0, acc0);
emh203 0:3d9c67d97d6f 151
emh203 0:3d9c67d97d6f 152 acc1 = __SMLAD(x1, c0, acc1);
emh203 0:3d9c67d97d6f 153
emh203 0:3d9c67d97d6f 154 /* Read the b[numTaps-3] and b[numTaps-4] coefficient */
emh203 0:3d9c67d97d6f 155 c0 = *__SIMD32(pb)++;
emh203 0:3d9c67d97d6f 156
emh203 0:3d9c67d97d6f 157 /* Read x[n-numTaps-2] and x[n-numTaps-3] sample */
emh203 0:3d9c67d97d6f 158 x0 = *__SIMD32(px0)++;
emh203 0:3d9c67d97d6f 159
emh203 0:3d9c67d97d6f 160 x1 = *__SIMD32(px1)++;
emh203 0:3d9c67d97d6f 161
emh203 0:3d9c67d97d6f 162 /* Perform the multiply-accumulate */
emh203 0:3d9c67d97d6f 163 acc0 = __SMLAD(x0, c0, acc0);
emh203 0:3d9c67d97d6f 164
emh203 0:3d9c67d97d6f 165 acc1 = __SMLAD(x1, c0, acc1);
emh203 0:3d9c67d97d6f 166
emh203 0:3d9c67d97d6f 167 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 168 tapCnt--;
emh203 0:3d9c67d97d6f 169 }
emh203 0:3d9c67d97d6f 170
emh203 0:3d9c67d97d6f 171 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
emh203 0:3d9c67d97d6f 172 tapCnt = numTaps % 0x4u;
emh203 0:3d9c67d97d6f 173
emh203 0:3d9c67d97d6f 174 while(tapCnt > 0u)
emh203 0:3d9c67d97d6f 175 {
emh203 0:3d9c67d97d6f 176 /* Read coefficients */
emh203 0:3d9c67d97d6f 177 c0 = *pb++;
emh203 0:3d9c67d97d6f 178
emh203 0:3d9c67d97d6f 179 /* Fetch 1 state variable */
emh203 0:3d9c67d97d6f 180 x0 = *px0++;
emh203 0:3d9c67d97d6f 181
emh203 0:3d9c67d97d6f 182 x1 = *px1++;
emh203 0:3d9c67d97d6f 183
emh203 0:3d9c67d97d6f 184 /* Perform the multiply-accumulate */
emh203 0:3d9c67d97d6f 185 acc0 = __SMLAD(x0, c0, acc0);
emh203 0:3d9c67d97d6f 186 acc1 = __SMLAD(x1, c0, acc1);
emh203 0:3d9c67d97d6f 187
emh203 0:3d9c67d97d6f 188 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 189 tapCnt--;
emh203 0:3d9c67d97d6f 190 }
emh203 0:3d9c67d97d6f 191
emh203 0:3d9c67d97d6f 192 /* Advance the state pointer by the decimation factor
emh203 0:3d9c67d97d6f 193 * to process the next group of decimation factor number samples */
emh203 0:3d9c67d97d6f 194 pState = pState + S->M * 2;
emh203 0:3d9c67d97d6f 195
emh203 0:3d9c67d97d6f 196 /* Store filter output, smlad returns the values in 2.14 format */
emh203 0:3d9c67d97d6f 197 /* so downsacle by 15 to get output in 1.15 */
emh203 0:3d9c67d97d6f 198 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
emh203 0:3d9c67d97d6f 199 *pDst++ = (q15_t) (__SSAT((acc1 >> 15), 16));
emh203 0:3d9c67d97d6f 200
emh203 0:3d9c67d97d6f 201 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 202 blkCnt--;
emh203 0:3d9c67d97d6f 203 }
emh203 0:3d9c67d97d6f 204
emh203 0:3d9c67d97d6f 205
emh203 0:3d9c67d97d6f 206
emh203 0:3d9c67d97d6f 207 while(blkCntN3 > 0u)
emh203 0:3d9c67d97d6f 208 {
emh203 0:3d9c67d97d6f 209 /* Copy decimation factor number of new input samples into the state buffer */
emh203 0:3d9c67d97d6f 210 i = S->M;
emh203 0:3d9c67d97d6f 211
emh203 0:3d9c67d97d6f 212 do
emh203 0:3d9c67d97d6f 213 {
emh203 0:3d9c67d97d6f 214 *pStateCurnt++ = *pSrc++;
emh203 0:3d9c67d97d6f 215
emh203 0:3d9c67d97d6f 216 } while(--i);
emh203 0:3d9c67d97d6f 217
emh203 0:3d9c67d97d6f 218 /*Set sum to zero */
emh203 0:3d9c67d97d6f 219 sum0 = 0;
emh203 0:3d9c67d97d6f 220
emh203 0:3d9c67d97d6f 221 /* Initialize state pointer */
emh203 0:3d9c67d97d6f 222 px = pState;
emh203 0:3d9c67d97d6f 223
emh203 0:3d9c67d97d6f 224 /* Initialize coeff pointer */
emh203 0:3d9c67d97d6f 225 pb = pCoeffs;
emh203 0:3d9c67d97d6f 226
emh203 0:3d9c67d97d6f 227 /* Loop unrolling. Process 4 taps at a time. */
emh203 0:3d9c67d97d6f 228 tapCnt = numTaps >> 2;
emh203 0:3d9c67d97d6f 229
emh203 0:3d9c67d97d6f 230 /* Loop over the number of taps. Unroll by a factor of 4.
emh203 0:3d9c67d97d6f 231 ** Repeat until we've computed numTaps-4 coefficients. */
emh203 0:3d9c67d97d6f 232 while(tapCnt > 0u)
emh203 0:3d9c67d97d6f 233 {
emh203 0:3d9c67d97d6f 234 /* Read the Read b[numTaps-1] and b[numTaps-2] coefficients */
emh203 0:3d9c67d97d6f 235 c0 = *__SIMD32(pb)++;
emh203 0:3d9c67d97d6f 236
emh203 0:3d9c67d97d6f 237 /* Read x[n-numTaps-1] and x[n-numTaps-2]sample */
emh203 0:3d9c67d97d6f 238 x0 = *__SIMD32(px)++;
emh203 0:3d9c67d97d6f 239
emh203 0:3d9c67d97d6f 240 /* Read the b[numTaps-3] and b[numTaps-4] coefficient */
emh203 0:3d9c67d97d6f 241 c1 = *__SIMD32(pb)++;
emh203 0:3d9c67d97d6f 242
emh203 0:3d9c67d97d6f 243 /* Perform the multiply-accumulate */
emh203 0:3d9c67d97d6f 244 sum0 = __SMLAD(x0, c0, sum0);
emh203 0:3d9c67d97d6f 245
emh203 0:3d9c67d97d6f 246 /* Read x[n-numTaps-2] and x[n-numTaps-3] sample */
emh203 0:3d9c67d97d6f 247 x0 = *__SIMD32(px)++;
emh203 0:3d9c67d97d6f 248
emh203 0:3d9c67d97d6f 249 /* Perform the multiply-accumulate */
emh203 0:3d9c67d97d6f 250 sum0 = __SMLAD(x0, c1, sum0);
emh203 0:3d9c67d97d6f 251
emh203 0:3d9c67d97d6f 252 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 253 tapCnt--;
emh203 0:3d9c67d97d6f 254 }
emh203 0:3d9c67d97d6f 255
emh203 0:3d9c67d97d6f 256 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
emh203 0:3d9c67d97d6f 257 tapCnt = numTaps % 0x4u;
emh203 0:3d9c67d97d6f 258
emh203 0:3d9c67d97d6f 259 while(tapCnt > 0u)
emh203 0:3d9c67d97d6f 260 {
emh203 0:3d9c67d97d6f 261 /* Read coefficients */
emh203 0:3d9c67d97d6f 262 c0 = *pb++;
emh203 0:3d9c67d97d6f 263
emh203 0:3d9c67d97d6f 264 /* Fetch 1 state variable */
emh203 0:3d9c67d97d6f 265 x0 = *px++;
emh203 0:3d9c67d97d6f 266
emh203 0:3d9c67d97d6f 267 /* Perform the multiply-accumulate */
emh203 0:3d9c67d97d6f 268 sum0 = __SMLAD(x0, c0, sum0);
emh203 0:3d9c67d97d6f 269
emh203 0:3d9c67d97d6f 270 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 271 tapCnt--;
emh203 0:3d9c67d97d6f 272 }
emh203 0:3d9c67d97d6f 273
emh203 0:3d9c67d97d6f 274 /* Advance the state pointer by the decimation factor
emh203 0:3d9c67d97d6f 275 * to process the next group of decimation factor number samples */
emh203 0:3d9c67d97d6f 276 pState = pState + S->M;
emh203 0:3d9c67d97d6f 277
emh203 0:3d9c67d97d6f 278 /* Store filter output, smlad returns the values in 2.14 format */
emh203 0:3d9c67d97d6f 279 /* so downsacle by 15 to get output in 1.15 */
emh203 0:3d9c67d97d6f 280 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
emh203 0:3d9c67d97d6f 281
emh203 0:3d9c67d97d6f 282 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 283 blkCntN3--;
emh203 0:3d9c67d97d6f 284 }
emh203 0:3d9c67d97d6f 285
emh203 0:3d9c67d97d6f 286 /* Processing is complete.
emh203 0:3d9c67d97d6f 287 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
emh203 0:3d9c67d97d6f 288 ** This prepares the state buffer for the next function call. */
emh203 0:3d9c67d97d6f 289
emh203 0:3d9c67d97d6f 290 /* Points to the start of the state buffer */
emh203 0:3d9c67d97d6f 291 pStateCurnt = S->pState;
emh203 0:3d9c67d97d6f 292
emh203 0:3d9c67d97d6f 293 i = (numTaps - 1u) >> 2u;
emh203 0:3d9c67d97d6f 294
emh203 0:3d9c67d97d6f 295 /* copy data */
emh203 0:3d9c67d97d6f 296 while(i > 0u)
emh203 0:3d9c67d97d6f 297 {
emh203 0:3d9c67d97d6f 298 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
emh203 0:3d9c67d97d6f 299 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
emh203 0:3d9c67d97d6f 300
emh203 0:3d9c67d97d6f 301 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 302 i--;
emh203 0:3d9c67d97d6f 303 }
emh203 0:3d9c67d97d6f 304
emh203 0:3d9c67d97d6f 305 i = (numTaps - 1u) % 0x04u;
emh203 0:3d9c67d97d6f 306
emh203 0:3d9c67d97d6f 307 /* copy data */
emh203 0:3d9c67d97d6f 308 while(i > 0u)
emh203 0:3d9c67d97d6f 309 {
emh203 0:3d9c67d97d6f 310 *pStateCurnt++ = *pState++;
emh203 0:3d9c67d97d6f 311
emh203 0:3d9c67d97d6f 312 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 313 i--;
emh203 0:3d9c67d97d6f 314 }
emh203 0:3d9c67d97d6f 315 }
emh203 0:3d9c67d97d6f 316
emh203 0:3d9c67d97d6f 317 #else
emh203 0:3d9c67d97d6f 318
emh203 0:3d9c67d97d6f 319
emh203 0:3d9c67d97d6f 320 void arm_fir_decimate_fast_q15(
emh203 0:3d9c67d97d6f 321 const arm_fir_decimate_instance_q15 * S,
emh203 0:3d9c67d97d6f 322 q15_t * pSrc,
emh203 0:3d9c67d97d6f 323 q15_t * pDst,
emh203 0:3d9c67d97d6f 324 uint32_t blockSize)
emh203 0:3d9c67d97d6f 325 {
emh203 0:3d9c67d97d6f 326 q15_t *pState = S->pState; /* State pointer */
emh203 0:3d9c67d97d6f 327 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
emh203 0:3d9c67d97d6f 328 q15_t *pStateCurnt; /* Points to the current sample of the state */
emh203 0:3d9c67d97d6f 329 q15_t *px; /* Temporary pointer for state buffer */
emh203 0:3d9c67d97d6f 330 q15_t *pb; /* Temporary pointer coefficient buffer */
emh203 0:3d9c67d97d6f 331 q15_t x0, x1, c0; /* Temporary variables to hold state and coefficient values */
emh203 0:3d9c67d97d6f 332 q31_t sum0; /* Accumulators */
emh203 0:3d9c67d97d6f 333 q31_t acc0, acc1;
emh203 0:3d9c67d97d6f 334 q15_t *px0, *px1;
emh203 0:3d9c67d97d6f 335 uint32_t blkCntN3;
emh203 0:3d9c67d97d6f 336 uint32_t numTaps = S->numTaps; /* Number of taps */
emh203 0:3d9c67d97d6f 337 uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M; /* Loop counters */
emh203 0:3d9c67d97d6f 338
emh203 0:3d9c67d97d6f 339
emh203 0:3d9c67d97d6f 340 /* S->pState buffer contains previous frame (numTaps - 1) samples */
emh203 0:3d9c67d97d6f 341 /* pStateCurnt points to the location where the new input data should be written */
emh203 0:3d9c67d97d6f 342 pStateCurnt = S->pState + (numTaps - 1u);
emh203 0:3d9c67d97d6f 343
emh203 0:3d9c67d97d6f 344
emh203 0:3d9c67d97d6f 345 /* Total number of output samples to be computed */
emh203 0:3d9c67d97d6f 346 blkCnt = outBlockSize / 2;
emh203 0:3d9c67d97d6f 347 blkCntN3 = outBlockSize - (2 * blkCnt);
emh203 0:3d9c67d97d6f 348
emh203 0:3d9c67d97d6f 349 while(blkCnt > 0u)
emh203 0:3d9c67d97d6f 350 {
emh203 0:3d9c67d97d6f 351 /* Copy decimation factor number of new input samples into the state buffer */
emh203 0:3d9c67d97d6f 352 i = 2 * S->M;
emh203 0:3d9c67d97d6f 353
emh203 0:3d9c67d97d6f 354 do
emh203 0:3d9c67d97d6f 355 {
emh203 0:3d9c67d97d6f 356 *pStateCurnt++ = *pSrc++;
emh203 0:3d9c67d97d6f 357
emh203 0:3d9c67d97d6f 358 } while(--i);
emh203 0:3d9c67d97d6f 359
emh203 0:3d9c67d97d6f 360 /* Set accumulator to zero */
emh203 0:3d9c67d97d6f 361 acc0 = 0;
emh203 0:3d9c67d97d6f 362 acc1 = 0;
emh203 0:3d9c67d97d6f 363
emh203 0:3d9c67d97d6f 364 /* Initialize state pointer */
emh203 0:3d9c67d97d6f 365 px0 = pState;
emh203 0:3d9c67d97d6f 366
emh203 0:3d9c67d97d6f 367 px1 = pState + S->M;
emh203 0:3d9c67d97d6f 368
emh203 0:3d9c67d97d6f 369
emh203 0:3d9c67d97d6f 370 /* Initialize coeff pointer */
emh203 0:3d9c67d97d6f 371 pb = pCoeffs;
emh203 0:3d9c67d97d6f 372
emh203 0:3d9c67d97d6f 373 /* Loop unrolling. Process 4 taps at a time. */
emh203 0:3d9c67d97d6f 374 tapCnt = numTaps >> 2;
emh203 0:3d9c67d97d6f 375
emh203 0:3d9c67d97d6f 376 /* Loop over the number of taps. Unroll by a factor of 4.
emh203 0:3d9c67d97d6f 377 ** Repeat until we've computed numTaps-4 coefficients. */
emh203 0:3d9c67d97d6f 378 while(tapCnt > 0u)
emh203 0:3d9c67d97d6f 379 {
emh203 0:3d9c67d97d6f 380 /* Read the Read b[numTaps-1] coefficients */
emh203 0:3d9c67d97d6f 381 c0 = *pb++;
emh203 0:3d9c67d97d6f 382
emh203 0:3d9c67d97d6f 383 /* Read x[n-numTaps-1] for sample 0 and for sample 1 */
emh203 0:3d9c67d97d6f 384 x0 = *px0++;
emh203 0:3d9c67d97d6f 385 x1 = *px1++;
emh203 0:3d9c67d97d6f 386
emh203 0:3d9c67d97d6f 387 /* Perform the multiply-accumulate */
emh203 0:3d9c67d97d6f 388 acc0 += x0 * c0;
emh203 0:3d9c67d97d6f 389 acc1 += x1 * c0;
emh203 0:3d9c67d97d6f 390
emh203 0:3d9c67d97d6f 391 /* Read the b[numTaps-2] coefficient */
emh203 0:3d9c67d97d6f 392 c0 = *pb++;
emh203 0:3d9c67d97d6f 393
emh203 0:3d9c67d97d6f 394 /* Read x[n-numTaps-2] for sample 0 and sample 1 */
emh203 0:3d9c67d97d6f 395 x0 = *px0++;
emh203 0:3d9c67d97d6f 396 x1 = *px1++;
emh203 0:3d9c67d97d6f 397
emh203 0:3d9c67d97d6f 398 /* Perform the multiply-accumulate */
emh203 0:3d9c67d97d6f 399 acc0 += x0 * c0;
emh203 0:3d9c67d97d6f 400 acc1 += x1 * c0;
emh203 0:3d9c67d97d6f 401
emh203 0:3d9c67d97d6f 402 /* Read the b[numTaps-3] coefficients */
emh203 0:3d9c67d97d6f 403 c0 = *pb++;
emh203 0:3d9c67d97d6f 404
emh203 0:3d9c67d97d6f 405 /* Read x[n-numTaps-3] for sample 0 and sample 1 */
emh203 0:3d9c67d97d6f 406 x0 = *px0++;
emh203 0:3d9c67d97d6f 407 x1 = *px1++;
emh203 0:3d9c67d97d6f 408
emh203 0:3d9c67d97d6f 409 /* Perform the multiply-accumulate */
emh203 0:3d9c67d97d6f 410 acc0 += x0 * c0;
emh203 0:3d9c67d97d6f 411 acc1 += x1 * c0;
emh203 0:3d9c67d97d6f 412
emh203 0:3d9c67d97d6f 413 /* Read the b[numTaps-4] coefficient */
emh203 0:3d9c67d97d6f 414 c0 = *pb++;
emh203 0:3d9c67d97d6f 415
emh203 0:3d9c67d97d6f 416 /* Read x[n-numTaps-4] for sample 0 and sample 1 */
emh203 0:3d9c67d97d6f 417 x0 = *px0++;
emh203 0:3d9c67d97d6f 418 x1 = *px1++;
emh203 0:3d9c67d97d6f 419
emh203 0:3d9c67d97d6f 420 /* Perform the multiply-accumulate */
emh203 0:3d9c67d97d6f 421 acc0 += x0 * c0;
emh203 0:3d9c67d97d6f 422 acc1 += x1 * c0;
emh203 0:3d9c67d97d6f 423
emh203 0:3d9c67d97d6f 424 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 425 tapCnt--;
emh203 0:3d9c67d97d6f 426 }
emh203 0:3d9c67d97d6f 427
emh203 0:3d9c67d97d6f 428 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
emh203 0:3d9c67d97d6f 429 tapCnt = numTaps % 0x4u;
emh203 0:3d9c67d97d6f 430
emh203 0:3d9c67d97d6f 431 while(tapCnt > 0u)
emh203 0:3d9c67d97d6f 432 {
emh203 0:3d9c67d97d6f 433 /* Read coefficients */
emh203 0:3d9c67d97d6f 434 c0 = *pb++;
emh203 0:3d9c67d97d6f 435
emh203 0:3d9c67d97d6f 436 /* Fetch 1 state variable */
emh203 0:3d9c67d97d6f 437 x0 = *px0++;
emh203 0:3d9c67d97d6f 438 x1 = *px1++;
emh203 0:3d9c67d97d6f 439
emh203 0:3d9c67d97d6f 440 /* Perform the multiply-accumulate */
emh203 0:3d9c67d97d6f 441 acc0 += x0 * c0;
emh203 0:3d9c67d97d6f 442 acc1 += x1 * c0;
emh203 0:3d9c67d97d6f 443
emh203 0:3d9c67d97d6f 444 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 445 tapCnt--;
emh203 0:3d9c67d97d6f 446 }
emh203 0:3d9c67d97d6f 447
emh203 0:3d9c67d97d6f 448 /* Advance the state pointer by the decimation factor
emh203 0:3d9c67d97d6f 449 * to process the next group of decimation factor number samples */
emh203 0:3d9c67d97d6f 450 pState = pState + S->M * 2;
emh203 0:3d9c67d97d6f 451
emh203 0:3d9c67d97d6f 452 /* Store filter output, smlad returns the values in 2.14 format */
emh203 0:3d9c67d97d6f 453 /* so downsacle by 15 to get output in 1.15 */
emh203 0:3d9c67d97d6f 454
emh203 0:3d9c67d97d6f 455 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
emh203 0:3d9c67d97d6f 456 *pDst++ = (q15_t) (__SSAT((acc1 >> 15), 16));
emh203 0:3d9c67d97d6f 457
emh203 0:3d9c67d97d6f 458
emh203 0:3d9c67d97d6f 459 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 460 blkCnt--;
emh203 0:3d9c67d97d6f 461 }
emh203 0:3d9c67d97d6f 462
emh203 0:3d9c67d97d6f 463 while(blkCntN3 > 0u)
emh203 0:3d9c67d97d6f 464 {
emh203 0:3d9c67d97d6f 465 /* Copy decimation factor number of new input samples into the state buffer */
emh203 0:3d9c67d97d6f 466 i = S->M;
emh203 0:3d9c67d97d6f 467
emh203 0:3d9c67d97d6f 468 do
emh203 0:3d9c67d97d6f 469 {
emh203 0:3d9c67d97d6f 470 *pStateCurnt++ = *pSrc++;
emh203 0:3d9c67d97d6f 471
emh203 0:3d9c67d97d6f 472 } while(--i);
emh203 0:3d9c67d97d6f 473
emh203 0:3d9c67d97d6f 474 /*Set sum to zero */
emh203 0:3d9c67d97d6f 475 sum0 = 0;
emh203 0:3d9c67d97d6f 476
emh203 0:3d9c67d97d6f 477 /* Initialize state pointer */
emh203 0:3d9c67d97d6f 478 px = pState;
emh203 0:3d9c67d97d6f 479
emh203 0:3d9c67d97d6f 480 /* Initialize coeff pointer */
emh203 0:3d9c67d97d6f 481 pb = pCoeffs;
emh203 0:3d9c67d97d6f 482
emh203 0:3d9c67d97d6f 483 /* Loop unrolling. Process 4 taps at a time. */
emh203 0:3d9c67d97d6f 484 tapCnt = numTaps >> 2;
emh203 0:3d9c67d97d6f 485
emh203 0:3d9c67d97d6f 486 /* Loop over the number of taps. Unroll by a factor of 4.
emh203 0:3d9c67d97d6f 487 ** Repeat until we've computed numTaps-4 coefficients. */
emh203 0:3d9c67d97d6f 488 while(tapCnt > 0u)
emh203 0:3d9c67d97d6f 489 {
emh203 0:3d9c67d97d6f 490 /* Read the Read b[numTaps-1] coefficients */
emh203 0:3d9c67d97d6f 491 c0 = *pb++;
emh203 0:3d9c67d97d6f 492
emh203 0:3d9c67d97d6f 493 /* Read x[n-numTaps-1] and sample */
emh203 0:3d9c67d97d6f 494 x0 = *px++;
emh203 0:3d9c67d97d6f 495
emh203 0:3d9c67d97d6f 496 /* Perform the multiply-accumulate */
emh203 0:3d9c67d97d6f 497 sum0 += x0 * c0;
emh203 0:3d9c67d97d6f 498
emh203 0:3d9c67d97d6f 499 /* Read the b[numTaps-2] coefficient */
emh203 0:3d9c67d97d6f 500 c0 = *pb++;
emh203 0:3d9c67d97d6f 501
emh203 0:3d9c67d97d6f 502 /* Read x[n-numTaps-2] and sample */
emh203 0:3d9c67d97d6f 503 x0 = *px++;
emh203 0:3d9c67d97d6f 504
emh203 0:3d9c67d97d6f 505 /* Perform the multiply-accumulate */
emh203 0:3d9c67d97d6f 506 sum0 += x0 * c0;
emh203 0:3d9c67d97d6f 507
emh203 0:3d9c67d97d6f 508 /* Read the b[numTaps-3] coefficients */
emh203 0:3d9c67d97d6f 509 c0 = *pb++;
emh203 0:3d9c67d97d6f 510
emh203 0:3d9c67d97d6f 511 /* Read x[n-numTaps-3] sample */
emh203 0:3d9c67d97d6f 512 x0 = *px++;
emh203 0:3d9c67d97d6f 513
emh203 0:3d9c67d97d6f 514 /* Perform the multiply-accumulate */
emh203 0:3d9c67d97d6f 515 sum0 += x0 * c0;
emh203 0:3d9c67d97d6f 516
emh203 0:3d9c67d97d6f 517 /* Read the b[numTaps-4] coefficient */
emh203 0:3d9c67d97d6f 518 c0 = *pb++;
emh203 0:3d9c67d97d6f 519
emh203 0:3d9c67d97d6f 520 /* Read x[n-numTaps-4] sample */
emh203 0:3d9c67d97d6f 521 x0 = *px++;
emh203 0:3d9c67d97d6f 522
emh203 0:3d9c67d97d6f 523 /* Perform the multiply-accumulate */
emh203 0:3d9c67d97d6f 524 sum0 += x0 * c0;
emh203 0:3d9c67d97d6f 525
emh203 0:3d9c67d97d6f 526 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 527 tapCnt--;
emh203 0:3d9c67d97d6f 528 }
emh203 0:3d9c67d97d6f 529
emh203 0:3d9c67d97d6f 530 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
emh203 0:3d9c67d97d6f 531 tapCnt = numTaps % 0x4u;
emh203 0:3d9c67d97d6f 532
emh203 0:3d9c67d97d6f 533 while(tapCnt > 0u)
emh203 0:3d9c67d97d6f 534 {
emh203 0:3d9c67d97d6f 535 /* Read coefficients */
emh203 0:3d9c67d97d6f 536 c0 = *pb++;
emh203 0:3d9c67d97d6f 537
emh203 0:3d9c67d97d6f 538 /* Fetch 1 state variable */
emh203 0:3d9c67d97d6f 539 x0 = *px++;
emh203 0:3d9c67d97d6f 540
emh203 0:3d9c67d97d6f 541 /* Perform the multiply-accumulate */
emh203 0:3d9c67d97d6f 542 sum0 += x0 * c0;
emh203 0:3d9c67d97d6f 543
emh203 0:3d9c67d97d6f 544 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 545 tapCnt--;
emh203 0:3d9c67d97d6f 546 }
emh203 0:3d9c67d97d6f 547
emh203 0:3d9c67d97d6f 548 /* Advance the state pointer by the decimation factor
emh203 0:3d9c67d97d6f 549 * to process the next group of decimation factor number samples */
emh203 0:3d9c67d97d6f 550 pState = pState + S->M;
emh203 0:3d9c67d97d6f 551
emh203 0:3d9c67d97d6f 552 /* Store filter output, smlad returns the values in 2.14 format */
emh203 0:3d9c67d97d6f 553 /* so downsacle by 15 to get output in 1.15 */
emh203 0:3d9c67d97d6f 554 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
emh203 0:3d9c67d97d6f 555
emh203 0:3d9c67d97d6f 556 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 557 blkCntN3--;
emh203 0:3d9c67d97d6f 558 }
emh203 0:3d9c67d97d6f 559
emh203 0:3d9c67d97d6f 560 /* Processing is complete.
emh203 0:3d9c67d97d6f 561 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
emh203 0:3d9c67d97d6f 562 ** This prepares the state buffer for the next function call. */
emh203 0:3d9c67d97d6f 563
emh203 0:3d9c67d97d6f 564 /* Points to the start of the state buffer */
emh203 0:3d9c67d97d6f 565 pStateCurnt = S->pState;
emh203 0:3d9c67d97d6f 566
emh203 0:3d9c67d97d6f 567 i = (numTaps - 1u) >> 2u;
emh203 0:3d9c67d97d6f 568
emh203 0:3d9c67d97d6f 569 /* copy data */
emh203 0:3d9c67d97d6f 570 while(i > 0u)
emh203 0:3d9c67d97d6f 571 {
emh203 0:3d9c67d97d6f 572 *pStateCurnt++ = *pState++;
emh203 0:3d9c67d97d6f 573 *pStateCurnt++ = *pState++;
emh203 0:3d9c67d97d6f 574 *pStateCurnt++ = *pState++;
emh203 0:3d9c67d97d6f 575 *pStateCurnt++ = *pState++;
emh203 0:3d9c67d97d6f 576
emh203 0:3d9c67d97d6f 577 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 578 i--;
emh203 0:3d9c67d97d6f 579 }
emh203 0:3d9c67d97d6f 580
emh203 0:3d9c67d97d6f 581 i = (numTaps - 1u) % 0x04u;
emh203 0:3d9c67d97d6f 582
emh203 0:3d9c67d97d6f 583 /* copy data */
emh203 0:3d9c67d97d6f 584 while(i > 0u)
emh203 0:3d9c67d97d6f 585 {
emh203 0:3d9c67d97d6f 586 *pStateCurnt++ = *pState++;
emh203 0:3d9c67d97d6f 587
emh203 0:3d9c67d97d6f 588 /* Decrement the loop counter */
emh203 0:3d9c67d97d6f 589 i--;
emh203 0:3d9c67d97d6f 590 }
emh203 0:3d9c67d97d6f 591 }
emh203 0:3d9c67d97d6f 592
emh203 0:3d9c67d97d6f 593
emh203 0:3d9c67d97d6f 594 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
emh203 0:3d9c67d97d6f 595
emh203 0:3d9c67d97d6f 596 /**
emh203 0:3d9c67d97d6f 597 * @} end of FIR_decimate group
emh203 0:3d9c67d97d6f 598 */