The CMSIS DSP 5 library

Dependents:   Nucleo-Heart-Rate ejercicioVrms2 PROYECTOFINAL ejercicioVrms ... more

Committer:
xorjoep
Date:
Thu Jun 21 11:56:27 2018 +0000
Revision:
3:4098b9d3d571
Parent:
1:24714b45cd1b
headers is a folder not a library

Who changed what in which revision?

User | Revision | Line number | New contents of line
xorjoep 1:24714b45cd1b 1 /* ----------------------------------------------------------------------
xorjoep 1:24714b45cd1b 2 * Project: CMSIS DSP Library
xorjoep 1:24714b45cd1b 3 * Title: arm_biquad_cascade_df1_fast_q15.c
xorjoep 1:24714b45cd1b 4 * Description: Fast processing function for the Q15 Biquad cascade filter
xorjoep 1:24714b45cd1b 5 *
xorjoep 1:24714b45cd1b 6 * $Date: 27. January 2017
xorjoep 1:24714b45cd1b 7 * $Revision: V.1.5.1
xorjoep 1:24714b45cd1b 8 *
xorjoep 1:24714b45cd1b 9 * Target Processor: Cortex-M cores
xorjoep 1:24714b45cd1b 10 * -------------------------------------------------------------------- */
xorjoep 1:24714b45cd1b 11 /*
xorjoep 1:24714b45cd1b 12 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
xorjoep 1:24714b45cd1b 13 *
xorjoep 1:24714b45cd1b 14 * SPDX-License-Identifier: Apache-2.0
xorjoep 1:24714b45cd1b 15 *
xorjoep 1:24714b45cd1b 16 * Licensed under the Apache License, Version 2.0 (the License); you may
xorjoep 1:24714b45cd1b 17 * not use this file except in compliance with the License.
xorjoep 1:24714b45cd1b 18 * You may obtain a copy of the License at
xorjoep 1:24714b45cd1b 19 *
xorjoep 1:24714b45cd1b 20 * www.apache.org/licenses/LICENSE-2.0
xorjoep 1:24714b45cd1b 21 *
xorjoep 1:24714b45cd1b 22 * Unless required by applicable law or agreed to in writing, software
xorjoep 1:24714b45cd1b 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
xorjoep 1:24714b45cd1b 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
xorjoep 1:24714b45cd1b 25 * See the License for the specific language governing permissions and
xorjoep 1:24714b45cd1b 26 * limitations under the License.
xorjoep 1:24714b45cd1b 27 */
xorjoep 1:24714b45cd1b 28
xorjoep 1:24714b45cd1b 29 #include "arm_math.h"
xorjoep 1:24714b45cd1b 30
xorjoep 1:24714b45cd1b 31 /**
xorjoep 1:24714b45cd1b 32 * @ingroup groupFilters
xorjoep 1:24714b45cd1b 33 */
xorjoep 1:24714b45cd1b 34
xorjoep 1:24714b45cd1b 35 /**
xorjoep 1:24714b45cd1b 36 * @addtogroup BiquadCascadeDF1
xorjoep 1:24714b45cd1b 37 * @{
xorjoep 1:24714b45cd1b 38 */
xorjoep 1:24714b45cd1b 39
xorjoep 1:24714b45cd1b 40 /**
xorjoep 1:24714b45cd1b 41 * @details
xorjoep 1:24714b45cd1b 42 * @param[in] *S points to an instance of the Q15 Biquad cascade structure.
xorjoep 1:24714b45cd1b 43 * @param[in] *pSrc points to the block of input data.
xorjoep 1:24714b45cd1b 44 * @param[out] *pDst points to the block of output data.
xorjoep 1:24714b45cd1b 45 * @param[in] blockSize number of samples to process per call.
xorjoep 1:24714b45cd1b 46 * @return none.
xorjoep 1:24714b45cd1b 47 *
xorjoep 1:24714b45cd1b 48 * <b>Scaling and Overflow Behavior:</b>
xorjoep 1:24714b45cd1b 49 * \par
xorjoep 1:24714b45cd1b 50 * This fast version uses a 32-bit accumulator with 2.30 format.
xorjoep 1:24714b45cd1b 51 * The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit.
xorjoep 1:24714b45cd1b 52 * Thus, if the accumulator result overflows it wraps around and distorts the result.
xorjoep 1:24714b45cd1b 53 * In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25).
xorjoep 1:24714b45cd1b 54 * The 2.30 accumulator is then shifted right by <code>(15 - postShift)</code> bits and the result is saturated to 1.15 format.
xorjoep 1:24714b45cd1b 55 *
xorjoep 1:24714b45cd1b 56 * \par
xorjoep 1:24714b45cd1b 57 * Refer to the function <code>arm_biquad_cascade_df1_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion. Both the slow and the fast versions use the same instance structure.
xorjoep 1:24714b45cd1b 58 * Use the function <code>arm_biquad_cascade_df1_init_q15()</code> to initialize the filter structure.
xorjoep 1:24714b45cd1b 59 *
xorjoep 1:24714b45cd1b 60 */
xorjoep 1:24714b45cd1b 61
xorjoep 1:24714b45cd1b 62 void arm_biquad_cascade_df1_fast_q15(
xorjoep 1:24714b45cd1b 63 const arm_biquad_casd_df1_inst_q15 * S,
xorjoep 1:24714b45cd1b 64 q15_t * pSrc,
xorjoep 1:24714b45cd1b 65 q15_t * pDst,
xorjoep 1:24714b45cd1b 66 uint32_t blockSize)
xorjoep 1:24714b45cd1b 67 {
xorjoep 1:24714b45cd1b 68 q15_t *pIn = pSrc; /* Read pointer: pSrc for the first stage, pDst for every later stage */
xorjoep 1:24714b45cd1b 69 q15_t *pOut = pDst; /* Write pointer into the destination buffer */
xorjoep 1:24714b45cd1b 70 q31_t in; /* Input word: two packed samples in the unrolled loop, one sample in the tail */
xorjoep 1:24714b45cd1b 71 q31_t out; /* First the partial sum b0 * x, then the shifted and saturated output sample */
xorjoep 1:24714b45cd1b 72 q31_t b0; /* Packed coefficient pair {b0, 0} */
xorjoep 1:24714b45cd1b 73 q31_t b1, a1; /* Packed coefficient pairs {b1, b2} and {a1, a2} */
xorjoep 1:24714b45cd1b 74 q31_t state_in, state_out; /* Packed state pairs {x[n-1], x[n-2]} and {y[n-1], y[n-2]} */
xorjoep 1:24714b45cd1b 75 q31_t acc; /* Accumulator for the five-term biquad sum */
xorjoep 1:24714b45cd1b 76 int32_t shift = (int32_t) (15 - S->postShift); /* Right shift applied to the accumulator before saturation */
xorjoep 1:24714b45cd1b 77 q15_t *pState = S->pState; /* State pointer */
xorjoep 1:24714b45cd1b 78 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
xorjoep 1:24714b45cd1b 79 uint32_t sample, stage = S->numStages; /* Sample-pair loop counter and stage loop counter */
xorjoep 1:24714b45cd1b 80 /* Each pass of the do-while below filters the whole block through one biquad stage. */
xorjoep 1:24714b45cd1b 81 /* NOTE(review): the body runs before stage is tested, so numStages == 0 would wrap the counter on stage--; */
xorjoep 1:24714b45cd1b 82 /* callers are presumably required to pass numStages >= 1 - confirm against the init function. */
xorjoep 1:24714b45cd1b 83 do
xorjoep 1:24714b45cd1b 84 {
xorjoep 1:24714b45cd1b 85
xorjoep 1:24714b45cd1b 86 /* Read the packed {b0, 0} coefficient pair (NOTE(review): __SIMD32 is defined outside this file; presumably a 32-bit access that advances the pointer by two q15 values) */
xorjoep 1:24714b45cd1b 87 b0 = *__SIMD32(pCoeffs)++;
xorjoep 1:24714b45cd1b 88
xorjoep 1:24714b45cd1b 89 /* Read the packed {b1, b2} coefficient pair */
xorjoep 1:24714b45cd1b 90 b1 = *__SIMD32(pCoeffs)++;
xorjoep 1:24714b45cd1b 91
xorjoep 1:24714b45cd1b 92 /* Read the packed {a1, a2} coefficient pair; pCoeffs now points at the next stage */
xorjoep 1:24714b45cd1b 93 a1 = *__SIMD32(pCoeffs)++;
xorjoep 1:24714b45cd1b 94
xorjoep 1:24714b45cd1b 95 /* Read the input state values from the state buffer: x[n-1], x[n-2] */
xorjoep 1:24714b45cd1b 96 state_in = *__SIMD32(pState)++;
xorjoep 1:24714b45cd1b 97
xorjoep 1:24714b45cd1b 98 /* Read the output state values y[n-1], y[n-2]; the post-decrement steps pState back to the start of this stage so the write-back below lands on the same slots */
xorjoep 1:24714b45cd1b 99 state_out = *__SIMD32(pState)--;
xorjoep 1:24714b45cd1b 100
xorjoep 1:24714b45cd1b 101 /* The main loop is unrolled to produce two output samples per iteration. */
xorjoep 1:24714b45cd1b 102 /* acc is reused for both output samples of one iteration:
xorjoep 1:24714b45cd1b 103 *
xorjoep 1:24714b45cd1b 104 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
xorjoep 1:24714b45cd1b 105 * acc = b0 * x[n+1] + b1 * x[n] + b2 * x[n-1] + a1 * y[n] + a2 * y[n-1]
xorjoep 1:24714b45cd1b 106 */
xorjoep 1:24714b45cd1b 107 sample = blockSize >> 1U;
xorjoep 1:24714b45cd1b 108
xorjoep 1:24714b45cd1b 109 /* First part of the processing with loop unrolling. Compute 2 outputs at a time.
xorjoep 1:24714b45cd1b 110 ** The block after this loop computes the one remaining sample when blockSize is odd. */
xorjoep 1:24714b45cd1b 111 while (sample > 0U)
xorjoep 1:24714b45cd1b 112 {
xorjoep 1:24714b45cd1b 113
xorjoep 1:24714b45cd1b 114 /* Read two consecutive input samples as one packed word */
xorjoep 1:24714b45cd1b 115 in = *__SIMD32(pIn)++;
xorjoep 1:24714b45cd1b 116
xorjoep 1:24714b45cd1b 117 /* out = b0 * x[n] + 0 * x[n+1] (the zero packed next to b0 cancels the second sample) */
xorjoep 1:24714b45cd1b 118 out = __SMUAD(b0, in);
xorjoep 1:24714b45cd1b 119 /* acc = b1 * x[n-1] + b2 * x[n-2] + out */
xorjoep 1:24714b45cd1b 120 acc = __SMLAD(b1, state_in, out);
xorjoep 1:24714b45cd1b 121 /* acc += a1 * y[n-1] + a2 * y[n-2] */
xorjoep 1:24714b45cd1b 122 acc = __SMLAD(a1, state_out, acc);
xorjoep 1:24714b45cd1b 123
xorjoep 1:24714b45cd1b 124 /* Shift the accumulator right by (15 - postShift) bits and saturate the result to 16 bits */
xorjoep 1:24714b45cd1b 125 out = __SSAT((acc >> shift), 16);
xorjoep 1:24714b45cd1b 126
xorjoep 1:24714b45cd1b 127 /* Every time after the output is computed state should be updated. */
xorjoep 1:24714b45cd1b 128 /* The states should be updated as: */
xorjoep 1:24714b45cd1b 129 /* Xn2 = Xn1 */
xorjoep 1:24714b45cd1b 130 /* Xn1 = Xn */
xorjoep 1:24714b45cd1b 131 /* Yn2 = Yn1 */
xorjoep 1:24714b45cd1b 132 /* Yn1 = out (the saturated output, not the raw accumulator) */
xorjoep 1:24714b45cd1b 133 /* The new x[n] and the previous x[n-1] are packed together to form state_in */
xorjoep 1:24714b45cd1b 134 /* The new y[n] and the previous y[n-1] are packed together to form state_out */
xorjoep 1:24714b45cd1b 135
xorjoep 1:24714b45cd1b 136 #ifndef ARM_MATH_BIG_ENDIAN
xorjoep 1:24714b45cd1b 137
xorjoep 1:24714b45cd1b 138 state_in = __PKHBT(in, state_in, 16);
xorjoep 1:24714b45cd1b 139 state_out = __PKHBT(out, state_out, 16);
xorjoep 1:24714b45cd1b 140
xorjoep 1:24714b45cd1b 141 #else
xorjoep 1:24714b45cd1b 142
xorjoep 1:24714b45cd1b 143 state_in = __PKHBT(state_in >> 16, (in >> 16), 16);
xorjoep 1:24714b45cd1b 144 state_out = __PKHBT(state_out >> 16, (out), 16);
xorjoep 1:24714b45cd1b 145
xorjoep 1:24714b45cd1b 146 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
xorjoep 1:24714b45cd1b 147
xorjoep 1:24714b45cd1b 148 /* out = b0 * x[n+1]: SMUADX multiplies crosswise, so b0 pairs with the other sample held in the input word */
xorjoep 1:24714b45cd1b 149 out = __SMUADX(b0, in);
xorjoep 1:24714b45cd1b 150 /* acc = b1 * x[n] + b2 * x[n-1] + out (state_in was updated above) */
xorjoep 1:24714b45cd1b 151 acc = __SMLAD(b1, state_in, out);
xorjoep 1:24714b45cd1b 152 /* acc += a1 * y[n] + a2 * y[n-1] (state_out was updated above) */
xorjoep 1:24714b45cd1b 153 acc = __SMLAD(a1, state_out, acc);
xorjoep 1:24714b45cd1b 154
xorjoep 1:24714b45cd1b 155 /* Shift the accumulator right by (15 - postShift) bits and saturate the result to 16 bits */
xorjoep 1:24714b45cd1b 156 out = __SSAT((acc >> shift), 16);
xorjoep 1:24714b45cd1b 157
xorjoep 1:24714b45cd1b 158
xorjoep 1:24714b45cd1b 159 /* Store both outputs as one packed word: the first output is taken from state_out, the second from out. */
xorjoep 1:24714b45cd1b 160
xorjoep 1:24714b45cd1b 161 #ifndef ARM_MATH_BIG_ENDIAN
xorjoep 1:24714b45cd1b 162
xorjoep 1:24714b45cd1b 163 *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16);
xorjoep 1:24714b45cd1b 164
xorjoep 1:24714b45cd1b 165 #else
xorjoep 1:24714b45cd1b 166
xorjoep 1:24714b45cd1b 167 *__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16);
xorjoep 1:24714b45cd1b 168
xorjoep 1:24714b45cd1b 169 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
xorjoep 1:24714b45cd1b 170
xorjoep 1:24714b45cd1b 171 /* Update the state again, this time for the second sample of the pair. */
xorjoep 1:24714b45cd1b 172 /* The states should be updated as: */
xorjoep 1:24714b45cd1b 173 /* Xn2 = Xn1 */
xorjoep 1:24714b45cd1b 174 /* Xn1 = Xn */
xorjoep 1:24714b45cd1b 175 /* Yn2 = Yn1 */
xorjoep 1:24714b45cd1b 176 /* Yn1 = out (the saturated output, not the raw accumulator) */
xorjoep 1:24714b45cd1b 177 /* The second input sample and the first input sample are packed together to form state_in */
xorjoep 1:24714b45cd1b 178 /* The second output sample and the first output sample are packed together to form state_out */
xorjoep 1:24714b45cd1b 179
xorjoep 1:24714b45cd1b 180 #ifndef ARM_MATH_BIG_ENDIAN
xorjoep 1:24714b45cd1b 181
xorjoep 1:24714b45cd1b 182 state_in = __PKHBT(in >> 16, state_in, 16);
xorjoep 1:24714b45cd1b 183 state_out = __PKHBT(out, state_out, 16);
xorjoep 1:24714b45cd1b 184
xorjoep 1:24714b45cd1b 185 #else
xorjoep 1:24714b45cd1b 186
xorjoep 1:24714b45cd1b 187 state_in = __PKHBT(state_in >> 16, in, 16);
xorjoep 1:24714b45cd1b 188 state_out = __PKHBT(state_out >> 16, out, 16);
xorjoep 1:24714b45cd1b 189
xorjoep 1:24714b45cd1b 190 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
xorjoep 1:24714b45cd1b 191
xorjoep 1:24714b45cd1b 192
xorjoep 1:24714b45cd1b 193 /* One pair of samples done: decrement the pair counter */
xorjoep 1:24714b45cd1b 194 sample--;
xorjoep 1:24714b45cd1b 195
xorjoep 1:24714b45cd1b 196 }
xorjoep 1:24714b45cd1b 197
xorjoep 1:24714b45cd1b 198 /* If blockSize is odd, exactly one sample is left over; compute it here.
xorjoep 1:24714b45cd1b 199 ** No loop unrolling is used. */
xorjoep 1:24714b45cd1b 200
xorjoep 1:24714b45cd1b 201 if ((blockSize & 0x1U) != 0U)
xorjoep 1:24714b45cd1b 202 {
xorjoep 1:24714b45cd1b 203 /* Read the single remaining input sample */
xorjoep 1:24714b45cd1b 204 in = *pIn++;
xorjoep 1:24714b45cd1b 205
xorjoep 1:24714b45cd1b 206 /* out = b0 * x[n]; the straight or crossed multiply is selected by endianness */
xorjoep 1:24714b45cd1b 207
xorjoep 1:24714b45cd1b 208 #ifndef ARM_MATH_BIG_ENDIAN
xorjoep 1:24714b45cd1b 209
xorjoep 1:24714b45cd1b 210 out = __SMUAD(b0, in);
xorjoep 1:24714b45cd1b 211
xorjoep 1:24714b45cd1b 212 #else
xorjoep 1:24714b45cd1b 213
xorjoep 1:24714b45cd1b 214 out = __SMUADX(b0, in);
xorjoep 1:24714b45cd1b 215
xorjoep 1:24714b45cd1b 216 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
xorjoep 1:24714b45cd1b 217
xorjoep 1:24714b45cd1b 218 /* acc = b1 * x[n-1] + b2 * x[n-2] + out */
xorjoep 1:24714b45cd1b 219 acc = __SMLAD(b1, state_in, out);
xorjoep 1:24714b45cd1b 220 /* acc += a1 * y[n-1] + a2 * y[n-2] */
xorjoep 1:24714b45cd1b 221 acc = __SMLAD(a1, state_out, acc);
xorjoep 1:24714b45cd1b 222
xorjoep 1:24714b45cd1b 223 /* Shift the accumulator right by (15 - postShift) bits and saturate the result to 16 bits */
xorjoep 1:24714b45cd1b 224 out = __SSAT((acc >> shift), 16);
xorjoep 1:24714b45cd1b 225
xorjoep 1:24714b45cd1b 226 /* Store the output in the destination buffer. */
xorjoep 1:24714b45cd1b 227 *pOut++ = (q15_t) out;
xorjoep 1:24714b45cd1b 228
xorjoep 1:24714b45cd1b 229 /* Update the state so the values written back below include this last sample. */
xorjoep 1:24714b45cd1b 230 /* The states should be updated as: */
xorjoep 1:24714b45cd1b 231 /* Xn2 = Xn1 */
xorjoep 1:24714b45cd1b 232 /* Xn1 = Xn */
xorjoep 1:24714b45cd1b 233 /* Yn2 = Yn1 */
xorjoep 1:24714b45cd1b 234 /* Yn1 = out (the saturated output, not the raw accumulator) */
xorjoep 1:24714b45cd1b 235 /* The new x[n] and the previous x[n-1] are packed together to form state_in */
xorjoep 1:24714b45cd1b 236 /* The new y[n] and the previous y[n-1] are packed together to form state_out */
xorjoep 1:24714b45cd1b 237
xorjoep 1:24714b45cd1b 238 #ifndef ARM_MATH_BIG_ENDIAN
xorjoep 1:24714b45cd1b 239
xorjoep 1:24714b45cd1b 240 state_in = __PKHBT(in, state_in, 16);
xorjoep 1:24714b45cd1b 241 state_out = __PKHBT(out, state_out, 16);
xorjoep 1:24714b45cd1b 242
xorjoep 1:24714b45cd1b 243 #else
xorjoep 1:24714b45cd1b 244
xorjoep 1:24714b45cd1b 245 state_in = __PKHBT(state_in >> 16, in, 16);
xorjoep 1:24714b45cd1b 246 state_out = __PKHBT(state_out >> 16, out, 16);
xorjoep 1:24714b45cd1b 247
xorjoep 1:24714b45cd1b 248 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
xorjoep 1:24714b45cd1b 249
xorjoep 1:24714b45cd1b 250 }
xorjoep 1:24714b45cd1b 251
xorjoep 1:24714b45cd1b 252 /* The first stage goes from the input buffer to the output buffer. */
xorjoep 1:24714b45cd1b 253 /* Subsequent (numStages - 1) occur in-place in the output buffer */
xorjoep 1:24714b45cd1b 254 pIn = pDst;
xorjoep 1:24714b45cd1b 255
xorjoep 1:24714b45cd1b 256 /* Rewind the output pointer to the start of the destination buffer for the next stage */
xorjoep 1:24714b45cd1b 257 pOut = pDst;
xorjoep 1:24714b45cd1b 258
xorjoep 1:24714b45cd1b 259 /* Store the updated state variables back into the state array; afterwards pState points at the next stage */
xorjoep 1:24714b45cd1b 260 *__SIMD32(pState)++ = state_in;
xorjoep 1:24714b45cd1b 261 *__SIMD32(pState)++ = state_out;
xorjoep 1:24714b45cd1b 262
xorjoep 1:24714b45cd1b 263
xorjoep 1:24714b45cd1b 264 /* One stage done: decrement the stage counter */
xorjoep 1:24714b45cd1b 265 stage--;
xorjoep 1:24714b45cd1b 266
xorjoep 1:24714b45cd1b 267 } while (stage > 0U);
xorjoep 1:24714b45cd1b 268 }
xorjoep 1:24714b45cd1b 269
xorjoep 1:24714b45cd1b 270
xorjoep 1:24714b45cd1b 271 /**
xorjoep 1:24714b45cd1b 272 * @} end of BiquadCascadeDF1 group
xorjoep 1:24714b45cd1b 273 */