Robert Lopez / CMSIS5
Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_biquad_cascade_df1_q15.c Source File

arm_biquad_cascade_df1_q15.c

00001 /* ----------------------------------------------------------------------
00002  * Project:      CMSIS DSP Library
00003  * Title:        arm_biquad_cascade_df1_q15.c
00004  * Description:  Processing function for the Q15 Biquad cascade DirectFormI(DF1) filter
00005  *
00006  * $Date:        27. January 2017
00007  * $Revision:    V.1.5.1
00008  *
00009  * Target Processor: Cortex-M cores
00010  * -------------------------------------------------------------------- */
00011 /*
00012  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
00013  *
00014  * SPDX-License-Identifier: Apache-2.0
00015  *
00016  * Licensed under the Apache License, Version 2.0 (the License); you may
00017  * not use this file except in compliance with the License.
00018  * You may obtain a copy of the License at
00019  *
00020  * www.apache.org/licenses/LICENSE-2.0
00021  *
00022  * Unless required by applicable law or agreed to in writing, software
00023  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
00024  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00025  * See the License for the specific language governing permissions and
00026  * limitations under the License.
00027  */
00028 
00029 #include "arm_math.h"
00030 
00031 /**
00032  * @ingroup groupFilters
00033  */
00034 
00035 /**
00036  * @addtogroup BiquadCascadeDF1
00037  * @{
00038  */
00039 
00040 /**
00041  * @brief Processing function for the Q15 Biquad cascade filter.
00042  * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.
00043  * @param[in]  *pSrc points to the block of input data.
00044  * @param[out] *pDst points to the location where the output result is written.
00045  * @param[in]  blockSize number of samples to process per call.
00046  * @return none.
00047  *
00048  *
00049  * <b>Scaling and Overflow Behavior:</b>
00050  * \par
00051  * The function is implemented using a 64-bit internal accumulator.
00052  * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
00053  * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
00054  * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
00055  * The accumulator is then shifted by <code>postShift</code> bits to truncate the result to 1.15 format by discarding the low 16 bits.
00056  * Finally, the result is saturated to 1.15 format.
00057  *
00058  * \par
00059  * Refer to the function <code>arm_biquad_cascade_df1_fast_q15()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4.
00060  */
00061 
00062 void arm_biquad_cascade_df1_q15(
00063   const arm_biquad_casd_df1_inst_q15 * S,
00064   q15_t * pSrc,
00065   q15_t * pDst,
00066   uint32_t blockSize)
00067 {
00068 
00069 
00070 #if defined (ARM_MATH_DSP)
00071 
00072   /* Run the below code for Cortex-M4 and Cortex-M3 */
00073 
00074   q15_t *pIn = pSrc;                             /*  Source pointer                               */
00075   q15_t *pOut = pDst;                            /*  Destination pointer                          */
00076   q31_t in;                                      /*  Temporary variable to hold input value       */
00077   q31_t out;                                     /*  Temporary variable to hold output value      */
00078   q31_t b0;                                      /*  Temporary variable to hold bo value          */
00079   q31_t b1, a1;                                  /*  Filter coefficients                          */
00080   q31_t state_in, state_out;                     /*  Filter state variables                       */
00081   q31_t acc_l, acc_h;
00082   q63_t acc;                                     /*  Accumulator                                  */
00083   int32_t lShift = (15 - (int32_t) S->postShift);       /*  Post shift                                   */
00084   q15_t *pState = S->pState;                     /*  State pointer                                */
00085   q15_t *pCoeffs = S->pCoeffs;                   /*  Coefficient pointer                          */
00086   uint32_t sample, stage = (uint32_t) S->numStages;     /*  Stage loop counter                           */
00087   int32_t uShift = (32 - lShift);
00088 
00089   do
00090   {
00091     /* Read the b0 and 0 coefficients using SIMD  */
00092     b0 = *__SIMD32(pCoeffs)++;
00093 
00094     /* Read the b1 and b2 coefficients using SIMD */
00095     b1 = *__SIMD32(pCoeffs)++;
00096 
00097     /* Read the a1 and a2 coefficients using SIMD */
00098     a1 = *__SIMD32(pCoeffs)++;
00099 
00100     /* Read the input state values from the state buffer:  x[n-1], x[n-2] */
00101     state_in = *__SIMD32(pState)++;
00102 
00103     /* Read the output state values from the state buffer:  y[n-1], y[n-2] */
00104     state_out = *__SIMD32(pState)--;
00105 
00106     /* Apply loop unrolling and compute 2 output values simultaneously. */
00107     /*      The variable acc hold output values that are being computed:
00108      *
00109      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
00110      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
00111      */
00112     sample = blockSize >> 1U;
00113 
00114     /* First part of the processing with loop unrolling.  Compute 2 outputs at a time.
00115      ** a second loop below computes the remaining 1 sample. */
00116     while (sample > 0U)
00117     {
00118 
00119       /* Read the input */
00120       in = *__SIMD32(pIn)++;
00121 
00122       /* out =  b0 * x[n] + 0 * 0 */
00123       out = __SMUAD(b0, in);
00124 
00125       /* acc +=  b1 * x[n-1] +  b2 * x[n-2] + out */
00126       acc = __SMLALD(b1, state_in, out);
00127       /* acc +=  a1 * y[n-1] +  a2 * y[n-2] */
00128       acc = __SMLALD(a1, state_out, acc);
00129 
00130       /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
00131       /* Calc lower part of acc */
00132       acc_l = acc & 0xffffffff;
00133 
00134       /* Calc upper part of acc */
00135       acc_h = (acc >> 32) & 0xffffffff;
00136 
00137       /* Apply shift for lower part of acc and upper part of acc */
00138       out = (uint32_t) acc_l >> lShift | acc_h << uShift;
00139 
00140       out = __SSAT(out, 16);
00141 
00142       /* Every time after the output is computed state should be updated. */
00143       /* The states should be updated as:  */
00144       /* Xn2 = Xn1    */
00145       /* Xn1 = Xn     */
00146       /* Yn2 = Yn1    */
00147       /* Yn1 = acc   */
00148       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
00149       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
00150 
00151 #ifndef  ARM_MATH_BIG_ENDIAN
00152 
00153       state_in = __PKHBT(in, state_in, 16);
00154       state_out = __PKHBT(out, state_out, 16);
00155 
00156 #else
00157 
00158       state_in = __PKHBT(state_in >> 16, (in >> 16), 16);
00159       state_out = __PKHBT(state_out >> 16, (out), 16);
00160 
00161 #endif /* #ifndef  ARM_MATH_BIG_ENDIAN */
00162 
00163       /* out =  b0 * x[n] + 0 * 0 */
00164       out = __SMUADX(b0, in);
00165       /* acc +=  b1 * x[n-1] +  b2 * x[n-2] + out */
00166       acc = __SMLALD(b1, state_in, out);
00167       /* acc +=  a1 * y[n-1] + a2 * y[n-2] */
00168       acc = __SMLALD(a1, state_out, acc);
00169 
00170       /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
00171       /* Calc lower part of acc */
00172       acc_l = acc & 0xffffffff;
00173 
00174       /* Calc upper part of acc */
00175       acc_h = (acc >> 32) & 0xffffffff;
00176 
00177       /* Apply shift for lower part of acc and upper part of acc */
00178       out = (uint32_t) acc_l >> lShift | acc_h << uShift;
00179 
00180       out = __SSAT(out, 16);
00181 
00182       /* Store the output in the destination buffer. */
00183 
00184 #ifndef  ARM_MATH_BIG_ENDIAN
00185 
00186       *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16);
00187 
00188 #else
00189 
00190       *__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16);
00191 
00192 #endif /* #ifndef  ARM_MATH_BIG_ENDIAN */
00193 
00194       /* Every time after the output is computed state should be updated. */
00195       /* The states should be updated as:  */
00196       /* Xn2 = Xn1    */
00197       /* Xn1 = Xn     */
00198       /* Yn2 = Yn1    */
00199       /* Yn1 = acc   */
00200       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
00201       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
00202 #ifndef  ARM_MATH_BIG_ENDIAN
00203 
00204       state_in = __PKHBT(in >> 16, state_in, 16);
00205       state_out = __PKHBT(out, state_out, 16);
00206 
00207 #else
00208 
00209       state_in = __PKHBT(state_in >> 16, in, 16);
00210       state_out = __PKHBT(state_out >> 16, out, 16);
00211 
00212 #endif /* #ifndef  ARM_MATH_BIG_ENDIAN */
00213 
00214 
00215       /* Decrement the loop counter */
00216       sample--;
00217 
00218     }
00219 
00220     /* If the blockSize is not a multiple of 2, compute any remaining output samples here.
00221      ** No loop unrolling is used. */
00222 
00223     if ((blockSize & 0x1U) != 0U)
00224     {
00225       /* Read the input */
00226       in = *pIn++;
00227 
00228       /* out =  b0 * x[n] + 0 * 0 */
00229 
00230 #ifndef  ARM_MATH_BIG_ENDIAN
00231 
00232       out = __SMUAD(b0, in);
00233 
00234 #else
00235 
00236       out = __SMUADX(b0, in);
00237 
00238 #endif /* #ifndef  ARM_MATH_BIG_ENDIAN */
00239 
00240       /* acc =  b1 * x[n-1] + b2 * x[n-2] + out */
00241       acc = __SMLALD(b1, state_in, out);
00242       /* acc +=  a1 * y[n-1] + a2 * y[n-2] */
00243       acc = __SMLALD(a1, state_out, acc);
00244 
00245       /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
00246       /* Calc lower part of acc */
00247       acc_l = acc & 0xffffffff;
00248 
00249       /* Calc upper part of acc */
00250       acc_h = (acc >> 32) & 0xffffffff;
00251 
00252       /* Apply shift for lower part of acc and upper part of acc */
00253       out = (uint32_t) acc_l >> lShift | acc_h << uShift;
00254 
00255       out = __SSAT(out, 16);
00256 
00257       /* Store the output in the destination buffer. */
00258       *pOut++ = (q15_t) out;
00259 
00260       /* Every time after the output is computed state should be updated. */
00261       /* The states should be updated as:  */
00262       /* Xn2 = Xn1    */
00263       /* Xn1 = Xn     */
00264       /* Yn2 = Yn1    */
00265       /* Yn1 = acc   */
00266       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
00267       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
00268 
00269 #ifndef  ARM_MATH_BIG_ENDIAN
00270 
00271       state_in = __PKHBT(in, state_in, 16);
00272       state_out = __PKHBT(out, state_out, 16);
00273 
00274 #else
00275 
00276       state_in = __PKHBT(state_in >> 16, in, 16);
00277       state_out = __PKHBT(state_out >> 16, out, 16);
00278 
00279 #endif /* #ifndef  ARM_MATH_BIG_ENDIAN */
00280 
00281     }
00282 
00283     /*  The first stage goes from the input wire to the output wire.  */
00284     /*  Subsequent numStages occur in-place in the output wire  */
00285     pIn = pDst;
00286 
00287     /* Reset the output pointer */
00288     pOut = pDst;
00289 
00290     /*  Store the updated state variables back into the state array */
00291     *__SIMD32(pState)++ = state_in;
00292     *__SIMD32(pState)++ = state_out;
00293 
00294 
00295     /* Decrement the loop counter */
00296     stage--;
00297 
00298   } while (stage > 0U);
00299 
00300 #else
00301 
00302   /* Run the below code for Cortex-M0 */
00303 
00304   q15_t *pIn = pSrc;                             /*  Source pointer                               */
00305   q15_t *pOut = pDst;                            /*  Destination pointer                          */
00306   q15_t b0, b1, b2, a1, a2;                      /*  Filter coefficients           */
00307   q15_t Xn1, Xn2, Yn1, Yn2;                      /*  Filter state variables        */
00308   q15_t Xn;                                      /*  temporary input               */
00309   q63_t acc;                                     /*  Accumulator                                  */
00310   int32_t shift = (15 - (int32_t) S->postShift); /*  Post shift                                   */
00311   q15_t *pState = S->pState;                     /*  State pointer                                */
00312   q15_t *pCoeffs = S->pCoeffs;                   /*  Coefficient pointer                          */
00313   uint32_t sample, stage = (uint32_t) S->numStages;     /*  Stage loop counter                           */
00314 
00315   do
00316   {
00317     /* Reading the coefficients */
00318     b0 = *pCoeffs++;
00319     pCoeffs++;  // skip the 0 coefficient
00320     b1 = *pCoeffs++;
00321     b2 = *pCoeffs++;
00322     a1 = *pCoeffs++;
00323     a2 = *pCoeffs++;
00324 
00325     /* Reading the state values */
00326     Xn1 = pState[0];
00327     Xn2 = pState[1];
00328     Yn1 = pState[2];
00329     Yn2 = pState[3];
00330 
00331     /*      The variables acc holds the output value that is computed:
00332      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
00333      */
00334 
00335     sample = blockSize;
00336 
00337     while (sample > 0U)
00338     {
00339       /* Read the input */
00340       Xn = *pIn++;
00341 
00342       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00343       /* acc =  b0 * x[n] */
00344       acc = (q31_t) b0 *Xn;
00345 
00346       /* acc +=  b1 * x[n-1] */
00347       acc += (q31_t) b1 *Xn1;
00348       /* acc +=  b[2] * x[n-2] */
00349       acc += (q31_t) b2 *Xn2;
00350       /* acc +=  a1 * y[n-1] */
00351       acc += (q31_t) a1 *Yn1;
00352       /* acc +=  a2 * y[n-2] */
00353       acc += (q31_t) a2 *Yn2;
00354 
00355       /* The result is converted to 1.31  */
00356       acc = __SSAT((acc >> shift), 16);
00357 
00358       /* Every time after the output is computed state should be updated. */
00359       /* The states should be updated as:  */
00360       /* Xn2 = Xn1    */
00361       /* Xn1 = Xn     */
00362       /* Yn2 = Yn1    */
00363       /* Yn1 = acc    */
00364       Xn2 = Xn1;
00365       Xn1 = Xn;
00366       Yn2 = Yn1;
00367       Yn1 = (q15_t) acc;
00368 
00369       /* Store the output in the destination buffer. */
00370       *pOut++ = (q15_t) acc;
00371 
00372       /* decrement the loop counter */
00373       sample--;
00374     }
00375 
00376     /*  The first stage goes from the input buffer to the output buffer. */
00377     /*  Subsequent stages occur in-place in the output buffer */
00378     pIn = pDst;
00379 
00380     /* Reset to destination pointer */
00381     pOut = pDst;
00382 
00383     /*  Store the updated state variables back into the pState array */
00384     *pState++ = Xn1;
00385     *pState++ = Xn2;
00386     *pState++ = Yn1;
00387     *pState++ = Yn2;
00388 
00389   } while (--stage);
00390 
00391 #endif /* #if defined (ARM_MATH_DSP) */
00392 
00393 }
00394 
00395 
00396 /**
00397  * @} end of BiquadCascadeDF1 group
00398  */
00399