Robert Lopez / CMSIS5
Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_biquad_cascade_df1_q31.c Source File

arm_biquad_cascade_df1_q31.c

00001 /* ----------------------------------------------------------------------
00002  * Project:      CMSIS DSP Library
00003  * Title:        arm_biquad_cascade_df1_q31.c
00004  * Description:  Processing function for the Q31 Biquad cascade filter
00005  *
00006  * $Date:        27. January 2017
00007  * $Revision:    V.1.5.1
00008  *
00009  * Target Processor: Cortex-M cores
00010  * -------------------------------------------------------------------- */
00011 /*
00012  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
00013  *
00014  * SPDX-License-Identifier: Apache-2.0
00015  *
00016  * Licensed under the Apache License, Version 2.0 (the License); you may
00017  * not use this file except in compliance with the License.
00018  * You may obtain a copy of the License at
00019  *
00020  * www.apache.org/licenses/LICENSE-2.0
00021  *
00022  * Unless required by applicable law or agreed to in writing, software
00023  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
00024  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00025  * See the License for the specific language governing permissions and
00026  * limitations under the License.
00027  */
00028 
00029 #include "arm_math.h"
00030 
00031 /**
00032  * @ingroup groupFilters
00033  */
00034 
00035 /**
00036  * @addtogroup BiquadCascadeDF1
00037  * @{
00038  */
00039 
00040 /**
00041  * @brief Processing function for the Q31 Biquad cascade filter.
00042  * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
00043  * @param[in]  *pSrc      points to the block of input data.
00044  * @param[out] *pDst      points to the block of output data.
00045  * @param[in]  blockSize  number of samples to process per call.
00046  * @return none.
00047  *
00048  * <b>Scaling and Overflow Behavior:</b>
00049  * \par
00050  * The function is implemented using an internal 64-bit accumulator.
00051  * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
00052  * Thus, if the accumulator result overflows it wraps around rather than clip.
00053  * In order to avoid overflows completely the input signal must be scaled down by 2 bits and lie in the range [-0.25 +0.25).
00054  * After all 5 multiply-accumulates are performed, the 2.62 accumulator is shifted by <code>postShift</code> bits and the result truncated to
00055  * 1.31 format by discarding the low 32 bits.
00056  *
00057  * \par
00058  * Refer to the function <code>arm_biquad_cascade_df1_fast_q31()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4.
00059  */
00060 
00061 void arm_biquad_cascade_df1_q31(
00062   const arm_biquad_casd_df1_inst_q31 * S,
00063   q31_t * pSrc,
00064   q31_t * pDst,
00065   uint32_t blockSize)
00066 {
00067   q63_t acc;                                     /*  accumulator                   */
00068   uint32_t uShift = ((uint32_t) S->postShift + 1U);
00069   uint32_t lShift = 32U - uShift;                /*  Shift to be applied to the output */
00070   q31_t *pIn = pSrc;                             /*  input pointer initialization  */
00071   q31_t *pOut = pDst;                            /*  output pointer initialization */
00072   q31_t *pState = S->pState;                     /*  pState pointer initialization */
00073   q31_t *pCoeffs = S->pCoeffs;                   /*  coeff pointer initialization  */
00074   q31_t Xn1, Xn2, Yn1, Yn2;                      /*  Filter state variables        */
00075   q31_t b0, b1, b2, a1, a2;                      /*  Filter coefficients           */
00076   q31_t Xn;                                      /*  temporary input               */
00077   uint32_t sample, stage = S->numStages;         /*  loop counters                     */
00078 
00079 
00080 #if defined (ARM_MATH_DSP)
00081 
00082   q31_t acc_l, acc_h;                            /*  temporary output variables    */
00083 
00084   /* Run the below code for Cortex-M4 and Cortex-M3 */
00085 
00086   do
00087   {
00088     /* Reading the coefficients */
00089     b0 = *pCoeffs++;
00090     b1 = *pCoeffs++;
00091     b2 = *pCoeffs++;
00092     a1 = *pCoeffs++;
00093     a2 = *pCoeffs++;
00094 
00095     /* Reading the state values */
00096     Xn1 = pState[0];
00097     Xn2 = pState[1];
00098     Yn1 = pState[2];
00099     Yn2 = pState[3];
00100 
00101     /* Apply loop unrolling and compute 4 output values simultaneously. */
00102     /*      The variable acc hold output values that are being computed:
00103      *
00104      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
00105      */
00106 
00107     sample = blockSize >> 2U;
00108 
00109     /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
00110      ** a second loop below computes the remaining 1 to 3 samples. */
00111     while (sample > 0U)
00112     {
00113       /* Read the input */
00114       Xn = *pIn++;
00115 
00116       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00117 
00118       /* acc =  b0 * x[n] */
00119       acc = (q63_t) b0 *Xn;
00120       /* acc +=  b1 * x[n-1] */
00121       acc += (q63_t) b1 *Xn1;
00122       /* acc +=  b[2] * x[n-2] */
00123       acc += (q63_t) b2 *Xn2;
00124       /* acc +=  a1 * y[n-1] */
00125       acc += (q63_t) a1 *Yn1;
00126       /* acc +=  a2 * y[n-2] */
00127       acc += (q63_t) a2 *Yn2;
00128 
00129       /* The result is converted to 1.31 , Yn2 variable is reused */
00130 
00131       /* Calc lower part of acc */
00132       acc_l = acc & 0xffffffff;
00133 
00134       /* Calc upper part of acc */
00135       acc_h = (acc >> 32) & 0xffffffff;
00136 
00137       /* Apply shift for lower part of acc and upper part of acc */
00138       Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift;
00139 
00140       /* Store the output in the destination buffer. */
00141       *pOut++ = Yn2;
00142 
00143       /* Read the second input */
00144       Xn2 = *pIn++;
00145 
00146       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00147 
00148       /* acc =  b0 * x[n] */
00149       acc = (q63_t) b0 *Xn2;
00150       /* acc +=  b1 * x[n-1] */
00151       acc += (q63_t) b1 *Xn;
00152       /* acc +=  b[2] * x[n-2] */
00153       acc += (q63_t) b2 *Xn1;
00154       /* acc +=  a1 * y[n-1] */
00155       acc += (q63_t) a1 *Yn2;
00156       /* acc +=  a2 * y[n-2] */
00157       acc += (q63_t) a2 *Yn1;
00158 
00159 
00160       /* The result is converted to 1.31, Yn1 variable is reused  */
00161 
00162       /* Calc lower part of acc */
00163       acc_l = acc & 0xffffffff;
00164 
00165       /* Calc upper part of acc */
00166       acc_h = (acc >> 32) & 0xffffffff;
00167 
00168 
00169       /* Apply shift for lower part of acc and upper part of acc */
00170       Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift;
00171 
00172       /* Store the output in the destination buffer. */
00173       *pOut++ = Yn1;
00174 
00175       /* Read the third input  */
00176       Xn1 = *pIn++;
00177 
00178       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00179 
00180       /* acc =  b0 * x[n] */
00181       acc = (q63_t) b0 *Xn1;
00182       /* acc +=  b1 * x[n-1] */
00183       acc += (q63_t) b1 *Xn2;
00184       /* acc +=  b[2] * x[n-2] */
00185       acc += (q63_t) b2 *Xn;
00186       /* acc +=  a1 * y[n-1] */
00187       acc += (q63_t) a1 *Yn1;
00188       /* acc +=  a2 * y[n-2] */
00189       acc += (q63_t) a2 *Yn2;
00190 
00191       /* The result is converted to 1.31, Yn2 variable is reused  */
00192       /* Calc lower part of acc */
00193       acc_l = acc & 0xffffffff;
00194 
00195       /* Calc upper part of acc */
00196       acc_h = (acc >> 32) & 0xffffffff;
00197 
00198 
00199       /* Apply shift for lower part of acc and upper part of acc */
00200       Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift;
00201 
00202       /* Store the output in the destination buffer. */
00203       *pOut++ = Yn2;
00204 
00205       /* Read the forth input */
00206       Xn = *pIn++;
00207 
00208       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00209 
00210       /* acc =  b0 * x[n] */
00211       acc = (q63_t) b0 *Xn;
00212       /* acc +=  b1 * x[n-1] */
00213       acc += (q63_t) b1 *Xn1;
00214       /* acc +=  b[2] * x[n-2] */
00215       acc += (q63_t) b2 *Xn2;
00216       /* acc +=  a1 * y[n-1] */
00217       acc += (q63_t) a1 *Yn2;
00218       /* acc +=  a2 * y[n-2] */
00219       acc += (q63_t) a2 *Yn1;
00220 
00221       /* The result is converted to 1.31, Yn1 variable is reused  */
00222       /* Calc lower part of acc */
00223       acc_l = acc & 0xffffffff;
00224 
00225       /* Calc upper part of acc */
00226       acc_h = (acc >> 32) & 0xffffffff;
00227 
00228       /* Apply shift for lower part of acc and upper part of acc */
00229       Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift;
00230 
00231       /* Every time after the output is computed state should be updated. */
00232       /* The states should be updated as:  */
00233       /* Xn2 = Xn1    */
00234       /* Xn1 = Xn     */
00235       /* Yn2 = Yn1    */
00236       /* Yn1 = acc    */
00237       Xn2 = Xn1;
00238       Xn1 = Xn;
00239 
00240       /* Store the output in the destination buffer. */
00241       *pOut++ = Yn1;
00242 
00243       /* decrement the loop counter */
00244       sample--;
00245     }
00246 
00247     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
00248      ** No loop unrolling is used. */
00249     sample = (blockSize & 0x3U);
00250 
00251     while (sample > 0U)
00252     {
00253       /* Read the input */
00254       Xn = *pIn++;
00255 
00256       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00257 
00258       /* acc =  b0 * x[n] */
00259       acc = (q63_t) b0 *Xn;
00260       /* acc +=  b1 * x[n-1] */
00261       acc += (q63_t) b1 *Xn1;
00262       /* acc +=  b[2] * x[n-2] */
00263       acc += (q63_t) b2 *Xn2;
00264       /* acc +=  a1 * y[n-1] */
00265       acc += (q63_t) a1 *Yn1;
00266       /* acc +=  a2 * y[n-2] */
00267       acc += (q63_t) a2 *Yn2;
00268 
00269       /* The result is converted to 1.31  */
00270       acc = acc >> lShift;
00271 
00272       /* Every time after the output is computed state should be updated. */
00273       /* The states should be updated as:  */
00274       /* Xn2 = Xn1    */
00275       /* Xn1 = Xn     */
00276       /* Yn2 = Yn1    */
00277       /* Yn1 = acc    */
00278       Xn2 = Xn1;
00279       Xn1 = Xn;
00280       Yn2 = Yn1;
00281       Yn1 = (q31_t) acc;
00282 
00283       /* Store the output in the destination buffer. */
00284       *pOut++ = (q31_t) acc;
00285 
00286       /* decrement the loop counter */
00287       sample--;
00288     }
00289 
00290     /*  The first stage goes from the input buffer to the output buffer. */
00291     /*  Subsequent stages occur in-place in the output buffer */
00292     pIn = pDst;
00293 
00294     /* Reset to destination pointer */
00295     pOut = pDst;
00296 
00297     /*  Store the updated state variables back into the pState array */
00298     *pState++ = Xn1;
00299     *pState++ = Xn2;
00300     *pState++ = Yn1;
00301     *pState++ = Yn2;
00302 
00303   } while (--stage);
00304 
00305 #else
00306 
00307   /* Run the below code for Cortex-M0 */
00308 
00309   do
00310   {
00311     /* Reading the coefficients */
00312     b0 = *pCoeffs++;
00313     b1 = *pCoeffs++;
00314     b2 = *pCoeffs++;
00315     a1 = *pCoeffs++;
00316     a2 = *pCoeffs++;
00317 
00318     /* Reading the state values */
00319     Xn1 = pState[0];
00320     Xn2 = pState[1];
00321     Yn1 = pState[2];
00322     Yn2 = pState[3];
00323 
00324     /*      The variables acc holds the output value that is computed:
00325      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
00326      */
00327 
00328     sample = blockSize;
00329 
00330     while (sample > 0U)
00331     {
00332       /* Read the input */
00333       Xn = *pIn++;
00334 
00335       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00336       /* acc =  b0 * x[n] */
00337       acc = (q63_t) b0 *Xn;
00338 
00339       /* acc +=  b1 * x[n-1] */
00340       acc += (q63_t) b1 *Xn1;
00341       /* acc +=  b[2] * x[n-2] */
00342       acc += (q63_t) b2 *Xn2;
00343       /* acc +=  a1 * y[n-1] */
00344       acc += (q63_t) a1 *Yn1;
00345       /* acc +=  a2 * y[n-2] */
00346       acc += (q63_t) a2 *Yn2;
00347 
00348       /* The result is converted to 1.31  */
00349       acc = acc >> lShift;
00350 
00351       /* Every time after the output is computed state should be updated. */
00352       /* The states should be updated as:  */
00353       /* Xn2 = Xn1    */
00354       /* Xn1 = Xn     */
00355       /* Yn2 = Yn1    */
00356       /* Yn1 = acc    */
00357       Xn2 = Xn1;
00358       Xn1 = Xn;
00359       Yn2 = Yn1;
00360       Yn1 = (q31_t) acc;
00361 
00362       /* Store the output in the destination buffer. */
00363       *pOut++ = (q31_t) acc;
00364 
00365       /* decrement the loop counter */
00366       sample--;
00367     }
00368 
00369     /*  The first stage goes from the input buffer to the output buffer. */
00370     /*  Subsequent stages occur in-place in the output buffer */
00371     pIn = pDst;
00372 
00373     /* Reset to destination pointer */
00374     pOut = pDst;
00375 
00376     /*  Store the updated state variables back into the pState array */
00377     *pState++ = Xn1;
00378     *pState++ = Xn2;
00379     *pState++ = Yn1;
00380     *pState++ = Yn2;
00381 
00382   } while (--stage);
00383 
00384 #endif /*  #if defined (ARM_MATH_DSP) */
00385 }
00386 
00387 
00388 
00389 
00390 /**
00391   * @} end of BiquadCascadeDF1 group
00392   */
00393