CMSIS DSP library

Dependents:   KL25Z_FFT_Demo Hat_Board_v5_1 KL25Z_FFT_Demo_tony KL25Z_FFT_Demo_tony ... more

Fork of mbed-dsp by mbed official

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_biquad_cascade_df1_f32.c Source File

arm_biquad_cascade_df1_f32.c

00001 /* ----------------------------------------------------------------------    
00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
00003 *    
00004 * $Date:        17. January 2013
00005 * $Revision:    V1.4.1
00006 *    
00007 * Project:      CMSIS DSP Library    
00008 * Title:        arm_biquad_cascade_df1_f32.c    
00009 *    
00010 * Description:  Processing function for the    
00011 *               floating-point Biquad cascade DirectFormI(DF1) filter.    
00012 *    
00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00014 *  
00015 * Redistribution and use in source and binary forms, with or without 
00016 * modification, are permitted provided that the following conditions
00017 * are met:
00018 *   - Redistributions of source code must retain the above copyright
00019 *     notice, this list of conditions and the following disclaimer.
00020 *   - Redistributions in binary form must reproduce the above copyright
00021 *     notice, this list of conditions and the following disclaimer in
00022 *     the documentation and/or other materials provided with the 
00023 *     distribution.
00024 *   - Neither the name of ARM LIMITED nor the names of its contributors
00025 *     may be used to endorse or promote products derived from this
00026 *     software without specific prior written permission.
00027 *
00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00039 * POSSIBILITY OF SUCH DAMAGE. 
00040 * -------------------------------------------------------------------- */
00041 
00042 #include "arm_math.h"
00043 
00044 /**    
00045  * @ingroup groupFilters    
00046  */
00047 
00048 /**    
00049  * @defgroup BiquadCascadeDF1 Biquad Cascade IIR Filters Using Direct Form I Structure    
00050  *    
00051  * This set of functions implements arbitrary order recursive (IIR) filters.    
00052  * The filters are implemented as a cascade of second order Biquad sections.    
00053  * The functions support Q15, Q31 and floating-point data types.  
00054  * Fast version of Q15 and Q31 also supported on CortexM4 and Cortex-M3.    
00055  *    
00056  * The functions operate on blocks of input and output data and each call to the function    
00057  * processes <code>blockSize</code> samples through the filter.    
00058  * <code>pSrc</code> points to the array of input data and    
00059  * <code>pDst</code> points to the array of output data.    
00060  * Both arrays contain <code>blockSize</code> values.    
00061  *    
00062  * \par Algorithm    
00063  * Each Biquad stage implements a second order filter using the difference equation:    
00064  * <pre>    
00065  *     y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]    
00066  * </pre>    
00067  * A Direct Form I algorithm is used with 5 coefficients and 4 state variables per stage.    
00068  * \image html Biquad.gif "Single Biquad filter stage"    
00069  * Coefficients <code>b0, b1 and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.    
00070  * Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.    
00071  * Pay careful attention to the sign of the feedback coefficients.    
00072  * Some design tools use the difference equation    
00073  * <pre>    
00074  *     y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] - a1 * y[n-1] - a2 * y[n-2]    
00075  * </pre>    
00076  * In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.    
00077  *    
00078  * \par    
00079  * Higher order filters are realized as a cascade of second order sections.    
00080  * <code>numStages</code> refers to the number of second order stages used.    
00081  * For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.    
00082  * \image html BiquadCascade.gif "8th order filter using a cascade of Biquad stages"    
00083  * A 9th order filter would be realized with <code>numStages=5</code> second order stages with the coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).    
00084  *    
00085  * \par    
00086  * The <code>pState</code> points to state variables array.    
00087  * Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.    
00088  * The state variables are arranged in the <code>pState</code> array as:    
00089  * <pre>    
00090  *     {x[n-1], x[n-2], y[n-1], y[n-2]}    
00091  * </pre>    
00092  *    
00093  * \par    
00094  * The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.    
00095  * The state array has a total length of <code>4*numStages</code> values.    
00096  * The state variables are updated after each block of data is processed, the coefficients are untouched.    
00097  *    
00098  * \par Instance Structure    
00099  * The coefficients and state variables for a filter are stored together in an instance data structure.    
00100  * A separate instance structure must be defined for each filter.    
00101  * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.    
00102  * There are separate instance structure declarations for each of the 3 supported data types.    
00103  *    
00104  * \par Init Functions    
00105  * There is also an associated initialization function for each data type.    
00106  * The initialization function performs following operations:    
00107  * - Sets the values of the internal structure fields.    
00108  * - Zeros out the values in the state buffer.    
00109  * To do this manually without calling the init function, assign the follow subfields of the instance structure:
00110  * numStages, pCoeffs, pState. Also set all of the values in pState to zero. 
00111  *    
00112  * \par    
00113  * Use of the initialization function is optional.    
00114  * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.    
00115  * To place an instance structure into a const data section, the instance structure must be manually initialized.    
00116  * Set the values in the state buffer to zeros before static initialization.    
00117  * The code below statically initializes each of the 3 different data type filter instance structures    
00118  * <pre>    
00119  *     arm_biquad_casd_df1_inst_f32 S1 = {numStages, pState, pCoeffs};    
00120  *     arm_biquad_casd_df1_inst_q15 S2 = {numStages, pState, pCoeffs, postShift};    
00121  *     arm_biquad_casd_df1_inst_q31 S3 = {numStages, pState, pCoeffs, postShift};    
00122  * </pre>    
00123  * where <code>numStages</code> is the number of Biquad stages in the filter; <code>pState</code> is the address of the state buffer;    
00124  * <code>pCoeffs</code> is the address of the coefficient buffer; <code>postShift</code> shift to be applied.    
00125  *    
00126  * \par Fixed-Point Behavior    
00127  * Care must be taken when using the Q15 and Q31 versions of the Biquad Cascade filter functions.    
00128  * Following issues must be considered:    
00129  * - Scaling of coefficients    
00130  * - Filter gain    
00131  * - Overflow and saturation    
00132  *    
00133  * \par    
00134  * <b>Scaling of coefficients: </b>    
00135  * Filter coefficients are represented as fractional values and    
00136  * coefficients are restricted to lie in the range <code>[-1 +1)</code>.    
00137  * The fixed-point functions have an additional scaling parameter <code>postShift</code>    
00138  * which allow the filter coefficients to exceed the range <code>[+1 -1)</code>.    
00139  * At the output of the filter's accumulator is a shift register which shifts the result by <code>postShift</code> bits.    
00140  * \image html BiquadPostshift.gif "Fixed-point Biquad with shift by postShift bits after accumulator"    
00141  * This essentially scales the filter coefficients by <code>2^postShift</code>.    
00142  * For example, to realize the coefficients    
00143  * <pre>    
00144  *    {1.5, -0.8, 1.2, 1.6, -0.9}    
00145  * </pre>    
00146  * set the pCoeffs array to:    
00147  * <pre>    
00148  *    {0.75, -0.4, 0.6, 0.8, -0.45}    
00149  * </pre>    
00150  * and set <code>postShift=1</code>    
00151  *    
00152  * \par    
00153  * <b>Filter gain: </b>    
00154  * The frequency response of a Biquad filter is a function of its coefficients.    
00155  * It is possible for the gain through the filter to exceed 1.0 meaning that the filter increases the amplitude of certain frequencies.    
00156  * This means that an input signal with amplitude < 1.0 may result in an output > 1.0 and these are saturated or overflowed based on the implementation of the filter.    
00157  * To avoid this behavior the filter needs to be scaled down such that its peak gain < 1.0 or the input signal must be scaled down so that the combination of input and filter are never overflowed.    
00158  *    
00159  * \par    
00160  * <b>Overflow and saturation: </b>    
00161  * For Q15 and Q31 versions, it is described separately as part of the function specific documentation below.    
00162  */
00163 
00164 /**    
00165  * @addtogroup BiquadCascadeDF1    
00166  * @{    
00167  */
00168 
00169 /**    
00170  * @param[in]  *S         points to an instance of the floating-point Biquad cascade structure.    
00171  * @param[in]  *pSrc      points to the block of input data.    
00172  * @param[out] *pDst      points to the block of output data.    
00173  * @param[in]  blockSize  number of samples to process per call.    
00174  * @return     none.    
00175  *    
00176  */
00177 
00178 void arm_biquad_cascade_df1_f32(
00179   const arm_biquad_casd_df1_inst_f32 * S,
00180   float32_t * pSrc,
00181   float32_t * pDst,
00182   uint32_t blockSize)
00183 {
00184   float32_t *pIn = pSrc;                         /*  source pointer            */
00185   float32_t *pOut = pDst;                        /*  destination pointer       */
00186   float32_t *pState = S->pState;                 /*  pState pointer            */
00187   float32_t *pCoeffs = S->pCoeffs;               /*  coefficient pointer       */
00188   float32_t acc;                                 /*  Simulates the accumulator */
00189   float32_t b0, b1, b2, a1, a2;                  /*  Filter coefficients       */
00190   float32_t Xn1, Xn2, Yn1, Yn2;                  /*  Filter pState variables   */
00191   float32_t Xn;                                  /*  temporary input           */
00192   uint32_t sample, stage = S->numStages;         /*  loop counters             */
00193 
00194 
00195 #ifndef ARM_MATH_CM0_FAMILY
00196 
00197   /* Run the below code for Cortex-M4 and Cortex-M3 */
00198 
00199   do
00200   {
00201     /* Reading the coefficients */
00202     b0 = *pCoeffs++;
00203     b1 = *pCoeffs++;
00204     b2 = *pCoeffs++;
00205     a1 = *pCoeffs++;
00206     a2 = *pCoeffs++;
00207 
00208     /* Reading the pState values */
00209     Xn1 = pState[0];
00210     Xn2 = pState[1];
00211     Yn1 = pState[2];
00212     Yn2 = pState[3];
00213 
00214     /* Apply loop unrolling and compute 4 output values simultaneously. */
00215     /*      The variable acc hold output values that are being computed:    
00216      *    
00217      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1]   + a2 * y[n-2]    
00218      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1]   + a2 * y[n-2]    
00219      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1]   + a2 * y[n-2]    
00220      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1]   + a2 * y[n-2]    
00221      */
00222 
00223     sample = blockSize >> 2u;
00224 
00225     /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
00226      ** a second loop below computes the remaining 1 to 3 samples. */
00227     while(sample > 0u)
00228     {
00229       /* Read the first input */
00230       Xn = *pIn++;
00231 
00232       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00233       Yn2 = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2);
00234 
00235       /* Store the result in the accumulator in the destination buffer. */
00236       *pOut++ = Yn2;
00237 
00238       /* Every time after the output is computed state should be updated. */
00239       /* The states should be updated as:  */
00240       /* Xn2 = Xn1    */
00241       /* Xn1 = Xn     */
00242       /* Yn2 = Yn1    */
00243       /* Yn1 = acc   */
00244 
00245       /* Read the second input */
00246       Xn2 = *pIn++;
00247 
00248       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00249       Yn1 = (b0 * Xn2) + (b1 * Xn) + (b2 * Xn1) + (a1 * Yn2) + (a2 * Yn1);
00250 
00251       /* Store the result in the accumulator in the destination buffer. */
00252       *pOut++ = Yn1;
00253 
00254       /* Every time after the output is computed state should be updated. */
00255       /* The states should be updated as:  */
00256       /* Xn2 = Xn1    */
00257       /* Xn1 = Xn     */
00258       /* Yn2 = Yn1    */
00259       /* Yn1 = acc   */
00260 
00261       /* Read the third input */
00262       Xn1 = *pIn++;
00263 
00264       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00265       Yn2 = (b0 * Xn1) + (b1 * Xn2) + (b2 * Xn) + (a1 * Yn1) + (a2 * Yn2);
00266 
00267       /* Store the result in the accumulator in the destination buffer. */
00268       *pOut++ = Yn2;
00269 
00270       /* Every time after the output is computed state should be updated. */
00271       /* The states should be updated as: */
00272       /* Xn2 = Xn1    */
00273       /* Xn1 = Xn     */
00274       /* Yn2 = Yn1    */
00275       /* Yn1 = acc   */
00276 
00277       /* Read the forth input */
00278       Xn = *pIn++;
00279 
00280       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00281       Yn1 = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn2) + (a2 * Yn1);
00282 
00283       /* Store the result in the accumulator in the destination buffer. */
00284       *pOut++ = Yn1;
00285 
00286       /* Every time after the output is computed state should be updated. */
00287       /* The states should be updated as:  */
00288       /* Xn2 = Xn1    */
00289       /* Xn1 = Xn     */
00290       /* Yn2 = Yn1    */
00291       /* Yn1 = acc   */
00292       Xn2 = Xn1;
00293       Xn1 = Xn;
00294 
00295       /* decrement the loop counter */
00296       sample--;
00297 
00298     }
00299 
00300     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
00301      ** No loop unrolling is used. */
00302     sample = blockSize & 0x3u;
00303 
00304     while(sample > 0u)
00305     {
00306       /* Read the input */
00307       Xn = *pIn++;
00308 
00309       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00310       acc = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2);
00311 
00312       /* Store the result in the accumulator in the destination buffer. */
00313       *pOut++ = acc;
00314 
00315       /* Every time after the output is computed state should be updated. */
00316       /* The states should be updated as:    */
00317       /* Xn2 = Xn1    */
00318       /* Xn1 = Xn     */
00319       /* Yn2 = Yn1    */
00320       /* Yn1 = acc   */
00321       Xn2 = Xn1;
00322       Xn1 = Xn;
00323       Yn2 = Yn1;
00324       Yn1 = acc;
00325 
00326       /* decrement the loop counter */
00327       sample--;
00328 
00329     }
00330 
00331     /*  Store the updated state variables back into the pState array */
00332     *pState++ = Xn1;
00333     *pState++ = Xn2;
00334     *pState++ = Yn1;
00335     *pState++ = Yn2;
00336 
00337     /*  The first stage goes from the input buffer to the output buffer. */
00338     /*  Subsequent numStages  occur in-place in the output buffer */
00339     pIn = pDst;
00340 
00341     /* Reset the output pointer */
00342     pOut = pDst;
00343 
00344     /* decrement the loop counter */
00345     stage--;
00346 
00347   } while(stage > 0u);
00348 
00349 #else
00350 
00351   /* Run the below code for Cortex-M0 */
00352 
00353   do
00354   {
00355     /* Reading the coefficients */
00356     b0 = *pCoeffs++;
00357     b1 = *pCoeffs++;
00358     b2 = *pCoeffs++;
00359     a1 = *pCoeffs++;
00360     a2 = *pCoeffs++;
00361 
00362     /* Reading the pState values */
00363     Xn1 = pState[0];
00364     Xn2 = pState[1];
00365     Yn1 = pState[2];
00366     Yn2 = pState[3];
00367 
00368     /*      The variables acc holds the output value that is computed:        
00369      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1]   + a2 * y[n-2]        
00370      */
00371 
00372     sample = blockSize;
00373 
00374     while(sample > 0u)
00375     {
00376       /* Read the input */
00377       Xn = *pIn++;
00378 
00379       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00380       acc = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2);
00381 
00382       /* Store the result in the accumulator in the destination buffer. */
00383       *pOut++ = acc;
00384 
00385       /* Every time after the output is computed state should be updated. */
00386       /* The states should be updated as:    */
00387       /* Xn2 = Xn1    */
00388       /* Xn1 = Xn     */
00389       /* Yn2 = Yn1    */
00390       /* Yn1 = acc   */
00391       Xn2 = Xn1;
00392       Xn1 = Xn;
00393       Yn2 = Yn1;
00394       Yn1 = acc;
00395 
00396       /* decrement the loop counter */
00397       sample--;
00398     }
00399 
00400     /*  Store the updated state variables back into the pState array */
00401     *pState++ = Xn1;
00402     *pState++ = Xn2;
00403     *pState++ = Yn1;
00404     *pState++ = Yn2;
00405 
00406     /*  The first stage goes from the input buffer to the output buffer. */
00407     /*  Subsequent numStages  occur in-place in the output buffer */
00408     pIn = pDst;
00409 
00410     /* Reset the output pointer */
00411     pOut = pDst;
00412 
00413     /* decrement the loop counter */
00414     stage--;
00415 
00416   } while(stage > 0u);
00417 
00418 #endif /*   #ifndef ARM_MATH_CM0_FAMILY         */
00419 
00420 }
00421 
00422 
00423   /**    
00424    * @} end of BiquadCascadeDF1 group    
00425    */