CMSIS DSP library

Dependents:   KL25Z_FFT_Demo Hat_Board_v5_1 KL25Z_FFT_Demo_tony KL25Z_FFT_Demo_tony ... more

Fork of mbed-dsp by mbed official

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_biquad_cascade_df1_fast_q31.c Source File

arm_biquad_cascade_df1_fast_q31.c

00001 /* ----------------------------------------------------------------------    
00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
00003 *    
00004 * $Date:        17. January 2013
00005 * $Revision:    V1.4.1
00006 *    
00007 * Project:      CMSIS DSP Library    
00008 * Title:        arm_biquad_cascade_df1_fast_q31.c    
00009 *    
00010 * Description:  Processing function for the    
00011 *               Q31 Fast Biquad cascade DirectFormI(DF1) filter.    
00012 *    
00013 * Target Processor: Cortex-M4/Cortex-M3
00014 *  
00015 * Redistribution and use in source and binary forms, with or without 
00016 * modification, are permitted provided that the following conditions
00017 * are met:
00018 *   - Redistributions of source code must retain the above copyright
00019 *     notice, this list of conditions and the following disclaimer.
00020 *   - Redistributions in binary form must reproduce the above copyright
00021 *     notice, this list of conditions and the following disclaimer in
00022 *     the documentation and/or other materials provided with the 
00023 *     distribution.
00024 *   - Neither the name of ARM LIMITED nor the names of its contributors
00025 *     may be used to endorse or promote products derived from this
00026 *     software without specific prior written permission.
00027 *
00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00039 * POSSIBILITY OF SUCH DAMAGE. 
00040 * -------------------------------------------------------------------- */
00041 
00042 #include "arm_math.h"
00043 
00044 /**    
00045  * @ingroup groupFilters    
00046  */
00047 
00048 /**    
00049  * @addtogroup BiquadCascadeDF1    
00050  * @{    
00051  */
00052 
00053 /**    
00054  * @details    
00055  *    
00056  * @param[in]  *S        points to an instance of the Q31 Biquad cascade structure.    
00057  * @param[in]  *pSrc     points to the block of input data.    
00058  * @param[out] *pDst     points to the block of output data.    
00059  * @param[in]  blockSize number of samples to process per call.    
00060  * @return     none.    
00061  *    
00062  * <b>Scaling and Overflow Behavior:</b>    
00063  * \par    
00064  * This function is optimized for speed at the expense of fixed-point precision and overflow protection.    
00065  * The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format.    
00066  * These intermediate results are added to a 2.30 accumulator.    
00067  * Finally, the accumulator is saturated and converted to a 1.31 result.    
00068  * The fast version has the same overflow behavior as the standard version and provides less precision since it discards the low 32 bits of each multiplication result.    
00069  * In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25). Use the intialization function    
00070  * arm_biquad_cascade_df1_init_q31() to initialize filter structure.    
00071  *    
00072  * \par    
00073  * Refer to the function <code>arm_biquad_cascade_df1_q31()</code> for a slower implementation of this function which uses 64-bit accumulation to provide higher precision.  Both the slow and the fast versions use the same instance structure.    
00074  * Use the function <code>arm_biquad_cascade_df1_init_q31()</code> to initialize the filter structure.    
00075  */
00076 
00077 void arm_biquad_cascade_df1_fast_q31(
00078   const arm_biquad_casd_df1_inst_q31 * S,
00079   q31_t * pSrc,
00080   q31_t * pDst,
00081   uint32_t blockSize)
00082 {
00083   q31_t acc = 0;                                 /*  accumulator                   */
00084   q31_t Xn1, Xn2, Yn1, Yn2;                      /*  Filter state variables        */
00085   q31_t b0, b1, b2, a1, a2;                      /*  Filter coefficients           */
00086   q31_t *pIn = pSrc;                             /*  input pointer initialization  */
00087   q31_t *pOut = pDst;                            /*  output pointer initialization */
00088   q31_t *pState = S->pState;                     /*  pState pointer initialization */
00089   q31_t *pCoeffs = S->pCoeffs;                   /*  coeff pointer initialization  */
00090   q31_t Xn;                                      /*  temporary input               */
00091   int32_t shift = (int32_t) S->postShift + 1;    /*  Shift to be applied to the output */
00092   uint32_t sample, stage = S->numStages;         /*  loop counters                     */
00093 
00094 
00095   do
00096   {
00097     /* Reading the coefficients */
00098     b0 = *pCoeffs++;
00099     b1 = *pCoeffs++;
00100     b2 = *pCoeffs++;
00101     a1 = *pCoeffs++;
00102     a2 = *pCoeffs++;
00103 
00104     /* Reading the state values */
00105     Xn1 = pState[0];
00106     Xn2 = pState[1];
00107     Yn1 = pState[2];
00108     Yn2 = pState[3];
00109 
00110     /* Apply loop unrolling and compute 4 output values simultaneously. */
00111     /*      The variables acc ... acc3 hold output values that are being computed:       
00112      *       
00113      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]       
00114      */
00115 
00116     sample = blockSize >> 2u;
00117 
00118     /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.       
00119      ** a second loop below computes the remaining 1 to 3 samples. */
00120     while(sample > 0u)
00121     {
00122       /* Read the input */
00123       Xn = *pIn;
00124 
00125       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00126       /* acc =  b0 * x[n] */
00127       //acc = (q31_t) (((q63_t) b1 * Xn1) >> 32);
00128       mult_32x32_keep32_R(acc, b1, Xn1);
00129       /* acc +=  b1 * x[n-1] */
00130       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b0 * (Xn))) >> 32);   
00131       multAcc_32x32_keep32_R(acc, b0, Xn);
00132       /* acc +=  b[2] * x[n-2] */
00133       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);
00134       multAcc_32x32_keep32_R(acc, b2, Xn2);
00135       /* acc +=  a1 * y[n-1] */
00136       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);
00137       multAcc_32x32_keep32_R(acc, a1, Yn1);
00138       /* acc +=  a2 * y[n-2] */
00139       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);
00140       multAcc_32x32_keep32_R(acc, a2, Yn2);
00141 
00142       /* The result is converted to 1.31 , Yn2 variable is reused */
00143       Yn2 = acc << shift;
00144 
00145       /* Read the second input */
00146       Xn2 = *(pIn + 1u);
00147 
00148       /* Store the output in the destination buffer. */
00149       *pOut = Yn2;
00150 
00151       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00152       /* acc =  b0 * x[n] */
00153       //acc = (q31_t) (((q63_t) b0 * (Xn2)) >> 32);
00154       mult_32x32_keep32_R(acc, b0, Xn2);
00155       /* acc +=  b1 * x[n-1] */
00156       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn))) >> 32);
00157       multAcc_32x32_keep32_R(acc, b1, Xn);
00158       /* acc +=  b[2] * x[n-2] */
00159       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn1))) >> 32);
00160       multAcc_32x32_keep32_R(acc, b2, Xn1);
00161       /* acc +=  a1 * y[n-1] */
00162       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32);
00163       multAcc_32x32_keep32_R(acc, a1, Yn2);
00164       /* acc +=  a2 * y[n-2] */
00165       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32);
00166       multAcc_32x32_keep32_R(acc, a2, Yn1);
00167 
00168       /* The result is converted to 1.31, Yn1 variable is reused  */
00169       Yn1 = acc << shift;
00170 
00171       /* Read the third input  */
00172       Xn1 = *(pIn + 2u);
00173 
00174       /* Store the output in the destination buffer. */
00175       *(pOut + 1u) = Yn1;
00176 
00177       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00178       /* acc =  b0 * x[n] */
00179       //acc = (q31_t) (((q63_t) b0 * (Xn1)) >> 32);
00180       mult_32x32_keep32_R(acc, b0, Xn1);
00181       /* acc +=  b1 * x[n-1] */
00182       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn2))) >> 32);
00183       multAcc_32x32_keep32_R(acc, b1, Xn2);
00184       /* acc +=  b[2] * x[n-2] */
00185       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn))) >> 32);
00186       multAcc_32x32_keep32_R(acc, b2, Xn);
00187       /* acc +=  a1 * y[n-1] */
00188       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);
00189       multAcc_32x32_keep32_R(acc, a1, Yn1);
00190       /* acc +=  a2 * y[n-2] */
00191       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);
00192       multAcc_32x32_keep32_R(acc, a2, Yn2);
00193 
00194       /* The result is converted to 1.31, Yn2 variable is reused  */
00195       Yn2 = acc << shift;
00196 
00197       /* Read the forth input */
00198       Xn = *(pIn + 3u);
00199 
00200       /* Store the output in the destination buffer. */
00201       *(pOut + 2u) = Yn2;
00202       pIn += 4u;
00203 
00204       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00205       /* acc =  b0 * x[n] */
00206       //acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32);
00207       mult_32x32_keep32_R(acc, b0, Xn);
00208       /* acc +=  b1 * x[n-1] */
00209       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32);
00210       multAcc_32x32_keep32_R(acc, b1, Xn1);
00211       /* acc +=  b[2] * x[n-2] */
00212       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);
00213       multAcc_32x32_keep32_R(acc, b2, Xn2);
00214       /* acc +=  a1 * y[n-1] */
00215       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32);
00216       multAcc_32x32_keep32_R(acc, a1, Yn2);
00217       /* acc +=  a2 * y[n-2] */
00218       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32);
00219       multAcc_32x32_keep32_R(acc, a2, Yn1);
00220 
00221       /* Every time after the output is computed state should be updated. */
00222       /* The states should be updated as:  */
00223       /* Xn2 = Xn1    */
00224       Xn2 = Xn1;
00225 
00226       /* The result is converted to 1.31, Yn1 variable is reused  */
00227       Yn1 = acc << shift;
00228 
00229       /* Xn1 = Xn     */
00230       Xn1 = Xn;
00231 
00232       /* Store the output in the destination buffer. */
00233       *(pOut + 3u) = Yn1;
00234       pOut += 4u;
00235 
00236       /* decrement the loop counter */
00237       sample--;
00238     }
00239 
00240     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.       
00241      ** No loop unrolling is used. */
00242     sample = (blockSize & 0x3u);
00243 
00244    while(sample > 0u)
00245    {
00246       /* Read the input */
00247       Xn = *pIn++;
00248 
00249       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00250       /* acc =  b0 * x[n] */
00251       //acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32);
00252       mult_32x32_keep32_R(acc, b0, Xn);
00253       /* acc +=  b1 * x[n-1] */
00254       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32);
00255       multAcc_32x32_keep32_R(acc, b1, Xn1);
00256       /* acc +=  b[2] * x[n-2] */
00257       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);
00258       multAcc_32x32_keep32_R(acc, b2, Xn2);
00259       /* acc +=  a1 * y[n-1] */
00260       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);
00261       multAcc_32x32_keep32_R(acc, a1, Yn1);
00262       /* acc +=  a2 * y[n-2] */
00263       //acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);
00264       multAcc_32x32_keep32_R(acc, a2, Yn2);
00265 
00266       /* The result is converted to 1.31  */
00267       acc = acc << shift;
00268 
00269       /* Every time after the output is computed state should be updated. */
00270       /* The states should be updated as:  */
00271       /* Xn2 = Xn1    */
00272       /* Xn1 = Xn     */
00273       /* Yn2 = Yn1    */
00274       /* Yn1 = acc    */
00275       Xn2 = Xn1;
00276       Xn1 = Xn;
00277       Yn2 = Yn1;
00278       Yn1 = acc;
00279 
00280       /* Store the output in the destination buffer. */
00281       *pOut++ = acc;
00282 
00283       /* decrement the loop counter */
00284       sample--;
00285    }
00286 
00287     /*  The first stage goes from the input buffer to the output buffer. */
00288     /*  Subsequent stages occur in-place in the output buffer */
00289     pIn = pDst;
00290 
00291     /* Reset to destination pointer */
00292     pOut = pDst;
00293 
00294     /*  Store the updated state variables back into the pState array */
00295     *pState++ = Xn1;
00296     *pState++ = Xn2;
00297     *pState++ = Yn1;
00298     *pState++ = Yn2;
00299 
00300   } while(--stage);
00301 }
00302 
00303 /**    
00304   * @} end of BiquadCascadeDF1 group    
00305   */