CMSIS DSP library

Dependents:   KL25Z_FFT_Demo Hat_Board_v5_1 KL25Z_FFT_Demo_tony KL25Z_FFT_Demo_tony ... more

Fork of mbed-dsp by mbed official

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_biquad_cascade_df1_32x64_q31.c Source File

arm_biquad_cascade_df1_32x64_q31.c

00001 /* ----------------------------------------------------------------------    
00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
00003 *    
00004 * $Date:        17. January 2013
00005 * $Revision:    V1.4.1
00006 *    
00007 * Project:      CMSIS DSP Library    
00008 * Title:        arm_biquad_cascade_df1_32x64_q31.c    
00009 *    
00010 * Description:  High precision Q31 Biquad cascade filter processing function    
00011 *    
00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00013 *  
00014 * Redistribution and use in source and binary forms, with or without 
00015 * modification, are permitted provided that the following conditions
00016 * are met:
00017 *   - Redistributions of source code must retain the above copyright
00018 *     notice, this list of conditions and the following disclaimer.
00019 *   - Redistributions in binary form must reproduce the above copyright
00020 *     notice, this list of conditions and the following disclaimer in
00021 *     the documentation and/or other materials provided with the 
00022 *     distribution.
00023 *   - Neither the name of ARM LIMITED nor the names of its contributors
00024 *     may be used to endorse or promote products derived from this
00025 *     software without specific prior written permission.
00026 *
00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00038 * POSSIBILITY OF SUCH DAMAGE.   
00039 * -------------------------------------------------------------------- */
00040 
00041 #include "arm_math.h"
00042 
00043 /**    
00044  * @ingroup groupFilters    
00045  */
00046 
00047 /**    
00048  * @defgroup BiquadCascadeDF1_32x64 High Precision Q31 Biquad Cascade Filter    
00049  *    
00050  * This function implements a high precision Biquad cascade filter which operates on    
00051  * Q31 data values.  The filter coefficients are in 1.31 format and the state variables    
00052  * are in 1.63 format.  The double precision state variables reduce quantization noise    
00053  * in the filter and provide a cleaner output.    
00054  * These filters are particularly useful when implementing filters in which the    
00055  * singularities are close to the unit circle.  This is common for low pass or high    
00056  * pass filters with very low cutoff frequencies.    
00057  *    
00058  * The function operates on blocks of input and output data    
00059  * and each call to the function processes <code>blockSize</code> samples through    
00060  * the filter. <code>pSrc</code> and <code>pDst</code> point to the input and output arrays    
00061  * containing <code>blockSize</code> Q31 values.    
00062  *    
00063  * \par Algorithm    
00064  * Each Biquad stage implements a second order filter using the difference equation:    
00065  * <pre>    
00066  *     y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]    
00067  * </pre>    
00068  * A Direct Form I algorithm is used with 5 coefficients and 4 state variables per stage.    
00069  * \image html Biquad.gif "Single Biquad filter stage"    
00070  * Coefficients <code>b0, b1, and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.    
00071  * Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.    
00072  * Pay careful attention to the sign of the feedback coefficients.    
00073  * Some design tools use the difference equation    
00074  * <pre>    
00075  *     y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] - a1 * y[n-1] - a2 * y[n-2]    
00076  * </pre>    
00077  * In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.    
00078  *    
00079  * \par    
00080  * Higher order filters are realized as a cascade of second order sections.    
00081  * <code>numStages</code> refers to the number of second order stages used.    
00082  * For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.    
00083  * \image html BiquadCascade.gif "8th order filter using a cascade of Biquad stages"    
00084  * A 9th order filter would be realized with <code>numStages=5</code> second order stages with the coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).    
00085  *    
00086  * \par    
00087  * <code>pState</code> points to the array of state variables.    
00088  * Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>, and each state variable is in 1.63 format to improve precision.    
00089  * The state variables are arranged in the array as:    
00090  * <pre>    
00091  *     {x[n-1], x[n-2], y[n-1], y[n-2]}    
00092  * </pre>    
00093  *    
00094  * \par    
00095  * The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.    
00096  * The state array has a total length of <code>4*numStages</code> values of data in 1.63 format.    
00097  * The state variables are updated after each block of data is processed; the coefficients are untouched.    
00098  *    
00099  * \par Instance Structure    
00100  * The coefficients and state variables for a filter are stored together in an instance data structure.    
00101  * A separate instance structure must be defined for each filter.    
00102  * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.    
00103  *    
00104  * \par Init Function    
00105  * There is also an associated initialization function which performs the following operations:    
00106  * - Sets the values of the internal structure fields.    
00107  * - Zeros out the values in the state buffer.    
00108  * To do this manually without calling the init function, assign the following fields of the instance structure:
00109  * numStages, pCoeffs, postShift, pState. Also set all of the values in pState to zero. 
00110  *
00111  * \par    
00112  * Use of the initialization function is optional.    
00113  * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.    
00114  * To place an instance structure into a const data section, the instance structure must be manually initialized.    
00115  * Set the values in the state buffer to zeros before static initialization.    
00116  * For example, to statically initialize the filter instance structure use    
00117  * <pre>    
00118  *     arm_biquad_cas_df1_32x64_ins_q31 S1 = {numStages, pState, pCoeffs, postShift};    
00119  * </pre>    
00120  * where <code>numStages</code> is the number of Biquad stages in the filter; <code>pState</code> is the address of the state buffer;    
00121  * <code>pCoeffs</code> is the address of the coefficient buffer; <code>postShift</code> shift to be applied which is described in detail below.    
00122  * \par Fixed-Point Behavior    
00123  * Care must be taken when using the Biquad Cascade 32x64 filter function.    
00124  * The following issues must be considered:    
00125  * - Scaling of coefficients    
00126  * - Filter gain    
00127  * - Overflow and saturation    
00128  *    
00129  * \par    
00130  * Filter coefficients are represented as fractional values and    
00131  * restricted to lie in the range <code>[-1 +1)</code>.    
00132  * The processing function has an additional scaling parameter <code>postShift</code>    
00133  * which allows the filter coefficients to exceed the range <code>[-1 +1)</code>.    
00134  * At the output of the filter's accumulator is a shift register which shifts the result by <code>postShift</code> bits.    
00135  * \image html BiquadPostshift.gif "Fixed-point Biquad with shift by postShift bits after accumulator"    
00136  * This essentially scales the filter coefficients by <code>2^postShift</code>.    
00137  * For example, to realize the coefficients    
00138  * <pre>    
00139  *    {1.5, -0.8, 1.2, 1.6, -0.9}    
00140  * </pre>    
00141  * set the Coefficient array to:    
00142  * <pre>    
00143  *    {0.75, -0.4, 0.6, 0.8, -0.45}    
00144  * </pre>    
00145  * and set <code>postShift=1</code>    
00146  *    
00147  * \par    
00148  * The second thing to keep in mind is the gain through the filter.    
00149  * The frequency response of a Biquad filter is a function of its coefficients.    
00150  * It is possible for the gain through the filter to exceed 1.0 meaning that the filter increases the amplitude of certain frequencies.    
00151  * This means that an input signal with amplitude < 1.0 may result in an output > 1.0 and these are saturated or overflowed based on the implementation of the filter.    
00152  * To avoid this behavior the filter needs to be scaled down such that its peak gain < 1.0 or the input signal must be scaled down so that the combination of input and filter are never overflowed.    
00153  *    
00154  * \par    
00155  * The third item to consider is the overflow and saturation behavior of the fixed-point Q31 version.    
00156  * This is described in the function specific documentation below.    
00157  */
00158 
00159 /**    
00160  * @addtogroup BiquadCascadeDF1_32x64    
00161  * @{    
00162  */
00163 
00164 /**    
00165  * @details    
00166     
00167  * @param[in]  *S points to an instance of the high precision Q31 Biquad cascade filter.    
00168  * @param[in]  *pSrc points to the block of input data.    
00169  * @param[out] *pDst points to the block of output data.    
00170  * @param[in]  blockSize number of samples to process.    
00171  * @return none.    
00172  *    
00173  * \par    
00174  * The function is implemented using an internal 64-bit accumulator.    
00175  * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.    
00176  * Thus, if the accumulator result overflows it wraps around rather than clip.    
00177  * In order to avoid overflows completely the input signal must be scaled down by 2 bits and lie in the range [-0.25 +0.25).    
00178  * After all 5 multiply-accumulates are performed, the 2.62 accumulator is shifted by <code>postShift</code> bits and the result truncated to    
00179  * 1.31 format by discarding the low 32 bits.    
00180  *    
00181  * \par    
00182  * Two related functions are provided in the CMSIS DSP library.    
00183  * <code>arm_biquad_cascade_df1_q31()</code> implements a Biquad cascade with 32-bit coefficients and state variables with a Q63 accumulator.    
00184  * <code>arm_biquad_cascade_df1_fast_q31()</code> implements a Biquad cascade with 32-bit coefficients and state variables with a Q31 accumulator.    
00185  */
00186 
00187 void arm_biquad_cas_df1_32x64_q31 (
00188   const arm_biquad_cas_df1_32x64_ins_q31 * S,
00189   q31_t * pSrc,
00190   q31_t * pDst,
00191   uint32_t blockSize)
00192 {
00193   q31_t *pIn = pSrc;                             /*  input pointer initialization  */
00194   q31_t *pOut = pDst;                            /*  output pointer initialization */
00195   q63_t *pState = S->pState;                     /*  state pointer initialization  */
00196   q31_t *pCoeffs = S->pCoeffs;                   /*  coeff pointer initialization  */
00197   q63_t acc;                                     /*  accumulator                   */
00198   q31_t Xn1, Xn2;                                /*  Input Filter state variables        */
00199   q63_t Yn1, Yn2;                                /*  Output Filter state variables        */
00200   q31_t b0, b1, b2, a1, a2;                      /*  Filter coefficients           */
00201   q31_t Xn;                                      /*  temporary input               */
00202   int32_t shift = (int32_t) S->postShift + 1;    /*  Shift to be applied to the output */
00203   uint32_t sample, stage = S->numStages;         /*  loop counters                     */
00204   q31_t acc_l, acc_h;                            /*  temporary output               */
00205   uint32_t uShift = ((uint32_t) S->postShift + 1u);
00206   uint32_t lShift = 32u - uShift;                /*  Shift to be applied to the output */
00207 
00208 
00209 #ifndef ARM_MATH_CM0_FAMILY
00210 
00211   /* Run the below code for Cortex-M4 and Cortex-M3 */
00212 
00213   do
00214   {
00215     /* Reading the coefficients */
00216     b0 = *pCoeffs++;
00217     b1 = *pCoeffs++;
00218     b2 = *pCoeffs++;
00219     a1 = *pCoeffs++;
00220     a2 = *pCoeffs++;
00221 
00222     /* Reading the state values */
00223     Xn1 = (q31_t) (pState[0]);
00224     Xn2 = (q31_t) (pState[1]);
00225     Yn1 = pState[2];
00226     Yn2 = pState[3];
00227 
00228     /* Apply loop unrolling and compute 4 output values simultaneously. */
00229     /* The variable acc hold output value that is being computed and    
00230      * stored in the destination buffer    
00231      * acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]    
00232      */
00233 
00234     sample = blockSize >> 2u;
00235 
00236     /* First part of the processing with loop unrolling. Compute 4 outputs at a time.    
00237      ** a second loop below computes the remaining 1 to 3 samples. */
00238     while(sample > 0u)
00239     {
00240       /* Read the input */
00241       Xn = *pIn++;
00242 
00243       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00244 
00245       /* acc =  b0 * x[n] */
00246       acc = (q63_t) Xn *b0;
00247 
00248       /* acc +=  b1 * x[n-1] */
00249       acc += (q63_t) Xn1 *b1;
00250 
00251       /* acc +=  b[2] * x[n-2] */
00252       acc += (q63_t) Xn2 *b2;
00253 
00254       /* acc +=  a1 * y[n-1] */
00255       acc += mult32x64(Yn1, a1);
00256 
00257       /* acc +=  a2 * y[n-2] */
00258       acc += mult32x64(Yn2, a2);
00259 
00260       /* The result is converted to 1.63 , Yn2 variable is reused */
00261       Yn2 = acc << shift;
00262 
00263       /* Calc lower part of acc */
00264       acc_l = acc & 0xffffffff;
00265 
00266       /* Calc upper part of acc */
00267       acc_h = (acc >> 32) & 0xffffffff;
00268 
00269       /* Apply shift for lower part of acc and upper part of acc */
00270       acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
00271 
00272       /* Store the output in the destination buffer in 1.31 format. */
00273       *pOut = acc_h;
00274 
00275       /* Read the second input into Xn2, to reuse the value */
00276       Xn2 = *pIn++;
00277 
00278       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00279 
00280       /* acc +=  b1 * x[n-1] */
00281       acc = (q63_t) Xn *b1;
00282 
00283       /* acc =  b0 * x[n] */
00284       acc += (q63_t) Xn2 *b0;
00285 
00286       /* acc +=  b[2] * x[n-2] */
00287       acc += (q63_t) Xn1 *b2;
00288 
00289       /* acc +=  a1 * y[n-1] */
00290       acc += mult32x64(Yn2, a1);
00291 
00292       /* acc +=  a2 * y[n-2] */
00293       acc += mult32x64(Yn1, a2);
00294 
00295       /* The result is converted to 1.63, Yn1 variable is reused */
00296       Yn1 = acc << shift;
00297 
00298       /* Calc lower part of acc */
00299       acc_l = acc & 0xffffffff;
00300 
00301       /* Calc upper part of acc */
00302       acc_h = (acc >> 32) & 0xffffffff;
00303 
00304       /* Apply shift for lower part of acc and upper part of acc */
00305       acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
00306 
00307       /* Read the third input into Xn1, to reuse the value */
00308       Xn1 = *pIn++;
00309 
00310       /* The result is converted to 1.31 */
00311       /* Store the output in the destination buffer. */
00312       *(pOut + 1u) = acc_h;
00313 
00314       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00315 
00316       /* acc =  b0 * x[n] */
00317       acc = (q63_t) Xn1 *b0;
00318 
00319       /* acc +=  b1 * x[n-1] */
00320       acc += (q63_t) Xn2 *b1;
00321 
00322       /* acc +=  b[2] * x[n-2] */
00323       acc += (q63_t) Xn *b2;
00324 
00325       /* acc +=  a1 * y[n-1] */
00326       acc += mult32x64(Yn1, a1);
00327 
00328       /* acc +=  a2 * y[n-2] */
00329       acc += mult32x64(Yn2, a2);
00330 
00331       /* The result is converted to 1.63, Yn2 variable is reused  */
00332       Yn2 = acc << shift;
00333 
00334       /* Calc lower part of acc */
00335       acc_l = acc & 0xffffffff;
00336 
00337       /* Calc upper part of acc */
00338       acc_h = (acc >> 32) & 0xffffffff;
00339 
00340       /* Apply shift for lower part of acc and upper part of acc */
00341       acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
00342 
00343       /* Store the output in the destination buffer in 1.31 format. */
00344       *(pOut + 2u) = acc_h;
00345 
00346       /* Read the fourth input into Xn, to reuse the value */
00347       Xn = *pIn++;
00348 
00349       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00350       /* acc =  b0 * x[n] */
00351       acc = (q63_t) Xn *b0;
00352 
00353       /* acc +=  b1 * x[n-1] */
00354       acc += (q63_t) Xn1 *b1;
00355 
00356       /* acc +=  b[2] * x[n-2] */
00357       acc += (q63_t) Xn2 *b2;
00358 
00359       /* acc +=  a1 * y[n-1] */
00360       acc += mult32x64(Yn2, a1);
00361 
00362       /* acc +=  a2 * y[n-2] */
00363       acc += mult32x64(Yn1, a2);
00364 
00365       /* The result is converted to 1.63, Yn1 variable is reused  */
00366       Yn1 = acc << shift;
00367 
00368       /* Calc lower part of acc */
00369       acc_l = acc & 0xffffffff;
00370 
00371       /* Calc upper part of acc */
00372       acc_h = (acc >> 32) & 0xffffffff;
00373 
00374       /* Apply shift for lower part of acc and upper part of acc */
00375       acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
00376 
00377       /* Store the output in the destination buffer in 1.31 format. */
00378       *(pOut + 3u) = acc_h;
00379 
00380       /* Every time after the output is computed state should be updated. */
00381       /* The states should be updated as:  */
00382       /* Xn2 = Xn1    */
00383       /* Xn1 = Xn     */
00384       /* Yn2 = Yn1    */
00385       /* Yn1 = acc    */
00386       Xn2 = Xn1;
00387       Xn1 = Xn;
00388 
00389       /* update output pointer */
00390       pOut += 4u;
00391 
00392       /* decrement the loop counter */
00393       sample--;
00394     }
00395 
00396     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
00397      ** No loop unrolling is used. */
00398     sample = (blockSize & 0x3u);
00399 
00400     while(sample > 0u)
00401     {
00402       /* Read the input */
00403       Xn = *pIn++;
00404 
00405       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00406 
00407       /* acc =  b0 * x[n] */
00408       acc = (q63_t) Xn *b0;
00409       /* acc +=  b1 * x[n-1] */
00410       acc += (q63_t) Xn1 *b1;
00411       /* acc +=  b[2] * x[n-2] */
00412       acc += (q63_t) Xn2 *b2;
00413       /* acc +=  a1 * y[n-1] */
00414       acc += mult32x64(Yn1, a1);
00415       /* acc +=  a2 * y[n-2] */
00416       acc += mult32x64(Yn2, a2);
00417 
00418       /* Every time after the output is computed state should be updated. */
00419       /* The states should be updated as:  */
00420       /* Xn2 = Xn1    */
00421       /* Xn1 = Xn     */
00422       /* Yn2 = Yn1    */
00423       /* Yn1 = acc    */
00424       Xn2 = Xn1;
00425       Xn1 = Xn;
00426       Yn2 = Yn1;
00427       /* The result is converted to 1.63, Yn1 variable is reused  */
00428       Yn1 = acc << shift;
00429 
00430       /* Calc lower part of acc */
00431       acc_l = acc & 0xffffffff;
00432 
00433       /* Calc upper part of acc */
00434       acc_h = (acc >> 32) & 0xffffffff;
00435 
00436       /* Apply shift for lower part of acc and upper part of acc */
00437       acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
00438 
00439       /* Store the output in the destination buffer in 1.31 format. */
00440       *pOut++ = acc_h;
00441       //Yn1 = acc << shift; 
00442 
00443       /* Store the output in the destination buffer in 1.31 format. */
00444 //      *pOut++ = (q31_t) (acc >> (32 - shift)); 
00445 
00446       /* decrement the loop counter */
00447       sample--;
00448     }
00449 
00450     /*  The first stage output is given as input to the second stage. */
00451     pIn = pDst;
00452 
00453     /* Reset to destination buffer working pointer */
00454     pOut = pDst;
00455 
00456     /*  Store the updated state variables back into the pState array */
00457     /*  Store the updated state variables back into the pState array */
00458     *pState++ = (q63_t) Xn1;
00459     *pState++ = (q63_t) Xn2;
00460     *pState++ = Yn1;
00461     *pState++ = Yn2;
00462 
00463   } while(--stage);
00464 
00465 #else
00466 
00467   /* Run the below code for Cortex-M0 */
00468 
00469   do
00470   {
00471     /* Reading the coefficients */
00472     b0 = *pCoeffs++;
00473     b1 = *pCoeffs++;
00474     b2 = *pCoeffs++;
00475     a1 = *pCoeffs++;
00476     a2 = *pCoeffs++;
00477 
00478     /* Reading the state values */
00479     Xn1 = pState[0];
00480     Xn2 = pState[1];
00481     Yn1 = pState[2];
00482     Yn2 = pState[3];
00483 
00484     /* The variable acc hold output value that is being computed and        
00485      * stored in the destination buffer            
00486      * acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]            
00487      */
00488 
00489     sample = blockSize;
00490 
00491     while(sample > 0u)
00492     {
00493       /* Read the input */
00494       Xn = *pIn++;
00495 
00496       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00497       /* acc =  b0 * x[n] */
00498       acc = (q63_t) Xn *b0;
00499       /* acc +=  b1 * x[n-1] */
00500       acc += (q63_t) Xn1 *b1;
00501       /* acc +=  b[2] * x[n-2] */
00502       acc += (q63_t) Xn2 *b2;
00503       /* acc +=  a1 * y[n-1] */
00504       acc += mult32x64(Yn1, a1);
00505       /* acc +=  a2 * y[n-2] */
00506       acc += mult32x64(Yn2, a2);
00507 
00508       /* Every time after the output is computed state should be updated. */
00509       /* The states should be updated as:  */
00510       /* Xn2 = Xn1    */
00511       /* Xn1 = Xn     */
00512       /* Yn2 = Yn1    */
00513       /* Yn1 = acc    */
00514       Xn2 = Xn1;
00515       Xn1 = Xn;
00516       Yn2 = Yn1;
00517 
00518       /* The result is converted to 1.63, Yn1 variable is reused  */
00519       Yn1 = acc << shift;
00520 
00521       /* Calc lower part of acc */
00522       acc_l = acc & 0xffffffff;
00523 
00524       /* Calc upper part of acc */
00525       acc_h = (acc >> 32) & 0xffffffff;
00526 
00527       /* Apply shift for lower part of acc and upper part of acc */
00528       acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
00529 
00530       /* Store the output in the destination buffer in 1.31 format. */
00531       *pOut++ = acc_h;
00532 
00533       //Yn1 = acc << shift; 
00534 
00535       /* Store the output in the destination buffer in 1.31 format. */
00536       //*pOut++ = (q31_t) (acc >> (32 - shift)); 
00537 
00538       /* decrement the loop counter */
00539       sample--;
00540     }
00541 
00542     /*  The first stage output is given as input to the second stage. */
00543     pIn = pDst;
00544 
00545     /* Reset to destination buffer working pointer */
00546     pOut = pDst;
00547 
00548     /*  Store the updated state variables back into the pState array */
00549     *pState++ = (q63_t) Xn1;
00550     *pState++ = (q63_t) Xn2;
00551     *pState++ = Yn1;
00552     *pState++ = Yn2;
00553 
00554   } while(--stage);
00555 
00556 #endif /*    #ifndef ARM_MATH_CM0_FAMILY     */
00557 }
00558 
00559   /**    
00560    * @} end of BiquadCascadeDF1_32x64 group    
00561    */