Added CMSIS5 DSP and NN folder. Needs some work.

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_biquad_cascade_stereo_df2T_f32.c Source File

arm_biquad_cascade_stereo_df2T_f32.c

00001 /* ----------------------------------------------------------------------
00002  * Project:      CMSIS DSP Library
00003  * Title:        arm_biquad_cascade_stereo_df2T_f32.c
00004  * Description:  Processing function for floating-point transposed direct form II Biquad cascade filter. 2 channels
00005  *
00006  * $Date:        27. January 2017
00007  * $Revision:    V.1.5.1
00008  *
00009  * Target Processor: Cortex-M cores
00010  * -------------------------------------------------------------------- */
00011 /*
00012  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
00013  *
00014  * SPDX-License-Identifier: Apache-2.0
00015  *
00016  * Licensed under the Apache License, Version 2.0 (the License); you may
00017  * not use this file except in compliance with the License.
00018  * You may obtain a copy of the License at
00019  *
00020  * www.apache.org/licenses/LICENSE-2.0
00021  *
00022  * Unless required by applicable law or agreed to in writing, software
00023  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
00024  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00025  * See the License for the specific language governing permissions and
00026  * limitations under the License.
00027  */
00028 
00029 #include "arm_math.h"
00030 
00031 /**
00032 * @ingroup groupFilters
00033 */
00034 
00035 /**
00036 * @defgroup BiquadCascadeDF2T Biquad Cascade IIR Filters Using a Direct Form II Transposed Structure
00037 *
00038 * This set of functions implements arbitrary order recursive (IIR) filters using a transposed direct form II structure.
00039 * The filters are implemented as a cascade of second order Biquad sections.
00040 * These functions provide a slight memory savings as compared to the direct form I Biquad filter functions.
00041 * Only floating-point data is supported.
00042 *
00043 * These functions operate on blocks of input and output data and each call to a function
00044 * processes <code>blockSize</code> samples through the filter.
00045 * <code>pSrc</code> points to the array of input data and
00046 * <code>pDst</code> points to the array of output data.
00047 * Both arrays contain <code>blockSize</code> values per channel; the stereo function in this file uses interleaved two-channel buffers, i.e. <code>2*blockSize</code> values each.
00048 *
00049 * \par Algorithm
00050 * Each Biquad stage implements a second order filter using the difference equation:
00051 * <pre>
00052 *    y[n] = b0 * x[n] + d1
00053 *    d1 = b1 * x[n] + a1 * y[n] + d2
00054 *    d2 = b2 * x[n] + a2 * y[n]
00055 * </pre>
00056 * where d1 and d2 represent the two state values.
00057 *
00058 * \par
00059 * A Biquad filter using a transposed Direct Form II structure is shown below.
00060 * \image html BiquadDF2Transposed.gif "Single transposed Direct Form II Biquad"
00061 * Coefficients <code>b0, b1, and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.
00062 * Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.
00063 * Pay careful attention to the sign of the feedback coefficients.
00064 * Some design tools flip the sign of the feedback coefficients:
00065 * <pre>
00066 *    y[n] = b0 * x[n] + d1;
00067 *    d1 = b1 * x[n] - a1 * y[n] + d2;
00068 *    d2 = b2 * x[n] - a2 * y[n];
00069 * </pre>
00070 * In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.
00071 *
00072 * \par
00073 * Higher order filters are realized as a cascade of second order sections.
00074 * <code>numStages</code> refers to the number of second order stages used.
00075 * For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.
00076 * A 9th order filter would be realized with <code>numStages=5</code> second order stages with the
00077 * coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).
00078 *
00079 * \par
00080 * <code>pState</code> points to the state variable array.
00081 * Each Biquad stage has 2 state variables <code>d1</code> and <code>d2</code>.
00082 * The state variables are arranged in the <code>pState</code> array as:
00083 * <pre>
00084 *     {d11, d12, d21, d22, ...}
00085 * </pre>
00086 * where <code>d1x</code> refers to the state variables for the first Biquad and
00087 * <code>d2x</code> refers to the state variables for the second Biquad.
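00088 * The state array has a total length of <code>2*numStages</code> values per channel; the stereo function in this file stores both channels' states for each stage as <code>{d1a, d2a, d1b, d2b}</code>, i.e. <code>4*numStages</code> values in total.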
00089 * The state variables are updated after each block of data is processed; the coefficients are untouched.
00090 *
00091 * \par
00092 * The CMSIS library contains Biquad filters in both Direct Form I and transposed Direct Form II.
00093 * The advantage of the Direct Form I structure is that it is numerically more robust for fixed-point data types.
00094 * That is why the Direct Form I structure supports Q15 and Q31 data types.
00095 * The transposed Direct Form II structure, on the other hand, requires a wide dynamic range for the state variables <code>d1</code> and <code>d2</code>.
00096 * Because of this, the CMSIS library only has a floating-point version of the Direct Form II Biquad.
00097 * The advantage of the Direct Form II Biquad is that it requires half the number of state variables, 2 rather than 4, per Biquad stage.
00098 *
00099 * \par Instance Structure
00100 * The coefficients and state variables for a filter are stored together in an instance data structure.
00101 * A separate instance structure must be defined for each filter.
00102 * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
00103 *
00104 * \par Init Functions
00105 * There is also an associated initialization function.
00106 * The initialization function performs the following operations:
00107 * - Sets the values of the internal structure fields.
00108 * - Zeros out the values in the state buffer.
00109 * To do this manually without calling the init function, assign the following subfields of the instance structure:
00110 * numStages, pCoeffs, pState. Also set all of the values in pState to zero.
00111 *
00112 * \par
00113 * Use of the initialization function is optional.
00114 * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
00115 * To place an instance structure into a const data section, the instance structure must be manually initialized.
00116 * Set the values in the state buffer to zeros before static initialization.
00117 * For example, to statically initialize the instance structure use
00118 * <pre>
00119 *     arm_biquad_cascade_df2T_instance_f32 S1 = {numStages, pState, pCoeffs};
00120 * </pre>
00121 * where <code>numStages</code> is the number of Biquad stages in the filter; <code>pState</code> is the address of the state buffer;
00122 * <code>pCoeffs</code> is the address of the coefficient buffer.
00123 *
00124 */
00125 
00126 /**
00127 * @addtogroup BiquadCascadeDF2T
00128 * @{
00129 */
00130 
00131 /**
00132 * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter, applied to two interleaved channels with the same coefficients.
00133 * @param[in]  *S        points to an instance of the filter data structure; numStages, pCoeffs (5 values read per stage: b0, b1, b2, a1, a2) and pState (4 values read and written back per stage: d1a, d2a, d1b, d2b) are used.
00134 * @param[in]  *pSrc     points to the block of input data, read as interleaved pairs (channel a, channel b), i.e. 2 values per sample.
00135 * @param[out] *pDst     points to the block of output data, written in the same interleaved layout; every stage after the first reads its input from this buffer and overwrites it in place.
00136 * @param[in]  blockSize number of samples to process per channel.
00137 * @return none.
00138 */
00139 
00140 
00141 LOW_OPTIMIZATION_ENTER
00142 void arm_biquad_cascade_stereo_df2T_f32(
00143 const arm_biquad_cascade_stereo_df2T_instance_f32 * S,
00144 float32_t * pSrc,
00145 float32_t * pDst,
00146 uint32_t blockSize)
00147 {
00148 
00149     float32_t *pIn = pSrc;                         /*  input read pointer (pSrc for the first stage, pDst afterwards) */
00150     float32_t *pOut = pDst;                        /*  output write pointer      */
00151     float32_t *pState = S->pState;                 /*  state pointer, advanced by 4 values per stage */
00152     float32_t *pCoeffs = S->pCoeffs;               /*  coefficient pointer, advanced by 5 values per stage */
00153     float32_t acc1a, acc1b;                        /*  outputs y[n] for channel a / channel b */
00154     float32_t b0, b1, b2, a1, a2;                  /*  coefficients of the current stage (shared by both channels) */
00155     float32_t Xn1a, Xn1b;                          /*  inputs x[n] for channel a / channel b */
00156     float32_t d1a, d2a, d1b, d2b;                  /*  state variables d1, d2 for channel a / channel b */
00157     uint32_t sample, stage = S->numStages;         /*  loop counters: samples left in the block, stages left in the cascade */
00158 
00159 #if defined(ARM_MATH_CM7)
00160 
00161     float32_t Xn2a, Xn3a, Xn4a, Xn5a, Xn6a, Xn7a, Xn8a;         /*  channel a inputs of frames 2..8 in the unrolled loop */
00162     float32_t Xn2b, Xn3b, Xn4b, Xn5b, Xn6b, Xn7b, Xn8b;         /*  channel b inputs of frames 2..8 in the unrolled loop */
00163     float32_t acc2a, acc3a, acc4a, acc5a, acc6a, acc7a, acc8a;  /*  channel a outputs of frames 2..8 */
00164     float32_t acc2b, acc3b, acc4b, acc5b, acc6b, acc7b, acc8b;  /*  channel b outputs of frames 2..8 */
00165 
00166     do   /* one iteration per Biquad stage. NOTE(review): the body runs before stage is tested, so numStages == 0 would wrap the unsigned counter - presumably callers guarantee numStages >= 1; confirm. */
00167     {
00168         /* Read the five coefficients of this stage */
00169         b0 = pCoeffs[0];
00170         b1 = pCoeffs[1];
00171         b2 = pCoeffs[2];
00172         a1 = pCoeffs[3];
00173         /* Unrolled loop count: each iteration below handles 8 stereo frames (16 values). */
00174         sample = blockSize >> 3U;
00175         a2 = pCoeffs[4];
00176 
00177         /* Read the state values of this stage: d1, d2 of channel a, then d1, d2 of channel b */
00178         d1a = pState[0];
00179         d2a = pState[1];
00180         d1b = pState[2];
00181         d2b = pState[3];
00182 
00183         pCoeffs += 5U;   /* step to the coefficients of the next stage */
00184 
00185         /* First part of the processing with loop unrolling.  Compute 8 stereo frames (16 outputs) at a time.
00186         ** A second loop below computes the remaining 0 to 7 frames. */
00187         while (sample > 0U) {
00188 
00189             /* y[n] = b0 * x[n] + d1 */
00190             /* d1 = b1 * x[n] + a1 * y[n] + d2 */
00191             /* d2 = b2 * x[n] + a2 * y[n] */
00192 
00193             /* Read the first 2 inputs (frame 1, channels a and b). 2 cycles */
00194             Xn1a  = pIn[0 ];
00195             Xn1b  = pIn[1 ];
00196 
00197             /* Sample 1 (frame 1, channel a). 5 cycles. The loads of later inputs are interleaved with the arithmetic. */
00198             Xn2a  = pIn[2 ];
00199             acc1a = b0 * Xn1a + d1a;
00200 
00201             Xn2b  = pIn[3 ];
00202             d1a = b1 * Xn1a + d2a;
00203 
00204             Xn3a  = pIn[4 ];
00205             d2a = b2 * Xn1a;
00206 
00207             Xn3b  = pIn[5 ];
00208             d1a += a1 * acc1a;
00209 
00210             Xn4a  = pIn[6 ];
00211             d2a += a2 * acc1a;
00212 
00213             /* Sample 2 (frame 1, channel b). 5 cycles */
00214             Xn4b  = pIn[7 ];
00215             acc1b = b0 * Xn1b + d1b;
00216 
00217             Xn5a  = pIn[8 ];
00218             d1b = b1 * Xn1b + d2b;
00219 
00220             Xn5b = pIn[9 ];
00221             d2b = b2 * Xn1b;
00222 
00223             Xn6a = pIn[10];
00224             d1b += a1 * acc1b;
00225 
00226             Xn6b = pIn[11];
00227             d2b += a2 * acc1b;
00228 
00229             /* Sample 3 (frame 2, channel a). 5 cycles */
00230             Xn7a = pIn[12];
00231             acc2a = b0 * Xn2a + d1a;
00232 
00233             Xn7b = pIn[13];
00234             d1a = b1 * Xn2a + d2a;
00235 
00236             Xn8a = pIn[14];
00237             d2a = b2 * Xn2a;
00238 
00239             Xn8b = pIn[15];
00240             d1a += a1 * acc2a;
00241 
00242             pIn += 16;   /* all 16 inputs of this iteration have been loaded */
00243             d2a += a2 * acc2a;
00244 
00245             /* Sample 4 (frame 2, channel b). 5 cycles */
00246             acc2b = b0 * Xn2b + d1b;
00247             d1b = b1 * Xn2b + d2b;
00248             d2b = b2 * Xn2b;
00249             d1b += a1 * acc2b;
00250             d2b += a2 * acc2b;
00251 
00252             /* Sample 5 (frame 3, channel a). 5 cycles */
00253             acc3a = b0 * Xn3a + d1a;
00254             d1a = b1 * Xn3a + d2a;
00255             d2a = b2 * Xn3a;
00256             d1a += a1 * acc3a;
00257             d2a += a2 * acc3a;
00258 
00259             /* Sample 6 (frame 3, channel b). 5 cycles */
00260             acc3b = b0 * Xn3b + d1b;
00261             d1b = b1 * Xn3b + d2b;
00262             d2b = b2 * Xn3b;
00263             d1b += a1 * acc3b;
00264             d2b += a2 * acc3b;
00265 
00266             /* Sample 7 (frame 4, channel a). 5 cycles */
00267             acc4a = b0 * Xn4a + d1a;
00268             d1a = b1 * Xn4a + d2a;
00269             d2a = b2 * Xn4a;
00270             d1a += a1 * acc4a;
00271             d2a += a2 * acc4a;
00272 
00273             /* Sample 8 (frame 4, channel b). 5 cycles */
00274             acc4b = b0 * Xn4b + d1b;
00275             d1b = b1 * Xn4b + d2b;
00276             d2b = b2 * Xn4b;
00277             d1b += a1 * acc4b;
00278             d2b += a2 * acc4b;
00279 
00280             /* Sample 9 (frame 5, channel a). 5 cycles */
00281             acc5a = b0 * Xn5a + d1a;
00282             d1a = b1 * Xn5a + d2a;
00283             d2a = b2 * Xn5a;
00284             d1a += a1 * acc5a;
00285             d2a += a2 * acc5a;
00286 
00287             /* Sample 10 (frame 5, channel b). 5 cycles */
00288             acc5b = b0 * Xn5b + d1b;
00289             d1b = b1 * Xn5b + d2b;
00290             d2b = b2 * Xn5b;
00291             d1b += a1 * acc5b;
00292             d2b += a2 * acc5b;
00293 
00294             /* Sample 11 (frame 6, channel a). 5 cycles */
00295             acc6a = b0 * Xn6a + d1a;
00296             d1a = b1 * Xn6a + d2a;
00297             d2a = b2 * Xn6a;
00298             d1a += a1 * acc6a;
00299             d2a += a2 * acc6a;
00300 
00301             /* Sample 12 (frame 6, channel b). 5 cycles */
00302             acc6b = b0 * Xn6b + d1b;
00303             d1b = b1 * Xn6b + d2b;
00304             d2b = b2 * Xn6b;
00305             d1b += a1 * acc6b;
00306             d2b += a2 * acc6b;
00307 
00308             /* Sample 13 (frame 7, channel a). 5 cycles. From here on the output stores are interleaved with the arithmetic. */
00309             acc7a = b0 * Xn7a + d1a;
00310             d1a = b1 * Xn7a + d2a;
00311 
00312             pOut[0 ] = acc1a ;
00313             d2a = b2 * Xn7a;
00314 
00315             pOut[1 ] = acc1b ;
00316             d1a += a1 * acc7a;
00317 
00318             pOut[2 ] = acc2a ;
00319             d2a += a2 * acc7a;
00320 
00321             /* Sample 14 (frame 7, channel b). 5 cycles */
00322             pOut[3 ] = acc2b ;
00323             acc7b = b0 * Xn7b + d1b;
00324 
00325             pOut[4 ] = acc3a ;
00326             d1b = b1 * Xn7b + d2b;
00327 
00328             pOut[5 ] = acc3b ;
00329             d2b = b2 * Xn7b;
00330 
00331             pOut[6 ] = acc4a ;
00332             d1b += a1 * acc7b;
00333 
00334             pOut[7 ] = acc4b ;
00335             d2b += a2 * acc7b;
00336 
00337             /* Sample 15 (frame 8, channel a). 5 cycles */
00338             pOut[8 ] = acc5a ;
00339             acc8a = b0 * Xn8a + d1a;
00340 
00341             pOut[9 ] = acc5b;
00342             d1a = b1 * Xn8a + d2a;
00343 
00344             pOut[10] = acc6a;
00345             d2a = b2 * Xn8a;
00346 
00347             pOut[11] = acc6b;
00348             d1a += a1 * acc8a;
00349 
00350             pOut[12] = acc7a;
00351             d2a += a2 * acc8a;
00352 
00353             /* Sample 16 (frame 8, channel b). 5 cycles */
00354             pOut[13] = acc7b;
00355             acc8b = b0 * Xn8b + d1b;
00356 
00357             pOut[14] = acc8a;
00358             d1b = b1 * Xn8b + d2b;
00359 
00360             pOut[15] = acc8b;
00361             d2b = b2 * Xn8b;
00362 
00363             sample--;
00364             d1b += a1 * acc8b;
00365 
00366             pOut += 16;   /* all 16 outputs of this iteration have been stored */
00367             d2b += a2 * acc8b;
00368         }
00369 
00370         sample = blockSize & 0x7U;   /* frames left over after the 8x unrolled loop */
00371         while (sample > 0U) {
00372             /* Read one stereo frame */
00373             Xn1a = *pIn++; //Channel a
00374             Xn1b = *pIn++; //Channel b
00375 
00376             /* y[n] = b0 * x[n] + d1 */
00377             acc1a = (b0 * Xn1a) + d1a;
00378             acc1b = (b0 * Xn1b) + d1b;
00379 
00380             /* Store both channel outputs in the destination buffer. */
00381             *pOut++ = acc1a;
00382             *pOut++ = acc1b;
00383 
00384             /* After each output is computed the state is updated. */
00385             /* d1 = b1 * x[n] + a1 * y[n] + d2 */
00386             d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a;
00387             d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b;
00388 
00389             /* d2 = b2 * x[n] + a2 * y[n] */
00390             d2a = (b2 * Xn1a) + (a2 * acc1a);
00391             d2b = (b2 * Xn1b) + (a2 * acc1b);
00392 
00393             sample--;
00394         }
00395 
00396         /* Store the updated state variables of this stage back into the state array */
00397         pState[0] = d1a;
00398         pState[1] = d2a;
00399 
00400         pState[2] = d1b;
00401         pState[3] = d2b;
00402 
00403         /* The next stage reads its input from the output just written to pDst */
00404         pIn = pDst;
00405         /* decrement the stage counter */
00406         stage--;
00407 
00408         pState += 4U;   /* step to the state values of the next stage */
00409         /* Reset the output working pointer to the start of pDst */
00410         pOut = pDst;
00411 
00412     } while (stage > 0U);
00413 
00414 #elif defined(ARM_MATH_CM0_FAMILY)
00415 
00416     /* Run the below code for Cortex-M0: no loop unrolling, one stereo frame per iteration */
00417 
00418     do   /* one iteration per Biquad stage. NOTE(review): the body runs before stage is tested, so numStages == 0 would wrap the unsigned counter - presumably callers guarantee numStages >= 1; confirm. */
00419     {
00420         /* Read the five coefficients of this stage; pCoeffs ends up at the next stage */
00421         b0 = *pCoeffs++;
00422         b1 = *pCoeffs++;
00423         b2 = *pCoeffs++;
00424         a1 = *pCoeffs++;
00425         a2 = *pCoeffs++;
00426 
00427         /* Read the state values of this stage: d1, d2 of channel a, then d1, d2 of channel b */
00428         d1a = pState[0];
00429         d2a = pState[1];
00430         d1b = pState[2];
00431         d2b = pState[3];
00432 
00433 
00434         sample = blockSize;
00435 
00436         while (sample > 0U)
00437         {
00438             /* Read one stereo frame */
00439             Xn1a = *pIn++; //Channel a
00440             Xn1b = *pIn++; //Channel b
00441 
00442             /* y[n] = b0 * x[n] + d1 */
00443             acc1a = (b0 * Xn1a) + d1a;
00444             acc1b = (b0 * Xn1b) + d1b;
00445 
00446             /* Store both channel outputs in the destination buffer. */
00447             *pOut++ = acc1a;
00448             *pOut++ = acc1b;
00449 
00450             /* After each output is computed the state is updated. */
00451             /* d1 = b1 * x[n] + a1 * y[n] + d2 */
00452             d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a;
00453             d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b;
00454 
00455             /* d2 = b2 * x[n] + a2 * y[n] */
00456             d2a = (b2 * Xn1a) + (a2 * acc1a);
00457             d2b = (b2 * Xn1b) + (a2 * acc1b);
00458 
00459             /* decrement the sample counter */
00460             sample--;
00461         }
00462 
00463         /* Store the updated state variables back; pState ends up at the next stage */
00464         *pState++ = d1a;
00465         *pState++ = d2a;
00466         *pState++ = d1b;
00467         *pState++ = d2b;
00468 
00469         /* The next stage reads its input from the output just written to pDst */
00470         pIn = pDst;
00471 
00472         /* Reset the output working pointer to the start of pDst */
00473         pOut = pDst;
00474 
00475         /* decrement the stage counter */
00476         stage--;
00477 
00478     } while (stage > 0U);
00479 
00480 #else
00481 
00482     float32_t Xn2a, Xn3a, Xn4a;                          /*  channel a inputs of frames 2..4 in the unrolled loop */
00483     float32_t Xn2b, Xn3b, Xn4b;                          /*  channel b inputs of frames 2..4 in the unrolled loop */
00484     float32_t acc2a, acc3a, acc4a;                       /*  channel a outputs of frames 2..4 */
00485     float32_t acc2b, acc3b, acc4b;                       /*  channel b outputs of frames 2..4 */
00486     float32_t p0a, p1a, p2a, p3a, p4a, A1a;              /*  channel a partial products: p0=b0*x, p1=b1*x, p2=b2*x, p3=a1*y, p4=a2*y; A1=p1+p3 */
00487     float32_t p0b, p1b, p2b, p3b, p4b, A1b;              /*  same partial products for channel b */
00488 
00489     /* Run the below code for Cortex-M4 and Cortex-M3 */
00490     do   /* one iteration per Biquad stage. NOTE(review): the body runs before stage is tested, so numStages == 0 would wrap the unsigned counter - presumably callers guarantee numStages >= 1; confirm. */
00491     {
00492         /* Read the five coefficients of this stage; pCoeffs ends up at the next stage */
00493         b0 = *pCoeffs++;
00494         b1 = *pCoeffs++;
00495         b2 = *pCoeffs++;
00496         a1 = *pCoeffs++;
00497         a2 = *pCoeffs++;
00498 
00499         /* Read the state values of this stage: d1, d2 of channel a, then d1, d2 of channel b */
00500         d1a = pState[0];
00501         d2a = pState[1];
00502         d1b = pState[2];
00503         d2b = pState[3];
00504 
00505         /* Unrolled loop count: each iteration below handles 4 stereo frames (8 values). */
00506         sample = blockSize >> 2U;
00507 
00508         /* First part of the processing with loop unrolling.  Compute 4 stereo frames (8 outputs) at a time.
00509         ** A second loop below computes the remaining 0 to 3 frames. */
00510         while (sample > 0U) {
00511 
00512             /* y[n] = b0 * x[n] + d1 */
00513             /* d1 = b1 * x[n] + a1 * y[n] + d2 */
00514             /* d2 = b2 * x[n] + a2 * y[n] */
00515 
00516             /* Read four stereo frames (eight interleaved inputs) */
00517             Xn1a = pIn[0];
00518             Xn1b = pIn[1];
00519             Xn2a = pIn[2];
00520             Xn2b = pIn[3];
00521             Xn3a = pIn[4];
00522             Xn3b = pIn[5];
00523             Xn4a = pIn[6];
00524             Xn4b = pIn[7];
00525             pIn += 8;
00526 
00527             p0a = b0 * Xn1a;   /* frame 1 */
00528             p0b = b0 * Xn1b;
00529             p1a = b1 * Xn1a;
00530             p1b = b1 * Xn1b;
00531             acc1a = p0a + d1a;
00532             acc1b = p0b + d1b;
00533             p0a = b0 * Xn2a;   /* b0 * x of frame 2 is computed ahead, before the frame 1 state update finishes */
00534             p0b = b0 * Xn2b;
00535             p3a = a1 * acc1a;
00536             p3b = a1 * acc1b;
00537             p2a = b2 * Xn1a;
00538             p2b = b2 * Xn1b;
00539             A1a = p1a + p3a;
00540             A1b = p1b + p3b;
00541             p4a = a2 * acc1a;
00542             p4b = a2 * acc1b;
00543             d1a = A1a + d2a;
00544             d1b = A1b + d2b;
00545             d2a = p2a + p4a;
00546             d2b = p2b + p4b;
00547 
00548             p1a = b1 * Xn2a;   /* frame 2 */
00549             p1b = b1 * Xn2b;
00550             acc2a = p0a + d1a;
00551             acc2b = p0b + d1b;
00552             p0a = b0 * Xn3a;   /* b0 * x of frame 3, computed ahead */
00553             p0b = b0 * Xn3b;
00554             p3a = a1 * acc2a;
00555             p3b = a1 * acc2b;
00556             p2a = b2 * Xn2a;
00557             p2b = b2 * Xn2b;
00558             A1a = p1a + p3a;
00559             A1b = p1b + p3b;
00560             p4a = a2 * acc2a;
00561             p4b = a2 * acc2b;
00562             d1a = A1a + d2a;
00563             d1b = A1b + d2b;
00564             d2a = p2a + p4a;
00565             d2b = p2b + p4b;
00566 
00567             p1a = b1 * Xn3a;   /* frame 3 */
00568             p1b = b1 * Xn3b;
00569             acc3a = p0a + d1a;
00570             acc3b = p0b + d1b;
00571             p0a = b0 * Xn4a;   /* b0 * x of frame 4, computed ahead */
00572             p0b = b0 * Xn4b;
00573             p3a = a1 * acc3a;
00574             p3b = a1 * acc3b;
00575             p2a = b2 * Xn3a;
00576             p2b = b2 * Xn3b;
00577             A1a = p1a + p3a;
00578             A1b = p1b + p3b;
00579             p4a = a2 * acc3a;
00580             p4b = a2 * acc3b;
00581             d1a = A1a + d2a;
00582             d1b = A1b + d2b;
00583             d2a = p2a + p4a;
00584             d2b = p2b + p4b;
00585 
00586             acc4a = p0a + d1a;   /* frame 4 */
00587             acc4b = p0b + d1b;
00588             p1a = b1 * Xn4a;
00589             p1b = b1 * Xn4b;
00590             p3a = a1 * acc4a;
00591             p3b = a1 * acc4b;
00592             p2a = b2 * Xn4a;
00593             p2b = b2 * Xn4b;
00594             A1a = p1a + p3a;
00595             A1b = p1b + p3b;
00596             p4a = a2 * acc4a;
00597             p4b = a2 * acc4b;
00598             d1a = A1a + d2a;
00599             d1b = A1b + d2b;
00600             d2a = p2a + p4a;
00601             d2b = p2b + p4b;
00602 
00603             pOut[0] = acc1a;   /* store the four stereo frames, interleaved a/b */
00604             pOut[1] = acc1b;
00605             pOut[2] = acc2a;
00606             pOut[3] = acc2b;
00607             pOut[4] = acc3a;
00608             pOut[5] = acc3b;
00609             pOut[6] = acc4a;
00610             pOut[7] = acc4b;
00611             pOut += 8;
00612 
00613             sample--;
00614         }
00615 
00616         sample = blockSize & 0x3U;   /* frames left over after the 4x unrolled loop */
00617         while (sample > 0U) {
00618             Xn1a = *pIn++;   /* channel a input of this frame */
00619             Xn1b = *pIn++;   /* channel b input of this frame */
00620 
00621             p0a = b0 * Xn1a;
00622             p0b = b0 * Xn1b;
00623             p1a = b1 * Xn1a;
00624             p1b = b1 * Xn1b;
00625             acc1a = p0a + d1a;   /* y[n] = b0 * x[n] + d1 */
00626             acc1b = p0b + d1b;
00627             p3a = a1 * acc1a;
00628             p3b = a1 * acc1b;
00629             p2a = b2 * Xn1a;
00630             p2b = b2 * Xn1b;
00631             A1a = p1a + p3a;
00632             A1b = p1b + p3b;
00633             p4a = a2 * acc1a;
00634             p4b = a2 * acc1b;
00635             d1a = A1a + d2a;     /* d1 = b1 * x[n] + a1 * y[n] + d2 */
00636             d1b = A1b + d2b;
00637             d2a = p2a + p4a;     /* d2 = b2 * x[n] + a2 * y[n] */
00638             d2b = p2b + p4b;
00639 
00640             *pOut++ = acc1a;
00641             *pOut++ = acc1b;
00642 
00643             sample--;
00644         }
00645 
00646         /* Store the updated state variables back; pState ends up at the next stage */
00647         *pState++ = d1a;
00648         *pState++ = d2a;
00649         *pState++ = d1b;
00650         *pState++ = d2b;
00651 
00652         /* The next stage reads its input from the output just written to pDst */
00653         pIn = pDst;
00654 
00655         /* Reset the output working pointer to the start of pDst */
00656         pOut = pDst;
00657 
00658         /* decrement the stage counter */
00659         stage--;
00660 
00661     } while (stage > 0U);
00662 
00663 #endif
00664 
00665 }
00666 LOW_OPTIMIZATION_EXIT
00667 
00668 /**
00669    * @} end of BiquadCascadeDF2T group
00670    */
00671