Robert Lopez / CMSIS5
Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_biquad_cascade_df2T_f64.c Source File

arm_biquad_cascade_df2T_f64.c

00001 /* ----------------------------------------------------------------------
00002  * Project:      CMSIS DSP Library
00003  * Title:        arm_biquad_cascade_df2T_f64.c
00004  * Description:  Processing function for floating-point transposed direct form II Biquad cascade filter
00005  *
00006  * $Date:        27. January 2017
00007  * $Revision:    V.1.5.1
00008  *
00009  * Target Processor: Cortex-M cores
00010  * -------------------------------------------------------------------- */
00011 /*
00012  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
00013  *
00014  * SPDX-License-Identifier: Apache-2.0
00015  *
00016  * Licensed under the Apache License, Version 2.0 (the License); you may
00017  * not use this file except in compliance with the License.
00018  * You may obtain a copy of the License at
00019  *
00020  * www.apache.org/licenses/LICENSE-2.0
00021  *
00022  * Unless required by applicable law or agreed to in writing, software
00023  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
00024  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00025  * See the License for the specific language governing permissions and
00026  * limitations under the License.
00027  */
00028 
00029 #include "arm_math.h"
00030 
00031 /**
00032 * @ingroup groupFilters
00033 */
00034 
00035 /**
00036 * @defgroup BiquadCascadeDF2T Biquad Cascade IIR Filters Using a Direct Form II Transposed Structure
00037 *
00038 * This set of functions implements arbitrary order recursive (IIR) filters using a transposed direct form II structure.
00039 * The filters are implemented as a cascade of second order Biquad sections.
00040 * These functions provide a slight memory savings as compared to the direct form I Biquad filter functions.
00041 * Only floating-point data is supported.
00042 *
00043 * This function operate on blocks of input and output data and each call to the function
00044 * processes <code>blockSize</code> samples through the filter.
00045 * <code>pSrc</code> points to the array of input data and
00046 * <code>pDst</code> points to the array of output data.
00047 * Both arrays contain <code>blockSize</code> values.
00048 *
00049 * \par Algorithm
00050 * Each Biquad stage implements a second order filter using the difference equation:
00051 * <pre>
00052 *    y[n] = b0 * x[n] + d1
00053 *    d1 = b1 * x[n] + a1 * y[n] + d2
00054 *    d2 = b2 * x[n] + a2 * y[n]
00055 * </pre>
00056 * where d1 and d2 represent the two state values.
00057 *
00058 * \par
00059 * A Biquad filter using a transposed Direct Form II structure is shown below.
00060 * \image html BiquadDF2Transposed.gif "Single transposed Direct Form II Biquad"
00061 * Coefficients <code>b0, b1, and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.
00062 * Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.
00063 * Pay careful attention to the sign of the feedback coefficients.
00064 * Some design tools flip the sign of the feedback coefficients:
00065 * <pre>
00066 *    y[n] = b0 * x[n] + d1;
00067 *    d1 = b1 * x[n] - a1 * y[n] + d2;
00068 *    d2 = b2 * x[n] - a2 * y[n];
00069 * </pre>
00070 * In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.
00071 *
00072 * \par
00073 * Higher order filters are realized as a cascade of second order sections.
00074 * <code>numStages</code> refers to the number of second order stages used.
00075 * For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.
00076 * A 9th order filter would be realized with <code>numStages=5</code> second order stages with the
00077 * coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).
00078 *
00079 * \par
00080 * <code>pState</code> points to the state variable array.
00081 * Each Biquad stage has 2 state variables <code>d1</code> and <code>d2</code>.
00082 * The state variables are arranged in the <code>pState</code> array as:
00083 * <pre>
00084 *     {d11, d12, d21, d22, ...}
00085 * </pre>
00086 * where <code>d1x</code> refers to the state variables for the first Biquad and
00087 * <code>d2x</code> refers to the state variables for the second Biquad.
00088 * The state array has a total length of <code>2*numStages</code> values.
00089 * The state variables are updated after each block of data is processed; the coefficients are untouched.
00090 *
00091 * \par
00092 * The CMSIS library contains Biquad filters in both Direct Form I and transposed Direct Form II.
00093 * The advantage of the Direct Form I structure is that it is numerically more robust for fixed-point data types.
00094 * That is why the Direct Form I structure supports Q15 and Q31 data types.
00095 * The transposed Direct Form II structure, on the other hand, requires a wide dynamic range for the state variables <code>d1</code> and <code>d2</code>.
00096 * Because of this, the CMSIS library only has a floating-point version of the Direct Form II Biquad.
00097 * The advantage of the Direct Form II Biquad is that it requires half the number of state variables, 2 rather than 4, per Biquad stage.
00098 *
00099 * \par Instance Structure
00100 * The coefficients and state variables for a filter are stored together in an instance data structure.
00101 * A separate instance structure must be defined for each filter.
00102 * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
00103 *
00104 * \par Init Functions
00105 * There is also an associated initialization function.
00106 * The initialization function performs following operations:
00107 * - Sets the values of the internal structure fields.
00108 * - Zeros out the values in the state buffer.
00109 * To do this manually without calling the init function, assign the follow subfields of the instance structure:
00110 * numStages, pCoeffs, pState. Also set all of the values in pState to zero.
00111 *
00112 * \par
00113 * Use of the initialization function is optional.
00114 * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
00115 * To place an instance structure into a const data section, the instance structure must be manually initialized.
00116 * Set the values in the state buffer to zeros before static initialization.
00117 * For example, to statically initialize the instance structure use
00118 * <pre>
00119 *     arm_biquad_cascade_df2T_instance_f64 S1 = {numStages, pState, pCoeffs};
00120 * </pre>
00121 * where <code>numStages</code> is the number of Biquad stages in the filter; <code>pState</code> is the address of the state buffer.
00122 * <code>pCoeffs</code> is the address of the coefficient buffer;
00123 *
00124 */
00125 
00126 /**
00127 * @addtogroup BiquadCascadeDF2T
00128 * @{
00129 */
00130 
00131 /**
00132 * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
00133 * @param[in]  *S        points to an instance of the filter data structure.
00134 * @param[in]  *pSrc     points to the block of input data.
00135 * @param[out] *pDst     points to the block of output data
00136 * @param[in]  blockSize number of samples to process.
00137 * @return none.
00138 */
00139 
00140 
00141 LOW_OPTIMIZATION_ENTER
00142 void arm_biquad_cascade_df2T_f64(
00143 const arm_biquad_cascade_df2T_instance_f64 * S,
00144 float64_t * pSrc,
00145 float64_t * pDst,
00146 uint32_t blockSize)
00147 {
00148 
00149    float64_t *pIn = pSrc;                         /*  source pointer            */
00150    float64_t *pOut = pDst;                        /*  destination pointer       */
00151    float64_t *pState = S->pState;                 /*  State pointer             */
00152    float64_t *pCoeffs = S->pCoeffs;               /*  coefficient pointer       */
00153    float64_t acc1;                                /*  accumulator               */
00154    float64_t b0, b1, b2, a1, a2;                  /*  Filter coefficients       */
00155    float64_t Xn1;                                 /*  temporary input           */
00156    float64_t d1, d2;                              /*  state variables           */
00157    uint32_t sample, stage = S->numStages;         /*  loop counters             */
00158 
00159 #if defined(ARM_MATH_CM7)
00160 
00161    float64_t Xn2, Xn3, Xn4, Xn5, Xn6, Xn7, Xn8;   /*  Input State variables     */
00162    float64_t Xn9, Xn10, Xn11, Xn12, Xn13, Xn14, Xn15, Xn16;
00163    float64_t acc2, acc3, acc4, acc5, acc6, acc7;  /*  Simulates the accumulator */
00164    float64_t acc8, acc9, acc10, acc11, acc12, acc13, acc14, acc15, acc16;
00165 
00166    do
00167    {
00168       /* Reading the coefficients */
00169       b0 = pCoeffs[0];
00170       b1 = pCoeffs[1];
00171       b2 = pCoeffs[2];
00172       a1 = pCoeffs[3];
00173       /* Apply loop unrolling and compute 16 output values simultaneously. */
00174       sample = blockSize >> 4U;
00175       a2 = pCoeffs[4];
00176 
00177       /*Reading the state values */
00178       d1 = pState[0];
00179       d2 = pState[1];
00180 
00181       pCoeffs += 5U;
00182 
00183 
00184       /* First part of the processing with loop unrolling.  Compute 16 outputs at a time.
00185        ** a second loop below computes the remaining 1 to 15 samples. */
00186       while (sample > 0U) {
00187 
00188          /* y[n] = b0 * x[n] + d1 */
00189          /* d1 = b1 * x[n] + a1 * y[n] + d2 */
00190          /* d2 = b2 * x[n] + a2 * y[n] */
00191 
00192          /* Read the first 2 inputs. 2 cycles */
00193          Xn1  = pIn[0 ];
00194          Xn2  = pIn[1 ];
00195 
00196          /* Sample 1. 5 cycles */
00197          Xn3  = pIn[2 ];
00198          acc1 = b0 * Xn1 + d1;
00199 
00200          Xn4  = pIn[3 ];
00201          d1 = b1 * Xn1 + d2;
00202 
00203          Xn5  = pIn[4 ];
00204          d2 = b2 * Xn1;
00205 
00206          Xn6  = pIn[5 ];
00207          d1 += a1 * acc1;
00208 
00209          Xn7  = pIn[6 ];
00210          d2 += a2 * acc1;
00211 
00212          /* Sample 2. 5 cycles */
00213          Xn8  = pIn[7 ];
00214          acc2 = b0 * Xn2 + d1;
00215 
00216          Xn9  = pIn[8 ];
00217          d1 = b1 * Xn2 + d2;
00218 
00219          Xn10 = pIn[9 ];
00220          d2 = b2 * Xn2;
00221 
00222          Xn11 = pIn[10];
00223          d1 += a1 * acc2;
00224 
00225          Xn12 = pIn[11];
00226          d2 += a2 * acc2;
00227 
00228          /* Sample 3. 5 cycles */
00229          Xn13 = pIn[12];
00230          acc3 = b0 * Xn3 + d1;
00231 
00232          Xn14 = pIn[13];
00233          d1 = b1 * Xn3 + d2;
00234 
00235          Xn15 = pIn[14];
00236          d2 = b2 * Xn3;
00237 
00238          Xn16 = pIn[15];
00239          d1 += a1 * acc3;
00240 
00241          pIn += 16;
00242          d2 += a2 * acc3;
00243 
00244          /* Sample 4. 5 cycles */
00245          acc4 = b0 * Xn4 + d1;
00246          d1 = b1 * Xn4 + d2;
00247          d2 = b2 * Xn4;
00248          d1 += a1 * acc4;
00249          d2 += a2 * acc4;
00250 
00251          /* Sample 5. 5 cycles */
00252          acc5 = b0 * Xn5 + d1;
00253          d1 = b1 * Xn5 + d2;
00254          d2 = b2 * Xn5;
00255          d1 += a1 * acc5;
00256          d2 += a2 * acc5;
00257 
00258          /* Sample 6. 5 cycles */
00259          acc6 = b0 * Xn6 + d1;
00260          d1 = b1 * Xn6 + d2;
00261          d2 = b2 * Xn6;
00262          d1 += a1 * acc6;
00263          d2 += a2 * acc6;
00264 
00265          /* Sample 7. 5 cycles */
00266          acc7 = b0 * Xn7 + d1;
00267          d1 = b1 * Xn7 + d2;
00268          d2 = b2 * Xn7;
00269          d1 += a1 * acc7;
00270          d2 += a2 * acc7;
00271 
00272          /* Sample 8. 5 cycles */
00273          acc8 = b0 * Xn8 + d1;
00274          d1 = b1 * Xn8 + d2;
00275          d2 = b2 * Xn8;
00276          d1 += a1 * acc8;
00277          d2 += a2 * acc8;
00278 
00279          /* Sample 9. 5 cycles */
00280          acc9 = b0 * Xn9 + d1;
00281          d1 = b1 * Xn9 + d2;
00282          d2 = b2 * Xn9;
00283          d1 += a1 * acc9;
00284          d2 += a2 * acc9;
00285 
00286          /* Sample 10. 5 cycles */
00287          acc10 = b0 * Xn10 + d1;
00288          d1 = b1 * Xn10 + d2;
00289          d2 = b2 * Xn10;
00290          d1 += a1 * acc10;
00291          d2 += a2 * acc10;
00292 
00293          /* Sample 11. 5 cycles */
00294          acc11 = b0 * Xn11 + d1;
00295          d1 = b1 * Xn11 + d2;
00296          d2 = b2 * Xn11;
00297          d1 += a1 * acc11;
00298          d2 += a2 * acc11;
00299 
00300          /* Sample 12. 5 cycles */
00301          acc12 = b0 * Xn12 + d1;
00302          d1 = b1 * Xn12 + d2;
00303          d2 = b2 * Xn12;
00304          d1 += a1 * acc12;
00305          d2 += a2 * acc12;
00306 
00307          /* Sample 13. 5 cycles */
00308          acc13 = b0 * Xn13 + d1;
00309          d1 = b1 * Xn13 + d2;
00310          d2 = b2 * Xn13;
00311 
00312          pOut[0 ] = acc1 ;
00313          d1 += a1 * acc13;
00314 
00315          pOut[1 ] = acc2 ;
00316          d2 += a2 * acc13;
00317 
00318          /* Sample 14. 5 cycles */
00319          pOut[2 ] = acc3 ;
00320          acc14 = b0 * Xn14 + d1;
00321 
00322          pOut[3 ] = acc4 ;
00323          d1 = b1 * Xn14 + d2;
00324 
00325          pOut[4 ] = acc5 ;
00326          d2 = b2 * Xn14;
00327 
00328          pOut[5 ] = acc6 ;
00329          d1 += a1 * acc14;
00330 
00331          pOut[6 ] = acc7 ;
00332          d2 += a2 * acc14;
00333 
00334          /* Sample 15. 5 cycles */
00335          pOut[7 ] = acc8 ;
00336          pOut[8 ] = acc9 ;
00337          acc15 = b0 * Xn15 + d1;
00338 
00339          pOut[9 ] = acc10;
00340          d1 = b1 * Xn15 + d2;
00341 
00342          pOut[10] = acc11;
00343          d2 = b2 * Xn15;
00344 
00345          pOut[11] = acc12;
00346          d1 += a1 * acc15;
00347 
00348          pOut[12] = acc13;
00349          d2 += a2 * acc15;
00350 
00351          /* Sample 16. 5 cycles */
00352          pOut[13] = acc14;
00353          acc16 = b0 * Xn16 + d1;
00354 
00355          pOut[14] = acc15;
00356          d1 = b1 * Xn16 + d2;
00357 
00358          pOut[15] = acc16;
00359          d2 = b2 * Xn16;
00360 
00361          sample--;
00362          d1 += a1 * acc16;
00363 
00364          pOut += 16;
00365          d2 += a2 * acc16;
00366       }
00367 
00368       sample = blockSize & 0xFu;
00369       while (sample > 0U) {
00370          Xn1 = *pIn;
00371          acc1 = b0 * Xn1 + d1;
00372 
00373          pIn++;
00374          d1 = b1 * Xn1 + d2;
00375 
00376          *pOut = acc1;
00377          d2 = b2 * Xn1;
00378 
00379          pOut++;
00380          d1 += a1 * acc1;
00381 
00382          sample--;
00383          d2 += a2 * acc1;
00384       }
00385 
00386       /* Store the updated state variables back into the state array */
00387       pState[0] = d1;
00388       /* The current stage input is given as the output to the next stage */
00389       pIn = pDst;
00390 
00391       pState[1] = d2;
00392       /* decrement the loop counter */
00393       stage--;
00394 
00395       pState += 2U;
00396 
00397       /*Reset the output working pointer */
00398       pOut = pDst;
00399 
00400    } while (stage > 0U);
00401 
00402 #elif defined(ARM_MATH_CM0_FAMILY)
00403 
00404    /* Run the below code for Cortex-M0 */
00405 
00406    do
00407    {
00408       /* Reading the coefficients */
00409       b0 = *pCoeffs++;
00410       b1 = *pCoeffs++;
00411       b2 = *pCoeffs++;
00412       a1 = *pCoeffs++;
00413       a2 = *pCoeffs++;
00414 
00415       /*Reading the state values */
00416       d1 = pState[0];
00417       d2 = pState[1];
00418 
00419 
00420       sample = blockSize;
00421 
00422       while (sample > 0U)
00423       {
00424          /* Read the input */
00425          Xn1 = *pIn++;
00426 
00427          /* y[n] = b0 * x[n] + d1 */
00428          acc1 = (b0 * Xn1) + d1;
00429 
00430          /* Store the result in the accumulator in the destination buffer. */
00431          *pOut++ = acc1;
00432 
00433          /* Every time after the output is computed state should be updated. */
00434          /* d1 = b1 * x[n] + a1 * y[n] + d2 */
00435          d1 = ((b1 * Xn1) + (a1 * acc1)) + d2;
00436 
00437          /* d2 = b2 * x[n] + a2 * y[n] */
00438          d2 = (b2 * Xn1) + (a2 * acc1);
00439 
00440          /* decrement the loop counter */
00441          sample--;
00442       }
00443 
00444       /* Store the updated state variables back into the state array */
00445       *pState++ = d1;
00446       *pState++ = d2;
00447 
00448       /* The current stage input is given as the output to the next stage */
00449       pIn = pDst;
00450 
00451       /*Reset the output working pointer */
00452       pOut = pDst;
00453 
00454       /* decrement the loop counter */
00455       stage--;
00456 
00457    } while (stage > 0U);
00458 
00459 #else
00460 
00461    float64_t Xn2, Xn3, Xn4;                       /*  Input State variables     */
00462    float64_t acc2, acc3, acc4;                        /*  accumulator               */
00463 
00464 
00465    float64_t p0, p1, p2, p3, p4, A1;
00466 
00467    /* Run the below code for Cortex-M4 and Cortex-M3 */
00468    do
00469    {
00470       /* Reading the coefficients */
00471       b0 = *pCoeffs++;
00472       b1 = *pCoeffs++;
00473       b2 = *pCoeffs++;
00474       a1 = *pCoeffs++;
00475       a2 = *pCoeffs++;
00476 
00477 
00478       /*Reading the state values */
00479       d1 = pState[0];
00480       d2 = pState[1];
00481 
00482       /* Apply loop unrolling and compute 4 output values simultaneously. */
00483       sample = blockSize >> 2U;
00484 
00485       /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
00486    ** a second loop below computes the remaining 1 to 3 samples. */
00487       while (sample > 0U) {
00488 
00489          /* y[n] = b0 * x[n] + d1 */
00490          /* d1 = b1 * x[n] + a1 * y[n] + d2 */
00491          /* d2 = b2 * x[n] + a2 * y[n] */
00492 
00493          /* Read the four inputs */
00494          Xn1 = pIn[0];
00495          Xn2 = pIn[1];
00496          Xn3 = pIn[2];
00497          Xn4 = pIn[3];
00498          pIn += 4;
00499 
00500          p0 = b0 * Xn1;
00501          p1 = b1 * Xn1;
00502          acc1 = p0 + d1;
00503          p0 = b0 * Xn2;
00504          p3 = a1 * acc1;
00505          p2 = b2 * Xn1;
00506          A1 = p1 + p3;
00507          p4 = a2 * acc1;
00508          d1 = A1 + d2;
00509          d2 = p2 + p4;
00510 
00511          p1 = b1 * Xn2;
00512          acc2 = p0 + d1;
00513          p0 = b0 * Xn3;
00514          p3 = a1 * acc2;
00515          p2 = b2 * Xn2;
00516          A1 = p1 + p3;
00517          p4 = a2 * acc2;
00518          d1 = A1 + d2;
00519          d2 = p2 + p4;
00520 
00521          p1 = b1 * Xn3;
00522          acc3 = p0 + d1;
00523          p0 = b0 * Xn4;
00524          p3 = a1 * acc3;
00525          p2 = b2 * Xn3;
00526          A1 = p1 + p3;
00527          p4 = a2 * acc3;
00528          d1 = A1 + d2;
00529          d2 = p2 + p4;
00530 
00531          acc4 = p0 + d1;
00532          p1 = b1 * Xn4;
00533          p3 = a1 * acc4;
00534          p2 = b2 * Xn4;
00535          A1 = p1 + p3;
00536          p4 = a2 * acc4;
00537          d1 = A1 + d2;
00538          d2 = p2 + p4;
00539 
00540          pOut[0] = acc1;
00541          pOut[1] = acc2;
00542          pOut[2] = acc3;
00543          pOut[3] = acc4;
00544                  pOut += 4;
00545 
00546          sample--;
00547       }
00548 
00549       sample = blockSize & 0x3U;
00550       while (sample > 0U) {
00551          Xn1 = *pIn++;
00552 
00553          p0 = b0 * Xn1;
00554          p1 = b1 * Xn1;
00555          acc1 = p0 + d1;
00556          p3 = a1 * acc1;
00557          p2 = b2 * Xn1;
00558          A1 = p1 + p3;
00559          p4 = a2 * acc1;
00560          d1 = A1 + d2;
00561          d2 = p2 + p4;
00562 
00563          *pOut++ = acc1;
00564 
00565          sample--;
00566       }
00567 
00568       /* Store the updated state variables back into the state array */
00569       *pState++ = d1;
00570       *pState++ = d2;
00571 
00572       /* The current stage input is given as the output to the next stage */
00573       pIn = pDst;
00574 
00575       /*Reset the output working pointer */
00576       pOut = pDst;
00577 
00578       /* decrement the loop counter */
00579       stage--;
00580 
00581    } while (stage > 0U);
00582 
00583 #endif
00584 
00585 }
00586 LOW_OPTIMIZATION_EXIT
00587 
00588 /**
00589    * @} end of BiquadCascadeDF2T group
00590    */
00591