Added CMSIS5 DSP and NN folder. Needs some work.

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_biquad_cascade_df1_fast_q31.c Source File

arm_biquad_cascade_df1_fast_q31.c

00001 /* ----------------------------------------------------------------------
00002  * Project:      CMSIS DSP Library
00003  * Title:        arm_biquad_cascade_df1_fast_q31.c
00004  * Description:  Processing function for the Q31 Fast Biquad cascade DirectFormI(DF1) filter
00005  *
00006  * $Date:        27. January 2017
00007  * $Revision:    V.1.5.1
00008  *
00009  * Target Processor: Cortex-M cores
00010  * -------------------------------------------------------------------- */
00011 /*
00012  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
00013  *
00014  * SPDX-License-Identifier: Apache-2.0
00015  *
00016  * Licensed under the Apache License, Version 2.0 (the License); you may
00017  * not use this file except in compliance with the License.
00018  * You may obtain a copy of the License at
00019  *
00020  * www.apache.org/licenses/LICENSE-2.0
00021  *
00022  * Unless required by applicable law or agreed to in writing, software
00023  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
00024  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00025  * See the License for the specific language governing permissions and
00026  * limitations under the License.
00027  */
00028 
00029 #include "arm_math.h"
00030 
00031 /**
00032  * @ingroup groupFilters
00033  */
00034 
00035 /**
00036  * @addtogroup BiquadCascadeDF1
00037  * @{
00038  */
00039 
00040 /**
00041  * @brief Processing function for the fast Q31 Biquad cascade Direct Form I (DF1) filter.
00042  *
00043  * @param[in]  *S        points to an instance of the Q31 Biquad cascade structure; its numStages, postShift and pCoeffs are read and its pState array is read and updated.
00044  * @param[in]  *pSrc     points to the block of input data.
00045  * @param[out] *pDst     points to the block of output data; stages after the first filter this buffer in place.
00046  * @param[in]  blockSize number of samples to process per call.
00047  * @return     none.
00048  *
00049  * <b>Scaling and Overflow Behavior:</b>
00050  * \par
00051  * This function is optimized for speed at the expense of fixed-point precision and overflow protection.
00052  * The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format.
00053  * These intermediate results are added to a 2.30 accumulator.
00054  * Finally, the accumulator is shifted left by (postShift + 1) bits to produce the 1.31 result.
00055  * The fast version has the same overflow behavior as the standard version and provides less precision since it discards the low 32 bits of each multiplication result.
00056  * In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25). Use the initialization function
00057  * arm_biquad_cascade_df1_init_q31() to initialize filter structure.
00058  *
00059  * \par
00060  * Refer to the function <code>arm_biquad_cascade_df1_q31()</code> for a slower implementation of this function which uses 64-bit accumulation to provide higher precision.  Both the slow and the fast versions use the same instance structure.
00061  * Use the function <code>arm_biquad_cascade_df1_init_q31()</code> to initialize the filter structure.
00062  */
00063 
00064 void arm_biquad_cascade_df1_fast_q31(
00065   const arm_biquad_casd_df1_inst_q31 * S,
00066   q31_t * pSrc,
00067   q31_t * pDst,
00068   uint32_t blockSize)
00069 {
00070   q31_t acc = 0;                                 /*  accumulator, reused for every output sample */
00071   q31_t Xn1, Xn2, Yn1, Yn2;                      /*  x[n-1], x[n-2], y[n-1], y[n-2] of the current stage */
00072   q31_t b0, b1, b2, a1, a2;                      /*  coefficients of the current stage */
00073   q31_t *pIn = pSrc;                             /*  input pointer (pSrc for the first stage) */
00074   q31_t *pOut = pDst;                            /*  output pointer (always within pDst) */
00075   q31_t *pState = S->pState;                     /*  state pointer, advanced by four values per stage */
00076   q31_t *pCoeffs = S->pCoeffs;                   /*  coefficient pointer, advanced by five values per stage */
00077   q31_t Xn;                                      /*  current input sample x[n]     */
00078   int32_t shift = (int32_t) S->postShift + 1;    /*  left shift applied to each accumulator result: postShift + 1 */
00079   uint32_t sample, stage = S->numStages;         /*  per-stage sample counter and remaining-stage counter */
00080   /* NOTE(review): the do/while below runs a stage before testing the unsigned stage counter, so numStages == 0 would wrap it rather than exit - presumably callers guarantee numStages >= 1. */
00081   /* NOTE(review): results are scaled with a plain "acc << shift"; no saturation step is visible in this function. */
00082   do
00083   {
00084     /* Read the five coefficients of this stage, stored in the order b0, b1, b2, a1, a2 */
00085     b0 = *pCoeffs++;
00086     b1 = *pCoeffs++;
00087     b2 = *pCoeffs++;
00088     a1 = *pCoeffs++;
00089     a2 = *pCoeffs++;
00090 
00091     /* Read the four state values of this stage, stored in the order x[n-1], x[n-2], y[n-1], y[n-2] */
00092     Xn1 = pState[0];
00093     Xn2 = pState[1];
00094     Yn1 = pState[2];
00095     Yn2 = pState[3];
00096 
00097     /* Apply loop unrolling and compute 4 output values per loop iteration. */
00098     /*      A single accumulator, acc, is reused for each of the four outputs:
00099      *
00100      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
00101      */
00102 
00103     sample = blockSize >> 2U;
00104 
00105     /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
00106      ** a second loop below computes the remaining 1 to 3 samples. */
00107     while (sample > 0U)
00108     {
00109       /* Read the first input x[n]; pIn itself is only advanced once per four samples, further below */
00110       Xn = *pIn;
00111 
00112       /* Output 1: acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00113       /* acc =  b1 * x[n-1]   (in this step the b1 term is computed first) */
00114       /*acc = (q31_t) (((q63_t) b1 * Xn1) >> 32);*/
00115       mult_32x32_keep32_R(acc, b1, Xn1);
00116       /* acc +=  b0 * x[n] */
00117       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b0 * (Xn))) >> 32);*/
00118       multAcc_32x32_keep32_R(acc, b0, Xn);
00119       /* acc +=  b2 * x[n-2] */
00120       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);*/
00121       multAcc_32x32_keep32_R(acc, b2, Xn2);
00122       /* acc +=  a1 * y[n-1] */
00123       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);*/
00124       multAcc_32x32_keep32_R(acc, a1, Yn1);
00125       /* acc +=  a2 * y[n-2] */
00126       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);*/
00127       multAcc_32x32_keep32_R(acc, a2, Yn2);
00128 
00129       /* The result is converted to 1.31 by a plain left shift; Yn2 is reused to hold this newest output */
00130       Yn2 = acc << shift;
00131 
00132       /* Read the second input into Xn2, whose previous value has already been consumed above */
00133       Xn2 = *(pIn + 1U);
00134 
00135       /* Store the output in the destination buffer. */
00136       *pOut = Yn2;
00137 
00138       /* Output 2: same equation; here Xn2 = x[n], Xn = x[n-1], Xn1 = x[n-2], Yn2 = y[n-1], Yn1 = y[n-2] */
00139       /* acc =  b0 * x[n] */
00140       /*acc = (q31_t) (((q63_t) b0 * (Xn2)) >> 32);*/
00141       mult_32x32_keep32_R(acc, b0, Xn2);
00142       /* acc +=  b1 * x[n-1] */
00143       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn))) >> 32);*/
00144       multAcc_32x32_keep32_R(acc, b1, Xn);
00145       /* acc +=  b2 * x[n-2] */
00146       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn1))) >> 32);*/
00147       multAcc_32x32_keep32_R(acc, b2, Xn1);
00148       /* acc +=  a1 * y[n-1] */
00149       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32);*/
00150       multAcc_32x32_keep32_R(acc, a1, Yn2);
00151       /* acc +=  a2 * y[n-2] */
00152       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32);*/
00153       multAcc_32x32_keep32_R(acc, a2, Yn1);
00154 
00155       /* The result is converted to 1.31; Yn1 is reused to hold this newest output */
00156       Yn1 = acc << shift;
00157 
00158       /* Read the third input into Xn1, whose previous value has already been consumed above */
00159       Xn1 = *(pIn + 2U);
00160 
00161       /* Store the output in the destination buffer. */
00162       *(pOut + 1U) = Yn1;
00163 
00164       /* Output 3: same equation; here Xn1 = x[n], Xn2 = x[n-1], Xn = x[n-2], Yn1 = y[n-1], Yn2 = y[n-2] */
00165       /* acc =  b0 * x[n] */
00166       /*acc = (q31_t) (((q63_t) b0 * (Xn1)) >> 32);*/
00167       mult_32x32_keep32_R(acc, b0, Xn1);
00168       /* acc +=  b1 * x[n-1] */
00169       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn2))) >> 32);*/
00170       multAcc_32x32_keep32_R(acc, b1, Xn2);
00171       /* acc +=  b2 * x[n-2] */
00172       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn))) >> 32);*/
00173       multAcc_32x32_keep32_R(acc, b2, Xn);
00174       /* acc +=  a1 * y[n-1] */
00175       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);*/
00176       multAcc_32x32_keep32_R(acc, a1, Yn1);
00177       /* acc +=  a2 * y[n-2] */
00178       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);*/
00179       multAcc_32x32_keep32_R(acc, a2, Yn2);
00180 
00181       /* The result is converted to 1.31; Yn2 is reused to hold this newest output */
00182       Yn2 = acc << shift;
00183 
00184       /* Read the fourth input into Xn, whose previous value has already been consumed above */
00185       Xn = *(pIn + 3U);
00186 
00187       /* Store the output in the destination buffer. */
00188       *(pOut + 2U) = Yn2;
00189       pIn += 4U;   /* all four inputs of this iteration have now been read */
00190 
00191       /* Output 4: same equation; here Xn = x[n], Xn1 = x[n-1], Xn2 = x[n-2], Yn2 = y[n-1], Yn1 = y[n-2] */
00192       /* acc =  b0 * x[n] */
00193       /*acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32);*/
00194       mult_32x32_keep32_R(acc, b0, Xn);
00195       /* acc +=  b1 * x[n-1] */
00196       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32);*/
00197       multAcc_32x32_keep32_R(acc, b1, Xn1);
00198       /* acc +=  b2 * x[n-2] */
00199       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);*/
00200       multAcc_32x32_keep32_R(acc, b2, Xn2);
00201       /* acc +=  a1 * y[n-1] */
00202       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32);*/
00203       multAcc_32x32_keep32_R(acc, a1, Yn2);
00204       /* acc +=  a2 * y[n-2] */
00205       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32);*/
00206       multAcc_32x32_keep32_R(acc, a2, Yn1);
00207 
00208       /* Restore the naming used at the top of the loop (Xn1/Yn1 newest, Xn2/Yn2 one older). */
00209       /* The input states are updated as:  */
00210       /* Xn2 = Xn1    */
00211       Xn2 = Xn1;
00212 
00213       /* The result is converted to 1.31; Yn1 takes the newest output, Yn2 already holds the one before it */
00214       Yn1 = acc << shift;
00215 
00216       /* Xn1 = Xn     */
00217       Xn1 = Xn;
00218 
00219       /* Store the output in the destination buffer. */
00220       *(pOut + 3U) = Yn1;
00221       pOut += 4U;
00222 
00223       /* decrement the loop counter */
00224       sample--;
00225     }
00226 
00227     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
00228      ** No loop unrolling is used. */
00229     sample = (blockSize & 0x3U);
00230 
00231    while (sample > 0U)
00232    {
00233       /* Read the input */
00234       Xn = *pIn++;
00235 
00236       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00237       /* acc =  b0 * x[n] */
00238       /*acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32);*/
00239       mult_32x32_keep32_R(acc, b0, Xn);
00240       /* acc +=  b1 * x[n-1] */
00241       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32);*/
00242       multAcc_32x32_keep32_R(acc, b1, Xn1);
00243       /* acc +=  b2 * x[n-2] */
00244       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);*/
00245       multAcc_32x32_keep32_R(acc, b2, Xn2);
00246       /* acc +=  a1 * y[n-1] */
00247       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);*/
00248       multAcc_32x32_keep32_R(acc, a1, Yn1);
00249       /* acc +=  a2 * y[n-2] */
00250       /*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);*/
00251       multAcc_32x32_keep32_R(acc, a2, Yn2);
00252 
00253       /* The result is converted to 1.31  */
00254       acc = acc << shift;
00255 
00256       /* Every time after the output is computed state should be updated. */
00257       /* The states should be updated as:  */
00258       /* Xn2 = Xn1    */
00259       /* Xn1 = Xn     */
00260       /* Yn2 = Yn1    */
00261       /* Yn1 = acc    */
00262       Xn2 = Xn1;
00263       Xn1 = Xn;
00264       Yn2 = Yn1;
00265       Yn1 = acc;
00266 
00267       /* Store the output in the destination buffer. */
00268       *pOut++ = acc;
00269 
00270       /* decrement the loop counter */
00271       sample--;
00272    }
00273 
00274     /*  The first stage goes from the input buffer to the output buffer. */
00275     /*  Subsequent stages occur in-place in the output buffer */
00276     pIn = pDst;
00277 
00278     /* Reset the output pointer to the start of the destination buffer */
00279     pOut = pDst;
00280 
00281     /*  Store the updated state variables back into the pState array; this also advances pState to the next stage */
00282     *pState++ = Xn1;
00283     *pState++ = Xn2;
00284     *pState++ = Yn1;
00285     *pState++ = Yn2;
00286 
00287   } while (--stage);   /* repeat until the stage counter reaches zero */
00288 }
00289 
00290 /**
00291   * @} end of BiquadCascadeDF1 group
00292   */
00293