CMSIS DSP library

Dependents:   performance_timer Surfboard_ gps2rtty Capstone ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_biquad_cascade_df1_q15.c Source File

arm_biquad_cascade_df1_q15.c

00001 /* ----------------------------------------------------------------------    
00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.    
00003 *    
00004 * $Date:        19. March 2015
00005 * $Revision:    V.1.4.5
00006 *    
00007 * Project:      CMSIS DSP Library    
00008 * Title:        arm_biquad_cascade_df1_q15.c    
00009 *    
00010 * Description:  Processing function for the    
00011 *               Q15 Biquad cascade DirectFormI(DF1) filter.    
00012 *    
00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00014 *  
00015 * Redistribution and use in source and binary forms, with or without 
00016 * modification, are permitted provided that the following conditions
00017 * are met:
00018 *   - Redistributions of source code must retain the above copyright
00019 *     notice, this list of conditions and the following disclaimer.
00020 *   - Redistributions in binary form must reproduce the above copyright
00021 *     notice, this list of conditions and the following disclaimer in
00022 *     the documentation and/or other materials provided with the 
00023 *     distribution.
00024 *   - Neither the name of ARM LIMITED nor the names of its contributors
00025 *     may be used to endorse or promote products derived from this
00026 *     software without specific prior written permission.
00027 *
00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00039 * POSSIBILITY OF SUCH DAMAGE.    
00040 * -------------------------------------------------------------------- */
00041 
00042 #include "arm_math.h"
00043 
00044 /**    
00045  * @ingroup groupFilters    
00046  */
00047 
00048 /**    
00049  * @addtogroup BiquadCascadeDF1    
00050  * @{    
00051  */
00052 
00053 /**    
00054  * @brief Processing function for the Q15 Biquad cascade filter.    
00055  * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.    
00056  * @param[in]  *pSrc points to the block of input data.    
00057  * @param[out] *pDst points to the location where the output result is written.    
00058  * @param[in]  blockSize number of samples to process per call.    
00059  * @return none.    
00060  *    
00061  *    
00062  * <b>Scaling and Overflow Behavior:</b>    
00063  * \par    
00064  * The function is implemented using a 64-bit internal accumulator.    
00065  * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.    
00066  * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.    
00067  * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.    
00068  * The accumulator is then shifted by <code>postShift</code> bits to truncate the result to 1.15 format by discarding the low 16 bits.    
00069  * Finally, the result is saturated to 1.15 format.    
00070  *    
00071  * \par    
00072  * Refer to the function <code>arm_biquad_cascade_df1_fast_q15()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4.    
00073  */
00074 
00075 void arm_biquad_cascade_df1_q15(
00076   const arm_biquad_casd_df1_inst_q15 * S,
00077   q15_t * pSrc,
00078   q15_t * pDst,
00079   uint32_t blockSize)
00080 {
00081 
00082 
00083 #ifndef ARM_MATH_CM0_FAMILY
00084 
00085   /* Run the below code for Cortex-M4 and Cortex-M3 */
00086 
00087   q15_t *pIn = pSrc;                             /*  Source pointer                               */
00088   q15_t *pOut = pDst;                            /*  Destination pointer                          */
00089   q31_t in;                                      /*  Temporary variable to hold input value       */
00090   q31_t out;                                     /*  Temporary variable to hold output value      */
00091   q31_t b0;                                      /*  Temporary variable to hold bo value          */
00092   q31_t b1, a1;                                  /*  Filter coefficients                          */
00093   q31_t state_in, state_out;                     /*  Filter state variables                       */
00094   q31_t acc_l, acc_h;
00095   q63_t acc;                                     /*  Accumulator                                  */
00096   int32_t lShift = (15 - (int32_t) S->postShift);       /*  Post shift                                   */
00097   q15_t *pState = S->pState;                     /*  State pointer                                */
00098   q15_t *pCoeffs = S->pCoeffs;                   /*  Coefficient pointer                          */
00099   uint32_t sample, stage = (uint32_t) S->numStages;     /*  Stage loop counter                           */
00100   int32_t uShift = (32 - lShift);
00101 
00102   do
00103   {
00104     /* Read the b0 and 0 coefficients using SIMD  */
00105     b0 = *__SIMD32(pCoeffs)++;
00106 
00107     /* Read the b1 and b2 coefficients using SIMD */
00108     b1 = *__SIMD32(pCoeffs)++;
00109 
00110     /* Read the a1 and a2 coefficients using SIMD */
00111     a1 = *__SIMD32(pCoeffs)++;
00112 
00113     /* Read the input state values from the state buffer:  x[n-1], x[n-2] */
00114     state_in = *__SIMD32(pState)++;
00115 
00116     /* Read the output state values from the state buffer:  y[n-1], y[n-2] */
00117     state_out = *__SIMD32(pState)--;
00118 
00119     /* Apply loop unrolling and compute 2 output values simultaneously. */
00120     /*      The variable acc hold output values that are being computed:    
00121      *    
00122      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]    
00123      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]    
00124      */
00125     sample = blockSize >> 1u;
00126 
00127     /* First part of the processing with loop unrolling.  Compute 2 outputs at a time.    
00128      ** a second loop below computes the remaining 1 sample. */
00129     while(sample > 0u)
00130     {
00131 
00132       /* Read the input */
00133       in = *__SIMD32(pIn)++;
00134 
00135       /* out =  b0 * x[n] + 0 * 0 */
00136       out = __SMUAD(b0, in);
00137 
00138       /* acc +=  b1 * x[n-1] +  b2 * x[n-2] + out */
00139       acc = __SMLALD(b1, state_in, out);
00140       /* acc +=  a1 * y[n-1] +  a2 * y[n-2] */
00141       acc = __SMLALD(a1, state_out, acc);
00142 
00143       /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
00144       /* Calc lower part of acc */
00145       acc_l = acc & 0xffffffff;
00146 
00147       /* Calc upper part of acc */
00148       acc_h = (acc >> 32) & 0xffffffff;
00149 
00150       /* Apply shift for lower part of acc and upper part of acc */
00151       out = (uint32_t) acc_l >> lShift | acc_h << uShift;
00152 
00153       out = __SSAT(out, 16);
00154 
00155       /* Every time after the output is computed state should be updated. */
00156       /* The states should be updated as:  */
00157       /* Xn2 = Xn1    */
00158       /* Xn1 = Xn     */
00159       /* Yn2 = Yn1    */
00160       /* Yn1 = acc   */
00161       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
00162       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
00163 
00164 #ifndef  ARM_MATH_BIG_ENDIAN
00165 
00166       state_in = __PKHBT(in, state_in, 16);
00167       state_out = __PKHBT(out, state_out, 16);
00168 
00169 #else
00170 
00171       state_in = __PKHBT(state_in >> 16, (in >> 16), 16);
00172       state_out = __PKHBT(state_out >> 16, (out), 16);
00173 
00174 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
00175 
00176       /* out =  b0 * x[n] + 0 * 0 */
00177       out = __SMUADX(b0, in);
00178       /* acc +=  b1 * x[n-1] +  b2 * x[n-2] + out */
00179       acc = __SMLALD(b1, state_in, out);
00180       /* acc +=  a1 * y[n-1] + a2 * y[n-2] */
00181       acc = __SMLALD(a1, state_out, acc);
00182 
00183       /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
00184       /* Calc lower part of acc */
00185       acc_l = acc & 0xffffffff;
00186 
00187       /* Calc upper part of acc */
00188       acc_h = (acc >> 32) & 0xffffffff;
00189 
00190       /* Apply shift for lower part of acc and upper part of acc */
00191       out = (uint32_t) acc_l >> lShift | acc_h << uShift;
00192 
00193       out = __SSAT(out, 16);
00194 
00195       /* Store the output in the destination buffer. */
00196 
00197 #ifndef  ARM_MATH_BIG_ENDIAN
00198 
00199       *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16);
00200 
00201 #else
00202 
00203       *__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16);
00204 
00205 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
00206 
00207       /* Every time after the output is computed state should be updated. */
00208       /* The states should be updated as:  */
00209       /* Xn2 = Xn1    */
00210       /* Xn1 = Xn     */
00211       /* Yn2 = Yn1    */
00212       /* Yn1 = acc   */
00213       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
00214       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
00215 #ifndef  ARM_MATH_BIG_ENDIAN
00216 
00217       state_in = __PKHBT(in >> 16, state_in, 16);
00218       state_out = __PKHBT(out, state_out, 16);
00219 
00220 #else
00221 
00222       state_in = __PKHBT(state_in >> 16, in, 16);
00223       state_out = __PKHBT(state_out >> 16, out, 16);
00224 
00225 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
00226 
00227 
00228       /* Decrement the loop counter */
00229       sample--;
00230 
00231     }
00232 
00233     /* If the blockSize is not a multiple of 2, compute any remaining output samples here.    
00234      ** No loop unrolling is used. */
00235 
00236     if((blockSize & 0x1u) != 0u)
00237     {
00238       /* Read the input */
00239       in = *pIn++;
00240 
00241       /* out =  b0 * x[n] + 0 * 0 */
00242 
00243 #ifndef  ARM_MATH_BIG_ENDIAN
00244 
00245       out = __SMUAD(b0, in);
00246 
00247 #else
00248 
00249       out = __SMUADX(b0, in);
00250 
00251 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
00252 
00253       /* acc =  b1 * x[n-1] + b2 * x[n-2] + out */
00254       acc = __SMLALD(b1, state_in, out);
00255       /* acc +=  a1 * y[n-1] + a2 * y[n-2] */
00256       acc = __SMLALD(a1, state_out, acc);
00257 
00258       /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
00259       /* Calc lower part of acc */
00260       acc_l = acc & 0xffffffff;
00261 
00262       /* Calc upper part of acc */
00263       acc_h = (acc >> 32) & 0xffffffff;
00264 
00265       /* Apply shift for lower part of acc and upper part of acc */
00266       out = (uint32_t) acc_l >> lShift | acc_h << uShift;
00267 
00268       out = __SSAT(out, 16);
00269 
00270       /* Store the output in the destination buffer. */
00271       *pOut++ = (q15_t) out;
00272 
00273       /* Every time after the output is computed state should be updated. */
00274       /* The states should be updated as:  */
00275       /* Xn2 = Xn1    */
00276       /* Xn1 = Xn     */
00277       /* Yn2 = Yn1    */
00278       /* Yn1 = acc   */
00279       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
00280       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
00281 
00282 #ifndef  ARM_MATH_BIG_ENDIAN
00283 
00284       state_in = __PKHBT(in, state_in, 16);
00285       state_out = __PKHBT(out, state_out, 16);
00286 
00287 #else
00288 
00289       state_in = __PKHBT(state_in >> 16, in, 16);
00290       state_out = __PKHBT(state_out >> 16, out, 16);
00291 
00292 #endif /*   #ifndef  ARM_MATH_BIG_ENDIAN    */
00293 
00294     }
00295 
00296     /*  The first stage goes from the input wire to the output wire.  */
00297     /*  Subsequent numStages occur in-place in the output wire  */
00298     pIn = pDst;
00299 
00300     /* Reset the output pointer */
00301     pOut = pDst;
00302 
00303     /*  Store the updated state variables back into the state array */
00304     *__SIMD32(pState)++ = state_in;
00305     *__SIMD32(pState)++ = state_out;
00306 
00307 
00308     /* Decrement the loop counter */
00309     stage--;
00310 
00311   } while(stage > 0u);
00312 
00313 #else
00314 
00315   /* Run the below code for Cortex-M0 */
00316 
00317   q15_t *pIn = pSrc;                             /*  Source pointer                               */
00318   q15_t *pOut = pDst;                            /*  Destination pointer                          */
00319   q15_t b0, b1, b2, a1, a2;                      /*  Filter coefficients           */
00320   q15_t Xn1, Xn2, Yn1, Yn2;                      /*  Filter state variables        */
00321   q15_t Xn;                                      /*  temporary input               */
00322   q63_t acc;                                     /*  Accumulator                                  */
00323   int32_t shift = (15 - (int32_t) S->postShift); /*  Post shift                                   */
00324   q15_t *pState = S->pState;                     /*  State pointer                                */
00325   q15_t *pCoeffs = S->pCoeffs;                   /*  Coefficient pointer                          */
00326   uint32_t sample, stage = (uint32_t) S->numStages;     /*  Stage loop counter                           */
00327 
00328   do
00329   {
00330     /* Reading the coefficients */
00331     b0 = *pCoeffs++;
00332     pCoeffs++;  // skip the 0 coefficient
00333     b1 = *pCoeffs++;
00334     b2 = *pCoeffs++;
00335     a1 = *pCoeffs++;
00336     a2 = *pCoeffs++;
00337 
00338     /* Reading the state values */
00339     Xn1 = pState[0];
00340     Xn2 = pState[1];
00341     Yn1 = pState[2];
00342     Yn2 = pState[3];
00343 
00344     /*      The variables acc holds the output value that is computed:         
00345      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]         
00346      */
00347 
00348     sample = blockSize;
00349 
00350     while(sample > 0u)
00351     {
00352       /* Read the input */
00353       Xn = *pIn++;
00354 
00355       /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
00356       /* acc =  b0 * x[n] */
00357       acc = (q31_t) b0 *Xn;
00358 
00359       /* acc +=  b1 * x[n-1] */
00360       acc += (q31_t) b1 *Xn1;
00361       /* acc +=  b[2] * x[n-2] */
00362       acc += (q31_t) b2 *Xn2;
00363       /* acc +=  a1 * y[n-1] */
00364       acc += (q31_t) a1 *Yn1;
00365       /* acc +=  a2 * y[n-2] */
00366       acc += (q31_t) a2 *Yn2;
00367 
00368       /* The result is converted to 1.31  */
00369       acc = __SSAT((acc >> shift), 16);
00370 
00371       /* Every time after the output is computed state should be updated. */
00372       /* The states should be updated as:  */
00373       /* Xn2 = Xn1    */
00374       /* Xn1 = Xn     */
00375       /* Yn2 = Yn1    */
00376       /* Yn1 = acc    */
00377       Xn2 = Xn1;
00378       Xn1 = Xn;
00379       Yn2 = Yn1;
00380       Yn1 = (q15_t) acc;
00381 
00382       /* Store the output in the destination buffer. */
00383       *pOut++ = (q15_t) acc;
00384 
00385       /* decrement the loop counter */
00386       sample--;
00387     }
00388 
00389     /*  The first stage goes from the input buffer to the output buffer. */
00390     /*  Subsequent stages occur in-place in the output buffer */
00391     pIn = pDst;
00392 
00393     /* Reset to destination pointer */
00394     pOut = pDst;
00395 
00396     /*  Store the updated state variables back into the pState array */
00397     *pState++ = Xn1;
00398     *pState++ = Xn2;
00399     *pState++ = Yn1;
00400     *pState++ = Yn2;
00401 
00402   } while(--stage);
00403 
00404 #endif /*     #ifndef ARM_MATH_CM0_FAMILY */
00405 
00406 }
00407 
00408 
00409 /**    
00410  * @} end of BiquadCascadeDF1 group    
00411  */