CMSIS DSP library

Dependents:   performance_timer Surfboard_ gps2rtty Capstone ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_biquad_cascade_df1_fast_q15.c Source File

arm_biquad_cascade_df1_fast_q15.c

00001 /* ----------------------------------------------------------------------    
00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.    
00003 *    
00004 * $Date:        19. March 2015
00005 * $Revision:    V.1.4.5
00006 *    
00007 * Project:      CMSIS DSP Library    
00008 * Title:        arm_biquad_cascade_df1_fast_q15.c    
00009 *    
00010 * Description:  Fast processing function for the    
00011 *               Q15 Biquad cascade filter.    
00012 *    
00013 * Target Processor: Cortex-M4/Cortex-M3
00014 *  
00015 * Redistribution and use in source and binary forms, with or without 
00016 * modification, are permitted provided that the following conditions
00017 * are met:
00018 *   - Redistributions of source code must retain the above copyright
00019 *     notice, this list of conditions and the following disclaimer.
00020 *   - Redistributions in binary form must reproduce the above copyright
00021 *     notice, this list of conditions and the following disclaimer in
00022 *     the documentation and/or other materials provided with the 
00023 *     distribution.
00024 *   - Neither the name of ARM LIMITED nor the names of its contributors
00025 *     may be used to endorse or promote products derived from this
00026 *     software without specific prior written permission.
00027 *
00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00039 * POSSIBILITY OF SUCH DAMAGE. 
00040 * -------------------------------------------------------------------- */
00041 
00042 #include "arm_math.h"
00043 
00044 /**    
00045  * @ingroup groupFilters    
00046  */
00047 
00048 /**    
00049  * @addtogroup BiquadCascadeDF1    
00050  * @{    
00051  */
00052 
00053 /**    
00054  * @details Fast processing function for the Q15 Biquad cascade filter: runs the block through every stage of the cascade, one stage at a time.
00055  * @param[in]  *S points to an instance of the Q15 Biquad cascade structure; the buffer at S->pState is read at the start of each stage and written back at the end of it.
00056  * @param[in]  *pSrc points to the block of input data (read by the first stage only).
00057  * @param[out] *pDst points to the block of output data; every stage after the first reads from and writes to this buffer in place.
00058  * @param[in]  blockSize number of samples to process per call.
00059  * @return none.
00060  *    
00061  * <b>Scaling and Overflow Behavior:</b>    
00062  * \par    
00063  * This fast version uses a 32-bit accumulator with 2.30 format.    
00064  * The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit.    
00065  * Thus, if the accumulator result overflows it wraps around and distorts the result.    
00066  * In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25).    
00067  * The 2.30 accumulator is then shifted right by <code>15 - postShift</code> bits and the result is saturated to 16 bits to give the 1.15 output.
00068  *    
00069  * \par    
00070  * Refer to the function <code>arm_biquad_cascade_df1_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion.  Both the slow and the fast versions use the same instance structure.    
00071  * Use the function <code>arm_biquad_cascade_df1_init_q15()</code> to initialize the filter structure.    
00072  * @note NOTE(review): the stage loop is a <code>do ... while</code>, so one stage is always processed; with <code>numStages == 0</code> the unsigned stage counter would wrap on its first decrement. Presumably callers must guarantee <code>numStages >= 1</code> - confirm.
00073  */
00074 
00075 void arm_biquad_cascade_df1_fast_q15(
00076   const arm_biquad_casd_df1_inst_q15 * S,
00077   q15_t * pSrc,
00078   q15_t * pDst,
00079   uint32_t blockSize)
00080 {
00081   q15_t *pIn = pSrc;                             /*  Source pointer (switched to pDst after the first stage) */
00082   q15_t *pOut = pDst;                            /*  Destination pointer                          */
00083   q31_t in;                                      /*  Input: a packed pair of samples, or one widened sample */
00084   q31_t out;                                     /*  Temporary variable to hold output value      */
00085   q31_t b0;                                      /*  Packed coefficient word holding b0 and 0     */
00086   q31_t b1, a1;                                  /*  Packed coefficient words: b1/b2 and a1/a2    */
00087   q31_t state_in, state_out;                     /*  Packed state words: x[n-1]/x[n-2] and y[n-1]/y[n-2] */
00088   q31_t acc;                                     /*  Accumulator                                  */
00089   int32_t shift = (int32_t) (15 - S->postShift); /*  Right shift applied to the accumulator       */
00090   q15_t *pState = S->pState;                     /*  State pointer                                */
00091   q15_t *pCoeffs = S->pCoeffs;                   /*  Coefficient pointer                          */
00092   uint32_t sample, stage = S->numStages;         /*  Sample-pair counter and stage loop counter   */
00093 
00094   /* Each pass of the do/while below filters the whole block through one biquad stage. */
00095 
00096   do
00097   {
00098 
00099     /* Read the b0 coefficient and its 0 companion as one packed word */
00100     b0 = *__SIMD32(pCoeffs)++;
00101 
00102     /* Read the b1 and b2 coefficients as one packed word */
00103     b1 = *__SIMD32(pCoeffs)++;
00104 
00105     /* Read the a1 and a2 coefficients as one packed word */
00106     a1 = *__SIMD32(pCoeffs)++;
00107 
00108     /* Read the input state values from the state buffer:  x[n-1], x[n-2] */
00109     state_in = *__SIMD32(pState)++;
00110 
00111     /* Read the output state values from the state buffer:  y[n-1], y[n-2]. The post-decrement steps pState back to the start of this stage's state so the write-back after the sample loops overwrites the same entries. */
00112     state_out = *__SIMD32(pState)--;
00113 
00114     /* Apply loop unrolling and compute 2 output values per loop iteration. */
00115     /*      The variable acc holds the output value that is being computed:
00116      *    
00117      *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]       
00118      *    (evaluated twice per iteration, once for each sample of the input pair)
00119      */
00120     sample = blockSize >> 1u;
00121 
00122     /* First part of the processing with loop unrolling.  Compute 2 outputs at a time.    
00123      ** The if-block below computes the remaining 1 sample when blockSize is odd. */
00124     while(sample > 0u)
00125     {
00126 
00127       /* Read two input samples as one packed word */
00128       in = *__SIMD32(pIn)++;
00129 
00130       /* First sample of the pair: out =  b0 * x[n] + 0 * (other sample) */
00131       out = __SMUAD(b0, in);
00132       /* acc =  b1 * x[n-1] + b2 * x[n-2] + out */
00133       acc = __SMLAD(b1, state_in, out);
00134       /* acc +=  a1 * y[n-1] + a2 * y[n-2] */
00135       acc = __SMLAD(a1, state_out, acc);
00136 
00137       /* The accumulator is shifted right by (15 - postShift) and then saturated to 16 bits */
00138       out = __SSAT((acc >> shift), 16);
00139 
00140       /* Every time after the output is computed state should be updated. */
00141       /* The states should be updated as:  */
00142       /* Xn2 = Xn1    */
00143       /* Xn1 = Xn     */
00144       /* Yn2 = Yn1    */
00145       /* Yn1 = out    */
00146       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
00147       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
00148 
00149 #ifndef  ARM_MATH_BIG_ENDIAN
00150 
00151       state_in = __PKHBT(in, state_in, 16);
00152       state_out = __PKHBT(out, state_out, 16);
00153 
00154 #else
00155 
00156       state_in = __PKHBT(state_in >> 16, (in >> 16), 16);
00157       state_out = __PKHBT(state_out >> 16, (out), 16);
00158 
00159 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
00160 
00161       /* Second sample of the pair: out =  b0 * x[n] + 0 * (other sample); the X variant pairs b0 with the opposite halfword of in */
00162       out = __SMUADX(b0, in);
00163       /* acc =  b1 * x[n-1] + b2 * x[n-2] + out, using the state updated above */
00164       acc = __SMLAD(b1, state_in, out);
00165       /* acc +=  a1 * y[n-1] + a2 * y[n-2] */
00166       acc = __SMLAD(a1, state_out, acc);
00167 
00168       /* The accumulator is shifted right by (15 - postShift) and then saturated to 16 bits */
00169       out = __SSAT((acc >> shift), 16);
00170 
00171 
00172       /* Store both outputs in the destination buffer as one packed word: the first output is taken from state_out, the second from out. */
00173 
00174 #ifndef  ARM_MATH_BIG_ENDIAN
00175 
00176       *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16);
00177 
00178 #else
00179 
00180       *__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16);
00181 
00182 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
00183 
00184       /* Every time after the output is computed state should be updated. */
00185       /* The states should be updated as:  */
00186       /* Xn2 = Xn1    */
00187       /* Xn1 = Xn     */
00188       /* Yn2 = Yn1    */
00189       /* Yn1 = out    */
00190       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
00191       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
00192 
00193 #ifndef  ARM_MATH_BIG_ENDIAN
00194 
00195       state_in = __PKHBT(in >> 16, state_in, 16);
00196       state_out = __PKHBT(out, state_out, 16);
00197 
00198 #else
00199 
00200       state_in = __PKHBT(state_in >> 16, in, 16);
00201       state_out = __PKHBT(state_out >> 16, out, 16);
00202 
00203 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
00204 
00205 
00206       /* Decrement the sample-pair loop counter */
00207       sample--;
00208 
00209     }
00210 
00211     /* If the blockSize is not a multiple of 2, compute the one remaining output sample here.    
00212      ** No loop unrolling is used. */
00213 
00214     if((blockSize & 0x1u) != 0u)
00215     {
00216       /* Read the single remaining input sample (a q15 value widened into the q31 variable) */
00217       in = *pIn++;
00218 
00219       /* out =  b0 * x[n] + 0 * 0; the intrinsic is chosen per endianness, presumably because b0 sits in the opposite halfword of its packed word - confirm */
00220 
00221 #ifndef  ARM_MATH_BIG_ENDIAN
00222 
00223       out = __SMUAD(b0, in);
00224 
00225 #else
00226 
00227       out = __SMUADX(b0, in);
00228 
00229 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
00230 
00231       /* acc =  b1 * x[n-1] + b2 * x[n-2] + out */
00232       acc = __SMLAD(b1, state_in, out);
00233       /* acc +=  a1 * y[n-1] + a2 * y[n-2] */
00234       acc = __SMLAD(a1, state_out, acc);
00235 
00236       /* The accumulator is shifted right by (15 - postShift) and then saturated to 16 bits */
00237       out = __SSAT((acc >> shift), 16);
00238 
00239       /* Store the output in the destination buffer. */
00240       *pOut++ = (q15_t) out;
00241 
00242       /* Every time after the output is computed state should be updated. */
00243       /* The states should be updated as:  */
00244       /* Xn2 = Xn1    */
00245       /* Xn1 = Xn     */
00246       /* Yn2 = Yn1    */
00247       /* Yn1 = out    */
00248       /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
00249       /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
00250 
00251 #ifndef  ARM_MATH_BIG_ENDIAN
00252 
00253       state_in = __PKHBT(in, state_in, 16);
00254       state_out = __PKHBT(out, state_out, 16);
00255 
00256 #else
00257 
00258       state_in = __PKHBT(state_in >> 16, in, 16);
00259       state_out = __PKHBT(state_out >> 16, out, 16);
00260 
00261 #endif /*   #ifndef  ARM_MATH_BIG_ENDIAN    */
00262 
00263     }
00264 
00265     /*  The first stage goes from the input buffer to the output buffer.  */
00266     /*  Subsequent (numStages - 1) occur in-place in the output buffer  */
00267     pIn = pDst;
00268 
00269     /* Reset the output pointer to the start of the output buffer */
00270     pOut = pDst;
00271 
00272     /*  Store the updated state variables back into the state array; pState is left pointing at the next stage's state */
00273     *__SIMD32(pState)++ = state_in;
00274     *__SIMD32(pState)++ = state_out;
00275 
00276 
00277     /* Decrement the stage loop counter */
00278     stage--;
00279 
00280   } while(stage > 0u);
00281 }
00282 
00283 
00284 /**    
00285  * @} end of BiquadCascadeDF1 group    
00286  */