CMSIS DSP library

Dependents:   KL25Z_FFT_Demo Hat_Board_v5_1 KL25Z_FFT_Demo_tony KL25Z_FFT_Demo_tony ... more

Fork of mbed-dsp by mbed official

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_fir_q31.c Source File

arm_fir_q31.c

00001 /* ----------------------------------------------------------------------    
00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
00003 *    
00004 * $Date:        17. January 2013
00005 * $Revision:    V1.4.1
00006 *    
00007 * Project:      CMSIS DSP Library    
00008 * Title:        arm_fir_q31.c    
00009 *    
00010 * Description:  Q31 FIR filter processing function.    
00011 *    
00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00013 *  
00014 * Redistribution and use in source and binary forms, with or without 
00015 * modification, are permitted provided that the following conditions
00016 * are met:
00017 *   - Redistributions of source code must retain the above copyright
00018 *     notice, this list of conditions and the following disclaimer.
00019 *   - Redistributions in binary form must reproduce the above copyright
00020 *     notice, this list of conditions and the following disclaimer in
00021 *     the documentation and/or other materials provided with the 
00022 *     distribution.
00023 *   - Neither the name of ARM LIMITED nor the names of its contributors
00024 *     may be used to endorse or promote products derived from this
00025 *     software without specific prior written permission.
00026 *
00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00038 * POSSIBILITY OF SUCH DAMAGE.   
00039 * -------------------------------------------------------------------- */
00040 
00041 #include "arm_math.h"
00042 
00043 /**    
00044  * @ingroup groupFilters    
00045  */
00046 
00047 /**    
00048  * @addtogroup FIR    
00049  * @{    
00050  */
00051 
00052 /**
00053  * @brief Processing function for the Q31 FIR filter.
00054  * @param[in] *S points to an instance of the Q31 FIR filter structure; its pState, pCoeffs and numTaps members are read.
00055  * @param[in] *pSrc points to the block of input data; blockSize samples are read.
00056  * @param[out] *pDst points to the block of output data; blockSize samples are written.
00057  * @param[in] blockSize number of samples to process per call.
00058  * @return none.
00059  * @details
00060  * New input samples are appended to S->pState starting at index (numTaps - 1), so the state buffer must provide room for at least (numTaps + blockSize - 1) samples.
00061  * Before returning, the (numTaps - 1) samples that follow the last processed window are copied to the start of the state buffer for the next call.
00062  *
00063  * <b>Scaling and Overflow Behavior:</b>
00064  * \par
00065  * The function is implemented using an internal 64-bit accumulator. The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
00066  * Thus, if the accumulator result overflows it wraps around rather than clipping. In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits.
00067  * After all multiply-accumulates are performed, the 2.62 accumulator is right shifted by 31 bits and cast to q31_t to yield the 1.31 result. NOTE(review): the code applies no explicit saturation at this step, although this text previously said "saturated" -- confirm the intended behavior.
00068  * \par
00069  * Refer to the function <code>arm_fir_fast_q31()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4.
00070  */
00071 
00072 void arm_fir_q31(
00073   const arm_fir_instance_q31 * S,
00074   q31_t * pSrc,
00075   q31_t * pDst,
00076   uint32_t blockSize)
00077 {
00078   q31_t *pState = S->pState;                     /* Start of the state window used for the next output */
00079   q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
00080   q31_t *pStateCurnt;                            /* Write position for incoming samples in the state buffer */
00081 
00082 
00083 #ifndef ARM_MATH_CM0_FAMILY
00084 
00085   /* Unrolled path, compiled when ARM_MATH_CM0_FAMILY is not defined (Cortex-M4 and Cortex-M3) */
00086 
00087   q31_t x0, x1, x2;                              /* Three state samples held in locals and reused across the accumulators */
00088   q31_t c0;                                      /* Temporary variable to hold coefficient value */
00089   q31_t *px;                                     /* Temporary pointer for state */
00090   q31_t *pb;                                     /* Temporary pointer for coefficient buffer */
00091   q63_t acc0, acc1, acc2;                        /* Accumulators, one per output of a group of 3 */
00092   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
00093   uint32_t i, tapCnt, blkCnt, tapCntN3;          /* Loop counters */
00094 
00095   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
00096   /* pStateCurnt points to the location where the new input data should be written */
00097   pStateCurnt = &(S->pState[(numTaps - 1u)]);
00098 
00099   /* Apply loop unrolling and compute 3 output values simultaneously.
00100    * The variables acc0 ... acc2 hold output values that are being computed,
00101    * each one using the state window shifted by one more sample:
00102    *    acc0 =  pCoeffs[0] * pState[0] + pCoeffs[1] * pState[1] +...+ pCoeffs[numTaps-1] * pState[numTaps-1]
00103    *    acc1 =  pCoeffs[0] * pState[1] + pCoeffs[1] * pState[2] +...+ pCoeffs[numTaps-1] * pState[numTaps]
00104    *    acc2 =  pCoeffs[0] * pState[2] + pCoeffs[1] * pState[3] +...+ pCoeffs[numTaps-1] * pState[numTaps+1]
00105    *
00106    */
00107   blkCnt = blockSize / 3;                        /* Number of groups of 3 outputs */
00108   blockSize = blockSize - (3 * blkCnt);          /* Leftover outputs (0 to 2) handled by the tail loop */
00109 
00110   tapCnt = numTaps / 3;                          /* Iterations of the 3-tap unrolled inner loop */
00111   tapCntN3 = numTaps - (3 * tapCnt);             /* Leftover taps (0 to 2) */
00112 
00113   /* First part of the processing with loop unrolling.  Compute 3 outputs at a time.
00114    ** a second loop below computes the remaining 0 to 2 samples. */
00115   while(blkCnt > 0u)
00116   {
00117     /* Copy three new input samples into the state buffer */
00118     *pStateCurnt++ = *pSrc++;
00119     *pStateCurnt++ = *pSrc++;
00120     *pStateCurnt++ = *pSrc++;
00121 
00122     /* Set all accumulators to zero */
00123     acc0 = 0;
00124     acc1 = 0;
00125     acc2 = 0;
00126 
00127     /* Initialize state pointer */
00128     px = pState;
00129 
00130     /* Initialize coefficient pointer */
00131     pb = pCoeffs;
00132 
00133     /* Read the first two samples of the state window:
00134      *  px[0], px[1] */
00135     x0 = *(px++);
00136     x1 = *(px++);
00137 
00138     /* Loop unrolling.  Process 3 taps at a time. */
00139     i = tapCnt;
00140 
00141     while(i > 0u)
00142     {
00143       /* Read the first coefficient of this group of three (pb[0]) */
00144       c0 = *pb;
00145 
00146       /* Read the next state sample */
00147       x2 = *(px++);
00148 
00149       /* Perform the multiply-accumulates */
00150       acc0 += ((q63_t) x0 * c0);
00151       acc1 += ((q63_t) x1 * c0);
00152       acc2 += ((q63_t) x2 * c0);
00153 
00154       /* Read coefficient pb[1] and the next state sample; x0 is free to be overwritten here */
00155       c0 = *(pb + 1u);
00156       x0 = *(px++);
00157 
00158       /* Perform the multiply-accumulates */
00159       acc0 += ((q63_t) x1 * c0);
00160       acc1 += ((q63_t) x2 * c0);
00161       acc2 += ((q63_t) x0 * c0);
00162 
00163       /* Read coefficient pb[2] and the next state sample; x1 is free to be overwritten here */
00164       c0 = *(pb + 2u);
00165       x1 = *(px++);
00166 
00167       /* update coefficient pointer */
00168       pb += 3u;
00169 
00170       /* Perform the multiply-accumulates */
00171       acc0 += ((q63_t) x2 * c0);
00172       acc1 += ((q63_t) x0 * c0);
00173       acc2 += ((q63_t) x1 * c0);
00174 
00175       /* Decrement the loop counter */
00176       i--;
00177     }
00178 
00179     /* If the filter length is not a multiple of 3, compute the remaining filter taps */
00180 
00181     i = tapCntN3;
00182 
00183     while(i > 0u)
00184     {
00185       /* Read coefficients */
00186       c0 = *(pb++);
00187 
00188       /* Fetch 1 state variable */
00189       x2 = *(px++);
00190 
00191       /* Perform the multiply-accumulates */
00192       acc0 += ((q63_t) x0 * c0);
00193       acc1 += ((q63_t) x1 * c0);
00194       acc2 += ((q63_t) x2 * c0);
00195 
00196       /* Shift the held samples down by one so they line up with the next tap */
00197       x0 = x1;
00198       x1 = x2;
00199 
00200       /* Decrement the loop counter */
00201       i--;
00202     }
00203 
00204     /* Advance the state pointer by 3 to process the next group of 3 samples */
00205     pState = pState + 3;
00206 
00207     /* The results in the 3 accumulators are in 2.62 format.  Convert to 1.31
00208      ** Then store the 3 outputs in the destination buffer. */
00209     *pDst++ = (q31_t) (acc0 >> 31u);
00210     *pDst++ = (q31_t) (acc1 >> 31u);
00211     *pDst++ = (q31_t) (acc2 >> 31u);
00212 
00213     /* Decrement the samples loop counter */
00214     blkCnt--;
00215   }
00216 
00217   /* If the blockSize is not a multiple of 3, compute any remaining output samples here.
00218    ** No loop unrolling is used. */
00219 
00220   while(blockSize > 0u)
00221   {
00222     /* Copy one sample at a time into state buffer */
00223     *pStateCurnt++ = *pSrc++;
00224 
00225     /* Set the accumulator to zero */
00226     acc0 = 0;
00227 
00228     /* Initialize state pointer */
00229     px = pState;
00230 
00231     /* Initialize Coefficient pointer */
00232     pb = (pCoeffs);
00233 
00234     i = numTaps;
00235 
00236     /* Perform the multiply-accumulates; the do/while body runs at least once */
00237     do
00238     {
00239       acc0 += (q63_t) * (px++) * (*(pb++));
00240       i--;
00241     } while(i > 0u);
00242 
00243     /* The result is in 2.62 format.  Convert to 1.31
00244      ** Then store the output in the destination buffer. */
00245     *pDst++ = (q31_t) (acc0 >> 31u);
00246 
00247     /* Advance state pointer by 1 for the next sample */
00248     pState = pState + 1;
00249 
00250     /* Decrement the samples loop counter */
00251     blockSize--;
00252   }
00253 
00254   /* Processing is complete.
00255    ** Now copy the last numTaps - 1 samples to the start of the state buffer.
00256    ** This prepares the state buffer for the next function call. */
00257 
00258   /* Points to the start of the state buffer */
00259   pStateCurnt = S->pState;
00260 
00261   tapCnt = (numTaps - 1u) >> 2u;                 /* Number of groups of 4 samples to copy */
00262 
00263   /* Copy four samples per iteration */
00264   while(tapCnt > 0u)
00265   {
00266     *pStateCurnt++ = *pState++;
00267     *pStateCurnt++ = *pState++;
00268     *pStateCurnt++ = *pState++;
00269     *pStateCurnt++ = *pState++;
00270 
00271     /* Decrement the loop counter */
00272     tapCnt--;
00273   }
00274 
00275   /* Calculate remaining number of copies */
00276   tapCnt = (numTaps - 1u) % 0x4u;
00277 
00278   /* Copy the remaining q31_t data */
00279   while(tapCnt > 0u)
00280   {
00281     *pStateCurnt++ = *pState++;
00282 
00283     /* Decrement the loop counter */
00284     tapCnt--;
00285   }
00286 
00287 #else
00288 
00289 /* Straightforward path, compiled when ARM_MATH_CM0_FAMILY is defined (Cortex-M0): one output per iteration, no unrolling */
00290 
00291   q31_t *px;                                     /* Temporary pointer for state */
00292   q31_t *pb;                                     /* Temporary pointer for coefficient buffer */
00293   q63_t acc;                                     /* Accumulator */
00294   uint32_t numTaps = S->numTaps;                 /* Length of the filter */
00295   uint32_t i, tapCnt, blkCnt;                    /* Loop counters */
00296 
00297   /* S->pState buffer contains previous frame (numTaps - 1) samples */
00298   /* pStateCurnt points to the location where the new input data should be written */
00299   pStateCurnt = &(S->pState[(numTaps - 1u)]);
00300 
00301   /* Initialize blkCnt with blockSize */
00302   blkCnt = blockSize;
00303 
00304   while(blkCnt > 0u)
00305   {
00306     /* Copy one sample at a time into state buffer */
00307     *pStateCurnt++ = *pSrc++;
00308 
00309     /* Set the accumulator to zero */
00310     acc = 0;
00311 
00312     /* Initialize state pointer */
00313     px = pState;
00314 
00315     /* Initialize Coefficient pointer */
00316     pb = pCoeffs;
00317 
00318     i = numTaps;
00319 
00320     /* Perform the multiply-accumulates; the do/while body runs at least once */
00321     do
00322     {
00323       /* acc =  pCoeffs[0] * pState[0] + pCoeffs[1] * pState[1] +...+ pCoeffs[numTaps-1] * pState[numTaps-1] */
00324       acc += (q63_t) * px++ * *pb++;
00325       i--;
00326     } while(i > 0u);
00327 
00328     /* The result is in 2.62 format.  Convert to 1.31
00329      ** Then store the output in the destination buffer. */
00330     *pDst++ = (q31_t) (acc >> 31u);
00331 
00332     /* Advance state pointer by 1 for the next sample */
00333     pState = pState + 1;
00334 
00335     /* Decrement the samples loop counter */
00336     blkCnt--;
00337   }
00338 
00339   /* Processing is complete.
00340    ** Now copy the last numTaps - 1 samples to the start of the state buffer.
00341    ** This prepares the state buffer for the next function call. */
00342 
00343   /* Points to the start of the state buffer */
00344   pStateCurnt = S->pState;
00345 
00346   /* Copy (numTaps - 1) values */
00347   tapCnt = numTaps - 1u;
00348 
00349   /* Copy the data */
00350   while(tapCnt > 0u)
00351   {
00352     *pStateCurnt++ = *pState++;
00353 
00354     /* Decrement the loop counter */
00355     tapCnt--;
00356   }
00357 
00358 
00359 #endif /*  #ifndef ARM_MATH_CM0_FAMILY */
00360 
00361 }
00362 
00363 /**    
00364  * @} end of FIR group    
00365  */