CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details

Dependents:   K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_fir_q7.c Source File

arm_fir_q7.c

00001 /* ----------------------------------------------------------------------  
00002 * Copyright (C) 2010 ARM Limited. All rights reserved.  
00003 *  
00004 * $Date:        29. November 2010  
00005 * $Revision:    V1.0.3  
00006 *  
00007 * Project:      CMSIS DSP Library  
00008 * Title:        arm_fir_q7.c  
00009 *  
00010 * Description:  Q7 FIR filter processing function.  
00011 *  
00012 * Target Processor: Cortex-M4/Cortex-M3
00013 *  
00014 * Version 1.0.3 2010/11/29 
00015 *    Re-organized the CMSIS folders and updated documentation.  
00016 *   
00017 * Version 1.0.2 2010/11/11  
00018 *    Documentation updated.   
00019 *  
00020 * Version 1.0.1 2010/10/05   
00021 *    Production release and review comments incorporated.  
00022 *  
00023 * Version 1.0.0 2010/09/20   
00024 *    Production release and review comments incorporated.  
00025 *  
00026 * Version 0.0.5  2010/04/26   
00027 *    incorporated review comments and updated with latest CMSIS layer  
00028 *  
00029 * Version 0.0.3  2010/03/10   
00030 *    Initial version  
00031 * -------------------------------------------------------------------- */ 
00032  
00033 #include "arm_math.h" 
00034  
00035 /**  
00036  * @ingroup groupFilters  
00037  */ 
00038  
00039 /**  
00040  * @addtogroup FIR  
00041  * @{  
00042  */ 
00043  
00044 /**  
00045  * @param[in]   *S points to an instance of the Q7 FIR filter structure.  
00046  * @param[in]   *pSrc points to the block of input data.  
00047  * @param[out]  *pDst points to the block of output data.  
00048  * @param[in]   blockSize number of samples to process per call.  
00049  * @return  none.  
00050  *  
00051  * <b>Scaling and Overflow Behavior:</b>  
00052  * \par  
00053  * The function is implemented using a 32-bit internal accumulator.  
00054  * Both coefficients and state variables are represented in 1.7 format and multiplications yield a 2.14 result.  
00055  * The 2.14 intermediate results are accumulated in a 32-bit accumulator in 18.14 format.  
00056  * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.  
00057  * The accumulator is converted to 18.7 format by discarding the low 7 bits.  
00058  * Finally, the result is truncated to 1.7 format.  
00059  */ 
00060  
00061 void arm_fir_q7( 
00062   const arm_fir_instance_q7 * S, 
00063   q7_t * pSrc, 
00064   q7_t * pDst, 
00065   uint32_t blockSize) 
00066 { 
00067   uint32_t numTaps = S->numTaps;                 /* Number of taps in the filter */ 
00068   uint32_t i, blkCnt;                            /* Loop counters */ 
00069   q7_t *pState = S->pState;                      /* State pointer */ 
00070   q7_t *pCoeffs = S->pCoeffs;                    /* Coefficient pointer */ 
00071   q7_t *px, *pb;                                 /* Temporary pointers to state and coeff */ 
00072   q31_t acc = 0;                                 /* Accumlator */ 
00073   q31_t input1, input2;                          /* Temporary variables to store input */ 
00074   q15_t in1, in2;                                /* Temporary variables to store input */ 
00075   q7_t *pStateCurnt;                             /* Points to the current sample of the state */ 
00076  
00077  
00078   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 
00079   /* pStateCurnt points to the location where the new input data should be written */ 
00080   pStateCurnt = S->pState + (numTaps - 1u); 
00081  
00082   i = blockSize >> 2u; 
00083  
00084   /* Copy four new input samples into the state buffer.  
00085    ** Use 32-bit SIMD to move the four 8-bit data.  Only requires one copy for every four samples. */ 
00086   while(i > 0u) 
00087   { 
00088     *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++; 
00089     i--; 
00090   } 
00091  
00092   i = blockSize % 0x4u; 
00093  
00094   /* Copy remining samples into the state buffer. */ 
00095   while(i > 0u) 
00096   { 
00097     *pStateCurnt++ = *pSrc++; 
00098     i--; 
00099   } 
00100  
00101   blkCnt = blockSize; 
00102  
00103   /* Perform filtering upto BlockSize - BlockSize%4  */ 
00104   while(blkCnt > 0u) 
00105   { 
00106     /* Set accumulator to zero */ 
00107     acc = 0; 
00108  
00109     /* Initialize state pointer of type q7 */ 
00110     px = pState; 
00111  
00112     /* Initialize coeff pointer of type q7 */ 
00113     pb = pCoeffs; 
00114  
00115     i = numTaps >> 2u; 
00116  
00117     /* Loop over the number of taps.  Unroll by a factor of 4.  
00118      ** Repeat until we've computed numTaps-4 coefficients. */ 
00119     while(i > 0u) 
00120     { 
00121       /* Reading two inputs of state buffer and packing */ 
00122       in1 = (q15_t) * px++; 
00123       in2 = (q15_t) * px++; 
00124       input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
00125  
00126       /* Reading two inputs of coefficient buffer and packing */ 
00127       in1 = (q15_t) * pb++; 
00128       in2 = (q15_t) * pb++; 
00129       input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
00130  
00131       /* Perform Multiply and accumlation of 2 packed inputs and coefficients using SMLALD and store the result in accumlator. */ 
00132       acc = __SMLAD(input1, input2, acc); 
00133  
00134       /* Reading two inputs of state buffer and packing */ 
00135       in1 = (q15_t) * px++; 
00136       in2 = (q15_t) * px++; 
00137       input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
00138  
00139       /* Reading two inputs of coefficient buffer and packing */ 
00140       in1 = (q15_t) * pb++; 
00141       in2 = (q15_t) * pb++; 
00142       input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
00143  
00144       /* Perform Multiply and accumlation of 2 packed inputs and coefficients using SMLALD and store the result in accumlator. */ 
00145       acc = __SMLAD(input1, input2, acc); 
00146  
00147       /* Decrement the tap loop counter */ 
00148       i--; 
00149     } 
00150  
00151     i = numTaps % 0x4u; 
00152  
00153     /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 
00154     while(i > 0u) 
00155     { 
00156       acc = __SMLAD(*px++, *pb++, acc); 
00157       i--; 
00158  
00159     } 
00160  
00161     /* Saturate output */ 
00162     acc = __SSAT((acc >> 7), 8); 
00163  
00164     /*Store filter output */ 
00165     *pDst++ = (q7_t) (acc); 
00166  
00167     /* Advance the state pointer by 1 to process the next sample */ 
00168     pState = pState + 1; 
00169  
00170     /* Decrement the loop counter */ 
00171     blkCnt--; 
00172   } 
00173  
00174   /* Processing is complete.  
00175    ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.  
00176    ** This prepares the state buffer for the next function call. */ 
00177  
00178   /* Points to the start of the state buffer */ 
00179   pStateCurnt = S->pState; 
00180  
00181   /* Calculation of count for copying integer writes */ 
00182   i = (numTaps - 1u) >> 2u; 
00183  
00184   /* Copy four values using integer pointer */ 
00185   while(i > 0u) 
00186   { 
00187     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
00188  
00189     i--; 
00190  
00191   } 
00192  
00193   /* Calculation of count for remaining q7_t data */ 
00194   i = (numTaps - 1u) % 0x4u; 
00195  
00196   /* Copy of remaining q7_t data */ 
00197   while(i > 0u) 
00198   { 
00199     *pStateCurnt++ = *pState++; 
00200     i--; 
00201   } 
00202  
00203 } 
00204  
00205 /**  
00206  * @} end of FIR group  
00207  */