CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details

Dependents:   K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_fir_sparse_q7.c Source File

arm_fir_sparse_q7.c

00001 /* ----------------------------------------------------------------------  
00002 * Copyright (C) 2010 ARM Limited. All rights reserved.  
00003 *  
00004 * $Date:        29. November 2010  
00005 * $Revision:    V1.0.3  
00006 *  
00007 * Project:      CMSIS DSP Library  
00008 * Title:        arm_fir_sparse_q7.c  
00009 *  
00010 * Description:  Q7 sparse FIR filter processing function. 
00011 *  
00012 * Target Processor: Cortex-M4/Cortex-M3
00013 *  
00014 * Version 1.0.3 2010/11/29 
00015 *    Re-organized the CMSIS folders and updated documentation.  
00016 *   
00017 * Version 1.0.2 2010/11/11  
00018 *    Documentation updated.   
00019 *  
00020 * Version 1.0.1 2010/10/05   
00021 *    Production release and review comments incorporated.  
00022 *  
00023 * Version 1.0.0 2010/09/20   
00024 *    Production release and review comments incorporated  
00025 *  
00026 * Version 0.0.7  2010/06/10   
00027 *    Misra-C changes done  
00028 * ------------------------------------------------------------------- */ 
00029 #include "arm_math.h" 
00030  
00031  
00032 /**  
00033  * @ingroup groupFilters  
00034  */ 
00035  
00036 /**  
00037  * @addtogroup FIR_Sparse  
00038  * @{  
00039  */ 
00040  
00041  
00042 /** 
00043  * @brief Processing function for the Q7 sparse FIR filter. 
00044  * @param[in]  *S           points to an instance of the Q7 sparse FIR structure. 
00045  * @param[in]  *pSrc        points to the block of input data. 
00046  * @param[out] *pDst        points to the block of output data 
00047  * @param[in]  *pScratchIn  points to a temporary buffer of size blockSize. 
00048  * @param[in]  *pScratchOut points to a temporary buffer of size blockSize. 
00049  * @param[in]  blockSize    number of input samples to process per call. 
00050  * @return none. 
00051  *  
00052  * <b>Scaling and Overflow Behavior:</b>  
00053  * \par  
00054  * The function is implemented using a 32-bit internal accumulator.  
00055  * Both coefficients and state variables are represented in 1.7 format and multiplications yield a 2.14 result.  
00056  * The 2.14 intermediate results are accumulated in a 32-bit accumulator in 18.14 format.  
00057  * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.  
00058  * The accumulator is then converted to 18.7 format by discarding the low 7 bits. 
00059  * Finally, the result is truncated to 1.7 format. 
00060  */ 
00061  
00062 void arm_fir_sparse_q7( 
00063   arm_fir_sparse_instance_q7 * S, 
00064   q7_t * pSrc, 
00065   q7_t * pDst, 
00066   q7_t * pScratchIn, 
00067   q31_t * pScratchOut, 
00068   uint32_t blockSize) 
00069 { 
00070  
00071   q7_t *pState = S->pState;                      /* State pointer */ 
00072   q7_t *pCoeffs = S->pCoeffs;                    /* Coefficient pointer */ 
00073   q7_t *px;                                      /* Scratch buffer pointer */ 
00074   q7_t *py = pState;                             /* Temporary pointers for state buffer */ 
00075   q7_t *pb = pScratchIn;                         /* Temporary pointers for scratch buffer */ 
00076   q7_t *pOut = pDst;                             /* Destination pointer */ 
00077   int32_t *pTapDelay = S->pTapDelay;             /* Pointer to the array containing offset of the non-zero tap values. */ 
00078   uint32_t delaySize = S->maxDelay + blockSize;  /* state length */ 
00079   uint16_t numTaps = S->numTaps;                 /* Filter order */ 
00080   int32_t readIndex;                             /* Read index of the state buffer */ 
00081   uint32_t tapCnt, blkCnt;                       /* loop counters */ 
00082   q7_t coeff = *pCoeffs++;                       /* Read the coefficient value */ 
00083   q31_t *pScr2 = pScratchOut;                    /* Working pointer for scratch buffer of output values */ 
00084   q31_t in; 
00085   q7_t in1, in2, in3, in4; 
00086  
00087   /* BlockSize of Input samples are copied into the state buffer */ 
00088   /* StateIndex points to the starting position to write in the state buffer */ 
00089   arm_circularWrite_q7(py, (int32_t) delaySize, &S->stateIndex, 1, pSrc, 1, 
00090                        blockSize); 
00091  
00092   /* Loop over the number of taps. */ 
00093   tapCnt = numTaps; 
00094  
00095   /* Read Index, from where the state buffer should be read, is calculated. */ 
00096   readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++; 
00097  
00098   /* Wraparound of readIndex */ 
00099   if(readIndex < 0) 
00100   { 
00101     readIndex += (int32_t) delaySize; 
00102   } 
00103  
00104   /* Working pointer for state buffer is updated */ 
00105   py = pState; 
00106  
00107   /* blockSize samples are read from the state buffer */ 
00108   arm_circularRead_q7(py, (int32_t) delaySize, &readIndex, 1, pb, pb, 
00109                       (int32_t) blockSize, 1, blockSize); 
00110  
00111   /* Working pointer for the scratch buffer of state values */ 
00112   px = pb; 
00113  
00114   /* Working pointer for scratch buffer of output values */ 
00115   pScratchOut = pScr2; 
00116  
00117   /* Loop over the blockSize. Unroll by a factor of 4.  
00118    * Compute 4 multiplications at a time. */ 
00119   blkCnt = blockSize >> 2; 
00120  
00121   while(blkCnt > 0u) 
00122   { 
00123     /* Perform multiplication and store in the scratch buffer */ 
00124     *pScratchOut++ = ((q31_t) * px++ * coeff); 
00125     *pScratchOut++ = ((q31_t) * px++ * coeff); 
00126     *pScratchOut++ = ((q31_t) * px++ * coeff); 
00127     *pScratchOut++ = ((q31_t) * px++ * coeff); 
00128  
00129     /* Decrement the loop counter */ 
00130     blkCnt--; 
00131   } 
00132  
00133   /* If the blockSize is not a multiple of 4,  
00134    * compute the remaining samples */ 
00135   blkCnt = blockSize % 0x4u; 
00136  
00137   while(blkCnt > 0u) 
00138   { 
00139     /* Perform multiplication and store in the scratch buffer */ 
00140     *pScratchOut++ = ((q31_t) * px++ * coeff); 
00141  
00142     /* Decrement the loop counter */ 
00143     blkCnt--; 
00144   } 
00145  
00146   /* Load the coefficient value and  
00147    * increment the coefficient buffer for the next set of state values */ 
00148   coeff = *pCoeffs++; 
00149  
00150   /* Read Index, from where the state buffer should be read, is calculated. */ 
00151   readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++; 
00152  
00153   /* Wraparound of readIndex */ 
00154   if(readIndex < 0) 
00155   { 
00156     readIndex += (int32_t) delaySize; 
00157   } 
00158  
00159   /* Loop over the number of taps. */ 
00160   tapCnt = (uint32_t) numTaps - 1u; 
00161  
00162   while(tapCnt > 0u) 
00163   { 
00164     /* Working pointer for state buffer is updated */ 
00165     py = pState; 
00166  
00167     /* blockSize samples are read from the state buffer */ 
00168     arm_circularRead_q7(py, (int32_t) delaySize, &readIndex, 1, pb, pb, 
00169                         (int32_t) blockSize, 1, blockSize); 
00170  
00171     /* Working pointer for the scratch buffer of state values */ 
00172     px = pb; 
00173  
00174     /* Working pointer for scratch buffer of output values */ 
00175     pScratchOut = pScr2; 
00176  
00177     /* Loop over the blockSize. Unroll by a factor of 4.  
00178      * Compute 4 MACS at a time. */ 
00179     blkCnt = blockSize >> 2; 
00180  
00181     while(blkCnt > 0u) 
00182     { 
00183       /* Perform Multiply-Accumulate */ 
00184       in = *pScratchOut + ((q31_t) * px++ * coeff); 
00185       *pScratchOut++ = in; 
00186       in = *pScratchOut + ((q31_t) * px++ * coeff); 
00187       *pScratchOut++ = in; 
00188       in = *pScratchOut + ((q31_t) * px++ * coeff); 
00189       *pScratchOut++ = in; 
00190       in = *pScratchOut + ((q31_t) * px++ * coeff); 
00191       *pScratchOut++ = in; 
00192  
00193       /* Decrement the loop counter */ 
00194       blkCnt--; 
00195     } 
00196  
00197     /* If the blockSize is not a multiple of 4,  
00198      * compute the remaining samples */ 
00199     blkCnt = blockSize % 0x4u; 
00200  
00201     while(blkCnt > 0u) 
00202     { 
00203       /* Perform Multiply-Accumulate */ 
00204       in = *pScratchOut + ((q31_t) * px++ * coeff); 
00205       *pScratchOut++ = in; 
00206  
00207       /* Decrement the loop counter */ 
00208       blkCnt--; 
00209     } 
00210  
00211     /* Load the coefficient value and  
00212      * increment the coefficient buffer for the next set of state values */ 
00213     coeff = *pCoeffs++; 
00214  
00215     /* Read Index, from where the state buffer should be read, is calculated. */ 
00216     readIndex = ((int32_t) S->stateIndex - 
00217                  (int32_t) blockSize) - *pTapDelay++; 
00218  
00219     /* Wraparound of readIndex */ 
00220     if(readIndex < 0) 
00221     { 
00222       readIndex += (int32_t) delaySize; 
00223     } 
00224  
00225     /* Decrement the tap loop counter */ 
00226     tapCnt--; 
00227   } 
00228  
00229   /* All the output values are in pScratchOut buffer.  
00230      Convert them into 1.15 format, saturate and store in the destination buffer. */ 
00231   /* Loop over the blockSize. */ 
00232   blkCnt = blockSize >> 2; 
00233  
00234   while(blkCnt > 0u) 
00235   { 
00236     in1 = (q7_t) __SSAT(*pScr2++ >> 7, 8); 
00237     in2 = (q7_t) __SSAT(*pScr2++ >> 7, 8); 
00238     in3 = (q7_t) __SSAT(*pScr2++ >> 7, 8); 
00239     in4 = (q7_t) __SSAT(*pScr2++ >> 7, 8); 
00240  
00241     *__SIMD32(pOut)++ = __PACKq7(in1, in2, in3, in4); 
00242  
00243     /* Decrement the blockSize loop counter */ 
00244     blkCnt--; 
00245   } 
00246  
00247   /* If the blockSize is not a multiple of 4,  
00248      remaining samples are processed in the below loop */ 
00249   blkCnt = blockSize % 0x4u; 
00250  
00251   while(blkCnt > 0u) 
00252   { 
00253     *pOut++ = (q7_t) __SSAT(*pScr2++ >> 7, 8); 
00254  
00255     /* Decrement the blockSize loop counter */ 
00256     blkCnt--; 
00257   } 
00258 } 
00259  
00260 /**  
00261  * @} end of FIR_Sparse group  
00262  */