CMSIS DSP library

Dependents:   performance_timer Surfboard_ gps2rtty Capstone ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_fir_decimate_q15.c Source File

arm_fir_decimate_q15.c

00001 /* ----------------------------------------------------------------------    
00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.    
00003 *    
00004 * $Date:        19. March 2015
00005 * $Revision:    V.1.4.5
00006 *    
00007 * Project:      CMSIS DSP Library    
00008 * Title:        arm_fir_decimate_q15.c    
00009 *    
00010 * Description:  Q15 FIR Decimator.    
00011 *    
00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00013 *  
00014 * Redistribution and use in source and binary forms, with or without 
00015 * modification, are permitted provided that the following conditions
00016 * are met:
00017 *   - Redistributions of source code must retain the above copyright
00018 *     notice, this list of conditions and the following disclaimer.
00019 *   - Redistributions in binary form must reproduce the above copyright
00020 *     notice, this list of conditions and the following disclaimer in
00021 *     the documentation and/or other materials provided with the 
00022 *     distribution.
00023 *   - Neither the name of ARM LIMITED nor the names of its contributors
00024 *     may be used to endorse or promote products derived from this
00025 *     software without specific prior written permission.
00026 *
00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00038 * POSSIBILITY OF SUCH DAMAGE. 
00039 * -------------------------------------------------------------------- */
00040 
00041 #include "arm_math.h"
00042 
00043 /**    
00044  * @ingroup groupFilters    
00045  */
00046 
00047 /**    
00048  * @addtogroup FIR_decimate    
00049  * @{    
00050  */
00051 
00052 /**    
00053  * @brief Processing function for the Q15 FIR decimator.    
00054  * @param[in] *S points to an instance of the Q15 FIR decimator structure.    
00055  * @param[in] *pSrc points to the block of input data.    
00056  * @param[out] *pDst points to the location where the output result is written.    
00057  * @param[in] blockSize number of input samples to process per call.    
00058  * @return none.    
00059  *    
00060  * <b>Scaling and Overflow Behavior:</b>    
00061  * \par    
00062  * The function is implemented using a 64-bit internal accumulator.    
00063  * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.    
00064  * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.    
00065  * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.    
00066  * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.    
00067  * Lastly, the accumulator is saturated to yield a result in 1.15 format.    
00068  *    
00069  * \par    
00070  * Refer to the function <code>arm_fir_decimate_fast_q15()</code> for a faster but less precise implementation of this function for Cortex-M3 and Cortex-M4.    
00071  */
00072 
00073 #ifndef ARM_MATH_CM0_FAMILY
00074 
00075 #ifndef UNALIGNED_SUPPORT_DISABLE
00076 
00077 void arm_fir_decimate_q15(
00078   const arm_fir_decimate_instance_q15 * S,
00079   q15_t * pSrc,
00080   q15_t * pDst,
00081   uint32_t blockSize)
00082 {
00083   q15_t *pState = S->pState;                     /* State pointer */
00084   q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
00085   q15_t *pStateCurnt;                            /* Points to the current sample of the state */
00086   q15_t *px;                                     /* Temporary pointer for state buffer */
00087   q15_t *pb;                                     /* Temporary pointer coefficient buffer */
00088   q31_t x0, x1, c0, c1;                          /* Temporary variables to hold state and coefficient values */
00089   q63_t sum0;                                    /* Accumulators */
00090   q63_t acc0, acc1;
00091   q15_t *px0, *px1;
00092   uint32_t blkCntN3;
00093   uint32_t numTaps = S->numTaps;                 /* Number of taps */
00094   uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M;  /* Loop counters */
00095 
00096 
00097   /* S->pState buffer contains previous frame (numTaps - 1) samples */
00098   /* pStateCurnt points to the location where the new input data should be written */
00099   pStateCurnt = S->pState + (numTaps - 1u);
00100 
00101 
00102   /* Total number of output samples to be computed */
00103   blkCnt = outBlockSize / 2;
00104   blkCntN3 = outBlockSize - (2 * blkCnt);
00105 
00106 
00107   while(blkCnt > 0u)
00108   {
00109     /* Copy decimation factor number of new input samples into the state buffer */
00110     i = 2 * S->M;
00111 
00112     do
00113     {
00114       *pStateCurnt++ = *pSrc++;
00115 
00116     } while(--i);
00117 
00118     /* Set accumulator to zero */
00119     acc0 = 0;
00120     acc1 = 0;
00121 
00122     /* Initialize state pointer */
00123     px0 = pState;
00124 
00125     px1 = pState + S->M;
00126 
00127 
00128     /* Initialize coeff pointer */
00129     pb = pCoeffs;
00130 
00131     /* Loop unrolling.  Process 4 taps at a time. */
00132     tapCnt = numTaps >> 2;
00133 
00134     /* Loop over the number of taps.  Unroll by a factor of 4.       
00135      ** Repeat until we've computed numTaps-4 coefficients. */
00136     while(tapCnt > 0u)
00137     {
00138       /* Read the Read b[numTaps-1] and b[numTaps-2]  coefficients */
00139       c0 = *__SIMD32(pb)++;
00140 
00141       /* Read x[n-numTaps-1] and x[n-numTaps-2]sample */
00142       x0 = *__SIMD32(px0)++;
00143 
00144       x1 = *__SIMD32(px1)++;
00145 
00146       /* Perform the multiply-accumulate */
00147       acc0 = __SMLALD(x0, c0, acc0);
00148 
00149       acc1 = __SMLALD(x1, c0, acc1);
00150 
00151       /* Read the b[numTaps-3] and b[numTaps-4] coefficient */
00152       c0 = *__SIMD32(pb)++;
00153 
00154       /* Read x[n-numTaps-2] and x[n-numTaps-3] sample */
00155       x0 = *__SIMD32(px0)++;
00156 
00157       x1 = *__SIMD32(px1)++;
00158 
00159       /* Perform the multiply-accumulate */
00160       acc0 = __SMLALD(x0, c0, acc0);
00161 
00162       acc1 = __SMLALD(x1, c0, acc1);
00163 
00164       /* Decrement the loop counter */
00165       tapCnt--;
00166     }
00167 
00168     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
00169     tapCnt = numTaps % 0x4u;
00170 
00171     while(tapCnt > 0u)
00172     {
00173       /* Read coefficients */
00174       c0 = *pb++;
00175 
00176       /* Fetch 1 state variable */
00177       x0 = *px0++;
00178 
00179       x1 = *px1++;
00180 
00181       /* Perform the multiply-accumulate */
00182       acc0 = __SMLALD(x0, c0, acc0);
00183       acc1 = __SMLALD(x1, c0, acc1);
00184 
00185       /* Decrement the loop counter */
00186       tapCnt--;
00187     }
00188 
00189     /* Advance the state pointer by the decimation factor       
00190      * to process the next group of decimation factor number samples */
00191     pState = pState + S->M * 2;
00192 
00193     /* Store filter output, smlad returns the values in 2.14 format */
00194     /* so downsacle by 15 to get output in 1.15 */
00195     *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
00196     *pDst++ = (q15_t) (__SSAT((acc1 >> 15), 16));
00197 
00198     /* Decrement the loop counter */
00199     blkCnt--;
00200   }
00201 
00202 
00203 
00204   while(blkCntN3 > 0u)
00205   {
00206     /* Copy decimation factor number of new input samples into the state buffer */
00207     i = S->M;
00208 
00209     do
00210     {
00211       *pStateCurnt++ = *pSrc++;
00212 
00213     } while(--i);
00214 
00215     /*Set sum to zero */
00216     sum0 = 0;
00217 
00218     /* Initialize state pointer */
00219     px = pState;
00220 
00221     /* Initialize coeff pointer */
00222     pb = pCoeffs;
00223 
00224     /* Loop unrolling.  Process 4 taps at a time. */
00225     tapCnt = numTaps >> 2;
00226 
00227     /* Loop over the number of taps.  Unroll by a factor of 4.       
00228      ** Repeat until we've computed numTaps-4 coefficients. */
00229     while(tapCnt > 0u)
00230     {
00231       /* Read the Read b[numTaps-1] and b[numTaps-2]  coefficients */
00232       c0 = *__SIMD32(pb)++;
00233 
00234       /* Read x[n-numTaps-1] and x[n-numTaps-2]sample */
00235       x0 = *__SIMD32(px)++;
00236 
00237       /* Read the b[numTaps-3] and b[numTaps-4] coefficient */
00238       c1 = *__SIMD32(pb)++;
00239 
00240       /* Perform the multiply-accumulate */
00241       sum0 = __SMLALD(x0, c0, sum0);
00242 
00243       /* Read x[n-numTaps-2] and x[n-numTaps-3] sample */
00244       x0 = *__SIMD32(px)++;
00245 
00246       /* Perform the multiply-accumulate */
00247       sum0 = __SMLALD(x0, c1, sum0);
00248 
00249       /* Decrement the loop counter */
00250       tapCnt--;
00251     }
00252 
00253     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
00254     tapCnt = numTaps % 0x4u;
00255 
00256     while(tapCnt > 0u)
00257     {
00258       /* Read coefficients */
00259       c0 = *pb++;
00260 
00261       /* Fetch 1 state variable */
00262       x0 = *px++;
00263 
00264       /* Perform the multiply-accumulate */
00265       sum0 = __SMLALD(x0, c0, sum0);
00266 
00267       /* Decrement the loop counter */
00268       tapCnt--;
00269     }
00270 
00271     /* Advance the state pointer by the decimation factor       
00272      * to process the next group of decimation factor number samples */
00273     pState = pState + S->M;
00274 
00275     /* Store filter output, smlad returns the values in 2.14 format */
00276     /* so downsacle by 15 to get output in 1.15 */
00277     *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
00278 
00279     /* Decrement the loop counter */
00280     blkCntN3--;
00281   }
00282 
00283   /* Processing is complete.       
00284    ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.       
00285    ** This prepares the state buffer for the next function call. */
00286 
00287   /* Points to the start of the state buffer */
00288   pStateCurnt = S->pState;
00289 
00290   i = (numTaps - 1u) >> 2u;
00291 
00292   /* copy data */
00293   while(i > 0u)
00294   {
00295     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
00296     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
00297 
00298     /* Decrement the loop counter */
00299     i--;
00300   }
00301 
00302   i = (numTaps - 1u) % 0x04u;
00303 
00304   /* copy data */
00305   while(i > 0u)
00306   {
00307     *pStateCurnt++ = *pState++;
00308 
00309     /* Decrement the loop counter */
00310     i--;
00311   }
00312 }
00313 
00314 #else
00315 
00316 
00317 void arm_fir_decimate_q15(
00318   const arm_fir_decimate_instance_q15 * S,
00319   q15_t * pSrc,
00320   q15_t * pDst,
00321   uint32_t blockSize)
00322 {
00323   q15_t *pState = S->pState;                     /* State pointer */
00324   q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
00325   q15_t *pStateCurnt;                            /* Points to the current sample of the state */
00326   q15_t *px;                                     /* Temporary pointer for state buffer */
00327   q15_t *pb;                                     /* Temporary pointer coefficient buffer */
00328   q15_t x0, x1, c0;                              /* Temporary variables to hold state and coefficient values */
00329   q63_t sum0;                                    /* Accumulators */
00330   q63_t acc0, acc1;
00331   q15_t *px0, *px1;
00332   uint32_t blkCntN3;
00333   uint32_t numTaps = S->numTaps;                 /* Number of taps */
00334   uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M;  /* Loop counters */
00335 
00336 
00337   /* S->pState buffer contains previous frame (numTaps - 1) samples */
00338   /* pStateCurnt points to the location where the new input data should be written */
00339   pStateCurnt = S->pState + (numTaps - 1u);
00340 
00341 
00342   /* Total number of output samples to be computed */
00343   blkCnt = outBlockSize / 2;
00344   blkCntN3 = outBlockSize - (2 * blkCnt);
00345 
00346   while(blkCnt > 0u)
00347   {
00348     /* Copy decimation factor number of new input samples into the state buffer */
00349     i = 2 * S->M;
00350 
00351     do
00352     {
00353       *pStateCurnt++ = *pSrc++;
00354 
00355     } while(--i);
00356 
00357     /* Set accumulator to zero */
00358     acc0 = 0;
00359     acc1 = 0;
00360 
00361     /* Initialize state pointer */
00362     px0 = pState;
00363 
00364     px1 = pState + S->M;
00365 
00366 
00367     /* Initialize coeff pointer */
00368     pb = pCoeffs;
00369 
00370     /* Loop unrolling.  Process 4 taps at a time. */
00371     tapCnt = numTaps >> 2;
00372 
00373     /* Loop over the number of taps.  Unroll by a factor of 4.       
00374      ** Repeat until we've computed numTaps-4 coefficients. */
00375     while(tapCnt > 0u)
00376     {
00377       /* Read the Read b[numTaps-1] coefficients */
00378       c0 = *pb++;
00379 
00380       /* Read x[n-numTaps-1] for sample 0 and for sample 1 */
00381       x0 = *px0++;
00382       x1 = *px1++;
00383 
00384       /* Perform the multiply-accumulate */
00385       acc0 += x0 * c0;
00386       acc1 += x1 * c0;
00387 
00388       /* Read the b[numTaps-2] coefficient */
00389       c0 = *pb++;
00390 
00391       /* Read x[n-numTaps-2] for sample 0 and sample 1 */
00392       x0 = *px0++;
00393       x1 = *px1++;
00394 
00395       /* Perform the multiply-accumulate */
00396       acc0 += x0 * c0;
00397       acc1 += x1 * c0;
00398 
00399       /* Read the b[numTaps-3] coefficients */
00400       c0 = *pb++;
00401 
00402       /* Read x[n-numTaps-3] for sample 0 and sample 1 */
00403       x0 = *px0++;
00404       x1 = *px1++;
00405 
00406       /* Perform the multiply-accumulate */
00407       acc0 += x0 * c0;
00408       acc1 += x1 * c0;
00409 
00410       /* Read the b[numTaps-4] coefficient */
00411       c0 = *pb++;
00412 
00413       /* Read x[n-numTaps-4] for sample 0 and sample 1 */
00414       x0 = *px0++;
00415       x1 = *px1++;
00416 
00417       /* Perform the multiply-accumulate */
00418       acc0 += x0 * c0;
00419       acc1 += x1 * c0;
00420 
00421       /* Decrement the loop counter */
00422       tapCnt--;
00423     }
00424 
00425     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
00426     tapCnt = numTaps % 0x4u;
00427 
00428     while(tapCnt > 0u)
00429     {
00430       /* Read coefficients */
00431       c0 = *pb++;
00432 
00433       /* Fetch 1 state variable */
00434       x0 = *px0++;
00435       x1 = *px1++;
00436 
00437       /* Perform the multiply-accumulate */
00438       acc0 += x0 * c0;
00439       acc1 += x1 * c0;
00440 
00441       /* Decrement the loop counter */
00442       tapCnt--;
00443     }
00444 
00445     /* Advance the state pointer by the decimation factor       
00446      * to process the next group of decimation factor number samples */
00447     pState = pState + S->M * 2;
00448 
00449     /* Store filter output, smlad returns the values in 2.14 format */
00450     /* so downsacle by 15 to get output in 1.15 */
00451 
00452     *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
00453     *pDst++ = (q15_t) (__SSAT((acc1 >> 15), 16));
00454 
00455     /* Decrement the loop counter */
00456     blkCnt--;
00457   }
00458 
00459   while(blkCntN3 > 0u)
00460   {
00461     /* Copy decimation factor number of new input samples into the state buffer */
00462     i = S->M;
00463 
00464     do
00465     {
00466       *pStateCurnt++ = *pSrc++;
00467 
00468     } while(--i);
00469 
00470     /*Set sum to zero */
00471     sum0 = 0;
00472 
00473     /* Initialize state pointer */
00474     px = pState;
00475 
00476     /* Initialize coeff pointer */
00477     pb = pCoeffs;
00478 
00479     /* Loop unrolling.  Process 4 taps at a time. */
00480     tapCnt = numTaps >> 2;
00481 
00482     /* Loop over the number of taps.  Unroll by a factor of 4.       
00483      ** Repeat until we've computed numTaps-4 coefficients. */
00484     while(tapCnt > 0u)
00485     {
00486       /* Read the Read b[numTaps-1] coefficients */
00487       c0 = *pb++;
00488 
00489       /* Read x[n-numTaps-1] and sample */
00490       x0 = *px++;
00491 
00492       /* Perform the multiply-accumulate */
00493       sum0 += x0 * c0;
00494 
00495       /* Read the b[numTaps-2] coefficient */
00496       c0 = *pb++;
00497 
00498       /* Read x[n-numTaps-2] and  sample */
00499       x0 = *px++;
00500 
00501       /* Perform the multiply-accumulate */
00502       sum0 += x0 * c0;
00503 
00504       /* Read the b[numTaps-3]  coefficients */
00505       c0 = *pb++;
00506 
00507       /* Read x[n-numTaps-3] sample */
00508       x0 = *px++;
00509 
00510       /* Perform the multiply-accumulate */
00511       sum0 += x0 * c0;
00512 
00513       /* Read the b[numTaps-4] coefficient */
00514       c0 = *pb++;
00515 
00516       /* Read x[n-numTaps-4] sample */
00517       x0 = *px++;
00518 
00519       /* Perform the multiply-accumulate */
00520       sum0 += x0 * c0;
00521 
00522       /* Decrement the loop counter */
00523       tapCnt--;
00524     }
00525 
00526     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
00527     tapCnt = numTaps % 0x4u;
00528 
00529     while(tapCnt > 0u)
00530     {
00531       /* Read coefficients */
00532       c0 = *pb++;
00533 
00534       /* Fetch 1 state variable */
00535       x0 = *px++;
00536 
00537       /* Perform the multiply-accumulate */
00538       sum0 += x0 * c0;
00539 
00540       /* Decrement the loop counter */
00541       tapCnt--;
00542     }
00543 
00544     /* Advance the state pointer by the decimation factor       
00545      * to process the next group of decimation factor number samples */
00546     pState = pState + S->M;
00547 
00548     /* Store filter output, smlad returns the values in 2.14 format */
00549     /* so downsacle by 15 to get output in 1.15 */
00550     *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
00551 
00552     /* Decrement the loop counter */
00553     blkCntN3--;
00554   }
00555 
00556   /* Processing is complete.       
00557    ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.       
00558    ** This prepares the state buffer for the next function call. */
00559 
00560   /* Points to the start of the state buffer */
00561   pStateCurnt = S->pState;
00562 
00563   i = (numTaps - 1u) >> 2u;
00564 
00565   /* copy data */
00566   while(i > 0u)
00567   {
00568     *pStateCurnt++ = *pState++;
00569     *pStateCurnt++ = *pState++;
00570     *pStateCurnt++ = *pState++;
00571     *pStateCurnt++ = *pState++;
00572 
00573     /* Decrement the loop counter */
00574     i--;
00575   }
00576 
00577   i = (numTaps - 1u) % 0x04u;
00578 
00579   /* copy data */
00580   while(i > 0u)
00581   {
00582     *pStateCurnt++ = *pState++;
00583 
00584     /* Decrement the loop counter */
00585     i--;
00586   }
00587 }
00588 
00589 
00590 #endif  /*  #ifndef UNALIGNED_SUPPORT_DISABLE   */
00591 
00592 #else
00593 
00594 
00595 void arm_fir_decimate_q15(
00596   const arm_fir_decimate_instance_q15 * S,
00597   q15_t * pSrc,
00598   q15_t * pDst,
00599   uint32_t blockSize)
00600 {
00601   q15_t *pState = S->pState;                     /* State pointer */
00602   q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
00603   q15_t *pStateCurnt;                            /* Points to the current sample of the state */
00604   q15_t *px;                                     /* Temporary pointer for state buffer */
00605   q15_t *pb;                                     /* Temporary pointer coefficient buffer */
00606   q31_t x0, c0;                                  /* Temporary variables to hold state and coefficient values */
00607   q63_t sum0;                                    /* Accumulators */
00608   uint32_t numTaps = S->numTaps;                 /* Number of taps */
00609   uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M;  /* Loop counters */
00610 
00611 
00612 
00613 /* Run the below code for Cortex-M0 */
00614 
00615   /* S->pState buffer contains previous frame (numTaps - 1) samples */
00616   /* pStateCurnt points to the location where the new input data should be written */
00617   pStateCurnt = S->pState + (numTaps - 1u);
00618 
00619   /* Total number of output samples to be computed */
00620   blkCnt = outBlockSize;
00621 
00622   while(blkCnt > 0u)
00623   {
00624     /* Copy decimation factor number of new input samples into the state buffer */
00625     i = S->M;
00626 
00627     do
00628     {
00629       *pStateCurnt++ = *pSrc++;
00630 
00631     } while(--i);
00632 
00633     /*Set sum to zero */
00634     sum0 = 0;
00635 
00636     /* Initialize state pointer */
00637     px = pState;
00638 
00639     /* Initialize coeff pointer */
00640     pb = pCoeffs;
00641 
00642     tapCnt = numTaps;
00643 
00644     while(tapCnt > 0u)
00645     {
00646       /* Read coefficients */
00647       c0 = *pb++;
00648 
00649       /* Fetch 1 state variable */
00650       x0 = *px++;
00651 
00652       /* Perform the multiply-accumulate */
00653       sum0 += (q31_t) x0 *c0;
00654 
00655       /* Decrement the loop counter */
00656       tapCnt--;
00657     }
00658 
00659     /* Advance the state pointer by the decimation factor           
00660      * to process the next group of decimation factor number samples */
00661     pState = pState + S->M;
00662 
00663     /*Store filter output , smlad will return the values in 2.14 format */
00664     /* so downsacle by 15 to get output in 1.15 */
00665     *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
00666 
00667     /* Decrement the loop counter */
00668     blkCnt--;
00669   }
00670 
00671   /* Processing is complete.         
00672    ** Now copy the last numTaps - 1 samples to the start of the state buffer.       
00673    ** This prepares the state buffer for the next function call. */
00674 
00675   /* Points to the start of the state buffer */
00676   pStateCurnt = S->pState;
00677 
00678   i = numTaps - 1u;
00679 
00680   /* copy data */
00681   while(i > 0u)
00682   {
00683     *pStateCurnt++ = *pState++;
00684 
00685     /* Decrement the loop counter */
00686     i--;
00687   }
00688 
00689 
00690 }
00691 #endif /*   #ifndef ARM_MATH_CM0_FAMILY */
00692 
00693 
00694 /**    
00695  * @} end of FIR_decimate group    
00696  */