CMSIS DSP library

Dependents:   KL25Z_FFT_Demo Hat_Board_v5_1 KL25Z_FFT_Demo_tony KL25Z_FFT_Demo_tony ... more

Fork of mbed-dsp by mbed official

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_fir_decimate_f32.c Source File

arm_fir_decimate_f32.c

00001 /* ----------------------------------------------------------------------    
00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
00003 *    
00004 * $Date:        17. January 2013
00005 * $Revision:    V1.4.1
00006 *    
00007 * Project:      CMSIS DSP Library    
00008 * Title:        arm_fir_decimate_f32.c    
00009 *    
00010 * Description:  FIR decimation for floating-point sequences.    
00011 *    
00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00013 *  
00014 * Redistribution and use in source and binary forms, with or without 
00015 * modification, are permitted provided that the following conditions
00016 * are met:
00017 *   - Redistributions of source code must retain the above copyright
00018 *     notice, this list of conditions and the following disclaimer.
00019 *   - Redistributions in binary form must reproduce the above copyright
00020 *     notice, this list of conditions and the following disclaimer in
00021 *     the documentation and/or other materials provided with the 
00022 *     distribution.
00023 *   - Neither the name of ARM LIMITED nor the names of its contributors
00024 *     may be used to endorse or promote products derived from this
00025 *     software without specific prior written permission.
00026 *
00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00038 * POSSIBILITY OF SUCH DAMAGE. 
00039 * -------------------------------------------------------------------- */
00040 
00041 #include "arm_math.h"
00042 
00043 /**    
00044  * @ingroup groupFilters    
00045  */
00046 
00047 /**    
00048  * @defgroup FIR_decimate Finite Impulse Response (FIR) Decimator    
00049  *    
00050  * These functions combine an FIR filter together with a decimator.    
00051  * They are used in multirate systems for reducing the sample rate of a signal without introducing aliasing distortion.    
00052  * Conceptually, the functions are equivalent to the block diagram below:    
00053  * \image html FIRDecimator.gif "Components included in the FIR Decimator functions"    
00054  * When decimating by a factor of <code>M</code>, the signal should be prefiltered by a lowpass filter with a normalized    
00055  * cutoff frequency of <code>1/M</code> in order to prevent aliasing distortion.    
00056  * The user of the function is responsible for providing the filter coefficients.    
00057  *    
00058  * The FIR decimator functions provided in the CMSIS DSP Library combine the FIR filter and the decimator in an efficient manner.    
00059  * Instead of calculating all of the FIR filter outputs and discarding <code>M-1</code> out of every <code>M</code>, only the    
00060  * samples output by the decimator are computed.    
00061  * The functions operate on blocks of input and output data.    
00062  * <code>pSrc</code> points to an array of <code>blockSize</code> input values and    
00063  * <code>pDst</code> points to an array of <code>blockSize/M</code> output values.    
00064  * In order to have an integer number of output samples <code>blockSize</code>    
00065  * must always be a multiple of the decimation factor <code>M</code>.    
00066  *    
00067  * The library provides separate functions for Q15, Q31 and floating-point data types.    
00068  *    
00069  * \par Algorithm:    
00070  * The FIR portion of the algorithm uses the standard form filter:    
00071  * <pre>    
00072  *    y[n] = b[0] * x[n] + b[1] * x[n-1] + b[2] * x[n-2] + ...+ b[numTaps-1] * x[n-numTaps+1]    
00073  * </pre>    
00074  * where, <code>b[n]</code> are the filter coefficients.    
00075  * \par   
00076  * The <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.    
00077  * Coefficients are stored in time reversed order.    
00078  * \par    
00079  * <pre>    
00080  *    {b[numTaps-1], b[numTaps-2], b[N-2], ..., b[1], b[0]}    
00081  * </pre>    
00082  * \par    
00083  * <code>pState</code> points to a state array of size <code>numTaps + blockSize - 1</code>.    
00084  * Samples in the state buffer are stored in the order:    
00085  * \par    
00086  * <pre>    
00087  *    {x[n-numTaps+1], x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2]....x[0], x[1], ..., x[blockSize-1]}    
00088  * </pre>    
00089  * The state variables are updated after each block of data is processed, the coefficients are untouched.    
00090  *    
00091  * \par Instance Structure    
00092  * The coefficients and state variables for a filter are stored together in an instance data structure.    
00093  * A separate instance structure must be defined for each filter.    
00094  * Coefficient arrays may be shared among several instances while state variable array should be allocated separately.    
00095  * There are separate instance structure declarations for each of the 3 supported data types.    
00096  *    
00097  * \par Initialization Functions    
00098  * There is also an associated initialization function for each data type.    
00099  * The initialization function performs the following operations:    
00100  * - Sets the values of the internal structure fields.    
00101  * - Zeros out the values in the state buffer.    
00102  * - Checks to make sure that the size of the input is a multiple of the decimation factor.    
00103  * To do this manually without calling the init function, assign the follow subfields of the instance structure:
00104  * numTaps, pCoeffs, M (decimation factor), pState. Also set all of the values in pState to zero. 
00105  *    
00106  * \par    
00107  * Use of the initialization function is optional.    
00108  * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.    
00109  * To place an instance structure into a const data section, the instance structure must be manually initialized.    
00110  * The code below statically initializes each of the 3 different data type filter instance structures    
00111  * <pre>    
00112  *arm_fir_decimate_instance_f32 S = {M, numTaps, pCoeffs, pState};    
00113  *arm_fir_decimate_instance_q31 S = {M, numTaps, pCoeffs, pState};    
00114  *arm_fir_decimate_instance_q15 S = {M, numTaps, pCoeffs, pState};    
00115  * </pre>    
00116  * where <code>M</code> is the decimation factor; <code>numTaps</code> is the number of filter coefficients in the filter;    
00117  * <code>pCoeffs</code> is the address of the coefficient buffer;    
00118  * <code>pState</code> is the address of the state buffer.    
00119  * Be sure to set the values in the state buffer to zeros when doing static initialization.    
00120  *    
00121  * \par Fixed-Point Behavior    
00122  * Care must be taken when using the fixed-point versions of the FIR decimate filter functions.    
00123  * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.    
00124  * Refer to the function specific documentation below for usage guidelines.    
00125  */
00126 
00127 /**    
00128  * @addtogroup FIR_decimate    
00129  * @{    
00130  */
00131 
00132   /**    
00133    * @brief Processing function for the floating-point FIR decimator.    
00134    * @param[in] *S        points to an instance of the floating-point FIR decimator structure.    
00135    * @param[in] *pSrc     points to the block of input data.    
00136    * @param[out] *pDst    points to the block of output data.    
00137    * @param[in] blockSize number of input samples to process per call.    
00138    * @return none.    
00139    */
00140 
00141 void arm_fir_decimate_f32(
00142   const arm_fir_decimate_instance_f32 * S,
00143   float32_t * pSrc,
00144   float32_t * pDst,
00145   uint32_t blockSize)
00146 {
00147   float32_t *pState = S->pState;                 /* State pointer */
00148   float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
00149   float32_t *pStateCurnt;                        /* Points to the current sample of the state */
00150   float32_t *px, *pb;                            /* Temporary pointers for state and coefficient buffers */
00151   float32_t sum0;                                /* Accumulator */
00152   float32_t x0, c0;                              /* Temporary variables to hold state and coefficient values */
00153   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
00154   uint32_t i, tapCnt, blkCnt, outBlockSize = blockSize / S->M;  /* Loop counters */
00155 
00156 #ifndef ARM_MATH_CM0_FAMILY
00157 
00158   uint32_t blkCntN4;
00159   float32_t *px0, *px1, *px2, *px3;
00160   float32_t acc0, acc1, acc2, acc3;
00161   float32_t x1, x2, x3;
00162 
00163   /* Run the below code for Cortex-M4 and Cortex-M3 */
00164 
00165   /* S->pState buffer contains previous frame (numTaps - 1) samples */
00166   /* pStateCurnt points to the location where the new input data should be written */
00167   pStateCurnt = S->pState + (numTaps - 1u);
00168 
00169   /* Total number of output samples to be computed */
00170   blkCnt = outBlockSize / 4;
00171   blkCntN4 = outBlockSize - (4 * blkCnt);
00172 
00173   while(blkCnt > 0u)
00174   {
00175     /* Copy 4 * decimation factor number of new input samples into the state buffer */
00176     i = 4 * S->M;
00177 
00178     do
00179     {
00180       *pStateCurnt++ = *pSrc++;
00181 
00182     } while(--i);
00183 
00184     /* Set accumulators to zero */
00185     acc0 = 0.0f;
00186     acc1 = 0.0f;
00187     acc2 = 0.0f;
00188     acc3 = 0.0f;
00189 
00190     /* Initialize state pointer for all the samples */
00191     px0 = pState;
00192     px1 = pState + S->M;
00193     px2 = pState + 2 * S->M;
00194     px3 = pState + 3 * S->M;
00195 
00196     /* Initialize coeff pointer */
00197     pb = pCoeffs;
00198 
00199     /* Loop unrolling.  Process 4 taps at a time. */
00200     tapCnt = numTaps >> 2;
00201 
00202     /* Loop over the number of taps.  Unroll by a factor of 4.       
00203      ** Repeat until we've computed numTaps-4 coefficients. */
00204 
00205     while(tapCnt > 0u)
00206     {
00207       /* Read the b[numTaps-1] coefficient */
00208       c0 = *(pb++);
00209 
00210       /* Read x[n-numTaps-1] sample for acc0 */
00211       x0 = *(px0++);
00212       /* Read x[n-numTaps-1] sample for acc1 */
00213       x1 = *(px1++);
00214       /* Read x[n-numTaps-1] sample for acc2 */
00215       x2 = *(px2++);
00216       /* Read x[n-numTaps-1] sample for acc3 */
00217       x3 = *(px3++);
00218 
00219       /* Perform the multiply-accumulate */
00220       acc0 += x0 * c0;
00221       acc1 += x1 * c0;
00222       acc2 += x2 * c0;
00223       acc3 += x3 * c0;
00224 
00225       /* Read the b[numTaps-2] coefficient */
00226       c0 = *(pb++);
00227 
00228       /* Read x[n-numTaps-2] sample for acc0, acc1, acc2, acc3 */
00229       x0 = *(px0++);
00230       x1 = *(px1++);
00231       x2 = *(px2++);
00232       x3 = *(px3++);
00233 
00234       /* Perform the multiply-accumulate */
00235       acc0 += x0 * c0;
00236       acc1 += x1 * c0;
00237       acc2 += x2 * c0;
00238       acc3 += x3 * c0;
00239 
00240       /* Read the b[numTaps-3] coefficient */
00241       c0 = *(pb++);
00242 
00243       /* Read x[n-numTaps-3] sample acc0, acc1, acc2, acc3 */
00244       x0 = *(px0++);
00245       x1 = *(px1++);
00246       x2 = *(px2++);
00247       x3 = *(px3++);
00248 
00249       /* Perform the multiply-accumulate */
00250       acc0 += x0 * c0;
00251       acc1 += x1 * c0;
00252       acc2 += x2 * c0;
00253       acc3 += x3 * c0;
00254 
00255       /* Read the b[numTaps-4] coefficient */
00256       c0 = *(pb++);
00257 
00258       /* Read x[n-numTaps-4] sample acc0, acc1, acc2, acc3 */
00259       x0 = *(px0++);
00260       x1 = *(px1++);
00261       x2 = *(px2++);
00262       x3 = *(px3++);
00263 
00264       /* Perform the multiply-accumulate */
00265       acc0 += x0 * c0;
00266       acc1 += x1 * c0;
00267       acc2 += x2 * c0;
00268       acc3 += x3 * c0;
00269 
00270       /* Decrement the loop counter */
00271       tapCnt--;
00272     }
00273 
00274     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
00275     tapCnt = numTaps % 0x4u;
00276 
00277     while(tapCnt > 0u)
00278     {
00279       /* Read coefficients */
00280       c0 = *(pb++);
00281 
00282       /* Fetch  state variables for acc0, acc1, acc2, acc3 */
00283       x0 = *(px0++);
00284       x1 = *(px1++);
00285       x2 = *(px2++);
00286       x3 = *(px3++);
00287 
00288       /* Perform the multiply-accumulate */
00289       acc0 += x0 * c0;
00290       acc1 += x1 * c0;
00291       acc2 += x2 * c0;
00292       acc3 += x3 * c0;
00293 
00294       /* Decrement the loop counter */
00295       tapCnt--;
00296     }
00297 
00298     /* Advance the state pointer by the decimation factor       
00299      * to process the next group of decimation factor number samples */
00300     pState = pState + 4 * S->M;
00301 
00302     /* The result is in the accumulator, store in the destination buffer. */
00303     *pDst++ = acc0;
00304     *pDst++ = acc1;
00305     *pDst++ = acc2;
00306     *pDst++ = acc3;
00307 
00308     /* Decrement the loop counter */
00309     blkCnt--;
00310   }
00311 
00312   while(blkCntN4 > 0u)
00313   {
00314     /* Copy decimation factor number of new input samples into the state buffer */
00315     i = S->M;
00316 
00317     do
00318     {
00319       *pStateCurnt++ = *pSrc++;
00320 
00321     } while(--i);
00322 
00323     /* Set accumulator to zero */
00324     sum0 = 0.0f;
00325 
00326     /* Initialize state pointer */
00327     px = pState;
00328 
00329     /* Initialize coeff pointer */
00330     pb = pCoeffs;
00331 
00332     /* Loop unrolling.  Process 4 taps at a time. */
00333     tapCnt = numTaps >> 2;
00334 
00335     /* Loop over the number of taps.  Unroll by a factor of 4.       
00336      ** Repeat until we've computed numTaps-4 coefficients. */
00337     while(tapCnt > 0u)
00338     {
00339       /* Read the b[numTaps-1] coefficient */
00340       c0 = *(pb++);
00341 
00342       /* Read x[n-numTaps-1] sample */
00343       x0 = *(px++);
00344 
00345       /* Perform the multiply-accumulate */
00346       sum0 += x0 * c0;
00347 
00348       /* Read the b[numTaps-2] coefficient */
00349       c0 = *(pb++);
00350 
00351       /* Read x[n-numTaps-2] sample */
00352       x0 = *(px++);
00353 
00354       /* Perform the multiply-accumulate */
00355       sum0 += x0 * c0;
00356 
00357       /* Read the b[numTaps-3] coefficient */
00358       c0 = *(pb++);
00359 
00360       /* Read x[n-numTaps-3] sample */
00361       x0 = *(px++);
00362 
00363       /* Perform the multiply-accumulate */
00364       sum0 += x0 * c0;
00365 
00366       /* Read the b[numTaps-4] coefficient */
00367       c0 = *(pb++);
00368 
00369       /* Read x[n-numTaps-4] sample */
00370       x0 = *(px++);
00371 
00372       /* Perform the multiply-accumulate */
00373       sum0 += x0 * c0;
00374 
00375       /* Decrement the loop counter */
00376       tapCnt--;
00377     }
00378 
00379     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
00380     tapCnt = numTaps % 0x4u;
00381 
00382     while(tapCnt > 0u)
00383     {
00384       /* Read coefficients */
00385       c0 = *(pb++);
00386 
00387       /* Fetch 1 state variable */
00388       x0 = *(px++);
00389 
00390       /* Perform the multiply-accumulate */
00391       sum0 += x0 * c0;
00392 
00393       /* Decrement the loop counter */
00394       tapCnt--;
00395     }
00396 
00397     /* Advance the state pointer by the decimation factor       
00398      * to process the next group of decimation factor number samples */
00399     pState = pState + S->M;
00400 
00401     /* The result is in the accumulator, store in the destination buffer. */
00402     *pDst++ = sum0;
00403 
00404     /* Decrement the loop counter */
00405     blkCntN4--;
00406   }
00407 
00408   /* Processing is complete.    
00409    ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.    
00410    ** This prepares the state buffer for the next function call. */
00411 
00412   /* Points to the start of the state buffer */
00413   pStateCurnt = S->pState;
00414 
00415   i = (numTaps - 1u) >> 2;
00416 
00417   /* copy data */
00418   while(i > 0u)
00419   {
00420     *pStateCurnt++ = *pState++;
00421     *pStateCurnt++ = *pState++;
00422     *pStateCurnt++ = *pState++;
00423     *pStateCurnt++ = *pState++;
00424 
00425     /* Decrement the loop counter */
00426     i--;
00427   }
00428 
00429   i = (numTaps - 1u) % 0x04u;
00430 
00431   /* copy data */
00432   while(i > 0u)
00433   {
00434     *pStateCurnt++ = *pState++;
00435 
00436     /* Decrement the loop counter */
00437     i--;
00438   }
00439 
00440 #else
00441 
00442 /* Run the below code for Cortex-M0 */
00443 
00444   /* S->pState buffer contains previous frame (numTaps - 1) samples */
00445   /* pStateCurnt points to the location where the new input data should be written */
00446   pStateCurnt = S->pState + (numTaps - 1u);
00447 
00448   /* Total number of output samples to be computed */
00449   blkCnt = outBlockSize;
00450 
00451   while(blkCnt > 0u)
00452   {
00453     /* Copy decimation factor number of new input samples into the state buffer */
00454     i = S->M;
00455 
00456     do
00457     {
00458       *pStateCurnt++ = *pSrc++;
00459 
00460     } while(--i);
00461 
00462     /* Set accumulator to zero */
00463     sum0 = 0.0f;
00464 
00465     /* Initialize state pointer */
00466     px = pState;
00467 
00468     /* Initialize coeff pointer */
00469     pb = pCoeffs;
00470 
00471     tapCnt = numTaps;
00472 
00473     while(tapCnt > 0u)
00474     {
00475       /* Read coefficients */
00476       c0 = *pb++;
00477 
00478       /* Fetch 1 state variable */
00479       x0 = *px++;
00480 
00481       /* Perform the multiply-accumulate */
00482       sum0 += x0 * c0;
00483 
00484       /* Decrement the loop counter */
00485       tapCnt--;
00486     }
00487 
00488     /* Advance the state pointer by the decimation factor           
00489      * to process the next group of decimation factor number samples */
00490     pState = pState + S->M;
00491 
00492     /* The result is in the accumulator, store in the destination buffer. */
00493     *pDst++ = sum0;
00494 
00495     /* Decrement the loop counter */
00496     blkCnt--;
00497   }
00498 
00499   /* Processing is complete.         
00500    ** Now copy the last numTaps - 1 samples to the start of the state buffer.       
00501    ** This prepares the state buffer for the next function call. */
00502 
00503   /* Points to the start of the state buffer */
00504   pStateCurnt = S->pState;
00505 
00506   /* Copy numTaps number of values */
00507   i = (numTaps - 1u);
00508 
00509   /* copy data */
00510   while(i > 0u)
00511   {
00512     *pStateCurnt++ = *pState++;
00513 
00514     /* Decrement the loop counter */
00515     i--;
00516   }
00517 
00518 #endif /*   #ifndef ARM_MATH_CM0_FAMILY        */
00519 
00520 }
00521 
00522 /**    
00523  * @} end of FIR_decimate group    
00524  */