Robert Lopez / CMSIS5
Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_fir_decimate_q31.c Source File

arm_fir_decimate_q31.c

00001 /* ----------------------------------------------------------------------
00002  * Project:      CMSIS DSP Library
00003  * Title:        arm_fir_decimate_q31.c
00004  * Description:  Q31 FIR Decimator
00005  *
00006  * $Date:        27. January 2017
00007  * $Revision:    V.1.5.1
00008  *
00009  * Target Processor: Cortex-M cores
00010  * -------------------------------------------------------------------- */
00011 /*
00012  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
00013  *
00014  * SPDX-License-Identifier: Apache-2.0
00015  *
00016  * Licensed under the Apache License, Version 2.0 (the License); you may
00017  * not use this file except in compliance with the License.
00018  * You may obtain a copy of the License at
00019  *
00020  * www.apache.org/licenses/LICENSE-2.0
00021  *
00022  * Unless required by applicable law or agreed to in writing, software
00023  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
00024  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00025  * See the License for the specific language governing permissions and
00026  * limitations under the License.
00027  */
00028 
00029 #include "arm_math.h"
00030 
00031 /**
00032  * @ingroup groupFilters
00033  */
00034 
00035 /**
00036  * @addtogroup FIR_decimate
00037  * @{
00038  */
00039 
00040 /**
00041  * @brief Processing function for the Q31 FIR decimator.
00042  * @param[in] *S points to an instance of the Q31 FIR decimator structure.
00043  * @param[in] *pSrc points to the block of input data.
00044  * @param[out] *pDst points to the block of output data
00045  * @param[in] blockSize number of input samples to process per call.
00046  * @return none
00047  *
00048  * <b>Scaling and Overflow Behavior:</b>
00049  * \par
00050  * The function is implemented using an internal 64-bit accumulator.
00051  * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
00052  * Thus, if the accumulator result overflows it wraps around rather than clip.
00053  * In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits (where log2 is read as log to the base 2).
00054  * After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.
00055  *
00056  * \par
00057  * Refer to the function <code>arm_fir_decimate_fast_q31()</code> for a faster but less precise implementation of this function for Cortex-M3 and Cortex-M4.
00058  */
00059 
00060 void arm_fir_decimate_q31(
00061   const arm_fir_decimate_instance_q31 * S,
00062   q31_t * pSrc,
00063   q31_t * pDst,
00064   uint32_t blockSize)
00065 {
00066   q31_t *pState = S->pState;                     /* State pointer */
00067   q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
00068   q31_t *pStateCurnt;                            /* Points to the current sample of the state */
00069   q31_t x0, c0;                                  /* Temporary variables to hold state and coefficient values */
00070   q31_t *px;                                     /* Temporary pointers for state buffer */
00071   q31_t *pb;                                     /* Temporary pointers for coefficient buffer */
00072   q63_t sum0;                                    /* Accumulator */
00073   uint32_t numTaps = S->numTaps;                 /* Number of taps */
00074   uint32_t i, tapCnt, blkCnt, outBlockSize = blockSize / S->M;  /* Loop counters */
00075 
00076 
00077 #if defined (ARM_MATH_DSP)
00078 
00079   /* Run the below code for Cortex-M4 and Cortex-M3 */
00080 
00081   /* S->pState buffer contains previous frame (numTaps - 1) samples */
00082   /* pStateCurnt points to the location where the new input data should be written */
00083   pStateCurnt = S->pState + (numTaps - 1U);
00084 
00085   /* Total number of output samples to be computed */
00086   blkCnt = outBlockSize;
00087 
00088   while (blkCnt > 0U)
00089   {
00090     /* Copy decimation factor number of new input samples into the state buffer */
00091     i = S->M;
00092 
00093     do
00094     {
00095       *pStateCurnt++ = *pSrc++;
00096 
00097     } while (--i);
00098 
00099     /* Set accumulator to zero */
00100     sum0 = 0;
00101 
00102     /* Initialize state pointer */
00103     px = pState;
00104 
00105     /* Initialize coeff pointer */
00106     pb = pCoeffs;
00107 
00108     /* Loop unrolling.  Process 4 taps at a time. */
00109     tapCnt = numTaps >> 2;
00110 
00111     /* Loop over the number of taps.  Unroll by a factor of 4.
00112      ** Repeat until we've computed numTaps-4 coefficients. */
00113     while (tapCnt > 0U)
00114     {
00115       /* Read the b[numTaps-1] coefficient */
00116       c0 = *(pb++);
00117 
00118       /* Read x[n-numTaps-1] sample */
00119       x0 = *(px++);
00120 
00121       /* Perform the multiply-accumulate */
00122       sum0 += (q63_t) x0 *c0;
00123 
00124       /* Read the b[numTaps-2] coefficient */
00125       c0 = *(pb++);
00126 
00127       /* Read x[n-numTaps-2] sample */
00128       x0 = *(px++);
00129 
00130       /* Perform the multiply-accumulate */
00131       sum0 += (q63_t) x0 *c0;
00132 
00133       /* Read the b[numTaps-3] coefficient */
00134       c0 = *(pb++);
00135 
00136       /* Read x[n-numTaps-3] sample */
00137       x0 = *(px++);
00138 
00139       /* Perform the multiply-accumulate */
00140       sum0 += (q63_t) x0 *c0;
00141 
00142       /* Read the b[numTaps-4] coefficient */
00143       c0 = *(pb++);
00144 
00145       /* Read x[n-numTaps-4] sample */
00146       x0 = *(px++);
00147 
00148       /* Perform the multiply-accumulate */
00149       sum0 += (q63_t) x0 *c0;
00150 
00151       /* Decrement the loop counter */
00152       tapCnt--;
00153     }
00154 
00155     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
00156     tapCnt = numTaps % 0x4U;
00157 
00158     while (tapCnt > 0U)
00159     {
00160       /* Read coefficients */
00161       c0 = *(pb++);
00162 
00163       /* Fetch 1 state variable */
00164       x0 = *(px++);
00165 
00166       /* Perform the multiply-accumulate */
00167       sum0 += (q63_t) x0 *c0;
00168 
00169       /* Decrement the loop counter */
00170       tapCnt--;
00171     }
00172 
00173     /* Advance the state pointer by the decimation factor
00174      * to process the next group of decimation factor number samples */
00175     pState = pState + S->M;
00176 
00177     /* The result is in the accumulator, store in the destination buffer. */
00178     *pDst++ = (q31_t) (sum0 >> 31);
00179 
00180     /* Decrement the loop counter */
00181     blkCnt--;
00182   }
00183 
00184   /* Processing is complete.
00185    ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
00186    ** This prepares the state buffer for the next function call. */
00187 
00188   /* Points to the start of the state buffer */
00189   pStateCurnt = S->pState;
00190 
00191   i = (numTaps - 1U) >> 2U;
00192 
00193   /* copy data */
00194   while (i > 0U)
00195   {
00196     *pStateCurnt++ = *pState++;
00197     *pStateCurnt++ = *pState++;
00198     *pStateCurnt++ = *pState++;
00199     *pStateCurnt++ = *pState++;
00200 
00201     /* Decrement the loop counter */
00202     i--;
00203   }
00204 
00205   i = (numTaps - 1U) % 0x04U;
00206 
00207   /* copy data */
00208   while (i > 0U)
00209   {
00210     *pStateCurnt++ = *pState++;
00211 
00212     /* Decrement the loop counter */
00213     i--;
00214   }
00215 
00216 #else
00217 
00218 /* Run the below code for Cortex-M0 */
00219 
00220   /* S->pState buffer contains previous frame (numTaps - 1) samples */
00221   /* pStateCurnt points to the location where the new input data should be written */
00222   pStateCurnt = S->pState + (numTaps - 1U);
00223 
00224   /* Total number of output samples to be computed */
00225   blkCnt = outBlockSize;
00226 
00227   while (blkCnt > 0U)
00228   {
00229     /* Copy decimation factor number of new input samples into the state buffer */
00230     i = S->M;
00231 
00232     do
00233     {
00234       *pStateCurnt++ = *pSrc++;
00235 
00236     } while (--i);
00237 
00238     /* Set accumulator to zero */
00239     sum0 = 0;
00240 
00241     /* Initialize state pointer */
00242     px = pState;
00243 
00244     /* Initialize coeff pointer */
00245     pb = pCoeffs;
00246 
00247     tapCnt = numTaps;
00248 
00249     while (tapCnt > 0U)
00250     {
00251       /* Read coefficients */
00252       c0 = *pb++;
00253 
00254       /* Fetch 1 state variable */
00255       x0 = *px++;
00256 
00257       /* Perform the multiply-accumulate */
00258       sum0 += (q63_t) x0 *c0;
00259 
00260       /* Decrement the loop counter */
00261       tapCnt--;
00262     }
00263 
00264     /* Advance the state pointer by the decimation factor
00265      * to process the next group of decimation factor number samples */
00266     pState = pState + S->M;
00267 
00268     /* The result is in the accumulator, store in the destination buffer. */
00269     *pDst++ = (q31_t) (sum0 >> 31);
00270 
00271     /* Decrement the loop counter */
00272     blkCnt--;
00273   }
00274 
00275   /* Processing is complete.
00276    ** Now copy the last numTaps - 1 samples to the start of the state buffer.
00277    ** This prepares the state buffer for the next function call. */
00278 
00279   /* Points to the start of the state buffer */
00280   pStateCurnt = S->pState;
00281 
00282   i = numTaps - 1U;
00283 
00284   /* copy data */
00285   while (i > 0U)
00286   {
00287     *pStateCurnt++ = *pState++;
00288 
00289     /* Decrement the loop counter */
00290     i--;
00291   }
00292 
00293 #endif /*   #if defined (ARM_MATH_DSP) */
00294 
00295 }
00296 
00297 /**
00298  * @} end of FIR_decimate group
00299  */
00300