CMSIS DSP library

Dependents:   KL25Z_FFT_Demo Hat_Board_v5_1 KL25Z_FFT_Demo_tony KL25Z_FFT_Demo_tony ... more

Fork of mbed-dsp by mbed official

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_lms_q31.c Source File

arm_lms_q31.c

00001 /* ----------------------------------------------------------------------    
00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
00003 *    
00004 * $Date:        17. January 2013
00005 * $Revision:    V1.4.1
00006 *    
00007 * Project:      CMSIS DSP Library    
00008 * Title:        arm_lms_q31.c    
00009 *    
00010 * Description:  Processing function for the Q31 LMS filter.    
00011 *    
00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00013 *  
00014 * Redistribution and use in source and binary forms, with or without 
00015 * modification, are permitted provided that the following conditions
00016 * are met:
00017 *   - Redistributions of source code must retain the above copyright
00018 *     notice, this list of conditions and the following disclaimer.
00019 *   - Redistributions in binary form must reproduce the above copyright
00020 *     notice, this list of conditions and the following disclaimer in
00021 *     the documentation and/or other materials provided with the 
00022 *     distribution.
00023 *   - Neither the name of ARM LIMITED nor the names of its contributors
00024 *     may be used to endorse or promote products derived from this
00025 *     software without specific prior written permission.
00026 *
00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00038 * POSSIBILITY OF SUCH DAMAGE.   
00039 * -------------------------------------------------------------------- */
00040 
00041 #include "arm_math.h"
00042 /**    
00043  * @ingroup groupFilters    
00044  */
00045 
00046 /**    
00047  * @addtogroup LMS    
00048  * @{    
00049  */
00050 
00051  /**    
00052  * @brief Processing function for Q31 LMS filter.    
00053  * @param[in]  *S points to an instance of the Q15 LMS filter structure.    
00054  * @param[in]  *pSrc points to the block of input data.    
00055  * @param[in]  *pRef points to the block of reference data.    
00056  * @param[out] *pOut points to the block of output data.    
00057  * @param[out] *pErr points to the block of error data.    
00058  * @param[in]  blockSize number of samples to process.    
00059  * @return     none.    
00060  *    
00061  * \par Scaling and Overflow Behavior:     
00062  * The function is implemented using an internal 64-bit accumulator.     
00063  * The accumulator has a 2.62 format and maintains full precision of the intermediate    
00064  * multiplication results but provides only a single guard bit.     
00065  * Thus, if the accumulator result overflows it wraps around rather than clips.     
00066  * In order to avoid overflows completely the input signal must be scaled down by    
00067  * log2(numTaps) bits.     
00068  * The reference signal should not be scaled down.     
00069  * After all multiply-accumulates are performed, the 2.62 accumulator is shifted    
00070  * and saturated to 1.31 format to yield the final result.     
00071  * The output signal and error signal are in 1.31 format.     
00072  *    
00073  * \par    
00074  *  In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted.    
00075  */
00076 
00077 void arm_lms_q31(
00078   const arm_lms_instance_q31 * S,
00079   q31_t * pSrc,
00080   q31_t * pRef,
00081   q31_t * pOut,
00082   q31_t * pErr,
00083   uint32_t blockSize)
00084 {
00085   q31_t *pState = S->pState;                     /* State pointer */
00086   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
00087   q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
00088   q31_t *pStateCurnt;                            /* Points to the current sample of the state */
00089   q31_t mu = S->mu;                              /* Adaptive factor */
00090   q31_t *px;                                     /* Temporary pointer for state */
00091   q31_t *pb;                                     /* Temporary pointer for coefficient buffer */
00092   uint32_t tapCnt, blkCnt;                       /* Loop counters */
00093   q63_t acc;                                     /* Accumulator */
00094   q31_t e = 0;                                   /* error of data sample */
00095   q31_t alpha;                                   /* Intermediate constant for taps update */
00096   q31_t coef;                                    /* Temporary variable for coef */
00097   q31_t acc_l, acc_h;                            /*  temporary input */
00098   uint32_t uShift = ((uint32_t) S->postShift + 1u);
00099   uint32_t lShift = 32u - uShift;                /*  Shift to be applied to the output */
00100 
00101   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
00102   /* pStateCurnt points to the location where the new input data should be written */
00103   pStateCurnt = &(S->pState[(numTaps - 1u)]);
00104 
00105   /* Initializing blkCnt with blockSize */
00106   blkCnt = blockSize;
00107 
00108 
00109 #ifndef ARM_MATH_CM0_FAMILY
00110 
00111   /* Run the below code for Cortex-M4 and Cortex-M3 */
00112 
00113   while(blkCnt > 0u)
00114   {
00115     /* Copy the new input sample into the state buffer */
00116     *pStateCurnt++ = *pSrc++;
00117 
00118     /* Initialize state pointer */
00119     px = pState;
00120 
00121     /* Initialize coefficient pointer */
00122     pb = pCoeffs;
00123 
00124     /* Set the accumulator to zero */
00125     acc = 0;
00126 
00127     /* Loop unrolling.  Process 4 taps at a time. */
00128     tapCnt = numTaps >> 2;
00129 
00130     while(tapCnt > 0u)
00131     {
00132       /* Perform the multiply-accumulate */
00133       /* acc +=  b[N] * x[n-N] */
00134       acc += ((q63_t) (*px++)) * (*pb++);
00135 
00136       /* acc +=  b[N-1] * x[n-N-1] */
00137       acc += ((q63_t) (*px++)) * (*pb++);
00138 
00139       /* acc +=  b[N-2] * x[n-N-2] */
00140       acc += ((q63_t) (*px++)) * (*pb++);
00141 
00142       /* acc +=  b[N-3] * x[n-N-3] */
00143       acc += ((q63_t) (*px++)) * (*pb++);
00144 
00145       /* Decrement the loop counter */
00146       tapCnt--;
00147     }
00148 
00149     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
00150     tapCnt = numTaps % 0x4u;
00151 
00152     while(tapCnt > 0u)
00153     {
00154       /* Perform the multiply-accumulate */
00155       acc += ((q63_t) (*px++)) * (*pb++);
00156 
00157       /* Decrement the loop counter */
00158       tapCnt--;
00159     }
00160 
00161     /* Converting the result to 1.31 format */
00162     /* Calc lower part of acc */
00163     acc_l = acc & 0xffffffff;
00164 
00165     /* Calc upper part of acc */
00166     acc_h = (acc >> 32) & 0xffffffff;
00167 
00168     acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
00169 
00170     /* Store the result from accumulator into the destination buffer. */
00171     *pOut++ = (q31_t) acc;
00172 
00173     /* Compute and store error */
00174     e = *pRef++ - (q31_t) acc;
00175 
00176     *pErr++ = (q31_t) e;
00177 
00178     /* Compute alpha i.e. intermediate constant for taps update */
00179     alpha = (q31_t) (((q63_t) e * mu) >> 31);
00180 
00181     /* Initialize state pointer */
00182     /* Advance state pointer by 1 for the next sample */
00183     px = pState++;
00184 
00185     /* Initialize coefficient pointer */
00186     pb = pCoeffs;
00187 
00188     /* Loop unrolling.  Process 4 taps at a time. */
00189     tapCnt = numTaps >> 2;
00190 
00191     /* Update filter coefficients */
00192     while(tapCnt > 0u)
00193     {
00194       /* coef is in 2.30 format */
00195       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
00196       /* get coef in 1.31 format by left shifting */
00197       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
00198       /* update coefficient buffer to next coefficient */
00199       pb++;
00200 
00201       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
00202       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
00203       pb++;
00204 
00205       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
00206       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
00207       pb++;
00208 
00209       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
00210       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
00211       pb++;
00212 
00213       /* Decrement the loop counter */
00214       tapCnt--;
00215     }
00216 
00217     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
00218     tapCnt = numTaps % 0x4u;
00219 
00220     while(tapCnt > 0u)
00221     {
00222       /* Perform the multiply-accumulate */
00223       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
00224       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
00225       pb++;
00226 
00227       /* Decrement the loop counter */
00228       tapCnt--;
00229     }
00230 
00231     /* Decrement the loop counter */
00232     blkCnt--;
00233   }
00234 
00235   /* Processing is complete. Now copy the last numTaps - 1 samples to the    
00236      satrt of the state buffer. This prepares the state buffer for the    
00237      next function call. */
00238 
00239   /* Points to the start of the pState buffer */
00240   pStateCurnt = S->pState;
00241 
00242   /* Loop unrolling for (numTaps - 1u) samples copy */
00243   tapCnt = (numTaps - 1u) >> 2u;
00244 
00245   /* copy data */
00246   while(tapCnt > 0u)
00247   {
00248     *pStateCurnt++ = *pState++;
00249     *pStateCurnt++ = *pState++;
00250     *pStateCurnt++ = *pState++;
00251     *pStateCurnt++ = *pState++;
00252 
00253     /* Decrement the loop counter */
00254     tapCnt--;
00255   }
00256 
00257   /* Calculate remaining number of copies */
00258   tapCnt = (numTaps - 1u) % 0x4u;
00259 
00260   /* Copy the remaining q31_t data */
00261   while(tapCnt > 0u)
00262   {
00263     *pStateCurnt++ = *pState++;
00264 
00265     /* Decrement the loop counter */
00266     tapCnt--;
00267   }
00268 
00269 #else
00270 
00271   /* Run the below code for Cortex-M0 */
00272 
00273   while(blkCnt > 0u)
00274   {
00275     /* Copy the new input sample into the state buffer */
00276     *pStateCurnt++ = *pSrc++;
00277 
00278     /* Initialize pState pointer */
00279     px = pState;
00280 
00281     /* Initialize pCoeffs pointer */
00282     pb = pCoeffs;
00283 
00284     /* Set the accumulator to zero */
00285     acc = 0;
00286 
00287     /* Loop over numTaps number of values */
00288     tapCnt = numTaps;
00289 
00290     while(tapCnt > 0u)
00291     {
00292       /* Perform the multiply-accumulate */
00293       acc += ((q63_t) (*px++)) * (*pb++);
00294 
00295       /* Decrement the loop counter */
00296       tapCnt--;
00297     }
00298 
00299     /* Converting the result to 1.31 format */
00300     /* Store the result from accumulator into the destination buffer. */
00301     /* Calc lower part of acc */
00302     acc_l = acc & 0xffffffff;
00303 
00304     /* Calc upper part of acc */
00305     acc_h = (acc >> 32) & 0xffffffff;
00306 
00307     acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
00308 
00309     *pOut++ = (q31_t) acc;
00310 
00311     /* Compute and store error */
00312     e = *pRef++ - (q31_t) acc;
00313 
00314     *pErr++ = (q31_t) e;
00315 
00316     /* Weighting factor for the LMS version */
00317     alpha = (q31_t) (((q63_t) e * mu) >> 31);
00318 
00319     /* Initialize pState pointer */
00320     /* Advance state pointer by 1 for the next sample */
00321     px = pState++;
00322 
00323     /* Initialize pCoeffs pointer */
00324     pb = pCoeffs;
00325 
00326     /* Loop over numTaps number of values */
00327     tapCnt = numTaps;
00328 
00329     while(tapCnt > 0u)
00330     {
00331       /* Perform the multiply-accumulate */
00332       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
00333       *pb += (coef << 1u);
00334       pb++;
00335 
00336       /* Decrement the loop counter */
00337       tapCnt--;
00338     }
00339 
00340     /* Decrement the loop counter */
00341     blkCnt--;
00342   }
00343 
00344   /* Processing is complete. Now copy the last numTaps - 1 samples to the     
00345      start of the state buffer. This prepares the state buffer for the   
00346      next function call. */
00347 
00348   /* Points to the start of the pState buffer */
00349   pStateCurnt = S->pState;
00350 
00351   /*  Copy (numTaps - 1u) samples  */
00352   tapCnt = (numTaps - 1u);
00353 
00354   /* Copy the data */
00355   while(tapCnt > 0u)
00356   {
00357     *pStateCurnt++ = *pState++;
00358 
00359     /* Decrement the loop counter */
00360     tapCnt--;
00361   }
00362 
00363 #endif /*   #ifndef ARM_MATH_CM0_FAMILY */
00364 
00365 }
00366 
00367 /**    
00368    * @} end of LMS group    
00369    */