CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details

Dependents:   K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_lms_norm_q31.c Source File

arm_lms_norm_q31.c

00001 /* ----------------------------------------------------------------------  
00002 * Copyright (C) 2010 ARM Limited. All rights reserved.  
00003 *  
00004 * $Date:        29. November 2010  
00005 * $Revision:    V1.0.3  
00006 *  
00007 * Project:      CMSIS DSP Library  
00008 * Title:        arm_lms_norm_q31.c  
00009 *  
00010 * Description:  Processing function for the Q31 NLMS filter.  
00011 *  
00012 * Target Processor: Cortex-M4/Cortex-M3
00013 *  
00014 * Version 1.0.3 2010/11/29 
00015 *    Re-organized the CMSIS folders and updated documentation.  
00016 *   
00017 * Version 1.0.2 2010/11/11  
00018 *    Documentation updated.   
00019 *  
00020 * Version 1.0.1 2010/10/05   
00021 *    Production release and review comments incorporated.  
00022 *  
00023 * Version 1.0.0 2010/09/20   
00024 *    Production release and review comments incorporated  
00025 *  
00026 * Version 0.0.7  2010/06/10   
00027 *    Misra-C changes done  
00028 * -------------------------------------------------------------------- */ 
00029  
00030 #include "arm_math.h" 
00031  
00032 /**  
00033  * @ingroup groupFilters  
00034  */ 
00035  
00036 /**  
00037  * @addtogroup LMS_NORM  
00038  * @{  
00039  */ 
00040  
00041 /**  
00042 * @brief Processing function for Q31 normalized LMS filter.  
00043 * @param[in] *S points to an instance of the Q31 normalized LMS filter structure.  
00044 * @param[in] *pSrc points to the block of input data.  
00045 * @param[in] *pRef points to the block of reference data.  
00046 * @param[out] *pOut points to the block of output data.  
00047 * @param[out] *pErr points to the block of error data.  
00048 * @param[in] blockSize number of samples to process.  
00049 * @return none.  
00050 *  
00051 * <b>Scaling and Overflow Behavior:</b>  
00052 * \par  
00053 * The function is implemented using an internal 64-bit accumulator.  
00054 * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.  
00055 * Thus, if the accumulator result overflows it wraps around rather than clip.  
00056 * In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits.  
00057 * The reference signal should not be scaled down.  
00058 * After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.  
00059 * The output signal and error signal are in 1.31 format.  
00060 * 
00061 * \par 
00062 *   In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted. 
00063 *  
00064 */ 
00065  
00066 void arm_lms_norm_q31( 
00067   arm_lms_norm_instance_q31 * S, 
00068   q31_t * pSrc, 
00069   q31_t * pRef, 
00070   q31_t * pOut, 
00071   q31_t * pErr, 
00072   uint32_t blockSize) 
00073 { 
00074   q31_t *pState = S->pState;                     /* State pointer */ 
00075   q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */ 
00076   q31_t *pStateCurnt;                            /* Points to the current sample of the state */ 
00077   q31_t *px, *pb;                                /* Temporary pointers for state and coefficient buffers */ 
00078   q31_t mu = S->mu;                              /* Adaptive factor */ 
00079   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */ 
00080   uint32_t tapCnt, blkCnt;                       /* Loop counters */ 
00081   q63_t energy;                                  /* Energy of the input */ 
00082   q63_t acc;                                     /* Accumulator */ 
00083   q31_t e = 0, d = 0;                            /* error, reference data sample */ 
00084   q31_t w = 0, in;                               /* weight factor and state */ 
00085   q31_t x0;                                      /* temporary variable to hold input sample */ 
00086   uint32_t shift = 32u - ((uint32_t) S->postShift + 1u);        /* Shift to be applied to the output */ 
00087   q31_t errorXmu, oneByEnergy;                   /* Temporary variables to store error and mu product and reciprocal of energy */ 
00088   q31_t postShift;                               /* Post shift to be applied to weight after reciprocal calculation */ 
00089   q31_t coef;                                    /* Temporary variable for coef */ 
00090  
00091   energy = S->energy; 
00092   x0 = S->x0; 
00093  
00094   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ 
00095   /* pStateCurnt points to the location where the new input data should be written */ 
00096   pStateCurnt = &(S->pState[(numTaps - 1u)]); 
00097  
00098   blkCnt = blockSize; 
00099  
00100   while(blkCnt > 0u) 
00101   { 
00102  
00103     /* Copy the new input sample into the state buffer */ 
00104     *pStateCurnt++ = *pSrc; 
00105  
00106     /* Initialize pState pointer */ 
00107     px = pState; 
00108  
00109     /* Initialize coeff pointer */ 
00110     pb = (pCoeffs); 
00111  
00112     /* Read the sample from input buffer */ 
00113     in = *pSrc++; 
00114  
00115     /* Update the energy calculation */ 
00116     energy = (q31_t) ((((q63_t) energy << 32) - 
00117                        (((q63_t) x0 * x0) << 1)) >> 32); 
00118     energy = (q31_t) (((((q63_t) in * in) << 1) + (energy << 32)) >> 32); 
00119  
00120     /* Set the accumulator to zero */ 
00121     acc = 0; 
00122  
00123     /* Loop unrolling.  Process 4 taps at a time. */ 
00124     tapCnt = numTaps >> 2; 
00125  
00126     while(tapCnt > 0u) 
00127     { 
00128       /* Perform the multiply-accumulate */ 
00129       acc += ((q63_t) (*px++)) * (*pb++); 
00130       acc += ((q63_t) (*px++)) * (*pb++); 
00131       acc += ((q63_t) (*px++)) * (*pb++); 
00132       acc += ((q63_t) (*px++)) * (*pb++); 
00133  
00134       /* Decrement the loop counter */ 
00135       tapCnt--; 
00136     } 
00137  
00138     /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 
00139     tapCnt = numTaps % 0x4u; 
00140  
00141     while(tapCnt > 0u) 
00142     { 
00143       /* Perform the multiply-accumulate */ 
00144       acc += ((q63_t) (*px++)) * (*pb++); 
00145  
00146       /* Decrement the loop counter */ 
00147       tapCnt--; 
00148     } 
00149  
00150     /* Converting the result to 1.31 format */ 
00151     acc = (q31_t) (acc >> shift); 
00152  
00153     /* Store the result from accumulator into the destination buffer. */ 
00154     *pOut++ = (q31_t) acc; 
00155  
00156     /* Compute and store error */ 
00157     d = *pRef++; 
00158     e = d - (q31_t) acc; 
00159     *pErr++ = e; 
00160  
00161     /* Calculates the reciprocal of energy */ 
00162     postShift = arm_recip_q31(energy + DELTA_Q31, 
00163                               &oneByEnergy, &S->recipTable[0]); 
00164  
00165     /* Calculation of product of (e * mu) */ 
00166     errorXmu = (q31_t) (((q63_t) e * mu) >> 31); 
00167  
00168     /* Weighting factor for the normalized version */ 
00169     w = clip_q63_to_q31(((q63_t) errorXmu * oneByEnergy) >> (31 - postShift)); 
00170  
00171     /* Initialize pState pointer */ 
00172     px = pState; 
00173  
00174     /* Initialize coeff pointer */ 
00175     pb = (pCoeffs); 
00176  
00177     /* Loop unrolling.  Process 4 taps at a time. */ 
00178     tapCnt = numTaps >> 2; 
00179  
00180     /* Update filter coefficients */ 
00181     while(tapCnt > 0u) 
00182     { 
00183       /* Perform the multiply-accumulate */ 
00184  
00185       /* coef is in 2.30 format */ 
00186       coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 
00187       /* get coef in 1.31 format by left shifting */ 
00188       *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 
00189       /* update coefficient buffer to next coefficient */ 
00190       pb++; 
00191  
00192       coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 
00193       *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 
00194       pb++; 
00195  
00196       coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 
00197       *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 
00198       pb++; 
00199  
00200       coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 
00201       *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 
00202       pb++; 
00203  
00204       /* Decrement the loop counter */ 
00205       tapCnt--; 
00206     } 
00207  
00208     /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 
00209     tapCnt = numTaps % 0x4u; 
00210  
00211     while(tapCnt > 0u) 
00212     { 
00213       /* Perform the multiply-accumulate */ 
00214       coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 
00215       *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 
00216       pb++; 
00217  
00218       /* Decrement the loop counter */ 
00219       tapCnt--; 
00220     } 
00221  
00222     /* Read the sample from state buffer */ 
00223     x0 = *pState; 
00224  
00225     /* Advance state pointer by 1 for the next sample */ 
00226     pState = pState + 1; 
00227  
00228     /* Decrement the loop counter */ 
00229     blkCnt--; 
00230   } 
00231  
00232   /* Save energy and x0 values for the next frame */ 
00233   S->energy = (q31_t) energy; 
00234   S->x0 = x0; 
00235  
00236   /* Processing is complete. Now copy the last numTaps - 1 samples to the  
00237      satrt of the state buffer. This prepares the state buffer for the  
00238      next function call. */ 
00239  
00240   /* Points to the start of the pState buffer */ 
00241   pStateCurnt = S->pState; 
00242  
00243   /* Loop unrolling for (numTaps - 1u) samples copy */ 
00244   tapCnt = (numTaps - 1u) >> 2u; 
00245  
00246   /* copy data */ 
00247   while(tapCnt > 0u) 
00248   { 
00249     *pStateCurnt++ = *pState++; 
00250     *pStateCurnt++ = *pState++; 
00251     *pStateCurnt++ = *pState++; 
00252     *pStateCurnt++ = *pState++; 
00253  
00254     /* Decrement the loop counter */ 
00255     tapCnt--; 
00256   } 
00257  
00258   /* Calculate remaining number of copies */ 
00259   tapCnt = (numTaps - 1u) % 0x4u; 
00260  
00261   /* Copy the remaining q31_t data */ 
00262   while(tapCnt > 0u) 
00263   { 
00264     *pStateCurnt++ = *pState++; 
00265  
00266     /* Decrement the loop counter */ 
00267     tapCnt--; 
00268   } 
00269  
00270 } 
00271  
00272 /**  
00273  * @} end of LMS_NORM group  
00274  */