CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details
Dependents: K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more
arm_lms_norm_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_lms_norm_q31.c 00009 * 00010 * Description: Processing function for the Q31 NLMS filter. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.3 2010/11/29 00015 * Re-organized the CMSIS folders and updated documentation. 00016 * 00017 * Version 1.0.2 2010/11/11 00018 * Documentation updated. 00019 * 00020 * Version 1.0.1 2010/10/05 00021 * Production release and review comments incorporated. 00022 * 00023 * Version 1.0.0 2010/09/20 00024 * Production release and review comments incorporated 00025 * 00026 * Version 0.0.7 2010/06/10 00027 * Misra-C changes done 00028 * -------------------------------------------------------------------- */ 00029 00030 #include "arm_math.h" 00031 00032 /** 00033 * @ingroup groupFilters 00034 */ 00035 00036 /** 00037 * @addtogroup LMS_NORM 00038 * @{ 00039 */ 00040 00041 /** 00042 * @brief Processing function for Q31 normalized LMS filter. 00043 * @param[in] *S points to an instance of the Q31 normalized LMS filter structure. 00044 * @param[in] *pSrc points to the block of input data. 00045 * @param[in] *pRef points to the block of reference data. 00046 * @param[out] *pOut points to the block of output data. 00047 * @param[out] *pErr points to the block of error data. 00048 * @param[in] blockSize number of samples to process. 00049 * @return none. 00050 * 00051 * <b>Scaling and Overflow Behavior:</b> 00052 * \par 00053 * The function is implemented using an internal 64-bit accumulator. 00054 * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit. 00055 * Thus, if the accumulator result overflows it wraps around rather than clip. 00056 * In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits. 00057 * The reference signal should not be scaled down. 00058 * After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format. 00059 * The output signal and error signal are in 1.31 format. 00060 * 00061 * \par 00062 * In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted. 00063 * 00064 */ 00065 00066 void arm_lms_norm_q31( 00067 arm_lms_norm_instance_q31 * S, 00068 q31_t * pSrc, 00069 q31_t * pRef, 00070 q31_t * pOut, 00071 q31_t * pErr, 00072 uint32_t blockSize) 00073 { 00074 q31_t *pState = S->pState; /* State pointer */ 00075 q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00076 q31_t *pStateCurnt; /* Points to the current sample of the state */ 00077 q31_t *px, *pb; /* Temporary pointers for state and coefficient buffers */ 00078 q31_t mu = S->mu; /* Adaptive factor */ 00079 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00080 uint32_t tapCnt, blkCnt; /* Loop counters */ 00081 q63_t energy; /* Energy of the input */ 00082 q63_t acc; /* Accumulator */ 00083 q31_t e = 0, d = 0; /* error, reference data sample */ 00084 q31_t w = 0, in; /* weight factor and state */ 00085 q31_t x0; /* temporary variable to hold input sample */ 00086 uint32_t shift = 32u - ((uint32_t) S->postShift + 1u); /* Shift to be applied to the output */ 00087 q31_t errorXmu, oneByEnergy; /* Temporary variables to store error and mu product and reciprocal of energy */ 00088 q31_t postShift; /* Post shift to be applied to weight after reciprocal calculation */ 00089 q31_t coef; /* Temporary variable for coef */ 00090 00091 energy = S->energy; 00092 x0 = S->x0; 00093 00094 /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ 00095 /* pStateCurnt points to the location where the new input data should be written */ 00096 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00097 00098 blkCnt = blockSize; 00099 00100 while(blkCnt > 0u) 00101 { 00102 00103 /* Copy the new input sample into the state buffer */ 00104 *pStateCurnt++ = *pSrc; 00105 00106 /* Initialize pState pointer */ 00107 px = pState; 00108 00109 /* Initialize coeff pointer */ 00110 pb = (pCoeffs); 00111 00112 /* Read the sample from input buffer */ 00113 in = *pSrc++; 00114 00115 /* Update the energy calculation */ 00116 energy = (q31_t) ((((q63_t) energy << 32) - 00117 (((q63_t) x0 * x0) << 1)) >> 32); 00118 energy = (q31_t) (((((q63_t) in * in) << 1) + (energy << 32)) >> 32); 00119 00120 /* Set the accumulator to zero */ 00121 acc = 0; 00122 00123 /* Loop unrolling. Process 4 taps at a time. */ 00124 tapCnt = numTaps >> 2; 00125 00126 while(tapCnt > 0u) 00127 { 00128 /* Perform the multiply-accumulate */ 00129 acc += ((q63_t) (*px++)) * (*pb++); 00130 acc += ((q63_t) (*px++)) * (*pb++); 00131 acc += ((q63_t) (*px++)) * (*pb++); 00132 acc += ((q63_t) (*px++)) * (*pb++); 00133 00134 /* Decrement the loop counter */ 00135 tapCnt--; 00136 } 00137 00138 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00139 tapCnt = numTaps % 0x4u; 00140 00141 while(tapCnt > 0u) 00142 { 00143 /* Perform the multiply-accumulate */ 00144 acc += ((q63_t) (*px++)) * (*pb++); 00145 00146 /* Decrement the loop counter */ 00147 tapCnt--; 00148 } 00149 00150 /* Converting the result to 1.31 format */ 00151 acc = (q31_t) (acc >> shift); 00152 00153 /* Store the result from accumulator into the destination buffer. */ 00154 *pOut++ = (q31_t) acc; 00155 00156 /* Compute and store error */ 00157 d = *pRef++; 00158 e = d - (q31_t) acc; 00159 *pErr++ = e; 00160 00161 /* Calculates the reciprocal of energy */ 00162 postShift = arm_recip_q31(energy + DELTA_Q31, 00163 &oneByEnergy, &S->recipTable[0]); 00164 00165 /* Calculation of product of (e * mu) */ 00166 errorXmu = (q31_t) (((q63_t) e * mu) >> 31); 00167 00168 /* Weighting factor for the normalized version */ 00169 w = clip_q63_to_q31(((q63_t) errorXmu * oneByEnergy) >> (31 - postShift)); 00170 00171 /* Initialize pState pointer */ 00172 px = pState; 00173 00174 /* Initialize coeff pointer */ 00175 pb = (pCoeffs); 00176 00177 /* Loop unrolling. Process 4 taps at a time. */ 00178 tapCnt = numTaps >> 2; 00179 00180 /* Update filter coefficients */ 00181 while(tapCnt > 0u) 00182 { 00183 /* Perform the multiply-accumulate */ 00184 00185 /* coef is in 2.30 format */ 00186 coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 00187 /* get coef in 1.31 format by left shifting */ 00188 *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 00189 /* update coefficient buffer to next coefficient */ 00190 pb++; 00191 00192 coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 00193 *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 00194 pb++; 00195 00196 coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 00197 *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 00198 pb++; 00199 00200 coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 00201 *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 00202 pb++; 00203 00204 /* Decrement the loop counter */ 00205 tapCnt--; 00206 } 00207 00208 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00209 tapCnt = numTaps % 0x4u; 00210 00211 while(tapCnt > 0u) 00212 { 00213 /* Perform the multiply-accumulate */ 00214 coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 00215 *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 00216 pb++; 00217 00218 /* Decrement the loop counter */ 00219 tapCnt--; 00220 } 00221 00222 /* Read the sample from state buffer */ 00223 x0 = *pState; 00224 00225 /* Advance state pointer by 1 for the next sample */ 00226 pState = pState + 1; 00227 00228 /* Decrement the loop counter */ 00229 blkCnt--; 00230 } 00231 00232 /* Save energy and x0 values for the next frame */ 00233 S->energy = (q31_t) energy; 00234 S->x0 = x0; 00235 00236 /* Processing is complete. Now copy the last numTaps - 1 samples to the 00237 satrt of the state buffer. This prepares the state buffer for the 00238 next function call. */ 00239 00240 /* Points to the start of the pState buffer */ 00241 pStateCurnt = S->pState; 00242 00243 /* Loop unrolling for (numTaps - 1u) samples copy */ 00244 tapCnt = (numTaps - 1u) >> 2u; 00245 00246 /* copy data */ 00247 while(tapCnt > 0u) 00248 { 00249 *pStateCurnt++ = *pState++; 00250 *pStateCurnt++ = *pState++; 00251 *pStateCurnt++ = *pState++; 00252 *pStateCurnt++ = *pState++; 00253 00254 /* Decrement the loop counter */ 00255 tapCnt--; 00256 } 00257 00258 /* Calculate remaining number of copies */ 00259 tapCnt = (numTaps - 1u) % 0x4u; 00260 00261 /* Copy the remaining q31_t data */ 00262 while(tapCnt > 0u) 00263 { 00264 *pStateCurnt++ = *pState++; 00265 00266 /* Decrement the loop counter */ 00267 tapCnt--; 00268 } 00269 00270 } 00271 00272 /** 00273 * @} end of LMS_NORM group 00274 */
Generated on Tue Jul 12 2022 14:13:53 by 1.7.2