Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-dsp by
arm_lms_norm_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 17. January 2013 00005 * $Revision: V1.4.1 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_lms_norm_q31.c 00009 * 00010 * Description: Processing function for the Q31 NLMS filter. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @ingroup groupFilters 00045 */ 00046 00047 /** 00048 * @addtogroup LMS_NORM 00049 * @{ 00050 */ 00051 00052 /** 00053 * @brief Processing function for Q31 normalized LMS filter. 00054 * @param[in] *S points to an instance of the Q31 normalized LMS filter structure. 00055 * @param[in] *pSrc points to the block of input data. 00056 * @param[in] *pRef points to the block of reference data. 00057 * @param[out] *pOut points to the block of output data. 00058 * @param[out] *pErr points to the block of error data. 00059 * @param[in] blockSize number of samples to process. 00060 * @return none. 00061 * 00062 * <b>Scaling and Overflow Behavior:</b> 00063 * \par 00064 * The function is implemented using an internal 64-bit accumulator. 00065 * The accumulator has a 2.62 format and maintains full precision of the intermediate 00066 * multiplication results but provides only a single guard bit. 00067 * Thus, if the accumulator result overflows it wraps around rather than clip. 00068 * In order to avoid overflows completely the input signal must be scaled down by 00069 * log2(numTaps) bits. The reference signal should not be scaled down. 00070 * After all multiply-accumulates are performed, the 2.62 accumulator is shifted 00071 * and saturated to 1.31 format to yield the final result. 00072 * The output signal and error signal are in 1.31 format. 00073 * 00074 * \par 00075 * In this filter, filter coefficients are updated for each sample and the 00076 * updation of filter cofficients are saturted. 00077 * 00078 */ 00079 00080 void arm_lms_norm_q31( 00081 arm_lms_norm_instance_q31 * S, 00082 q31_t * pSrc, 00083 q31_t * pRef, 00084 q31_t * pOut, 00085 q31_t * pErr, 00086 uint32_t blockSize) 00087 { 00088 q31_t *pState = S->pState; /* State pointer */ 00089 q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00090 q31_t *pStateCurnt; /* Points to the current sample of the state */ 00091 q31_t *px, *pb; /* Temporary pointers for state and coefficient buffers */ 00092 q31_t mu = S->mu; /* Adaptive factor */ 00093 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00094 uint32_t tapCnt, blkCnt; /* Loop counters */ 00095 q63_t energy; /* Energy of the input */ 00096 q63_t acc; /* Accumulator */ 00097 q31_t e = 0, d = 0; /* error, reference data sample */ 00098 q31_t w = 0, in; /* weight factor and state */ 00099 q31_t x0; /* temporary variable to hold input sample */ 00100 // uint32_t shift = 32u - ((uint32_t) S->postShift + 1u); /* Shift to be applied to the output */ 00101 q31_t errorXmu, oneByEnergy; /* Temporary variables to store error and mu product and reciprocal of energy */ 00102 q31_t postShift; /* Post shift to be applied to weight after reciprocal calculation */ 00103 q31_t coef; /* Temporary variable for coef */ 00104 q31_t acc_l, acc_h; /* temporary input */ 00105 uint32_t uShift = ((uint32_t) S->postShift + 1u); 00106 uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */ 00107 00108 energy = S->energy; 00109 x0 = S->x0; 00110 00111 /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ 00112 /* pStateCurnt points to the location where the new input data should be written */ 00113 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00114 00115 /* Loop over blockSize number of values */ 00116 blkCnt = blockSize; 00117 00118 00119 #ifndef ARM_MATH_CM0_FAMILY 00120 00121 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00122 00123 while(blkCnt > 0u) 00124 { 00125 00126 /* Copy the new input sample into the state buffer */ 00127 *pStateCurnt++ = *pSrc; 00128 00129 /* Initialize pState pointer */ 00130 px = pState; 00131 00132 /* Initialize coeff pointer */ 00133 pb = (pCoeffs); 00134 00135 /* Read the sample from input buffer */ 00136 in = *pSrc++; 00137 00138 /* Update the energy calculation */ 00139 energy = (q31_t) ((((q63_t) energy << 32) - 00140 (((q63_t) x0 * x0) << 1)) >> 32); 00141 energy = (q31_t) (((((q63_t) in * in) << 1) + (energy << 32)) >> 32); 00142 00143 /* Set the accumulator to zero */ 00144 acc = 0; 00145 00146 /* Loop unrolling. Process 4 taps at a time. */ 00147 tapCnt = numTaps >> 2; 00148 00149 while(tapCnt > 0u) 00150 { 00151 /* Perform the multiply-accumulate */ 00152 acc += ((q63_t) (*px++)) * (*pb++); 00153 acc += ((q63_t) (*px++)) * (*pb++); 00154 acc += ((q63_t) (*px++)) * (*pb++); 00155 acc += ((q63_t) (*px++)) * (*pb++); 00156 00157 /* Decrement the loop counter */ 00158 tapCnt--; 00159 } 00160 00161 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00162 tapCnt = numTaps % 0x4u; 00163 00164 while(tapCnt > 0u) 00165 { 00166 /* Perform the multiply-accumulate */ 00167 acc += ((q63_t) (*px++)) * (*pb++); 00168 00169 /* Decrement the loop counter */ 00170 tapCnt--; 00171 } 00172 00173 /* Converting the result to 1.31 format */ 00174 /* Calc lower part of acc */ 00175 acc_l = acc & 0xffffffff; 00176 00177 /* Calc upper part of acc */ 00178 acc_h = (acc >> 32) & 0xffffffff; 00179 00180 acc = (uint32_t) acc_l >> lShift | acc_h << uShift; 00181 00182 /* Store the result from accumulator into the destination buffer. */ 00183 *pOut++ = (q31_t) acc; 00184 00185 /* Compute and store error */ 00186 d = *pRef++; 00187 e = d - (q31_t) acc; 00188 *pErr++ = e; 00189 00190 /* Calculates the reciprocal of energy */ 00191 postShift = arm_recip_q31(energy + DELTA_Q31, 00192 &oneByEnergy, &S->recipTable[0]); 00193 00194 /* Calculation of product of (e * mu) */ 00195 errorXmu = (q31_t) (((q63_t) e * mu) >> 31); 00196 00197 /* Weighting factor for the normalized version */ 00198 w = clip_q63_to_q31(((q63_t) errorXmu * oneByEnergy) >> (31 - postShift)); 00199 00200 /* Initialize pState pointer */ 00201 px = pState; 00202 00203 /* Initialize coeff pointer */ 00204 pb = (pCoeffs); 00205 00206 /* Loop unrolling. Process 4 taps at a time. */ 00207 tapCnt = numTaps >> 2; 00208 00209 /* Update filter coefficients */ 00210 while(tapCnt > 0u) 00211 { 00212 /* Perform the multiply-accumulate */ 00213 00214 /* coef is in 2.30 format */ 00215 coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 00216 /* get coef in 1.31 format by left shifting */ 00217 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u)); 00218 /* update coefficient buffer to next coefficient */ 00219 pb++; 00220 00221 coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 00222 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u)); 00223 pb++; 00224 00225 coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 00226 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u)); 00227 pb++; 00228 00229 coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 00230 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u)); 00231 pb++; 00232 00233 /* Decrement the loop counter */ 00234 tapCnt--; 00235 } 00236 00237 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00238 tapCnt = numTaps % 0x4u; 00239 00240 while(tapCnt > 0u) 00241 { 00242 /* Perform the multiply-accumulate */ 00243 coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 00244 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u)); 00245 pb++; 00246 00247 /* Decrement the loop counter */ 00248 tapCnt--; 00249 } 00250 00251 /* Read the sample from state buffer */ 00252 x0 = *pState; 00253 00254 /* Advance state pointer by 1 for the next sample */ 00255 pState = pState + 1; 00256 00257 /* Decrement the loop counter */ 00258 blkCnt--; 00259 } 00260 00261 /* Save energy and x0 values for the next frame */ 00262 S->energy = (q31_t) energy; 00263 S->x0 = x0; 00264 00265 /* Processing is complete. Now copy the last numTaps - 1 samples to the 00266 satrt of the state buffer. This prepares the state buffer for the 00267 next function call. */ 00268 00269 /* Points to the start of the pState buffer */ 00270 pStateCurnt = S->pState; 00271 00272 /* Loop unrolling for (numTaps - 1u) samples copy */ 00273 tapCnt = (numTaps - 1u) >> 2u; 00274 00275 /* copy data */ 00276 while(tapCnt > 0u) 00277 { 00278 *pStateCurnt++ = *pState++; 00279 *pStateCurnt++ = *pState++; 00280 *pStateCurnt++ = *pState++; 00281 *pStateCurnt++ = *pState++; 00282 00283 /* Decrement the loop counter */ 00284 tapCnt--; 00285 } 00286 00287 /* Calculate remaining number of copies */ 00288 tapCnt = (numTaps - 1u) % 0x4u; 00289 00290 /* Copy the remaining q31_t data */ 00291 while(tapCnt > 0u) 00292 { 00293 *pStateCurnt++ = *pState++; 00294 00295 /* Decrement the loop counter */ 00296 tapCnt--; 00297 } 00298 00299 #else 00300 00301 /* Run the below code for Cortex-M0 */ 00302 00303 while(blkCnt > 0u) 00304 { 00305 00306 /* Copy the new input sample into the state buffer */ 00307 *pStateCurnt++ = *pSrc; 00308 00309 /* Initialize pState pointer */ 00310 px = pState; 00311 00312 /* Initialize pCoeffs pointer */ 00313 pb = pCoeffs; 00314 00315 /* Read the sample from input buffer */ 00316 in = *pSrc++; 00317 00318 /* Update the energy calculation */ 00319 energy = 00320 (q31_t) ((((q63_t) energy << 32) - (((q63_t) x0 * x0) << 1)) >> 32); 00321 energy = (q31_t) (((((q63_t) in * in) << 1) + (energy << 32)) >> 32); 00322 00323 /* Set the accumulator to zero */ 00324 acc = 0; 00325 00326 /* Loop over numTaps number of values */ 00327 tapCnt = numTaps; 00328 00329 while(tapCnt > 0u) 00330 { 00331 /* Perform the multiply-accumulate */ 00332 acc += ((q63_t) (*px++)) * (*pb++); 00333 00334 /* Decrement the loop counter */ 00335 tapCnt--; 00336 } 00337 00338 /* Converting the result to 1.31 format */ 00339 /* Converting the result to 1.31 format */ 00340 /* Calc lower part of acc */ 00341 acc_l = acc & 0xffffffff; 00342 00343 /* Calc upper part of acc */ 00344 acc_h = (acc >> 32) & 0xffffffff; 00345 00346 acc = (uint32_t) acc_l >> lShift | acc_h << uShift; 00347 00348 00349 //acc = (q31_t) (acc >> shift); 00350 00351 /* Store the result from accumulator into the destination buffer. */ 00352 *pOut++ = (q31_t) acc; 00353 00354 /* Compute and store error */ 00355 d = *pRef++; 00356 e = d - (q31_t) acc; 00357 *pErr++ = e; 00358 00359 /* Calculates the reciprocal of energy */ 00360 postShift = 00361 arm_recip_q31(energy + DELTA_Q31, &oneByEnergy, &S->recipTable[0]); 00362 00363 /* Calculation of product of (e * mu) */ 00364 errorXmu = (q31_t) (((q63_t) e * mu) >> 31); 00365 00366 /* Weighting factor for the normalized version */ 00367 w = clip_q63_to_q31(((q63_t) errorXmu * oneByEnergy) >> (31 - postShift)); 00368 00369 /* Initialize pState pointer */ 00370 px = pState; 00371 00372 /* Initialize coeff pointer */ 00373 pb = (pCoeffs); 00374 00375 /* Loop over numTaps number of values */ 00376 tapCnt = numTaps; 00377 00378 while(tapCnt > 0u) 00379 { 00380 /* Perform the multiply-accumulate */ 00381 /* coef is in 2.30 format */ 00382 coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 00383 /* get coef in 1.31 format by left shifting */ 00384 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u)); 00385 /* update coefficient buffer to next coefficient */ 00386 pb++; 00387 00388 /* Decrement the loop counter */ 00389 tapCnt--; 00390 } 00391 00392 /* Read the sample from state buffer */ 00393 x0 = *pState; 00394 00395 /* Advance state pointer by 1 for the next sample */ 00396 pState = pState + 1; 00397 00398 /* Decrement the loop counter */ 00399 blkCnt--; 00400 } 00401 00402 /* Save energy and x0 values for the next frame */ 00403 S->energy = (q31_t) energy; 00404 S->x0 = x0; 00405 00406 /* Processing is complete. Now copy the last numTaps - 1 samples to the 00407 start of the state buffer. This prepares the state buffer for the 00408 next function call. */ 00409 00410 /* Points to the start of the pState buffer */ 00411 pStateCurnt = S->pState; 00412 00413 /* Loop for (numTaps - 1u) samples copy */ 00414 tapCnt = (numTaps - 1u); 00415 00416 /* Copy the remaining q31_t data */ 00417 while(tapCnt > 0u) 00418 { 00419 *pStateCurnt++ = *pState++; 00420 00421 /* Decrement the loop counter */ 00422 tapCnt--; 00423 } 00424 00425 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00426 00427 } 00428 00429 /** 00430 * @} end of LMS_NORM group 00431 */
Generated on Tue Jul 12 2022 18:44:09 by
1.7.2
