Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-dsp by
arm_lms_norm_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 17. January 2013 00005 * $Revision: V1.4.1 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_lms_norm_q15.c 00009 * 00010 * Description: Q15 NLMS filter. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @ingroup groupFilters 00045 */ 00046 00047 /** 00048 * @addtogroup LMS_NORM 00049 * @{ 00050 */ 00051 00052 /** 00053 * @brief Processing function for Q15 normalized LMS filter. 00054 * @param[in] *S points to an instance of the Q15 normalized LMS filter structure. 00055 * @param[in] *pSrc points to the block of input data. 00056 * @param[in] *pRef points to the block of reference data. 00057 * @param[out] *pOut points to the block of output data. 00058 * @param[out] *pErr points to the block of error data. 00059 * @param[in] blockSize number of samples to process. 00060 * @return none. 00061 * 00062 * <b>Scaling and Overflow Behavior:</b> 00063 * \par 00064 * The function is implemented using a 64-bit internal accumulator. 00065 * Both coefficients and state variables are represented in 1.15 format and 00066 * multiplications yield a 2.30 result. The 2.30 intermediate results are 00067 * accumulated in a 64-bit accumulator in 34.30 format. 00068 * There is no risk of internal overflow with this approach and the full 00069 * precision of intermediate multiplications is preserved. After all additions 00070 * have been performed, the accumulator is truncated to 34.15 format by 00071 * discarding low 15 bits. Lastly, the accumulator is saturated to yield a 00072 * result in 1.15 format. 00073 * 00074 * \par 00075 * In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted. 00076 * 00077 */ 00078 00079 void arm_lms_norm_q15( 00080 arm_lms_norm_instance_q15 * S, 00081 q15_t * pSrc, 00082 q15_t * pRef, 00083 q15_t * pOut, 00084 q15_t * pErr, 00085 uint32_t blockSize) 00086 { 00087 q15_t *pState = S->pState; /* State pointer */ 00088 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00089 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00090 q15_t *px, *pb; /* Temporary pointers for state and coefficient buffers */ 00091 q15_t mu = S->mu; /* Adaptive factor */ 00092 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00093 uint32_t tapCnt, blkCnt; /* Loop counters */ 00094 q31_t energy; /* Energy of the input */ 00095 q63_t acc; /* Accumulator */ 00096 q15_t e = 0, d = 0; /* error, reference data sample */ 00097 q15_t w = 0, in; /* weight factor and state */ 00098 q15_t x0; /* temporary variable to hold input sample */ 00099 //uint32_t shift = (uint32_t) S->postShift + 1u; /* Shift to be applied to the output */ 00100 q15_t errorXmu, oneByEnergy; /* Temporary variables to store error and mu product and reciprocal of energy */ 00101 q15_t postShift; /* Post shift to be applied to weight after reciprocal calculation */ 00102 q31_t coef; /* Teporary variable for coefficient */ 00103 q31_t acc_l, acc_h; 00104 int32_t lShift = (15 - (int32_t) S->postShift); /* Post shift */ 00105 int32_t uShift = (32 - lShift); 00106 00107 energy = S->energy; 00108 x0 = S->x0; 00109 00110 /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ 00111 /* pStateCurnt points to the location where the new input data should be written */ 00112 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00113 00114 /* Loop over blockSize number of values */ 00115 blkCnt = blockSize; 00116 00117 00118 #ifndef ARM_MATH_CM0_FAMILY 00119 00120 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00121 00122 while(blkCnt > 0u) 00123 { 00124 /* Copy the new input sample into the state buffer */ 00125 *pStateCurnt++ = *pSrc; 00126 00127 /* Initialize pState pointer */ 00128 px = pState; 00129 00130 /* Initialize coeff pointer */ 00131 pb = (pCoeffs); 00132 00133 /* Read the sample from input buffer */ 00134 in = *pSrc++; 00135 00136 /* Update the energy calculation */ 00137 energy -= (((q31_t) x0 * (x0)) >> 15); 00138 energy += (((q31_t) in * (in)) >> 15); 00139 00140 /* Set the accumulator to zero */ 00141 acc = 0; 00142 00143 /* Loop unrolling. Process 4 taps at a time. */ 00144 tapCnt = numTaps >> 2; 00145 00146 while(tapCnt > 0u) 00147 { 00148 00149 /* Perform the multiply-accumulate */ 00150 #ifndef UNALIGNED_SUPPORT_DISABLE 00151 00152 acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc); 00153 acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc); 00154 00155 #else 00156 00157 acc += (((q31_t) * px++ * (*pb++))); 00158 acc += (((q31_t) * px++ * (*pb++))); 00159 acc += (((q31_t) * px++ * (*pb++))); 00160 acc += (((q31_t) * px++ * (*pb++))); 00161 00162 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */ 00163 00164 /* Decrement the loop counter */ 00165 tapCnt--; 00166 } 00167 00168 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00169 tapCnt = numTaps % 0x4u; 00170 00171 while(tapCnt > 0u) 00172 { 00173 /* Perform the multiply-accumulate */ 00174 acc += (((q31_t) * px++ * (*pb++))); 00175 00176 /* Decrement the loop counter */ 00177 tapCnt--; 00178 } 00179 00180 /* Calc lower part of acc */ 00181 acc_l = acc & 0xffffffff; 00182 00183 /* Calc upper part of acc */ 00184 acc_h = (acc >> 32) & 0xffffffff; 00185 00186 /* Apply shift for lower part of acc and upper part of acc */ 00187 acc = (uint32_t) acc_l >> lShift | acc_h << uShift; 00188 00189 /* Converting the result to 1.15 format and saturate the output */ 00190 acc = __SSAT(acc, 16u); 00191 00192 /* Store the result from accumulator into the destination buffer. */ 00193 *pOut++ = (q15_t) acc; 00194 00195 /* Compute and store error */ 00196 d = *pRef++; 00197 e = d - (q15_t) acc; 00198 *pErr++ = e; 00199 00200 /* Calculation of 1/energy */ 00201 postShift = arm_recip_q15((q15_t) energy + DELTA_Q15, 00202 &oneByEnergy, S->recipTable); 00203 00204 /* Calculation of e * mu value */ 00205 errorXmu = (q15_t) (((q31_t) e * mu) >> 15); 00206 00207 /* Calculation of (e * mu) * (1/energy) value */ 00208 acc = (((q31_t) errorXmu * oneByEnergy) >> (15 - postShift)); 00209 00210 /* Weighting factor for the normalized version */ 00211 w = (q15_t) __SSAT((q31_t) acc, 16); 00212 00213 /* Initialize pState pointer */ 00214 px = pState; 00215 00216 /* Initialize coeff pointer */ 00217 pb = (pCoeffs); 00218 00219 /* Loop unrolling. Process 4 taps at a time. */ 00220 tapCnt = numTaps >> 2; 00221 00222 /* Update filter coefficients */ 00223 while(tapCnt > 0u) 00224 { 00225 coef = *pb + (((q31_t) w * (*px++)) >> 15); 00226 *pb++ = (q15_t) __SSAT((coef), 16); 00227 coef = *pb + (((q31_t) w * (*px++)) >> 15); 00228 *pb++ = (q15_t) __SSAT((coef), 16); 00229 coef = *pb + (((q31_t) w * (*px++)) >> 15); 00230 *pb++ = (q15_t) __SSAT((coef), 16); 00231 coef = *pb + (((q31_t) w * (*px++)) >> 15); 00232 *pb++ = (q15_t) __SSAT((coef), 16); 00233 00234 /* Decrement the loop counter */ 00235 tapCnt--; 00236 } 00237 00238 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00239 tapCnt = numTaps % 0x4u; 00240 00241 while(tapCnt > 0u) 00242 { 00243 /* Perform the multiply-accumulate */ 00244 coef = *pb + (((q31_t) w * (*px++)) >> 15); 00245 *pb++ = (q15_t) __SSAT((coef), 16); 00246 00247 /* Decrement the loop counter */ 00248 tapCnt--; 00249 } 00250 00251 /* Read the sample from state buffer */ 00252 x0 = *pState; 00253 00254 /* Advance state pointer by 1 for the next sample */ 00255 pState = pState + 1u; 00256 00257 /* Decrement the loop counter */ 00258 blkCnt--; 00259 } 00260 00261 /* Save energy and x0 values for the next frame */ 00262 S->energy = (q15_t) energy; 00263 S->x0 = x0; 00264 00265 /* Processing is complete. Now copy the last numTaps - 1 samples to the 00266 satrt of the state buffer. This prepares the state buffer for the 00267 next function call. */ 00268 00269 /* Points to the start of the pState buffer */ 00270 pStateCurnt = S->pState; 00271 00272 /* Calculation of count for copying integer writes */ 00273 tapCnt = (numTaps - 1u) >> 2; 00274 00275 while(tapCnt > 0u) 00276 { 00277 00278 #ifndef UNALIGNED_SUPPORT_DISABLE 00279 00280 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00281 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00282 00283 #else 00284 00285 *pStateCurnt++ = *pState++; 00286 *pStateCurnt++ = *pState++; 00287 *pStateCurnt++ = *pState++; 00288 *pStateCurnt++ = *pState++; 00289 00290 #endif 00291 00292 tapCnt--; 00293 00294 } 00295 00296 /* Calculation of count for remaining q15_t data */ 00297 tapCnt = (numTaps - 1u) % 0x4u; 00298 00299 /* copy data */ 00300 while(tapCnt > 0u) 00301 { 00302 *pStateCurnt++ = *pState++; 00303 00304 /* Decrement the loop counter */ 00305 tapCnt--; 00306 } 00307 00308 #else 00309 00310 /* Run the below code for Cortex-M0 */ 00311 00312 while(blkCnt > 0u) 00313 { 00314 /* Copy the new input sample into the state buffer */ 00315 *pStateCurnt++ = *pSrc; 00316 00317 /* Initialize pState pointer */ 00318 px = pState; 00319 00320 /* Initialize pCoeffs pointer */ 00321 pb = pCoeffs; 00322 00323 /* Read the sample from input buffer */ 00324 in = *pSrc++; 00325 00326 /* Update the energy calculation */ 00327 energy -= (((q31_t) x0 * (x0)) >> 15); 00328 energy += (((q31_t) in * (in)) >> 15); 00329 00330 /* Set the accumulator to zero */ 00331 acc = 0; 00332 00333 /* Loop over numTaps number of values */ 00334 tapCnt = numTaps; 00335 00336 while(tapCnt > 0u) 00337 { 00338 /* Perform the multiply-accumulate */ 00339 acc += (((q31_t) * px++ * (*pb++))); 00340 00341 /* Decrement the loop counter */ 00342 tapCnt--; 00343 } 00344 00345 /* Calc lower part of acc */ 00346 acc_l = acc & 0xffffffff; 00347 00348 /* Calc upper part of acc */ 00349 acc_h = (acc >> 32) & 0xffffffff; 00350 00351 /* Apply shift for lower part of acc and upper part of acc */ 00352 acc = (uint32_t) acc_l >> lShift | acc_h << uShift; 00353 00354 /* Converting the result to 1.15 format and saturate the output */ 00355 acc = __SSAT(acc, 16u); 00356 00357 /* Converting the result to 1.15 format */ 00358 //acc = __SSAT((acc >> (16u - shift)), 16u); 00359 00360 /* Store the result from accumulator into the destination buffer. */ 00361 *pOut++ = (q15_t) acc; 00362 00363 /* Compute and store error */ 00364 d = *pRef++; 00365 e = d - (q15_t) acc; 00366 *pErr++ = e; 00367 00368 /* Calculation of 1/energy */ 00369 postShift = arm_recip_q15((q15_t) energy + DELTA_Q15, 00370 &oneByEnergy, S->recipTable); 00371 00372 /* Calculation of e * mu value */ 00373 errorXmu = (q15_t) (((q31_t) e * mu) >> 15); 00374 00375 /* Calculation of (e * mu) * (1/energy) value */ 00376 acc = (((q31_t) errorXmu * oneByEnergy) >> (15 - postShift)); 00377 00378 /* Weighting factor for the normalized version */ 00379 w = (q15_t) __SSAT((q31_t) acc, 16); 00380 00381 /* Initialize pState pointer */ 00382 px = pState; 00383 00384 /* Initialize coeff pointer */ 00385 pb = (pCoeffs); 00386 00387 /* Loop over numTaps number of values */ 00388 tapCnt = numTaps; 00389 00390 while(tapCnt > 0u) 00391 { 00392 /* Perform the multiply-accumulate */ 00393 coef = *pb + (((q31_t) w * (*px++)) >> 15); 00394 *pb++ = (q15_t) __SSAT((coef), 16); 00395 00396 /* Decrement the loop counter */ 00397 tapCnt--; 00398 } 00399 00400 /* Read the sample from state buffer */ 00401 x0 = *pState; 00402 00403 /* Advance state pointer by 1 for the next sample */ 00404 pState = pState + 1u; 00405 00406 /* Decrement the loop counter */ 00407 blkCnt--; 00408 } 00409 00410 /* Save energy and x0 values for the next frame */ 00411 S->energy = (q15_t) energy; 00412 S->x0 = x0; 00413 00414 /* Processing is complete. Now copy the last numTaps - 1 samples to the 00415 satrt of the state buffer. This prepares the state buffer for the 00416 next function call. */ 00417 00418 /* Points to the start of the pState buffer */ 00419 pStateCurnt = S->pState; 00420 00421 /* copy (numTaps - 1u) data */ 00422 tapCnt = (numTaps - 1u); 00423 00424 /* copy data */ 00425 while(tapCnt > 0u) 00426 { 00427 *pStateCurnt++ = *pState++; 00428 00429 /* Decrement the loop counter */ 00430 tapCnt--; 00431 } 00432 00433 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00434 00435 } 00436 00437 00438 /** 00439 * @} end of LMS_NORM group 00440 */
Generated on Tue Jul 12 2022 18:44:09 by
1.7.2
