CMSIS DSP library
Dependents: performance_timer Surfboard_ gps2rtty Capstone ... more
arm_lms_norm_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_lms_norm_q15.c 00009 * 00010 * Description: Q15 NLMS filter. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @ingroup groupFilters 00045 */ 00046 00047 /** 00048 * @addtogroup LMS_NORM 00049 * @{ 00050 */ 00051 00052 /** 00053 * @brief Processing function for Q15 normalized LMS filter. 00054 * @param[in] *S points to an instance of the Q15 normalized LMS filter structure. 00055 * @param[in] *pSrc points to the block of input data. 00056 * @param[in] *pRef points to the block of reference data. 00057 * @param[out] *pOut points to the block of output data. 00058 * @param[out] *pErr points to the block of error data. 00059 * @param[in] blockSize number of samples to process. 00060 * @return none. 00061 * 00062 * <b>Scaling and Overflow Behavior:</b> 00063 * \par 00064 * The function is implemented using a 64-bit internal accumulator. 00065 * Both coefficients and state variables are represented in 1.15 format and 00066 * multiplications yield a 2.30 result. The 2.30 intermediate results are 00067 * accumulated in a 64-bit accumulator in 34.30 format. 00068 * There is no risk of internal overflow with this approach and the full 00069 * precision of intermediate multiplications is preserved. After all additions 00070 * have been performed, the accumulator is truncated to 34.15 format by 00071 * discarding low 15 bits. Lastly, the accumulator is saturated to yield a 00072 * result in 1.15 format. 00073 * 00074 * \par 00075 * In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted. 00076 * 00077 */ 00078 00079 void arm_lms_norm_q15( 00080 arm_lms_norm_instance_q15 * S, 00081 q15_t * pSrc, 00082 q15_t * pRef, 00083 q15_t * pOut, 00084 q15_t * pErr, 00085 uint32_t blockSize) 00086 { 00087 q15_t *pState = S->pState; /* State pointer */ 00088 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00089 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00090 q15_t *px, *pb; /* Temporary pointers for state and coefficient buffers */ 00091 q15_t mu = S->mu; /* Adaptive factor */ 00092 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00093 uint32_t tapCnt, blkCnt; /* Loop counters */ 00094 q31_t energy; /* Energy of the input */ 00095 q63_t acc; /* Accumulator */ 00096 q15_t e = 0, d = 0; /* error, reference data sample */ 00097 q15_t w = 0, in; /* weight factor and state */ 00098 q15_t x0; /* temporary variable to hold input sample */ 00099 //uint32_t shift = (uint32_t) S->postShift + 1u; /* Shift to be applied to the output */ 00100 q15_t errorXmu, oneByEnergy; /* Temporary variables to store error and mu product and reciprocal of energy */ 00101 q15_t postShift; /* Post shift to be applied to weight after reciprocal calculation */ 00102 q31_t coef; /* Teporary variable for coefficient */ 00103 q31_t acc_l, acc_h; 00104 int32_t lShift = (15 - (int32_t) S->postShift); /* Post shift */ 00105 int32_t uShift = (32 - lShift); 00106 00107 energy = S->energy; 00108 x0 = S->x0; 00109 00110 /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ 00111 /* pStateCurnt points to the location where the new input data should be written */ 00112 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00113 00114 /* Loop over blockSize number of values */ 00115 blkCnt = blockSize; 00116 00117 00118 #ifndef ARM_MATH_CM0_FAMILY 00119 00120 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00121 00122 while(blkCnt > 0u) 00123 { 00124 /* Copy the new input sample into the state buffer */ 00125 *pStateCurnt++ = *pSrc; 00126 00127 /* Initialize pState pointer */ 00128 px = pState; 00129 00130 /* Initialize coeff pointer */ 00131 pb = (pCoeffs); 00132 00133 /* Read the sample from input buffer */ 00134 in = *pSrc++; 00135 00136 /* Update the energy calculation */ 00137 energy -= (((q31_t) x0 * (x0)) >> 15); 00138 energy += (((q31_t) in * (in)) >> 15); 00139 00140 /* Set the accumulator to zero */ 00141 acc = 0; 00142 00143 /* Loop unrolling. Process 4 taps at a time. */ 00144 tapCnt = numTaps >> 2; 00145 00146 while(tapCnt > 0u) 00147 { 00148 00149 /* Perform the multiply-accumulate */ 00150 #ifndef UNALIGNED_SUPPORT_DISABLE 00151 00152 acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc); 00153 acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc); 00154 00155 #else 00156 00157 acc += (((q31_t) * px++ * (*pb++))); 00158 acc += (((q31_t) * px++ * (*pb++))); 00159 acc += (((q31_t) * px++ * (*pb++))); 00160 acc += (((q31_t) * px++ * (*pb++))); 00161 00162 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */ 00163 00164 /* Decrement the loop counter */ 00165 tapCnt--; 00166 } 00167 00168 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00169 tapCnt = numTaps % 0x4u; 00170 00171 while(tapCnt > 0u) 00172 { 00173 /* Perform the multiply-accumulate */ 00174 acc += (((q31_t) * px++ * (*pb++))); 00175 00176 /* Decrement the loop counter */ 00177 tapCnt--; 00178 } 00179 00180 /* Calc lower part of acc */ 00181 acc_l = acc & 0xffffffff; 00182 00183 /* Calc upper part of acc */ 00184 acc_h = (acc >> 32) & 0xffffffff; 00185 00186 /* Apply shift for lower part of acc and upper part of acc */ 00187 acc = (uint32_t) acc_l >> lShift | acc_h << uShift; 00188 00189 /* Converting the result to 1.15 format and saturate the output */ 00190 acc = __SSAT(acc, 16u); 00191 00192 /* Store the result from accumulator into the destination buffer. */ 00193 *pOut++ = (q15_t) acc; 00194 00195 /* Compute and store error */ 00196 d = *pRef++; 00197 e = d - (q15_t) acc; 00198 *pErr++ = e; 00199 00200 /* Calculation of 1/energy */ 00201 postShift = arm_recip_q15((q15_t) energy + DELTA_Q15, 00202 &oneByEnergy, S->recipTable); 00203 00204 /* Calculation of e * mu value */ 00205 errorXmu = (q15_t) (((q31_t) e * mu) >> 15); 00206 00207 /* Calculation of (e * mu) * (1/energy) value */ 00208 acc = (((q31_t) errorXmu * oneByEnergy) >> (15 - postShift)); 00209 00210 /* Weighting factor for the normalized version */ 00211 w = (q15_t) __SSAT((q31_t) acc, 16); 00212 00213 /* Initialize pState pointer */ 00214 px = pState; 00215 00216 /* Initialize coeff pointer */ 00217 pb = (pCoeffs); 00218 00219 /* Loop unrolling. Process 4 taps at a time. */ 00220 tapCnt = numTaps >> 2; 00221 00222 /* Update filter coefficients */ 00223 while(tapCnt > 0u) 00224 { 00225 coef = *pb + (((q31_t) w * (*px++)) >> 15); 00226 *pb++ = (q15_t) __SSAT((coef), 16); 00227 coef = *pb + (((q31_t) w * (*px++)) >> 15); 00228 *pb++ = (q15_t) __SSAT((coef), 16); 00229 coef = *pb + (((q31_t) w * (*px++)) >> 15); 00230 *pb++ = (q15_t) __SSAT((coef), 16); 00231 coef = *pb + (((q31_t) w * (*px++)) >> 15); 00232 *pb++ = (q15_t) __SSAT((coef), 16); 00233 00234 /* Decrement the loop counter */ 00235 tapCnt--; 00236 } 00237 00238 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00239 tapCnt = numTaps % 0x4u; 00240 00241 while(tapCnt > 0u) 00242 { 00243 /* Perform the multiply-accumulate */ 00244 coef = *pb + (((q31_t) w * (*px++)) >> 15); 00245 *pb++ = (q15_t) __SSAT((coef), 16); 00246 00247 /* Decrement the loop counter */ 00248 tapCnt--; 00249 } 00250 00251 /* Read the sample from state buffer */ 00252 x0 = *pState; 00253 00254 /* Advance state pointer by 1 for the next sample */ 00255 pState = pState + 1u; 00256 00257 /* Decrement the loop counter */ 00258 blkCnt--; 00259 } 00260 00261 /* Save energy and x0 values for the next frame */ 00262 S->energy = (q15_t) energy; 00263 S->x0 = x0; 00264 00265 /* Processing is complete. Now copy the last numTaps - 1 samples to the 00266 satrt of the state buffer. This prepares the state buffer for the 00267 next function call. */ 00268 00269 /* Points to the start of the pState buffer */ 00270 pStateCurnt = S->pState; 00271 00272 /* Calculation of count for copying integer writes */ 00273 tapCnt = (numTaps - 1u) >> 2; 00274 00275 while(tapCnt > 0u) 00276 { 00277 00278 #ifndef UNALIGNED_SUPPORT_DISABLE 00279 00280 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00281 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00282 00283 #else 00284 00285 *pStateCurnt++ = *pState++; 00286 *pStateCurnt++ = *pState++; 00287 *pStateCurnt++ = *pState++; 00288 *pStateCurnt++ = *pState++; 00289 00290 #endif 00291 00292 tapCnt--; 00293 00294 } 00295 00296 /* Calculation of count for remaining q15_t data */ 00297 tapCnt = (numTaps - 1u) % 0x4u; 00298 00299 /* copy data */ 00300 while(tapCnt > 0u) 00301 { 00302 *pStateCurnt++ = *pState++; 00303 00304 /* Decrement the loop counter */ 00305 tapCnt--; 00306 } 00307 00308 #else 00309 00310 /* Run the below code for Cortex-M0 */ 00311 00312 while(blkCnt > 0u) 00313 { 00314 /* Copy the new input sample into the state buffer */ 00315 *pStateCurnt++ = *pSrc; 00316 00317 /* Initialize pState pointer */ 00318 px = pState; 00319 00320 /* Initialize pCoeffs pointer */ 00321 pb = pCoeffs; 00322 00323 /* Read the sample from input buffer */ 00324 in = *pSrc++; 00325 00326 /* Update the energy calculation */ 00327 energy -= (((q31_t) x0 * (x0)) >> 15); 00328 energy += (((q31_t) in * (in)) >> 15); 00329 00330 /* Set the accumulator to zero */ 00331 acc = 0; 00332 00333 /* Loop over numTaps number of values */ 00334 tapCnt = numTaps; 00335 00336 while(tapCnt > 0u) 00337 { 00338 /* Perform the multiply-accumulate */ 00339 acc += (((q31_t) * px++ * (*pb++))); 00340 00341 /* Decrement the loop counter */ 00342 tapCnt--; 00343 } 00344 00345 /* Calc lower part of acc */ 00346 acc_l = acc & 0xffffffff; 00347 00348 /* Calc upper part of acc */ 00349 acc_h = (acc >> 32) & 0xffffffff; 00350 00351 /* Apply shift for lower part of acc and upper part of acc */ 00352 acc = (uint32_t) acc_l >> lShift | acc_h << uShift; 00353 00354 /* Converting the result to 1.15 format and saturate the output */ 00355 acc = __SSAT(acc, 16u); 00356 00357 /* Converting the result to 1.15 format */ 00358 //acc = __SSAT((acc >> (16u - shift)), 16u); 00359 00360 /* Store the result from accumulator into the destination buffer. */ 00361 *pOut++ = (q15_t) acc; 00362 00363 /* Compute and store error */ 00364 d = *pRef++; 00365 e = d - (q15_t) acc; 00366 *pErr++ = e; 00367 00368 /* Calculation of 1/energy */ 00369 postShift = arm_recip_q15((q15_t) energy + DELTA_Q15, 00370 &oneByEnergy, S->recipTable); 00371 00372 /* Calculation of e * mu value */ 00373 errorXmu = (q15_t) (((q31_t) e * mu) >> 15); 00374 00375 /* Calculation of (e * mu) * (1/energy) value */ 00376 acc = (((q31_t) errorXmu * oneByEnergy) >> (15 - postShift)); 00377 00378 /* Weighting factor for the normalized version */ 00379 w = (q15_t) __SSAT((q31_t) acc, 16); 00380 00381 /* Initialize pState pointer */ 00382 px = pState; 00383 00384 /* Initialize coeff pointer */ 00385 pb = (pCoeffs); 00386 00387 /* Loop over numTaps number of values */ 00388 tapCnt = numTaps; 00389 00390 while(tapCnt > 0u) 00391 { 00392 /* Perform the multiply-accumulate */ 00393 coef = *pb + (((q31_t) w * (*px++)) >> 15); 00394 *pb++ = (q15_t) __SSAT((coef), 16); 00395 00396 /* Decrement the loop counter */ 00397 tapCnt--; 00398 } 00399 00400 /* Read the sample from state buffer */ 00401 x0 = *pState; 00402 00403 /* Advance state pointer by 1 for the next sample */ 00404 pState = pState + 1u; 00405 00406 /* Decrement the loop counter */ 00407 blkCnt--; 00408 } 00409 00410 /* Save energy and x0 values for the next frame */ 00411 S->energy = (q15_t) energy; 00412 S->x0 = x0; 00413 00414 /* Processing is complete. Now copy the last numTaps - 1 samples to the 00415 satrt of the state buffer. This prepares the state buffer for the 00416 next function call. */ 00417 00418 /* Points to the start of the pState buffer */ 00419 pStateCurnt = S->pState; 00420 00421 /* copy (numTaps - 1u) data */ 00422 tapCnt = (numTaps - 1u); 00423 00424 /* copy data */ 00425 while(tapCnt > 0u) 00426 { 00427 *pStateCurnt++ = *pState++; 00428 00429 /* Decrement the loop counter */ 00430 tapCnt--; 00431 } 00432 00433 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00434 00435 } 00436 00437 00438 /** 00439 * @} end of LMS_NORM group 00440 */
Generated on Tue Jul 12 2022 11:59:18 by 1.7.2