Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
arm_lms_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_lms_q15.c 00009 * 00010 * Description: Processing function for the Q15 LMS filter. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 /** 00043 * @ingroup groupFilters 00044 */ 00045 00046 /** 00047 * @addtogroup LMS 00048 * @{ 00049 */ 00050 00051 /** 00052 * @brief Processing function for Q15 LMS filter. 00053 * @param[in] *S points to an instance of the Q15 LMS filter structure. 00054 * @param[in] *pSrc points to the block of input data. 00055 * @param[in] *pRef points to the block of reference data. 00056 * @param[out] *pOut points to the block of output data. 00057 * @param[out] *pErr points to the block of error data. 00058 * @param[in] blockSize number of samples to process. 00059 * @return none. 00060 * 00061 * \par Scaling and Overflow Behavior: 00062 * The function is implemented using a 64-bit internal accumulator. 00063 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result. 00064 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format. 00065 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved. 00066 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits. 00067 * Lastly, the accumulator is saturated to yield a result in 1.15 format. 00068 * 00069 * \par 00070 * In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted. 00071 * 00072 */ 00073 00074 void arm_lms_q15( 00075 const arm_lms_instance_q15 * S, 00076 q15_t * pSrc, 00077 q15_t * pRef, 00078 q15_t * pOut, 00079 q15_t * pErr, 00080 uint32_t blockSize) 00081 { 00082 q15_t *pState = S->pState; /* State pointer */ 00083 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00084 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00085 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00086 q15_t mu = S->mu; /* Adaptive factor */ 00087 q15_t *px; /* Temporary pointer for state */ 00088 q15_t *pb; /* Temporary pointer for coefficient buffer */ 00089 uint32_t tapCnt, blkCnt; /* Loop counters */ 00090 q63_t acc; /* Accumulator */ 00091 q15_t e = 0; /* error of data sample */ 00092 q15_t alpha; /* Intermediate constant for taps update */ 00093 q31_t coef; /* Teporary variable for coefficient */ 00094 q31_t acc_l, acc_h; 00095 int32_t lShift = (15 - (int32_t) S->postShift); /* Post shift */ 00096 int32_t uShift = (32 - lShift); 00097 00098 00099 #ifndef ARM_MATH_CM0_FAMILY 00100 00101 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00102 00103 00104 /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ 00105 /* pStateCurnt points to the location where the new input data should be written */ 00106 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00107 00108 /* Initializing blkCnt with blockSize */ 00109 blkCnt = blockSize; 00110 00111 while(blkCnt > 0u) 00112 { 00113 /* Copy the new input sample into the state buffer */ 00114 *pStateCurnt++ = *pSrc++; 00115 00116 /* Initialize state pointer */ 00117 px = pState; 00118 00119 /* Initialize coefficient pointer */ 00120 pb = pCoeffs; 00121 00122 /* Set the accumulator to zero */ 00123 acc = 0; 00124 00125 /* Loop unrolling. Process 4 taps at a time. */ 00126 tapCnt = numTaps >> 2u; 00127 00128 while(tapCnt > 0u) 00129 { 00130 /* acc += b[N] * x[n-N] + b[N-1] * x[n-N-1] */ 00131 /* Perform the multiply-accumulate */ 00132 #ifndef UNALIGNED_SUPPORT_DISABLE 00133 00134 acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc); 00135 acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc); 00136 00137 #else 00138 00139 acc += (q63_t) (((q31_t) (*px++) * (*pb++))); 00140 acc += (q63_t) (((q31_t) (*px++) * (*pb++))); 00141 acc += (q63_t) (((q31_t) (*px++) * (*pb++))); 00142 acc += (q63_t) (((q31_t) (*px++) * (*pb++))); 00143 00144 00145 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */ 00146 00147 /* Decrement the loop counter */ 00148 tapCnt--; 00149 } 00150 00151 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00152 tapCnt = numTaps % 0x4u; 00153 00154 while(tapCnt > 0u) 00155 { 00156 /* Perform the multiply-accumulate */ 00157 acc += (q63_t) (((q31_t) (*px++) * (*pb++))); 00158 00159 /* Decrement the loop counter */ 00160 tapCnt--; 00161 } 00162 00163 /* Calc lower part of acc */ 00164 acc_l = acc & 0xffffffff; 00165 00166 /* Calc upper part of acc */ 00167 acc_h = (acc >> 32) & 0xffffffff; 00168 00169 /* Apply shift for lower part of acc and upper part of acc */ 00170 acc = (uint32_t) acc_l >> lShift | acc_h << uShift; 00171 00172 /* Converting the result to 1.15 format and saturate the output */ 00173 acc = __SSAT(acc, 16); 00174 00175 /* Store the result from accumulator into the destination buffer. */ 00176 *pOut++ = (q15_t) acc; 00177 00178 /* Compute and store error */ 00179 e = *pRef++ - (q15_t) acc; 00180 00181 *pErr++ = (q15_t) e; 00182 00183 /* Compute alpha i.e. intermediate constant for taps update */ 00184 alpha = (q15_t) (((q31_t) e * (mu)) >> 15); 00185 00186 /* Initialize state pointer */ 00187 /* Advance state pointer by 1 for the next sample */ 00188 px = pState++; 00189 00190 /* Initialize coefficient pointer */ 00191 pb = pCoeffs; 00192 00193 /* Loop unrolling. Process 4 taps at a time. */ 00194 tapCnt = numTaps >> 2u; 00195 00196 /* Update filter coefficients */ 00197 while(tapCnt > 0u) 00198 { 00199 coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15); 00200 *pb++ = (q15_t) __SSAT((coef), 16); 00201 coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15); 00202 *pb++ = (q15_t) __SSAT((coef), 16); 00203 coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15); 00204 *pb++ = (q15_t) __SSAT((coef), 16); 00205 coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15); 00206 *pb++ = (q15_t) __SSAT((coef), 16); 00207 00208 /* Decrement the loop counter */ 00209 tapCnt--; 00210 } 00211 00212 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00213 tapCnt = numTaps % 0x4u; 00214 00215 while(tapCnt > 0u) 00216 { 00217 /* Perform the multiply-accumulate */ 00218 coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15); 00219 *pb++ = (q15_t) __SSAT((coef), 16); 00220 00221 /* Decrement the loop counter */ 00222 tapCnt--; 00223 } 00224 00225 /* Decrement the loop counter */ 00226 blkCnt--; 00227 00228 } 00229 00230 /* Processing is complete. Now copy the last numTaps - 1 samples to the 00231 satrt of the state buffer. This prepares the state buffer for the 00232 next function call. */ 00233 00234 /* Points to the start of the pState buffer */ 00235 pStateCurnt = S->pState; 00236 00237 /* Calculation of count for copying integer writes */ 00238 tapCnt = (numTaps - 1u) >> 2; 00239 00240 while(tapCnt > 0u) 00241 { 00242 00243 #ifndef UNALIGNED_SUPPORT_DISABLE 00244 00245 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00246 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00247 #else 00248 *pStateCurnt++ = *pState++; 00249 *pStateCurnt++ = *pState++; 00250 *pStateCurnt++ = *pState++; 00251 *pStateCurnt++ = *pState++; 00252 #endif 00253 00254 tapCnt--; 00255 00256 } 00257 00258 /* Calculation of count for remaining q15_t data */ 00259 tapCnt = (numTaps - 1u) % 0x4u; 00260 00261 /* copy data */ 00262 while(tapCnt > 0u) 00263 { 00264 *pStateCurnt++ = *pState++; 00265 00266 /* Decrement the loop counter */ 00267 tapCnt--; 00268 } 00269 00270 #else 00271 00272 /* Run the below code for Cortex-M0 */ 00273 00274 /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ 00275 /* pStateCurnt points to the location where the new input data should be written */ 00276 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00277 00278 /* Loop over blockSize number of values */ 00279 blkCnt = blockSize; 00280 00281 while(blkCnt > 0u) 00282 { 00283 /* Copy the new input sample into the state buffer */ 00284 *pStateCurnt++ = *pSrc++; 00285 00286 /* Initialize pState pointer */ 00287 px = pState; 00288 00289 /* Initialize pCoeffs pointer */ 00290 pb = pCoeffs; 00291 00292 /* Set the accumulator to zero */ 00293 acc = 0; 00294 00295 /* Loop over numTaps number of values */ 00296 tapCnt = numTaps; 00297 00298 while(tapCnt > 0u) 00299 { 00300 /* Perform the multiply-accumulate */ 00301 acc += (q63_t) ((q31_t) (*px++) * (*pb++)); 00302 00303 /* Decrement the loop counter */ 00304 tapCnt--; 00305 } 00306 00307 /* Calc lower part of acc */ 00308 acc_l = acc & 0xffffffff; 00309 00310 /* Calc upper part of acc */ 00311 acc_h = (acc >> 32) & 0xffffffff; 00312 00313 /* Apply shift for lower part of acc and upper part of acc */ 00314 acc = (uint32_t) acc_l >> lShift | acc_h << uShift; 00315 00316 /* Converting the result to 1.15 format and saturate the output */ 00317 acc = __SSAT(acc, 16); 00318 00319 /* Store the result from accumulator into the destination buffer. */ 00320 *pOut++ = (q15_t) acc; 00321 00322 /* Compute and store error */ 00323 e = *pRef++ - (q15_t) acc; 00324 00325 *pErr++ = (q15_t) e; 00326 00327 /* Compute alpha i.e. intermediate constant for taps update */ 00328 alpha = (q15_t) (((q31_t) e * (mu)) >> 15); 00329 00330 /* Initialize pState pointer */ 00331 /* Advance state pointer by 1 for the next sample */ 00332 px = pState++; 00333 00334 /* Initialize pCoeffs pointer */ 00335 pb = pCoeffs; 00336 00337 /* Loop over numTaps number of values */ 00338 tapCnt = numTaps; 00339 00340 while(tapCnt > 0u) 00341 { 00342 /* Perform the multiply-accumulate */ 00343 coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15); 00344 *pb++ = (q15_t) __SSAT((coef), 16); 00345 00346 /* Decrement the loop counter */ 00347 tapCnt--; 00348 } 00349 00350 /* Decrement the loop counter */ 00351 blkCnt--; 00352 00353 } 00354 00355 /* Processing is complete. Now copy the last numTaps - 1 samples to the 00356 start of the state buffer. This prepares the state buffer for the 00357 next function call. */ 00358 00359 /* Points to the start of the pState buffer */ 00360 pStateCurnt = S->pState; 00361 00362 /* Copy (numTaps - 1u) samples */ 00363 tapCnt = (numTaps - 1u); 00364 00365 /* Copy the data */ 00366 while(tapCnt > 0u) 00367 { 00368 *pStateCurnt++ = *pState++; 00369 00370 /* Decrement the loop counter */ 00371 tapCnt--; 00372 } 00373 00374 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00375 00376 } 00377 00378 /** 00379 * @} end of LMS group 00380 */
Generated on Tue Aug 9 2022 00:37:02 by
 1.7.2
 1.7.2