Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-os by
arm_lms_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_lms_q15.c 00009 * 00010 * Description: Processing function for the Q15 LMS filter. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 /** 00043 * @ingroup groupFilters 00044 */ 00045 00046 /** 00047 * @addtogroup LMS 00048 * @{ 00049 */ 00050 00051 /** 00052 * @brief Processing function for Q15 LMS filter. 00053 * @param[in] *S points to an instance of the Q15 LMS filter structure. 00054 * @param[in] *pSrc points to the block of input data. 00055 * @param[in] *pRef points to the block of reference data. 00056 * @param[out] *pOut points to the block of output data. 00057 * @param[out] *pErr points to the block of error data. 00058 * @param[in] blockSize number of samples to process. 00059 * @return none. 00060 * 00061 * \par Scaling and Overflow Behavior: 00062 * The function is implemented using a 64-bit internal accumulator. 00063 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result. 00064 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format. 00065 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved. 00066 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits. 00067 * Lastly, the accumulator is saturated to yield a result in 1.15 format. 00068 * 00069 * \par 00070 * In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted. 00071 * 00072 */ 00073 00074 void arm_lms_q15( 00075 const arm_lms_instance_q15 * S, 00076 q15_t * pSrc, 00077 q15_t * pRef, 00078 q15_t * pOut, 00079 q15_t * pErr, 00080 uint32_t blockSize) 00081 { 00082 q15_t *pState = S->pState; /* State pointer */ 00083 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00084 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00085 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00086 q15_t mu = S->mu; /* Adaptive factor */ 00087 q15_t *px; /* Temporary pointer for state */ 00088 q15_t *pb; /* Temporary pointer for coefficient buffer */ 00089 uint32_t tapCnt, blkCnt; /* Loop counters */ 00090 q63_t acc; /* Accumulator */ 00091 q15_t e = 0; /* error of data sample */ 00092 q15_t alpha; /* Intermediate constant for taps update */ 00093 q31_t coef; /* Teporary variable for coefficient */ 00094 q31_t acc_l, acc_h; 00095 int32_t lShift = (15 - (int32_t) S->postShift); /* Post shift */ 00096 int32_t uShift = (32 - lShift); 00097 00098 00099 #ifndef ARM_MATH_CM0_FAMILY 00100 00101 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00102 00103 00104 /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ 00105 /* pStateCurnt points to the location where the new input data should be written */ 00106 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00107 00108 /* Initializing blkCnt with blockSize */ 00109 blkCnt = blockSize; 00110 00111 while(blkCnt > 0u) 00112 { 00113 /* Copy the new input sample into the state buffer */ 00114 *pStateCurnt++ = *pSrc++; 00115 00116 /* Initialize state pointer */ 00117 px = pState; 00118 00119 /* Initialize coefficient pointer */ 00120 pb = pCoeffs; 00121 00122 /* Set the accumulator to zero */ 00123 acc = 0; 00124 00125 /* Loop unrolling. Process 4 taps at a time. */ 00126 tapCnt = numTaps >> 2u; 00127 00128 while(tapCnt > 0u) 00129 { 00130 /* acc += b[N] * x[n-N] + b[N-1] * x[n-N-1] */ 00131 /* Perform the multiply-accumulate */ 00132 #ifndef UNALIGNED_SUPPORT_DISABLE 00133 00134 acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc); 00135 acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc); 00136 00137 #else 00138 00139 acc += (q63_t) (((q31_t) (*px++) * (*pb++))); 00140 acc += (q63_t) (((q31_t) (*px++) * (*pb++))); 00141 acc += (q63_t) (((q31_t) (*px++) * (*pb++))); 00142 acc += (q63_t) (((q31_t) (*px++) * (*pb++))); 00143 00144 00145 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */ 00146 00147 /* Decrement the loop counter */ 00148 tapCnt--; 00149 } 00150 00151 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00152 tapCnt = numTaps % 0x4u; 00153 00154 while(tapCnt > 0u) 00155 { 00156 /* Perform the multiply-accumulate */ 00157 acc += (q63_t) (((q31_t) (*px++) * (*pb++))); 00158 00159 /* Decrement the loop counter */ 00160 tapCnt--; 00161 } 00162 00163 /* Calc lower part of acc */ 00164 acc_l = acc & 0xffffffff; 00165 00166 /* Calc upper part of acc */ 00167 acc_h = (acc >> 32) & 0xffffffff; 00168 00169 /* Apply shift for lower part of acc and upper part of acc */ 00170 acc = (uint32_t) acc_l >> lShift | acc_h << uShift; 00171 00172 /* Converting the result to 1.15 format and saturate the output */ 00173 acc = __SSAT(acc, 16); 00174 00175 /* Store the result from accumulator into the destination buffer. */ 00176 *pOut++ = (q15_t) acc; 00177 00178 /* Compute and store error */ 00179 e = *pRef++ - (q15_t) acc; 00180 00181 *pErr++ = (q15_t) e; 00182 00183 /* Compute alpha i.e. intermediate constant for taps update */ 00184 alpha = (q15_t) (((q31_t) e * (mu)) >> 15); 00185 00186 /* Initialize state pointer */ 00187 /* Advance state pointer by 1 for the next sample */ 00188 px = pState++; 00189 00190 /* Initialize coefficient pointer */ 00191 pb = pCoeffs; 00192 00193 /* Loop unrolling. Process 4 taps at a time. */ 00194 tapCnt = numTaps >> 2u; 00195 00196 /* Update filter coefficients */ 00197 while(tapCnt > 0u) 00198 { 00199 coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15); 00200 *pb++ = (q15_t) __SSAT((coef), 16); 00201 coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15); 00202 *pb++ = (q15_t) __SSAT((coef), 16); 00203 coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15); 00204 *pb++ = (q15_t) __SSAT((coef), 16); 00205 coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15); 00206 *pb++ = (q15_t) __SSAT((coef), 16); 00207 00208 /* Decrement the loop counter */ 00209 tapCnt--; 00210 } 00211 00212 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00213 tapCnt = numTaps % 0x4u; 00214 00215 while(tapCnt > 0u) 00216 { 00217 /* Perform the multiply-accumulate */ 00218 coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15); 00219 *pb++ = (q15_t) __SSAT((coef), 16); 00220 00221 /* Decrement the loop counter */ 00222 tapCnt--; 00223 } 00224 00225 /* Decrement the loop counter */ 00226 blkCnt--; 00227 00228 } 00229 00230 /* Processing is complete. Now copy the last numTaps - 1 samples to the 00231 satrt of the state buffer. This prepares the state buffer for the 00232 next function call. */ 00233 00234 /* Points to the start of the pState buffer */ 00235 pStateCurnt = S->pState; 00236 00237 /* Calculation of count for copying integer writes */ 00238 tapCnt = (numTaps - 1u) >> 2; 00239 00240 while(tapCnt > 0u) 00241 { 00242 00243 #ifndef UNALIGNED_SUPPORT_DISABLE 00244 00245 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00246 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00247 #else 00248 *pStateCurnt++ = *pState++; 00249 *pStateCurnt++ = *pState++; 00250 *pStateCurnt++ = *pState++; 00251 *pStateCurnt++ = *pState++; 00252 #endif 00253 00254 tapCnt--; 00255 00256 } 00257 00258 /* Calculation of count for remaining q15_t data */ 00259 tapCnt = (numTaps - 1u) % 0x4u; 00260 00261 /* copy data */ 00262 while(tapCnt > 0u) 00263 { 00264 *pStateCurnt++ = *pState++; 00265 00266 /* Decrement the loop counter */ 00267 tapCnt--; 00268 } 00269 00270 #else 00271 00272 /* Run the below code for Cortex-M0 */ 00273 00274 /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ 00275 /* pStateCurnt points to the location where the new input data should be written */ 00276 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00277 00278 /* Loop over blockSize number of values */ 00279 blkCnt = blockSize; 00280 00281 while(blkCnt > 0u) 00282 { 00283 /* Copy the new input sample into the state buffer */ 00284 *pStateCurnt++ = *pSrc++; 00285 00286 /* Initialize pState pointer */ 00287 px = pState; 00288 00289 /* Initialize pCoeffs pointer */ 00290 pb = pCoeffs; 00291 00292 /* Set the accumulator to zero */ 00293 acc = 0; 00294 00295 /* Loop over numTaps number of values */ 00296 tapCnt = numTaps; 00297 00298 while(tapCnt > 0u) 00299 { 00300 /* Perform the multiply-accumulate */ 00301 acc += (q63_t) ((q31_t) (*px++) * (*pb++)); 00302 00303 /* Decrement the loop counter */ 00304 tapCnt--; 00305 } 00306 00307 /* Calc lower part of acc */ 00308 acc_l = acc & 0xffffffff; 00309 00310 /* Calc upper part of acc */ 00311 acc_h = (acc >> 32) & 0xffffffff; 00312 00313 /* Apply shift for lower part of acc and upper part of acc */ 00314 acc = (uint32_t) acc_l >> lShift | acc_h << uShift; 00315 00316 /* Converting the result to 1.15 format and saturate the output */ 00317 acc = __SSAT(acc, 16); 00318 00319 /* Store the result from accumulator into the destination buffer. */ 00320 *pOut++ = (q15_t) acc; 00321 00322 /* Compute and store error */ 00323 e = *pRef++ - (q15_t) acc; 00324 00325 *pErr++ = (q15_t) e; 00326 00327 /* Compute alpha i.e. intermediate constant for taps update */ 00328 alpha = (q15_t) (((q31_t) e * (mu)) >> 15); 00329 00330 /* Initialize pState pointer */ 00331 /* Advance state pointer by 1 for the next sample */ 00332 px = pState++; 00333 00334 /* Initialize pCoeffs pointer */ 00335 pb = pCoeffs; 00336 00337 /* Loop over numTaps number of values */ 00338 tapCnt = numTaps; 00339 00340 while(tapCnt > 0u) 00341 { 00342 /* Perform the multiply-accumulate */ 00343 coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15); 00344 *pb++ = (q15_t) __SSAT((coef), 16); 00345 00346 /* Decrement the loop counter */ 00347 tapCnt--; 00348 } 00349 00350 /* Decrement the loop counter */ 00351 blkCnt--; 00352 00353 } 00354 00355 /* Processing is complete. Now copy the last numTaps - 1 samples to the 00356 start of the state buffer. This prepares the state buffer for the 00357 next function call. */ 00358 00359 /* Points to the start of the pState buffer */ 00360 pStateCurnt = S->pState; 00361 00362 /* Copy (numTaps - 1u) samples */ 00363 tapCnt = (numTaps - 1u); 00364 00365 /* Copy the data */ 00366 while(tapCnt > 0u) 00367 { 00368 *pStateCurnt++ = *pState++; 00369 00370 /* Decrement the loop counter */ 00371 tapCnt--; 00372 } 00373 00374 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00375 00376 } 00377 00378 /** 00379 * @} end of LMS group 00380 */
Generated on Tue Jul 12 2022 13:15:25 by
