CMSIS DSP library
Dependents: KL25Z_FFT_Demo Hat_Board_v5_1 KL25Z_FFT_Demo_tony KL25Z_FFT_Demo_tony ... more
Fork of mbed-dsp by
arm_lms_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 17. January 2013 00005 * $Revision: V1.4.1 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_lms_q31.c 00009 * 00010 * Description: Processing function for the Q31 LMS filter. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 /** 00043 * @ingroup groupFilters 00044 */ 00045 00046 /** 00047 * @addtogroup LMS 00048 * @{ 00049 */ 00050 00051 /** 00052 * @brief Processing function for Q31 LMS filter. 00053 * @param[in] *S points to an instance of the Q15 LMS filter structure. 00054 * @param[in] *pSrc points to the block of input data. 00055 * @param[in] *pRef points to the block of reference data. 00056 * @param[out] *pOut points to the block of output data. 00057 * @param[out] *pErr points to the block of error data. 00058 * @param[in] blockSize number of samples to process. 00059 * @return none. 00060 * 00061 * \par Scaling and Overflow Behavior: 00062 * The function is implemented using an internal 64-bit accumulator. 00063 * The accumulator has a 2.62 format and maintains full precision of the intermediate 00064 * multiplication results but provides only a single guard bit. 00065 * Thus, if the accumulator result overflows it wraps around rather than clips. 00066 * In order to avoid overflows completely the input signal must be scaled down by 00067 * log2(numTaps) bits. 00068 * The reference signal should not be scaled down. 00069 * After all multiply-accumulates are performed, the 2.62 accumulator is shifted 00070 * and saturated to 1.31 format to yield the final result. 00071 * The output signal and error signal are in 1.31 format. 00072 * 00073 * \par 00074 * In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted. 00075 */ 00076 00077 void arm_lms_q31( 00078 const arm_lms_instance_q31 * S, 00079 q31_t * pSrc, 00080 q31_t * pRef, 00081 q31_t * pOut, 00082 q31_t * pErr, 00083 uint32_t blockSize) 00084 { 00085 q31_t *pState = S->pState; /* State pointer */ 00086 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00087 q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00088 q31_t *pStateCurnt; /* Points to the current sample of the state */ 00089 q31_t mu = S->mu; /* Adaptive factor */ 00090 q31_t *px; /* Temporary pointer for state */ 00091 q31_t *pb; /* Temporary pointer for coefficient buffer */ 00092 uint32_t tapCnt, blkCnt; /* Loop counters */ 00093 q63_t acc; /* Accumulator */ 00094 q31_t e = 0; /* error of data sample */ 00095 q31_t alpha; /* Intermediate constant for taps update */ 00096 q31_t coef; /* Temporary variable for coef */ 00097 q31_t acc_l, acc_h; /* temporary input */ 00098 uint32_t uShift = ((uint32_t) S->postShift + 1u); 00099 uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */ 00100 00101 /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ 00102 /* pStateCurnt points to the location where the new input data should be written */ 00103 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00104 00105 /* Initializing blkCnt with blockSize */ 00106 blkCnt = blockSize; 00107 00108 00109 #ifndef ARM_MATH_CM0_FAMILY 00110 00111 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00112 00113 while(blkCnt > 0u) 00114 { 00115 /* Copy the new input sample into the state buffer */ 00116 *pStateCurnt++ = *pSrc++; 00117 00118 /* Initialize state pointer */ 00119 px = pState; 00120 00121 /* Initialize coefficient pointer */ 00122 pb = pCoeffs; 00123 00124 /* Set the accumulator to zero */ 00125 acc = 0; 00126 00127 /* Loop unrolling. Process 4 taps at a time. */ 00128 tapCnt = numTaps >> 2; 00129 00130 while(tapCnt > 0u) 00131 { 00132 /* Perform the multiply-accumulate */ 00133 /* acc += b[N] * x[n-N] */ 00134 acc += ((q63_t) (*px++)) * (*pb++); 00135 00136 /* acc += b[N-1] * x[n-N-1] */ 00137 acc += ((q63_t) (*px++)) * (*pb++); 00138 00139 /* acc += b[N-2] * x[n-N-2] */ 00140 acc += ((q63_t) (*px++)) * (*pb++); 00141 00142 /* acc += b[N-3] * x[n-N-3] */ 00143 acc += ((q63_t) (*px++)) * (*pb++); 00144 00145 /* Decrement the loop counter */ 00146 tapCnt--; 00147 } 00148 00149 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00150 tapCnt = numTaps % 0x4u; 00151 00152 while(tapCnt > 0u) 00153 { 00154 /* Perform the multiply-accumulate */ 00155 acc += ((q63_t) (*px++)) * (*pb++); 00156 00157 /* Decrement the loop counter */ 00158 tapCnt--; 00159 } 00160 00161 /* Converting the result to 1.31 format */ 00162 /* Calc lower part of acc */ 00163 acc_l = acc & 0xffffffff; 00164 00165 /* Calc upper part of acc */ 00166 acc_h = (acc >> 32) & 0xffffffff; 00167 00168 acc = (uint32_t) acc_l >> lShift | acc_h << uShift; 00169 00170 /* Store the result from accumulator into the destination buffer. */ 00171 *pOut++ = (q31_t) acc; 00172 00173 /* Compute and store error */ 00174 e = *pRef++ - (q31_t) acc; 00175 00176 *pErr++ = (q31_t) e; 00177 00178 /* Compute alpha i.e. intermediate constant for taps update */ 00179 alpha = (q31_t) (((q63_t) e * mu) >> 31); 00180 00181 /* Initialize state pointer */ 00182 /* Advance state pointer by 1 for the next sample */ 00183 px = pState++; 00184 00185 /* Initialize coefficient pointer */ 00186 pb = pCoeffs; 00187 00188 /* Loop unrolling. Process 4 taps at a time. */ 00189 tapCnt = numTaps >> 2; 00190 00191 /* Update filter coefficients */ 00192 while(tapCnt > 0u) 00193 { 00194 /* coef is in 2.30 format */ 00195 coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32)); 00196 /* get coef in 1.31 format by left shifting */ 00197 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u)); 00198 /* update coefficient buffer to next coefficient */ 00199 pb++; 00200 00201 coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32)); 00202 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u)); 00203 pb++; 00204 00205 coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32)); 00206 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u)); 00207 pb++; 00208 00209 coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32)); 00210 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u)); 00211 pb++; 00212 00213 /* Decrement the loop counter */ 00214 tapCnt--; 00215 } 00216 00217 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00218 tapCnt = numTaps % 0x4u; 00219 00220 while(tapCnt > 0u) 00221 { 00222 /* Perform the multiply-accumulate */ 00223 coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32)); 00224 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u)); 00225 pb++; 00226 00227 /* Decrement the loop counter */ 00228 tapCnt--; 00229 } 00230 00231 /* Decrement the loop counter */ 00232 blkCnt--; 00233 } 00234 00235 /* Processing is complete. Now copy the last numTaps - 1 samples to the 00236 satrt of the state buffer. This prepares the state buffer for the 00237 next function call. */ 00238 00239 /* Points to the start of the pState buffer */ 00240 pStateCurnt = S->pState; 00241 00242 /* Loop unrolling for (numTaps - 1u) samples copy */ 00243 tapCnt = (numTaps - 1u) >> 2u; 00244 00245 /* copy data */ 00246 while(tapCnt > 0u) 00247 { 00248 *pStateCurnt++ = *pState++; 00249 *pStateCurnt++ = *pState++; 00250 *pStateCurnt++ = *pState++; 00251 *pStateCurnt++ = *pState++; 00252 00253 /* Decrement the loop counter */ 00254 tapCnt--; 00255 } 00256 00257 /* Calculate remaining number of copies */ 00258 tapCnt = (numTaps - 1u) % 0x4u; 00259 00260 /* Copy the remaining q31_t data */ 00261 while(tapCnt > 0u) 00262 { 00263 *pStateCurnt++ = *pState++; 00264 00265 /* Decrement the loop counter */ 00266 tapCnt--; 00267 } 00268 00269 #else 00270 00271 /* Run the below code for Cortex-M0 */ 00272 00273 while(blkCnt > 0u) 00274 { 00275 /* Copy the new input sample into the state buffer */ 00276 *pStateCurnt++ = *pSrc++; 00277 00278 /* Initialize pState pointer */ 00279 px = pState; 00280 00281 /* Initialize pCoeffs pointer */ 00282 pb = pCoeffs; 00283 00284 /* Set the accumulator to zero */ 00285 acc = 0; 00286 00287 /* Loop over numTaps number of values */ 00288 tapCnt = numTaps; 00289 00290 while(tapCnt > 0u) 00291 { 00292 /* Perform the multiply-accumulate */ 00293 acc += ((q63_t) (*px++)) * (*pb++); 00294 00295 /* Decrement the loop counter */ 00296 tapCnt--; 00297 } 00298 00299 /* Converting the result to 1.31 format */ 00300 /* Store the result from accumulator into the destination buffer. */ 00301 /* Calc lower part of acc */ 00302 acc_l = acc & 0xffffffff; 00303 00304 /* Calc upper part of acc */ 00305 acc_h = (acc >> 32) & 0xffffffff; 00306 00307 acc = (uint32_t) acc_l >> lShift | acc_h << uShift; 00308 00309 *pOut++ = (q31_t) acc; 00310 00311 /* Compute and store error */ 00312 e = *pRef++ - (q31_t) acc; 00313 00314 *pErr++ = (q31_t) e; 00315 00316 /* Weighting factor for the LMS version */ 00317 alpha = (q31_t) (((q63_t) e * mu) >> 31); 00318 00319 /* Initialize pState pointer */ 00320 /* Advance state pointer by 1 for the next sample */ 00321 px = pState++; 00322 00323 /* Initialize pCoeffs pointer */ 00324 pb = pCoeffs; 00325 00326 /* Loop over numTaps number of values */ 00327 tapCnt = numTaps; 00328 00329 while(tapCnt > 0u) 00330 { 00331 /* Perform the multiply-accumulate */ 00332 coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32)); 00333 *pb += (coef << 1u); 00334 pb++; 00335 00336 /* Decrement the loop counter */ 00337 tapCnt--; 00338 } 00339 00340 /* Decrement the loop counter */ 00341 blkCnt--; 00342 } 00343 00344 /* Processing is complete. Now copy the last numTaps - 1 samples to the 00345 start of the state buffer. This prepares the state buffer for the 00346 next function call. */ 00347 00348 /* Points to the start of the pState buffer */ 00349 pStateCurnt = S->pState; 00350 00351 /* Copy (numTaps - 1u) samples */ 00352 tapCnt = (numTaps - 1u); 00353 00354 /* Copy the data */ 00355 while(tapCnt > 0u) 00356 { 00357 *pStateCurnt++ = *pState++; 00358 00359 /* Decrement the loop counter */ 00360 tapCnt--; 00361 } 00362 00363 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00364 00365 } 00366 00367 /** 00368 * @} end of LMS group 00369 */
Generated on Tue Jul 12 2022 12:36:56 by 1.7.2