CMSIS DSP library

Dependents:   performance_timer Surfboard_ gps2rtty Capstone ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_lms_q15.c Source File

arm_lms_q15.c

00001 /* ----------------------------------------------------------------------    
00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.    
00003 *    
00004 * $Date:        19. March 2015
00005 * $Revision:    V.1.4.5
00006 *    
00007 * Project:      CMSIS DSP Library    
00008 * Title:        arm_lms_q15.c    
00009 *    
00010 * Description:  Processing function for the Q15 LMS filter.    
00011 *    
00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00013 *  
00014 * Redistribution and use in source and binary forms, with or without 
00015 * modification, are permitted provided that the following conditions
00016 * are met:
00017 *   - Redistributions of source code must retain the above copyright
00018 *     notice, this list of conditions and the following disclaimer.
00019 *   - Redistributions in binary form must reproduce the above copyright
00020 *     notice, this list of conditions and the following disclaimer in
00021 *     the documentation and/or other materials provided with the 
00022 *     distribution.
00023 *   - Neither the name of ARM LIMITED nor the names of its contributors
00024 *     may be used to endorse or promote products derived from this
00025 *     software without specific prior written permission.
00026 *
00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00038 * POSSIBILITY OF SUCH DAMAGE.    
00039 * -------------------------------------------------------------------- */
00040 
00041 #include "arm_math.h"
00042 /**    
00043  * @ingroup groupFilters    
00044  */
00045 
00046 /**    
00047  * @addtogroup LMS    
00048  * @{    
00049  */
00050 
00051  /**    
00052  * @brief Processing function for Q15 LMS filter.    
00053  * @param[in] *S points to an instance of the Q15 LMS filter structure.    
00054  * @param[in] *pSrc points to the block of input data.    
00055  * @param[in] *pRef points to the block of reference data.    
00056  * @param[out] *pOut points to the block of output data.    
00057  * @param[out] *pErr points to the block of error data.    
00058  * @param[in] blockSize number of samples to process.    
00059  * @return none.    
00060  *    
00061  * \par Scaling and Overflow Behavior:    
00062  * The function is implemented using a 64-bit internal accumulator.    
00063  * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.    
00064  * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.    
00065  * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.    
00066  * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.    
00067  * Lastly, the accumulator is saturated to yield a result in 1.15 format.    
00068  *   
00069  * \par   
00070  *  In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted.   
00071  *    
00072  */
00073 
00074 void arm_lms_q15(
00075   const arm_lms_instance_q15 * S,
00076   q15_t * pSrc,
00077   q15_t * pRef,
00078   q15_t * pOut,
00079   q15_t * pErr,
00080   uint32_t blockSize)
00081 {
00082   q15_t *pState = S->pState;                     /* State pointer */
00083   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
00084   q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
00085   q15_t *pStateCurnt;                            /* Points to the current sample of the state */
00086   q15_t mu = S->mu;                              /* Adaptive factor */
00087   q15_t *px;                                     /* Temporary pointer for state */
00088   q15_t *pb;                                     /* Temporary pointer for coefficient buffer */
00089   uint32_t tapCnt, blkCnt;                       /* Loop counters */
00090   q63_t acc;                                     /* Accumulator */
00091   q15_t e = 0;                                   /* error of data sample */
00092   q15_t alpha;                                   /* Intermediate constant for taps update */
00093   q31_t coef;                                    /* Teporary variable for coefficient */
00094   q31_t acc_l, acc_h;
00095   int32_t lShift = (15 - (int32_t) S->postShift);       /*  Post shift  */
00096   int32_t uShift = (32 - lShift);
00097 
00098 
00099 #ifndef ARM_MATH_CM0_FAMILY
00100 
00101   /* Run the below code for Cortex-M4 and Cortex-M3 */
00102 
00103 
00104   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
00105   /* pStateCurnt points to the location where the new input data should be written */
00106   pStateCurnt = &(S->pState[(numTaps - 1u)]);
00107 
00108   /* Initializing blkCnt with blockSize */
00109   blkCnt = blockSize;
00110 
00111   while(blkCnt > 0u)
00112   {
00113     /* Copy the new input sample into the state buffer */
00114     *pStateCurnt++ = *pSrc++;
00115 
00116     /* Initialize state pointer */
00117     px = pState;
00118 
00119     /* Initialize coefficient pointer */
00120     pb = pCoeffs;
00121 
00122     /* Set the accumulator to zero */
00123     acc = 0;
00124 
00125     /* Loop unrolling.  Process 4 taps at a time. */
00126     tapCnt = numTaps >> 2u;
00127 
00128     while(tapCnt > 0u)
00129     {
00130       /* acc +=  b[N] * x[n-N] + b[N-1] * x[n-N-1] */
00131       /* Perform the multiply-accumulate */
00132 #ifndef UNALIGNED_SUPPORT_DISABLE
00133 
00134       acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
00135       acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
00136 
00137 #else
00138 
00139       acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
00140       acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
00141       acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
00142       acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
00143 
00144 
00145 #endif  /*  #ifndef UNALIGNED_SUPPORT_DISABLE   */
00146 
00147       /* Decrement the loop counter */
00148       tapCnt--;
00149     }
00150 
00151     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
00152     tapCnt = numTaps % 0x4u;
00153 
00154     while(tapCnt > 0u)
00155     {
00156       /* Perform the multiply-accumulate */
00157       acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
00158 
00159       /* Decrement the loop counter */
00160       tapCnt--;
00161     }
00162 
00163     /* Calc lower part of acc */
00164     acc_l = acc & 0xffffffff;
00165 
00166     /* Calc upper part of acc */
00167     acc_h = (acc >> 32) & 0xffffffff;
00168 
00169     /* Apply shift for lower part of acc and upper part of acc */
00170     acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
00171 
00172     /* Converting the result to 1.15 format and saturate the output */
00173     acc = __SSAT(acc, 16);
00174 
00175     /* Store the result from accumulator into the destination buffer. */
00176     *pOut++ = (q15_t) acc;
00177 
00178     /* Compute and store error */
00179     e = *pRef++ - (q15_t) acc;
00180 
00181     *pErr++ = (q15_t) e;
00182 
00183     /* Compute alpha i.e. intermediate constant for taps update */
00184     alpha = (q15_t) (((q31_t) e * (mu)) >> 15);
00185 
00186     /* Initialize state pointer */
00187     /* Advance state pointer by 1 for the next sample */
00188     px = pState++;
00189 
00190     /* Initialize coefficient pointer */
00191     pb = pCoeffs;
00192 
00193     /* Loop unrolling.  Process 4 taps at a time. */
00194     tapCnt = numTaps >> 2u;
00195 
00196     /* Update filter coefficients */
00197     while(tapCnt > 0u)
00198     {
00199       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
00200       *pb++ = (q15_t) __SSAT((coef), 16);
00201       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
00202       *pb++ = (q15_t) __SSAT((coef), 16);
00203       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
00204       *pb++ = (q15_t) __SSAT((coef), 16);
00205       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
00206       *pb++ = (q15_t) __SSAT((coef), 16);
00207 
00208       /* Decrement the loop counter */
00209       tapCnt--;
00210     }
00211 
00212     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
00213     tapCnt = numTaps % 0x4u;
00214 
00215     while(tapCnt > 0u)
00216     {
00217       /* Perform the multiply-accumulate */
00218       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
00219       *pb++ = (q15_t) __SSAT((coef), 16);
00220 
00221       /* Decrement the loop counter */
00222       tapCnt--;
00223     }
00224 
00225     /* Decrement the loop counter */
00226     blkCnt--;
00227 
00228   }
00229 
00230   /* Processing is complete. Now copy the last numTaps - 1 samples to the    
00231      satrt of the state buffer. This prepares the state buffer for the    
00232      next function call. */
00233 
00234   /* Points to the start of the pState buffer */
00235   pStateCurnt = S->pState;
00236 
00237   /* Calculation of count for copying integer writes */
00238   tapCnt = (numTaps - 1u) >> 2;
00239 
00240   while(tapCnt > 0u)
00241   {
00242 
00243 #ifndef UNALIGNED_SUPPORT_DISABLE
00244 
00245     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
00246     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
00247 #else
00248     *pStateCurnt++ = *pState++;
00249     *pStateCurnt++ = *pState++;
00250     *pStateCurnt++ = *pState++;
00251     *pStateCurnt++ = *pState++;
00252 #endif
00253 
00254     tapCnt--;
00255 
00256   }
00257 
00258   /* Calculation of count for remaining q15_t data */
00259   tapCnt = (numTaps - 1u) % 0x4u;
00260 
00261   /* copy data */
00262   while(tapCnt > 0u)
00263   {
00264     *pStateCurnt++ = *pState++;
00265 
00266     /* Decrement the loop counter */
00267     tapCnt--;
00268   }
00269 
00270 #else
00271 
00272   /* Run the below code for Cortex-M0 */
00273 
00274   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
00275   /* pStateCurnt points to the location where the new input data should be written */
00276   pStateCurnt = &(S->pState[(numTaps - 1u)]);
00277 
00278   /* Loop over blockSize number of values */
00279   blkCnt = blockSize;
00280 
00281   while(blkCnt > 0u)
00282   {
00283     /* Copy the new input sample into the state buffer */
00284     *pStateCurnt++ = *pSrc++;
00285 
00286     /* Initialize pState pointer */
00287     px = pState;
00288 
00289     /* Initialize pCoeffs pointer */
00290     pb = pCoeffs;
00291 
00292     /* Set the accumulator to zero */
00293     acc = 0;
00294 
00295     /* Loop over numTaps number of values */
00296     tapCnt = numTaps;
00297 
00298     while(tapCnt > 0u)
00299     {
00300       /* Perform the multiply-accumulate */
00301       acc += (q63_t) ((q31_t) (*px++) * (*pb++));
00302 
00303       /* Decrement the loop counter */
00304       tapCnt--;
00305     }
00306 
00307     /* Calc lower part of acc */
00308     acc_l = acc & 0xffffffff;
00309 
00310     /* Calc upper part of acc */
00311     acc_h = (acc >> 32) & 0xffffffff;
00312 
00313     /* Apply shift for lower part of acc and upper part of acc */
00314     acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
00315 
00316     /* Converting the result to 1.15 format and saturate the output */
00317     acc = __SSAT(acc, 16);
00318 
00319     /* Store the result from accumulator into the destination buffer. */
00320     *pOut++ = (q15_t) acc;
00321 
00322     /* Compute and store error */
00323     e = *pRef++ - (q15_t) acc;
00324 
00325     *pErr++ = (q15_t) e;
00326 
00327     /* Compute alpha i.e. intermediate constant for taps update */
00328     alpha = (q15_t) (((q31_t) e * (mu)) >> 15);
00329 
00330     /* Initialize pState pointer */
00331     /* Advance state pointer by 1 for the next sample */
00332     px = pState++;
00333 
00334     /* Initialize pCoeffs pointer */
00335     pb = pCoeffs;
00336 
00337     /* Loop over numTaps number of values */
00338     tapCnt = numTaps;
00339 
00340     while(tapCnt > 0u)
00341     {
00342       /* Perform the multiply-accumulate */
00343       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
00344       *pb++ = (q15_t) __SSAT((coef), 16);
00345 
00346       /* Decrement the loop counter */
00347       tapCnt--;
00348     }
00349 
00350     /* Decrement the loop counter */
00351     blkCnt--;
00352 
00353   }
00354 
00355   /* Processing is complete. Now copy the last numTaps - 1 samples to the        
00356      start of the state buffer. This prepares the state buffer for the   
00357      next function call. */
00358 
00359   /* Points to the start of the pState buffer */
00360   pStateCurnt = S->pState;
00361 
00362   /*  Copy (numTaps - 1u) samples  */
00363   tapCnt = (numTaps - 1u);
00364 
00365   /* Copy the data */
00366   while(tapCnt > 0u)
00367   {
00368     *pStateCurnt++ = *pState++;
00369 
00370     /* Decrement the loop counter */
00371     tapCnt--;
00372   }
00373 
00374 #endif /*   #ifndef ARM_MATH_CM0_FAMILY */
00375 
00376 }
00377 
00378 /**    
00379    * @} end of LMS group    
00380    */