Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of dsp by
arm_power_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_power_q15.c 00009 * 00010 * Description: sum of the square of the elements in an array of Q15 type 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.3 2010/11/29 00015 * Re-organized the CMSIS folders and updated documentation. 00016 * 00017 * Version 1.0.2 2010/11/11 00018 * Documentation updated. 00019 * 00020 * Version 1.0.1 2010/10/05 00021 * Production release and review comments incorporated. 00022 * 00023 * Version 1.0.0 2010/09/20 00024 * Production release and review comments incorporated. 00025 * -------------------------------------------------------------------- */ 00026 00027 #include "arm_math.h" 00028 00029 /** 00030 * @ingroup groupStats 00031 */ 00032 00033 /** 00034 * @addtogroup power 00035 * @{ 00036 */ 00037 00038 /** 00039 * @brief Sum of the squares of the elements of a Q15 vector. 00040 * @param[in] *pSrc points to the input vector 00041 * @param[in] blockSize length of the input vector 00042 * @param[out] *pResult sum of the squares value returned here 00043 * @return none. 00044 * 00045 * @details 00046 * <b>Scaling and Overflow Behavior:</b> 00047 * 00048 * \par 00049 * The function is implemented using a 64-bit internal accumulator. 00050 * The input is represented in 1.15 format. 00051 * Intermediate multiplication yields a 2.30 format, and this 00052 * result is added without saturation to a 64-bit accumulator in 34.30 format. 00053 * With 33 guard bits in the accumulator, there is no risk of overflow, and the 00054 * full precision of the intermediate multiplication is preserved. 00055 * Finally, the return result is in 34.30 format. 
00056 * 00057 */ 00058 00059 void arm_power_q15( 00060 q15_t * pSrc, 00061 uint32_t blockSize, 00062 q63_t * pResult) 00063 { 00064 q63_t sum = 0; /* Temporary result storage */ 00065 q31_t in32; /* Temporary variable to store input value */ 00066 q15_t in16; /* Temporary variable to store input value */ 00067 uint32_t blkCnt; /* loop counter */ 00068 00069 00070 /* loop Unrolling */ 00071 blkCnt = blockSize >> 2u; 00072 00073 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00074 ** a second loop below computes the remaining 1 to 3 samples. */ 00075 while(blkCnt > 0u) 00076 { 00077 /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ 00078 /* Compute Power and then store the result in a temporary variable, sum. */ 00079 in32 = *__SIMD32(pSrc)++; 00080 sum = __SMLALD(in32, in32, sum); 00081 in32 = *__SIMD32(pSrc)++; 00082 sum = __SMLALD(in32, in32, sum); 00083 00084 /* Decrement the loop counter */ 00085 blkCnt--; 00086 } 00087 00088 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00089 ** No loop unrolling is used. */ 00090 blkCnt = blockSize % 0x4u; 00091 00092 while(blkCnt > 0u) 00093 { 00094 /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ 00095 /* Compute Power and then store the result in a temporary variable, sum. */ 00096 in16 = *pSrc++; 00097 sum = __SMLALD(in16, in16, sum); 00098 00099 /* Decrement the loop counter */ 00100 blkCnt--; 00101 } 00102 00103 /* Store the results in 34.30 format */ 00104 *pResult = sum; 00105 } 00106 00107 /** 00108 * @} end of power group 00109 */
Generated on Tue Jul 12 2022 19:55:43 by
1.7.2
