Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of dsp by
arm_std_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_std_q15.c 00009 * 00010 * Description: Standard deviation of an array of Q15 type. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.3 2010/11/29 00015 * Re-organized the CMSIS folders and updated documentation. 00016 * 00017 * Version 1.0.2 2010/11/11 00018 * Documentation updated. 00019 * 00020 * Version 1.0.1 2010/10/05 00021 * Production release and review comments incorporated. 00022 * 00023 * Version 1.0.0 2010/09/20 00024 * Production release and review comments incorporated. 00025 * -------------------------------------------------------------------- */ 00026 00027 #include "arm_math.h" 00028 00029 /** 00030 * @ingroup groupStats 00031 */ 00032 00033 /** 00034 * @addtogroup STD 00035 * @{ 00036 */ 00037 00038 /** 00039 * @brief Standard deviation of the elements of a Q15 vector. 00040 * @param[in] *pSrc points to the input vector 00041 * @param[in] blockSize length of the input vector 00042 * @param[out] *pResult standard deviation value returned here 00043 * @return none. 00044 * 00045 * @details 00046 * <b>Scaling and Overflow Behavior:</b> 00047 * 00048 * \par 00049 * The function is implemented using a 64-bit internal accumulator. 00050 * The input is represented in 1.15 format. 00051 * Intermediate multiplication yields a 2.30 format, and this 00052 * result is added without saturation to a 64-bit accumulator in 34.30 format. 00053 * With 33 guard bits in the accumulator, there is no risk of overflow, and the 00054 * full precision of the intermediate multiplication is preserved. 00055 * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower 00056 * 15 bits, and then saturated to yield a result in 1.15 format. 
00057 */ 00058 00059 void arm_std_q15( 00060 q15_t * pSrc, 00061 uint32_t blockSize, 00062 q15_t * pResult) 00063 { 00064 q63_t sum = 0; /* Accumulator */ 00065 q31_t meanOfSquares, squareOfMean; /* square of mean and mean of square */ 00066 q15_t mean; /* mean */ 00067 q31_t in; /* input value */ 00068 q15_t in1; /* input value */ 00069 uint32_t blkCnt; /* loop counter */ 00070 q15_t t; /* Temporary variable */ 00071 q15_t *pIn; /* Temporary pointer */ 00072 00073 pIn = pSrc; 00074 00075 /*loop Unrolling */ 00076 blkCnt = blockSize >> 2u; 00077 00078 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00079 ** a second loop below computes the remaining 1 to 3 samples. */ 00080 while(blkCnt > 0u) 00081 { 00082 /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ 00083 /* Compute Sum of squares of the input samples 00084 * and then store the result in a temporary variable, sum. */ 00085 in = *__SIMD32(pSrc)++; 00086 sum = __SMLALD(in, in, sum); 00087 in = *__SIMD32(pSrc)++; 00088 sum = __SMLALD(in, in, sum); 00089 00090 /* Decrement the loop counter */ 00091 blkCnt--; 00092 } 00093 00094 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00095 ** No loop unrolling is used. */ 00096 blkCnt = blockSize % 0x4u; 00097 00098 while(blkCnt > 0u) 00099 { 00100 /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ 00101 /* Compute Sum of squares of the input samples 00102 * and then store the result in a temporary variable, sum. */ 00103 in1 = *pSrc++; 00104 sum = __SMLALD(in1, in1, sum); 00105 00106 /* Decrement the loop counter */ 00107 blkCnt--; 00108 } 00109 00110 /* Compute Mean of squares of the input samples 00111 * and then store the result in a temporary variable, meanOfSquares. 
*/ 00112 t = (q15_t) ((1.0 / (blockSize - 1)) * 16384LL); 00113 sum = __SSAT((sum >> 15u), 16u); 00114 00115 meanOfSquares = (q31_t) ((sum * t) >> 14u); 00116 00117 /* Reset the accumulator */ 00118 sum = 0; 00119 00120 /*loop Unrolling */ 00121 blkCnt = blockSize >> 2u; 00122 00123 /* Reset the input working pointer */ 00124 pSrc = pIn; 00125 00126 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00127 ** a second loop below computes the remaining 1 to 3 samples. */ 00128 while(blkCnt > 0u) 00129 { 00130 /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ 00131 /* Compute sum of all input values and then store the result in a temporary variable, sum. */ 00132 sum += *pSrc++; 00133 sum += *pSrc++; 00134 sum += *pSrc++; 00135 sum += *pSrc++; 00136 00137 /* Decrement the loop counter */ 00138 blkCnt--; 00139 } 00140 00141 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00142 ** No loop unrolling is used. */ 00143 blkCnt = blockSize % 0x4u; 00144 00145 while(blkCnt > 0u) 00146 { 00147 /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ 00148 /* Compute sum of all input values and then store the result in a temporary variable, sum. */ 00149 sum += *pSrc++; 00150 00151 /* Decrement the loop counter */ 00152 blkCnt--; 00153 } 00154 /* Compute mean of all input values */ 00155 t = (q15_t) ((1.0 / (blockSize * (blockSize - 1))) * 32768LL); 00156 mean = (q15_t) __SSAT(sum, 16u); 00157 00158 /* Compute square of mean */ 00159 squareOfMean = ((q31_t) mean * mean) >> 15; 00160 squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 15); 00161 00162 /* mean of the squares minus the square of the mean. */ 00163 in1 = (q15_t) (meanOfSquares - squareOfMean); 00164 00165 /* Compute standard deviation and store the result to the destination */ 00166 arm_sqrt_q15(in1, pResult); 00167 } 00168 00169 /** 00170 * @} end of STD group 00171 */
Generated on Tue Jul 12 2022 19:55:44 by
1.7.2
