Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of dsp by
arm_dot_prod_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_dot_prod_q31.c 00009 * 00010 * Description: Q31 dot product. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.3 2010/11/29 00015 * Re-organized the CMSIS folders and updated documentation. 00016 * 00017 * Version 1.0.2 2010/11/11 00018 * Documentation updated. 00019 * 00020 * Version 1.0.1 2010/10/05 00021 * Production release and review comments incorporated. 00022 * 00023 * Version 1.0.0 2010/09/20 00024 * Production release and review comments incorporated. 00025 * 00026 * Version 0.0.7 2010/06/10 00027 * Misra-C changes done 00028 * -------------------------------------------------------------------- */ 00029 00030 #include "arm_math.h" 00031 00032 /** 00033 * @ingroup groupMath 00034 */ 00035 00036 /** 00037 * @addtogroup dot_prod 00038 * @{ 00039 */ 00040 00041 /** 00042 * @brief Dot product of Q31 vectors. 00043 * @param[in] *pSrcA points to the first input vector 00044 * @param[in] *pSrcB points to the second input vector 00045 * @param[in] blockSize number of samples in each vector 00046 * @param[out] *result output result returned here 00047 * @return none. 00048 * 00049 * <b>Scaling and Overflow Behavior:</b> 00050 * \par 00051 * The intermediate multiplications are in 1.31 x 1.31 = 2.62 format and these 00052 * are truncated to 2.48 format by discarding the lower 14 bits. 00053 * The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format. 00054 * There are 15 guard bits in the accumulator and there is no risk of overflow as long as 00055 * the length of the vectors is less than 2^16 elements. 00056 * The return result is in 16.48 format. 
00057 */ 00058 00059 void arm_dot_prod_q31( 00060 q31_t * pSrcA, 00061 q31_t * pSrcB, 00062 uint32_t blockSize, 00063 q63_t * result) 00064 { 00065 q63_t sum = 0; /* Temporary result storage */ 00066 uint32_t blkCnt; /* loop counter */ 00067 00068 00069 /*loop Unrolling */ 00070 blkCnt = blockSize >> 2u; 00071 00072 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00073 ** a second loop below computes the remaining 1 to 3 samples. */ 00074 while(blkCnt > 0u) 00075 { 00076 /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */ 00077 /* Calculate dot product and then store the result in a temporary buffer. */ 00078 sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14u; 00079 sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14u; 00080 sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14u; 00081 sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14u; 00082 00083 /* Decrement the loop counter */ 00084 blkCnt--; 00085 } 00086 00087 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00088 ** No loop unrolling is used. */ 00089 blkCnt = blockSize % 0x4u; 00090 00091 while(blkCnt > 0u) 00092 { 00093 /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */ 00094 /* Calculate dot product and then store the result in a temporary buffer. */ 00095 sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14u; 00096 00097 /* Decrement the loop counter */ 00098 blkCnt--; 00099 } 00100 00101 /* Store the result in the destination buffer in 16.48 format */ 00102 *result = sum; 00103 } 00104 00105 /** 00106 * @} end of dot_prod group 00107 */
Generated on Tue Jul 12 2022 19:55:43 by
1.7.2
