Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of dsp by
arm_dot_prod_q7.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_dot_prod_q7.c 00009 * 00010 * Description: Q7 dot product. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.3 2010/11/29 00015 * Re-organized the CMSIS folders and updated documentation. 00016 * 00017 * Version 1.0.2 2010/11/11 00018 * Documentation updated. 00019 * 00020 * Version 1.0.1 2010/10/05 00021 * Production release and review comments incorporated. 00022 * 00023 * Version 1.0.0 2010/09/20 00024 * Production release and review comments incorporated. 00025 * 00026 * Version 0.0.7 2010/06/10 00027 * Misra-C changes done 00028 * -------------------------------------------------------------------- */ 00029 00030 #include "arm_math.h" 00031 00032 /** 00033 * @ingroup groupMath 00034 */ 00035 00036 /** 00037 * @addtogroup dot_prod 00038 * @{ 00039 */ 00040 00041 /** 00042 * @brief Dot product of Q7 vectors. 00043 * @param[in] *pSrcA points to the first input vector 00044 * @param[in] *pSrcB points to the second input vector 00045 * @param[in] blockSize number of samples in each vector 00046 * @param[out] *result output result returned here 00047 * @return none. 00048 * 00049 * <b>Scaling and Overflow Behavior:</b> 00050 * \par 00051 * The intermediate multiplications are in 1.7 x 1.7 = 2.14 format and these 00052 * results are added to an accumulator in 18.14 format. 00053 * Nonsaturating additions are used and there is no danger of wrap around as long as 00054 * the vectors are less than 2^18 elements long. 00055 * The return result is in 18.14 format. 
00056 */ 00057 00058 void arm_dot_prod_q7( 00059 q7_t * pSrcA, 00060 q7_t * pSrcB, 00061 uint32_t blockSize, 00062 q31_t * result) 00063 { 00064 q31_t input1, input2; /* Temporary variables to store input */ 00065 q15_t in1, in2; /* Temporary variables to store input */ 00066 q31_t sum = 0; /* Temporary variables to store output */ 00067 uint32_t blkCnt; /* loop counter */ 00068 00069 00070 00071 /*loop Unrolling */ 00072 blkCnt = blockSize >> 2u; 00073 00074 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00075 ** a second loop below computes the remaining 1 to 3 samples. */ 00076 while(blkCnt > 0u) 00077 { 00078 /* Reading two inputs of SrcA buffer and packing */ 00079 in1 = (q15_t) * pSrcA++; 00080 in2 = (q15_t) * pSrcA++; 00081 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00082 00083 /* Reading two inputs of SrcB buffer and packing */ 00084 in1 = (q15_t) * pSrcB++; 00085 in2 = (q15_t) * pSrcB++; 00086 input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00087 00088 /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */ 00089 /* Perform Dot product of 2 packed inputs using SMLALD and store the result in a temporary variable. */ 00090 sum = __SMLAD(input1, input2, sum); 00091 00092 /* Reading two inputs of SrcA buffer and packing */ 00093 in1 = (q15_t) * pSrcA++; 00094 in2 = (q15_t) * pSrcA++; 00095 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00096 00097 /* Reading two inputs of SrcB buffer and packing */ 00098 in1 = (q15_t) * pSrcB++; 00099 in2 = (q15_t) * pSrcB++; 00100 input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00101 00102 /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */ 00103 /* Perform Dot product of 2 packed inputs using SMLALD and store the result in a temporary variable. 
*/ 00104 sum = __SMLAD(input1, input2, sum); 00105 00106 00107 00108 /* Decrement the loop counter */ 00109 blkCnt--; 00110 } 00111 00112 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00113 ** No loop unrolling is used. */ 00114 blkCnt = blockSize % 0x4u; 00115 00116 while(blkCnt > 0u) 00117 { 00118 /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */ 00119 /* Dot product and then store the results in a temporary buffer. */ 00120 sum = __SMLAD(*pSrcA++, *pSrcB++, sum); 00121 00122 /* Decrement the loop counter */ 00123 blkCnt--; 00124 } 00125 00126 /* Store the result in the destination buffer in 18.14 format */ 00127 *result = sum; 00128 } 00129 00130 /** 00131 * @} end of dot_prod group 00132 */
Generated on Tue Jul 12 2022 19:55:43 by
1.7.2
