Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of dsp by
arm_mat_mult_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_mat_mult_q31.c 00009 * 00010 * Description: Q31 matrix multiplication. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.3 2010/11/29 00015 * Re-organized the CMSIS folders and updated documentation. 00016 * 00017 * Version 1.0.2 2010/11/11 00018 * Documentation updated. 00019 * 00020 * Version 1.0.1 2010/10/05 00021 * Production release and review comments incorporated. 00022 * 00023 * Version 1.0.0 2010/09/20 00024 * Production release and review comments incorporated. 00025 * 00026 * Version 0.0.5 2010/04/26 00027 * incorporated review comments and updated with latest CMSIS layer 00028 * 00029 * Version 0.0.3 2010/03/10 00030 * Initial version 00031 * -------------------------------------------------------------------- */ 00032 00033 #include "arm_math.h" 00034 00035 /** 00036 * @ingroup groupMatrix 00037 */ 00038 00039 /** 00040 * @addtogroup MatrixMult 00041 * @{ 00042 */ 00043 00044 /** 00045 * @brief Q31 matrix multiplication 00046 * @param[in] *pSrcA points to the first input matrix structure 00047 * @param[in] *pSrcB points to the second input matrix structure 00048 * @param[out] *pDst points to output matrix structure 00049 * @return The function returns either 00050 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking. 00051 * 00052 * @details 00053 * <b>Scaling and Overflow Behavior:</b> 00054 * 00055 * \par 00056 * The function is implemented using an internal 64-bit accumulator. 00057 * The accumulator has a 2.62 format and maintains full precision of the intermediate 00058 * multiplication results but provides only a single guard bit. There is no saturation 00059 * on intermediate additions. 
Thus, if the accumulator overflows it wraps around and 00060 * distorts the result. The input signals should be scaled down to avoid intermediate 00061 * overflows. The input is thus scaled down by log2(numColsA) bits 00062 * to avoid overflows, as a total of numColsA additions are performed internally. 00063 * The 2.62 accumulator is right shifted by 31 bits and saturated to 1.31 format to yield the final result. 00064 * 00065 * \par 00066 * See <code>arm_mat_mult_fast_q31()</code> for a faster but less precise implementation of this function. 00067 * 00068 */ 00069 00070 arm_status arm_mat_mult_q31( 00071 const arm_matrix_instance_q31 * pSrcA, 00072 const arm_matrix_instance_q31 * pSrcB, 00073 arm_matrix_instance_q31 * pDst) 00074 { 00075 q31_t *pIn1 = pSrcA->pData; /* input data matrix pointer A */ 00076 q31_t *pIn2 = pSrcB->pData; /* input data matrix pointer B */ 00077 q31_t *pInA = pSrcA->pData; /* input data matrix pointer A */ 00078 // q31_t *pSrcB = pSrcB->pData; /* input data matrix pointer B */ 00079 q31_t *pOut = pDst->pData; /* output data matrix pointer */ 00080 q31_t *px; /* Temporary output data matrix pointer */ 00081 q63_t sum; /* Accumulator */ 00082 uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */ 00083 uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */ 00084 uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */ 00085 uint16_t col, i = 0u, j, row = numRowsA, colCnt; /* loop counters */ 00086 arm_status status; /* status of matrix multiplication */ 00087 00088 00089 #ifdef ARM_MATH_MATRIX_CHECK 00090 /* Check for matrix mismatch condition */ 00091 if((pSrcA->numCols != pSrcB->numRows) || 00092 (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols)) 00093 { 00094 /* Set status as ARM_MATH_SIZE_MISMATCH */ 00095 status = ARM_MATH_SIZE_MISMATCH; 00096 } 00097 else 00098 #endif 00099 { 00100 /* The following loop performs the dot-product of each row in pSrcA 
with each column in pSrcB */ 00101 /* row loop */ 00102 do 00103 { 00104 /* Output pointer is set to starting address of the row being processed */ 00105 px = pOut + i; 00106 00107 /* For every row wise process, the column loop counter is to be initiated */ 00108 col = numColsB; 00109 00110 /* For every row wise process, the pIn2 pointer is set 00111 ** to the starting address of the pSrcB data */ 00112 pIn2 = pSrcB->pData; 00113 00114 j = 0u; 00115 00116 /* column loop */ 00117 do 00118 { 00119 /* Set the variable sum, that acts as accumulator, to zero */ 00120 sum = 0; 00121 00122 /* Initiate the pointer pIn1 to point to the starting address of pInA */ 00123 pIn1 = pInA; 00124 00125 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00126 colCnt = numColsA >> 2; 00127 00128 00129 /* matrix multiplication */ 00130 while(colCnt > 0u) 00131 { 00132 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */ 00133 /* Perform the multiply-accumulates */ 00134 sum += (q63_t) * pIn1++ * *pIn2; 00135 pIn2 += numColsB; 00136 00137 sum += (q63_t) * pIn1++ * *pIn2; 00138 pIn2 += numColsB; 00139 00140 sum += (q63_t) * pIn1++ * *pIn2; 00141 pIn2 += numColsB; 00142 00143 sum += (q63_t) * pIn1++ * *pIn2; 00144 pIn2 += numColsB; 00145 00146 /* Decrement the loop counter */ 00147 colCnt--; 00148 } 00149 00150 /* If the columns of pSrcA is not a multiple of 4, compute any remaining output samples here. 00151 ** No loop unrolling is used. */ 00152 colCnt = numColsA % 0x4u; 00153 00154 while(colCnt > 0u) 00155 { 00156 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... 
+ a(m,p)*b(p,n) */ 00157 /* Perform the multiply-accumulates */ 00158 sum += (q63_t) * pIn1++ * *pIn2; 00159 pIn2 += numColsB; 00160 00161 /* Decrement the loop counter */ 00162 colCnt--; 00163 } 00164 00165 /* Convert the result from 2.30 to 1.31 format and store in destination buffer */ 00166 *px++ = (q31_t) (sum >> 31); 00167 00168 /* Update the pointer pIn2 to point to the starting address of the next column */ 00169 j++; 00170 pIn2 = (pSrcB->pData) + j; 00171 00172 /* Decrement the column loop counter */ 00173 col--; 00174 00175 } while(col > 0u); 00176 00177 /* Update the pointer pInA to point to the starting address of the next row */ 00178 i = i + numColsB; 00179 pInA = pInA + numColsA; 00180 00181 /* Decrement the row loop counter */ 00182 row--; 00183 00184 } while(row > 0u); 00185 00186 /* set status as ARM_MATH_SUCCESS */ 00187 status = ARM_MATH_SUCCESS; 00188 } 00189 /* Return to application */ 00190 return (status); 00191 } 00192 00193 /** 00194 * @} end of MatrixMult group 00195 */
Generated on Tue Jul 12 2022 19:55:43 by
1.7.2
