Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of dsp by
arm_mat_mult_f32.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_mat_mult_f32.c 00009 * 00010 * Description: Floating-point matrix multiplication. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.3 2010/11/29 00015 * Re-organized the CMSIS folders and updated documentation. 00016 * 00017 * Version 1.0.2 2010/11/11 00018 * Documentation updated. 00019 * 00020 * Version 1.0.1 2010/10/05 00021 * Production release and review comments incorporated. 00022 * 00023 * Version 1.0.0 2010/09/20 00024 * Production release and review comments incorporated. 00025 * 00026 * Version 0.0.5 2010/04/26 00027 * incorporated review comments and updated with latest CMSIS layer 00028 * 00029 * Version 0.0.3 2010/03/10 00030 * Initial version 00031 * -------------------------------------------------------------------- */ 00032 00033 #include "arm_math.h" 00034 00035 /** 00036 * @ingroup groupMatrix 00037 */ 00038 00039 /** 00040 * @defgroup MatrixMult Matrix Multiplication 00041 * 00042 * Multiplies two matrices. 00043 * 00044 * \image html MatrixMultiplication.gif "Multiplication of two 3 x 3 matrices" 00045 00046 * Matrix multiplication is only defined if the number of columns of the 00047 * first matrix equals the number of rows of the second matrix. 00048 * Multiplying an <code>M x N</code> matrix with an <code>N x P</code> matrix results 00049 * in an <code>M x P</code> matrix. 00050 * When matrix size checking is enabled, the functions check: (1) that the inner dimensions of 00051 * <code>pSrcA</code> and <code>pSrcB</code> are equal; and (2) that the size of the output 00052 * matrix equals the outer dimensions of <code>pSrcA</code> and <code>pSrcB</code>. 
00053 */ 00054 00055 00056 /** 00057 * @addtogroup MatrixMult 00058 * @{ 00059 */ 00060 00061 /** 00062 * @brief Floating-point matrix multiplication. 00063 * @param[in] *pSrcA points to the first input matrix structure 00064 * @param[in] *pSrcB points to the second input matrix structure 00065 * @param[out] *pDst points to output matrix structure 00066 * @return The function returns either 00067 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking. 00068 */ 00069 00070 arm_status arm_mat_mult_f32( 00071 const arm_matrix_instance_f32 * pSrcA, 00072 const arm_matrix_instance_f32 * pSrcB, 00073 arm_matrix_instance_f32 * pDst) 00074 { 00075 float32_t *pIn1 = pSrcA->pData; /* input data matrix pointer A */ 00076 float32_t *pIn2 = pSrcB->pData; /* input data matrix pointer B */ 00077 float32_t *pInA = pSrcA->pData; /* input data matrix pointer A */ 00078 // float32_t *pSrcB = pSrcB->pData; /* input data matrix pointer B */ 00079 float32_t *pOut = pDst->pData; /* output data matrix pointer */ 00080 float32_t *px; /* Temporary output data matrix pointer */ 00081 float32_t sum; /* Accumulator */ 00082 uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */ 00083 uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */ 00084 uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */ 00085 uint16_t col, i = 0u, j, row = numRowsA, colCnt; /* loop counters */ 00086 arm_status status; /* status of matrix multiplication */ 00087 00088 #ifdef ARM_MATH_MATRIX_CHECK 00089 /* Check for matrix mismatch condition */ 00090 if((pSrcA->numCols != pSrcB->numRows) || 00091 (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols)) 00092 { 00093 00094 /* Set status as ARM_MATH_SIZE_MISMATCH */ 00095 status = ARM_MATH_SIZE_MISMATCH; 00096 } 00097 else 00098 #endif 00099 { 00100 /* The following loop performs the dot-product of each row in pSrcA with each column in 
pSrcB */ 00101 /* row loop */ 00102 do 00103 { 00104 /* Output pointer is set to starting address of the row being processed */ 00105 px = pOut + i; 00106 00107 /* For every row wise process, the column loop counter is to be initiated */ 00108 col = numColsB; 00109 00110 /* For every row wise process, the pIn2 pointer is set 00111 ** to the starting address of the pSrcB data */ 00112 pIn2 = pSrcB->pData; 00113 00114 j = 0u; 00115 00116 /* column loop */ 00117 do 00118 { 00119 /* Set the variable sum, that acts as accumulator, to zero */ 00120 sum = 0.0f; 00121 00122 /* Initiate the pointer pIn1 to point to the starting address of the column being processed */ 00123 pIn1 = pInA; 00124 00125 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00126 colCnt = numColsA >> 2; 00127 00128 /* matrix multiplication */ 00129 while(colCnt > 0u) 00130 { 00131 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */ 00132 sum += *pIn1++ * (*pIn2); 00133 pIn2 += numColsB; 00134 sum += *pIn1++ * (*pIn2); 00135 pIn2 += numColsB; 00136 sum += *pIn1++ * (*pIn2); 00137 pIn2 += numColsB; 00138 sum += *pIn1++ * (*pIn2); 00139 pIn2 += numColsB; 00140 00141 /* Decrement the loop count */ 00142 colCnt--; 00143 } 00144 00145 /* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here. 00146 ** No loop unrolling is used. */ 00147 colCnt = numColsA % 0x4u; 00148 00149 while(colCnt > 0u) 00150 { 00151 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... 
+ a(m,p)*b(p,n) */ 00152 sum += *pIn1++ * (*pIn2); 00153 pIn2 += numColsB; 00154 00155 /* Decrement the loop counter */ 00156 colCnt--; 00157 } 00158 00159 /* Store the result in the destination buffer */ 00160 *px++ = sum; 00161 00162 /* Update the pointer pIn2 to point to the starting address of the next column */ 00163 j++; 00164 pIn2 = pSrcB->pData + j; 00165 00166 /* Decrement the column loop counter */ 00167 col--; 00168 00169 } while(col > 0u); 00170 00171 /* Update the pointer pInA to point to the starting address of the next row */ 00172 i = i + numColsB; 00173 pInA = pInA + numColsA; 00174 00175 /* Decrement the row loop counter */ 00176 row--; 00177 00178 } while(row > 0u); 00179 00180 /* Set status as ARM_MATH_SUCCESS */ 00181 status = ARM_MATH_SUCCESS; 00182 } 00183 00184 /* Return to application */ 00185 return (status); 00186 } 00187 00188 /** 00189 * @} end of MatrixMult group 00190 */
Generated on Tue Jul 12 2022 19:55:43 by
1.7.2
