Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-os by
arm_mat_mult_f32.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_mat_mult_f32.c 00009 * 00010 * Description: Floating-point matrix multiplication. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @ingroup groupMatrix 00045 */ 00046 00047 /** 00048 * @defgroup MatrixMult Matrix Multiplication 00049 * 00050 * Multiplies two matrices. 00051 * 00052 * \image html MatrixMultiplication.gif "Multiplication of two 3 x 3 matrices" 00053 00054 * Matrix multiplication is only defined if the number of columns of the 00055 * first matrix equals the number of rows of the second matrix. 00056 * Multiplying an <code>M x N</code> matrix with an <code>N x P</code> matrix results 00057 * in an <code>M x P</code> matrix. 00058 * When matrix size checking is enabled, the functions check: (1) that the inner dimensions of 00059 * <code>pSrcA</code> and <code>pSrcB</code> are equal; and (2) that the size of the output 00060 * matrix equals the outer dimensions of <code>pSrcA</code> and <code>pSrcB</code>. 00061 */ 00062 00063 00064 /** 00065 * @addtogroup MatrixMult 00066 * @{ 00067 */ 00068 00069 /** 00070 * @brief Floating-point matrix multiplication. 
00071 * @param[in] *pSrcA points to the first input matrix structure 00072 * @param[in] *pSrcB points to the second input matrix structure 00073 * @param[out] *pDst points to output matrix structure 00074 * @return The function returns either 00075 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking. 00076 */ 00077 00078 arm_status arm_mat_mult_f32( 00079 const arm_matrix_instance_f32 * pSrcA, 00080 const arm_matrix_instance_f32 * pSrcB, 00081 arm_matrix_instance_f32 * pDst) 00082 { 00083 float32_t *pIn1 = pSrcA->pData; /* input data matrix pointer A */ 00084 float32_t *pIn2 = pSrcB->pData; /* input data matrix pointer B */ 00085 float32_t *pInA = pSrcA->pData; /* input data matrix pointer A */ 00086 float32_t *pOut = pDst->pData; /* output data matrix pointer */ 00087 float32_t *px; /* Temporary output data matrix pointer */ 00088 float32_t sum; /* Accumulator */ 00089 uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */ 00090 uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */ 00091 uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */ 00092 00093 #ifndef ARM_MATH_CM0_FAMILY 00094 00095 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00096 00097 float32_t in1, in2, in3, in4; 00098 uint16_t col, i = 0u, j, row = numRowsA, colCnt; /* loop counters */ 00099 arm_status status; /* status of matrix multiplication */ 00100 00101 #ifdef ARM_MATH_MATRIX_CHECK 00102 00103 00104 /* Check for matrix mismatch condition */ 00105 if((pSrcA->numCols != pSrcB->numRows) || 00106 (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols)) 00107 { 00108 00109 /* Set status as ARM_MATH_SIZE_MISMATCH */ 00110 status = ARM_MATH_SIZE_MISMATCH; 00111 } 00112 else 00113 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */ 00114 00115 { 00116 /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */ 00117 /* row loop 
*/ 00118 do 00119 { 00120 /* Output pointer is set to starting address of the row being processed */ 00121 px = pOut + i; 00122 00123 /* For every row wise process, the column loop counter is to be initiated */ 00124 col = numColsB; 00125 00126 /* For every row wise process, the pIn2 pointer is set 00127 ** to the starting address of the pSrcB data */ 00128 pIn2 = pSrcB->pData; 00129 00130 j = 0u; 00131 00132 /* column loop */ 00133 do 00134 { 00135 /* Set the variable sum, that acts as accumulator, to zero */ 00136 sum = 0.0f; 00137 00138 /* Initiate the pointer pIn1 to point to the starting address of the column being processed */ 00139 pIn1 = pInA; 00140 00141 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00142 colCnt = numColsA >> 2u; 00143 00144 /* matrix multiplication */ 00145 while(colCnt > 0u) 00146 { 00147 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */ 00148 in3 = *pIn2; 00149 pIn2 += numColsB; 00150 in1 = pIn1[0]; 00151 in2 = pIn1[1]; 00152 sum += in1 * in3; 00153 in4 = *pIn2; 00154 pIn2 += numColsB; 00155 sum += in2 * in4; 00156 00157 in3 = *pIn2; 00158 pIn2 += numColsB; 00159 in1 = pIn1[2]; 00160 in2 = pIn1[3]; 00161 sum += in1 * in3; 00162 in4 = *pIn2; 00163 pIn2 += numColsB; 00164 sum += in2 * in4; 00165 pIn1 += 4u; 00166 00167 /* Decrement the loop count */ 00168 colCnt--; 00169 } 00170 00171 /* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here. 00172 ** No loop unrolling is used. */ 00173 colCnt = numColsA % 0x4u; 00174 00175 while(colCnt > 0u) 00176 { 00177 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... 
+ a(m,p)*b(p,n) */ 00178 sum += *pIn1++ * (*pIn2); 00179 pIn2 += numColsB; 00180 00181 /* Decrement the loop counter */ 00182 colCnt--; 00183 } 00184 00185 /* Store the result in the destination buffer */ 00186 *px++ = sum; 00187 00188 /* Update the pointer pIn2 to point to the starting address of the next column */ 00189 j++; 00190 pIn2 = pSrcB->pData + j; 00191 00192 /* Decrement the column loop counter */ 00193 col--; 00194 00195 } while(col > 0u); 00196 00197 #else 00198 00199 /* Run the below code for Cortex-M0 */ 00200 00201 float32_t *pInB = pSrcB->pData; /* input data matrix pointer B */ 00202 uint16_t col, i = 0u, row = numRowsA, colCnt; /* loop counters */ 00203 arm_status status; /* status of matrix multiplication */ 00204 00205 #ifdef ARM_MATH_MATRIX_CHECK 00206 00207 /* Check for matrix mismatch condition */ 00208 if((pSrcA->numCols != pSrcB->numRows) || 00209 (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols)) 00210 { 00211 00212 /* Set status as ARM_MATH_SIZE_MISMATCH */ 00213 status = ARM_MATH_SIZE_MISMATCH; 00214 } 00215 else 00216 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */ 00217 00218 { 00219 /* The following loop performs the dot-product of each row in pInA with each column in pInB */ 00220 /* row loop */ 00221 do 00222 { 00223 /* Output pointer is set to starting address of the row being processed */ 00224 px = pOut + i; 00225 00226 /* For every row wise process, the column loop counter is to be initiated */ 00227 col = numColsB; 00228 00229 /* For every row wise process, the pIn2 pointer is set 00230 ** to the starting address of the pSrcB data */ 00231 pIn2 = pSrcB->pData; 00232 00233 /* column loop */ 00234 do 00235 { 00236 /* Set the variable sum, that acts as accumulator, to zero */ 00237 sum = 0.0f; 00238 00239 /* Initialize the pointer pIn1 to point to the starting address of the row being processed */ 00240 pIn1 = pInA; 00241 00242 /* Matrix A columns number of MAC operations are to be performed */ 00243 colCnt = 
numColsA; 00244 00245 while(colCnt > 0u) 00246 { 00247 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */ 00248 sum += *pIn1++ * (*pIn2); 00249 pIn2 += numColsB; 00250 00251 /* Decrement the loop counter */ 00252 colCnt--; 00253 } 00254 00255 /* Store the result in the destination buffer */ 00256 *px++ = sum; 00257 00258 /* Decrement the column loop counter */ 00259 col--; 00260 00261 /* Update the pointer pIn2 to point to the starting address of the next column */ 00262 pIn2 = pInB + (numColsB - col); 00263 00264 } while(col > 0u); 00265 00266 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00267 00268 /* Update the pointer pInA to point to the starting address of the next row */ 00269 i = i + numColsB; 00270 pInA = pInA + numColsA; 00271 00272 /* Decrement the row loop counter */ 00273 row--; 00274 00275 } while(row > 0u); 00276 /* Set status as ARM_MATH_SUCCESS */ 00277 status = ARM_MATH_SUCCESS; 00278 } 00279 00280 /* Return to application */ 00281 return (status); 00282 } 00283 00284 /** 00285 * @} end of MatrixMult group 00286 */
Generated on Tue Jul 12 2022 13:15:25 by
