Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
arm_mat_mult_f32.c
00001 /* ---------------------------------------------------------------------- 00002 * Project: CMSIS DSP Library 00003 * Title: arm_mat_mult_f32.c 00004 * Description: Floating-point matrix multiplication 00005 * 00006 * $Date: 27. January 2017 00007 * $Revision: V.1.5.1 00008 * 00009 * Target Processor: Cortex-M cores 00010 * -------------------------------------------------------------------- */ 00011 /* 00012 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. 00013 * 00014 * SPDX-License-Identifier: Apache-2.0 00015 * 00016 * Licensed under the Apache License, Version 2.0 (the License); you may 00017 * not use this file except in compliance with the License. 00018 * You may obtain a copy of the License at 00019 * 00020 * www.apache.org/licenses/LICENSE-2.0 00021 * 00022 * Unless required by applicable law or agreed to in writing, software 00023 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 00024 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00025 * See the License for the specific language governing permissions and 00026 * limitations under the License. 00027 */ 00028 00029 #include "arm_math.h" 00030 00031 /** 00032 * @ingroup groupMatrix 00033 */ 00034 00035 /** 00036 * @defgroup MatrixMult Matrix Multiplication 00037 * 00038 * Multiplies two matrices. 00039 * 00040 * \image html MatrixMultiplication.gif "Multiplication of two 3 x 3 matrices" 00041 00042 * Matrix multiplication is only defined if the number of columns of the 00043 * first matrix equals the number of rows of the second matrix. 00044 * Multiplying an <code>M x N</code> matrix with an <code>N x P</code> matrix results 00045 * in an <code>M x P</code> matrix. 
00046 * When matrix size checking is enabled, the functions check: (1) that the inner dimensions of 00047 * <code>pSrcA</code> and <code>pSrcB</code> are equal; and (2) that the size of the output 00048 * matrix equals the outer dimensions of <code>pSrcA</code> and <code>pSrcB</code>. 00049 */ 00050 00051 00052 /** 00053 * @addtogroup MatrixMult 00054 * @{ 00055 */ 00056 00057 /** 00058 * @brief Floating-point matrix multiplication. 00059 * @param[in] *pSrcA points to the first input matrix structure 00060 * @param[in] *pSrcB points to the second input matrix structure 00061 * @param[out] *pDst points to output matrix structure 00062 * @return The function returns either 00063 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking. 00064 */ 00065 00066 arm_status arm_mat_mult_f32( 00067 const arm_matrix_instance_f32 * pSrcA, 00068 const arm_matrix_instance_f32 * pSrcB, 00069 arm_matrix_instance_f32 * pDst) 00070 { 00071 float32_t *pIn1 = pSrcA->pData; /* input data matrix pointer A */ 00072 float32_t *pIn2 = pSrcB->pData; /* input data matrix pointer B */ 00073 float32_t *pInA = pSrcA->pData; /* input data matrix pointer A */ 00074 float32_t *pOut = pDst->pData; /* output data matrix pointer */ 00075 float32_t *px; /* Temporary output data matrix pointer */ 00076 float32_t sum; /* Accumulator */ 00077 uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */ 00078 uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */ 00079 uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */ 00080 00081 #if defined (ARM_MATH_DSP) 00082 00083 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00084 00085 float32_t in1, in2, in3, in4; 00086 uint16_t col, i = 0U, j, row = numRowsA, colCnt; /* loop counters */ 00087 arm_status status; /* status of matrix multiplication */ 00088 00089 #ifdef ARM_MATH_MATRIX_CHECK 00090 00091 00092 /* Check for matrix mismatch 
condition */ 00093 if ((pSrcA->numCols != pSrcB->numRows) || 00094 (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols)) 00095 { 00096 00097 /* Set status as ARM_MATH_SIZE_MISMATCH */ 00098 status = ARM_MATH_SIZE_MISMATCH; 00099 } 00100 else 00101 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */ 00102 00103 { 00104 /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */ 00105 /* row loop */ 00106 do 00107 { 00108 /* Output pointer is set to starting address of the row being processed */ 00109 px = pOut + i; 00110 00111 /* For every row wise process, the column loop counter is to be initiated */ 00112 col = numColsB; 00113 00114 /* For every row wise process, the pIn2 pointer is set 00115 ** to the starting address of the pSrcB data */ 00116 pIn2 = pSrcB->pData; 00117 00118 j = 0U; 00119 00120 /* column loop */ 00121 do 00122 { 00123 /* Set the variable sum, that acts as accumulator, to zero */ 00124 sum = 0.0f; 00125 00126 /* Initiate the pointer pIn1 to point to the starting address of the column being processed */ 00127 pIn1 = pInA; 00128 00129 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00130 colCnt = numColsA >> 2U; 00131 00132 /* matrix multiplication */ 00133 while (colCnt > 0U) 00134 { 00135 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */ 00136 in3 = *pIn2; 00137 pIn2 += numColsB; 00138 in1 = pIn1[0]; 00139 in2 = pIn1[1]; 00140 sum += in1 * in3; 00141 in4 = *pIn2; 00142 pIn2 += numColsB; 00143 sum += in2 * in4; 00144 00145 in3 = *pIn2; 00146 pIn2 += numColsB; 00147 in1 = pIn1[2]; 00148 in2 = pIn1[3]; 00149 sum += in1 * in3; 00150 in4 = *pIn2; 00151 pIn2 += numColsB; 00152 sum += in2 * in4; 00153 pIn1 += 4U; 00154 00155 /* Decrement the loop count */ 00156 colCnt--; 00157 } 00158 00159 /* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here. 00160 ** No loop unrolling is used. 
*/ 00161 colCnt = numColsA % 0x4U; 00162 00163 while (colCnt > 0U) 00164 { 00165 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */ 00166 sum += *pIn1++ * (*pIn2); 00167 pIn2 += numColsB; 00168 00169 /* Decrement the loop counter */ 00170 colCnt--; 00171 } 00172 00173 /* Store the result in the destination buffer */ 00174 *px++ = sum; 00175 00176 /* Update the pointer pIn2 to point to the starting address of the next column */ 00177 j++; 00178 pIn2 = pSrcB->pData + j; 00179 00180 /* Decrement the column loop counter */ 00181 col--; 00182 00183 } while (col > 0U); 00184 00185 #else 00186 00187 /* Run the below code for Cortex-M0 */ 00188 00189 float32_t *pInB = pSrcB->pData; /* input data matrix pointer B */ 00190 uint16_t col, i = 0U, row = numRowsA, colCnt; /* loop counters */ 00191 arm_status status; /* status of matrix multiplication */ 00192 00193 #ifdef ARM_MATH_MATRIX_CHECK 00194 00195 /* Check for matrix mismatch condition */ 00196 if ((pSrcA->numCols != pSrcB->numRows) || 00197 (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols)) 00198 { 00199 00200 /* Set status as ARM_MATH_SIZE_MISMATCH */ 00201 status = ARM_MATH_SIZE_MISMATCH; 00202 } 00203 else 00204 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */ 00205 00206 { 00207 /* The following loop performs the dot-product of each row in pInA with each column in pInB */ 00208 /* row loop */ 00209 do 00210 { 00211 /* Output pointer is set to starting address of the row being processed */ 00212 px = pOut + i; 00213 00214 /* For every row wise process, the column loop counter is to be initiated */ 00215 col = numColsB; 00216 00217 /* For every row wise process, the pIn2 pointer is set 00218 ** to the starting address of the pSrcB data */ 00219 pIn2 = pSrcB->pData; 00220 00221 /* column loop */ 00222 do 00223 { 00224 /* Set the variable sum, that acts as accumulator, to zero */ 00225 sum = 0.0f; 00226 00227 /* Initialize the pointer pIn1 to point to the starting address of the row 
being processed */ 00228 pIn1 = pInA; 00229 00230 /* Matrix A columns number of MAC operations are to be performed */ 00231 colCnt = numColsA; 00232 00233 while (colCnt > 0U) 00234 { 00235 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */ 00236 sum += *pIn1++ * (*pIn2); 00237 pIn2 += numColsB; 00238 00239 /* Decrement the loop counter */ 00240 colCnt--; 00241 } 00242 00243 /* Store the result in the destination buffer */ 00244 *px++ = sum; 00245 00246 /* Decrement the column loop counter */ 00247 col--; 00248 00249 /* Update the pointer pIn2 to point to the starting address of the next column */ 00250 pIn2 = pInB + (numColsB - col); 00251 00252 } while (col > 0U); 00253 00254 #endif /* #if defined (ARM_MATH_DSP) */ 00255 00256 /* Update the pointer pInA to point to the starting address of the next row */ 00257 i = i + numColsB; 00258 pInA = pInA + numColsA; 00259 00260 /* Decrement the row loop counter */ 00261 row--; 00262 00263 } while (row > 0U); 00264 /* Set status as ARM_MATH_SUCCESS */ 00265 status = ARM_MATH_SUCCESS; 00266 } 00267 00268 /* Return to application */ 00269 return (status); 00270 } 00271 00272 /** 00273 * @} end of MatrixMult group 00274 */ 00275
Generated on Tue Jul 12 2022 16:47:27 by
1.7.2