Added CMSIS5 DSP and NN folder. Needs some work
Embed:
(wiki syntax)
Show/hide line numbers
arm_mat_cmplx_mult_f32.c
00001 /* ---------------------------------------------------------------------- 00002 * Project: CMSIS DSP Library 00003 * Title: arm_mat_cmplx_mult_f32.c 00004 * Description: Floating-point matrix multiplication 00005 * 00006 * $Date: 27. January 2017 00007 * $Revision: V.1.5.1 00008 * 00009 * Target Processor: Cortex-M cores 00010 * -------------------------------------------------------------------- */ 00011 /* 00012 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. 00013 * 00014 * SPDX-License-Identifier: Apache-2.0 00015 * 00016 * Licensed under the Apache License, Version 2.0 (the License); you may 00017 * not use this file except in compliance with the License. 00018 * You may obtain a copy of the License at 00019 * 00020 * www.apache.org/licenses/LICENSE-2.0 00021 * 00022 * Unless required by applicable law or agreed to in writing, software 00023 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 00024 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00025 * See the License for the specific language governing permissions and 00026 * limitations under the License. 00027 */ 00028 00029 #include "arm_math.h" 00030 00031 /** 00032 * @ingroup groupMatrix 00033 */ 00034 00035 /** 00036 * @defgroup CmplxMatrixMult Complex Matrix Multiplication 00037 * 00038 * Complex Matrix multiplication is only defined if the number of columns of the 00039 * first matrix equals the number of rows of the second matrix. 00040 * Multiplying an <code>M x N</code> matrix with an <code>N x P</code> matrix results 00041 * in an <code>M x P</code> matrix. 00042 * When matrix size checking is enabled, the functions check: (1) that the inner dimensions of 00043 * <code>pSrcA</code> and <code>pSrcB</code> are equal; and (2) that the size of the output 00044 * matrix equals the outer dimensions of <code>pSrcA</code> and <code>pSrcB</code>. 
00045 */ 00046 00047 00048 /** 00049 * @addtogroup CmplxMatrixMult 00050 * @{ 00051 */ 00052 00053 /** 00054 * @brief Floating-point Complex matrix multiplication. 00055 * @param[in] *pSrcA points to the first input complex matrix structure 00056 * @param[in] *pSrcB points to the second input complex matrix structure 00057 * @param[out] *pDst points to output complex matrix structure 00058 * @return The function returns either 00059 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking. 00060 */ 00061 00062 arm_status arm_mat_cmplx_mult_f32( 00063 const arm_matrix_instance_f32 * pSrcA, 00064 const arm_matrix_instance_f32 * pSrcB, 00065 arm_matrix_instance_f32 * pDst) 00066 { 00067 float32_t *pIn1 = pSrcA->pData; /* input data matrix pointer A */ 00068 float32_t *pIn2 = pSrcB->pData; /* input data matrix pointer B */ 00069 float32_t *pInA = pSrcA->pData; /* input data matrix pointer A */ 00070 float32_t *pOut = pDst->pData; /* output data matrix pointer */ 00071 float32_t *px; /* Temporary output data matrix pointer */ 00072 uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */ 00073 uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */ 00074 uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */ 00075 float32_t sumReal1, sumImag1; /* accumulator */ 00076 float32_t a0, b0, c0, d0; 00077 float32_t a1, b1, c1, d1; 00078 float32_t sumReal2, sumImag2; /* accumulator */ 00079 00080 00081 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00082 00083 uint16_t col, i = 0U, j, row = numRowsA, colCnt; /* loop counters */ 00084 arm_status status; /* status of matrix multiplication */ 00085 00086 #ifdef ARM_MATH_MATRIX_CHECK 00087 00088 00089 /* Check for matrix mismatch condition */ 00090 if ((pSrcA->numCols != pSrcB->numRows) || 00091 (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols)) 00092 { 00093 00094 /* Set status as 
ARM_MATH_SIZE_MISMATCH */ 00095 status = ARM_MATH_SIZE_MISMATCH; 00096 } 00097 else 00098 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */ 00099 00100 { 00101 /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */ 00102 /* row loop */ 00103 do 00104 { 00105 /* Output pointer is set to starting address of the row being processed */ 00106 px = pOut + 2 * i; 00107 00108 /* For every row wise process, the column loop counter is to be initiated */ 00109 col = numColsB; 00110 00111 /* For every row wise process, the pIn2 pointer is set 00112 ** to the starting address of the pSrcB data */ 00113 pIn2 = pSrcB->pData; 00114 00115 j = 0U; 00116 00117 /* column loop */ 00118 do 00119 { 00120 /* Set the variable sum, that acts as accumulator, to zero */ 00121 sumReal1 = 0.0f; 00122 sumImag1 = 0.0f; 00123 00124 sumReal2 = 0.0f; 00125 sumImag2 = 0.0f; 00126 00127 /* Initiate the pointer pIn1 to point to the starting address of the column being processed */ 00128 pIn1 = pInA; 00129 00130 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00131 colCnt = numColsA >> 2; 00132 00133 /* matrix multiplication */ 00134 while (colCnt > 0U) 00135 { 00136 00137 /* Reading real part of complex matrix A */ 00138 a0 = *pIn1; 00139 00140 /* Reading real part of complex matrix B */ 00141 c0 = *pIn2; 00142 00143 /* Reading imaginary part of complex matrix A */ 00144 b0 = *(pIn1 + 1U); 00145 00146 /* Reading imaginary part of complex matrix B */ 00147 d0 = *(pIn2 + 1U); 00148 00149 sumReal1 += a0 * c0; 00150 sumImag1 += b0 * c0; 00151 00152 pIn1 += 2U; 00153 pIn2 += 2 * numColsB; 00154 00155 sumReal2 -= b0 * d0; 00156 sumImag2 += a0 * d0; 00157 00158 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... 
+ a(m,p)*b(p,n) */ 00159 00160 a1 = *pIn1; 00161 c1 = *pIn2; 00162 00163 b1 = *(pIn1 + 1U); 00164 d1 = *(pIn2 + 1U); 00165 00166 sumReal1 += a1 * c1; 00167 sumImag1 += b1 * c1; 00168 00169 pIn1 += 2U; 00170 pIn2 += 2 * numColsB; 00171 00172 sumReal2 -= b1 * d1; 00173 sumImag2 += a1 * d1; 00174 00175 a0 = *pIn1; 00176 c0 = *pIn2; 00177 00178 b0 = *(pIn1 + 1U); 00179 d0 = *(pIn2 + 1U); 00180 00181 sumReal1 += a0 * c0; 00182 sumImag1 += b0 * c0; 00183 00184 pIn1 += 2U; 00185 pIn2 += 2 * numColsB; 00186 00187 sumReal2 -= b0 * d0; 00188 sumImag2 += a0 * d0; 00189 00190 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */ 00191 00192 a1 = *pIn1; 00193 c1 = *pIn2; 00194 00195 b1 = *(pIn1 + 1U); 00196 d1 = *(pIn2 + 1U); 00197 00198 sumReal1 += a1 * c1; 00199 sumImag1 += b1 * c1; 00200 00201 pIn1 += 2U; 00202 pIn2 += 2 * numColsB; 00203 00204 sumReal2 -= b1 * d1; 00205 sumImag2 += a1 * d1; 00206 00207 /* Decrement the loop count */ 00208 colCnt--; 00209 } 00210 00211 /* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here. 00212 ** No loop unrolling is used. */ 00213 colCnt = numColsA % 0x4U; 00214 00215 while (colCnt > 0U) 00216 { 00217 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... 
+ a(m,p)*b(p,n) */ 00218 a1 = *pIn1; 00219 c1 = *pIn2; 00220 00221 b1 = *(pIn1 + 1U); 00222 d1 = *(pIn2 + 1U); 00223 00224 sumReal1 += a1 * c1; 00225 sumImag1 += b1 * c1; 00226 00227 pIn1 += 2U; 00228 pIn2 += 2 * numColsB; 00229 00230 sumReal2 -= b1 * d1; 00231 sumImag2 += a1 * d1; 00232 00233 /* Decrement the loop counter */ 00234 colCnt--; 00235 } 00236 00237 sumReal1 += sumReal2; 00238 sumImag1 += sumImag2; 00239 00240 /* Store the result in the destination buffer */ 00241 *px++ = sumReal1; 00242 *px++ = sumImag1; 00243 00244 /* Update the pointer pIn2 to point to the starting address of the next column */ 00245 j++; 00246 pIn2 = pSrcB->pData + 2U * j; 00247 00248 /* Decrement the column loop counter */ 00249 col--; 00250 00251 } while (col > 0U); 00252 00253 /* Update the pointer pInA to point to the starting address of the next row */ 00254 i = i + numColsB; 00255 pInA = pInA + 2 * numColsA; 00256 00257 /* Decrement the row loop counter */ 00258 row--; 00259 00260 } while (row > 0U); 00261 00262 /* Set status as ARM_MATH_SUCCESS */ 00263 status = ARM_MATH_SUCCESS; 00264 } 00265 00266 /* Return to application */ 00267 return (status); 00268 } 00269 00270 /** 00271 * @} end of MatrixMult group 00272 */ 00273
Generated on Tue Jul 12 2022 16:47:27 by 1.7.2