Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of OmniWheels by
arm_mat_cmplx_mult_f32.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_mat_cmplx_mult_f32.c 00009 * 00010 * Description: Floating-point matrix multiplication. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 #include "arm_math.h" 00041 00042 /** 00043 * @ingroup groupMatrix 00044 */ 00045 00046 /** 00047 * @defgroup CmplxMatrixMult Complex Matrix Multiplication 00048 * 00049 * Complex Matrix multiplication is only defined if the number of columns of the 00050 * first matrix equals the number of rows of the second matrix. 00051 * Multiplying an <code>M x N</code> matrix with an <code>N x P</code> matrix results 00052 * in an <code>M x P</code> matrix. 00053 * When matrix size checking is enabled, the functions check: (1) that the inner dimensions of 00054 * <code>pSrcA</code> and <code>pSrcB</code> are equal; and (2) that the size of the output 00055 * matrix equals the outer dimensions of <code>pSrcA</code> and <code>pSrcB</code>. 00056 */ 00057 00058 00059 /** 00060 * @addtogroup CmplxMatrixMult 00061 * @{ 00062 */ 00063 00064 /** 00065 * @brief Floating-point Complex matrix multiplication. 00066 * @param[in] *pSrcA points to the first input complex matrix structure 00067 * @param[in] *pSrcB points to the second input complex matrix structure 00068 * @param[out] *pDst points to output complex matrix structure 00069 * @return The function returns either 00070 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking. 
00071 */ 00072 00073 arm_status arm_mat_cmplx_mult_f32( 00074 const arm_matrix_instance_f32 * pSrcA, 00075 const arm_matrix_instance_f32 * pSrcB, 00076 arm_matrix_instance_f32 * pDst) 00077 { 00078 float32_t *pIn1 = pSrcA->pData; /* input data matrix pointer A */ 00079 float32_t *pIn2 = pSrcB->pData; /* input data matrix pointer B */ 00080 float32_t *pInA = pSrcA->pData; /* input data matrix pointer A */ 00081 float32_t *pOut = pDst->pData; /* output data matrix pointer */ 00082 float32_t *px; /* Temporary output data matrix pointer */ 00083 uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */ 00084 uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */ 00085 uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */ 00086 float32_t sumReal1, sumImag1; /* accumulator */ 00087 float32_t a0, b0, c0, d0; 00088 float32_t a1, b1, c1, d1; 00089 float32_t sumReal2, sumImag2; /* accumulator */ 00090 00091 00092 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00093 00094 uint16_t col, i = 0u, j, row = numRowsA, colCnt; /* loop counters */ 00095 arm_status status; /* status of matrix multiplication */ 00096 00097 #ifdef ARM_MATH_MATRIX_CHECK 00098 00099 00100 /* Check for matrix mismatch condition */ 00101 if((pSrcA->numCols != pSrcB->numRows) || 00102 (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols)) 00103 { 00104 00105 /* Set status as ARM_MATH_SIZE_MISMATCH */ 00106 status = ARM_MATH_SIZE_MISMATCH; 00107 } 00108 else 00109 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */ 00110 00111 { 00112 /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */ 00113 /* row loop */ 00114 do 00115 { 00116 /* Output pointer is set to starting address of the row being processed */ 00117 px = pOut + 2 * i; 00118 00119 /* For every row wise process, the column loop counter is to be initiated */ 00120 col = numColsB; 00121 00122 /* For every row wise process, the pIn2 
pointer is set 00123 ** to the starting address of the pSrcB data */ 00124 pIn2 = pSrcB->pData; 00125 00126 j = 0u; 00127 00128 /* column loop */ 00129 do 00130 { 00131 /* Set the variable sum, that acts as accumulator, to zero */ 00132 sumReal1 = 0.0f; 00133 sumImag1 = 0.0f; 00134 00135 sumReal2 = 0.0f; 00136 sumImag2 = 0.0f; 00137 00138 /* Initiate the pointer pIn1 to point to the starting address of the column being processed */ 00139 pIn1 = pInA; 00140 00141 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00142 colCnt = numColsA >> 2; 00143 00144 /* matrix multiplication */ 00145 while(colCnt > 0u) 00146 { 00147 00148 /* Reading real part of complex matrix A */ 00149 a0 = *pIn1; 00150 00151 /* Reading real part of complex matrix B */ 00152 c0 = *pIn2; 00153 00154 /* Reading imaginary part of complex matrix A */ 00155 b0 = *(pIn1 + 1u); 00156 00157 /* Reading imaginary part of complex matrix B */ 00158 d0 = *(pIn2 + 1u); 00159 00160 sumReal1 += a0 * c0; 00161 sumImag1 += b0 * c0; 00162 00163 pIn1 += 2u; 00164 pIn2 += 2 * numColsB; 00165 00166 sumReal2 -= b0 * d0; 00167 sumImag2 += a0 * d0; 00168 00169 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */ 00170 00171 a1 = *pIn1; 00172 c1 = *pIn2; 00173 00174 b1 = *(pIn1 + 1u); 00175 d1 = *(pIn2 + 1u); 00176 00177 sumReal1 += a1 * c1; 00178 sumImag1 += b1 * c1; 00179 00180 pIn1 += 2u; 00181 pIn2 += 2 * numColsB; 00182 00183 sumReal2 -= b1 * d1; 00184 sumImag2 += a1 * d1; 00185 00186 a0 = *pIn1; 00187 c0 = *pIn2; 00188 00189 b0 = *(pIn1 + 1u); 00190 d0 = *(pIn2 + 1u); 00191 00192 sumReal1 += a0 * c0; 00193 sumImag1 += b0 * c0; 00194 00195 pIn1 += 2u; 00196 pIn2 += 2 * numColsB; 00197 00198 sumReal2 -= b0 * d0; 00199 sumImag2 += a0 * d0; 00200 00201 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... 
+ a(m,p)*b(p,n) */ 00202 00203 a1 = *pIn1; 00204 c1 = *pIn2; 00205 00206 b1 = *(pIn1 + 1u); 00207 d1 = *(pIn2 + 1u); 00208 00209 sumReal1 += a1 * c1; 00210 sumImag1 += b1 * c1; 00211 00212 pIn1 += 2u; 00213 pIn2 += 2 * numColsB; 00214 00215 sumReal2 -= b1 * d1; 00216 sumImag2 += a1 * d1; 00217 00218 /* Decrement the loop count */ 00219 colCnt--; 00220 } 00221 00222 /* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here. 00223 ** No loop unrolling is used. */ 00224 colCnt = numColsA % 0x4u; 00225 00226 while(colCnt > 0u) 00227 { 00228 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */ 00229 a1 = *pIn1; 00230 c1 = *pIn2; 00231 00232 b1 = *(pIn1 + 1u); 00233 d1 = *(pIn2 + 1u); 00234 00235 sumReal1 += a1 * c1; 00236 sumImag1 += b1 * c1; 00237 00238 pIn1 += 2u; 00239 pIn2 += 2 * numColsB; 00240 00241 sumReal2 -= b1 * d1; 00242 sumImag2 += a1 * d1; 00243 00244 /* Decrement the loop counter */ 00245 colCnt--; 00246 } 00247 00248 sumReal1 += sumReal2; 00249 sumImag1 += sumImag2; 00250 00251 /* Store the result in the destination buffer */ 00252 *px++ = sumReal1; 00253 *px++ = sumImag1; 00254 00255 /* Update the pointer pIn2 to point to the starting address of the next column */ 00256 j++; 00257 pIn2 = pSrcB->pData + 2u * j; 00258 00259 /* Decrement the column loop counter */ 00260 col--; 00261 00262 } while(col > 0u); 00263 00264 /* Update the pointer pInA to point to the starting address of the next row */ 00265 i = i + numColsB; 00266 pInA = pInA + 2 * numColsA; 00267 00268 /* Decrement the row loop counter */ 00269 row--; 00270 00271 } while(row > 0u); 00272 00273 /* Set status as ARM_MATH_SUCCESS */ 00274 status = ARM_MATH_SUCCESS; 00275 } 00276 00277 /* Return to application */ 00278 return (status); 00279 } 00280 00281 /** 00282 * @} end of MatrixMult group 00283 */
Generated on Fri Jul 22 2022 04:53:44 by
1.7.2
