Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-os by
arm_mat_cmplx_mult_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_mat_cmplx_mult_q31.c 00009 * 00010 * Description: Floating-point matrix multiplication. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 #include "arm_math.h" 00041 00042 /** 00043 * @ingroup groupMatrix 00044 */ 00045 00046 /** 00047 * @addtogroup CmplxMatrixMult 00048 * @{ 00049 */ 00050 00051 /** 00052 * @brief Q31 Complex matrix multiplication 00053 * @param[in] *pSrcA points to the first input complex matrix structure 00054 * @param[in] *pSrcB points to the second input complex matrix structure 00055 * @param[out] *pDst points to output complex matrix structure 00056 * @return The function returns either 00057 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking. 00058 * 00059 * @details 00060 * <b>Scaling and Overflow Behavior:</b> 00061 * 00062 * \par 00063 * The function is implemented using an internal 64-bit accumulator. 00064 * The accumulator has a 2.62 format and maintains full precision of the intermediate 00065 * multiplication results but provides only a single guard bit. There is no saturation 00066 * on intermediate additions. Thus, if the accumulator overflows it wraps around and 00067 * distorts the result. The input signals should be scaled down to avoid intermediate 00068 * overflows. The input is thus scaled down by log2(numColsA) bits 00069 * to avoid overflows, as a total of numColsA additions are performed internally. 
00070 * The 2.62 accumulator is right shifted by 31 bits and saturated to 1.31 format to yield the final result. 00071 * 00072 * 00073 */ 00074 00075 arm_status arm_mat_cmplx_mult_q31( 00076 const arm_matrix_instance_q31 * pSrcA, 00077 const arm_matrix_instance_q31 * pSrcB, 00078 arm_matrix_instance_q31 * pDst) 00079 { 00080 q31_t *pIn1 = pSrcA->pData; /* input data matrix pointer A */ 00081 q31_t *pIn2 = pSrcB->pData; /* input data matrix pointer B */ 00082 q31_t *pInA = pSrcA->pData; /* input data matrix pointer A */ 00083 q31_t *pOut = pDst->pData; /* output data matrix pointer */ 00084 q31_t *px; /* Temporary output data matrix pointer */ 00085 uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */ 00086 uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */ 00087 uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */ 00088 q63_t sumReal1, sumImag1; /* accumulator */ 00089 q31_t a0, b0, c0, d0; 00090 q31_t a1, b1, c1, d1; 00091 00092 00093 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00094 00095 uint16_t col, i = 0u, j, row = numRowsA, colCnt; /* loop counters */ 00096 arm_status status; /* status of matrix multiplication */ 00097 00098 #ifdef ARM_MATH_MATRIX_CHECK 00099 00100 00101 /* Check for matrix mismatch condition */ 00102 if((pSrcA->numCols != pSrcB->numRows) || 00103 (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols)) 00104 { 00105 00106 /* Set status as ARM_MATH_SIZE_MISMATCH */ 00107 status = ARM_MATH_SIZE_MISMATCH; 00108 } 00109 else 00110 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */ 00111 00112 { 00113 /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */ 00114 /* row loop */ 00115 do 00116 { 00117 /* Output pointer is set to starting address of the row being processed */ 00118 px = pOut + 2 * i; 00119 00120 /* For every row wise process, the column loop counter is to be initiated */ 00121 col = numColsB; 00122 00123 
/* For every row wise process, the pIn2 pointer is set 00124 ** to the starting address of the pSrcB data */ 00125 pIn2 = pSrcB->pData; 00126 00127 j = 0u; 00128 00129 /* column loop */ 00130 do 00131 { 00132 /* Set the variable sum, that acts as accumulator, to zero */ 00133 sumReal1 = 0.0; 00134 sumImag1 = 0.0; 00135 00136 /* Initiate the pointer pIn1 to point to the starting address of the column being processed */ 00137 pIn1 = pInA; 00138 00139 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00140 colCnt = numColsA >> 2; 00141 00142 /* matrix multiplication */ 00143 while(colCnt > 0u) 00144 { 00145 00146 /* Reading real part of complex matrix A */ 00147 a0 = *pIn1; 00148 00149 /* Reading real part of complex matrix B */ 00150 c0 = *pIn2; 00151 00152 /* Reading imaginary part of complex matrix A */ 00153 b0 = *(pIn1 + 1u); 00154 00155 /* Reading imaginary part of complex matrix B */ 00156 d0 = *(pIn2 + 1u); 00157 00158 /* Multiply and Accumlates */ 00159 sumReal1 += (q63_t) a0 *c0; 00160 sumImag1 += (q63_t) b0 *c0; 00161 00162 /* update pointers */ 00163 pIn1 += 2u; 00164 pIn2 += 2 * numColsB; 00165 00166 /* Multiply and Accumlates */ 00167 sumReal1 -= (q63_t) b0 *d0; 00168 sumImag1 += (q63_t) a0 *d0; 00169 00170 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... 
+ a(m,p)*b(p,n) */ 00171 00172 /* read real and imag values from pSrcA and pSrcB buffer */ 00173 a1 = *pIn1; 00174 c1 = *pIn2; 00175 b1 = *(pIn1 + 1u); 00176 d1 = *(pIn2 + 1u); 00177 00178 /* Multiply and Accumlates */ 00179 sumReal1 += (q63_t) a1 *c1; 00180 sumImag1 += (q63_t) b1 *c1; 00181 00182 /* update pointers */ 00183 pIn1 += 2u; 00184 pIn2 += 2 * numColsB; 00185 00186 /* Multiply and Accumlates */ 00187 sumReal1 -= (q63_t) b1 *d1; 00188 sumImag1 += (q63_t) a1 *d1; 00189 00190 a0 = *pIn1; 00191 c0 = *pIn2; 00192 00193 b0 = *(pIn1 + 1u); 00194 d0 = *(pIn2 + 1u); 00195 00196 /* Multiply and Accumlates */ 00197 sumReal1 += (q63_t) a0 *c0; 00198 sumImag1 += (q63_t) b0 *c0; 00199 00200 /* update pointers */ 00201 pIn1 += 2u; 00202 pIn2 += 2 * numColsB; 00203 00204 /* Multiply and Accumlates */ 00205 sumReal1 -= (q63_t) b0 *d0; 00206 sumImag1 += (q63_t) a0 *d0; 00207 00208 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */ 00209 00210 a1 = *pIn1; 00211 c1 = *pIn2; 00212 00213 b1 = *(pIn1 + 1u); 00214 d1 = *(pIn2 + 1u); 00215 00216 /* Multiply and Accumlates */ 00217 sumReal1 += (q63_t) a1 *c1; 00218 sumImag1 += (q63_t) b1 *c1; 00219 00220 /* update pointers */ 00221 pIn1 += 2u; 00222 pIn2 += 2 * numColsB; 00223 00224 /* Multiply and Accumlates */ 00225 sumReal1 -= (q63_t) b1 *d1; 00226 sumImag1 += (q63_t) a1 *d1; 00227 00228 /* Decrement the loop count */ 00229 colCnt--; 00230 } 00231 00232 /* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here. 00233 ** No loop unrolling is used. */ 00234 colCnt = numColsA % 0x4u; 00235 00236 while(colCnt > 0u) 00237 { 00238 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... 
+ a(m,p)*b(p,n) */ 00239 a1 = *pIn1; 00240 c1 = *pIn2; 00241 00242 b1 = *(pIn1 + 1u); 00243 d1 = *(pIn2 + 1u); 00244 00245 /* Multiply and Accumlates */ 00246 sumReal1 += (q63_t) a1 *c1; 00247 sumImag1 += (q63_t) b1 *c1; 00248 00249 /* update pointers */ 00250 pIn1 += 2u; 00251 pIn2 += 2 * numColsB; 00252 00253 /* Multiply and Accumlates */ 00254 sumReal1 -= (q63_t) b1 *d1; 00255 sumImag1 += (q63_t) a1 *d1; 00256 00257 /* Decrement the loop counter */ 00258 colCnt--; 00259 } 00260 00261 /* Store the result in the destination buffer */ 00262 *px++ = (q31_t) clip_q63_to_q31(sumReal1 >> 31); 00263 *px++ = (q31_t) clip_q63_to_q31(sumImag1 >> 31); 00264 00265 /* Update the pointer pIn2 to point to the starting address of the next column */ 00266 j++; 00267 pIn2 = pSrcB->pData + 2u * j; 00268 00269 /* Decrement the column loop counter */ 00270 col--; 00271 00272 } while(col > 0u); 00273 00274 /* Update the pointer pInA to point to the starting address of the next row */ 00275 i = i + numColsB; 00276 pInA = pInA + 2 * numColsA; 00277 00278 /* Decrement the row loop counter */ 00279 row--; 00280 00281 } while(row > 0u); 00282 00283 /* Set status as ARM_MATH_SUCCESS */ 00284 status = ARM_MATH_SUCCESS; 00285 } 00286 00287 /* Return to application */ 00288 return (status); 00289 } 00290 00291 /** 00292 * @} end of MatrixMult group 00293 */
Generated on Tue Jul 12 2022 13:15:25 by
