Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of dsp by
arm_mat_mult_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_mat_mult_q15.c 00009 * 00010 * Description: Q15 matrix multiplication. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.3 2010/11/29 00015 * Re-organized the CMSIS folders and updated documentation. 00016 * 00017 * Version 1.0.2 2010/11/11 00018 * Documentation updated. 00019 * 00020 * Version 1.0.1 2010/10/05 00021 * Production release and review comments incorporated. 00022 * 00023 * Version 1.0.0 2010/09/20 00024 * Production release and review comments incorporated. 00025 * 00026 * Version 0.0.5 2010/04/26 00027 * incorporated review comments and updated with latest CMSIS layer 00028 * 00029 * Version 0.0.3 2010/03/10 00030 * Initial version 00031 * -------------------------------------------------------------------- */ 00032 00033 #include "arm_math.h" 00034 00035 /** 00036 * @ingroup groupMatrix 00037 */ 00038 00039 /** 00040 * @addtogroup MatrixMult 00041 * @{ 00042 */ 00043 00044 00045 /** 00046 * @brief Q15 matrix multiplication 00047 * @param[in] *pSrcA points to the first input matrix structure 00048 * @param[in] *pSrcB points to the second input matrix structure 00049 * @param[out] *pDst points to output matrix structure 00050 * @param[in] *pState points to the array for storing intermediate results 00051 * @return The function returns either 00052 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking. 00053 * 00054 * @details 00055 * <b>Scaling and Overflow Behavior:</b> 00056 * 00057 * \par 00058 * The function is implemented using a 64-bit internal accumulator. The inputs to the 00059 * multiplications are in 1.15 format and multiplications yield a 2.30 result. 
00060 * The 2.30 intermediate 00061 * results are accumulated in a 64-bit accumulator in 34.30 format. This approach 00062 * provides 33 guard bits and there is no risk of overflow. The 34.30 result is then 00063 * truncated to 34.15 format by discarding the low 15 bits and then saturated to 00064 * 1.15 format. 00065 * 00066 * \par 00067 * Refer to <code>arm_mat_mult_fast_q15()</code> for a faster but less precise version of this function. 00068 * 00069 */ 00070 00071 arm_status arm_mat_mult_q15( 00072 const arm_matrix_instance_q15 * pSrcA, 00073 const arm_matrix_instance_q15 * pSrcB, 00074 arm_matrix_instance_q15 * pDst, 00075 q15_t * pState) 00076 { 00077 q63_t sum; /* accumulator */ 00078 q31_t in; /* Temporary variable to hold the input value */ 00079 q15_t *pSrcBT = pState; /* input data matrix pointer for transpose */ 00080 q15_t *pInA = pSrcA->pData; /* input data matrix pointer A of Q15 type */ 00081 q15_t *pInB = pSrcB->pData; /* input data matrix pointer B of Q15 type */ 00082 // q15_t *pDst = pDst->pData; /* output data matrix pointer */ 00083 q15_t *px; /* Temporary output data matrix pointer */ 00084 uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */ 00085 uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */ 00086 uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */ 00087 uint16_t numRowsB = pSrcB->numRows; /* number of rows of input matrix A */ 00088 uint16_t col, i = 0u, row = numRowsB, colCnt; /* loop counters */ 00089 arm_status status; /* status of matrix multiplication */ 00090 00091 #ifdef ARM_MATH_MATRIX_CHECK 00092 /* Check for matrix mismatch condition */ 00093 if((pSrcA->numCols != pSrcB->numRows) || 00094 (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols)) 00095 { 00096 /* Set status as ARM_MATH_SIZE_MISMATCH */ 00097 status = ARM_MATH_SIZE_MISMATCH; 00098 } 00099 else 00100 #endif 00101 { 00102 /* Matrix transpose */ 00103 do 00104 { 00105 /* 
Apply loop unrolling and exchange the columns with row elements */ 00106 col = numColsB >> 2; 00107 00108 /* The pointer px is set to starting address of the column being processed */ 00109 px = pSrcBT + i; 00110 00111 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00112 ** a second loop below computes the remaining 1 to 3 samples. */ 00113 while(col > 0u) 00114 { 00115 /* Read two elements from the row */ 00116 in = *__SIMD32(pInB)++; 00117 00118 /* Unpack and store one element in the destination */ 00119 *px = (q15_t) in; 00120 00121 /* Update the pointer px to point to the next row of the transposed matrix */ 00122 px += numRowsB; 00123 00124 /* Unpack and store the second element in the destination */ 00125 *px = (q15_t) ((in & (q31_t) 0xffff0000) >> 16); 00126 00127 /* Update the pointer px to point to the next row of the transposed matrix */ 00128 px += numRowsB; 00129 00130 /* Read two elements from the row */ 00131 in = *__SIMD32(pInB)++; 00132 00133 /* Unpack and store one element in the destination */ 00134 *px = (q15_t) in; 00135 00136 /* Update the pointer px to point to the next row of the transposed matrix */ 00137 px += numRowsB; 00138 00139 /* Unpack and store the second element in the destination */ 00140 *px = (q15_t) ((in & (q31_t) 0xffff0000) >> 16); 00141 00142 /* Update the pointer px to point to the next row of the transposed matrix */ 00143 px += numRowsB; 00144 00145 /* Decrement the column loop counter */ 00146 col--; 00147 } 00148 00149 /* If the columns of pSrcB is not a multiple of 4, compute any remaining output samples here. 00150 ** No loop unrolling is used. 
*/ 00151 col = numColsB % 0x4u; 00152 00153 while(col > 0u) 00154 { 00155 /* Read and store the input element in the destination */ 00156 *px = *pInB++; 00157 00158 /* Update the pointer px to point to the next row of the transposed matrix */ 00159 px += numRowsB; 00160 00161 /* Decrement the column loop counter */ 00162 col--; 00163 } 00164 00165 i++; 00166 00167 /* Decrement the row loop counter */ 00168 row--; 00169 00170 } while(row > 0u); 00171 00172 /* Reset the variables for the usage in the following multiplication process */ 00173 row = numRowsA; 00174 i = 0u; 00175 px = pDst->pData; 00176 00177 /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */ 00178 /* row loop */ 00179 do 00180 { 00181 /* For every row wise process, the column loop counter is to be initiated */ 00182 col = numColsB; 00183 00184 /* For every row wise process, the pIn2 pointer is set 00185 ** to the starting address of the transposed pSrcB data */ 00186 pInB = pSrcBT; 00187 00188 /* column loop */ 00189 do 00190 { 00191 /* Set the variable sum, that acts as accumulator, to zero */ 00192 sum = 0; 00193 00194 /* Apply loop unrolling and compute 2 MACs simultaneously. */ 00195 colCnt = numColsA >> 1; 00196 00197 /* Initiate the pointer pIn1 to point to the starting address of the column being processed */ 00198 pInA = pSrcA->pData + i; 00199 00200 /* matrix multiplication */ 00201 while(colCnt > 0u) 00202 { 00203 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */ 00204 sum = __SMLALD(*__SIMD32(pInA)++, *__SIMD32(pInB)++, sum); 00205 00206 /* Decrement the loop counter */ 00207 colCnt--; 00208 } 00209 00210 /* process odd column samples */ 00211 if((numColsA & 0x1u) > 0u) 00212 { 00213 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... 
+ a(m,p)*b(p,n) */ 00214 sum += ((q31_t) * pInA * (*pInB++)); 00215 } 00216 00217 /* Saturate and store the result in the destination buffer */ 00218 *px = (q15_t) (__SSAT((sum >> 15), 16)); 00219 px++; 00220 00221 /* Decrement the column loop counter */ 00222 col--; 00223 00224 } while(col > 0u); 00225 00226 i = i + numColsA; 00227 00228 /* Decrement the row loop counter */ 00229 row--; 00230 00231 } while(row > 0u); 00232 00233 /* set status as ARM_MATH_SUCCESS */ 00234 status = ARM_MATH_SUCCESS; 00235 } 00236 00237 /* Return to application */ 00238 return (status); 00239 } 00240 00241 /** 00242 * @} end of MatrixMult group 00243 */
Generated on Tue Jul 12 2022 19:55:43 by
1.7.2
