Added CMSIS5 DSP and NN folder. Needs some work
Embed:
(wiki syntax)
Show/hide line numbers
arm_mat_add_f32.c
00001 /* ---------------------------------------------------------------------- 00002 * Project: CMSIS DSP Library 00003 * Title: arm_mat_add_f32.c 00004 * Description: Floating-point matrix addition 00005 * 00006 * $Date: 27. January 2017 00007 * $Revision: V.1.5.1 00008 * 00009 * Target Processor: Cortex-M cores 00010 * -------------------------------------------------------------------- */ 00011 /* 00012 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. 00013 * 00014 * SPDX-License-Identifier: Apache-2.0 00015 * 00016 * Licensed under the Apache License, Version 2.0 (the License); you may 00017 * not use this file except in compliance with the License. 00018 * You may obtain a copy of the License at 00019 * 00020 * www.apache.org/licenses/LICENSE-2.0 00021 * 00022 * Unless required by applicable law or agreed to in writing, software 00023 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 00024 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00025 * See the License for the specific language governing permissions and 00026 * limitations under the License. 00027 */ 00028 00029 #include "arm_math.h" 00030 00031 /** 00032 * @ingroup groupMatrix 00033 */ 00034 00035 /** 00036 * @defgroup MatrixAdd Matrix Addition 00037 * 00038 * Adds two matrices. 00039 * \image html MatrixAddition.gif "Addition of two 3 x 3 matrices" 00040 * 00041 * The functions check to make sure that 00042 * <code>pSrcA</code>, <code>pSrcB</code>, and <code>pDst</code> have the same 00043 * number of rows and columns. 00044 */ 00045 00046 /** 00047 * @addtogroup MatrixAdd 00048 * @{ 00049 */ 00050 00051 00052 /** 00053 * @brief Floating-point matrix addition. 
00054 * @param[in] *pSrcA points to the first input matrix structure 00055 * @param[in] *pSrcB points to the second input matrix structure 00056 * @param[out] *pDst points to output matrix structure 00057 * @return The function returns either 00058 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking. 00059 */ 00060 00061 arm_status arm_mat_add_f32( 00062 const arm_matrix_instance_f32 * pSrcA, 00063 const arm_matrix_instance_f32 * pSrcB, 00064 arm_matrix_instance_f32 * pDst) 00065 { 00066 float32_t *pIn1 = pSrcA->pData; /* input data matrix pointer A */ 00067 float32_t *pIn2 = pSrcB->pData; /* input data matrix pointer B */ 00068 float32_t *pOut = pDst->pData; /* output data matrix pointer */ 00069 00070 #if defined (ARM_MATH_DSP) 00071 00072 float32_t inA1, inA2, inB1, inB2, out1, out2; /* temporary variables */ 00073 00074 #endif // #if defined (ARM_MATH_DSP) 00075 00076 uint32_t numSamples; /* total number of elements in the matrix */ 00077 uint32_t blkCnt; /* loop counters */ 00078 arm_status status; /* status of matrix addition */ 00079 00080 #ifdef ARM_MATH_MATRIX_CHECK 00081 /* Check for matrix mismatch condition */ 00082 if ((pSrcA->numRows != pSrcB->numRows) || 00083 (pSrcA->numCols != pSrcB->numCols) || 00084 (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols)) 00085 { 00086 /* Set status as ARM_MATH_SIZE_MISMATCH */ 00087 status = ARM_MATH_SIZE_MISMATCH; 00088 } 00089 else 00090 #endif 00091 { 00092 00093 /* Total number of samples in the input matrix */ 00094 numSamples = (uint32_t) pSrcA->numRows * pSrcA->numCols; 00095 00096 #if defined (ARM_MATH_DSP) 00097 00098 /* Loop unrolling */ 00099 blkCnt = numSamples >> 2U; 00100 00101 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00102 ** a second loop below computes the remaining 1 to 3 samples. 
*/ 00103 while (blkCnt > 0U) 00104 { 00105 /* C(m,n) = A(m,n) + B(m,n) */ 00106 /* Add and then store the results in the destination buffer. */ 00107 /* Read values from source A */ 00108 inA1 = pIn1[0]; 00109 00110 /* Read values from source B */ 00111 inB1 = pIn2[0]; 00112 00113 /* Read values from source A */ 00114 inA2 = pIn1[1]; 00115 00116 /* out = sourceA + sourceB */ 00117 out1 = inA1 + inB1; 00118 00119 /* Read values from source B */ 00120 inB2 = pIn2[1]; 00121 00122 /* Read values from source A */ 00123 inA1 = pIn1[2]; 00124 00125 /* out = sourceA + sourceB */ 00126 out2 = inA2 + inB2; 00127 00128 /* Read values from source B */ 00129 inB1 = pIn2[2]; 00130 00131 /* Store result in destination */ 00132 pOut[0] = out1; 00133 pOut[1] = out2; 00134 00135 /* Read values from source A */ 00136 inA2 = pIn1[3]; 00137 00138 /* Read values from source B */ 00139 inB2 = pIn2[3]; 00140 00141 /* out = sourceA + sourceB */ 00142 out1 = inA1 + inB1; 00143 00144 /* out = sourceA + sourceB */ 00145 out2 = inA2 + inB2; 00146 00147 /* Store result in destination */ 00148 pOut[2] = out1; 00149 00150 /* Store result in destination */ 00151 pOut[3] = out2; 00152 00153 00154 /* update pointers to process next sampels */ 00155 pIn1 += 4U; 00156 pIn2 += 4U; 00157 pOut += 4U; 00158 /* Decrement the loop counter */ 00159 blkCnt--; 00160 } 00161 00162 /* If the numSamples is not a multiple of 4, compute any remaining output samples here. 00163 ** No loop unrolling is used. */ 00164 blkCnt = numSamples % 0x4U; 00165 00166 #else 00167 00168 /* Run the below code for Cortex-M0 */ 00169 00170 /* Initialize blkCnt with number of samples */ 00171 blkCnt = numSamples; 00172 00173 #endif /* #if defined (ARM_MATH_DSP) */ 00174 00175 while (blkCnt > 0U) 00176 { 00177 /* C(m,n) = A(m,n) + B(m,n) */ 00178 /* Add and then store the results in the destination buffer. 
*/ 00179 *pOut++ = (*pIn1++) + (*pIn2++); 00180 00181 /* Decrement the loop counter */ 00182 blkCnt--; 00183 } 00184 00185 /* set status as ARM_MATH_SUCCESS */ 00186 status = ARM_MATH_SUCCESS; 00187 00188 } 00189 00190 /* Return to application */ 00191 return (status); 00192 } 00193 00194 /** 00195 * @} end of MatrixAdd group 00196 */ 00197
Generated on Tue Jul 12 2022 16:47:27 by 1.7.2