Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
arm_fully_connected_q15.c
00001 /* 00002 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. 00003 * 00004 * SPDX-License-Identifier: Apache-2.0 00005 * 00006 * Licensed under the Apache License, Version 2.0 (the License); you may 00007 * not use this file except in compliance with the License. 00008 * You may obtain a copy of the License at 00009 * 00010 * www.apache.org/licenses/LICENSE-2.0 00011 * 00012 * Unless required by applicable law or agreed to in writing, software 00013 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 00014 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 * See the License for the specific language governing permissions and 00016 * limitations under the License. 00017 */ 00018 00019 /* ---------------------------------------------------------------------- 00020 * Project: CMSIS NN Library 00021 * Title: arm_fully_connected_q15.c 00022 * Description: Q15 basic fully-connected layer function 00023 * 00024 * $Date: 17. 
January 2018 00025 * $Revision: V.1.0.0 00026 * 00027 * Target Processor: Cortex-M cores 00028 * 00029 * -------------------------------------------------------------------- */ 00030 00031 #include "arm_math.h" 00032 #include "arm_nnfunctions.h" 00033 00034 /** 00035 * @ingroup groupNN 00036 */ 00037 00038 /** 00039 * @addtogroup FC 00040 * @{ 00041 */ 00042 00043 /** 00044 * @brief Q15 opt fully-connected layer function 00045 * @param[in] pV pointer to input vector 00046 * @param[in] pM pointer to matrix weights 00047 * @param[in] dim_vec length of the vector 00048 * @param[in] num_of_rows number of rows in weight matrix 00049 * @param[in] bias_shift amount of left-shift for bias 00050 * @param[in] out_shift amount of right-shift for output 00051 * @param[in] bias pointer to bias 00052 * @param[in,out] pOut pointer to output vector 00053 * @param[in,out] vec_buffer pointer to buffer space for input 00054 * @return The function returns <code>ARM_MATH_SUCCESS</code> 00055 * 00056 * 00057 * @details 00058 * 00059 * <b>Buffer size:</b> 00060 * 00061 * vec_buffer size: 0 00062 * 00063 */ 00064 00065 arm_status 00066 arm_fully_connected_q15(const q15_t * pV, 00067 const q15_t * pM, 00068 const uint16_t dim_vec, 00069 const uint16_t num_of_rows, 00070 const uint16_t bias_shift, 00071 const uint16_t out_shift, 00072 const q15_t * bias, 00073 q15_t * pOut, 00074 q15_t * vec_buffer) 00075 { 00076 00077 #if defined (ARM_MATH_DSP) 00078 /* Run the following code for Cortex-M4 and Cortex-M7 */ 00079 00080 const q15_t *pB = pM; 00081 const q15_t *pB2 = pB + dim_vec; 00082 q15_t *pO = pOut; 00083 const q15_t *pA; 00084 const q15_t *pBias = bias; 00085 uint16_t rowCnt = num_of_rows >> 1; 00086 00087 /* this loop loops over different output */ 00088 while (rowCnt) { 00089 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); 00090 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); 00091 00092 uint16_t colCnt = dim_vec >> 2; 00093 00094 pA = pV; 
00095 pB2 = pB + dim_vec; 00096 00097 while (colCnt) 00098 { 00099 q31_t inV1, inM1, inM2; 00100 inV1 = *__SIMD32(pA)++; 00101 inM1 = *__SIMD32(pB)++; 00102 sum = __SMLAD(inV1, inM1, sum); 00103 inM2 = *__SIMD32(pB2)++; 00104 sum2 = __SMLAD(inV1, inM2, sum2); 00105 00106 inV1 = *__SIMD32(pA)++; 00107 inM1 = *__SIMD32(pB)++; 00108 sum = __SMLAD(inV1, inM1, sum); 00109 inM2 = *__SIMD32(pB2)++; 00110 sum2 = __SMLAD(inV1, inM2, sum2); 00111 00112 colCnt--; 00113 } 00114 colCnt = dim_vec & 0x3; 00115 while (colCnt) 00116 { 00117 q15_t inV = *pA++; 00118 q15_t inM = *pB++; 00119 q15_t inM2 = *pB2++; 00120 00121 sum += inV * inM; 00122 sum2 += inV * inM2; 00123 colCnt--; 00124 } /* while over colCnt */ 00125 *pO++ = (q15_t) (__SSAT((sum >> out_shift), 16)); 00126 *pO++ = (q15_t) (__SSAT((sum2>> out_shift), 16)); 00127 00128 /* adjust the pointers and counters */ 00129 pB = pB + dim_vec; 00130 rowCnt --; 00131 } 00132 00133 rowCnt = num_of_rows & 0x1; 00134 00135 while (rowCnt) { 00136 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); 00137 00138 uint16_t colCnt = dim_vec >> 2; 00139 00140 pA = pV; 00141 00142 while (colCnt) { 00143 q31_t inV1, inM1; 00144 inV1 = *__SIMD32(pA)++; 00145 inM1 = *__SIMD32(pB)++; 00146 sum = __SMLAD(inV1, inM1, sum); 00147 00148 inV1 = *__SIMD32(pA)++; 00149 inM1 = *__SIMD32(pB)++; 00150 sum = __SMLAD(inV1, inM1, sum); 00151 00152 colCnt--; 00153 } 00154 00155 /* left-over of the vector */ 00156 colCnt = dim_vec & 0x3; 00157 while(colCnt) { 00158 q15_t inV = *pA++; 00159 q15_t inM = *pB++; 00160 00161 sum += inV * inM; 00162 00163 colCnt--; 00164 } 00165 00166 *pO++ = (q15_t) (__SSAT((sum >> out_shift), 16)); 00167 00168 rowCnt --; 00169 } 00170 00171 #else 00172 int i, j; 00173 /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ 00174 for (i = 0; i < num_of_rows; i++) 00175 { 00176 int ip_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); 00177 for (j = 0; j < dim_vec; j++) 00178 
{ 00179 ip_out += pV[j] * pM[i * dim_vec + j]; 00180 } 00181 pOut[i] = (q15_t) __SSAT((ip_out >> out_shift), 16); 00182 } 00183 00184 #endif /* ARM_MATH_DSP */ 00185 00186 /* Return to application */ 00187 return (ARM_MATH_SUCCESS); 00188 00189 } 00190 00191 /** 00192 * @} end of FC group 00193 */ 00194
Generated on Tue Jul 12 2022 16:47:27 by
1.7.2