Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
arm_fully_connected_q7.c
00001 /* 00002 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. 00003 * 00004 * SPDX-License-Identifier: Apache-2.0 00005 * 00006 * Licensed under the Apache License, Version 2.0 (the License); you may 00007 * not use this file except in compliance with the License. 00008 * You may obtain a copy of the License at 00009 * 00010 * www.apache.org/licenses/LICENSE-2.0 00011 * 00012 * Unless required by applicable law or agreed to in writing, software 00013 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 00014 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 * See the License for the specific language governing permissions and 00016 * limitations under the License. 00017 */ 00018 00019 /* ---------------------------------------------------------------------- 00020 * Project: CMSIS NN Library 00021 * Title: arm_fully_connected_q7.c 00022 * Description: Q7 basic fully-connected layer function 00023 * 00024 * $Date: 17. 
January 2018 00025 * $Revision: V.1.0.0 00026 * 00027 * Target Processor: Cortex-M cores 00028 * 00029 * -------------------------------------------------------------------- */ 00030 00031 #include "arm_math.h" 00032 #include "arm_nnfunctions.h" 00033 00034 /** 00035 * @ingroup groupNN 00036 */ 00037 00038 /** 00039 * @addtogroup FC 00040 * @{ 00041 */ 00042 00043 /** 00044 * @brief Q7 basic fully-connected layer function 00045 * @param[in] pV pointer to input vector 00046 * @param[in] pM pointer to matrix weights 00047 * @param[in] dim_vec length of the vector 00048 * @param[in] num_of_rows number of rows in weight matrix 00049 * @param[in] bias_shift amount of left-shift for bias 00050 * @param[in] out_shift amount of right-shift for output 00051 * @param[in] bias pointer to bias 00052 * @param[in,out] pOut pointer to output vector 00053 * @param[in,out] vec_buffer pointer to buffer space for input 00054 * @return The function returns <code>ARM_MATH_SUCCESS</code> 00055 * 00056 * @details 00057 * 00058 * <b>Buffer size:</b> 00059 * 00060 * vec_buffer size: dim_vec 00061 * 00062 * This basic function is designed to work with regular weight 00063 * matrix without interleaving. 
00064 * 00065 */ 00066 00067 arm_status 00068 arm_fully_connected_q7(const q7_t * pV, 00069 const q7_t * pM, 00070 const uint16_t dim_vec, 00071 const uint16_t num_of_rows, 00072 const uint16_t bias_shift, 00073 const uint16_t out_shift, const q7_t * bias, q7_t * pOut, q15_t * vec_buffer) 00074 { 00075 00076 #if defined (ARM_MATH_DSP) 00077 /* Run the following code for Cortex-M4 and Cortex-M7 */ 00078 00079 const q7_t *pB = pM; 00080 const q7_t *pB2; 00081 q7_t *pO = pOut; 00082 const q7_t *pBias = bias; 00083 q15_t *pA; 00084 uint16_t rowCnt = num_of_rows >> 1; 00085 00086 /* expand the vector into the buffer */ 00087 arm_q7_to_q15_reordered_no_shift(pV, vec_buffer, dim_vec); 00088 00089 while (rowCnt) 00090 { 00091 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); 00092 q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); 00093 uint16_t colCnt = dim_vec >> 2; 00094 00095 pA = vec_buffer; 00096 pB2 = pB + dim_vec; 00097 00098 while (colCnt) 00099 { 00100 q31_t inV, inM11, inM12, inM21, inM22; 00101 pB = (q7_t *) read_and_pad_reordered((void *)pB, &inM11, &inM12); 00102 pB2 = (q7_t *) read_and_pad_reordered((void *)pB2, &inM21, &inM22); 00103 00104 inV = *__SIMD32(pA)++; 00105 00106 sum = __SMLAD(inV, inM11, sum); 00107 sum2 = __SMLAD(inV, inM21, sum2); 00108 00109 inV = *__SIMD32(pA)++; 00110 00111 sum = __SMLAD(inV, inM12, sum); 00112 sum2 = __SMLAD(inV, inM22, sum2); 00113 00114 colCnt--; 00115 } 00116 colCnt = dim_vec & 0x3; 00117 while (colCnt) 00118 { 00119 q7_t inV = *pA++; 00120 q15_t inM = *pB++; 00121 q15_t inM2 = *pB2++; 00122 00123 sum += inV * inM; 00124 sum2 += inV * inM2; 00125 colCnt--; 00126 } /* while over colCnt */ 00127 *pO++ = (q7_t) (__SSAT((sum >> out_shift), 8)); 00128 *pO++ = (q7_t) (__SSAT((sum2 >> out_shift), 8)); 00129 00130 /* adjust the pointers and counters */ 00131 pB += dim_vec; 00132 rowCnt--; 00133 } 00134 00135 /* left-over part of the rows */ 00136 rowCnt = num_of_rows & 0x1; 00137 00138 
while (rowCnt) 00139 { 00140 uint16_t colCnt = dim_vec >> 2; 00141 q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); 00142 00143 pA = vec_buffer; 00144 00145 while (colCnt) 00146 { 00147 q31_t inV1, inV2, inM11, inM12; 00148 00149 pB = (q7_t *) read_and_pad_reordered((void *)pB, &inM11, &inM12); 00150 00151 inV1 = *__SIMD32(pA)++; 00152 sum = __SMLAD(inV1, inM11, sum); 00153 00154 inV2 = *__SIMD32(pA)++; 00155 sum = __SMLAD(inV2, inM12, sum); 00156 00157 colCnt--; 00158 } 00159 00160 /* left-over of the vector */ 00161 colCnt = dim_vec & 0x3; 00162 while (colCnt) 00163 { 00164 q7_t inV = *pA++; 00165 q15_t inM = *pB++; 00166 sum += inV * inM; 00167 colCnt--; 00168 } 00169 00170 *pO++ = (q7_t) (__SSAT((sum >> out_shift), 8)); 00171 00172 rowCnt--; 00173 } 00174 00175 #else 00176 int i, j; 00177 00178 /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ 00179 for (i = 0; i < num_of_rows; i++) 00180 { 00181 int ip_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); 00182 for (j = 0; j < dim_vec; j++) 00183 { 00184 ip_out += pV[j] * pM[i * dim_vec + j]; 00185 } 00186 pOut[i] = (q7_t) __SSAT((ip_out >> out_shift), 8); 00187 } 00188 00189 #endif /* ARM_MATH_DSP */ 00190 00191 /* Return to ARM_MATH_SUCCESS */ 00192 return (ARM_MATH_SUCCESS); 00193 00194 } 00195 00196 /** 00197 * @} end of FC group 00198 */ 00199
Generated on Tue Jul 12 2022 16:47:27 by
