Added CMSIS5 DSP and NN folder. Needs some work

Committer:
robert_lp
Date:
Thu Apr 12 01:31:58 2018 +0000
Revision:
0:eedb7d567a5d
CMSIS5 Library

Who changed what in which revision?

UserRevisionLine numberNew contents of line
robert_lp 0:eedb7d567a5d 1 /*
robert_lp 0:eedb7d567a5d 2 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
robert_lp 0:eedb7d567a5d 3 *
robert_lp 0:eedb7d567a5d 4 * SPDX-License-Identifier: Apache-2.0
robert_lp 0:eedb7d567a5d 5 *
robert_lp 0:eedb7d567a5d 6 * Licensed under the Apache License, Version 2.0 (the License); you may
robert_lp 0:eedb7d567a5d 7 * not use this file except in compliance with the License.
robert_lp 0:eedb7d567a5d 8 * You may obtain a copy of the License at
robert_lp 0:eedb7d567a5d 9 *
robert_lp 0:eedb7d567a5d 10 * www.apache.org/licenses/LICENSE-2.0
robert_lp 0:eedb7d567a5d 11 *
robert_lp 0:eedb7d567a5d 12 * Unless required by applicable law or agreed to in writing, software
robert_lp 0:eedb7d567a5d 13 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
robert_lp 0:eedb7d567a5d 14 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
robert_lp 0:eedb7d567a5d 15 * See the License for the specific language governing permissions and
robert_lp 0:eedb7d567a5d 16 * limitations under the License.
robert_lp 0:eedb7d567a5d 17 */
robert_lp 0:eedb7d567a5d 18
robert_lp 0:eedb7d567a5d 19 /* ----------------------------------------------------------------------
robert_lp 0:eedb7d567a5d 20 * Project: CMSIS NN Library
robert_lp 0:eedb7d567a5d 21 * Title: arm_nnfunctions.h
robert_lp 0:eedb7d567a5d 22 * Description: Public header file for CMSIS NN Library
robert_lp 0:eedb7d567a5d 23 *
robert_lp 0:eedb7d567a5d 24 * $Date: 17. January 2018
robert_lp 0:eedb7d567a5d 25 * $Revision: V.1.0.0
robert_lp 0:eedb7d567a5d 26 *
robert_lp 0:eedb7d567a5d 27 * Target Processor: Cortex-M cores
robert_lp 0:eedb7d567a5d 28 * -------------------------------------------------------------------- */
robert_lp 0:eedb7d567a5d 29
robert_lp 0:eedb7d567a5d 30 /**
robert_lp 0:eedb7d567a5d 31 \mainpage CMSIS NN Software Library
robert_lp 0:eedb7d567a5d 32 *
robert_lp 0:eedb7d567a5d 33 * Introduction
robert_lp 0:eedb7d567a5d 34 * ------------
robert_lp 0:eedb7d567a5d 35 *
robert_lp 0:eedb7d567a5d 36 * This user manual describes the CMSIS NN software library,
robert_lp 0:eedb7d567a5d 37 * a collection of efficient neural network kernels developed to maximize the
robert_lp 0:eedb7d567a5d 38 * performance and minimize the memory footprint of neural networks on Cortex-M processor cores.
robert_lp 0:eedb7d567a5d 39 *
robert_lp 0:eedb7d567a5d 40 * The library is divided into a number of functions each covering a specific category:
robert_lp 0:eedb7d567a5d 41 * - Neural Network Convolution Functions
robert_lp 0:eedb7d567a5d 42 * - Neural Network Activation Functions
robert_lp 0:eedb7d567a5d 43 * - Fully-connected Layer Functions
robert_lp 0:eedb7d567a5d 44 * - Neural Network Pooling Functions
robert_lp 0:eedb7d567a5d 45 * - Softmax Functions
robert_lp 0:eedb7d567a5d 46 * - Neural Network Support Functions
robert_lp 0:eedb7d567a5d 47 *
robert_lp 0:eedb7d567a5d 48 * The library has separate functions for operating on different weight and activation data
robert_lp 0:eedb7d567a5d 49 * types including 8-bit integers (q7_t) and 16-bit integers (q15_t). The descriptions of the
robert_lp 0:eedb7d567a5d 50 * kernels are included in the function descriptions. The implementation details are also
robert_lp 0:eedb7d567a5d 51 * described in this paper [1].
robert_lp 0:eedb7d567a5d 52 *
robert_lp 0:eedb7d567a5d 53 * Block Diagram
robert_lp 0:eedb7d567a5d 54 * --------
robert_lp 0:eedb7d567a5d 55 * \image html CMSIS-NN-OVERVIEW.PNG
robert_lp 0:eedb7d567a5d 56 *
robert_lp 0:eedb7d567a5d 57 * Examples
robert_lp 0:eedb7d567a5d 58 * --------
robert_lp 0:eedb7d567a5d 59 *
robert_lp 0:eedb7d567a5d 60 * The library ships with a number of examples which demonstrate how to use the library functions.
robert_lp 0:eedb7d567a5d 61 *
robert_lp 0:eedb7d567a5d 62 * Pre-processor Macros
robert_lp 0:eedb7d567a5d 63 * ------------
robert_lp 0:eedb7d567a5d 64 *
robert_lp 0:eedb7d567a5d 65 * Each library project has different pre-processor macros.
robert_lp 0:eedb7d567a5d 66 *
robert_lp 0:eedb7d567a5d 67 * - ARM_MATH_DSP:
robert_lp 0:eedb7d567a5d 68 *
robert_lp 0:eedb7d567a5d 69 * Define macro ARM_MATH_DSP if the silicon supports DSP instructions.
robert_lp 0:eedb7d567a5d 70 *
robert_lp 0:eedb7d567a5d 71 * - ARM_MATH_BIG_ENDIAN:
robert_lp 0:eedb7d567a5d 72 *
robert_lp 0:eedb7d567a5d 73 * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default, the library builds for little endian targets.
robert_lp 0:eedb7d567a5d 74 *
robert_lp 0:eedb7d567a5d 75 * - ARM_NN_TRUNCATE:
robert_lp 0:eedb7d567a5d 76 *
robert_lp 0:eedb7d567a5d 77 * Define macro ARM_NN_TRUNCATE to use floor instead of round-to-the-nearest-int for the computation.
robert_lp 0:eedb7d567a5d 78 *
robert_lp 0:eedb7d567a5d 79 * Copyright Notice
robert_lp 0:eedb7d567a5d 80 * ------------
robert_lp 0:eedb7d567a5d 81 *
robert_lp 0:eedb7d567a5d 82 * Copyright (C) 2010-2018 Arm Limited. All rights reserved.
robert_lp 0:eedb7d567a5d 83 *
robert_lp 0:eedb7d567a5d 84 * [1] CMSIS-NN: Efficient Neural Network Kernels for Arm Cortex-M CPUs https://arxiv.org/abs/1801.06601
robert_lp 0:eedb7d567a5d 85 */
robert_lp 0:eedb7d567a5d 86
robert_lp 0:eedb7d567a5d 87 /**
robert_lp 0:eedb7d567a5d 88 * @defgroup groupNN Neural Network Functions
robert_lp 0:eedb7d567a5d 89 * These functions perform basic operations for neural network layers.
robert_lp 0:eedb7d567a5d 90 */
robert_lp 0:eedb7d567a5d 91
robert_lp 0:eedb7d567a5d 92 #ifndef _ARM_NNFUNCTIONS_H
robert_lp 0:eedb7d567a5d 93 #define _ARM_NNFUNCTIONS_H
robert_lp 0:eedb7d567a5d 94
robert_lp 0:eedb7d567a5d 95 #include "arm_nnsupportfunctions.h"
robert_lp 0:eedb7d567a5d 96 #include "arm_nn_tables.h"
robert_lp 0:eedb7d567a5d 97
robert_lp 0:eedb7d567a5d 98 #define USE_INTRINSIC
robert_lp 0:eedb7d567a5d 99
robert_lp 0:eedb7d567a5d 100 //#define ARM_NN_TRUNCATE /* This configures the rounding mode: floor (when defined) instead of round to the nearest int */
robert_lp 0:eedb7d567a5d 101
robert_lp 0:eedb7d567a5d 102 #ifdef __cplusplus
robert_lp 0:eedb7d567a5d 103 extern "C"
robert_lp 0:eedb7d567a5d 104 {
robert_lp 0:eedb7d567a5d 105 #endif
robert_lp 0:eedb7d567a5d 106
robert_lp 0:eedb7d567a5d 107 /**
robert_lp 0:eedb7d567a5d 108 * @defgroup NNConv Neural Network Convolution Functions
robert_lp 0:eedb7d567a5d 109 *
robert_lp 0:eedb7d567a5d 110 * Perform convolution layer
robert_lp 0:eedb7d567a5d 111 *
robert_lp 0:eedb7d567a5d 112 * The convolution is implemented in 2 steps: im2col and GEMM
robert_lp 0:eedb7d567a5d 113 *
robert_lp 0:eedb7d567a5d 114 * im2col is a process of converting each patch of image data into
robert_lp 0:eedb7d567a5d 115 * a column. After im2col, the convolution is computed as matrix-matrix
robert_lp 0:eedb7d567a5d 116 * multiplication.
robert_lp 0:eedb7d567a5d 117 *
robert_lp 0:eedb7d567a5d 118 * To reduce the memory footprint, the im2col is performed partially.
robert_lp 0:eedb7d567a5d 119 * Each iteration, only a few columns (i.e., patches) are generated and
robert_lp 0:eedb7d567a5d 120 * computed with GEMM kernels similar to CMSIS-DSP arm_mat_mult functions.
robert_lp 0:eedb7d567a5d 121 *
robert_lp 0:eedb7d567a5d 122 */
robert_lp 0:eedb7d567a5d 123
robert_lp 0:eedb7d567a5d 124 /**
robert_lp 0:eedb7d567a5d 125 * @brief Basic Q7 convolution function
robert_lp 0:eedb7d567a5d 126 * @param[in] Im_in pointer to input tensor
robert_lp 0:eedb7d567a5d 127 * @param[in] dim_im_in input tensor dimension
robert_lp 0:eedb7d567a5d 128 * @param[in] ch_im_in number of input tensor channels
robert_lp 0:eedb7d567a5d 129 * @param[in] wt pointer to kernel weights
robert_lp 0:eedb7d567a5d 130 * @param[in] ch_im_out number of filters, i.e., output tensor channels
robert_lp 0:eedb7d567a5d 131 * @param[in] dim_kernel filter kernel size
robert_lp 0:eedb7d567a5d 132 * @param[in] padding padding sizes
robert_lp 0:eedb7d567a5d 133 * @param[in] stride convolution stride
robert_lp 0:eedb7d567a5d 134 * @param[in] bias pointer to bias
robert_lp 0:eedb7d567a5d 135 * @param[in] bias_shift amount of left-shift for bias
robert_lp 0:eedb7d567a5d 136 * @param[in] out_shift amount of right-shift for output
robert_lp 0:eedb7d567a5d 137 * @param[in,out] Im_out pointer to output tensor
robert_lp 0:eedb7d567a5d 138 * @param[in] dim_im_out output tensor dimension
robert_lp 0:eedb7d567a5d 139 * @param[in,out] bufferA pointer to buffer space for input
robert_lp 0:eedb7d567a5d 140 * @param[in,out] bufferB pointer to buffer space for output
robert_lp 0:eedb7d567a5d 141 * @return The function returns <code>ARM_MATH_SUCCESS</code>
robert_lp 0:eedb7d567a5d 142 *
robert_lp 0:eedb7d567a5d 143 */
robert_lp 0:eedb7d567a5d 144
robert_lp 0:eedb7d567a5d 145 arm_status arm_convolve_HWC_q7_basic(const q7_t * Im_in,
robert_lp 0:eedb7d567a5d 146 const uint16_t dim_im_in,
robert_lp 0:eedb7d567a5d 147 const uint16_t ch_im_in,
robert_lp 0:eedb7d567a5d 148 const q7_t * wt,
robert_lp 0:eedb7d567a5d 149 const uint16_t ch_im_out,
robert_lp 0:eedb7d567a5d 150 const uint16_t dim_kernel,
robert_lp 0:eedb7d567a5d 151 const uint16_t padding,
robert_lp 0:eedb7d567a5d 152 const uint16_t stride,
robert_lp 0:eedb7d567a5d 153 const q7_t * bias,
robert_lp 0:eedb7d567a5d 154 const uint16_t bias_shift,
robert_lp 0:eedb7d567a5d 155 const uint16_t out_shift,
robert_lp 0:eedb7d567a5d 156 q7_t * Im_out,
robert_lp 0:eedb7d567a5d 157 const uint16_t dim_im_out,
robert_lp 0:eedb7d567a5d 158 q15_t * bufferA,
robert_lp 0:eedb7d567a5d 159 q7_t * bufferB);
robert_lp 0:eedb7d567a5d 160
robert_lp 0:eedb7d567a5d 161 /**
robert_lp 0:eedb7d567a5d 162 * @brief Basic Q15 convolution function
robert_lp 0:eedb7d567a5d 163 * @param[in] Im_in pointer to input tensor
robert_lp 0:eedb7d567a5d 164 * @param[in] dim_im_in input tensor dimension
robert_lp 0:eedb7d567a5d 165 * @param[in] ch_im_in number of input tensor channels
robert_lp 0:eedb7d567a5d 166 * @param[in] wt pointer to kernel weights
robert_lp 0:eedb7d567a5d 167 * @param[in] ch_im_out number of filters, i.e., output tensor channels
robert_lp 0:eedb7d567a5d 168 * @param[in] dim_kernel filter kernel size
robert_lp 0:eedb7d567a5d 169 * @param[in] padding padding sizes
robert_lp 0:eedb7d567a5d 170 * @param[in] stride convolution stride
robert_lp 0:eedb7d567a5d 171 * @param[in] bias pointer to bias
robert_lp 0:eedb7d567a5d 172 * @param[in] bias_shift amount of left-shift for bias
robert_lp 0:eedb7d567a5d 173 * @param[in] out_shift amount of right-shift for output
robert_lp 0:eedb7d567a5d 174 * @param[in,out] Im_out pointer to output tensor
robert_lp 0:eedb7d567a5d 175 * @param[in] dim_im_out output tensor dimension
robert_lp 0:eedb7d567a5d 176 * @param[in,out] bufferA pointer to buffer space for input
robert_lp 0:eedb7d567a5d 177 * @param[in,out] bufferB pointer to buffer space for output
robert_lp 0:eedb7d567a5d 178 * @return The function returns <code>ARM_MATH_SUCCESS</code>
robert_lp 0:eedb7d567a5d 179 *
robert_lp 0:eedb7d567a5d 180 */
robert_lp 0:eedb7d567a5d 181
robert_lp 0:eedb7d567a5d 182 arm_status arm_convolve_HWC_q15_basic(const q15_t * Im_in,
robert_lp 0:eedb7d567a5d 183 const uint16_t dim_im_in,
robert_lp 0:eedb7d567a5d 184 const uint16_t ch_im_in,
robert_lp 0:eedb7d567a5d 185 const q15_t * wt,
robert_lp 0:eedb7d567a5d 186 const uint16_t ch_im_out,
robert_lp 0:eedb7d567a5d 187 const uint16_t dim_kernel,
robert_lp 0:eedb7d567a5d 188 const uint16_t padding,
robert_lp 0:eedb7d567a5d 189 const uint16_t stride,
robert_lp 0:eedb7d567a5d 190 const q15_t * bias,
robert_lp 0:eedb7d567a5d 191 const uint16_t bias_shift,
robert_lp 0:eedb7d567a5d 192 const uint16_t out_shift,
robert_lp 0:eedb7d567a5d 193 q15_t * Im_out,
robert_lp 0:eedb7d567a5d 194 const uint16_t dim_im_out,
robert_lp 0:eedb7d567a5d 195 q15_t * bufferA,
robert_lp 0:eedb7d567a5d 196 q7_t * bufferB);
robert_lp 0:eedb7d567a5d 197
robert_lp 0:eedb7d567a5d 198 /**
robert_lp 0:eedb7d567a5d 199 * @brief Fast Q7 convolution function
robert_lp 0:eedb7d567a5d 200 * @param[in] Im_in pointer to input tensor
robert_lp 0:eedb7d567a5d 201 * @param[in] dim_im_in input tensor dimension
robert_lp 0:eedb7d567a5d 202 * @param[in] ch_im_in number of input tensor channels
robert_lp 0:eedb7d567a5d 203 * @param[in] wt pointer to kernel weights
robert_lp 0:eedb7d567a5d 204 * @param[in] ch_im_out number of filters, i.e., output tensor channels
robert_lp 0:eedb7d567a5d 205 * @param[in] dim_kernel filter kernel size
robert_lp 0:eedb7d567a5d 206 * @param[in] padding padding sizes
robert_lp 0:eedb7d567a5d 207 * @param[in] stride convolution stride
robert_lp 0:eedb7d567a5d 208 * @param[in] bias pointer to bias
robert_lp 0:eedb7d567a5d 209 * @param[in] bias_shift amount of left-shift for bias
robert_lp 0:eedb7d567a5d 210 * @param[in] out_shift amount of right-shift for output
robert_lp 0:eedb7d567a5d 211 * @param[in,out] Im_out pointer to output tensor
robert_lp 0:eedb7d567a5d 212 * @param[in] dim_im_out output tensor dimension
robert_lp 0:eedb7d567a5d 213 * @param[in,out] bufferA pointer to buffer space for input
robert_lp 0:eedb7d567a5d 214 * @param[in,out] bufferB pointer to buffer space for output
robert_lp 0:eedb7d567a5d 215 * @return The function returns either
robert_lp 0:eedb7d567a5d 216 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
robert_lp 0:eedb7d567a5d 217 *
robert_lp 0:eedb7d567a5d 218 * This function is the version with full list of optimization tricks, but with
robert_lp 0:eedb7d567a5d 219 * some constraints:
robert_lp 0:eedb7d567a5d 220 * ch_im_in is multiple of 4
robert_lp 0:eedb7d567a5d 221 * ch_im_out is multiple of 2
robert_lp 0:eedb7d567a5d 222 */
robert_lp 0:eedb7d567a5d 223
robert_lp 0:eedb7d567a5d 224 arm_status arm_convolve_HWC_q7_fast(const q7_t * Im_in,
robert_lp 0:eedb7d567a5d 225 const uint16_t dim_im_in,
robert_lp 0:eedb7d567a5d 226 const uint16_t ch_im_in,
robert_lp 0:eedb7d567a5d 227 const q7_t * wt,
robert_lp 0:eedb7d567a5d 228 const uint16_t ch_im_out,
robert_lp 0:eedb7d567a5d 229 const uint16_t dim_kernel,
robert_lp 0:eedb7d567a5d 230 const uint16_t padding,
robert_lp 0:eedb7d567a5d 231 const uint16_t stride,
robert_lp 0:eedb7d567a5d 232 const q7_t * bias,
robert_lp 0:eedb7d567a5d 233 const uint16_t bias_shift,
robert_lp 0:eedb7d567a5d 234 const uint16_t out_shift,
robert_lp 0:eedb7d567a5d 235 q7_t * Im_out,
robert_lp 0:eedb7d567a5d 236 const uint16_t dim_im_out,
robert_lp 0:eedb7d567a5d 237 q15_t * bufferA,
robert_lp 0:eedb7d567a5d 238 q7_t * bufferB);
robert_lp 0:eedb7d567a5d 239
robert_lp 0:eedb7d567a5d 240 /**
robert_lp 0:eedb7d567a5d 241 * @brief Fast Q7 convolution function (non-square shape)
robert_lp 0:eedb7d567a5d 242 * @param[in] Im_in pointer to input tensor
robert_lp 0:eedb7d567a5d 243 * @param[in] dim_im_in_x input tensor dimension x
robert_lp 0:eedb7d567a5d 244 * @param[in] dim_im_in_y input tensor dimension y
robert_lp 0:eedb7d567a5d 245 * @param[in] ch_im_in number of input tensor channels
robert_lp 0:eedb7d567a5d 246 * @param[in] wt pointer to kernel weights
robert_lp 0:eedb7d567a5d 247 * @param[in] ch_im_out number of filters, i.e., output tensor channels
robert_lp 0:eedb7d567a5d 248 * @param[in] dim_kernel_x filter kernel size x
robert_lp 0:eedb7d567a5d 249 * @param[in] dim_kernel_y filter kernel size y
robert_lp 0:eedb7d567a5d 250 * @param[in] padding_x padding size x
robert_lp 0:eedb7d567a5d 251 * @param[in] padding_y padding size y
robert_lp 0:eedb7d567a5d 252 * @param[in] stride_x convolution stride x
robert_lp 0:eedb7d567a5d 253 * @param[in] stride_y convolution stride y
robert_lp 0:eedb7d567a5d 254 * @param[in] bias pointer to bias
robert_lp 0:eedb7d567a5d 255 * @param[in] bias_shift amount of left-shift for bias
robert_lp 0:eedb7d567a5d 256 * @param[in] out_shift amount of right-shift for output
robert_lp 0:eedb7d567a5d 257 * @param[in,out] Im_out pointer to output tensor
robert_lp 0:eedb7d567a5d 258 * @param[in] dim_im_out_x output tensor dimension x
robert_lp 0:eedb7d567a5d 259 * @param[in] dim_im_out_y output tensor dimension y
robert_lp 0:eedb7d567a5d 260 * @param[in,out] bufferA pointer to buffer space for input
robert_lp 0:eedb7d567a5d 261 * @param[in,out] bufferB pointer to buffer space for output
robert_lp 0:eedb7d567a5d 262 * @return The function returns either
robert_lp 0:eedb7d567a5d 263 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
robert_lp 0:eedb7d567a5d 264 *
robert_lp 0:eedb7d567a5d 265 * This function is the version with full list of optimization tricks, but with
robert_lp 0:eedb7d567a5d 266 * some constraints:
robert_lp 0:eedb7d567a5d 267 * ch_im_in is multiple of 4
robert_lp 0:eedb7d567a5d 268 * ch_im_out is multiple of 2
robert_lp 0:eedb7d567a5d 269 */
robert_lp 0:eedb7d567a5d 270
robert_lp 0:eedb7d567a5d 271 arm_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t * Im_in,
robert_lp 0:eedb7d567a5d 272 const uint16_t dim_im_in_x,
robert_lp 0:eedb7d567a5d 273 const uint16_t dim_im_in_y,
robert_lp 0:eedb7d567a5d 274 const uint16_t ch_im_in,
robert_lp 0:eedb7d567a5d 275 const q7_t * wt,
robert_lp 0:eedb7d567a5d 276 const uint16_t ch_im_out,
robert_lp 0:eedb7d567a5d 277 const uint16_t dim_kernel_x,
robert_lp 0:eedb7d567a5d 278 const uint16_t dim_kernel_y,
robert_lp 0:eedb7d567a5d 279 const uint16_t padding_x,
robert_lp 0:eedb7d567a5d 280 const uint16_t padding_y,
robert_lp 0:eedb7d567a5d 281 const uint16_t stride_x,
robert_lp 0:eedb7d567a5d 282 const uint16_t stride_y,
robert_lp 0:eedb7d567a5d 283 const q7_t * bias,
robert_lp 0:eedb7d567a5d 284 const uint16_t bias_shift,
robert_lp 0:eedb7d567a5d 285 const uint16_t out_shift,
robert_lp 0:eedb7d567a5d 286 q7_t * Im_out,
robert_lp 0:eedb7d567a5d 287 const uint16_t dim_im_out_x,
robert_lp 0:eedb7d567a5d 288 const uint16_t dim_im_out_y,
robert_lp 0:eedb7d567a5d 289 q15_t * bufferA,
robert_lp 0:eedb7d567a5d 290 q7_t * bufferB);
robert_lp 0:eedb7d567a5d 291
robert_lp 0:eedb7d567a5d 292 /**
robert_lp 0:eedb7d567a5d 293 * @brief Fast Q7 version of 1x1 convolution (non-square shape)
robert_lp 0:eedb7d567a5d 294 * @param[in] Im_in pointer to input tensor
robert_lp 0:eedb7d567a5d 295 * @param[in] dim_im_in_x input tensor dimension x
robert_lp 0:eedb7d567a5d 296 * @param[in] dim_im_in_y input tensor dimension y
robert_lp 0:eedb7d567a5d 297 * @param[in] ch_im_in number of input tensor channels
robert_lp 0:eedb7d567a5d 298 * @param[in] wt pointer to kernel weights
robert_lp 0:eedb7d567a5d 299 * @param[in] ch_im_out number of filters, i.e., output tensor channels
robert_lp 0:eedb7d567a5d 300 * @param[in] dim_kernel_x filter kernel size x
robert_lp 0:eedb7d567a5d 301 * @param[in] dim_kernel_y filter kernel size y
robert_lp 0:eedb7d567a5d 302 * @param[in] padding_x padding size x
robert_lp 0:eedb7d567a5d 303 * @param[in] padding_y padding size y
robert_lp 0:eedb7d567a5d 304 * @param[in] stride_x convolution stride x
robert_lp 0:eedb7d567a5d 305 * @param[in] stride_y convolution stride y
robert_lp 0:eedb7d567a5d 306 * @param[in] bias pointer to bias
robert_lp 0:eedb7d567a5d 307 * @param[in] bias_shift amount of left-shift for bias
robert_lp 0:eedb7d567a5d 308 * @param[in] out_shift amount of right-shift for output
robert_lp 0:eedb7d567a5d 309 * @param[in,out] Im_out pointer to output tensor
robert_lp 0:eedb7d567a5d 310 * @param[in] dim_im_out_x output tensor dimension x
robert_lp 0:eedb7d567a5d 311 * @param[in] dim_im_out_y output tensor dimension y
robert_lp 0:eedb7d567a5d 312 * @param[in,out] bufferA pointer to buffer space for input
robert_lp 0:eedb7d567a5d 313 * @param[in,out] bufferB pointer to buffer space for output
robert_lp 0:eedb7d567a5d 314 * @return The function returns either
robert_lp 0:eedb7d567a5d 315 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
robert_lp 0:eedb7d567a5d 316 *
robert_lp 0:eedb7d567a5d 317 * This function implements convolution with 1x1 kernel size (i.e., dim_kernel_x=1
robert_lp 0:eedb7d567a5d 318 * and dim_kernel_y=1). It can be used for
robert_lp 0:eedb7d567a5d 319 * second half of MobileNets after depthwise separable convolution.
robert_lp 0:eedb7d567a5d 320 *
robert_lp 0:eedb7d567a5d 321 * This function is the version with full list of optimization tricks, but with
robert_lp 0:eedb7d567a5d 322 * some constraints:
robert_lp 0:eedb7d567a5d 323 * ch_im_in is multiple of 4
robert_lp 0:eedb7d567a5d 324 * ch_im_out is multiple of 2
robert_lp 0:eedb7d567a5d 325 */
robert_lp 0:eedb7d567a5d 326 arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t * Im_in,
robert_lp 0:eedb7d567a5d 327 const uint16_t dim_im_in_x,
robert_lp 0:eedb7d567a5d 328 const uint16_t dim_im_in_y,
robert_lp 0:eedb7d567a5d 329 const uint16_t ch_im_in,
robert_lp 0:eedb7d567a5d 330 const q7_t * wt,
robert_lp 0:eedb7d567a5d 331 const uint16_t ch_im_out,
robert_lp 0:eedb7d567a5d 332 const uint16_t dim_kernel_x,
robert_lp 0:eedb7d567a5d 333 const uint16_t dim_kernel_y,
robert_lp 0:eedb7d567a5d 334 const uint16_t padding_x,
robert_lp 0:eedb7d567a5d 335 const uint16_t padding_y,
robert_lp 0:eedb7d567a5d 336 const uint16_t stride_x,
robert_lp 0:eedb7d567a5d 337 const uint16_t stride_y,
robert_lp 0:eedb7d567a5d 338 const q7_t * bias,
robert_lp 0:eedb7d567a5d 339 const uint16_t bias_shift,
robert_lp 0:eedb7d567a5d 340 const uint16_t out_shift,
robert_lp 0:eedb7d567a5d 341 q7_t * Im_out,
robert_lp 0:eedb7d567a5d 342 const uint16_t dim_im_out_x,
robert_lp 0:eedb7d567a5d 343 const uint16_t dim_im_out_y,
robert_lp 0:eedb7d567a5d 344 q15_t * bufferA,
robert_lp 0:eedb7d567a5d 345 q7_t * bufferB);
robert_lp 0:eedb7d567a5d 346
robert_lp 0:eedb7d567a5d 347 /**
robert_lp 0:eedb7d567a5d 348 * @brief Q7 version of convolution for RGB image
robert_lp 0:eedb7d567a5d 349 * @param[in] Im_in pointer to input tensor
robert_lp 0:eedb7d567a5d 350 * @param[in] dim_im_in input tensor dimension
robert_lp 0:eedb7d567a5d 351 * @param[in] ch_im_in number of input tensor channels
robert_lp 0:eedb7d567a5d 352 * @param[in] wt pointer to kernel weights
robert_lp 0:eedb7d567a5d 353 * @param[in] ch_im_out number of filters, i.e., output tensor channels
robert_lp 0:eedb7d567a5d 354 * @param[in] dim_kernel filter kernel size
robert_lp 0:eedb7d567a5d 355 * @param[in] padding padding sizes
robert_lp 0:eedb7d567a5d 356 * @param[in] stride convolution stride
robert_lp 0:eedb7d567a5d 357 * @param[in] bias pointer to bias
robert_lp 0:eedb7d567a5d 358 * @param[in] bias_shift amount of left-shift for bias
robert_lp 0:eedb7d567a5d 359 * @param[in] out_shift amount of right-shift for output
robert_lp 0:eedb7d567a5d 360 * @param[in,out] Im_out pointer to output tensor
robert_lp 0:eedb7d567a5d 361 * @param[in] dim_im_out output tensor dimension
robert_lp 0:eedb7d567a5d 362 * @param[in,out] bufferA pointer to buffer space for input
robert_lp 0:eedb7d567a5d 363 * @param[in,out] bufferB pointer to buffer space for output
robert_lp 0:eedb7d567a5d 364 * @return The function returns either
robert_lp 0:eedb7d567a5d 365 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
robert_lp 0:eedb7d567a5d 366 *
robert_lp 0:eedb7d567a5d 367 * This kernel is written exclusively for convolution with ch_im_in
robert_lp 0:eedb7d567a5d 368 * equal to 3. This applies to the first layer of CNNs, which takes an input
robert_lp 0:eedb7d567a5d 369 * image in RGB format.
robert_lp 0:eedb7d567a5d 370 */
robert_lp 0:eedb7d567a5d 371
robert_lp 0:eedb7d567a5d 372 arm_status arm_convolve_HWC_q7_RGB(const q7_t * Im_in,
robert_lp 0:eedb7d567a5d 373 const uint16_t dim_im_in,
robert_lp 0:eedb7d567a5d 374 const uint16_t ch_im_in,
robert_lp 0:eedb7d567a5d 375 const q7_t * wt,
robert_lp 0:eedb7d567a5d 376 const uint16_t ch_im_out,
robert_lp 0:eedb7d567a5d 377 const uint16_t dim_kernel,
robert_lp 0:eedb7d567a5d 378 const uint16_t padding,
robert_lp 0:eedb7d567a5d 379 const uint16_t stride,
robert_lp 0:eedb7d567a5d 380 const q7_t * bias,
robert_lp 0:eedb7d567a5d 381 const uint16_t bias_shift,
robert_lp 0:eedb7d567a5d 382 const uint16_t out_shift,
robert_lp 0:eedb7d567a5d 383 q7_t * Im_out,
robert_lp 0:eedb7d567a5d 384 const uint16_t dim_im_out,
robert_lp 0:eedb7d567a5d 385 q15_t * bufferA,
robert_lp 0:eedb7d567a5d 386 q7_t * bufferB);
robert_lp 0:eedb7d567a5d 387
robert_lp 0:eedb7d567a5d 388 /**
robert_lp 0:eedb7d567a5d 389 * @brief Fast Q15 convolution function
robert_lp 0:eedb7d567a5d 390 * @param[in] Im_in pointer to input tensor
robert_lp 0:eedb7d567a5d 391 * @param[in] dim_im_in input tensor dimension
robert_lp 0:eedb7d567a5d 392 * @param[in] ch_im_in number of input tensor channels
robert_lp 0:eedb7d567a5d 393 * @param[in] wt pointer to kernel weights
robert_lp 0:eedb7d567a5d 394 * @param[in] ch_im_out number of filters, i.e., output tensor channels
robert_lp 0:eedb7d567a5d 395 * @param[in] dim_kernel filter kernel size
robert_lp 0:eedb7d567a5d 396 * @param[in] padding padding sizes
robert_lp 0:eedb7d567a5d 397 * @param[in] stride convolution stride
robert_lp 0:eedb7d567a5d 398 * @param[in] bias pointer to bias
robert_lp 0:eedb7d567a5d 399 * @param[in] bias_shift amount of left-shift for bias
robert_lp 0:eedb7d567a5d 400 * @param[in] out_shift amount of right-shift for output
robert_lp 0:eedb7d567a5d 401 * @param[in,out] Im_out pointer to output tensor
robert_lp 0:eedb7d567a5d 402 * @param[in] dim_im_out output tensor dimension
robert_lp 0:eedb7d567a5d 403 * @param[in,out] bufferA pointer to buffer space for input
robert_lp 0:eedb7d567a5d 404 * @param[in,out] bufferB pointer to buffer space for output
robert_lp 0:eedb7d567a5d 405 * @return The function returns either
robert_lp 0:eedb7d567a5d 406 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
robert_lp 0:eedb7d567a5d 407 *
robert_lp 0:eedb7d567a5d 408 * This function is the version with full list of optimization tricks, but with
robert_lp 0:eedb7d567a5d 409 * some constraints:
robert_lp 0:eedb7d567a5d 410 * ch_im_in is multiple of 2
robert_lp 0:eedb7d567a5d 411 * ch_im_out is multiple of 2
robert_lp 0:eedb7d567a5d 412 */
robert_lp 0:eedb7d567a5d 413
robert_lp 0:eedb7d567a5d 414 arm_status arm_convolve_HWC_q15_fast(const q15_t * Im_in,
robert_lp 0:eedb7d567a5d 415 const uint16_t dim_im_in,
robert_lp 0:eedb7d567a5d 416 const uint16_t ch_im_in,
robert_lp 0:eedb7d567a5d 417 const q15_t * wt,
robert_lp 0:eedb7d567a5d 418 const uint16_t ch_im_out,
robert_lp 0:eedb7d567a5d 419 const uint16_t dim_kernel,
robert_lp 0:eedb7d567a5d 420 const uint16_t padding,
robert_lp 0:eedb7d567a5d 421 const uint16_t stride,
robert_lp 0:eedb7d567a5d 422 const q15_t * bias,
robert_lp 0:eedb7d567a5d 423 const uint16_t bias_shift,
robert_lp 0:eedb7d567a5d 424 const uint16_t out_shift,
robert_lp 0:eedb7d567a5d 425 q15_t * Im_out,
robert_lp 0:eedb7d567a5d 426 const uint16_t dim_im_out,
robert_lp 0:eedb7d567a5d 427 q15_t * bufferA,
robert_lp 0:eedb7d567a5d 428 q7_t * bufferB);
robert_lp 0:eedb7d567a5d 429
robert_lp 0:eedb7d567a5d 430 /**
robert_lp 0:eedb7d567a5d 431 * @brief Q7 depthwise separable convolution function
robert_lp 0:eedb7d567a5d 432 * @param[in] Im_in pointer to input tensor
robert_lp 0:eedb7d567a5d 433 * @param[in] dim_im_in input tensor dimension
robert_lp 0:eedb7d567a5d 434 * @param[in] ch_im_in number of input tensor channels
robert_lp 0:eedb7d567a5d 435 * @param[in] wt pointer to kernel weights
robert_lp 0:eedb7d567a5d 436 * @param[in] ch_im_out number of filters, i.e., output tensor channels
robert_lp 0:eedb7d567a5d 437 * @param[in] dim_kernel filter kernel size
robert_lp 0:eedb7d567a5d 438 * @param[in] padding padding sizes
robert_lp 0:eedb7d567a5d 439 * @param[in] stride convolution stride
robert_lp 0:eedb7d567a5d 440 * @param[in] bias pointer to bias
robert_lp 0:eedb7d567a5d 441 * @param[in] bias_shift amount of left-shift for bias
robert_lp 0:eedb7d567a5d 442 * @param[in] out_shift amount of right-shift for output
robert_lp 0:eedb7d567a5d 443 * @param[in,out] Im_out pointer to output tensor
robert_lp 0:eedb7d567a5d 444 * @param[in] dim_im_out output tensor dimension
robert_lp 0:eedb7d567a5d 445 * @param[in,out] bufferA pointer to buffer space for input
robert_lp 0:eedb7d567a5d 446 * @param[in,out] bufferB pointer to buffer space for output
robert_lp 0:eedb7d567a5d 447 * @return The function returns either
robert_lp 0:eedb7d567a5d 448 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
robert_lp 0:eedb7d567a5d 449 *
robert_lp 0:eedb7d567a5d 450 * This function is the version with full list of optimization tricks, but with
robert_lp 0:eedb7d567a5d 451 * some constraints:
robert_lp 0:eedb7d567a5d 452 * ch_im_in is multiple of 2
robert_lp 0:eedb7d567a5d 453 * ch_im_out is multiple of 2
robert_lp 0:eedb7d567a5d 454 */
robert_lp 0:eedb7d567a5d 455
robert_lp 0:eedb7d567a5d 456 arm_status arm_depthwise_separable_conv_HWC_q7(const q7_t * Im_in,
robert_lp 0:eedb7d567a5d 457 const uint16_t dim_im_in,
robert_lp 0:eedb7d567a5d 458 const uint16_t ch_im_in,
robert_lp 0:eedb7d567a5d 459 const q7_t * wt,
robert_lp 0:eedb7d567a5d 460 const uint16_t ch_im_out,
robert_lp 0:eedb7d567a5d 461 const uint16_t dim_kernel,
robert_lp 0:eedb7d567a5d 462 const uint16_t padding,
robert_lp 0:eedb7d567a5d 463 const uint16_t stride,
robert_lp 0:eedb7d567a5d 464 const q7_t * bias,
robert_lp 0:eedb7d567a5d 465 const uint16_t bias_shift,
robert_lp 0:eedb7d567a5d 466 const uint16_t out_shift,
robert_lp 0:eedb7d567a5d 467 q7_t * Im_out,
robert_lp 0:eedb7d567a5d 468 const uint16_t dim_im_out,
robert_lp 0:eedb7d567a5d 469 q15_t * bufferA,
robert_lp 0:eedb7d567a5d 470 q7_t * bufferB);
robert_lp 0:eedb7d567a5d 471
robert_lp 0:eedb7d567a5d 472 /**
robert_lp 0:eedb7d567a5d 473 * @brief Q7 depthwise separable convolution function (non-square shape)
robert_lp 0:eedb7d567a5d 474 * @param[in] Im_in pointer to input tensor
robert_lp 0:eedb7d567a5d 475 * @param[in] dim_im_in_x input tensor dimension x
robert_lp 0:eedb7d567a5d 476 * @param[in] dim_im_in_y input tensor dimension y
robert_lp 0:eedb7d567a5d 477 * @param[in] ch_im_in number of input tensor channels
robert_lp 0:eedb7d567a5d 478 * @param[in] wt pointer to kernel weights
robert_lp 0:eedb7d567a5d 479 * @param[in] ch_im_out number of filters, i.e., output tensor channels
robert_lp 0:eedb7d567a5d 480 * @param[in] dim_kernel_x filter kernel size x
robert_lp 0:eedb7d567a5d 481 * @param[in] dim_kernel_y filter kernel size y
robert_lp 0:eedb7d567a5d 482 * @param[in] padding_x padding sizes x
robert_lp 0:eedb7d567a5d 483 * @param[in] padding_y padding sizes y
robert_lp 0:eedb7d567a5d 484 * @param[in] stride_x convolution stride x
robert_lp 0:eedb7d567a5d 485 * @param[in] stride_y convolution stride y
robert_lp 0:eedb7d567a5d 486 * @param[in] bias pointer to bias
robert_lp 0:eedb7d567a5d 487 * @param[in] bias_shift amount of left-shift for bias
robert_lp 0:eedb7d567a5d 488 * @param[in] out_shift amount of right-shift for output
robert_lp 0:eedb7d567a5d 489 * @param[in,out] Im_out pointer to output tensor
robert_lp 0:eedb7d567a5d 490 * @param[in] dim_im_out_x output tensor dimension x
robert_lp 0:eedb7d567a5d 491 * @param[in] dim_im_out_y output tensor dimension y
robert_lp 0:eedb7d567a5d 492 * @param[in,out] bufferA pointer to buffer space for input
robert_lp 0:eedb7d567a5d 493 * @param[in,out] bufferB pointer to buffer space for output
robert_lp 0:eedb7d567a5d 494 * @return The function returns either
robert_lp 0:eedb7d567a5d 495 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
robert_lp 0:eedb7d567a5d 496 *
robert_lp 0:eedb7d567a5d 497 * This function is the version with full list of optimization tricks, but with
robert_lp 0:eedb7d567a5d 498 * some constraints:
robert_lp 0:eedb7d567a5d 499 * ch_im_in is multiple of 2
robert_lp 0:eedb7d567a5d 500 * ch_im_out is multiple of 2
robert_lp 0:eedb7d567a5d 501 */
robert_lp 0:eedb7d567a5d 502 arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t * Im_in,
robert_lp 0:eedb7d567a5d 503 const uint16_t dim_im_in_x,
robert_lp 0:eedb7d567a5d 504 const uint16_t dim_im_in_y,
robert_lp 0:eedb7d567a5d 505 const uint16_t ch_im_in,
robert_lp 0:eedb7d567a5d 506 const q7_t * wt,
robert_lp 0:eedb7d567a5d 507 const uint16_t ch_im_out,
robert_lp 0:eedb7d567a5d 508 const uint16_t dim_kernel_x,
robert_lp 0:eedb7d567a5d 509 const uint16_t dim_kernel_y,
robert_lp 0:eedb7d567a5d 510 const uint16_t padding_x,
robert_lp 0:eedb7d567a5d 511 const uint16_t padding_y,
robert_lp 0:eedb7d567a5d 512 const uint16_t stride_x,
robert_lp 0:eedb7d567a5d 513 const uint16_t stride_y,
robert_lp 0:eedb7d567a5d 514 const q7_t * bias,
robert_lp 0:eedb7d567a5d 515 const uint16_t bias_shift,
robert_lp 0:eedb7d567a5d 516 const uint16_t out_shift,
robert_lp 0:eedb7d567a5d 517 q7_t * Im_out,
robert_lp 0:eedb7d567a5d 518 const uint16_t dim_im_out_x,
robert_lp 0:eedb7d567a5d 519 const uint16_t dim_im_out_y,
robert_lp 0:eedb7d567a5d 520 q15_t * bufferA,
robert_lp 0:eedb7d567a5d 521 q7_t * bufferB);
robert_lp 0:eedb7d567a5d 522
robert_lp 0:eedb7d567a5d 523
robert_lp 0:eedb7d567a5d 524 /**
robert_lp 0:eedb7d567a5d 525 * @defgroup FC Fully-connected Layer Functions
robert_lp 0:eedb7d567a5d 526 *
robert_lp 0:eedb7d567a5d 527 * Perform fully-connected layer
robert_lp 0:eedb7d567a5d 528 *
robert_lp 0:eedb7d567a5d 529 * Fully-connected layer is basically a matrix-vector multiplication
robert_lp 0:eedb7d567a5d 530 * with bias. The matrix is the weights and the input/output vectors
robert_lp 0:eedb7d567a5d 531 * are the activation values. Supported {weight, activation} precisions
robert_lp 0:eedb7d567a5d 532 * include {8-bit, 8-bit}, {16-bit, 16-bit}, and {8-bit, 16-bit}.
robert_lp 0:eedb7d567a5d 533 *
robert_lp 0:eedb7d567a5d 534 * Here we have two types of kernel functions. The basic functions
robert_lp 0:eedb7d567a5d 535 * implement the layer using the regular GEMV approach. The opt functions
robert_lp 0:eedb7d567a5d 536 * operate on weights stored in interleaved formats.
robert_lp 0:eedb7d567a5d 537 *
robert_lp 0:eedb7d567a5d 538 */
robert_lp 0:eedb7d567a5d 539
robert_lp 0:eedb7d567a5d 540 /**
robert_lp 0:eedb7d567a5d 541 * @brief Q7 basic fully-connected layer function
robert_lp 0:eedb7d567a5d 542 * @param[in] pV pointer to input vector
robert_lp 0:eedb7d567a5d 543 * @param[in] pM pointer to matrix weights
robert_lp 0:eedb7d567a5d 544 * @param[in] dim_vec length of the vector
robert_lp 0:eedb7d567a5d 545 * @param[in] num_of_rows number of rows in weight matrix
robert_lp 0:eedb7d567a5d 546 * @param[in] bias_shift amount of left-shift for bias
robert_lp 0:eedb7d567a5d 547 * @param[in] out_shift amount of right-shift for output
robert_lp 0:eedb7d567a5d 548 * @param[in] bias pointer to bias
robert_lp 0:eedb7d567a5d 549 * @param[in,out] pOut pointer to output vector
robert_lp 0:eedb7d567a5d 550 * @param[in,out] vec_buffer pointer to buffer space for input
robert_lp 0:eedb7d567a5d 551 * @return The function returns <code>ARM_MATH_SUCCESS</code>
robert_lp 0:eedb7d567a5d 552 * @note Basic variant: matrix-vector multiplication with bias using the regular GEMV approach (see the FC group description).
robert_lp 0:eedb7d567a5d 553 */
robert_lp 0:eedb7d567a5d 554
robert_lp 0:eedb7d567a5d 555 arm_status arm_fully_connected_q7(const q7_t * pV,
robert_lp 0:eedb7d567a5d 556 const q7_t * pM,
robert_lp 0:eedb7d567a5d 557 const uint16_t dim_vec,
robert_lp 0:eedb7d567a5d 558 const uint16_t num_of_rows,
robert_lp 0:eedb7d567a5d 559 const uint16_t bias_shift,
robert_lp 0:eedb7d567a5d 560 const uint16_t out_shift,
robert_lp 0:eedb7d567a5d 561 const q7_t * bias,
robert_lp 0:eedb7d567a5d 562 q7_t * pOut,
robert_lp 0:eedb7d567a5d 563 q15_t * vec_buffer);
robert_lp 0:eedb7d567a5d 564
robert_lp 0:eedb7d567a5d 565 /**
robert_lp 0:eedb7d567a5d 566 * @brief Q7 opt fully-connected layer function
robert_lp 0:eedb7d567a5d 567 * @param[in] pV pointer to input vector
robert_lp 0:eedb7d567a5d 568 * @param[in] pM pointer to matrix weights
robert_lp 0:eedb7d567a5d 569 * @param[in] dim_vec length of the vector
robert_lp 0:eedb7d567a5d 570 * @param[in] num_of_rows number of rows in weight matrix
robert_lp 0:eedb7d567a5d 571 * @param[in] bias_shift amount of left-shift for bias
robert_lp 0:eedb7d567a5d 572 * @param[in] out_shift amount of right-shift for output
robert_lp 0:eedb7d567a5d 573 * @param[in] bias pointer to bias
robert_lp 0:eedb7d567a5d 574 * @param[in,out] pOut pointer to output vector
robert_lp 0:eedb7d567a5d 575 * @param[in,out] vec_buffer pointer to buffer space for input
robert_lp 0:eedb7d567a5d 576 * @return The function returns <code>ARM_MATH_SUCCESS</code>
robert_lp 0:eedb7d567a5d 577 * @note Opt variant: operates on weights stored in an interleaved format (see the FC group description).
robert_lp 0:eedb7d567a5d 578 */
robert_lp 0:eedb7d567a5d 579
robert_lp 0:eedb7d567a5d 580 arm_status arm_fully_connected_q7_opt(const q7_t * pV,
robert_lp 0:eedb7d567a5d 581 const q7_t * pM,
robert_lp 0:eedb7d567a5d 582 const uint16_t dim_vec,
robert_lp 0:eedb7d567a5d 583 const uint16_t num_of_rows,
robert_lp 0:eedb7d567a5d 584 const uint16_t bias_shift,
robert_lp 0:eedb7d567a5d 585 const uint16_t out_shift,
robert_lp 0:eedb7d567a5d 586 const q7_t * bias,
robert_lp 0:eedb7d567a5d 587 q7_t * pOut,
robert_lp 0:eedb7d567a5d 588 q15_t * vec_buffer);
robert_lp 0:eedb7d567a5d 589
robert_lp 0:eedb7d567a5d 590 /**
robert_lp 0:eedb7d567a5d 591 * @brief Q15 basic fully-connected layer function
robert_lp 0:eedb7d567a5d 592 * @param[in] pV pointer to input vector
robert_lp 0:eedb7d567a5d 593 * @param[in] pM pointer to matrix weights
robert_lp 0:eedb7d567a5d 594 * @param[in] dim_vec length of the vector
robert_lp 0:eedb7d567a5d 595 * @param[in] num_of_rows number of rows in weight matrix
robert_lp 0:eedb7d567a5d 596 * @param[in] bias_shift amount of left-shift for bias
robert_lp 0:eedb7d567a5d 597 * @param[in] out_shift amount of right-shift for output
robert_lp 0:eedb7d567a5d 598 * @param[in] bias pointer to bias
robert_lp 0:eedb7d567a5d 599 * @param[in,out] pOut pointer to output vector
robert_lp 0:eedb7d567a5d 600 * @param[in,out] vec_buffer pointer to buffer space for input
robert_lp 0:eedb7d567a5d 601 * @return The function returns <code>ARM_MATH_SUCCESS</code>
robert_lp 0:eedb7d567a5d 602 * @note Basic variant: matrix-vector multiplication with bias using the regular GEMV approach (see the FC group description).
robert_lp 0:eedb7d567a5d 603 */
robert_lp 0:eedb7d567a5d 604
robert_lp 0:eedb7d567a5d 605 arm_status arm_fully_connected_q15(const q15_t * pV,
robert_lp 0:eedb7d567a5d 606 const q15_t * pM,
robert_lp 0:eedb7d567a5d 607 const uint16_t dim_vec,
robert_lp 0:eedb7d567a5d 608 const uint16_t num_of_rows,
robert_lp 0:eedb7d567a5d 609 const uint16_t bias_shift,
robert_lp 0:eedb7d567a5d 610 const uint16_t out_shift,
robert_lp 0:eedb7d567a5d 611 const q15_t * bias,
robert_lp 0:eedb7d567a5d 612 q15_t * pOut,
robert_lp 0:eedb7d567a5d 613 q15_t * vec_buffer);
robert_lp 0:eedb7d567a5d 614
robert_lp 0:eedb7d567a5d 615 /**
robert_lp 0:eedb7d567a5d 616 * @brief Q15 opt fully-connected layer function
robert_lp 0:eedb7d567a5d 617 * @param[in] pV pointer to input vector
robert_lp 0:eedb7d567a5d 618 * @param[in] pM pointer to matrix weights
robert_lp 0:eedb7d567a5d 619 * @param[in] dim_vec length of the vector
robert_lp 0:eedb7d567a5d 620 * @param[in] num_of_rows number of rows in weight matrix
robert_lp 0:eedb7d567a5d 621 * @param[in] bias_shift amount of left-shift for bias
robert_lp 0:eedb7d567a5d 622 * @param[in] out_shift amount of right-shift for output
robert_lp 0:eedb7d567a5d 623 * @param[in] bias pointer to bias
robert_lp 0:eedb7d567a5d 624 * @param[in,out] pOut pointer to output vector
robert_lp 0:eedb7d567a5d 625 * @param[in,out] vec_buffer pointer to buffer space for input
robert_lp 0:eedb7d567a5d 626 * @return The function returns <code>ARM_MATH_SUCCESS</code>
robert_lp 0:eedb7d567a5d 627 * @note Opt variant: operates on weights stored in an interleaved format (see the FC group description).
robert_lp 0:eedb7d567a5d 628 */
robert_lp 0:eedb7d567a5d 629
robert_lp 0:eedb7d567a5d 630 arm_status arm_fully_connected_q15_opt(const q15_t * pV,
robert_lp 0:eedb7d567a5d 631 const q15_t * pM,
robert_lp 0:eedb7d567a5d 632 const uint16_t dim_vec,
robert_lp 0:eedb7d567a5d 633 const uint16_t num_of_rows,
robert_lp 0:eedb7d567a5d 634 const uint16_t bias_shift,
robert_lp 0:eedb7d567a5d 635 const uint16_t out_shift,
robert_lp 0:eedb7d567a5d 636 const q15_t * bias,
robert_lp 0:eedb7d567a5d 637 q15_t * pOut,
robert_lp 0:eedb7d567a5d 638 q15_t * vec_buffer);
robert_lp 0:eedb7d567a5d 639
robert_lp 0:eedb7d567a5d 640 /**
robert_lp 0:eedb7d567a5d 641 * @brief Mixed Q15-Q7 fully-connected layer function
robert_lp 0:eedb7d567a5d 642 * @param[in] pV pointer to input vector
robert_lp 0:eedb7d567a5d 643 * @param[in] pM pointer to matrix weights
robert_lp 0:eedb7d567a5d 644 * @param[in] dim_vec length of the vector
robert_lp 0:eedb7d567a5d 645 * @param[in] num_of_rows number of rows in weight matrix
robert_lp 0:eedb7d567a5d 646 * @param[in] bias_shift amount of left-shift for bias
robert_lp 0:eedb7d567a5d 647 * @param[in] out_shift amount of right-shift for output
robert_lp 0:eedb7d567a5d 648 * @param[in] bias pointer to bias
robert_lp 0:eedb7d567a5d 649 * @param[in,out] pOut pointer to output vector
robert_lp 0:eedb7d567a5d 650 * @param[in,out] vec_buffer pointer to buffer space for input
robert_lp 0:eedb7d567a5d 651 * @return The function returns <code>ARM_MATH_SUCCESS</code>
robert_lp 0:eedb7d567a5d 652 * @note Operand types: the input and output vectors are q15_t; the weights and bias are q7_t.
robert_lp 0:eedb7d567a5d 653 */
robert_lp 0:eedb7d567a5d 654
robert_lp 0:eedb7d567a5d 655 arm_status arm_fully_connected_mat_q7_vec_q15(const q15_t * pV,
robert_lp 0:eedb7d567a5d 656 const q7_t * pM,
robert_lp 0:eedb7d567a5d 657 const uint16_t dim_vec,
robert_lp 0:eedb7d567a5d 658 const uint16_t num_of_rows,
robert_lp 0:eedb7d567a5d 659 const uint16_t bias_shift,
robert_lp 0:eedb7d567a5d 660 const uint16_t out_shift,
robert_lp 0:eedb7d567a5d 661 const q7_t * bias,
robert_lp 0:eedb7d567a5d 662 q15_t * pOut,
robert_lp 0:eedb7d567a5d 663 q15_t * vec_buffer);
robert_lp 0:eedb7d567a5d 664
robert_lp 0:eedb7d567a5d 665 /**
robert_lp 0:eedb7d567a5d 666 * @brief Mixed Q15-Q7 opt fully-connected layer function
robert_lp 0:eedb7d567a5d 667 * @param[in] pV pointer to input vector
robert_lp 0:eedb7d567a5d 668 * @param[in] pM pointer to matrix weights
robert_lp 0:eedb7d567a5d 669 * @param[in] dim_vec length of the vector
robert_lp 0:eedb7d567a5d 670 * @param[in] num_of_rows number of rows in weight matrix
robert_lp 0:eedb7d567a5d 671 * @param[in] bias_shift amount of left-shift for bias
robert_lp 0:eedb7d567a5d 672 * @param[in] out_shift amount of right-shift for output
robert_lp 0:eedb7d567a5d 673 * @param[in] bias pointer to bias
robert_lp 0:eedb7d567a5d 674 * @param[in,out] pOut pointer to output vector
robert_lp 0:eedb7d567a5d 675 * @param[in,out] vec_buffer pointer to buffer space for input
robert_lp 0:eedb7d567a5d 676 * @return The function returns <code>ARM_MATH_SUCCESS</code>
robert_lp 0:eedb7d567a5d 677 * @note Opt variant: q15_t vectors with q7_t weights and bias; weights are in an interleaved format (see the FC group description).
robert_lp 0:eedb7d567a5d 678 */
robert_lp 0:eedb7d567a5d 679
robert_lp 0:eedb7d567a5d 680 arm_status arm_fully_connected_mat_q7_vec_q15_opt(const q15_t * pV,
robert_lp 0:eedb7d567a5d 681 const q7_t * pM,
robert_lp 0:eedb7d567a5d 682 const uint16_t dim_vec,
robert_lp 0:eedb7d567a5d 683 const uint16_t num_of_rows,
robert_lp 0:eedb7d567a5d 684 const uint16_t bias_shift,
robert_lp 0:eedb7d567a5d 685 const uint16_t out_shift,
robert_lp 0:eedb7d567a5d 686 const q7_t * bias,
robert_lp 0:eedb7d567a5d 687 q15_t * pOut,
robert_lp 0:eedb7d567a5d 688 q15_t * vec_buffer);
robert_lp 0:eedb7d567a5d 689
robert_lp 0:eedb7d567a5d 690 /**
robert_lp 0:eedb7d567a5d 691 * @brief Matrix-Multiplication Kernels for Convolution
robert_lp 0:eedb7d567a5d 692 *
robert_lp 0:eedb7d567a5d 693 * These functions are used within convolution layer functions for
robert_lp 0:eedb7d567a5d 694 * matrix multiplication.
robert_lp 0:eedb7d567a5d 695 *
robert_lp 0:eedb7d567a5d 696 * The implementation is similar to the CMSIS-DSP arm_mat_mult functions,
robert_lp 0:eedb7d567a5d 697 * with one Q7 and one Q15 operand. The Q15 operand is the im2col
robert_lp 0:eedb7d567a5d 698 * output, which always has 2 columns.
robert_lp 0:eedb7d567a5d 699 *
robert_lp 0:eedb7d567a5d 700 */
robert_lp 0:eedb7d567a5d 701
robert_lp 0:eedb7d567a5d 702 /**
robert_lp 0:eedb7d567a5d 703 * @brief Matrix-multiplication function for convolution
robert_lp 0:eedb7d567a5d 704 * @param[in] pA pointer to operand A
robert_lp 0:eedb7d567a5d 705 * @param[in] pInBuffer pointer to operand B, always consists of 2 vectors
robert_lp 0:eedb7d567a5d 706 * @param[in] ch_im_out numRow of A
robert_lp 0:eedb7d567a5d 707 * @param[in] numCol_A numCol of A
robert_lp 0:eedb7d567a5d 708 * @param[in] bias_shift amount of left-shift for bias
robert_lp 0:eedb7d567a5d 709 * @param[in] out_shift amount of right-shift for output
robert_lp 0:eedb7d567a5d 710 * @param[in] bias the bias
robert_lp 0:eedb7d567a5d 711 * @param[in,out] pOut pointer to output
robert_lp 0:eedb7d567a5d 712 * @return The function returns the incremented output pointer
robert_lp 0:eedb7d567a5d 713 */
robert_lp 0:eedb7d567a5d 714
robert_lp 0:eedb7d567a5d 715 q7_t *arm_nn_mat_mult_kernel_q7_q15(const q7_t * pA,
robert_lp 0:eedb7d567a5d 716 const q15_t * pInBuffer,
robert_lp 0:eedb7d567a5d 717 const uint16_t ch_im_out,
robert_lp 0:eedb7d567a5d 718 const uint16_t numCol_A,
robert_lp 0:eedb7d567a5d 719 const uint16_t bias_shift,
robert_lp 0:eedb7d567a5d 720 const uint16_t out_shift,
robert_lp 0:eedb7d567a5d 721 const q7_t * bias,
robert_lp 0:eedb7d567a5d 722 q7_t * pOut);
robert_lp 0:eedb7d567a5d 723
robert_lp 0:eedb7d567a5d 724 /**
robert_lp 0:eedb7d567a5d 725 * @brief Matrix-multiplication function for convolution with reordered columns
robert_lp 0:eedb7d567a5d 726 * @param[in] pA pointer to operand A
robert_lp 0:eedb7d567a5d 727 * @param[in] pInBuffer pointer to operand B, always consists of 2 vectors
robert_lp 0:eedb7d567a5d 728 * @param[in] ch_im_out numRow of A
robert_lp 0:eedb7d567a5d 729 * @param[in] numCol_A numCol of A
robert_lp 0:eedb7d567a5d 730 * @param[in] bias_shift amount of left-shift for bias
robert_lp 0:eedb7d567a5d 731 * @param[in] out_shift amount of right-shift for output
robert_lp 0:eedb7d567a5d 732 * @param[in] bias the bias
robert_lp 0:eedb7d567a5d 733 * @param[in,out] pOut pointer to output
robert_lp 0:eedb7d567a5d 734 * @return The function returns the incremented output pointer
robert_lp 0:eedb7d567a5d 735 */
robert_lp 0:eedb7d567a5d 736
robert_lp 0:eedb7d567a5d 737 q7_t *arm_nn_mat_mult_kernel_q7_q15_reordered(const q7_t * pA,
robert_lp 0:eedb7d567a5d 738 const q15_t * pInBuffer,
robert_lp 0:eedb7d567a5d 739 const uint16_t ch_im_out,
robert_lp 0:eedb7d567a5d 740 const uint16_t numCol_A,
robert_lp 0:eedb7d567a5d 741 const uint16_t bias_shift,
robert_lp 0:eedb7d567a5d 742 const uint16_t out_shift,
robert_lp 0:eedb7d567a5d 743 const q7_t * bias,
robert_lp 0:eedb7d567a5d 744 q7_t * pOut);
robert_lp 0:eedb7d567a5d 745
robert_lp 0:eedb7d567a5d 746 #ifdef __cplusplus
robert_lp 0:eedb7d567a5d 747 }
robert_lp 0:eedb7d567a5d 748 #endif
robert_lp 0:eedb7d567a5d 749
robert_lp 0:eedb7d567a5d 750 /*
robert_lp 0:eedb7d567a5d 751 * Other functions
robert_lp 0:eedb7d567a5d 752 * These layers are typically not timing critical
robert_lp 0:eedb7d567a5d 753 * Basic implementation is supported here
robert_lp 0:eedb7d567a5d 754 */
robert_lp 0:eedb7d567a5d 755
robert_lp 0:eedb7d567a5d 756 #ifdef __cplusplus
robert_lp 0:eedb7d567a5d 757 extern "C"
robert_lp 0:eedb7d567a5d 758 {
robert_lp 0:eedb7d567a5d 759 #endif
robert_lp 0:eedb7d567a5d 760
robert_lp 0:eedb7d567a5d 761 /**
robert_lp 0:eedb7d567a5d 762 * @defgroup Acti Neural Network Activation Functions
robert_lp 0:eedb7d567a5d 763 *
robert_lp 0:eedb7d567a5d 764 * Perform activation layers, including ReLU (Rectified Linear Unit),
robert_lp 0:eedb7d567a5d 765 * sigmoid and tanh
robert_lp 0:eedb7d567a5d 766 *
robert_lp 0:eedb7d567a5d 767 */
robert_lp 0:eedb7d567a5d 768
robert_lp 0:eedb7d567a5d 769 /**
robert_lp 0:eedb7d567a5d 770 * @brief Q7 ReLU (Rectified Linear Unit) function
robert_lp 0:eedb7d567a5d 771 * @param[in,out] data pointer to the data, used as both input and output
robert_lp 0:eedb7d567a5d 772 * @param[in] size number of elements
robert_lp 0:eedb7d567a5d 773 * @return none.
robert_lp 0:eedb7d567a5d 774 */
robert_lp 0:eedb7d567a5d 775
robert_lp 0:eedb7d567a5d 776 void arm_relu_q7(q7_t * data, uint16_t size);
robert_lp 0:eedb7d567a5d 777
robert_lp 0:eedb7d567a5d 778 /**
robert_lp 0:eedb7d567a5d 779 * @brief Q15 ReLU (Rectified Linear Unit) function
robert_lp 0:eedb7d567a5d 780 * @param[in,out] data pointer to the data, used as both input and output
robert_lp 0:eedb7d567a5d 781 * @param[in] size number of elements
robert_lp 0:eedb7d567a5d 782 * @return none.
robert_lp 0:eedb7d567a5d 783 */
robert_lp 0:eedb7d567a5d 784
robert_lp 0:eedb7d567a5d 785 void arm_relu_q15(q15_t * data, uint16_t size);
robert_lp 0:eedb7d567a5d 786
robert_lp 0:eedb7d567a5d 787 /**
robert_lp 0:eedb7d567a5d 788 * @brief Q7 neural network activation function using direct table look-up
robert_lp 0:eedb7d567a5d 789 * @param[in,out] data pointer to the data, used as both input and output
robert_lp 0:eedb7d567a5d 790 * @param[in] size number of elements
robert_lp 0:eedb7d567a5d 791 * @param[in] int_width bit-width of the integer part, assumed to be smaller than 3
robert_lp 0:eedb7d567a5d 792 * @param[in] type type of activation function
robert_lp 0:eedb7d567a5d 793 * @return none.
robert_lp 0:eedb7d567a5d 794 */
robert_lp 0:eedb7d567a5d 795
robert_lp 0:eedb7d567a5d 796 void arm_nn_activations_direct_q7(q7_t * data, uint16_t size, uint16_t int_width,
robert_lp 0:eedb7d567a5d 797 arm_nn_activation_type type);
robert_lp 0:eedb7d567a5d 798
robert_lp 0:eedb7d567a5d 799 /**
robert_lp 0:eedb7d567a5d 800 * @brief Q15 neural network activation function using direct table look-up
robert_lp 0:eedb7d567a5d 801 * @param[in,out] data pointer to the data, used as both input and output
robert_lp 0:eedb7d567a5d 802 * @param[in] size number of elements
robert_lp 0:eedb7d567a5d 803 * @param[in] int_width bit-width of the integer part, assumed to be smaller than 3
robert_lp 0:eedb7d567a5d 804 * @param[in] type type of activation function
robert_lp 0:eedb7d567a5d 805 * @return none.
robert_lp 0:eedb7d567a5d 806 */
robert_lp 0:eedb7d567a5d 807
robert_lp 0:eedb7d567a5d 808 void arm_nn_activations_direct_q15(q15_t * data, uint16_t size, uint16_t int_width,
robert_lp 0:eedb7d567a5d 809 arm_nn_activation_type type);
robert_lp 0:eedb7d567a5d 810
robert_lp 0:eedb7d567a5d 811 /**
robert_lp 0:eedb7d567a5d 812 * @defgroup Pooling Neural Network Pooling Functions
robert_lp 0:eedb7d567a5d 813 *
robert_lp 0:eedb7d567a5d 814 * Perform pooling functions, including max pooling and average pooling
robert_lp 0:eedb7d567a5d 815 *
robert_lp 0:eedb7d567a5d 816 */
robert_lp 0:eedb7d567a5d 817
robert_lp 0:eedb7d567a5d 818 /**
robert_lp 0:eedb7d567a5d 819 * @brief Q7 max pooling function
robert_lp 0:eedb7d567a5d 820 * @param[in] Im_in pointer to input tensor (non-const; NOTE(review): may be modified by the call - confirm against the implementation)
robert_lp 0:eedb7d567a5d 821 * @param[in] dim_im_in input tensor dimension
robert_lp 0:eedb7d567a5d 822 * @param[in] ch_im_in number of input tensor channels
robert_lp 0:eedb7d567a5d 823 * @param[in] dim_kernel pooling kernel size
robert_lp 0:eedb7d567a5d 824 * @param[in] padding padding sizes
robert_lp 0:eedb7d567a5d 825 * @param[in] stride pooling stride
robert_lp 0:eedb7d567a5d 826 * @param[in] dim_im_out output tensor dimension
robert_lp 0:eedb7d567a5d 827 * @param[in,out] bufferA pointer to buffer space for input
robert_lp 0:eedb7d567a5d 828 * @param[in,out] Im_out pointer to output tensor
robert_lp 0:eedb7d567a5d 829 * @return none.
robert_lp 0:eedb7d567a5d 830 *
robert_lp 0:eedb7d567a5d 831 */
robert_lp 0:eedb7d567a5d 832
robert_lp 0:eedb7d567a5d 833 void arm_maxpool_q7_HWC(q7_t * Im_in,
robert_lp 0:eedb7d567a5d 834 const uint16_t dim_im_in,
robert_lp 0:eedb7d567a5d 835 const uint16_t ch_im_in,
robert_lp 0:eedb7d567a5d 836 const uint16_t dim_kernel,
robert_lp 0:eedb7d567a5d 837 const uint16_t padding,
robert_lp 0:eedb7d567a5d 838 const uint16_t stride,
robert_lp 0:eedb7d567a5d 839 const uint16_t dim_im_out,
robert_lp 0:eedb7d567a5d 840 q7_t * bufferA,
robert_lp 0:eedb7d567a5d 841 q7_t * Im_out);
robert_lp 0:eedb7d567a5d 842
robert_lp 0:eedb7d567a5d 843 /**
robert_lp 0:eedb7d567a5d 844 * @brief Q7 average pooling function
robert_lp 0:eedb7d567a5d 845 * @param[in] Im_in pointer to input tensor (non-const; NOTE(review): may be modified by the call - confirm against the implementation)
robert_lp 0:eedb7d567a5d 846 * @param[in] dim_im_in input tensor dimension
robert_lp 0:eedb7d567a5d 847 * @param[in] ch_im_in number of input tensor channels
robert_lp 0:eedb7d567a5d 848 * @param[in] dim_kernel pooling kernel size
robert_lp 0:eedb7d567a5d 849 * @param[in] padding padding sizes
robert_lp 0:eedb7d567a5d 850 * @param[in] stride pooling stride
robert_lp 0:eedb7d567a5d 851 * @param[in] dim_im_out output tensor dimension
robert_lp 0:eedb7d567a5d 852 * @param[in,out] bufferA pointer to buffer space for input
robert_lp 0:eedb7d567a5d 853 * @param[in,out] Im_out pointer to output tensor
robert_lp 0:eedb7d567a5d 854 * @return none.
robert_lp 0:eedb7d567a5d 855 *
robert_lp 0:eedb7d567a5d 856 */
robert_lp 0:eedb7d567a5d 857
robert_lp 0:eedb7d567a5d 858 void arm_avepool_q7_HWC(q7_t * Im_in,
robert_lp 0:eedb7d567a5d 859 const uint16_t dim_im_in,
robert_lp 0:eedb7d567a5d 860 const uint16_t ch_im_in,
robert_lp 0:eedb7d567a5d 861 const uint16_t dim_kernel,
robert_lp 0:eedb7d567a5d 862 const uint16_t padding,
robert_lp 0:eedb7d567a5d 863 const uint16_t stride,
robert_lp 0:eedb7d567a5d 864 const uint16_t dim_im_out,
robert_lp 0:eedb7d567a5d 865 q7_t * bufferA,
robert_lp 0:eedb7d567a5d 866 q7_t * Im_out);
robert_lp 0:eedb7d567a5d 867
robert_lp 0:eedb7d567a5d 868 /**
robert_lp 0:eedb7d567a5d 869 * @defgroup Softmax Softmax Functions
robert_lp 0:eedb7d567a5d 870 *
robert_lp 0:eedb7d567a5d 871 * EXP(2) based softmax function
robert_lp 0:eedb7d567a5d 872 *
robert_lp 0:eedb7d567a5d 873 */
robert_lp 0:eedb7d567a5d 874
robert_lp 0:eedb7d567a5d 875 /**
robert_lp 0:eedb7d567a5d 876 * @brief Q7 softmax function
robert_lp 0:eedb7d567a5d 877 * @param[in] vec_in pointer to input vector
robert_lp 0:eedb7d567a5d 878 * @param[in] dim_vec input vector dimension
robert_lp 0:eedb7d567a5d 879 * @param[out] p_out pointer to output vector
robert_lp 0:eedb7d567a5d 880 * @return none.
robert_lp 0:eedb7d567a5d 881 *
robert_lp 0:eedb7d567a5d 882 */
robert_lp 0:eedb7d567a5d 883
robert_lp 0:eedb7d567a5d 884 void arm_softmax_q7(const q7_t * vec_in, const uint16_t dim_vec, q7_t * p_out);
robert_lp 0:eedb7d567a5d 885
robert_lp 0:eedb7d567a5d 886 /**
robert_lp 0:eedb7d567a5d 887 * @brief Q15 softmax function
robert_lp 0:eedb7d567a5d 888 * @param[in] vec_in pointer to input vector
robert_lp 0:eedb7d567a5d 889 * @param[in] dim_vec input vector dimension
robert_lp 0:eedb7d567a5d 890 * @param[out] p_out pointer to output vector
robert_lp 0:eedb7d567a5d 891 * @return none.
robert_lp 0:eedb7d567a5d 892 *
robert_lp 0:eedb7d567a5d 893 */
robert_lp 0:eedb7d567a5d 894
robert_lp 0:eedb7d567a5d 895 void arm_softmax_q15(const q15_t * vec_in, const uint16_t dim_vec, q15_t * p_out);
robert_lp 0:eedb7d567a5d 896
robert_lp 0:eedb7d567a5d 897 #ifdef __cplusplus
robert_lp 0:eedb7d567a5d 898 }
robert_lp 0:eedb7d567a5d 899 #endif
robert_lp 0:eedb7d567a5d 900
robert_lp 0:eedb7d567a5d 901 #endif
robert_lp 0:eedb7d567a5d 902