Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
conv.h
00001 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 00002 00003 Licensed under the Apache License, Version 2.0 (the "License"); 00004 you may not use this file except in compliance with the License. 00005 You may obtain a copy of the License at 00006 00007 http://www.apache.org/licenses/LICENSE-2.0 00008 00009 Unless required by applicable law or agreed to in writing, software 00010 distributed under the License is distributed on an "AS IS" BASIS, 00011 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00012 See the License for the specific language governing permissions and 00013 limitations under the License. 00014 ==============================================================================*/ 00015 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_ 00016 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_ 00017 00018 #include "tensorflow/lite/kernels/internal/types.h" 00019 #include "tensorflow/lite/kernels/internal/common.h" 00020 00021 00022 00023 namespace tflite { 00024 00025 namespace reference_ops { 00026 00027 00028 inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, 00029 const float* input_data, const RuntimeShape& filter_shape, 00030 const float* filter_data, const RuntimeShape& bias_shape, 00031 const float* bias_data, const RuntimeShape& output_shape, 00032 float* output_data, const RuntimeShape& im2col_shape, 00033 float* im2col_data) { 00034 const int stride_width = params.stride_width; 00035 const int stride_height = params.stride_height; 00036 const int dilation_width_factor = params.dilation_width_factor; 00037 const int dilation_height_factor = params.dilation_height_factor; 00038 const int pad_width = params.padding_values.width; 00039 const int pad_height = params.padding_values.height; 00040 const float output_activation_min = params.float_activation_min; 00041 const float output_activation_max = params.float_activation_max; 00042 
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); 00043 TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); 00044 TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); 00045 00046 (void)im2col_data; // only used in optimized code. 00047 (void)im2col_shape; // only used in optimized code. 00048 const int batches = MatchingDim(input_shape, 0, output_shape, 0); 00049 const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); 00050 const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); 00051 if (bias_data) { 00052 TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); 00053 } 00054 const int input_height = input_shape.Dims(1); 00055 const int input_width = input_shape.Dims(2); 00056 const int filter_height = filter_shape.Dims(1); 00057 const int filter_width = filter_shape.Dims(2); 00058 const int output_height = output_shape.Dims(1); 00059 const int output_width = output_shape.Dims(2); 00060 for (int batch = 0; batch < batches; ++batch) { 00061 for (int out_y = 0; out_y < output_height; ++out_y) { 00062 for (int out_x = 0; out_x < output_width; ++out_x) { 00063 for (int out_channel = 0; out_channel < output_depth; ++out_channel) { 00064 const int in_x_origin = (out_x * stride_width) - pad_width; 00065 const int in_y_origin = (out_y * stride_height) - pad_height; 00066 float total = 0.f; 00067 for (int filter_y = 0; filter_y < filter_height; ++filter_y) { 00068 for (int filter_x = 0; filter_x < filter_width; ++filter_x) { 00069 for (int in_channel = 0; in_channel < input_depth; ++in_channel) { 00070 const int in_x = in_x_origin + dilation_width_factor * filter_x; 00071 const int in_y = 00072 in_y_origin + dilation_height_factor * filter_y; 00073 // If the location is outside the bounds of the input image, 00074 // use zero as a default value. 
00075 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && 00076 (in_y < input_height)) { 00077 float input_value = input_data[Offset( 00078 input_shape, batch, in_y, in_x, in_channel)]; 00079 float filter_value = 00080 filter_data[Offset(filter_shape, out_channel, filter_y, 00081 filter_x, in_channel)]; 00082 total += (input_value * filter_value); 00083 } 00084 } 00085 } 00086 } 00087 float bias_value = 0.0f; 00088 if (bias_data) { 00089 bias_value = bias_data[out_channel]; 00090 } 00091 output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = 00092 ActivationFunctionWithMinMax(total + bias_value, 00093 output_activation_min, 00094 output_activation_max); 00095 } 00096 } 00097 } 00098 } 00099 } 00100 00101 inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, 00102 const uint8* input_data, const RuntimeShape& filter_shape, 00103 const uint8* filter_data, const RuntimeShape& bias_shape, 00104 const int32* bias_data, const RuntimeShape& output_shape, 00105 uint8* output_data, const RuntimeShape& im2col_shape, 00106 uint8* im2col_data, void* cpu_backend_context) { 00107 (void)cpu_backend_context; // only used in optimized code. 00108 (void)im2col_data; // only used in optimized code. 00109 (void)im2col_shape; // only used in optimized code. 
00110 const int stride_width = params.stride_width; 00111 const int stride_height = params.stride_height; 00112 const int dilation_width_factor = params.dilation_width_factor; 00113 const int dilation_height_factor = params.dilation_height_factor; 00114 const int pad_width = params.padding_values.width; 00115 const int pad_height = params.padding_values.height; 00116 const int32 input_offset = params.input_offset; 00117 const int32 filter_offset = params.weights_offset; 00118 const int32 output_offset = params.output_offset; 00119 const int32 output_multiplier = params.output_multiplier; 00120 const int output_shift = params.output_shift; 00121 const int32 output_activation_min = params.quantized_activation_min; 00122 const int32 output_activation_max = params.quantized_activation_max; 00123 TFLITE_DCHECK_LE(output_activation_min, output_activation_max); 00124 00125 TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); 00126 TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); 00127 TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); 00128 const int batches = MatchingDim(input_shape, 0, output_shape, 0); 00129 const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); 00130 const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); 00131 if (bias_data) { 00132 TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); 00133 } 00134 const int input_height = input_shape.Dims(1); 00135 const int input_width = input_shape.Dims(2); 00136 const int filter_height = filter_shape.Dims(1); 00137 const int filter_width = filter_shape.Dims(2); 00138 const int output_height = output_shape.Dims(1); 00139 const int output_width = output_shape.Dims(2); 00140 for (int batch = 0; batch < batches; ++batch) { 00141 for (int out_y = 0; out_y < output_height; ++out_y) { 00142 for (int out_x = 0; out_x < output_width; ++out_x) { 00143 for (int out_channel = 0; out_channel < output_depth; ++out_channel) { 00144 const int in_x_origin = (out_x * stride_width) - pad_width; 
00145 const int in_y_origin = (out_y * stride_height) - pad_height; 00146 int32 acc = 0; 00147 for (int filter_y = 0; filter_y < filter_height; ++filter_y) { 00148 for (int filter_x = 0; filter_x < filter_width; ++filter_x) { 00149 for (int in_channel = 0; in_channel < input_depth; ++in_channel) { 00150 const int in_x = in_x_origin + dilation_width_factor * filter_x; 00151 const int in_y = 00152 in_y_origin + dilation_height_factor * filter_y; 00153 // If the location is outside the bounds of the input image, 00154 // use zero as a default value. 00155 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && 00156 (in_y < input_height)) { 00157 int32 input_val = input_data[Offset(input_shape, batch, in_y, 00158 in_x, in_channel)]; 00159 int32 filter_val = 00160 filter_data[Offset(filter_shape, out_channel, filter_y, 00161 filter_x, in_channel)]; 00162 acc += 00163 (filter_val + filter_offset) * (input_val + input_offset); 00164 } 00165 } 00166 } 00167 } 00168 if (bias_data) { 00169 acc += bias_data[out_channel]; 00170 } 00171 acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, 00172 output_shift); 00173 acc += output_offset; 00174 acc = std::max(acc, output_activation_min); 00175 acc = std::min(acc, output_activation_max); 00176 output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = 00177 static_cast<uint8>(acc); 00178 } 00179 } 00180 } 00181 } 00182 } 00183 00184 inline void HybridConvPerChannel( 00185 const ConvParams& params, float* scaling_factors_ptr, 00186 const RuntimeShape& input_shape, const int8_t* input_data, 00187 const RuntimeShape& filter_shape, const int8_t* filter_data, 00188 const RuntimeShape& bias_shape, const float* bias_data, 00189 const RuntimeShape& output_shape, float* output_data, 00190 const RuntimeShape& im2col_shape, int8_t* im2col_data, 00191 const float* per_channel_scale, int32_t* input_offset) { 00192 (void)im2col_data; // only used in optimized code. 00193 (void)im2col_shape; // only used in optimized code. 
00194 const int stride_width = params.stride_width; 00195 const int stride_height = params.stride_height; 00196 const int dilation_width_factor = params.dilation_width_factor; 00197 const int dilation_height_factor = params.dilation_height_factor; 00198 const int pad_width = params.padding_values.width; 00199 const int pad_height = params.padding_values.height; 00200 const float output_activation_min = params.float_activation_min; 00201 const float output_activation_max = params.float_activation_max; 00202 TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); 00203 TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); 00204 TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); 00205 const int batches = MatchingDim(input_shape, 0, output_shape, 0); 00206 const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); 00207 const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); 00208 if (bias_data) { 00209 TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); 00210 } 00211 const int input_height = input_shape.Dims(1); 00212 const int input_width = input_shape.Dims(2); 00213 const int filter_height = filter_shape.Dims(1); 00214 const int filter_width = filter_shape.Dims(2); 00215 const int output_height = output_shape.Dims(1); 00216 const int output_width = output_shape.Dims(2); 00217 for (int batch = 0; batch < batches; ++batch) { 00218 for (int out_y = 0; out_y < output_height; ++out_y) { 00219 for (int out_x = 0; out_x < output_width; ++out_x) { 00220 for (int out_channel = 0; out_channel < output_depth; ++out_channel) { 00221 const int in_x_origin = (out_x * stride_width) - pad_width; 00222 const int in_y_origin = (out_y * stride_height) - pad_height; 00223 int32 acc = 0; 00224 for (int filter_y = 0; filter_y < filter_height; ++filter_y) { 00225 for (int filter_x = 0; filter_x < filter_width; ++filter_x) { 00226 for (int in_channel = 0; in_channel < input_depth; ++in_channel) { 00227 const int in_x = in_x_origin + dilation_width_factor * 
filter_x; 00228 const int in_y = 00229 in_y_origin + dilation_height_factor * filter_y; 00230 // If the location is outside the bounds of the input image, 00231 // use zero as a default value. 00232 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && 00233 (in_y < input_height)) { 00234 int32 input_val = input_data[Offset(input_shape, batch, in_y, 00235 in_x, in_channel)]; 00236 int32 filter_val = 00237 filter_data[Offset(filter_shape, out_channel, filter_y, 00238 filter_x, in_channel)]; 00239 acc += filter_val * (input_val - input_offset[batch]); 00240 } 00241 } 00242 } 00243 } 00244 float acc_float = 00245 acc * per_channel_scale[out_channel] * scaling_factors_ptr[batch]; 00246 if (bias_data) { 00247 acc_float += bias_data[out_channel]; 00248 } 00249 output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = 00250 ActivationFunctionWithMinMax(acc_float, output_activation_min, 00251 output_activation_max); 00252 } 00253 } 00254 } 00255 } 00256 } 00257 00258 } // namespace reference_ops 00259 } // namespace tflite 00260 00261 00262 #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
Generated on Wed Jul 13 2022 16:03:35 by
1.7.2