Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository as a Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
depthwise_conv.h
00001 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 00002 00003 Licensed under the Apache License, Version 2.0 (the "License"); 00004 you may not use this file except in compliance with the License. 00005 You may obtain a copy of the License at 00006 00007 http://www.apache.org/licenses/LICENSE-2.0 00008 00009 Unless required by applicable law or agreed to in writing, software 00010 distributed under the License is distributed on an "AS IS" BASIS, 00011 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00012 See the License for the specific language governing permissions and 00013 limitations under the License. 00014 ==============================================================================*/ 00015 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_ 00016 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_ 00017 00018 #include "tensorflow/lite/kernels/internal/common.h" 00019 00020 namespace tflite { 00021 namespace reference_integer_ops { 00022 inline void DepthwiseConvPerChannel( 00023 const DepthwiseParams& params, const int32* output_multiplier, 00024 const int32* output_shift, const RuntimeShape& input_shape, 00025 const int8* input_data, const RuntimeShape& filter_shape, 00026 const int8* filter_data, const RuntimeShape& bias_shape, 00027 const int32* bias_data, const RuntimeShape& output_shape, 00028 int8* output_data) { 00029 // Get parameters. 00030 // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro. 
00031 const int stride_width = params.stride_width; 00032 const int stride_height = params.stride_height; 00033 const int dilation_width_factor = params.dilation_width_factor; 00034 const int dilation_height_factor = params.dilation_height_factor; 00035 const int pad_width = params.padding_values.width; 00036 const int pad_height = params.padding_values.height; 00037 const int depth_multiplier = params.depth_multiplier; 00038 const int32 input_offset = params.input_offset; 00039 const int32 output_offset = params.output_offset; 00040 const int32 output_activation_min = params.quantized_activation_min; 00041 const int32 output_activation_max = params.quantized_activation_max; 00042 00043 // Check dimensions of the tensors. 00044 TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); 00045 TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); 00046 TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); 00047 00048 TFLITE_DCHECK_LE(output_activation_min, output_activation_max); 00049 const int batches = MatchingDim(input_shape, 0, output_shape, 0); 00050 const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); 00051 const int input_height = input_shape.Dims(1); 00052 const int input_width = input_shape.Dims(2); 00053 const int input_depth = input_shape.Dims(3); 00054 const int filter_height = filter_shape.Dims(1); 00055 const int filter_width = filter_shape.Dims(2); 00056 const int output_height = output_shape.Dims(1); 00057 const int output_width = output_shape.Dims(2); 00058 TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); 00059 TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); 00060 00061 for (int batch = 0; batch < batches; ++batch) { 00062 for (int out_y = 0; out_y < output_height; ++out_y) { 00063 for (int out_x = 0; out_x < output_width; ++out_x) { 00064 for (int in_channel = 0; in_channel < input_depth; ++in_channel) { 00065 for (int m = 0; m < depth_multiplier; ++m) { 00066 const int output_channel = m + in_channel * 
depth_multiplier; 00067 const int in_x_origin = (out_x * stride_width) - pad_width; 00068 const int in_y_origin = (out_y * stride_height) - pad_height; 00069 int32 acc = 0; 00070 for (int filter_y = 0; filter_y < filter_height; ++filter_y) { 00071 for (int filter_x = 0; filter_x < filter_width; ++filter_x) { 00072 const int in_x = in_x_origin + dilation_width_factor * filter_x; 00073 const int in_y = 00074 in_y_origin + dilation_height_factor * filter_y; 00075 // Zero padding by omitting the areas outside the image. 00076 const bool is_point_inside_image = 00077 (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && 00078 (in_y < input_height); 00079 if (is_point_inside_image) { 00080 int32 input_val = input_data[Offset(input_shape, batch, in_y, 00081 in_x, in_channel)]; 00082 int32 filter_val = filter_data[Offset( 00083 filter_shape, 0, filter_y, filter_x, output_channel)]; 00084 // Accumulate with 32 bits accumulator. 00085 // In the nudging process during model quantization, we force 00086 // real value of 0.0 be represented by a quantized value. This 00087 // guarentees that the input_offset is a int8, even though it 00088 // is represented using int32. 00089 // int32 += int8 * (int8 - int8) so the highest value we can 00090 // get from each accumulation is [-127, 127] * ([-128, 127] - 00091 // [-128, 127]), which is [-32512, 32512]. log2(32512) 00092 // = 14.98, which means we can accumulate at least 2^16 00093 // multiplications without overflow. The accumulator is 00094 // applied to a filter so the accumulation logic will hold as 00095 // long as the filter size (filter_y * filter_x * in_channel) 00096 // does not exceed 2^16, which is the case in all the models 00097 // we have seen so far. 00098 // TODO(jianlijianli): Add a check to make sure the 00099 // accumulator depth is smaller than 2^16. 
00100 acc += filter_val * (input_val + input_offset); 00101 } 00102 } 00103 } 00104 if (bias_data) { 00105 acc += bias_data[output_channel]; 00106 } 00107 acc = MultiplyByQuantizedMultiplier( 00108 acc, output_multiplier[output_channel], 00109 output_shift[output_channel]); 00110 acc += output_offset; 00111 acc = std::max(acc, output_activation_min); 00112 acc = std::min(acc, output_activation_max); 00113 output_data[Offset(output_shape, batch, out_y, out_x, 00114 output_channel)] = static_cast<int8_t>(acc); 00115 } 00116 } 00117 } 00118 } 00119 } 00120 } 00121 } // namespace reference_integer_ops 00122 } // namespace tflite 00123 00124 #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
Generated on Wed Jul 13 2022 16:03:35 by Doxygen 1.7.2