Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
conv.h
00001 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 00002 00003 Licensed under the Apache License, Version 2.0 (the "License"); 00004 you may not use this file except in compliance with the License. 00005 You may obtain a copy of the License at 00006 00007 http://www.apache.org/licenses/LICENSE-2.0 00008 00009 Unless required by applicable law or agreed to in writing, software 00010 distributed under the License is distributed on an "AS IS" BASIS, 00011 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00012 See the License for the specific language governing permissions and 00013 limitations under the License. 00014 ==============================================================================*/ 00015 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_ 00016 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_ 00017 00018 #include "tensorflow/lite/kernels/internal/common.h" 00019 00020 namespace tflite { 00021 namespace reference_integer_ops { 00022 00023 // Fixed-point per-channel-quantization convolution reference kernel. 00024 inline void ConvPerChannel( 00025 const ConvParams& params, const int32* output_multiplier, 00026 const int32* output_shift, const RuntimeShape& input_shape, 00027 const int8* input_data, const RuntimeShape& filter_shape, 00028 const int8* filter_data, const RuntimeShape& bias_shape, 00029 const int32* bias_data, const RuntimeShape& output_shape, 00030 int8* output_data) { 00031 // Get parameters. 
00032 const int32 input_offset = params.input_offset; // r = s(q - Z) 00033 const int stride_width = params.stride_width; 00034 const int stride_height = params.stride_height; 00035 const int dilation_width_factor = params.dilation_width_factor; 00036 const int dilation_height_factor = params.dilation_height_factor; 00037 const int pad_width = params.padding_values.width; 00038 const int pad_height = params.padding_values.height; 00039 const int32 output_offset = params.output_offset; 00040 00041 // Set min and max value of the output. 00042 const int32 output_activation_min = std::numeric_limits<int8_t>::min(); 00043 const int32 output_activation_max = std::numeric_limits<int8_t>::max(); 00044 00045 // Sanity check. 00046 TFLITE_DCHECK_LE(output_activation_min, output_activation_max); 00047 TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); 00048 TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); 00049 TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); 00050 const int batches = MatchingDim(input_shape, 0, output_shape, 0); 00051 const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); 00052 const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); 00053 if (bias_data) { 00054 TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); 00055 } 00056 00057 // Check dimensions of the tensors. 
00058 const int input_height = input_shape.Dims(1); 00059 const int input_width = input_shape.Dims(2); 00060 const int filter_height = filter_shape.Dims(1); 00061 const int filter_width = filter_shape.Dims(2); 00062 const int output_height = output_shape.Dims(1); 00063 const int output_width = output_shape.Dims(2); 00064 for (int batch = 0; batch < batches; ++batch) { 00065 for (int out_y = 0; out_y < output_height; ++out_y) { 00066 for (int out_x = 0; out_x < output_width; ++out_x) { 00067 for (int out_channel = 0; out_channel < output_depth; ++out_channel) { 00068 const int in_x_origin = (out_x * stride_width) - pad_width; 00069 const int in_y_origin = (out_y * stride_height) - pad_height; 00070 int32 acc = 0; 00071 for (int filter_y = 0; filter_y < filter_height; ++filter_y) { 00072 for (int filter_x = 0; filter_x < filter_width; ++filter_x) { 00073 for (int in_channel = 0; in_channel < input_depth; ++in_channel) { 00074 const int in_x = in_x_origin + dilation_width_factor * filter_x; 00075 const int in_y = 00076 in_y_origin + dilation_height_factor * filter_y; 00077 // Zero padding by omitting the areas outside the image. 00078 const bool is_point_inside_image = 00079 (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && 00080 (in_y < input_height); 00081 if (is_point_inside_image) { 00082 int32 input_val = input_data[Offset(input_shape, batch, in_y, 00083 in_x, in_channel)]; 00084 int32 filter_val = 00085 filter_data[Offset(filter_shape, out_channel, filter_y, 00086 filter_x, in_channel)]; 00087 // Accumulate with 32 bits accumulator. 00088 // In the nudging process during model quantization, we force 00089 // real value of 0.0 be represented by a quantized value. This 00090 // guarantees that the input_offset is a int8, even though it 00091 // is represented using int32. 00092 // int32 += int8 * (int8 - int8) so the highest value we can 00093 // get from each accumulation is [-127, 127] * ([-128, 127] - 00094 // [-128, 127]), which is [-32512, 32512]. 
log2(32512) 00095 // = 14.98, which means we can accumulate at least 2^16 00096 // multiplications without overflow. The accumulator is 00097 // applied to a filter so the accumulation logic will hold as 00098 // long as the filter size (filter_y * filter_x * in_channel) 00099 // does not exceed 2^16, which is the case in all the models 00100 // we have seen so far. 00101 // TODO(jianlijianli): Add a check to make sure the 00102 // accumulator depth is smaller than 2^16. 00103 acc += filter_val * (input_val + input_offset); 00104 } 00105 } 00106 } 00107 } 00108 00109 if (bias_data) { 00110 acc += bias_data[out_channel]; 00111 } 00112 acc = MultiplyByQuantizedMultiplier( 00113 acc, output_multiplier[out_channel], output_shift[out_channel]); 00114 acc += output_offset; 00115 acc = std::max(acc, output_activation_min); 00116 acc = std::min(acc, output_activation_max); 00117 output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = 00118 static_cast<int8_t>(acc); 00119 } 00120 } 00121 } 00122 } 00123 } 00124 00125 } // namespace reference_integer_ops 00126 } // namespace tflite 00127 00128 #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
Generated on Wed Jul 13 2022 16:03:35.
