depthwiseconv_uint8.h
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_

#include <algorithm>

#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

// Used in tests and template parameters to control which version of depthwise
// convolution is called. Primarily for reference code, and specializations
// forced in tests.
enum class DepthwiseConvImplementation {
  // Run all tests against kUseStandardEntry even if also testing another
  // kernel, since we need to be sure that the main DepthwiseConv() function in
  // optimized_ops.h dispatches to a correctly-executing kernel.
  kNone = 0,                 // The "default" option: use the normal
                             // DepthwiseConv kernel (entry) function.
  kUseGenericKernel,         // Forced use of generic kernel.
  kUseNeon3x3,               // 3x3 kernel that uses NEON when available.
  kUseNeon3x3DotProduct,     // 3x3 kernel that uses dot-product enabled NEON
                             // when available.
  kUseCModel3x3DotProduct,   // 3x3 kernel, reference C model that is intended
                             // to match overall design NEON code.
  kUseUnwound3x3DotProduct,  // 3x3 kernel, reference C model with unwound
                             // loops and some arrays.
  kUseIntrinsics3x3DotProduct,  // 3x3 kernel using NEON intrinsics.
};

// Category of depthwise convolution output rounding.
enum class DepthwiseConvOutputRounding {
  kNone = 0,      // Invalid: specific method must be specified.
  kAwayFromZero,  // Original method: exact halves rounded away from zero.
  kUpward,        // Halves towards +infinity: adds 0.5 before truncate.
  // This is where a future kNearestEven would be placed.
};

// Category of depthwise convolution depth multiplication.
enum class DepthwiseConvDepthMultiplication {
  kNoMultiplication = 0,  // Depth multiplier = 1.
  kUnitInputDepth,        // Input depth = 1, output depth = depth multiplier.
};

namespace reference_ops {
namespace depthwise_conv {

template <DepthwiseConvOutputRounding output_rounding>
inline int32 DepthwiseConvRound(int32 x, int32 quantized_multiplier,
                                int shift) {
  TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone);
  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}

template <>
inline int32 DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
    int32 x, int32 quantized_multiplier, int shift) {
  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}

template <>
inline int32 DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
    int32 x, int32 quantized_multiplier, int shift) {
  using gemmlowp::SaturatingRoundingDoublingHighMul;
  const int left_shift = shift > 0 ? shift : 0;
  const int right_shift = shift > 0 ? 0 : -shift;
  const int rounding_offset = right_shift > 0 ? 1 << (right_shift - 1) : 0;
  return (SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
                                            quantized_multiplier) +
          rounding_offset) >>
         right_shift;
}

template <DepthwiseConvOutputRounding output_rounding>
struct DepthwiseConvBasicKernel {
  static inline void Run(const DepthwiseParams& params,
                         const RuntimeShape& input_shape,
                         const uint8* input_data,
                         const RuntimeShape& filter_shape,
                         const uint8* filter_data,
                         const RuntimeShape& bias_shape, const int32* bias_data,
                         const RuntimeShape& output_shape, uint8* output_data) {
    const int stride_width = params.stride_width;
    const int stride_height = params.stride_height;
    const int dilation_width_factor = params.dilation_width_factor;
    const int dilation_height_factor = params.dilation_height_factor;
    const int pad_width = params.padding_values.width;
    const int pad_height = params.padding_values.height;
    const int depth_multiplier = params.depth_multiplier;
    const int32 output_activation_min = params.quantized_activation_min;
    const int32 output_activation_max = params.quantized_activation_max;
    const int32 input_offset = params.input_offset;
    const int32 filter_offset = params.weights_offset;
    const int32 output_offset = params.output_offset;
    const int32 output_multiplier = params.output_multiplier;
    const int output_shift = params.output_shift;
    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
    TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);

    TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
    const int batches = MatchingDim(input_shape, 0, output_shape, 0);
    const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
    const int input_height = input_shape.Dims(1);
    const int input_width = input_shape.Dims(2);
    const int input_depth = input_shape.Dims(3);
    const int filter_height = filter_shape.Dims(1);
    const int filter_width = filter_shape.Dims(2);
    const int output_height = output_shape.Dims(1);
    const int output_width = output_shape.Dims(2);
    TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);

    for (int b = 0; b < batches; ++b) {
      for (int out_y = 0; out_y < output_height; ++out_y) {
        for (int out_x = 0; out_x < output_width; ++out_x) {
          for (int ic = 0; ic < input_depth; ++ic) {
            for (int m = 0; m < depth_multiplier; m++) {
              const int oc = m + ic * depth_multiplier;
              const int in_x_origin = (out_x * stride_width) - pad_width;
              const int in_y_origin = (out_y * stride_height) - pad_height;
              int32 acc = 0;
              for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
                for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
                  const int in_x =
                      in_x_origin + dilation_width_factor * filter_x;
                  const int in_y =
                      in_y_origin + dilation_height_factor * filter_y;
                  // If the location is outside the bounds of the input image,
                  // use zero as a default value.
                  if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                      (in_y < input_height)) {
                    int32 input_val =
                        input_data[Offset(input_shape, b, in_y, in_x, ic)];
                    int32 filter_val = filter_data[Offset(
                        filter_shape, 0, filter_y, filter_x, oc)];
                    acc += (filter_val + filter_offset) *
                           (input_val + input_offset);
                  }
                }
              }
              if (bias_data) {
                acc += bias_data[oc];
              }
              acc = DepthwiseConvRound<output_rounding>(acc, output_multiplier,
                                                        output_shift);
              acc += output_offset;
              acc = std::max(acc, output_activation_min);
              acc = std::min(acc, output_activation_max);
              output_data[Offset(output_shape, b, out_y, out_x, oc)] =
                  static_cast<uint8>(acc);
            }
          }
        }
      }
    }
  }
};

}  // namespace depthwise_conv

inline void DepthwiseConv(
    const DepthwiseParams& params, const RuntimeShape& input_shape,
    const uint8* input_data, const RuntimeShape& filter_shape,
    const uint8* filter_data, const RuntimeShape& bias_shape,
    const int32* bias_data, const RuntimeShape& output_shape,
    uint8* output_data) {
  return depthwise_conv::DepthwiseConvBasicKernel<
      DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape,
                                                       input_data, filter_shape,
                                                       filter_data, bias_shape,
                                                       bias_data, output_shape,
                                                       output_data);
}

}  // namespace reference_ops
}  // end namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
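A minimal usage sketch of the reference entry point above, not part of the header itself. It assumes the TFLite internal headers are on the include path, uses the RuntimeShape and DepthwiseParams types from tensorflow/lite/kernels/internal/types.h, and picks hypothetical quantization parameters (zero points of 128 and a fixed-point output multiplier of roughly 0.5) purely for illustration.

// Hypothetical example: drive the reference uint8 depthwise convolution
// on a tiny 1x3x3x2 input with a 1x2x2x2 filter (depth_multiplier = 1,
// stride 1, no padding), producing a 1x2x2x2 output.
#include <cstdint>
#include <cstdio>
#include <vector>

#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/types.h"

int main() {
  using tflite::DepthwiseParams;
  using tflite::RuntimeShape;

  const RuntimeShape input_shape({1, 3, 3, 2});   // NHWC
  const RuntimeShape filter_shape({1, 2, 2, 2});  // 1 x H x W x output_depth
  const RuntimeShape bias_shape({1, 1, 1, 2});
  const RuntimeShape output_shape({1, 2, 2, 2});

  std::vector<uint8_t> input(input_shape.FlatSize(), 130);    // +2 after offset
  std::vector<uint8_t> filter(filter_shape.FlatSize(), 129);  // +1 after offset
  std::vector<int32_t> bias(2, 8);
  std::vector<uint8_t> output(output_shape.FlatSize(), 0);

  DepthwiseParams params = {};
  params.stride_width = 1;
  params.stride_height = 1;
  params.dilation_width_factor = 1;
  params.dilation_height_factor = 1;
  params.padding_values.width = 0;
  params.padding_values.height = 0;
  params.depth_multiplier = 1;
  params.input_offset = -128;          // hypothetical input zero point of 128
  params.weights_offset = -128;        // hypothetical filter zero point of 128
  params.output_offset = 128;          // hypothetical output zero point
  params.output_multiplier = 1 << 30;  // fixed-point multiplier ~= 0.5
  params.output_shift = 0;
  params.quantized_activation_min = 0;
  params.quantized_activation_max = 255;

  // With these values each accumulator is 4 * (2 * 1) + 8 = 16, which is
  // scaled by ~0.5 and offset by 128, so every output byte should be 136.
  tflite::reference_ops::DepthwiseConv(params, input_shape, input.data(),
                                       filter_shape, filter.data(), bias_shape,
                                       bias.data(), output_shape,
                                       output.data());

  for (int i = 0; i < output_shape.FlatSize(); ++i) {
    std::printf("%d ", static_cast<int>(output[i]));
  }
  std::printf("\n");
  return 0;
}

The call lands on DepthwiseConvBasicKernel with kAwayFromZero rounding, the only rounding mode the generic DepthwiseConv entry point dispatches to in this header.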