Daniel Konegen / MNIST_example

Dependencies: mbed-os


depthwise_conv.cc

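Below is the TensorFlow Lite for Microcontrollers reference kernel for depthwise convolution (DEPTHWISE_CONV_2D) as bundled with this MNIST example. The kernel supports float32 tensors, per-channel-quantized int8 tensors, and per-tensor-quantized uint8 tensors, and dispatches between those paths at evaluation time.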
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/c_api_internal.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"

namespace tflite {
namespace ops {
namespace micro {
namespace depthwise_conv {
namespace {

constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
constexpr int kMaxChannels = 64;

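// Per-invocation scratch data: padding values, the fixed-point output
// multipliers/shifts (per-tensor and per-channel), and the clamping range
// implied by the fused activation. Filled in by CalculateOpData and consumed
// by the Eval* functions below.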
struct OpData {
  TfLitePaddingValues padding;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;

  // Per channel output multiplier and shift.
  // TODO(b/141139247): Allocate these dynamically when possible.
  int32_t per_channel_output_multiplier[kMaxChannels];
  int32_t per_channel_output_shift[kMaxChannels];

  // The range of the fused activation layer. For example for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;
};

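// Validates the input/output tensor counts, computes padding for the given
// stride and filter size, and, for quantized types, derives the fixed-point
// output multipliers, shifts, and activation clamping range.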
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
                             TfLiteDepthwiseConvParams* params, int width,
                             int height, int filter_width, int filter_height,
                             const TfLiteType data_type, OpData* data) {
  bool has_bias = node->inputs->size == 3;
  // Check number of inputs/outputs
  TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);

  int unused_output_height, unused_output_width;
  data->padding = ComputePaddingHeightWidth(
      params->stride_height, params->stride_width, 1, 1, height, width,
      filter_height, filter_width, params->padding, &unused_output_height,
      &unused_output_width);

  // Note that quantized inference requires that all tensors have their
  // parameters set. This is usually done during quantized training.
  if (data_type != kTfLiteFloat32) {
    const TfLiteTensor* input = GetInput(context, node, kInputTensor);
    const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
    const TfLiteTensor* bias =
        GetOptionalInputTensor(context, node, kBiasTensor);
    TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
        context, input, filter, bias, output, params->activation,
        &data->output_multiplier, &data->output_shift,
        &data->output_activation_min, &data->output_activation_max,
        data->per_channel_output_multiplier,
        reinterpret_cast<int*>(data->per_channel_output_shift)));
  }
  return kTfLiteOk;
}

}  // namespace

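// Init, Free and Prepare are empty in this kernel: no persistent state is
// kept between invocations, and OpData is recomputed on every call to Eval.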
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  return nullptr;
}

void Free(TfLiteContext* context, void* buffer) {}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  return kTfLiteOk;
}

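// Float path: forwards the parameters to the reference DepthwiseConv kernel,
// with the fused activation applied as float min/max clamping.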
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
               TfLiteDepthwiseConvParams* params, OpData* data,
               const TfLiteTensor* input, const TfLiteTensor* filter,
               const TfLiteTensor* bias, TfLiteTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);

  tflite::DepthwiseParams op_params;
  // Padding type is ignored, but still set.
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = 1;
  op_params.dilation_height_factor = 1;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  tflite::reference_ops::DepthwiseConv(
      op_params, GetTensorShape(input), GetTensorData<float>(input),
      GetTensorShape(filter), GetTensorData<float>(filter),
      GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
      GetTensorData<float>(output));
}

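// int8 path with per-channel quantization: uses the per-channel output
// multipliers and shifts computed in CalculateOpData. Activation clamping is
// currently fixed to the full int8 range (see the TODO in the body).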
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                             TfLiteDepthwiseConvParams* params, OpData* data,
                             const TfLiteTensor* input,
                             const TfLiteTensor* filter,
                             const TfLiteTensor* bias, TfLiteTensor* output) {
  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.input_offset = -input->params.zero_point;
  op_params.weights_offset = 0;
  op_params.output_offset = output->params.zero_point;
  // TODO(b/130439627): Use calculated value for clamping.
  op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
  op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();

  reference_integer_ops::DepthwiseConvPerChannel(
      op_params, data->per_channel_output_multiplier,
      data->per_channel_output_shift, GetTensorShape(input),
      GetTensorData<int8>(input), GetTensorShape(filter),
      GetTensorData<int8>(filter), GetTensorShape(bias),
      GetTensorData<int32>(bias), GetTensorShape(output),
      GetTensorData<int8>(output));
}

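// uint8 path with per-tensor quantization: a single output multiplier/shift
// and the activation range derived in CalculateOpData.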
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteDepthwiseConvParams* params, OpData* data,
                   const TfLiteTensor* input, const TfLiteTensor* filter,
                   const TfLiteTensor* bias, TfLiteTensor* output) {
  const int32_t input_offset = -input->params.zero_point;
  const int32_t filter_offset = -filter->params.zero_point;
  const int32_t output_offset = output->params.zero_point;

  tflite::DepthwiseParams op_params;
  // Padding type is ignored, but still set.
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = 1;
  op_params.dilation_height_factor = 1;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data->output_multiplier;
  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
  op_params.output_shift = -data->output_shift;

  tflite::reference_ops::DepthwiseConv(
      op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
      GetTensorShape(filter), GetTensorData<uint8_t>(filter),
      GetTensorShape(bias), GetTensorData<int32_t>(bias),
      GetTensorShape(output), GetTensorData<uint8_t>(output));
}

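// Entry point for each invocation: reads the tensors and builtin parameters,
// validates per-channel quantization metadata for int8 filters, fills OpData,
// and dispatches to the float, int8, or uint8 implementation by input type.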
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const TfLiteTensor* bias =
      (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;

  const TfLiteType data_type = input->type;
  int width = SizeOfDimension(input, 2);
  int height = SizeOfDimension(input, 1);
  int filter_width = SizeOfDimension(filter, 2);
  int filter_height = SizeOfDimension(filter, 1);

  OpData data;

  // All per-channel quantized tensors need valid zero point and scale arrays.
  if (input->type == kTfLiteInt8) {
    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                      kTfLiteAffineQuantization);

    const auto* affine_quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            filter->quantization.params);
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
    TF_LITE_ENSURE(context, affine_quantization->zero_point);
    // Depthwise conv is quantized along dimension 3:
    // https://www.tensorflow.org/lite/performance/quantization_spec
    TF_LITE_ENSURE_EQ(context, filter->dims->data[3],
                      affine_quantization->scale->size);
    TF_LITE_ENSURE_EQ(context, filter->dims->data[3],
                      affine_quantization->zero_point->size);
  }

  TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
                                        filter_width, filter_height, data_type,
                                        &data));

  // TODO(aselle): Consider whether float conv and quantized conv should be
  // separate ops to avoid dispatch overhead here.
  switch (input->type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
      EvalFloat(context, node, params, &data, input, filter, bias, output);
      break;
    case kTfLiteInt8:
      EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias,
                              output);
      break;
    case kTfLiteUInt8:
      EvalQuantized(context, node, params, &data, input, filter, bias, output);
      break;
    default:
      context->ReportError(context, "Type %s (%d) not supported.",
                           TfLiteTypeGetName(input->type), input->type);
      return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace depthwise_conv

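// Registration entry point used by an op resolver to map the
// DEPTHWISE_CONV_2D builtin to this kernel.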
TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
  static TfLiteRegistration r = {depthwise_conv::Init, depthwise_conv::Free,
                                 depthwise_conv::Prepare, depthwise_conv::Eval};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
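For context, application code does not call Register_DEPTHWISE_CONV_2D() directly; an op resolver maps the DEPTHWISE_CONV_2D builtin to this kernel before the interpreter runs the model. The snippet below is a minimal sketch of that wiring. The resolver header path and class name are assumptions about the TensorFlow Lite Micro version bundled with this project and may differ from what MNIST_example actually uses.

// Sketch only: the header path and resolver class are assumptions for the
// TensorFlow Lite Micro version vendored into this project.
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

namespace {
tflite::MicroMutableOpResolver micro_op_resolver;
}  // namespace

void RegisterDepthwiseConv() {
  // Map the DEPTHWISE_CONV_2D builtin to the kernel defined above.
  micro_op_resolver.AddBuiltin(tflite::BuiltinOperator_DEPTHWISE_CONV_2D,
                               tflite::ops::micro::Register_DEPTHWISE_CONV_2D());
}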