Daniel Konegen / MNIST_example

Dependencies:   mbed-os

Embed: (wiki syntax)

« Back to documentation index

depthwise_conv.h — source file listing (line numbers shown below are from the original listing)

depthwise_conv.h

00001 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
00002 
00003 Licensed under the Apache License, Version 2.0 (the "License");
00004 you may not use this file except in compliance with the License.
00005 You may obtain a copy of the License at
00006 
00007     http://www.apache.org/licenses/LICENSE-2.0
00008 
00009 Unless required by applicable law or agreed to in writing, software
00010 distributed under the License is distributed on an "AS IS" BASIS,
00011 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00012 See the License for the specific language governing permissions and
00013 limitations under the License.
00014 ==============================================================================*/
00015 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
00016 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
00017 
00018 #include "tensorflow/lite/kernels/internal/common.h"
00019 
00020 namespace tflite {
00021 namespace reference_integer_ops {
00022 inline void DepthwiseConvPerChannel(
00023     const DepthwiseParams& params, const int32* output_multiplier,
00024     const int32* output_shift, const RuntimeShape& input_shape,
00025     const int8* input_data, const RuntimeShape& filter_shape,
00026     const int8* filter_data, const RuntimeShape& bias_shape,
00027     const int32* bias_data, const RuntimeShape& output_shape,
00028     int8* output_data) {
00029   // Get parameters.
00030   // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
00031   const int stride_width = params.stride_width;
00032   const int stride_height = params.stride_height;
00033   const int dilation_width_factor = params.dilation_width_factor;
00034   const int dilation_height_factor = params.dilation_height_factor;
00035   const int pad_width = params.padding_values.width;
00036   const int pad_height = params.padding_values.height;
00037   const int depth_multiplier = params.depth_multiplier;
00038   const int32 input_offset = params.input_offset;
00039   const int32 output_offset = params.output_offset;
00040   const int32 output_activation_min = params.quantized_activation_min;
00041   const int32 output_activation_max = params.quantized_activation_max;
00042 
00043   // Check dimensions of the tensors.
00044   TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
00045   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
00046   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
00047 
00048   TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
00049   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
00050   const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
00051   const int input_height = input_shape.Dims(1);
00052   const int input_width = input_shape.Dims(2);
00053   const int input_depth = input_shape.Dims(3);
00054   const int filter_height = filter_shape.Dims(1);
00055   const int filter_width = filter_shape.Dims(2);
00056   const int output_height = output_shape.Dims(1);
00057   const int output_width = output_shape.Dims(2);
00058   TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
00059   TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
00060 
00061   for (int batch = 0; batch < batches; ++batch) {
00062     for (int out_y = 0; out_y < output_height; ++out_y) {
00063       for (int out_x = 0; out_x < output_width; ++out_x) {
00064         for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
00065           for (int m = 0; m < depth_multiplier; ++m) {
00066             const int output_channel = m + in_channel * depth_multiplier;
00067             const int in_x_origin = (out_x * stride_width) - pad_width;
00068             const int in_y_origin = (out_y * stride_height) - pad_height;
00069             int32 acc = 0;
00070             for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
00071               for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
00072                 const int in_x = in_x_origin + dilation_width_factor * filter_x;
00073                 const int in_y =
00074                     in_y_origin + dilation_height_factor * filter_y;
00075                 // Zero padding by omitting the areas outside the image.
00076                 const bool is_point_inside_image =
00077                     (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
00078                     (in_y < input_height);
00079                 if (is_point_inside_image) {
00080                   int32 input_val = input_data[Offset(input_shape, batch, in_y,
00081                                                       in_x, in_channel)];
00082                   int32 filter_val = filter_data[Offset(
00083                       filter_shape, 0, filter_y, filter_x, output_channel)];
00084                   // Accumulate with 32 bits accumulator.
00085                   // In the nudging process during model quantization, we force
00086                   // real value of 0.0 be represented by a quantized value. This
00087                   // guarentees that the input_offset is a int8, even though it
00088                   // is represented using int32.
00089                   // int32 += int8 * (int8 - int8) so the highest value we can
00090                   // get from each accumulation is [-127, 127] * ([-128, 127] -
00091                   // [-128, 127]), which is [-32512, 32512]. log2(32512)
00092                   // = 14.98, which means we can accumulate at least 2^16
00093                   // multiplications without overflow. The accumulator is
00094                   // applied to a filter so the accumulation logic will hold as
00095                   // long as the filter size (filter_y * filter_x * in_channel)
00096                   // does not exceed 2^16, which is the case in all the models
00097                   // we have seen so far.
00098                   // TODO(jianlijianli): Add a check to make sure the
00099                   // accumulator depth is smaller than 2^16.
00100                   acc += filter_val * (input_val + input_offset);
00101                 }
00102               }
00103             }
00104             if (bias_data) {
00105               acc += bias_data[output_channel];
00106             }
00107             acc = MultiplyByQuantizedMultiplier(
00108                 acc, output_multiplier[output_channel],
00109                 output_shift[output_channel]);
00110             acc += output_offset;
00111             acc = std::max(acc, output_activation_min);
00112             acc = std::min(acc, output_activation_max);
00113             output_data[Offset(output_shape, batch, out_y, out_x,
00114                                output_channel)] = static_cast<int8_t>(acc);
00115           }
00116         }
00117       }
00118     }
00119   }
00120 }
00121 }  // namespace reference_integer_ops
00122 }  // namespace tflite
00123 
00124 #endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_