Daniel Konegen / MNIST_example

Dependencies:   mbed-os

Embed: (wiki syntax)

« Back to documentation index

conv.h — source file listing (line numbers shown inline)

conv.h

00001 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
00002 
00003 Licensed under the Apache License, Version 2.0 (the "License");
00004 you may not use this file except in compliance with the License.
00005 You may obtain a copy of the License at
00006 
00007     http://www.apache.org/licenses/LICENSE-2.0
00008 
00009 Unless required by applicable law or agreed to in writing, software
00010 distributed under the License is distributed on an "AS IS" BASIS,
00011 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00012 See the License for the specific language governing permissions and
00013 limitations under the License.
00014 ==============================================================================*/
00015 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
00016 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
00017 
00018 #include "tensorflow/lite/kernels/internal/common.h"
00019 
00020 namespace tflite {
00021 namespace reference_integer_ops {
00022 
00023 // Fixed-point per-channel-quantization convolution reference kernel.
00024 inline void ConvPerChannel(
00025     const ConvParams& params, const int32* output_multiplier,
00026     const int32* output_shift, const RuntimeShape& input_shape,
00027     const int8* input_data, const RuntimeShape& filter_shape,
00028     const int8* filter_data, const RuntimeShape& bias_shape,
00029     const int32* bias_data, const RuntimeShape& output_shape,
00030     int8* output_data) {
00031   // Get parameters.
00032   const int32 input_offset = params.input_offset;  // r = s(q - Z)
00033   const int stride_width = params.stride_width;
00034   const int stride_height = params.stride_height;
00035   const int dilation_width_factor = params.dilation_width_factor;
00036   const int dilation_height_factor = params.dilation_height_factor;
00037   const int pad_width = params.padding_values.width;
00038   const int pad_height = params.padding_values.height;
00039   const int32 output_offset = params.output_offset;
00040 
00041   // Set min and max value of the output.
00042   const int32 output_activation_min = std::numeric_limits<int8_t>::min();
00043   const int32 output_activation_max = std::numeric_limits<int8_t>::max();
00044 
00045   // Sanity check.
00046   TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
00047   TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
00048   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
00049   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
00050   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
00051   const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
00052   const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
00053   if (bias_data) {
00054     TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
00055   }
00056 
00057   // Check dimensions of the tensors.
00058   const int input_height = input_shape.Dims(1);
00059   const int input_width = input_shape.Dims(2);
00060   const int filter_height = filter_shape.Dims(1);
00061   const int filter_width = filter_shape.Dims(2);
00062   const int output_height = output_shape.Dims(1);
00063   const int output_width = output_shape.Dims(2);
00064   for (int batch = 0; batch < batches; ++batch) {
00065     for (int out_y = 0; out_y < output_height; ++out_y) {
00066       for (int out_x = 0; out_x < output_width; ++out_x) {
00067         for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
00068           const int in_x_origin = (out_x * stride_width) - pad_width;
00069           const int in_y_origin = (out_y * stride_height) - pad_height;
00070           int32 acc = 0;
00071           for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
00072             for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
00073               for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
00074                 const int in_x = in_x_origin + dilation_width_factor * filter_x;
00075                 const int in_y =
00076                     in_y_origin + dilation_height_factor * filter_y;
00077                 // Zero padding by omitting the areas outside the image.
00078                 const bool is_point_inside_image =
00079                     (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
00080                     (in_y < input_height);
00081                 if (is_point_inside_image) {
00082                   int32 input_val = input_data[Offset(input_shape, batch, in_y,
00083                                                       in_x, in_channel)];
00084                   int32 filter_val =
00085                       filter_data[Offset(filter_shape, out_channel, filter_y,
00086                                          filter_x, in_channel)];
00087                   // Accumulate with 32 bits accumulator.
00088                   // In the nudging process during model quantization, we force
00089                   // real value of 0.0 be represented by a quantized value. This
00090                   // guarantees that the input_offset is a int8, even though it
00091                   // is represented using int32.
00092                   // int32 += int8 * (int8 - int8) so the highest value we can
00093                   // get from each accumulation is [-127, 127] * ([-128, 127] -
00094                   // [-128, 127]), which is [-32512, 32512]. log2(32512)
00095                   // = 14.98, which means we can accumulate at least 2^16
00096                   // multiplications without overflow. The accumulator is
00097                   // applied to a filter so the accumulation logic will hold as
00098                   // long as the filter size (filter_y * filter_x * in_channel)
00099                   // does not exceed 2^16, which is the case in all the models
00100                   // we have seen so far.
00101                   // TODO(jianlijianli): Add a check to make sure the
00102                   // accumulator depth is smaller than 2^16.
00103                   acc += filter_val * (input_val + input_offset);
00104                 }
00105               }
00106             }
00107           }
00108 
00109           if (bias_data) {
00110             acc += bias_data[out_channel];
00111           }
00112           acc = MultiplyByQuantizedMultiplier(
00113               acc, output_multiplier[out_channel], output_shift[out_channel]);
00114           acc += output_offset;
00115           acc = std::max(acc, output_activation_min);
00116           acc = std::min(acc, output_activation_max);
00117           output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
00118               static_cast<int8_t>(acc);
00119         }
00120       }
00121     }
00122   }
00123 }
00124 
00125 }  // namespace reference_integer_ops
00126 }  // namespace tflite
00127 
00128 #endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_