Daniel Konegen / MNIST_example

Dependencies: mbed-os


depthwiseconv_uint8.h

/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_

#include <algorithm>

#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

// Used in tests and template parameters to control which version of depthwise
// convolution is called. Primarily for reference code, and specializations
// forced in tests.
enum class DepthwiseConvImplementation {
  // Run all tests against kUseStandardEntry even if also testing another
  // kernel, since we need to be sure that the main DepthwiseConv() function in
  // optimized_ops.h dispatches to a correctly-executing kernel.
  kNone = 0,                 // The "default" option: use the normal
                             // DepthwiseConv kernel (entry) function.
  kUseGenericKernel,         // Forced use of generic kernel.
  kUseNeon3x3,               // 3x3 kernel that uses NEON when available.
  kUseNeon3x3DotProduct,     // 3x3 kernel that uses dot-product enabled NEON
                             // when available.
  kUseCModel3x3DotProduct,   // 3x3 kernel, reference C model that is intended
                             // to match the overall design of the NEON code.
  kUseUnwound3x3DotProduct,  // 3x3 kernel, reference C model with unwound loops
                             // and some arrays.
  kUseIntrinsics3x3DotProduct,  // 3x3 kernel using NEON intrinsics.
};

// Category of depthwise convolution output rounding.
enum class DepthwiseConvOutputRounding {
  kNone = 0,      // Invalid: a specific method must be specified.
  kAwayFromZero,  // Original method: exact halves rounded away from zero.
  kUpward,        // Halves towards +infinity: adds 0.5 before truncating.
  // This is where a future kNearestEven would be placed.
};
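
// Illustrative example (not part of the original header): both modes agree on
// non-halves, but differ on exact halves of negative values. For instance,
// -2.5 rounds to -3 under kAwayFromZero and to -2 under kUpward, while 2.5
// rounds to 3 under both.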

// Category of depthwise convolution depth multiplication.
enum class DepthwiseConvDepthMultiplication {
  kNoMultiplication = 0,  // Depth multiplier = 1.
  kUnitInputDepth,        // Input depth = 1, output depth = depth multiplier.
};

namespace reference_ops {
namespace depthwise_conv {

template <DepthwiseConvOutputRounding output_rounding>
inline int32 DepthwiseConvRound(int32 x, int32 quantized_multiplier,
                                int shift) {
  TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone);
  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}

template <>
inline int32 DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
    int32 x, int32 quantized_multiplier, int shift) {
  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}

template <>
inline int32 DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
    int32 x, int32 quantized_multiplier, int shift) {
  using gemmlowp::SaturatingRoundingDoublingHighMul;
  const int left_shift = shift > 0 ? shift : 0;
  const int right_shift = shift > 0 ? 0 : -shift;
  const int rounding_offset = right_shift > 0 ? 1 << (right_shift - 1) : 0;
  return (SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
                                            quantized_multiplier) +
          rounding_offset) >>
         right_shift;
}
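
// Added note (not in the original source): in the kUpward variant a negative
// 'shift' becomes a right shift with a rounding offset. For example,
// shift = -3 gives left_shift = 0, right_shift = 3 and rounding_offset = 4,
// so 4 is added to the high-mul result before the >> 3, which rounds exact
// halves towards +infinity.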

template <DepthwiseConvOutputRounding output_rounding>
struct DepthwiseConvBasicKernel {
  static inline void Run(const DepthwiseParams& params,
                         const RuntimeShape& input_shape,
                         const uint8* input_data,
                         const RuntimeShape& filter_shape,
                         const uint8* filter_data,
                         const RuntimeShape& bias_shape, const int32* bias_data,
                         const RuntimeShape& output_shape, uint8* output_data) {
    const int stride_width = params.stride_width;
    const int stride_height = params.stride_height;
    const int dilation_width_factor = params.dilation_width_factor;
    const int dilation_height_factor = params.dilation_height_factor;
    const int pad_width = params.padding_values.width;
    const int pad_height = params.padding_values.height;
    const int depth_multiplier = params.depth_multiplier;
    const int32 output_activation_min = params.quantized_activation_min;
    const int32 output_activation_max = params.quantized_activation_max;
    const int32 input_offset = params.input_offset;
    const int32 filter_offset = params.weights_offset;
    const int32 output_offset = params.output_offset;
    const int32 output_multiplier = params.output_multiplier;
    const int output_shift = params.output_shift;
    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
    TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);

    TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
    const int batches = MatchingDim(input_shape, 0, output_shape, 0);
    const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
    const int input_height = input_shape.Dims(1);
    const int input_width = input_shape.Dims(2);
    const int input_depth = input_shape.Dims(3);
    const int filter_height = filter_shape.Dims(1);
    const int filter_width = filter_shape.Dims(2);
    const int output_height = output_shape.Dims(1);
    const int output_width = output_shape.Dims(2);
    TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);

    for (int b = 0; b < batches; ++b) {
      for (int out_y = 0; out_y < output_height; ++out_y) {
        for (int out_x = 0; out_x < output_width; ++out_x) {
          for (int ic = 0; ic < input_depth; ++ic) {
            for (int m = 0; m < depth_multiplier; m++) {
              const int oc = m + ic * depth_multiplier;
              const int in_x_origin = (out_x * stride_width) - pad_width;
              const int in_y_origin = (out_y * stride_height) - pad_height;
              int32 acc = 0;
              for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
                for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
                  const int in_x =
                      in_x_origin + dilation_width_factor * filter_x;
                  const int in_y =
                      in_y_origin + dilation_height_factor * filter_y;
                  // If the location is outside the bounds of the input image,
                  // use zero as a default value.
                  if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                      (in_y < input_height)) {
                    int32 input_val =
                        input_data[Offset(input_shape, b, in_y, in_x, ic)];
                    int32 filter_val = filter_data[Offset(
                        filter_shape, 0, filter_y, filter_x, oc)];
                    acc += (filter_val + filter_offset) *
                           (input_val + input_offset);
                  }
                }
              }
              if (bias_data) {
                acc += bias_data[oc];
              }
              acc = DepthwiseConvRound<output_rounding>(acc, output_multiplier,
                                                        output_shift);
              acc += output_offset;
              acc = std::max(acc, output_activation_min);
              acc = std::min(acc, output_activation_max);
              output_data[Offset(output_shape, b, out_y, out_x, oc)] =
                  static_cast<uint8>(acc);
            }
          }
        }
      }
    }
  }
};

}  // namespace depthwise_conv

inline void DepthwiseConv(
    const DepthwiseParams& params, const RuntimeShape& input_shape,
    const uint8* input_data, const RuntimeShape& filter_shape,
    const uint8* filter_data, const RuntimeShape& bias_shape,
    const int32* bias_data, const RuntimeShape& output_shape,
    uint8* output_data) {
  return depthwise_conv::DepthwiseConvBasicKernel<
      DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape,
                                                       input_data, filter_shape,
                                                       filter_data, bias_shape,
                                                       bias_data, output_shape,
                                                       output_data);
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
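
For reference, below is a minimal, hypothetical driver showing how the reference_ops::DepthwiseConv entry point declared in this header could be called directly. It is a sketch, not part of the original file: it assumes the TensorFlow Lite internal headers are reachable at the canonical path (the layout inside this Mbed project may differ), and the 3x3 input, 2x2 all-ones filter, zero offsets/bias, and roughly unit requantization scale (output_multiplier = 1 << 30 with output_shift = 1) are illustrative choices.

// Hypothetical usage sketch; include path and build setup are assumptions.
#include <cstdio>

#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"

int main() {
  using namespace tflite;

  // 1x3x3x1 uint8 input, 1x2x2x1 filter (depth_multiplier = 1), no padding,
  // stride 1 -> 1x2x2x1 output.
  const RuntimeShape input_shape({1, 3, 3, 1});
  const RuntimeShape filter_shape({1, 2, 2, 1});
  const RuntimeShape bias_shape({1});
  const RuntimeShape output_shape({1, 2, 2, 1});

  const uint8 input_data[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  const uint8 filter_data[4] = {1, 1, 1, 1};
  const int32 bias_data[1] = {0};
  uint8 output_data[4] = {0};

  DepthwiseParams params = {};
  params.stride_width = 1;
  params.stride_height = 1;
  params.dilation_width_factor = 1;
  params.dilation_height_factor = 1;
  params.padding_values.width = 0;
  params.padding_values.height = 0;
  params.depth_multiplier = 1;
  params.input_offset = 0;  // zero points chosen as 0 for simplicity
  params.weights_offset = 0;
  params.output_offset = 0;
  // output_multiplier = 1 << 30 with output_shift = 1 gives an effective
  // requantization scale of ~1.0 in MultiplyByQuantizedMultiplier().
  params.output_multiplier = 1 << 30;
  params.output_shift = 1;
  params.quantized_activation_min = 0;
  params.quantized_activation_max = 255;

  reference_ops::DepthwiseConv(params, input_shape, input_data, filter_shape,
                               filter_data, bias_shape, bias_data,
                               output_shape, output_data);

  // Each output is the sum of one 2x2 input window: expect 12 16 24 28.
  for (int i = 0; i < 4; ++i) {
    printf("%d ", output_data[i]);
  }
  printf("\n");
  return 0;
}

With these settings each accumulator is simply the sum of the corresponding 2x2 input window, which makes it easy to check the quantized pipeline (accumulate, requantize, add output offset, clamp, cast to uint8) by hand.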