Daniel Konegen / MNIST_example

Dependencies:   mbed-os

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers conv.h Source File

conv.h

00001 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
00002 
00003 Licensed under the Apache License, Version 2.0 (the "License");
00004 you may not use this file except in compliance with the License.
00005 You may obtain a copy of the License at
00006 
00007     http://www.apache.org/licenses/LICENSE-2.0
00008 
00009 Unless required by applicable law or agreed to in writing, software
00010 distributed under the License is distributed on an "AS IS" BASIS,
00011 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00012 See the License for the specific language governing permissions and
00013 limitations under the License.
00014 ==============================================================================*/
00015 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
00016 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
00017 
00018 #include "tensorflow/lite/kernels/internal/types.h"
00019 #include "tensorflow/lite/kernels/internal/common.h"
00020 
00021 
00022 
00023 namespace tflite {
00024 
00025 namespace reference_ops {
00026 
00027 
00028 inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
00029                  const float* input_data, const RuntimeShape& filter_shape,
00030                  const float* filter_data, const RuntimeShape& bias_shape,
00031                  const float* bias_data, const RuntimeShape& output_shape,
00032                  float* output_data, const RuntimeShape& im2col_shape,
00033                  float* im2col_data) {
00034   const int stride_width = params.stride_width;
00035   const int stride_height = params.stride_height;
00036   const int dilation_width_factor = params.dilation_width_factor;
00037   const int dilation_height_factor = params.dilation_height_factor;
00038   const int pad_width = params.padding_values.width;
00039   const int pad_height = params.padding_values.height;
00040   const float output_activation_min = params.float_activation_min;
00041   const float output_activation_max = params.float_activation_max;
00042   TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
00043   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
00044   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
00045 
00046   (void)im2col_data;   // only used in optimized code.
00047   (void)im2col_shape;  // only used in optimized code.
00048   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
00049   const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
00050   const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
00051   if (bias_data) {
00052     TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
00053   }
00054   const int input_height = input_shape.Dims(1);
00055   const int input_width = input_shape.Dims(2);
00056   const int filter_height = filter_shape.Dims(1);
00057   const int filter_width = filter_shape.Dims(2);
00058   const int output_height = output_shape.Dims(1);
00059   const int output_width = output_shape.Dims(2);
00060   for (int batch = 0; batch < batches; ++batch) {
00061     for (int out_y = 0; out_y < output_height; ++out_y) {
00062       for (int out_x = 0; out_x < output_width; ++out_x) {
00063         for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
00064           const int in_x_origin = (out_x * stride_width) - pad_width;
00065           const int in_y_origin = (out_y * stride_height) - pad_height;
00066           float total = 0.f;
00067           for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
00068             for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
00069               for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
00070                 const int in_x = in_x_origin + dilation_width_factor * filter_x;
00071                 const int in_y =
00072                     in_y_origin + dilation_height_factor * filter_y;
00073                 // If the location is outside the bounds of the input image,
00074                 // use zero as a default value.
00075                 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
00076                     (in_y < input_height)) {
00077                   float input_value = input_data[Offset(
00078                       input_shape, batch, in_y, in_x, in_channel)];
00079                   float filter_value =
00080                       filter_data[Offset(filter_shape, out_channel, filter_y,
00081                                          filter_x, in_channel)];
00082                   total += (input_value * filter_value);
00083                 }
00084               }
00085             }
00086           }
00087           float bias_value = 0.0f;
00088           if (bias_data) {
00089             bias_value = bias_data[out_channel];
00090           }
00091           output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
00092               ActivationFunctionWithMinMax(total + bias_value,
00093                                            output_activation_min,
00094                                            output_activation_max);
00095         }
00096       }
00097     }
00098   }
00099 }
00100 
00101 inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
00102                  const uint8* input_data, const RuntimeShape& filter_shape,
00103                  const uint8* filter_data, const RuntimeShape& bias_shape,
00104                  const int32* bias_data, const RuntimeShape& output_shape,
00105                  uint8* output_data, const RuntimeShape& im2col_shape,
00106                  uint8* im2col_data, void* cpu_backend_context) {
00107   (void)cpu_backend_context;  // only used in optimized code.
00108   (void)im2col_data;   // only used in optimized code.
00109   (void)im2col_shape;  // only used in optimized code.
00110   const int stride_width = params.stride_width;
00111   const int stride_height = params.stride_height;
00112   const int dilation_width_factor = params.dilation_width_factor;
00113   const int dilation_height_factor = params.dilation_height_factor;
00114   const int pad_width = params.padding_values.width;
00115   const int pad_height = params.padding_values.height;
00116   const int32 input_offset = params.input_offset;
00117   const int32 filter_offset = params.weights_offset;
00118   const int32 output_offset = params.output_offset;
00119   const int32 output_multiplier = params.output_multiplier;
00120   const int output_shift = params.output_shift;
00121   const int32 output_activation_min = params.quantized_activation_min;
00122   const int32 output_activation_max = params.quantized_activation_max;
00123   TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
00124 
00125   TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
00126   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
00127   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
00128   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
00129   const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
00130   const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
00131   if (bias_data) {
00132     TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
00133   }
00134   const int input_height = input_shape.Dims(1);
00135   const int input_width = input_shape.Dims(2);
00136   const int filter_height = filter_shape.Dims(1);
00137   const int filter_width = filter_shape.Dims(2);
00138   const int output_height = output_shape.Dims(1);
00139   const int output_width = output_shape.Dims(2);
00140   for (int batch = 0; batch < batches; ++batch) {
00141     for (int out_y = 0; out_y < output_height; ++out_y) {
00142       for (int out_x = 0; out_x < output_width; ++out_x) {
00143         for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
00144           const int in_x_origin = (out_x * stride_width) - pad_width;
00145           const int in_y_origin = (out_y * stride_height) - pad_height;
00146           int32 acc = 0;
00147           for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
00148             for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
00149               for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
00150                 const int in_x = in_x_origin + dilation_width_factor * filter_x;
00151                 const int in_y =
00152                     in_y_origin + dilation_height_factor * filter_y;
00153                 // If the location is outside the bounds of the input image,
00154                 // use zero as a default value.
00155                 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
00156                     (in_y < input_height)) {
00157                   int32 input_val = input_data[Offset(input_shape, batch, in_y,
00158                                                       in_x, in_channel)];
00159                   int32 filter_val =
00160                       filter_data[Offset(filter_shape, out_channel, filter_y,
00161                                          filter_x, in_channel)];
00162                   acc +=
00163                       (filter_val + filter_offset) * (input_val + input_offset);
00164                 }
00165               }
00166             }
00167           }
00168           if (bias_data) {
00169             acc += bias_data[out_channel];
00170           }
00171           acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
00172                                               output_shift);
00173           acc += output_offset;
00174           acc = std::max(acc, output_activation_min);
00175           acc = std::min(acc, output_activation_max);
00176           output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
00177               static_cast<uint8>(acc);
00178         }
00179       }
00180     }
00181   }
00182 }
00183 
00184 inline void HybridConvPerChannel(
00185     const ConvParams& params, float* scaling_factors_ptr,
00186     const RuntimeShape& input_shape, const int8_t* input_data,
00187     const RuntimeShape& filter_shape, const int8_t* filter_data,
00188     const RuntimeShape& bias_shape, const float* bias_data,
00189     const RuntimeShape& output_shape, float* output_data,
00190     const RuntimeShape& im2col_shape, int8_t* im2col_data,
00191     const float* per_channel_scale, int32_t* input_offset) {
00192   (void)im2col_data;   // only used in optimized code.
00193   (void)im2col_shape;  // only used in optimized code.
00194   const int stride_width = params.stride_width;
00195   const int stride_height = params.stride_height;
00196   const int dilation_width_factor = params.dilation_width_factor;
00197   const int dilation_height_factor = params.dilation_height_factor;
00198   const int pad_width = params.padding_values.width;
00199   const int pad_height = params.padding_values.height;
00200   const float output_activation_min = params.float_activation_min;
00201   const float output_activation_max = params.float_activation_max;
00202   TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
00203   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
00204   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
00205   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
00206   const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
00207   const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
00208   if (bias_data) {
00209     TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
00210   }
00211   const int input_height = input_shape.Dims(1);
00212   const int input_width = input_shape.Dims(2);
00213   const int filter_height = filter_shape.Dims(1);
00214   const int filter_width = filter_shape.Dims(2);
00215   const int output_height = output_shape.Dims(1);
00216   const int output_width = output_shape.Dims(2);
00217   for (int batch = 0; batch < batches; ++batch) {
00218     for (int out_y = 0; out_y < output_height; ++out_y) {
00219       for (int out_x = 0; out_x < output_width; ++out_x) {
00220         for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
00221           const int in_x_origin = (out_x * stride_width) - pad_width;
00222           const int in_y_origin = (out_y * stride_height) - pad_height;
00223           int32 acc = 0;
00224           for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
00225             for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
00226               for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
00227                 const int in_x = in_x_origin + dilation_width_factor * filter_x;
00228                 const int in_y =
00229                     in_y_origin + dilation_height_factor * filter_y;
00230                 // If the location is outside the bounds of the input image,
00231                 // use zero as a default value.
00232                 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
00233                     (in_y < input_height)) {
00234                   int32 input_val = input_data[Offset(input_shape, batch, in_y,
00235                                                       in_x, in_channel)];
00236                   int32 filter_val =
00237                       filter_data[Offset(filter_shape, out_channel, filter_y,
00238                                          filter_x, in_channel)];
00239                   acc += filter_val * (input_val - input_offset[batch]);
00240                 }
00241               }
00242             }
00243           }
00244           float acc_float =
00245               acc * per_channel_scale[out_channel] * scaling_factors_ptr[batch];
00246           if (bias_data) {
00247             acc_float += bias_data[out_channel];
00248           }
00249           output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
00250               ActivationFunctionWithMinMax(acc_float, output_activation_min,
00251                                            output_activation_max);
00252         }
00253       }
00254     }
00255   }
00256 }
00257 
00258 }  // namespace reference_ops
00259 }  // namespace tflite
00260 
00261 
00262 #endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_