Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
add.h
00001 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 00002 00003 Licensed under the Apache License, Version 2.0 (the "License"); 00004 you may not use this file except in compliance with the License. 00005 You may obtain a copy of the License at 00006 00007 http://www.apache.org/licenses/LICENSE-2.0 00008 00009 Unless required by applicable law or agreed to in writing, software 00010 distributed under the License is distributed on an "AS IS" BASIS, 00011 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00012 See the License for the specific language governing permissions and 00013 limitations under the License. 00014 ==============================================================================*/ 00015 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_ 00016 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_ 00017 00018 #include <limits> 00019 00020 #include "tensorflow/lite/kernels/internal/common.h" 00021 #include "tensorflow/lite/kernels/internal/types.h" 00022 00023 namespace tflite { 00024 namespace reference_integer_ops { 00025 00026 // Element-wise add that can often be used for inner loop of broadcast add as 00027 // well as the non-broadcast add. 
00028 inline void AddElementwise(int size, const ArithmeticParams& params, 00029 const int8_t* input1_data, const int8_t* input2_data, 00030 int8_t* output_data) { 00031 const int32_t int8_max_value = std::numeric_limits<int8_t>::max(); 00032 TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value); 00033 TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value); 00034 TFLITE_DCHECK_LE(params.input1_offset, int8_max_value); 00035 TFLITE_DCHECK_LE(params.input2_offset, int8_max_value); 00036 00037 for (int i = 0; i < size; ++i) { 00038 const int32 input1_val = params.input1_offset + input1_data[i]; 00039 const int32 input2_val = params.input2_offset + input2_data[i]; 00040 const int32 shifted_input1_val = input1_val * (1 << params.left_shift); 00041 const int32 shifted_input2_val = input2_val * (1 << params.left_shift); 00042 const int32 scaled_input1_val = 00043 MultiplyByQuantizedMultiplierSmallerThanOneExp( 00044 shifted_input1_val, params.input1_multiplier, params.input1_shift); 00045 const int32 scaled_input2_val = 00046 MultiplyByQuantizedMultiplierSmallerThanOneExp( 00047 shifted_input2_val, params.input2_multiplier, params.input2_shift); 00048 const int32 raw_sum = scaled_input1_val + scaled_input2_val; 00049 const int32 raw_output = 00050 MultiplyByQuantizedMultiplierSmallerThanOneExp( 00051 raw_sum, params.output_multiplier, params.output_shift) + 00052 params.output_offset; 00053 const int32 clamped_output = 00054 std::min(params.quantized_activation_max, 00055 std::max(params.quantized_activation_min, raw_output)); 00056 output_data[i] = static_cast<int8_t>(clamped_output); 00057 } 00058 } 00059 00060 inline void Add(const ArithmeticParams& params, 00061 const RuntimeShape& input1_shape, const int8_t* input1_data, 00062 const RuntimeShape& input2_shape, const int8_t* input2_data, 00063 const RuntimeShape& output_shape, int8_t* output_data) { 00064 TFLITE_DCHECK_LE(params.quantized_activation_min, 00065 params.quantized_activation_max); 00066 const 
int flat_size = 00067 MatchingElementsSize(input1_shape, input2_shape, output_shape); 00068 00069 const int32_t int8_max_value = std::numeric_limits<int8_t>::max(); 00070 TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value); 00071 TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value); 00072 TFLITE_DCHECK_LE(params.input1_offset, int8_max_value); 00073 TFLITE_DCHECK_LE(params.input2_offset, int8_max_value); 00074 AddElementwise(flat_size, params, input1_data, input2_data, output_data); 00075 } 00076 00077 inline void BroadcastAdd4DSlow(const ArithmeticParams& params, 00078 const RuntimeShape& input1_shape, 00079 const int8_t* input1_data, 00080 const RuntimeShape& input2_shape, 00081 const int8_t* input2_data, 00082 const RuntimeShape& output_shape, 00083 int8_t* output_data) { 00084 NdArrayDesc<4> desc1; 00085 NdArrayDesc<4> desc2; 00086 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, 00087 &desc2); 00088 const RuntimeShape extended_output_shape = 00089 RuntimeShape::ExtendedShape(4, output_shape); 00090 00091 // In Tensorflow, the dimensions are canonically named (batch_number, row, 00092 // col, channel), with extents (batches, height, width, depth), with the 00093 // trailing dimension changing most rapidly (channels has the smallest stride, 00094 // typically 1 element). 00095 // 00096 // In generated C code, we store arrays with the dimensions reversed. The 00097 // first dimension has smallest stride. 00098 // 00099 // We name our variables by their Tensorflow convention, but generate C code 00100 // nesting loops such that the innermost loop has the smallest stride for the 00101 // best cache behavior. 
00102 for (int b = 0; b < extended_output_shape.Dims(0); ++b) { 00103 for (int y = 0; y < extended_output_shape.Dims(1); ++y) { 00104 for (int x = 0; x < extended_output_shape.Dims(2); ++x) { 00105 for (int c = 0; c < extended_output_shape.Dims(3); ++c) { 00106 const int32_t input1_val = 00107 params.input1_offset + 00108 input1_data[SubscriptToIndex(desc1, b, y, x, c)]; 00109 const int32_t input2_val = 00110 params.input2_offset + 00111 input2_data[SubscriptToIndex(desc2, b, y, x, c)]; 00112 const int32_t shifted_input1_val = 00113 input1_val * (1 << params.left_shift); 00114 const int32_t shifted_input2_val = 00115 input2_val * (1 << params.left_shift); 00116 const int32_t scaled_input1_val = 00117 MultiplyByQuantizedMultiplierSmallerThanOneExp( 00118 shifted_input1_val, params.input1_multiplier, 00119 params.input1_shift); 00120 const int32_t scaled_input2_val = 00121 MultiplyByQuantizedMultiplierSmallerThanOneExp( 00122 shifted_input2_val, params.input2_multiplier, 00123 params.input2_shift); 00124 const int32_t raw_sum = scaled_input1_val + scaled_input2_val; 00125 const int32_t raw_output = 00126 MultiplyByQuantizedMultiplierSmallerThanOneExp( 00127 raw_sum, params.output_multiplier, params.output_shift) + 00128 params.output_offset; 00129 const int32_t clamped_output = 00130 std::min(params.quantized_activation_max, 00131 std::max(params.quantized_activation_min, raw_output)); 00132 output_data[Offset(extended_output_shape, b, y, x, c)] = 00133 static_cast<int8_t>(clamped_output); 00134 } 00135 } 00136 } 00137 } 00138 } 00139 00140 } // namespace reference_integer_ops 00141 } // namespace tflite 00142 00143 #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
Generated on Wed Jul 13 2022 16:03:34 by Doxygen
