micro_allocator.cc
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/experimental/micro/micro_allocator.h"

#include "tensorflow/lite/c/c_api_internal.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/experimental/micro/compatibility.h"
#include "tensorflow/lite/experimental/micro/memory_helpers.h"
#include "tensorflow/lite/experimental/micro/memory_planner/greedy_memory_planner.h"
#include "tensorflow/lite/experimental/micro/simple_memory_allocator.h"

namespace tflite {

namespace {
// Used to hold information used during allocation calculations.
struct TensorInfo {
  const tflite::Tensor* flatbuffer_tensor;
  TfLiteTensor* runtime_tensor;
  int first_created;
  int last_used;
  bool needs_allocating;
};

// We align tensor buffers to 16-byte boundaries, since this is a common
// requirement for SIMD extensions.
constexpr int kBufferAlignment = 16;
// For common data structures that don't need SIMD extensions.
constexpr int kDefaultAlignment = sizeof(int);

class MicroBuiltinDataAllocator : public BuiltinDataAllocator {
 public:
  explicit MicroBuiltinDataAllocator(SimpleMemoryAllocator* memory_allocator)
      : memory_allocator_(memory_allocator) {}

  void* Allocate(size_t size) override {
    return memory_allocator_->AllocateFromTail(size, kDefaultAlignment);
  }
  void Deallocate(void* data) override {
    // Do not deallocate, builtin data needs to be available for the lifetime
    // of the model.
  }

 private:
  SimpleMemoryAllocator* memory_allocator_;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};

}  // namespace

MicroAllocator::MicroAllocator(TfLiteContext* context, const Model* model,
                               uint8_t* tensor_arena, size_t arena_size,
                               ErrorReporter* error_reporter)
    : model_(model),
      memory_allocator_(tensor_arena, arena_size),
      error_reporter_(error_reporter),
      context_(context),
      arena_(tensor_arena),
      arena_size_(arena_size) {
  auto* subgraphs = model->subgraphs();
  if (subgraphs->size() != 1) {
    error_reporter->Report("Only 1 subgraph is currently supported.\n");
    return;
  }
  subgraph_ = (*subgraphs)[0];
  tensors_ = subgraph_->tensors();
  operators_ = subgraph_->operators();

  context_->tensors_size = tensors_->size();
  context_->tensors =
      reinterpret_cast<TfLiteTensor*>(memory_allocator_.AllocateFromTail(
          sizeof(TfLiteTensor) * context_->tensors_size, kDefaultAlignment));

  // Null all inputs so we can later perform a null check to avoid
  // re-allocating registered pre-allocated inputs.
  for (size_t i = 0; i < context_->tensors_size; ++i) {
    context_->tensors[i].data.raw = nullptr;
  }

  active_ = true;
}

TfLiteStatus MicroAllocator::RegisterPreallocatedInput(uint8_t* buffer,
                                                       size_t input_index) {
  if (buffer == nullptr || input_index < 0 ||
      input_index >= subgraph_->inputs()->size()) {
    error_reporter_->Report("Invalid pre-allocated input %d provided.",
                            input_index);
    return kTfLiteError;
  }
  const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
      model_->buffers();

  const int tensor_index = subgraph_->inputs()->Get(input_index);
  const auto* tensor = tensors_->Get(tensor_index);
  return InitializeRuntimeTensor(*tensor, buffers, error_reporter_,
                                 &context_->tensors[tensor_index], buffer);
}

TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
    const OpResolver& op_resolver,
    NodeAndRegistration** node_and_registrations) {
  if (!active_) {
    return kTfLiteError;
  }

  auto* output = reinterpret_cast<NodeAndRegistration*>(
      memory_allocator_.AllocateFromTail(
          sizeof(NodeAndRegistration) * operators_->size(),
          kDefaultAlignment));
  if (output == nullptr) {
    error_reporter_->Report(
        "Failed to allocate memory for node_and_registrations.");
    return kTfLiteError;
  }
  TfLiteStatus status = kTfLiteOk;
  auto* opcodes = model_->operator_codes();
  MicroBuiltinDataAllocator builtin_data_allocator(&memory_allocator_);
  for (size_t i = 0; i < operators_->size(); ++i) {
    const auto* op = operators_->Get(i);
    size_t index = op->opcode_index();
    if (index < 0 || index >= opcodes->size()) {
      error_reporter_->Report("Missing registration for opcode_index %d\n",
                              index);
      return kTfLiteError;
    }
    auto* opcode = (*opcodes)[index];
    status = GetRegistrationFromOpCode(opcode, op_resolver, error_reporter_,
                                       &(output[i].registration));
    if (status != kTfLiteOk) {
      error_reporter_->Report("Failed to get registration from op code %d\n",
                              index);
      return status;
    }
    const auto* registration = output[i].registration;
    if (registration == nullptr) {
      error_reporter_->Report("Skipping op for opcode_index %d\n", index);
      return kTfLiteError;
    }
    BuiltinOperator op_type =
        static_cast<BuiltinOperator>(registration->builtin_code);

    if (op_type != BuiltinOperator_CUSTOM && op->custom_options()) {
      error_reporter_->Report(
          "Unsupported behavior: found builtin operator %s with custom "
          "options.\n",
          EnumNameBuiltinOperator(op_type));
      return kTfLiteError;
    }

    const char* custom_data = nullptr;
    size_t custom_data_size = 0;
    unsigned char* builtin_data = nullptr;
    if (op->custom_options()) {
      custom_data =
          reinterpret_cast<const char*>(op->custom_options()->data());
      custom_data_size = op->custom_options()->size();
    } else {
      TF_LITE_ENSURE_STATUS(ParseOpData(op, op_type, error_reporter_,
                                        &builtin_data_allocator,
                                        (void**)(&builtin_data)));
    }

    // Disregard the const qualifier to work around the existing API.
    TfLiteIntArray* inputs_array = const_cast<TfLiteIntArray*>(
        reinterpret_cast<const TfLiteIntArray*>(op->inputs()));
    TfLiteIntArray* outputs_array = const_cast<TfLiteIntArray*>(
        reinterpret_cast<const TfLiteIntArray*>(op->outputs()));

    TfLiteNode* node = &(output[i].node);
    node->inputs = inputs_array;
    node->outputs = outputs_array;
    // This is OK for now as the temporaries array is not in use.
    // TODO(wangtz): Support scratch buffers.
    node->temporaries = nullptr;
    node->user_data = nullptr;  // Will be filled in after `init`
    node->builtin_data = reinterpret_cast<void*>(builtin_data);
    node->custom_initial_data = custom_data;
    node->custom_initial_data_size = custom_data_size;
    node->delegate = nullptr;
  }
  *node_and_registrations = output;
  return kTfLiteOk;
}

TfLiteStatus MicroAllocator::FinishTensorAllocation() {
  if (!active_) {
    return kTfLiteError;
  }

  const size_t tensors_size = tensors_->size();

  const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
      model_->buffers();

  // Initialize runtime tensors.
  for (size_t i = 0; i < tensors_size; ++i) {
    auto* runtime_tensor = &context_->tensors[i];
    auto* flatbuffer_tensor = tensors_->Get(i);

    // Preallocated inputs have already been set up earlier, so skip them.
    const bool is_preallocated_input = (runtime_tensor->data.raw != nullptr);
    if (!is_preallocated_input) {
      TF_LITE_ENSURE_STATUS(InitializeRuntimeTensor(*flatbuffer_tensor, buffers,
                                                    error_reporter_,
                                                    runtime_tensor, nullptr));
    }
  }

  // tensor_info is only used in this function.
  auto tmp_allocator = memory_allocator_.CreateChildAllocator();
  TensorInfo* tensor_info =
      reinterpret_cast<TensorInfo*>(tmp_allocator.AllocateFromTail(
          sizeof(TensorInfo) * tensors_size, sizeof(TensorInfo)));

  // Set up the runtime data structures for all tensors.
  for (size_t i = 0; i < tensors_size; ++i) {
    TensorInfo* current = &tensor_info[i];
    current->flatbuffer_tensor = &(*(tensors_->Get(i)));
    current->runtime_tensor = &context_->tensors[i];
    const bool is_variable = current->flatbuffer_tensor->is_variable();
    if (is_variable) {
      current->first_created = 0;
      current->last_used = operators_->size();
    } else {
      current->first_created = -1;
      current->last_used = -1;
    }
    current->needs_allocating = false;
  }

  // First go through the inputs and figure out if they need to be allocated.
  for (size_t i = 0; i < subgraph_->inputs()->size(); ++i) {
    const int tensor_index = subgraph_->inputs()->Get(i);
    TensorInfo* current = &tensor_info[tensor_index];
    // Check for pre-allocated inputs.
    current->needs_allocating = (current->runtime_tensor->data.raw == nullptr);
    current->first_created = 0;
  }

  // Mark all outputs as persistent to the end of the invocation.
  for (size_t i = 0; i < subgraph_->outputs()->size(); ++i) {
    const int tensor_index = subgraph_->outputs()->Get(i);
    TensorInfo* current = &tensor_info[tensor_index];
    current->last_used = operators_->size() - 1;
  }

  // Figure out when the first and last use of each tensor is.
  for (int i = (operators_->size() - 1); i >= 0; --i) {
    const auto* op = operators_->Get(i);
    for (size_t n = 0; n < op->inputs()->size(); ++n) {
      const int tensor_index = op->inputs()->Get(n);
      TensorInfo* current = &tensor_info[tensor_index];
      if ((current->last_used == -1) || (current->last_used > i)) {
        current->last_used = i;
      }
    }
    for (size_t n = 0; n < op->outputs()->size(); ++n) {
      const int tensor_index = op->outputs()->Get(n);
      TensorInfo* current = &tensor_info[tensor_index];
      if ((current->first_created == -1) || (current->first_created < i)) {
        current->first_created = i;
      }
    }
  }

  // Work out which tensors need to be allocated.
  for (size_t i = 0; i < tensors_->size(); ++i) {
    TensorInfo* current = &tensor_info[i];
    const bool is_read_only =
        (current->first_created == -1) && (current->last_used != -1);
    const bool is_preallocated_input =
        (current->runtime_tensor->data.raw != nullptr);
    const bool has_partial_lifetime =
        !is_read_only &&
        ((current->first_created == -1) || (current->last_used == -1));
    if (has_partial_lifetime) {
      error_reporter_->Report(
          "Logic error in memory planner, tensor %d has an invalid lifetime",
          i);
      return kTfLiteError;
    }
    if (!is_read_only && !is_preallocated_input) {
      current->needs_allocating = true;
    }
  }

  uint8_t* aligned_arena = AlignPointerUp(arena_, kBufferAlignment);
  const size_t alignment_loss = (aligned_arena - arena_);

  // Remaining arena size that the memory planner can use for calculating
  // offsets.
  int remaining_arena_size =
      arena_size_ - (tmp_allocator.GetDataSize() + alignment_loss);
  GreedyMemoryPlanner planner(aligned_arena, remaining_arena_size);

  // Add the tensors to our allocation plan.
  for (size_t i = 0; i < tensors_->size(); ++i) {
    TensorInfo* current = &tensor_info[i];
    if (current->needs_allocating) {
      size_t bytes_required;
      size_t type_size;
      TF_LITE_ENSURE_STATUS(BytesRequiredForTensor(*current->flatbuffer_tensor,
                                                   &bytes_required, &type_size,
                                                   error_reporter_));
      size_t aligned_bytes_required =
          AlignSizeUp(bytes_required, kBufferAlignment);
      TF_LITE_ENSURE_STATUS(
          planner.AddBuffer(error_reporter_, aligned_bytes_required,
                            current->first_created, current->last_used));
    }
  }

  // Actual size available for placing tensors. This includes memory held by
  // the tensor info array, which will be released.
  int actual_available_arena_size =
      arena_size_ - (memory_allocator_.GetDataSize() + alignment_loss);
  // Make sure we have enough room.
  if (planner.GetMaximumMemorySize() > actual_available_arena_size) {
    error_reporter_->Report(
        "Arena size is too small for activation buffers. Needed %d but only "
        "%d was available.",
        planner.GetMaximumMemorySize(), remaining_arena_size);
    return kTfLiteError;
  }

  // Figure out the actual memory addresses for each buffer, based on the plan.
  int planner_index = 0;
  for (size_t i = 0; i < tensors_->size(); ++i) {
    TensorInfo* current = &tensor_info[i];
    if (current->needs_allocating) {
      int offset;
      TF_LITE_ENSURE_STATUS(
          planner.GetOffsetForBuffer(error_reporter_, planner_index, &offset));
      current->runtime_tensor->data.uint8 = aligned_arena + offset;
      ++planner_index;
    }
  }

  // Copy default values for variable tensors. Note that this will overwrite
  // the arena planner data, so GetOffsetForBuffer will return the wrong
  // result afterwards.
  for (size_t i = 0; i < tensors_->size(); ++i) {
    TensorInfo* current = &tensor_info[i];
    // Set default value for variable tensors:
    if (current->flatbuffer_tensor->is_variable()) {
      if (current->runtime_tensor->data.uint8 == nullptr) {
        error_reporter_->Report("Variable is not allocated");
        return kTfLiteError;
      }
      tflite::ResetVariableTensor(current->runtime_tensor);
    }
  }

  active_ = false;
  return kTfLiteOk;
}

TfLiteStatus MicroAllocator::InitializeRuntimeTensor(
    const tflite::Tensor& flatbuffer_tensor,
    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
    ErrorReporter* error_reporter, TfLiteTensor* result,
    uint8_t* preallocated_buffer) {
  if (!active_) {
    return kTfLiteError;
  }

  // Make sure the serialized type is one we know how to deal with, and convert
  // it from a flatbuffer enum into a constant used by the kernel C API.
  TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
                                          &result->type, error_reporter));
  // Make sure we remember if the serialized tensor is designated as a
  // variable.
  result->is_variable = flatbuffer_tensor.is_variable();

  // We need to figure out where the actual contents of this tensor are stored
  // in memory. We'll check to see if there's a serialized buffer (pretty much
  // the same as a constant op in TensorFlow) associated with this tensor
  // first, and if there is, update the runtime structure to point to its
  // location in memory.
  result->data.raw = nullptr;
  result->bytes = 0;
  // First see if there's any buffer information in the serialized tensor.
  if (auto* buffer = (*buffers)[flatbuffer_tensor.buffer()]) {
    // If we've found a buffer, does it have any data?
    if (auto* array = buffer->data()) {
      // If it has any data, is the data size larger than zero?
      if (size_t array_size = array->size()) {
        // We've found a buffer with valid data, so update the runtime tensor
        // data structure to point to it.
        result->data.raw =
            const_cast<char*>(reinterpret_cast<const char*>(array->data()));
        // We set the data from a serialized buffer, so record that.
        result->allocation_type = kTfLiteMmapRo;
      }
    }
    // TODO(petewarden): It's not clear in what circumstances we could have a
    // buffer in the serialized tensor, but it doesn't have any data in it. Is
    // that a validly-generated file, and if so what does it mean, or is it an
    // error condition? It would be good to tighten up the specification to
    // make it less ambiguous.
  }

  // TODO(petewarden): Some of these paths aren't getting enough testing
  // coverage, so we should figure out some tests that exercise them.
  if (!result->data.raw) {
    // The tensor contents haven't been set from a serialized buffer, so
    // make a note that they will be allocated from memory. The actual
    // allocation won't happen until later.
    result->allocation_type = kTfLiteArenaRw;
    if (preallocated_buffer != nullptr) {
      // If the client is supplying memory for the contents of the tensor
      // themselves, use it.
      // TODO(petewarden): Should we store the fact this is a client-allocated
      // buffer?
      result->data.raw = reinterpret_cast<char*>(preallocated_buffer);
    }
  }

  // Figure out what the size in bytes of the buffer is and store it.
  size_t type_size;
  TF_LITE_ENSURE_STATUS(BytesRequiredForTensor(
      flatbuffer_tensor, &result->bytes, &type_size, error_reporter));
  // Copy the shape of the tensor from the serialized data into the runtime
  // form. We have to allocate memory for this.
  result->dims =
      reinterpret_cast<TfLiteIntArray*>(memory_allocator_.AllocateFromTail(
          sizeof(int) * (flatbuffer_tensor.shape()->Length() + 1),
          kDefaultAlignment));
  result->dims->size = flatbuffer_tensor.shape()->Length();
  for (size_t n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) {
    result->dims->data[n] = flatbuffer_tensor.shape()->Get(n);
  }
  // Copy the quantization information from the serialized data.
  const auto* src_quantization = flatbuffer_tensor.quantization();
  if (src_quantization && src_quantization->scale() &&
      (src_quantization->scale()->size() > 0) &&
      src_quantization->zero_point() &&
      (src_quantization->zero_point()->size() > 0)) {
    result->params.scale = src_quantization->scale()->Get(0);
    // This magic handles issues with little-endianness.
    for (unsigned int b = 0; b < sizeof(int64_t); ++b)
      *(reinterpret_cast<char*>(&result->params.zero_point) + b) =
          *(reinterpret_cast<const char*>(
                src_quantization->zero_point()->Data()) +
            b);
    result->params.zero_point =
        flatbuffers::EndianScalar(result->params.zero_point);

    // Populate per-channel quantization params.
    int channels = src_quantization->scale()->size();
    TfLiteAffineQuantization* quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            memory_allocator_.AllocateFromTail(
                sizeof(TfLiteAffineQuantization), kDefaultAlignment));
    int* zero_point_array =
        reinterpret_cast<int*>(memory_allocator_.AllocateFromTail(
            channels * sizeof(int) + sizeof(int), kDefaultAlignment));
    int* scale_array =
        reinterpret_cast<int*>(memory_allocator_.AllocateFromTail(
            channels * sizeof(float) + sizeof(int), kDefaultAlignment));
    zero_point_array[0] = channels;
    scale_array[0] = channels;
    int* zero_point_data = &zero_point_array[1];
    float* scale_data = reinterpret_cast<float*>(&scale_array[1]);
    for (int i = 0; i < channels; i++) {
      zero_point_data[i] = src_quantization->zero_point()->Get(i);
      scale_data[i] = src_quantization->scale()->Get(i);
    }
    quantization->scale = reinterpret_cast<TfLiteFloatArray*>(scale_array);
    quantization->zero_point =
        reinterpret_cast<TfLiteIntArray*>(zero_point_array);

    result->quantization = {kTfLiteAffineQuantization, quantization};
  }
  // Copy the name, if there is one.
  if (flatbuffer_tensor.name()->c_str() != nullptr) {
    result->name = flatbuffer_tensor.name()->c_str();
  } else {
    result->name = "<No name>";
  }
  // These aren't used by the micro flavor of TFL, so set them to defaults.
  result->allocation = nullptr;
  result->delegate = nullptr;
  result->buffer_handle = 0;
  result->data_is_stale = false;
  return kTfLiteOk;
}

}  // namespace tflite
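The sketch below (not part of the TensorFlow sources) shows how this allocator is typically driven: construct a MicroAllocator over a static arena, resolve the model's operators into node/registration pairs, then commit the tensor allocation plan. The arena size, the SetUpModel helper, and the model_data pointer are illustrative assumptions; in practice the MicroInterpreter performs these steps internally.

// Minimal usage sketch only -- kArenaSize, SetUpModel and model_data are
// assumptions for illustration, not part of micro_allocator.cc or its tests.
#include "tensorflow/lite/experimental/micro/kernels/all_ops_resolver.h"
#include "tensorflow/lite/experimental/micro/micro_allocator.h"
#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"

constexpr size_t kArenaSize = 16 * 1024;  // assumed; depends on the model
static uint8_t tensor_arena[kArenaSize];  // backing store for tensors and metadata

TfLiteStatus SetUpModel(const void* model_data) {  // hypothetical helper
  static tflite::MicroErrorReporter micro_error_reporter;
  const tflite::Model* model = tflite::GetModel(model_data);

  // The allocator fills in context.tensors and context.tensors_size.
  TfLiteContext context = {};
  tflite::MicroAllocator allocator(&context, model, tensor_arena, kArenaSize,
                                   &micro_error_reporter);

  // Resolve every operator in the model to a registration plus builtin data,
  // allocated from the tail of the arena.
  tflite::ops::micro::AllOpsResolver resolver;
  tflite::NodeAndRegistration* node_and_registrations = nullptr;
  if (allocator.AllocateNodeAndRegistrations(resolver,
                                             &node_and_registrations) !=
      kTfLiteOk) {
    return kTfLiteError;
  }

  // Plan activation-buffer lifetimes with the greedy planner and bind each
  // tensor's data pointer to an offset in the aligned arena.
  return allocator.FinishTensorAllocation();
}

Note that FinishTensorAllocation() sets active_ to false on success, so the allocator rejects further RegisterPreallocatedInput or InitializeRuntimeTensor calls once the plan has been committed.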
