Daniel Konegen / MNIST_example

Dependencies:   mbed-os

micro_allocator.cc Source File

00001 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
00002 
00003 Licensed under the Apache License, Version 2.0 (the "License");
00004 you may not use this file except in compliance with the License.
00005 You may obtain a copy of the License at
00006 
00007     http://www.apache.org/licenses/LICENSE-2.0
00008 
00009 Unless required by applicable law or agreed to in writing, software
00010 distributed under the License is distributed on an "AS IS" BASIS,
00011 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00012 See the License for the specific language governing permissions and
00013 limitations under the License.
00014 ==============================================================================*/
00015 
00016 #include "tensorflow/lite/experimental/micro/micro_allocator.h"
00017 
00018 #include "tensorflow/lite/c/c_api_internal.h"
00019 #include "tensorflow/lite/core/api/flatbuffer_conversions.h"
00020 #include "tensorflow/lite/core/api/op_resolver.h"
00021 #include "tensorflow/lite/core/api/tensor_utils.h"
00022 #include "tensorflow/lite/experimental/micro/compatibility.h"
00023 #include "tensorflow/lite/experimental/micro/memory_helpers.h"
00024 #include "tensorflow/lite/experimental/micro/memory_planner/greedy_memory_planner.h"
00025 #include "tensorflow/lite/experimental/micro/simple_memory_allocator.h"
00026 
00027 namespace tflite {
00028 
00029 namespace {
00030 // Holds the information needed during allocation calculations.
00031 struct TensorInfo {
00032   const tflite::Tensor* flatbuffer_tensor;
00033   TfLiteTensor* runtime_tensor;
00034   int first_created;
00035   int last_used;
00036   bool needs_allocating;
00037 };
00038 
00039 // We align tensor buffers to 16-byte boundaries, since this is a common
00040 // requirement for SIMD extensions.
00041 constexpr int kBufferAlignment = 16;
00042 // For common data structures that don't need SIMD extensions.
00043 constexpr int kDefaultAlignment = sizeof(int);
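// Editorial note (not in the upstream source): as a concrete example, with
// kBufferAlignment == 16 a 10-byte tensor buffer is rounded up to 16 bytes by
// AlignSizeUp() in FinishTensorAllocation() below, whereas the bookkeeping
// structures carved from the arena tail only get the int-sized
// kDefaultAlignment.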
00044 
00045 class MicroBuiltinDataAllocator : public BuiltinDataAllocator {
00046  public:
00047   explicit MicroBuiltinDataAllocator(SimpleMemoryAllocator* memory_allocator)
00048       : memory_allocator_(memory_allocator) {}
00049 
00050   void* Allocate(size_t size) override {
00051     return memory_allocator_->AllocateFromTail(size, kDefaultAlignment);
00052   }
00053   void Deallocate(void* data) override {
00054     // Do not deallocate; builtin data needs to be available for the lifetime
00055     // of the model.
00056   }
00057 
00058  private:
00059   SimpleMemoryAllocator* memory_allocator_;
00060 
00061   TF_LITE_REMOVE_VIRTUAL_DELETE
00062 };
00063 
00064 }  // namespace
00065 
00066 MicroAllocator::MicroAllocator(TfLiteContext* context, const Model* model,
00067                                uint8_t* tensor_arena, size_t arena_size,
00068                                ErrorReporter* error_reporter)
00069     : model_(model),
00070       memory_allocator_(tensor_arena, arena_size),
00071       error_reporter_(error_reporter),
00072       context_(context),
00073       arena_(tensor_arena),
00074       arena_size_(arena_size) {
00075   auto* subgraphs = model->subgraphs();
00076   if (subgraphs->size() != 1) {
00077     error_reporter->Report("Only 1 subgraph is currently supported.\n");
00078     return;
00079   }
00080   subgraph_ = (*subgraphs)[0];
00081   tensors_ = subgraph_->tensors();
00082   operators_ = subgraph_->operators();
00083 
00084   context_->tensors_size = tensors_->size();
00085   context_->tensors =
00086       reinterpret_cast<TfLiteTensor*>(memory_allocator_.AllocateFromTail(
00087           sizeof(TfLiteTensor) * context_->tensors_size, kDefaultAlignment));
00088 
00089   // Null all inputs so we can later perform a null check to avoid re-allocating
00090   // registered pre-allocated inputs.
00091   for (size_t i = 0; i < context_->tensors_size; ++i) {
00092     context_->tensors[i].data.raw = nullptr;
00093   }
00094 
00095   active_ = true;
00096 }
00097 
00098 TfLiteStatus MicroAllocator::RegisterPreallocatedInput(uint8_t* buffer,
00099                                                        size_t input_index) {
00100   if (buffer == nullptr || input_index < 0 ||
00101       input_index >= subgraph_->inputs()->size()) {
00102     error_reporter_->Report("Invalid pre-allocated input %d provided.",
00103                             input_index);
00104     return kTfLiteError;
00105   }
00106   const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
00107       model_->buffers();
00108 
00109   const int tensor_index = subgraph_->inputs()->Get(input_index);
00110   const auto* tensor = tensors_->Get(tensor_index);
00111   return InitializeRuntimeTensor(*tensor, buffers, error_reporter_,
00112                                  &context_->tensors[tensor_index], buffer);
00113 }
00114 
00115 TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
00116     const OpResolver& op_resolver,
00117     NodeAndRegistration** node_and_registrations) {
00118   if (!active_) {
00119     return kTfLiteError;
00120   }
00121 
00122   auto* output =
00123       reinterpret_cast<NodeAndRegistration*>(memory_allocator_.AllocateFromTail(
00124           sizeof(NodeAndRegistration) * operators_->size(), kDefaultAlignment));
00125   if (output == nullptr) {
00126     error_reporter_->Report(
00127         "Failed to allocate memory for node_and_registrations.");
00128     return kTfLiteError;
00129   }
00130   TfLiteStatus status = kTfLiteOk;
00131   auto* opcodes = model_->operator_codes();
00132   MicroBuiltinDataAllocator builtin_data_allocator(&memory_allocator_);
00133   for (size_t i = 0; i < operators_->size(); ++i) {
00134     const auto* op = operators_->Get(i);
00135     size_t index = op->opcode_index();
00136     if (index < 0 || index >= opcodes->size()) {
00137       error_reporter_->Report("Missing registration for opcode_index %d\n",
00138                               index);
00139       return kTfLiteError;
00140     }
00141     auto* opcode = (*opcodes)[index];
00142     status = GetRegistrationFromOpCode(opcode, op_resolver, error_reporter_,
00143                                        &(output[i].registration));
00144     if (status != kTfLiteOk) {
00145       error_reporter_->Report("Failed to get registration from opcode_index %d\n",
00146                               index);
00147       return status;
00148     }
00149     const auto* registration = output[i].registration;
00150     if (registration == nullptr) {
00151       error_reporter_->Report("Skipping op for opcode_index %d\n", index);
00152       return kTfLiteError;
00153     }
00154     BuiltinOperator op_type =
00155         static_cast<BuiltinOperator>(registration->builtin_code);
00156 
00157     if (op_type != BuiltinOperator_CUSTOM && op->custom_options()) {
00158       error_reporter_->Report(
00159           "Unsupported behavior: found builtin operator %s with custom "
00160           "options.\n",
00161           EnumNameBuiltinOperator(op_type));
00162       return kTfLiteError;
00163     }
00164 
00165     const char* custom_data = nullptr;
00166     size_t custom_data_size = 0;
00167     unsigned char* builtin_data = nullptr;
00168     if (op->custom_options()) {
00169       custom_data = reinterpret_cast<const char*>(op->custom_options()->data());
00170       custom_data_size = op->custom_options()->size();
00171     } else {
00172       TF_LITE_ENSURE_STATUS(ParseOpData(op, op_type, error_reporter_,
00173                                         &builtin_data_allocator,
00174                                         (void**)(&builtin_data)));
00175     }
00176 
00177     // Disregard const qualifier to work around the existing API.
00178     TfLiteIntArray* inputs_array = const_cast<TfLiteIntArray*>(
00179         reinterpret_cast<const TfLiteIntArray*>(op->inputs()));
00180     TfLiteIntArray* outputs_array = const_cast<TfLiteIntArray*>(
00181         reinterpret_cast<const TfLiteIntArray*>(op->outputs()));
00182 
00183     TfLiteNode* node = &(output[i].node);
00184     node->inputs = inputs_array;
00185     node->outputs = outputs_array;
00186     // This is OK for now as the temporaries array is not in use.
00187     // TODO(wangtz): Support scratch buffers.
00188     node->temporaries = nullptr;
00189     node->user_data = nullptr;  // Will be filled in after `init`
00190     node->builtin_data = reinterpret_cast<void*>(builtin_data);
00191     node->custom_initial_data = custom_data;
00192     node->custom_initial_data_size = custom_data_size;
00193     node->delegate = nullptr;
00194   }
00195   *node_and_registrations = output;
00196   return kTfLiteOk;
00197 }
00198 
00199 TfLiteStatus MicroAllocator::FinishTensorAllocation() {
00200   if (!active_) {
00201     return kTfLiteError;
00202   }
00203 
00204   const size_t tensors_size = tensors_->size();
00205 
00206   const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
00207       model_->buffers();
00208 
00209   // Initialize runtime tensors.
00210   for (size_t i = 0; i < tensors_size; ++i) {
00211     auto* runtime_tensor = &context_->tensors[i];
00212     auto* flatbuffer_tensor = tensors_->Get(i);
00213 
00214     // Preallocated inputs have already been set up earlier, so skip them.
00215     const bool is_preallocated_input = (runtime_tensor->data.raw != nullptr);
00216     if (!is_preallocated_input) {
00217       TF_LITE_ENSURE_STATUS(InitializeRuntimeTensor(*flatbuffer_tensor, buffers,
00218                                                     error_reporter_,
00219                                                     runtime_tensor, nullptr));
00220     }
00221   }
00222 
00223   // tensor_info is only used in this function.
00224   auto tmp_allocator = memory_allocator_.CreateChildAllocator();
00225   TensorInfo* tensor_info =
00226       reinterpret_cast<TensorInfo*>(tmp_allocator.AllocateFromTail(
00227           sizeof(TensorInfo) * tensors_size, sizeof(TensorInfo)));
00228 
00229   // Set up the runtime data structures for all tensors.
00230   for (size_t i = 0; i < tensors_size; ++i) {
00231     TensorInfo* current = &tensor_info[i];
00232     current->flatbuffer_tensor = &(*(tensors_->Get(i)));
00233     current->runtime_tensor = &context_->tensors[i];
00234     const bool is_variable = current->flatbuffer_tensor->is_variable();
00235     if (is_variable) {
00236       current->first_created = 0;
00237       current->last_used = operators_->size();
00238     } else {
00239       current->first_created = -1;
00240       current->last_used = -1;
00241     }
00242     current->needs_allocating = false;
00243   }
00244 
00245   // First go through the inputs and figure out if they need to be allocated.
00246   for (size_t i = 0; i < subgraph_->inputs()->size(); ++i) {
00247     const int tensor_index = subgraph_->inputs()->Get(i);
00248     TensorInfo* current = &tensor_info[tensor_index];
00249     // Check for pre-allocated inputs.
00250     current->needs_allocating = (current->runtime_tensor->data.raw == nullptr);
00251     current->first_created = 0;
00252   }
00253 
00254   // Mark all outputs as persistent to the end of the invocation.
00255   for (size_t i = 0; i < subgraph_->outputs()->size(); ++i) {
00256     const int tensor_index = subgraph_->outputs()->Get(i);
00257     TensorInfo* current = &tensor_info[tensor_index];
00258     current->last_used = operators_->size() - 1;
00259   }
00260 
00261   // Figure out when the first and last use of each tensor is.
00262   for (int i = (operators_->size() - 1); i >= 0; --i) {
00263     const auto* op = operators_->Get(i);
00264     for (size_t n = 0; n < op->inputs()->size(); ++n) {
00265       const int tensor_index = op->inputs()->Get(n);
00266       TensorInfo* current = &tensor_info[tensor_index];
00267       if ((current->last_used == -1) || (current->last_used > i)) {
00268         current->last_used = i;
00269       }
00270     }
00271     for (size_t n = 0; n < op->outputs()->size(); ++n) {
00272       const int tensor_index = op->outputs()->Get(n);
00273       TensorInfo* current = &tensor_info[tensor_index];
00274       if ((current->first_created == -1) || (current->first_created < i)) {
00275         current->first_created = i;
00276       }
00277     }
00278   }
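  // Added worked example (hypothetical graph, editorial addition): if tensor 3
  // is written by operator 1 and read only by operator 2, the loops above
  // leave first_created == 1 and last_used == 2, so the greedy planner keeps
  // its buffer live across operators 1..2 and may only reuse that arena region
  // for tensors whose lifetimes do not overlap this range.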
00279 
00280   // Work out which tensors need to be allocated.
00281   for (size_t i = 0; i < tensors_->size(); ++i) {
00282     TensorInfo* current = &tensor_info[i];
00283     const bool is_read_only =
00284         (current->first_created == -1) && (current->last_used != -1);
00285     const bool is_preallocated_input =
00286         (current->runtime_tensor->data.raw != nullptr);
00287     const bool has_partial_lifetime =
00288         !is_read_only &&
00289         ((current->first_created == -1) || (current->last_used == -1));
00290     if (has_partial_lifetime) {
00291       error_reporter_->Report(
00292           "Logic error in memory planner, tensor %d has an invalid lifetime",
00293           i);
00294       return kTfLiteError;
00295     }
00296     if (!is_read_only && !is_preallocated_input) {
00297       current->needs_allocating = true;
00298     }
00299   }
00300 
00301   uint8_t* aligned_arena = AlignPointerUp(arena_, kBufferAlignment);
00302   const size_t alignment_loss = (aligned_arena - arena_);
00303 
00304   // Remaining arena size that the memory planner can use for calculating offsets.
00305   int remaining_arena_size =
00306       arena_size_ - (tmp_allocator.GetDataSize() + alignment_loss);
00307   GreedyMemoryPlanner planner(aligned_arena, remaining_arena_size);
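  // Added note (editorial): persistent structures (the TfLiteTensor array,
  // dims, quantization params and builtin data) are all carved from the tail
  // of the arena via AllocateFromTail, while the planner hands out activation
  // buffers as offsets from the start of the 16-byte-aligned arena, so the
  // budget passed to the planner shrinks as tail allocations grow.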
00308 
00309   // Add the tensors to our allocation plan.
00310   for (size_t i = 0; i < tensors_->size(); ++i) {
00311     TensorInfo* current = &tensor_info[i];
00312     if (current->needs_allocating) {
00313       size_t bytes_required;
00314       size_t type_size;
00315       TF_LITE_ENSURE_STATUS(BytesRequiredForTensor(*current->flatbuffer_tensor,
00316                                                    &bytes_required, &type_size,
00317                                                    error_reporter_));
00318       size_t aligned_bytes_required =
00319           AlignSizeUp(bytes_required, kBufferAlignment);
00320       TF_LITE_ENSURE_STATUS(
00321           planner.AddBuffer(error_reporter_, aligned_bytes_required,
00322                             current->first_created, current->last_used));
00323     }
00324   }
00325 
00326   // Actual size available for placing tensors. This includes memory held by the
00327   // tensor info array, which will be released.
00328   int actual_available_arena_size =
00329       arena_size_ - (memory_allocator_.GetDataSize() + alignment_loss);
00330   // Make sure we have enough room.
00331   if (planner.GetMaximumMemorySize() > actual_available_arena_size) {
00332     error_reporter_->Report(
00333         "Arena size is too small for activation buffers. Needed %d but only %d "
00334         "was available.",
00335         planner.GetMaximumMemorySize(), actual_available_arena_size);
00336     return kTfLiteError;
00337   }
00338 
00339   // Figure out the actual memory addresses for each buffer, based on the plan.
00340   int planner_index = 0;
00341   for (size_t i = 0; i < tensors_->size(); ++i) {
00342     TensorInfo* current = &tensor_info[i];
00343     if (current->needs_allocating) {
00344       int offset;
00345       TF_LITE_ENSURE_STATUS(
00346           planner.GetOffsetForBuffer(error_reporter_, planner_index, &offset));
00347       current->runtime_tensor->data.uint8 = aligned_arena + offset;
00348       ++planner_index;
00349     }
00350   }
00351 
00352   // Copy default values for variable tensors. Note that this will overwrite
00353   // the arena planner data, so GetOffsetForBuffer will return the wrong
00354   // result.
00355   for (size_t i = 0; i < tensors_->size(); ++i) {
00356     TensorInfo* current = &tensor_info[i];
00357     // Set default value for variable tensors:
00358     if (current->flatbuffer_tensor->is_variable()) {
00359       if (current->runtime_tensor->data.uint8 == nullptr) {
00360         error_reporter_->Report("Variable is not allocated");
00361         return kTfLiteError;
00362       }
00363       tflite::ResetVariableTensor(current->runtime_tensor);
00364     }
00365   }
00366 
00367   active_ = false;
00368   return kTfLiteOk;
00369 }
00370 
00371 TfLiteStatus MicroAllocator::InitializeRuntimeTensor(
00372     const tflite::Tensor& flatbuffer_tensor,
00373     const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
00374     ErrorReporter* error_reporter, TfLiteTensor* result,
00375     uint8_t* preallocated_buffer) {
00376   if (!active_) {
00377     return kTfLiteError;
00378   }
00379 
00380   // Make sure the serialized type is one we know how to deal with, and convert
00381   // it from a flatbuffer enum into a constant used by the kernel C API.
00382   TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
00383                                           &result->type, error_reporter));
00384   // Make sure we remember if the serialized tensor is designated as a variable.
00385   result->is_variable = flatbuffer_tensor.is_variable();
00386 
00387   // We need to figure out where the actual contents of this tensor are stored
00388   // in memory. We'll check to see if there's a serialized buffer (pretty much
00389   // the same as a constant op in TensorFlow) associated with this tensor first,
00390   // and if there is, update the runtime structure to point to its location in
00391   // memory.
00392   result->data.raw = nullptr;
00393   result->bytes = 0;
00394   // First see if there's any buffer information in the serialized tensor.
00395   if (auto* buffer = (*buffers)[flatbuffer_tensor.buffer()]) {
00396     // If we've found a buffer, does it have any data?
00397     if (auto* array = buffer->data()) {
00398       // If it has any data, is the data size larger than zero?
00399       if (size_t array_size = array->size()) {
00400         // We've found a buffer with valid data, so update the runtime tensor
00401         // data structure to point to it.
00402         result->data.raw =
00403             const_cast<char*>(reinterpret_cast<const char*>(array->data()));
00404         // We set the data from a serialized buffer, so record that.
00405         result->allocation_type = kTfLiteMmapRo;
00406       }
00407     }
00408     // TODO(petewarden): It's not clear in what circumstances we could have a
00409     // buffer in the serialized tensor, but it doesn't have any data in it. Is
00410     // that a validly-generated file, and if so what does it mean, or is it an
00411     // error condition? It would be good to tighten up the specification to make
00412     // it less ambiguous.
00413   }
00414 
00415   // TODO(petewarden): Some of these paths aren't getting enough testing
00416   // coverage, so we should figure out some tests that exercise them.
00417   if (!result->data.raw) {
00418     // The tensor contents haven't been set from a serialized buffer, so
00419     // make a note that they will be allocated from memory. The actual
00420     // allocation won't happen until later.
00421     result->allocation_type = kTfLiteArenaRw;
00422     if (preallocated_buffer != nullptr) {
00423       // If the client is supplying memory for the contents of the tensor
00424       // themselves, use it.
00425       // TODO(petewarden): Should we store the fact this is a client-allocated
00426       // buffer?
00427       result->data.raw = reinterpret_cast<char*>(preallocated_buffer);
00428     }
00429   }
00430 
00431   // Figure out what the size in bytes of the buffer is and store it.
00432   size_t type_size;
00433   TF_LITE_ENSURE_STATUS(BytesRequiredForTensor(
00434       flatbuffer_tensor, &result->bytes, &type_size, error_reporter));
00435   // Copy the shape of the tensor from the serialized data into the runtime
00436   // form. We have to allocate memory for this.
00437   result->dims =
00438       reinterpret_cast<TfLiteIntArray*>(memory_allocator_.AllocateFromTail(
00439           sizeof(int) * (flatbuffer_tensor.shape()->Length() + 1),
00440           kDefaultAlignment));
00441   result->dims->size = flatbuffer_tensor.shape()->Length();
00442   for (size_t n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) {
00443     result->dims->data[n] = flatbuffer_tensor.shape()->Get(n);
00444   }
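  // Added note (editorial): TfLiteIntArray stores an int element count followed
  // by the elements themselves, which is why the block above reserves
  // Length() + 1 ints: one for result->dims->size plus one per dimension
  // copied in the loop.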
00445   // Copy the quantization information from the serialized data.
00446   const auto* src_quantization = flatbuffer_tensor.quantization();
00447   if (src_quantization && src_quantization->scale() &&
00448       (src_quantization->scale()->size() > 0) &&
00449       src_quantization->zero_point() &&
00450       (src_quantization->zero_point()->size() > 0)) {
00451     result->params.scale = src_quantization->scale()->Get(0);
00452     // This magic handles issues with little-endianness.
00453     for (unsigned int b = 0; b < sizeof(int64_t); ++b)
00454       *(reinterpret_cast<char*>(&result->params.zero_point) + b) =
00455           *(reinterpret_cast<const char*>(
00456                 src_quantization->zero_point()->Data()) +
00457             b);
00458     result->params.zero_point =
00459         flatbuffers::EndianScalar(result->params.zero_point);
00460 
00461     // Populate per-channel quantization params.
00462     int channels = src_quantization->scale()->size();
00463     TfLiteAffineQuantization* quantization =
00464         reinterpret_cast<TfLiteAffineQuantization*>(
00465             memory_allocator_.AllocateFromTail(sizeof(TfLiteAffineQuantization),
00466                                                kDefaultAlignment));
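    // Added note (editorial): the two raw allocations below emulate the
    // TfLiteIntArray / TfLiteFloatArray layout, where element [0] holds the
    // channel count and the remaining slots hold the per-channel zero points
    // and scales; that is why one extra int is reserved at the front of each
    // array.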
00467     int* zero_point_array =
00468         reinterpret_cast<int*>(memory_allocator_.AllocateFromTail(
00469             channels * sizeof(int) + sizeof(int), kDefaultAlignment));
00470     int* scale_array =
00471         reinterpret_cast<int*>(memory_allocator_.AllocateFromTail(
00472             channels * sizeof(float) + sizeof(int), kDefaultAlignment));
00473     zero_point_array[0] = channels;
00474     scale_array[0] = channels;
00475     int* zero_point_data = &zero_point_array[1];
00476     float* scale_data = reinterpret_cast<float*>(&scale_array[1]);
00477     for (int i = 0; i < channels; i++) {
00478       zero_point_data[i] = src_quantization->zero_point()->Get(i);
00479       scale_data[i] = src_quantization->scale()->Get(i);
00480     }
00481     quantization->scale = reinterpret_cast<TfLiteFloatArray*>(scale_array);
00482     quantization->zero_point =
00483         reinterpret_cast<TfLiteIntArray*>(zero_point_array);
00484 
00485     result->quantization = {kTfLiteAffineQuantization, quantization};
00486   }
00487   // Copy the name, if there is one.
00488   if (flatbuffer_tensor.name()->c_str() != nullptr) {
00489     result->name = flatbuffer_tensor.name()->c_str();
00490   } else {
00491     result->name = "<No name>";
00492   }
00493   // These aren't used by the micro flavor of TFL, so set them to defaults.
00494   result->allocation = nullptr;
00495   result->delegate = nullptr;
00496   result->buffer_handle = 0;
00497   result->data_is_stale = false;
00498   return kTfLiteOk;
00499 }
00500 
00501 }  // namespace tflite
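
Appended usage note (editorial addition, not part of the upstream file): the
sketch below shows how the three public entry points seen above are typically
sequenced. It assumes the companion headers micro_error_reporter.h,
kernels/all_ops_resolver.h and schema/schema_generated.h from the same
TensorFlow Lite tree, and a flatbuffer model exported as g_model_data; the
names SetUpAllocator, g_model_data, kArenaSize and input_buffer are
placeholders, not defined in this file. In the MNIST example this sequencing
is normally driven indirectly through tflite::MicroInterpreter, which owns a
MicroAllocator internally.

#include "tensorflow/lite/experimental/micro/micro_allocator.h"
#include "tensorflow/lite/experimental/micro/micro_error_reporter.h"
#include "tensorflow/lite/experimental/micro/kernels/all_ops_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"

extern const unsigned char g_model_data[];  // xxd-exported .tflite model (assumed)

TfLiteStatus SetUpAllocator() {
  static tflite::MicroErrorReporter micro_error_reporter;
  tflite::ErrorReporter* error_reporter = &micro_error_reporter;

  const tflite::Model* model = tflite::GetModel(g_model_data);

  // Arena that will hold the TfLiteTensor array, builtin data, quantization
  // params and the planned activation buffers. The size is a placeholder.
  constexpr size_t kArenaSize = 16 * 1024;
  static uint8_t tensor_arena[kArenaSize];

  TfLiteContext context = {};
  tflite::MicroAllocator allocator(&context, model, tensor_arena, kArenaSize,
                                   error_reporter);

  // Optionally hand the allocator a caller-owned buffer for input 0 so it is
  // not planned inside the arena (buffer size here is illustrative):
  //   static uint8_t input_buffer[28 * 28];
  //   allocator.RegisterPreallocatedInput(input_buffer, 0);

  // Build the node/registration table from the tail of the arena.
  tflite::ops::micro::AllOpsResolver resolver;
  tflite::NodeAndRegistration* node_and_registrations = nullptr;
  if (allocator.AllocateNodeAndRegistrations(resolver, &node_and_registrations) !=
      kTfLiteOk) {
    return kTfLiteError;
  }

  // Plan lifetimes with the greedy planner and commit tensor data pointers
  // into the arena. After this call the allocator is no longer active.
  if (allocator.FinishTensorAllocation() != kTfLiteOk) {
    return kTfLiteError;
  }
  return kTfLiteOk;
}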