Daniel Konegen / MNIST_example

Dependencies:   mbed-os

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers greedy_memory_planner.cc Source File

greedy_memory_planner.cc

00001 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
00002 
00003 Licensed under the Apache License, Version 2.0 (the "License");
00004 you may not use this file except in compliance with the License.
00005 You may obtain a copy of the License at
00006 
00007     http://www.apache.org/licenses/LICENSE-2.0
00008 
00009 Unless required by applicable law or agreed to in writing, software
00010 distributed under the License is distributed on an "AS IS" BASIS,
00011 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00012 See the License for the specific language governing permissions and
00013 limitations under the License.
00014 ==============================================================================*/
00015 
00016 #include "tensorflow/lite/experimental/micro/memory_planner/greedy_memory_planner.h"
00017 
00018 namespace tflite {
00019 
00020 // Simple stable in-place sort function. Not time-efficient for large arrays.
00021 // Would normally be in an anonymous namespace to keep it private, but we want
00022 // to be able to test it externally.
00023 void ReverseSortInPlace(int* values, int* ids, int size) {
00024   bool any_swapped;
00025   do {
00026     any_swapped = false;
00027     for (int i = 1; i < size; ++i) {
00028       if (values[i - 1] < values[i]) {
00029         const int value_temp = values[i - 1];
00030         values[i - 1] = values[i];
00031         values[i] = value_temp;
00032         const int id_temp = ids[i - 1];
00033         ids[i - 1] = ids[i];
00034         ids[i] = id_temp;
00035         any_swapped = true;
00036       }
00037     }
00038   } while (any_swapped);
00039 }
00040 
00041 GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
00042                                          int scratch_buffer_size)
00043     : buffer_count_(0), need_to_calculate_offsets_(true) {
00044   const int per_buffer_size = sizeof(BufferRequirements) +  // requirements_
00045                               sizeof(int) +  // buffer_sizes_sorted_by_size_
00046                               sizeof(int) +  // buffer_ids_sorted_by_size_
00047                               sizeof(ListEntry) +  // buffers_sorted_by_offset_
00048                               sizeof(int);         // buffer_offsets_;
00049   // Allocate the arrays we need within the scratch buffer arena.
00050   max_buffer_count_ = scratch_buffer_size / per_buffer_size;
00051 
00052   unsigned char* next_free = scratch_buffer;
00053   requirements_ = reinterpret_cast<BufferRequirements*>(next_free);
00054   next_free += sizeof(BufferRequirements) * max_buffer_count_;
00055 
00056   buffer_sizes_sorted_by_size_ = reinterpret_cast<int*>(next_free);
00057   next_free += sizeof(int) * max_buffer_count_;
00058 
00059   buffer_ids_sorted_by_size_ = reinterpret_cast<int*>(next_free);
00060   next_free += sizeof(int) * max_buffer_count_;
00061 
00062   buffers_sorted_by_offset_ = reinterpret_cast<ListEntry*>(next_free);
00063   next_free += sizeof(ListEntry) * max_buffer_count_;
00064 
00065   buffer_offsets_ = reinterpret_cast<int*>(next_free);
00066 }
00067 
00068 GreedyMemoryPlanner::~GreedyMemoryPlanner() {
00069   // We don't own the scratch buffer, so don't deallocate anything.
00070 }
00071 
00072 TfLiteStatus GreedyMemoryPlanner::AddBuffer(
00073     tflite::ErrorReporter* error_reporter, int size, int first_time_used,
00074     int last_time_used) {
00075   if (buffer_count_ >= max_buffer_count_) {
00076     error_reporter->Report("Too many buffers (max is %d)", max_buffer_count_);
00077     return kTfLiteError;
00078   }
00079   BufferRequirements* current = &requirements_[buffer_count_];
00080   current->size = size;
00081   current->first_time_used = first_time_used;
00082   current->last_time_used = last_time_used;
00083   ++buffer_count_;
00084   need_to_calculate_offsets_ = true;
00085   return kTfLiteOk;
00086 }
00087 
00088 bool GreedyMemoryPlanner::DoesEntryOverlapInTime(
00089     const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used,
00090     const int last_time_used) const {
00091   const BufferRequirements* entry_requirements =
00092       &requirements_[entry->requirements_index];
00093   if (entry_requirements->first_time_used > last_time_used) {
00094     return false;
00095   }
00096   if (first_time_used > entry_requirements->last_time_used) {
00097     return false;
00098   }
00099   return true;
00100 }
00101 
00102 GreedyMemoryPlanner::ListEntry*
00103 GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer(
00104     const GreedyMemoryPlanner::ListEntry* start, const int first_time_used,
00105     const int last_time_used) {
00106   ListEntry* result = nullptr;
00107   ListEntry* candidate_next_entry;
00108   if (start == nullptr) {
00109     candidate_next_entry = &buffers_sorted_by_offset_[0];
00110   } else {
00111     if (start->next_entry_index == -1) {
00112       return nullptr;
00113     }
00114     candidate_next_entry = &buffers_sorted_by_offset_[start->next_entry_index];
00115   }
00116   do {
00117     if (DoesEntryOverlapInTime(candidate_next_entry, first_time_used,
00118                                last_time_used)) {
00119       result = candidate_next_entry;
00120       break;
00121     }
00122     if (candidate_next_entry->next_entry_index == -1) {
00123       break;
00124     }
00125     candidate_next_entry =
00126         &buffers_sorted_by_offset_[candidate_next_entry->next_entry_index];
00127   } while (true);
00128   return result;
00129 }
00130 
00131 void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
00132   if (!need_to_calculate_offsets_ || (buffer_count_ == 0)) {
00133     return;
00134   }
00135   need_to_calculate_offsets_ = false;
00136 
00137   // Start off by ordering the buffers in descending order of size.
00138   // This helps find a more compact layout. Intuitively, you can think
00139   // about putting the large buffers in place first, and then the
00140   // smaller buffers can fit in the gaps, rather than fragmenting the
00141   // gaps with small buffers at the beginning.
00142   for (int i = 0; i < buffer_count_; ++i) {
00143     buffer_sizes_sorted_by_size_[i] = requirements_[i].size;
00144     buffer_ids_sorted_by_size_[i] = i;
00145     buffer_offsets_[i] = -1;
00146   }
00147   // This sorting algorithm is naive, and may end up taking a very long time
00148   // with hundreds of buffers.
00149   ReverseSortInPlace(buffer_sizes_sorted_by_size_, buffer_ids_sorted_by_size_,
00150                      buffer_count_);
00151 
00152   // Put the largest buffer at offset zero to start the process.
00153   ListEntry* first_entry = &buffers_sorted_by_offset_[0];
00154   first_entry->offset = 0;
00155   first_entry->requirements_index = buffer_ids_sorted_by_size_[0];
00156   first_entry->next_entry_index = -1;
00157   next_free_entry_ = 1;
00158   buffer_offsets_[buffer_ids_sorted_by_size_[0]] = 0;
00159 
00160   // Work through the rest of the buffers to find a good gap to place each one.
00161   for (int i = 1; i < buffer_count_; ++i) {
00162     // The id is the order the buffer was originally added by the client.
00163     const int buffer_id = buffer_ids_sorted_by_size_[i];
00164     // Look at what size and time range the buffer needs to be active.
00165     BufferRequirements* wanted_requirements = &requirements_[buffer_id];
00166     const int wanted_size = wanted_requirements->size;
00167     const int wanted_first_time_used = wanted_requirements->first_time_used;
00168     const int wanted_last_time_used = wanted_requirements->last_time_used;
00169 
00170     // Find the first buffer that's active in our time range. All placed
00171     // buffers are stored in the order of their starting position in the arena
00172     // so that it's easy to find the next buffer in memory, and so the gap.
00173     // The candidate_entry variable holds the buffer that we're considering
00174     // placing the current buffer after.
00175     ListEntry* prior_entry = nullptr;
00176     int candidate_offset = 0;
00177     // Loop through the offset-ordered list of buffers, looking for gaps.
00178     while (true) {
00179       // Find out what the next active buffer is.
00180       ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
00181           prior_entry, wanted_first_time_used, wanted_last_time_used);
00182 
00183       if (prior_entry) {
00184         BufferRequirements* candidate_requirements =
00185             &requirements_[prior_entry->requirements_index];
00186         const int prior_entry_offset =
00187             prior_entry->offset + candidate_requirements->size;
00188         if (prior_entry_offset > candidate_offset) {
00189           candidate_offset = prior_entry_offset;
00190         }
00191       }
00192       if (next_entry == nullptr) {
00193         // We're at the end of the list, so we can always append the buffer
00194         // here.
00195         break;
00196       }
00197       // Find out how much space there is between us and the next buffer.
00198       const int gap = next_entry->offset - candidate_offset;
00199       if (gap >= wanted_size) {
00200         // This entry has a big enough gap between it and the next, so
00201         // use it!
00202         break;
00203       }
00204       // The gap wasn't big enough, so move on to another candidate.
00205       prior_entry = next_entry;
00206     }
00207     // At this point, we've either found a gap (possibly at the end of the
00208     // list) and want to place the buffer there, or there are no other active
00209     // buffers in this time range and so we can put it at offset zero.
00210     // Record the buffer's offset in our plan.
00211     buffer_offsets_[buffer_id] = candidate_offset;
00212     // Add the newly-placed buffer to our offset-ordered list, so that
00213     // subsequent passes can fit in their buffers around it.
00214     ListEntry* new_entry = &buffers_sorted_by_offset_[next_free_entry_];
00215     new_entry->offset = candidate_offset;
00216     new_entry->requirements_index = buffer_id;
00217     const int new_entry_index = next_free_entry_;
00218     ++next_free_entry_;
00219     ListEntry* current_entry = first_entry;
00220     // Make sure that we insert the buffer at the correct place in the ordered
00221     // list.
00222     while (true) {
00223       const int next_entry_index = current_entry->next_entry_index;
00224       if (next_entry_index == -1) {
00225         // We're at the end of the list, so just add the new entry here.
00226         current_entry->next_entry_index = new_entry_index;
00227         new_entry->next_entry_index = -1;
00228         break;
00229       }
00230       ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
00231       if (next_entry->offset > candidate_offset) {
00232         // We're at the right spot to do an insertion and retain the sorting
00233         // order, so place the new entry here.
00234         new_entry->next_entry_index = current_entry->next_entry_index;
00235         current_entry->next_entry_index = new_entry_index;
00236         break;
00237       }
00238       current_entry = next_entry;
00239     }
00240   }
00241 }
00242 
00243 int GreedyMemoryPlanner::GetMaximumMemorySize() {
00244   CalculateOffsetsIfNeeded();
00245   if (buffer_count_ == 0) {
00246     return 0;
00247   }
00248   ListEntry* entry = &buffers_sorted_by_offset_[0];
00249   int max_size = 0;
00250   while (entry) {
00251     BufferRequirements* requirements =
00252         &requirements_[entry->requirements_index];
00253     const int current_size = entry->offset + requirements->size;
00254     if (current_size > max_size) {
00255       max_size = current_size;
00256     }
00257     if (entry->next_entry_index == -1) {
00258       break;
00259     }
00260     entry = &buffers_sorted_by_offset_[entry->next_entry_index];
00261   }
00262   return max_size;
00263 }
00264 
00265 void GreedyMemoryPlanner::PrintMemoryPlan(ErrorReporter* error_reporter) {
00266   CalculateOffsetsIfNeeded();
00267 
00268   for (int i = 0; i < buffer_count_; ++i) {
00269     error_reporter->Report(
00270         "Planner buffer ID: %d, calculated offset: %d, size required: %d, "
00271         "first_time_created: %d, "
00272         "last_time_used: %d",
00273         i, buffer_offsets_[i], requirements_[i].size,
00274         requirements_[i].first_time_used, requirements_[i].last_time_used);
00275   }
00276 
00277   constexpr int kLineWidth = 80;
00278   int max_size = kLineWidth;
00279   int max_time = 0;
00280   for (int i = 0; i < buffer_count_; ++i) {
00281     BufferRequirements* requirements = &requirements_[i];
00282     const int offset = buffer_offsets_[i];
00283     const int last_time_used = requirements->last_time_used;
00284     const int size = offset + requirements->size;
00285     if (size > max_size) {
00286       max_size = size;
00287     }
00288     if (last_time_used > max_time) {
00289       max_time = last_time_used;
00290     }
00291   }
00292 
00293   char line[kLineWidth + 1];
00294   for (int t = 0; t <= max_time; ++t) {
00295     for (int c = 0; c < kLineWidth; ++c) {
00296       line[c] = '.';
00297     }
00298     for (int i = 0; i < buffer_count_; ++i) {
00299       BufferRequirements* requirements = &requirements_[i];
00300       if ((t < requirements->first_time_used) ||
00301           (t > requirements->last_time_used)) {
00302         continue;
00303       }
00304       const int offset = buffer_offsets_[i];
00305       if (offset == -1) {
00306         continue;
00307       }
00308       const int size = requirements->size;
00309       const int line_start = (offset * kLineWidth) / max_size;
00310       const int line_end = ((offset + size) * kLineWidth) / max_size;
00311       for (int n = line_start; n < line_end; ++n) {
00312         if (line[n] == '.') {
00313           char display;
00314           if (i < 10) {
00315             display = '0' + i;
00316           } else if (i < 36) {
00317             display = 'a' + (i - 10);
00318           } else if (i < 62) {
00319             display = 'A' + (i - 36);
00320           } else {
00321             display = '*';
00322           }
00323           line[n] = display;
00324         } else {
00325           line[n] = '!';
00326         }
00327       }
00328     }
00329     line[kLineWidth] = 0;
00330     error_reporter->Report("%s", line);
00331   }
00332 }
00333 
00334 int GreedyMemoryPlanner::GetBufferCount() { return buffer_count_; }
00335 
00336 TfLiteStatus GreedyMemoryPlanner::GetOffsetForBuffer(
00337     tflite::ErrorReporter* error_reporter, int buffer_index, int* offset) {
00338   CalculateOffsetsIfNeeded();
00339   if ((buffer_index < 0) || (buffer_index >= buffer_count_)) {
00340     error_reporter->Report("buffer index %d is outside range 0 to %d",
00341                            buffer_index, buffer_count_);
00342     return kTfLiteError;
00343   }
00344   *offset = buffer_offsets_[buffer_index];
00345   return kTfLiteOk;
00346 }
00347 
00348 bool GreedyMemoryPlanner::DoAnyBuffersOverlap(ErrorReporter* error_reporter) {
00349   CalculateOffsetsIfNeeded();
00350   bool were_overlaps_found = false;
00351   for (int i = 0; i < buffer_count_; ++i) {
00352     BufferRequirements* a_requirements = &requirements_[i];
00353     const int a_start_offset = buffer_offsets_[i];
00354     const int a_first_time_used = a_requirements->first_time_used;
00355     const int a_last_time_used = a_requirements->last_time_used;
00356     const int a_end_offset = a_start_offset + a_requirements->size;
00357     for (int j = 0; j < buffer_count_; ++j) {
00358       if (i == j) {
00359         continue;
00360       }
00361       BufferRequirements* b_requirements = &requirements_[j];
00362       const int b_start_offset = buffer_offsets_[j];
00363       const int b_first_time_used = b_requirements->first_time_used;
00364       const int b_last_time_used = b_requirements->last_time_used;
00365       const int b_end_offset = b_start_offset + b_requirements->size;
00366       if ((a_first_time_used > b_last_time_used) ||
00367           (b_first_time_used > a_last_time_used)) {
00368         // Buffers don't overlap in time.
00369         continue;
00370       }
00371       if ((a_start_offset >= b_end_offset) ||
00372           (b_start_offset >= a_end_offset)) {
00373         // No overlap in memory.
00374         continue;
00375       }
00376       were_overlaps_found = true;
00377       error_reporter->Report(
00378           "Overlap: %d (%d=>%d, %d->%d) vs %d (%d=>%d, %d->%d)", i,
00379           a_first_time_used, a_last_time_used, a_start_offset, a_end_offset, j,
00380           b_first_time_used, b_last_time_used, b_start_offset, b_end_offset);
00381     }
00382   }
00383   return were_overlaps_found;
00384 }
00385 
00386 }  // namespace tflite