Daniel Konegen / MNIST_example

Dependencies:   mbed-os

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers string_util.h Source File

string_util.h

00001 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
00002 
00003 Licensed under the Apache License, Version 2.0 (the "License");
00004 you may not use this file except in compliance with the License.
00005 You may obtain a copy of the License at
00006 
00007     http://www.apache.org/licenses/LICENSE-2.0
00008 
00009 Unless required by applicable law or agreed to in writing, software
00010 distributed under the License is distributed on an "AS IS" BASIS,
00011 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00012 See the License for the specific language governing permissions and
00013 limitations under the License.
00014 ==============================================================================*/
00015 
00016 // Util methods to read and write String tensors.
00017 // String tensors are considered to be char tensors with the following protocol.
00018 //   [0, 3] 4 bytes: N, num of strings in the tensor in little endian.
00019 //   [(i+1)*4, (i+1)*4+3] 4 bytes: offset of i-th string in little endian.
00020 //   [(N+1)*4, (N+1)*4+3] 4 bytes: length of the whole char buffer.
00021 //   [offset(i), offset(i+1) - 1] : content of i-th string.
00022 // Example of a string tensor:
00023 // [
00024 //   2, 0, 0, 0,     # 2 strings.
00025 //   16, 0, 0, 0,    # 0-th string starts from index 16.
00026 //   18, 0, 0, 0,    # 1-st string starts from index 18.
00027 //   18, 0, 0, 0,    # total length of array.
00028 //   'A', 'B',       # 0-th string [16..17]: "AB"
00029 // ]                 # 1-st string, empty
00030 //
00031 // A typical usage:
00032 // In op.Eval(context, node):
00033 //   DynamicBuffer buf;
00034 //   # Add string "AB" to tensor, string is stored in dynamic buffer.
00035 //   buf.AddString("AB", 2);
00036 //   # Write content of DynamicBuffer to tensor in format of string tensor
00037 //   # described above.
00038 //   buf.WriteToTensor(tensor, nullptr)
00039 
00040 #ifndef TENSORFLOW_LITE_STRING_UTIL_H_
00041 #define TENSORFLOW_LITE_STRING_UTIL_H_
00042 
00043 #include <vector>
00044 
00045 #include "tensorflow/lite/c/c_api_internal.h"
00046 #include "tensorflow/lite/string_type.h"
00047 
00048 namespace tflite {
00049 
00050 // Convenient structure to store a string pointer and its length; it holds only the pointer, not a copy of the characters.
00051 typedef struct {
00052   const char* str;  // Pointer to the first character of the string.
00053   int len;          // Length of the string. NOTE(review): presumably a byte count, and str is not assumed NUL-terminated — confirm in string_util.cc.
00054 } StringRef;
00055 
00056 // DynamicBuffer holds temporary buffer that will be used to create a dynamic
00057 // tensor. A typical usage is to initialize a DynamicBuffer object, fill in
00058 // content with AddString() and call WriteToTensor() in op.Eval().
00059 class DynamicBuffer {
00060  public:
00061   DynamicBuffer() : offset_({0}) {}  // offset_ starts as the single entry {0}; data_ starts empty.
00062 
00063   // Add the string described by `string` (pointer + length) to the dynamic buffer by resizing the buffer and copying the data.
00064   void AddString(const StringRef& string);
00065 
00066   // Add the `len` characters starting at `str` to the dynamic buffer by resizing the buffer and copying the data.
00067   void AddString(const char* str, size_t len);
00068 
00069   // Join a list of strings with `separator`, and add the result as a single
00070   // string to the buffer.
00071   void AddJoinedString(const std::vector<StringRef>& strings, char separator);
00072 
00073   // Fill content into a buffer and return the number of bytes stored.
00074   // The function allocates space for the buffer but does NOT take ownership. NOTE(review): presumably the caller must release *buffer — confirm the allocator used in string_util.cc.
00075   int WriteToBuffer(char** buffer);
00076 
00077   // Fill content into a string tensor, with the given new_shape. The new shape
00078   // must match the number of strings in this object. Caller relinquishes
00079   // ownership of new_shape. If 'new_shape' is nullptr, keep the tensor's
00080   // existing shape.
00081   void WriteToTensor(TfLiteTensor* tensor, TfLiteIntArray* new_shape);
00082 
00083   // Fill content into a string tensor. Set shape to {num_strings}.
00084   void WriteToTensorAsVector(TfLiteTensor* tensor);
00085 
00086  private:
00087   // Data buffer to store contents of strings, not including headers.
00088   std::vector<char> data_;
00089   // Offset of the starting index of each string in data buffer. Initialized to {0} by the constructor.
00090   std::vector<int32_t> offset_;
00091 };
00092 
00093 // Return num of strings in a String tensor. Overloads accept either a raw buffer pointer or a TfLiteTensor; NOTE(review): the raw buffer is presumably in the string-tensor layout described at the top of this file — confirm.
00094 int GetStringCount(const void* raw_buffer);
00095 int GetStringCount(const TfLiteTensor* tensor);
00096 
00097 // Get String pointer and length of the string_index-th string in the tensor (or raw buffer), returned as a StringRef.
00098 // NOTE: This will not create a copy of string data. NOTE(review): the returned pointer therefore presumably refers to the input's own storage, and no bounds check on string_index is visible here — confirm both in string_util.cc.
00099 StringRef GetString(const void* raw_buffer, int string_index);
00100 StringRef GetString(const TfLiteTensor* tensor, int string_index);
00101 }  // namespace tflite
00102 
00103 #endif  // TENSORFLOW_LITE_STRING_UTIL_H_