Test
Dependencies: X_NUCLEO_IHM01A1
Tokenizer.cpp@3:24ab1c8fefef, 2019-12-29 (annotated)
- Committer:
- jackcassa1967
- Date:
- Sun Dec 29 18:36:23 2019 +0000
- Revision:
- 3:24ab1c8fefef
Start
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
jackcassa1967 | 3:24ab1c8fefef | 1 | /////////////////////////////////////////////////////////////////////////////// |
jackcassa1967 | 3:24ab1c8fefef | 2 | // Tokenizer.cpp |
jackcassa1967 | 3:24ab1c8fefef | 3 | // ============= |
jackcassa1967 | 3:24ab1c8fefef | 4 | // General purpose string tokenizer (C++ string version) |
jackcassa1967 | 3:24ab1c8fefef | 5 | // |
jackcassa1967 | 3:24ab1c8fefef | 6 | // The default delimiters are space(" "), tab(\t, \v), newline(\n), |
jackcassa1967 | 3:24ab1c8fefef | 7 | // carriage return(\r), and form feed(\f). |
jackcassa1967 | 3:24ab1c8fefef | 8 | // If you want to use different delimiters, then use setDelimiter() to override |
jackcassa1967 | 3:24ab1c8fefef | 9 | // the delimiters. Note that the delimiter string can hold multiple characters. |
jackcassa1967 | 3:24ab1c8fefef | 10 | // |
jackcassa1967 | 3:24ab1c8fefef | 11 | // AUTHOR: Song Ho Ahn (song.ahn@gmail.com) |
jackcassa1967 | 3:24ab1c8fefef | 12 | // CREATED: 2005-05-25 |
jackcassa1967 | 3:24ab1c8fefef | 13 | // UPDATED: 2011-03-08 |
jackcassa1967 | 3:24ab1c8fefef | 14 | /////////////////////////////////////////////////////////////////////////////// |
jackcassa1967 | 3:24ab1c8fefef | 15 | |
jackcassa1967 | 3:24ab1c8fefef | 16 | #include "Tokenizer.h" |
jackcassa1967 | 3:24ab1c8fefef | 17 | |
jackcassa1967 | 3:24ab1c8fefef | 18 | |
jackcassa1967 | 3:24ab1c8fefef | 19 | /////////////////////////////////////////////////////////////////////////////// |
jackcassa1967 | 3:24ab1c8fefef | 20 | // constructor |
jackcassa1967 | 3:24ab1c8fefef | 21 | /////////////////////////////////////////////////////////////////////////////// |
jackcassa1967 | 3:24ab1c8fefef | 22 | Tokenizer::Tokenizer() : buffer(""), token(""), delimiter(DEFAULT_DELIMITER) |
jackcassa1967 | 3:24ab1c8fefef | 23 | { |
jackcassa1967 | 3:24ab1c8fefef | 24 | currPos = buffer.begin(); |
jackcassa1967 | 3:24ab1c8fefef | 25 | } |
jackcassa1967 | 3:24ab1c8fefef | 26 | |
jackcassa1967 | 3:24ab1c8fefef | 27 | Tokenizer::Tokenizer(const std::string& str, const std::string& delimiter) : buffer(str), token(""), delimiter(delimiter) |
jackcassa1967 | 3:24ab1c8fefef | 28 | { |
jackcassa1967 | 3:24ab1c8fefef | 29 | currPos = buffer.begin(); |
jackcassa1967 | 3:24ab1c8fefef | 30 | } |
jackcassa1967 | 3:24ab1c8fefef | 31 | |
jackcassa1967 | 3:24ab1c8fefef | 32 | |
jackcassa1967 | 3:24ab1c8fefef | 33 | |
jackcassa1967 | 3:24ab1c8fefef | 34 | /////////////////////////////////////////////////////////////////////////////// |
jackcassa1967 | 3:24ab1c8fefef | 35 | // destructor |
jackcassa1967 | 3:24ab1c8fefef | 36 | /////////////////////////////////////////////////////////////////////////////// |
jackcassa1967 | 3:24ab1c8fefef | 37 | Tokenizer::~Tokenizer() |
jackcassa1967 | 3:24ab1c8fefef | 38 | { |
jackcassa1967 | 3:24ab1c8fefef | 39 | } |
jackcassa1967 | 3:24ab1c8fefef | 40 | |
jackcassa1967 | 3:24ab1c8fefef | 41 | |
jackcassa1967 | 3:24ab1c8fefef | 42 | |
jackcassa1967 | 3:24ab1c8fefef | 43 | /////////////////////////////////////////////////////////////////////////////// |
jackcassa1967 | 3:24ab1c8fefef | 44 | // reset string buffer, delimiter and the currsor position |
jackcassa1967 | 3:24ab1c8fefef | 45 | /////////////////////////////////////////////////////////////////////////////// |
jackcassa1967 | 3:24ab1c8fefef | 46 | void Tokenizer::set(const std::string& str, const std::string& delimiter) |
jackcassa1967 | 3:24ab1c8fefef | 47 | { |
jackcassa1967 | 3:24ab1c8fefef | 48 | this->buffer = str; |
jackcassa1967 | 3:24ab1c8fefef | 49 | this->delimiter = delimiter; |
jackcassa1967 | 3:24ab1c8fefef | 50 | this->currPos = buffer.begin(); |
jackcassa1967 | 3:24ab1c8fefef | 51 | } |
jackcassa1967 | 3:24ab1c8fefef | 52 | |
jackcassa1967 | 3:24ab1c8fefef | 53 | void Tokenizer::setString(const std::string& str) |
jackcassa1967 | 3:24ab1c8fefef | 54 | { |
jackcassa1967 | 3:24ab1c8fefef | 55 | this->buffer = str; |
jackcassa1967 | 3:24ab1c8fefef | 56 | this->currPos = buffer.begin(); |
jackcassa1967 | 3:24ab1c8fefef | 57 | } |
jackcassa1967 | 3:24ab1c8fefef | 58 | |
jackcassa1967 | 3:24ab1c8fefef | 59 | void Tokenizer::setDelimiter(const std::string& delimiter) |
jackcassa1967 | 3:24ab1c8fefef | 60 | { |
jackcassa1967 | 3:24ab1c8fefef | 61 | this->delimiter = delimiter; |
jackcassa1967 | 3:24ab1c8fefef | 62 | this->currPos = buffer.begin(); |
jackcassa1967 | 3:24ab1c8fefef | 63 | } |
jackcassa1967 | 3:24ab1c8fefef | 64 | |
jackcassa1967 | 3:24ab1c8fefef | 65 | |
jackcassa1967 | 3:24ab1c8fefef | 66 | |
jackcassa1967 | 3:24ab1c8fefef | 67 | /////////////////////////////////////////////////////////////////////////////// |
jackcassa1967 | 3:24ab1c8fefef | 68 | // return the next token |
jackcassa1967 | 3:24ab1c8fefef | 69 | // If cannot find a token anymore, return "". |
jackcassa1967 | 3:24ab1c8fefef | 70 | /////////////////////////////////////////////////////////////////////////////// |
jackcassa1967 | 3:24ab1c8fefef | 71 | std::string Tokenizer::next() |
jackcassa1967 | 3:24ab1c8fefef | 72 | { |
jackcassa1967 | 3:24ab1c8fefef | 73 | if(buffer.size() <= 0) return ""; // skip if buffer is empty |
jackcassa1967 | 3:24ab1c8fefef | 74 | |
jackcassa1967 | 3:24ab1c8fefef | 75 | token.clear(); // reset token string |
jackcassa1967 | 3:24ab1c8fefef | 76 | |
jackcassa1967 | 3:24ab1c8fefef | 77 | this->skipDelimiter(); // skip leading delimiters |
jackcassa1967 | 3:24ab1c8fefef | 78 | |
jackcassa1967 | 3:24ab1c8fefef | 79 | // append each char to token string until it meets delimiter |
jackcassa1967 | 3:24ab1c8fefef | 80 | while(currPos != buffer.end() && !isDelimiter(*currPos)) |
jackcassa1967 | 3:24ab1c8fefef | 81 | { |
jackcassa1967 | 3:24ab1c8fefef | 82 | token += *currPos; |
jackcassa1967 | 3:24ab1c8fefef | 83 | ++currPos; |
jackcassa1967 | 3:24ab1c8fefef | 84 | } |
jackcassa1967 | 3:24ab1c8fefef | 85 | return token; |
jackcassa1967 | 3:24ab1c8fefef | 86 | } |
jackcassa1967 | 3:24ab1c8fefef | 87 | |
jackcassa1967 | 3:24ab1c8fefef | 88 | |
jackcassa1967 | 3:24ab1c8fefef | 89 | |
jackcassa1967 | 3:24ab1c8fefef | 90 | /////////////////////////////////////////////////////////////////////////////// |
jackcassa1967 | 3:24ab1c8fefef | 91 | // skip ang leading delimiters |
jackcassa1967 | 3:24ab1c8fefef | 92 | /////////////////////////////////////////////////////////////////////////////// |
jackcassa1967 | 3:24ab1c8fefef | 93 | void Tokenizer::skipDelimiter() |
jackcassa1967 | 3:24ab1c8fefef | 94 | { |
jackcassa1967 | 3:24ab1c8fefef | 95 | while(currPos != buffer.end() && isDelimiter(*currPos)) |
jackcassa1967 | 3:24ab1c8fefef | 96 | ++currPos; |
jackcassa1967 | 3:24ab1c8fefef | 97 | } |
jackcassa1967 | 3:24ab1c8fefef | 98 | |
jackcassa1967 | 3:24ab1c8fefef | 99 | |
jackcassa1967 | 3:24ab1c8fefef | 100 | |
jackcassa1967 | 3:24ab1c8fefef | 101 | /////////////////////////////////////////////////////////////////////////////// |
jackcassa1967 | 3:24ab1c8fefef | 102 | // return true if the current character is delimiter |
jackcassa1967 | 3:24ab1c8fefef | 103 | /////////////////////////////////////////////////////////////////////////////// |
jackcassa1967 | 3:24ab1c8fefef | 104 | bool Tokenizer::isDelimiter(char c) |
jackcassa1967 | 3:24ab1c8fefef | 105 | { |
jackcassa1967 | 3:24ab1c8fefef | 106 | return (delimiter.find(c) != std::string::npos); |
jackcassa1967 | 3:24ab1c8fefef | 107 | } |
jackcassa1967 | 3:24ab1c8fefef | 108 | |
jackcassa1967 | 3:24ab1c8fefef | 109 | |
jackcassa1967 | 3:24ab1c8fefef | 110 | |
jackcassa1967 | 3:24ab1c8fefef | 111 | /////////////////////////////////////////////////////////////////////////////// |
jackcassa1967 | 3:24ab1c8fefef | 112 | // split the input string into multiple tokens |
jackcassa1967 | 3:24ab1c8fefef | 113 | // This function scans tokens from the current cursor position. |
jackcassa1967 | 3:24ab1c8fefef | 114 | /////////////////////////////////////////////////////////////////////////////// |
jackcassa1967 | 3:24ab1c8fefef | 115 | std::vector<std::string> Tokenizer::split() |
jackcassa1967 | 3:24ab1c8fefef | 116 | { |
jackcassa1967 | 3:24ab1c8fefef | 117 | std::vector<std::string> tokens; |
jackcassa1967 | 3:24ab1c8fefef | 118 | std::string token; |
jackcassa1967 | 3:24ab1c8fefef | 119 | while((token = this->next()) != "") |
jackcassa1967 | 3:24ab1c8fefef | 120 | { |
jackcassa1967 | 3:24ab1c8fefef | 121 | tokens.push_back(token); |
jackcassa1967 | 3:24ab1c8fefef | 122 | } |
jackcassa1967 | 3:24ab1c8fefef | 123 | |
jackcassa1967 | 3:24ab1c8fefef | 124 | return tokens; |
jackcassa1967 | 3:24ab1c8fefef | 125 | } |