Maxim Integrated / Mbed OS MAXREFDES155#

Dependencies:   MaximInterface

Committer:
IanBenzMaxim
Date:
Fri Feb 24 11:23:12 2017 -0600
Revision:
0:33d4e66780c0
Initial commit.

Who changed what in which revision?

UserRevisionLine numberNew contents of line
IanBenzMaxim 0:33d4e66780c0 1 // Tencent is pleased to support the open source community by making RapidJSON available.
IanBenzMaxim 0:33d4e66780c0 2 //
IanBenzMaxim 0:33d4e66780c0 3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
IanBenzMaxim 0:33d4e66780c0 4 //
IanBenzMaxim 0:33d4e66780c0 5 // Licensed under the MIT License (the "License"); you may not use this file except
IanBenzMaxim 0:33d4e66780c0 6 // in compliance with the License. You may obtain a copy of the License at
IanBenzMaxim 0:33d4e66780c0 7 //
IanBenzMaxim 0:33d4e66780c0 8 // http://opensource.org/licenses/MIT
IanBenzMaxim 0:33d4e66780c0 9 //
IanBenzMaxim 0:33d4e66780c0 10 // Unless required by applicable law or agreed to in writing, software distributed
IanBenzMaxim 0:33d4e66780c0 11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
IanBenzMaxim 0:33d4e66780c0 12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
IanBenzMaxim 0:33d4e66780c0 13 // specific language governing permissions and limitations under the License.
IanBenzMaxim 0:33d4e66780c0 14
IanBenzMaxim 0:33d4e66780c0 15 #ifndef RAPIDJSON_INTERNAL_REGEX_H_
IanBenzMaxim 0:33d4e66780c0 16 #define RAPIDJSON_INTERNAL_REGEX_H_
IanBenzMaxim 0:33d4e66780c0 17
IanBenzMaxim 0:33d4e66780c0 18 #include "../allocators.h"
IanBenzMaxim 0:33d4e66780c0 19 #include "../stream.h"
IanBenzMaxim 0:33d4e66780c0 20 #include "stack.h"
IanBenzMaxim 0:33d4e66780c0 21
IanBenzMaxim 0:33d4e66780c0 22 #ifdef __clang__
IanBenzMaxim 0:33d4e66780c0 23 RAPIDJSON_DIAG_PUSH
IanBenzMaxim 0:33d4e66780c0 24 RAPIDJSON_DIAG_OFF(padded)
IanBenzMaxim 0:33d4e66780c0 25 RAPIDJSON_DIAG_OFF(switch-enum)
IanBenzMaxim 0:33d4e66780c0 26 RAPIDJSON_DIAG_OFF(implicit-fallthrough)
IanBenzMaxim 0:33d4e66780c0 27 #endif
IanBenzMaxim 0:33d4e66780c0 28
IanBenzMaxim 0:33d4e66780c0 29 #ifdef __GNUC__
IanBenzMaxim 0:33d4e66780c0 30 RAPIDJSON_DIAG_PUSH
IanBenzMaxim 0:33d4e66780c0 31 RAPIDJSON_DIAG_OFF(effc++)
IanBenzMaxim 0:33d4e66780c0 32 #endif
IanBenzMaxim 0:33d4e66780c0 33
IanBenzMaxim 0:33d4e66780c0 34 #ifdef _MSC_VER
IanBenzMaxim 0:33d4e66780c0 35 RAPIDJSON_DIAG_PUSH
IanBenzMaxim 0:33d4e66780c0 36 RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
IanBenzMaxim 0:33d4e66780c0 37 #endif
IanBenzMaxim 0:33d4e66780c0 38
IanBenzMaxim 0:33d4e66780c0 39 #ifndef RAPIDJSON_REGEX_VERBOSE
IanBenzMaxim 0:33d4e66780c0 40 #define RAPIDJSON_REGEX_VERBOSE 0
IanBenzMaxim 0:33d4e66780c0 41 #endif
IanBenzMaxim 0:33d4e66780c0 42
IanBenzMaxim 0:33d4e66780c0 43 RAPIDJSON_NAMESPACE_BEGIN
IanBenzMaxim 0:33d4e66780c0 44 namespace internal {
IanBenzMaxim 0:33d4e66780c0 45
IanBenzMaxim 0:33d4e66780c0 46 ///////////////////////////////////////////////////////////////////////////////
IanBenzMaxim 0:33d4e66780c0 47 // DecodedStream
IanBenzMaxim 0:33d4e66780c0 48
IanBenzMaxim 0:33d4e66780c0 49 template <typename SourceStream, typename Encoding>
IanBenzMaxim 0:33d4e66780c0 50 class DecodedStream {
IanBenzMaxim 0:33d4e66780c0 51 public:
IanBenzMaxim 0:33d4e66780c0 52 DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); }
IanBenzMaxim 0:33d4e66780c0 53 unsigned Peek() { return codepoint_; }
IanBenzMaxim 0:33d4e66780c0 54 unsigned Take() {
IanBenzMaxim 0:33d4e66780c0 55 unsigned c = codepoint_;
IanBenzMaxim 0:33d4e66780c0 56 if (c) // No further decoding when '\0'
IanBenzMaxim 0:33d4e66780c0 57 Decode();
IanBenzMaxim 0:33d4e66780c0 58 return c;
IanBenzMaxim 0:33d4e66780c0 59 }
IanBenzMaxim 0:33d4e66780c0 60
IanBenzMaxim 0:33d4e66780c0 61 private:
IanBenzMaxim 0:33d4e66780c0 62 void Decode() {
IanBenzMaxim 0:33d4e66780c0 63 if (!Encoding::Decode(ss_, &codepoint_))
IanBenzMaxim 0:33d4e66780c0 64 codepoint_ = 0;
IanBenzMaxim 0:33d4e66780c0 65 }
IanBenzMaxim 0:33d4e66780c0 66
IanBenzMaxim 0:33d4e66780c0 67 SourceStream& ss_;
IanBenzMaxim 0:33d4e66780c0 68 unsigned codepoint_;
IanBenzMaxim 0:33d4e66780c0 69 };
IanBenzMaxim 0:33d4e66780c0 70
IanBenzMaxim 0:33d4e66780c0 71 ///////////////////////////////////////////////////////////////////////////////
IanBenzMaxim 0:33d4e66780c0 72 // GenericRegex
IanBenzMaxim 0:33d4e66780c0 73
IanBenzMaxim 0:33d4e66780c0 74 static const SizeType kRegexInvalidState = ~SizeType(0); //!< Represents an invalid index in GenericRegex::State::out, out1
IanBenzMaxim 0:33d4e66780c0 75 static const SizeType kRegexInvalidRange = ~SizeType(0);
IanBenzMaxim 0:33d4e66780c0 76
IanBenzMaxim 0:33d4e66780c0 77 template <typename Encoding, typename Allocator>
IanBenzMaxim 0:33d4e66780c0 78 class GenericRegexSearch;
IanBenzMaxim 0:33d4e66780c0 79
IanBenzMaxim 0:33d4e66780c0 80 //! Regular expression engine with subset of ECMAscript grammar.
IanBenzMaxim 0:33d4e66780c0 81 /*!
IanBenzMaxim 0:33d4e66780c0 82 Supported regular expression syntax:
IanBenzMaxim 0:33d4e66780c0 83 - \c ab Concatenation
IanBenzMaxim 0:33d4e66780c0 84 - \c a|b Alternation
IanBenzMaxim 0:33d4e66780c0 85 - \c a? Zero or one
IanBenzMaxim 0:33d4e66780c0 86 - \c a* Zero or more
IanBenzMaxim 0:33d4e66780c0 87 - \c a+ One or more
IanBenzMaxim 0:33d4e66780c0 88 - \c a{3} Exactly 3 times
IanBenzMaxim 0:33d4e66780c0 89 - \c a{3,} At least 3 times
IanBenzMaxim 0:33d4e66780c0 90 - \c a{3,5} 3 to 5 times
IanBenzMaxim 0:33d4e66780c0 91 - \c (ab) Grouping
IanBenzMaxim 0:33d4e66780c0 92 - \c ^a At the beginning
IanBenzMaxim 0:33d4e66780c0 93 - \c a$ At the end
IanBenzMaxim 0:33d4e66780c0 94 - \c . Any character
IanBenzMaxim 0:33d4e66780c0 95 - \c [abc] Character classes
IanBenzMaxim 0:33d4e66780c0 96 - \c [a-c] Character class range
IanBenzMaxim 0:33d4e66780c0 97 - \c [a-z0-9_] Character class combination
IanBenzMaxim 0:33d4e66780c0 98 - \c [^abc] Negated character classes
IanBenzMaxim 0:33d4e66780c0 99 - \c [^a-c] Negated character class range
IanBenzMaxim 0:33d4e66780c0 100 - \c [\b] Backspace (U+0008)
IanBenzMaxim 0:33d4e66780c0 101 - \c \\| \\\\ ... Escape characters
IanBenzMaxim 0:33d4e66780c0 102 - \c \\f Form feed (U+000C)
IanBenzMaxim 0:33d4e66780c0 103 - \c \\n Line feed (U+000A)
IanBenzMaxim 0:33d4e66780c0 104 - \c \\r Carriage return (U+000D)
IanBenzMaxim 0:33d4e66780c0 105 - \c \\t Tab (U+0009)
IanBenzMaxim 0:33d4e66780c0 106 - \c \\v Vertical tab (U+000B)
IanBenzMaxim 0:33d4e66780c0 107
IanBenzMaxim 0:33d4e66780c0 108 \note This is a Thompson NFA engine, implemented with reference to
IanBenzMaxim 0:33d4e66780c0 109 Cox, Russ. "Regular Expression Matching Can Be Simple And Fast (but is slow in Java, Perl, PHP, Python, Ruby,...).",
IanBenzMaxim 0:33d4e66780c0 110 https://swtch.com/~rsc/regexp/regexp1.html
IanBenzMaxim 0:33d4e66780c0 111 */
IanBenzMaxim 0:33d4e66780c0 112 template <typename Encoding, typename Allocator = CrtAllocator>
IanBenzMaxim 0:33d4e66780c0 113 class GenericRegex {
IanBenzMaxim 0:33d4e66780c0 114 public:
IanBenzMaxim 0:33d4e66780c0 115 typedef Encoding EncodingType;
IanBenzMaxim 0:33d4e66780c0 116 typedef typename Encoding::Ch Ch;
IanBenzMaxim 0:33d4e66780c0 117 template <typename, typename> friend class GenericRegexSearch;
IanBenzMaxim 0:33d4e66780c0 118
IanBenzMaxim 0:33d4e66780c0 119 GenericRegex(const Ch* source, Allocator* allocator = 0) :
IanBenzMaxim 0:33d4e66780c0 120 states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(),
IanBenzMaxim 0:33d4e66780c0 121 anchorBegin_(), anchorEnd_()
IanBenzMaxim 0:33d4e66780c0 122 {
IanBenzMaxim 0:33d4e66780c0 123 GenericStringStream<Encoding> ss(source);
IanBenzMaxim 0:33d4e66780c0 124 DecodedStream<GenericStringStream<Encoding>, Encoding> ds(ss);
IanBenzMaxim 0:33d4e66780c0 125 Parse(ds);
IanBenzMaxim 0:33d4e66780c0 126 }
IanBenzMaxim 0:33d4e66780c0 127
IanBenzMaxim 0:33d4e66780c0 128 ~GenericRegex() {}
IanBenzMaxim 0:33d4e66780c0 129
IanBenzMaxim 0:33d4e66780c0 130 bool IsValid() const {
IanBenzMaxim 0:33d4e66780c0 131 return root_ != kRegexInvalidState;
IanBenzMaxim 0:33d4e66780c0 132 }
IanBenzMaxim 0:33d4e66780c0 133
IanBenzMaxim 0:33d4e66780c0 134 private:
IanBenzMaxim 0:33d4e66780c0 135 enum Operator {
IanBenzMaxim 0:33d4e66780c0 136 kZeroOrOne,
IanBenzMaxim 0:33d4e66780c0 137 kZeroOrMore,
IanBenzMaxim 0:33d4e66780c0 138 kOneOrMore,
IanBenzMaxim 0:33d4e66780c0 139 kConcatenation,
IanBenzMaxim 0:33d4e66780c0 140 kAlternation,
IanBenzMaxim 0:33d4e66780c0 141 kLeftParenthesis
IanBenzMaxim 0:33d4e66780c0 142 };
IanBenzMaxim 0:33d4e66780c0 143
IanBenzMaxim 0:33d4e66780c0 144 static const unsigned kAnyCharacterClass = 0xFFFFFFFF; //!< For '.'
IanBenzMaxim 0:33d4e66780c0 145 static const unsigned kRangeCharacterClass = 0xFFFFFFFE;
IanBenzMaxim 0:33d4e66780c0 146 static const unsigned kRangeNegationFlag = 0x80000000;
IanBenzMaxim 0:33d4e66780c0 147
IanBenzMaxim 0:33d4e66780c0 148 struct Range {
IanBenzMaxim 0:33d4e66780c0 149 unsigned start; //
IanBenzMaxim 0:33d4e66780c0 150 unsigned end;
IanBenzMaxim 0:33d4e66780c0 151 SizeType next;
IanBenzMaxim 0:33d4e66780c0 152 };
IanBenzMaxim 0:33d4e66780c0 153
IanBenzMaxim 0:33d4e66780c0 154 struct State {
IanBenzMaxim 0:33d4e66780c0 155 SizeType out; //!< Equals to kInvalid for matching state
IanBenzMaxim 0:33d4e66780c0 156 SizeType out1; //!< Equals to non-kInvalid for split
IanBenzMaxim 0:33d4e66780c0 157 SizeType rangeStart;
IanBenzMaxim 0:33d4e66780c0 158 unsigned codepoint;
IanBenzMaxim 0:33d4e66780c0 159 };
IanBenzMaxim 0:33d4e66780c0 160
IanBenzMaxim 0:33d4e66780c0 161 struct Frag {
IanBenzMaxim 0:33d4e66780c0 162 Frag(SizeType s, SizeType o, SizeType m) : start(s), out(o), minIndex(m) {}
IanBenzMaxim 0:33d4e66780c0 163 SizeType start;
IanBenzMaxim 0:33d4e66780c0 164 SizeType out; //!< link-list of all output states
IanBenzMaxim 0:33d4e66780c0 165 SizeType minIndex;
IanBenzMaxim 0:33d4e66780c0 166 };
IanBenzMaxim 0:33d4e66780c0 167
IanBenzMaxim 0:33d4e66780c0 168 State& GetState(SizeType index) {
IanBenzMaxim 0:33d4e66780c0 169 RAPIDJSON_ASSERT(index < stateCount_);
IanBenzMaxim 0:33d4e66780c0 170 return states_.template Bottom<State>()[index];
IanBenzMaxim 0:33d4e66780c0 171 }
IanBenzMaxim 0:33d4e66780c0 172
IanBenzMaxim 0:33d4e66780c0 173 const State& GetState(SizeType index) const {
IanBenzMaxim 0:33d4e66780c0 174 RAPIDJSON_ASSERT(index < stateCount_);
IanBenzMaxim 0:33d4e66780c0 175 return states_.template Bottom<State>()[index];
IanBenzMaxim 0:33d4e66780c0 176 }
IanBenzMaxim 0:33d4e66780c0 177
IanBenzMaxim 0:33d4e66780c0 178 Range& GetRange(SizeType index) {
IanBenzMaxim 0:33d4e66780c0 179 RAPIDJSON_ASSERT(index < rangeCount_);
IanBenzMaxim 0:33d4e66780c0 180 return ranges_.template Bottom<Range>()[index];
IanBenzMaxim 0:33d4e66780c0 181 }
IanBenzMaxim 0:33d4e66780c0 182
IanBenzMaxim 0:33d4e66780c0 183 const Range& GetRange(SizeType index) const {
IanBenzMaxim 0:33d4e66780c0 184 RAPIDJSON_ASSERT(index < rangeCount_);
IanBenzMaxim 0:33d4e66780c0 185 return ranges_.template Bottom<Range>()[index];
IanBenzMaxim 0:33d4e66780c0 186 }
IanBenzMaxim 0:33d4e66780c0 187
IanBenzMaxim 0:33d4e66780c0 188 template <typename InputStream>
IanBenzMaxim 0:33d4e66780c0 189 void Parse(DecodedStream<InputStream, Encoding>& ds) {
IanBenzMaxim 0:33d4e66780c0 190 Allocator allocator;
IanBenzMaxim 0:33d4e66780c0 191 Stack<Allocator> operandStack(&allocator, 256); // Frag
IanBenzMaxim 0:33d4e66780c0 192 Stack<Allocator> operatorStack(&allocator, 256); // Operator
IanBenzMaxim 0:33d4e66780c0 193 Stack<Allocator> atomCountStack(&allocator, 256); // unsigned (Atom per parenthesis)
IanBenzMaxim 0:33d4e66780c0 194
IanBenzMaxim 0:33d4e66780c0 195 *atomCountStack.template Push<unsigned>() = 0;
IanBenzMaxim 0:33d4e66780c0 196
IanBenzMaxim 0:33d4e66780c0 197 unsigned codepoint;
IanBenzMaxim 0:33d4e66780c0 198 while (ds.Peek() != 0) {
IanBenzMaxim 0:33d4e66780c0 199 switch (codepoint = ds.Take()) {
IanBenzMaxim 0:33d4e66780c0 200 case '^':
IanBenzMaxim 0:33d4e66780c0 201 anchorBegin_ = true;
IanBenzMaxim 0:33d4e66780c0 202 break;
IanBenzMaxim 0:33d4e66780c0 203
IanBenzMaxim 0:33d4e66780c0 204 case '$':
IanBenzMaxim 0:33d4e66780c0 205 anchorEnd_ = true;
IanBenzMaxim 0:33d4e66780c0 206 break;
IanBenzMaxim 0:33d4e66780c0 207
IanBenzMaxim 0:33d4e66780c0 208 case '|':
IanBenzMaxim 0:33d4e66780c0 209 while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation)
IanBenzMaxim 0:33d4e66780c0 210 if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
IanBenzMaxim 0:33d4e66780c0 211 return;
IanBenzMaxim 0:33d4e66780c0 212 *operatorStack.template Push<Operator>() = kAlternation;
IanBenzMaxim 0:33d4e66780c0 213 *atomCountStack.template Top<unsigned>() = 0;
IanBenzMaxim 0:33d4e66780c0 214 break;
IanBenzMaxim 0:33d4e66780c0 215
IanBenzMaxim 0:33d4e66780c0 216 case '(':
IanBenzMaxim 0:33d4e66780c0 217 *operatorStack.template Push<Operator>() = kLeftParenthesis;
IanBenzMaxim 0:33d4e66780c0 218 *atomCountStack.template Push<unsigned>() = 0;
IanBenzMaxim 0:33d4e66780c0 219 break;
IanBenzMaxim 0:33d4e66780c0 220
IanBenzMaxim 0:33d4e66780c0 221 case ')':
IanBenzMaxim 0:33d4e66780c0 222 while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis)
IanBenzMaxim 0:33d4e66780c0 223 if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
IanBenzMaxim 0:33d4e66780c0 224 return;
IanBenzMaxim 0:33d4e66780c0 225 if (operatorStack.Empty())
IanBenzMaxim 0:33d4e66780c0 226 return;
IanBenzMaxim 0:33d4e66780c0 227 operatorStack.template Pop<Operator>(1);
IanBenzMaxim 0:33d4e66780c0 228 atomCountStack.template Pop<unsigned>(1);
IanBenzMaxim 0:33d4e66780c0 229 ImplicitConcatenation(atomCountStack, operatorStack);
IanBenzMaxim 0:33d4e66780c0 230 break;
IanBenzMaxim 0:33d4e66780c0 231
IanBenzMaxim 0:33d4e66780c0 232 case '?':
IanBenzMaxim 0:33d4e66780c0 233 if (!Eval(operandStack, kZeroOrOne))
IanBenzMaxim 0:33d4e66780c0 234 return;
IanBenzMaxim 0:33d4e66780c0 235 break;
IanBenzMaxim 0:33d4e66780c0 236
IanBenzMaxim 0:33d4e66780c0 237 case '*':
IanBenzMaxim 0:33d4e66780c0 238 if (!Eval(operandStack, kZeroOrMore))
IanBenzMaxim 0:33d4e66780c0 239 return;
IanBenzMaxim 0:33d4e66780c0 240 break;
IanBenzMaxim 0:33d4e66780c0 241
IanBenzMaxim 0:33d4e66780c0 242 case '+':
IanBenzMaxim 0:33d4e66780c0 243 if (!Eval(operandStack, kOneOrMore))
IanBenzMaxim 0:33d4e66780c0 244 return;
IanBenzMaxim 0:33d4e66780c0 245 break;
IanBenzMaxim 0:33d4e66780c0 246
IanBenzMaxim 0:33d4e66780c0 247 case '{':
IanBenzMaxim 0:33d4e66780c0 248 {
IanBenzMaxim 0:33d4e66780c0 249 unsigned n, m;
IanBenzMaxim 0:33d4e66780c0 250 if (!ParseUnsigned(ds, &n))
IanBenzMaxim 0:33d4e66780c0 251 return;
IanBenzMaxim 0:33d4e66780c0 252
IanBenzMaxim 0:33d4e66780c0 253 if (ds.Peek() == ',') {
IanBenzMaxim 0:33d4e66780c0 254 ds.Take();
IanBenzMaxim 0:33d4e66780c0 255 if (ds.Peek() == '}')
IanBenzMaxim 0:33d4e66780c0 256 m = kInfinityQuantifier;
IanBenzMaxim 0:33d4e66780c0 257 else if (!ParseUnsigned(ds, &m) || m < n)
IanBenzMaxim 0:33d4e66780c0 258 return;
IanBenzMaxim 0:33d4e66780c0 259 }
IanBenzMaxim 0:33d4e66780c0 260 else
IanBenzMaxim 0:33d4e66780c0 261 m = n;
IanBenzMaxim 0:33d4e66780c0 262
IanBenzMaxim 0:33d4e66780c0 263 if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}')
IanBenzMaxim 0:33d4e66780c0 264 return;
IanBenzMaxim 0:33d4e66780c0 265 ds.Take();
IanBenzMaxim 0:33d4e66780c0 266 }
IanBenzMaxim 0:33d4e66780c0 267 break;
IanBenzMaxim 0:33d4e66780c0 268
IanBenzMaxim 0:33d4e66780c0 269 case '.':
IanBenzMaxim 0:33d4e66780c0 270 PushOperand(operandStack, kAnyCharacterClass);
IanBenzMaxim 0:33d4e66780c0 271 ImplicitConcatenation(atomCountStack, operatorStack);
IanBenzMaxim 0:33d4e66780c0 272 break;
IanBenzMaxim 0:33d4e66780c0 273
IanBenzMaxim 0:33d4e66780c0 274 case '[':
IanBenzMaxim 0:33d4e66780c0 275 {
IanBenzMaxim 0:33d4e66780c0 276 SizeType range;
IanBenzMaxim 0:33d4e66780c0 277 if (!ParseRange(ds, &range))
IanBenzMaxim 0:33d4e66780c0 278 return;
IanBenzMaxim 0:33d4e66780c0 279 SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass);
IanBenzMaxim 0:33d4e66780c0 280 GetState(s).rangeStart = range;
IanBenzMaxim 0:33d4e66780c0 281 *operandStack.template Push<Frag>() = Frag(s, s, s);
IanBenzMaxim 0:33d4e66780c0 282 }
IanBenzMaxim 0:33d4e66780c0 283 ImplicitConcatenation(atomCountStack, operatorStack);
IanBenzMaxim 0:33d4e66780c0 284 break;
IanBenzMaxim 0:33d4e66780c0 285
IanBenzMaxim 0:33d4e66780c0 286 case '\\': // Escape character
IanBenzMaxim 0:33d4e66780c0 287 if (!CharacterEscape(ds, &codepoint))
IanBenzMaxim 0:33d4e66780c0 288 return; // Unsupported escape character
IanBenzMaxim 0:33d4e66780c0 289 // fall through to default
IanBenzMaxim 0:33d4e66780c0 290
IanBenzMaxim 0:33d4e66780c0 291 default: // Pattern character
IanBenzMaxim 0:33d4e66780c0 292 PushOperand(operandStack, codepoint);
IanBenzMaxim 0:33d4e66780c0 293 ImplicitConcatenation(atomCountStack, operatorStack);
IanBenzMaxim 0:33d4e66780c0 294 }
IanBenzMaxim 0:33d4e66780c0 295 }
IanBenzMaxim 0:33d4e66780c0 296
IanBenzMaxim 0:33d4e66780c0 297 while (!operatorStack.Empty())
IanBenzMaxim 0:33d4e66780c0 298 if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
IanBenzMaxim 0:33d4e66780c0 299 return;
IanBenzMaxim 0:33d4e66780c0 300
IanBenzMaxim 0:33d4e66780c0 301 // Link the operand to matching state.
IanBenzMaxim 0:33d4e66780c0 302 if (operandStack.GetSize() == sizeof(Frag)) {
IanBenzMaxim 0:33d4e66780c0 303 Frag* e = operandStack.template Pop<Frag>(1);
IanBenzMaxim 0:33d4e66780c0 304 Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
IanBenzMaxim 0:33d4e66780c0 305 root_ = e->start;
IanBenzMaxim 0:33d4e66780c0 306
IanBenzMaxim 0:33d4e66780c0 307 #if RAPIDJSON_REGEX_VERBOSE
IanBenzMaxim 0:33d4e66780c0 308 printf("root: %d\n", root_);
IanBenzMaxim 0:33d4e66780c0 309 for (SizeType i = 0; i < stateCount_ ; i++) {
IanBenzMaxim 0:33d4e66780c0 310 State& s = GetState(i);
IanBenzMaxim 0:33d4e66780c0 311 printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
IanBenzMaxim 0:33d4e66780c0 312 }
IanBenzMaxim 0:33d4e66780c0 313 printf("\n");
IanBenzMaxim 0:33d4e66780c0 314 #endif
IanBenzMaxim 0:33d4e66780c0 315 }
IanBenzMaxim 0:33d4e66780c0 316 }
IanBenzMaxim 0:33d4e66780c0 317
IanBenzMaxim 0:33d4e66780c0 318 SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) {
IanBenzMaxim 0:33d4e66780c0 319 State* s = states_.template Push<State>();
IanBenzMaxim 0:33d4e66780c0 320 s->out = out;
IanBenzMaxim 0:33d4e66780c0 321 s->out1 = out1;
IanBenzMaxim 0:33d4e66780c0 322 s->codepoint = codepoint;
IanBenzMaxim 0:33d4e66780c0 323 s->rangeStart = kRegexInvalidRange;
IanBenzMaxim 0:33d4e66780c0 324 return stateCount_++;
IanBenzMaxim 0:33d4e66780c0 325 }
IanBenzMaxim 0:33d4e66780c0 326
IanBenzMaxim 0:33d4e66780c0 327 void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) {
IanBenzMaxim 0:33d4e66780c0 328 SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
IanBenzMaxim 0:33d4e66780c0 329 *operandStack.template Push<Frag>() = Frag(s, s, s);
IanBenzMaxim 0:33d4e66780c0 330 }
IanBenzMaxim 0:33d4e66780c0 331
IanBenzMaxim 0:33d4e66780c0 332 void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) {
IanBenzMaxim 0:33d4e66780c0 333 if (*atomCountStack.template Top<unsigned>())
IanBenzMaxim 0:33d4e66780c0 334 *operatorStack.template Push<Operator>() = kConcatenation;
IanBenzMaxim 0:33d4e66780c0 335 (*atomCountStack.template Top<unsigned>())++;
IanBenzMaxim 0:33d4e66780c0 336 }
IanBenzMaxim 0:33d4e66780c0 337
IanBenzMaxim 0:33d4e66780c0 338 SizeType Append(SizeType l1, SizeType l2) {
IanBenzMaxim 0:33d4e66780c0 339 SizeType old = l1;
IanBenzMaxim 0:33d4e66780c0 340 while (GetState(l1).out != kRegexInvalidState)
IanBenzMaxim 0:33d4e66780c0 341 l1 = GetState(l1).out;
IanBenzMaxim 0:33d4e66780c0 342 GetState(l1).out = l2;
IanBenzMaxim 0:33d4e66780c0 343 return old;
IanBenzMaxim 0:33d4e66780c0 344 }
IanBenzMaxim 0:33d4e66780c0 345
IanBenzMaxim 0:33d4e66780c0 346 void Patch(SizeType l, SizeType s) {
IanBenzMaxim 0:33d4e66780c0 347 for (SizeType next; l != kRegexInvalidState; l = next) {
IanBenzMaxim 0:33d4e66780c0 348 next = GetState(l).out;
IanBenzMaxim 0:33d4e66780c0 349 GetState(l).out = s;
IanBenzMaxim 0:33d4e66780c0 350 }
IanBenzMaxim 0:33d4e66780c0 351 }
IanBenzMaxim 0:33d4e66780c0 352
IanBenzMaxim 0:33d4e66780c0 353 bool Eval(Stack<Allocator>& operandStack, Operator op) {
IanBenzMaxim 0:33d4e66780c0 354 switch (op) {
IanBenzMaxim 0:33d4e66780c0 355 case kConcatenation:
IanBenzMaxim 0:33d4e66780c0 356 RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag) * 2);
IanBenzMaxim 0:33d4e66780c0 357 {
IanBenzMaxim 0:33d4e66780c0 358 Frag e2 = *operandStack.template Pop<Frag>(1);
IanBenzMaxim 0:33d4e66780c0 359 Frag e1 = *operandStack.template Pop<Frag>(1);
IanBenzMaxim 0:33d4e66780c0 360 Patch(e1.out, e2.start);
IanBenzMaxim 0:33d4e66780c0 361 *operandStack.template Push<Frag>() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex));
IanBenzMaxim 0:33d4e66780c0 362 }
IanBenzMaxim 0:33d4e66780c0 363 return true;
IanBenzMaxim 0:33d4e66780c0 364
IanBenzMaxim 0:33d4e66780c0 365 case kAlternation:
IanBenzMaxim 0:33d4e66780c0 366 if (operandStack.GetSize() >= sizeof(Frag) * 2) {
IanBenzMaxim 0:33d4e66780c0 367 Frag e2 = *operandStack.template Pop<Frag>(1);
IanBenzMaxim 0:33d4e66780c0 368 Frag e1 = *operandStack.template Pop<Frag>(1);
IanBenzMaxim 0:33d4e66780c0 369 SizeType s = NewState(e1.start, e2.start, 0);
IanBenzMaxim 0:33d4e66780c0 370 *operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex));
IanBenzMaxim 0:33d4e66780c0 371 return true;
IanBenzMaxim 0:33d4e66780c0 372 }
IanBenzMaxim 0:33d4e66780c0 373 return false;
IanBenzMaxim 0:33d4e66780c0 374
IanBenzMaxim 0:33d4e66780c0 375 case kZeroOrOne:
IanBenzMaxim 0:33d4e66780c0 376 if (operandStack.GetSize() >= sizeof(Frag)) {
IanBenzMaxim 0:33d4e66780c0 377 Frag e = *operandStack.template Pop<Frag>(1);
IanBenzMaxim 0:33d4e66780c0 378 SizeType s = NewState(kRegexInvalidState, e.start, 0);
IanBenzMaxim 0:33d4e66780c0 379 *operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex);
IanBenzMaxim 0:33d4e66780c0 380 return true;
IanBenzMaxim 0:33d4e66780c0 381 }
IanBenzMaxim 0:33d4e66780c0 382 return false;
IanBenzMaxim 0:33d4e66780c0 383
IanBenzMaxim 0:33d4e66780c0 384 case kZeroOrMore:
IanBenzMaxim 0:33d4e66780c0 385 if (operandStack.GetSize() >= sizeof(Frag)) {
IanBenzMaxim 0:33d4e66780c0 386 Frag e = *operandStack.template Pop<Frag>(1);
IanBenzMaxim 0:33d4e66780c0 387 SizeType s = NewState(kRegexInvalidState, e.start, 0);
IanBenzMaxim 0:33d4e66780c0 388 Patch(e.out, s);
IanBenzMaxim 0:33d4e66780c0 389 *operandStack.template Push<Frag>() = Frag(s, s, e.minIndex);
IanBenzMaxim 0:33d4e66780c0 390 return true;
IanBenzMaxim 0:33d4e66780c0 391 }
IanBenzMaxim 0:33d4e66780c0 392 return false;
IanBenzMaxim 0:33d4e66780c0 393
IanBenzMaxim 0:33d4e66780c0 394 default:
IanBenzMaxim 0:33d4e66780c0 395 RAPIDJSON_ASSERT(op == kOneOrMore);
IanBenzMaxim 0:33d4e66780c0 396 if (operandStack.GetSize() >= sizeof(Frag)) {
IanBenzMaxim 0:33d4e66780c0 397 Frag e = *operandStack.template Pop<Frag>(1);
IanBenzMaxim 0:33d4e66780c0 398 SizeType s = NewState(kRegexInvalidState, e.start, 0);
IanBenzMaxim 0:33d4e66780c0 399 Patch(e.out, s);
IanBenzMaxim 0:33d4e66780c0 400 *operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex);
IanBenzMaxim 0:33d4e66780c0 401 return true;
IanBenzMaxim 0:33d4e66780c0 402 }
IanBenzMaxim 0:33d4e66780c0 403 return false;
IanBenzMaxim 0:33d4e66780c0 404 }
IanBenzMaxim 0:33d4e66780c0 405 }
IanBenzMaxim 0:33d4e66780c0 406
IanBenzMaxim 0:33d4e66780c0 407 bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) {
IanBenzMaxim 0:33d4e66780c0 408 RAPIDJSON_ASSERT(n <= m);
IanBenzMaxim 0:33d4e66780c0 409 RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag));
IanBenzMaxim 0:33d4e66780c0 410
IanBenzMaxim 0:33d4e66780c0 411 if (n == 0) {
IanBenzMaxim 0:33d4e66780c0 412 if (m == 0) // a{0} not support
IanBenzMaxim 0:33d4e66780c0 413 return false;
IanBenzMaxim 0:33d4e66780c0 414 else if (m == kInfinityQuantifier)
IanBenzMaxim 0:33d4e66780c0 415 Eval(operandStack, kZeroOrMore); // a{0,} -> a*
IanBenzMaxim 0:33d4e66780c0 416 else {
IanBenzMaxim 0:33d4e66780c0 417 Eval(operandStack, kZeroOrOne); // a{0,5} -> a?
IanBenzMaxim 0:33d4e66780c0 418 for (unsigned i = 0; i < m - 1; i++)
IanBenzMaxim 0:33d4e66780c0 419 CloneTopOperand(operandStack); // a{0,5} -> a? a? a? a? a?
IanBenzMaxim 0:33d4e66780c0 420 for (unsigned i = 0; i < m - 1; i++)
IanBenzMaxim 0:33d4e66780c0 421 Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a?
IanBenzMaxim 0:33d4e66780c0 422 }
IanBenzMaxim 0:33d4e66780c0 423 return true;
IanBenzMaxim 0:33d4e66780c0 424 }
IanBenzMaxim 0:33d4e66780c0 425
IanBenzMaxim 0:33d4e66780c0 426 for (unsigned i = 0; i < n - 1; i++) // a{3} -> a a a
IanBenzMaxim 0:33d4e66780c0 427 CloneTopOperand(operandStack);
IanBenzMaxim 0:33d4e66780c0 428
IanBenzMaxim 0:33d4e66780c0 429 if (m == kInfinityQuantifier)
IanBenzMaxim 0:33d4e66780c0 430 Eval(operandStack, kOneOrMore); // a{3,} -> a a a+
IanBenzMaxim 0:33d4e66780c0 431 else if (m > n) {
IanBenzMaxim 0:33d4e66780c0 432 CloneTopOperand(operandStack); // a{3,5} -> a a a a
IanBenzMaxim 0:33d4e66780c0 433 Eval(operandStack, kZeroOrOne); // a{3,5} -> a a a a?
IanBenzMaxim 0:33d4e66780c0 434 for (unsigned i = n; i < m - 1; i++)
IanBenzMaxim 0:33d4e66780c0 435 CloneTopOperand(operandStack); // a{3,5} -> a a a a? a?
IanBenzMaxim 0:33d4e66780c0 436 for (unsigned i = n; i < m; i++)
IanBenzMaxim 0:33d4e66780c0 437 Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a?
IanBenzMaxim 0:33d4e66780c0 438 }
IanBenzMaxim 0:33d4e66780c0 439
IanBenzMaxim 0:33d4e66780c0 440 for (unsigned i = 0; i < n - 1; i++)
IanBenzMaxim 0:33d4e66780c0 441 Eval(operandStack, kConcatenation); // a{3} -> aaa, a{3,} -> aaa+, a{3.5} -> aaaa?a?
IanBenzMaxim 0:33d4e66780c0 442
IanBenzMaxim 0:33d4e66780c0 443 return true;
IanBenzMaxim 0:33d4e66780c0 444 }
IanBenzMaxim 0:33d4e66780c0 445
IanBenzMaxim 0:33d4e66780c0 446 static SizeType Min(SizeType a, SizeType b) { return a < b ? a : b; }
IanBenzMaxim 0:33d4e66780c0 447
IanBenzMaxim 0:33d4e66780c0 448 void CloneTopOperand(Stack<Allocator>& operandStack) {
IanBenzMaxim 0:33d4e66780c0 449 const Frag src = *operandStack.template Top<Frag>(); // Copy constructor to prevent invalidation
IanBenzMaxim 0:33d4e66780c0 450 SizeType count = stateCount_ - src.minIndex; // Assumes top operand contains states in [src->minIndex, stateCount_)
IanBenzMaxim 0:33d4e66780c0 451 State* s = states_.template Push<State>(count);
IanBenzMaxim 0:33d4e66780c0 452 memcpy(s, &GetState(src.minIndex), count * sizeof(State));
IanBenzMaxim 0:33d4e66780c0 453 for (SizeType j = 0; j < count; j++) {
IanBenzMaxim 0:33d4e66780c0 454 if (s[j].out != kRegexInvalidState)
IanBenzMaxim 0:33d4e66780c0 455 s[j].out += count;
IanBenzMaxim 0:33d4e66780c0 456 if (s[j].out1 != kRegexInvalidState)
IanBenzMaxim 0:33d4e66780c0 457 s[j].out1 += count;
IanBenzMaxim 0:33d4e66780c0 458 }
IanBenzMaxim 0:33d4e66780c0 459 *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count, src.minIndex + count);
IanBenzMaxim 0:33d4e66780c0 460 stateCount_ += count;
IanBenzMaxim 0:33d4e66780c0 461 }
IanBenzMaxim 0:33d4e66780c0 462
IanBenzMaxim 0:33d4e66780c0 463 template <typename InputStream>
IanBenzMaxim 0:33d4e66780c0 464 bool ParseUnsigned(DecodedStream<InputStream, Encoding>& ds, unsigned* u) {
IanBenzMaxim 0:33d4e66780c0 465 unsigned r = 0;
IanBenzMaxim 0:33d4e66780c0 466 if (ds.Peek() < '0' || ds.Peek() > '9')
IanBenzMaxim 0:33d4e66780c0 467 return false;
IanBenzMaxim 0:33d4e66780c0 468 while (ds.Peek() >= '0' && ds.Peek() <= '9') {
IanBenzMaxim 0:33d4e66780c0 469 if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
IanBenzMaxim 0:33d4e66780c0 470 return false; // overflow
IanBenzMaxim 0:33d4e66780c0 471 r = r * 10 + (ds.Take() - '0');
IanBenzMaxim 0:33d4e66780c0 472 }
IanBenzMaxim 0:33d4e66780c0 473 *u = r;
IanBenzMaxim 0:33d4e66780c0 474 return true;
IanBenzMaxim 0:33d4e66780c0 475 }
IanBenzMaxim 0:33d4e66780c0 476
IanBenzMaxim 0:33d4e66780c0 477 template <typename InputStream>
IanBenzMaxim 0:33d4e66780c0 478 bool ParseRange(DecodedStream<InputStream, Encoding>& ds, SizeType* range) {
IanBenzMaxim 0:33d4e66780c0 479 bool isBegin = true;
IanBenzMaxim 0:33d4e66780c0 480 bool negate = false;
IanBenzMaxim 0:33d4e66780c0 481 int step = 0;
IanBenzMaxim 0:33d4e66780c0 482 SizeType start = kRegexInvalidRange;
IanBenzMaxim 0:33d4e66780c0 483 SizeType current = kRegexInvalidRange;
IanBenzMaxim 0:33d4e66780c0 484 unsigned codepoint;
IanBenzMaxim 0:33d4e66780c0 485 while ((codepoint = ds.Take()) != 0) {
IanBenzMaxim 0:33d4e66780c0 486 if (isBegin) {
IanBenzMaxim 0:33d4e66780c0 487 isBegin = false;
IanBenzMaxim 0:33d4e66780c0 488 if (codepoint == '^') {
IanBenzMaxim 0:33d4e66780c0 489 negate = true;
IanBenzMaxim 0:33d4e66780c0 490 continue;
IanBenzMaxim 0:33d4e66780c0 491 }
IanBenzMaxim 0:33d4e66780c0 492 }
IanBenzMaxim 0:33d4e66780c0 493
IanBenzMaxim 0:33d4e66780c0 494 switch (codepoint) {
IanBenzMaxim 0:33d4e66780c0 495 case ']':
IanBenzMaxim 0:33d4e66780c0 496 if (start == kRegexInvalidRange)
IanBenzMaxim 0:33d4e66780c0 497 return false; // Error: nothing inside []
IanBenzMaxim 0:33d4e66780c0 498 if (step == 2) { // Add trailing '-'
IanBenzMaxim 0:33d4e66780c0 499 SizeType r = NewRange('-');
IanBenzMaxim 0:33d4e66780c0 500 RAPIDJSON_ASSERT(current != kRegexInvalidRange);
IanBenzMaxim 0:33d4e66780c0 501 GetRange(current).next = r;
IanBenzMaxim 0:33d4e66780c0 502 }
IanBenzMaxim 0:33d4e66780c0 503 if (negate)
IanBenzMaxim 0:33d4e66780c0 504 GetRange(start).start |= kRangeNegationFlag;
IanBenzMaxim 0:33d4e66780c0 505 *range = start;
IanBenzMaxim 0:33d4e66780c0 506 return true;
IanBenzMaxim 0:33d4e66780c0 507
IanBenzMaxim 0:33d4e66780c0 508 case '\\':
IanBenzMaxim 0:33d4e66780c0 509 if (ds.Peek() == 'b') {
IanBenzMaxim 0:33d4e66780c0 510 ds.Take();
IanBenzMaxim 0:33d4e66780c0 511 codepoint = 0x0008; // Escape backspace character
IanBenzMaxim 0:33d4e66780c0 512 }
IanBenzMaxim 0:33d4e66780c0 513 else if (!CharacterEscape(ds, &codepoint))
IanBenzMaxim 0:33d4e66780c0 514 return false;
IanBenzMaxim 0:33d4e66780c0 515 // fall through to default
IanBenzMaxim 0:33d4e66780c0 516
IanBenzMaxim 0:33d4e66780c0 517 default:
IanBenzMaxim 0:33d4e66780c0 518 switch (step) {
IanBenzMaxim 0:33d4e66780c0 519 case 1:
IanBenzMaxim 0:33d4e66780c0 520 if (codepoint == '-') {
IanBenzMaxim 0:33d4e66780c0 521 step++;
IanBenzMaxim 0:33d4e66780c0 522 break;
IanBenzMaxim 0:33d4e66780c0 523 }
IanBenzMaxim 0:33d4e66780c0 524 // fall through to step 0 for other characters
IanBenzMaxim 0:33d4e66780c0 525
IanBenzMaxim 0:33d4e66780c0 526 case 0:
IanBenzMaxim 0:33d4e66780c0 527 {
IanBenzMaxim 0:33d4e66780c0 528 SizeType r = NewRange(codepoint);
IanBenzMaxim 0:33d4e66780c0 529 if (current != kRegexInvalidRange)
IanBenzMaxim 0:33d4e66780c0 530 GetRange(current).next = r;
IanBenzMaxim 0:33d4e66780c0 531 if (start == kRegexInvalidRange)
IanBenzMaxim 0:33d4e66780c0 532 start = r;
IanBenzMaxim 0:33d4e66780c0 533 current = r;
IanBenzMaxim 0:33d4e66780c0 534 }
IanBenzMaxim 0:33d4e66780c0 535 step = 1;
IanBenzMaxim 0:33d4e66780c0 536 break;
IanBenzMaxim 0:33d4e66780c0 537
IanBenzMaxim 0:33d4e66780c0 538 default:
IanBenzMaxim 0:33d4e66780c0 539 RAPIDJSON_ASSERT(step == 2);
IanBenzMaxim 0:33d4e66780c0 540 GetRange(current).end = codepoint;
IanBenzMaxim 0:33d4e66780c0 541 step = 0;
IanBenzMaxim 0:33d4e66780c0 542 }
IanBenzMaxim 0:33d4e66780c0 543 }
IanBenzMaxim 0:33d4e66780c0 544 }
IanBenzMaxim 0:33d4e66780c0 545 return false;
IanBenzMaxim 0:33d4e66780c0 546 }
IanBenzMaxim 0:33d4e66780c0 547
IanBenzMaxim 0:33d4e66780c0 548 SizeType NewRange(unsigned codepoint) {
IanBenzMaxim 0:33d4e66780c0 549 Range* r = ranges_.template Push<Range>();
IanBenzMaxim 0:33d4e66780c0 550 r->start = r->end = codepoint;
IanBenzMaxim 0:33d4e66780c0 551 r->next = kRegexInvalidRange;
IanBenzMaxim 0:33d4e66780c0 552 return rangeCount_++;
IanBenzMaxim 0:33d4e66780c0 553 }
IanBenzMaxim 0:33d4e66780c0 554
IanBenzMaxim 0:33d4e66780c0 555 template <typename InputStream>
IanBenzMaxim 0:33d4e66780c0 556 bool CharacterEscape(DecodedStream<InputStream, Encoding>& ds, unsigned* escapedCodepoint) {
IanBenzMaxim 0:33d4e66780c0 557 unsigned codepoint;
IanBenzMaxim 0:33d4e66780c0 558 switch (codepoint = ds.Take()) {
IanBenzMaxim 0:33d4e66780c0 559 case '^':
IanBenzMaxim 0:33d4e66780c0 560 case '$':
IanBenzMaxim 0:33d4e66780c0 561 case '|':
IanBenzMaxim 0:33d4e66780c0 562 case '(':
IanBenzMaxim 0:33d4e66780c0 563 case ')':
IanBenzMaxim 0:33d4e66780c0 564 case '?':
IanBenzMaxim 0:33d4e66780c0 565 case '*':
IanBenzMaxim 0:33d4e66780c0 566 case '+':
IanBenzMaxim 0:33d4e66780c0 567 case '.':
IanBenzMaxim 0:33d4e66780c0 568 case '[':
IanBenzMaxim 0:33d4e66780c0 569 case ']':
IanBenzMaxim 0:33d4e66780c0 570 case '{':
IanBenzMaxim 0:33d4e66780c0 571 case '}':
IanBenzMaxim 0:33d4e66780c0 572 case '\\':
IanBenzMaxim 0:33d4e66780c0 573 *escapedCodepoint = codepoint; return true;
IanBenzMaxim 0:33d4e66780c0 574 case 'f': *escapedCodepoint = 0x000C; return true;
IanBenzMaxim 0:33d4e66780c0 575 case 'n': *escapedCodepoint = 0x000A; return true;
IanBenzMaxim 0:33d4e66780c0 576 case 'r': *escapedCodepoint = 0x000D; return true;
IanBenzMaxim 0:33d4e66780c0 577 case 't': *escapedCodepoint = 0x0009; return true;
IanBenzMaxim 0:33d4e66780c0 578 case 'v': *escapedCodepoint = 0x000B; return true;
IanBenzMaxim 0:33d4e66780c0 579 default:
IanBenzMaxim 0:33d4e66780c0 580 return false; // Unsupported escape character
IanBenzMaxim 0:33d4e66780c0 581 }
IanBenzMaxim 0:33d4e66780c0 582 }
IanBenzMaxim 0:33d4e66780c0 583
IanBenzMaxim 0:33d4e66780c0 584 Stack<Allocator> states_;
IanBenzMaxim 0:33d4e66780c0 585 Stack<Allocator> ranges_;
IanBenzMaxim 0:33d4e66780c0 586 SizeType root_;
IanBenzMaxim 0:33d4e66780c0 587 SizeType stateCount_;
IanBenzMaxim 0:33d4e66780c0 588 SizeType rangeCount_;
IanBenzMaxim 0:33d4e66780c0 589
IanBenzMaxim 0:33d4e66780c0 590 static const unsigned kInfinityQuantifier = ~0u;
IanBenzMaxim 0:33d4e66780c0 591
IanBenzMaxim 0:33d4e66780c0 592 // For SearchWithAnchoring()
IanBenzMaxim 0:33d4e66780c0 593 bool anchorBegin_;
IanBenzMaxim 0:33d4e66780c0 594 bool anchorEnd_;
IanBenzMaxim 0:33d4e66780c0 595 };
IanBenzMaxim 0:33d4e66780c0 596
IanBenzMaxim 0:33d4e66780c0 597 template <typename RegexType, typename Allocator = CrtAllocator>
IanBenzMaxim 0:33d4e66780c0 598 class GenericRegexSearch {
IanBenzMaxim 0:33d4e66780c0 599 public:
IanBenzMaxim 0:33d4e66780c0 600 typedef typename RegexType::EncodingType Encoding;
IanBenzMaxim 0:33d4e66780c0 601 typedef typename Encoding::Ch Ch;
IanBenzMaxim 0:33d4e66780c0 602
IanBenzMaxim 0:33d4e66780c0 603 GenericRegexSearch(const RegexType& regex, Allocator* allocator = 0) :
IanBenzMaxim 0:33d4e66780c0 604 regex_(regex), allocator_(allocator), ownAllocator_(0),
IanBenzMaxim 0:33d4e66780c0 605 state0_(allocator, 0), state1_(allocator, 0), stateSet_()
IanBenzMaxim 0:33d4e66780c0 606 {
IanBenzMaxim 0:33d4e66780c0 607 RAPIDJSON_ASSERT(regex_.IsValid());
IanBenzMaxim 0:33d4e66780c0 608 if (!allocator_)
IanBenzMaxim 0:33d4e66780c0 609 ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator());
IanBenzMaxim 0:33d4e66780c0 610 stateSet_ = static_cast<unsigned*>(allocator_->Malloc(GetStateSetSize()));
IanBenzMaxim 0:33d4e66780c0 611 state0_.template Reserve<SizeType>(regex_.stateCount_);
IanBenzMaxim 0:33d4e66780c0 612 state1_.template Reserve<SizeType>(regex_.stateCount_);
IanBenzMaxim 0:33d4e66780c0 613 }
IanBenzMaxim 0:33d4e66780c0 614
IanBenzMaxim 0:33d4e66780c0 615 ~GenericRegexSearch() {
IanBenzMaxim 0:33d4e66780c0 616 Allocator::Free(stateSet_);
IanBenzMaxim 0:33d4e66780c0 617 RAPIDJSON_DELETE(ownAllocator_);
IanBenzMaxim 0:33d4e66780c0 618 }
IanBenzMaxim 0:33d4e66780c0 619
IanBenzMaxim 0:33d4e66780c0 620 template <typename InputStream>
IanBenzMaxim 0:33d4e66780c0 621 bool Match(InputStream& is) {
IanBenzMaxim 0:33d4e66780c0 622 return SearchWithAnchoring(is, true, true);
IanBenzMaxim 0:33d4e66780c0 623 }
IanBenzMaxim 0:33d4e66780c0 624
IanBenzMaxim 0:33d4e66780c0 625 bool Match(const Ch* s) {
IanBenzMaxim 0:33d4e66780c0 626 GenericStringStream<Encoding> is(s);
IanBenzMaxim 0:33d4e66780c0 627 return Match(is);
IanBenzMaxim 0:33d4e66780c0 628 }
IanBenzMaxim 0:33d4e66780c0 629
IanBenzMaxim 0:33d4e66780c0 630 template <typename InputStream>
IanBenzMaxim 0:33d4e66780c0 631 bool Search(InputStream& is) {
IanBenzMaxim 0:33d4e66780c0 632 return SearchWithAnchoring(is, regex_.anchorBegin_, regex_.anchorEnd_);
IanBenzMaxim 0:33d4e66780c0 633 }
IanBenzMaxim 0:33d4e66780c0 634
IanBenzMaxim 0:33d4e66780c0 635 bool Search(const Ch* s) {
IanBenzMaxim 0:33d4e66780c0 636 GenericStringStream<Encoding> is(s);
IanBenzMaxim 0:33d4e66780c0 637 return Search(is);
IanBenzMaxim 0:33d4e66780c0 638 }
IanBenzMaxim 0:33d4e66780c0 639
IanBenzMaxim 0:33d4e66780c0 640 private:
IanBenzMaxim 0:33d4e66780c0 641 typedef typename RegexType::State State;
IanBenzMaxim 0:33d4e66780c0 642 typedef typename RegexType::Range Range;
IanBenzMaxim 0:33d4e66780c0 643
IanBenzMaxim 0:33d4e66780c0 644 template <typename InputStream>
IanBenzMaxim 0:33d4e66780c0 645 bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) {
IanBenzMaxim 0:33d4e66780c0 646 DecodedStream<InputStream, Encoding> ds(is);
IanBenzMaxim 0:33d4e66780c0 647
IanBenzMaxim 0:33d4e66780c0 648 state0_.Clear();
IanBenzMaxim 0:33d4e66780c0 649 Stack<Allocator> *current = &state0_, *next = &state1_;
IanBenzMaxim 0:33d4e66780c0 650 const size_t stateSetSize = GetStateSetSize();
IanBenzMaxim 0:33d4e66780c0 651 std::memset(stateSet_, 0, stateSetSize);
IanBenzMaxim 0:33d4e66780c0 652
IanBenzMaxim 0:33d4e66780c0 653 bool matched = AddState(*current, regex_.root_);
IanBenzMaxim 0:33d4e66780c0 654 unsigned codepoint;
IanBenzMaxim 0:33d4e66780c0 655 while (!current->Empty() && (codepoint = ds.Take()) != 0) {
IanBenzMaxim 0:33d4e66780c0 656 std::memset(stateSet_, 0, stateSetSize);
IanBenzMaxim 0:33d4e66780c0 657 next->Clear();
IanBenzMaxim 0:33d4e66780c0 658 matched = false;
IanBenzMaxim 0:33d4e66780c0 659 for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
IanBenzMaxim 0:33d4e66780c0 660 const State& sr = regex_.GetState(*s);
IanBenzMaxim 0:33d4e66780c0 661 if (sr.codepoint == codepoint ||
IanBenzMaxim 0:33d4e66780c0 662 sr.codepoint == RegexType::kAnyCharacterClass ||
IanBenzMaxim 0:33d4e66780c0 663 (sr.codepoint == RegexType::kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint)))
IanBenzMaxim 0:33d4e66780c0 664 {
IanBenzMaxim 0:33d4e66780c0 665 matched = AddState(*next, sr.out) || matched;
IanBenzMaxim 0:33d4e66780c0 666 if (!anchorEnd && matched)
IanBenzMaxim 0:33d4e66780c0 667 return true;
IanBenzMaxim 0:33d4e66780c0 668 }
IanBenzMaxim 0:33d4e66780c0 669 if (!anchorBegin)
IanBenzMaxim 0:33d4e66780c0 670 AddState(*next, regex_.root_);
IanBenzMaxim 0:33d4e66780c0 671 }
IanBenzMaxim 0:33d4e66780c0 672 internal::Swap(current, next);
IanBenzMaxim 0:33d4e66780c0 673 }
IanBenzMaxim 0:33d4e66780c0 674
IanBenzMaxim 0:33d4e66780c0 675 return matched;
IanBenzMaxim 0:33d4e66780c0 676 }
IanBenzMaxim 0:33d4e66780c0 677
IanBenzMaxim 0:33d4e66780c0 678 size_t GetStateSetSize() const {
IanBenzMaxim 0:33d4e66780c0 679 return (regex_.stateCount_ + 31) / 32 * 4;
IanBenzMaxim 0:33d4e66780c0 680 }
IanBenzMaxim 0:33d4e66780c0 681
IanBenzMaxim 0:33d4e66780c0 682 // Return whether the added states is a match state
IanBenzMaxim 0:33d4e66780c0 683 bool AddState(Stack<Allocator>& l, SizeType index) {
IanBenzMaxim 0:33d4e66780c0 684 RAPIDJSON_ASSERT(index != kRegexInvalidState);
IanBenzMaxim 0:33d4e66780c0 685
IanBenzMaxim 0:33d4e66780c0 686 const State& s = regex_.GetState(index);
IanBenzMaxim 0:33d4e66780c0 687 if (s.out1 != kRegexInvalidState) { // Split
IanBenzMaxim 0:33d4e66780c0 688 bool matched = AddState(l, s.out);
IanBenzMaxim 0:33d4e66780c0 689 return AddState(l, s.out1) || matched;
IanBenzMaxim 0:33d4e66780c0 690 }
IanBenzMaxim 0:33d4e66780c0 691 else if (!(stateSet_[index >> 5] & (1 << (index & 31)))) {
IanBenzMaxim 0:33d4e66780c0 692 stateSet_[index >> 5] |= (1 << (index & 31));
IanBenzMaxim 0:33d4e66780c0 693 *l.template PushUnsafe<SizeType>() = index;
IanBenzMaxim 0:33d4e66780c0 694 }
IanBenzMaxim 0:33d4e66780c0 695 return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not validated due to reallocation.
IanBenzMaxim 0:33d4e66780c0 696 }
IanBenzMaxim 0:33d4e66780c0 697
IanBenzMaxim 0:33d4e66780c0 698 bool MatchRange(SizeType rangeIndex, unsigned codepoint) const {
IanBenzMaxim 0:33d4e66780c0 699 bool yes = (regex_.GetRange(rangeIndex).start & RegexType::kRangeNegationFlag) == 0;
IanBenzMaxim 0:33d4e66780c0 700 while (rangeIndex != kRegexInvalidRange) {
IanBenzMaxim 0:33d4e66780c0 701 const Range& r = regex_.GetRange(rangeIndex);
IanBenzMaxim 0:33d4e66780c0 702 if (codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end)
IanBenzMaxim 0:33d4e66780c0 703 return yes;
IanBenzMaxim 0:33d4e66780c0 704 rangeIndex = r.next;
IanBenzMaxim 0:33d4e66780c0 705 }
IanBenzMaxim 0:33d4e66780c0 706 return !yes;
IanBenzMaxim 0:33d4e66780c0 707 }
IanBenzMaxim 0:33d4e66780c0 708
IanBenzMaxim 0:33d4e66780c0 709 const RegexType& regex_;
IanBenzMaxim 0:33d4e66780c0 710 Allocator* allocator_;
IanBenzMaxim 0:33d4e66780c0 711 Allocator* ownAllocator_;
IanBenzMaxim 0:33d4e66780c0 712 Stack<Allocator> state0_;
IanBenzMaxim 0:33d4e66780c0 713 Stack<Allocator> state1_;
IanBenzMaxim 0:33d4e66780c0 714 uint32_t* stateSet_;
IanBenzMaxim 0:33d4e66780c0 715 };
IanBenzMaxim 0:33d4e66780c0 716
IanBenzMaxim 0:33d4e66780c0 717 typedef GenericRegex<UTF8<> > Regex;
IanBenzMaxim 0:33d4e66780c0 718 typedef GenericRegexSearch<Regex> RegexSearch;
IanBenzMaxim 0:33d4e66780c0 719
IanBenzMaxim 0:33d4e66780c0 720 } // namespace internal
IanBenzMaxim 0:33d4e66780c0 721 RAPIDJSON_NAMESPACE_END
IanBenzMaxim 0:33d4e66780c0 722
IanBenzMaxim 0:33d4e66780c0 723 #ifdef __clang__
IanBenzMaxim 0:33d4e66780c0 724 RAPIDJSON_DIAG_POP
IanBenzMaxim 0:33d4e66780c0 725 #endif
IanBenzMaxim 0:33d4e66780c0 726
IanBenzMaxim 0:33d4e66780c0 727 #ifdef _MSC_VER
IanBenzMaxim 0:33d4e66780c0 728 RAPIDJSON_DIAG_POP
IanBenzMaxim 0:33d4e66780c0 729 #endif
IanBenzMaxim 0:33d4e66780c0 730
IanBenzMaxim 0:33d4e66780c0 731 #endif // RAPIDJSON_INTERNAL_REGEX_H_