DeepCover Embedded Security in IoT: Public-key Secured Data Paths
Dependencies: MaximInterface
regex.h
00001 // Tencent is pleased to support the open source community by making RapidJSON available. 00002 // 00003 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 00004 // 00005 // Licensed under the MIT License (the "License"); you may not use this file except 00006 // in compliance with the License. You may obtain a copy of the License at 00007 // 00008 // http://opensource.org/licenses/MIT 00009 // 00010 // Unless required by applicable law or agreed to in writing, software distributed 00011 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 00012 // CONDITIONS OF ANY KIND, either express or implied. See the License for the 00013 // specific language governing permissions and limitations under the License. 00014 00015 #ifndef RAPIDJSON_INTERNAL_REGEX_H_ 00016 #define RAPIDJSON_INTERNAL_REGEX_H_ 00017 00018 #include "../allocators.h" 00019 #include "../stream.h" 00020 #include "stack.h" 00021 00022 #ifdef __clang__ 00023 RAPIDJSON_DIAG_PUSH 00024 RAPIDJSON_DIAG_OFF(padded) 00025 RAPIDJSON_DIAG_OFF(switch-enum) 00026 RAPIDJSON_DIAG_OFF(implicit-fallthrough) 00027 #endif 00028 00029 #ifdef __GNUC__ 00030 RAPIDJSON_DIAG_PUSH 00031 RAPIDJSON_DIAG_OFF(effc++) 00032 #endif 00033 00034 #ifdef _MSC_VER 00035 RAPIDJSON_DIAG_PUSH 00036 RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated 00037 #endif 00038 00039 #ifndef RAPIDJSON_REGEX_VERBOSE 00040 #define RAPIDJSON_REGEX_VERBOSE 0 00041 #endif 00042 00043 RAPIDJSON_NAMESPACE_BEGIN 00044 namespace internal { 00045 00046 /////////////////////////////////////////////////////////////////////////////// 00047 // DecodedStream 00048 00049 template <typename SourceStream, typename Encoding> 00050 class DecodedStream { 00051 public: 00052 DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); } 00053 unsigned Peek() { return codepoint_; } 00054 unsigned Take() { 00055 unsigned c = codepoint_; 00056 if (c) // No further decoding when '\0' 00057 Decode(); 00058 return c; 00059 } 00060 00061 private: 00062 void Decode() { 00063 if (!Encoding::Decode(ss_, &codepoint_)) 00064 codepoint_ = 0; 00065 } 00066 00067 SourceStream& ss_; 00068 unsigned codepoint_; 00069 }; 00070 00071 /////////////////////////////////////////////////////////////////////////////// 00072 // GenericRegex 00073 00074 static const SizeType kRegexInvalidState = ~SizeType(0); //!< Represents an invalid index in GenericRegex::State::out, out1 00075 static const SizeType kRegexInvalidRange = ~SizeType(0); 00076 00077 template <typename Encoding, typename Allocator> 00078 class GenericRegexSearch; 00079 00080 //! Regular expression engine with subset of ECMAscript grammar. 00081 /*! 00082 Supported regular expression syntax: 00083 - \c ab Concatenation 00084 - \c a|b Alternation 00085 - \c a? Zero or one 00086 - \c a* Zero or more 00087 - \c a+ One or more 00088 - \c a{3} Exactly 3 times 00089 - \c a{3,} At least 3 times 00090 - \c a{3,5} 3 to 5 times 00091 - \c (ab) Grouping 00092 - \c ^a At the beginning 00093 - \c a$ At the end 00094 - \c . Any character 00095 - \c [abc] Character classes 00096 - \c [a-c] Character class range 00097 - \c [a-z0-9_] Character class combination 00098 - \c [^abc] Negated character classes 00099 - \c [^a-c] Negated character class range 00100 - \c [\b] Backspace (U+0008) 00101 - \c \\| \\\\ ... Escape characters 00102 - \c \\f Form feed (U+000C) 00103 - \c \\n Line feed (U+000A) 00104 - \c \\r Carriage return (U+000D) 00105 - \c \\t Tab (U+0009) 00106 - \c \\v Vertical tab (U+000B) 00107 00108 \note This is a Thompson NFA engine, implemented with reference to 00109 Cox, Russ. "Regular Expression Matching Can Be Simple And Fast (but is slow in Java, Perl, PHP, Python, Ruby,...).", 00110 https://swtch.com/~rsc/regexp/regexp1.html 00111 */ 00112 template <typename Encoding, typename Allocator = CrtAllocator> 00113 class GenericRegex { 00114 public: 00115 typedef Encoding EncodingType; 00116 typedef typename Encoding::Ch Ch; 00117 template <typename, typename> friend class GenericRegexSearch; 00118 00119 GenericRegex(const Ch* source, Allocator* allocator = 0) : 00120 states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(), 00121 anchorBegin_(), anchorEnd_() 00122 { 00123 GenericStringStream<Encoding> ss(source); 00124 DecodedStream<GenericStringStream<Encoding>, Encoding> ds(ss); 00125 Parse(ds); 00126 } 00127 00128 ~GenericRegex() {} 00129 00130 bool IsValid() const { 00131 return root_ != kRegexInvalidState; 00132 } 00133 00134 private: 00135 enum Operator { 00136 kZeroOrOne, 00137 kZeroOrMore, 00138 kOneOrMore, 00139 kConcatenation, 00140 kAlternation, 00141 kLeftParenthesis 00142 }; 00143 00144 static const unsigned kAnyCharacterClass = 0xFFFFFFFF; //!< For '.' 00145 static const unsigned kRangeCharacterClass = 0xFFFFFFFE; 00146 static const unsigned kRangeNegationFlag = 0x80000000; 00147 00148 struct Range { 00149 unsigned start; // 00150 unsigned end; 00151 SizeType next; 00152 }; 00153 00154 struct State { 00155 SizeType out; //!< Equals to kInvalid for matching state 00156 SizeType out1; //!< Equals to non-kInvalid for split 00157 SizeType rangeStart; 00158 unsigned codepoint; 00159 }; 00160 00161 struct Frag { 00162 Frag(SizeType s, SizeType o, SizeType m) : start(s), out(o), minIndex(m) {} 00163 SizeType start; 00164 SizeType out; //!< link-list of all output states 00165 SizeType minIndex; 00166 }; 00167 00168 State& GetState(SizeType index) { 00169 RAPIDJSON_ASSERT(index < stateCount_); 00170 return states_.template Bottom<State>()[index]; 00171 } 00172 00173 const State& GetState(SizeType index) const { 00174 RAPIDJSON_ASSERT(index < stateCount_); 00175 return states_.template Bottom<State>()[index]; 00176 } 00177 00178 Range& GetRange(SizeType index) { 00179 RAPIDJSON_ASSERT(index < rangeCount_); 00180 return ranges_.template Bottom<Range>()[index]; 00181 } 00182 00183 const Range& GetRange(SizeType index) const { 00184 RAPIDJSON_ASSERT(index < rangeCount_); 00185 return ranges_.template Bottom<Range>()[index]; 00186 } 00187 00188 template <typename InputStream> 00189 void Parse(DecodedStream<InputStream, Encoding>& ds) { 00190 Allocator allocator; 00191 Stack<Allocator> operandStack(&allocator, 256); // Frag 00192 Stack<Allocator> operatorStack(&allocator, 256); // Operator 00193 Stack<Allocator> atomCountStack(&allocator, 256); // unsigned (Atom per parenthesis) 00194 00195 *atomCountStack.template Push<unsigned>() = 0; 00196 00197 unsigned codepoint; 00198 while (ds.Peek() != 0) { 00199 switch (codepoint = ds.Take()) { 00200 case '^': 00201 anchorBegin_ = true; 00202 break; 00203 00204 case '$': 00205 anchorEnd_ = true; 00206 break; 00207 00208 case '|': 00209 while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation) 00210 if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1))) 00211 return; 00212 *operatorStack.template Push<Operator>() = kAlternation; 00213 *atomCountStack.template Top<unsigned>() = 0; 00214 break; 00215 00216 case '(': 00217 *operatorStack.template Push<Operator>() = kLeftParenthesis; 00218 *atomCountStack.template Push<unsigned>() = 0; 00219 break; 00220 00221 case ')': 00222 while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis) 00223 if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1))) 00224 return; 00225 if (operatorStack.Empty()) 00226 return; 00227 operatorStack.template Pop<Operator>(1); 00228 atomCountStack.template Pop<unsigned>(1); 00229 ImplicitConcatenation(atomCountStack, operatorStack); 00230 break; 00231 00232 case '?': 00233 if (!Eval(operandStack, kZeroOrOne)) 00234 return; 00235 break; 00236 00237 case '*': 00238 if (!Eval(operandStack, kZeroOrMore)) 00239 return; 00240 break; 00241 00242 case '+': 00243 if (!Eval(operandStack, kOneOrMore)) 00244 return; 00245 break; 00246 00247 case '{': 00248 { 00249 unsigned n, m; 00250 if (!ParseUnsigned(ds, &n)) 00251 return; 00252 00253 if (ds.Peek() == ',') { 00254 ds.Take(); 00255 if (ds.Peek() == '}') 00256 m = kInfinityQuantifier; 00257 else if (!ParseUnsigned(ds, &m) || m < n) 00258 return; 00259 } 00260 else 00261 m = n; 00262 00263 if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}') 00264 return; 00265 ds.Take(); 00266 } 00267 break; 00268 00269 case '.': 00270 PushOperand(operandStack, kAnyCharacterClass); 00271 ImplicitConcatenation(atomCountStack, operatorStack); 00272 break; 00273 00274 case '[': 00275 { 00276 SizeType range; 00277 if (!ParseRange(ds, &range)) 00278 return; 00279 SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass); 00280 GetState(s).rangeStart = range; 00281 *operandStack.template Push<Frag>() = Frag(s, s, s); 00282 } 00283 ImplicitConcatenation(atomCountStack, operatorStack); 00284 break; 00285 00286 case '\\': // Escape character 00287 if (!CharacterEscape(ds, &codepoint)) 00288 return; // Unsupported escape character 00289 // fall through to default 00290 00291 default: // Pattern character 00292 PushOperand(operandStack, codepoint); 00293 ImplicitConcatenation(atomCountStack, operatorStack); 00294 } 00295 } 00296 00297 while (!operatorStack.Empty()) 00298 if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1))) 00299 return; 00300 00301 // Link the operand to matching state. 00302 if (operandStack.GetSize() == sizeof(Frag)) { 00303 Frag* e = operandStack.template Pop<Frag>(1); 00304 Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0)); 00305 root_ = e->start; 00306 00307 #if RAPIDJSON_REGEX_VERBOSE 00308 printf("root: %d\n", root_); 00309 for (SizeType i = 0; i < stateCount_ ; i++) { 00310 State& s = GetState(i); 00311 printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint); 00312 } 00313 printf("\n"); 00314 #endif 00315 } 00316 } 00317 00318 SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) { 00319 State* s = states_.template Push<State>(); 00320 s->out = out; 00321 s->out1 = out1; 00322 s->codepoint = codepoint; 00323 s->rangeStart = kRegexInvalidRange; 00324 return stateCount_++; 00325 } 00326 00327 void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) { 00328 SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint); 00329 *operandStack.template Push<Frag>() = Frag(s, s, s); 00330 } 00331 00332 void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) { 00333 if (*atomCountStack.template Top<unsigned>()) 00334 *operatorStack.template Push<Operator>() = kConcatenation; 00335 (*atomCountStack.template Top<unsigned>())++; 00336 } 00337 00338 SizeType Append(SizeType l1, SizeType l2) { 00339 SizeType old = l1; 00340 while (GetState(l1).out != kRegexInvalidState) 00341 l1 = GetState(l1).out; 00342 GetState(l1).out = l2; 00343 return old; 00344 } 00345 00346 void Patch(SizeType l, SizeType s) { 00347 for (SizeType next; l != kRegexInvalidState; l = next) { 00348 next = GetState(l).out; 00349 GetState(l).out = s; 00350 } 00351 } 00352 00353 bool Eval(Stack<Allocator>& operandStack, Operator op) { 00354 switch (op) { 00355 case kConcatenation: 00356 RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag) * 2); 00357 { 00358 Frag e2 = *operandStack.template Pop<Frag>(1); 00359 Frag e1 = *operandStack.template Pop<Frag>(1); 00360 Patch(e1.out, e2.start); 00361 *operandStack.template Push<Frag>() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex)); 00362 } 00363 return true; 00364 00365 case kAlternation: 00366 if (operandStack.GetSize() >= sizeof(Frag) * 2) { 00367 Frag e2 = *operandStack.template Pop<Frag>(1); 00368 Frag e1 = *operandStack.template Pop<Frag>(1); 00369 SizeType s = NewState(e1.start, e2.start, 0); 00370 *operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex)); 00371 return true; 00372 } 00373 return false; 00374 00375 case kZeroOrOne: 00376 if (operandStack.GetSize() >= sizeof(Frag)) { 00377 Frag e = *operandStack.template Pop<Frag>(1); 00378 SizeType s = NewState(kRegexInvalidState, e.start, 0); 00379 *operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex); 00380 return true; 00381 } 00382 return false; 00383 00384 case kZeroOrMore: 00385 if (operandStack.GetSize() >= sizeof(Frag)) { 00386 Frag e = *operandStack.template Pop<Frag>(1); 00387 SizeType s = NewState(kRegexInvalidState, e.start, 0); 00388 Patch(e.out, s); 00389 *operandStack.template Push<Frag>() = Frag(s, s, e.minIndex); 00390 return true; 00391 } 00392 return false; 00393 00394 default: 00395 RAPIDJSON_ASSERT(op == kOneOrMore); 00396 if (operandStack.GetSize() >= sizeof(Frag)) { 00397 Frag e = *operandStack.template Pop<Frag>(1); 00398 SizeType s = NewState(kRegexInvalidState, e.start, 0); 00399 Patch(e.out, s); 00400 *operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex); 00401 return true; 00402 } 00403 return false; 00404 } 00405 } 00406 00407 bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) { 00408 RAPIDJSON_ASSERT(n <= m); 00409 RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag)); 00410 00411 if (n == 0) { 00412 if (m == 0) // a{0} not support 00413 return false; 00414 else if (m == kInfinityQuantifier) 00415 Eval(operandStack, kZeroOrMore); // a{0,} -> a* 00416 else { 00417 Eval(operandStack, kZeroOrOne); // a{0,5} -> a? 00418 for (unsigned i = 0; i < m - 1; i++) 00419 CloneTopOperand(operandStack); // a{0,5} -> a? a? a? a? a? 00420 for (unsigned i = 0; i < m - 1; i++) 00421 Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a? 00422 } 00423 return true; 00424 } 00425 00426 for (unsigned i = 0; i < n - 1; i++) // a{3} -> a a a 00427 CloneTopOperand(operandStack); 00428 00429 if (m == kInfinityQuantifier) 00430 Eval(operandStack, kOneOrMore); // a{3,} -> a a a+ 00431 else if (m > n) { 00432 CloneTopOperand(operandStack); // a{3,5} -> a a a a 00433 Eval(operandStack, kZeroOrOne); // a{3,5} -> a a a a? 00434 for (unsigned i = n; i < m - 1; i++) 00435 CloneTopOperand(operandStack); // a{3,5} -> a a a a? a? 00436 for (unsigned i = n; i < m; i++) 00437 Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a? 00438 } 00439 00440 for (unsigned i = 0; i < n - 1; i++) 00441 Eval(operandStack, kConcatenation); // a{3} -> aaa, a{3,} -> aaa+, a{3.5} -> aaaa?a? 00442 00443 return true; 00444 } 00445 00446 static SizeType Min(SizeType a, SizeType b) { return a < b ? a : b; } 00447 00448 void CloneTopOperand(Stack<Allocator>& operandStack) { 00449 const Frag src = *operandStack.template Top<Frag>(); // Copy constructor to prevent invalidation 00450 SizeType count = stateCount_ - src.minIndex; // Assumes top operand contains states in [src->minIndex, stateCount_) 00451 State* s = states_.template Push<State>(count); 00452 memcpy(s, &GetState(src.minIndex), count * sizeof(State)); 00453 for (SizeType j = 0; j < count; j++) { 00454 if (s[j].out != kRegexInvalidState) 00455 s[j].out += count; 00456 if (s[j].out1 != kRegexInvalidState) 00457 s[j].out1 += count; 00458 } 00459 *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count, src.minIndex + count); 00460 stateCount_ += count; 00461 } 00462 00463 template <typename InputStream> 00464 bool ParseUnsigned(DecodedStream<InputStream, Encoding>& ds, unsigned* u) { 00465 unsigned r = 0; 00466 if (ds.Peek() < '0' || ds.Peek() > '9') 00467 return false; 00468 while (ds.Peek() >= '0' && ds.Peek() <= '9') { 00469 if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295 00470 return false; // overflow 00471 r = r * 10 + (ds.Take() - '0'); 00472 } 00473 *u = r; 00474 return true; 00475 } 00476 00477 template <typename InputStream> 00478 bool ParseRange(DecodedStream<InputStream, Encoding>& ds, SizeType* range) { 00479 bool isBegin = true; 00480 bool negate = false; 00481 int step = 0; 00482 SizeType start = kRegexInvalidRange; 00483 SizeType current = kRegexInvalidRange; 00484 unsigned codepoint; 00485 while ((codepoint = ds.Take()) != 0) { 00486 if (isBegin) { 00487 isBegin = false; 00488 if (codepoint == '^') { 00489 negate = true; 00490 continue; 00491 } 00492 } 00493 00494 switch (codepoint) { 00495 case ']': 00496 if (start == kRegexInvalidRange) 00497 return false; // Error: nothing inside [] 00498 if (step == 2) { // Add trailing '-' 00499 SizeType r = NewRange('-'); 00500 RAPIDJSON_ASSERT(current != kRegexInvalidRange); 00501 GetRange(current).next = r; 00502 } 00503 if (negate) 00504 GetRange(start).start |= kRangeNegationFlag; 00505 *range = start; 00506 return true; 00507 00508 case '\\': 00509 if (ds.Peek() == 'b') { 00510 ds.Take(); 00511 codepoint = 0x0008; // Escape backspace character 00512 } 00513 else if (!CharacterEscape(ds, &codepoint)) 00514 return false; 00515 // fall through to default 00516 00517 default: 00518 switch (step) { 00519 case 1: 00520 if (codepoint == '-') { 00521 step++; 00522 break; 00523 } 00524 // fall through to step 0 for other characters 00525 00526 case 0: 00527 { 00528 SizeType r = NewRange(codepoint); 00529 if (current != kRegexInvalidRange) 00530 GetRange(current).next = r; 00531 if (start == kRegexInvalidRange) 00532 start = r; 00533 current = r; 00534 } 00535 step = 1; 00536 break; 00537 00538 default: 00539 RAPIDJSON_ASSERT(step == 2); 00540 GetRange(current).end = codepoint; 00541 step = 0; 00542 } 00543 } 00544 } 00545 return false; 00546 } 00547 00548 SizeType NewRange(unsigned codepoint) { 00549 Range* r = ranges_.template Push<Range>(); 00550 r->start = r->end = codepoint; 00551 r->next = kRegexInvalidRange; 00552 return rangeCount_++; 00553 } 00554 00555 template <typename InputStream> 00556 bool CharacterEscape(DecodedStream<InputStream, Encoding>& ds, unsigned* escapedCodepoint) { 00557 unsigned codepoint; 00558 switch (codepoint = ds.Take()) { 00559 case '^': 00560 case '$': 00561 case '|': 00562 case '(': 00563 case ')': 00564 case '?': 00565 case '*': 00566 case '+': 00567 case '.': 00568 case '[': 00569 case ']': 00570 case '{': 00571 case '}': 00572 case '\\': 00573 *escapedCodepoint = codepoint; return true; 00574 case 'f': *escapedCodepoint = 0x000C; return true; 00575 case 'n': *escapedCodepoint = 0x000A; return true; 00576 case 'r': *escapedCodepoint = 0x000D; return true; 00577 case 't': *escapedCodepoint = 0x0009; return true; 00578 case 'v': *escapedCodepoint = 0x000B; return true; 00579 default: 00580 return false; // Unsupported escape character 00581 } 00582 } 00583 00584 Stack<Allocator> states_; 00585 Stack<Allocator> ranges_; 00586 SizeType root_; 00587 SizeType stateCount_; 00588 SizeType rangeCount_; 00589 00590 static const unsigned kInfinityQuantifier = ~0u; 00591 00592 // For SearchWithAnchoring() 00593 bool anchorBegin_; 00594 bool anchorEnd_; 00595 }; 00596 00597 template <typename RegexType, typename Allocator = CrtAllocator> 00598 class GenericRegexSearch { 00599 public: 00600 typedef typename RegexType::EncodingType Encoding; 00601 typedef typename Encoding::Ch Ch; 00602 00603 GenericRegexSearch(const RegexType& regex, Allocator* allocator = 0) : 00604 regex_(regex), allocator_(allocator), ownAllocator_(0), 00605 state0_(allocator, 0), state1_(allocator, 0), stateSet_() 00606 { 00607 RAPIDJSON_ASSERT(regex_.IsValid()); 00608 if (!allocator_) 00609 ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator()); 00610 stateSet_ = static_cast<unsigned*>(allocator_->Malloc(GetStateSetSize())); 00611 state0_.template Reserve<SizeType>(regex_.stateCount_); 00612 state1_.template Reserve<SizeType>(regex_.stateCount_); 00613 } 00614 00615 ~GenericRegexSearch() { 00616 Allocator::Free(stateSet_); 00617 RAPIDJSON_DELETE(ownAllocator_); 00618 } 00619 00620 template <typename InputStream> 00621 bool Match(InputStream& is) { 00622 return SearchWithAnchoring(is, true, true); 00623 } 00624 00625 bool Match(const Ch* s) { 00626 GenericStringStream<Encoding> is(s); 00627 return Match(is); 00628 } 00629 00630 template <typename InputStream> 00631 bool Search(InputStream& is) { 00632 return SearchWithAnchoring(is, regex_.anchorBegin_, regex_.anchorEnd_); 00633 } 00634 00635 bool Search(const Ch* s) { 00636 GenericStringStream<Encoding> is(s); 00637 return Search(is); 00638 } 00639 00640 private: 00641 typedef typename RegexType::State State; 00642 typedef typename RegexType::Range Range; 00643 00644 template <typename InputStream> 00645 bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) { 00646 DecodedStream<InputStream, Encoding> ds(is); 00647 00648 state0_.Clear(); 00649 Stack<Allocator> *current = &state0_, *next = &state1_; 00650 const size_t stateSetSize = GetStateSetSize(); 00651 std::memset(stateSet_, 0, stateSetSize); 00652 00653 bool matched = AddState(*current, regex_.root_); 00654 unsigned codepoint; 00655 while (!current->Empty() && (codepoint = ds.Take()) != 0) { 00656 std::memset(stateSet_, 0, stateSetSize); 00657 next->Clear(); 00658 matched = false; 00659 for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) { 00660 const State& sr = regex_.GetState(*s); 00661 if (sr.codepoint == codepoint || 00662 sr.codepoint == RegexType::kAnyCharacterClass || 00663 (sr.codepoint == RegexType::kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint))) 00664 { 00665 matched = AddState(*next, sr.out) || matched; 00666 if (!anchorEnd && matched) 00667 return true; 00668 } 00669 if (!anchorBegin) 00670 AddState(*next, regex_.root_); 00671 } 00672 internal::Swap(current, next); 00673 } 00674 00675 return matched; 00676 } 00677 00678 size_t GetStateSetSize() const { 00679 return (regex_.stateCount_ + 31) / 32 * 4; 00680 } 00681 00682 // Return whether the added states is a match state 00683 bool AddState(Stack<Allocator>& l, SizeType index) { 00684 RAPIDJSON_ASSERT(index != kRegexInvalidState); 00685 00686 const State& s = regex_.GetState(index); 00687 if (s.out1 != kRegexInvalidState) { // Split 00688 bool matched = AddState(l, s.out); 00689 return AddState(l, s.out1) || matched; 00690 } 00691 else if (!(stateSet_[index >> 5] & (1 << (index & 31)))) { 00692 stateSet_[index >> 5] |= (1 << (index & 31)); 00693 *l.template PushUnsafe<SizeType>() = index; 00694 } 00695 return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not validated due to reallocation. 00696 } 00697 00698 bool MatchRange(SizeType rangeIndex, unsigned codepoint) const { 00699 bool yes = (regex_.GetRange(rangeIndex).start & RegexType::kRangeNegationFlag) == 0; 00700 while (rangeIndex != kRegexInvalidRange) { 00701 const Range& r = regex_.GetRange(rangeIndex); 00702 if (codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end) 00703 return yes; 00704 rangeIndex = r.next; 00705 } 00706 return !yes; 00707 } 00708 00709 const RegexType& regex_; 00710 Allocator* allocator_; 00711 Allocator* ownAllocator_; 00712 Stack<Allocator> state0_; 00713 Stack<Allocator> state1_; 00714 uint32_t* stateSet_; 00715 }; 00716 00717 typedef GenericRegex<UTF8<> > Regex; 00718 typedef GenericRegexSearch<Regex> RegexSearch; 00719 00720 } // namespace internal 00721 RAPIDJSON_NAMESPACE_END 00722 00723 #ifdef __clang__ 00724 RAPIDJSON_DIAG_POP 00725 #endif 00726 00727 #ifdef _MSC_VER 00728 RAPIDJSON_DIAG_POP 00729 #endif 00730 00731 #endif // RAPIDJSON_INTERNAL_REGEX_H_
Generated on Tue Jul 12 2022 12:06:49 by 1.7.2