DeepCover Embedded Security in IoT: Public-key Secured Data Paths

Dependencies:   MaximInterface

The MAXREFDES155# is an internet-of-things (IoT) embedded-security reference design, built to authenticate and control a sensing node using elliptic-curve-based public-key cryptography with control and notification from a web server.

The hardware includes an ARM® mbed™ shield and attached sensor endpoint. The shield contains a DS2476 DeepCover® ECDSA/SHA-2 coprocessor, Wifi communication, LCD push-button controls, and status LEDs. The sensor endpoint is attached to the shield using a 300mm cable and contains a DS28C36 DeepCover ECDSA/SHA-2 authenticator, IR-thermal sensor, and aiming laser for the IR sensor. The MAXREFDES155# is equipped with a standard Arduino® form-factor shield connector for immediate testing using an mbed board such as the MAX32600MBED#. The combination of these two devices represent an IoT device. Communication to the web server is accomplished with the shield Wifi circuitry. Communication from the shield to the attached sensor module is accomplished over I2C . The sensor module represents an IoT endpoint that generates small data with a requirement for message authenticity/integrity and secure on/off operational control.

The design is hierarchical with each mbed platform and shield communicating data from the sensor node to a web server that maintains a centralized log and dispatches notifications as necessary. The simplicity of this design enables rapid integration into any star-topology IoT network to provide security with the low overhead and cost provided by the ECDSA-P256 asymmetric-key and SHA-256 symmetric-key algorithms.

More information about the MAXREFDES155# is available on the Maxim Integrated website.

Committer:
IanBenzMaxim
Date:
Tue Dec 03 12:56:25 2019 -0600
Revision:
18:c2631e985780
Parent:
16:a004191a79ab
Updated MaximInterface to version 2.1.

Who changed what in which revision?

UserRevisionLine numberNew contents of line
IanBenzMaxim 0:33d4e66780c0 1 // Tencent is pleased to support the open source community by making RapidJSON available.
IanBenzMaxim 0:33d4e66780c0 2 //
IanBenzMaxim 0:33d4e66780c0 3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
IanBenzMaxim 0:33d4e66780c0 4 //
IanBenzMaxim 0:33d4e66780c0 5 // Licensed under the MIT License (the "License"); you may not use this file except
IanBenzMaxim 0:33d4e66780c0 6 // in compliance with the License. You may obtain a copy of the License at
IanBenzMaxim 0:33d4e66780c0 7 //
IanBenzMaxim 0:33d4e66780c0 8 // http://opensource.org/licenses/MIT
IanBenzMaxim 0:33d4e66780c0 9 //
IanBenzMaxim 0:33d4e66780c0 10 // Unless required by applicable law or agreed to in writing, software distributed
IanBenzMaxim 0:33d4e66780c0 11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
IanBenzMaxim 0:33d4e66780c0 12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
IanBenzMaxim 0:33d4e66780c0 13 // specific language governing permissions and limitations under the License.
IanBenzMaxim 0:33d4e66780c0 14
IanBenzMaxim 0:33d4e66780c0 15 #ifndef RAPIDJSON_INTERNAL_REGEX_H_
IanBenzMaxim 0:33d4e66780c0 16 #define RAPIDJSON_INTERNAL_REGEX_H_
IanBenzMaxim 0:33d4e66780c0 17
IanBenzMaxim 0:33d4e66780c0 18 #include "../allocators.h"
IanBenzMaxim 0:33d4e66780c0 19 #include "../stream.h"
IanBenzMaxim 0:33d4e66780c0 20 #include "stack.h"
IanBenzMaxim 0:33d4e66780c0 21
IanBenzMaxim 0:33d4e66780c0 22 #ifdef __clang__
IanBenzMaxim 0:33d4e66780c0 23 RAPIDJSON_DIAG_PUSH
IanBenzMaxim 0:33d4e66780c0 24 RAPIDJSON_DIAG_OFF(padded)
IanBenzMaxim 0:33d4e66780c0 25 RAPIDJSON_DIAG_OFF(switch-enum)
IanBenzMaxim 0:33d4e66780c0 26 RAPIDJSON_DIAG_OFF(implicit-fallthrough)
IanBenzMaxim 0:33d4e66780c0 27 #endif
IanBenzMaxim 0:33d4e66780c0 28
IanBenzMaxim 0:33d4e66780c0 29 #ifdef __GNUC__
IanBenzMaxim 0:33d4e66780c0 30 RAPIDJSON_DIAG_PUSH
IanBenzMaxim 0:33d4e66780c0 31 RAPIDJSON_DIAG_OFF(effc++)
IanBenzMaxim 0:33d4e66780c0 32 #endif
IanBenzMaxim 0:33d4e66780c0 33
IanBenzMaxim 0:33d4e66780c0 34 #ifdef _MSC_VER
IanBenzMaxim 0:33d4e66780c0 35 RAPIDJSON_DIAG_PUSH
IanBenzMaxim 0:33d4e66780c0 36 RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
IanBenzMaxim 0:33d4e66780c0 37 #endif
IanBenzMaxim 0:33d4e66780c0 38
IanBenzMaxim 0:33d4e66780c0 39 #ifndef RAPIDJSON_REGEX_VERBOSE
IanBenzMaxim 0:33d4e66780c0 40 #define RAPIDJSON_REGEX_VERBOSE 0
IanBenzMaxim 0:33d4e66780c0 41 #endif
IanBenzMaxim 0:33d4e66780c0 42
IanBenzMaxim 0:33d4e66780c0 43 RAPIDJSON_NAMESPACE_BEGIN
IanBenzMaxim 0:33d4e66780c0 44 namespace internal {
IanBenzMaxim 0:33d4e66780c0 45
IanBenzMaxim 0:33d4e66780c0 46 ///////////////////////////////////////////////////////////////////////////////
IanBenzMaxim 0:33d4e66780c0 47 // DecodedStream
IanBenzMaxim 0:33d4e66780c0 48
IanBenzMaxim 0:33d4e66780c0 49 template <typename SourceStream, typename Encoding>
IanBenzMaxim 0:33d4e66780c0 50 class DecodedStream {
IanBenzMaxim 0:33d4e66780c0 51 public:
IanBenzMaxim 0:33d4e66780c0 52 DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); }
IanBenzMaxim 0:33d4e66780c0 53 unsigned Peek() { return codepoint_; }
IanBenzMaxim 0:33d4e66780c0 54 unsigned Take() {
IanBenzMaxim 0:33d4e66780c0 55 unsigned c = codepoint_;
IanBenzMaxim 0:33d4e66780c0 56 if (c) // No further decoding when '\0'
IanBenzMaxim 0:33d4e66780c0 57 Decode();
IanBenzMaxim 0:33d4e66780c0 58 return c;
IanBenzMaxim 0:33d4e66780c0 59 }
IanBenzMaxim 0:33d4e66780c0 60
IanBenzMaxim 0:33d4e66780c0 61 private:
IanBenzMaxim 0:33d4e66780c0 62 void Decode() {
IanBenzMaxim 0:33d4e66780c0 63 if (!Encoding::Decode(ss_, &codepoint_))
IanBenzMaxim 0:33d4e66780c0 64 codepoint_ = 0;
IanBenzMaxim 0:33d4e66780c0 65 }
IanBenzMaxim 0:33d4e66780c0 66
IanBenzMaxim 0:33d4e66780c0 67 SourceStream& ss_;
IanBenzMaxim 0:33d4e66780c0 68 unsigned codepoint_;
IanBenzMaxim 0:33d4e66780c0 69 };
IanBenzMaxim 0:33d4e66780c0 70
IanBenzMaxim 0:33d4e66780c0 71 ///////////////////////////////////////////////////////////////////////////////
IanBenzMaxim 0:33d4e66780c0 72 // GenericRegex
IanBenzMaxim 0:33d4e66780c0 73
IanBenzMaxim 0:33d4e66780c0 74 static const SizeType kRegexInvalidState = ~SizeType(0); //!< Represents an invalid index in GenericRegex::State::out, out1
IanBenzMaxim 0:33d4e66780c0 75 static const SizeType kRegexInvalidRange = ~SizeType(0);
IanBenzMaxim 0:33d4e66780c0 76
IanBenzMaxim 0:33d4e66780c0 77 template <typename Encoding, typename Allocator>
IanBenzMaxim 0:33d4e66780c0 78 class GenericRegexSearch;
IanBenzMaxim 0:33d4e66780c0 79
IanBenzMaxim 0:33d4e66780c0 80 //! Regular expression engine with subset of ECMAscript grammar.
IanBenzMaxim 0:33d4e66780c0 81 /*!
IanBenzMaxim 0:33d4e66780c0 82 Supported regular expression syntax:
IanBenzMaxim 0:33d4e66780c0 83 - \c ab Concatenation
IanBenzMaxim 0:33d4e66780c0 84 - \c a|b Alternation
IanBenzMaxim 0:33d4e66780c0 85 - \c a? Zero or one
IanBenzMaxim 0:33d4e66780c0 86 - \c a* Zero or more
IanBenzMaxim 0:33d4e66780c0 87 - \c a+ One or more
IanBenzMaxim 0:33d4e66780c0 88 - \c a{3} Exactly 3 times
IanBenzMaxim 0:33d4e66780c0 89 - \c a{3,} At least 3 times
IanBenzMaxim 0:33d4e66780c0 90 - \c a{3,5} 3 to 5 times
IanBenzMaxim 0:33d4e66780c0 91 - \c (ab) Grouping
IanBenzMaxim 0:33d4e66780c0 92 - \c ^a At the beginning
IanBenzMaxim 0:33d4e66780c0 93 - \c a$ At the end
IanBenzMaxim 0:33d4e66780c0 94 - \c . Any character
IanBenzMaxim 0:33d4e66780c0 95 - \c [abc] Character classes
IanBenzMaxim 0:33d4e66780c0 96 - \c [a-c] Character class range
IanBenzMaxim 0:33d4e66780c0 97 - \c [a-z0-9_] Character class combination
IanBenzMaxim 0:33d4e66780c0 98 - \c [^abc] Negated character classes
IanBenzMaxim 0:33d4e66780c0 99 - \c [^a-c] Negated character class range
IanBenzMaxim 0:33d4e66780c0 100 - \c [\b] Backspace (U+0008)
IanBenzMaxim 0:33d4e66780c0 101 - \c \\| \\\\ ... Escape characters
IanBenzMaxim 0:33d4e66780c0 102 - \c \\f Form feed (U+000C)
IanBenzMaxim 0:33d4e66780c0 103 - \c \\n Line feed (U+000A)
IanBenzMaxim 0:33d4e66780c0 104 - \c \\r Carriage return (U+000D)
IanBenzMaxim 0:33d4e66780c0 105 - \c \\t Tab (U+0009)
IanBenzMaxim 0:33d4e66780c0 106 - \c \\v Vertical tab (U+000B)
IanBenzMaxim 0:33d4e66780c0 107
IanBenzMaxim 0:33d4e66780c0 108 \note This is a Thompson NFA engine, implemented with reference to
IanBenzMaxim 0:33d4e66780c0 109 Cox, Russ. "Regular Expression Matching Can Be Simple And Fast (but is slow in Java, Perl, PHP, Python, Ruby,...).",
IanBenzMaxim 0:33d4e66780c0 110 https://swtch.com/~rsc/regexp/regexp1.html
IanBenzMaxim 0:33d4e66780c0 111 */
IanBenzMaxim 0:33d4e66780c0 112 template <typename Encoding, typename Allocator = CrtAllocator>
IanBenzMaxim 0:33d4e66780c0 113 class GenericRegex {
IanBenzMaxim 0:33d4e66780c0 114 public:
IanBenzMaxim 0:33d4e66780c0 115 typedef Encoding EncodingType;
IanBenzMaxim 0:33d4e66780c0 116 typedef typename Encoding::Ch Ch;
IanBenzMaxim 0:33d4e66780c0 117 template <typename, typename> friend class GenericRegexSearch;
IanBenzMaxim 0:33d4e66780c0 118
IanBenzMaxim 0:33d4e66780c0 119 GenericRegex(const Ch* source, Allocator* allocator = 0) :
IanBenzMaxim 0:33d4e66780c0 120 states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(),
IanBenzMaxim 0:33d4e66780c0 121 anchorBegin_(), anchorEnd_()
IanBenzMaxim 0:33d4e66780c0 122 {
IanBenzMaxim 0:33d4e66780c0 123 GenericStringStream<Encoding> ss(source);
IanBenzMaxim 0:33d4e66780c0 124 DecodedStream<GenericStringStream<Encoding>, Encoding> ds(ss);
IanBenzMaxim 0:33d4e66780c0 125 Parse(ds);
IanBenzMaxim 0:33d4e66780c0 126 }
IanBenzMaxim 0:33d4e66780c0 127
IanBenzMaxim 0:33d4e66780c0 128 ~GenericRegex() {}
IanBenzMaxim 0:33d4e66780c0 129
IanBenzMaxim 0:33d4e66780c0 130 bool IsValid() const {
IanBenzMaxim 0:33d4e66780c0 131 return root_ != kRegexInvalidState;
IanBenzMaxim 0:33d4e66780c0 132 }
IanBenzMaxim 0:33d4e66780c0 133
IanBenzMaxim 0:33d4e66780c0 134 private:
IanBenzMaxim 0:33d4e66780c0 135 enum Operator {
IanBenzMaxim 0:33d4e66780c0 136 kZeroOrOne,
IanBenzMaxim 0:33d4e66780c0 137 kZeroOrMore,
IanBenzMaxim 0:33d4e66780c0 138 kOneOrMore,
IanBenzMaxim 0:33d4e66780c0 139 kConcatenation,
IanBenzMaxim 0:33d4e66780c0 140 kAlternation,
IanBenzMaxim 0:33d4e66780c0 141 kLeftParenthesis
IanBenzMaxim 0:33d4e66780c0 142 };
IanBenzMaxim 0:33d4e66780c0 143
IanBenzMaxim 0:33d4e66780c0 144 static const unsigned kAnyCharacterClass = 0xFFFFFFFF; //!< For '.'
IanBenzMaxim 0:33d4e66780c0 145 static const unsigned kRangeCharacterClass = 0xFFFFFFFE;
IanBenzMaxim 0:33d4e66780c0 146 static const unsigned kRangeNegationFlag = 0x80000000;
IanBenzMaxim 0:33d4e66780c0 147
IanBenzMaxim 0:33d4e66780c0 148 struct Range {
IanBenzMaxim 0:33d4e66780c0 149 unsigned start; //
IanBenzMaxim 0:33d4e66780c0 150 unsigned end;
IanBenzMaxim 0:33d4e66780c0 151 SizeType next;
IanBenzMaxim 0:33d4e66780c0 152 };
IanBenzMaxim 0:33d4e66780c0 153
IanBenzMaxim 0:33d4e66780c0 154 struct State {
IanBenzMaxim 0:33d4e66780c0 155 SizeType out; //!< Equals to kInvalid for matching state
IanBenzMaxim 0:33d4e66780c0 156 SizeType out1; //!< Equals to non-kInvalid for split
IanBenzMaxim 0:33d4e66780c0 157 SizeType rangeStart;
IanBenzMaxim 0:33d4e66780c0 158 unsigned codepoint;
IanBenzMaxim 0:33d4e66780c0 159 };
IanBenzMaxim 0:33d4e66780c0 160
IanBenzMaxim 0:33d4e66780c0 161 struct Frag {
IanBenzMaxim 0:33d4e66780c0 162 Frag(SizeType s, SizeType o, SizeType m) : start(s), out(o), minIndex(m) {}
IanBenzMaxim 0:33d4e66780c0 163 SizeType start;
IanBenzMaxim 0:33d4e66780c0 164 SizeType out; //!< link-list of all output states
IanBenzMaxim 0:33d4e66780c0 165 SizeType minIndex;
IanBenzMaxim 0:33d4e66780c0 166 };
IanBenzMaxim 0:33d4e66780c0 167
IanBenzMaxim 0:33d4e66780c0 168 State& GetState(SizeType index) {
IanBenzMaxim 0:33d4e66780c0 169 RAPIDJSON_ASSERT(index < stateCount_);
IanBenzMaxim 0:33d4e66780c0 170 return states_.template Bottom<State>()[index];
IanBenzMaxim 0:33d4e66780c0 171 }
IanBenzMaxim 0:33d4e66780c0 172
IanBenzMaxim 0:33d4e66780c0 173 const State& GetState(SizeType index) const {
IanBenzMaxim 0:33d4e66780c0 174 RAPIDJSON_ASSERT(index < stateCount_);
IanBenzMaxim 0:33d4e66780c0 175 return states_.template Bottom<State>()[index];
IanBenzMaxim 0:33d4e66780c0 176 }
IanBenzMaxim 0:33d4e66780c0 177
IanBenzMaxim 0:33d4e66780c0 178 Range& GetRange(SizeType index) {
IanBenzMaxim 0:33d4e66780c0 179 RAPIDJSON_ASSERT(index < rangeCount_);
IanBenzMaxim 0:33d4e66780c0 180 return ranges_.template Bottom<Range>()[index];
IanBenzMaxim 0:33d4e66780c0 181 }
IanBenzMaxim 0:33d4e66780c0 182
IanBenzMaxim 0:33d4e66780c0 183 const Range& GetRange(SizeType index) const {
IanBenzMaxim 0:33d4e66780c0 184 RAPIDJSON_ASSERT(index < rangeCount_);
IanBenzMaxim 0:33d4e66780c0 185 return ranges_.template Bottom<Range>()[index];
IanBenzMaxim 0:33d4e66780c0 186 }
IanBenzMaxim 0:33d4e66780c0 187
IanBenzMaxim 0:33d4e66780c0 188 template <typename InputStream>
IanBenzMaxim 0:33d4e66780c0 189 void Parse(DecodedStream<InputStream, Encoding>& ds) {
IanBenzMaxim 0:33d4e66780c0 190 Allocator allocator;
IanBenzMaxim 0:33d4e66780c0 191 Stack<Allocator> operandStack(&allocator, 256); // Frag
IanBenzMaxim 0:33d4e66780c0 192 Stack<Allocator> operatorStack(&allocator, 256); // Operator
IanBenzMaxim 0:33d4e66780c0 193 Stack<Allocator> atomCountStack(&allocator, 256); // unsigned (Atom per parenthesis)
IanBenzMaxim 0:33d4e66780c0 194
IanBenzMaxim 0:33d4e66780c0 195 *atomCountStack.template Push<unsigned>() = 0;
IanBenzMaxim 0:33d4e66780c0 196
IanBenzMaxim 0:33d4e66780c0 197 unsigned codepoint;
IanBenzMaxim 0:33d4e66780c0 198 while (ds.Peek() != 0) {
IanBenzMaxim 0:33d4e66780c0 199 switch (codepoint = ds.Take()) {
IanBenzMaxim 0:33d4e66780c0 200 case '^':
IanBenzMaxim 0:33d4e66780c0 201 anchorBegin_ = true;
IanBenzMaxim 0:33d4e66780c0 202 break;
IanBenzMaxim 0:33d4e66780c0 203
IanBenzMaxim 0:33d4e66780c0 204 case '$':
IanBenzMaxim 0:33d4e66780c0 205 anchorEnd_ = true;
IanBenzMaxim 0:33d4e66780c0 206 break;
IanBenzMaxim 0:33d4e66780c0 207
IanBenzMaxim 0:33d4e66780c0 208 case '|':
IanBenzMaxim 0:33d4e66780c0 209 while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation)
IanBenzMaxim 0:33d4e66780c0 210 if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
IanBenzMaxim 0:33d4e66780c0 211 return;
IanBenzMaxim 0:33d4e66780c0 212 *operatorStack.template Push<Operator>() = kAlternation;
IanBenzMaxim 0:33d4e66780c0 213 *atomCountStack.template Top<unsigned>() = 0;
IanBenzMaxim 0:33d4e66780c0 214 break;
IanBenzMaxim 0:33d4e66780c0 215
IanBenzMaxim 0:33d4e66780c0 216 case '(':
IanBenzMaxim 0:33d4e66780c0 217 *operatorStack.template Push<Operator>() = kLeftParenthesis;
IanBenzMaxim 0:33d4e66780c0 218 *atomCountStack.template Push<unsigned>() = 0;
IanBenzMaxim 0:33d4e66780c0 219 break;
IanBenzMaxim 0:33d4e66780c0 220
IanBenzMaxim 0:33d4e66780c0 221 case ')':
IanBenzMaxim 0:33d4e66780c0 222 while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis)
IanBenzMaxim 0:33d4e66780c0 223 if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
IanBenzMaxim 0:33d4e66780c0 224 return;
IanBenzMaxim 0:33d4e66780c0 225 if (operatorStack.Empty())
IanBenzMaxim 0:33d4e66780c0 226 return;
IanBenzMaxim 0:33d4e66780c0 227 operatorStack.template Pop<Operator>(1);
IanBenzMaxim 0:33d4e66780c0 228 atomCountStack.template Pop<unsigned>(1);
IanBenzMaxim 0:33d4e66780c0 229 ImplicitConcatenation(atomCountStack, operatorStack);
IanBenzMaxim 0:33d4e66780c0 230 break;
IanBenzMaxim 0:33d4e66780c0 231
IanBenzMaxim 0:33d4e66780c0 232 case '?':
IanBenzMaxim 0:33d4e66780c0 233 if (!Eval(operandStack, kZeroOrOne))
IanBenzMaxim 0:33d4e66780c0 234 return;
IanBenzMaxim 0:33d4e66780c0 235 break;
IanBenzMaxim 0:33d4e66780c0 236
IanBenzMaxim 0:33d4e66780c0 237 case '*':
IanBenzMaxim 0:33d4e66780c0 238 if (!Eval(operandStack, kZeroOrMore))
IanBenzMaxim 0:33d4e66780c0 239 return;
IanBenzMaxim 0:33d4e66780c0 240 break;
IanBenzMaxim 0:33d4e66780c0 241
IanBenzMaxim 0:33d4e66780c0 242 case '+':
IanBenzMaxim 0:33d4e66780c0 243 if (!Eval(operandStack, kOneOrMore))
IanBenzMaxim 0:33d4e66780c0 244 return;
IanBenzMaxim 0:33d4e66780c0 245 break;
IanBenzMaxim 0:33d4e66780c0 246
IanBenzMaxim 0:33d4e66780c0 247 case '{':
IanBenzMaxim 0:33d4e66780c0 248 {
IanBenzMaxim 0:33d4e66780c0 249 unsigned n, m;
IanBenzMaxim 0:33d4e66780c0 250 if (!ParseUnsigned(ds, &n))
IanBenzMaxim 0:33d4e66780c0 251 return;
IanBenzMaxim 0:33d4e66780c0 252
IanBenzMaxim 0:33d4e66780c0 253 if (ds.Peek() == ',') {
IanBenzMaxim 0:33d4e66780c0 254 ds.Take();
IanBenzMaxim 0:33d4e66780c0 255 if (ds.Peek() == '}')
IanBenzMaxim 0:33d4e66780c0 256 m = kInfinityQuantifier;
IanBenzMaxim 0:33d4e66780c0 257 else if (!ParseUnsigned(ds, &m) || m < n)
IanBenzMaxim 0:33d4e66780c0 258 return;
IanBenzMaxim 0:33d4e66780c0 259 }
IanBenzMaxim 0:33d4e66780c0 260 else
IanBenzMaxim 0:33d4e66780c0 261 m = n;
IanBenzMaxim 0:33d4e66780c0 262
IanBenzMaxim 0:33d4e66780c0 263 if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}')
IanBenzMaxim 0:33d4e66780c0 264 return;
IanBenzMaxim 0:33d4e66780c0 265 ds.Take();
IanBenzMaxim 0:33d4e66780c0 266 }
IanBenzMaxim 0:33d4e66780c0 267 break;
IanBenzMaxim 0:33d4e66780c0 268
IanBenzMaxim 0:33d4e66780c0 269 case '.':
IanBenzMaxim 0:33d4e66780c0 270 PushOperand(operandStack, kAnyCharacterClass);
IanBenzMaxim 0:33d4e66780c0 271 ImplicitConcatenation(atomCountStack, operatorStack);
IanBenzMaxim 0:33d4e66780c0 272 break;
IanBenzMaxim 0:33d4e66780c0 273
IanBenzMaxim 0:33d4e66780c0 274 case '[':
IanBenzMaxim 0:33d4e66780c0 275 {
IanBenzMaxim 0:33d4e66780c0 276 SizeType range;
IanBenzMaxim 0:33d4e66780c0 277 if (!ParseRange(ds, &range))
IanBenzMaxim 0:33d4e66780c0 278 return;
IanBenzMaxim 0:33d4e66780c0 279 SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass);
IanBenzMaxim 0:33d4e66780c0 280 GetState(s).rangeStart = range;
IanBenzMaxim 0:33d4e66780c0 281 *operandStack.template Push<Frag>() = Frag(s, s, s);
IanBenzMaxim 0:33d4e66780c0 282 }
IanBenzMaxim 0:33d4e66780c0 283 ImplicitConcatenation(atomCountStack, operatorStack);
IanBenzMaxim 0:33d4e66780c0 284 break;
IanBenzMaxim 0:33d4e66780c0 285
IanBenzMaxim 0:33d4e66780c0 286 case '\\': // Escape character
IanBenzMaxim 0:33d4e66780c0 287 if (!CharacterEscape(ds, &codepoint))
IanBenzMaxim 0:33d4e66780c0 288 return; // Unsupported escape character
IanBenzMaxim 0:33d4e66780c0 289 // fall through to default
IanBenzMaxim 0:33d4e66780c0 290
IanBenzMaxim 0:33d4e66780c0 291 default: // Pattern character
IanBenzMaxim 0:33d4e66780c0 292 PushOperand(operandStack, codepoint);
IanBenzMaxim 0:33d4e66780c0 293 ImplicitConcatenation(atomCountStack, operatorStack);
IanBenzMaxim 0:33d4e66780c0 294 }
IanBenzMaxim 0:33d4e66780c0 295 }
IanBenzMaxim 0:33d4e66780c0 296
IanBenzMaxim 0:33d4e66780c0 297 while (!operatorStack.Empty())
IanBenzMaxim 0:33d4e66780c0 298 if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
IanBenzMaxim 0:33d4e66780c0 299 return;
IanBenzMaxim 0:33d4e66780c0 300
IanBenzMaxim 0:33d4e66780c0 301 // Link the operand to matching state.
IanBenzMaxim 0:33d4e66780c0 302 if (operandStack.GetSize() == sizeof(Frag)) {
IanBenzMaxim 0:33d4e66780c0 303 Frag* e = operandStack.template Pop<Frag>(1);
IanBenzMaxim 0:33d4e66780c0 304 Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
IanBenzMaxim 0:33d4e66780c0 305 root_ = e->start;
IanBenzMaxim 0:33d4e66780c0 306
IanBenzMaxim 0:33d4e66780c0 307 #if RAPIDJSON_REGEX_VERBOSE
IanBenzMaxim 0:33d4e66780c0 308 printf("root: %d\n", root_);
IanBenzMaxim 0:33d4e66780c0 309 for (SizeType i = 0; i < stateCount_ ; i++) {
IanBenzMaxim 0:33d4e66780c0 310 State& s = GetState(i);
IanBenzMaxim 0:33d4e66780c0 311 printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
IanBenzMaxim 0:33d4e66780c0 312 }
IanBenzMaxim 0:33d4e66780c0 313 printf("\n");
IanBenzMaxim 0:33d4e66780c0 314 #endif
IanBenzMaxim 0:33d4e66780c0 315 }
IanBenzMaxim 0:33d4e66780c0 316 }
IanBenzMaxim 0:33d4e66780c0 317
IanBenzMaxim 0:33d4e66780c0 318 SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) {
IanBenzMaxim 0:33d4e66780c0 319 State* s = states_.template Push<State>();
IanBenzMaxim 0:33d4e66780c0 320 s->out = out;
IanBenzMaxim 0:33d4e66780c0 321 s->out1 = out1;
IanBenzMaxim 0:33d4e66780c0 322 s->codepoint = codepoint;
IanBenzMaxim 0:33d4e66780c0 323 s->rangeStart = kRegexInvalidRange;
IanBenzMaxim 0:33d4e66780c0 324 return stateCount_++;
IanBenzMaxim 0:33d4e66780c0 325 }
IanBenzMaxim 0:33d4e66780c0 326
IanBenzMaxim 0:33d4e66780c0 327 void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) {
IanBenzMaxim 0:33d4e66780c0 328 SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
IanBenzMaxim 0:33d4e66780c0 329 *operandStack.template Push<Frag>() = Frag(s, s, s);
IanBenzMaxim 0:33d4e66780c0 330 }
IanBenzMaxim 0:33d4e66780c0 331
IanBenzMaxim 0:33d4e66780c0 332 void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) {
IanBenzMaxim 0:33d4e66780c0 333 if (*atomCountStack.template Top<unsigned>())
IanBenzMaxim 0:33d4e66780c0 334 *operatorStack.template Push<Operator>() = kConcatenation;
IanBenzMaxim 0:33d4e66780c0 335 (*atomCountStack.template Top<unsigned>())++;
IanBenzMaxim 0:33d4e66780c0 336 }
IanBenzMaxim 0:33d4e66780c0 337
IanBenzMaxim 0:33d4e66780c0 338 SizeType Append(SizeType l1, SizeType l2) {
IanBenzMaxim 0:33d4e66780c0 339 SizeType old = l1;
IanBenzMaxim 0:33d4e66780c0 340 while (GetState(l1).out != kRegexInvalidState)
IanBenzMaxim 0:33d4e66780c0 341 l1 = GetState(l1).out;
IanBenzMaxim 0:33d4e66780c0 342 GetState(l1).out = l2;
IanBenzMaxim 0:33d4e66780c0 343 return old;
IanBenzMaxim 0:33d4e66780c0 344 }
IanBenzMaxim 0:33d4e66780c0 345
IanBenzMaxim 0:33d4e66780c0 346 void Patch(SizeType l, SizeType s) {
IanBenzMaxim 0:33d4e66780c0 347 for (SizeType next; l != kRegexInvalidState; l = next) {
IanBenzMaxim 0:33d4e66780c0 348 next = GetState(l).out;
IanBenzMaxim 0:33d4e66780c0 349 GetState(l).out = s;
IanBenzMaxim 0:33d4e66780c0 350 }
IanBenzMaxim 0:33d4e66780c0 351 }
IanBenzMaxim 0:33d4e66780c0 352
IanBenzMaxim 0:33d4e66780c0 353 bool Eval(Stack<Allocator>& operandStack, Operator op) {
IanBenzMaxim 0:33d4e66780c0 354 switch (op) {
IanBenzMaxim 0:33d4e66780c0 355 case kConcatenation:
IanBenzMaxim 0:33d4e66780c0 356 RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag) * 2);
IanBenzMaxim 0:33d4e66780c0 357 {
IanBenzMaxim 0:33d4e66780c0 358 Frag e2 = *operandStack.template Pop<Frag>(1);
IanBenzMaxim 0:33d4e66780c0 359 Frag e1 = *operandStack.template Pop<Frag>(1);
IanBenzMaxim 0:33d4e66780c0 360 Patch(e1.out, e2.start);
IanBenzMaxim 0:33d4e66780c0 361 *operandStack.template Push<Frag>() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex));
IanBenzMaxim 0:33d4e66780c0 362 }
IanBenzMaxim 0:33d4e66780c0 363 return true;
IanBenzMaxim 0:33d4e66780c0 364
IanBenzMaxim 0:33d4e66780c0 365 case kAlternation:
IanBenzMaxim 0:33d4e66780c0 366 if (operandStack.GetSize() >= sizeof(Frag) * 2) {
IanBenzMaxim 0:33d4e66780c0 367 Frag e2 = *operandStack.template Pop<Frag>(1);
IanBenzMaxim 0:33d4e66780c0 368 Frag e1 = *operandStack.template Pop<Frag>(1);
IanBenzMaxim 0:33d4e66780c0 369 SizeType s = NewState(e1.start, e2.start, 0);
IanBenzMaxim 0:33d4e66780c0 370 *operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex));
IanBenzMaxim 0:33d4e66780c0 371 return true;
IanBenzMaxim 0:33d4e66780c0 372 }
IanBenzMaxim 0:33d4e66780c0 373 return false;
IanBenzMaxim 0:33d4e66780c0 374
IanBenzMaxim 0:33d4e66780c0 375 case kZeroOrOne:
IanBenzMaxim 0:33d4e66780c0 376 if (operandStack.GetSize() >= sizeof(Frag)) {
IanBenzMaxim 0:33d4e66780c0 377 Frag e = *operandStack.template Pop<Frag>(1);
IanBenzMaxim 0:33d4e66780c0 378 SizeType s = NewState(kRegexInvalidState, e.start, 0);
IanBenzMaxim 0:33d4e66780c0 379 *operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex);
IanBenzMaxim 0:33d4e66780c0 380 return true;
IanBenzMaxim 0:33d4e66780c0 381 }
IanBenzMaxim 0:33d4e66780c0 382 return false;
IanBenzMaxim 0:33d4e66780c0 383
IanBenzMaxim 0:33d4e66780c0 384 case kZeroOrMore:
IanBenzMaxim 0:33d4e66780c0 385 if (operandStack.GetSize() >= sizeof(Frag)) {
IanBenzMaxim 0:33d4e66780c0 386 Frag e = *operandStack.template Pop<Frag>(1);
IanBenzMaxim 0:33d4e66780c0 387 SizeType s = NewState(kRegexInvalidState, e.start, 0);
IanBenzMaxim 0:33d4e66780c0 388 Patch(e.out, s);
IanBenzMaxim 0:33d4e66780c0 389 *operandStack.template Push<Frag>() = Frag(s, s, e.minIndex);
IanBenzMaxim 0:33d4e66780c0 390 return true;
IanBenzMaxim 0:33d4e66780c0 391 }
IanBenzMaxim 0:33d4e66780c0 392 return false;
IanBenzMaxim 0:33d4e66780c0 393
IanBenzMaxim 0:33d4e66780c0 394 default:
IanBenzMaxim 0:33d4e66780c0 395 RAPIDJSON_ASSERT(op == kOneOrMore);
IanBenzMaxim 0:33d4e66780c0 396 if (operandStack.GetSize() >= sizeof(Frag)) {
IanBenzMaxim 0:33d4e66780c0 397 Frag e = *operandStack.template Pop<Frag>(1);
IanBenzMaxim 0:33d4e66780c0 398 SizeType s = NewState(kRegexInvalidState, e.start, 0);
IanBenzMaxim 0:33d4e66780c0 399 Patch(e.out, s);
IanBenzMaxim 0:33d4e66780c0 400 *operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex);
IanBenzMaxim 0:33d4e66780c0 401 return true;
IanBenzMaxim 0:33d4e66780c0 402 }
IanBenzMaxim 0:33d4e66780c0 403 return false;
IanBenzMaxim 0:33d4e66780c0 404 }
IanBenzMaxim 0:33d4e66780c0 405 }
IanBenzMaxim 0:33d4e66780c0 406
IanBenzMaxim 0:33d4e66780c0 407 bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) {
IanBenzMaxim 0:33d4e66780c0 408 RAPIDJSON_ASSERT(n <= m);
IanBenzMaxim 0:33d4e66780c0 409 RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag));
IanBenzMaxim 0:33d4e66780c0 410
IanBenzMaxim 0:33d4e66780c0 411 if (n == 0) {
IanBenzMaxim 0:33d4e66780c0 412 if (m == 0) // a{0} not support
IanBenzMaxim 0:33d4e66780c0 413 return false;
IanBenzMaxim 0:33d4e66780c0 414 else if (m == kInfinityQuantifier)
IanBenzMaxim 0:33d4e66780c0 415 Eval(operandStack, kZeroOrMore); // a{0,} -> a*
IanBenzMaxim 0:33d4e66780c0 416 else {
IanBenzMaxim 0:33d4e66780c0 417 Eval(operandStack, kZeroOrOne); // a{0,5} -> a?
IanBenzMaxim 0:33d4e66780c0 418 for (unsigned i = 0; i < m - 1; i++)
IanBenzMaxim 0:33d4e66780c0 419 CloneTopOperand(operandStack); // a{0,5} -> a? a? a? a? a?
IanBenzMaxim 0:33d4e66780c0 420 for (unsigned i = 0; i < m - 1; i++)
IanBenzMaxim 0:33d4e66780c0 421 Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a?
IanBenzMaxim 0:33d4e66780c0 422 }
IanBenzMaxim 0:33d4e66780c0 423 return true;
IanBenzMaxim 0:33d4e66780c0 424 }
IanBenzMaxim 0:33d4e66780c0 425
IanBenzMaxim 0:33d4e66780c0 426 for (unsigned i = 0; i < n - 1; i++) // a{3} -> a a a
IanBenzMaxim 0:33d4e66780c0 427 CloneTopOperand(operandStack);
IanBenzMaxim 0:33d4e66780c0 428
IanBenzMaxim 0:33d4e66780c0 429 if (m == kInfinityQuantifier)
IanBenzMaxim 0:33d4e66780c0 430 Eval(operandStack, kOneOrMore); // a{3,} -> a a a+
IanBenzMaxim 0:33d4e66780c0 431 else if (m > n) {
IanBenzMaxim 0:33d4e66780c0 432 CloneTopOperand(operandStack); // a{3,5} -> a a a a
IanBenzMaxim 0:33d4e66780c0 433 Eval(operandStack, kZeroOrOne); // a{3,5} -> a a a a?
IanBenzMaxim 0:33d4e66780c0 434 for (unsigned i = n; i < m - 1; i++)
IanBenzMaxim 0:33d4e66780c0 435 CloneTopOperand(operandStack); // a{3,5} -> a a a a? a?
IanBenzMaxim 0:33d4e66780c0 436 for (unsigned i = n; i < m; i++)
IanBenzMaxim 0:33d4e66780c0 437 Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a?
IanBenzMaxim 0:33d4e66780c0 438 }
IanBenzMaxim 0:33d4e66780c0 439
IanBenzMaxim 0:33d4e66780c0 440 for (unsigned i = 0; i < n - 1; i++)
IanBenzMaxim 0:33d4e66780c0 441 Eval(operandStack, kConcatenation); // a{3} -> aaa, a{3,} -> aaa+, a{3.5} -> aaaa?a?
IanBenzMaxim 0:33d4e66780c0 442
IanBenzMaxim 0:33d4e66780c0 443 return true;
IanBenzMaxim 0:33d4e66780c0 444 }
IanBenzMaxim 0:33d4e66780c0 445
IanBenzMaxim 0:33d4e66780c0 446 static SizeType Min(SizeType a, SizeType b) { return a < b ? a : b; }
IanBenzMaxim 0:33d4e66780c0 447
IanBenzMaxim 0:33d4e66780c0 448 void CloneTopOperand(Stack<Allocator>& operandStack) {
IanBenzMaxim 0:33d4e66780c0 449 const Frag src = *operandStack.template Top<Frag>(); // Copy constructor to prevent invalidation
IanBenzMaxim 0:33d4e66780c0 450 SizeType count = stateCount_ - src.minIndex; // Assumes top operand contains states in [src->minIndex, stateCount_)
IanBenzMaxim 0:33d4e66780c0 451 State* s = states_.template Push<State>(count);
IanBenzMaxim 0:33d4e66780c0 452 memcpy(s, &GetState(src.minIndex), count * sizeof(State));
IanBenzMaxim 0:33d4e66780c0 453 for (SizeType j = 0; j < count; j++) {
IanBenzMaxim 0:33d4e66780c0 454 if (s[j].out != kRegexInvalidState)
IanBenzMaxim 0:33d4e66780c0 455 s[j].out += count;
IanBenzMaxim 0:33d4e66780c0 456 if (s[j].out1 != kRegexInvalidState)
IanBenzMaxim 0:33d4e66780c0 457 s[j].out1 += count;
IanBenzMaxim 0:33d4e66780c0 458 }
IanBenzMaxim 0:33d4e66780c0 459 *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count, src.minIndex + count);
IanBenzMaxim 0:33d4e66780c0 460 stateCount_ += count;
IanBenzMaxim 0:33d4e66780c0 461 }
IanBenzMaxim 0:33d4e66780c0 462
IanBenzMaxim 0:33d4e66780c0 463 template <typename InputStream>
IanBenzMaxim 0:33d4e66780c0 464 bool ParseUnsigned(DecodedStream<InputStream, Encoding>& ds, unsigned* u) {
IanBenzMaxim 0:33d4e66780c0 465 unsigned r = 0;
IanBenzMaxim 0:33d4e66780c0 466 if (ds.Peek() < '0' || ds.Peek() > '9')
IanBenzMaxim 0:33d4e66780c0 467 return false;
IanBenzMaxim 0:33d4e66780c0 468 while (ds.Peek() >= '0' && ds.Peek() <= '9') {
IanBenzMaxim 0:33d4e66780c0 469 if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
IanBenzMaxim 0:33d4e66780c0 470 return false; // overflow
IanBenzMaxim 0:33d4e66780c0 471 r = r * 10 + (ds.Take() - '0');
IanBenzMaxim 0:33d4e66780c0 472 }
IanBenzMaxim 0:33d4e66780c0 473 *u = r;
IanBenzMaxim 0:33d4e66780c0 474 return true;
IanBenzMaxim 0:33d4e66780c0 475 }
IanBenzMaxim 0:33d4e66780c0 476
IanBenzMaxim 0:33d4e66780c0 477 template <typename InputStream>
IanBenzMaxim 0:33d4e66780c0 478 bool ParseRange(DecodedStream<InputStream, Encoding>& ds, SizeType* range) {
IanBenzMaxim 0:33d4e66780c0 479 bool isBegin = true;
IanBenzMaxim 0:33d4e66780c0 480 bool negate = false;
IanBenzMaxim 0:33d4e66780c0 481 int step = 0;
IanBenzMaxim 0:33d4e66780c0 482 SizeType start = kRegexInvalidRange;
IanBenzMaxim 0:33d4e66780c0 483 SizeType current = kRegexInvalidRange;
IanBenzMaxim 0:33d4e66780c0 484 unsigned codepoint;
IanBenzMaxim 0:33d4e66780c0 485 while ((codepoint = ds.Take()) != 0) {
IanBenzMaxim 0:33d4e66780c0 486 if (isBegin) {
IanBenzMaxim 0:33d4e66780c0 487 isBegin = false;
IanBenzMaxim 0:33d4e66780c0 488 if (codepoint == '^') {
IanBenzMaxim 0:33d4e66780c0 489 negate = true;
IanBenzMaxim 0:33d4e66780c0 490 continue;
IanBenzMaxim 0:33d4e66780c0 491 }
IanBenzMaxim 0:33d4e66780c0 492 }
IanBenzMaxim 0:33d4e66780c0 493
IanBenzMaxim 0:33d4e66780c0 494 switch (codepoint) {
IanBenzMaxim 0:33d4e66780c0 495 case ']':
IanBenzMaxim 0:33d4e66780c0 496 if (start == kRegexInvalidRange)
IanBenzMaxim 0:33d4e66780c0 497 return false; // Error: nothing inside []
IanBenzMaxim 0:33d4e66780c0 498 if (step == 2) { // Add trailing '-'
IanBenzMaxim 0:33d4e66780c0 499 SizeType r = NewRange('-');
IanBenzMaxim 0:33d4e66780c0 500 RAPIDJSON_ASSERT(current != kRegexInvalidRange);
IanBenzMaxim 0:33d4e66780c0 501 GetRange(current).next = r;
IanBenzMaxim 0:33d4e66780c0 502 }
IanBenzMaxim 0:33d4e66780c0 503 if (negate)
IanBenzMaxim 0:33d4e66780c0 504 GetRange(start).start |= kRangeNegationFlag;
IanBenzMaxim 0:33d4e66780c0 505 *range = start;
IanBenzMaxim 0:33d4e66780c0 506 return true;
IanBenzMaxim 0:33d4e66780c0 507
IanBenzMaxim 0:33d4e66780c0 508 case '\\':
IanBenzMaxim 0:33d4e66780c0 509 if (ds.Peek() == 'b') {
IanBenzMaxim 0:33d4e66780c0 510 ds.Take();
IanBenzMaxim 0:33d4e66780c0 511 codepoint = 0x0008; // Escape backspace character
IanBenzMaxim 0:33d4e66780c0 512 }
IanBenzMaxim 0:33d4e66780c0 513 else if (!CharacterEscape(ds, &codepoint))
IanBenzMaxim 0:33d4e66780c0 514 return false;
IanBenzMaxim 0:33d4e66780c0 515 // fall through to default
IanBenzMaxim 0:33d4e66780c0 516
IanBenzMaxim 0:33d4e66780c0 517 default:
IanBenzMaxim 0:33d4e66780c0 518 switch (step) {
IanBenzMaxim 0:33d4e66780c0 519 case 1:
IanBenzMaxim 0:33d4e66780c0 520 if (codepoint == '-') {
IanBenzMaxim 0:33d4e66780c0 521 step++;
IanBenzMaxim 0:33d4e66780c0 522 break;
IanBenzMaxim 0:33d4e66780c0 523 }
IanBenzMaxim 0:33d4e66780c0 524 // fall through to step 0 for other characters
IanBenzMaxim 0:33d4e66780c0 525
IanBenzMaxim 0:33d4e66780c0 526 case 0:
IanBenzMaxim 0:33d4e66780c0 527 {
IanBenzMaxim 0:33d4e66780c0 528 SizeType r = NewRange(codepoint);
IanBenzMaxim 0:33d4e66780c0 529 if (current != kRegexInvalidRange)
IanBenzMaxim 0:33d4e66780c0 530 GetRange(current).next = r;
IanBenzMaxim 0:33d4e66780c0 531 if (start == kRegexInvalidRange)
IanBenzMaxim 0:33d4e66780c0 532 start = r;
IanBenzMaxim 0:33d4e66780c0 533 current = r;
IanBenzMaxim 0:33d4e66780c0 534 }
IanBenzMaxim 0:33d4e66780c0 535 step = 1;
IanBenzMaxim 0:33d4e66780c0 536 break;
IanBenzMaxim 0:33d4e66780c0 537
IanBenzMaxim 0:33d4e66780c0 538 default:
IanBenzMaxim 0:33d4e66780c0 539 RAPIDJSON_ASSERT(step == 2);
IanBenzMaxim 0:33d4e66780c0 540 GetRange(current).end = codepoint;
IanBenzMaxim 0:33d4e66780c0 541 step = 0;
IanBenzMaxim 0:33d4e66780c0 542 }
IanBenzMaxim 0:33d4e66780c0 543 }
IanBenzMaxim 0:33d4e66780c0 544 }
IanBenzMaxim 0:33d4e66780c0 545 return false;
IanBenzMaxim 0:33d4e66780c0 546 }
IanBenzMaxim 0:33d4e66780c0 547
IanBenzMaxim 0:33d4e66780c0 548 SizeType NewRange(unsigned codepoint) {
IanBenzMaxim 0:33d4e66780c0 549 Range* r = ranges_.template Push<Range>();
IanBenzMaxim 0:33d4e66780c0 550 r->start = r->end = codepoint;
IanBenzMaxim 0:33d4e66780c0 551 r->next = kRegexInvalidRange;
IanBenzMaxim 0:33d4e66780c0 552 return rangeCount_++;
IanBenzMaxim 0:33d4e66780c0 553 }
IanBenzMaxim 0:33d4e66780c0 554
IanBenzMaxim 0:33d4e66780c0 555 template <typename InputStream>
IanBenzMaxim 0:33d4e66780c0 556 bool CharacterEscape(DecodedStream<InputStream, Encoding>& ds, unsigned* escapedCodepoint) {
IanBenzMaxim 0:33d4e66780c0 557 unsigned codepoint;
IanBenzMaxim 0:33d4e66780c0 558 switch (codepoint = ds.Take()) {
IanBenzMaxim 0:33d4e66780c0 559 case '^':
IanBenzMaxim 0:33d4e66780c0 560 case '$':
IanBenzMaxim 0:33d4e66780c0 561 case '|':
IanBenzMaxim 0:33d4e66780c0 562 case '(':
IanBenzMaxim 0:33d4e66780c0 563 case ')':
IanBenzMaxim 0:33d4e66780c0 564 case '?':
IanBenzMaxim 0:33d4e66780c0 565 case '*':
IanBenzMaxim 0:33d4e66780c0 566 case '+':
IanBenzMaxim 0:33d4e66780c0 567 case '.':
IanBenzMaxim 0:33d4e66780c0 568 case '[':
IanBenzMaxim 0:33d4e66780c0 569 case ']':
IanBenzMaxim 0:33d4e66780c0 570 case '{':
IanBenzMaxim 0:33d4e66780c0 571 case '}':
IanBenzMaxim 0:33d4e66780c0 572 case '\\':
IanBenzMaxim 0:33d4e66780c0 573 *escapedCodepoint = codepoint; return true;
IanBenzMaxim 0:33d4e66780c0 574 case 'f': *escapedCodepoint = 0x000C; return true;
IanBenzMaxim 0:33d4e66780c0 575 case 'n': *escapedCodepoint = 0x000A; return true;
IanBenzMaxim 0:33d4e66780c0 576 case 'r': *escapedCodepoint = 0x000D; return true;
IanBenzMaxim 0:33d4e66780c0 577 case 't': *escapedCodepoint = 0x0009; return true;
IanBenzMaxim 0:33d4e66780c0 578 case 'v': *escapedCodepoint = 0x000B; return true;
IanBenzMaxim 0:33d4e66780c0 579 default:
IanBenzMaxim 0:33d4e66780c0 580 return false; // Unsupported escape character
IanBenzMaxim 0:33d4e66780c0 581 }
IanBenzMaxim 0:33d4e66780c0 582 }
IanBenzMaxim 0:33d4e66780c0 583
IanBenzMaxim 0:33d4e66780c0 584 Stack<Allocator> states_;
IanBenzMaxim 0:33d4e66780c0 585 Stack<Allocator> ranges_;
IanBenzMaxim 0:33d4e66780c0 586 SizeType root_;
IanBenzMaxim 0:33d4e66780c0 587 SizeType stateCount_;
IanBenzMaxim 0:33d4e66780c0 588 SizeType rangeCount_;
IanBenzMaxim 0:33d4e66780c0 589
IanBenzMaxim 0:33d4e66780c0 590 static const unsigned kInfinityQuantifier = ~0u;
IanBenzMaxim 0:33d4e66780c0 591
IanBenzMaxim 0:33d4e66780c0 592 // For SearchWithAnchoring()
IanBenzMaxim 0:33d4e66780c0 593 bool anchorBegin_;
IanBenzMaxim 0:33d4e66780c0 594 bool anchorEnd_;
IanBenzMaxim 0:33d4e66780c0 595 };
IanBenzMaxim 0:33d4e66780c0 596
IanBenzMaxim 0:33d4e66780c0 597 template <typename RegexType, typename Allocator = CrtAllocator>
IanBenzMaxim 0:33d4e66780c0 598 class GenericRegexSearch {
IanBenzMaxim 0:33d4e66780c0 599 public:
IanBenzMaxim 0:33d4e66780c0 600 typedef typename RegexType::EncodingType Encoding;
IanBenzMaxim 0:33d4e66780c0 601 typedef typename Encoding::Ch Ch;
IanBenzMaxim 0:33d4e66780c0 602
IanBenzMaxim 0:33d4e66780c0 603 GenericRegexSearch(const RegexType& regex, Allocator* allocator = 0) :
IanBenzMaxim 0:33d4e66780c0 604 regex_(regex), allocator_(allocator), ownAllocator_(0),
IanBenzMaxim 0:33d4e66780c0 605 state0_(allocator, 0), state1_(allocator, 0), stateSet_()
IanBenzMaxim 0:33d4e66780c0 606 {
IanBenzMaxim 0:33d4e66780c0 607 RAPIDJSON_ASSERT(regex_.IsValid());
IanBenzMaxim 0:33d4e66780c0 608 if (!allocator_)
IanBenzMaxim 0:33d4e66780c0 609 ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator());
IanBenzMaxim 0:33d4e66780c0 610 stateSet_ = static_cast<unsigned*>(allocator_->Malloc(GetStateSetSize()));
IanBenzMaxim 0:33d4e66780c0 611 state0_.template Reserve<SizeType>(regex_.stateCount_);
IanBenzMaxim 0:33d4e66780c0 612 state1_.template Reserve<SizeType>(regex_.stateCount_);
IanBenzMaxim 0:33d4e66780c0 613 }
IanBenzMaxim 0:33d4e66780c0 614
IanBenzMaxim 0:33d4e66780c0 615 ~GenericRegexSearch() {
IanBenzMaxim 0:33d4e66780c0 616 Allocator::Free(stateSet_);
IanBenzMaxim 0:33d4e66780c0 617 RAPIDJSON_DELETE(ownAllocator_);
IanBenzMaxim 0:33d4e66780c0 618 }
IanBenzMaxim 0:33d4e66780c0 619
IanBenzMaxim 0:33d4e66780c0 620 template <typename InputStream>
IanBenzMaxim 0:33d4e66780c0 621 bool Match(InputStream& is) {
IanBenzMaxim 0:33d4e66780c0 622 return SearchWithAnchoring(is, true, true);
IanBenzMaxim 0:33d4e66780c0 623 }
IanBenzMaxim 0:33d4e66780c0 624
IanBenzMaxim 0:33d4e66780c0 625 bool Match(const Ch* s) {
IanBenzMaxim 0:33d4e66780c0 626 GenericStringStream<Encoding> is(s);
IanBenzMaxim 0:33d4e66780c0 627 return Match(is);
IanBenzMaxim 0:33d4e66780c0 628 }
IanBenzMaxim 0:33d4e66780c0 629
IanBenzMaxim 0:33d4e66780c0 630 template <typename InputStream>
IanBenzMaxim 0:33d4e66780c0 631 bool Search(InputStream& is) {
IanBenzMaxim 0:33d4e66780c0 632 return SearchWithAnchoring(is, regex_.anchorBegin_, regex_.anchorEnd_);
IanBenzMaxim 0:33d4e66780c0 633 }
IanBenzMaxim 0:33d4e66780c0 634
IanBenzMaxim 0:33d4e66780c0 635 bool Search(const Ch* s) {
IanBenzMaxim 0:33d4e66780c0 636 GenericStringStream<Encoding> is(s);
IanBenzMaxim 0:33d4e66780c0 637 return Search(is);
IanBenzMaxim 0:33d4e66780c0 638 }
IanBenzMaxim 0:33d4e66780c0 639
IanBenzMaxim 0:33d4e66780c0 640 private:
IanBenzMaxim 0:33d4e66780c0 641 typedef typename RegexType::State State;
IanBenzMaxim 0:33d4e66780c0 642 typedef typename RegexType::Range Range;
IanBenzMaxim 0:33d4e66780c0 643
IanBenzMaxim 0:33d4e66780c0 644 template <typename InputStream>
IanBenzMaxim 0:33d4e66780c0 645 bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) {
IanBenzMaxim 0:33d4e66780c0 646 DecodedStream<InputStream, Encoding> ds(is);
IanBenzMaxim 0:33d4e66780c0 647
IanBenzMaxim 0:33d4e66780c0 648 state0_.Clear();
IanBenzMaxim 0:33d4e66780c0 649 Stack<Allocator> *current = &state0_, *next = &state1_;
IanBenzMaxim 0:33d4e66780c0 650 const size_t stateSetSize = GetStateSetSize();
IanBenzMaxim 0:33d4e66780c0 651 std::memset(stateSet_, 0, stateSetSize);
IanBenzMaxim 0:33d4e66780c0 652
IanBenzMaxim 0:33d4e66780c0 653 bool matched = AddState(*current, regex_.root_);
IanBenzMaxim 0:33d4e66780c0 654 unsigned codepoint;
IanBenzMaxim 0:33d4e66780c0 655 while (!current->Empty() && (codepoint = ds.Take()) != 0) {
IanBenzMaxim 0:33d4e66780c0 656 std::memset(stateSet_, 0, stateSetSize);
IanBenzMaxim 0:33d4e66780c0 657 next->Clear();
IanBenzMaxim 0:33d4e66780c0 658 matched = false;
IanBenzMaxim 0:33d4e66780c0 659 for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
IanBenzMaxim 0:33d4e66780c0 660 const State& sr = regex_.GetState(*s);
IanBenzMaxim 0:33d4e66780c0 661 if (sr.codepoint == codepoint ||
IanBenzMaxim 0:33d4e66780c0 662 sr.codepoint == RegexType::kAnyCharacterClass ||
IanBenzMaxim 0:33d4e66780c0 663 (sr.codepoint == RegexType::kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint)))
IanBenzMaxim 0:33d4e66780c0 664 {
IanBenzMaxim 0:33d4e66780c0 665 matched = AddState(*next, sr.out) || matched;
IanBenzMaxim 0:33d4e66780c0 666 if (!anchorEnd && matched)
IanBenzMaxim 0:33d4e66780c0 667 return true;
IanBenzMaxim 0:33d4e66780c0 668 }
IanBenzMaxim 0:33d4e66780c0 669 if (!anchorBegin)
IanBenzMaxim 0:33d4e66780c0 670 AddState(*next, regex_.root_);
IanBenzMaxim 0:33d4e66780c0 671 }
IanBenzMaxim 0:33d4e66780c0 672 internal::Swap(current, next);
IanBenzMaxim 0:33d4e66780c0 673 }
IanBenzMaxim 0:33d4e66780c0 674
IanBenzMaxim 0:33d4e66780c0 675 return matched;
IanBenzMaxim 0:33d4e66780c0 676 }
IanBenzMaxim 0:33d4e66780c0 677
IanBenzMaxim 0:33d4e66780c0 678 size_t GetStateSetSize() const {
IanBenzMaxim 0:33d4e66780c0 679 return (regex_.stateCount_ + 31) / 32 * 4;
IanBenzMaxim 0:33d4e66780c0 680 }
IanBenzMaxim 0:33d4e66780c0 681
IanBenzMaxim 0:33d4e66780c0 682 // Return whether the added states is a match state
IanBenzMaxim 0:33d4e66780c0 683 bool AddState(Stack<Allocator>& l, SizeType index) {
IanBenzMaxim 0:33d4e66780c0 684 RAPIDJSON_ASSERT(index != kRegexInvalidState);
IanBenzMaxim 0:33d4e66780c0 685
IanBenzMaxim 0:33d4e66780c0 686 const State& s = regex_.GetState(index);
IanBenzMaxim 0:33d4e66780c0 687 if (s.out1 != kRegexInvalidState) { // Split
IanBenzMaxim 0:33d4e66780c0 688 bool matched = AddState(l, s.out);
IanBenzMaxim 0:33d4e66780c0 689 return AddState(l, s.out1) || matched;
IanBenzMaxim 0:33d4e66780c0 690 }
IanBenzMaxim 0:33d4e66780c0 691 else if (!(stateSet_[index >> 5] & (1 << (index & 31)))) {
IanBenzMaxim 0:33d4e66780c0 692 stateSet_[index >> 5] |= (1 << (index & 31));
IanBenzMaxim 0:33d4e66780c0 693 *l.template PushUnsafe<SizeType>() = index;
IanBenzMaxim 0:33d4e66780c0 694 }
IanBenzMaxim 0:33d4e66780c0 695 return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not validated due to reallocation.
IanBenzMaxim 0:33d4e66780c0 696 }
IanBenzMaxim 0:33d4e66780c0 697
IanBenzMaxim 0:33d4e66780c0 698 bool MatchRange(SizeType rangeIndex, unsigned codepoint) const {
IanBenzMaxim 0:33d4e66780c0 699 bool yes = (regex_.GetRange(rangeIndex).start & RegexType::kRangeNegationFlag) == 0;
IanBenzMaxim 0:33d4e66780c0 700 while (rangeIndex != kRegexInvalidRange) {
IanBenzMaxim 0:33d4e66780c0 701 const Range& r = regex_.GetRange(rangeIndex);
IanBenzMaxim 0:33d4e66780c0 702 if (codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end)
IanBenzMaxim 0:33d4e66780c0 703 return yes;
IanBenzMaxim 0:33d4e66780c0 704 rangeIndex = r.next;
IanBenzMaxim 0:33d4e66780c0 705 }
IanBenzMaxim 0:33d4e66780c0 706 return !yes;
IanBenzMaxim 0:33d4e66780c0 707 }
IanBenzMaxim 0:33d4e66780c0 708
IanBenzMaxim 0:33d4e66780c0 709 const RegexType& regex_;
IanBenzMaxim 0:33d4e66780c0 710 Allocator* allocator_;
IanBenzMaxim 0:33d4e66780c0 711 Allocator* ownAllocator_;
IanBenzMaxim 0:33d4e66780c0 712 Stack<Allocator> state0_;
IanBenzMaxim 0:33d4e66780c0 713 Stack<Allocator> state1_;
IanBenzMaxim 0:33d4e66780c0 714 uint32_t* stateSet_;
IanBenzMaxim 0:33d4e66780c0 715 };
IanBenzMaxim 0:33d4e66780c0 716
IanBenzMaxim 0:33d4e66780c0 717 typedef GenericRegex<UTF8<> > Regex;
IanBenzMaxim 0:33d4e66780c0 718 typedef GenericRegexSearch<Regex> RegexSearch;
IanBenzMaxim 0:33d4e66780c0 719
IanBenzMaxim 0:33d4e66780c0 720 } // namespace internal
IanBenzMaxim 0:33d4e66780c0 721 RAPIDJSON_NAMESPACE_END
IanBenzMaxim 0:33d4e66780c0 722
IanBenzMaxim 0:33d4e66780c0 723 #ifdef __clang__
IanBenzMaxim 0:33d4e66780c0 724 RAPIDJSON_DIAG_POP
IanBenzMaxim 0:33d4e66780c0 725 #endif
IanBenzMaxim 0:33d4e66780c0 726
IanBenzMaxim 0:33d4e66780c0 727 #ifdef _MSC_VER
IanBenzMaxim 0:33d4e66780c0 728 RAPIDJSON_DIAG_POP
IanBenzMaxim 0:33d4e66780c0 729 #endif
IanBenzMaxim 0:33d4e66780c0 730
IanBenzMaxim 0:33d4e66780c0 731 #endif // RAPIDJSON_INTERNAL_REGEX_H_