DeepCover Embedded Security in IoT: Public-key Secured Data Paths
Dependencies: MaximInterface
encodings.h
00001 // Tencent is pleased to support the open source community by making RapidJSON available. 00002 // 00003 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 00004 // 00005 // Licensed under the MIT License (the "License"); you may not use this file except 00006 // in compliance with the License. You may obtain a copy of the License at 00007 // 00008 // http://opensource.org/licenses/MIT 00009 // 00010 // Unless required by applicable law or agreed to in writing, software distributed 00011 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 00012 // CONDITIONS OF ANY KIND, either express or implied. See the License for the 00013 // specific language governing permissions and limitations under the License. 00014 00015 #ifndef RAPIDJSON_ENCODINGS_H_ 00016 #define RAPIDJSON_ENCODINGS_H_ 00017 00018 #include "rapidjson.h" 00019 00020 #ifdef _MSC_VER 00021 RAPIDJSON_DIAG_PUSH 00022 RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data 00023 RAPIDJSON_DIAG_OFF(4702) // unreachable code 00024 #elif defined(__GNUC__) 00025 RAPIDJSON_DIAG_PUSH 00026 RAPIDJSON_DIAG_OFF(effc++) 00027 RAPIDJSON_DIAG_OFF(overflow) 00028 #endif 00029 00030 RAPIDJSON_NAMESPACE_BEGIN 00031 00032 /////////////////////////////////////////////////////////////////////////////// 00033 // Encoding 00034 00035 /*! \class rapidjson::Encoding 00036 \brief Concept for encoding of Unicode characters. 00037 00038 \code 00039 concept Encoding { 00040 typename Ch; //! Type of character. A "character" is actually a code unit in unicode's definition. 00041 00042 enum { supportUnicode = 1 }; // or 0 if not supporting unicode 00043 00044 //! \brief Encode a Unicode codepoint to an output stream. 00045 //! \param os Output stream. 00046 //! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively. 00047 template<typename OutputStream> 00048 static void Encode(OutputStream& os, unsigned codepoint); 00049 00050 //! \brief Decode a Unicode codepoint from an input stream. 00051 //! \param is Input stream. 00052 //! \param codepoint Output of the unicode codepoint. 00053 //! \return true if a valid codepoint can be decoded from the stream. 00054 template <typename InputStream> 00055 static bool Decode(InputStream& is, unsigned* codepoint); 00056 00057 //! \brief Validate one Unicode codepoint from an encoded stream. 00058 //! \param is Input stream to obtain codepoint. 00059 //! \param os Output for copying one codepoint. 00060 //! \return true if it is valid. 00061 //! \note This function just validating and copying the codepoint without actually decode it. 00062 template <typename InputStream, typename OutputStream> 00063 static bool Validate(InputStream& is, OutputStream& os); 00064 00065 // The following functions are deal with byte streams. 00066 00067 //! Take a character from input byte stream, skip BOM if exist. 00068 template <typename InputByteStream> 00069 static CharType TakeBOM(InputByteStream& is); 00070 00071 //! Take a character from input byte stream. 00072 template <typename InputByteStream> 00073 static Ch Take(InputByteStream& is); 00074 00075 //! Put BOM to output byte stream. 00076 template <typename OutputByteStream> 00077 static void PutBOM(OutputByteStream& os); 00078 00079 //! Put a character to output byte stream. 00080 template <typename OutputByteStream> 00081 static void Put(OutputByteStream& os, Ch c); 00082 }; 00083 \endcode 00084 */ 00085 00086 /////////////////////////////////////////////////////////////////////////////// 00087 // UTF8 00088 00089 //! UTF-8 encoding. 00090 /*! http://en.wikipedia.org/wiki/UTF-8 00091 http://tools.ietf.org/html/rfc3629 00092 \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char. 00093 \note implements Encoding concept 00094 */ 00095 template<typename CharType = char> 00096 struct UTF8 { 00097 typedef CharType Ch; 00098 00099 enum { supportUnicode = 1 }; 00100 00101 template<typename OutputStream> 00102 static void Encode(OutputStream& os, unsigned codepoint) { 00103 if (codepoint <= 0x7F) 00104 os.Put(static_cast<Ch>(codepoint & 0xFF)); 00105 else if (codepoint <= 0x7FF) { 00106 os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF))); 00107 os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F)))); 00108 } 00109 else if (codepoint <= 0xFFFF) { 00110 os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF))); 00111 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); 00112 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F))); 00113 } 00114 else { 00115 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); 00116 os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF))); 00117 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F))); 00118 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); 00119 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F))); 00120 } 00121 } 00122 00123 template<typename OutputStream> 00124 static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { 00125 if (codepoint <= 0x7F) 00126 PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF)); 00127 else if (codepoint <= 0x7FF) { 00128 PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF))); 00129 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F)))); 00130 } 00131 else if (codepoint <= 0xFFFF) { 00132 PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF))); 00133 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); 00134 PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F))); 00135 } 00136 else { 00137 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); 00138 PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF))); 00139 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F))); 00140 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); 00141 PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F))); 00142 } 00143 } 00144 00145 template <typename InputStream> 00146 static bool Decode(InputStream& is, unsigned* codepoint) { 00147 #define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu) 00148 #define TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0) 00149 #define TAIL() COPY(); TRANS(0x70) 00150 typename InputStream::Ch c = is.Take(); 00151 if (!(c & 0x80)) { 00152 *codepoint = static_cast<unsigned char>(c); 00153 return true; 00154 } 00155 00156 unsigned char type = GetRange(static_cast<unsigned char>(c)); 00157 if (type >= 32) { 00158 *codepoint = 0; 00159 } else { 00160 *codepoint = (0xFF >> type) & static_cast<unsigned char>(c); 00161 } 00162 bool result = true; 00163 switch (type) { 00164 case 2: TAIL(); return result; 00165 case 3: TAIL(); TAIL(); return result; 00166 case 4: COPY(); TRANS(0x50); TAIL(); return result; 00167 case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result; 00168 case 6: TAIL(); TAIL(); TAIL(); return result; 00169 case 10: COPY(); TRANS(0x20); TAIL(); return result; 00170 case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result; 00171 default: return false; 00172 } 00173 #undef COPY 00174 #undef TRANS 00175 #undef TAIL 00176 } 00177 00178 template <typename InputStream, typename OutputStream> 00179 static bool Validate(InputStream& is, OutputStream& os) { 00180 #define COPY() os.Put(c = is.Take()) 00181 #define TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0) 00182 #define TAIL() COPY(); TRANS(0x70) 00183 Ch c; 00184 COPY(); 00185 if (!(c & 0x80)) 00186 return true; 00187 00188 bool result = true; 00189 switch (GetRange(static_cast<unsigned char>(c))) { 00190 case 2: TAIL(); return result; 00191 case 3: TAIL(); TAIL(); return result; 00192 case 4: COPY(); TRANS(0x50); TAIL(); return result; 00193 case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result; 00194 case 6: TAIL(); TAIL(); TAIL(); return result; 00195 case 10: COPY(); TRANS(0x20); TAIL(); return result; 00196 case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result; 00197 default: return false; 00198 } 00199 #undef COPY 00200 #undef TRANS 00201 #undef TAIL 00202 } 00203 00204 static unsigned char GetRange(unsigned char c) { 00205 // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ 00206 // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types. 00207 static const unsigned char type[] = { 00208 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 00209 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 00210 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 00211 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 00212 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, 00213 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, 00214 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, 00215 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, 00216 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 00217 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, 00218 }; 00219 return type[c]; 00220 } 00221 00222 template <typename InputByteStream> 00223 static CharType TakeBOM(InputByteStream& is) { 00224 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 00225 typename InputByteStream::Ch c = Take(is); 00226 if (static_cast<unsigned char>(c) != 0xEFu) return c; 00227 c = is.Take(); 00228 if (static_cast<unsigned char>(c) != 0xBBu) return c; 00229 c = is.Take(); 00230 if (static_cast<unsigned char>(c) != 0xBFu) return c; 00231 c = is.Take(); 00232 return c; 00233 } 00234 00235 template <typename InputByteStream> 00236 static Ch Take(InputByteStream& is) { 00237 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 00238 return static_cast<Ch>(is.Take()); 00239 } 00240 00241 template <typename OutputByteStream> 00242 static void PutBOM(OutputByteStream& os) { 00243 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 00244 os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu)); 00245 os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu)); 00246 os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu)); 00247 } 00248 00249 template <typename OutputByteStream> 00250 static void Put(OutputByteStream& os, Ch c) { 00251 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 00252 os.Put(static_cast<typename OutputByteStream::Ch>(c)); 00253 } 00254 }; 00255 00256 /////////////////////////////////////////////////////////////////////////////// 00257 // UTF16 00258 00259 //! UTF-16 encoding. 00260 /*! http://en.wikipedia.org/wiki/UTF-16 00261 http://tools.ietf.org/html/rfc2781 00262 \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead. 00263 \note implements Encoding concept 00264 00265 \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. 00266 For streaming, use UTF16LE and UTF16BE, which handle endianness. 00267 */ 00268 template<typename CharType = wchar_t> 00269 struct UTF16 { 00270 typedef CharType Ch; 00271 RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2); 00272 00273 enum { supportUnicode = 1 }; 00274 00275 template<typename OutputStream> 00276 static void Encode(OutputStream& os, unsigned codepoint) { 00277 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); 00278 if (codepoint <= 0xFFFF) { 00279 RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair 00280 os.Put(static_cast<typename OutputStream::Ch>(codepoint)); 00281 } 00282 else { 00283 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); 00284 unsigned v = codepoint - 0x10000; 00285 os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800)); 00286 os.Put((v & 0x3FF) | 0xDC00); 00287 } 00288 } 00289 00290 00291 template<typename OutputStream> 00292 static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { 00293 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); 00294 if (codepoint <= 0xFFFF) { 00295 RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair 00296 PutUnsafe(os, static_cast<typename OutputStream::Ch>(codepoint)); 00297 } 00298 else { 00299 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); 00300 unsigned v = codepoint - 0x10000; 00301 PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800)); 00302 PutUnsafe(os, (v & 0x3FF) | 0xDC00); 00303 } 00304 } 00305 00306 template <typename InputStream> 00307 static bool Decode(InputStream& is, unsigned* codepoint) { 00308 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); 00309 typename InputStream::Ch c = is.Take(); 00310 if (c < 0xD800 || c > 0xDFFF) { 00311 *codepoint = static_cast<unsigned>(c); 00312 return true; 00313 } 00314 else if (c <= 0xDBFF) { 00315 *codepoint = (static_cast<unsigned>(c) & 0x3FF) << 10; 00316 c = is.Take(); 00317 *codepoint |= (static_cast<unsigned>(c) & 0x3FF); 00318 *codepoint += 0x10000; 00319 return c >= 0xDC00 && c <= 0xDFFF; 00320 } 00321 return false; 00322 } 00323 00324 template <typename InputStream, typename OutputStream> 00325 static bool Validate(InputStream& is, OutputStream& os) { 00326 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); 00327 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); 00328 typename InputStream::Ch c; 00329 os.Put(static_cast<typename OutputStream::Ch>(c = is.Take())); 00330 if (c < 0xD800 || c > 0xDFFF) 00331 return true; 00332 else if (c <= 0xDBFF) { 00333 os.Put(c = is.Take()); 00334 return c >= 0xDC00 && c <= 0xDFFF; 00335 } 00336 return false; 00337 } 00338 }; 00339 00340 //! UTF-16 little endian encoding. 00341 template<typename CharType = wchar_t> 00342 struct UTF16LE : UTF16<CharType> { 00343 template <typename InputByteStream> 00344 static CharType TakeBOM(InputByteStream& is) { 00345 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 00346 CharType c = Take(is); 00347 return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c; 00348 } 00349 00350 template <typename InputByteStream> 00351 static CharType Take(InputByteStream& is) { 00352 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 00353 unsigned c = static_cast<uint8_t>(is.Take()); 00354 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; 00355 return static_cast<CharType>(c); 00356 } 00357 00358 template <typename OutputByteStream> 00359 static void PutBOM(OutputByteStream& os) { 00360 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 00361 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); 00362 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); 00363 } 00364 00365 template <typename OutputByteStream> 00366 static void Put(OutputByteStream& os, CharType c) { 00367 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 00368 os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu)); 00369 os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu)); 00370 } 00371 }; 00372 00373 //! UTF-16 big endian encoding. 00374 template<typename CharType = wchar_t> 00375 struct UTF16BE : UTF16<CharType> { 00376 template <typename InputByteStream> 00377 static CharType TakeBOM(InputByteStream& is) { 00378 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 00379 CharType c = Take(is); 00380 return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c; 00381 } 00382 00383 template <typename InputByteStream> 00384 static CharType Take(InputByteStream& is) { 00385 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 00386 unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; 00387 c |= static_cast<uint8_t>(is.Take()); 00388 return static_cast<CharType>(c); 00389 } 00390 00391 template <typename OutputByteStream> 00392 static void PutBOM(OutputByteStream& os) { 00393 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 00394 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); 00395 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); 00396 } 00397 00398 template <typename OutputByteStream> 00399 static void Put(OutputByteStream& os, CharType c) { 00400 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 00401 os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu)); 00402 os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu)); 00403 } 00404 }; 00405 00406 /////////////////////////////////////////////////////////////////////////////// 00407 // UTF32 00408 00409 //! UTF-32 encoding. 00410 /*! http://en.wikipedia.org/wiki/UTF-32 00411 \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead. 00412 \note implements Encoding concept 00413 00414 \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. 00415 For streaming, use UTF32LE and UTF32BE, which handle endianness. 00416 */ 00417 template<typename CharType = unsigned> 00418 struct UTF32 { 00419 typedef CharType Ch; 00420 RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4); 00421 00422 enum { supportUnicode = 1 }; 00423 00424 template<typename OutputStream> 00425 static void Encode(OutputStream& os, unsigned codepoint) { 00426 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); 00427 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); 00428 os.Put(codepoint); 00429 } 00430 00431 template<typename OutputStream> 00432 static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { 00433 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); 00434 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); 00435 PutUnsafe(os, codepoint); 00436 } 00437 00438 template <typename InputStream> 00439 static bool Decode(InputStream& is, unsigned* codepoint) { 00440 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); 00441 Ch c = is.Take(); 00442 *codepoint = c; 00443 return c <= 0x10FFFF; 00444 } 00445 00446 template <typename InputStream, typename OutputStream> 00447 static bool Validate(InputStream& is, OutputStream& os) { 00448 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); 00449 Ch c; 00450 os.Put(c = is.Take()); 00451 return c <= 0x10FFFF; 00452 } 00453 }; 00454 00455 //! UTF-32 little endian enocoding. 00456 template<typename CharType = unsigned> 00457 struct UTF32LE : UTF32<CharType> { 00458 template <typename InputByteStream> 00459 static CharType TakeBOM(InputByteStream& is) { 00460 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 00461 CharType c = Take(is); 00462 return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c; 00463 } 00464 00465 template <typename InputByteStream> 00466 static CharType Take(InputByteStream& is) { 00467 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 00468 unsigned c = static_cast<uint8_t>(is.Take()); 00469 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; 00470 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16; 00471 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24; 00472 return static_cast<CharType>(c); 00473 } 00474 00475 template <typename OutputByteStream> 00476 static void PutBOM(OutputByteStream& os) { 00477 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 00478 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); 00479 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); 00480 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); 00481 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); 00482 } 00483 00484 template <typename OutputByteStream> 00485 static void Put(OutputByteStream& os, CharType c) { 00486 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 00487 os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu)); 00488 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu)); 00489 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu)); 00490 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu)); 00491 } 00492 }; 00493 00494 //! UTF-32 big endian encoding. 00495 template<typename CharType = unsigned> 00496 struct UTF32BE : UTF32<CharType> { 00497 template <typename InputByteStream> 00498 static CharType TakeBOM(InputByteStream& is) { 00499 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 00500 CharType c = Take(is); 00501 return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c; 00502 } 00503 00504 template <typename InputByteStream> 00505 static CharType Take(InputByteStream& is) { 00506 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 00507 unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24; 00508 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16; 00509 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; 00510 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())); 00511 return static_cast<CharType>(c); 00512 } 00513 00514 template <typename OutputByteStream> 00515 static void PutBOM(OutputByteStream& os) { 00516 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 00517 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); 00518 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); 00519 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); 00520 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); 00521 } 00522 00523 template <typename OutputByteStream> 00524 static void Put(OutputByteStream& os, CharType c) { 00525 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 00526 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu)); 00527 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu)); 00528 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu)); 00529 os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu)); 00530 } 00531 }; 00532 00533 /////////////////////////////////////////////////////////////////////////////// 00534 // ASCII 00535 00536 //! ASCII encoding. 00537 /*! http://en.wikipedia.org/wiki/ASCII 00538 \tparam CharType Code unit for storing 7-bit ASCII data. Default is char. 00539 \note implements Encoding concept 00540 */ 00541 template<typename CharType = char> 00542 struct ASCII { 00543 typedef CharType Ch; 00544 00545 enum { supportUnicode = 0 }; 00546 00547 template<typename OutputStream> 00548 static void Encode(OutputStream& os, unsigned codepoint) { 00549 RAPIDJSON_ASSERT(codepoint <= 0x7F); 00550 os.Put(static_cast<Ch>(codepoint & 0xFF)); 00551 } 00552 00553 template<typename OutputStream> 00554 static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { 00555 RAPIDJSON_ASSERT(codepoint <= 0x7F); 00556 PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF)); 00557 } 00558 00559 template <typename InputStream> 00560 static bool Decode(InputStream& is, unsigned* codepoint) { 00561 uint8_t c = static_cast<uint8_t>(is.Take()); 00562 *codepoint = c; 00563 return c <= 0X7F; 00564 } 00565 00566 template <typename InputStream, typename OutputStream> 00567 static bool Validate(InputStream& is, OutputStream& os) { 00568 uint8_t c = static_cast<uint8_t>(is.Take()); 00569 os.Put(static_cast<typename OutputStream::Ch>(c)); 00570 return c <= 0x7F; 00571 } 00572 00573 template <typename InputByteStream> 00574 static CharType TakeBOM(InputByteStream& is) { 00575 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 00576 uint8_t c = static_cast<uint8_t>(Take(is)); 00577 return static_cast<Ch>(c); 00578 } 00579 00580 template <typename InputByteStream> 00581 static Ch Take(InputByteStream& is) { 00582 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 00583 return static_cast<Ch>(is.Take()); 00584 } 00585 00586 template <typename OutputByteStream> 00587 static void PutBOM(OutputByteStream& os) { 00588 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 00589 (void)os; 00590 } 00591 00592 template <typename OutputByteStream> 00593 static void Put(OutputByteStream& os, Ch c) { 00594 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 00595 os.Put(static_cast<typename OutputByteStream::Ch>(c)); 00596 } 00597 }; 00598 00599 /////////////////////////////////////////////////////////////////////////////// 00600 // AutoUTF 00601 00602 //! Runtime-specified UTF encoding type of a stream. 00603 enum UTFType { 00604 kUTF8 = 0, //!< UTF-8. 00605 kUTF16LE = 1, //!< UTF-16 little endian. 00606 kUTF16BE = 2, //!< UTF-16 big endian. 00607 kUTF32LE = 3, //!< UTF-32 little endian. 00608 kUTF32BE = 4 //!< UTF-32 big endian. 00609 }; 00610 00611 //! Dynamically select encoding according to stream's runtime-specified UTF encoding type. 00612 /*! \note This class can be used with AutoUTFInputtStream and AutoUTFOutputStream, which provides GetType(). 00613 */ 00614 template<typename CharType> 00615 struct AutoUTF { 00616 typedef CharType Ch; 00617 00618 enum { supportUnicode = 1 }; 00619 00620 #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x 00621 00622 template<typename OutputStream> 00623 RAPIDJSON_FORCEINLINE static void Encode(OutputStream& os, unsigned codepoint) { 00624 typedef void (*EncodeFunc)(OutputStream&, unsigned); 00625 static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) }; 00626 (*f[os.GetType()])(os, codepoint); 00627 } 00628 00629 template<typename OutputStream> 00630 RAPIDJSON_FORCEINLINE static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { 00631 typedef void (*EncodeFunc)(OutputStream&, unsigned); 00632 static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe) }; 00633 (*f[os.GetType()])(os, codepoint); 00634 } 00635 00636 template <typename InputStream> 00637 RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) { 00638 typedef bool (*DecodeFunc)(InputStream&, unsigned*); 00639 static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) }; 00640 return (*f[is.GetType()])(is, codepoint); 00641 } 00642 00643 template <typename InputStream, typename OutputStream> 00644 RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { 00645 typedef bool (*ValidateFunc)(InputStream&, OutputStream&); 00646 static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) }; 00647 return (*f[is.GetType()])(is, os); 00648 } 00649 00650 #undef RAPIDJSON_ENCODINGS_FUNC 00651 }; 00652 00653 /////////////////////////////////////////////////////////////////////////////// 00654 // Transcoder 00655 00656 //! Encoding conversion. 00657 template<typename SourceEncoding, typename TargetEncoding> 00658 struct Transcoder { 00659 //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream. 00660 template<typename InputStream, typename OutputStream> 00661 RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) { 00662 unsigned codepoint; 00663 if (!SourceEncoding::Decode(is, &codepoint)) 00664 return false; 00665 TargetEncoding::Encode(os, codepoint); 00666 return true; 00667 } 00668 00669 template<typename InputStream, typename OutputStream> 00670 RAPIDJSON_FORCEINLINE static bool TranscodeUnsafe(InputStream& is, OutputStream& os) { 00671 unsigned codepoint; 00672 if (!SourceEncoding::Decode(is, &codepoint)) 00673 return false; 00674 TargetEncoding::EncodeUnsafe(os, codepoint); 00675 return true; 00676 } 00677 00678 //! Validate one Unicode codepoint from an encoded stream. 00679 template<typename InputStream, typename OutputStream> 00680 RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { 00681 return Transcode(is, os); // Since source/target encoding is different, must transcode. 00682 } 00683 }; 00684 00685 // Forward declaration. 00686 template<typename Stream> 00687 inline void PutUnsafe(Stream& stream, typename Stream::Ch c); 00688 00689 //! Specialization of Transcoder with same source and target encoding. 00690 template<typename Encoding> 00691 struct Transcoder<Encoding, Encoding> { 00692 template<typename InputStream, typename OutputStream> 00693 RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) { 00694 os.Put(is.Take()); // Just copy one code unit. This semantic is different from primary template class. 00695 return true; 00696 } 00697 00698 template<typename InputStream, typename OutputStream> 00699 RAPIDJSON_FORCEINLINE static bool TranscodeUnsafe(InputStream& is, OutputStream& os) { 00700 PutUnsafe(os, is.Take()); // Just copy one code unit. This semantic is different from primary template class. 00701 return true; 00702 } 00703 00704 template<typename InputStream, typename OutputStream> 00705 RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { 00706 return Encoding::Validate(is, os); // source/target encoding are the same 00707 } 00708 }; 00709 00710 RAPIDJSON_NAMESPACE_END 00711 00712 #if defined(__GNUC__) || defined(_MSC_VER) 00713 RAPIDJSON_DIAG_POP 00714 #endif 00715 00716 #endif // RAPIDJSON_ENCODINGS_H_
Generated on Tue Jul 12 2022 12:06:48 by 1.7.2