DeepCover Embedded Security in IoT: Public-key Secured Data Paths

Dependencies:   MaximInterface

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers encodings.h Source File

encodings.h

00001 // Tencent is pleased to support the open source community by making RapidJSON available.
00002 // 
00003 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
00004 //
00005 // Licensed under the MIT License (the "License"); you may not use this file except
00006 // in compliance with the License. You may obtain a copy of the License at
00007 //
00008 // http://opensource.org/licenses/MIT
00009 //
00010 // Unless required by applicable law or agreed to in writing, software distributed 
00011 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
00012 // CONDITIONS OF ANY KIND, either express or implied. See the License for the 
00013 // specific language governing permissions and limitations under the License.
00014 
00015 #ifndef RAPIDJSON_ENCODINGS_H_
00016 #define RAPIDJSON_ENCODINGS_H_
00017 
00018 #include "rapidjson.h"
00019 
00020 #ifdef _MSC_VER
00021 RAPIDJSON_DIAG_PUSH
00022 RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data
00023 RAPIDJSON_DIAG_OFF(4702)  // unreachable code
00024 #elif defined(__GNUC__)
00025 RAPIDJSON_DIAG_PUSH
00026 RAPIDJSON_DIAG_OFF(effc++)
00027 RAPIDJSON_DIAG_OFF(overflow)
00028 #endif
00029 
00030 RAPIDJSON_NAMESPACE_BEGIN
00031 
00032 ///////////////////////////////////////////////////////////////////////////////
00033 // Encoding
00034 
00035 /*! \class rapidjson::Encoding
00036     \brief Concept for encoding of Unicode characters.
00037 
00038 \code
00039 concept Encoding {
00040     typename Ch;    //! Type of character. A "character" is actually a code unit in unicode's definition.
00041 
00042     enum { supportUnicode = 1 }; // or 0 if not supporting unicode
00043 
00044     //! \brief Encode a Unicode codepoint to an output stream.
00045     //! \param os Output stream.
00046     //! \param codepoint A Unicode codepoint, ranging from 0x0 to 0x10FFFF inclusive.
00047     template<typename OutputStream>
00048     static void Encode(OutputStream& os, unsigned codepoint);
00049 
00050     //! \brief Decode a Unicode codepoint from an input stream.
00051     //! \param is Input stream.
00052     //! \param codepoint Output of the unicode codepoint.
00053     //! \return true if a valid codepoint can be decoded from the stream.
00054     template <typename InputStream>
00055     static bool Decode(InputStream& is, unsigned* codepoint);
00056 
00057     //! \brief Validate one Unicode codepoint from an encoded stream.
00058     //! \param is Input stream to obtain codepoint.
00059     //! \param os Output for copying one codepoint.
00060     //! \return true if it is valid.
00061     //! \note This function only validates and copies the codepoint; it does not actually decode it.
00062     template <typename InputStream, typename OutputStream>
00063     static bool Validate(InputStream& is, OutputStream& os);
00064 
00065     // The following functions deal with byte streams.
00066 
00067     //! Take a character from input byte stream, skipping the BOM if it exists.
00068     template <typename InputByteStream>
00069     static Ch TakeBOM(InputByteStream& is);
00070 
00071     //! Take a character from input byte stream.
00072     template <typename InputByteStream>
00073     static Ch Take(InputByteStream& is);
00074 
00075     //! Put BOM to output byte stream.
00076     template <typename OutputByteStream>
00077     static void PutBOM(OutputByteStream& os);
00078 
00079     //! Put a character to output byte stream.
00080     template <typename OutputByteStream>
00081     static void Put(OutputByteStream& os, Ch c);
00082 };
00083 \endcode
00084 */
00085 
00086 ///////////////////////////////////////////////////////////////////////////////
00087 // UTF8
00088 
00089 //! UTF-8 encoding.
00090 /*! http://en.wikipedia.org/wiki/UTF-8
00091     http://tools.ietf.org/html/rfc3629
00092     \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char.
00093     \note implements Encoding concept
00094 */
00095 template<typename CharType = char>
00096 struct UTF8 {
00097     typedef CharType Ch;
00098 
00099     enum { supportUnicode = 1 };
00100 
00101     template<typename OutputStream>
00102     static void Encode(OutputStream& os, unsigned codepoint) {
00103         if (codepoint <= 0x7F) 
00104             os.Put(static_cast<Ch>(codepoint & 0xFF));
00105         else if (codepoint <= 0x7FF) {
00106             os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
00107             os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
00108         }
00109         else if (codepoint <= 0xFFFF) {
00110             os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
00111             os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
00112             os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
00113         }
00114         else {
00115             RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
00116             os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
00117             os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
00118             os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
00119             os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
00120         }
00121     }
00122 
00123     template<typename OutputStream>
00124     static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
00125         if (codepoint <= 0x7F) 
00126             PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
00127         else if (codepoint <= 0x7FF) {
00128             PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
00129             PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
00130         }
00131         else if (codepoint <= 0xFFFF) {
00132             PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
00133             PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
00134             PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
00135         }
00136         else {
00137             RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
00138             PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
00139             PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
00140             PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
00141             PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
00142         }
00143     }
00144 
00145     template <typename InputStream>
00146     static bool Decode(InputStream& is, unsigned* codepoint) {
00147 #define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
00148 #define TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
00149 #define TAIL() COPY(); TRANS(0x70)
00150         typename InputStream::Ch c = is.Take();
00151         if (!(c & 0x80)) {
00152             *codepoint = static_cast<unsigned char>(c);
00153             return true;
00154         }
00155 
00156         unsigned char type = GetRange(static_cast<unsigned char>(c));
00157         if (type >= 32) {
00158             *codepoint = 0;
00159         } else {
00160             *codepoint = (0xFF >> type) & static_cast<unsigned char>(c);
00161         }
00162         bool result = true;
00163         switch (type) {
00164         case 2: TAIL(); return result;
00165         case 3: TAIL(); TAIL(); return result;
00166         case 4: COPY(); TRANS(0x50); TAIL(); return result;
00167         case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result;
00168         case 6: TAIL(); TAIL(); TAIL(); return result;
00169         case 10: COPY(); TRANS(0x20); TAIL(); return result;
00170         case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result;
00171         default: return false;
00172         }
00173 #undef COPY
00174 #undef TRANS
00175 #undef TAIL
00176     }
00177 
00178     template <typename InputStream, typename OutputStream>
00179     static bool Validate(InputStream& is, OutputStream& os) {
00180 #define COPY() os.Put(c = is.Take())
00181 #define TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
00182 #define TAIL() COPY(); TRANS(0x70)
00183         Ch c;
00184         COPY();
00185         if (!(c & 0x80))
00186             return true;
00187 
00188         bool result = true;
00189         switch (GetRange(static_cast<unsigned char>(c))) {
00190         case 2: TAIL(); return result;
00191         case 3: TAIL(); TAIL(); return result;
00192         case 4: COPY(); TRANS(0x50); TAIL(); return result;
00193         case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result;
00194         case 6: TAIL(); TAIL(); TAIL(); return result;
00195         case 10: COPY(); TRANS(0x20); TAIL(); return result;
00196         case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result;
00197         default: return false;
00198         }
00199 #undef COPY
00200 #undef TRANS
00201 #undef TAIL
00202     }
00203 
00204     static unsigned char GetRange(unsigned char c) {
00205         // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
00206         // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
00207         static const unsigned char type[] = {
00208             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00209             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00210             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00211             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00212             0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
00213             0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
00214             0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
00215             0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
00216             8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
00217             10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
00218         };
00219         return type[c];
00220     }
00221 
00222     template <typename InputByteStream>
00223     static CharType TakeBOM(InputByteStream& is) {
00224         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
00225         typename InputByteStream::Ch c = Take(is);
00226         if (static_cast<unsigned char>(c) != 0xEFu) return c;
00227         c = is.Take();
00228         if (static_cast<unsigned char>(c) != 0xBBu) return c;
00229         c = is.Take();
00230         if (static_cast<unsigned char>(c) != 0xBFu) return c;
00231         c = is.Take();
00232         return c;
00233     }
00234 
00235     template <typename InputByteStream>
00236     static Ch Take(InputByteStream& is) {
00237         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
00238         return static_cast<Ch>(is.Take());
00239     }
00240 
00241     template <typename OutputByteStream>
00242     static void PutBOM(OutputByteStream& os) {
00243         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
00244         os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu));
00245         os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu));
00246         os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu));
00247     }
00248 
00249     template <typename OutputByteStream>
00250     static void Put(OutputByteStream& os, Ch c) {
00251         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
00252         os.Put(static_cast<typename OutputByteStream::Ch>(c));
00253     }
00254 };
00255 
00256 ///////////////////////////////////////////////////////////////////////////////
00257 // UTF16
00258 
00259 //! UTF-16 encoding.
00260 /*! http://en.wikipedia.org/wiki/UTF-16
00261     http://tools.ietf.org/html/rfc2781
00262     \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead.
00263     \note implements Encoding concept
00264 
00265     \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness.
00266     For streaming, use UTF16LE and UTF16BE, which handle endianness.
00267 */
00268 template<typename CharType = wchar_t>
00269 struct UTF16 {
00270     typedef CharType Ch;
00271     RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2);
00272 
00273     enum { supportUnicode = 1 };
00274 
00275     template<typename OutputStream>
00276     static void Encode(OutputStream& os, unsigned codepoint) {
00277         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
00278         if (codepoint <= 0xFFFF) {
00279             RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair 
00280             os.Put(static_cast<typename OutputStream::Ch>(codepoint));
00281         }
00282         else {
00283             RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
00284             unsigned v = codepoint - 0x10000;
00285             os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
00286             os.Put((v & 0x3FF) | 0xDC00);
00287         }
00288     }
00289 
00290 
00291     template<typename OutputStream>
00292     static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
00293         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
00294         if (codepoint <= 0xFFFF) {
00295             RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair 
00296             PutUnsafe(os, static_cast<typename OutputStream::Ch>(codepoint));
00297         }
00298         else {
00299             RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
00300             unsigned v = codepoint - 0x10000;
00301             PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
00302             PutUnsafe(os, (v & 0x3FF) | 0xDC00);
00303         }
00304     }
00305 
00306     template <typename InputStream>
00307     static bool Decode(InputStream& is, unsigned* codepoint) {
00308         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
00309         typename InputStream::Ch c = is.Take();
00310         if (c < 0xD800 || c > 0xDFFF) {
00311             *codepoint = static_cast<unsigned>(c);
00312             return true;
00313         }
00314         else if (c <= 0xDBFF) {
00315             *codepoint = (static_cast<unsigned>(c) & 0x3FF) << 10;
00316             c = is.Take();
00317             *codepoint |= (static_cast<unsigned>(c) & 0x3FF);
00318             *codepoint += 0x10000;
00319             return c >= 0xDC00 && c <= 0xDFFF;
00320         }
00321         return false;
00322     }
00323 
00324     template <typename InputStream, typename OutputStream>
00325     static bool Validate(InputStream& is, OutputStream& os) {
00326         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
00327         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
00328         typename InputStream::Ch c;
00329         os.Put(static_cast<typename OutputStream::Ch>(c = is.Take()));
00330         if (c < 0xD800 || c > 0xDFFF)
00331             return true;
00332         else if (c <= 0xDBFF) {
00333             os.Put(c = is.Take());
00334             return c >= 0xDC00 && c <= 0xDFFF;
00335         }
00336         return false;
00337     }
00338 };
00339 
00340 //! UTF-16 little endian encoding.
00341 template<typename CharType = wchar_t>
00342 struct UTF16LE : UTF16<CharType> {
00343     template <typename InputByteStream>
00344     static CharType TakeBOM(InputByteStream& is) {
00345         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
00346         CharType c = Take(is);
00347         return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c;
00348     }
00349 
00350     template <typename InputByteStream>
00351     static CharType Take(InputByteStream& is) {
00352         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
00353         unsigned c = static_cast<uint8_t>(is.Take());
00354         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
00355         return static_cast<CharType>(c);
00356     }
00357 
00358     template <typename OutputByteStream>
00359     static void PutBOM(OutputByteStream& os) {
00360         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
00361         os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
00362         os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
00363     }
00364 
00365     template <typename OutputByteStream>
00366     static void Put(OutputByteStream& os, CharType c) {
00367         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
00368         os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
00369         os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
00370     }
00371 };
00372 
00373 //! UTF-16 big endian encoding.
00374 template<typename CharType = wchar_t>
00375 struct UTF16BE : UTF16<CharType> {
00376     template <typename InputByteStream>
00377     static CharType TakeBOM(InputByteStream& is) {
00378         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
00379         CharType c = Take(is);
00380         return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c;
00381     }
00382 
00383     template <typename InputByteStream>
00384     static CharType Take(InputByteStream& is) {
00385         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
00386         unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
00387         c |= static_cast<uint8_t>(is.Take());
00388         return static_cast<CharType>(c);
00389     }
00390 
00391     template <typename OutputByteStream>
00392     static void PutBOM(OutputByteStream& os) {
00393         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
00394         os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
00395         os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
00396     }
00397 
00398     template <typename OutputByteStream>
00399     static void Put(OutputByteStream& os, CharType c) {
00400         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
00401         os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
00402         os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
00403     }
00404 };
00405 
00406 ///////////////////////////////////////////////////////////////////////////////
00407 // UTF32
00408 
00409 //! UTF-32 encoding. 
00410 /*! http://en.wikipedia.org/wiki/UTF-32
00411     \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead.
00412     \note implements Encoding concept
00413 
00414     \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness.
00415     For streaming, use UTF32LE and UTF32BE, which handle endianness.
00416 */
00417 template<typename CharType = unsigned>
00418 struct UTF32 {
00419     typedef CharType Ch;
00420     RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4);
00421 
00422     enum { supportUnicode = 1 };
00423 
00424     template<typename OutputStream>
00425     static void Encode(OutputStream& os, unsigned codepoint) {
00426         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
00427         RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
00428         os.Put(codepoint);
00429     }
00430 
00431     template<typename OutputStream>
00432     static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
00433         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
00434         RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
00435         PutUnsafe(os, codepoint);
00436     }
00437 
00438     template <typename InputStream>
00439     static bool Decode(InputStream& is, unsigned* codepoint) {
00440         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
00441         Ch c = is.Take();
00442         *codepoint = c;
00443         return c <= 0x10FFFF;
00444     }
00445 
00446     template <typename InputStream, typename OutputStream>
00447     static bool Validate(InputStream& is, OutputStream& os) {
00448         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
00449         Ch c;
00450         os.Put(c = is.Take());
00451         return c <= 0x10FFFF;
00452     }
00453 };
00454 
00455 //! UTF-32 little endian encoding.
00456 template<typename CharType = unsigned>
00457 struct UTF32LE : UTF32<CharType> {
00458     template <typename InputByteStream>
00459     static CharType TakeBOM(InputByteStream& is) {
00460         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
00461         CharType c = Take(is);
00462         return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c;
00463     }
00464 
00465     template <typename InputByteStream>
00466     static CharType Take(InputByteStream& is) {
00467         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
00468         unsigned c = static_cast<uint8_t>(is.Take());
00469         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
00470         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16;
00471         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24;
00472         return static_cast<CharType>(c);
00473     }
00474 
00475     template <typename OutputByteStream>
00476     static void PutBOM(OutputByteStream& os) {
00477         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
00478         os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
00479         os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
00480         os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
00481         os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
00482     }
00483 
00484     template <typename OutputByteStream>
00485     static void Put(OutputByteStream& os, CharType c) {
00486         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
00487         os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
00488         os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
00489         os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
00490         os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
00491     }
00492 };
00493 
00494 //! UTF-32 big endian encoding.
00495 template<typename CharType = unsigned>
00496 struct UTF32BE : UTF32<CharType> {
00497     template <typename InputByteStream>
00498     static CharType TakeBOM(InputByteStream& is) {
00499         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
00500         CharType c = Take(is);
00501         return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c; 
00502     }
00503 
00504     template <typename InputByteStream>
00505     static CharType Take(InputByteStream& is) {
00506         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
00507         unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24;
00508         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16;
00509         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
00510         c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take()));
00511         return static_cast<CharType>(c);
00512     }
00513 
00514     template <typename OutputByteStream>
00515     static void PutBOM(OutputByteStream& os) {
00516         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
00517         os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
00518         os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
00519         os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
00520         os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
00521     }
00522 
00523     template <typename OutputByteStream>
00524     static void Put(OutputByteStream& os, CharType c) {
00525         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
00526         os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
00527         os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
00528         os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
00529         os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
00530     }
00531 };
00532 
00533 ///////////////////////////////////////////////////////////////////////////////
00534 // ASCII
00535 
00536 //! ASCII encoding.
00537 /*! http://en.wikipedia.org/wiki/ASCII
00538     \tparam CharType Code unit for storing 7-bit ASCII data. Default is char.
00539     \note implements Encoding concept
00540 */
00541 template<typename CharType = char>
00542 struct ASCII {
00543     typedef CharType Ch;
00544 
00545     enum { supportUnicode = 0 };
00546 
00547     template<typename OutputStream>
00548     static void Encode(OutputStream& os, unsigned codepoint) {
00549         RAPIDJSON_ASSERT(codepoint <= 0x7F);
00550         os.Put(static_cast<Ch>(codepoint & 0xFF));
00551     }
00552 
00553     template<typename OutputStream>
00554     static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
00555         RAPIDJSON_ASSERT(codepoint <= 0x7F);
00556         PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
00557     }
00558 
00559     template <typename InputStream>
00560     static bool Decode(InputStream& is, unsigned* codepoint) {
00561         uint8_t c = static_cast<uint8_t>(is.Take());
00562         *codepoint = c;
00563         return c <= 0X7F;
00564     }
00565 
00566     template <typename InputStream, typename OutputStream>
00567     static bool Validate(InputStream& is, OutputStream& os) {
00568         uint8_t c = static_cast<uint8_t>(is.Take());
00569         os.Put(static_cast<typename OutputStream::Ch>(c));
00570         return c <= 0x7F;
00571     }
00572 
00573     template <typename InputByteStream>
00574     static CharType TakeBOM(InputByteStream& is) {
00575         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
00576         uint8_t c = static_cast<uint8_t>(Take(is));
00577         return static_cast<Ch>(c);
00578     }
00579 
00580     template <typename InputByteStream>
00581     static Ch Take(InputByteStream& is) {
00582         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
00583         return static_cast<Ch>(is.Take());
00584     }
00585 
00586     template <typename OutputByteStream>
00587     static void PutBOM(OutputByteStream& os) {
00588         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
00589         (void)os;
00590     }
00591 
00592     template <typename OutputByteStream>
00593     static void Put(OutputByteStream& os, Ch c) {
00594         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
00595         os.Put(static_cast<typename OutputByteStream::Ch>(c));
00596     }
00597 };
00598 
00599 ///////////////////////////////////////////////////////////////////////////////
00600 // AutoUTF
00601 
00602 //! Runtime-specified UTF encoding type of a stream.
enum UTFType {
    //! UTF-8.
    kUTF8 = 0,
    //! UTF-16 little endian.
    kUTF16LE = 1,
    //! UTF-16 big endian.
    kUTF16BE = 2,
    //! UTF-32 little endian.
    kUTF32LE = 3,
    //! UTF-32 big endian.
    kUTF32BE = 4
};
00610 
00611 //! Dynamically select encoding according to stream's runtime-specified UTF encoding type.
00612 /*! \note This class can be used with AutoUTFInputStream and AutoUTFOutputStream, which provide GetType().
00613 */
00614 template<typename CharType>
00615 struct AutoUTF {
00616     typedef CharType Ch;
00617 
00618     enum { supportUnicode = 1 };
00619 
00620 #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
00621 
00622     template<typename OutputStream>
00623     RAPIDJSON_FORCEINLINE static void Encode(OutputStream& os, unsigned codepoint) {
00624         typedef void (*EncodeFunc)(OutputStream&, unsigned);
00625         static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) };
00626         (*f[os.GetType()])(os, codepoint);
00627     }
00628 
00629     template<typename OutputStream>
00630     RAPIDJSON_FORCEINLINE static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
00631         typedef void (*EncodeFunc)(OutputStream&, unsigned);
00632         static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe) };
00633         (*f[os.GetType()])(os, codepoint);
00634     }
00635 
00636     template <typename InputStream>
00637     RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) {
00638         typedef bool (*DecodeFunc)(InputStream&, unsigned*);
00639         static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) };
00640         return (*f[is.GetType()])(is, codepoint);
00641     }
00642 
00643     template <typename InputStream, typename OutputStream>
00644     RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
00645         typedef bool (*ValidateFunc)(InputStream&, OutputStream&);
00646         static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) };
00647         return (*f[is.GetType()])(is, os);
00648     }
00649 
00650 #undef RAPIDJSON_ENCODINGS_FUNC
00651 };
00652 
00653 ///////////////////////////////////////////////////////////////////////////////
00654 // Transcoder
00655 
00656 //! Encoding conversion.
00657 template<typename SourceEncoding, typename TargetEncoding>
00658 struct Transcoder {
00659     //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream.
00660     template<typename InputStream, typename OutputStream>
00661     RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) {
00662         unsigned codepoint;
00663         if (!SourceEncoding::Decode(is, &codepoint))
00664             return false;
00665         TargetEncoding::Encode(os, codepoint);
00666         return true;
00667     }
00668 
00669     template<typename InputStream, typename OutputStream>
00670     RAPIDJSON_FORCEINLINE static bool TranscodeUnsafe(InputStream& is, OutputStream& os) {
00671         unsigned codepoint;
00672         if (!SourceEncoding::Decode(is, &codepoint))
00673             return false;
00674         TargetEncoding::EncodeUnsafe(os, codepoint);
00675         return true;
00676     }
00677 
00678     //! Validate one Unicode codepoint from an encoded stream.
00679     template<typename InputStream, typename OutputStream>
00680     RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
00681         return Transcode(is, os);   // Since source/target encoding is different, must transcode.
00682     }
00683 };
00684 
00685 // Forward declaration.
00686 template<typename Stream>
00687 inline void PutUnsafe(Stream& stream, typename Stream::Ch c);
00688 
00689 //! Specialization of Transcoder with same source and target encoding.
00690 template<typename Encoding>
00691 struct Transcoder<Encoding, Encoding> {
00692     template<typename InputStream, typename OutputStream>
00693     RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) {
00694         os.Put(is.Take());  // Just copy one code unit. This semantic is different from primary template class.
00695         return true;
00696     }
00697     
00698     template<typename InputStream, typename OutputStream>
00699     RAPIDJSON_FORCEINLINE static bool TranscodeUnsafe(InputStream& is, OutputStream& os) {
00700         PutUnsafe(os, is.Take());  // Just copy one code unit. This semantic is different from primary template class.
00701         return true;
00702     }
00703     
00704     template<typename InputStream, typename OutputStream>
00705     RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
00706         return Encoding::Validate(is, os);  // source/target encoding are the same
00707     }
00708 };
00709 
00710 RAPIDJSON_NAMESPACE_END
00711 
00712 #if defined(__GNUC__) || defined(_MSC_VER)
00713 RAPIDJSON_DIAG_POP
00714 #endif
00715 
00716 #endif // RAPIDJSON_ENCODINGS_H_