Mbed port of the Simple Plain Xml parser. See http://code.google.com/p/spxml/ for more details. This library uses less memory and is much better suited to streaming data than TinyXML (it doesn't use as many C++ features and, in particular, works without streams). See http://mbed.org/users/hlipka/notebook/xml-parsing/ for usage examples.
Dependents: spxmltest_weather VFD_fontx2_weather weather_LCD_display News_LCD_display ... more
spxmlcodec.cpp@0:3fa97f2c0505, 2010-11-24 (annotated)
- Committer:
- hlipka
- Date:
- Wed Nov 24 20:52:14 2010 +0000
- Revision:
- 0:3fa97f2c0505
initial revision
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
hlipka | 0:3fa97f2c0505 | 1 | /* |
hlipka | 0:3fa97f2c0505 | 2 | * Copyright 2007 Stephen Liu |
hlipka | 0:3fa97f2c0505 | 3 | * LGPL, see http://code.google.com/p/spxml/ |
hlipka | 0:3fa97f2c0505 | 4 | * For license terms, see the file COPYING along with this library. |
hlipka | 0:3fa97f2c0505 | 5 | */ |
hlipka | 0:3fa97f2c0505 | 6 | |
hlipka | 0:3fa97f2c0505 | 7 | #include <string.h> |
hlipka | 0:3fa97f2c0505 | 8 | #include <stdlib.h> |
hlipka | 0:3fa97f2c0505 | 9 | #include <ctype.h> |
hlipka | 0:3fa97f2c0505 | 10 | |
hlipka | 0:3fa97f2c0505 | 11 | #include "spxmlcodec.hpp" |
hlipka | 0:3fa97f2c0505 | 12 | #include "spxmlutils.hpp" |
hlipka | 0:3fa97f2c0505 | 13 | |
hlipka | 0:3fa97f2c0505 | 14 | const char * SP_XmlStringCodec :: DEFAULT_ENCODING = "utf-8"; |
hlipka | 0:3fa97f2c0505 | 15 | |
hlipka | 0:3fa97f2c0505 | 16 | const char SP_XmlStringCodec :: XML_CHARS [] = |
hlipka | 0:3fa97f2c0505 | 17 | { '<', '>', '&', '\'', '"' }; |
hlipka | 0:3fa97f2c0505 | 18 | const char * SP_XmlStringCodec :: ESC_CHARS [] = |
hlipka | 0:3fa97f2c0505 | 19 | { "<", ">", "&", "'", """ }; |
hlipka | 0:3fa97f2c0505 | 20 | |
hlipka | 0:3fa97f2c0505 | 21 | int SP_XmlStringCodec :: decode( const char * encoding, const char * encodeValue, |
hlipka | 0:3fa97f2c0505 | 22 | SP_XmlStringBuffer * outBuffer ) |
hlipka | 0:3fa97f2c0505 | 23 | { |
hlipka | 0:3fa97f2c0505 | 24 | int isUtf8 = ( 0 == strcasecmp( encoding, "utf-8" ) ); |
hlipka | 0:3fa97f2c0505 | 25 | |
hlipka | 0:3fa97f2c0505 | 26 | const char * pos = encodeValue; |
hlipka | 0:3fa97f2c0505 | 27 | for( ; '\0' != *pos; ) { |
hlipka | 0:3fa97f2c0505 | 28 | if( '&' == *pos ) { |
hlipka | 0:3fa97f2c0505 | 29 | int index = -1; |
hlipka | 0:3fa97f2c0505 | 30 | int len = 0; |
hlipka | 0:3fa97f2c0505 | 31 | for( int i = 0; i < (int)( sizeof( ESC_CHARS ) / sizeof( ESC_CHARS[0] ) ); i++ ) { |
hlipka | 0:3fa97f2c0505 | 32 | len = strlen( ESC_CHARS[ i ] ); |
hlipka | 0:3fa97f2c0505 | 33 | if( 0 == strncmp( pos, ESC_CHARS[i], len ) ) { |
hlipka | 0:3fa97f2c0505 | 34 | index = i; |
hlipka | 0:3fa97f2c0505 | 35 | break; |
hlipka | 0:3fa97f2c0505 | 36 | } |
hlipka | 0:3fa97f2c0505 | 37 | } |
hlipka | 0:3fa97f2c0505 | 38 | if( index >= 0 ) { |
hlipka | 0:3fa97f2c0505 | 39 | outBuffer->append( XML_CHARS[ index ] ); |
hlipka | 0:3fa97f2c0505 | 40 | pos += len; |
hlipka | 0:3fa97f2c0505 | 41 | } else { |
hlipka | 0:3fa97f2c0505 | 42 | char * next = ""; |
hlipka | 0:3fa97f2c0505 | 43 | int ch = 0; |
hlipka | 0:3fa97f2c0505 | 44 | if( '#' == *( pos + 1 ) ) { |
hlipka | 0:3fa97f2c0505 | 45 | if( 'x' == *( pos + 2 ) ) { |
hlipka | 0:3fa97f2c0505 | 46 | ch = strtol( pos + 3, &next, 16 ); |
hlipka | 0:3fa97f2c0505 | 47 | } else { |
hlipka | 0:3fa97f2c0505 | 48 | ch = strtol( pos + 2, &next, 10 ); |
hlipka | 0:3fa97f2c0505 | 49 | } |
hlipka | 0:3fa97f2c0505 | 50 | } |
hlipka | 0:3fa97f2c0505 | 51 | |
hlipka | 0:3fa97f2c0505 | 52 | // TODO: fully support xml entity, currently only support unicode entity |
hlipka | 0:3fa97f2c0505 | 53 | if( ';' == *next && 0 != ch ) { |
hlipka | 0:3fa97f2c0505 | 54 | if( isUtf8 ) { |
hlipka | 0:3fa97f2c0505 | 55 | SP_XmlUtf8Codec::uni2utf8( ch, outBuffer ); |
hlipka | 0:3fa97f2c0505 | 56 | } else { |
hlipka | 0:3fa97f2c0505 | 57 | outBuffer->append( ch ); |
hlipka | 0:3fa97f2c0505 | 58 | } |
hlipka | 0:3fa97f2c0505 | 59 | pos = next + 1; |
hlipka | 0:3fa97f2c0505 | 60 | } else { |
hlipka | 0:3fa97f2c0505 | 61 | outBuffer->append( *pos++ ); |
hlipka | 0:3fa97f2c0505 | 62 | } |
hlipka | 0:3fa97f2c0505 | 63 | } |
hlipka | 0:3fa97f2c0505 | 64 | } else { |
hlipka | 0:3fa97f2c0505 | 65 | outBuffer->append( *pos++ ); |
hlipka | 0:3fa97f2c0505 | 66 | } |
hlipka | 0:3fa97f2c0505 | 67 | } |
hlipka | 0:3fa97f2c0505 | 68 | |
hlipka | 0:3fa97f2c0505 | 69 | return 0; |
hlipka | 0:3fa97f2c0505 | 70 | } |
hlipka | 0:3fa97f2c0505 | 71 | |
hlipka | 0:3fa97f2c0505 | 72 | int SP_XmlStringCodec :: encode( const char * encoding, const char * decodeValue, |
hlipka | 0:3fa97f2c0505 | 73 | SP_XmlStringBuffer * outBuffer ) |
hlipka | 0:3fa97f2c0505 | 74 | { |
hlipka | 0:3fa97f2c0505 | 75 | int isUtf8 = ( 0 == strcasecmp( encoding, "utf-8" ) ); |
hlipka | 0:3fa97f2c0505 | 76 | |
hlipka | 0:3fa97f2c0505 | 77 | const unsigned char * pos = (unsigned char *)decodeValue; |
hlipka | 0:3fa97f2c0505 | 78 | for( ; '\0' != *pos; pos++ ) { |
hlipka | 0:3fa97f2c0505 | 79 | int index = -1; |
hlipka | 0:3fa97f2c0505 | 80 | for( int i = 0; i < (int)( sizeof( XML_CHARS ) / sizeof( XML_CHARS[0] ) ); i++ ) { |
hlipka | 0:3fa97f2c0505 | 81 | if( XML_CHARS[i] == *pos ) { |
hlipka | 0:3fa97f2c0505 | 82 | index = i; |
hlipka | 0:3fa97f2c0505 | 83 | break; |
hlipka | 0:3fa97f2c0505 | 84 | } |
hlipka | 0:3fa97f2c0505 | 85 | } |
hlipka | 0:3fa97f2c0505 | 86 | if( index >= 0 && '\'' != *pos ) { |
hlipka | 0:3fa97f2c0505 | 87 | outBuffer->append( ESC_CHARS[ index ] ); |
hlipka | 0:3fa97f2c0505 | 88 | } else { |
hlipka | 0:3fa97f2c0505 | 89 | if( isUtf8 ) { |
hlipka | 0:3fa97f2c0505 | 90 | int ch = 0; |
hlipka | 0:3fa97f2c0505 | 91 | int len = SP_XmlUtf8Codec::utf82uni( (unsigned char*)pos, &ch ); |
hlipka | 0:3fa97f2c0505 | 92 | |
hlipka | 0:3fa97f2c0505 | 93 | if( len > 0 ) { |
hlipka | 0:3fa97f2c0505 | 94 | pos += len - 1; |
hlipka | 0:3fa97f2c0505 | 95 | |
hlipka | 0:3fa97f2c0505 | 96 | char temp[ 32 ] = { 0 }; |
hlipka | 0:3fa97f2c0505 | 97 | snprintf( temp, sizeof( temp ), "&#%d;", ch ); |
hlipka | 0:3fa97f2c0505 | 98 | outBuffer->append( temp ); |
hlipka | 0:3fa97f2c0505 | 99 | } else { |
hlipka | 0:3fa97f2c0505 | 100 | outBuffer->append( *pos ); |
hlipka | 0:3fa97f2c0505 | 101 | } |
hlipka | 0:3fa97f2c0505 | 102 | } else { |
hlipka | 0:3fa97f2c0505 | 103 | if( *pos < 32 ) { |
hlipka | 0:3fa97f2c0505 | 104 | char temp[ 32 ] = { 0 }; |
hlipka | 0:3fa97f2c0505 | 105 | snprintf( temp, sizeof( temp ), "&#%d;", *pos ); |
hlipka | 0:3fa97f2c0505 | 106 | outBuffer->append( temp ); |
hlipka | 0:3fa97f2c0505 | 107 | } else { |
hlipka | 0:3fa97f2c0505 | 108 | outBuffer->append( *pos ); |
hlipka | 0:3fa97f2c0505 | 109 | } |
hlipka | 0:3fa97f2c0505 | 110 | } |
hlipka | 0:3fa97f2c0505 | 111 | } |
hlipka | 0:3fa97f2c0505 | 112 | } |
hlipka | 0:3fa97f2c0505 | 113 | |
hlipka | 0:3fa97f2c0505 | 114 | return 0; |
hlipka | 0:3fa97f2c0505 | 115 | } |
hlipka | 0:3fa97f2c0505 | 116 | |
hlipka | 0:3fa97f2c0505 | 117 | int SP_XmlStringCodec :: isNameChar( const char * encoding, char c ) |
hlipka | 0:3fa97f2c0505 | 118 | { |
hlipka | 0:3fa97f2c0505 | 119 | if( 0 == strcasecmp( encoding, "utf-8" ) ) { |
hlipka | 0:3fa97f2c0505 | 120 | return 1; |
hlipka | 0:3fa97f2c0505 | 121 | } else { |
hlipka | 0:3fa97f2c0505 | 122 | return isalnum(c) || c == ':' || c == '-' || c == '.' || c == '_'; |
hlipka | 0:3fa97f2c0505 | 123 | } |
hlipka | 0:3fa97f2c0505 | 124 | } |
hlipka | 0:3fa97f2c0505 | 125 | |
hlipka | 0:3fa97f2c0505 | 126 | //========================================================= |
hlipka | 0:3fa97f2c0505 | 127 | |
hlipka | 0:3fa97f2c0505 | 128 | int SP_XmlUtf8Codec :: utf82uni( const unsigned char * utf8, int * ch ) |
hlipka | 0:3fa97f2c0505 | 129 | { |
hlipka | 0:3fa97f2c0505 | 130 | int len = 0; |
hlipka | 0:3fa97f2c0505 | 131 | |
hlipka | 0:3fa97f2c0505 | 132 | unsigned char c1 = 0, c2 = 0, c3 = 0, c4 = 0; |
hlipka | 0:3fa97f2c0505 | 133 | |
hlipka | 0:3fa97f2c0505 | 134 | if( *utf8 >= 0x80 ) { |
hlipka | 0:3fa97f2c0505 | 135 | c1 = *utf8++; |
hlipka | 0:3fa97f2c0505 | 136 | |
hlipka | 0:3fa97f2c0505 | 137 | if( c1 < 0xE0 ) { // 2 bytes |
hlipka | 0:3fa97f2c0505 | 138 | if( '\0' != ( c2 = *utf8 ) ) { |
hlipka | 0:3fa97f2c0505 | 139 | *ch = ((c1 & 0x1F) << 6) | (c2 & 0x3F); |
hlipka | 0:3fa97f2c0505 | 140 | len = 2; |
hlipka | 0:3fa97f2c0505 | 141 | } |
hlipka | 0:3fa97f2c0505 | 142 | } else if( c1 < 0xF0 ) { // 3 bytes |
hlipka | 0:3fa97f2c0505 | 143 | if( '\0' != ( c2 = *utf8++ ) && '\0' != ( c3 = *utf8 ) ) { |
hlipka | 0:3fa97f2c0505 | 144 | *ch = ((c1 & 0x0F) << 12) | ((c2 & 0x3F) << 6)| (c3 & 0x3F); |
hlipka | 0:3fa97f2c0505 | 145 | len = 3; |
hlipka | 0:3fa97f2c0505 | 146 | } |
hlipka | 0:3fa97f2c0505 | 147 | } else { // 4 bytes |
hlipka | 0:3fa97f2c0505 | 148 | if( '\0' != ( c2 = *utf8++ ) && '\0' != ( c3 = *utf8++ ) |
hlipka | 0:3fa97f2c0505 | 149 | && '\0' != ( c4 = *utf8 ) ) { |
hlipka | 0:3fa97f2c0505 | 150 | *ch = ((c1 & 0x07) << 16) | ((c2 & 0x3F) << 12) |
hlipka | 0:3fa97f2c0505 | 151 | | ((c3 & 0x3F) << 6) | (c4 & 0x3F); |
hlipka | 0:3fa97f2c0505 | 152 | len = 4; |
hlipka | 0:3fa97f2c0505 | 153 | } |
hlipka | 0:3fa97f2c0505 | 154 | } |
hlipka | 0:3fa97f2c0505 | 155 | } |
hlipka | 0:3fa97f2c0505 | 156 | |
hlipka | 0:3fa97f2c0505 | 157 | return len; |
hlipka | 0:3fa97f2c0505 | 158 | } |
hlipka | 0:3fa97f2c0505 | 159 | |
hlipka | 0:3fa97f2c0505 | 160 | void SP_XmlUtf8Codec :: uni2utf8( int ch, SP_XmlStringBuffer * outBuffer ) |
hlipka | 0:3fa97f2c0505 | 161 | { |
hlipka | 0:3fa97f2c0505 | 162 | if( ch < 0x80 ) outBuffer->append( ch ); |
hlipka | 0:3fa97f2c0505 | 163 | else if( ch < 0x800 ) { |
hlipka | 0:3fa97f2c0505 | 164 | outBuffer->append( 0xC0 | ( ch >> 6 ) ); |
hlipka | 0:3fa97f2c0505 | 165 | outBuffer->append( 0x80 | ( ch & 0x3F ) ); |
hlipka | 0:3fa97f2c0505 | 166 | } else if( ch < 0x10000 ) { |
hlipka | 0:3fa97f2c0505 | 167 | outBuffer->append( 0xE0 | ( ch >> 12 ) ); |
hlipka | 0:3fa97f2c0505 | 168 | outBuffer->append( 0x80 | ( ( ch >> 6 ) & 0x3F ) ); |
hlipka | 0:3fa97f2c0505 | 169 | outBuffer->append( 0x80 | ( ch & 0x3F ) ); |
hlipka | 0:3fa97f2c0505 | 170 | } else if( ch < 0x200000 ) { |
hlipka | 0:3fa97f2c0505 | 171 | outBuffer->append( 0xF0 | ( ch >> 18 ) ); |
hlipka | 0:3fa97f2c0505 | 172 | outBuffer->append( 0x80 | ( ( ch >> 12 ) & 0x3F ) ); |
hlipka | 0:3fa97f2c0505 | 173 | outBuffer->append( 0x80 | ( ( ch >> 6 ) & 0x3F ) ); |
hlipka | 0:3fa97f2c0505 | 174 | outBuffer->append( 0x80 | ( ch & 0x3F ) ); |
hlipka | 0:3fa97f2c0505 | 175 | } |
hlipka | 0:3fa97f2c0505 | 176 | } |
hlipka | 0:3fa97f2c0505 | 177 |