Mbed port of the Simple Plain Xml parser. See http://code.google.com/p/spxml/ for more details. This library uses less memory and is much better suited to streaming data than TinyXML (it doesn't use as many C++ features, and in particular works without streams). See http://mbed.org/users/hlipka/notebook/xml-parsing/ for usage examples.

Dependents:   spxmltest_weather VFD_fontx2_weather weather_LCD_display News_LCD_display ... more

Committer:
hlipka
Date:
Wed Nov 24 20:52:14 2010 +0000
Revision:
0:3fa97f2c0505
initial revision

Who changed what in which revision?

User | Revision | Line number | New contents of line
hlipka 0:3fa97f2c0505 1 /*
hlipka 0:3fa97f2c0505 2 * Copyright 2007 Stephen Liu
hlipka 0:3fa97f2c0505 3 * LGPL, see http://code.google.com/p/spxml/
hlipka 0:3fa97f2c0505 4 * For license terms, see the file COPYING along with this library.
hlipka 0:3fa97f2c0505 5 */
hlipka 0:3fa97f2c0505 6
hlipka 0:3fa97f2c0505 7 #include <string.h>
hlipka 0:3fa97f2c0505 8 #include <stdlib.h>
hlipka 0:3fa97f2c0505 9 #include <ctype.h>
hlipka 0:3fa97f2c0505 10
hlipka 0:3fa97f2c0505 11 #include "spxmlcodec.hpp"
hlipka 0:3fa97f2c0505 12 #include "spxmlutils.hpp"
hlipka 0:3fa97f2c0505 13
hlipka 0:3fa97f2c0505 14 const char * SP_XmlStringCodec :: DEFAULT_ENCODING = "utf-8";
hlipka 0:3fa97f2c0505 15
hlipka 0:3fa97f2c0505 16 const char SP_XmlStringCodec :: XML_CHARS [] =
hlipka 0:3fa97f2c0505 17 { '<', '>', '&', '\'', '"' };
hlipka 0:3fa97f2c0505 18 const char * SP_XmlStringCodec :: ESC_CHARS [] =
hlipka 0:3fa97f2c0505 19 { "&lt;", "&gt;", "&amp;", "&apos;", "&quot;" };
hlipka 0:3fa97f2c0505 20
hlipka 0:3fa97f2c0505 21 int SP_XmlStringCodec :: decode( const char * encoding, const char * encodeValue,
hlipka 0:3fa97f2c0505 22 SP_XmlStringBuffer * outBuffer )
hlipka 0:3fa97f2c0505 23 {
hlipka 0:3fa97f2c0505 24 int isUtf8 = ( 0 == strcasecmp( encoding, "utf-8" ) );
hlipka 0:3fa97f2c0505 25
hlipka 0:3fa97f2c0505 26 const char * pos = encodeValue;
hlipka 0:3fa97f2c0505 27 for( ; '\0' != *pos; ) {
hlipka 0:3fa97f2c0505 28 if( '&' == *pos ) {
hlipka 0:3fa97f2c0505 29 int index = -1;
hlipka 0:3fa97f2c0505 30 int len = 0;
hlipka 0:3fa97f2c0505 31 for( int i = 0; i < (int)( sizeof( ESC_CHARS ) / sizeof( ESC_CHARS[0] ) ); i++ ) {
hlipka 0:3fa97f2c0505 32 len = strlen( ESC_CHARS[ i ] );
hlipka 0:3fa97f2c0505 33 if( 0 == strncmp( pos, ESC_CHARS[i], len ) ) {
hlipka 0:3fa97f2c0505 34 index = i;
hlipka 0:3fa97f2c0505 35 break;
hlipka 0:3fa97f2c0505 36 }
hlipka 0:3fa97f2c0505 37 }
hlipka 0:3fa97f2c0505 38 if( index >= 0 ) {
hlipka 0:3fa97f2c0505 39 outBuffer->append( XML_CHARS[ index ] );
hlipka 0:3fa97f2c0505 40 pos += len;
hlipka 0:3fa97f2c0505 41 } else {
hlipka 0:3fa97f2c0505 42 char * next = "";
hlipka 0:3fa97f2c0505 43 int ch = 0;
hlipka 0:3fa97f2c0505 44 if( '#' == *( pos + 1 ) ) {
hlipka 0:3fa97f2c0505 45 if( 'x' == *( pos + 2 ) ) {
hlipka 0:3fa97f2c0505 46 ch = strtol( pos + 3, &next, 16 );
hlipka 0:3fa97f2c0505 47 } else {
hlipka 0:3fa97f2c0505 48 ch = strtol( pos + 2, &next, 10 );
hlipka 0:3fa97f2c0505 49 }
hlipka 0:3fa97f2c0505 50 }
hlipka 0:3fa97f2c0505 51
hlipka 0:3fa97f2c0505 52 // TODO: fully support xml entity, currently only support unicode entity
hlipka 0:3fa97f2c0505 53 if( ';' == *next && 0 != ch ) {
hlipka 0:3fa97f2c0505 54 if( isUtf8 ) {
hlipka 0:3fa97f2c0505 55 SP_XmlUtf8Codec::uni2utf8( ch, outBuffer );
hlipka 0:3fa97f2c0505 56 } else {
hlipka 0:3fa97f2c0505 57 outBuffer->append( ch );
hlipka 0:3fa97f2c0505 58 }
hlipka 0:3fa97f2c0505 59 pos = next + 1;
hlipka 0:3fa97f2c0505 60 } else {
hlipka 0:3fa97f2c0505 61 outBuffer->append( *pos++ );
hlipka 0:3fa97f2c0505 62 }
hlipka 0:3fa97f2c0505 63 }
hlipka 0:3fa97f2c0505 64 } else {
hlipka 0:3fa97f2c0505 65 outBuffer->append( *pos++ );
hlipka 0:3fa97f2c0505 66 }
hlipka 0:3fa97f2c0505 67 }
hlipka 0:3fa97f2c0505 68
hlipka 0:3fa97f2c0505 69 return 0;
hlipka 0:3fa97f2c0505 70 }
hlipka 0:3fa97f2c0505 71
hlipka 0:3fa97f2c0505 72 int SP_XmlStringCodec :: encode( const char * encoding, const char * decodeValue,
hlipka 0:3fa97f2c0505 73 SP_XmlStringBuffer * outBuffer )
hlipka 0:3fa97f2c0505 74 {
hlipka 0:3fa97f2c0505 75 int isUtf8 = ( 0 == strcasecmp( encoding, "utf-8" ) );
hlipka 0:3fa97f2c0505 76
hlipka 0:3fa97f2c0505 77 const unsigned char * pos = (unsigned char *)decodeValue;
hlipka 0:3fa97f2c0505 78 for( ; '\0' != *pos; pos++ ) {
hlipka 0:3fa97f2c0505 79 int index = -1;
hlipka 0:3fa97f2c0505 80 for( int i = 0; i < (int)( sizeof( XML_CHARS ) / sizeof( XML_CHARS[0] ) ); i++ ) {
hlipka 0:3fa97f2c0505 81 if( XML_CHARS[i] == *pos ) {
hlipka 0:3fa97f2c0505 82 index = i;
hlipka 0:3fa97f2c0505 83 break;
hlipka 0:3fa97f2c0505 84 }
hlipka 0:3fa97f2c0505 85 }
hlipka 0:3fa97f2c0505 86 if( index >= 0 && '\'' != *pos ) {
hlipka 0:3fa97f2c0505 87 outBuffer->append( ESC_CHARS[ index ] );
hlipka 0:3fa97f2c0505 88 } else {
hlipka 0:3fa97f2c0505 89 if( isUtf8 ) {
hlipka 0:3fa97f2c0505 90 int ch = 0;
hlipka 0:3fa97f2c0505 91 int len = SP_XmlUtf8Codec::utf82uni( (unsigned char*)pos, &ch );
hlipka 0:3fa97f2c0505 92
hlipka 0:3fa97f2c0505 93 if( len > 0 ) {
hlipka 0:3fa97f2c0505 94 pos += len - 1;
hlipka 0:3fa97f2c0505 95
hlipka 0:3fa97f2c0505 96 char temp[ 32 ] = { 0 };
hlipka 0:3fa97f2c0505 97 snprintf( temp, sizeof( temp ), "&#%d;", ch );
hlipka 0:3fa97f2c0505 98 outBuffer->append( temp );
hlipka 0:3fa97f2c0505 99 } else {
hlipka 0:3fa97f2c0505 100 outBuffer->append( *pos );
hlipka 0:3fa97f2c0505 101 }
hlipka 0:3fa97f2c0505 102 } else {
hlipka 0:3fa97f2c0505 103 if( *pos < 32 ) {
hlipka 0:3fa97f2c0505 104 char temp[ 32 ] = { 0 };
hlipka 0:3fa97f2c0505 105 snprintf( temp, sizeof( temp ), "&#%d;", *pos );
hlipka 0:3fa97f2c0505 106 outBuffer->append( temp );
hlipka 0:3fa97f2c0505 107 } else {
hlipka 0:3fa97f2c0505 108 outBuffer->append( *pos );
hlipka 0:3fa97f2c0505 109 }
hlipka 0:3fa97f2c0505 110 }
hlipka 0:3fa97f2c0505 111 }
hlipka 0:3fa97f2c0505 112 }
hlipka 0:3fa97f2c0505 113
hlipka 0:3fa97f2c0505 114 return 0;
hlipka 0:3fa97f2c0505 115 }
hlipka 0:3fa97f2c0505 116
hlipka 0:3fa97f2c0505 117 int SP_XmlStringCodec :: isNameChar( const char * encoding, char c )
hlipka 0:3fa97f2c0505 118 {
hlipka 0:3fa97f2c0505 119 if( 0 == strcasecmp( encoding, "utf-8" ) ) {
hlipka 0:3fa97f2c0505 120 return 1;
hlipka 0:3fa97f2c0505 121 } else {
hlipka 0:3fa97f2c0505 122 return isalnum(c) || c == ':' || c == '-' || c == '.' || c == '_';
hlipka 0:3fa97f2c0505 123 }
hlipka 0:3fa97f2c0505 124 }
hlipka 0:3fa97f2c0505 125
hlipka 0:3fa97f2c0505 126 //=========================================================
hlipka 0:3fa97f2c0505 127
hlipka 0:3fa97f2c0505 128 int SP_XmlUtf8Codec :: utf82uni( const unsigned char * utf8, int * ch )
hlipka 0:3fa97f2c0505 129 {
hlipka 0:3fa97f2c0505 130 int len = 0;
hlipka 0:3fa97f2c0505 131
hlipka 0:3fa97f2c0505 132 unsigned char c1 = 0, c2 = 0, c3 = 0, c4 = 0;
hlipka 0:3fa97f2c0505 133
hlipka 0:3fa97f2c0505 134 if( *utf8 >= 0x80 ) {
hlipka 0:3fa97f2c0505 135 c1 = *utf8++;
hlipka 0:3fa97f2c0505 136
hlipka 0:3fa97f2c0505 137 if( c1 < 0xE0 ) { // 2 bytes
hlipka 0:3fa97f2c0505 138 if( '\0' != ( c2 = *utf8 ) ) {
hlipka 0:3fa97f2c0505 139 *ch = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
hlipka 0:3fa97f2c0505 140 len = 2;
hlipka 0:3fa97f2c0505 141 }
hlipka 0:3fa97f2c0505 142 } else if( c1 < 0xF0 ) { // 3 bytes
hlipka 0:3fa97f2c0505 143 if( '\0' != ( c2 = *utf8++ ) && '\0' != ( c3 = *utf8 ) ) {
hlipka 0:3fa97f2c0505 144 *ch = ((c1 & 0x0F) << 12) | ((c2 & 0x3F) << 6)| (c3 & 0x3F);
hlipka 0:3fa97f2c0505 145 len = 3;
hlipka 0:3fa97f2c0505 146 }
hlipka 0:3fa97f2c0505 147 } else { // 4 bytes
hlipka 0:3fa97f2c0505 148 if( '\0' != ( c2 = *utf8++ ) && '\0' != ( c3 = *utf8++ )
hlipka 0:3fa97f2c0505 149 && '\0' != ( c4 = *utf8 ) ) {
hlipka 0:3fa97f2c0505 150 *ch = ((c1 & 0x07) << 16) | ((c2 & 0x3F) << 12)
hlipka 0:3fa97f2c0505 151 | ((c3 & 0x3F) << 6) | (c4 & 0x3F);
hlipka 0:3fa97f2c0505 152 len = 4;
hlipka 0:3fa97f2c0505 153 }
hlipka 0:3fa97f2c0505 154 }
hlipka 0:3fa97f2c0505 155 }
hlipka 0:3fa97f2c0505 156
hlipka 0:3fa97f2c0505 157 return len;
hlipka 0:3fa97f2c0505 158 }
hlipka 0:3fa97f2c0505 159
hlipka 0:3fa97f2c0505 160 void SP_XmlUtf8Codec :: uni2utf8( int ch, SP_XmlStringBuffer * outBuffer )
hlipka 0:3fa97f2c0505 161 {
hlipka 0:3fa97f2c0505 162 if( ch < 0x80 ) outBuffer->append( ch );
hlipka 0:3fa97f2c0505 163 else if( ch < 0x800 ) {
hlipka 0:3fa97f2c0505 164 outBuffer->append( 0xC0 | ( ch >> 6 ) );
hlipka 0:3fa97f2c0505 165 outBuffer->append( 0x80 | ( ch & 0x3F ) );
hlipka 0:3fa97f2c0505 166 } else if( ch < 0x10000 ) {
hlipka 0:3fa97f2c0505 167 outBuffer->append( 0xE0 | ( ch >> 12 ) );
hlipka 0:3fa97f2c0505 168 outBuffer->append( 0x80 | ( ( ch >> 6 ) & 0x3F ) );
hlipka 0:3fa97f2c0505 169 outBuffer->append( 0x80 | ( ch & 0x3F ) );
hlipka 0:3fa97f2c0505 170 } else if( ch < 0x200000 ) {
hlipka 0:3fa97f2c0505 171 outBuffer->append( 0xF0 | ( ch >> 18 ) );
hlipka 0:3fa97f2c0505 172 outBuffer->append( 0x80 | ( ( ch >> 12 ) & 0x3F ) );
hlipka 0:3fa97f2c0505 173 outBuffer->append( 0x80 | ( ( ch >> 6 ) & 0x3F ) );
hlipka 0:3fa97f2c0505 174 outBuffer->append( 0x80 | ( ch & 0x3F ) );
hlipka 0:3fa97f2c0505 175 }
hlipka 0:3fa97f2c0505 176 }
hlipka 0:3fa97f2c0505 177