A port of the irrlicht XML parser library.
CXMLReaderImpl.h@0:41a49a73580c, 2010-11-17 (annotated)
- Committer:
- hlipka
- Date:
- Wed Nov 17 20:19:41 2010 +0000
- Revision:
- 0:41a49a73580c
initial version
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
hlipka | 0:41a49a73580c | 1 | // Copyright (C) 2002-2005 Nikolaus Gebhardt |
hlipka | 0:41a49a73580c | 2 | // This file is part of the "Irrlicht Engine" and the "irrXML" project. |
hlipka | 0:41a49a73580c | 3 | // For conditions of distribution and use, see copyright notice in irrlicht.h and/or irrXML.h |
hlipka | 0:41a49a73580c | 4 | |
hlipka | 0:41a49a73580c | 5 | #ifndef __ICXML_READER_IMPL_H_INCLUDED__ |
hlipka | 0:41a49a73580c | 6 | #define __ICXML_READER_IMPL_H_INCLUDED__ |
hlipka | 0:41a49a73580c | 7 | |
hlipka | 0:41a49a73580c | 8 | #include "irrXML.h" |
hlipka | 0:41a49a73580c | 9 | #include "irrString.h" |
hlipka | 0:41a49a73580c | 10 | #include "irrArray.h" |
hlipka | 0:41a49a73580c | 11 | #include "fast_atof.h" |
hlipka | 0:41a49a73580c | 12 | |
hlipka | 0:41a49a73580c | 13 | #ifdef _DEBUG |
hlipka | 0:41a49a73580c | 14 | #define IRR_DEBUGPRINT(x) printf((x)); |
hlipka | 0:41a49a73580c | 15 | #else // _DEBUG |
hlipka | 0:41a49a73580c | 16 | #define IRR_DEBUGPRINT(x) |
hlipka | 0:41a49a73580c | 17 | #endif // _DEBUG |
hlipka | 0:41a49a73580c | 18 | |
hlipka | 0:41a49a73580c | 19 | |
hlipka | 0:41a49a73580c | 20 | namespace irr |
hlipka | 0:41a49a73580c | 21 | { |
hlipka | 0:41a49a73580c | 22 | namespace io |
hlipka | 0:41a49a73580c | 23 | { |
hlipka | 0:41a49a73580c | 24 | |
hlipka | 0:41a49a73580c | 25 | |
hlipka | 0:41a49a73580c | 26 | //! implementation of the IrrXMLReader |
hlipka | 0:41a49a73580c | 27 | template<class char_type, class superclass> |
hlipka | 0:41a49a73580c | 28 | class CXMLReaderImpl : public IIrrXMLReader<char_type, superclass> |
hlipka | 0:41a49a73580c | 29 | { |
hlipka | 0:41a49a73580c | 30 | public: |
hlipka | 0:41a49a73580c | 31 | |
hlipka | 0:41a49a73580c | 32 | //! Constructor |
hlipka | 0:41a49a73580c | 33 | CXMLReaderImpl(IFileReadCallBack* callback, bool deleteCallBack = true) |
hlipka | 0:41a49a73580c | 34 | : TextData(0), P(0), TextSize(0), TextBegin(0), CurrentNodeType(EXN_NONE), |
hlipka | 0:41a49a73580c | 35 | SourceFormat(ETF_ASCII), TargetFormat(ETF_ASCII) |
hlipka | 0:41a49a73580c | 36 | { |
hlipka | 0:41a49a73580c | 37 | if (!callback) |
hlipka | 0:41a49a73580c | 38 | return; |
hlipka | 0:41a49a73580c | 39 | |
hlipka | 0:41a49a73580c | 40 | storeTargetFormat(); |
hlipka | 0:41a49a73580c | 41 | |
hlipka | 0:41a49a73580c | 42 | // read whole xml file |
hlipka | 0:41a49a73580c | 43 | |
hlipka | 0:41a49a73580c | 44 | readFile(callback); |
hlipka | 0:41a49a73580c | 45 | |
hlipka | 0:41a49a73580c | 46 | // clean up |
hlipka | 0:41a49a73580c | 47 | |
hlipka | 0:41a49a73580c | 48 | if (deleteCallBack) |
hlipka | 0:41a49a73580c | 49 | delete callback; |
hlipka | 0:41a49a73580c | 50 | |
hlipka | 0:41a49a73580c | 51 | // create list with special characters |
hlipka | 0:41a49a73580c | 52 | |
hlipka | 0:41a49a73580c | 53 | createSpecialCharacterList(); |
hlipka | 0:41a49a73580c | 54 | |
hlipka | 0:41a49a73580c | 55 | // set pointer to text begin |
hlipka | 0:41a49a73580c | 56 | P = TextBegin; |
hlipka | 0:41a49a73580c | 57 | } |
hlipka | 0:41a49a73580c | 58 | |
hlipka | 0:41a49a73580c | 59 | |
hlipka | 0:41a49a73580c | 60 | //! Destructor |
hlipka | 0:41a49a73580c | 61 | virtual ~CXMLReaderImpl() |
hlipka | 0:41a49a73580c | 62 | { |
hlipka | 0:41a49a73580c | 63 | delete [] TextData; |
hlipka | 0:41a49a73580c | 64 | } |
hlipka | 0:41a49a73580c | 65 | |
hlipka | 0:41a49a73580c | 66 | |
hlipka | 0:41a49a73580c | 67 | //! Reads forward to the next xml node. |
hlipka | 0:41a49a73580c | 68 | //! \return Returns false, if there was no further node. |
hlipka | 0:41a49a73580c | 69 | virtual bool read() |
hlipka | 0:41a49a73580c | 70 | { |
hlipka | 0:41a49a73580c | 71 | // if not end reached, parse the node |
hlipka | 0:41a49a73580c | 72 | if (P && (unsigned int)(P - TextBegin) < TextSize - 1 && *P != 0) |
hlipka | 0:41a49a73580c | 73 | { |
hlipka | 0:41a49a73580c | 74 | parseCurrentNode(); |
hlipka | 0:41a49a73580c | 75 | return true; |
hlipka | 0:41a49a73580c | 76 | } |
hlipka | 0:41a49a73580c | 77 | |
hlipka | 0:41a49a73580c | 78 | _IRR_IMPLEMENT_MANAGED_MARSHALLING_BUGFIX; |
hlipka | 0:41a49a73580c | 79 | return false; |
hlipka | 0:41a49a73580c | 80 | } |
hlipka | 0:41a49a73580c | 81 | |
hlipka | 0:41a49a73580c | 82 | |
hlipka | 0:41a49a73580c | 83 | //! Returns the type of the current XML node. |
hlipka | 0:41a49a73580c | 84 | virtual EXML_NODE getNodeType() const |
hlipka | 0:41a49a73580c | 85 | { |
hlipka | 0:41a49a73580c | 86 | return CurrentNodeType; |
hlipka | 0:41a49a73580c | 87 | } |
hlipka | 0:41a49a73580c | 88 | |
hlipka | 0:41a49a73580c | 89 | |
hlipka | 0:41a49a73580c | 90 | //! Returns attribute count of the current XML node. |
hlipka | 0:41a49a73580c | 91 | virtual int getAttributeCount() const |
hlipka | 0:41a49a73580c | 92 | { |
hlipka | 0:41a49a73580c | 93 | return Attributes.size(); |
hlipka | 0:41a49a73580c | 94 | } |
hlipka | 0:41a49a73580c | 95 | |
hlipka | 0:41a49a73580c | 96 | |
hlipka | 0:41a49a73580c | 97 | //! Returns name of an attribute. |
hlipka | 0:41a49a73580c | 98 | virtual const char_type* getAttributeName(int idx) const |
hlipka | 0:41a49a73580c | 99 | { |
hlipka | 0:41a49a73580c | 100 | if (idx < 0 || idx >= (int)Attributes.size()) |
hlipka | 0:41a49a73580c | 101 | return 0; |
hlipka | 0:41a49a73580c | 102 | |
hlipka | 0:41a49a73580c | 103 | return Attributes[idx].Name.c_str(); |
hlipka | 0:41a49a73580c | 104 | } |
hlipka | 0:41a49a73580c | 105 | |
hlipka | 0:41a49a73580c | 106 | |
hlipka | 0:41a49a73580c | 107 | //! Returns the value of an attribute. |
hlipka | 0:41a49a73580c | 108 | virtual const char_type* getAttributeValue(int idx) const |
hlipka | 0:41a49a73580c | 109 | { |
hlipka | 0:41a49a73580c | 110 | if (idx < 0 || idx >= (int)Attributes.size()) |
hlipka | 0:41a49a73580c | 111 | return 0; |
hlipka | 0:41a49a73580c | 112 | |
hlipka | 0:41a49a73580c | 113 | return Attributes[idx].Value.c_str(); |
hlipka | 0:41a49a73580c | 114 | } |
hlipka | 0:41a49a73580c | 115 | |
hlipka | 0:41a49a73580c | 116 | |
hlipka | 0:41a49a73580c | 117 | //! Returns the value of an attribute. |
hlipka | 0:41a49a73580c | 118 | virtual const char_type* getAttributeValue(const char_type* name) const |
hlipka | 0:41a49a73580c | 119 | { |
hlipka | 0:41a49a73580c | 120 | const SAttribute* attr = getAttributeByName(name); |
hlipka | 0:41a49a73580c | 121 | if (!attr) |
hlipka | 0:41a49a73580c | 122 | return 0; |
hlipka | 0:41a49a73580c | 123 | |
hlipka | 0:41a49a73580c | 124 | return attr->Value.c_str(); |
hlipka | 0:41a49a73580c | 125 | } |
hlipka | 0:41a49a73580c | 126 | |
hlipka | 0:41a49a73580c | 127 | |
hlipka | 0:41a49a73580c | 128 | //! Returns the value of an attribute |
hlipka | 0:41a49a73580c | 129 | virtual const char_type* getAttributeValueSafe(const char_type* name) const |
hlipka | 0:41a49a73580c | 130 | { |
hlipka | 0:41a49a73580c | 131 | const SAttribute* attr = getAttributeByName(name); |
hlipka | 0:41a49a73580c | 132 | if (!attr) |
hlipka | 0:41a49a73580c | 133 | return EmptyString.c_str(); |
hlipka | 0:41a49a73580c | 134 | |
hlipka | 0:41a49a73580c | 135 | return attr->Value.c_str(); |
hlipka | 0:41a49a73580c | 136 | } |
hlipka | 0:41a49a73580c | 137 | |
hlipka | 0:41a49a73580c | 138 | |
hlipka | 0:41a49a73580c | 139 | |
hlipka | 0:41a49a73580c | 140 | //! Returns the value of an attribute as integer. |
hlipka | 0:41a49a73580c | 141 | virtual int getAttributeValueAsInt(const char_type* name) const |
hlipka | 0:41a49a73580c | 142 | { |
hlipka | 0:41a49a73580c | 143 | return (int)getAttributeValueAsFloat(name); |
hlipka | 0:41a49a73580c | 144 | } |
hlipka | 0:41a49a73580c | 145 | |
hlipka | 0:41a49a73580c | 146 | |
hlipka | 0:41a49a73580c | 147 | //! Returns the value of an attribute as integer. |
hlipka | 0:41a49a73580c | 148 | virtual int getAttributeValueAsInt(int idx) const |
hlipka | 0:41a49a73580c | 149 | { |
hlipka | 0:41a49a73580c | 150 | return (int)getAttributeValueAsFloat(idx); |
hlipka | 0:41a49a73580c | 151 | } |
hlipka | 0:41a49a73580c | 152 | |
hlipka | 0:41a49a73580c | 153 | |
hlipka | 0:41a49a73580c | 154 | //! Returns the value of an attribute as float. |
hlipka | 0:41a49a73580c | 155 | virtual float getAttributeValueAsFloat(const char_type* name) const |
hlipka | 0:41a49a73580c | 156 | { |
hlipka | 0:41a49a73580c | 157 | const SAttribute* attr = getAttributeByName(name); |
hlipka | 0:41a49a73580c | 158 | if (!attr) |
hlipka | 0:41a49a73580c | 159 | return 0; |
hlipka | 0:41a49a73580c | 160 | |
hlipka | 0:41a49a73580c | 161 | core::stringc c = attr->Value.c_str(); |
hlipka | 0:41a49a73580c | 162 | return core::fast_atof(c.c_str()); |
hlipka | 0:41a49a73580c | 163 | } |
hlipka | 0:41a49a73580c | 164 | |
hlipka | 0:41a49a73580c | 165 | |
hlipka | 0:41a49a73580c | 166 | //! Returns the value of an attribute as float. |
hlipka | 0:41a49a73580c | 167 | virtual float getAttributeValueAsFloat(int idx) const |
hlipka | 0:41a49a73580c | 168 | { |
hlipka | 0:41a49a73580c | 169 | const char_type* attrvalue = getAttributeValue(idx); |
hlipka | 0:41a49a73580c | 170 | if (!attrvalue) |
hlipka | 0:41a49a73580c | 171 | return 0; |
hlipka | 0:41a49a73580c | 172 | |
hlipka | 0:41a49a73580c | 173 | core::stringc c = attrvalue; |
hlipka | 0:41a49a73580c | 174 | return core::fast_atof(c.c_str()); |
hlipka | 0:41a49a73580c | 175 | } |
hlipka | 0:41a49a73580c | 176 | |
hlipka | 0:41a49a73580c | 177 | |
hlipka | 0:41a49a73580c | 178 | //! Returns the name of the current node. |
hlipka | 0:41a49a73580c | 179 | virtual const char_type* getNodeName() const |
hlipka | 0:41a49a73580c | 180 | { |
hlipka | 0:41a49a73580c | 181 | return NodeName.c_str(); |
hlipka | 0:41a49a73580c | 182 | } |
hlipka | 0:41a49a73580c | 183 | |
hlipka | 0:41a49a73580c | 184 | |
hlipka | 0:41a49a73580c | 185 | //! Returns data of the current node. |
hlipka | 0:41a49a73580c | 186 | virtual const char_type* getNodeData() const |
hlipka | 0:41a49a73580c | 187 | { |
hlipka | 0:41a49a73580c | 188 | return NodeName.c_str(); |
hlipka | 0:41a49a73580c | 189 | } |
hlipka | 0:41a49a73580c | 190 | |
hlipka | 0:41a49a73580c | 191 | |
hlipka | 0:41a49a73580c | 192 | //! Returns if an element is an empty element, like <foo /> |
hlipka | 0:41a49a73580c | 193 | virtual bool isEmptyElement() const |
hlipka | 0:41a49a73580c | 194 | { |
hlipka | 0:41a49a73580c | 195 | return IsEmptyElement; |
hlipka | 0:41a49a73580c | 196 | } |
hlipka | 0:41a49a73580c | 197 | |
hlipka | 0:41a49a73580c | 198 | //! Returns format of the source xml file. |
hlipka | 0:41a49a73580c | 199 | virtual ETEXT_FORMAT getSourceFormat() const |
hlipka | 0:41a49a73580c | 200 | { |
hlipka | 0:41a49a73580c | 201 | return SourceFormat; |
hlipka | 0:41a49a73580c | 202 | } |
hlipka | 0:41a49a73580c | 203 | |
hlipka | 0:41a49a73580c | 204 | //! Returns format of the strings returned by the parser. |
hlipka | 0:41a49a73580c | 205 | virtual ETEXT_FORMAT getParserFormat() const |
hlipka | 0:41a49a73580c | 206 | { |
hlipka | 0:41a49a73580c | 207 | return TargetFormat; |
hlipka | 0:41a49a73580c | 208 | } |
hlipka | 0:41a49a73580c | 209 | |
hlipka | 0:41a49a73580c | 210 | private: |
hlipka | 0:41a49a73580c | 211 | |
hlipka | 0:41a49a73580c | 212 | // Reads the current xml node |
hlipka | 0:41a49a73580c | 213 | void parseCurrentNode() |
hlipka | 0:41a49a73580c | 214 | { |
hlipka | 0:41a49a73580c | 215 | char_type* start = P; |
hlipka | 0:41a49a73580c | 216 | |
hlipka | 0:41a49a73580c | 217 | // more forward until '<' found |
hlipka | 0:41a49a73580c | 218 | while(*P != L'<' && *P) |
hlipka | 0:41a49a73580c | 219 | ++P; |
hlipka | 0:41a49a73580c | 220 | |
hlipka | 0:41a49a73580c | 221 | if (!*P) |
hlipka | 0:41a49a73580c | 222 | return; |
hlipka | 0:41a49a73580c | 223 | |
hlipka | 0:41a49a73580c | 224 | if (P - start > 0) |
hlipka | 0:41a49a73580c | 225 | { |
hlipka | 0:41a49a73580c | 226 | // we found some text, store it |
hlipka | 0:41a49a73580c | 227 | if (setText(start, P)) |
hlipka | 0:41a49a73580c | 228 | return; |
hlipka | 0:41a49a73580c | 229 | } |
hlipka | 0:41a49a73580c | 230 | |
hlipka | 0:41a49a73580c | 231 | ++P; |
hlipka | 0:41a49a73580c | 232 | |
hlipka | 0:41a49a73580c | 233 | // based on current token, parse and report next element |
hlipka | 0:41a49a73580c | 234 | switch(*P) |
hlipka | 0:41a49a73580c | 235 | { |
hlipka | 0:41a49a73580c | 236 | case L'/': |
hlipka | 0:41a49a73580c | 237 | parseClosingXMLElement(); |
hlipka | 0:41a49a73580c | 238 | break; |
hlipka | 0:41a49a73580c | 239 | case L'?': |
hlipka | 0:41a49a73580c | 240 | ignoreDefinition(); |
hlipka | 0:41a49a73580c | 241 | break; |
hlipka | 0:41a49a73580c | 242 | case L'!': |
hlipka | 0:41a49a73580c | 243 | if (!parseCDATA()) |
hlipka | 0:41a49a73580c | 244 | parseComment(); |
hlipka | 0:41a49a73580c | 245 | break; |
hlipka | 0:41a49a73580c | 246 | default: |
hlipka | 0:41a49a73580c | 247 | parseOpeningXMLElement(); |
hlipka | 0:41a49a73580c | 248 | break; |
hlipka | 0:41a49a73580c | 249 | } |
hlipka | 0:41a49a73580c | 250 | } |
hlipka | 0:41a49a73580c | 251 | |
hlipka | 0:41a49a73580c | 252 | |
hlipka | 0:41a49a73580c | 253 | //! sets the state that text was found. Returns true if the text was stored as the current node |
hlipka | 0:41a49a73580c | 254 | bool setText(char_type* start, char_type* end) |
hlipka | 0:41a49a73580c | 255 | { |
hlipka | 0:41a49a73580c | 256 | // check if text is more than 2 characters, and if not, check if there is |
hlipka | 0:41a49a73580c | 257 | // only white space, so that this text won't be reported |
hlipka | 0:41a49a73580c | 258 | if (end - start < 3) |
hlipka | 0:41a49a73580c | 259 | { |
hlipka | 0:41a49a73580c | 260 | char_type* p = start; |
hlipka | 0:41a49a73580c | 261 | for(; p != end; ++p) |
hlipka | 0:41a49a73580c | 262 | if (!isWhiteSpace(*p)) |
hlipka | 0:41a49a73580c | 263 | break; |
hlipka | 0:41a49a73580c | 264 | |
hlipka | 0:41a49a73580c | 265 | if (p == end) |
hlipka | 0:41a49a73580c | 266 | return false; |
hlipka | 0:41a49a73580c | 267 | } |
hlipka | 0:41a49a73580c | 268 | |
hlipka | 0:41a49a73580c | 269 | // set current text to the parsed text, and replace xml special characters |
hlipka | 0:41a49a73580c | 270 | core::string<char_type> s(start, (int)(end - start)); |
hlipka | 0:41a49a73580c | 271 | NodeName = replaceSpecialCharacters(s); |
hlipka | 0:41a49a73580c | 272 | |
hlipka | 0:41a49a73580c | 273 | // current XML node type is text |
hlipka | 0:41a49a73580c | 274 | CurrentNodeType = EXN_TEXT; |
hlipka | 0:41a49a73580c | 275 | |
hlipka | 0:41a49a73580c | 276 | return true; |
hlipka | 0:41a49a73580c | 277 | } |
hlipka | 0:41a49a73580c | 278 | |
hlipka | 0:41a49a73580c | 279 | |
hlipka | 0:41a49a73580c | 280 | |
hlipka | 0:41a49a73580c | 281 | //! ignores an xml definition like <?xml something /> |
hlipka | 0:41a49a73580c | 282 | void ignoreDefinition() |
hlipka | 0:41a49a73580c | 283 | { |
hlipka | 0:41a49a73580c | 284 | CurrentNodeType = EXN_UNKNOWN; |
hlipka | 0:41a49a73580c | 285 | |
hlipka | 0:41a49a73580c | 286 | // move until end marked with '>' reached |
hlipka | 0:41a49a73580c | 287 | while(*P != L'>') |
hlipka | 0:41a49a73580c | 288 | ++P; |
hlipka | 0:41a49a73580c | 289 | |
hlipka | 0:41a49a73580c | 290 | ++P; |
hlipka | 0:41a49a73580c | 291 | } |
hlipka | 0:41a49a73580c | 292 | |
hlipka | 0:41a49a73580c | 293 | |
hlipka | 0:41a49a73580c | 294 | //! parses a comment |
hlipka | 0:41a49a73580c | 295 | void parseComment() |
hlipka | 0:41a49a73580c | 296 | { |
hlipka | 0:41a49a73580c | 297 | CurrentNodeType = EXN_COMMENT; |
hlipka | 0:41a49a73580c | 298 | P += 1; |
hlipka | 0:41a49a73580c | 299 | |
hlipka | 0:41a49a73580c | 300 | char_type *pCommentBegin = P; |
hlipka | 0:41a49a73580c | 301 | |
hlipka | 0:41a49a73580c | 302 | int count = 1; |
hlipka | 0:41a49a73580c | 303 | |
hlipka | 0:41a49a73580c | 304 | // move until end of comment reached |
hlipka | 0:41a49a73580c | 305 | while(count) |
hlipka | 0:41a49a73580c | 306 | { |
hlipka | 0:41a49a73580c | 307 | if (*P == L'>') |
hlipka | 0:41a49a73580c | 308 | --count; |
hlipka | 0:41a49a73580c | 309 | else |
hlipka | 0:41a49a73580c | 310 | if (*P == L'<') |
hlipka | 0:41a49a73580c | 311 | ++count; |
hlipka | 0:41a49a73580c | 312 | |
hlipka | 0:41a49a73580c | 313 | ++P; |
hlipka | 0:41a49a73580c | 314 | } |
hlipka | 0:41a49a73580c | 315 | |
hlipka | 0:41a49a73580c | 316 | P -= 3; |
hlipka | 0:41a49a73580c | 317 | NodeName = core::string<char_type>(pCommentBegin+2, (int)(P - pCommentBegin-2)); |
hlipka | 0:41a49a73580c | 318 | P += 3; |
hlipka | 0:41a49a73580c | 319 | } |
hlipka | 0:41a49a73580c | 320 | |
hlipka | 0:41a49a73580c | 321 | |
hlipka | 0:41a49a73580c | 322 | //! parses an opening xml element and reads attributes |
hlipka | 0:41a49a73580c | 323 | void parseOpeningXMLElement() |
hlipka | 0:41a49a73580c | 324 | { |
hlipka | 0:41a49a73580c | 325 | CurrentNodeType = EXN_ELEMENT; |
hlipka | 0:41a49a73580c | 326 | IsEmptyElement = false; |
hlipka | 0:41a49a73580c | 327 | Attributes.clear(); |
hlipka | 0:41a49a73580c | 328 | |
hlipka | 0:41a49a73580c | 329 | // find name |
hlipka | 0:41a49a73580c | 330 | const char_type* startName = P; |
hlipka | 0:41a49a73580c | 331 | |
hlipka | 0:41a49a73580c | 332 | // find end of element |
hlipka | 0:41a49a73580c | 333 | while(*P != L'>' && !isWhiteSpace(*P)) |
hlipka | 0:41a49a73580c | 334 | ++P; |
hlipka | 0:41a49a73580c | 335 | |
hlipka | 0:41a49a73580c | 336 | const char_type* endName = P; |
hlipka | 0:41a49a73580c | 337 | |
hlipka | 0:41a49a73580c | 338 | // find Attributes |
hlipka | 0:41a49a73580c | 339 | while(*P != L'>') |
hlipka | 0:41a49a73580c | 340 | { |
hlipka | 0:41a49a73580c | 341 | if (isWhiteSpace(*P)) |
hlipka | 0:41a49a73580c | 342 | ++P; |
hlipka | 0:41a49a73580c | 343 | else |
hlipka | 0:41a49a73580c | 344 | { |
hlipka | 0:41a49a73580c | 345 | if (*P != L'/') |
hlipka | 0:41a49a73580c | 346 | { |
hlipka | 0:41a49a73580c | 347 | // we've got an attribute |
hlipka | 0:41a49a73580c | 348 | |
hlipka | 0:41a49a73580c | 349 | // read the attribute names |
hlipka | 0:41a49a73580c | 350 | const char_type* attributeNameBegin = P; |
hlipka | 0:41a49a73580c | 351 | |
hlipka | 0:41a49a73580c | 352 | while(!isWhiteSpace(*P) && *P != L'=') |
hlipka | 0:41a49a73580c | 353 | ++P; |
hlipka | 0:41a49a73580c | 354 | |
hlipka | 0:41a49a73580c | 355 | const char_type* attributeNameEnd = P; |
hlipka | 0:41a49a73580c | 356 | ++P; |
hlipka | 0:41a49a73580c | 357 | |
hlipka | 0:41a49a73580c | 358 | // read the attribute value |
hlipka | 0:41a49a73580c | 359 | // check for quotes and single quotes, thx to murphy |
hlipka | 0:41a49a73580c | 360 | while( (*P != L'\"') && (*P != L'\'') && *P) |
hlipka | 0:41a49a73580c | 361 | ++P; |
hlipka | 0:41a49a73580c | 362 | |
hlipka | 0:41a49a73580c | 363 | if (!*P) // malformatted xml file |
hlipka | 0:41a49a73580c | 364 | return; |
hlipka | 0:41a49a73580c | 365 | |
hlipka | 0:41a49a73580c | 366 | const char_type attributeQuoteChar = *P; |
hlipka | 0:41a49a73580c | 367 | |
hlipka | 0:41a49a73580c | 368 | ++P; |
hlipka | 0:41a49a73580c | 369 | const char_type* attributeValueBegin = P; |
hlipka | 0:41a49a73580c | 370 | |
hlipka | 0:41a49a73580c | 371 | while(*P != attributeQuoteChar && *P) |
hlipka | 0:41a49a73580c | 372 | ++P; |
hlipka | 0:41a49a73580c | 373 | |
hlipka | 0:41a49a73580c | 374 | if (!*P) // malformatted xml file |
hlipka | 0:41a49a73580c | 375 | return; |
hlipka | 0:41a49a73580c | 376 | |
hlipka | 0:41a49a73580c | 377 | const char_type* attributeValueEnd = P; |
hlipka | 0:41a49a73580c | 378 | ++P; |
hlipka | 0:41a49a73580c | 379 | |
hlipka | 0:41a49a73580c | 380 | SAttribute attr; |
hlipka | 0:41a49a73580c | 381 | attr.Name = core::string<char_type>(attributeNameBegin, |
hlipka | 0:41a49a73580c | 382 | (int)(attributeNameEnd - attributeNameBegin)); |
hlipka | 0:41a49a73580c | 383 | |
hlipka | 0:41a49a73580c | 384 | core::string<char_type> s(attributeValueBegin, |
hlipka | 0:41a49a73580c | 385 | (int)(attributeValueEnd - attributeValueBegin)); |
hlipka | 0:41a49a73580c | 386 | |
hlipka | 0:41a49a73580c | 387 | attr.Value = replaceSpecialCharacters(s); |
hlipka | 0:41a49a73580c | 388 | Attributes.push_back(attr); |
hlipka | 0:41a49a73580c | 389 | } |
hlipka | 0:41a49a73580c | 390 | else |
hlipka | 0:41a49a73580c | 391 | { |
hlipka | 0:41a49a73580c | 392 | // tag is closed directly |
hlipka | 0:41a49a73580c | 393 | ++P; |
hlipka | 0:41a49a73580c | 394 | IsEmptyElement = true; |
hlipka | 0:41a49a73580c | 395 | break; |
hlipka | 0:41a49a73580c | 396 | } |
hlipka | 0:41a49a73580c | 397 | } |
hlipka | 0:41a49a73580c | 398 | } |
hlipka | 0:41a49a73580c | 399 | |
hlipka | 0:41a49a73580c | 400 | // check if this tag is closing directly |
hlipka | 0:41a49a73580c | 401 | if (endName > startName && *(endName-1) == L'/') |
hlipka | 0:41a49a73580c | 402 | { |
hlipka | 0:41a49a73580c | 403 | // directly closing tag |
hlipka | 0:41a49a73580c | 404 | IsEmptyElement = true; |
hlipka | 0:41a49a73580c | 405 | endName--; |
hlipka | 0:41a49a73580c | 406 | } |
hlipka | 0:41a49a73580c | 407 | |
hlipka | 0:41a49a73580c | 408 | NodeName = core::string<char_type>(startName, (int)(endName - startName)); |
hlipka | 0:41a49a73580c | 409 | |
hlipka | 0:41a49a73580c | 410 | ++P; |
hlipka | 0:41a49a73580c | 411 | } |
hlipka | 0:41a49a73580c | 412 | |
hlipka | 0:41a49a73580c | 413 | |
hlipka | 0:41a49a73580c | 414 | //! parses a closing xml tag |
hlipka | 0:41a49a73580c | 415 | void parseClosingXMLElement() |
hlipka | 0:41a49a73580c | 416 | { |
hlipka | 0:41a49a73580c | 417 | CurrentNodeType = EXN_ELEMENT_END; |
hlipka | 0:41a49a73580c | 418 | IsEmptyElement = false; |
hlipka | 0:41a49a73580c | 419 | Attributes.clear(); |
hlipka | 0:41a49a73580c | 420 | |
hlipka | 0:41a49a73580c | 421 | ++P; |
hlipka | 0:41a49a73580c | 422 | const char_type* pBeginClose = P; |
hlipka | 0:41a49a73580c | 423 | |
hlipka | 0:41a49a73580c | 424 | while(*P != L'>') |
hlipka | 0:41a49a73580c | 425 | ++P; |
hlipka | 0:41a49a73580c | 426 | |
hlipka | 0:41a49a73580c | 427 | NodeName = core::string<char_type>(pBeginClose, (int)(P - pBeginClose)); |
hlipka | 0:41a49a73580c | 428 | ++P; |
hlipka | 0:41a49a73580c | 429 | } |
hlipka | 0:41a49a73580c | 430 | |
hlipka | 0:41a49a73580c | 431 | //! parses a possible CDATA section, returns false if begin was not a CDATA section |
hlipka | 0:41a49a73580c | 432 | bool parseCDATA() |
hlipka | 0:41a49a73580c | 433 | { |
hlipka | 0:41a49a73580c | 434 | if (*(P+1) != L'[') |
hlipka | 0:41a49a73580c | 435 | return false; |
hlipka | 0:41a49a73580c | 436 | |
hlipka | 0:41a49a73580c | 437 | CurrentNodeType = EXN_CDATA; |
hlipka | 0:41a49a73580c | 438 | |
hlipka | 0:41a49a73580c | 439 | // skip '<![CDATA[' |
hlipka | 0:41a49a73580c | 440 | int count=0; |
hlipka | 0:41a49a73580c | 441 | while( *P && count<8 ) |
hlipka | 0:41a49a73580c | 442 | { |
hlipka | 0:41a49a73580c | 443 | ++P; |
hlipka | 0:41a49a73580c | 444 | ++count; |
hlipka | 0:41a49a73580c | 445 | } |
hlipka | 0:41a49a73580c | 446 | |
hlipka | 0:41a49a73580c | 447 | if (!*P) |
hlipka | 0:41a49a73580c | 448 | return true; |
hlipka | 0:41a49a73580c | 449 | |
hlipka | 0:41a49a73580c | 450 | char_type *cDataBegin = P; |
hlipka | 0:41a49a73580c | 451 | char_type *cDataEnd = 0; |
hlipka | 0:41a49a73580c | 452 | |
hlipka | 0:41a49a73580c | 453 | // find end of CDATA |
hlipka | 0:41a49a73580c | 454 | while(*P && !cDataEnd) |
hlipka | 0:41a49a73580c | 455 | { |
hlipka | 0:41a49a73580c | 456 | if (*P == L'>' && |
hlipka | 0:41a49a73580c | 457 | (*(P-1) == L']') && |
hlipka | 0:41a49a73580c | 458 | (*(P-2) == L']')) |
hlipka | 0:41a49a73580c | 459 | { |
hlipka | 0:41a49a73580c | 460 | cDataEnd = P - 2; |
hlipka | 0:41a49a73580c | 461 | } |
hlipka | 0:41a49a73580c | 462 | |
hlipka | 0:41a49a73580c | 463 | ++P; |
hlipka | 0:41a49a73580c | 464 | } |
hlipka | 0:41a49a73580c | 465 | |
hlipka | 0:41a49a73580c | 466 | if ( cDataEnd ) |
hlipka | 0:41a49a73580c | 467 | NodeName = core::string<char_type>(cDataBegin, (int)(cDataEnd - cDataBegin)); |
hlipka | 0:41a49a73580c | 468 | else |
hlipka | 0:41a49a73580c | 469 | NodeName = ""; |
hlipka | 0:41a49a73580c | 470 | |
hlipka | 0:41a49a73580c | 471 | return true; |
hlipka | 0:41a49a73580c | 472 | } |
hlipka | 0:41a49a73580c | 473 | |
hlipka | 0:41a49a73580c | 474 | |
hlipka | 0:41a49a73580c | 475 | // structure for storing attribute name/value pairs |
hlipka | 0:41a49a73580c | 476 | struct SAttribute |
hlipka | 0:41a49a73580c | 477 | { |
hlipka | 0:41a49a73580c | 478 | core::string<char_type> Name; |
hlipka | 0:41a49a73580c | 479 | core::string<char_type> Value; |
hlipka | 0:41a49a73580c | 480 | }; |
hlipka | 0:41a49a73580c | 481 | |
hlipka | 0:41a49a73580c | 482 | // finds a current attribute by name, returns 0 if not found |
hlipka | 0:41a49a73580c | 483 | const SAttribute* getAttributeByName(const char_type* name) const |
hlipka | 0:41a49a73580c | 484 | { |
hlipka | 0:41a49a73580c | 485 | if (!name) |
hlipka | 0:41a49a73580c | 486 | return 0; |
hlipka | 0:41a49a73580c | 487 | |
hlipka | 0:41a49a73580c | 488 | core::string<char_type> n = name; |
hlipka | 0:41a49a73580c | 489 | |
hlipka | 0:41a49a73580c | 490 | for (int i=0; i<(int)Attributes.size(); ++i) |
hlipka | 0:41a49a73580c | 491 | if (Attributes[i].Name == n) |
hlipka | 0:41a49a73580c | 492 | return &Attributes[i]; |
hlipka | 0:41a49a73580c | 493 | |
hlipka | 0:41a49a73580c | 494 | return 0; |
hlipka | 0:41a49a73580c | 495 | } |
hlipka | 0:41a49a73580c | 496 | |
hlipka | 0:41a49a73580c | 497 | // replaces xml special characters in a string and creates a new one |
hlipka | 0:41a49a73580c | 498 | core::string<char_type> replaceSpecialCharacters( |
hlipka | 0:41a49a73580c | 499 | core::string<char_type>& origstr) |
hlipka | 0:41a49a73580c | 500 | { |
hlipka | 0:41a49a73580c | 501 | int pos = origstr.findFirst(L'&'); |
hlipka | 0:41a49a73580c | 502 | int oldPos = 0; |
hlipka | 0:41a49a73580c | 503 | |
hlipka | 0:41a49a73580c | 504 | if (pos == -1) |
hlipka | 0:41a49a73580c | 505 | return origstr; |
hlipka | 0:41a49a73580c | 506 | |
hlipka | 0:41a49a73580c | 507 | core::string<char_type> newstr; |
hlipka | 0:41a49a73580c | 508 | |
hlipka | 0:41a49a73580c | 509 | while(pos != -1 && pos < origstr.size()-2) |
hlipka | 0:41a49a73580c | 510 | { |
hlipka | 0:41a49a73580c | 511 | // check if it is one of the special characters |
hlipka | 0:41a49a73580c | 512 | |
hlipka | 0:41a49a73580c | 513 | int specialChar = -1; |
hlipka | 0:41a49a73580c | 514 | for (int i=0; i<(int)SpecialCharacters.size(); ++i) |
hlipka | 0:41a49a73580c | 515 | { |
hlipka | 0:41a49a73580c | 516 | const char_type* p = &origstr.c_str()[pos]+1; |
hlipka | 0:41a49a73580c | 517 | |
hlipka | 0:41a49a73580c | 518 | if (equalsn(&SpecialCharacters[i][1], p, SpecialCharacters[i].size()-1)) |
hlipka | 0:41a49a73580c | 519 | { |
hlipka | 0:41a49a73580c | 520 | specialChar = i; |
hlipka | 0:41a49a73580c | 521 | break; |
hlipka | 0:41a49a73580c | 522 | } |
hlipka | 0:41a49a73580c | 523 | } |
hlipka | 0:41a49a73580c | 524 | |
hlipka | 0:41a49a73580c | 525 | if (specialChar != -1) |
hlipka | 0:41a49a73580c | 526 | { |
hlipka | 0:41a49a73580c | 527 | newstr.append(origstr.subString(oldPos, pos - oldPos)); |
hlipka | 0:41a49a73580c | 528 | newstr.append(SpecialCharacters[specialChar][0]); |
hlipka | 0:41a49a73580c | 529 | pos += SpecialCharacters[specialChar].size(); |
hlipka | 0:41a49a73580c | 530 | } |
hlipka | 0:41a49a73580c | 531 | else |
hlipka | 0:41a49a73580c | 532 | { |
hlipka | 0:41a49a73580c | 533 | newstr.append(origstr.subString(oldPos, pos - oldPos + 1)); |
hlipka | 0:41a49a73580c | 534 | pos += 1; |
hlipka | 0:41a49a73580c | 535 | } |
hlipka | 0:41a49a73580c | 536 | |
hlipka | 0:41a49a73580c | 537 | // find next & |
hlipka | 0:41a49a73580c | 538 | oldPos = pos; |
hlipka | 0:41a49a73580c | 539 | pos = origstr.findNext(L'&', pos); |
hlipka | 0:41a49a73580c | 540 | } |
hlipka | 0:41a49a73580c | 541 | |
hlipka | 0:41a49a73580c | 542 | if (oldPos < origstr.size()-1) |
hlipka | 0:41a49a73580c | 543 | newstr.append(origstr.subString(oldPos, origstr.size()-oldPos)); |
hlipka | 0:41a49a73580c | 544 | |
hlipka | 0:41a49a73580c | 545 | return newstr; |
hlipka | 0:41a49a73580c | 546 | } |
hlipka | 0:41a49a73580c | 547 | |
hlipka | 0:41a49a73580c | 548 | |
hlipka | 0:41a49a73580c | 549 | |
hlipka | 0:41a49a73580c | 550 | //! reads the xml file and converts it into the wanted character format. |
hlipka | 0:41a49a73580c | 551 | bool readFile(IFileReadCallBack* callback) |
hlipka | 0:41a49a73580c | 552 | { |
hlipka | 0:41a49a73580c | 553 | int size = callback->getSize(); |
hlipka | 0:41a49a73580c | 554 | size += 4; // We need two terminating 0's at the end. |
hlipka | 0:41a49a73580c | 555 | // For ASCII we need 1 0's, for UTF-16 2, for UTF-32 4. |
hlipka | 0:41a49a73580c | 556 | |
hlipka | 0:41a49a73580c | 557 | char* data8 = new char[size]; |
hlipka | 0:41a49a73580c | 558 | |
hlipka | 0:41a49a73580c | 559 | if (!callback->read(data8, size-4)) |
hlipka | 0:41a49a73580c | 560 | { |
hlipka | 0:41a49a73580c | 561 | delete [] data8; |
hlipka | 0:41a49a73580c | 562 | return false; |
hlipka | 0:41a49a73580c | 563 | } |
hlipka | 0:41a49a73580c | 564 | |
hlipka | 0:41a49a73580c | 565 | // add zeros at end |
hlipka | 0:41a49a73580c | 566 | |
hlipka | 0:41a49a73580c | 567 | data8[size-1] = 0; |
hlipka | 0:41a49a73580c | 568 | data8[size-2] = 0; |
hlipka | 0:41a49a73580c | 569 | data8[size-3] = 0; |
hlipka | 0:41a49a73580c | 570 | data8[size-4] = 0; |
hlipka | 0:41a49a73580c | 571 | |
hlipka | 0:41a49a73580c | 572 | char16* data16 = reinterpret_cast<char16*>(data8); |
hlipka | 0:41a49a73580c | 573 | char32* data32 = reinterpret_cast<char32*>(data8); |
hlipka | 0:41a49a73580c | 574 | |
hlipka | 0:41a49a73580c | 575 | // now we need to convert the data to the desired target format |
hlipka | 0:41a49a73580c | 576 | // based on the byte order mark. |
hlipka | 0:41a49a73580c | 577 | |
hlipka | 0:41a49a73580c | 578 | const unsigned char UTF8[] = {0xEF, 0xBB, 0xBF}; // 0xEFBBBF; |
hlipka | 0:41a49a73580c | 579 | const int UTF16_BE = 0xFFFE; |
hlipka | 0:41a49a73580c | 580 | const int UTF16_LE = 0xFEFF; |
hlipka | 0:41a49a73580c | 581 | const int UTF32_BE = 0xFFFE0000; |
hlipka | 0:41a49a73580c | 582 | const int UTF32_LE = 0x0000FEFF; |
hlipka | 0:41a49a73580c | 583 | |
hlipka | 0:41a49a73580c | 584 | // check source for all utf versions and convert to target data format |
hlipka | 0:41a49a73580c | 585 | |
hlipka | 0:41a49a73580c | 586 | if (size >= 4 && data32[0] == (char32)UTF32_BE) |
hlipka | 0:41a49a73580c | 587 | { |
hlipka | 0:41a49a73580c | 588 | // UTF-32, big endian |
hlipka | 0:41a49a73580c | 589 | SourceFormat = ETF_UTF32_BE; |
hlipka | 0:41a49a73580c | 590 | convertTextData(data32+1, data8, (size/4)); // data32+1 because we need to skip the header |
hlipka | 0:41a49a73580c | 591 | } |
hlipka | 0:41a49a73580c | 592 | else |
hlipka | 0:41a49a73580c | 593 | if (size >= 4 && data32[0] == (char32)UTF32_LE) |
hlipka | 0:41a49a73580c | 594 | { |
hlipka | 0:41a49a73580c | 595 | // UTF-32, little endian |
hlipka | 0:41a49a73580c | 596 | SourceFormat = ETF_UTF32_LE; |
hlipka | 0:41a49a73580c | 597 | convertTextData(data32+1, data8, (size/4)); // data32+1 because we need to skip the header |
hlipka | 0:41a49a73580c | 598 | } |
hlipka | 0:41a49a73580c | 599 | else |
hlipka | 0:41a49a73580c | 600 | if (size >= 2 && data16[0] == UTF16_BE) |
hlipka | 0:41a49a73580c | 601 | { |
hlipka | 0:41a49a73580c | 602 | // UTF-16, big endian |
hlipka | 0:41a49a73580c | 603 | SourceFormat = ETF_UTF16_BE; |
hlipka | 0:41a49a73580c | 604 | convertTextData(data16+1, data8, (size/2)); // data16+1 because we need to skip the header |
hlipka | 0:41a49a73580c | 605 | } |
hlipka | 0:41a49a73580c | 606 | else |
hlipka | 0:41a49a73580c | 607 | if (size >= 2 && data16[0] == UTF16_LE) |
hlipka | 0:41a49a73580c | 608 | { |
hlipka | 0:41a49a73580c | 609 | // UTF-16, little endian |
hlipka | 0:41a49a73580c | 610 | SourceFormat = ETF_UTF16_LE; |
hlipka | 0:41a49a73580c | 611 | convertTextData(data16+1, data8, (size/2)); // data16+1 because we need to skip the header |
hlipka | 0:41a49a73580c | 612 | } |
hlipka | 0:41a49a73580c | 613 | else |
hlipka | 0:41a49a73580c | 614 | if (size >= 3 && data8[0] == UTF8[0] && data8[1] == UTF8[1] && data8[2] == UTF8[2]) |
hlipka | 0:41a49a73580c | 615 | { |
hlipka | 0:41a49a73580c | 616 | // UTF-8 |
hlipka | 0:41a49a73580c | 617 | SourceFormat = ETF_UTF8; |
hlipka | 0:41a49a73580c | 618 | convertTextData(data8+3, data8, size); // data8+3 because we need to skip the header |
hlipka | 0:41a49a73580c | 619 | } |
hlipka | 0:41a49a73580c | 620 | else |
hlipka | 0:41a49a73580c | 621 | { |
hlipka | 0:41a49a73580c | 622 | // ASCII |
hlipka | 0:41a49a73580c | 623 | SourceFormat = ETF_ASCII; |
hlipka | 0:41a49a73580c | 624 | convertTextData(data8, data8, size); |
hlipka | 0:41a49a73580c | 625 | } |
hlipka | 0:41a49a73580c | 626 | |
hlipka | 0:41a49a73580c | 627 | return true; |
hlipka | 0:41a49a73580c | 628 | } |
hlipka | 0:41a49a73580c | 629 | |
hlipka | 0:41a49a73580c | 630 | |
hlipka | 0:41a49a73580c | 631 | //! converts the text file into the desired format. |
hlipka | 0:41a49a73580c | 632 | //! \param source: begin of the text (without byte order mark) |
hlipka | 0:41a49a73580c | 633 | //! \param pointerToStore: pointer to text data block which can be |
hlipka | 0:41a49a73580c | 634 | //! stored or deleted based on the nesessary conversion. |
hlipka | 0:41a49a73580c | 635 | //! \param sizeWithoutHeader: Text size in characters without header |
hlipka | 0:41a49a73580c | 636 | template<class src_char_type> |
hlipka | 0:41a49a73580c | 637 | void convertTextData(src_char_type* source, char* pointerToStore, int sizeWithoutHeader) |
hlipka | 0:41a49a73580c | 638 | { |
hlipka | 0:41a49a73580c | 639 | // convert little to big endian if necessary |
hlipka | 0:41a49a73580c | 640 | if (sizeof(src_char_type) > 1 && |
hlipka | 0:41a49a73580c | 641 | isLittleEndian(TargetFormat) != isLittleEndian(SourceFormat)) |
hlipka | 0:41a49a73580c | 642 | convertToLittleEndian(source); |
hlipka | 0:41a49a73580c | 643 | |
hlipka | 0:41a49a73580c | 644 | // check if conversion is necessary: |
hlipka | 0:41a49a73580c | 645 | if (sizeof(src_char_type) == sizeof(char_type)) |
hlipka | 0:41a49a73580c | 646 | { |
hlipka | 0:41a49a73580c | 647 | // no need to convert |
hlipka | 0:41a49a73580c | 648 | TextBegin = (char_type*)source; |
hlipka | 0:41a49a73580c | 649 | TextData = (char_type*)pointerToStore; |
hlipka | 0:41a49a73580c | 650 | TextSize = sizeWithoutHeader; |
hlipka | 0:41a49a73580c | 651 | } |
hlipka | 0:41a49a73580c | 652 | else |
hlipka | 0:41a49a73580c | 653 | { |
hlipka | 0:41a49a73580c | 654 | // convert source into target data format. |
hlipka | 0:41a49a73580c | 655 | // TODO: implement a real conversion. This one just |
hlipka | 0:41a49a73580c | 656 | // copies bytes. This is a problem when there are |
hlipka | 0:41a49a73580c | 657 | // unicode symbols using more than one character. |
hlipka | 0:41a49a73580c | 658 | |
hlipka | 0:41a49a73580c | 659 | TextData = new char_type[sizeWithoutHeader]; |
hlipka | 0:41a49a73580c | 660 | |
hlipka | 0:41a49a73580c | 661 | for (int i=0; i<sizeWithoutHeader; ++i) |
hlipka | 0:41a49a73580c | 662 | TextData[i] = (char_type)source[i]; |
hlipka | 0:41a49a73580c | 663 | |
hlipka | 0:41a49a73580c | 664 | TextBegin = TextData; |
hlipka | 0:41a49a73580c | 665 | TextSize = sizeWithoutHeader; |
hlipka | 0:41a49a73580c | 666 | |
hlipka | 0:41a49a73580c | 667 | // delete original data because no longer needed |
hlipka | 0:41a49a73580c | 668 | delete [] pointerToStore; |
hlipka | 0:41a49a73580c | 669 | } |
hlipka | 0:41a49a73580c | 670 | } |
hlipka | 0:41a49a73580c | 671 | |
//! Swaps the byte order of every character of a zero-terminated text buffer.
//! The swap is symmetric, so despite the name it converts in both directions.
//! \param t: buffer to convert in place. Processing stops at the first
//! character with the value zero. Characters of 4 bytes are treated as
//! 32 bit units, every other size is treated as a 16 bit unit.
template<class src_char_type>
void convertToLittleEndian(src_char_type* t)
{
    if (sizeof(src_char_type) == 4)
    {
        // 32 bit

        for (; *t; ++t)
        {
            // work on a masked unsigned copy, so a signed character type
            // can neither sign-extend nor shift into its sign bit
            const unsigned long v = (unsigned long)*t & 0xffffffffUL;

            *t = (src_char_type)(((v & 0xff000000UL) >> 24) |
                                 ((v & 0x00ff0000UL) >> 8)  |
                                 ((v & 0x0000ff00UL) << 8)  |
                                 ((v & 0x000000ffUL) << 24));
        }
    }
    else
    {
        // 16 bit

        for (; *t; ++t)
        {
            // same reasoning as above: without the mask a negative value
            // would leave 0xFF in the high byte after the right shift
            const unsigned int v = (unsigned int)*t & 0xffffU;

            *t = (src_char_type)(((v >> 8) | (v << 8)) & 0xffffU);
        }
    }
}
hlipka | 0:41a49a73580c | 700 | |
hlipka | 0:41a49a73580c | 701 | //! returns if a format is little endian |
hlipka | 0:41a49a73580c | 702 | inline bool isLittleEndian(ETEXT_FORMAT f) |
hlipka | 0:41a49a73580c | 703 | { |
hlipka | 0:41a49a73580c | 704 | return f == ETF_ASCII || |
hlipka | 0:41a49a73580c | 705 | f == ETF_UTF8 || |
hlipka | 0:41a49a73580c | 706 | f == ETF_UTF16_LE || |
hlipka | 0:41a49a73580c | 707 | f == ETF_UTF32_LE; |
hlipka | 0:41a49a73580c | 708 | } |
hlipka | 0:41a49a73580c | 709 | |
hlipka | 0:41a49a73580c | 710 | |
hlipka | 0:41a49a73580c | 711 | //! returns true if a character is whitespace |
hlipka | 0:41a49a73580c | 712 | inline bool isWhiteSpace(char_type c) |
hlipka | 0:41a49a73580c | 713 | { |
hlipka | 0:41a49a73580c | 714 | return (c==' ' || c=='\t' || c=='\n' || c=='\r'); |
hlipka | 0:41a49a73580c | 715 | } |
hlipka | 0:41a49a73580c | 716 | |
hlipka | 0:41a49a73580c | 717 | |
hlipka | 0:41a49a73580c | 718 | //! generates a list with xml special characters |
hlipka | 0:41a49a73580c | 719 | void createSpecialCharacterList() |
hlipka | 0:41a49a73580c | 720 | { |
hlipka | 0:41a49a73580c | 721 | // list of strings containing special symbols, |
hlipka | 0:41a49a73580c | 722 | // the first character is the special character, |
hlipka | 0:41a49a73580c | 723 | // the following is the symbol string without trailing &. |
hlipka | 0:41a49a73580c | 724 | |
hlipka | 0:41a49a73580c | 725 | SpecialCharacters.push_back("&"); |
hlipka | 0:41a49a73580c | 726 | SpecialCharacters.push_back("<lt;"); |
hlipka | 0:41a49a73580c | 727 | SpecialCharacters.push_back(">gt;"); |
hlipka | 0:41a49a73580c | 728 | SpecialCharacters.push_back("\"quot;"); |
hlipka | 0:41a49a73580c | 729 | SpecialCharacters.push_back("'apos;"); |
hlipka | 0:41a49a73580c | 730 | |
hlipka | 0:41a49a73580c | 731 | } |
hlipka | 0:41a49a73580c | 732 | |
hlipka | 0:41a49a73580c | 733 | |
hlipka | 0:41a49a73580c | 734 | //! compares the first n characters of the strings |
hlipka | 0:41a49a73580c | 735 | bool equalsn(const char_type* str1, const char_type* str2, int len) |
hlipka | 0:41a49a73580c | 736 | { |
hlipka | 0:41a49a73580c | 737 | int i; |
hlipka | 0:41a49a73580c | 738 | for(i=0; str1[i] && str2[i] && i < len; ++i) |
hlipka | 0:41a49a73580c | 739 | if (str1[i] != str2[i]) |
hlipka | 0:41a49a73580c | 740 | return false; |
hlipka | 0:41a49a73580c | 741 | |
hlipka | 0:41a49a73580c | 742 | // if one (or both) of the strings was smaller then they |
hlipka | 0:41a49a73580c | 743 | // are only equal if they have the same lenght |
hlipka | 0:41a49a73580c | 744 | return (i == len) || (str1[i] == 0 && str2[i] == 0); |
hlipka | 0:41a49a73580c | 745 | } |
hlipka | 0:41a49a73580c | 746 | |
hlipka | 0:41a49a73580c | 747 | |
hlipka | 0:41a49a73580c | 748 | //! stores the target text format |
hlipka | 0:41a49a73580c | 749 | void storeTargetFormat() |
hlipka | 0:41a49a73580c | 750 | { |
hlipka | 0:41a49a73580c | 751 | // get target format. We could have done this using template specialization, |
hlipka | 0:41a49a73580c | 752 | // but VisualStudio 6 don't like it and we want to support it. |
hlipka | 0:41a49a73580c | 753 | |
hlipka | 0:41a49a73580c | 754 | switch(sizeof(char_type)) |
hlipka | 0:41a49a73580c | 755 | { |
hlipka | 0:41a49a73580c | 756 | case 1: |
hlipka | 0:41a49a73580c | 757 | TargetFormat = ETF_UTF8; |
hlipka | 0:41a49a73580c | 758 | break; |
hlipka | 0:41a49a73580c | 759 | case 2: |
hlipka | 0:41a49a73580c | 760 | TargetFormat = ETF_UTF16_LE; |
hlipka | 0:41a49a73580c | 761 | break; |
hlipka | 0:41a49a73580c | 762 | case 4: |
hlipka | 0:41a49a73580c | 763 | TargetFormat = ETF_UTF32_LE; |
hlipka | 0:41a49a73580c | 764 | break; |
hlipka | 0:41a49a73580c | 765 | default: |
hlipka | 0:41a49a73580c | 766 | TargetFormat = ETF_ASCII; // should never happen. |
hlipka | 0:41a49a73580c | 767 | } |
hlipka | 0:41a49a73580c | 768 | } |
hlipka | 0:41a49a73580c | 769 | |
hlipka | 0:41a49a73580c | 770 | |
// instance variables:

char_type* TextData; // data block of the text file; either the original buffer or a converted copy (see convertTextData())
char_type* P; // current point in text to parse
char_type* TextBegin; // start of text to parse (behind the byte order mark, if there is one)
unsigned int TextSize; // size of text to parse in characters, not bytes

EXML_NODE CurrentNodeType; // type of the currently parsed node
ETEXT_FORMAT SourceFormat; // source format of the xml file, detected from its byte order mark
ETEXT_FORMAT TargetFormat; // output format of this parser, see storeTargetFormat()

core::string<char_type> NodeName; // name of the current node (presumably the one being parsed - confirm against the parsing code)
core::string<char_type> EmptyString; // empty string to be returned by getSafe() methods

bool IsEmptyElement; // is the currently parsed node empty?

core::array< core::string<char_type> > SpecialCharacters; // see createSpecialCharacterList()

core::array<SAttribute> Attributes; // attributes of current element
hlipka | 0:41a49a73580c | 790 | |
hlipka | 0:41a49a73580c | 791 | }; // end CXMLReaderImpl |
hlipka | 0:41a49a73580c | 792 | |
hlipka | 0:41a49a73580c | 793 | |
hlipka | 0:41a49a73580c | 794 | } // end namespace |
hlipka | 0:41a49a73580c | 795 | } // end namespace |
hlipka | 0:41a49a73580c | 796 | |
hlipka | 0:41a49a73580c | 797 | #endif |