SAX based XML parser
Dependents: giken9_HTMLServer_Temp_Sample
Diff: expatpp.cpp
- Revision:
- 0:07919e3d6c56
- Child:
- 1:e96b2af301dd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/expatpp.cpp Fri Apr 08 09:18:41 2011 +0000 @@ -0,0 +1,804 @@ +// expatpp +#ifdef UNDER_CE + #include <string.h> + #include <windows.h> + #include <dbgapi.h> + #define assert ASSERT +#else + #include <string> + using namespace std; + #include <assert.h> +#endif +#include "expatpp.h" + +#pragma diag_suppress 1299 + + +// may be defined in xmltchar.h or elsewhere +#ifndef tcscmp + #ifdef XML_UNICODE + #define tcscmp wcscmp + #else + #define tcscmp strcmp + #endif // XML_UNICODE +#endif // tcscmp + + +#ifndef BUFSIZ + #define BUFSIZ 4096 +#endif + +expatpp::expatpp(bool createParser) : + mParser(0), // in case of exception below + mHaveParsed(false) +{ + if (createParser) { + // subclasses may call this ctor after parser created! + mParser = XML_ParserCreate(0); + SetupHandlers(); + } +} + + +void +expatpp::SetupHandlers() +{ + ::XML_SetUserData(mParser, this); + ::XML_SetElementHandler(mParser, startElementCallback, endElementCallback); + ::XML_SetCharacterDataHandler(mParser, charDataCallback); + ::XML_SetProcessingInstructionHandler(mParser, processingInstructionCallback); + ::XML_SetDefaultHandler(mParser, defaultHandlerCallback); + ::XML_SetUnparsedEntityDeclHandler(mParser, unParsedEntityDeclCallback); + ::XML_SetNotationDeclHandler(mParser, notationDeclCallback); + ::XML_SetNotStandaloneHandler(mParser, notStandaloneHandlerCallback); + ::XML_SetNamespaceDeclHandler(mParser, startNamespaceCallback, endNamespaceCallback); +#ifndef EXPATPP_COMPATIBLE_EXPAT12 + ::XML_SetAttlistDeclHandler(mParser, attlistDeclCallback); + ::XML_SetCdataSectionHandler(mParser, startCdataSectionCallback, endCdataSectionCallback); + ::XML_SetCommentHandler(mParser, commentCallback); + ::XML_SetDoctypeDeclHandler(mParser, startDoctypeDeclCallback, endDoctypeDeclCallback); + ::XML_SetElementDeclHandler(mParser, elementDeclCallback); + ::XML_SetEntityDeclHandler(mParser, entityDeclCallback); + ::XML_SetSkippedEntityHandler(mParser, skippedEntityCallback); + ::XML_SetXmlDeclHandler(mParser, xmlDeclCallback); +#endif +} + + +expatpp::~expatpp() +{ + if (mParser) // allows subclasses to avoid finishing parsing + ReleaseParser(); +} + + +/** + Provide single point that will call XML_ParserFree. + Nothing else in this code should call XML_ParserFree! +*/ +void +expatpp::ReleaseParser() +{ + ::XML_ParserFree(mParser); + mParser = 0; +} + + + +/** + Provide single point that will call XML_ParserReset. + Guarded against trivial reset before use in case that breaks + expat or creates overhead. + + \todo pass in encoding to XML_ParserReset when we support encodings +*/ +void +expatpp::ResetParser() +{ +#ifdef EXPATPP_COMPATIBLE_EXPAT12 + assert(!"Reset not available in earlier than expat 1.95.3");s +#else + /*if (mHaveParsed) { + ::XML_ParserReset(mParser, NULL); + SetupHandlers(); + mHaveParsed = false; + }*/ + if (mHaveParsed) { + ReleaseParser(); + mParser = XML_ParserCreate(0); + SetupHandlers(); + mHaveParsed = false; + } +#endif +} + + +/** + Parse entire file, basically copy of the loop from the elements.c example. +*/ +XML_Status +expatpp::parseFile(FILE* inFile) +{ + ResetParser(); + + char buf[BUFSIZ]; + int done; + if (!inFile) + return XML_STATUS_ERROR; + fseek(inFile, 0, SEEK_SET); // reset for reading + do { + size_t len = fread(buf, 1, sizeof(buf), inFile); + done = len < sizeof(buf); + enum XML_Status parseStatus; + if ((parseStatus = XML_Parse(buf, len, done))!=XML_STATUS_OK) { + return parseStatus; + } + } while (!done); + return XML_STATUS_OK; +} + + +XML_Status +expatpp::XML_Parse(const char *s, int len, int isFinal) +{ + mHaveParsed = true; + const XML_Status retStatus = ::XML_Parse(mParser, s, len, isFinal); + if (isFinal) + CheckFinalStatus(retStatus); + return retStatus; +} + + +XML_Error +expatpp::XML_GetErrorCode() +{ + return ::XML_GetErrorCode(mParser); +} + + +int +expatpp::XML_GetCurrentLineNumber() +{ + return ::XML_GetCurrentLineNumber(mParser); +} + + +int +expatpp::XML_GetCurrentColumnNumber() +{ + return ::XML_GetCurrentColumnNumber(mParser); +} + + + + +/** + Parse string which is assumed to be entire XML document. + Written to stop stupid errors of being off by one in the string length causing + wasted debugging time, such as: +\verbatim + const char[] kSampleSettings = "<settings/>"; + const int sampleSize = sizeof(kSampleSettings)-1; // unless you remember to subtract one here will get invalid token error + if (!parser.XML_Parse(kSampleSettings, sampleSize, 1)) { +\endverbatim +*/ +XML_Status +expatpp::parseString(const char* inString) +{ + ResetParser(); + const int inLen = strlen(inString); + return XML_Parse(inString, inLen, 1); +} + +void +expatpp::startElementCallback(void *userData, const XML_Char* name, const XML_Char** atts) +{ + ((expatpp*)userData)->startElement(name, atts); +} + + +void +expatpp::endElementCallback(void *userData, const XML_Char* name) +{ + ((expatpp*)userData)->endElement(name); +} + + +void +expatpp::startNamespaceCallback(void *userData, const XML_Char* prefix, const XML_Char* uri) +{ + ((expatpp*)userData)->startNamespace(prefix, uri); +} + + +void +expatpp::endNamespaceCallback(void *userData, const XML_Char* prefix) +{ + ((expatpp*)userData)->endNamespace(prefix); +} + + +void +expatpp::charDataCallback(void *userData, const XML_Char* s, int len) +{ + ((expatpp*)userData)->charData(s, len); +} + + +void +expatpp:: processingInstructionCallback(void *userData, const XML_Char* target, const XML_Char* data) +{ + ((expatpp*)userData)->processingInstruction(target, data); +} + + +void +expatpp::defaultHandlerCallback(void* userData, const XML_Char* s, int len) +{ + ((expatpp*)userData)->defaultHandler(s, len); +} + + +int +expatpp::notStandaloneHandlerCallback(void* userData) +{ + return ((expatpp*)userData)->notStandaloneHandler(); +} + + +void +expatpp::unParsedEntityDeclCallback(void* userData, const XML_Char* entityName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId, const XML_Char* notationName) +{ + ((expatpp*)userData)->unparsedEntityDecl(entityName, base, systemId, publicId, notationName); +} + + +void +expatpp::notationDeclCallback(void *userData, const XML_Char* notationName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId) +{ + ((expatpp*)userData)->notationDecl(notationName, base, systemId, publicId); +} + + +void +expatpp::startElement(const XML_Char*, const XML_Char**) +{} + + +void +expatpp::endElement(const XML_Char*) +{} + + +void +expatpp::startNamespace(const XML_Char* /* prefix */, const XML_Char* /* uri */) +{} + + +void +expatpp::endNamespace(const XML_Char*) +{} + + +void +expatpp::charData(const XML_Char*, int ) +{ +} + + +void +expatpp::processingInstruction(const XML_Char*, const XML_Char*) +{ +} + + +void +expatpp::defaultHandler(const XML_Char*, int) +{ +} + + +int +expatpp::notStandaloneHandler() +{ + return 0; +} + + +void +expatpp::unparsedEntityDecl(const XML_Char*, const XML_Char*, const XML_Char*, const XML_Char*, const XML_Char*) +{ +} + + +void +expatpp::notationDecl(const XML_Char*, const XML_Char*, const XML_Char*, const XML_Char*) +{ +} + + +int +expatpp::skipWhiteSpace(const XML_Char* startFrom) +{ + // use our own XML definition of white space + // TO DO - confirm this is correct! + const XML_Char* s = startFrom; + XML_Char c = *s; + while ((c==' ') || (c=='\t') || (c=='\n') || (c=='\r')) { + s++; + c = *s; + } + const int numSkipped = s - startFrom; + return numSkipped; +} + + +/** + Iterate the paired attribute name/value until find a pair with matching name. + \return pointer to the value or null if not found. +*/ +const XML_Char* +expatpp::getAttribute(const XML_Char* matchingName, const XML_Char** atts) +{ + for (int i=0; atts[i]; i++) { + const XML_Char* attributeName = atts[i++]; + assert(attributeName); // shouldn't fail this because of loop test above + if(tcscmp(attributeName, matchingName)==0) { + return atts[i]; // if 2nd item was missing, this returns 0 safely indicating failure + } + } + return 0; +} + + +/** +\bug will always return 0 for PPC +*/ +bool +expatpp::getIntegerAttribute(const XML_Char *matchingName, const XML_Char **atts, int& outAtt) +{ + const XML_Char* attStr = getAttribute(matchingName, atts); + if (!attStr) + return false; + int i=0; +#ifdef XML_UNICODE +fail to compile because need this now +#else + sscanf(attStr, "%d", &i); +#endif + outAtt = i; + return true; +} + + +/** +\bug will always return 0 for PPC +*/ +bool +expatpp::getDoubleAttribute(const XML_Char *matchingName, const XML_Char **atts, double& outAtt) +{ + const XML_Char* attStr = getAttribute(matchingName, atts); + if (!attStr) + return false; + float f = 0.0; // sscanf doesn't allow point to double +#ifdef XML_UNICODE +fail to compile because need this now +#else + sscanf(attStr, "%f", &f); +#endif + outAtt = f; + return true; +} + + +bool +expatpp::emptyCharData(const XML_Char *s, int len) +{ +// usually call from top of overriden charData methods + if (len==0) + return true; //*** early exit - empty string, may never occur?? + +// skip newline and empty whitespace + if ( + ((len==1) && ( (s[0]=='\n') || (s[0]=='\r')) ) || // just CR or just LF + ((len==2) && (s[0]=='\r') && (s[1]=='\n')) // DOS-style CRLF + ) + return true; //*** early exit - newline + + const int lastCharAt = len-1; + if (s[lastCharAt]==' ') { // maybe all whitespace + int i; + for (i=0; i<lastCharAt; i++) { + if (s[i]!=' ') + break; + } + if (i==lastCharAt) + return true; //*** early exit - all spaces + } + return false; +} + + +//-------- Added for expat 1.95.5--------------- +void +expatpp::attlistDeclCallback(void *userData, + const XML_Char *elname, + const XML_Char *attname, + const XML_Char *att_type, + const XML_Char *dflt, + int isrequired) +{ + ((expatpp*)userData)->attlistDecl(elname, attname, att_type, dflt, isrequired); +} + + +void +expatpp::commentCallback(void *userData, const XML_Char *data) +{ + ((expatpp*)userData)->comment(data); +} + + +void +expatpp::elementDeclCallback(void *userData, const XML_Char *name, XML_Content *model) +{ + ((expatpp*)userData)->elementDecl(name, model); +} + + +void +expatpp::endCdataSectionCallback(void *userData) +{ + ((expatpp*)userData)->endCdataSection(); +} + + +void +expatpp::endDoctypeDeclCallback(void *userData) +{ + ((expatpp*)userData)->endDoctypeDecl(); +} + + +void +expatpp::entityDeclCallback(void *userData, + const XML_Char *entityName, + int is_parameter_entity, + const XML_Char *value, + int value_length, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName) +{ + ((expatpp*)userData)->entityDecl(entityName, is_parameter_entity, value, value_length, base, systemId, publicId, notationName); +} + + +void +expatpp::skippedEntityCallback(void *userData, const XML_Char *entityName, int is_parameter_entity) +{ + ((expatpp*)userData)->skippedEntity(entityName, is_parameter_entity); +} + + +void +expatpp::startCdataSectionCallback(void *userData) +{ + ((expatpp*)userData)->startCdataSection(); +} + + +void +expatpp::startDoctypeDeclCallback(void *userData, + const XML_Char *doctypeName, + const XML_Char *sysid, + const XML_Char *pubid, + int has_internal_subset) +{ + ((expatpp*)userData)->startDoctypeDecl(doctypeName, sysid, pubid, has_internal_subset); +} + + +void +expatpp::xmlDeclCallback(void *userData, const XML_Char *version, + const XML_Char *encoding, + int standalone) +{ + ((expatpp*)userData)->xmlDecl(version, encoding, standalone); +} + + +void +expatpp::attlistDecl( + const XML_Char *elname, + const XML_Char *attname, + const XML_Char *att_type, + const XML_Char *dflt, + int isrequired) +{ +} + + +void +expatpp::comment( const XML_Char *data) +{ +} + + +void +expatpp::elementDecl( const XML_Char *name, XML_Content *model) +{ +} + + +void +expatpp::endCdataSection() +{ +} + + +void +expatpp::endDoctypeDecl() +{ +} + + +void +expatpp::entityDecl( + const XML_Char *entityName, + int is_parameter_entity, + const XML_Char *value, + int value_length, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName) +{ +} + + +void +expatpp::skippedEntity( const XML_Char *entityName, int is_parameter_entity) +{ +} + + +void +expatpp::startCdataSection() +{ +} + + +void +expatpp::startDoctypeDecl(const XML_Char *doctypeName, + const XML_Char *sysid, + const XML_Char *pubid, + int has_internal_subset) +{ +} + + +void +expatpp::xmlDecl( const XML_Char *version, + const XML_Char *encoding, + int standalone) +{ +} + + + + +// ------------------------------------------------------- +// e x p a t p p N e s t i n g +// ------------------------------------------------------- +/** + \param parent can be null in which case this is root parser + + \note The handlers set in here MUST be also set in SetupHandlers + which is a virtual method invoked by expatpp::ResetParser. Otherwise + you can have subtle bugs with a nested parser not properly returning + after reusing a parser (nasty and found rapidly only via extensive unit + tests and plentiful assertions!). + + \WARNING + The assumption that is not obvious here is that if you want to use + nested parsers, then your topmost parser must also be an expatppNesting + subclass, NOT an expatpp subclass, because we need the + nestedStartElementCallback and nestedEndElementCallback + callbacks to override those in the expatpp ctor. + + + + \todo go back over code in detail and confirm above warning still valid + I think if we used expat's functions to invoke the registered callback + might be safer - the explicit function call we have in nestedEndElementCallback + certainly assumes the parent type. +*/ +expatppNesting::expatppNesting(expatppNesting* parent) : + expatpp(parent==0), // don't create parser - we're taking over from parent if given + mDepth(0), + mParent(parent), + mOwnedChild(0), + mSelfDeleting(true) +{ + if ( parent ) + { + RegisterWithParentXMLParser(); + parent->AdoptChild(this); + } + else + { + // No parent - the expatpp constructor will have created a new mParser (expat parser) + ::XML_SetElementHandler(mParser, nestedStartElementCallback, nestedEndElementCallback); + } + assert(mParser); // either we created above or expatpp +} + + +expatppNesting::~expatppNesting() +{ + assert(!mParent); // if we are a sub-parser, should not delete without calling returnToParent + DeleteChild(); +} + + +/** + Call parent version then override same as in our ctor. +*/ +void +expatppNesting::SetupHandlers() +{ + expatpp::SetupHandlers(); + ::XML_SetElementHandler(mParser, nestedStartElementCallback, nestedEndElementCallback); +} + +/** + Must use if you have adopted a child parser and want to dispose of it early. +*/ +void +expatppNesting::DeleteChild() +{ + delete mOwnedChild; + mOwnedChild = 0; +} + + +/** + Invoked as a callback from a child ctor when we pass in a parent pointer. + OR used from switchToNewSubParser, in which case it may be the 2nd time + we're called for a given child (see scenarios in expatppNesting class comment). +*/ +void +expatppNesting::AdoptChild(expatppNesting* adoptingChild) +{ + if ( mOwnedChild != adoptingChild ) + { + delete mOwnedChild; + mOwnedChild = adoptingChild; + } +} + + +/** + to use parent's underlying expat parser +*/ +void +expatppNesting::RegisterWithParentXMLParser() +{ + mParser = mParent->mParser; + ::XML_SetUserData(mParser, this); +} + + +/** + User code (typically the startElement handler of user parsers derived from expatppNesting) + may call + switchToNewSubParser( new UserChildParser() ); + to hand off the current document to a child parser that understands the next segment of XML. + Control will be returned to the original (parent) parser when the end of the child element + is reached. + In its lifetime a 'parent' parser may switch control to several child parsers (one at a time + of course) as it moves through the document encoutering various types of child element. + + A child to which older code (eg: OOFILE) has just switched control by + new childParser(this) will be self-deleting and will clear our mOwnedChild in its dtor. +*/ +void expatppNesting::switchToNewSubParser( expatppNesting* pAdoptedChild ) +{ + assert(pAdoptedChild); + AdoptChild(pAdoptedChild); + pAdoptedChild->BeAdopted(this); +} + + +/** + If this is root parser, nestedEndElementCallback won't call returnToParent. + Therefore it is safe to put parsers on the stack. +*/ +expatppNesting* +expatppNesting::returnToParent() +{ + expatppNesting* ret = mParent; + ::XML_SetUserData(mParser, mParent); + mParent=0; + mParser=0; // prevent parser shutdown by expatpp::~expatpp!! + if (mSelfDeleting) { + ret->OwnedChildOrphansItself(this); + delete this; // MUST BE LAST THING CALLED IN NON-VIRTUAL FUNCTION, NO MEMBER ACCESS + } + return ret; +} + + +void +expatppNesting::nestedStartElementCallback(void *userData, const XML_Char* name, const XML_Char** atts) +{ + assert(userData); + expatppNesting* nestedParser = (expatppNesting*)userData; + nestedParser->mDepth++; + nestedParser->startElement(name, atts); // probably user override +} + + +/** + If this is root parser, will never hit nestedEndElementCallback after closing element, + except for when we call it. + \param userData should be non-nil except for specific case of ending root +*/ +void +expatppNesting::nestedEndElementCallback(void *userData, const XML_Char* name) +{ + if (!userData) + return; // end tag for root + + expatppNesting* nestedParser = (expatppNesting*)userData; +// we don't know until we hit a closing tag 'outside' us that our run is done + if (nestedParser->mDepth==0) { + expatppNesting* parentParser = nestedParser->returnToParent(); + nestedEndElementCallback(parentParser, name); // callbacks for expatppNesting stay registered, so safe + //if we don't invoke their callback, they will not balance their mDepth + } + else { + // end of an element this parser has started - normal case + nestedParser->endElement(name); // probably user override + nestedParser->mDepth--; + } +} + + +/** + Called by switchToNewSubParser to indicate a newly created child parser + is now the currently active child for adoptingParent and the child + isn't expected to be self deleting. + + Normal code to create an owned child would be either + switchToNewSubParser( new UserChildParser(this) ); + where this is the currently active parser and you want to be deleting it, or + new UserChildParser(this); + to have a child parser self-delete + + \par Important Safety Note + Copes with the situation of people forgetting to pass + in the parent parser (and hence creating a new one by default) + if invoked by switchToNewSubParser( new UserChildParser() ) + by somewhat wastefully deleting the parser created in expatpp::expatpp + by us being a root parser. +*/ +void +expatppNesting::BeAdopted(expatppNesting* adoptingParent) +{ + if (mParent) { + assert(mParent==adoptingParent); + } + else { // root parser being adopted, cleanup! + ReleaseParser(); + mParent = adoptingParent; + RegisterWithParentXMLParser(); + } + mSelfDeleting = false; +} + + + +