Paul Cercueil / libxml2

Dependents:   libiio

Committer:
pcercuei
Date:
Thu Aug 25 10:05:35 2016 +0000
Revision:
0:03b5121a232e
Add basic C files of libxml2 2.9.4

Who changed what in which revision?

User | Revision | Line number | New contents of line
pcercuei 0:03b5121a232e 1 /*
pcercuei 0:03b5121a232e 2 * Summary: interface for an HTML 4.0 non-verifying parser
pcercuei 0:03b5121a232e 3 * Description: this module implements an HTML 4.0 non-verifying parser
pcercuei 0:03b5121a232e 4 * with API compatible with the XML parser ones. It should
pcercuei 0:03b5121a232e 5 * be able to parse "real world" HTML, even if severely
pcercuei 0:03b5121a232e 6 * broken from a specification point of view.
pcercuei 0:03b5121a232e 7 *
pcercuei 0:03b5121a232e 8 * Copy: See Copyright for the status of this software.
pcercuei 0:03b5121a232e 9 *
pcercuei 0:03b5121a232e 10 * Author: Daniel Veillard
pcercuei 0:03b5121a232e 11 */
pcercuei 0:03b5121a232e 12
pcercuei 0:03b5121a232e 13 #ifndef __HTML_PARSER_H__
pcercuei 0:03b5121a232e 14 #define __HTML_PARSER_H__
pcercuei 0:03b5121a232e 15 #include <libxml/xmlversion.h>
pcercuei 0:03b5121a232e 16 #include <libxml/parser.h>
pcercuei 0:03b5121a232e 17
pcercuei 0:03b5121a232e 18 #ifdef LIBXML_HTML_ENABLED
pcercuei 0:03b5121a232e 19
pcercuei 0:03b5121a232e 20 #ifdef __cplusplus
pcercuei 0:03b5121a232e 21 extern "C" {
pcercuei 0:03b5121a232e 22 #endif
pcercuei 0:03b5121a232e 23
pcercuei 0:03b5121a232e 24 /*
pcercuei 0:03b5121a232e 25 * Most of the back-end structures from XML and HTML are shared.
pcercuei 0:03b5121a232e 26 */
pcercuei 0:03b5121a232e 27 typedef xmlParserCtxt htmlParserCtxt;
pcercuei 0:03b5121a232e 28 typedef xmlParserCtxtPtr htmlParserCtxtPtr;
pcercuei 0:03b5121a232e 29 typedef xmlParserNodeInfo htmlParserNodeInfo;
pcercuei 0:03b5121a232e 30 typedef xmlSAXHandler htmlSAXHandler;
pcercuei 0:03b5121a232e 31 typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
pcercuei 0:03b5121a232e 32 typedef xmlParserInput htmlParserInput;
pcercuei 0:03b5121a232e 33 typedef xmlParserInputPtr htmlParserInputPtr;
pcercuei 0:03b5121a232e 34 typedef xmlDocPtr htmlDocPtr;
pcercuei 0:03b5121a232e 35 typedef xmlNodePtr htmlNodePtr;
pcercuei 0:03b5121a232e 36
pcercuei 0:03b5121a232e 37 /*
pcercuei 0:03b5121a232e 38 * Internal description of an HTML element, covering both HTML 4.01
pcercuei 0:03b5121a232e 39 * and XHTML 1.0 (which share the same structure).
pcercuei 0:03b5121a232e 40 */
pcercuei 0:03b5121a232e 41 typedef struct _htmlElemDesc htmlElemDesc;
pcercuei 0:03b5121a232e 42 typedef htmlElemDesc *htmlElemDescPtr;
pcercuei 0:03b5121a232e 43 struct _htmlElemDesc {
pcercuei 0:03b5121a232e 44 const char *name; /* The tag name */
pcercuei 0:03b5121a232e 45 char startTag; /* Whether the start tag can be implied */
pcercuei 0:03b5121a232e 46 char endTag; /* Whether the end tag can be implied */
pcercuei 0:03b5121a232e 47 char saveEndTag; /* Whether the end tag should be saved */
pcercuei 0:03b5121a232e 48 char empty; /* Whether this is an empty element */
pcercuei 0:03b5121a232e 49 char depr; /* Whether this is a deprecated element */
pcercuei 0:03b5121a232e 50 char dtd; /* 1: only in the Loose DTD, 2: only in the Frameset DTD */
pcercuei 0:03b5121a232e 51 char isinline; /* 0 for a block element, 1 for an inline element */
pcercuei 0:03b5121a232e 52 const char *desc; /* Description of the element */
pcercuei 0:03b5121a232e 53
pcercuei 0:03b5121a232e 54 /* NRK Jan.2003
pcercuei 0:03b5121a232e 55 * New fields encapsulating HTML structure
pcercuei 0:03b5121a232e 56 *
pcercuei 0:03b5121a232e 57 * Bugs:
pcercuei 0:03b5121a232e 58 * This is a very limited representation. It fails to tell us when
pcercuei 0:03b5121a232e 59 * an element *requires* subelements (we only have whether they're
pcercuei 0:03b5121a232e 60 * allowed or not), and it doesn't tell us where CDATA and PCDATA
pcercuei 0:03b5121a232e 61 * are allowed. Some element relationships are not fully represented:
pcercuei 0:03b5121a232e 62 * these are flagged with the word MODIFIER.
pcercuei 0:03b5121a232e 63 */
pcercuei 0:03b5121a232e 64 const char** subelts; /* Sub-elements allowed inside this element */
pcercuei 0:03b5121a232e 65 const char* defaultsubelt; /* Suggested sub-element for auto-repair
pcercuei 0:03b5121a232e 66 if necessary, or NULL */
pcercuei 0:03b5121a232e 67 const char** attrs_opt; /* Optional attributes */
pcercuei 0:03b5121a232e 68 const char** attrs_depr; /* Additional deprecated attributes */
pcercuei 0:03b5121a232e 69 const char** attrs_req; /* Required attributes */
pcercuei 0:03b5121a232e 70 };
pcercuei 0:03b5121a232e 71
pcercuei 0:03b5121a232e 72 /*
pcercuei 0:03b5121a232e 73 * Internal description of an HTML entity.
pcercuei 0:03b5121a232e 74 */
pcercuei 0:03b5121a232e 75 typedef struct _htmlEntityDesc htmlEntityDesc;
pcercuei 0:03b5121a232e 76 typedef htmlEntityDesc *htmlEntityDescPtr;
pcercuei 0:03b5121a232e 77 struct _htmlEntityDesc {
pcercuei 0:03b5121a232e 78 unsigned int value; /* The Unicode value of the character */
pcercuei 0:03b5121a232e 79 const char *name; /* The entity name */
pcercuei 0:03b5121a232e 80 const char *desc; /* Description of the entity */
pcercuei 0:03b5121a232e 81 };
pcercuei 0:03b5121a232e 82
pcercuei 0:03b5121a232e 83 /*
pcercuei 0:03b5121a232e 84 * There are only a few public functions.
pcercuei 0:03b5121a232e 85 */
pcercuei 0:03b5121a232e 86 XMLPUBFUN const htmlElemDesc * XMLCALL
pcercuei 0:03b5121a232e 87 htmlTagLookup (const xmlChar *tag);
pcercuei 0:03b5121a232e 88 XMLPUBFUN const htmlEntityDesc * XMLCALL
pcercuei 0:03b5121a232e 89 htmlEntityLookup(const xmlChar *name);
pcercuei 0:03b5121a232e 90 XMLPUBFUN const htmlEntityDesc * XMLCALL
pcercuei 0:03b5121a232e 91 htmlEntityValueLookup(unsigned int value);
pcercuei 0:03b5121a232e 92
pcercuei 0:03b5121a232e 93 XMLPUBFUN int XMLCALL
pcercuei 0:03b5121a232e 94 htmlIsAutoClosed(htmlDocPtr doc,
pcercuei 0:03b5121a232e 95 htmlNodePtr elem);
pcercuei 0:03b5121a232e 96 XMLPUBFUN int XMLCALL
pcercuei 0:03b5121a232e 97 htmlAutoCloseTag(htmlDocPtr doc,
pcercuei 0:03b5121a232e 98 const xmlChar *name,
pcercuei 0:03b5121a232e 99 htmlNodePtr elem);
pcercuei 0:03b5121a232e 100 XMLPUBFUN const htmlEntityDesc * XMLCALL
pcercuei 0:03b5121a232e 101 htmlParseEntityRef(htmlParserCtxtPtr ctxt,
pcercuei 0:03b5121a232e 102 const xmlChar **str);
pcercuei 0:03b5121a232e 103 XMLPUBFUN int XMLCALL
pcercuei 0:03b5121a232e 104 htmlParseCharRef(htmlParserCtxtPtr ctxt);
pcercuei 0:03b5121a232e 105 XMLPUBFUN void XMLCALL
pcercuei 0:03b5121a232e 106 htmlParseElement(htmlParserCtxtPtr ctxt);
pcercuei 0:03b5121a232e 107
pcercuei 0:03b5121a232e 108 XMLPUBFUN htmlParserCtxtPtr XMLCALL
pcercuei 0:03b5121a232e 109 htmlNewParserCtxt(void);
pcercuei 0:03b5121a232e 110
pcercuei 0:03b5121a232e 111 XMLPUBFUN htmlParserCtxtPtr XMLCALL
pcercuei 0:03b5121a232e 112 htmlCreateMemoryParserCtxt(const char *buffer,
pcercuei 0:03b5121a232e 113 int size);
pcercuei 0:03b5121a232e 114
pcercuei 0:03b5121a232e 115 XMLPUBFUN int XMLCALL
pcercuei 0:03b5121a232e 116 htmlParseDocument(htmlParserCtxtPtr ctxt);
pcercuei 0:03b5121a232e 117 XMLPUBFUN htmlDocPtr XMLCALL
pcercuei 0:03b5121a232e 118 htmlSAXParseDoc (xmlChar *cur,
pcercuei 0:03b5121a232e 119 const char *encoding,
pcercuei 0:03b5121a232e 120 htmlSAXHandlerPtr sax,
pcercuei 0:03b5121a232e 121 void *userData);
pcercuei 0:03b5121a232e 122 XMLPUBFUN htmlDocPtr XMLCALL
pcercuei 0:03b5121a232e 123 htmlParseDoc (xmlChar *cur,
pcercuei 0:03b5121a232e 124 const char *encoding);
pcercuei 0:03b5121a232e 125 XMLPUBFUN htmlDocPtr XMLCALL
pcercuei 0:03b5121a232e 126 htmlSAXParseFile(const char *filename,
pcercuei 0:03b5121a232e 127 const char *encoding,
pcercuei 0:03b5121a232e 128 htmlSAXHandlerPtr sax,
pcercuei 0:03b5121a232e 129 void *userData);
pcercuei 0:03b5121a232e 130 XMLPUBFUN htmlDocPtr XMLCALL
pcercuei 0:03b5121a232e 131 htmlParseFile (const char *filename,
pcercuei 0:03b5121a232e 132 const char *encoding);
pcercuei 0:03b5121a232e 133 XMLPUBFUN int XMLCALL
pcercuei 0:03b5121a232e 134 UTF8ToHtml (unsigned char *out,
pcercuei 0:03b5121a232e 135 int *outlen,
pcercuei 0:03b5121a232e 136 const unsigned char *in,
pcercuei 0:03b5121a232e 137 int *inlen);
pcercuei 0:03b5121a232e 138 XMLPUBFUN int XMLCALL
pcercuei 0:03b5121a232e 139 htmlEncodeEntities(unsigned char *out,
pcercuei 0:03b5121a232e 140 int *outlen,
pcercuei 0:03b5121a232e 141 const unsigned char *in,
pcercuei 0:03b5121a232e 142 int *inlen, int quoteChar);
pcercuei 0:03b5121a232e 143 XMLPUBFUN int XMLCALL
pcercuei 0:03b5121a232e 144 htmlIsScriptAttribute(const xmlChar *name);
pcercuei 0:03b5121a232e 145 XMLPUBFUN int XMLCALL
pcercuei 0:03b5121a232e 146 htmlHandleOmittedElem(int val);
pcercuei 0:03b5121a232e 147
pcercuei 0:03b5121a232e 148 #ifdef LIBXML_PUSH_ENABLED
pcercuei 0:03b5121a232e 149 /**
pcercuei 0:03b5121a232e 150 * Interfaces for the Push mode.
pcercuei 0:03b5121a232e 151 */
pcercuei 0:03b5121a232e 152 XMLPUBFUN htmlParserCtxtPtr XMLCALL
pcercuei 0:03b5121a232e 153 htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
pcercuei 0:03b5121a232e 154 void *user_data,
pcercuei 0:03b5121a232e 155 const char *chunk,
pcercuei 0:03b5121a232e 156 int size,
pcercuei 0:03b5121a232e 157 const char *filename,
pcercuei 0:03b5121a232e 158 xmlCharEncoding enc);
pcercuei 0:03b5121a232e 159 XMLPUBFUN int XMLCALL
pcercuei 0:03b5121a232e 160 htmlParseChunk (htmlParserCtxtPtr ctxt,
pcercuei 0:03b5121a232e 161 const char *chunk,
pcercuei 0:03b5121a232e 162 int size,
pcercuei 0:03b5121a232e 163 int terminate);
pcercuei 0:03b5121a232e 164 #endif /* LIBXML_PUSH_ENABLED */
pcercuei 0:03b5121a232e 165
pcercuei 0:03b5121a232e 166 XMLPUBFUN void XMLCALL
pcercuei 0:03b5121a232e 167 htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
pcercuei 0:03b5121a232e 168
pcercuei 0:03b5121a232e 169 /*
pcercuei 0:03b5121a232e 170 * New set of simpler/more flexible APIs
pcercuei 0:03b5121a232e 171 */
pcercuei 0:03b5121a232e 172 /**
pcercuei 0:03b5121a232e 173 * htmlParserOption:
pcercuei 0:03b5121a232e 174 *
pcercuei 0:03b5121a232e 175 * This is the set of HTML parser options that can be passed down
pcercuei 0:03b5121a232e 176 * to the htmlReadDoc() and similar calls.
pcercuei 0:03b5121a232e 177 */
pcercuei 0:03b5121a232e 178 typedef enum {
pcercuei 0:03b5121a232e 179 HTML_PARSE_RECOVER = 1<<0, /* Relaxed parsing */
pcercuei 0:03b5121a232e 180 HTML_PARSE_NODEFDTD = 1<<2, /* do not default a doctype if not found */
pcercuei 0:03b5121a232e 181 HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */
pcercuei 0:03b5121a232e 182 HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */
pcercuei 0:03b5121a232e 183 HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
pcercuei 0:03b5121a232e 184 HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
pcercuei 0:03b5121a232e 185 HTML_PARSE_NONET = 1<<11,/* Forbid network access */
pcercuei 0:03b5121a232e 186 HTML_PARSE_NOIMPLIED= 1<<13,/* Do not add implied html/body... elements */
pcercuei 0:03b5121a232e 187 HTML_PARSE_COMPACT = 1<<16,/* compact small text nodes */
pcercuei 0:03b5121a232e 188 HTML_PARSE_IGNORE_ENC=1<<21 /* ignore internal document encoding hint */
pcercuei 0:03b5121a232e 189 } htmlParserOption;
pcercuei 0:03b5121a232e 190
pcercuei 0:03b5121a232e 191 XMLPUBFUN void XMLCALL
pcercuei 0:03b5121a232e 192 htmlCtxtReset (htmlParserCtxtPtr ctxt);
pcercuei 0:03b5121a232e 193 XMLPUBFUN int XMLCALL
pcercuei 0:03b5121a232e 194 htmlCtxtUseOptions (htmlParserCtxtPtr ctxt,
pcercuei 0:03b5121a232e 195 int options);
pcercuei 0:03b5121a232e 196 XMLPUBFUN htmlDocPtr XMLCALL
pcercuei 0:03b5121a232e 197 htmlReadDoc (const xmlChar *cur,
pcercuei 0:03b5121a232e 198 const char *URL,
pcercuei 0:03b5121a232e 199 const char *encoding,
pcercuei 0:03b5121a232e 200 int options);
pcercuei 0:03b5121a232e 201 XMLPUBFUN htmlDocPtr XMLCALL
pcercuei 0:03b5121a232e 202 htmlReadFile (const char *URL,
pcercuei 0:03b5121a232e 203 const char *encoding,
pcercuei 0:03b5121a232e 204 int options);
pcercuei 0:03b5121a232e 205 XMLPUBFUN htmlDocPtr XMLCALL
pcercuei 0:03b5121a232e 206 htmlReadMemory (const char *buffer,
pcercuei 0:03b5121a232e 207 int size,
pcercuei 0:03b5121a232e 208 const char *URL,
pcercuei 0:03b5121a232e 209 const char *encoding,
pcercuei 0:03b5121a232e 210 int options);
pcercuei 0:03b5121a232e 211 XMLPUBFUN htmlDocPtr XMLCALL
pcercuei 0:03b5121a232e 212 htmlReadFd (int fd,
pcercuei 0:03b5121a232e 213 const char *URL,
pcercuei 0:03b5121a232e 214 const char *encoding,
pcercuei 0:03b5121a232e 215 int options);
pcercuei 0:03b5121a232e 216 XMLPUBFUN htmlDocPtr XMLCALL
pcercuei 0:03b5121a232e 217 htmlReadIO (xmlInputReadCallback ioread,
pcercuei 0:03b5121a232e 218 xmlInputCloseCallback ioclose,
pcercuei 0:03b5121a232e 219 void *ioctx,
pcercuei 0:03b5121a232e 220 const char *URL,
pcercuei 0:03b5121a232e 221 const char *encoding,
pcercuei 0:03b5121a232e 222 int options);
pcercuei 0:03b5121a232e 223 XMLPUBFUN htmlDocPtr XMLCALL
pcercuei 0:03b5121a232e 224 htmlCtxtReadDoc (xmlParserCtxtPtr ctxt,
pcercuei 0:03b5121a232e 225 const xmlChar *cur,
pcercuei 0:03b5121a232e 226 const char *URL,
pcercuei 0:03b5121a232e 227 const char *encoding,
pcercuei 0:03b5121a232e 228 int options);
pcercuei 0:03b5121a232e 229 XMLPUBFUN htmlDocPtr XMLCALL
pcercuei 0:03b5121a232e 230 htmlCtxtReadFile (xmlParserCtxtPtr ctxt,
pcercuei 0:03b5121a232e 231 const char *filename,
pcercuei 0:03b5121a232e 232 const char *encoding,
pcercuei 0:03b5121a232e 233 int options);
pcercuei 0:03b5121a232e 234 XMLPUBFUN htmlDocPtr XMLCALL
pcercuei 0:03b5121a232e 235 htmlCtxtReadMemory (xmlParserCtxtPtr ctxt,
pcercuei 0:03b5121a232e 236 const char *buffer,
pcercuei 0:03b5121a232e 237 int size,
pcercuei 0:03b5121a232e 238 const char *URL,
pcercuei 0:03b5121a232e 239 const char *encoding,
pcercuei 0:03b5121a232e 240 int options);
pcercuei 0:03b5121a232e 241 XMLPUBFUN htmlDocPtr XMLCALL
pcercuei 0:03b5121a232e 242 htmlCtxtReadFd (xmlParserCtxtPtr ctxt,
pcercuei 0:03b5121a232e 243 int fd,
pcercuei 0:03b5121a232e 244 const char *URL,
pcercuei 0:03b5121a232e 245 const char *encoding,
pcercuei 0:03b5121a232e 246 int options);
pcercuei 0:03b5121a232e 247 XMLPUBFUN htmlDocPtr XMLCALL
pcercuei 0:03b5121a232e 248 htmlCtxtReadIO (xmlParserCtxtPtr ctxt,
pcercuei 0:03b5121a232e 249 xmlInputReadCallback ioread,
pcercuei 0:03b5121a232e 250 xmlInputCloseCallback ioclose,
pcercuei 0:03b5121a232e 251 void *ioctx,
pcercuei 0:03b5121a232e 252 const char *URL,
pcercuei 0:03b5121a232e 253 const char *encoding,
pcercuei 0:03b5121a232e 254 int options);
pcercuei 0:03b5121a232e 255
pcercuei 0:03b5121a232e 256 /* NRK/Jan2003: further knowledge of HTML structure.
pcercuei 0:03b5121a232e 257 * Return type of htmlAttrAllowed(), htmlElementStatusHere() and htmlNodeStatus(). */
pcercuei 0:03b5121a232e 258 typedef enum {
pcercuei 0:03b5121a232e 259 HTML_NA = 0 , /* something we don't check at all */
pcercuei 0:03b5121a232e 260 HTML_INVALID = 0x1 ,
pcercuei 0:03b5121a232e 261 HTML_DEPRECATED = 0x2 ,
pcercuei 0:03b5121a232e 262 HTML_VALID = 0x4 ,
pcercuei 0:03b5121a232e 263 HTML_REQUIRED = 0xc /* 0x8 | HTML_VALID: VALID bit set so ( & HTML_VALID ) is TRUE */
pcercuei 0:03b5121a232e 264 } htmlStatus ;
pcercuei 0:03b5121a232e 265
pcercuei 0:03b5121a232e 266 /* Using htmlElemDesc rather than name here, to emphasise the fact
pcercuei 0:03b5121a232e 267 that otherwise there's a lookup overhead
pcercuei 0:03b5121a232e 268 */
pcercuei 0:03b5121a232e 269 XMLPUBFUN htmlStatus XMLCALL htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ;
pcercuei 0:03b5121a232e 270 XMLPUBFUN int XMLCALL htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ;
pcercuei 0:03b5121a232e 271 XMLPUBFUN htmlStatus XMLCALL htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ;
pcercuei 0:03b5121a232e 272 XMLPUBFUN htmlStatus XMLCALL htmlNodeStatus(const htmlNodePtr, int) ;
pcercuei 0:03b5121a232e 273 /**
pcercuei 0:03b5121a232e 274 * htmlDefaultSubelement:
pcercuei 0:03b5121a232e 275 * @elt: pointer to an HTML element description (any pointer expression)
pcercuei 0:03b5121a232e 276 *
pcercuei 0:03b5121a232e 277 * Returns the element's defaultsubelt field (may be NULL).
pcercuei 0:03b5121a232e 278 */
pcercuei 0:03b5121a232e 279 #define htmlDefaultSubelement(elt) (elt)->defaultsubelt
pcercuei 0:03b5121a232e 280 /**
pcercuei 0:03b5121a232e 281 * htmlElementAllowedHereDesc:
pcercuei 0:03b5121a232e 282 * @parent: HTML parent element
pcercuei 0:03b5121a232e 283 * @elt: HTML element
pcercuei 0:03b5121a232e 284 *
pcercuei 0:03b5121a232e 285 * Checks whether an HTML element description may be a
pcercuei 0:03b5121a232e 286 * direct child of the specified element.
pcercuei 0:03b5121a232e 287 *
pcercuei 0:03b5121a232e 288 * Returns 1 if allowed; 0 otherwise.
pcercuei 0:03b5121a232e 289 */
pcercuei 0:03b5121a232e 290 #define htmlElementAllowedHereDesc(parent,elt) \
pcercuei 0:03b5121a232e 291 htmlElementAllowedHere((parent), (elt)->name)
pcercuei 0:03b5121a232e 292 /**
pcercuei 0:03b5121a232e 293 * htmlRequiredAttrs:
pcercuei 0:03b5121a232e 294 * @elt: HTML element
pcercuei 0:03b5121a232e 295 *
pcercuei 0:03b5121a232e 296 * Returns the attributes required for the specified element.
pcercuei 0:03b5121a232e 297 */
pcercuei 0:03b5121a232e 298 #define htmlRequiredAttrs(elt) (elt)->attrs_req
pcercuei 0:03b5121a232e 299
pcercuei 0:03b5121a232e 300
pcercuei 0:03b5121a232e 301 #ifdef __cplusplus
pcercuei 0:03b5121a232e 302 }
pcercuei 0:03b5121a232e 303 #endif
pcercuei 0:03b5121a232e 304
pcercuei 0:03b5121a232e 305 #endif /* LIBXML_HTML_ENABLED */
pcercuei 0:03b5121a232e 306 #endif /* __HTML_PARSER_H__ */
pcercuei 0:03b5121a232e 307