SAX based XML parser
Dependents: giken9_HTMLServer_Temp_Sample
xmltok_impl.c@0:07919e3d6c56, 2011-04-08 (annotated)
- Committer:
- andrewbonney
- Date:
- Fri Apr 08 09:18:41 2011 +0000
- Revision:
- 0:07919e3d6c56
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
andrewbonney | 0:07919e3d6c56 | 1 | /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd |
andrewbonney | 0:07919e3d6c56 | 2 | See the file COPYING for copying permission. |
andrewbonney | 0:07919e3d6c56 | 3 | */ |
andrewbonney | 0:07919e3d6c56 | 4 | |
andrewbonney | 0:07919e3d6c56 | 5 | /* This file is included! */ |
andrewbonney | 0:07919e3d6c56 | 6 | #pragma diag_suppress 111 |
andrewbonney | 0:07919e3d6c56 | 7 | |
andrewbonney | 0:07919e3d6c56 | 8 | #ifdef XML_TOK_IMPL_C |
andrewbonney | 0:07919e3d6c56 | 9 | /* Fallback used when the including file has not defined IS_INVALID_CHAR: no multi-byte sequence is flagged as invalid. */ |
andrewbonney | 0:07919e3d6c56 | 10 | #ifndef IS_INVALID_CHAR |
andrewbonney | 0:07919e3d6c56 | 11 | #define IS_INVALID_CHAR(enc, ptr, n) (0) |
andrewbonney | 0:07919e3d6c56 | 12 | #endif |
andrewbonney | 0:07919e3d6c56 | 13 | /* Switch case for an n-byte lead byte. Returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain before end, returns XML_TOK_INVALID (with *nextTokPtr at the sequence) when IS_INVALID_CHAR rejects it, and otherwise advances ptr by n. Expects enc and end to be in scope where it is expanded. */ |
andrewbonney | 0:07919e3d6c56 | 14 | #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ |
andrewbonney | 0:07919e3d6c56 | 15 | case BT_LEAD ## n: \ |
andrewbonney | 0:07919e3d6c56 | 16 | if (end - ptr < n) \ |
andrewbonney | 0:07919e3d6c56 | 17 | return XML_TOK_PARTIAL_CHAR; \ |
andrewbonney | 0:07919e3d6c56 | 18 | if (IS_INVALID_CHAR(enc, ptr, n)) { \ |
andrewbonney | 0:07919e3d6c56 | 19 | *(nextTokPtr) = (ptr); \ |
andrewbonney | 0:07919e3d6c56 | 20 | return XML_TOK_INVALID; \ |
andrewbonney | 0:07919e3d6c56 | 21 | } \ |
andrewbonney | 0:07919e3d6c56 | 22 | ptr += n; \ |
andrewbonney | 0:07919e3d6c56 | 23 | break; |
andrewbonney | 0:07919e3d6c56 | 24 | /* Switch cases that validate multi-byte characters and reject illegal bytes: 2-, 3- and 4-byte lead sequences are checked through INVALID_LEAD_CASE, while BT_NONXML, BT_MALFORM and BT_TRAIL return XML_TOK_INVALID with *nextTokPtr set to ptr. */ |
andrewbonney | 0:07919e3d6c56 | 25 | #define INVALID_CASES(ptr, nextTokPtr) \ |
andrewbonney | 0:07919e3d6c56 | 26 | INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ |
andrewbonney | 0:07919e3d6c56 | 27 | INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ |
andrewbonney | 0:07919e3d6c56 | 28 | INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ |
andrewbonney | 0:07919e3d6c56 | 29 | case BT_NONXML: \ |
andrewbonney | 0:07919e3d6c56 | 30 | case BT_MALFORM: \ |
andrewbonney | 0:07919e3d6c56 | 31 | case BT_TRAIL: \ |
andrewbonney | 0:07919e3d6c56 | 32 | *(nextTokPtr) = (ptr); \ |
andrewbonney | 0:07919e3d6c56 | 33 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 34 | /* Switch case for an n-byte lead byte inside a name: returns XML_TOK_PARTIAL_CHAR if fewer than n bytes remain before end, returns XML_TOK_INVALID at ptr unless IS_NAME_CHAR accepts the sequence, and otherwise advances ptr by n. */ |
andrewbonney | 0:07919e3d6c56 | 35 | #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ |
andrewbonney | 0:07919e3d6c56 | 36 | case BT_LEAD ## n: \ |
andrewbonney | 0:07919e3d6c56 | 37 | if (end - ptr < n) \ |
andrewbonney | 0:07919e3d6c56 | 38 | return XML_TOK_PARTIAL_CHAR; \ |
andrewbonney | 0:07919e3d6c56 | 39 | if (!IS_NAME_CHAR(enc, ptr, n)) { \ |
andrewbonney | 0:07919e3d6c56 | 40 | *nextTokPtr = ptr; \ |
andrewbonney | 0:07919e3d6c56 | 41 | return XML_TOK_INVALID; \ |
andrewbonney | 0:07919e3d6c56 | 42 | } \ |
andrewbonney | 0:07919e3d6c56 | 43 | ptr += n; \ |
andrewbonney | 0:07919e3d6c56 | 44 | break; |
andrewbonney | 0:07919e3d6c56 | 45 | /* Switch cases that consume one name character. BT_NONASCII is first checked with IS_NAME_CHAR_MINBPC and then deliberately falls through to the single-unit cases, which advance ptr by MINBPC(enc); multi-byte lead bytes are delegated to CHECK_NAME_CASE. */ |
andrewbonney | 0:07919e3d6c56 | 46 | #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ |
andrewbonney | 0:07919e3d6c56 | 47 | case BT_NONASCII: \ |
andrewbonney | 0:07919e3d6c56 | 48 | if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \ |
andrewbonney | 0:07919e3d6c56 | 49 | *nextTokPtr = ptr; \ |
andrewbonney | 0:07919e3d6c56 | 50 | return XML_TOK_INVALID; \ |
andrewbonney | 0:07919e3d6c56 | 51 | } /* accepted: fall through and advance */ \ |
andrewbonney | 0:07919e3d6c56 | 52 | case BT_NMSTRT: \ |
andrewbonney | 0:07919e3d6c56 | 53 | case BT_HEX: \ |
andrewbonney | 0:07919e3d6c56 | 54 | case BT_DIGIT: \ |
andrewbonney | 0:07919e3d6c56 | 55 | case BT_NAME: \ |
andrewbonney | 0:07919e3d6c56 | 56 | case BT_MINUS: \ |
andrewbonney | 0:07919e3d6c56 | 57 | ptr += MINBPC(enc); \ |
andrewbonney | 0:07919e3d6c56 | 58 | break; \ |
andrewbonney | 0:07919e3d6c56 | 59 | CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ |
andrewbonney | 0:07919e3d6c56 | 60 | CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ |
andrewbonney | 0:07919e3d6c56 | 61 | CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 62 | /* Switch case for an n-byte lead byte at the start of a name: returns XML_TOK_PARTIAL_CHAR if fewer than n bytes remain before end, returns XML_TOK_INVALID at ptr unless IS_NMSTRT_CHAR accepts the sequence, and otherwise advances ptr by n. */ |
andrewbonney | 0:07919e3d6c56 | 63 | #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ |
andrewbonney | 0:07919e3d6c56 | 64 | case BT_LEAD ## n: \ |
andrewbonney | 0:07919e3d6c56 | 65 | if (end - ptr < n) \ |
andrewbonney | 0:07919e3d6c56 | 66 | return XML_TOK_PARTIAL_CHAR; \ |
andrewbonney | 0:07919e3d6c56 | 67 | if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \ |
andrewbonney | 0:07919e3d6c56 | 68 | *nextTokPtr = ptr; \ |
andrewbonney | 0:07919e3d6c56 | 69 | return XML_TOK_INVALID; \ |
andrewbonney | 0:07919e3d6c56 | 70 | } \ |
andrewbonney | 0:07919e3d6c56 | 71 | ptr += n; \ |
andrewbonney | 0:07919e3d6c56 | 72 | break; |
andrewbonney | 0:07919e3d6c56 | 73 | /* Switch cases that consume one name-start character. BT_NONASCII is first checked with IS_NMSTRT_CHAR_MINBPC and then deliberately falls through to BT_NMSTRT and BT_HEX, which advance ptr by MINBPC(enc); multi-byte lead bytes are delegated to CHECK_NMSTRT_CASE. */ |
andrewbonney | 0:07919e3d6c56 | 74 | #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ |
andrewbonney | 0:07919e3d6c56 | 75 | case BT_NONASCII: \ |
andrewbonney | 0:07919e3d6c56 | 76 | if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ |
andrewbonney | 0:07919e3d6c56 | 77 | *nextTokPtr = ptr; \ |
andrewbonney | 0:07919e3d6c56 | 78 | return XML_TOK_INVALID; \ |
andrewbonney | 0:07919e3d6c56 | 79 | } /* accepted: fall through and advance */ \ |
andrewbonney | 0:07919e3d6c56 | 80 | case BT_NMSTRT: \ |
andrewbonney | 0:07919e3d6c56 | 81 | case BT_HEX: \ |
andrewbonney | 0:07919e3d6c56 | 82 | ptr += MINBPC(enc); \ |
andrewbonney | 0:07919e3d6c56 | 83 | break; \ |
andrewbonney | 0:07919e3d6c56 | 84 | CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ |
andrewbonney | 0:07919e3d6c56 | 85 | CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ |
andrewbonney | 0:07919e3d6c56 | 86 | CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 87 | /* Function names below are wrapped in PREFIX(); unless PREFIX was already defined before this point, it leaves the identifier unchanged. */ |
andrewbonney | 0:07919e3d6c56 | 88 | #ifndef PREFIX |
andrewbonney | 0:07919e3d6c56 | 89 | #define PREFIX(ident) ident |
andrewbonney | 0:07919e3d6c56 | 90 | #endif |
andrewbonney | 0:07919e3d6c56 | 91 | |
andrewbonney | 0:07919e3d6c56 | 92 | /* ptr points to character following "<!-" */ |
andrewbonney | 0:07919e3d6c56 | 93 | /* Scans the remainder of a comment. Returns XML_TOK_COMMENT with *nextTokPtr just past the closing ASCII_GT; XML_TOK_INVALID with *nextTokPtr at the offending character when the second ASCII_MINUS of the opener is missing, an illegal byte is met, or two ASCII_MINUS in a row are not followed by ASCII_GT; XML_TOK_PARTIAL when the input ends first (XML_TOK_PARTIAL_CHAR inside a truncated multi-byte character). */ |
andrewbonney | 0:07919e3d6c56 | 94 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 95 | PREFIX(scanComment)(const ENCODING *enc, const char *ptr, |
andrewbonney | 0:07919e3d6c56 | 96 | const char *end, const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 97 | { |
andrewbonney | 0:07919e3d6c56 | 98 | if (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 99 | if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { /* the opener must be completed by a second ASCII_MINUS */ |
andrewbonney | 0:07919e3d6c56 | 100 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 101 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 102 | } |
andrewbonney | 0:07919e3d6c56 | 103 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 104 | while (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 105 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 106 | INVALID_CASES(ptr, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 107 | case BT_MINUS: |
andrewbonney | 0:07919e3d6c56 | 108 | if ((ptr += MINBPC(enc)) == end) /* step over the first ASCII_MINUS */ |
andrewbonney | 0:07919e3d6c56 | 109 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 110 | if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { /* two in a row: only ASCII_GT may follow */ |
andrewbonney | 0:07919e3d6c56 | 111 | if ((ptr += MINBPC(enc)) == end) |
andrewbonney | 0:07919e3d6c56 | 112 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 113 | if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
andrewbonney | 0:07919e3d6c56 | 114 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 115 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 116 | } |
andrewbonney | 0:07919e3d6c56 | 117 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 118 | return XML_TOK_COMMENT; |
andrewbonney | 0:07919e3d6c56 | 119 | } |
andrewbonney | 0:07919e3d6c56 | 120 | break; /* lone ASCII_MINUS: the character after it is examined by the next iteration */ |
andrewbonney | 0:07919e3d6c56 | 121 | default: |
andrewbonney | 0:07919e3d6c56 | 122 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 123 | break; |
andrewbonney | 0:07919e3d6c56 | 124 | } |
andrewbonney | 0:07919e3d6c56 | 125 | } |
andrewbonney | 0:07919e3d6c56 | 126 | } |
andrewbonney | 0:07919e3d6c56 | 127 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 128 | } |
andrewbonney | 0:07919e3d6c56 | 129 | |
andrewbonney | 0:07919e3d6c56 | 130 | /* ptr points to character following "<!" */ |
andrewbonney | 0:07919e3d6c56 | 131 | /* Scans the start of a markup declaration. BT_MINUS hands off to scanComment; BT_LSQB returns XML_TOK_COND_SECT_OPEN with *nextTokPtr past it; a run of BT_NMSTRT or BT_HEX characters ended by whitespace or an acceptable BT_PERCNT returns XML_TOK_DECL_OPEN with *nextTokPtr at that delimiter. Anything else returns XML_TOK_INVALID at the offending character, and running out of input returns XML_TOK_PARTIAL. */ |
andrewbonney | 0:07919e3d6c56 | 132 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 133 | PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, |
andrewbonney | 0:07919e3d6c56 | 134 | const char *end, const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 135 | { |
andrewbonney | 0:07919e3d6c56 | 136 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 137 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 138 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 139 | case BT_MINUS: |
andrewbonney | 0:07919e3d6c56 | 140 | return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 141 | case BT_LSQB: |
andrewbonney | 0:07919e3d6c56 | 142 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 143 | return XML_TOK_COND_SECT_OPEN; |
andrewbonney | 0:07919e3d6c56 | 144 | case BT_NMSTRT: |
andrewbonney | 0:07919e3d6c56 | 145 | case BT_HEX: |
andrewbonney | 0:07919e3d6c56 | 146 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 147 | break; |
andrewbonney | 0:07919e3d6c56 | 148 | default: |
andrewbonney | 0:07919e3d6c56 | 149 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 150 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 151 | } |
andrewbonney | 0:07919e3d6c56 | 152 | while (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 153 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 154 | case BT_PERCNT: |
andrewbonney | 0:07919e3d6c56 | 155 | if (ptr + MINBPC(enc) == end) /* one character of lookahead is needed */ |
andrewbonney | 0:07919e3d6c56 | 156 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 157 | /* don't allow <!ENTITY% foo "whatever"> */ |
andrewbonney | 0:07919e3d6c56 | 158 | switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { |
andrewbonney | 0:07919e3d6c56 | 159 | case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: |
andrewbonney | 0:07919e3d6c56 | 160 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 161 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 162 | } |
andrewbonney | 0:07919e3d6c56 | 163 | /* fall through */ |
andrewbonney | 0:07919e3d6c56 | 164 | case BT_S: case BT_CR: case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 165 | *nextTokPtr = ptr; /* the delimiter itself is left for the next token */ |
andrewbonney | 0:07919e3d6c56 | 166 | return XML_TOK_DECL_OPEN; |
andrewbonney | 0:07919e3d6c56 | 167 | case BT_NMSTRT: |
andrewbonney | 0:07919e3d6c56 | 168 | case BT_HEX: |
andrewbonney | 0:07919e3d6c56 | 169 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 170 | break; |
andrewbonney | 0:07919e3d6c56 | 171 | default: |
andrewbonney | 0:07919e3d6c56 | 172 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 173 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 174 | } |
andrewbonney | 0:07919e3d6c56 | 175 | } |
andrewbonney | 0:07919e3d6c56 | 176 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 177 | } |
andrewbonney | 0:07919e3d6c56 | 178 | /* Classifies the processing-instruction target occupying [ptr, end). *tokPtr is set to XML_TOK_PI up front. If the target is exactly three characters matching x, m, l in either letter case, then an all-lowercase match sets *tokPtr to XML_TOK_XML_DECL and returns 1, while a match containing any uppercase letter returns 0 (scanPi reports that as XML_TOK_INVALID). Every other target returns 1. */ |
andrewbonney | 0:07919e3d6c56 | 179 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 180 | PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, |
andrewbonney | 0:07919e3d6c56 | 181 | const char *end, int *tokPtr) |
andrewbonney | 0:07919e3d6c56 | 182 | { |
andrewbonney | 0:07919e3d6c56 | 183 | int upper = 0; /* becomes 1 once an uppercase X, M or L has been matched */ |
andrewbonney | 0:07919e3d6c56 | 184 | *tokPtr = XML_TOK_PI; |
andrewbonney | 0:07919e3d6c56 | 185 | if (end - ptr != MINBPC(enc)*3) /* only a three-character target needs the checks below */ |
andrewbonney | 0:07919e3d6c56 | 186 | return 1; |
andrewbonney | 0:07919e3d6c56 | 187 | switch (BYTE_TO_ASCII(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 188 | case ASCII_x: |
andrewbonney | 0:07919e3d6c56 | 189 | break; |
andrewbonney | 0:07919e3d6c56 | 190 | case ASCII_X: |
andrewbonney | 0:07919e3d6c56 | 191 | upper = 1; |
andrewbonney | 0:07919e3d6c56 | 192 | break; |
andrewbonney | 0:07919e3d6c56 | 193 | default: |
andrewbonney | 0:07919e3d6c56 | 194 | return 1; |
andrewbonney | 0:07919e3d6c56 | 195 | } |
andrewbonney | 0:07919e3d6c56 | 196 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 197 | switch (BYTE_TO_ASCII(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 198 | case ASCII_m: |
andrewbonney | 0:07919e3d6c56 | 199 | break; |
andrewbonney | 0:07919e3d6c56 | 200 | case ASCII_M: |
andrewbonney | 0:07919e3d6c56 | 201 | upper = 1; |
andrewbonney | 0:07919e3d6c56 | 202 | break; |
andrewbonney | 0:07919e3d6c56 | 203 | default: |
andrewbonney | 0:07919e3d6c56 | 204 | return 1; |
andrewbonney | 0:07919e3d6c56 | 205 | } |
andrewbonney | 0:07919e3d6c56 | 206 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 207 | switch (BYTE_TO_ASCII(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 208 | case ASCII_l: |
andrewbonney | 0:07919e3d6c56 | 209 | break; |
andrewbonney | 0:07919e3d6c56 | 210 | case ASCII_L: |
andrewbonney | 0:07919e3d6c56 | 211 | upper = 1; |
andrewbonney | 0:07919e3d6c56 | 212 | break; |
andrewbonney | 0:07919e3d6c56 | 213 | default: |
andrewbonney | 0:07919e3d6c56 | 214 | return 1; |
andrewbonney | 0:07919e3d6c56 | 215 | } |
andrewbonney | 0:07919e3d6c56 | 216 | if (upper) /* all three letters matched, but not all in lowercase */ |
andrewbonney | 0:07919e3d6c56 | 217 | return 0; |
andrewbonney | 0:07919e3d6c56 | 218 | *tokPtr = XML_TOK_XML_DECL; |
andrewbonney | 0:07919e3d6c56 | 219 | return 1; |
andrewbonney | 0:07919e3d6c56 | 220 | } |
andrewbonney | 0:07919e3d6c56 | 221 | |
andrewbonney | 0:07919e3d6c56 | 222 | /* ptr points to character following "<?" */ |
andrewbonney | 0:07919e3d6c56 | 223 | /* Scans a processing instruction: a target name, then either whitespace followed by data up to BT_QUEST plus ASCII_GT, or that closing pair directly after the name. On success returns the token chosen by checkPiTarget (XML_TOK_PI or XML_TOK_XML_DECL) with *nextTokPtr past the ASCII_GT. Returns XML_TOK_INVALID for a bad name character, a target rejected by checkPiTarget, an illegal data byte, or a BT_QUEST directly after the name that is not followed by ASCII_GT; returns XML_TOK_PARTIAL when input runs out (XML_TOK_PARTIAL_CHAR inside a truncated multi-byte character). */ |
andrewbonney | 0:07919e3d6c56 | 224 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 225 | PREFIX(scanPi)(const ENCODING *enc, const char *ptr, |
andrewbonney | 0:07919e3d6c56 | 226 | const char *end, const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 227 | { |
andrewbonney | 0:07919e3d6c56 | 228 | int tok; |
andrewbonney | 0:07919e3d6c56 | 229 | const char *target = ptr; /* start of the target name, passed to checkPiTarget once the name ends */ |
andrewbonney | 0:07919e3d6c56 | 230 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 231 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 232 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 233 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 234 | default: |
andrewbonney | 0:07919e3d6c56 | 235 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 236 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 237 | } |
andrewbonney | 0:07919e3d6c56 | 238 | while (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 239 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 240 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 241 | case BT_S: case BT_CR: case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 242 | if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
andrewbonney | 0:07919e3d6c56 | 243 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 244 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 245 | } |
andrewbonney | 0:07919e3d6c56 | 246 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 247 | while (ptr != end) { /* data part: scan for BT_QUEST followed by ASCII_GT */ |
andrewbonney | 0:07919e3d6c56 | 248 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 249 | INVALID_CASES(ptr, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 250 | case BT_QUEST: |
andrewbonney | 0:07919e3d6c56 | 251 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 252 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 253 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 254 | if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
andrewbonney | 0:07919e3d6c56 | 255 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 256 | return tok; |
andrewbonney | 0:07919e3d6c56 | 257 | } |
andrewbonney | 0:07919e3d6c56 | 258 | break; /* not the terminator: the character after BT_QUEST is examined by the next iteration */ |
andrewbonney | 0:07919e3d6c56 | 259 | default: |
andrewbonney | 0:07919e3d6c56 | 260 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 261 | break; |
andrewbonney | 0:07919e3d6c56 | 262 | } |
andrewbonney | 0:07919e3d6c56 | 263 | } |
andrewbonney | 0:07919e3d6c56 | 264 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 265 | case BT_QUEST: |
andrewbonney | 0:07919e3d6c56 | 266 | if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
andrewbonney | 0:07919e3d6c56 | 267 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 268 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 269 | } |
andrewbonney | 0:07919e3d6c56 | 270 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 271 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 272 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 273 | if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
andrewbonney | 0:07919e3d6c56 | 274 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 275 | return tok; |
andrewbonney | 0:07919e3d6c56 | 276 | } |
andrewbonney | 0:07919e3d6c56 | 277 | /* fall through */ |
andrewbonney | 0:07919e3d6c56 | 278 | default: |
andrewbonney | 0:07919e3d6c56 | 279 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 280 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 281 | } |
andrewbonney | 0:07919e3d6c56 | 282 | } |
andrewbonney | 0:07919e3d6c56 | 283 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 284 | } |
andrewbonney | 0:07919e3d6c56 | 285 | /* Matches the six characters of CDATA_LSQB starting at ptr (presumably ptr follows the opening of a CDATA section marker; confirm at the call site). Returns XML_TOK_CDATA_SECT_OPEN with *nextTokPtr past the sixth character, XML_TOK_INVALID with *nextTokPtr at the first mismatching character, or XML_TOK_PARTIAL when fewer than six characters are available. */ |
andrewbonney | 0:07919e3d6c56 | 286 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 287 | PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, |
andrewbonney | 0:07919e3d6c56 | 288 | const char *end, const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 289 | { |
andrewbonney | 0:07919e3d6c56 | 290 | static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, |
andrewbonney | 0:07919e3d6c56 | 291 | ASCII_T, ASCII_A, ASCII_LSQB }; |
andrewbonney | 0:07919e3d6c56 | 292 | int i; |
andrewbonney | 0:07919e3d6c56 | 293 | /* CDATA[ */ |
andrewbonney | 0:07919e3d6c56 | 294 | if (end - ptr < 6 * MINBPC(enc)) |
andrewbonney | 0:07919e3d6c56 | 295 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 296 | for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { /* compare one character per step */ |
andrewbonney | 0:07919e3d6c56 | 297 | if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { |
andrewbonney | 0:07919e3d6c56 | 298 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 299 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 300 | } |
andrewbonney | 0:07919e3d6c56 | 301 | } |
andrewbonney | 0:07919e3d6c56 | 302 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 303 | return XML_TOK_CDATA_SECT_OPEN; |
andrewbonney | 0:07919e3d6c56 | 304 | } |
andrewbonney | 0:07919e3d6c56 | 305 | /* Returns the next token inside a CDATA section. XML_TOK_NONE if ptr == end; XML_TOK_CDATA_SECT_CLOSE for two ASCII_RSQB followed by ASCII_GT; XML_TOK_DATA_NEWLINE for BT_CR, BT_CR plus BT_LF, or BT_LF; XML_TOK_INVALID (or XML_TOK_PARTIAL_CHAR) when the first character is illegal or truncated; otherwise XML_TOK_DATA_CHARS for a run of characters that stops before the next BT_RSQB, BT_CR, BT_LF, illegal byte or end of input. XML_TOK_PARTIAL is returned when more input is needed to decide. *nextTokPtr is set on every return except XML_TOK_NONE, XML_TOK_PARTIAL and XML_TOK_PARTIAL_CHAR. */ |
andrewbonney | 0:07919e3d6c56 | 306 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 307 | PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, |
andrewbonney | 0:07919e3d6c56 | 308 | const char *end, const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 309 | { |
andrewbonney | 0:07919e3d6c56 | 310 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 311 | return XML_TOK_NONE; |
andrewbonney | 0:07919e3d6c56 | 312 | if (MINBPC(enc) > 1) { /* trim end so the range holds a whole number of code units; the mask arithmetic assumes MINBPC(enc) is a power of two */ |
andrewbonney | 0:07919e3d6c56 | 313 | size_t n = end - ptr; |
andrewbonney | 0:07919e3d6c56 | 314 | if (n & (MINBPC(enc) - 1)) { |
andrewbonney | 0:07919e3d6c56 | 315 | n &= ~(MINBPC(enc) - 1); |
andrewbonney | 0:07919e3d6c56 | 316 | if (n == 0) |
andrewbonney | 0:07919e3d6c56 | 317 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 318 | end = ptr + n; |
andrewbonney | 0:07919e3d6c56 | 319 | } |
andrewbonney | 0:07919e3d6c56 | 320 | } |
andrewbonney | 0:07919e3d6c56 | 321 | switch (BYTE_TYPE(enc, ptr)) { /* the first character decides the token kind */ |
andrewbonney | 0:07919e3d6c56 | 322 | case BT_RSQB: |
andrewbonney | 0:07919e3d6c56 | 323 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 324 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 325 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 326 | if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) |
andrewbonney | 0:07919e3d6c56 | 327 | break; |
andrewbonney | 0:07919e3d6c56 | 328 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 329 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 330 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 331 | if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
andrewbonney | 0:07919e3d6c56 | 332 | ptr -= MINBPC(enc); /* back up to the second ASCII_RSQB so the data run below stops there and it is rescanned by the next call */ |
andrewbonney | 0:07919e3d6c56 | 333 | break; |
andrewbonney | 0:07919e3d6c56 | 334 | } |
andrewbonney | 0:07919e3d6c56 | 335 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 336 | return XML_TOK_CDATA_SECT_CLOSE; |
andrewbonney | 0:07919e3d6c56 | 337 | case BT_CR: |
andrewbonney | 0:07919e3d6c56 | 338 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 339 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 340 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 341 | if (BYTE_TYPE(enc, ptr) == BT_LF) /* BT_CR followed by BT_LF is reported as a single newline token */ |
andrewbonney | 0:07919e3d6c56 | 342 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 343 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 344 | return XML_TOK_DATA_NEWLINE; |
andrewbonney | 0:07919e3d6c56 | 345 | case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 346 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 347 | return XML_TOK_DATA_NEWLINE; |
andrewbonney | 0:07919e3d6c56 | 348 | INVALID_CASES(ptr, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 349 | default: |
andrewbonney | 0:07919e3d6c56 | 350 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 351 | break; |
andrewbonney | 0:07919e3d6c56 | 352 | } |
andrewbonney | 0:07919e3d6c56 | 353 | while (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 354 | switch (BYTE_TYPE(enc, ptr)) { /* extend the data run; a truncated or illegal sequence here ends the run with XML_TOK_DATA_CHARS instead of returning an error */ |
andrewbonney | 0:07919e3d6c56 | 355 | #define LEAD_CASE(n) \ |
andrewbonney | 0:07919e3d6c56 | 356 | case BT_LEAD ## n: \ |
andrewbonney | 0:07919e3d6c56 | 357 | if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
andrewbonney | 0:07919e3d6c56 | 358 | *nextTokPtr = ptr; \ |
andrewbonney | 0:07919e3d6c56 | 359 | return XML_TOK_DATA_CHARS; \ |
andrewbonney | 0:07919e3d6c56 | 360 | } \ |
andrewbonney | 0:07919e3d6c56 | 361 | ptr += n; \ |
andrewbonney | 0:07919e3d6c56 | 362 | break; |
andrewbonney | 0:07919e3d6c56 | 363 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
andrewbonney | 0:07919e3d6c56 | 364 | #undef LEAD_CASE |
andrewbonney | 0:07919e3d6c56 | 365 | case BT_NONXML: |
andrewbonney | 0:07919e3d6c56 | 366 | case BT_MALFORM: |
andrewbonney | 0:07919e3d6c56 | 367 | case BT_TRAIL: |
andrewbonney | 0:07919e3d6c56 | 368 | case BT_CR: |
andrewbonney | 0:07919e3d6c56 | 369 | case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 370 | case BT_RSQB: |
andrewbonney | 0:07919e3d6c56 | 371 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 372 | return XML_TOK_DATA_CHARS; |
andrewbonney | 0:07919e3d6c56 | 373 | default: |
andrewbonney | 0:07919e3d6c56 | 374 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 375 | break; |
andrewbonney | 0:07919e3d6c56 | 376 | } |
andrewbonney | 0:07919e3d6c56 | 377 | } |
andrewbonney | 0:07919e3d6c56 | 378 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 379 | return XML_TOK_DATA_CHARS; |
andrewbonney | 0:07919e3d6c56 | 380 | } |
andrewbonney | 0:07919e3d6c56 | 381 | |
andrewbonney | 0:07919e3d6c56 | 382 | /* ptr points to character following "</" */ |
andrewbonney | 0:07919e3d6c56 | 383 | /* Scans an end tag: a name, optional trailing whitespace, then BT_GT. Returns XML_TOK_END_TAG with *nextTokPtr past the BT_GT, XML_TOK_INVALID with *nextTokPtr at the first unacceptable character, or XML_TOK_PARTIAL when input runs out (XML_TOK_PARTIAL_CHAR inside a truncated multi-byte character). When XML_NS is defined, BT_COLON is accepted after the first character without further checks. */ |
andrewbonney | 0:07919e3d6c56 | 384 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 385 | PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, |
andrewbonney | 0:07919e3d6c56 | 386 | const char *end, const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 387 | { |
andrewbonney | 0:07919e3d6c56 | 388 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 389 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 390 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 391 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 392 | default: |
andrewbonney | 0:07919e3d6c56 | 393 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 394 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 395 | } |
andrewbonney | 0:07919e3d6c56 | 396 | while (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 397 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 398 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 399 | case BT_S: case BT_CR: case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 400 | for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { /* after the name only whitespace and then BT_GT are allowed */ |
andrewbonney | 0:07919e3d6c56 | 401 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 402 | case BT_S: case BT_CR: case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 403 | break; |
andrewbonney | 0:07919e3d6c56 | 404 | case BT_GT: |
andrewbonney | 0:07919e3d6c56 | 405 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 406 | return XML_TOK_END_TAG; |
andrewbonney | 0:07919e3d6c56 | 407 | default: |
andrewbonney | 0:07919e3d6c56 | 408 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 409 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 410 | } |
andrewbonney | 0:07919e3d6c56 | 411 | } |
andrewbonney | 0:07919e3d6c56 | 412 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 413 | #ifdef XML_NS |
andrewbonney | 0:07919e3d6c56 | 414 | case BT_COLON: |
andrewbonney | 0:07919e3d6c56 | 415 | /* no need to check qname syntax here, |
andrewbonney | 0:07919e3d6c56 | 416 | since end-tag must match exactly */ |
andrewbonney | 0:07919e3d6c56 | 417 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 418 | break; |
andrewbonney | 0:07919e3d6c56 | 419 | #endif |
andrewbonney | 0:07919e3d6c56 | 420 | case BT_GT: |
andrewbonney | 0:07919e3d6c56 | 421 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 422 | return XML_TOK_END_TAG; |
andrewbonney | 0:07919e3d6c56 | 423 | default: |
andrewbonney | 0:07919e3d6c56 | 424 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 425 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 426 | } |
andrewbonney | 0:07919e3d6c56 | 427 | } |
andrewbonney | 0:07919e3d6c56 | 428 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 429 | } |
andrewbonney | 0:07919e3d6c56 | 430 | |
andrewbonney | 0:07919e3d6c56 | 431 | /* ptr points to character following "&#x" (scanCharRef dispatches here only on ASCII_x) */ |
andrewbonney | 0:07919e3d6c56 | 432 | /* Scans a hexadecimal character reference: one or more BT_DIGIT or BT_HEX characters ended by BT_SEMI. Returns XML_TOK_CHAR_REF with *nextTokPtr past the BT_SEMI, XML_TOK_INVALID with *nextTokPtr at the first unacceptable character, or XML_TOK_PARTIAL when input ends before the BT_SEMI. */ |
andrewbonney | 0:07919e3d6c56 | 433 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 434 | PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, |
andrewbonney | 0:07919e3d6c56 | 435 | const char *end, const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 436 | { |
andrewbonney | 0:07919e3d6c56 | 437 | if (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 438 | switch (BYTE_TYPE(enc, ptr)) { /* at least one hex digit is required */ |
andrewbonney | 0:07919e3d6c56 | 439 | case BT_DIGIT: |
andrewbonney | 0:07919e3d6c56 | 440 | case BT_HEX: |
andrewbonney | 0:07919e3d6c56 | 441 | break; |
andrewbonney | 0:07919e3d6c56 | 442 | default: |
andrewbonney | 0:07919e3d6c56 | 443 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 444 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 445 | } |
andrewbonney | 0:07919e3d6c56 | 446 | for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { |
andrewbonney | 0:07919e3d6c56 | 447 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 448 | case BT_DIGIT: |
andrewbonney | 0:07919e3d6c56 | 449 | case BT_HEX: |
andrewbonney | 0:07919e3d6c56 | 450 | break; |
andrewbonney | 0:07919e3d6c56 | 451 | case BT_SEMI: |
andrewbonney | 0:07919e3d6c56 | 452 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 453 | return XML_TOK_CHAR_REF; |
andrewbonney | 0:07919e3d6c56 | 454 | default: |
andrewbonney | 0:07919e3d6c56 | 455 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 456 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 457 | } |
andrewbonney | 0:07919e3d6c56 | 458 | } |
andrewbonney | 0:07919e3d6c56 | 459 | } |
andrewbonney | 0:07919e3d6c56 | 460 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 461 | } |
andrewbonney | 0:07919e3d6c56 | 462 | |
andrewbonney | 0:07919e3d6c56 | 463 | /* ptr points to character following "&#" */ |
andrewbonney | 0:07919e3d6c56 | 464 | /* Scans a numeric character reference. If the first character matches ASCII_x the rest is delegated to scanHexCharRef; otherwise one or more BT_DIGIT characters ended by BT_SEMI are required. Returns XML_TOK_CHAR_REF with *nextTokPtr past the BT_SEMI, XML_TOK_INVALID with *nextTokPtr at the first unacceptable character, or XML_TOK_PARTIAL when input ends before the BT_SEMI. */ |
andrewbonney | 0:07919e3d6c56 | 465 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 466 | PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, |
andrewbonney | 0:07919e3d6c56 | 467 | const char *end, const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 468 | { |
andrewbonney | 0:07919e3d6c56 | 469 | if (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 470 | if (CHAR_MATCHES(enc, ptr, ASCII_x)) /* hexadecimal form: hand off just past the ASCII_x */ |
andrewbonney | 0:07919e3d6c56 | 471 | return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 472 | switch (BYTE_TYPE(enc, ptr)) { /* decimal form: at least one digit is required */ |
andrewbonney | 0:07919e3d6c56 | 473 | case BT_DIGIT: |
andrewbonney | 0:07919e3d6c56 | 474 | break; |
andrewbonney | 0:07919e3d6c56 | 475 | default: |
andrewbonney | 0:07919e3d6c56 | 476 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 477 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 478 | } |
andrewbonney | 0:07919e3d6c56 | 479 | for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { |
andrewbonney | 0:07919e3d6c56 | 480 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 481 | case BT_DIGIT: |
andrewbonney | 0:07919e3d6c56 | 482 | break; |
andrewbonney | 0:07919e3d6c56 | 483 | case BT_SEMI: |
andrewbonney | 0:07919e3d6c56 | 484 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 485 | return XML_TOK_CHAR_REF; |
andrewbonney | 0:07919e3d6c56 | 486 | default: |
andrewbonney | 0:07919e3d6c56 | 487 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 488 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 489 | } |
andrewbonney | 0:07919e3d6c56 | 490 | } |
andrewbonney | 0:07919e3d6c56 | 491 | } |
andrewbonney | 0:07919e3d6c56 | 492 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 493 | } |
andrewbonney | 0:07919e3d6c56 | 494 | |
andrewbonney | 0:07919e3d6c56 | 495 | /* ptr points to character following "&" */ |
andrewbonney | 0:07919e3d6c56 | 496 | /* Scans a reference. BT_NUM as the first character delegates to scanCharRef; otherwise a name ended by BT_SEMI returns XML_TOK_ENTITY_REF with *nextTokPtr past the BT_SEMI. Returns XML_TOK_INVALID with *nextTokPtr at the first unacceptable character, or XML_TOK_PARTIAL when input runs out (XML_TOK_PARTIAL_CHAR inside a truncated multi-byte character). */ |
andrewbonney | 0:07919e3d6c56 | 497 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 498 | PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, |
andrewbonney | 0:07919e3d6c56 | 499 | const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 500 | { |
andrewbonney | 0:07919e3d6c56 | 501 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 502 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 503 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 504 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 505 | case BT_NUM: |
andrewbonney | 0:07919e3d6c56 | 506 | return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 507 | default: |
andrewbonney | 0:07919e3d6c56 | 508 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 509 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 510 | } |
andrewbonney | 0:07919e3d6c56 | 511 | while (ptr != end) { /* consume the rest of the entity name up to BT_SEMI */ |
andrewbonney | 0:07919e3d6c56 | 512 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 513 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 514 | case BT_SEMI: |
andrewbonney | 0:07919e3d6c56 | 515 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 516 | return XML_TOK_ENTITY_REF; |
andrewbonney | 0:07919e3d6c56 | 517 | default: |
andrewbonney | 0:07919e3d6c56 | 518 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 519 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 520 | } |
andrewbonney | 0:07919e3d6c56 | 521 | } |
andrewbonney | 0:07919e3d6c56 | 522 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 523 | } |
andrewbonney | 0:07919e3d6c56 | 524 | |
andrewbonney | 0:07919e3d6c56 | 525 | /* ptr points to character following first character of attribute name */ |
andrewbonney | 0:07919e3d6c56 | 526 | |
andrewbonney | 0:07919e3d6c56 | 527 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 528 | PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, |
andrewbonney | 0:07919e3d6c56 | 529 | const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 530 | { |
andrewbonney | 0:07919e3d6c56 | 531 | #ifdef XML_NS |
andrewbonney | 0:07919e3d6c56 | 532 | int hadColon = 0; |
andrewbonney | 0:07919e3d6c56 | 533 | #endif |
andrewbonney | 0:07919e3d6c56 | 534 | while (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 535 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 536 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 537 | #ifdef XML_NS |
andrewbonney | 0:07919e3d6c56 | 538 | case BT_COLON: |
andrewbonney | 0:07919e3d6c56 | 539 | if (hadColon) { |
andrewbonney | 0:07919e3d6c56 | 540 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 541 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 542 | } |
andrewbonney | 0:07919e3d6c56 | 543 | hadColon = 1; |
andrewbonney | 0:07919e3d6c56 | 544 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 545 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 546 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 547 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 548 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 549 | default: |
andrewbonney | 0:07919e3d6c56 | 550 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 551 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 552 | } |
andrewbonney | 0:07919e3d6c56 | 553 | break; |
andrewbonney | 0:07919e3d6c56 | 554 | #endif |
andrewbonney | 0:07919e3d6c56 | 555 | case BT_S: case BT_CR: case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 556 | for (;;) { |
andrewbonney | 0:07919e3d6c56 | 557 | int t; |
andrewbonney | 0:07919e3d6c56 | 558 | |
andrewbonney | 0:07919e3d6c56 | 559 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 560 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 561 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 562 | t = BYTE_TYPE(enc, ptr); |
andrewbonney | 0:07919e3d6c56 | 563 | if (t == BT_EQUALS) |
andrewbonney | 0:07919e3d6c56 | 564 | break; |
andrewbonney | 0:07919e3d6c56 | 565 | switch (t) { |
andrewbonney | 0:07919e3d6c56 | 566 | case BT_S: |
andrewbonney | 0:07919e3d6c56 | 567 | case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 568 | case BT_CR: |
andrewbonney | 0:07919e3d6c56 | 569 | break; |
andrewbonney | 0:07919e3d6c56 | 570 | default: |
andrewbonney | 0:07919e3d6c56 | 571 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 572 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 573 | } |
andrewbonney | 0:07919e3d6c56 | 574 | } |
andrewbonney | 0:07919e3d6c56 | 575 | /* fall through */ |
andrewbonney | 0:07919e3d6c56 | 576 | case BT_EQUALS: |
andrewbonney | 0:07919e3d6c56 | 577 | { |
andrewbonney | 0:07919e3d6c56 | 578 | int open; |
andrewbonney | 0:07919e3d6c56 | 579 | #ifdef XML_NS |
andrewbonney | 0:07919e3d6c56 | 580 | hadColon = 0; |
andrewbonney | 0:07919e3d6c56 | 581 | #endif |
andrewbonney | 0:07919e3d6c56 | 582 | for (;;) { |
andrewbonney | 0:07919e3d6c56 | 583 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 584 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 585 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 586 | open = BYTE_TYPE(enc, ptr); |
andrewbonney | 0:07919e3d6c56 | 587 | if (open == BT_QUOT || open == BT_APOS) |
andrewbonney | 0:07919e3d6c56 | 588 | break; |
andrewbonney | 0:07919e3d6c56 | 589 | switch (open) { |
andrewbonney | 0:07919e3d6c56 | 590 | case BT_S: |
andrewbonney | 0:07919e3d6c56 | 591 | case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 592 | case BT_CR: |
andrewbonney | 0:07919e3d6c56 | 593 | break; |
andrewbonney | 0:07919e3d6c56 | 594 | default: |
andrewbonney | 0:07919e3d6c56 | 595 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 596 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 597 | } |
andrewbonney | 0:07919e3d6c56 | 598 | } |
andrewbonney | 0:07919e3d6c56 | 599 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 600 | /* in attribute value */ |
andrewbonney | 0:07919e3d6c56 | 601 | for (;;) { |
andrewbonney | 0:07919e3d6c56 | 602 | int t; |
andrewbonney | 0:07919e3d6c56 | 603 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 604 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 605 | t = BYTE_TYPE(enc, ptr); |
andrewbonney | 0:07919e3d6c56 | 606 | if (t == open) |
andrewbonney | 0:07919e3d6c56 | 607 | break; |
andrewbonney | 0:07919e3d6c56 | 608 | switch (t) { |
andrewbonney | 0:07919e3d6c56 | 609 | INVALID_CASES(ptr, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 610 | case BT_AMP: |
andrewbonney | 0:07919e3d6c56 | 611 | { |
andrewbonney | 0:07919e3d6c56 | 612 | int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); |
andrewbonney | 0:07919e3d6c56 | 613 | if (tok <= 0) { |
andrewbonney | 0:07919e3d6c56 | 614 | if (tok == XML_TOK_INVALID) |
andrewbonney | 0:07919e3d6c56 | 615 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 616 | return tok; |
andrewbonney | 0:07919e3d6c56 | 617 | } |
andrewbonney | 0:07919e3d6c56 | 618 | break; |
andrewbonney | 0:07919e3d6c56 | 619 | } |
andrewbonney | 0:07919e3d6c56 | 620 | case BT_LT: |
andrewbonney | 0:07919e3d6c56 | 621 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 622 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 623 | default: |
andrewbonney | 0:07919e3d6c56 | 624 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 625 | break; |
andrewbonney | 0:07919e3d6c56 | 626 | } |
andrewbonney | 0:07919e3d6c56 | 627 | } |
andrewbonney | 0:07919e3d6c56 | 628 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 629 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 630 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 631 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 632 | case BT_S: |
andrewbonney | 0:07919e3d6c56 | 633 | case BT_CR: |
andrewbonney | 0:07919e3d6c56 | 634 | case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 635 | break; |
andrewbonney | 0:07919e3d6c56 | 636 | case BT_SOL: |
andrewbonney | 0:07919e3d6c56 | 637 | goto sol; |
andrewbonney | 0:07919e3d6c56 | 638 | case BT_GT: |
andrewbonney | 0:07919e3d6c56 | 639 | goto gt; |
andrewbonney | 0:07919e3d6c56 | 640 | default: |
andrewbonney | 0:07919e3d6c56 | 641 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 642 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 643 | } |
andrewbonney | 0:07919e3d6c56 | 644 | /* ptr points to closing quote */ |
andrewbonney | 0:07919e3d6c56 | 645 | for (;;) { |
andrewbonney | 0:07919e3d6c56 | 646 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 647 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 648 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 649 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 650 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 651 | case BT_S: case BT_CR: case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 652 | continue; |
andrewbonney | 0:07919e3d6c56 | 653 | case BT_GT: |
andrewbonney | 0:07919e3d6c56 | 654 | gt: |
andrewbonney | 0:07919e3d6c56 | 655 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 656 | return XML_TOK_START_TAG_WITH_ATTS; |
andrewbonney | 0:07919e3d6c56 | 657 | case BT_SOL: |
andrewbonney | 0:07919e3d6c56 | 658 | sol: |
andrewbonney | 0:07919e3d6c56 | 659 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 660 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 661 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 662 | if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
andrewbonney | 0:07919e3d6c56 | 663 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 664 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 665 | } |
andrewbonney | 0:07919e3d6c56 | 666 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 667 | return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; |
andrewbonney | 0:07919e3d6c56 | 668 | default: |
andrewbonney | 0:07919e3d6c56 | 669 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 670 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 671 | } |
andrewbonney | 0:07919e3d6c56 | 672 | break; |
andrewbonney | 0:07919e3d6c56 | 673 | } |
andrewbonney | 0:07919e3d6c56 | 674 | break; |
andrewbonney | 0:07919e3d6c56 | 675 | } |
andrewbonney | 0:07919e3d6c56 | 676 | default: |
andrewbonney | 0:07919e3d6c56 | 677 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 678 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 679 | } |
andrewbonney | 0:07919e3d6c56 | 680 | } |
andrewbonney | 0:07919e3d6c56 | 681 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 682 | } |
andrewbonney | 0:07919e3d6c56 | 683 | |
andrewbonney | 0:07919e3d6c56 | 684 | /* ptr points to character following "<" */ |
andrewbonney | 0:07919e3d6c56 | 685 | |
andrewbonney | 0:07919e3d6c56 | 686 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 687 | PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, |
andrewbonney | 0:07919e3d6c56 | 688 | const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 689 | { |
andrewbonney | 0:07919e3d6c56 | 690 | #ifdef XML_NS |
andrewbonney | 0:07919e3d6c56 | 691 | int hadColon; |
andrewbonney | 0:07919e3d6c56 | 692 | #endif |
andrewbonney | 0:07919e3d6c56 | 693 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 694 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 695 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 696 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 697 | case BT_EXCL: |
andrewbonney | 0:07919e3d6c56 | 698 | if ((ptr += MINBPC(enc)) == end) |
andrewbonney | 0:07919e3d6c56 | 699 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 700 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 701 | case BT_MINUS: |
andrewbonney | 0:07919e3d6c56 | 702 | return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 703 | case BT_LSQB: |
andrewbonney | 0:07919e3d6c56 | 704 | return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), |
andrewbonney | 0:07919e3d6c56 | 705 | end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 706 | } |
andrewbonney | 0:07919e3d6c56 | 707 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 708 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 709 | case BT_QUEST: |
andrewbonney | 0:07919e3d6c56 | 710 | return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 711 | case BT_SOL: |
andrewbonney | 0:07919e3d6c56 | 712 | return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 713 | default: |
andrewbonney | 0:07919e3d6c56 | 714 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 715 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 716 | } |
andrewbonney | 0:07919e3d6c56 | 717 | #ifdef XML_NS |
andrewbonney | 0:07919e3d6c56 | 718 | hadColon = 0; |
andrewbonney | 0:07919e3d6c56 | 719 | #endif |
andrewbonney | 0:07919e3d6c56 | 720 | /* we have a start-tag */ |
andrewbonney | 0:07919e3d6c56 | 721 | while (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 722 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 723 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 724 | #ifdef XML_NS |
andrewbonney | 0:07919e3d6c56 | 725 | case BT_COLON: |
andrewbonney | 0:07919e3d6c56 | 726 | if (hadColon) { |
andrewbonney | 0:07919e3d6c56 | 727 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 728 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 729 | } |
andrewbonney | 0:07919e3d6c56 | 730 | hadColon = 1; |
andrewbonney | 0:07919e3d6c56 | 731 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 732 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 733 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 734 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 735 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 736 | default: |
andrewbonney | 0:07919e3d6c56 | 737 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 738 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 739 | } |
andrewbonney | 0:07919e3d6c56 | 740 | break; |
andrewbonney | 0:07919e3d6c56 | 741 | #endif |
andrewbonney | 0:07919e3d6c56 | 742 | case BT_S: case BT_CR: case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 743 | { |
andrewbonney | 0:07919e3d6c56 | 744 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 745 | while (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 746 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 747 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 748 | case BT_GT: |
andrewbonney | 0:07919e3d6c56 | 749 | goto gt; |
andrewbonney | 0:07919e3d6c56 | 750 | case BT_SOL: |
andrewbonney | 0:07919e3d6c56 | 751 | goto sol; |
andrewbonney | 0:07919e3d6c56 | 752 | case BT_S: case BT_CR: case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 753 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 754 | continue; |
andrewbonney | 0:07919e3d6c56 | 755 | default: |
andrewbonney | 0:07919e3d6c56 | 756 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 757 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 758 | } |
andrewbonney | 0:07919e3d6c56 | 759 | return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 760 | } |
andrewbonney | 0:07919e3d6c56 | 761 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 762 | } |
andrewbonney | 0:07919e3d6c56 | 763 | case BT_GT: |
andrewbonney | 0:07919e3d6c56 | 764 | gt: |
andrewbonney | 0:07919e3d6c56 | 765 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 766 | return XML_TOK_START_TAG_NO_ATTS; |
andrewbonney | 0:07919e3d6c56 | 767 | case BT_SOL: |
andrewbonney | 0:07919e3d6c56 | 768 | sol: |
andrewbonney | 0:07919e3d6c56 | 769 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 770 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 771 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 772 | if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
andrewbonney | 0:07919e3d6c56 | 773 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 774 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 775 | } |
andrewbonney | 0:07919e3d6c56 | 776 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 777 | return XML_TOK_EMPTY_ELEMENT_NO_ATTS; |
andrewbonney | 0:07919e3d6c56 | 778 | default: |
andrewbonney | 0:07919e3d6c56 | 779 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 780 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 781 | } |
andrewbonney | 0:07919e3d6c56 | 782 | } |
andrewbonney | 0:07919e3d6c56 | 783 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 784 | } |
andrewbonney | 0:07919e3d6c56 | 785 | |
andrewbonney | 0:07919e3d6c56 | 786 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 787 | PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, |
andrewbonney | 0:07919e3d6c56 | 788 | const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 789 | { |
andrewbonney | 0:07919e3d6c56 | 790 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 791 | return XML_TOK_NONE; |
andrewbonney | 0:07919e3d6c56 | 792 | if (MINBPC(enc) > 1) { |
andrewbonney | 0:07919e3d6c56 | 793 | size_t n = end - ptr; |
andrewbonney | 0:07919e3d6c56 | 794 | if (n & (MINBPC(enc) - 1)) { |
andrewbonney | 0:07919e3d6c56 | 795 | n &= ~(MINBPC(enc) - 1); |
andrewbonney | 0:07919e3d6c56 | 796 | if (n == 0) |
andrewbonney | 0:07919e3d6c56 | 797 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 798 | end = ptr + n; |
andrewbonney | 0:07919e3d6c56 | 799 | } |
andrewbonney | 0:07919e3d6c56 | 800 | } |
andrewbonney | 0:07919e3d6c56 | 801 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 802 | case BT_LT: |
andrewbonney | 0:07919e3d6c56 | 803 | return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 804 | case BT_AMP: |
andrewbonney | 0:07919e3d6c56 | 805 | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 806 | case BT_CR: |
andrewbonney | 0:07919e3d6c56 | 807 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 808 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 809 | return XML_TOK_TRAILING_CR; |
andrewbonney | 0:07919e3d6c56 | 810 | if (BYTE_TYPE(enc, ptr) == BT_LF) |
andrewbonney | 0:07919e3d6c56 | 811 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 812 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 813 | return XML_TOK_DATA_NEWLINE; |
andrewbonney | 0:07919e3d6c56 | 814 | case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 815 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 816 | return XML_TOK_DATA_NEWLINE; |
andrewbonney | 0:07919e3d6c56 | 817 | case BT_RSQB: |
andrewbonney | 0:07919e3d6c56 | 818 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 819 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 820 | return XML_TOK_TRAILING_RSQB; |
andrewbonney | 0:07919e3d6c56 | 821 | if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) |
andrewbonney | 0:07919e3d6c56 | 822 | break; |
andrewbonney | 0:07919e3d6c56 | 823 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 824 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 825 | return XML_TOK_TRAILING_RSQB; |
andrewbonney | 0:07919e3d6c56 | 826 | if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
andrewbonney | 0:07919e3d6c56 | 827 | ptr -= MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 828 | break; |
andrewbonney | 0:07919e3d6c56 | 829 | } |
andrewbonney | 0:07919e3d6c56 | 830 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 831 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 832 | INVALID_CASES(ptr, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 833 | default: |
andrewbonney | 0:07919e3d6c56 | 834 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 835 | break; |
andrewbonney | 0:07919e3d6c56 | 836 | } |
andrewbonney | 0:07919e3d6c56 | 837 | while (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 838 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 839 | #define LEAD_CASE(n) \ |
andrewbonney | 0:07919e3d6c56 | 840 | case BT_LEAD ## n: \ |
andrewbonney | 0:07919e3d6c56 | 841 | if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
andrewbonney | 0:07919e3d6c56 | 842 | *nextTokPtr = ptr; \ |
andrewbonney | 0:07919e3d6c56 | 843 | return XML_TOK_DATA_CHARS; \ |
andrewbonney | 0:07919e3d6c56 | 844 | } \ |
andrewbonney | 0:07919e3d6c56 | 845 | ptr += n; \ |
andrewbonney | 0:07919e3d6c56 | 846 | break; |
andrewbonney | 0:07919e3d6c56 | 847 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
andrewbonney | 0:07919e3d6c56 | 848 | #undef LEAD_CASE |
andrewbonney | 0:07919e3d6c56 | 849 | case BT_RSQB: |
andrewbonney | 0:07919e3d6c56 | 850 | if (ptr + MINBPC(enc) != end) { |
andrewbonney | 0:07919e3d6c56 | 851 | if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { |
andrewbonney | 0:07919e3d6c56 | 852 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 853 | break; |
andrewbonney | 0:07919e3d6c56 | 854 | } |
andrewbonney | 0:07919e3d6c56 | 855 | if (ptr + 2*MINBPC(enc) != end) { |
andrewbonney | 0:07919e3d6c56 | 856 | if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { |
andrewbonney | 0:07919e3d6c56 | 857 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 858 | break; |
andrewbonney | 0:07919e3d6c56 | 859 | } |
andrewbonney | 0:07919e3d6c56 | 860 | *nextTokPtr = ptr + 2*MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 861 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 862 | } |
andrewbonney | 0:07919e3d6c56 | 863 | } |
andrewbonney | 0:07919e3d6c56 | 864 | /* fall through */ |
andrewbonney | 0:07919e3d6c56 | 865 | case BT_AMP: |
andrewbonney | 0:07919e3d6c56 | 866 | case BT_LT: |
andrewbonney | 0:07919e3d6c56 | 867 | case BT_NONXML: |
andrewbonney | 0:07919e3d6c56 | 868 | case BT_MALFORM: |
andrewbonney | 0:07919e3d6c56 | 869 | case BT_TRAIL: |
andrewbonney | 0:07919e3d6c56 | 870 | case BT_CR: |
andrewbonney | 0:07919e3d6c56 | 871 | case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 872 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 873 | return XML_TOK_DATA_CHARS; |
andrewbonney | 0:07919e3d6c56 | 874 | default: |
andrewbonney | 0:07919e3d6c56 | 875 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 876 | break; |
andrewbonney | 0:07919e3d6c56 | 877 | } |
andrewbonney | 0:07919e3d6c56 | 878 | } |
andrewbonney | 0:07919e3d6c56 | 879 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 880 | return XML_TOK_DATA_CHARS; |
andrewbonney | 0:07919e3d6c56 | 881 | } |
andrewbonney | 0:07919e3d6c56 | 882 | |
andrewbonney | 0:07919e3d6c56 | 883 | /* ptr points to character following "%" */ |
andrewbonney | 0:07919e3d6c56 | 884 | |
andrewbonney | 0:07919e3d6c56 | 885 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 886 | PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, |
andrewbonney | 0:07919e3d6c56 | 887 | const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 888 | { |
andrewbonney | 0:07919e3d6c56 | 889 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 890 | return -XML_TOK_PERCENT; |
andrewbonney | 0:07919e3d6c56 | 891 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 892 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 893 | case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: |
andrewbonney | 0:07919e3d6c56 | 894 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 895 | return XML_TOK_PERCENT; |
andrewbonney | 0:07919e3d6c56 | 896 | default: |
andrewbonney | 0:07919e3d6c56 | 897 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 898 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 899 | } |
andrewbonney | 0:07919e3d6c56 | 900 | while (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 901 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 902 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 903 | case BT_SEMI: |
andrewbonney | 0:07919e3d6c56 | 904 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 905 | return XML_TOK_PARAM_ENTITY_REF; |
andrewbonney | 0:07919e3d6c56 | 906 | default: |
andrewbonney | 0:07919e3d6c56 | 907 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 908 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 909 | } |
andrewbonney | 0:07919e3d6c56 | 910 | } |
andrewbonney | 0:07919e3d6c56 | 911 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 912 | } |
andrewbonney | 0:07919e3d6c56 | 913 | |
andrewbonney | 0:07919e3d6c56 | 914 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 915 | PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, |
andrewbonney | 0:07919e3d6c56 | 916 | const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 917 | { |
andrewbonney | 0:07919e3d6c56 | 918 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 919 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 920 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 921 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 922 | default: |
andrewbonney | 0:07919e3d6c56 | 923 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 924 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 925 | } |
andrewbonney | 0:07919e3d6c56 | 926 | while (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 927 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 928 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 929 | case BT_CR: case BT_LF: case BT_S: |
andrewbonney | 0:07919e3d6c56 | 930 | case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: |
andrewbonney | 0:07919e3d6c56 | 931 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 932 | return XML_TOK_POUND_NAME; |
andrewbonney | 0:07919e3d6c56 | 933 | default: |
andrewbonney | 0:07919e3d6c56 | 934 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 935 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 936 | } |
andrewbonney | 0:07919e3d6c56 | 937 | } |
andrewbonney | 0:07919e3d6c56 | 938 | return -XML_TOK_POUND_NAME; |
andrewbonney | 0:07919e3d6c56 | 939 | } |
andrewbonney | 0:07919e3d6c56 | 940 | |
andrewbonney | 0:07919e3d6c56 | 941 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 942 | PREFIX(scanLit)(int open, const ENCODING *enc, |
andrewbonney | 0:07919e3d6c56 | 943 | const char *ptr, const char *end, |
andrewbonney | 0:07919e3d6c56 | 944 | const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 945 | { |
andrewbonney | 0:07919e3d6c56 | 946 | while (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 947 | int t = BYTE_TYPE(enc, ptr); |
andrewbonney | 0:07919e3d6c56 | 948 | switch (t) { |
andrewbonney | 0:07919e3d6c56 | 949 | INVALID_CASES(ptr, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 950 | case BT_QUOT: |
andrewbonney | 0:07919e3d6c56 | 951 | case BT_APOS: |
andrewbonney | 0:07919e3d6c56 | 952 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 953 | if (t != open) |
andrewbonney | 0:07919e3d6c56 | 954 | break; |
andrewbonney | 0:07919e3d6c56 | 955 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 956 | return -XML_TOK_LITERAL; |
andrewbonney | 0:07919e3d6c56 | 957 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 958 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 959 | case BT_S: case BT_CR: case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 960 | case BT_GT: case BT_PERCNT: case BT_LSQB: |
andrewbonney | 0:07919e3d6c56 | 961 | return XML_TOK_LITERAL; |
andrewbonney | 0:07919e3d6c56 | 962 | default: |
andrewbonney | 0:07919e3d6c56 | 963 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 964 | } |
andrewbonney | 0:07919e3d6c56 | 965 | default: |
andrewbonney | 0:07919e3d6c56 | 966 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 967 | break; |
andrewbonney | 0:07919e3d6c56 | 968 | } |
andrewbonney | 0:07919e3d6c56 | 969 | } |
andrewbonney | 0:07919e3d6c56 | 970 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 971 | } |
andrewbonney | 0:07919e3d6c56 | 972 | |
andrewbonney | 0:07919e3d6c56 | 973 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 974 | PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, |
andrewbonney | 0:07919e3d6c56 | 975 | const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 976 | { |
andrewbonney | 0:07919e3d6c56 | 977 | int tok; |
andrewbonney | 0:07919e3d6c56 | 978 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 979 | return XML_TOK_NONE; |
andrewbonney | 0:07919e3d6c56 | 980 | if (MINBPC(enc) > 1) { |
andrewbonney | 0:07919e3d6c56 | 981 | size_t n = end - ptr; |
andrewbonney | 0:07919e3d6c56 | 982 | if (n & (MINBPC(enc) - 1)) { |
andrewbonney | 0:07919e3d6c56 | 983 | n &= ~(MINBPC(enc) - 1); |
andrewbonney | 0:07919e3d6c56 | 984 | if (n == 0) |
andrewbonney | 0:07919e3d6c56 | 985 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 986 | end = ptr + n; |
andrewbonney | 0:07919e3d6c56 | 987 | } |
andrewbonney | 0:07919e3d6c56 | 988 | } |
andrewbonney | 0:07919e3d6c56 | 989 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 990 | case BT_QUOT: |
andrewbonney | 0:07919e3d6c56 | 991 | return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 992 | case BT_APOS: |
andrewbonney | 0:07919e3d6c56 | 993 | return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 994 | case BT_LT: |
andrewbonney | 0:07919e3d6c56 | 995 | { |
andrewbonney | 0:07919e3d6c56 | 996 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 997 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 998 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 999 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1000 | case BT_EXCL: |
andrewbonney | 0:07919e3d6c56 | 1001 | return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 1002 | case BT_QUEST: |
andrewbonney | 0:07919e3d6c56 | 1003 | return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 1004 | case BT_NMSTRT: |
andrewbonney | 0:07919e3d6c56 | 1005 | case BT_HEX: |
andrewbonney | 0:07919e3d6c56 | 1006 | case BT_NONASCII: |
andrewbonney | 0:07919e3d6c56 | 1007 | case BT_LEAD2: |
andrewbonney | 0:07919e3d6c56 | 1008 | case BT_LEAD3: |
andrewbonney | 0:07919e3d6c56 | 1009 | case BT_LEAD4: |
andrewbonney | 0:07919e3d6c56 | 1010 | *nextTokPtr = ptr - MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1011 | return XML_TOK_INSTANCE_START; |
andrewbonney | 0:07919e3d6c56 | 1012 | } |
andrewbonney | 0:07919e3d6c56 | 1013 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1014 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 1015 | } |
andrewbonney | 0:07919e3d6c56 | 1016 | case BT_CR: |
andrewbonney | 0:07919e3d6c56 | 1017 | if (ptr + MINBPC(enc) == end) { |
andrewbonney | 0:07919e3d6c56 | 1018 | *nextTokPtr = end; |
andrewbonney | 0:07919e3d6c56 | 1019 | /* indicate that this might be part of a CR/LF pair */ |
andrewbonney | 0:07919e3d6c56 | 1020 | return -XML_TOK_PROLOG_S; |
andrewbonney | 0:07919e3d6c56 | 1021 | } |
andrewbonney | 0:07919e3d6c56 | 1022 | /* fall through */ |
andrewbonney | 0:07919e3d6c56 | 1023 | case BT_S: case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 1024 | for (;;) { |
andrewbonney | 0:07919e3d6c56 | 1025 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1026 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 1027 | break; |
andrewbonney | 0:07919e3d6c56 | 1028 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1029 | case BT_S: case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 1030 | break; |
andrewbonney | 0:07919e3d6c56 | 1031 | case BT_CR: |
andrewbonney | 0:07919e3d6c56 | 1032 | /* don't split CR/LF pair */ |
andrewbonney | 0:07919e3d6c56 | 1033 | if (ptr + MINBPC(enc) != end) |
andrewbonney | 0:07919e3d6c56 | 1034 | break; |
andrewbonney | 0:07919e3d6c56 | 1035 | /* fall through */ |
andrewbonney | 0:07919e3d6c56 | 1036 | default: |
andrewbonney | 0:07919e3d6c56 | 1037 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1038 | return XML_TOK_PROLOG_S; |
andrewbonney | 0:07919e3d6c56 | 1039 | } |
andrewbonney | 0:07919e3d6c56 | 1040 | } |
andrewbonney | 0:07919e3d6c56 | 1041 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1042 | return XML_TOK_PROLOG_S; |
andrewbonney | 0:07919e3d6c56 | 1043 | case BT_PERCNT: |
andrewbonney | 0:07919e3d6c56 | 1044 | return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 1045 | case BT_COMMA: |
andrewbonney | 0:07919e3d6c56 | 1046 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1047 | return XML_TOK_COMMA; |
andrewbonney | 0:07919e3d6c56 | 1048 | case BT_LSQB: |
andrewbonney | 0:07919e3d6c56 | 1049 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1050 | return XML_TOK_OPEN_BRACKET; |
andrewbonney | 0:07919e3d6c56 | 1051 | case BT_RSQB: |
andrewbonney | 0:07919e3d6c56 | 1052 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1053 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 1054 | return -XML_TOK_CLOSE_BRACKET; |
andrewbonney | 0:07919e3d6c56 | 1055 | if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { |
andrewbonney | 0:07919e3d6c56 | 1056 | if (ptr + MINBPC(enc) == end) |
andrewbonney | 0:07919e3d6c56 | 1057 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 1058 | if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { |
andrewbonney | 0:07919e3d6c56 | 1059 | *nextTokPtr = ptr + 2*MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1060 | return XML_TOK_COND_SECT_CLOSE; |
andrewbonney | 0:07919e3d6c56 | 1061 | } |
andrewbonney | 0:07919e3d6c56 | 1062 | } |
andrewbonney | 0:07919e3d6c56 | 1063 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1064 | return XML_TOK_CLOSE_BRACKET; |
andrewbonney | 0:07919e3d6c56 | 1065 | case BT_LPAR: |
andrewbonney | 0:07919e3d6c56 | 1066 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1067 | return XML_TOK_OPEN_PAREN; |
andrewbonney | 0:07919e3d6c56 | 1068 | case BT_RPAR: |
andrewbonney | 0:07919e3d6c56 | 1069 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1070 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 1071 | return -XML_TOK_CLOSE_PAREN; |
andrewbonney | 0:07919e3d6c56 | 1072 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1073 | case BT_AST: |
andrewbonney | 0:07919e3d6c56 | 1074 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1075 | return XML_TOK_CLOSE_PAREN_ASTERISK; |
andrewbonney | 0:07919e3d6c56 | 1076 | case BT_QUEST: |
andrewbonney | 0:07919e3d6c56 | 1077 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1078 | return XML_TOK_CLOSE_PAREN_QUESTION; |
andrewbonney | 0:07919e3d6c56 | 1079 | case BT_PLUS: |
andrewbonney | 0:07919e3d6c56 | 1080 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1081 | return XML_TOK_CLOSE_PAREN_PLUS; |
andrewbonney | 0:07919e3d6c56 | 1082 | case BT_CR: case BT_LF: case BT_S: |
andrewbonney | 0:07919e3d6c56 | 1083 | case BT_GT: case BT_COMMA: case BT_VERBAR: |
andrewbonney | 0:07919e3d6c56 | 1084 | case BT_RPAR: |
andrewbonney | 0:07919e3d6c56 | 1085 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1086 | return XML_TOK_CLOSE_PAREN; |
andrewbonney | 0:07919e3d6c56 | 1087 | } |
andrewbonney | 0:07919e3d6c56 | 1088 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1089 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 1090 | case BT_VERBAR: |
andrewbonney | 0:07919e3d6c56 | 1091 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1092 | return XML_TOK_OR; |
andrewbonney | 0:07919e3d6c56 | 1093 | case BT_GT: |
andrewbonney | 0:07919e3d6c56 | 1094 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1095 | return XML_TOK_DECL_CLOSE; |
andrewbonney | 0:07919e3d6c56 | 1096 | case BT_NUM: |
andrewbonney | 0:07919e3d6c56 | 1097 | return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 1098 | #define LEAD_CASE(n) \ |
andrewbonney | 0:07919e3d6c56 | 1099 | case BT_LEAD ## n: \ |
andrewbonney | 0:07919e3d6c56 | 1100 | if (end - ptr < n) \ |
andrewbonney | 0:07919e3d6c56 | 1101 | return XML_TOK_PARTIAL_CHAR; \ |
andrewbonney | 0:07919e3d6c56 | 1102 | if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ |
andrewbonney | 0:07919e3d6c56 | 1103 | ptr += n; \ |
andrewbonney | 0:07919e3d6c56 | 1104 | tok = XML_TOK_NAME; \ |
andrewbonney | 0:07919e3d6c56 | 1105 | break; \ |
andrewbonney | 0:07919e3d6c56 | 1106 | } \ |
andrewbonney | 0:07919e3d6c56 | 1107 | if (IS_NAME_CHAR(enc, ptr, n)) { \ |
andrewbonney | 0:07919e3d6c56 | 1108 | ptr += n; \ |
andrewbonney | 0:07919e3d6c56 | 1109 | tok = XML_TOK_NMTOKEN; \ |
andrewbonney | 0:07919e3d6c56 | 1110 | break; \ |
andrewbonney | 0:07919e3d6c56 | 1111 | } \ |
andrewbonney | 0:07919e3d6c56 | 1112 | *nextTokPtr = ptr; \ |
andrewbonney | 0:07919e3d6c56 | 1113 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 1114 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
andrewbonney | 0:07919e3d6c56 | 1115 | #undef LEAD_CASE |
andrewbonney | 0:07919e3d6c56 | 1116 | case BT_NMSTRT: |
andrewbonney | 0:07919e3d6c56 | 1117 | case BT_HEX: |
andrewbonney | 0:07919e3d6c56 | 1118 | tok = XML_TOK_NAME; |
andrewbonney | 0:07919e3d6c56 | 1119 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1120 | break; |
andrewbonney | 0:07919e3d6c56 | 1121 | case BT_DIGIT: |
andrewbonney | 0:07919e3d6c56 | 1122 | case BT_NAME: |
andrewbonney | 0:07919e3d6c56 | 1123 | case BT_MINUS: |
andrewbonney | 0:07919e3d6c56 | 1124 | #ifdef XML_NS |
andrewbonney | 0:07919e3d6c56 | 1125 | case BT_COLON: |
andrewbonney | 0:07919e3d6c56 | 1126 | #endif |
andrewbonney | 0:07919e3d6c56 | 1127 | tok = XML_TOK_NMTOKEN; |
andrewbonney | 0:07919e3d6c56 | 1128 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1129 | break; |
andrewbonney | 0:07919e3d6c56 | 1130 | case BT_NONASCII: |
andrewbonney | 0:07919e3d6c56 | 1131 | if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1132 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1133 | tok = XML_TOK_NAME; |
andrewbonney | 0:07919e3d6c56 | 1134 | break; |
andrewbonney | 0:07919e3d6c56 | 1135 | } |
andrewbonney | 0:07919e3d6c56 | 1136 | if (IS_NAME_CHAR_MINBPC(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1137 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1138 | tok = XML_TOK_NMTOKEN; |
andrewbonney | 0:07919e3d6c56 | 1139 | break; |
andrewbonney | 0:07919e3d6c56 | 1140 | } |
andrewbonney | 0:07919e3d6c56 | 1141 | /* fall through */ |
andrewbonney | 0:07919e3d6c56 | 1142 | default: |
andrewbonney | 0:07919e3d6c56 | 1143 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1144 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 1145 | } |
andrewbonney | 0:07919e3d6c56 | 1146 | while (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 1147 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1148 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 1149 | case BT_GT: case BT_RPAR: case BT_COMMA: |
andrewbonney | 0:07919e3d6c56 | 1150 | case BT_VERBAR: case BT_LSQB: case BT_PERCNT: |
andrewbonney | 0:07919e3d6c56 | 1151 | case BT_S: case BT_CR: case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 1152 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1153 | return tok; |
andrewbonney | 0:07919e3d6c56 | 1154 | #ifdef XML_NS |
andrewbonney | 0:07919e3d6c56 | 1155 | case BT_COLON: |
andrewbonney | 0:07919e3d6c56 | 1156 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1157 | switch (tok) { |
andrewbonney | 0:07919e3d6c56 | 1158 | case XML_TOK_NAME: |
andrewbonney | 0:07919e3d6c56 | 1159 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 1160 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 1161 | tok = XML_TOK_PREFIXED_NAME; |
andrewbonney | 0:07919e3d6c56 | 1162 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1163 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 1164 | default: |
andrewbonney | 0:07919e3d6c56 | 1165 | tok = XML_TOK_NMTOKEN; |
andrewbonney | 0:07919e3d6c56 | 1166 | break; |
andrewbonney | 0:07919e3d6c56 | 1167 | } |
andrewbonney | 0:07919e3d6c56 | 1168 | break; |
andrewbonney | 0:07919e3d6c56 | 1169 | case XML_TOK_PREFIXED_NAME: |
andrewbonney | 0:07919e3d6c56 | 1170 | tok = XML_TOK_NMTOKEN; |
andrewbonney | 0:07919e3d6c56 | 1171 | break; |
andrewbonney | 0:07919e3d6c56 | 1172 | } |
andrewbonney | 0:07919e3d6c56 | 1173 | break; |
andrewbonney | 0:07919e3d6c56 | 1174 | #endif |
andrewbonney | 0:07919e3d6c56 | 1175 | case BT_PLUS: |
andrewbonney | 0:07919e3d6c56 | 1176 | if (tok == XML_TOK_NMTOKEN) { |
andrewbonney | 0:07919e3d6c56 | 1177 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1178 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 1179 | } |
andrewbonney | 0:07919e3d6c56 | 1180 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1181 | return XML_TOK_NAME_PLUS; |
andrewbonney | 0:07919e3d6c56 | 1182 | case BT_AST: |
andrewbonney | 0:07919e3d6c56 | 1183 | if (tok == XML_TOK_NMTOKEN) { |
andrewbonney | 0:07919e3d6c56 | 1184 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1185 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 1186 | } |
andrewbonney | 0:07919e3d6c56 | 1187 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1188 | return XML_TOK_NAME_ASTERISK; |
andrewbonney | 0:07919e3d6c56 | 1189 | case BT_QUEST: |
andrewbonney | 0:07919e3d6c56 | 1190 | if (tok == XML_TOK_NMTOKEN) { |
andrewbonney | 0:07919e3d6c56 | 1191 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1192 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 1193 | } |
andrewbonney | 0:07919e3d6c56 | 1194 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1195 | return XML_TOK_NAME_QUESTION; |
andrewbonney | 0:07919e3d6c56 | 1196 | default: |
andrewbonney | 0:07919e3d6c56 | 1197 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1198 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 1199 | } |
andrewbonney | 0:07919e3d6c56 | 1200 | } |
andrewbonney | 0:07919e3d6c56 | 1201 | return -tok; |
andrewbonney | 0:07919e3d6c56 | 1202 | } |
andrewbonney | 0:07919e3d6c56 | 1203 | |
andrewbonney | 0:07919e3d6c56 | 1204 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 1205 | PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, |
andrewbonney | 0:07919e3d6c56 | 1206 | const char *end, const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 1207 | { |
andrewbonney | 0:07919e3d6c56 | 1208 | const char *start; |
andrewbonney | 0:07919e3d6c56 | 1209 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 1210 | return XML_TOK_NONE; |
andrewbonney | 0:07919e3d6c56 | 1211 | start = ptr; |
andrewbonney | 0:07919e3d6c56 | 1212 | while (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 1213 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1214 | #define LEAD_CASE(n) \ |
andrewbonney | 0:07919e3d6c56 | 1215 | case BT_LEAD ## n: ptr += n; break; |
andrewbonney | 0:07919e3d6c56 | 1216 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
andrewbonney | 0:07919e3d6c56 | 1217 | #undef LEAD_CASE |
andrewbonney | 0:07919e3d6c56 | 1218 | case BT_AMP: |
andrewbonney | 0:07919e3d6c56 | 1219 | if (ptr == start) |
andrewbonney | 0:07919e3d6c56 | 1220 | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 1221 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1222 | return XML_TOK_DATA_CHARS; |
andrewbonney | 0:07919e3d6c56 | 1223 | case BT_LT: |
andrewbonney | 0:07919e3d6c56 | 1224 | /* this is for inside entity references */ |
andrewbonney | 0:07919e3d6c56 | 1225 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1226 | return XML_TOK_INVALID; |
andrewbonney | 0:07919e3d6c56 | 1227 | case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 1228 | if (ptr == start) { |
andrewbonney | 0:07919e3d6c56 | 1229 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1230 | return XML_TOK_DATA_NEWLINE; |
andrewbonney | 0:07919e3d6c56 | 1231 | } |
andrewbonney | 0:07919e3d6c56 | 1232 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1233 | return XML_TOK_DATA_CHARS; |
andrewbonney | 0:07919e3d6c56 | 1234 | case BT_CR: |
andrewbonney | 0:07919e3d6c56 | 1235 | if (ptr == start) { |
andrewbonney | 0:07919e3d6c56 | 1236 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1237 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 1238 | return XML_TOK_TRAILING_CR; |
andrewbonney | 0:07919e3d6c56 | 1239 | if (BYTE_TYPE(enc, ptr) == BT_LF) |
andrewbonney | 0:07919e3d6c56 | 1240 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1241 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1242 | return XML_TOK_DATA_NEWLINE; |
andrewbonney | 0:07919e3d6c56 | 1243 | } |
andrewbonney | 0:07919e3d6c56 | 1244 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1245 | return XML_TOK_DATA_CHARS; |
andrewbonney | 0:07919e3d6c56 | 1246 | case BT_S: |
andrewbonney | 0:07919e3d6c56 | 1247 | if (ptr == start) { |
andrewbonney | 0:07919e3d6c56 | 1248 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1249 | return XML_TOK_ATTRIBUTE_VALUE_S; |
andrewbonney | 0:07919e3d6c56 | 1250 | } |
andrewbonney | 0:07919e3d6c56 | 1251 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1252 | return XML_TOK_DATA_CHARS; |
andrewbonney | 0:07919e3d6c56 | 1253 | default: |
andrewbonney | 0:07919e3d6c56 | 1254 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1255 | break; |
andrewbonney | 0:07919e3d6c56 | 1256 | } |
andrewbonney | 0:07919e3d6c56 | 1257 | } |
andrewbonney | 0:07919e3d6c56 | 1258 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1259 | return XML_TOK_DATA_CHARS; |
andrewbonney | 0:07919e3d6c56 | 1260 | } |
andrewbonney | 0:07919e3d6c56 | 1261 | |
andrewbonney | 0:07919e3d6c56 | 1262 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 1263 | PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, |
andrewbonney | 0:07919e3d6c56 | 1264 | const char *end, const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 1265 | { |
andrewbonney | 0:07919e3d6c56 | 1266 | const char *start; |
andrewbonney | 0:07919e3d6c56 | 1267 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 1268 | return XML_TOK_NONE; |
andrewbonney | 0:07919e3d6c56 | 1269 | start = ptr; |
andrewbonney | 0:07919e3d6c56 | 1270 | while (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 1271 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1272 | #define LEAD_CASE(n) \ |
andrewbonney | 0:07919e3d6c56 | 1273 | case BT_LEAD ## n: ptr += n; break; |
andrewbonney | 0:07919e3d6c56 | 1274 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
andrewbonney | 0:07919e3d6c56 | 1275 | #undef LEAD_CASE |
andrewbonney | 0:07919e3d6c56 | 1276 | case BT_AMP: |
andrewbonney | 0:07919e3d6c56 | 1277 | if (ptr == start) |
andrewbonney | 0:07919e3d6c56 | 1278 | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 1279 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1280 | return XML_TOK_DATA_CHARS; |
andrewbonney | 0:07919e3d6c56 | 1281 | case BT_PERCNT: |
andrewbonney | 0:07919e3d6c56 | 1282 | if (ptr == start) { |
andrewbonney | 0:07919e3d6c56 | 1283 | int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), |
andrewbonney | 0:07919e3d6c56 | 1284 | end, nextTokPtr); |
andrewbonney | 0:07919e3d6c56 | 1285 | return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; |
andrewbonney | 0:07919e3d6c56 | 1286 | } |
andrewbonney | 0:07919e3d6c56 | 1287 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1288 | return XML_TOK_DATA_CHARS; |
andrewbonney | 0:07919e3d6c56 | 1289 | case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 1290 | if (ptr == start) { |
andrewbonney | 0:07919e3d6c56 | 1291 | *nextTokPtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1292 | return XML_TOK_DATA_NEWLINE; |
andrewbonney | 0:07919e3d6c56 | 1293 | } |
andrewbonney | 0:07919e3d6c56 | 1294 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1295 | return XML_TOK_DATA_CHARS; |
andrewbonney | 0:07919e3d6c56 | 1296 | case BT_CR: |
andrewbonney | 0:07919e3d6c56 | 1297 | if (ptr == start) { |
andrewbonney | 0:07919e3d6c56 | 1298 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1299 | if (ptr == end) |
andrewbonney | 0:07919e3d6c56 | 1300 | return XML_TOK_TRAILING_CR; |
andrewbonney | 0:07919e3d6c56 | 1301 | if (BYTE_TYPE(enc, ptr) == BT_LF) |
andrewbonney | 0:07919e3d6c56 | 1302 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1303 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1304 | return XML_TOK_DATA_NEWLINE; |
andrewbonney | 0:07919e3d6c56 | 1305 | } |
andrewbonney | 0:07919e3d6c56 | 1306 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1307 | return XML_TOK_DATA_CHARS; |
andrewbonney | 0:07919e3d6c56 | 1308 | default: |
andrewbonney | 0:07919e3d6c56 | 1309 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1310 | break; |
andrewbonney | 0:07919e3d6c56 | 1311 | } |
andrewbonney | 0:07919e3d6c56 | 1312 | } |
andrewbonney | 0:07919e3d6c56 | 1313 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1314 | return XML_TOK_DATA_CHARS; |
andrewbonney | 0:07919e3d6c56 | 1315 | } |
andrewbonney | 0:07919e3d6c56 | 1316 | |
andrewbonney | 0:07919e3d6c56 | 1317 | #ifdef XML_DTD |
andrewbonney | 0:07919e3d6c56 | 1318 | |
andrewbonney | 0:07919e3d6c56 | 1319 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 1320 | PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, |
andrewbonney | 0:07919e3d6c56 | 1321 | const char *end, const char **nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 1322 | { |
andrewbonney | 0:07919e3d6c56 | 1323 | int level = 0; |
andrewbonney | 0:07919e3d6c56 | 1324 | if (MINBPC(enc) > 1) { |
andrewbonney | 0:07919e3d6c56 | 1325 | size_t n = end - ptr; |
andrewbonney | 0:07919e3d6c56 | 1326 | if (n & (MINBPC(enc) - 1)) { |
andrewbonney | 0:07919e3d6c56 | 1327 | n &= ~(MINBPC(enc) - 1); |
andrewbonney | 0:07919e3d6c56 | 1328 | end = ptr + n; |
andrewbonney | 0:07919e3d6c56 | 1329 | } |
andrewbonney | 0:07919e3d6c56 | 1330 | } |
andrewbonney | 0:07919e3d6c56 | 1331 | while (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 1332 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1333 | INVALID_CASES(ptr, nextTokPtr) |
andrewbonney | 0:07919e3d6c56 | 1334 | case BT_LT: |
andrewbonney | 0:07919e3d6c56 | 1335 | if ((ptr += MINBPC(enc)) == end) |
andrewbonney | 0:07919e3d6c56 | 1336 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 1337 | if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { |
andrewbonney | 0:07919e3d6c56 | 1338 | if ((ptr += MINBPC(enc)) == end) |
andrewbonney | 0:07919e3d6c56 | 1339 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 1340 | if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { |
andrewbonney | 0:07919e3d6c56 | 1341 | ++level; |
andrewbonney | 0:07919e3d6c56 | 1342 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1343 | } |
andrewbonney | 0:07919e3d6c56 | 1344 | } |
andrewbonney | 0:07919e3d6c56 | 1345 | break; |
andrewbonney | 0:07919e3d6c56 | 1346 | case BT_RSQB: |
andrewbonney | 0:07919e3d6c56 | 1347 | if ((ptr += MINBPC(enc)) == end) |
andrewbonney | 0:07919e3d6c56 | 1348 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 1349 | if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { |
andrewbonney | 0:07919e3d6c56 | 1350 | if ((ptr += MINBPC(enc)) == end) |
andrewbonney | 0:07919e3d6c56 | 1351 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 1352 | if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
andrewbonney | 0:07919e3d6c56 | 1353 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1354 | if (level == 0) { |
andrewbonney | 0:07919e3d6c56 | 1355 | *nextTokPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1356 | return XML_TOK_IGNORE_SECT; |
andrewbonney | 0:07919e3d6c56 | 1357 | } |
andrewbonney | 0:07919e3d6c56 | 1358 | --level; |
andrewbonney | 0:07919e3d6c56 | 1359 | } |
andrewbonney | 0:07919e3d6c56 | 1360 | } |
andrewbonney | 0:07919e3d6c56 | 1361 | break; |
andrewbonney | 0:07919e3d6c56 | 1362 | default: |
andrewbonney | 0:07919e3d6c56 | 1363 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1364 | break; |
andrewbonney | 0:07919e3d6c56 | 1365 | } |
andrewbonney | 0:07919e3d6c56 | 1366 | } |
andrewbonney | 0:07919e3d6c56 | 1367 | return XML_TOK_PARTIAL; |
andrewbonney | 0:07919e3d6c56 | 1368 | } |
andrewbonney | 0:07919e3d6c56 | 1369 | |
andrewbonney | 0:07919e3d6c56 | 1370 | #endif /* XML_DTD */ |
andrewbonney | 0:07919e3d6c56 | 1371 | |
andrewbonney | 0:07919e3d6c56 | 1372 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 1373 | PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, |
andrewbonney | 0:07919e3d6c56 | 1374 | const char **badPtr) |
andrewbonney | 0:07919e3d6c56 | 1375 | { |
andrewbonney | 0:07919e3d6c56 | 1376 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1377 | end -= MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1378 | for (; ptr != end; ptr += MINBPC(enc)) { |
andrewbonney | 0:07919e3d6c56 | 1379 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1380 | case BT_DIGIT: |
andrewbonney | 0:07919e3d6c56 | 1381 | case BT_HEX: |
andrewbonney | 0:07919e3d6c56 | 1382 | case BT_MINUS: |
andrewbonney | 0:07919e3d6c56 | 1383 | case BT_APOS: |
andrewbonney | 0:07919e3d6c56 | 1384 | case BT_LPAR: |
andrewbonney | 0:07919e3d6c56 | 1385 | case BT_RPAR: |
andrewbonney | 0:07919e3d6c56 | 1386 | case BT_PLUS: |
andrewbonney | 0:07919e3d6c56 | 1387 | case BT_COMMA: |
andrewbonney | 0:07919e3d6c56 | 1388 | case BT_SOL: |
andrewbonney | 0:07919e3d6c56 | 1389 | case BT_EQUALS: |
andrewbonney | 0:07919e3d6c56 | 1390 | case BT_QUEST: |
andrewbonney | 0:07919e3d6c56 | 1391 | case BT_CR: |
andrewbonney | 0:07919e3d6c56 | 1392 | case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 1393 | case BT_SEMI: |
andrewbonney | 0:07919e3d6c56 | 1394 | case BT_EXCL: |
andrewbonney | 0:07919e3d6c56 | 1395 | case BT_AST: |
andrewbonney | 0:07919e3d6c56 | 1396 | case BT_PERCNT: |
andrewbonney | 0:07919e3d6c56 | 1397 | case BT_NUM: |
andrewbonney | 0:07919e3d6c56 | 1398 | #ifdef XML_NS |
andrewbonney | 0:07919e3d6c56 | 1399 | case BT_COLON: |
andrewbonney | 0:07919e3d6c56 | 1400 | #endif |
andrewbonney | 0:07919e3d6c56 | 1401 | break; |
andrewbonney | 0:07919e3d6c56 | 1402 | case BT_S: |
andrewbonney | 0:07919e3d6c56 | 1403 | if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { |
andrewbonney | 0:07919e3d6c56 | 1404 | *badPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1405 | return 0; |
andrewbonney | 0:07919e3d6c56 | 1406 | } |
andrewbonney | 0:07919e3d6c56 | 1407 | break; |
andrewbonney | 0:07919e3d6c56 | 1408 | case BT_NAME: |
andrewbonney | 0:07919e3d6c56 | 1409 | case BT_NMSTRT: |
andrewbonney | 0:07919e3d6c56 | 1410 | if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) |
andrewbonney | 0:07919e3d6c56 | 1411 | break; |
andrewbonney | 0:07919e3d6c56 | 1412 | default: |
andrewbonney | 0:07919e3d6c56 | 1413 | switch (BYTE_TO_ASCII(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1414 | case 0x24: /* $ */ |
andrewbonney | 0:07919e3d6c56 | 1415 | case 0x40: /* @ */ |
andrewbonney | 0:07919e3d6c56 | 1416 | break; |
andrewbonney | 0:07919e3d6c56 | 1417 | default: |
andrewbonney | 0:07919e3d6c56 | 1418 | *badPtr = ptr; |
andrewbonney | 0:07919e3d6c56 | 1419 | return 0; |
andrewbonney | 0:07919e3d6c56 | 1420 | } |
andrewbonney | 0:07919e3d6c56 | 1421 | break; |
andrewbonney | 0:07919e3d6c56 | 1422 | } |
andrewbonney | 0:07919e3d6c56 | 1423 | } |
andrewbonney | 0:07919e3d6c56 | 1424 | return 1; |
andrewbonney | 0:07919e3d6c56 | 1425 | } |
andrewbonney | 0:07919e3d6c56 | 1426 | |
andrewbonney | 0:07919e3d6c56 | 1427 | /* This must only be called for a well-formed start-tag or empty |
andrewbonney | 0:07919e3d6c56 | 1428 | element tag. Returns the number of attributes. Pointers to the |
andrewbonney | 0:07919e3d6c56 | 1429 | first attsMax attributes are stored in atts. |
andrewbonney | 0:07919e3d6c56 | 1430 | */ |
andrewbonney | 0:07919e3d6c56 | 1431 | |
andrewbonney | 0:07919e3d6c56 | 1432 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 1433 | PREFIX(getAtts)(const ENCODING *enc, const char *ptr, |
andrewbonney | 0:07919e3d6c56 | 1434 | int attsMax, ATTRIBUTE *atts) |
andrewbonney | 0:07919e3d6c56 | 1435 | { |
andrewbonney | 0:07919e3d6c56 | 1436 | enum { other, inName, inValue } state = inName; |
andrewbonney | 0:07919e3d6c56 | 1437 | int nAtts = 0; |
andrewbonney | 0:07919e3d6c56 | 1438 | int open = 0; /* defined when state == inValue; |
andrewbonney | 0:07919e3d6c56 | 1439 | initialization just to shut up compilers */ |
andrewbonney | 0:07919e3d6c56 | 1440 | |
andrewbonney | 0:07919e3d6c56 | 1441 | for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { |
andrewbonney | 0:07919e3d6c56 | 1442 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1443 | #define START_NAME \ |
andrewbonney | 0:07919e3d6c56 | 1444 | if (state == other) { \ |
andrewbonney | 0:07919e3d6c56 | 1445 | if (nAtts < attsMax) { \ |
andrewbonney | 0:07919e3d6c56 | 1446 | atts[nAtts].name = ptr; \ |
andrewbonney | 0:07919e3d6c56 | 1447 | atts[nAtts].normalized = 1; \ |
andrewbonney | 0:07919e3d6c56 | 1448 | } \ |
andrewbonney | 0:07919e3d6c56 | 1449 | state = inName; \ |
andrewbonney | 0:07919e3d6c56 | 1450 | } |
andrewbonney | 0:07919e3d6c56 | 1451 | #define LEAD_CASE(n) \ |
andrewbonney | 0:07919e3d6c56 | 1452 | case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; |
andrewbonney | 0:07919e3d6c56 | 1453 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
andrewbonney | 0:07919e3d6c56 | 1454 | #undef LEAD_CASE |
andrewbonney | 0:07919e3d6c56 | 1455 | case BT_NONASCII: |
andrewbonney | 0:07919e3d6c56 | 1456 | case BT_NMSTRT: |
andrewbonney | 0:07919e3d6c56 | 1457 | case BT_HEX: |
andrewbonney | 0:07919e3d6c56 | 1458 | START_NAME |
andrewbonney | 0:07919e3d6c56 | 1459 | break; |
andrewbonney | 0:07919e3d6c56 | 1460 | #undef START_NAME |
andrewbonney | 0:07919e3d6c56 | 1461 | case BT_QUOT: |
andrewbonney | 0:07919e3d6c56 | 1462 | if (state != inValue) { |
andrewbonney | 0:07919e3d6c56 | 1463 | if (nAtts < attsMax) |
andrewbonney | 0:07919e3d6c56 | 1464 | atts[nAtts].valuePtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1465 | state = inValue; |
andrewbonney | 0:07919e3d6c56 | 1466 | open = BT_QUOT; |
andrewbonney | 0:07919e3d6c56 | 1467 | } |
andrewbonney | 0:07919e3d6c56 | 1468 | else if (open == BT_QUOT) { |
andrewbonney | 0:07919e3d6c56 | 1469 | state = other; |
andrewbonney | 0:07919e3d6c56 | 1470 | if (nAtts < attsMax) |
andrewbonney | 0:07919e3d6c56 | 1471 | atts[nAtts].valueEnd = ptr; |
andrewbonney | 0:07919e3d6c56 | 1472 | nAtts++; |
andrewbonney | 0:07919e3d6c56 | 1473 | } |
andrewbonney | 0:07919e3d6c56 | 1474 | break; |
andrewbonney | 0:07919e3d6c56 | 1475 | case BT_APOS: |
andrewbonney | 0:07919e3d6c56 | 1476 | if (state != inValue) { |
andrewbonney | 0:07919e3d6c56 | 1477 | if (nAtts < attsMax) |
andrewbonney | 0:07919e3d6c56 | 1478 | atts[nAtts].valuePtr = ptr + MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1479 | state = inValue; |
andrewbonney | 0:07919e3d6c56 | 1480 | open = BT_APOS; |
andrewbonney | 0:07919e3d6c56 | 1481 | } |
andrewbonney | 0:07919e3d6c56 | 1482 | else if (open == BT_APOS) { |
andrewbonney | 0:07919e3d6c56 | 1483 | state = other; |
andrewbonney | 0:07919e3d6c56 | 1484 | if (nAtts < attsMax) |
andrewbonney | 0:07919e3d6c56 | 1485 | atts[nAtts].valueEnd = ptr; |
andrewbonney | 0:07919e3d6c56 | 1486 | nAtts++; |
andrewbonney | 0:07919e3d6c56 | 1487 | } |
andrewbonney | 0:07919e3d6c56 | 1488 | break; |
andrewbonney | 0:07919e3d6c56 | 1489 | case BT_AMP: |
andrewbonney | 0:07919e3d6c56 | 1490 | if (nAtts < attsMax) |
andrewbonney | 0:07919e3d6c56 | 1491 | atts[nAtts].normalized = 0; |
andrewbonney | 0:07919e3d6c56 | 1492 | break; |
andrewbonney | 0:07919e3d6c56 | 1493 | case BT_S: |
andrewbonney | 0:07919e3d6c56 | 1494 | if (state == inName) |
andrewbonney | 0:07919e3d6c56 | 1495 | state = other; |
andrewbonney | 0:07919e3d6c56 | 1496 | else if (state == inValue |
andrewbonney | 0:07919e3d6c56 | 1497 | && nAtts < attsMax |
andrewbonney | 0:07919e3d6c56 | 1498 | && atts[nAtts].normalized |
andrewbonney | 0:07919e3d6c56 | 1499 | && (ptr == atts[nAtts].valuePtr |
andrewbonney | 0:07919e3d6c56 | 1500 | || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE |
andrewbonney | 0:07919e3d6c56 | 1501 | || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE |
andrewbonney | 0:07919e3d6c56 | 1502 | || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) |
andrewbonney | 0:07919e3d6c56 | 1503 | atts[nAtts].normalized = 0; |
andrewbonney | 0:07919e3d6c56 | 1504 | break; |
andrewbonney | 0:07919e3d6c56 | 1505 | case BT_CR: case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 1506 | /* This case ensures that the first attribute name is counted |
andrewbonney | 0:07919e3d6c56 | 1507 | Apart from that we could just change state on the quote. */ |
andrewbonney | 0:07919e3d6c56 | 1508 | if (state == inName) |
andrewbonney | 0:07919e3d6c56 | 1509 | state = other; |
andrewbonney | 0:07919e3d6c56 | 1510 | else if (state == inValue && nAtts < attsMax) |
andrewbonney | 0:07919e3d6c56 | 1511 | atts[nAtts].normalized = 0; |
andrewbonney | 0:07919e3d6c56 | 1512 | break; |
andrewbonney | 0:07919e3d6c56 | 1513 | case BT_GT: |
andrewbonney | 0:07919e3d6c56 | 1514 | case BT_SOL: |
andrewbonney | 0:07919e3d6c56 | 1515 | if (state != inValue) |
andrewbonney | 0:07919e3d6c56 | 1516 | return nAtts; |
andrewbonney | 0:07919e3d6c56 | 1517 | break; |
andrewbonney | 0:07919e3d6c56 | 1518 | default: |
andrewbonney | 0:07919e3d6c56 | 1519 | break; |
andrewbonney | 0:07919e3d6c56 | 1520 | } |
andrewbonney | 0:07919e3d6c56 | 1521 | } |
andrewbonney | 0:07919e3d6c56 | 1522 | /* not reached */ |
andrewbonney | 0:07919e3d6c56 | 1523 | } |
andrewbonney | 0:07919e3d6c56 | 1524 | |
andrewbonney | 0:07919e3d6c56 | 1525 | static int PTRFASTCALL |
andrewbonney | 0:07919e3d6c56 | 1526 | PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) |
andrewbonney | 0:07919e3d6c56 | 1527 | { |
andrewbonney | 0:07919e3d6c56 | 1528 | int result = 0; |
andrewbonney | 0:07919e3d6c56 | 1529 | /* skip &# */ |
andrewbonney | 0:07919e3d6c56 | 1530 | ptr += 2*MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1531 | if (CHAR_MATCHES(enc, ptr, ASCII_x)) { |
andrewbonney | 0:07919e3d6c56 | 1532 | for (ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1533 | !CHAR_MATCHES(enc, ptr, ASCII_SEMI); |
andrewbonney | 0:07919e3d6c56 | 1534 | ptr += MINBPC(enc)) { |
andrewbonney | 0:07919e3d6c56 | 1535 | int c = BYTE_TO_ASCII(enc, ptr); |
andrewbonney | 0:07919e3d6c56 | 1536 | switch (c) { |
andrewbonney | 0:07919e3d6c56 | 1537 | case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: |
andrewbonney | 0:07919e3d6c56 | 1538 | case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: |
andrewbonney | 0:07919e3d6c56 | 1539 | result <<= 4; |
andrewbonney | 0:07919e3d6c56 | 1540 | result |= (c - ASCII_0); |
andrewbonney | 0:07919e3d6c56 | 1541 | break; |
andrewbonney | 0:07919e3d6c56 | 1542 | case ASCII_A: case ASCII_B: case ASCII_C: |
andrewbonney | 0:07919e3d6c56 | 1543 | case ASCII_D: case ASCII_E: case ASCII_F: |
andrewbonney | 0:07919e3d6c56 | 1544 | result <<= 4; |
andrewbonney | 0:07919e3d6c56 | 1545 | result += 10 + (c - ASCII_A); |
andrewbonney | 0:07919e3d6c56 | 1546 | break; |
andrewbonney | 0:07919e3d6c56 | 1547 | case ASCII_a: case ASCII_b: case ASCII_c: |
andrewbonney | 0:07919e3d6c56 | 1548 | case ASCII_d: case ASCII_e: case ASCII_f: |
andrewbonney | 0:07919e3d6c56 | 1549 | result <<= 4; |
andrewbonney | 0:07919e3d6c56 | 1550 | result += 10 + (c - ASCII_a); |
andrewbonney | 0:07919e3d6c56 | 1551 | break; |
andrewbonney | 0:07919e3d6c56 | 1552 | } |
andrewbonney | 0:07919e3d6c56 | 1553 | if (result >= 0x110000) |
andrewbonney | 0:07919e3d6c56 | 1554 | return -1; |
andrewbonney | 0:07919e3d6c56 | 1555 | } |
andrewbonney | 0:07919e3d6c56 | 1556 | } |
andrewbonney | 0:07919e3d6c56 | 1557 | else { |
andrewbonney | 0:07919e3d6c56 | 1558 | for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { |
andrewbonney | 0:07919e3d6c56 | 1559 | int c = BYTE_TO_ASCII(enc, ptr); |
andrewbonney | 0:07919e3d6c56 | 1560 | result *= 10; |
andrewbonney | 0:07919e3d6c56 | 1561 | result += (c - ASCII_0); |
andrewbonney | 0:07919e3d6c56 | 1562 | if (result >= 0x110000) |
andrewbonney | 0:07919e3d6c56 | 1563 | return -1; |
andrewbonney | 0:07919e3d6c56 | 1564 | } |
andrewbonney | 0:07919e3d6c56 | 1565 | } |
andrewbonney | 0:07919e3d6c56 | 1566 | return checkCharRefNumber(result); |
andrewbonney | 0:07919e3d6c56 | 1567 | } |
andrewbonney | 0:07919e3d6c56 | 1568 | |
andrewbonney | 0:07919e3d6c56 | 1569 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 1570 | PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, |
andrewbonney | 0:07919e3d6c56 | 1571 | const char *end) |
andrewbonney | 0:07919e3d6c56 | 1572 | { |
andrewbonney | 0:07919e3d6c56 | 1573 | switch ((end - ptr)/MINBPC(enc)) { |
andrewbonney | 0:07919e3d6c56 | 1574 | case 2: |
andrewbonney | 0:07919e3d6c56 | 1575 | if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { |
andrewbonney | 0:07919e3d6c56 | 1576 | switch (BYTE_TO_ASCII(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1577 | case ASCII_l: |
andrewbonney | 0:07919e3d6c56 | 1578 | return ASCII_LT; |
andrewbonney | 0:07919e3d6c56 | 1579 | case ASCII_g: |
andrewbonney | 0:07919e3d6c56 | 1580 | return ASCII_GT; |
andrewbonney | 0:07919e3d6c56 | 1581 | } |
andrewbonney | 0:07919e3d6c56 | 1582 | } |
andrewbonney | 0:07919e3d6c56 | 1583 | break; |
andrewbonney | 0:07919e3d6c56 | 1584 | case 3: |
andrewbonney | 0:07919e3d6c56 | 1585 | if (CHAR_MATCHES(enc, ptr, ASCII_a)) { |
andrewbonney | 0:07919e3d6c56 | 1586 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1587 | if (CHAR_MATCHES(enc, ptr, ASCII_m)) { |
andrewbonney | 0:07919e3d6c56 | 1588 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1589 | if (CHAR_MATCHES(enc, ptr, ASCII_p)) |
andrewbonney | 0:07919e3d6c56 | 1590 | return ASCII_AMP; |
andrewbonney | 0:07919e3d6c56 | 1591 | } |
andrewbonney | 0:07919e3d6c56 | 1592 | } |
andrewbonney | 0:07919e3d6c56 | 1593 | break; |
andrewbonney | 0:07919e3d6c56 | 1594 | case 4: |
andrewbonney | 0:07919e3d6c56 | 1595 | switch (BYTE_TO_ASCII(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1596 | case ASCII_q: |
andrewbonney | 0:07919e3d6c56 | 1597 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1598 | if (CHAR_MATCHES(enc, ptr, ASCII_u)) { |
andrewbonney | 0:07919e3d6c56 | 1599 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1600 | if (CHAR_MATCHES(enc, ptr, ASCII_o)) { |
andrewbonney | 0:07919e3d6c56 | 1601 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1602 | if (CHAR_MATCHES(enc, ptr, ASCII_t)) |
andrewbonney | 0:07919e3d6c56 | 1603 | return ASCII_QUOT; |
andrewbonney | 0:07919e3d6c56 | 1604 | } |
andrewbonney | 0:07919e3d6c56 | 1605 | } |
andrewbonney | 0:07919e3d6c56 | 1606 | break; |
andrewbonney | 0:07919e3d6c56 | 1607 | case ASCII_a: |
andrewbonney | 0:07919e3d6c56 | 1608 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1609 | if (CHAR_MATCHES(enc, ptr, ASCII_p)) { |
andrewbonney | 0:07919e3d6c56 | 1610 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1611 | if (CHAR_MATCHES(enc, ptr, ASCII_o)) { |
andrewbonney | 0:07919e3d6c56 | 1612 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1613 | if (CHAR_MATCHES(enc, ptr, ASCII_s)) |
andrewbonney | 0:07919e3d6c56 | 1614 | return ASCII_APOS; |
andrewbonney | 0:07919e3d6c56 | 1615 | } |
andrewbonney | 0:07919e3d6c56 | 1616 | } |
andrewbonney | 0:07919e3d6c56 | 1617 | break; |
andrewbonney | 0:07919e3d6c56 | 1618 | } |
andrewbonney | 0:07919e3d6c56 | 1619 | } |
andrewbonney | 0:07919e3d6c56 | 1620 | return 0; |
andrewbonney | 0:07919e3d6c56 | 1621 | } |
andrewbonney | 0:07919e3d6c56 | 1622 | |
andrewbonney | 0:07919e3d6c56 | 1623 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 1624 | PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) |
andrewbonney | 0:07919e3d6c56 | 1625 | { |
andrewbonney | 0:07919e3d6c56 | 1626 | for (;;) { |
andrewbonney | 0:07919e3d6c56 | 1627 | switch (BYTE_TYPE(enc, ptr1)) { |
andrewbonney | 0:07919e3d6c56 | 1628 | #define LEAD_CASE(n) \ |
andrewbonney | 0:07919e3d6c56 | 1629 | case BT_LEAD ## n: \ |
andrewbonney | 0:07919e3d6c56 | 1630 | if (*ptr1++ != *ptr2++) \ |
andrewbonney | 0:07919e3d6c56 | 1631 | return 0; |
andrewbonney | 0:07919e3d6c56 | 1632 | LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) |
andrewbonney | 0:07919e3d6c56 | 1633 | #undef LEAD_CASE |
andrewbonney | 0:07919e3d6c56 | 1634 | /* fall through */ |
andrewbonney | 0:07919e3d6c56 | 1635 | if (*ptr1++ != *ptr2++) |
andrewbonney | 0:07919e3d6c56 | 1636 | return 0; |
andrewbonney | 0:07919e3d6c56 | 1637 | break; |
andrewbonney | 0:07919e3d6c56 | 1638 | case BT_NONASCII: |
andrewbonney | 0:07919e3d6c56 | 1639 | case BT_NMSTRT: |
andrewbonney | 0:07919e3d6c56 | 1640 | #ifdef XML_NS |
andrewbonney | 0:07919e3d6c56 | 1641 | case BT_COLON: |
andrewbonney | 0:07919e3d6c56 | 1642 | #endif |
andrewbonney | 0:07919e3d6c56 | 1643 | case BT_HEX: |
andrewbonney | 0:07919e3d6c56 | 1644 | case BT_DIGIT: |
andrewbonney | 0:07919e3d6c56 | 1645 | case BT_NAME: |
andrewbonney | 0:07919e3d6c56 | 1646 | case BT_MINUS: |
andrewbonney | 0:07919e3d6c56 | 1647 | if (*ptr2++ != *ptr1++) |
andrewbonney | 0:07919e3d6c56 | 1648 | return 0; |
andrewbonney | 0:07919e3d6c56 | 1649 | if (MINBPC(enc) > 1) { |
andrewbonney | 0:07919e3d6c56 | 1650 | if (*ptr2++ != *ptr1++) |
andrewbonney | 0:07919e3d6c56 | 1651 | return 0; |
andrewbonney | 0:07919e3d6c56 | 1652 | if (MINBPC(enc) > 2) { |
andrewbonney | 0:07919e3d6c56 | 1653 | if (*ptr2++ != *ptr1++) |
andrewbonney | 0:07919e3d6c56 | 1654 | return 0; |
andrewbonney | 0:07919e3d6c56 | 1655 | if (MINBPC(enc) > 3) { |
andrewbonney | 0:07919e3d6c56 | 1656 | if (*ptr2++ != *ptr1++) |
andrewbonney | 0:07919e3d6c56 | 1657 | return 0; |
andrewbonney | 0:07919e3d6c56 | 1658 | } |
andrewbonney | 0:07919e3d6c56 | 1659 | } |
andrewbonney | 0:07919e3d6c56 | 1660 | } |
andrewbonney | 0:07919e3d6c56 | 1661 | break; |
andrewbonney | 0:07919e3d6c56 | 1662 | default: |
andrewbonney | 0:07919e3d6c56 | 1663 | if (MINBPC(enc) == 1 && *ptr1 == *ptr2) |
andrewbonney | 0:07919e3d6c56 | 1664 | return 1; |
andrewbonney | 0:07919e3d6c56 | 1665 | switch (BYTE_TYPE(enc, ptr2)) { |
andrewbonney | 0:07919e3d6c56 | 1666 | case BT_LEAD2: |
andrewbonney | 0:07919e3d6c56 | 1667 | case BT_LEAD3: |
andrewbonney | 0:07919e3d6c56 | 1668 | case BT_LEAD4: |
andrewbonney | 0:07919e3d6c56 | 1669 | case BT_NONASCII: |
andrewbonney | 0:07919e3d6c56 | 1670 | case BT_NMSTRT: |
andrewbonney | 0:07919e3d6c56 | 1671 | #ifdef XML_NS |
andrewbonney | 0:07919e3d6c56 | 1672 | case BT_COLON: |
andrewbonney | 0:07919e3d6c56 | 1673 | #endif |
andrewbonney | 0:07919e3d6c56 | 1674 | case BT_HEX: |
andrewbonney | 0:07919e3d6c56 | 1675 | case BT_DIGIT: |
andrewbonney | 0:07919e3d6c56 | 1676 | case BT_NAME: |
andrewbonney | 0:07919e3d6c56 | 1677 | case BT_MINUS: |
andrewbonney | 0:07919e3d6c56 | 1678 | return 0; |
andrewbonney | 0:07919e3d6c56 | 1679 | default: |
andrewbonney | 0:07919e3d6c56 | 1680 | return 1; |
andrewbonney | 0:07919e3d6c56 | 1681 | } |
andrewbonney | 0:07919e3d6c56 | 1682 | } |
andrewbonney | 0:07919e3d6c56 | 1683 | } |
andrewbonney | 0:07919e3d6c56 | 1684 | /* not reached */ |
andrewbonney | 0:07919e3d6c56 | 1685 | } |
andrewbonney | 0:07919e3d6c56 | 1686 | |
andrewbonney | 0:07919e3d6c56 | 1687 | static int PTRCALL |
andrewbonney | 0:07919e3d6c56 | 1688 | PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, |
andrewbonney | 0:07919e3d6c56 | 1689 | const char *end1, const char *ptr2) |
andrewbonney | 0:07919e3d6c56 | 1690 | { |
andrewbonney | 0:07919e3d6c56 | 1691 | for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { |
andrewbonney | 0:07919e3d6c56 | 1692 | if (ptr1 == end1) |
andrewbonney | 0:07919e3d6c56 | 1693 | return 0; |
andrewbonney | 0:07919e3d6c56 | 1694 | if (!CHAR_MATCHES(enc, ptr1, *ptr2)) |
andrewbonney | 0:07919e3d6c56 | 1695 | return 0; |
andrewbonney | 0:07919e3d6c56 | 1696 | } |
andrewbonney | 0:07919e3d6c56 | 1697 | return ptr1 == end1; |
andrewbonney | 0:07919e3d6c56 | 1698 | } |
andrewbonney | 0:07919e3d6c56 | 1699 | |
andrewbonney | 0:07919e3d6c56 | 1700 | static int PTRFASTCALL |
andrewbonney | 0:07919e3d6c56 | 1701 | PREFIX(nameLength)(const ENCODING *enc, const char *ptr) |
andrewbonney | 0:07919e3d6c56 | 1702 | { |
andrewbonney | 0:07919e3d6c56 | 1703 | const char *start = ptr; |
andrewbonney | 0:07919e3d6c56 | 1704 | for (;;) { |
andrewbonney | 0:07919e3d6c56 | 1705 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1706 | #define LEAD_CASE(n) \ |
andrewbonney | 0:07919e3d6c56 | 1707 | case BT_LEAD ## n: ptr += n; break; |
andrewbonney | 0:07919e3d6c56 | 1708 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
andrewbonney | 0:07919e3d6c56 | 1709 | #undef LEAD_CASE |
andrewbonney | 0:07919e3d6c56 | 1710 | case BT_NONASCII: |
andrewbonney | 0:07919e3d6c56 | 1711 | case BT_NMSTRT: |
andrewbonney | 0:07919e3d6c56 | 1712 | #ifdef XML_NS |
andrewbonney | 0:07919e3d6c56 | 1713 | case BT_COLON: |
andrewbonney | 0:07919e3d6c56 | 1714 | #endif |
andrewbonney | 0:07919e3d6c56 | 1715 | case BT_HEX: |
andrewbonney | 0:07919e3d6c56 | 1716 | case BT_DIGIT: |
andrewbonney | 0:07919e3d6c56 | 1717 | case BT_NAME: |
andrewbonney | 0:07919e3d6c56 | 1718 | case BT_MINUS: |
andrewbonney | 0:07919e3d6c56 | 1719 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1720 | break; |
andrewbonney | 0:07919e3d6c56 | 1721 | default: |
andrewbonney | 0:07919e3d6c56 | 1722 | return (int)(ptr - start); |
andrewbonney | 0:07919e3d6c56 | 1723 | } |
andrewbonney | 0:07919e3d6c56 | 1724 | } |
andrewbonney | 0:07919e3d6c56 | 1725 | } |
andrewbonney | 0:07919e3d6c56 | 1726 | |
andrewbonney | 0:07919e3d6c56 | 1727 | static const char * PTRFASTCALL |
andrewbonney | 0:07919e3d6c56 | 1728 | PREFIX(skipS)(const ENCODING *enc, const char *ptr) |
andrewbonney | 0:07919e3d6c56 | 1729 | { |
andrewbonney | 0:07919e3d6c56 | 1730 | for (;;) { |
andrewbonney | 0:07919e3d6c56 | 1731 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1732 | case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 1733 | case BT_CR: |
andrewbonney | 0:07919e3d6c56 | 1734 | case BT_S: |
andrewbonney | 0:07919e3d6c56 | 1735 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1736 | break; |
andrewbonney | 0:07919e3d6c56 | 1737 | default: |
andrewbonney | 0:07919e3d6c56 | 1738 | return ptr; |
andrewbonney | 0:07919e3d6c56 | 1739 | } |
andrewbonney | 0:07919e3d6c56 | 1740 | } |
andrewbonney | 0:07919e3d6c56 | 1741 | } |
andrewbonney | 0:07919e3d6c56 | 1742 | |
andrewbonney | 0:07919e3d6c56 | 1743 | static void PTRCALL |
andrewbonney | 0:07919e3d6c56 | 1744 | PREFIX(updatePosition)(const ENCODING *enc, |
andrewbonney | 0:07919e3d6c56 | 1745 | const char *ptr, |
andrewbonney | 0:07919e3d6c56 | 1746 | const char *end, |
andrewbonney | 0:07919e3d6c56 | 1747 | POSITION *pos) |
andrewbonney | 0:07919e3d6c56 | 1748 | { |
andrewbonney | 0:07919e3d6c56 | 1749 | while (ptr != end) { |
andrewbonney | 0:07919e3d6c56 | 1750 | switch (BYTE_TYPE(enc, ptr)) { |
andrewbonney | 0:07919e3d6c56 | 1751 | #define LEAD_CASE(n) \ |
andrewbonney | 0:07919e3d6c56 | 1752 | case BT_LEAD ## n: \ |
andrewbonney | 0:07919e3d6c56 | 1753 | ptr += n; \ |
andrewbonney | 0:07919e3d6c56 | 1754 | break; |
andrewbonney | 0:07919e3d6c56 | 1755 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
andrewbonney | 0:07919e3d6c56 | 1756 | #undef LEAD_CASE |
andrewbonney | 0:07919e3d6c56 | 1757 | case BT_LF: |
andrewbonney | 0:07919e3d6c56 | 1758 | pos->columnNumber = (XML_Size)-1; |
andrewbonney | 0:07919e3d6c56 | 1759 | pos->lineNumber++; |
andrewbonney | 0:07919e3d6c56 | 1760 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1761 | break; |
andrewbonney | 0:07919e3d6c56 | 1762 | case BT_CR: |
andrewbonney | 0:07919e3d6c56 | 1763 | pos->lineNumber++; |
andrewbonney | 0:07919e3d6c56 | 1764 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1765 | if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) |
andrewbonney | 0:07919e3d6c56 | 1766 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1767 | pos->columnNumber = (XML_Size)-1; |
andrewbonney | 0:07919e3d6c56 | 1768 | break; |
andrewbonney | 0:07919e3d6c56 | 1769 | default: |
andrewbonney | 0:07919e3d6c56 | 1770 | ptr += MINBPC(enc); |
andrewbonney | 0:07919e3d6c56 | 1771 | break; |
andrewbonney | 0:07919e3d6c56 | 1772 | } |
andrewbonney | 0:07919e3d6c56 | 1773 | pos->columnNumber++; |
andrewbonney | 0:07919e3d6c56 | 1774 | } |
andrewbonney | 0:07919e3d6c56 | 1775 | } |
andrewbonney | 0:07919e3d6c56 | 1776 | |
andrewbonney | 0:07919e3d6c56 | 1777 | #undef DO_LEAD_CASE |
andrewbonney | 0:07919e3d6c56 | 1778 | #undef MULTIBYTE_CASES |
andrewbonney | 0:07919e3d6c56 | 1779 | #undef INVALID_CASES |
andrewbonney | 0:07919e3d6c56 | 1780 | #undef CHECK_NAME_CASE |
andrewbonney | 0:07919e3d6c56 | 1781 | #undef CHECK_NAME_CASES |
andrewbonney | 0:07919e3d6c56 | 1782 | #undef CHECK_NMSTRT_CASE |
andrewbonney | 0:07919e3d6c56 | 1783 | #undef CHECK_NMSTRT_CASES |
andrewbonney | 0:07919e3d6c56 | 1784 | |
andrewbonney | 0:07919e3d6c56 | 1785 | #endif /* XML_TOK_IMPL_C */ |