yajl - JSON library working with the compiler. URL: http://lloyd.github.com/yajl/

Dependencies:   mbed

Committer:
rolf
Date:
Wed Nov 18 17:56:51 2009 +0000
Revision:
0:34f4a53d4ca3

        

Who changed what in which revision?

User | Revision | Line number | New contents of line
rolf 0:34f4a53d4ca3 1 /*
rolf 0:34f4a53d4ca3 2 * Copyright 2007-2009, Lloyd Hilaiel.
rolf 0:34f4a53d4ca3 3 *
rolf 0:34f4a53d4ca3 4 * Redistribution and use in source and binary forms, with or without
rolf 0:34f4a53d4ca3 5 * modification, are permitted provided that the following conditions are
rolf 0:34f4a53d4ca3 6 * met:
rolf 0:34f4a53d4ca3 7 *
rolf 0:34f4a53d4ca3 8 * 1. Redistributions of source code must retain the above copyright
rolf 0:34f4a53d4ca3 9 * notice, this list of conditions and the following disclaimer.
rolf 0:34f4a53d4ca3 10 *
rolf 0:34f4a53d4ca3 11 * 2. Redistributions in binary form must reproduce the above copyright
rolf 0:34f4a53d4ca3 12 * notice, this list of conditions and the following disclaimer in
rolf 0:34f4a53d4ca3 13 * the documentation and/or other materials provided with the
rolf 0:34f4a53d4ca3 14 * distribution.
rolf 0:34f4a53d4ca3 15 *
rolf 0:34f4a53d4ca3 16 * 3. Neither the name of Lloyd Hilaiel nor the names of its
rolf 0:34f4a53d4ca3 17 * contributors may be used to endorse or promote products derived
rolf 0:34f4a53d4ca3 18 * from this software without specific prior written permission.
rolf 0:34f4a53d4ca3 19 *
rolf 0:34f4a53d4ca3 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
rolf 0:34f4a53d4ca3 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
rolf 0:34f4a53d4ca3 22 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
rolf 0:34f4a53d4ca3 23 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
rolf 0:34f4a53d4ca3 24 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
rolf 0:34f4a53d4ca3 25 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
rolf 0:34f4a53d4ca3 26 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
rolf 0:34f4a53d4ca3 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
rolf 0:34f4a53d4ca3 28 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
rolf 0:34f4a53d4ca3 29 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
rolf 0:34f4a53d4ca3 30 * POSSIBILITY OF SUCH DAMAGE.
rolf 0:34f4a53d4ca3 31 */
rolf 0:34f4a53d4ca3 32 #ifdef __cplusplus
rolf 0:34f4a53d4ca3 33 extern "C" {
rolf 0:34f4a53d4ca3 34 #endif
rolf 0:34f4a53d4ca3 35
rolf 0:34f4a53d4ca3 36
rolf 0:34f4a53d4ca3 37 #include "yajl_lex.h"
rolf 0:34f4a53d4ca3 38 #include "yajl_buf.h"
rolf 0:34f4a53d4ca3 39
rolf 0:34f4a53d4ca3 40 #include <stdlib.h>
rolf 0:34f4a53d4ca3 41 #include <stdio.h>
rolf 0:34f4a53d4ca3 42 #include <assert.h>
rolf 0:34f4a53d4ca3 43 #include <string.h>
rolf 0:34f4a53d4ca3 44
#ifdef YAJL_LEXER_DEBUG
/* Debug-only helper: translate a lexer token into a short printable
 * name.  The left and right variants of braces and brackets share a
 * single name, and any token without an entry below is reported as
 * "unknown". */
static const char *
tokToStr(yajl_tok tok)
{
    const char * name = "unknown";

    switch (tok) {
        case yajl_tok_bool:
            name = "bool";
            break;
        case yajl_tok_colon:
            name = "colon";
            break;
        case yajl_tok_comma:
            name = "comma";
            break;
        case yajl_tok_eof:
            name = "eof";
            break;
        case yajl_tok_error:
            name = "error";
            break;
        case yajl_tok_left_brace:
        case yajl_tok_right_brace:
            name = "brace";
            break;
        case yajl_tok_left_bracket:
        case yajl_tok_right_bracket:
            name = "bracket";
            break;
        case yajl_tok_null:
            name = "null";
            break;
        case yajl_tok_integer:
            name = "integer";
            break;
        case yajl_tok_double:
            name = "double";
            break;
        case yajl_tok_string:
            name = "string";
            break;
        case yajl_tok_string_with_escapes:
            name = "string_with_escapes";
            break;
    }

    return name;
}
#endif
rolf 0:34f4a53d4ca3 68
rolf 0:34f4a53d4ca3 69 /* Impact of the stream parsing feature on the lexer:
rolf 0:34f4a53d4ca3 70 *
rolf 0:34f4a53d4ca3 71 * YAJL supports stream parsing. That is, the ability to parse the first
rolf 0:34f4a53d4ca3 72 * bits of a chunk of JSON before the last bits are available (still on
rolf 0:34f4a53d4ca3 73 * the network or disk). This makes the lexer more complex. The
rolf 0:34f4a53d4ca3 74 * responsibility of the lexer is to handle transparently the case where
rolf 0:34f4a53d4ca3 75 * a chunk boundary falls in the middle of a token. This is
rolf 0:34f4a53d4ca3 76 * accomplished via a buffer and a character reading abstraction.
rolf 0:34f4a53d4ca3 77 *
rolf 0:34f4a53d4ca3 78 * Overview of implementation
rolf 0:34f4a53d4ca3 79 *
rolf 0:34f4a53d4ca3 80 * When we lex to end of input string before end of token is hit, we
rolf 0:34f4a53d4ca3 81 * copy all of the input text composing the token into our lexBuf.
rolf 0:34f4a53d4ca3 82 *
rolf 0:34f4a53d4ca3 83 * Every time we read a character, we do so through the readChar function.
rolf 0:34f4a53d4ca3 84 * readChar's responsibility is to handle pulling all chars from the buffer
rolf 0:34f4a53d4ca3 85 * before pulling chars from input text
rolf 0:34f4a53d4ca3 86 */
rolf 0:34f4a53d4ca3 87
rolf 0:34f4a53d4ca3 88 struct yajl_lexer_t {
rolf 0:34f4a53d4ca3 89 /* the overal line and char offset into the data */
rolf 0:34f4a53d4ca3 90 unsigned int lineOff;
rolf 0:34f4a53d4ca3 91 unsigned int charOff;
rolf 0:34f4a53d4ca3 92
rolf 0:34f4a53d4ca3 93 /* error */
rolf 0:34f4a53d4ca3 94 yajl_lex_error error;
rolf 0:34f4a53d4ca3 95
rolf 0:34f4a53d4ca3 96 /* a input buffer to handle the case where a token is spread over
rolf 0:34f4a53d4ca3 97 * multiple chunks */
rolf 0:34f4a53d4ca3 98 yajl_buf buf;
rolf 0:34f4a53d4ca3 99
rolf 0:34f4a53d4ca3 100 /* in the case where we have data in the lexBuf, bufOff holds
rolf 0:34f4a53d4ca3 101 * the current offset into the lexBuf. */
rolf 0:34f4a53d4ca3 102 unsigned int bufOff;
rolf 0:34f4a53d4ca3 103
rolf 0:34f4a53d4ca3 104 /* are we using the lex buf? */
rolf 0:34f4a53d4ca3 105 unsigned int bufInUse;
rolf 0:34f4a53d4ca3 106
rolf 0:34f4a53d4ca3 107 /* shall we allow comments? */
rolf 0:34f4a53d4ca3 108 unsigned int allowComments;
rolf 0:34f4a53d4ca3 109
rolf 0:34f4a53d4ca3 110 /* shall we validate utf8 inside strings? */
rolf 0:34f4a53d4ca3 111 unsigned int validateUTF8;
rolf 0:34f4a53d4ca3 112
rolf 0:34f4a53d4ca3 113 yajl_alloc_funcs * alloc;
rolf 0:34f4a53d4ca3 114 };
rolf 0:34f4a53d4ca3 115
rolf 0:34f4a53d4ca3 116 static unsigned char
rolf 0:34f4a53d4ca3 117 readChar(yajl_lexer lxr, const unsigned char * txt, unsigned int *off)
rolf 0:34f4a53d4ca3 118 {
rolf 0:34f4a53d4ca3 119 if (lxr->bufInUse && yajl_buf_len(lxr->buf) &&
rolf 0:34f4a53d4ca3 120 lxr->bufOff < yajl_buf_len(lxr->buf))
rolf 0:34f4a53d4ca3 121 {
rolf 0:34f4a53d4ca3 122 return *((const unsigned char *) yajl_buf_data(lxr->buf) +
rolf 0:34f4a53d4ca3 123 (lxr->bufOff)++);
rolf 0:34f4a53d4ca3 124 }
rolf 0:34f4a53d4ca3 125 return txt[(*off)++];
rolf 0:34f4a53d4ca3 126 }
rolf 0:34f4a53d4ca3 127
rolf 0:34f4a53d4ca3 128 static void
rolf 0:34f4a53d4ca3 129 unreadChar(yajl_lexer lxr, unsigned int *off)
rolf 0:34f4a53d4ca3 130 {
rolf 0:34f4a53d4ca3 131 if (*off > 0) (*off)--;
rolf 0:34f4a53d4ca3 132 else (lxr->bufOff)--;
rolf 0:34f4a53d4ca3 133 }
rolf 0:34f4a53d4ca3 134
rolf 0:34f4a53d4ca3 135 yajl_lexer
rolf 0:34f4a53d4ca3 136 yajl_lex_alloc(yajl_alloc_funcs * alloc,
rolf 0:34f4a53d4ca3 137 unsigned int allowComments, unsigned int validateUTF8)
rolf 0:34f4a53d4ca3 138 {
rolf 0:34f4a53d4ca3 139 yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t));
rolf 0:34f4a53d4ca3 140 memset((void *) lxr, 0, sizeof(struct yajl_lexer_t));
rolf 0:34f4a53d4ca3 141 lxr->buf = yajl_buf_alloc(alloc);
rolf 0:34f4a53d4ca3 142 lxr->allowComments = allowComments;
rolf 0:34f4a53d4ca3 143 lxr->validateUTF8 = validateUTF8;
rolf 0:34f4a53d4ca3 144 lxr->alloc = alloc;
rolf 0:34f4a53d4ca3 145 return lxr;
rolf 0:34f4a53d4ca3 146 }
rolf 0:34f4a53d4ca3 147
rolf 0:34f4a53d4ca3 148 void
rolf 0:34f4a53d4ca3 149 yajl_lex_free(yajl_lexer lxr)
rolf 0:34f4a53d4ca3 150 {
rolf 0:34f4a53d4ca3 151 yajl_buf_free(lxr->buf);
rolf 0:34f4a53d4ca3 152 YA_FREE(lxr->alloc, lxr);
rolf 0:34f4a53d4ca3 153 return;
rolf 0:34f4a53d4ca3 154 }
rolf 0:34f4a53d4ca3 155
/* Per-byte classification flags, indexed by byte value:
 *   VEC - valid escaped control char (may follow a backslash)
 *   IJC - invalid json char (may not appear bare inside a string)
 *   VHC - valid hex char
 * note. the solidus '/' may be escaped or not.
 */
#define VEC 1
#define IJC 2
#define VHC 4
static const char charLookupTable[256] =
{
    /* 0x00 - 0x1f */
    IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC,
    IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC,
    IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC,
    IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC,

    /* 0x20 - 0x27: '"' */
    0, 0, VEC|IJC, 0, 0, 0, 0, 0,
    /* 0x28 - 0x2f: '/' */
    0, 0, 0, 0, 0, 0, 0, VEC,
    /* 0x30 - 0x37: '0' .. '7' */
    VHC, VHC, VHC, VHC, VHC, VHC, VHC, VHC,
    /* 0x38 - 0x3f: '8', '9' */
    VHC, VHC, 0, 0, 0, 0, 0, 0,

    /* 0x40 - 0x47: 'A' .. 'F' */
    0, VHC, VHC, VHC, VHC, VHC, VHC, 0,
    /* 0x48 - 0x4f */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 - 0x57 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x58 - 0x5f: '\\' */
    0, 0, 0, 0, VEC|IJC, 0, 0, 0,

    /* 0x60 - 0x67: 'a' .. 'f', of which 'b' and 'f' are also escapes */
    0, VHC, VEC|VHC, VHC, VHC, VHC, VEC|VHC, 0,
    /* 0x68 - 0x6f: 'n' */
    0, 0, 0, 0, 0, 0, VEC, 0,
    /* 0x70 - 0x77: 'r', 't' */
    0, 0, VEC, 0, VEC, 0, 0, 0,
    /* 0x78 - 0x7f */
    0, 0, 0, 0, 0, 0, 0, 0

    /* 0x80 - 0xff: left to implicit zero-initialisation.  The table
     * still has 256 entries so that no range check is needed before
     * indexing it with an unsigned char. */
};
rolf 0:34f4a53d4ca3 209
rolf 0:34f4a53d4ca3 210 /** process a variable length utf8 encoded codepoint.
rolf 0:34f4a53d4ca3 211 *
rolf 0:34f4a53d4ca3 212 * returns:
rolf 0:34f4a53d4ca3 213 * yajl_tok_string - if valid utf8 char was parsed and offset was
rolf 0:34f4a53d4ca3 214 * advanced
rolf 0:34f4a53d4ca3 215 * yajl_tok_eof - if end of input was hit before validation could
rolf 0:34f4a53d4ca3 216 * complete
rolf 0:34f4a53d4ca3 217 * yajl_tok_error - if invalid utf8 was encountered
rolf 0:34f4a53d4ca3 218 *
rolf 0:34f4a53d4ca3 219 * NOTE: on error the offset will point to the first char of the
rolf 0:34f4a53d4ca3 220 * invalid utf8 */
rolf 0:34f4a53d4ca3 221 #define UTF8_CHECK_EOF if (*offset >= jsonTextLen) { return yajl_tok_eof; }
rolf 0:34f4a53d4ca3 222
rolf 0:34f4a53d4ca3 223 static yajl_tok
rolf 0:34f4a53d4ca3 224 yajl_lex_utf8_char(yajl_lexer lexer, const unsigned char * jsonText,
rolf 0:34f4a53d4ca3 225 unsigned int jsonTextLen, unsigned int * offset,
rolf 0:34f4a53d4ca3 226 unsigned char curChar)
rolf 0:34f4a53d4ca3 227 {
rolf 0:34f4a53d4ca3 228 if (curChar <= 0x7f) {
rolf 0:34f4a53d4ca3 229 /* single byte */
rolf 0:34f4a53d4ca3 230 return yajl_tok_string;
rolf 0:34f4a53d4ca3 231 } else if ((curChar >> 5) == 0x6) {
rolf 0:34f4a53d4ca3 232 /* two byte */
rolf 0:34f4a53d4ca3 233 UTF8_CHECK_EOF;
rolf 0:34f4a53d4ca3 234 curChar = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 235 if ((curChar >> 6) == 0x2) return yajl_tok_string;
rolf 0:34f4a53d4ca3 236 } else if ((curChar >> 4) == 0x0e) {
rolf 0:34f4a53d4ca3 237 /* three byte */
rolf 0:34f4a53d4ca3 238 UTF8_CHECK_EOF;
rolf 0:34f4a53d4ca3 239 curChar = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 240 if ((curChar >> 6) == 0x2) {
rolf 0:34f4a53d4ca3 241 UTF8_CHECK_EOF;
rolf 0:34f4a53d4ca3 242 curChar = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 243 if ((curChar >> 6) == 0x2) return yajl_tok_string;
rolf 0:34f4a53d4ca3 244 }
rolf 0:34f4a53d4ca3 245 } else if ((curChar >> 3) == 0x1e) {
rolf 0:34f4a53d4ca3 246 /* four byte */
rolf 0:34f4a53d4ca3 247 UTF8_CHECK_EOF;
rolf 0:34f4a53d4ca3 248 curChar = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 249 if ((curChar >> 6) == 0x2) {
rolf 0:34f4a53d4ca3 250 UTF8_CHECK_EOF;
rolf 0:34f4a53d4ca3 251 curChar = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 252 if ((curChar >> 6) == 0x2) {
rolf 0:34f4a53d4ca3 253 UTF8_CHECK_EOF;
rolf 0:34f4a53d4ca3 254 curChar = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 255 if ((curChar >> 6) == 0x2) return yajl_tok_string;
rolf 0:34f4a53d4ca3 256 }
rolf 0:34f4a53d4ca3 257 }
rolf 0:34f4a53d4ca3 258 }
rolf 0:34f4a53d4ca3 259
rolf 0:34f4a53d4ca3 260 return yajl_tok_error;
rolf 0:34f4a53d4ca3 261 }
rolf 0:34f4a53d4ca3 262
rolf 0:34f4a53d4ca3 263 /* lex a string. input is the lexer, pointer to beginning of
rolf 0:34f4a53d4ca3 264 * json text, and start of string (offset).
rolf 0:34f4a53d4ca3 265 * a token is returned which has the following meanings:
rolf 0:34f4a53d4ca3 266 * yajl_tok_string: lex of string was successful. offset points to
rolf 0:34f4a53d4ca3 267 * terminating '"'.
rolf 0:34f4a53d4ca3 268 * yajl_tok_eof: end of text was encountered before we could complete
rolf 0:34f4a53d4ca3 269 * the lex.
rolf 0:34f4a53d4ca3 270 * yajl_tok_error: embedded in the string were unallowable chars. offset
rolf 0:34f4a53d4ca3 271 * points to the offending char
rolf 0:34f4a53d4ca3 272 */
rolf 0:34f4a53d4ca3 273 #define STR_CHECK_EOF \
rolf 0:34f4a53d4ca3 274 if (*offset >= jsonTextLen) { \
rolf 0:34f4a53d4ca3 275 tok = yajl_tok_eof; \
rolf 0:34f4a53d4ca3 276 goto finish_string_lex; \
rolf 0:34f4a53d4ca3 277 }
rolf 0:34f4a53d4ca3 278
rolf 0:34f4a53d4ca3 279 static yajl_tok
rolf 0:34f4a53d4ca3 280 yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText,
rolf 0:34f4a53d4ca3 281 unsigned int jsonTextLen, unsigned int * offset)
rolf 0:34f4a53d4ca3 282 {
rolf 0:34f4a53d4ca3 283 yajl_tok tok = yajl_tok_error;
rolf 0:34f4a53d4ca3 284 int hasEscapes = 0;
rolf 0:34f4a53d4ca3 285
rolf 0:34f4a53d4ca3 286 for (;;) {
rolf 0:34f4a53d4ca3 287 unsigned char curChar;
rolf 0:34f4a53d4ca3 288
rolf 0:34f4a53d4ca3 289 STR_CHECK_EOF;
rolf 0:34f4a53d4ca3 290
rolf 0:34f4a53d4ca3 291 curChar = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 292
rolf 0:34f4a53d4ca3 293 /* quote terminates */
rolf 0:34f4a53d4ca3 294 if (curChar == '"') {
rolf 0:34f4a53d4ca3 295 tok = yajl_tok_string;
rolf 0:34f4a53d4ca3 296 break;
rolf 0:34f4a53d4ca3 297 }
rolf 0:34f4a53d4ca3 298 /* backslash escapes a set of control chars, */
rolf 0:34f4a53d4ca3 299 else if (curChar == '\\') {
rolf 0:34f4a53d4ca3 300 hasEscapes = 1;
rolf 0:34f4a53d4ca3 301 STR_CHECK_EOF;
rolf 0:34f4a53d4ca3 302
rolf 0:34f4a53d4ca3 303 /* special case \u */
rolf 0:34f4a53d4ca3 304 curChar = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 305 if (curChar == 'u') {
rolf 0:34f4a53d4ca3 306 unsigned int i = 0;
rolf 0:34f4a53d4ca3 307
rolf 0:34f4a53d4ca3 308 for (i=0;i<4;i++) {
rolf 0:34f4a53d4ca3 309 STR_CHECK_EOF;
rolf 0:34f4a53d4ca3 310 curChar = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 311 if (!(charLookupTable[curChar] & VHC)) {
rolf 0:34f4a53d4ca3 312 /* back up to offending char */
rolf 0:34f4a53d4ca3 313 unreadChar(lexer, offset);
rolf 0:34f4a53d4ca3 314 lexer->error = yajl_lex_string_invalid_hex_char;
rolf 0:34f4a53d4ca3 315 goto finish_string_lex;
rolf 0:34f4a53d4ca3 316 }
rolf 0:34f4a53d4ca3 317 }
rolf 0:34f4a53d4ca3 318 } else if (!(charLookupTable[curChar] & VEC)) {
rolf 0:34f4a53d4ca3 319 /* back up to offending char */
rolf 0:34f4a53d4ca3 320 unreadChar(lexer, offset);
rolf 0:34f4a53d4ca3 321 lexer->error = yajl_lex_string_invalid_escaped_char;
rolf 0:34f4a53d4ca3 322 goto finish_string_lex;
rolf 0:34f4a53d4ca3 323 }
rolf 0:34f4a53d4ca3 324 }
rolf 0:34f4a53d4ca3 325 /* when not validating UTF8 it's a simple table lookup to determine
rolf 0:34f4a53d4ca3 326 * if the present character is invalid */
rolf 0:34f4a53d4ca3 327 else if(charLookupTable[curChar] & IJC) {
rolf 0:34f4a53d4ca3 328 /* back up to offending char */
rolf 0:34f4a53d4ca3 329 unreadChar(lexer, offset);
rolf 0:34f4a53d4ca3 330 lexer->error = yajl_lex_string_invalid_json_char;
rolf 0:34f4a53d4ca3 331 goto finish_string_lex;
rolf 0:34f4a53d4ca3 332 }
rolf 0:34f4a53d4ca3 333 /* when in validate UTF8 mode we need to do some extra work */
rolf 0:34f4a53d4ca3 334 else if (lexer->validateUTF8) {
rolf 0:34f4a53d4ca3 335 yajl_tok t = yajl_lex_utf8_char(lexer, jsonText, jsonTextLen,
rolf 0:34f4a53d4ca3 336 offset, curChar);
rolf 0:34f4a53d4ca3 337
rolf 0:34f4a53d4ca3 338 if (t == yajl_tok_eof) {
rolf 0:34f4a53d4ca3 339 tok = yajl_tok_eof;
rolf 0:34f4a53d4ca3 340 goto finish_string_lex;
rolf 0:34f4a53d4ca3 341 } else if (t == yajl_tok_error) {
rolf 0:34f4a53d4ca3 342 lexer->error = yajl_lex_string_invalid_utf8;
rolf 0:34f4a53d4ca3 343 goto finish_string_lex;
rolf 0:34f4a53d4ca3 344 }
rolf 0:34f4a53d4ca3 345 }
rolf 0:34f4a53d4ca3 346 /* accept it, and move on */
rolf 0:34f4a53d4ca3 347 }
rolf 0:34f4a53d4ca3 348 finish_string_lex:
rolf 0:34f4a53d4ca3 349 /* tell our buddy, the parser, wether he needs to process this string
rolf 0:34f4a53d4ca3 350 * again */
rolf 0:34f4a53d4ca3 351 if (hasEscapes && tok == yajl_tok_string) {
rolf 0:34f4a53d4ca3 352 tok = yajl_tok_string_with_escapes;
rolf 0:34f4a53d4ca3 353 }
rolf 0:34f4a53d4ca3 354
rolf 0:34f4a53d4ca3 355 return tok;
rolf 0:34f4a53d4ca3 356 }
rolf 0:34f4a53d4ca3 357
rolf 0:34f4a53d4ca3 358 #define RETURN_IF_EOF if (*offset >= jsonTextLen) return yajl_tok_eof;
rolf 0:34f4a53d4ca3 359
rolf 0:34f4a53d4ca3 360 static yajl_tok
rolf 0:34f4a53d4ca3 361 yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
rolf 0:34f4a53d4ca3 362 unsigned int jsonTextLen, unsigned int * offset)
rolf 0:34f4a53d4ca3 363 {
rolf 0:34f4a53d4ca3 364 /** XXX: numbers are the only entities in json that we must lex
rolf 0:34f4a53d4ca3 365 * _beyond_ in order to know that they are complete. There
rolf 0:34f4a53d4ca3 366 * is an ambiguous case for integers at EOF. */
rolf 0:34f4a53d4ca3 367
rolf 0:34f4a53d4ca3 368 unsigned char c;
rolf 0:34f4a53d4ca3 369
rolf 0:34f4a53d4ca3 370 yajl_tok tok = yajl_tok_integer;
rolf 0:34f4a53d4ca3 371
rolf 0:34f4a53d4ca3 372 RETURN_IF_EOF;
rolf 0:34f4a53d4ca3 373 c = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 374
rolf 0:34f4a53d4ca3 375 /* optional leading minus */
rolf 0:34f4a53d4ca3 376 if (c == '-') {
rolf 0:34f4a53d4ca3 377 RETURN_IF_EOF;
rolf 0:34f4a53d4ca3 378 c = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 379 }
rolf 0:34f4a53d4ca3 380
rolf 0:34f4a53d4ca3 381 /* a single zero, or a series of integers */
rolf 0:34f4a53d4ca3 382 if (c == '0') {
rolf 0:34f4a53d4ca3 383 RETURN_IF_EOF;
rolf 0:34f4a53d4ca3 384 c = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 385 } else if (c >= '1' && c <= '9') {
rolf 0:34f4a53d4ca3 386 do {
rolf 0:34f4a53d4ca3 387 RETURN_IF_EOF;
rolf 0:34f4a53d4ca3 388 c = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 389 } while (c >= '0' && c <= '9');
rolf 0:34f4a53d4ca3 390 } else {
rolf 0:34f4a53d4ca3 391 unreadChar(lexer, offset);
rolf 0:34f4a53d4ca3 392 lexer->error = yajl_lex_missing_integer_after_minus;
rolf 0:34f4a53d4ca3 393 return yajl_tok_error;
rolf 0:34f4a53d4ca3 394 }
rolf 0:34f4a53d4ca3 395
rolf 0:34f4a53d4ca3 396 /* optional fraction (indicates this is floating point) */
rolf 0:34f4a53d4ca3 397 if (c == '.') {
rolf 0:34f4a53d4ca3 398 int numRd = 0;
rolf 0:34f4a53d4ca3 399
rolf 0:34f4a53d4ca3 400 RETURN_IF_EOF;
rolf 0:34f4a53d4ca3 401 c = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 402
rolf 0:34f4a53d4ca3 403 while (c >= '0' && c <= '9') {
rolf 0:34f4a53d4ca3 404 numRd++;
rolf 0:34f4a53d4ca3 405 RETURN_IF_EOF;
rolf 0:34f4a53d4ca3 406 c = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 407 }
rolf 0:34f4a53d4ca3 408
rolf 0:34f4a53d4ca3 409 if (!numRd) {
rolf 0:34f4a53d4ca3 410 unreadChar(lexer, offset);
rolf 0:34f4a53d4ca3 411 lexer->error = yajl_lex_missing_integer_after_decimal;
rolf 0:34f4a53d4ca3 412 return yajl_tok_error;
rolf 0:34f4a53d4ca3 413 }
rolf 0:34f4a53d4ca3 414 tok = yajl_tok_double;
rolf 0:34f4a53d4ca3 415 }
rolf 0:34f4a53d4ca3 416
rolf 0:34f4a53d4ca3 417 /* optional exponent (indicates this is floating point) */
rolf 0:34f4a53d4ca3 418 if (c == 'e' || c == 'E') {
rolf 0:34f4a53d4ca3 419 RETURN_IF_EOF;
rolf 0:34f4a53d4ca3 420 c = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 421
rolf 0:34f4a53d4ca3 422 /* optional sign */
rolf 0:34f4a53d4ca3 423 if (c == '+' || c == '-') {
rolf 0:34f4a53d4ca3 424 RETURN_IF_EOF;
rolf 0:34f4a53d4ca3 425 c = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 426 }
rolf 0:34f4a53d4ca3 427
rolf 0:34f4a53d4ca3 428 if (c >= '0' && c <= '9') {
rolf 0:34f4a53d4ca3 429 do {
rolf 0:34f4a53d4ca3 430 RETURN_IF_EOF;
rolf 0:34f4a53d4ca3 431 c = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 432 } while (c >= '0' && c <= '9');
rolf 0:34f4a53d4ca3 433 } else {
rolf 0:34f4a53d4ca3 434 unreadChar(lexer, offset);
rolf 0:34f4a53d4ca3 435 lexer->error = yajl_lex_missing_integer_after_exponent;
rolf 0:34f4a53d4ca3 436 return yajl_tok_error;
rolf 0:34f4a53d4ca3 437 }
rolf 0:34f4a53d4ca3 438 tok = yajl_tok_double;
rolf 0:34f4a53d4ca3 439 }
rolf 0:34f4a53d4ca3 440
rolf 0:34f4a53d4ca3 441 /* we always go "one too far" */
rolf 0:34f4a53d4ca3 442 unreadChar(lexer, offset);
rolf 0:34f4a53d4ca3 443
rolf 0:34f4a53d4ca3 444 return tok;
rolf 0:34f4a53d4ca3 445 }
rolf 0:34f4a53d4ca3 446
rolf 0:34f4a53d4ca3 447 static yajl_tok
rolf 0:34f4a53d4ca3 448 yajl_lex_comment(yajl_lexer lexer, const unsigned char * jsonText,
rolf 0:34f4a53d4ca3 449 unsigned int jsonTextLen, unsigned int * offset)
rolf 0:34f4a53d4ca3 450 {
rolf 0:34f4a53d4ca3 451 unsigned char c;
rolf 0:34f4a53d4ca3 452
rolf 0:34f4a53d4ca3 453 yajl_tok tok = yajl_tok_comment;
rolf 0:34f4a53d4ca3 454
rolf 0:34f4a53d4ca3 455 RETURN_IF_EOF;
rolf 0:34f4a53d4ca3 456 c = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 457
rolf 0:34f4a53d4ca3 458 /* either slash or star expected */
rolf 0:34f4a53d4ca3 459 if (c == '/') {
rolf 0:34f4a53d4ca3 460 /* now we throw away until end of line */
rolf 0:34f4a53d4ca3 461 do {
rolf 0:34f4a53d4ca3 462 RETURN_IF_EOF;
rolf 0:34f4a53d4ca3 463 c = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 464 } while (c != '\n');
rolf 0:34f4a53d4ca3 465 } else if (c == '*') {
rolf 0:34f4a53d4ca3 466 /* now we throw away until end of comment */
rolf 0:34f4a53d4ca3 467 for (;;) {
rolf 0:34f4a53d4ca3 468 RETURN_IF_EOF;
rolf 0:34f4a53d4ca3 469 c = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 470 if (c == '*') {
rolf 0:34f4a53d4ca3 471 RETURN_IF_EOF;
rolf 0:34f4a53d4ca3 472 c = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 473 if (c == '/') {
rolf 0:34f4a53d4ca3 474 break;
rolf 0:34f4a53d4ca3 475 } else {
rolf 0:34f4a53d4ca3 476 unreadChar(lexer, offset);
rolf 0:34f4a53d4ca3 477 }
rolf 0:34f4a53d4ca3 478 }
rolf 0:34f4a53d4ca3 479 }
rolf 0:34f4a53d4ca3 480 } else {
rolf 0:34f4a53d4ca3 481 lexer->error = yajl_lex_invalid_char;
rolf 0:34f4a53d4ca3 482 tok = yajl_tok_error;
rolf 0:34f4a53d4ca3 483 }
rolf 0:34f4a53d4ca3 484
rolf 0:34f4a53d4ca3 485 return tok;
rolf 0:34f4a53d4ca3 486 }
rolf 0:34f4a53d4ca3 487
rolf 0:34f4a53d4ca3 488 yajl_tok
rolf 0:34f4a53d4ca3 489 yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
rolf 0:34f4a53d4ca3 490 unsigned int jsonTextLen, unsigned int * offset,
rolf 0:34f4a53d4ca3 491 const unsigned char ** outBuf, unsigned int * outLen)
rolf 0:34f4a53d4ca3 492 {
rolf 0:34f4a53d4ca3 493 yajl_tok tok = yajl_tok_error;
rolf 0:34f4a53d4ca3 494 unsigned char c;
rolf 0:34f4a53d4ca3 495 unsigned int startOffset = *offset;
rolf 0:34f4a53d4ca3 496
rolf 0:34f4a53d4ca3 497 *outBuf = NULL;
rolf 0:34f4a53d4ca3 498 *outLen = 0;
rolf 0:34f4a53d4ca3 499
rolf 0:34f4a53d4ca3 500 for (;;) {
rolf 0:34f4a53d4ca3 501 assert(*offset <= jsonTextLen);
rolf 0:34f4a53d4ca3 502
rolf 0:34f4a53d4ca3 503 if (*offset >= jsonTextLen) {
rolf 0:34f4a53d4ca3 504 tok = yajl_tok_eof;
rolf 0:34f4a53d4ca3 505 goto lexed;
rolf 0:34f4a53d4ca3 506 }
rolf 0:34f4a53d4ca3 507
rolf 0:34f4a53d4ca3 508 c = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 509
rolf 0:34f4a53d4ca3 510 switch (c) {
rolf 0:34f4a53d4ca3 511 case '{':
rolf 0:34f4a53d4ca3 512 tok = yajl_tok_left_bracket;
rolf 0:34f4a53d4ca3 513 goto lexed;
rolf 0:34f4a53d4ca3 514 case '}':
rolf 0:34f4a53d4ca3 515 tok = yajl_tok_right_bracket;
rolf 0:34f4a53d4ca3 516 goto lexed;
rolf 0:34f4a53d4ca3 517 case '[':
rolf 0:34f4a53d4ca3 518 tok = yajl_tok_left_brace;
rolf 0:34f4a53d4ca3 519 goto lexed;
rolf 0:34f4a53d4ca3 520 case ']':
rolf 0:34f4a53d4ca3 521 tok = yajl_tok_right_brace;
rolf 0:34f4a53d4ca3 522 goto lexed;
rolf 0:34f4a53d4ca3 523 case ',':
rolf 0:34f4a53d4ca3 524 tok = yajl_tok_comma;
rolf 0:34f4a53d4ca3 525 goto lexed;
rolf 0:34f4a53d4ca3 526 case ':':
rolf 0:34f4a53d4ca3 527 tok = yajl_tok_colon;
rolf 0:34f4a53d4ca3 528 goto lexed;
rolf 0:34f4a53d4ca3 529 case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
rolf 0:34f4a53d4ca3 530 startOffset++;
rolf 0:34f4a53d4ca3 531 break;
rolf 0:34f4a53d4ca3 532 case 't': {
rolf 0:34f4a53d4ca3 533 const char * want = "rue";
rolf 0:34f4a53d4ca3 534 do {
rolf 0:34f4a53d4ca3 535 if (*offset >= jsonTextLen) {
rolf 0:34f4a53d4ca3 536 tok = yajl_tok_eof;
rolf 0:34f4a53d4ca3 537 goto lexed;
rolf 0:34f4a53d4ca3 538 }
rolf 0:34f4a53d4ca3 539 c = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 540 if (c != *want) {
rolf 0:34f4a53d4ca3 541 unreadChar(lexer, offset);
rolf 0:34f4a53d4ca3 542 lexer->error = yajl_lex_invalid_string;
rolf 0:34f4a53d4ca3 543 tok = yajl_tok_error;
rolf 0:34f4a53d4ca3 544 goto lexed;
rolf 0:34f4a53d4ca3 545 }
rolf 0:34f4a53d4ca3 546 } while (*(++want));
rolf 0:34f4a53d4ca3 547 tok = yajl_tok_bool;
rolf 0:34f4a53d4ca3 548 goto lexed;
rolf 0:34f4a53d4ca3 549 }
rolf 0:34f4a53d4ca3 550 case 'f': {
rolf 0:34f4a53d4ca3 551 const char * want = "alse";
rolf 0:34f4a53d4ca3 552 do {
rolf 0:34f4a53d4ca3 553 if (*offset >= jsonTextLen) {
rolf 0:34f4a53d4ca3 554 tok = yajl_tok_eof;
rolf 0:34f4a53d4ca3 555 goto lexed;
rolf 0:34f4a53d4ca3 556 }
rolf 0:34f4a53d4ca3 557 c = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 558 if (c != *want) {
rolf 0:34f4a53d4ca3 559 unreadChar(lexer, offset);
rolf 0:34f4a53d4ca3 560 lexer->error = yajl_lex_invalid_string;
rolf 0:34f4a53d4ca3 561 tok = yajl_tok_error;
rolf 0:34f4a53d4ca3 562 goto lexed;
rolf 0:34f4a53d4ca3 563 }
rolf 0:34f4a53d4ca3 564 } while (*(++want));
rolf 0:34f4a53d4ca3 565 tok = yajl_tok_bool;
rolf 0:34f4a53d4ca3 566 goto lexed;
rolf 0:34f4a53d4ca3 567 }
rolf 0:34f4a53d4ca3 568 case 'n': {
rolf 0:34f4a53d4ca3 569 const char * want = "ull";
rolf 0:34f4a53d4ca3 570 do {
rolf 0:34f4a53d4ca3 571 if (*offset >= jsonTextLen) {
rolf 0:34f4a53d4ca3 572 tok = yajl_tok_eof;
rolf 0:34f4a53d4ca3 573 goto lexed;
rolf 0:34f4a53d4ca3 574 }
rolf 0:34f4a53d4ca3 575 c = readChar(lexer, jsonText, offset);
rolf 0:34f4a53d4ca3 576 if (c != *want) {
rolf 0:34f4a53d4ca3 577 unreadChar(lexer, offset);
rolf 0:34f4a53d4ca3 578 lexer->error = yajl_lex_invalid_string;
rolf 0:34f4a53d4ca3 579 tok = yajl_tok_error;
rolf 0:34f4a53d4ca3 580 goto lexed;
rolf 0:34f4a53d4ca3 581 }
rolf 0:34f4a53d4ca3 582 } while (*(++want));
rolf 0:34f4a53d4ca3 583 tok = yajl_tok_null;
rolf 0:34f4a53d4ca3 584 goto lexed;
rolf 0:34f4a53d4ca3 585 }
rolf 0:34f4a53d4ca3 586 case '"': {
rolf 0:34f4a53d4ca3 587 tok = yajl_lex_string(lexer, (const unsigned char *) jsonText,
rolf 0:34f4a53d4ca3 588 jsonTextLen, offset);
rolf 0:34f4a53d4ca3 589 goto lexed;
rolf 0:34f4a53d4ca3 590 }
rolf 0:34f4a53d4ca3 591 case '-':
rolf 0:34f4a53d4ca3 592 case '0': case '1': case '2': case '3': case '4':
rolf 0:34f4a53d4ca3 593 case '5': case '6': case '7': case '8': case '9': {
rolf 0:34f4a53d4ca3 594 /* integer parsing wants to start from the beginning */
rolf 0:34f4a53d4ca3 595 unreadChar(lexer, offset);
rolf 0:34f4a53d4ca3 596 tok = yajl_lex_number(lexer, (const unsigned char *) jsonText,
rolf 0:34f4a53d4ca3 597 jsonTextLen, offset);
rolf 0:34f4a53d4ca3 598 goto lexed;
rolf 0:34f4a53d4ca3 599 }
rolf 0:34f4a53d4ca3 600 case '/':
rolf 0:34f4a53d4ca3 601 /* hey, look, a probable comment! If comments are disabled
rolf 0:34f4a53d4ca3 602 * it's an error. */
rolf 0:34f4a53d4ca3 603 if (!lexer->allowComments) {
rolf 0:34f4a53d4ca3 604 unreadChar(lexer, offset);
rolf 0:34f4a53d4ca3 605 lexer->error = yajl_lex_unallowed_comment;
rolf 0:34f4a53d4ca3 606 tok = yajl_tok_error;
rolf 0:34f4a53d4ca3 607 goto lexed;
rolf 0:34f4a53d4ca3 608 }
rolf 0:34f4a53d4ca3 609 /* if comments are enabled, then we should try to lex
rolf 0:34f4a53d4ca3 610 * the thing. possible outcomes are
rolf 0:34f4a53d4ca3 611 * - successful lex (tok_comment, which means continue),
rolf 0:34f4a53d4ca3 612 * - malformed comment opening (slash not followed by
rolf 0:34f4a53d4ca3 613 * '*' or '/') (tok_error)
rolf 0:34f4a53d4ca3 614 * - eof hit. (tok_eof) */
rolf 0:34f4a53d4ca3 615 tok = yajl_lex_comment(lexer, (const unsigned char *) jsonText,
rolf 0:34f4a53d4ca3 616 jsonTextLen, offset);
rolf 0:34f4a53d4ca3 617 if (tok == yajl_tok_comment) {
rolf 0:34f4a53d4ca3 618 /* "error" is silly, but that's the initial
rolf 0:34f4a53d4ca3 619 * state of tok. guilty until proven innocent. */
rolf 0:34f4a53d4ca3 620 tok = yajl_tok_error;
rolf 0:34f4a53d4ca3 621 yajl_buf_clear(lexer->buf);
rolf 0:34f4a53d4ca3 622 lexer->bufInUse = 0;
rolf 0:34f4a53d4ca3 623 startOffset = *offset;
rolf 0:34f4a53d4ca3 624 break;
rolf 0:34f4a53d4ca3 625 }
rolf 0:34f4a53d4ca3 626 /* hit error or eof, bail */
rolf 0:34f4a53d4ca3 627 goto lexed;
rolf 0:34f4a53d4ca3 628 default:
rolf 0:34f4a53d4ca3 629 lexer->error = yajl_lex_invalid_char;
rolf 0:34f4a53d4ca3 630 tok = yajl_tok_error;
rolf 0:34f4a53d4ca3 631 goto lexed;
rolf 0:34f4a53d4ca3 632 }
rolf 0:34f4a53d4ca3 633 }
rolf 0:34f4a53d4ca3 634
rolf 0:34f4a53d4ca3 635
rolf 0:34f4a53d4ca3 636 lexed:
rolf 0:34f4a53d4ca3 637 /* need to append to buffer if the buffer is in use or
rolf 0:34f4a53d4ca3 638 * if it's an EOF token */
rolf 0:34f4a53d4ca3 639 if (tok == yajl_tok_eof || lexer->bufInUse) {
rolf 0:34f4a53d4ca3 640 if (!lexer->bufInUse) yajl_buf_clear(lexer->buf);
rolf 0:34f4a53d4ca3 641 lexer->bufInUse = 1;
rolf 0:34f4a53d4ca3 642 yajl_buf_append(lexer->buf, jsonText + startOffset, *offset - startOffset);
rolf 0:34f4a53d4ca3 643 lexer->bufOff = 0;
rolf 0:34f4a53d4ca3 644
rolf 0:34f4a53d4ca3 645 if (tok != yajl_tok_eof) {
rolf 0:34f4a53d4ca3 646 *outBuf = yajl_buf_data(lexer->buf);
rolf 0:34f4a53d4ca3 647 *outLen = yajl_buf_len(lexer->buf);
rolf 0:34f4a53d4ca3 648 lexer->bufInUse = 0;
rolf 0:34f4a53d4ca3 649 }
rolf 0:34f4a53d4ca3 650 } else if (tok != yajl_tok_error) {
rolf 0:34f4a53d4ca3 651 *outBuf = jsonText + startOffset;
rolf 0:34f4a53d4ca3 652 *outLen = *offset - startOffset;
rolf 0:34f4a53d4ca3 653 }
rolf 0:34f4a53d4ca3 654
rolf 0:34f4a53d4ca3 655 /* special case for strings. skip the quotes. */
rolf 0:34f4a53d4ca3 656 if (tok == yajl_tok_string || tok == yajl_tok_string_with_escapes)
rolf 0:34f4a53d4ca3 657 {
rolf 0:34f4a53d4ca3 658 assert(*outLen >= 2);
rolf 0:34f4a53d4ca3 659 (*outBuf)++;
rolf 0:34f4a53d4ca3 660 *outLen -= 2;
rolf 0:34f4a53d4ca3 661 }
rolf 0:34f4a53d4ca3 662
rolf 0:34f4a53d4ca3 663
rolf 0:34f4a53d4ca3 664 #ifdef YAJL_LEXER_DEBUG
rolf 0:34f4a53d4ca3 665 if (tok == yajl_tok_error) {
rolf 0:34f4a53d4ca3 666 printf("lexical error: %s\n",
rolf 0:34f4a53d4ca3 667 yajl_lex_error_to_string(yajl_lex_get_error(lexer)));
rolf 0:34f4a53d4ca3 668 } else if (tok == yajl_tok_eof) {
rolf 0:34f4a53d4ca3 669 printf("EOF hit\n");
rolf 0:34f4a53d4ca3 670 } else {
rolf 0:34f4a53d4ca3 671 printf("lexed %s: '", tokToStr(tok));
rolf 0:34f4a53d4ca3 672 fwrite(*outBuf, 1, *outLen, stdout);
rolf 0:34f4a53d4ca3 673 printf("'\n");
rolf 0:34f4a53d4ca3 674 }
rolf 0:34f4a53d4ca3 675 #endif
rolf 0:34f4a53d4ca3 676
rolf 0:34f4a53d4ca3 677 return tok;
rolf 0:34f4a53d4ca3 678 }
rolf 0:34f4a53d4ca3 679
rolf 0:34f4a53d4ca3 680 const char *
rolf 0:34f4a53d4ca3 681 yajl_lex_error_to_string(yajl_lex_error error)
rolf 0:34f4a53d4ca3 682 {
rolf 0:34f4a53d4ca3 683 switch (error) {
rolf 0:34f4a53d4ca3 684 case yajl_lex_e_ok:
rolf 0:34f4a53d4ca3 685 return "ok, no error";
rolf 0:34f4a53d4ca3 686 case yajl_lex_string_invalid_utf8:
rolf 0:34f4a53d4ca3 687 return "invalid bytes in UTF8 string.";
rolf 0:34f4a53d4ca3 688 case yajl_lex_string_invalid_escaped_char:
rolf 0:34f4a53d4ca3 689 return "inside a string, '\\' occurs before a character "
rolf 0:34f4a53d4ca3 690 "which it may not.";
rolf 0:34f4a53d4ca3 691 case yajl_lex_string_invalid_json_char:
rolf 0:34f4a53d4ca3 692 return "invalid character inside string.";
rolf 0:34f4a53d4ca3 693 case yajl_lex_string_invalid_hex_char:
rolf 0:34f4a53d4ca3 694 return "invalid (non-hex) character occurs after '\\u' inside "
rolf 0:34f4a53d4ca3 695 "string.";
rolf 0:34f4a53d4ca3 696 case yajl_lex_invalid_char:
rolf 0:34f4a53d4ca3 697 return "invalid char in json text.";
rolf 0:34f4a53d4ca3 698 case yajl_lex_invalid_string:
rolf 0:34f4a53d4ca3 699 return "invalid string in json text.";
rolf 0:34f4a53d4ca3 700 case yajl_lex_missing_integer_after_exponent:
rolf 0:34f4a53d4ca3 701 return "malformed number, a digit is required after the exponent.";
rolf 0:34f4a53d4ca3 702 case yajl_lex_missing_integer_after_decimal:
rolf 0:34f4a53d4ca3 703 return "malformed number, a digit is required after the "
rolf 0:34f4a53d4ca3 704 "decimal point.";
rolf 0:34f4a53d4ca3 705 case yajl_lex_missing_integer_after_minus:
rolf 0:34f4a53d4ca3 706 return "malformed number, a digit is required after the "
rolf 0:34f4a53d4ca3 707 "minus sign.";
rolf 0:34f4a53d4ca3 708 case yajl_lex_unallowed_comment:
rolf 0:34f4a53d4ca3 709 return "probable comment found in input text, comments are "
rolf 0:34f4a53d4ca3 710 "not enabled.";
rolf 0:34f4a53d4ca3 711 }
rolf 0:34f4a53d4ca3 712 return "unknown error code";
rolf 0:34f4a53d4ca3 713 }
rolf 0:34f4a53d4ca3 714
rolf 0:34f4a53d4ca3 715
rolf 0:34f4a53d4ca3 716 /** allows access to more specific information about the lexical
rolf 0:34f4a53d4ca3 717 * error when yajl_lex_lex returns yajl_tok_error. */
rolf 0:34f4a53d4ca3 718 yajl_lex_error
rolf 0:34f4a53d4ca3 719 yajl_lex_get_error(yajl_lexer lexer)
rolf 0:34f4a53d4ca3 720 {
rolf 0:34f4a53d4ca3 721 if (lexer == NULL) return (yajl_lex_error) -1;
rolf 0:34f4a53d4ca3 722 return lexer->error;
rolf 0:34f4a53d4ca3 723 }
rolf 0:34f4a53d4ca3 724
rolf 0:34f4a53d4ca3 725 unsigned int yajl_lex_current_line(yajl_lexer lexer)
rolf 0:34f4a53d4ca3 726 {
rolf 0:34f4a53d4ca3 727 return lexer->lineOff;
rolf 0:34f4a53d4ca3 728 }
rolf 0:34f4a53d4ca3 729
rolf 0:34f4a53d4ca3 730 unsigned int yajl_lex_current_char(yajl_lexer lexer)
rolf 0:34f4a53d4ca3 731 {
rolf 0:34f4a53d4ca3 732 return lexer->charOff;
rolf 0:34f4a53d4ca3 733 }
rolf 0:34f4a53d4ca3 734
rolf 0:34f4a53d4ca3 735 yajl_tok yajl_lex_peek(yajl_lexer lexer, const unsigned char * jsonText,
rolf 0:34f4a53d4ca3 736 unsigned int jsonTextLen, unsigned int offset)
rolf 0:34f4a53d4ca3 737 {
rolf 0:34f4a53d4ca3 738 const unsigned char * outBuf;
rolf 0:34f4a53d4ca3 739 unsigned int outLen;
rolf 0:34f4a53d4ca3 740 unsigned int bufLen = yajl_buf_len(lexer->buf);
rolf 0:34f4a53d4ca3 741 unsigned int bufOff = lexer->bufOff;
rolf 0:34f4a53d4ca3 742 unsigned int bufInUse = lexer->bufInUse;
rolf 0:34f4a53d4ca3 743 yajl_tok tok;
rolf 0:34f4a53d4ca3 744
rolf 0:34f4a53d4ca3 745 tok = yajl_lex_lex(lexer, jsonText, jsonTextLen, &offset,
rolf 0:34f4a53d4ca3 746 &outBuf, &outLen);
rolf 0:34f4a53d4ca3 747
rolf 0:34f4a53d4ca3 748 lexer->bufOff = bufOff;
rolf 0:34f4a53d4ca3 749 lexer->bufInUse = bufInUse;
rolf 0:34f4a53d4ca3 750 yajl_buf_truncate(lexer->buf, bufLen);
rolf 0:34f4a53d4ca3 751
rolf 0:34f4a53d4ca3 752 return tok;
rolf 0:34f4a53d4ca3 753 }
rolf 0:34f4a53d4ca3 754
rolf 0:34f4a53d4ca3 755 #ifdef __cplusplus
rolf 0:34f4a53d4ca3 756 }
rolf 0:34f4a53d4ca3 757 #endif