Johan Wikman / SQUIRREL3

Dependents:   Squirrel

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers sqlexer.cpp Source File

sqlexer.cpp

00001 /*
00002     see copyright notice in squirrel.h
00003 */
00004 #include "sqpcheader.h"
00005 #include <ctype.h>
00006 #include <stdlib.h>
00007 #include "sqtable.h"
00008 #include "sqstring.h"
00009 #include "sqcompiler.h"
00010 #include "sqlexer.h"
00011 
/* Character currently under the lexer cursor. */
#define CUR_CHAR (_currdata)
/* True once the cursor sits on (or below) the end-of-buffer marker. */
#define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)
/* Fetch the next input character and account for it in the column counter. */
#define NEXT() { Next(); _currentcolumn++; }
/* Shift the current token into _prevtoken, record t as current and return it
   from the enclosing function. */
#define RETURN_TOKEN(t) { _prevtoken = _curtoken; _curtoken = t; return t; }

/* Scratch-buffer helpers: _longstr accumulates the text of the token being read. */
#define INIT_TEMP_STRING() { _longstr.resize(0); }
#define APPEND_CHAR(c) { _longstr.push_back(c); }
#define TERMINATE_BUFFER() { _longstr.push_back(_SC('\0')); }

/* Register keyword `key` (stringified) with token id `id`; expects a local `ss`. */
#define ADD_KEYWORD(key,id) _keywords->NewSlot( SQString::Create(ss, _SC(#key)) ,SQInteger(id))
00020 
00021 SQLexer::SQLexer(){}
00022 SQLexer::~SQLexer()
00023 {
00024     _keywords->Release();
00025 }
00026 
00027 void SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg, SQUserPointer up,CompilerErrorFunc efunc,void *ed)
00028 {
00029     _errfunc = efunc;
00030     _errtarget = ed;
00031     _sharedstate = ss;
00032     _keywords = SQTable::Create(ss, 26);
00033     ADD_KEYWORD(while, TK_WHILE);
00034     ADD_KEYWORD(do, TK_DO);
00035     ADD_KEYWORD(if, TK_IF);
00036     ADD_KEYWORD(else, TK_ELSE);
00037     ADD_KEYWORD(break, TK_BREAK);
00038     ADD_KEYWORD(continue, TK_CONTINUE);
00039     ADD_KEYWORD(return, TK_RETURN);
00040     ADD_KEYWORD(null, TK_NULL);
00041     ADD_KEYWORD(function, TK_FUNCTION);
00042     ADD_KEYWORD(local, TK_LOCAL);
00043     ADD_KEYWORD(for, TK_FOR);
00044     ADD_KEYWORD(foreach, TK_FOREACH);
00045     ADD_KEYWORD(in, TK_IN);
00046     ADD_KEYWORD(typeof, TK_TYPEOF);
00047     ADD_KEYWORD(base, TK_BASE);
00048     ADD_KEYWORD(delete, TK_DELETE);
00049     ADD_KEYWORD(try, TK_TRY);
00050     ADD_KEYWORD(catch, TK_CATCH);
00051     ADD_KEYWORD(throw, TK_THROW);
00052     ADD_KEYWORD(clone, TK_CLONE);
00053     ADD_KEYWORD(yield, TK_YIELD);
00054     ADD_KEYWORD(resume, TK_RESUME);
00055     ADD_KEYWORD(switch, TK_SWITCH);
00056     ADD_KEYWORD(case, TK_CASE);
00057     ADD_KEYWORD(default, TK_DEFAULT);
00058     ADD_KEYWORD(this, TK_THIS);
00059     ADD_KEYWORD(class,TK_CLASS);
00060     ADD_KEYWORD(extends,TK_EXTENDS);
00061     ADD_KEYWORD(constructor,TK_CONSTRUCTOR);
00062     ADD_KEYWORD(instanceof,TK_INSTANCEOF);
00063     ADD_KEYWORD(true,TK_TRUE);
00064     ADD_KEYWORD(false,TK_FALSE);
00065     ADD_KEYWORD(static,TK_STATIC);
00066     ADD_KEYWORD(enum,TK_ENUM);
00067     ADD_KEYWORD(const,TK_CONST);
00068 
00069     _readf = rg;
00070     _up = up;
00071     _lasttokenline = _currentline = 1;
00072     _currentcolumn = 0;
00073     _prevtoken = -1;
00074     _reached_eof = SQFalse;
00075     Next();
00076 }
00077 
00078 void SQLexer::Error(const SQChar *err)
00079 {
00080     _errfunc(_errtarget,err);
00081 }
00082 
00083 void SQLexer::Next()
00084 {
00085     SQInteger t = _readf(_up);
00086     if(t > MAX_CHAR) Error(_SC("Invalid character"));
00087     if(t != 0) {
00088         _currdata = (LexChar)t;
00089         return;
00090     }
00091     _currdata = SQUIRREL_EOB;
00092     _reached_eof = SQTrue;
00093 }
00094 
00095 const SQChar *SQLexer::Tok2Str(SQInteger tok)
00096 {
00097     SQObjectPtr itr, key, val;
00098     SQInteger nitr;
00099     while((nitr = _keywords->Next(false,itr, key, val)) != -1) {
00100         itr = (SQInteger)nitr;
00101         if(((SQInteger)_integer(val)) == tok)
00102             return _stringval(key);
00103     }
00104     return NULL;
00105 }
00106 
00107 void SQLexer::LexBlockComment()
00108 {
00109     bool done = false;
00110     while(!done) {
00111         switch(CUR_CHAR) {
00112             case _SC('*'): { NEXT(); if(CUR_CHAR == _SC('/')) { done = true; NEXT(); }}; continue;
00113             case _SC('\n'): _currentline++; NEXT(); continue;
00114             case SQUIRREL_EOB: Error(_SC("missing \"*/\" in comment"));
00115             default: NEXT();
00116         }
00117     }
00118 }
00119 void SQLexer::LexLineComment()
00120 {
00121     do { NEXT(); } while (CUR_CHAR != _SC('\n') && (!IS_EOB()));
00122 }
00123 
00124 SQInteger SQLexer::Lex()
00125 {
00126     _lasttokenline = _currentline;
00127     while(CUR_CHAR != SQUIRREL_EOB) {
00128         switch(CUR_CHAR){
00129         case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue;
00130         case _SC('\n'):
00131             _currentline++;
00132             _prevtoken=_curtoken;
00133             _curtoken=_SC('\n');
00134             NEXT();
00135             _currentcolumn=1;
00136             continue;
00137         case _SC('#'): LexLineComment(); continue;
00138         case _SC('/'):
00139             NEXT();
00140             switch(CUR_CHAR){
00141             case _SC('*'):
00142                 NEXT();
00143                 LexBlockComment();
00144                 continue;   
00145             case _SC('/'):
00146                 LexLineComment();
00147                 continue;
00148             case _SC('='):
00149                 NEXT();
00150                 RETURN_TOKEN(TK_DIVEQ);
00151                 continue;
00152             case _SC('>'):
00153                 NEXT();
00154                 RETURN_TOKEN(TK_ATTR_CLOSE);
00155                 continue;
00156             default:
00157                 RETURN_TOKEN('/');
00158             }
00159         case _SC('='):
00160             NEXT();
00161             if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
00162             else { NEXT(); RETURN_TOKEN(TK_EQ); }
00163         case _SC('<'):
00164             NEXT();
00165             switch(CUR_CHAR) {
00166             case _SC('='):
00167                 NEXT(); 
00168                 if(CUR_CHAR == _SC('>')) {
00169                     NEXT();
00170                     RETURN_TOKEN(TK_3WAYSCMP); 
00171                 }
00172                 RETURN_TOKEN(TK_LE) 
00173                 break;
00174             case _SC('-'): NEXT(); RETURN_TOKEN(TK_NEWSLOT); break;
00175             case _SC('<'): NEXT(); RETURN_TOKEN(TK_SHIFTL); break;
00176             case _SC('/'): NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); break;
00177             }
00178             RETURN_TOKEN('<');
00179         case _SC('>'):
00180             NEXT();
00181             if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
00182             else if(CUR_CHAR == _SC('>')){ 
00183                 NEXT(); 
00184                 if(CUR_CHAR == _SC('>')){
00185                     NEXT();
00186                     RETURN_TOKEN(TK_USHIFTR);
00187                 }
00188                 RETURN_TOKEN(TK_SHIFTR);
00189             }
00190             else { RETURN_TOKEN('>') }
00191         case _SC('!'):
00192             NEXT();
00193             if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('!')}
00194             else { NEXT(); RETURN_TOKEN(TK_NE); }
00195         case _SC('@'): {
00196             SQInteger stype;
00197             NEXT();
00198             if(CUR_CHAR != _SC('"')) {
00199                 RETURN_TOKEN('@');
00200             }
00201             if((stype=ReadString('"',true))!=-1) {
00202                 RETURN_TOKEN(stype);
00203             }
00204             Error(_SC("error parsing the string"));
00205                        }
00206         case _SC('"'):
00207         case _SC('\''): {
00208             SQInteger stype;
00209             if((stype=ReadString(CUR_CHAR,false))!=-1){
00210                 RETURN_TOKEN(stype);
00211             }
00212             Error(_SC("error parsing the string"));
00213             }
00214         case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
00215         case _SC(';'): case _SC(','): case _SC('?'): case _SC('^'): case _SC('~'):
00216             {SQInteger ret = CUR_CHAR;
00217             NEXT(); RETURN_TOKEN(ret); }
00218         case _SC('.'):
00219             NEXT();
00220             if (CUR_CHAR != _SC('.')){ RETURN_TOKEN('.') }
00221             NEXT();
00222             if (CUR_CHAR != _SC('.')){ Error(_SC("invalid token '..'")); }
00223             NEXT();
00224             RETURN_TOKEN(TK_VARPARAMS);
00225         case _SC('&'):
00226             NEXT();
00227             if (CUR_CHAR != _SC('&')){ RETURN_TOKEN('&') }
00228             else { NEXT(); RETURN_TOKEN(TK_AND); }
00229         case _SC('|'):
00230             NEXT();
00231             if (CUR_CHAR != _SC('|')){ RETURN_TOKEN('|') }
00232             else { NEXT(); RETURN_TOKEN(TK_OR); }
00233         case _SC(':'):
00234             NEXT();
00235             if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(':') }
00236             else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
00237         case _SC('*'):
00238             NEXT();
00239             if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
00240             else RETURN_TOKEN('*');
00241         case _SC('%'):
00242             NEXT();
00243             if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
00244             else RETURN_TOKEN('%');
00245         case _SC('-'):
00246             NEXT();
00247             if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
00248             else if  (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
00249             else RETURN_TOKEN('-');
00250         case _SC('+'):
00251             NEXT();
00252             if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
00253             else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
00254             else RETURN_TOKEN('+');
00255         case SQUIRREL_EOB:
00256             return 0;
00257         default:{
00258                 if (scisdigit(CUR_CHAR)) {
00259                     SQInteger ret = ReadNumber();
00260                     RETURN_TOKEN(ret);
00261                 }
00262                 else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
00263                     SQInteger t = ReadID();
00264                     RETURN_TOKEN(t);
00265                 }
00266                 else {
00267                     SQInteger c = CUR_CHAR;
00268                     if (sciscntrl((int)c)) Error(_SC("unexpected character(control)"));
00269                     NEXT();
00270                     RETURN_TOKEN(c);  
00271                 }
00272                 RETURN_TOKEN(0);
00273             }
00274         }
00275     }
00276     return 0;    
00277 }
00278     
00279 SQInteger SQLexer::GetIDType(SQChar *s)
00280 {
00281     SQObjectPtr t;
00282     if(_keywords->Get(SQString::Create(_sharedstate, s), t)) {
00283         return SQInteger(_integer(t));
00284     }
00285     return TK_IDENTIFIER;
00286 }
00287 
00288 
00289 SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
00290 {
00291     INIT_TEMP_STRING();
00292     NEXT();
00293     if(IS_EOB()) return -1;
00294     for(;;) {
00295         while(CUR_CHAR != ndelim) {
00296             switch(CUR_CHAR) {
00297             case SQUIRREL_EOB:
00298                 Error(_SC("unfinished string"));
00299                 return -1;
00300             case _SC('\n'): 
00301                 if(!verbatim) Error(_SC("newline in a constant")); 
00302                 APPEND_CHAR(CUR_CHAR); NEXT(); 
00303                 _currentline++;
00304                 break;
00305             case _SC('\\'):
00306                 if(verbatim) {
00307                     APPEND_CHAR('\\'); NEXT(); 
00308                 }
00309                 else {
00310                     NEXT();
00311                     switch(CUR_CHAR) {
00312                     case _SC('x'): NEXT(); {
00313                         if(!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected")); 
00314                         const SQInteger maxdigits = 4;
00315                         SQChar temp[maxdigits+1];
00316                         SQInteger n = 0;
00317                         while(isxdigit(CUR_CHAR) && n < maxdigits) {
00318                             temp[n] = CUR_CHAR;
00319                             n++;
00320                             NEXT();
00321                         }
00322                         temp[n] = 0;
00323                         SQChar *sTemp;
00324                         APPEND_CHAR((SQChar)scstrtoul(temp,&sTemp,16));
00325                     }
00326                     break;
00327                     case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
00328                     case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;
00329                     case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;
00330                     case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;
00331                     case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;
00332                     case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;
00333                     case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;
00334                     case _SC('0'): APPEND_CHAR(_SC('\0')); NEXT(); break;
00335                     case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;
00336                     case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;
00337                     case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;
00338                     default:
00339                         Error(_SC("unrecognised escaper char"));
00340                     break;
00341                     }
00342                 }
00343                 break;
00344             default:
00345                 APPEND_CHAR(CUR_CHAR);
00346                 NEXT();
00347             }
00348         }
00349         NEXT();
00350         if(verbatim && CUR_CHAR == '"') { //double quotation
00351             APPEND_CHAR(CUR_CHAR);
00352             NEXT();
00353         }
00354         else {
00355             break;
00356         }
00357     }
00358     TERMINATE_BUFFER();
00359     SQInteger len = _longstr.size()-1;
00360     if(ndelim == _SC('\'')) {
00361         if(len == 0) Error(_SC("empty constant"));
00362         if(len > 1) Error(_SC("constant too long"));
00363         _nvalue = _longstr[0];
00364         return TK_INTEGER;
00365     }
00366     _svalue = &_longstr[0];
00367     return TK_STRING_LITERAL;
00368 }
00369 
00370 void LexHexadecimal(const SQChar *s,SQUnsignedInteger *res)
00371 {
00372     *res = 0;
00373     while(*s != 0)
00374     {
00375         if(scisdigit(*s)) *res = (*res)*16+((*s++)-'0');
00376         else if(scisxdigit(*s)) *res = (*res)*16+(toupper(*s++)-'A'+10);
00377         else { assert(0); }
00378     }
00379 }
00380 
00381 void LexInteger(const SQChar *s,SQUnsignedInteger *res)
00382 {
00383     *res = 0;
00384     while(*s != 0)
00385     {
00386         *res = (*res)*10+((*s++)-'0');
00387     }
00388 }
00389 
00390 SQInteger scisodigit(SQInteger c) { return c >= _SC('0') && c <= _SC('7'); }
00391 
00392 void LexOctal(const SQChar *s,SQUnsignedInteger *res)
00393 {
00394     *res = 0;
00395     while(*s != 0)
00396     {
00397         if(scisodigit(*s)) *res = (*res)*8+((*s++)-'0');
00398         else { assert(0); }
00399     }
00400 }
00401 
00402 SQInteger isexponent(SQInteger c) { return c == 'e' || c=='E'; }
00403 
00404 
00405 #define MAX_HEX_DIGITS (sizeof(SQInteger)*2)
00406 SQInteger SQLexer::ReadNumber()
00407 {
00408 #define TINT 1
00409 #define TFLOAT 2
00410 #define THEX 3
00411 #define TSCIENTIFIC 4
00412 #define TOCTAL 5
00413     SQInteger type = TINT, firstchar = CUR_CHAR;
00414     SQChar *sTemp;
00415     INIT_TEMP_STRING();
00416     NEXT();
00417     if(firstchar == _SC('0') && (toupper(CUR_CHAR) == _SC('X') || scisodigit(CUR_CHAR)) ) {
00418         if(scisodigit(CUR_CHAR)) {
00419             type = TOCTAL;
00420             while(scisodigit(CUR_CHAR)) {
00421                 APPEND_CHAR(CUR_CHAR);
00422                 NEXT();
00423             }
00424             if(scisdigit(CUR_CHAR)) Error(_SC("invalid octal number"));
00425         }
00426         else {
00427             NEXT();
00428             type = THEX;
00429             while(isxdigit(CUR_CHAR)) {
00430                 APPEND_CHAR(CUR_CHAR);
00431                 NEXT();
00432             }
00433             if(_longstr.size() > MAX_HEX_DIGITS) Error(_SC("too many digits for an Hex number"));
00434         }
00435     }
00436     else {
00437         APPEND_CHAR((int)firstchar);
00438         while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {
00439             if(CUR_CHAR == _SC('.') || isexponent(CUR_CHAR)) type = TFLOAT;
00440             if(isexponent(CUR_CHAR)) {
00441                 if(type != TFLOAT) Error(_SC("invalid numeric format"));
00442                 type = TSCIENTIFIC;
00443                 APPEND_CHAR(CUR_CHAR);
00444                 NEXT();
00445                 if(CUR_CHAR == '+' || CUR_CHAR == '-'){
00446                     APPEND_CHAR(CUR_CHAR);
00447                     NEXT();
00448                 }
00449                 if(!scisdigit(CUR_CHAR)) Error(_SC("exponent expected"));
00450             }
00451             
00452             APPEND_CHAR(CUR_CHAR);
00453             NEXT();
00454         }
00455     }
00456     TERMINATE_BUFFER();
00457     switch(type) {
00458     case TSCIENTIFIC:
00459     case TFLOAT:
00460         _fvalue = (SQFloat)scstrtod(&_longstr[0],&sTemp);
00461         return TK_FLOAT;
00462     case TINT:
00463         LexInteger(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
00464         return TK_INTEGER;
00465     case THEX:
00466         LexHexadecimal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
00467         return TK_INTEGER;
00468     case TOCTAL:
00469         LexOctal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
00470         return TK_INTEGER;
00471     }
00472     return 0;
00473 }
00474 
00475 SQInteger SQLexer::ReadID()
00476 {
00477     SQInteger res;
00478     INIT_TEMP_STRING();
00479     do {
00480         APPEND_CHAR(CUR_CHAR);
00481         NEXT();
00482     } while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));
00483     TERMINATE_BUFFER();
00484     res = GetIDType(&_longstr[0]);
00485     if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR) {
00486         _svalue = &_longstr[0];
00487     }
00488     return res;
00489 }