The Squirrel interpreter. See http://www.squirrel-lang.org/

Dependents:   Squirrel

Revision:
0:97a4f8cc534c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/squirrel/sqlexer.cpp	Tue Dec 16 10:20:34 2014 +0000
@@ -0,0 +1,489 @@
+/*
+	see copyright notice in squirrel.h
+*/
+#include "sqpcheader.h"
+#include <ctype.h>
+#include <stdlib.h>
+#include "sqtable.h"
+#include "sqstring.h"
+#include "sqcompiler.h"
+#include "sqlexer.h"
+
+#define CUR_CHAR (_currdata)
+#define RETURN_TOKEN(t) { _prevtoken = _curtoken; _curtoken = t; return t;}
+#define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)
+#define NEXT() {Next();_currentcolumn++;}
+#define INIT_TEMP_STRING() { _longstr.resize(0);}
+#define APPEND_CHAR(c) { _longstr.push_back(c);}
+#define TERMINATE_BUFFER() {_longstr.push_back(_SC('\0'));}
+#define ADD_KEYWORD(key,id) _keywords->NewSlot( SQString::Create(ss, _SC(#key)) ,SQInteger(id))
+
+SQLexer::SQLexer(){}
+SQLexer::~SQLexer()
+{
+	_keywords->Release();
+}
+
+void SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg, SQUserPointer up,CompilerErrorFunc efunc,void *ed)
+{
+	_errfunc = efunc;
+	_errtarget = ed;
+	_sharedstate = ss;
+	_keywords = SQTable::Create(ss, 26);
+	ADD_KEYWORD(while, TK_WHILE);
+	ADD_KEYWORD(do, TK_DO);
+	ADD_KEYWORD(if, TK_IF);
+	ADD_KEYWORD(else, TK_ELSE);
+	ADD_KEYWORD(break, TK_BREAK);
+	ADD_KEYWORD(continue, TK_CONTINUE);
+	ADD_KEYWORD(return, TK_RETURN);
+	ADD_KEYWORD(null, TK_NULL);
+	ADD_KEYWORD(function, TK_FUNCTION);
+	ADD_KEYWORD(local, TK_LOCAL);
+	ADD_KEYWORD(for, TK_FOR);
+	ADD_KEYWORD(foreach, TK_FOREACH);
+	ADD_KEYWORD(in, TK_IN);
+	ADD_KEYWORD(typeof, TK_TYPEOF);
+	ADD_KEYWORD(base, TK_BASE);
+	ADD_KEYWORD(delete, TK_DELETE);
+	ADD_KEYWORD(try, TK_TRY);
+	ADD_KEYWORD(catch, TK_CATCH);
+	ADD_KEYWORD(throw, TK_THROW);
+	ADD_KEYWORD(clone, TK_CLONE);
+	ADD_KEYWORD(yield, TK_YIELD);
+	ADD_KEYWORD(resume, TK_RESUME);
+	ADD_KEYWORD(switch, TK_SWITCH);
+	ADD_KEYWORD(case, TK_CASE);
+	ADD_KEYWORD(default, TK_DEFAULT);
+	ADD_KEYWORD(this, TK_THIS);
+	ADD_KEYWORD(class,TK_CLASS);
+	ADD_KEYWORD(extends,TK_EXTENDS);
+	ADD_KEYWORD(constructor,TK_CONSTRUCTOR);
+	ADD_KEYWORD(instanceof,TK_INSTANCEOF);
+	ADD_KEYWORD(true,TK_TRUE);
+	ADD_KEYWORD(false,TK_FALSE);
+	ADD_KEYWORD(static,TK_STATIC);
+	ADD_KEYWORD(enum,TK_ENUM);
+	ADD_KEYWORD(const,TK_CONST);
+
+	_readf = rg;
+	_up = up;
+	_lasttokenline = _currentline = 1;
+	_currentcolumn = 0;
+	_prevtoken = -1;
+	_reached_eof = SQFalse;
+	Next();
+}
+
+void SQLexer::Error(const SQChar *err)
+{
+	_errfunc(_errtarget,err);
+}
+
+void SQLexer::Next()
+{
+	SQInteger t = _readf(_up);
+	if(t > MAX_CHAR) Error(_SC("Invalid character"));
+	if(t != 0) {
+		_currdata = (LexChar)t;
+		return;
+	}
+	_currdata = SQUIRREL_EOB;
+	_reached_eof = SQTrue;
+}
+
+const SQChar *SQLexer::Tok2Str(SQInteger tok)
+{
+	SQObjectPtr itr, key, val;
+	SQInteger nitr;
+	while((nitr = _keywords->Next(false,itr, key, val)) != -1) {
+		itr = (SQInteger)nitr;
+		if(((SQInteger)_integer(val)) == tok)
+			return _stringval(key);
+	}
+	return NULL;
+}
+
+void SQLexer::LexBlockComment()
+{
+	bool done = false;
+	while(!done) {
+		switch(CUR_CHAR) {
+			case _SC('*'): { NEXT(); if(CUR_CHAR == _SC('/')) { done = true; NEXT(); }}; continue;
+			case _SC('\n'): _currentline++; NEXT(); continue;
+			case SQUIRREL_EOB: Error(_SC("missing \"*/\" in comment"));
+			default: NEXT();
+		}
+	}
+}
+void SQLexer::LexLineComment()
+{
+	do { NEXT(); } while (CUR_CHAR != _SC('\n') && (!IS_EOB()));
+}
+
+SQInteger SQLexer::Lex()
+{
+	_lasttokenline = _currentline;
+	while(CUR_CHAR != SQUIRREL_EOB) {
+		switch(CUR_CHAR){
+		case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue;
+		case _SC('\n'):
+			_currentline++;
+			_prevtoken=_curtoken;
+			_curtoken=_SC('\n');
+			NEXT();
+			_currentcolumn=1;
+			continue;
+		case _SC('#'): LexLineComment(); continue;
+		case _SC('/'):
+			NEXT();
+			switch(CUR_CHAR){
+			case _SC('*'):
+				NEXT();
+				LexBlockComment();
+				continue;	
+			case _SC('/'):
+				LexLineComment();
+				continue;
+			case _SC('='):
+				NEXT();
+				RETURN_TOKEN(TK_DIVEQ);
+				continue;
+			case _SC('>'):
+				NEXT();
+				RETURN_TOKEN(TK_ATTR_CLOSE);
+				continue;
+			default:
+				RETURN_TOKEN('/');
+			}
+		case _SC('='):
+			NEXT();
+			if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
+			else { NEXT(); RETURN_TOKEN(TK_EQ); }
+		case _SC('<'):
+			NEXT();
+			switch(CUR_CHAR) {
+			case _SC('='):
+				NEXT(); 
+				if(CUR_CHAR == _SC('>')) {
+					NEXT();
+					RETURN_TOKEN(TK_3WAYSCMP); 
+				}
+				RETURN_TOKEN(TK_LE) 
+				break;
+			case _SC('-'): NEXT(); RETURN_TOKEN(TK_NEWSLOT); break;
+			case _SC('<'): NEXT(); RETURN_TOKEN(TK_SHIFTL); break;
+			case _SC('/'): NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); break;
+			}
+			RETURN_TOKEN('<');
+		case _SC('>'):
+			NEXT();
+			if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
+			else if(CUR_CHAR == _SC('>')){ 
+				NEXT(); 
+				if(CUR_CHAR == _SC('>')){
+					NEXT();
+					RETURN_TOKEN(TK_USHIFTR);
+				}
+				RETURN_TOKEN(TK_SHIFTR);
+			}
+			else { RETURN_TOKEN('>') }
+		case _SC('!'):
+			NEXT();
+			if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('!')}
+			else { NEXT(); RETURN_TOKEN(TK_NE); }
+		case _SC('@'): {
+			SQInteger stype;
+			NEXT();
+			if(CUR_CHAR != _SC('"')) {
+				RETURN_TOKEN('@');
+			}
+			if((stype=ReadString('"',true))!=-1) {
+				RETURN_TOKEN(stype);
+			}
+			Error(_SC("error parsing the string"));
+					   }
+		case _SC('"'):
+		case _SC('\''): {
+			SQInteger stype;
+			if((stype=ReadString(CUR_CHAR,false))!=-1){
+				RETURN_TOKEN(stype);
+			}
+			Error(_SC("error parsing the string"));
+			}
+		case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
+		case _SC(';'): case _SC(','): case _SC('?'): case _SC('^'): case _SC('~'):
+			{SQInteger ret = CUR_CHAR;
+			NEXT(); RETURN_TOKEN(ret); }
+		case _SC('.'):
+			NEXT();
+			if (CUR_CHAR != _SC('.')){ RETURN_TOKEN('.') }
+			NEXT();
+			if (CUR_CHAR != _SC('.')){ Error(_SC("invalid token '..'")); }
+			NEXT();
+			RETURN_TOKEN(TK_VARPARAMS);
+		case _SC('&'):
+			NEXT();
+			if (CUR_CHAR != _SC('&')){ RETURN_TOKEN('&') }
+			else { NEXT(); RETURN_TOKEN(TK_AND); }
+		case _SC('|'):
+			NEXT();
+			if (CUR_CHAR != _SC('|')){ RETURN_TOKEN('|') }
+			else { NEXT(); RETURN_TOKEN(TK_OR); }
+		case _SC(':'):
+			NEXT();
+			if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(':') }
+			else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
+		case _SC('*'):
+			NEXT();
+			if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
+			else RETURN_TOKEN('*');
+		case _SC('%'):
+			NEXT();
+			if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
+			else RETURN_TOKEN('%');
+		case _SC('-'):
+			NEXT();
+			if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
+			else if  (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
+			else RETURN_TOKEN('-');
+		case _SC('+'):
+			NEXT();
+			if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
+			else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
+			else RETURN_TOKEN('+');
+		case SQUIRREL_EOB:
+			return 0;
+		default:{
+				if (scisdigit(CUR_CHAR)) {
+					SQInteger ret = ReadNumber();
+					RETURN_TOKEN(ret);
+				}
+				else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
+					SQInteger t = ReadID();
+					RETURN_TOKEN(t);
+				}
+				else {
+					SQInteger c = CUR_CHAR;
+					if (sciscntrl((int)c)) Error(_SC("unexpected character(control)"));
+					NEXT();
+					RETURN_TOKEN(c);  
+				}
+				RETURN_TOKEN(0);
+			}
+		}
+	}
+	return 0;    
+}
+	
+SQInteger SQLexer::GetIDType(SQChar *s)
+{
+	SQObjectPtr t;
+	if(_keywords->Get(SQString::Create(_sharedstate, s), t)) {
+		return SQInteger(_integer(t));
+	}
+	return TK_IDENTIFIER;
+}
+
+
+SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
+{
+	INIT_TEMP_STRING();
+	NEXT();
+	if(IS_EOB()) return -1;
+	for(;;) {
+		while(CUR_CHAR != ndelim) {
+			switch(CUR_CHAR) {
+			case SQUIRREL_EOB:
+				Error(_SC("unfinished string"));
+				return -1;
+			case _SC('\n'): 
+				if(!verbatim) Error(_SC("newline in a constant")); 
+				APPEND_CHAR(CUR_CHAR); NEXT(); 
+				_currentline++;
+				break;
+			case _SC('\\'):
+				if(verbatim) {
+					APPEND_CHAR('\\'); NEXT(); 
+				}
+				else {
+					NEXT();
+					switch(CUR_CHAR) {
+					case _SC('x'): NEXT(); {
+						if(!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected")); 
+						const SQInteger maxdigits = 4;
+						SQChar temp[maxdigits+1];
+						SQInteger n = 0;
+						while(isxdigit(CUR_CHAR) && n < maxdigits) {
+							temp[n] = CUR_CHAR;
+							n++;
+							NEXT();
+						}
+						temp[n] = 0;
+						SQChar *sTemp;
+						APPEND_CHAR((SQChar)scstrtoul(temp,&sTemp,16));
+					}
+				    break;
+					case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
+					case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;
+					case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;
+					case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;
+					case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;
+					case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;
+					case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;
+					case _SC('0'): APPEND_CHAR(_SC('\0')); NEXT(); break;
+					case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;
+					case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;
+					case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;
+					default:
+						Error(_SC("unrecognised escaper char"));
+					break;
+					}
+				}
+				break;
+			default:
+				APPEND_CHAR(CUR_CHAR);
+				NEXT();
+			}
+		}
+		NEXT();
+		if(verbatim && CUR_CHAR == '"') { //double quotation
+			APPEND_CHAR(CUR_CHAR);
+			NEXT();
+		}
+		else {
+			break;
+		}
+	}
+	TERMINATE_BUFFER();
+	SQInteger len = _longstr.size()-1;
+	if(ndelim == _SC('\'')) {
+		if(len == 0) Error(_SC("empty constant"));
+		if(len > 1) Error(_SC("constant too long"));
+		_nvalue = _longstr[0];
+		return TK_INTEGER;
+	}
+	_svalue = &_longstr[0];
+	return TK_STRING_LITERAL;
+}
+
+void LexHexadecimal(const SQChar *s,SQUnsignedInteger *res)
+{
+	*res = 0;
+	while(*s != 0)
+	{
+		if(scisdigit(*s)) *res = (*res)*16+((*s++)-'0');
+		else if(scisxdigit(*s)) *res = (*res)*16+(toupper(*s++)-'A'+10);
+		else { assert(0); }
+	}
+}
+
+void LexInteger(const SQChar *s,SQUnsignedInteger *res)
+{
+	*res = 0;
+	while(*s != 0)
+	{
+		*res = (*res)*10+((*s++)-'0');
+	}
+}
+
+SQInteger scisodigit(SQInteger c) { return c >= _SC('0') && c <= _SC('7'); }
+
+void LexOctal(const SQChar *s,SQUnsignedInteger *res)
+{
+	*res = 0;
+	while(*s != 0)
+	{
+		if(scisodigit(*s)) *res = (*res)*8+((*s++)-'0');
+		else { assert(0); }
+	}
+}
+
+SQInteger isexponent(SQInteger c) { return c == 'e' || c=='E'; }
+
+
+#define MAX_HEX_DIGITS (sizeof(SQInteger)*2)
+SQInteger SQLexer::ReadNumber()
+{
+#define TINT 1
+#define TFLOAT 2
+#define THEX 3
+#define TSCIENTIFIC 4
+#define TOCTAL 5
+	SQInteger type = TINT, firstchar = CUR_CHAR;
+	SQChar *sTemp;
+	INIT_TEMP_STRING();
+	NEXT();
+	if(firstchar == _SC('0') && (toupper(CUR_CHAR) == _SC('X') || scisodigit(CUR_CHAR)) ) {
+		if(scisodigit(CUR_CHAR)) {
+			type = TOCTAL;
+			while(scisodigit(CUR_CHAR)) {
+				APPEND_CHAR(CUR_CHAR);
+				NEXT();
+			}
+			if(scisdigit(CUR_CHAR)) Error(_SC("invalid octal number"));
+		}
+		else {
+			NEXT();
+			type = THEX;
+			while(isxdigit(CUR_CHAR)) {
+				APPEND_CHAR(CUR_CHAR);
+				NEXT();
+			}
+			if(_longstr.size() > MAX_HEX_DIGITS) Error(_SC("too many digits for an Hex number"));
+		}
+	}
+	else {
+		APPEND_CHAR((int)firstchar);
+		while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {
+            if(CUR_CHAR == _SC('.') || isexponent(CUR_CHAR)) type = TFLOAT;
+			if(isexponent(CUR_CHAR)) {
+				if(type != TFLOAT) Error(_SC("invalid numeric format"));
+				type = TSCIENTIFIC;
+				APPEND_CHAR(CUR_CHAR);
+				NEXT();
+				if(CUR_CHAR == '+' || CUR_CHAR == '-'){
+					APPEND_CHAR(CUR_CHAR);
+					NEXT();
+				}
+				if(!scisdigit(CUR_CHAR)) Error(_SC("exponent expected"));
+			}
+			
+			APPEND_CHAR(CUR_CHAR);
+			NEXT();
+		}
+	}
+	TERMINATE_BUFFER();
+	switch(type) {
+	case TSCIENTIFIC:
+	case TFLOAT:
+		_fvalue = (SQFloat)scstrtod(&_longstr[0],&sTemp);
+		return TK_FLOAT;
+	case TINT:
+		LexInteger(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
+		return TK_INTEGER;
+	case THEX:
+		LexHexadecimal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
+		return TK_INTEGER;
+	case TOCTAL:
+		LexOctal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
+		return TK_INTEGER;
+	}
+	return 0;
+}
+
+SQInteger SQLexer::ReadID()
+{
+	SQInteger res;
+	INIT_TEMP_STRING();
+	do {
+		APPEND_CHAR(CUR_CHAR);
+		NEXT();
+	} while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));
+	TERMINATE_BUFFER();
+	res = GetIDType(&_longstr[0]);
+	if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR) {
+		_svalue = &_longstr[0];
+	}
+	return res;
+}