MBED port of https://github.com/ys1382/filagree . The only change is adding #define MBED
compile.c
- Committer:
- yusufx
- Date:
- 2012-05-30
- Revision:
- 0:1a89e28dea91
File content as of revision 0:1a89e28dea91:
#include <stdio.h> #include <stdlib.h> #include <string.h> #include <stdarg.h> #include <ctype.h> #include <errno.h> #include "util.h" #include "struct.h" #include "serial.h" #include "compile.h" #include "vm.h" #include "variable.h" #define TAG "compile" #define ERROR_LEX "Lexigraphical error" #define ERROR_PARSE "Parsological error" #define ERROR_TOKEN "Unknown token" #define QUOTE '\'' #define ESCAPE '\\' #define ESCAPED_NEWLINE 'n' #define ESCAPED_TAB 't' #define ESCAPED_QUOTE '\'' uint32_t line; struct array* lex_list; struct map *imports = NULL; struct byte_array *read_file(const struct byte_array *filename); // token /////////////////////////////////////////////////////////////////// enum Lexeme { LEX_NONE, LEX_IMPORT, LEX_LEFT_COMMENT, LEX_RIGHT_COMMENT, LEX_PLUS, LEX_MINUS, LEX_TIMES, LEX_DIVIDE, LEX_MODULO, LEX_AND, LEX_OR, LEX_NOT, LEX_BAND, LEX_BOR, LEX_INVERSE, LEX_XOR, LEX_LSHIFT, LEX_RSHIFT, LEX_GREATER, LEX_LESSER, LEX_SAME, LEX_SET, LEX_DIFFERENT, LEX_COMMA, LEX_PERIOD, LEX_COLON, LEX_LINE_COMMENT, LEX_LEFTHESIS, LEX_RIGHTHESIS, LEX_LEFTSQUARE, LEX_RIGHTSQUARE, LEX_LEFTSTACHE, LEX_RIGHTSTACHE, LEX_IDENTIFIER, LEX_STRING, LEX_INTEGER, LEX_TRUE, LEX_FALSE, LEX_IF, LEX_THEN, LEX_ELSE, LEX_END, LEX_WHILE, LEX_FOR, LEX_IN, LEX_WHERE, LEX_FUNCTION, LEX_RETURN, LEX_TRY, LEX_CATCH, LEX_THROW, LEX_DO, }; struct token { enum Lexeme lexeme; uint32_t number; struct byte_array *string; uint32_t at_line; }; struct number_string lexemes[] = { {LEX_IMPORT, "import"}, {LEX_LEFT_COMMENT, "/*"}, {LEX_RIGHT_COMMENT, "*/"}, {LEX_LINE_COMMENT, "#"}, {LEX_PLUS, "+"}, {LEX_MINUS, "-"}, {LEX_TIMES, "*"}, {LEX_DIVIDE, "/"}, {LEX_MODULO, "%"}, {LEX_BAND, "&"}, {LEX_BOR, "|"}, {LEX_INVERSE, "~"}, {LEX_XOR, "^"}, {LEX_RSHIFT, ">>"}, {LEX_LSHIFT, "<<"}, {LEX_AND, "and"}, {LEX_OR, "or"}, {LEX_NOT, "not"}, {LEX_GREATER, ">"}, {LEX_LESSER, "<"}, {LEX_SAME, "=="}, {LEX_SET, "="}, {LEX_DIFFERENT, "!="}, {LEX_COMMA, ","}, {LEX_PERIOD, "."}, {LEX_COLON, ":"}, {LEX_LEFTHESIS, "("}, {LEX_RIGHTHESIS, ")"}, {LEX_LEFTSQUARE, "["}, {LEX_RIGHTSQUARE, "]"}, {LEX_LEFTSTACHE, "{"}, {LEX_RIGHTSTACHE, "}"}, {LEX_TRUE, "true"}, {LEX_FALSE, "false"}, {LEX_IF, "if"}, {LEX_THEN, "then"}, {LEX_ELSE, "else"}, {LEX_END, "end"}, {LEX_WHILE, "while"}, {LEX_FUNCTION, "function"}, {LEX_FOR, "for"}, {LEX_IN, "in"}, {LEX_WHERE, "where"}, {LEX_RETURN, "return"}, {LEX_TRY, "try"}, {LEX_CATCH, "catch"}, {LEX_THROW, "throw"}, {LEX_DO, "do"} }; const char* lexeme_to_string(enum Lexeme lexeme) { switch (lexeme) { case LEX_IDENTIFIER: return "id"; case LEX_STRING: return "string"; case LEX_INTEGER: return "number"; default: return NUM_TO_STRING(lexemes, lexeme); } } // display //////////////////////////////////////////////////////////////////// #ifdef DEBUG void display_token(struct token *token, int depth) { assert_message(depth < 10, "kablooie!"); char* indent = (char*)malloc(sizeof(char)*(depth+1)); int i; for (i=0; i<depth; i++) indent[i] = '\t'; indent[i] = 0; DEBUGPRINT("%s%d ", indent, token->lexeme); switch (token->lexeme) { case LEX_NONE: break; case LEX_INTEGER: DEBUGPRINT("%u", token->number); break; case LEX_STRING: case LEX_IDENTIFIER: { char* s = byte_array_to_string(token->string); DEBUGPRINT("%s:%s", lexeme_to_string(token->lexeme), s); free(s); } break; case LEX_FUNCTION: DEBUGPRINT("fdecl"); break; default: DEBUGPRINT("%s", lexeme_to_string(token->lexeme)); break; } DEBUGPRINT("\n"); free(indent); } void display_lex_list() { int i, n = lex_list->length; for (i=0; i<n; i++) { DEBUGPRINT("\t%d: ", i); display_token((struct token*)array_get(lex_list,i), 0); } } #endif // lex ///////////////////////////////////////////////////////////////////// struct array* lex(struct byte_array *binput); struct token *token_new(enum Lexeme lexeme, int at_line) { struct token *t = (struct token*)malloc(sizeof(struct token)); t->lexeme = lexeme; t->string = NULL; t->number = 0; t->at_line = line; return t; } struct token *insert_token(enum Lexeme lexeme) { struct token *token = token_new(lexeme, line); array_add(lex_list, token); return token; } int insert_token_number(const char *input, int i) { struct token *token = insert_token(LEX_INTEGER); token->number = atoi(&input[i]); //while (isdigit(input[i++])); for (; isdigit(input[i]); i++); return i; } bool isiden(char c) { return isalnum(c) || (c=='_'); } bool isKeyword(int n) { for (int c,i=0; (c = lexemes[n].chars[i]); i++) if (!isalpha(c)) return false; return true; } int insert_token_string(enum Lexeme lexeme, const char* input, int i) { struct byte_array *string = byte_array_new(); while ((lexeme==LEX_IDENTIFIER && isiden(input[i])) || (lexeme==LEX_STRING && input[i] != QUOTE)) { uint8_t c = input[i++]; //DEBUGPRINT("@%d c=%3d %c\n", i, c, c); if (c == ESCAPE) { c = input[i++]; switch (c) { case ESCAPED_NEWLINE: c = '\n'; break; case ESCAPED_TAB: c = '\t'; break; case ESCAPED_QUOTE: c = '\''; break; default: return (VOID_INT)exit_message("unknown escape"); } } byte_array_add_byte(string, c); } int end = i + (lexeme==LEX_STRING); struct token *token = token_new(lexeme, line); token->string = string; array_add(lex_list, token); //display_token(token, 0); return end; } int insert_lexeme(int index) { struct number_string sn = lexemes[index]; insert_token((enum Lexeme)sn.number); return strlen(sn.chars); } int import(const char* input, int i) { i += strlen(lexeme_to_string(LEX_IMPORT)); while (isspace(input[i++])); struct byte_array *path = byte_array_new(); while (!isspace(input[i]) && input[i]!=QUOTE) byte_array_add_byte(path, input[i++]); byte_array_append(path, byte_array_from_string(EXTENSION_SRC)); if (!map_has(imports, path)) { map_insert(imports, path, NULL); struct byte_array *imported = read_file(path); lex(imported); } return i+1; } struct array* lex(struct byte_array *binput) { lex_list = array_new(); imports = map_new(); int i=0,j; char c; line = 1; const char* input = byte_array_to_string(binput); const char* right_comment = lexeme_to_string(LEX_RIGHT_COMMENT); int right_comment_len = strlen(right_comment); lexmore: while (i < binput->length) { for (j=0; j<ARRAY_LEN(lexemes); j++) { char* lexstr = lexemes[j].chars; if (!strncmp(&input[i], lexstr, strlen(lexstr))) { if (isKeyword(j) && i<binput->length-1 && isalpha(input[i+strlen(lexemes[j].chars)])) { //DEBUGPRINT("%s is a keyword, %c is alpha\n", lexemes[j].chars, input[i+strlen(lexemes[j].chars)]); continue; // just an identifier } enum Lexeme lexeme = (enum Lexeme)lexemes[j].number; if (lexeme == LEX_LEFT_COMMENT) { // start comment with /* while (strncmp(&input[i], right_comment, right_comment_len)) if (input[i++] == '\n') line++; i += right_comment_len; } else if (lexeme == LEX_LINE_COMMENT) { // start line comment with # while (input[i++] != '\n'); line++; } else if (lexeme == LEX_IMPORT) i = import(input, i); else { //DEBUGPRINT("lexeme=%d\n",lexeme); i += insert_lexeme(j); } goto lexmore; } } c = input[i]; if (isdigit(c)) i = insert_token_number(input,i); else if (isiden(c)) i = insert_token_string(LEX_IDENTIFIER, input, i); else if (c == '\n') { line++; i++; } else if (isspace(c)) i++; else if (c=='\'') i = insert_token_string(LEX_STRING, input, i+1); else return (struct array*)exit_message(ERROR_LEX); } #ifdef DEBUG // display_lex_list(); #endif return lex_list; } /* parse /////////////////////////////////////////////////////////////////// BNF: <statements> --> ( <assignment> | <fcall> | <ifthenelse> | <loop> | <rejoinder> | <iterloop> | <trycatch> | <throw> )* <trycatch> --> LEX_TRY <statements> LEX_CATCH <variable> <statements> LEX_END <throw> --> LEX_THROW <expression> <assignment> --> <destination>, LEX_SET <source>, <destination> --> <variable> | ( <expression> <member> ) <source> --> <expression> <ifthenelse> --> IF <expression> THEN <statements> ( ELSE IF <expression> THEN <statements> )* ( ELSE <statements>)? LEX_END <loop> --> WHILE <expression> LEX_DO <statements> LEX_END <iterator> --> LEX_FOR LEX_IDENTIFIER LEX_IN <expression> ( LEX_WHERE <expression> )? <iterloop> --> <iterator> LEX_DO <statements> LEX_END <comprehension> --> LEX_LEFTSQUARE <expression> <iterator> LEX_RIGHTSQUARE <rejoinder> --> LEX_RETURN <source>, <fdecl> --> FUNCTION LEX_LEFTHESIS <variable>, LEX_RIGHTHESIS <statements> LEX_END <fcall> --> <expression> <call> <call> --> LEX_LEFTHESIS <source>, LEX_RIGHTHESIS <expression> --> <exp2> ( ( LEX_SAME | LEX_DIFFERENT | LEX_GREATER | LEX_LESSER ) <exp2> )? <exp2> --> <exp3> ( ( LEX_PLUS | LEX_MINUS ) <exp3> )* <exp3> --> <exp4> ( ( LEX_PLUS | LEX_MINUS | LEX_TIMES | LEX_DIVIDE | LEX_MODULO | LEX_BAND | LEX_BOR | LEX_XOR | LEX_LSHIFT | LEX_RSHIFT) <exp3> )* <exp4> --> (LEX_NOT | LEX_MINUS | LEX_INVERSE)? <exp5> <exp5> --> <exp6> ( <call> | <member> )* <exp6> --> ( LEX_LEFTTHESIS <expression> LEX_RIGHTTHESIS ) | <atom> <atom> --> LEX_IDENTIFIER | <float> | <integer> | <boolean> | <table> | <comprehension> | <fdecl> <variable> --> LEX_IDENTIFIER <boolean> --> LEX_TRUE | LEX_FALSE <floater> --> LEX_INTEGER LEX_PERIOD LEX_INTEGER <string> --> LEX_STRING <table> --> LEX_LEFTSQUARE <element>, LEX_RIGHTSQUARE <element> --> <expression> ( LEX_COLON <expression> )? <member> --> ( LEX_LEFTSQUARE <expression> LEX_RIGHTSQUARE ) | ( LEX_PERIOD LEX_STRING ) */////////////////////////////////////////////////////////////////////////// enum Nonterminal { SYMBOL_STATEMENTS, SYMBOL_ASSIGNMENT, SYMBOL_SOURCE, SYMBOL_DESTINATION, SYMBOL_IF_THEN_ELSE, SYMBOL_LOOP, SYMBOL_EXPRESSION, SYMBOL_INTEGER, SYMBOL_FLOAT, SYMBOL_STRING, SYMBOL_VARIABLE, SYMBOL_TABLE, SYMBOL_PAIR, SYMBOL_FDECL, SYMBOL_FCALL, SYMBOL_MEMBER, SYMBOL_RETURN, SYMBOL_BOOLEAN, SYMBOL_NIL, SYMBOL_ITERATOR, SYMBOL_ITERLOOP, SYMBOL_COMPREHENSION, SYMBOL_TRYCATCH, SYMBOL_THROW, } nonterminal; struct symbol { enum Nonterminal nonterminal; const struct token *token; struct array* list; struct symbol *index, *value; float floater; bool lhs; }; struct number_string nonterminals[] = { {SYMBOL_STATEMENTS, "statements"}, {SYMBOL_ASSIGNMENT, "assignment"}, {SYMBOL_SOURCE, "source"}, {SYMBOL_DESTINATION, "destination"}, {SYMBOL_IF_THEN_ELSE, "if-then-else"}, {SYMBOL_LOOP, "loop"}, {SYMBOL_EXPRESSION, "expression"}, {SYMBOL_INTEGER, "integer"}, {SYMBOL_FLOAT, "float"}, {SYMBOL_STRING, "string"}, {SYMBOL_VARIABLE, "variable"}, {SYMBOL_TABLE, "table"}, {SYMBOL_PAIR, "pair"}, {SYMBOL_FDECL, "fdecl"}, {SYMBOL_FCALL, "fcall"}, {SYMBOL_MEMBER, "member"}, {SYMBOL_RETURN, "return"}, {SYMBOL_BOOLEAN, "boolean"}, {SYMBOL_NIL, "nil"}, {SYMBOL_ITERATOR, "iterator"}, {SYMBOL_ITERLOOP, "iterloop"}, {SYMBOL_COMPREHENSION, "comprehension"}, {SYMBOL_TRYCATCH, "try-catch"}, {SYMBOL_THROW, "throw"}, }; struct array* parse_list; uint32_t parse_index; struct symbol *expression(); struct symbol *statements(); struct symbol *comprehension(); struct symbol *symbol_new(enum Nonterminal nonterminal) { // DEBUGPRINT("symbol_new %s\n", NUM_TO_STRING(nonterminals, ARRAY_LEN(nonterminals), nonterminal)); struct symbol *s = (struct symbol*)malloc(sizeof(struct symbol)); s->nonterminal = nonterminal; s->list = array_new(); s->index = s->value = NULL; s->lhs = false; return s; } struct symbol *symbol_add(struct symbol *s, struct symbol *t) { null_check(s); if (!t) return NULL; //DEBUGPRINT("symbol_add %s\n", nonterminals[t->nonterminal]); array_add(s->list, t); return s; } #ifdef DEBUG void display_symbol(const struct symbol *symbol, int depth) { // null_check(symbol); if (!symbol) return; assert_message(depth < 100, "kablooie!"); char* indent = (char*)malloc(sizeof(char)*(depth+1)); int i; for (i=0; i<depth; i++) indent[i] = '\t'; indent[i] = 0; DEBUGPRINT("%s%d %s", indent, symbol->nonterminal, NUM_TO_STRING(nonterminals, symbol->nonterminal)); switch (symbol->nonterminal) { case SYMBOL_INTEGER: DEBUGPRINT(": %u\n", symbol->token->number); break; case SYMBOL_FLOAT: DEBUGPRINT(": %f\n", symbol->floater); break; case SYMBOL_VARIABLE: { char* s = byte_array_to_string(symbol->token->string); DEBUGPRINT(": '%s (%s)'\n", s, symbol->lhs ? "lhs":"rhs"); free(s); } break; case SYMBOL_STRING: { char* s = byte_array_to_string(symbol->token->string); DEBUGPRINT(": '%s'\n", s); free(s); } break; case SYMBOL_EXPRESSION: DEBUGPRINT(" %s\n", lexeme_to_string(symbol->token->lexeme)); break; case SYMBOL_TRYCATCH: DEBUGPRINT("try\n"); display_symbol(symbol->index, depth); DEBUGPRINT("catch %s\n", byte_array_to_string(symbol->token->string)); display_symbol(symbol->value, depth); break; default: DEBUGPRINT("\n"); depth++; display_symbol(symbol->index, depth); display_symbol(symbol->value, depth); depth--; break; } const struct array *list = symbol->list; if (list && list->length) { DEBUGPRINT("%s\tlist:\n", indent); depth++; for (int k=0; k<list->length; k++) { const struct symbol *item = array_get(list, k); display_symbol(item, depth); } } free(indent); } #endif #define LOOKAHEAD (lookahead(0)) #define FETCH_OR_QUIT(x) if (!fetch(x)) return NULL; #define OR_ERROR(x) { exit_message("missing %s at line %d", NUM_TO_STRING(lexemes, x), line); return NULL; } #define FETCH_OR_ERROR(x) if (!fetch(x)) OR_ERROR(x); enum Lexeme lookahead(int n) { if (parse_index + n >= parse_list->length) return LEX_NONE; struct token *token = (struct token*)parse_list->data[parse_index+n]; assert_message(token!=0, ERROR_NULL); return token->lexeme; } struct token *fetch(enum Lexeme lexeme) { if (parse_index >= parse_list->length) return NULL; struct token *token = (struct token*)parse_list->data[parse_index]; if (token->lexeme != lexeme) { //DEBUGPRINT("fetched %s instead of %s at %d\n", lexeme_to_string(token->lexeme), lexeme_to_string(lexeme), parse_index); return NULL; } //DEBUGPRINT("fetched %s at %d\n", lexeme_to_string(lexeme), parse_index); // display_token(token, 0); parse_index++; return token; } typedef struct symbol*(Parsnip)(); struct symbol *one_of(Parsnip *p, ...) { uint32_t start = parse_index; struct symbol *t = NULL; va_list argp; va_start(argp, p); for (; p && !(t=p()); p = va_arg(argp, Parsnip*)) parse_index=start; va_end(argp); return t; } struct token *fetch_lookahead(enum Lexeme lexeme, ...) { struct token *t=NULL; va_list argp; va_start(argp, lexeme); for (; lexeme; lexeme = (enum Lexeme)va_arg(argp, int)) { if (LOOKAHEAD == lexeme) { t = fetch(lexeme); break; } } va_end(argp); return t; } struct symbol *symbol_fetch(enum Nonterminal n, enum Lexeme goal, ...) { if (parse_index >= parse_list->length) return NULL; struct token *token = (struct token*)parse_list->data[parse_index]; assert_message(token!=0, ERROR_NULL); enum Lexeme lexeme = token->lexeme; struct symbol *symbol = NULL; va_list argp; va_start(argp, goal); for (; goal; goal = (enum Lexeme)va_arg(argp, int)) { if (lexeme == goal) { symbol = symbol_new(n); symbol->token = token; // DEBUGPRINT("fetched %s at %d\n", lexeme_to_string(lexeme), parse_index); // display_token(token, 0); parse_index++; break; } } va_end(argp); return symbol; } struct symbol *symbol_adds(struct symbol *s, struct symbol *child, ...) { va_list argp; va_start(argp, child); for (; child; child = va_arg(argp, struct symbol*)) array_add(s->list, child); va_end(argp); return s; } // <x>, --> ( <x> ( LEX_COMMA <x> )* )? // e.g. a list of zero or more <x>, separated by commas struct symbol *repeated(enum Nonterminal nonterminal, Parsnip *p) { struct symbol *r, *s = symbol_new(nonterminal); do { if (!(r=p())) break; symbol_add(s, r); } while (fetch(LEX_COMMA)); return s; } //////////////////////////////// BNFs // <destination> --> <variable> | ( <expression> <member> ) struct symbol *destination() { struct symbol *e = expression(); if (!e || (e->nonterminal != SYMBOL_MEMBER && e->nonterminal != SYMBOL_VARIABLE)) return NULL; e->lhs = true; return e; }; //<variable> --> LEX_IDENTIFIER struct symbol *variable() { return symbol_fetch(SYMBOL_VARIABLE, LEX_IDENTIFIER, NULL); } // <fdecl> --> FUNCTION LEX_LEFTHESIS <variable>, LEX_RIGHTHESIS <statements> LEX_END struct symbol *fdecl() { FETCH_OR_QUIT(LEX_FUNCTION) FETCH_OR_ERROR(LEX_LEFTHESIS); struct symbol *s = symbol_new(SYMBOL_FDECL); s->index = repeated(SYMBOL_DESTINATION, &destination); FETCH_OR_ERROR(LEX_RIGHTHESIS) s->value = statements(); //DEBUGPRINT("fdecl: %d statements\n", s->value->list->length); //DEBUGPRINT("lookahead=%s\n", NUM_TO_STRING(lexemes, LOOKAHEAD)); FETCH_OR_ERROR(LEX_END); return s; } // <element> --> <expression> ( LEX_COLON <expression> )? struct symbol *element() { struct symbol *e = expression(); if (fetch(LEX_COLON)) { // i.e. x:y struct symbol *p = symbol_new(SYMBOL_PAIR); p->index = e; p->value = expression(); return p; } else { // p->index = symbol_new(SYMBOL_NIL); return e; } } // <table> --> LEX_LEFTSQUARE <element>, LEX_RIGHTSQUARE struct symbol *table() { //return list(SYMBOL_TABLE, LEX_LEFTSQUARE, LEX_RIGHTSQUARE, LEX_COLON); FETCH_OR_QUIT(LEX_LEFTSQUARE); struct symbol *s = repeated(SYMBOL_TABLE, &element); FETCH_OR_ERROR(LEX_RIGHTSQUARE); return s; } struct symbol *integer() { // DEBUGPRINT("integer\n"); struct token *t = fetch(LEX_INTEGER); if (!t) return NULL; struct symbol *s = symbol_new(SYMBOL_INTEGER); s->token = t; return s; } struct symbol *boolean() { struct token *t = fetch_lookahead(LEX_TRUE, LEX_FALSE, NULL); if (!t) return NULL; struct symbol *s = symbol_new(SYMBOL_BOOLEAN); s->token = t; return s; } struct symbol *floater() { // DEBUGPRINT("floater\n"); struct token *t = fetch(LEX_INTEGER); if (!t) return NULL; FETCH_OR_QUIT(LEX_PERIOD); struct token *u = fetch(LEX_INTEGER); float decimal = u->number; while (decimal > 1) decimal /= 10; struct symbol *s = symbol_new(SYMBOL_FLOAT); s->floater = t->number + decimal; return s; } // <string> --> LEX_STRING struct symbol *string() { struct token *t = fetch(LEX_STRING); if (!t) return NULL; struct symbol *s = symbol_new(SYMBOL_STRING); s->token = t; return s; } // <atom> --> LEX_IDENTIFIER | <float> | <integer> | <boolean> | <table> | <comprehension> | <fdecl> struct symbol *atom() { return one_of(&variable, &string, &floater, &integer, &boolean, &comprehension, &table, &fdecl, NULL); } // <exp5> --> ( LEX_LEFTTHESIS <expression> LEX_RIGHTTHESIS ) | <atom> struct symbol *exp5() { if (fetch(LEX_LEFTHESIS)) { struct symbol *s = expression(); fetch(LEX_RIGHTHESIS); return s; } return atom(); } // <member> --> ( LEX_LEFTSQUARE <expression> LEX_RIGHTSQUARE ) | ( LEX_PERIOD LEX_STRING ) struct symbol *member() { struct symbol *m = symbol_new(SYMBOL_MEMBER); if (fetch(LEX_PERIOD)) { m->index = variable(); m->index->nonterminal = SYMBOL_STRING; } else { FETCH_OR_QUIT(LEX_LEFTSQUARE); m->index = expression(); FETCH_OR_QUIT(LEX_RIGHTSQUARE); } return m; } // <call> --> LEX_LEFTHESIS <source>, LEX_RIGHTHESIS struct symbol *call() { struct symbol *s = symbol_new(SYMBOL_FCALL); FETCH_OR_QUIT(LEX_LEFTHESIS); s->index = repeated(SYMBOL_SOURCE, &expression); // arguments FETCH_OR_ERROR(LEX_RIGHTHESIS); return s; } // <fcall> --> <expression> <call> struct symbol *fcall() { struct symbol *e = expression(); return (e && e->nonterminal == SYMBOL_FCALL) ? e : NULL; } // <exp4> --> <exp5> ( <call> | member )* struct symbol *exp4() { struct symbol *g, *f; f = exp5(); while (f && (g = one_of(&call, &member, NULL))) { g->value = f; f = g; } return f; } // <exp3> --> (NOT | LEX_MINUS)? <exp4> struct symbol *exp3() { struct symbol *e; if ((e = symbol_fetch(SYMBOL_EXPRESSION, LEX_MINUS, LEX_NOT, NULL))) return symbol_add(e, exp3()); return exp4(); } // <exp2> --> (<exp3> ( ( LEX_PLUS | LEX_MINUS | LEX_TIMES | LEX_DIVIDE | MODULO ))* <exp3> struct symbol *expTwo() { struct symbol *e, *f; e = exp3(); while ((f = symbol_fetch(SYMBOL_EXPRESSION, LEX_PLUS, LEX_MINUS, LEX_TIMES, LEX_DIVIDE, LEX_MODULO, NULL))) e = symbol_adds(f, e, exp3(), NULL); return e; } // <expression> --> <exp2> ( ( LEX_SAME | LEX_DIFFERENT | LEX_GREATER | LEX_LESSER ) <exp2> )? struct symbol *expression() { struct symbol *f, *e = expTwo(); while ((f = symbol_fetch(SYMBOL_EXPRESSION, LEX_SAME, LEX_DIFFERENT, LEX_GREATER, LEX_LESSER, NULL))) e = symbol_adds(f, e, expTwo(), NULL); return e; } // <assignment> --> <destination>, LEX_SET <source>, struct symbol *assignment() { struct symbol *d = repeated(SYMBOL_DESTINATION, &destination); FETCH_OR_QUIT(LEX_SET); struct symbol *s = symbol_new(SYMBOL_ASSIGNMENT); s->value = repeated(SYMBOL_SOURCE, expression); s->index = d; return s; } /* <ifthenelse> --> IF <expression> THEN <statements> (ELSE IF <expression> THEN <statements>)* (ELSE <statements>)? END */ struct symbol *ifthenelse() { FETCH_OR_QUIT(LEX_IF); struct symbol *f = symbol_new(SYMBOL_IF_THEN_ELSE); symbol_add(f, expression()); fetch(LEX_THEN); symbol_add(f, statements()); while (lookahead(0) == LEX_ELSE && lookahead(1) == LEX_IF) { fetch(LEX_ELSE); fetch(LEX_IF); symbol_add(f, expression()); fetch(LEX_THEN); symbol_add(f, statements()); } if (fetch_lookahead(LEX_ELSE)) symbol_add(f, statements()); fetch(LEX_END); return f; } // <loop> --> WHILE <expression> <statements> END struct symbol *loop() { FETCH_OR_QUIT(LEX_WHILE); struct symbol *s = symbol_new(SYMBOL_LOOP); s->index = expression(); s->value = statements(); FETCH_OR_ERROR(LEX_END); return s; } // <iterator> --> LEX_FOR LEX_IDENTIFIER LEX_IN <expression> ( LEX_WHERE <expression> )? struct symbol *iterator() { FETCH_OR_QUIT(LEX_FOR); const struct token *t = fetch(LEX_IDENTIFIER); struct symbol *s = symbol_new(SYMBOL_ITERATOR); s->token = t; FETCH_OR_ERROR(LEX_IN); s->value = expression(); if (fetch_lookahead(LEX_WHERE)) s->index = expression(); return s; } // <comprehension> --> LEX_LEFTSQUARE <expression> <iterator> LEX_RIGHTSQUARE struct symbol *comprehension() { // DEBUGPRINT("comprehension\n"); FETCH_OR_QUIT(LEX_LEFTSQUARE); struct symbol *s = symbol_new(SYMBOL_COMPREHENSION); s->value = expression(); s->index = iterator(); if (!s->index) return NULL; FETCH_OR_ERROR(LEX_RIGHTSQUARE); return s; } // <iterloop> --> <iterator> <statements> LEX_END struct symbol *iterloop() { struct symbol *i = iterator(); if (!i) return NULL; struct symbol *s = symbol_new(SYMBOL_ITERLOOP); s->index = i; s->value = statements(); FETCH_OR_ERROR(LEX_END); return s; } // <rejoinder> --> // LEX_RETURN <expression> struct symbol *rejoinder() { FETCH_OR_QUIT(LEX_RETURN); return repeated(SYMBOL_RETURN, &expression); // return values } // <trycatch> --> LEX_TRY <statements> LEX_CATCH <destination> <statements> LEX_END struct symbol *trycatch() { FETCH_OR_QUIT(LEX_TRY); struct symbol *s = symbol_new(SYMBOL_TRYCATCH); s->index = statements(); FETCH_OR_ERROR(LEX_CATCH); if (!(s->token = fetch(LEX_IDENTIFIER))) OR_ERROR(LEX_IDENTIFIER); s->lhs = true; s->value = statements(); FETCH_OR_ERROR(LEX_END); return s; } // <throw> --> LEX_THROW <expression> struct symbol *thrower() { FETCH_OR_QUIT(LEX_THROW); struct symbol *s = symbol_new(SYMBOL_THROW); s->value = expression(); return s; } struct symbol *strings_and_variables() { struct array *sav = array_new(); while ((array_add(sav, variable()) || array_add(sav, string()))); return (struct symbol*)sav; } // <statements> --> ( <assignment> | <fcall> | <ifthenelse> | <loop> | <rejoinder> | <iterloop> ) * struct symbol *statements() { struct symbol *s = symbol_new(SYMBOL_STATEMENTS); struct symbol *t; while ((t = one_of(&assignment, &fcall, &ifthenelse, &loop, &rejoinder, &iterloop, &trycatch, &thrower, NULL))) symbol_add(s, t); return s; } struct symbol *parse(struct array *list, uint32_t index) { DEBUGPRINT("parse:\n"); assert_message(list!=0, ERROR_NULL); assert_message(index<list->length, ERROR_INDEX); parse_list = list; parse_index = index; struct symbol *p = statements(); #ifdef DEBUG display_symbol(p, 1); #endif return p; } // generate //////////////////////////////////////////////////////////////// struct byte_array *generate_code(struct byte_array *code, struct symbol *root); void generate_step(struct byte_array *code, int count, int action,...) { byte_array_add_byte(code, action); va_list argp; uint8_t parameter; for(va_start(argp, action); --count;) { parameter = va_arg(argp, int); byte_array_add_byte(code, (uint8_t)parameter); } va_end(argp); } void generate_items(struct byte_array *code, const struct symbol* root) { const struct array *items = root->list; uint32_t num_items = items->length; for (int i=0; i<num_items; i++) { struct symbol *item = (struct symbol*)array_get(items, i); generate_code(code, item); } } void generate_items_then_op(struct byte_array *code, enum Opcode opcode, const struct symbol* root) { generate_items(code, root); generate_step(code, 1, opcode); serial_encode_int(code, 0, root->list->length); } void generate_statements(struct byte_array *code, struct symbol *root) { if (root) generate_items(code, root); } void generate_return(struct byte_array *code, struct symbol *root) { generate_items_then_op(code, VM_RET, root); } void generate_nil(struct byte_array *code, struct symbol *root) { generate_step(code, 1, VM_NIL); } static void generate_jump(struct byte_array *code, int32_t offset) { generate_step(code, 1, VM_JMP); serial_encode_int(code, 0, offset); } void generate_list(struct byte_array *code, struct symbol *root) { generate_items_then_op(code, VM_LST, root); } void generate_float(struct byte_array *code, struct symbol *root) { generate_step(code, 1, VM_FLT); serial_encode_float(code, 0, root->floater); } void generate_integer(struct byte_array *code, struct symbol *root) { generate_step(code, 1, VM_INT); serial_encode_int(code, 0, root->token->number); } void generate_string(struct byte_array *code, struct symbol *root) { generate_step(code, 1, VM_STR); serial_encode_string(code, 0, root->token->string); } void generate_source(struct byte_array *code, struct symbol *root) { generate_items_then_op(code, VM_SRC, root); } void generate_destination(struct byte_array *code, struct symbol *root) { generate_items(code, root); generate_step(code, 1, VM_DST); } void generate_assignment(struct byte_array *code, struct symbol *root) { generate_code(code, root->value); generate_code(code, root->index); } void generate_variable(struct byte_array *code, struct symbol *root) { generate_step(code, 1, root->lhs ? VM_SET : VM_VAR); serial_encode_string(code, 0, root->token->string); } void generate_boolean(struct byte_array *code, struct symbol *root) { uint32_t value = 0; switch (root->token->lexeme) { case LEX_TRUE: value = 1; break; case LEX_FALSE: value = 0; break; default: exit_message("bad boolean value"); return; } generate_step(code, 1, VM_BOOL); serial_encode_int(code, 0, value); } void generate_fdecl(struct byte_array *code, struct symbol *root) { struct byte_array *f = byte_array_new(); generate_code(f, root->index); generate_code(f, root->value); generate_step(code, 1, VM_FNC); serial_encode_string(code, 0, f); } void generate_pair(struct byte_array *code, struct symbol *root) { generate_code(code, root->index); generate_code(code, root->value); generate_step(code, 2, VM_MAP, 1); } void generate_member(struct byte_array *code, struct symbol *root) { generate_code(code, root->index); // array_get(root->branches, INDEX)); generate_code(code, root->value); // array_get(root->branches, ITERABLE)); enum Opcode op = root->lhs ? VM_PUT : VM_GET; generate_step(code, 1, op); } void generate_fcall(struct byte_array *code, struct symbol *root) { generate_code(code, root->index); // arguments if (root->value->nonterminal == SYMBOL_MEMBER) { generate_code(code, root->value->index); generate_code(code, root->value->value); generate_step(code, 1, VM_MET); } else { generate_code(code, root->value); // function generate_step(code, 1, VM_CAL); } } void generate_math(struct byte_array *code, struct symbol *root) { enum Lexeme lexeme = root->token->lexeme; generate_statements(code, root); enum Opcode op = VM_NIL; switch (lexeme) { case LEX_PLUS: op = VM_ADD; break; case LEX_MINUS: op = VM_SUB; break; case LEX_TIMES: op = VM_MUL; break; case LEX_DIVIDE: op = VM_DIV; break; case LEX_MODULO: op = VM_MOD; break; case LEX_AND: op = VM_AND; break; case LEX_OR: op = VM_OR; break; case LEX_NOT: op = VM_NOT; break; case LEX_SAME: op = VM_EQU; break; case LEX_DIFFERENT: op = VM_NEQ; break; case LEX_GREATER: op = VM_GTN; break; case LEX_LESSER: op = VM_LTN; break; default: exit_message("bad math lexeme"); break; } generate_step(code, 1, op); } void generate_ifthenelse(struct byte_array *code, struct symbol *root) { struct array *gotos = array_new(); // for skipping to end of elseifs, if one is true for (int i=0; i<root->list->length; i+=2) { // then struct symbol *thn = (struct symbol*)array_get(root->list, i+1); assert_message(thn->nonterminal == SYMBOL_STATEMENTS, "branch syntax error"); struct byte_array *thn_code = byte_array_new(); generate_code(thn_code, thn); generate_jump(thn_code, 0); // jump to end // if struct symbol *iff = (struct symbol*)array_get(root->list, i); generate_code(code, iff); generate_step(code, 2, VM_IFF, thn_code->length); byte_array_append(code, thn_code); array_add(gotos, (void*)(VOID_INT)(code->length-1)); // else if (root->list->length > i+2) { struct symbol *els = (struct symbol*)array_get(root->list, i+2); if (els->nonterminal == SYMBOL_STATEMENTS) { assert_message(root->list->length == i+3, "else should be the last branch"); generate_code(code, els); break; } } } // go back and fill in where to jump to the end of if-then-elseif-else when done for (int j=0; j<gotos->length; j++) { VOID_INT g = (VOID_INT)array_get(gotos, j); code->data[g] = code->length - g; } } void generate_loop(struct byte_array *code, struct symbol *root) { struct byte_array *ifa = byte_array_new(); generate_code(ifa, root->index); struct byte_array *b = byte_array_new(); generate_code(b, root->value); struct byte_array *thn = byte_array_new(); generate_step(thn, 1, VM_IFF); serial_encode_int(thn, 0, b->length + 2); struct byte_array *while_a_do_b = byte_array_concatenate(4, code, ifa, thn, b); byte_array_append(code, while_a_do_b); int32_t loop_length = ifa->length + thn->length + b->length; generate_jump(code, -loop_length-1); } // <iterator> --> LEX_FOR LEX_IDENTIFIER LEX_IN <expression> ( LEX_WHERE <expression> )? void generate_iterator(struct byte_array *code, struct symbol *root, enum Opcode op) { struct symbol *ator = root->index; generate_code(code, ator->value); // IN b generate_step(code, 1, op); // iterator or comprehension serial_encode_string(code, 0, ator->token->string); // FOR a if (ator->index) { // WHERE c struct byte_array *where = byte_array_new(); generate_code(where, ator->index); serial_encode_string(code, 0, where); } else generate_nil(code, NULL); struct byte_array *what = byte_array_new(); generate_code(what, root->value); serial_encode_string(code, 0, what); // DO d } // <iterloop> --> <iterator> <statements> LEX_END void generate_iterloop(struct byte_array *code, struct symbol *root) { generate_iterator(code, root, VM_ITR); } // <comprehension> --> LEX_LEFTSQUARE <expression> <iterator> LEX_RIGHTSQUARE void generate_comprehension(struct byte_array *code, struct symbol *root) { generate_iterator(code, root, VM_COM); } // <trycatch> --> LEX_TRY <statements> LEX_CATCH <variable> <statements> LEX_END void generate_trycatch(struct byte_array *code, struct symbol *root) { struct byte_array *trial = generate_code(NULL, root->index); generate_step(code, 1, VM_TRY); serial_encode_string(code, 0, trial); serial_encode_string(code, 0, root->token->string); struct byte_array *catcher = generate_code(NULL, root->value); serial_encode_string(code, 0, catcher); } void generate_throw(struct byte_array *code, struct symbol *root) { generate_code(code, root->value); generate_step(code, 1, VM_TRO); } typedef void(generator)(struct byte_array*, struct symbol*); struct byte_array *generate_code(struct byte_array *code, struct symbol *root) { if (!root) return NULL; generator *g = NULL; //DEBUGPRINT("generate_code %s\n", nonterminals[root->nonterminal]); switch(root->nonterminal) { case SYMBOL_NIL: g = generate_nil; break; case SYMBOL_PAIR: g = generate_pair; break; case SYMBOL_LOOP: g = generate_loop; break; case SYMBOL_FDECL: g = generate_fdecl; break; case SYMBOL_TABLE: g = generate_list; break; case SYMBOL_FCALL: g = generate_fcall; break; case SYMBOL_FLOAT: g = generate_float; break; case SYMBOL_MEMBER: g = generate_member; break; case SYMBOL_RETURN: g = generate_return; break; case SYMBOL_SOURCE: g = generate_source; break; case SYMBOL_STRING: g = generate_string; break; case SYMBOL_INTEGER: g = generate_integer; break; case SYMBOL_BOOLEAN: g = generate_boolean; break; case SYMBOL_ITERLOOP: g = generate_iterloop; break; case SYMBOL_VARIABLE: g = generate_variable; break; case SYMBOL_STATEMENTS: g = generate_statements; break; case SYMBOL_ASSIGNMENT: g = generate_assignment; break; case SYMBOL_EXPRESSION: g = generate_math; break; case SYMBOL_DESTINATION: g = generate_destination; break; case SYMBOL_IF_THEN_ELSE: g = generate_ifthenelse; break; case SYMBOL_COMPREHENSION: g = generate_comprehension; break; case SYMBOL_TRYCATCH: g = generate_trycatch; break; case SYMBOL_THROW: g = generate_throw; break; default: return (struct byte_array*)exit_message(ERROR_TOKEN); } if (!code) code = byte_array_new(); if (g) g(code, root); return code; } struct byte_array *generate_program(struct symbol *root) { DEBUGPRINT("generate:\n"); struct byte_array *code = byte_array_new(); generate_code(code, root); struct byte_array *program = serial_encode_int(0, 0, code->length); byte_array_append(program, code); #ifdef DEBUG display_program(program); #endif return program; } // build /////////////////////////////////////////////////////////////////// struct byte_array *build_string(const struct byte_array *input) { assert_message(input, ERROR_NULL); struct byte_array *input_copy = byte_array_copy(input); DEBUGPRINT("lex %d:\n", input_copy->length); struct array* list = lex(input_copy); struct symbol *tree = parse(list, 0); return generate_program(tree); } struct byte_array *build_file(const struct byte_array* filename) { struct byte_array *input = read_file(filename); return build_string(input); } void compile_file(const char* str) { struct byte_array *filename = byte_array_from_string(str); struct byte_array *program = build_file(filename); struct byte_array *dotfg = byte_array_from_string(EXTENSION_SRC); struct byte_array *dotfgbc = byte_array_from_string(EXTENSION_BC); int offset = byte_array_find(filename, dotfg, 0); assert_message(offset > 0, "invalid source file name"); byte_array_replace(filename, dotfgbc, offset, dotfg->length); write_file(filename, program); }