diff --git a/docs/enum_prefixes.md b/docs/enum_prefixes.md index 0d3349a..89cf7e3 100644 --- a/docs/enum_prefixes.md +++ b/docs/enum_prefixes.md @@ -17,5 +17,6 @@ | ST_ | symbol.type | src/main/c/symbol_table.h | | STP_ | symbol_table.phase | src/main/c/symbol_table.h | | DI_ | parse_tree.decl.init | src/main/c/parse_tree.h | +| RC_ | return_condition | src/main/c/parse_tree.h | ------------------------------------------------------------------------------- ``` diff --git a/golden.sh b/golden.sh index a256bf7..c9ab067 100755 --- a/golden.sh +++ b/golden.sh @@ -80,7 +80,7 @@ indent() { local i - for i in $( seq 0 1 "$indent_count" ) + for i in $( seq 1 1 "$indent_count" ) do printf " |" done diff --git a/src/main/c/grammar.c b/src/main/c/grammar.c index 7f78fab..67f2c4c 100644 --- a/src/main/c/grammar.c +++ b/src/main/c/grammar.c @@ -197,11 +197,11 @@ RULE(GR_STMT_FUNCTION) { NT_STMT, { NONTERM(NT_FUNCTION), END } }; RULE(GR_FUNCTION) { NT_FUNCTION, { NONTERM(NT_TYPE), TERM(TT_IDENTIFIER), TERM(TT_L_PAREN), NONTERM(NT_PARAMS), TERM(TT_R_PAREN), - TERM(TT_TERMINATOR), NONTERM(NT_STMTS), TERM(TT_FC_END), + NONTERM(NT_LINETERMS), NONTERM(NT_STMTS), TERM(TT_FC_END), TERM(TT_IDENTIFIER), END } }; RULE(GR_FUNCTION_NOPARAMS) { NT_FUNCTION, { NONTERM(NT_TYPE), TERM(TT_IDENTIFIER), TERM(TT_L_PAREN), TERM(TT_R_PAREN), - TERM(TT_TERMINATOR), NONTERM(NT_STMTS), TERM(TT_FC_END), + NONTERM(NT_LINETERMS), NONTERM(NT_STMTS), TERM(TT_FC_END), TERM(TT_IDENTIFIER), END } }; RULE(GR_PARAMS_LIST) { NT_PARAMS, { NONTERM(NT_PARAM), TERM(TT_COMMA), NONTERM(NT_PARAMS), END } }; diff --git a/src/main/c/interpreter.c b/src/main/c/interpreter.c index 09630cf..e3851c7 100644 --- a/src/main/c/interpreter.c +++ b/src/main/c/interpreter.c @@ -147,30 +147,32 @@ */ static inline void interpret_nt_stmts(parse_tree const* ptree, - symbol_table* symtbl); + symbol_table* symtbl, value* returnVal); void interpret(parse_tree const* ptree) { // -> [end-of-file] - interpret_nt_stmts(&ptree->children[0], &interpreter_global); + interpret_nt_stmts(&ptree->children[0], &interpreter_global, NULL); } -static inline void interpret_nt_stmt(parse_tree const* ptree, - symbol_table* symtbl); +static inline bool interpret_nt_stmt(parse_tree const* ptree, + symbol_table* symtbl, value* returnVal); static inline void interpret_nt_stmts(parse_tree const* ptree, - symbol_table* symtbl) + symbol_table* symtbl, value* returnVal) { if(ptree->rule == GR_STMTS_LIST) { // -> - interpret_nt_stmt(&ptree->children[0], symtbl); - interpret_nt_stmts(&ptree->children[2], symtbl); + if(interpret_nt_stmt(&ptree->children[0], symtbl, returnVal)) + { + interpret_nt_stmts(&ptree->children[2], symtbl, returnVal); + } } else if(ptree->rule == GR_STMTS_SINGLE) { // -> - interpret_nt_stmt(&ptree->children[0], symtbl); + interpret_nt_stmt(&ptree->children[0], symtbl, returnVal); } } @@ -186,16 +188,21 @@ symbol_table* symtbl); static inline void interpret_nt_function(parse_tree const* ptree, symbol_table* symtbl); +static inline void interpret_nt_return_stmt(parse_tree const* ptree, + symbol_table* symtbl, value* val); -static inline void interpret_nt_stmt(parse_tree const* ptree, - symbol_table* symtbl) +static inline bool interpret_nt_stmt(parse_tree const* ptree, + symbol_table* symtbl, value* returnVal) { if(ptree->rule == GR_STMT_EXPRESSION) { // -> + + // even though we have a returnVal parameter, use a dummy var + // because this shouldn't modify the returnVal, which could be null. value v; interpret_nt_expression(&ptree->children[0], symtbl, &v); - (void) v; + value_uninit(&v); } else if(ptree->rule == GR_STMT_DECLARATION) { @@ -220,9 +227,17 @@ else if(ptree->rule == GR_STMT_FUNCTION) { // -> - error("Functions not supported"); + interpret_nt_function(&ptree->children[0], symtbl); + } + else if(ptree->rule == GR_STMT_RETURN) + { + // + interpret_nt_return_stmt(&ptree->children[0], symtbl, returnVal); + return false; } else { panic("Unexpected statement type"); } + + return true; } /** @@ -657,6 +672,7 @@ case GR_EXPR_EXPONENT_FALL: { // -> + assert_flc(val != NULL, ptree->token); interpret_nt_negate_expr(&ptree->children[0], symtbl, val); break; } @@ -715,6 +731,7 @@ case GR_EXPR_NEGATE_FALL: { // -> + assert_flc(val != NULL, ptree->token); interpret_nt_select_expr(&ptree->children[0], symtbl, val); break; } @@ -742,6 +759,7 @@ case GR_EXPR_SELECT_FALL: { // -> + assert_flc(val != NULL, ptree->token); interpret_nt_function_expr(&ptree->children[0], symtbl, val); break; } @@ -758,14 +776,77 @@ { switch(ptree->rule) { + case GR_EXPR_FUNCTION: + case GR_EXPR_FUNCTION_EMPTY: + { + symbol* func = symbol_table_get(symtbl, ptree->children[0].token); + assert(func != NULL); + + array_t(value) arg_vals = array_alloc(0, sizeof(value)); + parse_tree* args = (ptree->rule == GR_EXPR_FUNCTION_EMPTY) + ? NULL + : &ptree->children[2]; + + while(args != NULL) + { + value arg_val; + interpret_nt_expression(&args->children[0], symtbl, &arg_val); + arg_vals = array_push(arg_vals, &arg_val); + + args = (args->rule == GR_ARGS_SINGLE) + ? NULL + : &args->children[2]; + } + + assert(array_length(arg_vals) + == array_length(func->decl->semantic.func.parameters)); + + for(size_t i = 0; i < array_length(arg_vals); i++) + { + value_convert(&arg_vals[i], + &func->decl->semantic.func.parameters[i]); + } + + symbol_table child_symtbl; + symbol_table_init_interpret(&child_symtbl); + child_symtbl.parent = &interpreter_global; + + for(size_t i = 0; i < array_length(arg_vals); i++) + { + symbol sym; + symbol_init_i_variable( + &sym, func->decl->semantic.func.names[i], &arg_vals[i]); + symbol_table_put(&child_symtbl, &sym); + } + array_free(arg_vals); + + if(func->decl->semantic.func.returnType.type == DT_VOID) + { + value_init_void(val); + } + + size_t offset = (ptree->rule == GR_EXPR_FUNCTION_EMPTY) + ? 0 + : 1; + + assert_flc(val != NULL, ptree->token); + + interpret_nt_stmts( + &func->decl->children[offset + 5], &child_symtbl, val); + + symbol_table_uninit(&child_symtbl); + + break; + } case GR_EXPR_FUNCTION_FALL: { // -> + assert_flc(val != NULL, ptree->token); interpret_nt_parenthesis_expr(&ptree->children[0], symtbl, val); break; } default: - panic("Invalid parenthesis rule"); + panic("Invalid function rule"); } } @@ -786,6 +867,7 @@ case GR_EXPR_PARENTHESIS_FALL: { // -> + assert_flc(val != NULL, ptree->token); interpret_nt_leaf_expr(&ptree->children[0], symtbl, val); break; } @@ -804,7 +886,7 @@ // -> [identifier] symbol const* sym = symbol_table_get(symtbl, ptree->children[0].token); - assert(sym != NULL); + assert_flc(sym != NULL, ptree->token); assert(sym->type == ST_I_VARIABLE); value_init_copy(val, &sym->i_variable.variable); @@ -937,3 +1019,17 @@ value_uninit(&val); } +static inline void interpret_nt_function(parse_tree const* ptree, + symbol_table* symtbl) +{ + symbol func_sym; + symbol_init_function_interpret( + &func_sym, ptree->children[1].token, ptree); + symbol_table_put(symtbl, &func_sym); +} + +static inline void interpret_nt_return_stmt(parse_tree const* ptree, + symbol_table* symtbl, value* val) +{ + interpret_nt_expression(&ptree->children[1], symtbl, val); +} diff --git a/src/main/c/lexer.c b/src/main/c/lexer.c index 5622e99..72df6d3 100644 --- a/src/main/c/lexer.c +++ b/src/main/c/lexer.c @@ -51,12 +51,16 @@ /** The current state in the deterministic finite automata */ automata* state; + + /** If the lexer has encountered a lexing error */ + bool valid; } lex_context; static inline automata* lex_advance(const char c, token* const tkn, const automata* const atm); static inline void do_lexing(lex_context* lc); -static inline void lex_error(const token* tkn, const automata* atm); +static inline void lex_error(const token* tkn, const automata* atm, + lex_context* lc); static inline void automata_string_hack(automata* this, automata* hack, char s, char e, bool first) @@ -327,6 +331,53 @@ automata_string_hack(start, id, 'A', 'Z', true); automata_string_hack(start, id, '0', '9', false); + // Comments + { + automata_transition* curr = start->transitions; + while(curr->type != AT_SINGLE || curr->single.match != '/') + { + curr = curr->nextTransition; + } + + automata* st = curr->nextState; + + automata* cmt_start = malloc(sizeof(automata)); + automata_init(cmt_start); + cmt_start->accept = false; + cmt_start->acceptToken = TT_COMMENT; + + automata_insert_single(st, cmt_start, '*'); + + automata* cmt_cont = malloc(sizeof(automata)); + automata_init(cmt_cont); + cmt_cont->accept = false; + cmt_cont->acceptToken = TT_COMMENT; + + automata* cmt_2last = malloc(sizeof(automata)); + automata_init(cmt_2last); + cmt_2last->accept = false; + cmt_2last->acceptToken = TT_COMMENT; + + automata_insert_range(cmt_start, cmt_cont, '\t', ')'); + automata_insert_single(cmt_start, cmt_2last, '*'); + automata_insert_range(cmt_start, cmt_cont, '+', '~'); + + automata_insert_range(cmt_cont, cmt_cont, '\t', ')'); + automata_insert_single(cmt_cont, cmt_2last, '*'); + automata_insert_range(cmt_cont, cmt_cont, '+', '~'); + + automata* cmt_last = malloc(sizeof(automata)); + automata_init(cmt_last); + cmt_last->accept = true; + cmt_last->acceptToken = TT_COMMENT; + + automata_insert_range(cmt_2last, cmt_cont, '\t', ')'); + automata_insert_single(cmt_2last, cmt_2last, '*'); + automata_insert_range(cmt_2last, cmt_cont, '+', '.'); + automata_insert_single(cmt_2last, cmt_last, '/'); + automata_insert_range(cmt_2last, cmt_cont, '0', '~'); + } + LEXER_AUTOMATA = start; } @@ -352,6 +403,7 @@ lc.state = LEXER_AUTOMATA; lc.tokens = array_alloc(0, sizeof(token)); lc.tokenBuffPlace = 0; + lc.valid = true; // initialize the 1st token (others are initialized by do_lexing) token tkn1; @@ -378,7 +430,17 @@ eofTkn->text = ""; eofTkn->type = TT_EOF; - return lc.tokens; + if(lc.valid) + { + return lc.tokens; + } + else + { + token* curr = lc.tokens; + while (curr->type != TT_EOF) { free(curr->text); curr++; } + array_free(lc.tokens); + return NULL; + } } array_t(token) lex_file(char* fname) @@ -401,6 +463,7 @@ lc.state = LEXER_AUTOMATA; lc.tokens = array_alloc(0, sizeof(token)); lc.tokenBuffPlace = 0; + lc.valid = true; // initialize the 1st token (others are initialized by do_lexing) token tkn1; @@ -455,7 +518,18 @@ eofTkn->type = TT_EOF; fclose(ish); - return lc.tokens; + + if(lc.valid) + { + return lc.tokens; + } + else + { + token* curr = lc.tokens; + while (curr->type != TT_EOF) { free(curr->text); curr++; } + array_free(lc.tokens); + return NULL; + } } /** @@ -557,7 +631,8 @@ if(printError) { - lex_error(&lc->tokens[array_length(lc->tokens) - 2], currState); + lex_error( + &lc->tokens[array_length(lc->tokens) - 2], currState, lc); } } } @@ -583,11 +658,14 @@ return ret->nextState; } } + /** * Prints out the error message for an improperly scanned token. */ -static inline void lex_error(const token* tkn, const automata* atm) +static inline void lex_error(const token* tkn, const automata* atm, + lex_context* lc) { (void) atm; // suppress unused warning. TODO: print allowed characters??. error_flc(tkn, "Cannot parse token \"%s\"", tkn->text); + lc->valid = false; } diff --git a/src/main/c/lexer.h b/src/main/c/lexer.h index 79b8764..63d2b68 100644 --- a/src/main/c/lexer.h +++ b/src/main/c/lexer.h @@ -27,6 +27,8 @@ * Break a string into tokens. This is a convenience function for * ``lex_string(str, strlen(str), fname)``. * fname is used for writing errors. + * + * this is sugar for lex_string_len. */ array_t(token) lex_string(char* str, char* fname); @@ -34,12 +36,16 @@ * Break a string into tokens. str is the string to lex, strLen is how long * it is, as if ``strlen(str)`` had been called, and fname is how to identify * it in error messages. + * + * returns NULL on an error. */ array_t(token) lex_string_len(char* str, size_t strLen, char* fname); /** * Lex the contents of an entire file. * caller is responsible for freeing return value. + * + * returns NULL on error. */ array_t(token) lex_file(char* fname); diff --git a/src/main/c/main.c b/src/main/c/main.c index e51cb7c..cbb398f 100644 --- a/src/main/c/main.c +++ b/src/main/c/main.c @@ -43,7 +43,6 @@ static bool Show_Parse_Tree = false; static array_t(char*) Input_Files = NULL; -void print_tokens(array_t(token const) tokens); void print_parse_tree(parse_tree const * tree); FILE* LOG_FILE; @@ -67,24 +66,33 @@ { for(size_t i = 0; i < array_length(Input_Files); ++i) { - /* TODO: stop if a phase fails */ array_t(token) tkns = lex_file(Input_Files[i]); + if(tkns != NULL) + { + if(Show_Tokens) { print_tokens(tkns); } - if(Show_Tokens) { print_tokens(tkns); } + // Hack because leading line terminators is badly grammar. + while(tkns[0].type == TT_TERMINATOR) + { + array_shift(tkns); + } - parse_tree* ptree = parse_tokens(tkns); + parse_tree* ptree = parse_tokens(tkns); + if(ptree != NULL) + { + if(Show_Parse_Tree) { print_parse_tree(ptree); } - if(Show_Parse_Tree) { print_parse_tree(ptree); } + if(semantic_analyze(ptree)) { + interpret(ptree); + } - semantic_analyze(ptree); + parse_tree_destroy(ptree); + } - interpret(ptree); - - parse_tree_destroy(ptree); - - token* curr = tkns; - while (curr->type != TT_EOF) { free(curr->text); curr++; } - array_free(tkns); + token* curr = tkns; + while (curr->type != TT_EOF) { free(curr->text); curr++; } + array_free(tkns); + } } } else @@ -171,20 +179,6 @@ #endif } -void print_tokens(array_t(token const) tokens) -{ - int i = 0; - - token const * curr = tokens; - while(curr->type != TT_EOF) - { - info_flc(curr, "%d token(%s) \"%s\"", i, - token_type_to_cstr(curr->type), curr->text); - curr++; - i++; - } -} - void print_parse_tree_helper(parse_tree const* tree, int const depth) { assert(tree->token != NULL); diff --git a/src/main/c/parse_tree.c b/src/main/c/parse_tree.c index 666f25e..e945dda 100644 --- a/src/main/c/parse_tree.c +++ b/src/main/c/parse_tree.c @@ -59,9 +59,21 @@ { array_free(this->semantic.decl.init); } - else if(this->semantic.type == PTS_STMT) + else if(this->semantic.type == PTS_FLOW) { - datatype_uninit(&this->semantic.stmt.returnType); + datatype_uninit(&this->semantic.flow.returnType); + } + else if(this->semantic.type == PTS_FUNC) + { + datatype_uninit(&this->semantic.func.returnType); + for(size_t i = 0; i < array_length(this->semantic.func.parameters); + i++) + { + datatype_uninit(&this->semantic.func.parameters[i]); + } + + array_free(this->semantic.func.parameters); + array_free(this->semantic.func.names); } // Add a recursion stack on the heap diff --git a/src/main/c/parse_tree.h b/src/main/c/parse_tree.h index 788d561..51529d2 100644 --- a/src/main/c/parse_tree.h +++ b/src/main/c/parse_tree.h @@ -53,6 +53,13 @@ DI_INIT } decl_initialization; +typedef enum +{ + RC_YES, + RC_NO, + RC_COND +} return_condition; + struct parse_tree; typedef struct parse_tree parse_tree; struct parse_tree @@ -86,7 +93,8 @@ PTS_EXPR, PTS_DECL, PTS_TYPE, - PTS_STMT, + PTS_FLOW, + PTS_FUNC, PTS_UNINIT } type; @@ -104,10 +112,18 @@ { datatype dt; } datatype; - struct // stmt + struct // flow { + bool shouldReturn; datatype returnType; - } stmt; + return_condition retCond; + } flow; + struct // func + { + array_t(datatype) parameters; + array_t(token*) names; + datatype returnType; + } func; }; } semantic; }; diff --git a/src/main/c/parser.h b/src/main/c/parser.h index 196c82e..fa35466 100644 --- a/src/main/c/parser.h +++ b/src/main/c/parser.h @@ -296,9 +296,9 @@ //! @param module //! Target module of compilation. //! @return -//! #STBOOL_SUCCESS on success. +//! a parse_tree on success. //! @return -//! #STBOOL_FAILURE on failure. +//! NULL on failure. parse_tree* parse_tokens(array_t(token) tokens); /** diff --git a/src/main/c/semantic.c b/src/main/c/semantic.c index 24b1637..58eae00 100644 --- a/src/main/c/semantic.c +++ b/src/main/c/semantic.c @@ -104,54 +104,73 @@ assert(ptree->children[1].type == PTT_TERMINAL); assert(ptree->children[1].token->type == TT_EOF); - ptree->children[0].semantic.type = PTS_STMT; - datatype_init(&ptree->children[0].semantic.stmt.returnType); - ptree->children[0].semantic.stmt.returnType.type = DT_VOID; + ptree->children[0].semantic.type = PTS_FLOW; + datatype_init(&ptree->children[0].semantic.flow.returnType); + ptree->children[0].semantic.flow.returnType.type = DT_VOID; + ptree->children[0].semantic.flow.shouldReturn = false; - return semantic_nt_stmts(&ptree->children[0], &semantic_global); + return semantic_nt_stmts(&ptree->children[0], &semantic_global) + && ptree->children[0].semantic.flow.retCond == RC_NO; } static inline bool semantic_nt_stmt(parse_tree* ptree, symbol_table* symtbl); static inline bool semantic_nt_stmts(parse_tree* ptree, symbol_table* symtbl) { - assert(ptree->type == PTT_NONTERMINAL); - assert(ptree->nonterminal == NT_STMTS); + assert_nonterm(ptree, NT_STMTS); + + ptree->children[0].semantic.type = PTS_FLOW; + datatype_init_copy(&ptree->children[0].semantic.flow.returnType, + &ptree->semantic.flow.returnType); + ptree->children[0].semantic.flow.shouldReturn = + ptree->semantic.flow.shouldReturn; switch(ptree->rule) { case GR_STMTS_LIST: // -> - assert(ptree->children[0].type == PTT_NONTERMINAL); - assert(ptree->children[0].nonterminal == NT_STMT); - assert(ptree->children[1].type == PTT_NONTERMINAL); - assert(ptree->children[1].nonterminal == NT_LINETERMS); - assert(ptree->children[2].type == PTT_NONTERMINAL); - assert(ptree->children[2].nonterminal == NT_STMTS); + assert_nonterm(&ptree->children[0], NT_STMT); + assert_nonterm(&ptree->children[1], NT_LINETERMS); + assert_nonterm(&ptree->children[2], NT_STMTS); - ptree->children[2].semantic.type = PTS_STMT; - datatype_init_copy(&ptree->children[2].semantic.stmt.returnType, - &ptree->semantic.stmt.returnType); + ptree->children[2].semantic.type = PTS_FLOW; + datatype_init_copy(&ptree->children[2].semantic.flow.returnType, + &ptree->semantic.flow.returnType); + ptree->children[2].semantic.flow.shouldReturn = + ptree->semantic.flow.shouldReturn; - return semantic_nt_stmt(&ptree->children[0], symtbl) - && semantic_nt_stmts(&ptree->children[2], symtbl) - && (ptree->children[0].semantic.stmt.returnType.type == DT_VOID - || datatype_comparable( - &ptree->children[0].semantic.stmt.returnType, - &ptree->semantic.stmt.returnType)); + bool valid_list = semantic_nt_stmt(&ptree->children[0], symtbl) + && semantic_nt_stmts(&ptree->children[2], symtbl); + + if(ptree->children[0].semantic.flow.retCond == RC_YES) + { + warn_flc(ptree->children[2].token, + "Statements after return will not be executed."); + ptree->semantic.flow.retCond = RC_YES; + } + if(ptree->children[0].semantic.flow.retCond == RC_COND + && ptree->children[2].semantic.flow.retCond != RC_YES) + { + ptree->semantic.flow.retCond = RC_COND; + } + else + { + ptree->semantic.flow.retCond = + ptree->children[2].semantic.flow.retCond; + } + + return valid_list; case GR_STMTS_SINGLE: // -> - assert(ptree->children[0].type == PTT_NONTERMINAL); - assert(ptree->children[0].nonterminal == NT_STMT); - assert(ptree->children[1].type == PTT_NONTERMINAL); - assert(ptree->children[1].nonterminal == NT_LINETERMS); + assert_nonterm(&ptree->children[0], NT_STMT); + assert_nonterm(&ptree->children[1], NT_LINETERMS); - return semantic_nt_stmt(&ptree->children[0], symtbl) - && (ptree->children[0].semantic.stmt.returnType.type == DT_VOID - || datatype_comparable( - &ptree->children[0].semantic.stmt.returnType, - &ptree->semantic.stmt.returnType)); + bool valid_single = semantic_nt_stmt(&ptree->children[0], symtbl); + ptree->semantic.flow.retCond = + ptree->children[0].semantic.flow.retCond; + + return valid_single; default: panic("Invalid stmts rule"); @@ -166,51 +185,48 @@ static inline bool semantic_nt_declaration_assignment(parse_tree* ptree, symbol_table* symtbl); static inline bool semantic_nt_assignment(parse_tree* ptree, - symbol_table* symtbl); -static inline bool semantic_nt_print(parse_tree* ptree, - symbol_table* symtbl); + symbol_table* symtbl); +static inline bool semantic_nt_print(parse_tree* ptree, symbol_table* symtbl); static inline bool semantic_nt_function(parse_tree* ptree, symbol_table* symtbl); +static inline bool semantic_nt_return(parse_tree* ptree, symbol_table* symtbl); static inline bool semantic_nt_stmt(parse_tree* ptree, symbol_table* symtbl) { assert(ptree->type == PTT_NONTERMINAL); assert(ptree->nonterminal == NT_STMT); + ptree->semantic.flow.retCond = RC_NO; + switch(ptree->rule) { case GR_STMT_EXPRESSION: // -> - assert(ptree->children[0].type == PTT_NONTERMINAL); - assert(ptree->children[0].nonterminal == NT_EXPRESSION); + assert_nonterm(&ptree->children[0], NT_EXPRESSION); return semantic_nt_expression(&ptree->children[0], symtbl); case GR_STMT_DECLARATION: // -> - assert(ptree->children[0].type == PTT_NONTERMINAL); - assert(ptree->children[0].nonterminal == NT_DECLARATION); + assert_nonterm(&ptree->children[0], NT_DECLARATION); return semantic_nt_declaration(&ptree->children[0], symtbl); case GR_STMT_DECLARATION_ASSIGNMENT: // -> - assert(ptree->children[0].type == PTT_NONTERMINAL); - assert(ptree->children[0].nonterminal == NT_DECLARATION_ASSIGNMENT); + assert_nonterm(&ptree->children[0], NT_DECLARATION_ASSIGNMENT); return semantic_nt_declaration_assignment(&ptree->children[0], symtbl); case GR_STMT_ASSIGNMENT: // -> - assert(ptree->children[0].type == PTT_NONTERMINAL); - assert(ptree->children[0].nonterminal == NT_ASSIGNMENT); + assert_nonterm(&ptree->children[0], NT_ASSIGNMENT); return semantic_nt_assignment(&ptree->children[0], symtbl); case GR_STMT_PRINT: // -> - assert(ptree->children[0].type == PTT_NONTERMINAL); - assert(ptree->children[0].nonterminal == NT_PRINT); + assert_nonterm(&ptree->children[0], NT_PRINT); return semantic_nt_print(&ptree->children[0], symtbl); @@ -219,6 +235,22 @@ return semantic_nt_function(&ptree->children[0], symtbl); + case GR_STMT_RETURN: + assert_nonterm(&ptree->children[0], NT_RETURN); + + ptree->children[0].semantic.type = PTS_FLOW; + datatype_init_copy(&ptree->children[0].semantic.flow.returnType, + &ptree->semantic.flow.returnType); + ptree->children[0].semantic.flow.shouldReturn = + ptree->semantic.flow.shouldReturn; + + bool valid = semantic_nt_return(&ptree->children[0], symtbl); + + ptree->semantic.flow.retCond = + ptree->children[0].semantic.flow.retCond; + + return valid; + default: panic("Invalid stmt rule"); return false; @@ -237,10 +269,12 @@ assert_expr_fall(ptree, NT_OR_EXPR); + ptree->semantic.type = PTS_EXPR; bool success = semantic_nt_or_expr(&ptree->children[0], symtbl); datatype_init_copy(&ptree->semantic.expr.dt, &ptree->children[0].semantic.expr.dt); + return success; } @@ -927,6 +961,103 @@ ptree->semantic.type = PTS_EXPR; switch(ptree->rule) { + case GR_EXPR_FUNCTION: + case GR_EXPR_FUNCTION_EMPTY: + assert_term(&ptree->children[0], TT_IDENTIFIER); + symbol* func = symbol_table_get(symtbl, ptree->children[0].token); + + bool valid = true; + if(func == NULL) + { + error_flc(ptree->children[0].token, "No such function, \'%s\'", + ptree->children[0].token->text); + valid = false; + } + + parse_tree* args = (ptree->rule == GR_EXPR_FUNCTION_EMPTY) + ? NULL + : &ptree->children[2]; + + array_t(datatype) arg_types = array_alloc(0, sizeof(datatype)); + array_t(token*) arg_tkns = array_alloc(0, sizeof(token*)); + while(args != NULL) + { + assert_nonterm(args, NT_ARGS); + + parse_tree* arg = &args->children[0]; + assert_nonterm(arg, NT_EXPRESSION); + + if(!semantic_nt_expression(arg, symtbl)) + { + valid = false; + } + + assert(arg->semantic.type == PTS_EXPR); + datatype arg_type; + datatype_init_copy(&arg_type, &arg->semantic.expr.dt); + arg_types = array_push(arg_types, &arg_type); + arg_tkns = array_push(arg_tkns, &arg->token); + + args = (args->rule == GR_ARGS_SINGLE) + ? NULL + : &args->children[2]; + } + + if(func != NULL) + { + if(func->type != ST_FUNCTION) + { + error_flc(ptree->children[0].token, "\'%s\' is not a function", + ptree->children[0].token->text); + valid = false; + } + else + { + if(array_length(arg_types) + != array_length(func->decl->semantic.func.parameters)) + { + error_flc(ptree->children[0].token, + "%zu arguments provided to function \'%s\' but %zu are" + " required", + array_length(arg_types), + ptree->children[0].token->text, + array_length(func->decl->semantic.func.parameters)); + valid = false; + } + + size_t const min_len = (array_length(arg_types) + < array_length(func->decl->semantic.func.parameters)) + ? array_length(func->decl->semantic.func.parameters) + : array_length(arg_types); + + for(size_t i = 0; i < min_len; i++) + { + if(!datatype_comparable(&arg_types[i], + &func->decl->semantic.func.parameters[i])) + { + error_flc(arg_tkns[i], + "cannot convert argument %zu of function \'%s\'" + " from type \'%s\' to type \'%s\'", + i, ptree->children[0].token->text, + datatype_tostring(&arg_types[i]), + datatype_tostring( + &func->decl->semantic.func.parameters[i])); + valid = false; + } + } + } + } + + for(size_t i = 0; i < array_length(arg_types); i++) + { + datatype_uninit(&arg_types[i]); + } + + array_free(arg_types); + array_free(arg_tkns); + + return valid; + case GR_EXPR_FUNCTION_FALL: { // -> @@ -939,7 +1070,7 @@ return success; } default: - panic("Invalid function rule: not yet implemented"); + panic("Invalid function rule"); return false; } } @@ -1090,7 +1221,7 @@ { error_flc(ptree->token, "Redefinition of symbol \"%s\" is illegal.", ptree->children[1].token->text); - parse_tree* prevDef = + parse_tree const* prevDef = symbol_table_get(symtbl, ptree->children[1].token)->decl; if(prevDef != NULL) { @@ -1106,7 +1237,7 @@ { warn_flc(ptree->token, "Redefinition of symbol \"%s\" masks prior definition.", ptree->children[1].token->text); - parse_tree* prevDef = + parse_tree const* prevDef = symbol_table_get(symtbl, ptree->children[1].token)->decl; if(prevDef != NULL) { @@ -1152,7 +1283,7 @@ { error_flc(ptree->token, "Redefinition of symbol \"%s\" is illegal.", ptree->children[1].token->text); - parse_tree* prevDef = + parse_tree const* prevDef = symbol_table_get(symtbl, ptree->children[1].token)->decl; if(prevDef != NULL) { @@ -1168,7 +1299,7 @@ { warn_flc(ptree->token, "Redefinition of symbol \"%s\" masks prior definition.", ptree->children[1].token->text); - parse_tree* prevDef = + parse_tree const* prevDef = symbol_table_get(symtbl, ptree->children[1].token)->decl; if(prevDef != NULL) { @@ -1297,21 +1428,216 @@ symbol_table* symtbl) { assert_nonterm(ptree, NT_FUNCTION); + + if(symtbl != &semantic_global) + { + error_flc(ptree->token, + "Function declarations are only valid in the global scope."); + return false; + } + switch(ptree->rule) { - case GR_FUNCTION: - { - - - return true; - } + case GR_FUNCTION: /* fallthrough */ case GR_FUNCTION_NOPARAMS: { + bool valid = true; - return true; + assert_term(&ptree->children[1], TT_IDENTIFIER); + if(symbol_table_has(symtbl, ptree->children[1].token)) + { + error_flc(ptree->children[1].token, + "Function declaration redefines identifier \'%s\'", + ptree->children[1].token->text); + valid = false; + } + + datatype return_type; + if(semantic_nt_type(&ptree->children[0], symtbl)) + { + datatype_init_copy(&return_type, + &ptree->children[0].semantic.datatype.dt); + } + else + { + datatype_init(&return_type); + valid = false; + } + + array_t(datatype) parameters = array_alloc(0, sizeof(datatype)); + array_t(token*) names = array_alloc(0, sizeof(token*)); + + symbol_table child_symtbl; + symbol_table_init_semantic(&child_symtbl); + child_symtbl.parent = symtbl; + + parse_tree* params = NULL; + if(ptree->rule == GR_FUNCTION) + { + params = &ptree->children[3]; + } + + while(params != NULL) + { + assert_flc(params->rule == GR_PARAMS_LIST + || params->rule == GR_PARAMS_SINGLE, params->token); + + parse_tree* param = ¶ms->children[0]; + assert_flc(param->rule == GR_PARAM, param->token); + + datatype param_type1; + datatype param_type2; + if(semantic_nt_type(¶m->children[0], symtbl)) + { + datatype_init_copy(¶m_type1, + ¶m->children[0].semantic.datatype.dt); + datatype_init_copy(¶m_type2, + ¶m->children[0].semantic.datatype.dt); + } + else + { + valid = false; + datatype_init(¶m_type1); + datatype_init(¶m_type2); + } + + assert_term(¶m->children[1], TT_IDENTIFIER); + if(symbol_table_has(symtbl, param->children[1].token)) + { + warn_flc(param->children[1].token, + "Parameter masks identifier '%s'", + param->children[1].token->text); + } + + parameters = array_push(parameters, ¶m_type1); + names = array_push(names, ¶m->children[1].token); + + param->semantic.type = PTS_DECL; + param->semantic.decl.init = + array_alloc(1, sizeof(decl_initialization)); + param->semantic.decl.init[0] = DI_INIT; + + symbol sym; + symbol_init_s_variable(&sym, param->children[1].token, + ¶m_type2); + sym.decl = param; + + symbol_table_put(&child_symtbl, &sym); + + if(params->rule == GR_PARAMS_LIST) + { + params = ¶m->children[1]; + } + else + { + params = NULL; + } + } + + size_t offset = ptree->rule == GR_FUNCTION ? 1 : 0; + assert_nonterm(&ptree->children[offset + 5], NT_STMTS); + ptree->children[offset + 5].semantic.type = PTS_FLOW; + ptree->children[offset + 5].semantic.flow.shouldReturn = true; + datatype_init_copy( + &ptree->children[offset + 5].semantic.flow.returnType, + &return_type); + + + + if(!semantic_nt_stmts(&ptree->children[offset + 5], &child_symtbl)) + { + valid = false; + } + + if(strcmp(ptree->children[1].token->text, + ptree->children[offset + 7].token->text) != 0) + { + valid = false; + error_flc(ptree->children[offset + 7].token, + "Function end identifier does not match opening identifier"); + } + + if(DT_VOID != + ptree->children[offset + 5].semantic.flow.returnType.type) + { + return_condition rc = + ptree->children[offset + 5].semantic.flow.retCond; + if(rc == RC_COND) + { + warn_flc(ptree->children[offset + 6].token, + "Non-void function may return conditionally"); + } + else if(rc == RC_NO) + { + warn_flc(ptree->children[offset + 6].token, + "Non-void function does not return"); + valid = false; + } + } + + symbol_table_uninit(&child_symtbl); + + symbol func_sym; + symbol_init_function_semantic(&func_sym, ptree->children[1].token, + ptree, &return_type, parameters, names); + symbol_table_put(symtbl, &func_sym); + + return valid; } default: panic("unsupported function type"); } return false; } + +static inline bool semantic_nt_return(parse_tree* ptree, symbol_table* symtbl) +{ + assert_flc(ptree->semantic.type == PTS_FLOW, ptree->token); + + if(!ptree->semantic.flow.shouldReturn) + { + error_flc(ptree->token, "Cannot return outside of a function"); + return false; + } + + switch(ptree->rule) + { + case GR_RETURN_EXPR: + if(semantic_nt_expression(&ptree->children[1], symtbl)) + { + assert_flc(ptree->children[1].semantic.type == PTS_EXPR, + ptree->children[1].token); + if(!datatype_comparable(&ptree->semantic.flow.returnType, + &ptree->children[1].semantic.expr.dt)) + { + error_flc(ptree->children[1].token, + "Cannot convert expression type \'%s\' to return" + " type \'%s\'", + datatype_tostring(&ptree->children[1].semantic.expr.dt), + datatype_tostring(&ptree->semantic.flow.returnType)); + return false; + } + + ptree->semantic.flow.retCond = RC_YES; + return true; + } + + return false; + + case GR_RETURN_VOID: + if(ptree->semantic.flow.returnType.type != DT_VOID) + { + error_flc(ptree->token, + "Cannot return empty from function with type %s", + datatype_tostring(&ptree->semantic.flow.returnType)); + return false; + } + + ptree->semantic.flow.retCond = RC_YES; + return true; + + default: + panic("unsupported return statement"); + } + return false; +} diff --git a/src/main/c/symbol_table.c b/src/main/c/symbol_table.c index ab12467..4124685 100644 --- a/src/main/c/symbol_table.c +++ b/src/main/c/symbol_table.c @@ -55,6 +55,31 @@ this->i_variable.inited = false; } +void symbol_init_function_semantic(symbol* this, token const* const id, + parse_tree* ptree, datatype const* const return_type, + array_t(datatype) parameters, array_t(token*) names) +{ + assert_flc(id->type == TT_IDENTIFIER, id); + this->id = id; + this->type = ST_FUNCTION; + + ptree->semantic.func.parameters = parameters; + ptree->semantic.func.names = names; + datatype_init_copy(&ptree->semantic.func.returnType, return_type); + + this->decl = ptree; +} + +void symbol_init_function_interpret(symbol* this, token const* const id, + parse_tree const* ptree) +{ + assert_flc(id->type == TT_IDENTIFIER, id); + this->id = id; + this->type = ST_FUNCTION; + + this->decl = ptree; +} + void symbol_uninit(symbol* this) { switch(this->type) @@ -65,6 +90,9 @@ case ST_I_VARIABLE: value_uninit(&this->i_variable.variable); break; + case ST_FUNCTION: + // empty + break; default: panic("cannot uninitialize this symbol"); } diff --git a/src/main/c/symbol_table.h b/src/main/c/symbol_table.h index 60663db..8240f7f 100644 --- a/src/main/c/symbol_table.h +++ b/src/main/c/symbol_table.h @@ -42,7 +42,7 @@ ST_FUNCTION, } type; - parse_tree* decl; + parse_tree const* decl; union { @@ -55,11 +55,6 @@ value variable; bool inited; } i_variable; - struct - { - array_t(datatype) parameters; - datatype returnType; - } function; }; } symbol; @@ -69,7 +64,7 @@ * * @param this the symbol object. * @param id its identifier. - * @param val its value, this is copied. + * @param dt its type, this is copied. */ void symbol_init_s_variable(symbol* this, token const* const id, datatype const* const dt); @@ -96,8 +91,12 @@ void symbol_init_i_variable_uninit(symbol* this, token const* const id, datatype const* const dt); -void symbol_init_function(symbol* this, token const* const id, - parse_tree* ptree); +void symbol_init_function_semantic(symbol* this, token const* const id, + parse_tree* ptree, datatype const* const return_type, + array_t(datatype) parameters, array_t(token*) names); + +void symbol_init_function_interpret(symbol* this, token const* const id, + parse_tree const* ptree); /** * Uninitialize a symbol. diff --git a/src/main/c/token.c b/src/main/c/token.c index c60edfa..3facf61 100644 --- a/src/main/c/token.c +++ b/src/main/c/token.c @@ -137,3 +137,18 @@ panic("Invalid token type"); } } + +void print_tokens(array_t(token const) tokens) +{ + int i = 0; + + token const * curr = tokens; + while(curr->type != TT_EOF) + { + info_flc(curr, "%d token(%s) \"%s\"", i, + token_type_to_cstr(curr->type), curr->text); + curr++; + i++; + } +} + diff --git a/src/main/c/token.h b/src/main/c/token.h index e49783b..fc96410 100644 --- a/src/main/c/token.h +++ b/src/main/c/token.h @@ -17,6 +17,8 @@ #include +#include + typedef enum // token_type { // Not tokens, but necessary for the lexer @@ -106,4 +108,9 @@ */ const char * token_type_to_cstr(const token_type tt); +/** + * Print out a list of tokens. + */ +void print_tokens(array_t(token const) tokens); + #endif//__TOKEN_H__ diff --git a/src/main/c/value.c b/src/main/c/value.c index 3c24b5b..b3890b4 100644 --- a/src/main/c/value.c +++ b/src/main/c/value.c @@ -147,6 +147,12 @@ this->string_val.capacity = cap; } +void value_init_void(value* this) +{ + datatype_init(&this->dt); + this->dt.type = DT_VOID; +} + void value_init_copy(value* this, value const* const other) { datatype_init_copy(&this->dt, &other->dt); diff --git a/src/main/c/value.h b/src/main/c/value.h index cbec4ee..68f2b85 100644 --- a/src/main/c/value.h +++ b/src/main/c/value.h @@ -136,6 +136,13 @@ void value_init_string_empty(value* this, size_t cap); /** + * Initialize a value with void type. + * + * @param this the value object. + */ +void value_init_void(value* this); + +/** * Initialize a value as a copy of another value. * * @param this the value being initialized. diff --git a/src/test/golden/grammar/code_with_comments/expected.inr b/src/test/golden/grammar/code_with_comments/expected.inr new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/test/golden/grammar/code_with_comments/expected.inr diff --git a/src/test/golden/grammar/code_with_comments/expected.pxl b/src/test/golden/grammar/code_with_comments/expected.pxl new file mode 100644 index 0000000..be590ab --- /dev/null +++ b/src/test/golden/grammar/code_with_comments/expected.pxl @@ -0,0 +1,2 @@ +Hello, World! +0 diff --git a/src/test/golden/grammar/code_with_comments/input.pxl b/src/test/golden/grammar/code_with_comments/input.pxl new file mode 100644 index 0000000..a4c7fb0 --- /dev/null +++ b/src/test/golden/grammar/code_with_comments/input.pxl @@ -0,0 +1,29 @@ +/** + * this is a documentation comment + * + * @param there is a parameter + * @return there is return value + */ + +/* Here is some code */ +println "Hello, World!" +println 3*3 + 4*4 - 5*5 + +/* comment out some code */ +/* +println "hello world" +println 3*3 + 4*4 - 5*5 +*/ + +/******************/ + +/* a super uuugly comment */ +/* comment asdfasdf ASDFASDF hjklhjkl HJKLHJKL' + \\\\\ \n\r\t !@#$%^&*()!@#$%^&*() 12345678912345678900 + { } {{{ ]]][] + /* /* /******* + *** + *\\\\\\\ *****|||||\\\\\\***** /********** + ~!@#$%^&*(())_ + /* +*/ diff --git a/src/test/golden/grammar/code_with_extra_whitespace/expected.inr b/src/test/golden/grammar/code_with_extra_whitespace/expected.inr new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/test/golden/grammar/code_with_extra_whitespace/expected.inr diff --git a/src/test/golden/grammar/code_with_extra_whitespace/expected.pxl b/src/test/golden/grammar/code_with_extra_whitespace/expected.pxl new file mode 100644 index 0000000..f194aee --- /dev/null +++ b/src/test/golden/grammar/code_with_extra_whitespace/expected.pxl @@ -0,0 +1,3 @@ +Hello, World! +Hi +Hi diff --git a/src/test/golden/grammar/code_with_extra_whitespace/input.pxl b/src/test/golden/grammar/code_with_extra_whitespace/input.pxl new file mode 100644 index 0000000..98fd873 --- /dev/null +++ b/src/test/golden/grammar/code_with_extra_whitespace/input.pxl @@ -0,0 +1,8 @@ + + +println "Hello, World!" + +println "Hi" + + +println "Hi"