void unary () { if (try_match("!")) { //Recurse to allow chains of unary operations, LIFO order unary(); fputs("cmp eax, 0\n" "mov eax, 0\n" "sete al\n", output); } else if (try_match("-")) { unary(); fputs("neg eax\n", output); } else { //This function call compiles itself object(); if (see("++") || see("--")) { fprintf(output, "mov ebx, eax\n" "mov eax, [ebx]\n" "%s dword ptr [ebx], 1\n", see("++") ? "add" : "sub"); needs_lvalue("assignment operator '%s' requires a modifiable object\n"); next(); } } }
//Compile a single statement. Dispatches on the current token to a control
//structure, a local declaration, a braced block, or otherwise an optional
//expression (possibly a 'return') terminated by ';'.
void line () {
    if (see("if")) {
        if_branch();
        return;
    }

    if (see("while") || see("do")) {
        while_loop();
        return;
    }

    if (see("int") || see("char") || see("bool")) {
        decl(decl_local);
        return;
    }

    //Compound statement: keep compiling statements until the closing brace
    if (try_match("{")) {
        while (waiting_for("}"))
            line();

        match("}");
        return;
    }

    //Expression statement, with or without a leading 'return'
    bool is_return = try_match("return");

    //The expression is optional, e.g. a bare "return;" or ";"
    if (waiting_for(";"))
        expr(0);

    //A return jumps to the label numbered return_to
    if (is_return)
        fprintf(output, "jmp _%08d\n", return_to);

    match(";");
}
//Compile an expression by precedence climbing. Each level first parses the
//next level up as its left operand, then handles its own operators:
//  5: unary()
//  4: + - *
//  3: == != < >=
//  2: || &&   (short-circuiting)
//  1: ?:      (via branch(true))
//  0: =
void expr (int level) {
    if (level == 5) {
        unary();
        return;
    }

    //Left operand
    expr(level+1);

    //Arithmetic (level 4) and comparison (level 3) operators
    while (   (level == 4 && (see("+") || see("-") || see("*")))
           || (level == 3 && (see("==") || see("!=") || see("<") || see(">=")))) {
        //Save the left operand while the right one is evaluated
        fputs("push eax\n", output);

        //Either a full mnemonic (level 4) or a condition-code suffix for setcc (level 3)
        const char* mnemonic;

        if (see("+"))
            mnemonic = "add";
        else if (see("-"))
            mnemonic = "sub";
        else if (see("*"))
            mnemonic = "imul";
        else if (see("=="))
            mnemonic = "e";
        else if (see("!="))
            mnemonic = "ne";
        else if (see("<"))
            mnemonic = "l";
        else
            mnemonic = "ge";

        next();
        expr(level+1);

        if (level == 4) {
            //Right operand to ebx, left back into eax, then combine
            fprintf(output, "mov ebx, eax\n"
                            "pop eax\n"
                            "%s eax, ebx\n", mnemonic);
        } else {
            //Compare left (ebx) against right (eax) and materialize 0 / 1 in eax
            fprintf(output, "pop ebx\n"
                            "cmp ebx, eax\n"
                            "mov eax, 0\n"
                            "set%s al\n", mnemonic);
        }
    }

    //Logical operators skip the right operand when the left decides the result
    while (level == 2 && (see("||") || see("&&"))) {
        int skip_label = new_label();

        fprintf(output, "cmp eax, 0\n"
                        "j%s _%08d\n", see("||") ? "nz" : "z", skip_label);
        next();
        expr(level+1);

        fprintf(output, "\t_%08d:\n", skip_label);
    }

    //Conditional operator
    if (level == 1 && try_match("?"))
        branch(true);

    //Assignment: the value already in eax is pushed and later popped into ebx
    //to be used as the store address
    if (level == 0 && try_match("=")) {
        fputs("push eax\n", output);

        needs_lvalue("assignment requires a modifiable object\n");
        expr(level+1);

        fputs("pop ebx\n"
              "mov dword ptr [ebx], eax\n", output);
    }
}
void object () { factor(); while (true) { if (try_match("(")) { fputs("push eax\n", output); int arg_no = 0; if (waiting_for(")")) { //cdecl requires arguments to be pushed on backwards int start_label = new_label(); int end_label = new_label(); int prev_label = end_label; fprintf(output, "jmp _%08d\n", start_label); do { int next_label = emit_label(new_label()); expr(0); fprintf(output, "push eax\n" "jmp _%08d\n", prev_label); arg_no++; prev_label = next_label; } while (try_match(",")); fprintf(output, "_%08d:\n", start_label); fprintf(output, "jmp _%08d\n", prev_label); fprintf(output, "_%08d:\n", end_label); } match(")"); fprintf(output, "call dword ptr [esp+%d]\n", arg_no*word_size); fprintf(output, "add esp, %d\n", (arg_no+1)*word_size); } else if (try_match("[")) { fputs("push eax\n", output); expr(0); match("]"); if (see("=") || see("++") || see("--")) lvalue = true; fprintf(output, "pop ebx\n" "%s eax, [eax*%d+ebx]\n", lvalue ? "lea" : "mov", word_size); } else return; } }
//Compile a "while (cond) body" or "do body while (cond);" loop.
//A label is emitted at the top of the loop; the condition test jumps to a
//second label, placed after the loop, when eax is zero.
void while_loop () {
    int loop_to = emit_label(new_label());
    int break_to = new_label();

    //In the do-while form the body precedes the condition
    bool body_first = try_match("do");

    if (body_first)
        line();

    match("while");
    match("(");
    expr(0);
    match(")");

    //Leave the loop when the condition evaluated to zero
    fprintf(output, "cmp eax, 0\n"
                    "je _%08d\n", break_to);

    if (!body_first)
        line();
    else
        match(";");

    //Go round again, then place the exit label
    fprintf(output, "jmp _%08d\n", loop_to);
    fprintf(output, "\t_%08d:\n", break_to);
}
// Returns the current match and marks it as consumed, so that the following
// call to next() runs try_match() again.
// Throws std::runtime_error("no more matches") when no match is available.
// NOTE(review): try_match() is presumably what updates have_result_,
// have_match_ and match_ - confirm against its definition.
std_match_result const& matcher::next()
{
    // Only search if there is no unconsumed result pending.
    if (!have_result_) {
        try_match();
    }

    if (!have_match_) {
        throw std::runtime_error("no more matches");
    }

    have_result_ = false;
    return match_;
}
void factor () { lvalue = false; if (see("true") || see("false")) { fprintf(output, "mov eax, %d\n", see("true") ? 1 : 0); next(); } else if (token == token_ident) { int global = sym_lookup(globals, global_no, buffer); int local = sym_lookup(locals, local_no, buffer); require(global >= 0 || local >= 0, "no symbol '%s' declared\n"); next(); if (see("=") || see("++") || see("--")) lvalue = true; if (global >= 0) fprintf(output, "%s eax, [%s]\n", is_fn[global] || lvalue ? "lea" : "mov", globals[global]); else if (local >= 0) fprintf(output, "%s eax, [ebp%+d]\n", lvalue ? "lea" : "mov", offsets[local]); } else if (token == token_int || token == token_char) { fprintf(output, "mov eax, %s\n", buffer); next(); } else if (token == token_str) { fputs(".section .rodata\n", output); int str = emit_label(new_label()); //Consecutive string literals are concatenated while (token == token_str) { fprintf(output, ".ascii %s\n", buffer); next(); } fputs(".byte 0\n" ".section .text\n", output); fprintf(output, "mov eax, offset _%08d\n", str); } else if (try_match("(")) { expr(0); match(")"); } else error("expected an expression, found '%s'\n"); }
/**
 * FnApp = { Atom | ( "!" Atom ) }
 *
 * A series of at least one expr-atom. If multiple, then the last is a
 * function to which the others are applied. Instead, one of them may
 * be explicitly marked as the function by prefixing it with "!".
 *
 * Returns the single atom unchanged when there is no application,
 * otherwise a FnApp node built from the argument vector and the function.
 * The argument vector is handed to astCreateFnApp in those cases; in
 * every other case it is freed here.
 */
static ast* parseFnApp (parserCtx* ctx) {
    /*Filled iff there is an explicitly marked ("!") function*/
    ast* fn = 0;

    vector(ast*) nodes = vectorInit(3, malloc);

    /*Require at least one expr. A leading "!" is left for the loop below
      so that the marked atom is recorded as the function*/
    if (!see(ctx, "!"))
        vectorPush(&nodes, parseAtom(ctx));

    while (waiting_for_delim(ctx)) {
        if (try_match(ctx, "!")) {
            if (fn) {
                error(ctx)("Multiple explicit functions: '%s'\n", ctx->current.buffer);
                /*Demote the previously marked function to an ordinary
                  argument so that its node is not dropped*/
                vectorPush(&nodes, fn);
            }

            fn = parseAtom(ctx);

        } else
            vectorPush(&nodes, parseAtom(ctx));
    }

    if (fn)
        return astCreateFnApp(nodes, fn);

    else if (nodes.length == 0) {
        /*Shouldn't happen due to the way it parses*/
        errprintf("FnApp took no AST nodes");
        /*Nothing takes the vector on this path, so release it here*/
        vectorFree(&nodes);
        return astCreateInvalid();

    } else if (nodes.length == 1) {
        /*No application*/
        ast* node = vectorPop(&nodes);
        vectorFree(&nodes);
        return node;

    } else {
        /*The last node is the fn*/
        fn = vectorPop(&nodes);
        return astCreateFnApp(nodes, fn);
    }
}
/*
 * Match str against the regular expression regex and return the result of
 * try_match(). The compiled expression is obtained from
 * get_regex_from_cache(), with match_newline passed through to it.
 *
 * Failure to obtain the expression, or a MATCH_ERROR result, is reported
 * through error() with the message left in errmsg; the NOTREACHED markers
 * indicate that error() is not expected to return.
 */
int do_match(const char *regex, const char *str, gboolean match_newline)
{
    regex_errbuf errmsg;
    regex_t *re = get_regex_from_cache(regex, &errmsg, match_newline);
    int result;

    if (!re) {
        error("regex \"%s\": %s", regex, errmsg);
        /*NOTREACHED*/
    }

    result = try_match(re, str, &errmsg);

    if (result == MATCH_ERROR) {
        error("regex \"%s\": %s", regex, errmsg);
        /*NOTREACHED*/
    }

    return result;
}
//Compile the two arms of a conditional whose condition is already in eax.
//With isexpr true this is the "? a : b" operator (both arms are expr(1) and
//the ':' is mandatory); otherwise it is an if statement whose arms are
//line()s and whose 'else' arm is optional.
void branch (bool isexpr) {
    int false_branch = new_label();
    int join = new_label();

    //Skip the first arm when the condition is zero
    fprintf(output, "cmp eax, 0\n"
                    "je _%08d\n", false_branch);

    //First arm, taken when the condition is non-zero
    if (isexpr)
        expr(1);
    else
        line();

    //Having run the first arm, jump past the second
    fprintf(output, "jmp _%08d\n", join);
    fprintf(output, "\t_%08d:\n", false_branch);

    //Second arm
    if (isexpr) {
        match(":");
        expr(1);

    } else if (try_match("else")) {
        line();
    }

    fprintf(output, "\t_%08d:\n", join);
}
int match_tar(const char *glob, const char *str) { char *regex; regex_t *re; int result; regex_errbuf errmsg; regex = tar_to_regex(glob); re = get_regex_from_cache(regex, &errmsg, TRUE); if (!re) error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg); /*NOTREACHED*/ result = try_match(re, str, &errmsg); if (result == MATCH_ERROR) error("glob \"%s\" -> regex \"%s\": %s", glob, regex, errmsg); /*NOTREACHED*/ g_free(regex); return result; }
// Assigns `other` to the held tuple `_args`, but only when
// try_match(other, gen_seq<sizeof...(Args)>{}) succeeds; on failure `_args`
// is left untouched. Either way a copy of `_args` is returned.
std::tuple<Args&...> operator=(std::tuple<UArgs...> const& other)
{
    if (!try_match(other, gen_seq<sizeof...(Args)>{})) {
        return _args;
    }

    _args = other;
    return _args;
}
// Reports whether a match is available. If no result is currently pending,
// try_match() is run first; a pending result is reused as-is, so repeated
// calls without an intervening next() do not search again.
bool matcher::has_next()
{
    if (!have_result_) {
        try_match();
    }

    return have_match_;
}
/**
 * BOP = Pipe
 * Pipe = Logical [{ "|" | "|>" Logical }]
 * Logical = Equality [{ "&&" | "||" Equality }]
 * Equality = Sum [{ "==" | "!=" | "<" | "<=" | ">" | ">=" Sum }]
 * Sum = Product [{ "+" | "-" | "++" Product }]
 * Product = Exit [{ "*" | "/" | "%" Exit }]
 * Exit = FnApp
 *
 * All of these productions share one shape, differing only in their
 * operators, so a single function parses them all, selected by `level`
 * (0 = Pipe ... 4 = Product, 5 = Exit).
 *
 * The operators are left associative:
 *     x op y op z == (x op y) op z
 */
static ast* parseBOP (parserCtx* ctx, int level) {
    /*Beyond the last operator level, the operand is a function application*/
    if (level == 5)
        return parseFnApp(ctx);

    /*The left hand side belongs to the next level up*/
    ast* node = parseBOP(ctx, level+1);

    for (;;) {
        /*Look for one of the operators belonging to this level, trying
          them in a fixed order*/
        opKind op = opNull;

        switch (level) {
        case 0:
            if (try_match(ctx, "|")) op = opPipe;
            else if (try_match(ctx, "|>")) op = opWrite;
            break;

        case 1:
            if (try_match(ctx, "&&")) op = opLogicalAnd;
            else if (try_match(ctx, "||")) op = opLogicalOr;
            break;

        case 2:
            if (try_match(ctx, "==")) op = opEqual;
            else if (try_match(ctx, "!=")) op = opNotEqual;
            else if (try_match(ctx, "<")) op = opLess;
            else if (try_match(ctx, "<=")) op = opLessEqual;
            else if (try_match(ctx, ">")) op = opGreater;
            else if (try_match(ctx, ">=")) op = opGreaterEqual;
            break;

        case 3:
            if (try_match(ctx, "+")) op = opAdd;
            else if (try_match(ctx, "-")) op = opSubtract;
            else if (try_match(ctx, "++")) op = opConcat;
            break;

        case 4:
            if (try_match(ctx, "*")) op = opMultiply;
            else if (try_match(ctx, "/")) op = opDivide;
            else if (try_match(ctx, "%")) op = opModulo;
            break;

        default:
            break;
        }

        /*No operator of this level follows, so this production is done*/
        if (!op)
            break;

        /*Fold the right hand side (also one level up) into the tree built
          so far, which is what makes the operators left associative*/
        ast* rhs = parseBOP(ctx, level+1);
        node = astCreateBOP(node, rhs, op);
    }

    return node;
}
/**
 * Atom = ( "(" [ Expr [{ "," Expr }] ] ")" )
 *      | ( "[" [ Expr [{ "," Expr }] ] "]" )
 *      | FnLit | "true" | "false" | <Int> | <Str>
 *      | Path | <Symbol> | <File>
 *
 * An ordinary token is taken as a path if isPathToken accepts it, else as
 * a symbol if it is found in the current scope, else as a file literal.
 * Anything else is reported as an error and yields an invalid node.
 */
static ast* parseAtom (parserCtx* ctx) {
    /*Parenthesized: unit literal, a bracketed expression, or a tuple*/
    if (try_match(ctx, "(")) {
        ast* inner;

        if (see(ctx, ")"))
            inner = astCreateUnitLit();

        else {
            inner = parseExpr(ctx);

            /*A comma after the first expression turns it into a tuple literal*/
            if (see(ctx, ",")) {
                vector(ast*) elements = vectorInit(3, malloc);
                vectorPush(&elements, inner);

                while (try_match(ctx, ","))
                    vectorPush(&elements, parseExpr(ctx));

                inner = astCreateTupleLit(elements);
            }
        }

        match(ctx, ")");
        return inner;
    }

    /*List literal, possibly empty*/
    if (try_match(ctx, "[")) {
        vector(ast*) elements = vectorInit(4, malloc);

        if (waiting_for(ctx, "]")) {
            do {
                vectorPush(&elements, parseExpr(ctx));
            } while (try_match(ctx, ","));
        }

        ast* list = astCreateListLit(elements);
        match(ctx, "]");
        return list;
    }

    /*Function literal: the backslash is left for parseFnLit to consume*/
    if (see(ctx, "\\"))
        return parseFnLit(ctx);

    /*Boolean literal*/
    if (see(ctx, "true") || see(ctx, "false")) {
        ast* lit = astCreateBoolLit(see(ctx, "true"));
        accept(ctx);
        return lit;
    }

    /*Integer literal*/
    if (see_kind(ctx, tokenIntLit)) {
        ast* lit = astCreateIntLit(atoi(ctx->current.buffer));
        accept(ctx);
        return lit;
    }

    /*String literal*/
    if (see_kind(ctx, tokenStrLit)) {
        ast* lit = astCreateStrLit(ctx->current.buffer);
        accept(ctx);
        return lit;
    }

    /*Ordinary word: path, known symbol, or file literal*/
    if (see_kind(ctx, tokenNormal)) {
        ast* word;
        sym* symbol;

        if (isPathToken(ctx->current.buffer))
            word = parsePath(ctx);

        else if ((symbol = symLookup(ctx->scope, ctx->current.buffer)))
            word = astCreateSymbol(symbol);

        else
            word = astCreateFileLit(ctx->current.buffer);

        accept(ctx);
        return word;
    }

    expected(ctx, "expression");
    return astCreateInvalid();
}
void decl (int kind) { //A C declaration comes in three forms: // - Local decls, which end in a semicolon and can have an initializer. // - Parameter decls, which do not and cannot. // - Module decls, which end in a semicolon unless there is a function body. bool fn = false; bool fn_impl = false; int local; next(); while (try_match("*")) ; //Owned (freed) by the symbol table char* ident = strdup(buffer); next(); //Functions if (try_match("(")) { if (kind == decl_module) new_scope(); //Params if (waiting_for(")")) do { decl(decl_param); } while (try_match(",")); match(")"); new_fn(ident); fn = true; //Body if (see("{")) { require(kind == decl_module, "a function implementation is illegal here\n"); fn_impl = true; function(ident); } //Add it to the symbol table } else { if (kind == decl_local) { local = new_local(ident); } else (kind == decl_module ? new_global : new_param)(ident); } //Initialization if (see("=")) require(!fn && kind != decl_param, fn ? "cannot initialize a function\n" : "cannot initialize a parameter\n"); if (kind == decl_module) { fputs(".section .data\n", output); if (try_match("=")) { if (token == token_int) fprintf(output, "%s: .quad %d\n", ident, atoi(buffer)); else error("expected a constant expression, found '%s'\n"); next(); //Static data defaults to zero if no initializer } else if (!fn) fprintf(output, "%s: .quad 0\n", ident); fputs(".section .text\n", output); } else if (try_match("=")) { expr(0); fprintf(output, "mov dword ptr [ebp%+d], eax\n", offsets[local]); } if (!fn_impl && kind != decl_param) match(";"); }