/* OperatorAdd() - register an operator declaration.
 *
 * Decl, Precedence, Associativity, List and Pattern are forwarded
 * unchanged to OperatorAdd_().
 *
 * On the very first call (tracked by a function-local static flag) two
 * implicit operators are registered before the caller's operator:
 *
 *   1. the EOF token, and
 *   2. a fake "<id>" identifier token standing for an operand.
 */
void OperatorAdd(TToken Decl, int Precedence, int Associativity, SYMBOLS List, const char* Pattern)
{
    static int Initialized;

    /* We implicitly add the EOF token as an operator before any
     * other operators. It's an appropriate place to do so, since
     * $ is more or less the highest precedence operator (and higher
     * precedence is associated with operators defined earlier in
     * the BLACC input).
     */
    if(!Initialized)
    {
        TToken      Operand     = TokenFake(TK_IDENT, "<id>");
        SYMBOLS     EofList     = NULL;
        SYMBOLS     OperandList = NULL;
        TSymbol*    EofSym;
        TSymbol*    OperandSym;

        /* first, add the $ operator (our logical EOF) */
        EofSym = SymbolFind(EOFToken);
        assert(EofSym != NULL);
        /* reuse the symbol we just validated instead of looking it up again */
        EofList = SymbolListAdd(EofList, EofSym);
        OperatorAdd_(EOFToken, 99999, TK_NONASSOC, EofList, ".");

        /* second, add the operand "operator" */
        OperandSym  = SymbolAdd(Operand, SYM_TERMINAL);
        OperandList = SymbolListAdd(OperandList, OperandSym);
        OperatorAdd_(Operand, 0, TK_NONASSOC, OperandList, ".");

        Initialized = TRUE;
    }

    OperatorAdd_(Decl, Precedence, Associativity, List, Pattern);
}
/* AcceptLhs() - accept the left-hand side of a production.
 *
 * Looks Token up in the symbol table and returns the matching symbol.
 *   - Unknown name: a new non-terminal is created and its Lhs field is
 *     set to Token.
 *   - Known terminal: ERROR_TERM_USED_AS_NONTERM is reported.
 *   - Known non-terminal whose Lhs.Text is already set: a notice is
 *     reported, but only when Globals.Verbose is set.
 * In every case the found (or newly created) symbol is returned.
 */
static TSymbol* AcceptLhs(TToken Token)
{
    TSymbol* Found = SymbolFind(Token);

    /* never seen before: define it as a non-terminal right here */
    if(Found == NULL)
    {
        Found = SymbolNewNonTerm(Token);
        Found->Lhs = Token;
        return Found;
    }

    /* a terminal cannot start a production */
    if(SymbolGetBit(Found, SYM_TERMINAL))
    {
        SyntaxError(ERROR_TERM_USED_AS_NONTERM, Token,
                    "Expecting a non-terminal to start a production, "
                    "but '%.*s' was previously defined as a terminal.\n",
                    Found->Name.TextLen, Found->Name.Text);
        return Found;
    }

    assert(SymbolGetBit(Found, SYM_TERMINAL) == FALSE);

    /* ??? report line previously defined on! */
    if(Found->Lhs.Text && Globals.Verbose)
        SyntaxError(ERROR_NONE, Token,
                    "Non-terminal previously defined. Legal, but mildly unusual.\n");

    return Found;
}
/* ParseToken() - parse a token declaration.
 *
 *     %token (ident literal?)+ \n
 *
 * The incoming Token argument is overwritten immediately with the next
 * peeked input token; parsing continues for as long as identifiers or
 * quoted literals follow.
 *
 *   - Each identifier defines a new terminal (an error is reported if
 *     the name is already in the symbol table).
 *   - Each quoted literal is attached, via LiteralAdd(), to the terminal
 *     named by the identifier directly before it.
 */
static void ParseToken(TToken Token)
{
    TSymbol*    NewTerminal = NULL;

    Token = InputPeekNext();
    if(Token.Type != TK_IDENT && Token.Type != TK_QUOTED)
        /* the '%' must be doubled: the message is a printf-style format string */
        SyntaxError(ERROR_MISSING_NAME_IN_TOKEN_DECL, Token,
                    "Expecting identifier after '%%token'.\n");

    while(Token.Type == TK_QUOTED || Token.Type == TK_IDENT)
    {
        if(Token.Type == TK_QUOTED && NewTerminal == NULL)
            SyntaxError(ERROR_MISSING_NAME_IN_TOKEN_DECL, Token,
                        "Missing token name in front of quoted string.\n");

        if(Token.Type == TK_IDENT)
        {
            NewTerminal = SymbolFind(Token);
            if(NewTerminal)
            {
                SyntaxError(ERROR_TERM_ALREADY_DEFINED, Token,
                            "'%.*s' was previously defined.\n",
                            Token.TextLen, Token.Text);
            }
            else
                NewTerminal = SymbolNewTerm(Token);
        }
        else    /* the loop condition leaves only TK_QUOTED here */
        {
            TSymbol*    PreviousDef;

            PreviousDef = LiteralFind(Token);
            /* ??? Does this really work? Are we guaranteed that context of a token name will
             * ??? be the line it was first defined on?
             */
            if(PreviousDef)
            {
                TContext    Previous;

                Previous = InputGetContext(PreviousDef->Name.Text);
                assert(Previous.LineNumber != 0);
                SyntaxError(ERROR_LITERAL_ALREADY_DEFINED, Token,
                            "This literal was previously assigned a token name of '%.*s' on line %d:\n%.*s\n",
                            PreviousDef->Name.TextLen, PreviousDef->Name.Text,
                            Previous.LineNumber,
                            Previous.LineLen, Previous.LineStart);
            }
            /* NOTE(review): NewTerminal is NULL here if the "missing token
             * name" error above returned - confirm SyntaxError() does not
             * return for that error code.
             */
            LiteralAdd(NewTerminal, Token);
            NewTerminal = NULL;     /* a literal binds to exactly one preceding name */
        }

        InputGetNext(); /* eat token we already peeked at */
        Token = InputPeekNext();
    }
}
/* AcceptRhs() - accept a right-hand-side production item.
 *
 * On the right-hand side of a production, we just found either an
 * identifier or a quoted string. If it's an identifier, then it's
 * either a symbolic name for a terminal, or a non-terminal. If it's
 * a quoted string, then it's an unnamed terminal.
 *
 * LhsToken is only used to name the production in error messages.
 * Token is the item being accepted; Rule is the rule it is appended to.
 *
 * We return the TSymbol associated with Token (created or found).
 * If no symbol could be resolved (an undeclared multi-character literal
 * was reported, or Token was neither an identifier nor a quoted string),
 * nothing is added to Rule and NULL is returned rather than
 * dereferencing a null symbol.
 */
static TSymbol* AcceptRhs(TToken LhsToken, TToken Token, TRule* Rule)
{
    TSymbol*    Symbol = NULL;

    if(Token.Type == TK_IDENT || Token.Type == TK_QUOTED)
    {
        Symbol = SymbolFind(Token);
        if(Symbol == NULL)
        {
            if(Token.Type == TK_IDENT)  /* if unknown identifier, presume non-terminal */
                Symbol = SymbolNewNonTerm(Token);
            else                        /* only TK_QUOTED can reach this branch */
            {
                int Value = IsCharLiteral(Token);

                if(Value >= 0)
                {
                    /* character literal: implicitly define it as a terminal */
                    Symbol = SymbolNewTerm(Token);
                    SymbolSetValue(Symbol, Value);
                }
                else
                {
                    /* any other literal must come from a %token declaration */
                    Symbol = LiteralFind(Token);
                    if(Symbol == NULL)
                    {
                        TToken  Clean = Unquote(Token);

                        SyntaxError(ERROR_UNDECL_LITERAL_IN_PROD, Token,
                                    "Literal '%.*s' in production of '%.*s' should be "
                                    "defined with a %%token declaration.\n",
                                    Clean.TextLen, Clean.Text,
                                    LhsToken.TextLen, LhsToken.Text);
                    }
                }
            }
        }
    }
    else
        assert(FALSE);

    /* Nothing resolved: the error (if any) has been reported above. */
    if(Symbol == NULL)
        return NULL;

    RuleAddSymbol(Rule, Symbol, Token);
    Symbol->UsedInRule = TRUE;

    return Symbol;
}
/* ParseOperand() - parse operand declaration
 *
 *     %operand ident*
 *
 * The Token argument is overwritten with the next peeked input token.
 * Every consecutive identifier is looked up (and created as a terminal
 * if it does not exist yet), then flagged by setting its Operand field.
 */
static void ParseOperand(TToken Token)
{
    for(Token = InputPeekNext(); Token.Type == TK_IDENT; Token = InputPeekNext())
    {
        TSymbol* Terminal = SymbolFind(Token);

        if(!Terminal)
            Terminal = SymbolNewTerm(Token);
        Terminal->Operand = TRUE;

        InputGetNext(); /* consume the identifier we only peeked at */
    }
}
/* SymbolNewTerm() - create a new terminal symbol for Token.
 *
 * Reports ERROR_TERM_ALREADY_DEFINED through ErrorExit() when a symbol
 * for Token already exists. Otherwise the symbol is added with the
 * SYM_TERMINAL bit, appended to Globals.Terminals, and its First list
 * is seeded with the symbol itself.
 *
 * Returns the newly created symbol.
 */
TSymbol* SymbolNewTerm(TToken Token)
{
    TSymbol* Terminal;

    if(SymbolFind(Token) != NULL)
    {
        ErrorExit(ERROR_TERM_ALREADY_DEFINED,
                  "Terminal already defined: '%.*s'\n",
                  Token.TextLen, Token.Text);
    }

    Terminal = SymbolAdd(Token, SYM_TERMINAL);
    assert(Terminal != NULL);

    Globals.Terminals = SymbolListAdd(Globals.Terminals, Terminal);

    /* Slightly hackish, but convenient: FIRST() of a terminal is
     * simply the terminal itself, so record that right away.
     */
    Terminal->First = SymbolListAdd(Terminal->First, Terminal);

    return Terminal;
}
/* SkipTest() - skip over a %test declaration.
 *
 * We don't do anything with test data during the initial parsing phase,
 * so this function just skips over a %test declaration, assuming it is
 * syntactically correct.
 *
 * Tokens are consumed until the closing TK_TEST token. Along the way
 * each identifier (looked up as a symbol) or quoted string (looked up as
 * a literal) is checked to be a defined terminal; any other token kind
 * is reported as an error. TestStart is the opening %test token and is
 * only used to locate the error when the input ends before the closing
 * %test.
 */
static void SkipTest(TToken TestStart)
{
    TToken      Token;

    Token = InputGetNext();
    while(Token.Type != TK_TEST)
    {
        switch(Token.Type)
        {
            case TK_EOF :
                /* '%' doubled: the message is a printf-style format string */
                SyntaxError(ERROR_EOF_IN_TEST, TestStart,
                            "Found no terminating '%%test' for this test.");
                /* Nothing more to read; stop here so we cannot spin on EOF
                 * should SyntaxError() return.
                 */
                return;
            case TK_IDENT :
            case TK_QUOTED :
            {
                TSymbol*    Symbol;

                if(Token.Type == TK_IDENT)
                    Symbol = SymbolFind(Token);
                else
                    Symbol = LiteralFind(Token);
                if(!Symbol)
                    SyntaxError(ERROR_UNDEF_SYMBOL_IN_TEST, Token,
                                "Undefined symbol inside %%test.");
                else if(SymbolGetBit(Symbol, SYM_TERMINAL) == FALSE)
                    SyntaxError(ERROR_NONTERM_IN_TEST, Token,
                                "Only terminal symbols allowed in %%test data.");
                break;
            }
            default:
                SyntaxError(ERROR_BAD_TOKEN_IN_TEST, Token,
                            "Expecting terminal (literal or symbolic) inside %%test.");
                break;
        }
        Token = InputGetNext();
    }
}
/* OperatorToSymbol() - from a token representing an operator, produce a TSymbol
 *
 * An operator declaration might look like this:
 *     %left X++
 * The operator text ('++' above) is first looked up among the literals
 * already declared, e.g. through
 *     %token TK_SOMETHING_OR_OTHER '++'
 * If that fails and the operator is exactly one character long, the
 * quoted single-character terminal is looked up, and created on demand
 * with the character as its value.
 *
 * Returns NULL for an undeclared multi-character operator; the caller is
 * expected to treat that as a syntax error.
 */
static TSymbol* OperatorToSymbol(TToken Token)
{
    TToken      Quoted;
    TSymbol*    Sym;

    /* already a declared literal, or too long to be created implicitly */
    Sym = LiteralFind(Token);
    if(Sym != NULL || Token.TextLen != 1)
        return Sym;

    /* Build the quoted spelling of the one-character operator. If the
     * symbol turns out to exist already, the fake token merely wasted a
     * few bytes that are not released until InputDestroy() is called.
     */
    Quoted = TokenFake(TK_QUOTED, "'%.*s'", Token.TextLen, Token.Text);
    Sym    = SymbolFind(Quoted);
    if(Sym == NULL)
    {
        Sym = SymbolNewTerm(Quoted);
        SymbolSetValue(Sym, Token.Text[0]);
    }

    return Sym;
}
/* GenerateParseTables() - emit the parse tables for the grammar.
 *
 * Allocates a TParseTables and fills it section by section. IP is the
 * running write offset; every Store8/Store16/StoreStr call returns the
 * offset following what it wrote. After each section its byte length
 * (IP - IPStart) is stored in the matching BLC_HDR_* header slot.
 *
 * Sections, in emission order:
 *   1. terminal symbol table    (token value + name string, 0 sentinel)
 *   2. nonterminal symbol table (name strings, 0 sentinel)
 *   3. entry table              (one 16-bit slot per rule of the start symbol)
 *   4. SELECT bodies            (one per nonterminal in LLNonTerms)
 *   5. rule opcodes             (one opcode sequence per entry in Tables->Rules)
 *
 * Rule addresses are not known until section 5 has been emitted, so
 * references to rules are recorded with MarkPatch() and resolved at the
 * end by BackPatch(), relative to the start of the rule opcode section.
 *
 * Returns the populated tables. Progress and debug output is written to
 * stdout/stderr along the way.
 */
TParseTables* GenerateParseTables(void)
{
    size_t          IP, IPStart;
    int             NNonTerms, NSymbols;
    int             iRule, iTerminal;
    SymIt           NonTerm;
    TSymbol*        Symbol;
    TParseTables*   Tables;
    SYMBOLS         LLNonTerms;
    int             Reduced;
    int             TerminalsEmitted;

    printf("GenerateParseTables() begins\n");
    Tables = NEW(TParseTables);
    assert(Tables != NULL);

    Tables->LLNonTerms = LLNonTerms = GetLLNonTerms(); /* get list of only nonterminals we care about */

    /* assign integers to any undefined tokens */
    DefineTokens(Tables);

    IP = BLC_HDR_SIZE; /* skip over initial header containing 5 two-byte table sizes */

    /* NOTE(review): NNonTerms is assigned here but never read again in this function. */
    NNonTerms = SymbolListCount(LLNonTerms);

    /* generate terminal symbol table */
    IPStart = IP;
    /* NOTE(review): TerminalsEmitted is only counted, never read in this function. */
    TerminalsEmitted = 0;
    for(iTerminal=Tables->MinTokenVal; iTerminal <= Tables->MaxTokenVal; ++iTerminal)
    {
        /* token values with no symbol attached are simply skipped */
        Symbol = SymbolFromValue(iTerminal);
        if(Symbol)
        {
            /* store token ID, followed by its null-terminated string */
            IP = Store8(Tables, IP, iTerminal);
            IP = StoreStr(Tables, IP, SymbolStr(Symbol));
            ++TerminalsEmitted;
        }
    }
    IP = Store8(Tables, IP, 0); /* sentinel byte */
    Store16(Tables, BLC_HDR_TERMSYMTAB_SIZE, IP - IPStart);

    /* generate nonterminal symbol table */
    IPStart = IP;
    NonTerm = SymItNew(LLNonTerms);
    while(SymbolIterate(&NonTerm))
    {
        IP = StoreStr(Tables, IP, SymbolStr(NonTerm.Symbol));
    }
    IP = Store8(Tables, IP, 0); /* sentinel byte */
    Store16(Tables, BLC_HDR_NONTERMSYMTAB_SIZE, IP - IPStart);

    /* for each production of <start>, add an entry point */
    IPStart = IP;
    Symbol = SymbolStart();
    assert(Symbol != NULL);
    for(iRule = 0; iRule < RuleCount(Symbol); ++iRule)
    {
        /* actual value will have to be backpatched */
        /* a zero placeholder is written now; MarkPatch records where it lives */
        MarkPatch(IP, AddRule(Tables, Symbol->Rules[iRule], Symbol));
        IP = Store16(Tables, IP, 0);
    }
    Store16(Tables, BLC_HDR_ENTRYTABLE_SIZE, IP - IPStart);

    /* for each nonterminal, generate its SELECT body*/
    IPStart = IP;
    Tables->SelSectOfs = IPStart;
    NonTerm = SymItNew(LLNonTerms);
    while(SymbolIterate(&NonTerm))
        IP = GenerateSelect(Tables, IP, NonTerm.Symbol);
    Store16(Tables, BLC_HDR_SELECTTABLE_SIZE, IP - IPStart);

    Dump("Generate opcodes for each unique rule\n");
    /* generate opcodes for each unique rule */
    IPStart = IP;
    Tables->RuleSectOfs = IPStart;
    /* Tables->NRules is re-read every iteration; the AddRule() call in the
     * loop body is handed Tables - presumably it can append rules that then
     * get processed here too (TODO confirm).
     */
    for(iRule = 0; iRule < Tables->NRules; ++iRule)
    {
        int     iProdItem;
        TRule*  Rule = Tables->Rules[iRule];

        /* remember where this rule's opcodes start, for the backpatch pass */
        IntAdd(&Tables->RuleOffsets, IP);
        Reduced = FALSE; /* have not performed a reduction for this rule yet */
        NSymbols = SymbolListCount(Rule->Symbols);

        /* debug trace of the rule being emitted */
        fprintf(stdout, "-> ");
        SymbolListDump(stdout, Rule->Symbols, " ");
        fprintf(stdout, "\n");

        for(iProdItem = 0; iProdItem < NSymbols; ++iProdItem)
        {
            /* note: this local shadows the function-level Symbol */
            TSymbol* Symbol = SymbolListGet(Rule->Symbols, iProdItem);

            if(SymbolGetBit(Symbol, SYM_TERMINAL))
            {
                /* terminal: MATCH <token value> */
                IP = Store8(Tables, IP, BLCOP_MATCH);
                IP = Store8(Tables, IP, Symbol->Value);
                /* matching EOF is followed by HALT, and no REDUCE is emitted */
                if(Symbol == SymbolFind(EOFToken))
                {
                    IP = Store8(Tables, IP, BLCOP_HALT);
                    Reduced = TRUE;
                }
            }
            else if(SymbolIsAction(Symbol))
            {
                int FinalAction, ArgCount;

                assert(Rule->RuleId < 255);
                assert(iProdItem < 255);

                /* An action is "final" when it is the last item of the rule,
                 * or the second-to-last item of a rule flagged TailRecursive.
                 */
                FinalAction = FALSE;
                if(iProdItem == NSymbols-1)
                    FinalAction = TRUE;
                else if(Rule->TailRecursive && iProdItem == NSymbols-2)
                    FinalAction = TRUE;

                /* argument count = number of items preceding the action,
                 * plus one when TailRecursive == 2
                 */
                ArgCount = iProdItem;
                if(Rule->TailRecursive == 2)
                    ++ArgCount;
                fprintf(stdout, "iProdItem=%d, NSymbols=%d,TailRecursive=%d,FinalAction=%d\n", iProdItem, NSymbols, Rule->TailRecursive, FinalAction);

                /* opcode, action number, argument count */
                IP = Store8(Tables, IP, FinalAction?BLCOP_ACTRED:BLCOP_ACTION8);
                IP = Store8(Tables, IP, Symbol->Action->Number);
                IP = Store8(Tables, IP, ArgCount);
                Symbol->Action->ArgCount = ArgCount;
                /* a final action emits ACTRED, so no separate REDUCE follows */
                if(FinalAction)
                    Reduced = TRUE;
            }
            else if(SymbolListContains(LLNonTerms, Symbol)) /* non-terminal */
            {
                int iSymbol = SymbolListContains(LLNonTerms, Symbol);

                if(RuleCount(Symbol) > 1)
                {
                    /* several rules to choose from: emit a select opcode
                     * followed by the 16-bit SelectOffset of the nonterminal
                     */
                    if(Symbol->Name.Text[0] == '`') /* if a tail recursive rule... */
                    {
                        IP = Store8(Tables, IP, BLCOP_TAILSELECT);
                        IP = Store8(Tables, IP, iProdItem);
                        Reduced = TRUE; /* TAILSELECT opcode must do the reducing to shuffle stack correctly */
                    }
                    else
                        IP = Store8(Tables, IP, BLCOP_LLSELECT);
                    IP = Store16(Tables, IP, Symbol->SelectOffset);
                }
                /* else, only 1 rule to choose from, so just transfer control to that rule! */
                else
                {
                    /* note: this local shadows the outer loop index iRule */
                    int iRule = AddRule(Tables, Symbol->Rules[0], Symbol);

                    IP = Store8(Tables, IP, BLCOP_CALL);
                    /* NOTE(review): unlike the entry table above, no Store16
                     * placeholder is written here; the return value of
                     * MarkPatch is taken as the new IP - confirm it advances
                     * past the 16-bit slot.
                     */
                    IP = MarkPatch(IP, iRule);
                }
                assert(iSymbol>0); /* ???TODO why is this???*/
            }
            else // else, it's an operator trigger
            {
                assert(Symbol->LR0 != NULL);
                fprintf(stderr, "watch out: we don't handle LR(0) yet!\n");
            }
        }

        /* nothing above reduced for this rule: REDUCE <symbol count> */
        if(!Reduced)
        {
            IP = Store8(Tables, IP, BLCOP_REDUCE);
            IP = Store8(Tables, IP, NSymbols);
        }
    }
    Store16(Tables, BLC_HDR_RULEOPCODE_SIZE, IP - IPStart);

    /* resolve every recorded rule reference; offsets are made relative to
     * the start of the rule opcode section (IPStart)
     */
    DumpVerbose("Backpatch opcode addresses.\n");
    for(iRule = 0; iRule < Tables->NRules; ++iRule)
        BackPatch(Tables, iRule, Tables->RuleOffsets.v[iRule] - IPStart);
    DumpVerbose("Backpatching complete.\n");

    /* section sizes and patched offsets are stored as 16-bit values */
    assert(IP < (1024*64));
    // Opcodes = realloc(Opcodes, IP * sizeof(Opcodes[0]));
    Tables->NOpcodes = IP;
    Dump("GenerateParseTables() returns after %d opcodes\n", Tables->NOpcodes);
    return Tables;
}
/*
 * Fills the file-level scratch token `temp_expr_token` from a scanner token.
 *
 * Every token starts out as a TERM with undefined variable/data type.
 * After that, depending on token->type:
 *   - identifier: looked up in funcContext first, then in mainContext.
 *     A function symbol retags the token as TT_function; a variable fills
 *     in LOCAL/GLOBAL, its index as offset and its data type. An unknown
 *     identifier raises ERR_UndefVarOrFunction.
 *   - real / integer / string / bool literal: stored as an initialized
 *     CONST of the matching data type.
 *   - minus: retagged as TT_unaryMinus when check_unary_minus() says so.
 *   - anything else: only the common defaults are applied.
 */
static void convert_to_ExprToken(Token *token, ExprTokenVector *expr_vector)
{
    Symbol *sym = NULL;

    assert(token); // just in case

    // defaults shared by every token kind
    temp_expr_token.type = TERM;
    temp_expr_token.handle_start = false;
    temp_expr_token.token = token;
    temp_expr_token.E.var_type = UNDEF_;
    temp_expr_token.E.data_type = UNDEF;

    switch (token->type)
    {
        case TT_identifier:
            // function-local scope first: <loc_var>, <arg> or <function>
            sym = SymbolFind(funcContext, token->str.data);
            if (sym != NULL)
            {
                if (check_id_function(sym))
                {
                    token->type = TT_function;
                }
                else
                {
                    temp_expr_token.E.var_type = LOCAL;
                    temp_expr_token.E.offset = sym->index;
                    temp_expr_token.E.data_type = (DataType) sym->type;
                }
                break;
            }

            // then the main scope: <glob_var> or <function>
            sym = SymbolFind(mainContext, token->str.data);
            if (sym == NULL)
            {
                setError(ERR_UndefVarOrFunction);
                return;
            }
            if (sym->type == T_FunPointer)
            {
                token->type = TT_function;
            }
            else
            {
                temp_expr_token.E.var_type = GLOBAL;
                temp_expr_token.E.offset = sym->index;
                temp_expr_token.E.data_type = (DataType) sym->type;
            }
            break;

        case TT_real:
            temp_expr_token.E.var_type = CONST;
            temp_expr_token.E.data_type = DOUBLE;
            temp_expr_token.E.double_ = token->r;
            temp_expr_token.E.initialized = true;
            break;

        case TT_integer:
            temp_expr_token.E.var_type = CONST;
            temp_expr_token.E.data_type = INT;
            temp_expr_token.E.int_ = token->n;
            temp_expr_token.E.initialized = true;
            break;

        case TT_string:
            temp_expr_token.E.var_type = CONST;
            temp_expr_token.E.data_type = STRING;
            temp_expr_token.E.str = &(token->str);
            temp_expr_token.E.initialized = true;
            break;

        case TT_bool:
            temp_expr_token.E.var_type = CONST;
            temp_expr_token.E.data_type = BOOL;
            temp_expr_token.E.bool_ = (bool)token->n;
            temp_expr_token.E.initialized = true;
            break;

        case TT_minus:
            if (check_unary_minus(expr_vector))
            {
                token->type = TT_unaryMinus;
            }
            break;

        default:
            // nothing beyond the defaults
            break;
    }
}
// 3, 4, 6, 8 .. tokens static inline void reduce_handle_function(THandle handle) { int num_of_commas = 0; ExprToken *temp = handle.first; if ((++temp)->token->type != TT_leftBrace) { setError(ERR_Reduction); return; } temp++; Symbol *id = SymbolFind(mainContext, handle.first->token->str.data); Context *context = id->funCont; return_value_data_type = (DataType) context->returnType; for (int32_t i = 0; i < context->argCount; i++) // check arguments { if (temp->E.data_type != context->arg[i]->type) { setError(ERR_TypeCompatibilityArg); return; } if (i != context->argCount - 1 && (++temp)->token->type != TT_comma) { setError(ERR_ArgCount); return; } temp++; } // check the end of handle if ((temp)->token->type != TT_rightBrace || temp != handle.last) { setError(ERR_Reduction); return; } // reserve place for return value b.var_type = CONST; // for pushing initialized flag b.initialized = false; a.offset = MY_OFFSET++; generateInstruction(PUSH, &a, &b, &c); // b = pushed operand if (id->index >= 0) // normal functions { for (int32_t i = 0; i < context->argCount; i++) // push arguments in reversed order { temp--; b = temp->E; b.initialized = true; a.offset = MY_OFFSET++; generateInstruction(PUSH, &a, &b, &c); // b = pushed operand, a = local dst temp--; } } else // built-in functions { for (int32_t i = 0; i < context->argCount; i++) // push arguments in reversed order { temp--; b = temp->E; b.initialized = true; a.offset = MY_OFFSET++; generateInstruction(PUSHX, &a, &b, &c); // b = pushed operand, a = local dst temp--; } } if (id->stateFunc == FS_Declared) int64_tVectorAppend(id->adressVector, tape->used); a.offset = id->index; c.offset = MY_OFFSET - 1; // generate CALL instruction switch (a.offset) { case -1: generateInstruction(CALL_LENGTH, &a, &b, &c); break; case -2: generateInstruction(CALL_COPY, &a, &b, &c); break; case -3: generateInstruction(CALL_FIND, &a, &b, &c); break; case -4: generateInstruction(CALL_SORT, &a, &b, &c); break; default: b.int_ = 
context->locCount; generateInstruction(CALL, &a, &b, &c); break; } // reducing tokenvector MY_OFFSET -= context->argCount; handle.first->handle_start = false; handle.first->type = NONTERM; handle.first->E.data_type = return_value_data_type; handle.first->E.var_type = LOCAL; handle.first->E.offset = MY_OFFSET - 1; if (context->argCount > 1) num_of_commas = context->argCount - 1; ExprTokenVectorPopMore(handle.expr_vector, context->argCount + 2 + num_of_commas); }