void Print(std::wostream& os, MMap& map, Grammar& g) { for (auto it = map.begin(); it != map.end(); it++) { os << L"[" << it->m_pNotTerminal->GetName() << L", " << it->m_pTerminal->GetName() << L"]"; //Print(os, it->m_pTerminal); os << L" : " ; const Production& production = g.GetProduction(it->m_RuleIndex); Print(os , production); os << std::endl; } }
//controi todos os first(A) first(a) da gramatica g FirstSets BuildFirstSets3(const Grammar& g) { FirstSets first; //if X is a terminal: FIRST (X) = X //X is epsilon: FIRST (X) = epsilon for (int k = 0 ; k < g.GetNumOfGrammarSymbols(); k++) { if (g.GetSymbol(k)->IsTerminal()) { std::set<const GrammarSymbol*> fs; fs.insert(g.GetSymbol(k)); first.AddSet(g.GetSymbol(k), std::move(fs)); } } bool added = true; while (added) { added = false; for (int i = 0 ; i < g.GetNumOfProductions(); i++) { const Production& production = g.GetProduction(i); const GrammarSymbol* X = production.GetLeftSymbol(); int epsilonCount = 0; for (int k = 0 ; k < production.GetNumOfRightSymbols(); k++) { // Put FIRST (Y1) - {epsilon} into FIRST (X) if (k == 0) { //Copia todos (-epsilon) de Y1first para Y1first(X) auto Y1first = first.CreateGet(production.GetRightSymbol(k)); if (Y1first.find(g.epsilon()) != Y1first.end()) { epsilonCount = 1; } for (auto it = Y1first.begin(); it != Y1first.end(); it++) { if (*it != g.epsilon()) { const GrammarSymbol* ptemp = *it; added = added || first.AddToSet(X, ptemp); } } continue; } // O anterior tinha epsilon? auto YKfirst = first.CreateGet(production.GetRightSymbol(k - 1)); if (YKfirst.find(g.epsilon()) != YKfirst.end()) { epsilonCount++; //Copia todos (-epsilon) para first(X) auto Y1first = first.CreateGet(production.GetRightSymbol(k)); for (auto it = Y1first.begin(); it != Y1first.end(); it++) { if (*it != g.epsilon()) { const GrammarSymbol* ptemp = *it; added = added || first.AddToSet(X, ptemp); } } } else { //se o anterior nao tinha epsilon ja pode parar break; } } // se todos eram epsilon entao adicionar epsilon TODO -1 if (epsilonCount != 0 && epsilonCount == production.GetNumOfRightSymbols()) { added = added || first.AddToSet(X, g.epsilon()); } } } return first; }
// Builds the predictive parsing table M for grammar g.
//
// For every production A -> alpha (rule index k):
//   * for each terminal a (other than epsilon) in FIRST(alpha), the entry
//     M[A, a] = k is recorded;
//   * if epsilon is in FIRST(alpha), the entry M[A, b] = k is recorded for
//     each terminal b (other than epsilon) in FOLLOW(A).
// FIRST is taken for the right-hand side of the specific production (via
// GetFirstSet), not for A as a whole, because FIRST(A) merges all of A's
// alternatives.
//
// Progress is traced to std::wcout. An std::exception is thrown when an
// existing entry is found whose non-terminal name differs from the current
// production's left symbol.
// NOTE(review): whether that mismatch can ever occur depends on how MKey is
// ordered (not visible here) - confirm that conflicts are really detected.
MMap BuildMTable(const FirstSets& first, const FollowSets& follow, Grammar& g)
{
    MMap table;
    std::wcout << L"\n Building M table \n";

    for (int ruleIndex = 0; ruleIndex < g.GetNumOfProductions(); ++ruleIndex)
    {
        const Production& production = g.GetProduction(ruleIndex);
        Print(std::wcout, production);

        const auto lhs = production.GetLeftSymbol();
        const std::set<const GrammarSymbol*> firstOfRhs = GetFirstSet(first, g, production);

        for (const GrammarSymbol* symbol : firstOfRhs)
        {
            if (symbol->IsTerminal() && symbol != g.epsilon())
            {
                // Rule 1: M[A, a] = alpha for each terminal a in FIRST(alpha).
                std::wcout << L"[" << lhs->GetName() << L"," << symbol->GetName() << L"] = ";
                Print(std::wcout, production);

                auto existing = table.find(MKey(lhs, symbol, ruleIndex));
                if (existing == table.end())
                {
                    // No entry yet: create it.
                    std::wcout << std::endl;
                    table.insert(MKey(lhs, symbol, ruleIndex));
                }
                else if (existing->m_pNotTerminal->GetName() != lhs->GetName())
                {
                    std::string strError("Multiple entries ");
                    strError += to_utf8_string(lhs->GetName());
                    strError += " -> ";
                    strError += to_utf8_string(symbol->GetName());
                    throw std::exception(strError.c_str());
                }
                // Otherwise an equal entry already exists; nothing to do.
            }
            else if (symbol == g.epsilon())
            {
                // Epsilon never shows up in the input stream, so the
                // production is selected on every terminal of FOLLOW(A).
                const auto followOfLhs = follow.Get(lhs);

                for (const GrammarSymbol* b : followOfLhs)
                {
                    if (!b->IsTerminal() || b == g.epsilon())
                    {
                        continue;
                    }

                    std::wcout << L"[" << lhs->GetName() << L"," << b->GetName() << L"] = ";
                    Print(std::wcout, production);

                    auto existing = table.find(MKey(lhs, b, ruleIndex));
                    if (existing == table.end())
                    {
                        std::wcout << std::endl;
                        table.insert(MKey(lhs, b, ruleIndex));
                    }
                    else if (existing->m_pNotTerminal->GetName() != lhs->GetName())
                    {
                        std::wcout << L"<-- duplicated" << std::endl;
                        throw std::exception("multiple entries");
                    }
                    // Otherwise an equal entry already exists; nothing to do.
                }
            }
        }
    }

    return table;
}
//controi todos os follow(A) da gramatica g FollowSets BuildFolowSets3(Grammar& g, const FirstSets& first) { FollowSets follow; //Follow(start-symbol):= {$}; follow.AddToSet(g.GetStartSymbol(), g.endmarker()); //for all nonterminals A != start-symbol do Follow(A):={}; for (int i = 0 ; i < g.GetNumOfGrammarSymbols(); i++) { const GrammarSymbol* pgs = g.GetSymbol(i); if (!pgs->IsTerminal() && pgs != g.GetStartSymbol()) { follow.CreateGet(pgs); } } bool changed = true; while (changed) { changed = false; // Para cada producao A -> X1...Xn for (int k = 0 ; k < g.GetNumOfProductions(); k++) { const Production& A = g.GetProduction(k); Print(std::wcout, A); std::wcout << std::endl; for (int i = 0 ; i < A.GetNumOfRightSymbols(); i++) { //If there is a production A -> alfa B beta , then everything in FIRST(beta) except epsilon //is in FOLLOW(B). const GrammarSymbol* Xi = A.GetRightSymbol(i); //Para cada nao terminal Xi da producao if (!Xi->IsTerminal()) { //add First(Xi+1..Xn) - {epsilon} to Follow(Xi) std::set<const GrammarSymbol*> firstXi1_Xn; bool epsilon_is_in_firstXi1_Xn = false; //se Xi não é o ultimo if (i < (A.GetNumOfRightSymbols() - 1)) { firstXi1_Xn = GetFirstSet(first, g, A, i + 1); for (auto it = firstXi1_Xn.begin(); it != firstXi1_Xn.end(); ++it) { if (*it != g.epsilon()) { if (follow.AddToSet(Xi, *it)) { changed = true; } } else { epsilon_is_in_firstXi1_Xn = true; } } } // If there is a production A -> alfaB, or a production A -> alfa B beta , where // FIRST(beta) contains epsilon, then everything in FOLLOW (A) is in FOLLOW (B) . if (i == (A.GetNumOfRightSymbols() - 1) || epsilon_is_in_firstXi1_Xn) { //add Follow(A) to Follow(Xi) auto FollowA = follow.Get(A.GetLeftSymbol()); for (auto it = FollowA.begin(); it != FollowA.end(); ++it) { if (follow.AddToSet(Xi, *it)) { changed = true; } } } }//para este Xi } //para cada Xi } //para cada producao } //enquando mudar... return follow; }
static void GenerateDescRec3(std::wostream& os, Grammar& g, MMap& map, const std::wstring& tokenPrefix) { if (map.empty()) { return; } PrintGeneratedFileHeader(os); PrintGeneratedFileLicense(os); os << L"#pragma once\n"; os << L"\n"; os << L"#include <string.h>\n"; os << L"#include \"" << g.GetModuleName() << L"Lex.h\"\n"; os << L"\n"; os << L"\n"; //PrintOutputClass(os, g); PrintFowardDeclarations(os, g, map); int i = 0; int sub = 0; int currentRuleIndex = -1; auto it = map.begin(); for (; it != map.end();) { int currentRuleIndex = it->m_pNotTerminal->GetIndex(); //Faz todos desta regra (até ela mudar) os << TAB_1 << L"int " << GetFunctionName(g, it->m_pNotTerminal->GetName()) << L"( " << g.GetLanguageName() << L"_Context* ctx)\n"; os << TAB_1 << L"{\n"; int sub = 0; bool allsame = false; bool firstPrinted = false; if (PreAnalise(map, it, allsame) == 1) { //se so tem um nao precisa testar pelo token pois sempre haver um teste //a seguir de qualquer forma const Production& production = g.GetProduction(it->m_RuleIndex); os << TAB__2 << L"//"; Print(os, production); os << L"\n"; PrintProduction(os, production, g, tokenPrefix, TAB__2); it++; } else { while (it->m_pNotTerminal->GetIndex() == currentRuleIndex) { //se todos forem iguais nao testa pelo token //e so imprimi o primeiro que eh igual aos outros if (!allsame) { if (sub == 0) { os << TAB__2 << L"if (IsToken(ctx, " << tokenPrefix << it->m_pTerminal->GetName() << L"))\n"; } else { os << TAB__2 << L"else if (IsToken(ctx, " << tokenPrefix << it->m_pTerminal->GetName() << L"))\n"; } } const Production& production = g.GetProduction(it->m_RuleIndex); if (!allsame) { os << TAB__2 << L"{\n"; } if (!allsame || (allsame && !firstPrinted)) { os << (!allsame ? TAB___3 : TAB__2); os << L"/*"; Print(os, production); os << L"*/\n"; PrintProduction(os, production, g, tokenPrefix, !allsame ? 
TAB___3 : TAB__2); if (!allsame) { os << TAB__2 << L"}\n"; } } sub++; it++; if (it == map.end()) { break; } firstPrinted = true; } } os << TAB_1 << L"}\n\n"; } }
void GenerateDescRecC(std::wostream& os, Grammar& g, MMap& map, const std::wstring& tokenPrefix, const std::wstring& parserFileSuffix) { if (map.empty()) { return; } PrintGeneratedFileHeader(os); PrintGeneratedFileLicense(os); os << L"\n"; os << L"#include \"stdafx.h\"\n"; os << L"#include <assert.h>\n"; os << L"\n"; os << L"#include \"" << g.GetModuleName() << L"Lex.h\"\n"; os << L"#include \"" << g.GetModuleName() << parserFileSuffix << L".h\"\n"; os << L"\n"; os << L"\n"; //os << L"#include \"sstream.h\"\n"; //os << L"#include \"errors.h\"\n"; os << L"\n"; os << L"\n"; PrintActionsNames(os, g, false); //PrintActions(os, g, false); os << L"\n"; PrintFowardDeclarations(os, g, map); std::wstring ws(SourceCode); find_replace(ws, L"{GRAMMAR}", g.GetLanguageName()); find_replace(ws, L"{MODULE}", g.GetLanguageName()); os << ws; int i = 0; int currentRuleIndex = -1; auto it = map.begin(); int rulecount = 0; for (; it != map.end();) { int currentRuleIndex = it->m_pNotTerminal->GetIndex(); //Faz todos desta regra (até ela mudar) os << L"Result " << GetFunctionName(g, it->m_pNotTerminal->GetName()) << L"( " << g.GetLanguageName() + L"_Context* ctx)\n"; os << L"{\n"; os << TAB_1 << L"Result result = RESULT_OK;\n"; os << TAB_1 << L"" << g.GetLanguageName() << L"_Tokens token = ctx->token; \n"; os << L"\n"; int sub = 0; rulecount = 0; while (it->m_pNotTerminal->GetIndex() == currentRuleIndex) { int currentResultIndex = it->m_RuleIndex; //faz todos que resultam na mesma producao int count = 0; while (currentResultIndex == it->m_RuleIndex) { if (count == 0) { os << TAB_1; if (rulecount > 0) { os << L"else "; } os << L"if (token == " << tokenPrefix << it->m_pTerminal->GetName(); } else { os << L" ||\n"; os << TAB_1 << L" token == " << tokenPrefix << it->m_pTerminal->GetName(); } auto itcopy = it; it++; count++; if (it == map.end() || currentResultIndex != it->m_RuleIndex) { os << L")\n"; //fecha if const Production& production = g.GetProduction(itcopy->m_RuleIndex); os << TAB_1 
<< L"{\n"; os << TAB__2; os << L"/*"; Print(os, production); os << L"*/\n"; PrintProduction(os, production, g, tokenPrefix, TAB__2); os << TAB_1 << L"}\n"; break; } if (it == map.end()) { break; } } /* if (it != map.end()) { it++; } */ if (it == map.end()) { break; } rulecount++; } //regra os << TAB_1 << L"else\n"; os << TAB_1 << L"{\n"; os << TAB__2 << g.GetLanguageName() << L"_OnAction(ctx, " << g.GetLanguageName() << L"_OnError); \n"; os << TAB__2 << L"return RESULT_FAIL;\n"; os << TAB_1 << L"}\n"; os << L"\n"; os << TAB_1 << L"return result;\n"; os << L"}\n\n"; } }