void CFG::constructCanonicalLR1Collection() { if(!augmented) { augmentGrammar(); } canonicalLR1Collection.clear(); canonicalLR1Collection = vector<vector<LR1Item>>( { closure( vector<LR1Item>( { LR1Item( find_if( p.begin(), p.end(), [this](const Production& pr) { return pr.left == s; } ) - p.begin(), 0, "$" ) }) ) }); bool updated = false; vector<string> symbols = v; symbols.insert(symbols.end(), t.begin(), t.end()); do { updated = false; for(size_t i = 0; i < canonicalLR1Collection.size(); ++i) { for(size_t j = 0; j < symbols.size(); ++j) { vector<LR1Item> goToIX = goTo(canonicalLR1Collection[i], symbols[j]); if(goToIX.size() != 0 && !in(goToIX, canonicalLR1Collection)) { canonicalLR1Collection.push_back(goToIX); updated = true; } } } }while(updated); }
// Inserts `item` into `col` and then, recursively, every LR(1) item that
// follows from it.
//
// Nothing further happens when `item` was already in `col` (this is what
// terminates the recursion), when the dot is at the end of the production
// body, or when the symbol after the dot is not a non-terminal.
//
// Otherwise the lookaheads of the derived items are whatever
// SyntaxSymbolAttributeCache::getFirst returns for the symbols that follow the
// dotted non-terminal with `item.term` appended (presumably the FIRST set of
// that sequence). One item with the dot at position 0 is added for every
// production in the range reported by getNonTermProductRange, for every such
// lookahead.
static void closureLR1Item(LR1Item item, LR1ItemCollection& col)
{
    const bool newlyInserted = col.insert(item).second;
    if (!newlyInserted) {
        return;
    }

    auto& body = getProductBody(item.productID);
    if (item.pos >= body.size() || !isNonTerm(body[item.pos])) {
        return;
    }

    // The temporary symbol sequence lives only inside the lambda so that it is
    // released before the recursive calls below.
    const set<int> lookaheads = [&]() -> set<int> {
        vector<int> tail(body.begin() + item.pos + 1, body.end());
        tail.push_back(item.term);
        return SyntaxSymbolAttributeCache::instance()->getFirst(
            tail.data(), tail.data() + tail.size());
    }();

    int firstProduct, endProduct;
    getNonTermProductRange(body[item.pos], firstProduct, endProduct);

    for (int productID = firstProduct; productID < endProduct; ++productID) {
        for (auto lookahead : lookaheads) {
            closureLR1Item(LR1Item(productID, 0, lookahead), col);
        }
    }
}
// Computes the transition of item set `is` on grammar symbol `x`.
//
// Every item whose symbol right after the dot equals `x` contributes a copy
// with the dot advanced by one position (same production, same lookahead).
// Items of a production whose right side is the single empty string are
// skipped. The advanced items are passed through closure() and returned; the
// result is whatever closure() yields for an empty input when no item matches.
//
// @param is  item set to move from
// @param x   grammar symbol to move over
// @return    closure of the advanced items
vector<LR1Item> CFG::goTo(const vector<LR1Item>& is, const string& x)
{
    vector<LR1Item> js;

    for (const LR1Item& item : is) {
        // Bind by reference: copying the production would duplicate its whole
        // right-hand side for every item inspected.
        const Production& pr = p[item.productionIndex];

        const bool isEmptyProduction = pr.right.size() == 1 && pr.right[0] == "";
        if (isEmptyProduction) {
            continue;
        }

        if (item.dotPosition < pr.right.size() && pr.right[item.dotPosition] == x) {
            js.push_back(LR1Item(item.productionIndex,
                                 item.dotPosition + 1,
                                 item.lookahead));
        }
    }

    return closure(js);
}
// Computes the LR(1) closure of the item set `is`.
//
// For every item (A -> alpha . B beta, a) whose symbol after the dot, B, is a
// member of `v`, and for every production j with left side B, the item
// (j, dot 0, b) is added for each non-empty symbol b that computeFirst returns
// for the sequence "beta a". Items already present are not added again.
//
// The result starts with the items of `is` in their original order; derived
// items follow in the order in which they are discovered.
//
// @param is  items to close over
// @return    `is` plus every item derived from it
vector<LR1Item> CFG::closure(const vector<LR1Item>& is)
{
    vector<LR1Item> js = is;

    // js doubles as the work list: size() is re-read on every iteration, so
    // items appended below are themselves expanded later in this same loop.
    // One pass therefore reaches the fixpoint.
    for (size_t i = 0; i < js.size(); ++i) {
        // (A -> alpha . B beta, a)
        // Reference into p, never into js: push_back below may reallocate js.
        const Production& pr = p[js[i].productionIndex];
        const auto dot = js[i].dotPosition;
        if (!(dot < pr.right.size())) {
            continue;
        }

        string B = pr.right[dot];
        if (!in(B, v)) {
            continue;
        }

        // Build "beta a" and ask for the symbols that can start it.
        vector<string> betaa;
        for (size_t k = dot + 1; k < pr.right.size(); ++k) {
            betaa.push_back(pr.right[k]);
        }
        betaa.push_back(js[i].lookahead);
        vector<string> firstBetaa = computeFirst(betaa);

        for (size_t j = 0; j < p.size(); ++j) {
            if (!(p[j].left == B)) {
                continue;
            }
            for (size_t k = 0; k < firstBetaa.size(); ++k) {
                const string& b = firstBetaa[k];
                if (b == "") {
                    // The empty string is never used as a lookahead.
                    continue;
                }

                bool exist = false;
                for (size_t l = 0; l < js.size(); ++l) {
                    if (js[l].productionIndex == j &&
                        js[l].dotPosition == 0 &&
                        js[l].lookahead == b) {
                        exist = true;
                        break;
                    }
                }
                if (!exist) {
                    js.push_back(LR1Item(j, 0, b));
                }
            }
        }
    }

    return js;
}
void LALRParser::build() { LR0ItemCollectionFamily LR0Family; LR0Family.build(); map<LR1Point, set<int> > point2Terms; map<LR1Point, set<LR1Point> > point2Points; set<LR1Point> unhandled; for (int state = 0; state < (int)LR0Family.ID2Collection.size(); ++state) { for (auto item : LR0Family.ID2Collection[state]) { LR1Point pt0(state, item.productID, item.pos); LR1ItemCollection col; closureLR1Item(LR1Item(item.productID, item.pos, ESS_Term_End), col); for (auto _item : col) { if (isImportantLR0Item(_item.productID, _item.pos)) { LR1Point pt1(state, _item.productID, _item.pos); if (_item.term == ESS_Term_End) { point2Points[pt0].insert(pt1); } else { point2Terms[pt1].insert(_item.term); unhandled.insert(pt1); } auto &body = getProductBody(_item.productID); if (_item.pos >= body.size()) continue; LR1Point pt2(LR0Family.transMap[state][body[_item.pos]], _item.productID, _item.pos + 1); point2Points[pt1].insert(pt2); } else { auto &body = getProductBody(_item.productID); if (_item.pos >= body.size()) continue; LR1Point pt2(LR0Family.transMap[state][body[_item.pos]], _item.productID, _item.pos + 1); if (_item.term == ESS_Term_End) { point2Points[pt0].insert(pt2); } else { point2Terms[pt2].insert(_item.term); unhandled.insert(pt2); } } } } } while (!unhandled.empty()) { LR1Point point = *unhandled.begin(); unhandled.erase(unhandled.begin()); auto& terms = point2Terms[point]; for (auto ditem : point2Points[point]) { auto& dterms = point2Terms[ditem]; auto osize = dterms.size(); dterms.insert(terms.begin(), terms.end()); if (dterms.size() != osize) { unhandled.insert(ditem); } } } m_gotoTable.setStateCount((int)LR0Family.ID2Collection.size()); m_actionTable.setStateCount((int)LR0Family.ID2Collection.size()); for (int state = 0; state < (int)LR0Family.ID2Collection.size(); ++state) { for (auto term : g_termList) { Action act; { auto &m = LR0Family.transMap[state]; if (m.count(term)) { mergeAction(act, Action(Action::T_Shift, m[term]), LR0Family); } } for (auto item : 
LR0Family.ID2Collection[state]) { auto& body = getProductBody(item.productID); if (item.pos != body.size()) continue; auto& terms = point2Terms[LR1Point(state, item.productID, item.pos)]; if (terms.count(term) == 0) continue; mergeAction(act, Action(Action::T_Reduce, item.productID), LR0Family); } m_actionTable.setAction(state, term, act); } for (auto nonTerm : g_nonTermList) { auto &m = LR0Family.transMap[state]; if (m.count(nonTerm)) { m_gotoTable.setNextState(state, nonTerm, m[nonTerm]); } } } }