unsigned int getRule(stringstream& ss, S2Setmap& pt2base, V2Imap& goodman) { char c; ss >> c; assert(c == '('); string sym = ""; ss >> sym; unsigned int symI = sym2base[sym]; // printf("got symbol %s (%u)\n",sym.c_str(),symI); vector<unsigned int> kSyms; kSyms.push_back(symI); while(true) { c = ss.peek(); if(c == ' ') { //it's a space ss.ignore(1); //ignore that space } else if(c == '(') { //nonterminal //get child node index unsigned int index = getRule(ss,pt2base,goodman); kSyms.push_back(index); } else if (c == ')') { ss.ignore(1); //burn closing paren // printf("Finished Rule with sym %s and %lu kids\n",sym.c_str(),kSyms.size()-1); unsigned int index = 0; while(kSyms.size() > 3) { //try to add glue rule // printf("K = %lu\n",kSyms.size()); unsigned int r = kSyms.back(); kSyms.pop_back(); unsigned int l = kSyms.back(); kSyms.pop_back(); unsigned int glueI = nSym*2; vector<unsigned int> gluerule; gluerule.push_back(glueI);//glue symbol gluerule.push_back(l); gluerule.push_back(r); unsigned int glueindex = 0; V2Imap::iterator fter = goodman.find(gluerule); if(fter == goodman.end()) { //new node goodman[gluerule] = goodmanIndex; baseSym.push_back(glueI); glueindex = goodmanIndex; canL.push_back(false); canR.push_back(false); canL[l] = true; canR[r] = true; leftlook[l].push_back(make_pair(r,goodmanIndex)); bmap[make_pair(l,r)].insert(goodmanIndex); goodmanIndex++; } else { //seen it glueindex = fter->second; } kSyms.push_back(glueindex); } V2Imap::iterator fter = goodman.find(kSyms); if(fter == goodman.end()) { //new node goodman[kSyms] = goodmanIndex; baseSym.push_back(symI); index = goodmanIndex; canL.push_back(false); canR.push_back(false); goodmanIndex++; if(kSyms.size() == 3) { //add index -> l r // printf("BR : %u -> %u %u\n",index,kSyms[1],kSyms[2]); canL[kSyms[1]] = true; canR[kSyms[2]] = true; leftlook[kSyms[1]].push_back(make_pair(kSyms[2],index)); bmap[make_pair(kSyms[1],kSyms[2])].insert(index); } else { //one child //add index -> k umap[kSyms[1]].insert(index); //printf("UR : %u -> %u\n",index,kSyms[1]); } } else { //seen it index = fter->second; } return index; } else { //terminal string term = ""; while(ss.peek() != ')') { term += ss.get(); } // printf("got terminal %s\n",term.c_str()); ss.ignore(1); //burn closing paren unsigned int index = 0; if(term == "<>") { //nonterminal leaf index = sym2base[sym]; } else { //preterminal node //get index and add //printf("T:%s\n",term.c_str()); S2Setmap::iterator fter = pt2base.find(term); ptsyms.insert(symI); if(fter != pt2base.end()) {//seen this terminal before set<unsigned int>& bsymset = fter->second; //the base syms that have been seen to parse this terminal if(bsymset.find(symI) == bsymset.end()) { //never found this pterm rule before preterms[term].insert(goodmanIndex); index = goodmanIndex; baseSym.push_back(symI); ++goodmanIndex; canL.push_back(false); canR.push_back(false); bsymset.insert(symI); } else { //this is a small set...maybe an unideal implementation tho set<unsigned int>& symset = preterms[term]; for(set<unsigned int>::iterator iter = symset.begin();iter != symset.end();++iter) { if(baseSym[*iter] == symI) index = *iter; } } } else { preterms[term].insert(goodmanIndex); index = goodmanIndex; baseSym.push_back(symI); canL.push_back(false); canR.push_back(false); ++goodmanIndex; set<unsigned int> bsymset; bsymset.insert(symI); pt2base[term] = bsymset; } } // printf("returning %u - %u\n",index,goodmanIndex); return index; } } return 1; }