Example #1
0
    /**
     * Parse one parenthesised node of the form "(SYM ...)" from `ss`,
     * register whatever new rules it introduces, and return the node's index.
     *
     * After the symbol, three shapes are handled:
     *  - "(SYM <>)"            : leaf marker; returns sym2base[SYM] and
     *                            registers nothing.
     *  - "(SYM text)"          : any other raw text up to the closing ')';
     *                            treated as a preterminal over `text`. A new
     *                            index is allocated the first time this
     *                            (text, SYM) pair is seen, otherwise the
     *                            previously allocated index is returned.
     *  - "(SYM (..) (..) ...)" : children are parsed recursively. While more
     *                            than two children remain, the last two are
     *                            folded into a binary glue node whose base
     *                            symbol is nSym*2. The resulting unary or
     *                            binary rule is looked up in `goodman` and
     *                            registered if new.
     *
     * Every newly allocated index appends one entry to baseSym, canL and canR
     * and advances goodmanIndex. Binary rules are also recorded in leftlook
     * and bmap, unary rules in umap, preterminals in preterms / ptsyms.
     * NOTE(review): the name suggests a Goodman-style grammar reduction;
     * confirm against the surrounding class.
     *
     * @param ss       stream positioned at (optional whitespace before) '('.
     * @param pt2base  terminal text -> set of base symbols already registered
     *                 as preterminals for that text; updated in place.
     * @param goodman  rule key (parent/glue symbol followed by child indices)
     *                 -> node index; updated in place.
     * @return index of the node that was parsed.
     * @throws std::ios_base::failure if the stream ends before the node is
     *         closed, or if the node has a symbol but no children. The
     *         previous code looped forever / read past the end of kSyms in
     *         those cases.
     */
    unsigned int getRule(stringstream& ss, S2Setmap& pt2base, V2Imap& goodman) {
        // peek()/get() report end-of-stream through this int value; it must be
        // compared as an int, a narrowed char can never be told apart from data.
        const int eofMark = stringstream::traits_type::eof();

        char c = '\0'; // stays '\0' if extraction fails, so the assert reads a defined value
        ss >> c;
        assert(c == '(');

        string sym = "";
        ss >> sym;
        unsigned int symI = sym2base[sym];

        // kSyms[0] is the parent symbol, kSyms[1..] are the child node indices.
        vector<unsigned int> kSyms;
        kSyms.push_back(symI);

        while(true) {
            const int next = ss.peek();
            if(next == eofMark) {
                // Truncated input: without this check the terminal branch
                // below would spin forever appending EOF to the terminal text.
                throw std::ios_base::failure("getRule: input ended inside an unclosed '(' node");
            }

            if(next == ' ') { //separator between children
                ss.ignore(1);
            } else if(next == '(') { //nonterminal child
                kSyms.push_back(getRule(ss,pt2base,goodman));
            } else if(next == ')') { //end of this node
                ss.ignore(1); //burn closing paren

                if(kSyms.size() < 2) {
                    // "(SYM )": there is no kSyms[1] to build a rule from.
                    throw std::ios_base::failure("getRule: node has a symbol but no children");
                }

                // Binarise: fold the two right-most children into a glue node
                // until at most two children remain.
                while(kSyms.size() > 3) {
                    unsigned int r = kSyms.back();
                    kSyms.pop_back();
                    unsigned int l = kSyms.back();
                    kSyms.pop_back();

                    unsigned int glueI = nSym*2; //base symbol shared by all glue nodes

                    vector<unsigned int> gluerule;
                    gluerule.push_back(glueI);
                    gluerule.push_back(l);
                    gluerule.push_back(r);

                    unsigned int glueindex = 0;
                    V2Imap::iterator gter = goodman.find(gluerule);
                    if(gter == goodman.end()) { //new glue node
                        goodman[gluerule] = goodmanIndex;
                        baseSym.push_back(glueI);
                        glueindex = goodmanIndex;

                        canL.push_back(false);
                        canR.push_back(false);

                        canL[l] = true;
                        canR[r] = true;

                        leftlook[l].push_back(make_pair(r,goodmanIndex));
                        bmap[make_pair(l,r)].insert(goodmanIndex);

                        goodmanIndex++;
                    } else { //seen it
                        glueindex = gter->second;
                    }
                    kSyms.push_back(glueindex);
                }

                unsigned int index = 0;
                V2Imap::iterator fter = goodman.find(kSyms);
                if(fter == goodman.end()) { //new node
                    goodman[kSyms] = goodmanIndex;
                    baseSym.push_back(symI);
                    index = goodmanIndex;

                    canL.push_back(false);
                    canR.push_back(false);

                    goodmanIndex++;

                    if(kSyms.size() == 3) {
                        //binary rule: index -> l r
                        canL[kSyms[1]] = true;
                        canR[kSyms[2]] = true;
                        leftlook[kSyms[1]].push_back(make_pair(kSyms[2],index));
                        bmap[make_pair(kSyms[1],kSyms[2])].insert(index);
                    } else {
                        //unary rule: index -> k (size is exactly 2 here)
                        umap[kSyms[1]].insert(index);
                    }
                } else { //seen it
                    index = fter->second;
                }

                return index;
            } else { //terminal text
                // Collect everything up to the closing paren, consuming the
                // paren itself; fail instead of looping if it never arrives.
                string term = "";
                while(true) {
                    const int ch = ss.get();
                    if(ch == eofMark) {
                        throw std::ios_base::failure("getRule: input ended inside a terminal");
                    }
                    if(ch == ')') {
                        break;
                    }
                    term += static_cast<char>(ch);
                }

                unsigned int index = 0;
                if(term == "<>") { //nonterminal leaf
                    index = symI; //same value sym2base[sym] produced above
                } else { //preterminal node
                    ptsyms.insert(symI);

                    // Base symbols already registered for this terminal; the
                    // entry is created empty on first sight of the terminal.
                    set<unsigned int>& bsymset = pt2base[term];

                    if(bsymset.insert(symI).second) { //first time for this (term, symbol) pair
                        preterms[term].insert(goodmanIndex);
                        index = goodmanIndex;
                        baseSym.push_back(symI);
                        canL.push_back(false);
                        canR.push_back(false);
                        ++goodmanIndex;
                    } else {
                        //already registered: find its index among this terminal's preterminals
                        //this is a small set...maybe an unideal implementation tho
                        set<unsigned int>& symset = preterms[term];
                        for(set<unsigned int>::iterator iter = symset.begin();iter != symset.end();++iter) {
                            if(baseSym[*iter] == symI)
                                index = *iter;
                        }
                    }
                }
                return index;
            }
        }
    }