/**
 * Look up the indexes recorded for the path spelled by `s` below this node.
 *
 * @param s  Characters to follow, one child edge per character.
 * @return   This node's `indexes` when `s` is empty; otherwise the result of
 *           searching the rest of `s` in the child keyed by its first
 *           character, or an empty vector when no such child exists.
 */
std::vector<int> search(std::string s)
{
    // Nothing left to match: this node holds the answer.
    if (s.empty()) {
        return indexes;
    }

    const char head = s.at(0);
    auto hit = children.find(head);
    if (hit == children.end()) {
        // No edge for the next character, so there are no matches.
        return std::vector<int>();
    }

    // Drop the consumed character and continue in the matching child.
    return hit->second->search(s.substr(1));
}
void insertString(std::string s, int index){ indexes.push_back(index); if (s.size()>0) { value = s.at(0); std::shared_ptr<SuffixTreeNode> child; if (children.find(value) != children.end()) { child = children[value]; }else{ child.reset(new SuffixTreeNode()); children[value] = child; } std::string remain(s.substr(1)); child->insertString(remain, index); } }
// SPIMI-1: write current postings to intermediate files // // {word} => {ndoc => [doc][frq]} // pst.trm.x pst.doc.x // // where x is the id of current intermediate file // void IndexWriter::flushPSTBlk(hashmap<string, vector<pair<int, int> > >&pst, int turn) { ofstream ftrm, fdoc; string prefix = path+"/"+POSTINGS_FILE; ftrm.open((prefix+".trm."+itoa(turn)).c_str(), ios::binary); fdoc.open((prefix+".doc."+itoa(turn)).c_str(), ios::binary); hashmap<string, vector<pair<int, int> > >::iterator it; vector<pair<int, int> >::iterator jt; vector<string> list; for (it = pst.begin(); it != pst.end(); ++it) { list.push_back(it->first); } sort(list.begin(), list.end()); for (unsigned i = 0; i < list.size(); ++i) { it = pst.find(list[i]); string &term = list[i]; int ndoc = it->second.size(), cnt = 0, accum = 0; for (jt = it->second.begin(); jt != it->second.end(); ++jt) { int did = jt->first; int frq = jt->second; didbuf[cnt] = did; frqbuf[cnt] = frq; accum += frq; assert(accum > 0 || !(cout << accum << endl)); cnt++; } fwrite(fdoc, &ndoc, sizeof(ndoc)); fwrite(fdoc, didbuf, sizeof(didbuf[0])*ndoc); fwrite(fdoc, frqbuf, sizeof(frqbuf[0])*ndoc); TermAttr attr; attr.str = term; attr.df = ndoc; attr.cf = accum; attr.flush(ftrm); } ftrm.close(); fdoc.close(); }