Ejemplo n.º 1
0
 // Looks up `s` by descending one child per character, starting at this node.
 // Returns this node's `indexes` once all of `s` has been consumed, or an
 // empty vector as soon as a character has no entry in `children`.
 std::vector<int> search(std::string s){
     // Nothing left to match: this node is the answer.
     if (s.empty()) {
         return indexes;
     }

     // A single lookup serves both the existence check and the descent.
     const char head = s[0];
     auto next = children.find(head);
     if (next == children.end()) {
         return std::vector<int>();
     }

     // Continue in the matching child with everything after the first char.
     return next->second->search(s.substr(1));
 }
Ejemplo n.º 2
0
 void insertString(std::string s, int index){
     indexes.push_back(index);
     if (s.size()>0) {
         value = s.at(0);
         std::shared_ptr<SuffixTreeNode> child;
         if (children.find(value) != children.end()) {
             child = children[value];
         }else{
             child.reset(new SuffixTreeNode());
             children[value] = child;
         }
         std::string remain(s.substr(1));
         child->insertString(remain, index);
     }
 }
Ejemplo n.º 3
0
// SPIMI step 1: write the current postings `pst` to a pair of intermediate
// files, where x (= `turn`) is the id of the current intermediate file:
//
//   <path>/<POSTINGS_FILE>.trm.x   one TermAttr per term
//                                  (str = term, df = ndoc, cf = sum of frq)
//   <path>/<POSTINGS_FILE>.doc.x   per term: ndoc, then the doc ids,
//                                  then the frequencies
//
// Terms are emitted in ascending string order. Each posting is a
// (doc id, frequency) pair. Data is written through the project's
// fwrite(stream, ptr, size) helper and TermAttr::flush.
//
void IndexWriter::flushPSTBlk(hashmap<string, vector<pair<int, int> > >&pst, int turn) {
  typedef vector<pair<int, int> > PostingList;
  typedef hashmap<string, PostingList> PostingMap;

  const string base = path+"/"+POSTINGS_FILE;
  ofstream ftrm((base+".trm."+itoa(turn)).c_str(), ios::binary);
  ofstream fdoc((base+".doc."+itoa(turn)).c_str(), ios::binary);

  // Collect the keys and sort them so terms are written in ascending order.
  vector<string> terms;
  for (PostingMap::iterator entry = pst.begin(); entry != pst.end(); ++entry) {
    terms.push_back(entry->first);
  }
  sort(terms.begin(), terms.end());

  for (vector<string>::iterator term = terms.begin(); term != terms.end(); ++term) {
    PostingList &postings = pst.find(*term)->second;
    int ndoc = postings.size();
    int accum = 0;

    // Stage doc ids and frequencies in the shared buffers while summing
    // the frequencies for this term.
    for (int k = 0; k < ndoc; ++k) {
      didbuf[k] = postings[k].first;
      frqbuf[k] = postings[k].second;
      accum += postings[k].second;
      // On failure the running total is printed before the assert fires.
      assert(accum > 0 || !(cout << accum << endl));
    }

    // Posting block: count first, then all doc ids, then all frequencies.
    fwrite(fdoc, &ndoc, sizeof(ndoc));
    fwrite(fdoc, didbuf, sizeof(didbuf[0])*ndoc);
    fwrite(fdoc, frqbuf, sizeof(frqbuf[0])*ndoc);

    // Matching term record.
    TermAttr attr;
    attr.str = *term;
    attr.df = ndoc;
    attr.cf = accum;
    attr.flush(ftrm);
  }

  ftrm.close();
  fdoc.close();
}