Ejemplo n.º 1
0
Archivo: bowfl.cpp Proyecto: Accio/snap
void TBowFl::SaveLnDocTxt(const PBowDocBs& BowDocBs, const TStr& FNm, const bool& UseDocStrP){
  TFOut SOut(FNm);
  int Docs=BowDocBs->GetDocs();
  for (int DId=0; DId<Docs; DId++){
    printf("%d/%d\r", DId+1, Docs);
    // output document-name
    TStr DocNm=TStr::GetFNmStr(BowDocBs->GetDocNm(DId));
    SOut.PutStr(DocNm);
    // output categories
    for (int CIdN=0; CIdN<BowDocBs->GetDocCIds(DId); CIdN++){
      int CId=BowDocBs->GetDocCId(DId, CIdN);
      TStr CatNm=TStr::GetFNmStr(BowDocBs->GetCatNm(CId));
      SOut.PutCh(' '); SOut.PutCh('!'); SOut.PutStr(CatNm);
    }
    // output words
    if (UseDocStrP){
      TStr DocStr=BowDocBs->GetDocStr(DId);
//      DocStr.DelChAll('\n'); DocStr.DelChAll('\r');
      SOut.PutCh(' '); SOut.PutStr(DocStr);
    } else {
        int DocWIds=BowDocBs->GetDocWIds(DId);
        int WId; double WordFq;
        for (int DocWIdN=0; DocWIdN<DocWIds; DocWIdN++){
          BowDocBs->GetDocWIdFq(DId, DocWIdN, WId, WordFq);
          TStr WordStr=BowDocBs->GetWordStr(WId);
          for (int WordFqN=0; WordFqN<WordFq; WordFqN++){
            SOut.PutCh(' '); SOut.PutStr(WordStr);
          }
        }
    }
    SOut.PutLn();
  }
  printf("\n");
}
Ejemplo n.º 2
0
void TBowFl::SaveSparseMatlabTxt(const PBowDocBs& BowDocBs,
    const PBowDocWgtBs& BowDocWgtBs, const TStr& FNm,
    const TStr& CatFNm, const TIntV& _DIdV) {

  TIntV DIdV;
  if (_DIdV.Empty()) {
      BowDocBs->GetAllDIdV(DIdV);
  } else {
      DIdV = _DIdV;
  }
  // generate map of row-ids to words
  TFOut WdMapSOut(TStr::PutFExt(FNm, ".row-to-word-map.dat"));
  for (int WId = 0; WId < BowDocWgtBs->GetWords(); WId++) {
    TStr WdStr = BowDocBs->GetWordStr(WId);
    WdMapSOut.PutStrLn(TStr::Fmt("%d %s", WId+1,  WdStr.CStr()));
  }
  WdMapSOut.Flush();
  // generate map of col-ids to document names
  TFOut DocMapSOut(TStr::PutFExt(FNm, ".col-to-docName-map.dat"));
  for (int DocN = 0; DocN < DIdV.Len(); DocN++) {
    const int DId = DIdV[DocN];
    TStr DocNm = BowDocBs->GetDocNm(DId);
    DocMapSOut.PutStrLn(TStr::Fmt("%d %d %s", DocN, DId,  DocNm.CStr()));
  }
  DocMapSOut.Flush();
  // save documents' sparse vectors
  TFOut SOut(FNm);
  for (int DocN = 0; DocN < DIdV.Len(); DocN++){
    const int DId = DIdV[DocN];
    PBowSpV DocSpV = BowDocWgtBs->GetSpV(DId);
    const int DocWIds = DocSpV->GetWIds();
    for (int DocWIdN=0; DocWIdN<DocWIds; DocWIdN++){
      const int WId = DocSpV->GetWId(DocWIdN);
      const double WordWgt = DocSpV->GetWgt(DocWIdN);
      SOut.PutStrLn(TStr::Fmt("%d %d %.16f", WId+1, DocN+1, WordWgt));
    }
  }
  SOut.Flush();
  // save documents' category sparse vectors
  if (!CatFNm.Empty()) {
    TFOut CatSOut(CatFNm);
    for (int DocN = 0; DocN < DIdV.Len(); DocN++){
      const int DId = DIdV[DocN];
      const int DocCIds = BowDocBs->GetDocCIds(DId);
      for (int DocCIdN=0; DocCIdN<DocCIds; DocCIdN++){
        const int CId = BowDocBs->GetDocCId(DId, DocCIdN);
        const double CatWgt = 1.0;
        CatSOut.PutStrLn(TStr::Fmt("%d %d %.16f", CId+1, DocN+1, CatWgt));
      }
    }
    CatSOut.Flush();
  }
}