void TBowFl::SaveLnDocTxt(const PBowDocBs& BowDocBs, const TStr& FNm, const bool& UseDocStrP){ TFOut SOut(FNm); int Docs=BowDocBs->GetDocs(); for (int DId=0; DId<Docs; DId++){ printf("%d/%d\r", DId+1, Docs); // output document-name TStr DocNm=TStr::GetFNmStr(BowDocBs->GetDocNm(DId)); SOut.PutStr(DocNm); // output categories for (int CIdN=0; CIdN<BowDocBs->GetDocCIds(DId); CIdN++){ int CId=BowDocBs->GetDocCId(DId, CIdN); TStr CatNm=TStr::GetFNmStr(BowDocBs->GetCatNm(CId)); SOut.PutCh(' '); SOut.PutCh('!'); SOut.PutStr(CatNm); } // output words if (UseDocStrP){ TStr DocStr=BowDocBs->GetDocStr(DId); // DocStr.DelChAll('\n'); DocStr.DelChAll('\r'); SOut.PutCh(' '); SOut.PutStr(DocStr); } else { int DocWIds=BowDocBs->GetDocWIds(DId); int WId; double WordFq; for (int DocWIdN=0; DocWIdN<DocWIds; DocWIdN++){ BowDocBs->GetDocWIdFq(DId, DocWIdN, WId, WordFq); TStr WordStr=BowDocBs->GetWordStr(WId); for (int WordFqN=0; WordFqN<WordFq; WordFqN++){ SOut.PutCh(' '); SOut.PutStr(WordStr); } } } SOut.PutLn(); } printf("\n"); }
void TBowFl::SaveSparseMatlabTxt(const PBowDocBs& BowDocBs, const PBowDocWgtBs& BowDocWgtBs, const TStr& FNm, const TStr& CatFNm, const TIntV& _DIdV) { TIntV DIdV; if (_DIdV.Empty()) { BowDocBs->GetAllDIdV(DIdV); } else { DIdV = _DIdV; } // generate map of row-ids to words TFOut WdMapSOut(TStr::PutFExt(FNm, ".row-to-word-map.dat")); for (int WId = 0; WId < BowDocWgtBs->GetWords(); WId++) { TStr WdStr = BowDocBs->GetWordStr(WId); WdMapSOut.PutStrLn(TStr::Fmt("%d %s", WId+1, WdStr.CStr())); } WdMapSOut.Flush(); // generate map of col-ids to document names TFOut DocMapSOut(TStr::PutFExt(FNm, ".col-to-docName-map.dat")); for (int DocN = 0; DocN < DIdV.Len(); DocN++) { const int DId = DIdV[DocN]; TStr DocNm = BowDocBs->GetDocNm(DId); DocMapSOut.PutStrLn(TStr::Fmt("%d %d %s", DocN, DId, DocNm.CStr())); } DocMapSOut.Flush(); // save documents' sparse vectors TFOut SOut(FNm); for (int DocN = 0; DocN < DIdV.Len(); DocN++){ const int DId = DIdV[DocN]; PBowSpV DocSpV = BowDocWgtBs->GetSpV(DId); const int DocWIds = DocSpV->GetWIds(); for (int DocWIdN=0; DocWIdN<DocWIds; DocWIdN++){ const int WId = DocSpV->GetWId(DocWIdN); const double WordWgt = DocSpV->GetWgt(DocWIdN); SOut.PutStrLn(TStr::Fmt("%d %d %.16f", WId+1, DocN+1, WordWgt)); } } SOut.Flush(); // save documents' category sparse vectors if (!CatFNm.Empty()) { TFOut CatSOut(CatFNm); for (int DocN = 0; DocN < DIdV.Len(); DocN++){ const int DId = DIdV[DocN]; const int DocCIds = BowDocBs->GetDocCIds(DId); for (int DocCIdN=0; DocCIdN<DocCIds; DocCIdN++){ const int CId = BowDocBs->GetDocCId(DId, DocCIdN); const double CatWgt = 1.0; CatSOut.PutStrLn(TStr::Fmt("%d %d %.16f", CId+1, DocN+1, CatWgt)); } } CatSOut.Flush(); } }