///////////////////////////////////////////////// // Find-File TFFile::TFFile(const TStr& FNmWc, const bool& _RecurseP): FPathV(), FExtV(), FBaseWc(), CsImpP(false), RecurseP(_RecurseP), FPathN(0-1), FFileDesc(TFFileDesc::New()), SubFFile(), CurFNm(), CurFNmN(0-1){ // prepare file-base-name wild-card FBaseWc=FNmWc.GetFBase(); if (!CsImpP){FBaseWc.ToUc();} // get & assign file-name TStr FPath=FNmWc.GetFPath(); FPathV.Add(TStr::GetNrFPath(FPath)); }
// Loads a GeoIP base from the binary file FNm.
// If FNm exists, it is opened and passed to Load(). Otherwise a message is
// printed, the base is built by LoadCsv() from the folder
// FNm.GetFPath()+"/GeoIP/", saved to FNm with SaveBin(), and returned.
PGeoIpBs TGeoIpBs::LoadBin(const TStr& FNm){
  // load and return if exists
  if (TFile::Exists(FNm)){
    TFIn SIn(FNm);
    return Load(SIn);
  }
  // otherwise assume we have CSV and we need to parse it first
  printf("Cannot find %s, loading from raw files\n", FNm.CStr());
  TStr FPath = FNm.GetFPath() + "/GeoIP/";
  PGeoIpBs GeoIpBs = LoadCsv(FPath);
  // cache the parsed base so the next call can take the binary path
  GeoIpBs->SaveBin(FNm);
  return GeoIpBs;
}
void TTb::SaveAssis(const TStr& FNm){ TStr DoFNm=FNm.GetFPath()+"AsDo"+FNm.GetFMid().GetSubStr(0, 3)+".Dat"; TStr DaFNm=FNm.GetFPath()+"AsDa"+FNm.GetFMid().GetSubStr(0, 3)+".Dat"; TOLx DoLx(PSOut(new TFOut(DoFNm)), TFSet()|oloFrcEoln|oloSigNum|oloUniStr); int Dscs=GetVar(0)->GetVarType()->GetDscs(); Assert(Dscs>0); DoLx.PutInt(Dscs); DoLx.PutDosLn(); for (int DscN=0; DscN<Dscs; DscN++){ TTbVal Val=GetVar(0)->GetVarType()->GetVal(DscN); DoLx.PutStr(GetVar(0)->GetVarType()->GetValStr(Val)); DoLx.PutDosLn(); } DoLx.PutInt(TInt(GetVars()-1)); DoLx.PutDosLn(); for (int VarN=1; VarN<GetVars(); VarN++){ DoLx.PutStr(GetVar(VarN)->GetNm()); DoLx.PutDosLn(); int Dscs=GetVar(VarN)->GetVarType()->GetDscs(); if (Dscs>0){ DoLx.PutInt(Dscs); DoLx.PutDosLn(); for (int DscN=0; DscN<Dscs; DscN++){ TTbVal Val=GetVar(VarN)->GetVarType()->GetVal(DscN); DoLx.PutStr(GetVar(VarN)->GetVarType()->GetValStr(DscN)); DoLx.PutDosLn();} } else { DoLx.PutInt(TInt(0)); DoLx.PutInt(TInt(100)); DoLx.PutDosLn(); } } TOLx DaLx(PSOut(new TFOut(DaFNm)), TFSet()|oloFrcEoln|oloSigNum|oloUniStr); for (int TupN=0; TupN<GetTups(); TupN++){ for (int VarN=0; VarN<GetVars(); VarN++){ TTbVal Val=GetVal(TupN, VarN); switch (Val.GetValTag()){ case tvtUnknw: DaLx.PutSym(syQuestion); break; case tvtUnimp: DaLx.PutSym(syAsterisk); break; case tvtUnapp: DaLx.PutSym(syAsterisk); break; case tvtDsc: DaLx.PutInt(TInt(1+Val.GetDsc())); break; case tvtFlt: DaLx.PutFlt(Val.GetFlt()); break; default: Fail; } } DaLx.PutDosLn(); } }
void TCpDoc::SaveTBsToCpd( const TStr& InTBsFNm, const TStr& OutCpdFNm, const int& /*MxDocs*/){ // open input text-base TStr TxtBsNm=InTBsFNm.GetFBase(); TStr TxtBsFPath=InTBsFNm.GetFPath(); PTxtBs TxtBs=TTxtBs::New(TxtBsNm, TxtBsFPath, faRdOnly); // create output file PSOut SOut=TFOut::New(OutCpdFNm); // traverse input documents TBlobPt TrvBlobPt=TxtBs->FFirstDocId(); TBlobPt DocId; int DocN=0; TStr DocNm; TStr DocStr; while (TxtBs->FNextDocId(TrvBlobPt, DocId)){ DocN++; if (DocN%100==0){printf("%d docs\r", DocN);} // get document data TxtBs->GetDocNmStr(DocId, DocNm, DocStr); // create cpd document PCpDoc CpDoc=TCpDoc::New(); CpDoc->DocNm=DocNm; CpDoc->ParStrV.Add(DocStr, 1); // save cpd document CpDoc->Save(*SOut); } }
// Creates a bag-of-words document-base from the documents of a text-base.
//   TBsFNm: file name of the text-base (opened read-only).
//   MxDocs: maximal number of documents to add; -1 means no limit.
//   SwSetTypeNm: name passed to TSwSet::GetSwSet() to obtain the stop-words.
//   StemmerTypeNm: name passed to TStemmer::GetStemmer().
//   MxNGramLen, MnNGramFq: passed to TNGramBs::GetNGramBsFromTBs(); the
//     n-gram base is built unless both are equal to 1.
// Returns the document-base after AssertOk() has been called on it.
PBowDocBs TBowFl::LoadTBsTxt(
 const TStr& TBsFNm, const int& MxDocs,
 const TStr& SwSetTypeNm, const TStr& StemmerTypeNm,
 const int& MxNGramLen, const int& MnNGramFq){
  // prepare stop-words
  PSwSet SwSet=TSwSet::GetSwSet(SwSetTypeNm);
  // prepare stemmer
  PStemmer Stemmer=TStemmer::GetStemmer(StemmerTypeNm);
  // create ngrams
  PNGramBs NGramBs;
  if (!((MxNGramLen==1)&&(MnNGramFq==1))){
    NGramBs=TNGramBs::GetNGramBsFromTBs(
     TBsFNm, MxDocs, MxNGramLen, MnNGramFq, SwSet, Stemmer);
  }
  // create document-base
  PBowDocBs BowDocBs=TBowDocBs::New(SwSet, Stemmer, NGramBs);
  // open input text-base
  TStr TxtBsNm=TBsFNm.GetFBase();
  TStr TxtBsFPath=TBsFNm.GetFPath();
  PTxtBs TxtBs=TTxtBs::New(TxtBsNm, TxtBsFPath, faRdOnly);
  // traverse documents
  TBlobPt TxtBsTrvBlobPt=TxtBs->FFirstDocId(); TBlobPt TxtBsDocId;
  int Docs=0; // number of documents added so far
  while (TxtBs->FNextDocId(TxtBsTrvBlobPt, TxtBsDocId)){
    // test the limit before counting the current document, so that exactly
    // MxDocs documents are added (counting first would stop one too early)
    if ((MxDocs!=-1)&&(Docs>=MxDocs)){break;}
    Docs++; if (Docs%100==0){printf("%d\r", Docs);}
    // get document data
    TStr DocNm; TStr DocStr;
    TxtBs->GetDocNmStr(TxtBsDocId, DocNm, DocStr);
    // add document to bow
    BowDocBs->AddHtmlDoc(DocNm, TStrV(), DocStr, false);
  }
  // return results
  BowDocBs->AssertOk();
  return BowDocBs;
}
// get local include folder if exists void AddLocalLibFPath() { TStr LibFPath = FNm.GetFPath() + "lib/"; if (TDir::Exists(LibFPath)) { IncludeFPathV.Add(LibFPath); } }