예제 #1
0
/////////////////////////////////////////////////
// Find-File
// Constructs a file finder from one path-plus-wildcard string.
//   FNmWc     - file name with wildcard; its GetFBase() part becomes the
//               base-name wildcard and its GetFPath() part the search path.
//   _RecurseP - stored in RecurseP.
// The position members FPathN and CurFNmN start at -1.
TFFile::TFFile(const TStr& FNmWc, const bool& _RecurseP):
  FPathV(),
  FExtV(),
  FBaseWc(),
  CsImpP(false),
  RecurseP(_RecurseP),
  FPathN(-1),
  FFileDesc(TFFileDesc::New()),
  SubFFile(),
  CurFNm(),
  CurFNmN(-1){
  // base-name wildcard; CsImpP was initialized to false above, so the
  // ToUc() normalization (presumably upper-casing) always runs here
  FBaseWc=FNmWc.GetFBase();
  if (!CsImpP){
    FBaseWc.ToUc();
  }
  // the path part of the wildcard, passed through GetNrFPath, is the
  // single entry of the path vector
  FPathV.Add(TStr::GetNrFPath(FNmWc.GetFPath()));
}
예제 #2
0
파일: geoip.cpp 프로젝트: edgeflip/dmoz
// Returns the GeoIP base stored in the binary file FNm.
// When FNm exists it is opened and passed to Load(). Otherwise the base is
// built by LoadCsv() from the "/GeoIP/" folder under FNm's path, written to
// FNm with SaveBin(), and returned.
PGeoIpBs TGeoIpBs::LoadBin(const TStr& FNm){
    // load and return if exists
    if (TFile::Exists(FNm)) {
        TFIn SIn(FNm); return Load(SIn);
    }
    // otherwise assume we have CSV and we need to parse it first
    printf("Cannot find %s, loading from raw files\n", FNm.CStr());
    TStr FPath = FNm.GetFPath() + "/GeoIP/";
    PGeoIpBs GeoIpBs = LoadCsv(FPath);
    // save the parsed base under FNm so a later call finds the file
    GeoIpBs->SaveBin(FNm);
    return GeoIpBs;
}
예제 #3
0
// Writes the table as two text files placed in FNm's path:
//   "AsDo<pfx>.Dat" - description of the variables,
//   "AsDa<pfx>.Dat" - the tuples, one output line per tuple,
// where <pfx> is GetSubStr(0, 3) of FNm's GetFMid().
//
// Description file layout, one item per line:
//   - number of discrete values of variable 0 (asserted > 0), then the
//     string of each of those values;
//   - GetVars()-1, then for every variable 1..GetVars()-1 its name followed
//     by either the number of discrete values and their strings, or, when
//     the variable has no discrete values, the integers 0 and 100.
//
// Data file layout: for every tuple, each value is written by its tag:
// unknown -> syQuestion, unimportant/unapplicable -> syAsterisk,
// discrete -> 1+GetDsc(), float -> GetFlt(). Any other tag triggers Fail.
void TTb::SaveAssis(const TStr& FNm){
  // both file names differ only in the "AsDo"/"AsDa" infix
  const TStr FPath=FNm.GetFPath();
  const TStr FMidPfx=FNm.GetFMid().GetSubStr(0, 3);
  TStr DoFNm=FPath+"AsDo"+FMidPfx+".Dat";
  TStr DaFNm=FPath+"AsDa"+FMidPfx+".Dat";

  // description file: variable 0 first
  TOLx DoLx(PSOut(new TFOut(DoFNm)), TFSet()|oloFrcEoln|oloSigNum|oloUniStr);
  int Dscs=GetVar(0)->GetVarType()->GetDscs(); Assert(Dscs>0);
  DoLx.PutInt(Dscs); DoLx.PutDosLn();
  for (int DscN=0; DscN<Dscs; DscN++){
    TTbVal Val=GetVar(0)->GetVarType()->GetVal(DscN);
    DoLx.PutStr(GetVar(0)->GetVarType()->GetValStr(Val)); DoLx.PutDosLn();
  }
  // description file: remaining variables
  DoLx.PutInt(TInt(GetVars()-1)); DoLx.PutDosLn();
  for (int VarN=1; VarN<GetVars(); VarN++){
    DoLx.PutStr(GetVar(VarN)->GetNm()); DoLx.PutDosLn();
    // distinct name: the count for variable 0 above is also called Dscs
    int VarDscs=GetVar(VarN)->GetVarType()->GetDscs();
    if (VarDscs>0){
      DoLx.PutInt(VarDscs); DoLx.PutDosLn();
      for (int DscN=0; DscN<VarDscs; DscN++){
        // the value string is looked up through the value object, the same
        // way as for variable 0 above
        TTbVal Val=GetVar(VarN)->GetVarType()->GetVal(DscN);
        DoLx.PutStr(GetVar(VarN)->GetVarType()->GetValStr(Val)); DoLx.PutDosLn();
      }
    } else {
      // NOTE(review): 0 and 100 look like a placeholder range for
      // non-discrete variables - confirm against the consumer of this file
      DoLx.PutInt(TInt(0)); DoLx.PutInt(TInt(100)); DoLx.PutDosLn();
    }
  }

  // data file: one line per tuple
  TOLx DaLx(PSOut(new TFOut(DaFNm)), TFSet()|oloFrcEoln|oloSigNum|oloUniStr);
  for (int TupN=0; TupN<GetTups(); TupN++){
    for (int VarN=0; VarN<GetVars(); VarN++){
      TTbVal Val=GetVal(TupN, VarN);
      switch (Val.GetValTag()){
        case tvtUnknw: DaLx.PutSym(syQuestion); break;
        case tvtUnimp: DaLx.PutSym(syAsterisk); break;
        case tvtUnapp: DaLx.PutSym(syAsterisk); break;
        // discrete values are written 1-based
        case tvtDsc: DaLx.PutInt(TInt(1+Val.GetDsc())); break;
        case tvtFlt: DaLx.PutFlt(Val.GetFlt()); break;
        default: Fail;
      }
    }
    DaLx.PutDosLn();
  }
}
예제 #4
0
void TCpDoc::SaveTBsToCpd(
 const TStr& InTBsFNm, const TStr& OutCpdFNm, const int& /*MxDocs*/){
  // open input text-base
  TStr TxtBsNm=InTBsFNm.GetFBase();
  TStr TxtBsFPath=InTBsFNm.GetFPath();
  PTxtBs TxtBs=TTxtBs::New(TxtBsNm, TxtBsFPath, faRdOnly);
  // create output file
  PSOut SOut=TFOut::New(OutCpdFNm);
  // traverse input documents
  TBlobPt TrvBlobPt=TxtBs->FFirstDocId(); TBlobPt DocId;
  int DocN=0; TStr DocNm; TStr DocStr;
  while (TxtBs->FNextDocId(TrvBlobPt, DocId)){
    DocN++; if (DocN%100==0){printf("%d docs\r", DocN);}
    // get document data
    TxtBs->GetDocNmStr(DocId, DocNm, DocStr);
    // create cpd document
    PCpDoc CpDoc=TCpDoc::New();
    CpDoc->DocNm=DocNm;
    CpDoc->ParStrV.Add(DocStr, 1);
    // save cpd document
    CpDoc->Save(*SOut);
  }
}
예제 #5
0
파일: bowfl.cpp 프로젝트: Accio/snap
// Builds a bag-of-words document base from the documents of a text-base.
//   TBsFNm        - text-base file name; split into base name and path to
//                   open the text-base read-only.
//   MxDocs        - maximal number of documents to add; -1 disables the limit.
//   SwSetTypeNm   - name passed to TSwSet::GetSwSet to obtain the stop-words.
//   StemmerTypeNm - name passed to TStemmer::GetStemmer.
//   MxNGramLen, MnNGramFq - n-gram parameters; an n-gram base is created
//                   unless both are equal to 1.
// Returns the document base after AssertOk() has been called on it.
PBowDocBs TBowFl::LoadTBsTxt(
 const TStr& TBsFNm, const int& MxDocs,
 const TStr& SwSetTypeNm, const TStr& StemmerTypeNm,
 const int& MxNGramLen, const int& MnNGramFq){
  // prepare stop-words
  PSwSet SwSet=TSwSet::GetSwSet(SwSetTypeNm);
  // prepare stemmer
  PStemmer Stemmer=TStemmer::GetStemmer(StemmerTypeNm);
  // create ngrams; NGramBs stays default-constructed when both n-gram
  // parameters are 1
  // NOTE(review): GetNGramBsFromTBs receives the same MxDocs - confirm it
  // applies the limit the same way as the loop below
  PNGramBs NGramBs;
  if (!((MxNGramLen==1)&&(MnNGramFq==1))){
    NGramBs=TNGramBs::GetNGramBsFromTBs(
     TBsFNm, MxDocs,
     MxNGramLen, MnNGramFq, SwSet, Stemmer);
  }
  // create document-base
  PBowDocBs BowDocBs=TBowDocBs::New(SwSet, Stemmer, NGramBs);
  // open input text-base
  TStr TxtBsNm=TBsFNm.GetFBase();
  TStr TxtBsFPath=TBsFNm.GetFPath();
  PTxtBs TxtBs=TTxtBs::New(TxtBsNm, TxtBsFPath, faRdOnly);
  // traverse documents
  TBlobPt TxtBsTrvBlobPt=TxtBs->FFirstDocId(); TBlobPt TxtBsDocId; int Docs=0;
  while (TxtBs->FNextDocId(TxtBsTrvBlobPt, TxtBsDocId)){
    // test the limit before counting the document, so that exactly MxDocs
    // documents are added (counting first would stop one document early)
    if ((MxDocs!=-1)&&(Docs>=MxDocs)){break;}
    Docs++; if (Docs%100==0){printf("%d\r", Docs);}
    // get document data
    TStr DocNm; TStr DocStr;
    TxtBs->GetDocNmStr(TxtBsDocId, DocNm, DocStr);
    // add document to bow
    BowDocBs->AddHtmlDoc(DocNm, TStrV(), DocStr, false);
  }
  // return results
  BowDocBs->AssertOk();
  return BowDocBs;
}
예제 #6
0
파일: main.cpp 프로젝트: adobekan/qminer
		// get local include folder if exists
		void AddLocalLibFPath() {
			TStr LibFPath = FNm.GetFPath() + "lib/";
			if (TDir::Exists(LibFPath)) {
				IncludeFPathV.Add(LibFPath);
			}
		}