// Factory: builds a TFIn for the file named FNm and returns it as a PSIn.
// If the constructor raises a PExcept, its message is printed to stdout and
// then handed to EFailR.
PSIn TFIn::New(const TStr& FNm){
  try {
    TFIn* const FIn=new TFIn(FNm);
    return PSIn(FIn);
  } catch (PExcept& OpenExcept) {
    const TStr MsgStr=OpenExcept->GetMsgStr();
    printf("*** Exception: %s\n", MsgStr.CStr());
    EFailR(MsgStr);
  }
  // NOTE(review): only reached if EFailR returns normally (presumably it
  // raises - confirm); in that case the open is attempted a second time,
  // without any handler around it.
  return PSIn(new TFIn(FNm));
}
// Returns an input stream over the leading bytes of this stream's buffer.
//   IsCut  - when true, the handed-out bytes are also cut from this stream
//   CutBfL - -1 to hand out all BfL bytes, otherwise a non-negative limit;
//            the length used is TInt::GetMn(BfL, CutBfL)
PSIn TMOut::GetSIn(const bool& IsCut, const int& CutBfL){
  IAssert((CutBfL==-1)||((0<=CutBfL)));
  // number of bytes the new input stream will cover
  int SInBfL=BfL;
  if (CutBfL!=-1){SInBfL=TInt::GetMn(BfL, CutBfL);}

  if (OwnBf&&IsCut&&(SInBfL==BfL)){
    // the whole owned buffer is being cut: pass the buffer itself to the
    // TMIn (TakeBf=true) and reset this stream to an empty state
    PSIn WholeSIn=PSIn(new TMIn(Bf, SInBfL, true));
    Bf=NULL;
    BfL=MxBfL=0;
    OwnBf=true;
    return WholeSIn;
  }

  // otherwise build the TMIn with TakeBf=false and, if requested, cut the
  // handed-out prefix from this stream afterwards
  PSIn PartSIn=PSIn(new TMIn(Bf, SInBfL, false));
  if (IsCut){CutBf(SInBfL);}
  return PartSIn;
}
// Fills a newly allocated BfL-byte buffer via GetBf, stores the result of
// TCs::GetCsFromBf over that buffer in Cs, and returns a TMIn constructed
// on the buffer with TakeBf=true.
PSIn TFRnd::GetSIn(const int& BfL, TCs& Cs){
  char* const RawBf=new char[BfL];
  GetBf(RawBf, BfL);
  Cs=TCs::GetCsFromBf(RawBf, BfL);
  return PSIn(new TMIn(RawBf, BfL, true));
}
// Builds a translation corpus from up to three line-aligned text files.
//   InOrgFNm      - file with original sentences (empty name = not given)
//   InTransFNm    - file with machine translations (empty name = not given)
//   InRefTransFNm - file with reference translations (empty name = not given)
// Line N of every given file is combined into sentence N (numbered from 1).
// Reading stops as soon as any given file has no further line. A warning is
// printed for every file that is not given. If no file is given at all, an
// empty corpus is returned.
PTransCorpus TTransCorpus::LoadTxt(const TStr& InOrgFNm,
    const TStr& InTransFNm, const TStr& InRefTransFNm) {
  // open files
  PSIn OrgSIn = !InOrgFNm.Empty() ? TFIn::New(InOrgFNm) : PSIn();
  PSIn TransSIn = !InTransFNm.Empty() ? TFIn::New(InTransFNm) : PSIn();
  PSIn RefTransSIn = !InRefTransFNm.Empty() ? TFIn::New(InRefTransFNm) : PSIn();
  // check which are given
  const bool IsOrgP = !OrgSIn.Empty();
  const bool IsTransP = !TransSIn.Empty();
  const bool IsRefTransP = !RefTransSIn.Empty();
  // print warnings
  if (!IsOrgP) { printf("No original sentences!\n"); }
  if (!IsTransP) { printf("No machine translation sentences!\n"); }
  if (!IsRefTransP) { printf("No reference translation sentences!\n"); }
  PTransCorpus TransCorpus = TTransCorpus::New();
  // without any input none of the loop's exit conditions below can ever
  // trigger, so return the empty corpus instead of looping forever
  if (!IsOrgP && !IsTransP && !IsRefTransP) { return TransCorpus; }
  // traverse the files and add sentences
  TLnRet OrgLnRet(OrgSIn), TransLnRet(TransSIn), RefTransLnRet(RefTransSIn);
  TStr OrgLnStr, TransLnStr, RefTransLnStr;
  int LnN = 1;
  forever {
    // try to read next line, otherwise break
    if (IsOrgP && !OrgLnRet.NextLn(OrgLnStr)) { break; }
    if (IsTransP && !TransLnRet.NextLn(TransLnStr)) { break; }
    if (IsRefTransP && !RefTransLnRet.NextLn(RefTransLnStr)) { break; }
    // print progress
    if (LnN % 100 == 0) { printf(" %7d Sentences\r", LnN); }
    // add sentence and translation(s) to the corpus
    if (!IsOrgP) {
      TransCorpus->AddSentenceNoOrg(LnN, TransLnStr, RefTransLnStr);
    } else if (!IsTransP) {
      TransCorpus->AddSentenceNoTrans(LnN, OrgLnStr, RefTransLnStr);
    } else {
      // original and machine translation are both present: the full form
      // additionally requires the reference translation
      IAssert(IsRefTransP);
      TransCorpus->AddSentence(LnN, OrgLnStr, TransLnStr, RefTransLnStr);
    }
    // next sentence
    LnN++;
  }
  printf("\n");
  // finish
  return TransCorpus;
}
void TWebTravelCmuPww::StartTravel(){ PutConstrs(); TStrV UrlStrV(1000, 0); PSIn SIn=PSIn(new TFIn(InFNm)); TILx Lx(SIn, TFSet()|iloRetEoln); Lx.GetSym(syQStr, syEof); while (Lx.Sym!=syEof){ TStr UrlStr=Lx.Str; if (IsUrlOk(UrlStr)){ UrlStrV.Add(UrlStr);} Lx.GetStrToEoln(); Lx.GetEoln(); Lx.GetSym(syQStr, syEof); } for (int UrlStrN=0; UrlStrN<UrlStrV.Len(); UrlStrN++){ Go(UrlStrV[UrlStrN]); } }
void TWebTravelHomeNet::StartTravel(){ PutConstrs(); TStrV UrlStrV(300000, 0); TIntIntH UserIdToDocsH(1000); PSIn SIn=PSIn(new TFIn(InFNm)); TILx Lx(SIn, TFSet()|iloRetEoln); TChA UrlStr; Lx.GetSym(syInt, syEof); while ((Lx.Sym!=syEof)&&(Lx.SymLnN<200000)){ // while (Lx.Sym!=syEof){ int UserId=Lx.Int; Lx.GetSym(syComma); Lx.GetInt(); Lx.GetSym(syComma); Lx.GetInt(); Lx.GetSym(syComma); Lx.GetInt(); Lx.GetSym(syComma); Lx.GetInt(); Lx.GetSym(syComma); TStr Method=Lx.GetIdStr(); Lx.GetSym(syComma); // GET, POST UrlStr.Clr(); UrlStr+=Lx.GetIdStr(); Lx.GetSym(syComma); // http, ftp UrlStr+="://"; UrlStr+=Lx.GetStrToCh(','); Lx.GetSym(syComma); // domain name UrlStr+=Lx.GetStrToEoln(); Lx.GetEoln(); // path if ((UserId==TgUserId)&&IsUrlOk(UrlStr)&&(Method=="GET")){ UserIdToDocsH.AddDat(UserId)++; UrlStrV.Add(UrlStr); } Lx.GetSym(syInt, syEof); if (Lx.SymLnN%100000==0){OnNotify(TInt::GetStr(Lx.SymLnN)+ " docs");} } int UserIdToDocsP=UserIdToDocsH.FFirstKeyId(); while (UserIdToDocsH.FNextKeyId(UserIdToDocsP)){ int UserId=UserIdToDocsH.GetKey(UserIdToDocsP); int Docs=UserIdToDocsH[UserIdToDocsP]; TStr MsgStr=TStr("User ")+TInt::GetStr(UserId)+": "+ TInt::GetStr(Docs)+" Docs."; OnNotify(MsgStr); } UrlStrV.Shuffle(TRnd()); for (int UrlStrN=0; UrlStrN<UrlStrV.Len(); UrlStrN++){ Go(UrlStrV[UrlStrN]); } }
// Factory: allocates a TMIn from the character array ChA and returns it
// wrapped in a PSIn.
PSIn TMIn::New(const TChA& ChA){
  TMIn* const MIn=new TMIn(ChA);
  return PSIn(MIn);
}
// Factory: allocates a TMIn from the string Str and returns it wrapped in a
// PSIn.
PSIn TMIn::New(const TStr& Str){
  TMIn* const MIn=new TMIn(Str);
  return PSIn(MIn);
}
// Factory: allocates a TMIn from the C string CStr and returns it wrapped in
// a PSIn.
PSIn TMIn::New(const char* CStr){
  TMIn* const MIn=new TMIn(CStr);
  return PSIn(MIn);
}
// Factory: allocates a TMIn from the raw buffer _Bf of length _BfL and
// returns it wrapped in a PSIn. TakeBf is forwarded unchanged to the TMIn
// constructor (presumably it selects adopting vs. copying the buffer -
// confirm against the constructor).
PSIn TMIn::New(const void* _Bf, const int& _BfL, const bool& TakeBf){
  TMIn* const MIn=new TMIn(_Bf, _BfL, TakeBf);
  return PSIn(MIn);
}
// Factory: allocates a TFIn for the file named FNm and returns it wrapped in
// a PSIn. OpenedP and IgnoreBOMIfExistsP are forwarded unchanged to the TFIn
// constructor; OpenedP is passed by non-const reference (presumably an
// output flag reporting whether the file was opened - confirm).
PSIn TFIn::New(const TStr& FNm, bool& OpenedP, const bool IgnoreBOMIfExistsP){
  TFIn* const FIn=new TFIn(FNm, OpenedP, IgnoreBOMIfExistsP);
  return PSIn(FIn);
}
// NOTE(review): the line below is a collapsed fragment. It opens with the
// tail of a definition whose beginning is not visible here and stops inside
// TSOut::UpdateLnLen before its closing brace. The section banner
// ("///... // Output-Stream") sits mid-line, so as laid out here it comments
// out everything that follows it on the line.
// Definitions that are fully visible on the line:
//  - TSIn::GetNextLn(TChA& LnChA): clears LnChA, then appends characters
//    until a '\n' or a "\r\n" pair has been consumed, in which case it
//    returns true without storing the terminator. A '\r' that is not followed
//    by '\n' is appended like any other character. When the input ends first,
//    it returns whether LnChA is non-empty.
//  - TSIn::StdIn: a PSIn wrapping a newly allocated TStdIn.
//  - TStdIn::TStdIn(): initializes TSBase and TSIn with "Standard input".
//  - TSOut::TSOut(const TStr& Str): passes Str.CStr() to TSBase and sets
//    MxLnLen to -1 and LnLen to 0.
//  - TSOut::UpdateLnLen (visible part): when MxLnLen is not -1, calls PutLn()
//    first if ForceInLn is false and LnLen+StrLen exceeds MxLnLen, adding its
//    result to Cs, then adds StrLen to LnLen.
LnStr=LnChA; return IsNext; } bool TSIn::GetNextLn(TChA& LnChA){ LnChA.Clr(); while (!Eof()){ const char Ch=GetCh(); if (Ch=='\n'){return true;} if (Ch=='\r' && PeekCh()=='\n'){GetCh(); return true;} LnChA.AddCh(Ch); } return !LnChA.Empty(); } const PSIn TSIn::StdIn=PSIn(new TStdIn()); TStdIn::TStdIn(): TSBase("Standard input"), TSIn("Standard input") {} ///////////////////////////////////////////////// // Output-Stream TSOut::TSOut(const TStr& Str): TSBase(Str.CStr()), MxLnLen(-1), LnLen(0){} int TSOut::UpdateLnLen(const int& StrLen, const bool& ForceInLn){ int Cs=0; if (MxLnLen!=-1){ if ((!ForceInLn)&&(LnLen+StrLen>MxLnLen)){Cs+=PutLn();} LnLen+=StrLen; } return Cs;
// Factory: allocates a TZipIn for the file named FNm and returns it wrapped
// in a PSIn. OpenedP is forwarded by non-const reference to the TZipIn
// constructor (presumably an output flag reporting whether the file was
// opened - confirm).
PSIn TZipIn::New(const TStr& FNm, bool& OpenedP){
  TZipIn* const ZipIn=new TZipIn(FNm, OpenedP);
  return PSIn(ZipIn);
}
// Factory: allocates a TZipIn for the file named FNm and returns it wrapped
// in a PSIn.
PSIn TZipIn::New(const TStr& FNm) {
  TZipIn* const ZipIn=new TZipIn(FNm);
  return PSIn(ZipIn);
}
// Loads a TSch from the text file named FNm: opens the file as a TFIn, sets
// up a TILx lexer on it with the options iloRetEoln, iloSigNum and iloCsSens,
// and delegates to the LoadTxt overload that takes the lexer.
PSch TSch::LoadTxt(const TStr& FNm){
  PSIn SchSIn=PSIn(new TFIn(FNm));
  TFSet LxOptSet=TFSet()|iloRetEoln|iloSigNum|iloCsSens;
  TILx SchLx(SchSIn, LxOptSet);
  return LoadTxt(SchLx);
}