///////////////////////////////////////////////// // Output-Stream TSOut::TSOut(const TStr& Str): TSBase(Str.CStr()), MxLnLen(-1), LnLen(0){}
// file extensions bool THttp::IsHtmlFExt(const TStr& FExt){ TStr UcFExt=FExt.GetUc(); return ((UcFExt==TFile::HtmlFExt.GetUc())||(UcFExt==TFile::HtmFExt.GetUc())); }
///////////////////////////////////////////////// // Tql-Lexical-Chars void TTqlChDef::SetChTy(const TTqlLxChTy& ChTy, const TStr& Str){ for (int ChN=0; ChN<Str.Len(); ChN++){ChTyV[Str[ChN]-TCh::Mn]=TInt(ChTy);} }
/// Returns the part of an argument that follows PrefixStr.
/// The argument index comes from GetPrefixArgN(PrefixStr); an index of -1
/// trips the IAssert.
TStr TEnv::GetArgPostfix(const TStr& PrefixStr) const {
  const int PrefixArgN = GetPrefixArgN(PrefixStr);
  IAssert(PrefixArgN != -1);
  TStr FullArgStr = GetArg(PrefixArgN);
  // the postfix starts right after the prefix characters
  const int PostfixBChN = PrefixStr.Len();
  return FullArgStr.GetSubStr(PostfixBChN, FullArgStr.Len());
}
/// Registers Str[0] as the mapped character of every further character of
/// Str: LcChV[Str[ChN]-TCh::Mn] is set to Str[0] for ChN=1..Len()-1.
/// Strings shorter than two characters change nothing.
void THttpChDef::SetLcCh(const TStr& Str){
  const int Chs=Str.Len();
  for (int ChN=1; ChN<Chs; ChN++){
    const int SlotN=Str[ChN]-TCh::Mn;
    LcChV[SlotN]=TCh(Str[0]);
  }
}
/// Advances the file-find to the next matching file.
///
/// On success FNm and CurFNm receive the file name (file-path + file-base),
/// CurFNmN is incremented and true is returned. When nothing is left,
/// CurFNm is set to "", CurFNmN to -1 and false is returned.
///
/// A file matches when FExtV is empty or contains its extension, and FBaseWc
/// is empty or wildcard-matches its base name; unless CsImpP is set both the
/// extension and the base name are upper-cased before the comparison.
/// When RecurseP is set, directories other than "." and ".." are searched
/// through a nested file-find (SubFFile) created with New().
///
/// Directory enumeration uses FindFirstFile/FindNextFile/FindClose on the
/// handle stored in FFileDesc.
bool TFFile::Next(TStr& FNm){
  // if need to recurse
  if (!SubFFile.Empty()){
    if (SubFFile->Next(FNm)){CurFNm=FNm; CurFNmN++; return true;}
    else {SubFFile=NULL;}
  }
  // for all required file-paths
  while (FPathN<FPathV.Len()){
    if ((FPathN!=-1)&&(FindNextFile(FFileDesc->FFileH, &FFileDesc->FDesc))){
      // next file-name available on the current file-path
      TStr FBase=FFileDesc->GetFBase();
      if ((RecurseP)&&(FFileDesc->IsDir())){
        // file-name is directory and recursion is required
        if ((FBase!=".")&&(FBase!="..")){
          // directory is non-trivial - prepare sub-file-find for recursion
          TStr SubFPath=FPathV[FPathN]+FBase;
          TStrV SubFPathV; SubFPathV.Add(SubFPath);
          SubFFile=New(SubFPathV, FExtV, FBaseWc, RecurseP);
          if (SubFFile->Next(FNm)){CurFNm=FNm; CurFNmN++; return true;}
          else {SubFFile=NULL;}
        }
      } else {
        // return file-name if fits
        if ((FBase!=".")&&(FBase!="..")){
          FNm=FPathV[FPathN]+FBase;
          TStr FExt=FNm.GetFExt();
          if (!CsImpP){FExt.ToUc(); FBase.ToUc();}
          if (((FExtV.Empty())||(FExtV.SearchForw(FExt)!=-1))&&
           ((FBaseWc.Empty())||(FBase.IsWcMatch(FBaseWc)))){
            CurFNm=FNm; CurFNmN++; return true;}
        }
      }
    } else {
      // close file-find descriptor if needed
      if (FPathN!=-1){
        // NOTE(review): FindClose runs inside the assertion macro - confirm
        // IAssert is never compiled out, otherwise the handle is not closed
        IAssert(FindClose(FFileDesc->FFileH));
        FFileDesc->FFileH=INVALID_HANDLE_VALUE;
      }
      // find next file existing path from the input list
      while ((++FPathN<FPathV.Len())&&
       ((FFileDesc->FFileH=FindFirstFile((FPathV[FPathN]+"*.*").CStr(),
       &FFileDesc->FDesc))==INVALID_HANDLE_VALUE)){}
      if ((FPathN<FPathV.Len())&&(RecurseP)&&(FFileDesc->IsDir())){
        // file-path found, file-name is directory and recursion is required
        TStr FBase=FFileDesc->GetFBase();
        if ((FBase!=".")&&(FBase!="..")){
          TStr SubFPath=FPathV[FPathN]+FBase;
          TStrV SubFPathV; SubFPathV.Add(SubFPath);
          SubFFile=New(SubFPathV, FExtV, FBaseWc, RecurseP);
          if (SubFFile->Next(FNm)){CurFNm=FNm; CurFNmN++; return true;}
          else {SubFFile=NULL;}
        }
      } else {
        // return file-name if fits
        if (FPathN<FPathV.Len()){
          TStr FBase=FFileDesc->GetFBase();
          if ((FBase!=".")&&(FBase!="..")){
            FNm=FPathV[FPathN]+FBase;
            TStr FExt=FNm.GetFExt();
            if (!CsImpP){FExt.ToUc(); FBase.ToUc();}
            if (((FExtV.Empty())||(FExtV.SearchForw(FExt)!=-1))&&
             ((FBaseWc.Empty())||(FBase.IsWcMatch(FBaseWc)))){
              CurFNm=FNm; CurFNmN++; return true;
            }
          }
        }
      }
    }
  }
  // not found
  CurFNm=""; CurFNmN=-1; return false;
}
/// Sets the environment variable VarNm of the current process to VarVal,
/// replacing an existing value. A failing call trips the IAssert.
///
/// POSIX putenv() stores the pointer it is given instead of copying the
/// string. The "NAME=VALUE" text used to live in a temporary TStr that was
/// destroyed at the end of the statement, leaving the environment with a
/// dangling entry, so setenv() (which copies) is used outside Windows.
void TEnv::PutVarVal(const TStr& VarNm, const TStr& VarVal) {
#if defined(_WIN32)
  // NOTE(review): the Windows CRT is expected to copy the string passed to
  // putenv, so the original call is kept on this platform - confirm
  const int RetVal = putenv(TStr::Fmt("%s=%s", VarNm.CStr(), VarVal.CStr()).CStr());
#else
  const int RetVal = setenv(VarNm.CStr(), VarVal.CStr(), 1); // 1: overwrite
#endif
  IAssert(RetVal == 0);
}
/// Error callback: prints the values of GetWaitUrls() and GetConnUrls()
/// together with the error message; when Empty() holds afterwards,
/// TSysMsg::Quit() is called. The first (unnamed) argument is ignored.
void TGgWebFetchSaver::OnError(const int&, const TStr& MsgStr){
  printf("Error [Wait:%d Conn.:%d]: %s\n",
   GetWaitUrls(), GetConnUrls(), MsgStr.CStr());
  if (!Empty()){return;}
  TSysMsg::Quit();
}
///////////////////////////////////////////////// // Google-Focused-Crawl PGgFCrawl TGgFCrawl::GetFCrawl( const TStr& SrcUrlStr, const int& MxCands, const TStr& ProxyStr){ // collect related urls printf("Expand source URL: %s\n", SrcUrlStr.CStr()); PRSet SrcUrlRSet= TGg::WebSearch(TStr("related:")+SrcUrlStr, -1, TNotify::NullNotify, ProxyStr); // create & prepare focused-crawl PGgFCrawl FCrawl=TGgFCrawl::New(); FCrawl->SrcUrlStr=SrcUrlStr; FCrawl->DstRSet=TRSet::New(SrcUrlRSet); // fill hits for (int HitN=0; HitN<SrcUrlRSet->GetHits(); HitN++){ if ((MxCands!=-1)&&(FCrawl->DstRSet->GetHits()>MxCands)){break;} TStr HitUrlStr=SrcUrlRSet->GetHitUrlStr(HitN); printf("Expand URL: %s\n", HitUrlStr.CStr()); PRSet RelUrlRSet= TGg::WebSearch(TStr("related:")+HitUrlStr, -1, TNotify::NullNotify, ProxyStr); FCrawl->DstRSet->Merge(RelUrlRSet); } // save related urls //TRSet::SaveXml(DstRSet, OutXmlUrlFNm); // collect related web-pages TGgWebFetchSaver WebFetchSaver(100); WebFetchSaver.PutProxyStr(ProxyStr); // get source-url web-page {bool Ok; TStr MsgStr; TWebFetchBlocking::GetWebPg( SrcUrlStr, Ok, MsgStr, FCrawl->SrcWebPg, NULL, ProxyStr); if (!Ok){FCrawl->SrcWebPg=NULL;}} // get related-urls web-page int FetchHits=FCrawl->DstRSet->GetHits(); if ((MxCands!=-1)&&(MxCands<FetchHits)){FetchHits=MxCands;} for (int HitN=0; HitN<FetchHits; HitN++){ TStr HitUrlStr=FCrawl->DstRSet->GetHitUrlStr(HitN); WebFetchSaver.FetchUrl(HitUrlStr); } TSysMsg::Loop(); // save crawled web-pages for (int WebPgN=0; WebPgN<WebFetchSaver.GetWebPgs(); WebPgN++){ PWebPg WebPg=WebFetchSaver.GetWebPg(WebPgN); FCrawl->UrlStrToWebPgH.AddDat(WebPg->GetUrlStr(), WebPg); } // create bag-of-words FCrawl->BowDocBs=TBowDocBs::New(); FCrawl->SrcDId=FCrawl->BowDocBs->AddHtmlDoc( SrcUrlStr, TStrV(), FCrawl->SrcWebPg->GetHttpBodyAsStr()); for (int WebPgN=0; WebPgN<WebFetchSaver.GetWebPgs(); WebPgN++){ PWebPg WebPg=WebFetchSaver.GetWebPg(WebPgN); FCrawl->BowDocBs->AddHtmlDoc( WebPg->GetUrlStr(0), TStrV(), 
WebPg->GetHttpBodyAsStr()); } // calculate similarities to the source document PBowDocWgtBs BowDocWgtBs=TBowDocWgtBs::New(FCrawl->BowDocBs, bwwtNrmTFIDF); PBowSim BowSim=TBowSim::New(bstCos); FCrawl->SimDIdKdV; FCrawl->SumSim=0; for (int DIdN=0; DIdN<BowDocWgtBs->GetDocs(); DIdN++){ int DId=BowDocWgtBs->GetDId(DIdN); if (DId!=FCrawl->SrcDId){ double Sim=BowSim->GetSim( BowDocWgtBs->GetSpV(FCrawl->SrcDId), BowDocWgtBs->GetSpV(DId)); FCrawl->SimDIdKdV.Add(TFltIntKd(Sim, DId)); FCrawl->SumSim+=Sim; } } FCrawl->SimDIdKdV.Sort(false); // set crawl ok FCrawl->Ok=true; // return focused-crawl return FCrawl; }
void TGgSchRef::GetAuthNmVPubStr( const TStr& AuthNmVPubStr, TStrV& AuthNmV, TStr& PubNm, TStr& PubYearStr){ // split input string into two parts TStr AuthNmVStr; TStr PubStr; AuthNmVPubStr.SplitOnStr(AuthNmVStr, " - ", PubStr); // author-names string AuthNmVStr.SplitOnAllCh(',', AuthNmV, true); for (int AuthN=0; AuthN<AuthNmV.Len(); AuthN++){ AuthNmV[AuthN].ToTrunc(); } if ((!AuthNmV.Empty())&& ((AuthNmV.Last().IsStrIn("..."))||(AuthNmV.Last().Len()<=2))){ AuthNmV.DelLast(); } // publication-name & publication-year string TStr OriginStr; TStr LinkStr; PubStr.SplitOnStr(OriginStr, " - ", LinkStr); OriginStr.SplitOnLastCh(PubNm, ',', PubYearStr); PubNm.ToTrunc(); PubYearStr.ToTrunc(); if ((PubYearStr.Len()>=4)&&(PubYearStr.GetSubStr(0, 3).IsInt())){ PubYearStr=PubYearStr.GetSubStr(0, 3); } else if ((PubNm.Len()>=4)&&(PubNm.GetSubStr(0, 3).IsInt())){ PubYearStr=PubNm.GetSubStr(0, 3); PubNm=""; } else { PubYearStr=""; } }
void TGgSchBs::SetAuthCrawled(const TStr& AuthNm){ AuthNmToCrawlPH.AddDat(AuthNm.GetLc())=true; }
void TStrParser::DocStrToSylIdV(const TStr& _DocStr, TIntV& SyllablesIdV) { TStr DocStr = _DocStr.GetUc(); // to upper case Fail; }
void TFile::Rename(const TStr& SrcFNm, const TStr& DstFNm){ EAssertR( rename(SrcFNm.CStr(), DstFNm.CStr())==0, "Error renaming file '"+SrcFNm+"' to "+DstFNm+"'."); }
/// Writes the C-string of Str with PutBf, using a length of Len()+1
/// (presumably so that the terminating zero is written as well).
void TFRnd::PutStr(const TStr& Str){
  const int BfL=Str.Len()+1;
  PutBf(Str.CStr(), BfL);
}
/// Creates the directory FPathFNm with CreateDirectory (no security
/// attributes); returns true when the call reports a non-zero result.
bool TDir::GenDir(const TStr& FPathFNm){
  const bool CreatedP=(CreateDirectory(FPathFNm.CStr(), NULL)!=0);
  return CreatedP;
}
///////////////////////////////////////////////// // Time-Profiler - poor-man's profiler int TTmProfiler::AddTimer(const TStr& TimerNm) { MxNmLen = TInt::GetMx(MxNmLen, TimerNm.Len()); return TimerH.AddKey(TimerNm); }
/// Removes the directory FPathFNm with RemoveDirectory; returns true when
/// the call reports a non-zero result.
bool TDir::DelDir(const TStr& FPathFNm){
  const bool RemovedP=(RemoveDirectory(FPathFNm.CStr())!=0);
  return RemovedP;
}
int InitKronecker(const TStr args, PNGraph &GD, TKronMtx& FitMtx){ Env = TEnv(args, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Kronecker graphs. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try Env = TEnv(args, TNotify::StdNotify); //const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input graph file (single directed edge per line)"); TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "initMatrix.txt", "Output file prefix"); const TInt NZero = Env.GetIfArgPrefixInt("-n0:", 2, "Innitiator matrix size"); const TStr InitMtx = Env.GetIfArgPrefixStr("-m:", "0.9 0.7; 0.5 0.2", "Init Gradient Descent Matrix (R=random)").GetLc(); const TStr Perm = Env.GetIfArgPrefixStr("-p:", "d", "Initial node permutation: d:Degree, r:Random, o:Order").GetLc(); const TInt GradIter = Env.GetIfArgPrefixInt("-gi:", 10, "Gradient descent iterations"); const TFlt LrnRate = Env.GetIfArgPrefixFlt("-l:", 1e-5, "Learning rate"); const TFlt MnStep = Env.GetIfArgPrefixFlt("-mns:", 0.005, "Minimum gradient step"); const TFlt MxStep = Env.GetIfArgPrefixFlt("-mxs:", 0.05, "Maximum gradient step"); const TInt WarmUp = Env.GetIfArgPrefixInt("-w:", 10000, "Samples to warm up"); const TInt NSamples = Env.GetIfArgPrefixInt("-s:", 100000, "Samples per gradient estimation"); //const TInt GradType = Env.GetIfArgPrefixInt("-gt:", 1, "1:Grad1, 2:Grad2"); const bool ScaleInitMtx = Env.GetIfArgPrefixBool("-sim:", true, "Scale the initiator to match the number of edges"); const TFlt PermSwapNodeProb = Env.GetIfArgPrefixFlt("-nsp:", 1.0, "Probability of using NodeSwap (vs. EdgeSwap) MCMC proposal distribution"); //if (OutFNm.Empty()) { OutFNm = TStr::Fmt("%s-fit%d", InFNm.GetFMid().CStr(), NZero()); } printf("%s\n", OutFNm.CStr()); // load graph cout << "n0 = " << NZero << endl; // fit FILE *F = fopen(OutFNm.CStr(), "w"); TKronMtx InitKronMtx = InitMtx=="r" ? 
TKronMtx::GetRndMtx(NZero, 0.1) : TKronMtx::GetMtx(InitMtx); InitKronMtx.Dump("INIT PARAM", true); TKroneckerLL KronLL(GD, InitKronMtx, PermSwapNodeProb); fprintf(F, "INIT PARAM\t%s, MTX SUM %f\n", InitKronMtx.GetMtxStr().CStr(), InitKronMtx.GetMtxSum()); if (ScaleInitMtx) { InitKronMtx.SetForEdges(GD->GetNodes(), GD->GetEdges()); } KronLL.InitLL(GD, InitKronMtx); InitKronMtx.Dump("SCALED PARAM", true); fprintf(F, "SCALED PARAM\t%s, MTX SUM %f\n", InitKronMtx.GetMtxStr().CStr(), InitKronMtx.GetMtxSum()); KronLL.SetPerm(Perm.GetCh(0)); double LogLike = 0; //if (GradType == 1) { LogLike = KronLL.GradDescent(GradIter, LrnRate, MnStep, MxStep, WarmUp, NSamples); //} else if (GradType == 2) { // LogLike = KronLL.GradDescent2(GradIter, LrnRate, MnStep, MxStep, WarmUp, NSamples); } //else{ Fail; } //const TKronMtx& FitMtx = KronLL.GetProbMtx(); FitMtx = KronLL.GetProbMtx(); // fprintf(F, "Input\t%s\n", InFNm.CStr()); TStrV ParamV; Env.GetCmLn().SplitOnAllCh(' ', ParamV); fprintf(F, "Command line options\n"); for (int i = 0; i < ParamV.Len(); i++) { fprintf(F, "\t%s\n", ParamV[i].CStr()+(ParamV[i][0]=='-'?1:0)); } fprintf(F, "Loglikelihood\t%10.2f\n", LogLike); fprintf(F, "Absolute error (based on expected number of edges)\t%f\n", KronLL.GetAbsErr()); fprintf(F, "RunTime\t%g\n", ExeTm.GetSecs()); fprintf(F, "Estimated initiator\t%s, mtx sum %f\n", FitMtx.GetMtxStr().CStr(), FitMtx.GetMtxSum()); /*if (ScaleInitMtx) { FitMtx.SetForEdgesNoCut(GD->GetNodes(), GD->GetEdges()); } fprintf(F, "Scaled initiator\t%s, mtx sum %f\n", FitMtx.GetMtxStr().CStr(), FitMtx.GetMtxSum()); FitMtx.Normalize(); fprintf(F, "Normalized initiator\t%s, mtx sum %f\n", FitMtx.GetMtxStr().CStr(), FitMtx.GetMtxSum());*/ fclose(F); Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
bool TNEANetMP::IsOk(const bool& ThrowExcept) const { bool RetVal = true; for (int N = NodeH.FFirstKeyId(); NodeH.FNextKeyId(N); ) { const TNode& Node = NodeH[N]; if (! Node.OutEIdV.IsSorted()) { const TStr Msg = TStr::Fmt("Out-edge list of node %d is not sorted.", Node.GetId()); if (ThrowExcept) { EAssertR(false, Msg); } else { ErrNotify(Msg.CStr()); } RetVal=false; } if (! Node.InEIdV.IsSorted()) { const TStr Msg = TStr::Fmt("In-edge list of node %d is not sorted.", Node.GetId()); if (ThrowExcept) { EAssertR(false, Msg); } else { ErrNotify(Msg.CStr()); } RetVal=false; } // check out-edge ids int prevEId = -1; for (int e = 0; e < Node.GetOutDeg(); e++) { if (! IsEdge(Node.GetOutEId(e))) { const TStr Msg = TStr::Fmt("Out-edge id %d of node %d does not exist.", Node.GetOutEId(e), Node.GetId()); if (ThrowExcept) { EAssertR(false, Msg); } else { ErrNotify(Msg.CStr()); } RetVal=false; } if (e > 0 && prevEId == Node.GetOutEId(e)) { const TStr Msg = TStr::Fmt("Node %d has duplidate out-edge id %d.", Node.GetId(), Node.GetOutEId(e)); if (ThrowExcept) { EAssertR(false, Msg); } else { ErrNotify(Msg.CStr()); } RetVal=false; } prevEId = Node.GetOutEId(e); } // check in-edge ids prevEId = -1; for (int e = 0; e < Node.GetInDeg(); e++) { if (! IsEdge(Node.GetInEId(e))) { const TStr Msg = TStr::Fmt("Out-edge id %d of node %d does not exist.", Node.GetInEId(e), Node.GetId()); if (ThrowExcept) { EAssertR(false, Msg); } else { ErrNotify(Msg.CStr()); } RetVal=false; } if (e > 0 && prevEId == Node.GetInEId(e)) { const TStr Msg = TStr::Fmt("Node %d has duplidate out-edge id %d.", Node.GetId(), Node.GetInEId(e)); if (ThrowExcept) { EAssertR(false, Msg); } else { ErrNotify(Msg.CStr()); } RetVal=false; } prevEId = Node.GetInEId(e); } } for (int E = EdgeH.FFirstKeyId(); EdgeH.FNextKeyId(E); ) { const TEdge& Edge = EdgeH[E]; if (! 
IsNode(Edge.GetSrcNId())) { const TStr Msg = TStr::Fmt("Edge %d source node %d does not exist.", Edge.GetId(), Edge.GetSrcNId()); if (ThrowExcept) { EAssertR(false, Msg); } else { ErrNotify(Msg.CStr()); } RetVal=false; } if (! IsNode(Edge.GetDstNId())) { const TStr Msg = TStr::Fmt("Edge %d destination node %d does not exist.", Edge.GetId(), Edge.GetDstNId()); if (ThrowExcept) { EAssertR(false, Msg); } else { ErrNotify(Msg.CStr()); } RetVal=false; } } return RetVal; }
void TGnuPlot::CreatePlotFile(const TStr& Comment) { time_t ltime; time(<ime); char* TimeStr = ctime(<ime); TimeStr[strlen(TimeStr) - 1] = 0; // rearrange columns so that longest are on the left //SeriesV.Sort(false); TIntV SerIdV(SeriesV.Len(), 0); for (int i = 0; i < SeriesV.Len(); i++) { SerIdV.Add(i); } SerIdV.SortCmp(TGpSeriesCmp(SeriesV)); // set columns int ColCnt = 1; bool SaveData = false; for (int s = 0; s < SeriesV.Len(); s++) { TGpSeries& Plt = SeriesV[SerIdV[s]]; if (Plt.XYValV.Empty()) { continue; } Plt.DataFNm = DataFNm; // plots use same X column const int PrevCol = s > 0 ? IsSameXCol(SerIdV[s], SerIdV[s-1]) : -1; if (PrevCol != -1) { Plt.XCol = PrevCol; } else { Plt.XCol = ColCnt; ColCnt++; } Plt.YCol = ColCnt; ColCnt++; if (! Plt.ZValV.Empty()) { Plt.ZCol = ColCnt; ColCnt++; } if (! Plt.XYValV.Empty()) { SaveData=true; } } // save data file (skip duplicate X columns) if (SaveData) { FILE *F = fopen(DataFNm.CStr(), "wt"); EAssertR(F != NULL, TStr("Can not open data file ")+DataFNm); fprintf(F, "#\n"); fprintf(F, "# %s (%s)\n", Comment.CStr(), TimeStr); fprintf(F, "#\n"); // column names for (int i = 0; i < SerIdV.Len(); i++) { const TGpSeries& Ser = SeriesV[SerIdV[i]]; if (Ser.XYValV.Empty()) { continue; } if (i == 0) { fprintf(F, "# "); } else { fprintf(F, "\t"); } if (Ser.SaveXVals()) { if (! LblX.Empty()) { fprintf(F, "%s\t", LblX.CStr()); } else { fprintf(F, "XVals\t"); } } if (Ser.Label.Empty()) { fprintf(F, "%s", LblY.CStr()); } else { fprintf(F, "%s", SeriesV[SerIdV[i]].Label.CStr()); } if (Ser.ZCol > 0) fprintf(F, "\tDeltaY"); } fprintf(F, "\n"); // data for (int row = 0; row < SeriesV[SerIdV[0]].XYValV.Len(); row++) { for (int i = 0; i < SeriesV.Len(); i++) { const TGpSeries& Ser = SeriesV[SerIdV[i]]; if (row < Ser.XYValV.Len()) { if (i > 0) { fprintf(F, "\t"); } if (Ser.SaveXVals()) { fprintf(F, "%g\t%g", Ser.XYValV[row].Key(), Ser.XYValV[row].Dat()); } else { fprintf(F, "%g", Ser.XYValV[row].Dat()); } if (! 
Ser.ZValV.Empty()) { fprintf(F, "\t%g", Ser.ZValV[row]()); } } } fprintf(F, "\n"); } fclose(F); } // save plot file FILE *F = fopen(PlotFNm.CStr(), "wt"); EAssertR(F != 0, TStr("Can not open plot file ")+PlotFNm); TStr CurDir = TDir::GetCurDir(); CurDir.ChangeStrAll("\\", "\\\\"); fprintf(F, "#\n"); fprintf(F, "# %s (%s)\n", Comment.CStr(), TimeStr); fprintf(F, "#\n\n"); if (! Title.Empty()) fprintf(F, "set title \"%s\"\n", Title.CStr()); fprintf(F, "set key bottom right\n"); fprintf(F, "%s\n", GetScaleStr(ScaleTy).CStr()); if (ScaleTy==gpsLog || ScaleTy==gpsLog10X || ScaleTy==gpsLog10XY) { fprintf(F, "set format x \"10^{%%L}\"\n"); fprintf(F, "set mxtics 10\n"); } if (ScaleTy==gpsLog || ScaleTy==gpsLog10Y || ScaleTy==gpsLog10XY) { fprintf(F, "set format y \"10^{%%L}\"\n"); fprintf(F, "set mytics 10\n"); } if (ScaleTy==gpsLog2X || ScaleTy==gpsLog2XY) { fprintf(F, "set format x \"2^{%%L}\"\n"); } if (ScaleTy==gpsLog2Y || ScaleTy==gpsLog2XY) { fprintf(F, "set format y \"2^{%%L}\"\n"); } if (SetGrid) fprintf(F, "set grid\n"); if (XRange.Val1 != XRange.Val2) fprintf(F, "set xrange [%g:%g]\n", XRange.Val1(), XRange.Val2()); if (YRange.Val1 != YRange.Val2) fprintf(F, "set yrange [%g:%g]\n", YRange.Val1(), YRange.Val2()); if (! LblX.Empty()) fprintf(F, "set xlabel \"%s\"\n", LblX.CStr()); if (! LblY.Empty()) fprintf(F, "set ylabel \"%s\"\n", LblY.CStr()); if (Tics42 < -1) { Tics42 = GetTics42(); } if (Tics42) { fprintf(F, "set tics scale 2\n"); // New in version 4.2 } else { fprintf(F, "set ticscale 2 1\n"); // Old (deprecated) } // custom commands for (int i = 0; i < MoreCmds.Len(); i++) { fprintf(F, "%s\n", MoreCmds[i].CStr()); } // plot if (! SeriesV.Empty()) { fprintf(F, "plot \t"); for (int i = 0; i < SeriesV.Len(); i++) { fprintf(F, "%s", GetSeriesPlotStr(i).CStr()); } fprintf(F, "\n"); } if (SetPause) fprintf(F, "pause -1 \"Hit return to exit. %s\"\n", PlotFNm.CStr()); fclose(F); }
/// Returns the value of the environment variable VarNm.
/// getenv() yields NULL for a variable that is not set; in that case an
/// empty string is returned instead of building a TStr from a null pointer.
TStr TEnv::GetVarVal(const TStr& VarNm) const {
  const char* VarValCStr = getenv(VarNm.CStr());
  if (VarValCStr == NULL) { return TStr(); }
  return TStr(VarValCStr);
}
/// Creates a plot whose data file is FileNm+".tab" and whose plot file is
/// FileNm+".plt". Scale is gpsAuto, both ranges are (0, 0), pausing is
/// enabled, and no series or extra commands exist yet.
/// FileNm must not be empty (checked with IAssert).
TGnuPlot::TGnuPlot(const TStr& FileNm, const TStr& PlotTitle, const bool& Grid) :
  DataFNm(FileNm+".tab"),
  PlotFNm(FileNm+".plt"),
  Title(PlotTitle),
  LblX(),
  LblY(),
  ScaleTy(gpsAuto),
  YRange(0, 0),
  XRange(0, 0),
  SetGrid(Grid),
  SetPause(true),
  SeriesV(),
  MoreCmds() {
  IAssert(! FileNm.Empty());
}
/// Applies the single-character SetChTy(ChTy, Ch) to each character of Str,
/// in order.
void THttpChDef::SetChTy(const THttpChTy& ChTy, const TStr& Str){
  int ChN=0;
  while (ChN<Str.Len()){
    SetChTy(ChTy, Str[ChN]);
    ChN++;
  }
}
/// Creates a plot with explicit data and plot file names; an empty name is
/// replaced by DefDataFNm or DefPlotFNm respectively. Scale is gpsAuto, both
/// ranges are (0, 0), pausing is enabled, and no series or extra commands
/// exist yet.
TGnuPlot::TGnuPlot(const TStr& DataFileNm, const TStr& PlotFileNm,
 const TStr& PlotTitle, const bool& Grid) :
  DataFNm(DataFileNm.Empty() ? DefDataFNm : DataFileNm),
  PlotFNm(PlotFileNm.Empty() ? DefPlotFNm : PlotFileNm),
  Title(PlotTitle),
  LblX(),
  LblY(),
  ScaleTy(gpsAuto),
  YRange(0, 0),
  XRange(0, 0),
  SetGrid(Grid),
  SetPause(true),
  SeriesV(),
  MoreCmds() {
}
/// Returns the string obtained by passing every character of Str through
/// GetLcCh(), in order.
TStr THttpChDef::GetLcStr(const TStr& Str){
  TChA MappedChA;
  int ChN=0;
  while (ChN<Str.Len()){
    MappedChA+=GetLcCh(Str[ChN]);
    ChN++;
  }
  return MappedChA;
}
int main(int argc, char *argv[]) { // #### SETUP: Parse Arguments LogOutput Log; THash<TStr, TStr> Arguments; ArgumentParser::ParseArguments(argc, argv, Arguments, Log); TStr OutputDirectory = ArgumentParser::GetArgument(Arguments, "directory", ""); TStr StartString = ArgumentParser::GetArgument(Arguments, "start", "2009-02-01"); TStr QBDBCDirectory = ArgumentParser::GetArgument(Arguments, "qbdbc", "/lfs/1/tmp/curis/QBDBC-C/"); TStr QBDBDirectory = ArgumentParser::GetArgument(Arguments, "qbdb", "/lfs/1/tmp/curis/QBDB/"); TInt WindowSize = ArgumentParser::GetArgument(Arguments, "window", "14").GetInt(); TStr EdgeString = ArgumentParser::GetArgument(Arguments, "edge", "lsh"); QuoteGraph::SetEdgeCreation(EdgeString); Log.DisableLogging(); TQuoteBase QB; TDocBase DB; TClusterBase CB; // Start memeseed after a break in the middle TStr LastDate = ArgumentParser::GetArgument(Arguments, "last", ""); if (LastDate != "") { { TQuoteBase OldQB; TDocBase OldDB; TClusterBase OldCB; PNGraph OldQGraph; TDataLoader::LoadCumulative(QBDBCDirectory, LastDate, OldQB, OldDB, OldCB, OldQGraph); QB = TQuoteBase(OldQB.GetCounter()); DB = TDocBase(OldDB.GetCounter()); CB = TClusterBase(OldCB.GetCounter()); } Err("Counters updated!\n"); } TStr LogDirectory; Log.GetDirectory(LogDirectory); Err("Output directory: %s\n", LogDirectory.CStr()); // #### DATA LOADING: Load ALL the things! 
fprintf(stderr, "Loading QB and DB from file for %d days, starting from %s...\n", WindowSize.Val, StartString.CStr()); Err("%s\n", QBDBDirectory.CStr()); TSecTm PresentTime = TDataLoader::LoadQBDBByWindow(QBDBDirectory, StartString, WindowSize, QB, DB); fprintf(stderr, "\tQBDB successfully loaded!\n"); // #### CLUSTERING STEP fprintf(stderr, "Creating clusters\n"); QuoteGraph GraphCreator(&QB, &CB); PNGraph QGraph; GraphCreator.CreateGraph(QGraph); Clustering ClusterJob(QGraph); ClusterJob.BuildClusters(&CB, &QB, &DB, Log, PresentTime, false); TIntV AllClusters; CB.GetAllClusterIdsSortByFreq(AllClusters); //PostCluster::FilterAndCacheClusterSize(&DB, &QB, &CB, Log, AllClusters, PresentTime); //PostCluster::FilterAndCacheClusterPeaks(&DB, &QB, &CB, Log, AllClusters, PresentTime); int NumClusters = AllClusters.Len(); FILE* F = fopen("nifty-day-2012-01-01.txt", "w"); for (int i = 0; i < NumClusters; i++) { TCluster C; CB.GetCluster(AllClusters[i], C); TIntV Quotes; C.GetQuoteIds(Quotes); TStr RepStr; C.GetRepresentativeQuoteString(RepStr, &QB); fprintf(F, "%d\t%d\t%s\t%d\n", Quotes.Len(), C.GetNumQuotes().Val, RepStr.CStr(), C.GetId().Val); for (int j = 0; j < Quotes.Len(); j++) { TQuote Q; QB.GetQuote(Quotes[j], Q); TStr Str; Q.GetContentString(Str); fprintf(F, "\t%d\t%d\t%s\t%d\n", Q.GetNumSources().Val, Q.GetNumSources().Val, Str.CStr(), Q.GetId().Val); } fprintf(F, "\n"); } fclose(F); TStr Directory; Log.GetDirectory(Directory); Err("Done with memeoutput! Directory created at: %s\n", Directory.CStr()); //printf("%d\n", TStringUtil::f_counter); return 0; }
bool THttp::IsGifFExt(const TStr& FExt){ return (FExt.GetUc()==TFile::GifFExt.GetUc()); }
/// MCMC fitting void TAGMFit::RunMCMC(const int& MaxIter, const int& EvalLambdaIter, const TStr& PlotFPrx) { TExeTm IterTm, TotalTm; double PrevL = Likelihood(), DeltaL = 0; double BestL = PrevL; printf("initial likelihood = %f\n",PrevL); TIntFltPrV IterTrueLV, IterJoinV, IterLeaveV, IterAcceptV, IterSwitchV, IterLBV; TIntPrV IterTotMemV; TIntV IterV; TFltV BestLV; TVec<TIntSet> BestCmtySetV; int SwitchCnt = 0, LeaveCnt = 0, JoinCnt = 0, AcceptCnt = 0, ProbBinSz; int Nodes = G->GetNodes(), Edges = G->GetEdges(); TExeTm PlotTm; ProbBinSz = TMath::Mx(1000, G->GetNodes() / 10); //bin to compute probabilities IterLBV.Add(TIntFltPr(1, BestL)); for (int iter = 0; iter < MaxIter; iter++) { IterTm.Tick(); int NID = -1; int JoinCID = -1, LeaveCID = -1; SampleTransition(NID, JoinCID, LeaveCID, DeltaL); //sample a move double OptL = PrevL; if (DeltaL > 0 || Rnd.GetUniDev() < exp(DeltaL)) { //if it is accepted IterTm.Tick(); if (LeaveCID > -1 && LeaveCID != BaseCID) { LeaveCom(NID, LeaveCID); } if (JoinCID > -1 && JoinCID != BaseCID) { JoinCom(NID, JoinCID); } if (LeaveCID > -1 && JoinCID > -1 && JoinCID != BaseCID && LeaveCID != BaseCID) { SwitchCnt++; } else if (LeaveCID > -1 && LeaveCID != BaseCID) { LeaveCnt++;} else if (JoinCID > -1 && JoinCID != BaseCID) { JoinCnt++;} AcceptCnt++; if ((iter + 1) % EvalLambdaIter == 0) { IterTm.Tick(); MLEGradAscentGivenCAG(0.01, 3); OptL = Likelihood(); } else{ OptL = PrevL + DeltaL; } if (BestL <= OptL && CIDNSetV.Len() > 0) { BestCmtySetV = CIDNSetV; BestLV = LambdaV; BestL = OptL; } } if (iter > 0 && (iter % ProbBinSz == 0) && PlotFPrx.Len() > 0) { IterLBV.Add(TIntFltPr(iter, OptL)); IterSwitchV.Add(TIntFltPr(iter, (double) SwitchCnt / (double) AcceptCnt)); IterLeaveV.Add(TIntFltPr(iter, (double) LeaveCnt / (double) AcceptCnt)); IterJoinV.Add(TIntFltPr(iter, (double) JoinCnt / (double) AcceptCnt)); IterAcceptV.Add(TIntFltPr(iter, (double) AcceptCnt / (double) ProbBinSz)); SwitchCnt = JoinCnt = LeaveCnt = AcceptCnt = 0; } PrevL = 
OptL; if ((iter + 1) % 10000 == 0) { printf("\r%d iterations completed [%.2f]", iter, (double) iter / (double) MaxIter); } } // plot the likelihood and acceptance probabilities if the plot file name is given if (PlotFPrx.Len() > 0) { TGnuPlot GP1; GP1.AddPlot(IterLBV, gpwLinesPoints, "likelihood"); GP1.SetDataPlotFNm(PlotFPrx + ".likelihood.tab", PlotFPrx + ".likelihood.plt"); TStr TitleStr = TStr::Fmt(" N:%d E:%d", Nodes, Edges); GP1.SetTitle(PlotFPrx + ".likelihood" + TitleStr); GP1.SavePng(PlotFPrx + ".likelihood.png"); TGnuPlot GP2; GP2.AddPlot(IterSwitchV, gpwLinesPoints, "Switch"); GP2.AddPlot(IterLeaveV, gpwLinesPoints, "Leave"); GP2.AddPlot(IterJoinV, gpwLinesPoints, "Join"); GP2.AddPlot(IterAcceptV, gpwLinesPoints, "Accept"); GP2.SetTitle(PlotFPrx + ".transition"); GP2.SetDataPlotFNm(PlotFPrx + "transition_prob.tab", PlotFPrx + "transition_prob.plt"); GP2.SavePng(PlotFPrx + "transition_prob.png"); } CIDNSetV = BestCmtySetV; LambdaV = BestLV; InitNodeData(); MLEGradAscentGivenCAG(0.001, 100); printf("\nMCMC completed (best likelihood: %.2f) [%s]\n", BestL, TotalTm.GetTmStr()); }
// Standardize first and lastnames into <last_name>_<first name innitial> TStr TStrUtil::GetStdName(TStr AuthorName) { TStr StdName; AuthorName.ToLc(); AuthorName.ChangeChAll('\n', ' '); AuthorName.ChangeChAll('.', ' '); // if there is a number in the name, remove it and everything after it int i, pos = 0; while (pos<AuthorName.Len() && (AuthorName[pos]!='#' && !TCh::IsNum(AuthorName[pos]))) { pos++; } if (pos < AuthorName.Len()) { AuthorName = AuthorName.GetSubStr(0, pos-1).ToTrunc(); } if (AuthorName.Empty()) { return TStr::GetNullStr(); } // replace everything after '(' int b = AuthorName.SearchCh('('); if (b != -1) { AuthorName = AuthorName.GetSubStr(0, b-1).ToTrunc(); } // skip if contains ')' if (AuthorName .SearchCh(')')!=-1) { return TStr::GetNullStr(); } // skip if it is not a name if (AuthorName .SearchStr("figures")!=-1 || AuthorName .SearchStr("macros")!=-1 || AuthorName .SearchStr("univ")!=-1 || AuthorName .SearchStr("institute")!=-1) { return TStr::GetNullStr(); } // remove all non-letters (latex tags, ...) TChA NewName; for (i = 0; i < AuthorName.Len(); i++) { const char Ch = AuthorName[i]; if (TCh::IsAlpha(Ch) || TCh::IsWs(Ch) || Ch=='-') { NewName += Ch; } } StdName = NewName; StdName.ToTrunc(); TStrV AuthNmV; StdName.SplitOnWs(AuthNmV); // too short -- not a name if (! AuthNmV.Empty() && AuthNmV.Last() == "jr") AuthNmV.DelLast(); if (AuthNmV.Len() < 2) return TStr::GetNullStr(); const TStr LastNm = AuthNmV.Last(); if (! TCh::IsAlpha(LastNm[0]) || LastNm.Len() == 1) return TStr::GetNullStr(); IAssert(isalpha(AuthNmV[0][0])); return TStr::Fmt("%s_%c", LastNm.CStr(), AuthNmV[0][0]); }
/// Creates a memory input stream over a private copy of the characters of
/// Str. The buffer holds exactly Str.Len() characters (no terminating zero
/// is stored); the read position BfC starts at 0.
TMIn::TMIn(const TStr& Str):
  TSBase("Input-Memory"), TSIn("Input-Memory"),
  Bf(NULL), BfC(0), BfL(0){
  BfL=Str.Len();
  char* StrBf=new char[BfL];
  strncpy(StrBf, Str.CStr(), BfL);
  Bf=StrBf;
}