void TTmProfiler::PrintReport(const TStr& ProfileNm) const {
  const double TimerSumSec = GetTimerSumSec();
  printf("-- %s --\n", ProfileNm.CStr());
  printf("Sum: (%.2f sec):\n", TimerSumSec);
  int TimerId = GetTimerIdFFirst();
  while (GetTimerIdFNext(TimerId)) {
    // get timer name, right-padded to the longest registered name
    TStr TimerNm = GetTimerNm(TimerId);
    TimerNm = TStr::GetSpaceStr(TInt::GetMx(0, MxNmLen - TimerNm.Len())) + TimerNm;
    // get timer time and percentage
    if (TimerSumSec > 0.0) {
      const double TimerSec = GetTimerSec(TimerId);
      const double TimerPerc = TimerSec / TimerSumSec * 100.0;
      printf(" %s: %.2fs [%.2f%%]\n", TimerNm.CStr(), TimerSec, TimerPerc);
    } else {
      printf(" %s: -\n", TimerNm.CStr());
    }
  }
  printf("--\n");
}
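// Usage sketch (illustration only, not part of the original source): how the
// profiler above is typically driven. It assumes TTmProfiler exposes
// StartTimer/StopTimer for registered timer ids, as in GLib's tm.h; if the
// method names differ in your version, adjust accordingly.
void ProfilerUsageSketch() {
  TTmProfiler Profiler;
  const int LoadTmId = Profiler.AddTimer("Load");
  const int ParseTmId = Profiler.AddTimer("Parse");
  Profiler.StartTimer(LoadTmId);
  // ... do the loading work ...
  Profiler.StopTimer(LoadTmId);
  Profiler.StartTimer(ParseTmId);
  // ... do the parsing work ...
  Profiler.StopTimer(ParseTmId);
  // prints per-timer seconds and percentages using PrintReport above
  Profiler.PrintReport("Example");
}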
//////////////////////////////////////
// File-Download-Function
void TSASFunFile::LoadFunFileV(const TStr& FPath, TSAppSrvFunV& SrvFunV) {
  TFFile File(FPath, "", false); TStr FNm;
  while (File.Next(FNm)) {
    TStr FExt = FNm.GetFExt();
    TStr FUrl = FNm.GetSubStr(FPath.Len());
    FUrl.ChangeChAll('\\', '/');
    printf("%s %s %s\n", FNm.CStr(), FExt.CStr(), FUrl.CStr());
    if (FExt == ".htm") {
      SrvFunV.Add(TSASFunFile::New(FUrl, FNm, THttp::TextHtmlFldVal));
    } else if (FExt == ".html") {
      SrvFunV.Add(TSASFunFile::New(FUrl, FNm, THttp::TextHtmlFldVal));
    } else if (FExt == ".js") {
      SrvFunV.Add(TSASFunFile::New(FUrl, FNm, THttp::TextJavaScriptFldVal));
    } else if (FExt == ".css") {
      SrvFunV.Add(TSASFunFile::New(FUrl, FNm, THttp::TextCssFldVal));
    } else if (FExt == ".jpg") {
      SrvFunV.Add(TSASFunFile::New(FUrl, FNm, THttp::ImageJpgFldVal));
    } else if (FExt == ".jpeg") {
      SrvFunV.Add(TSASFunFile::New(FUrl, FNm, THttp::ImageJpgFldVal));
    } else if (FExt == ".gif") {
      SrvFunV.Add(TSASFunFile::New(FUrl, FNm, THttp::ImageGifFldVal));
    } else {
      printf("Unknown MIME type for extension '%s' for file '%s'\n", FExt.CStr(), FNm.CStr());
      SrvFunV.Add(TSASFunFile::New(FUrl, FNm, THttp::AppOctetFldVal));
    }
  }
}
/////////////////////////////////////////////////
// 2 populations SIR model
TSir2Model::TSir2Model(double _N0M, TFltTr N0MP, double _I0M, TFltTr I0MP,
    double _N0B, TFltTr N0BP, double _I0B, TFltTr I0BP, int _T0, TFltTr T0P,
    double _BetaM, TFltTr BetaMP, double _GammaM, TFltTr GammaMP,
    double _BetaB, TFltTr BetaBP, double _GammaB, TFltTr GammaBP,
    double _BetaMB, TFltTr BetaMBP, double _BetaBM, TFltTr BetaBMP,
    TStr InFNm, int ColId1, int ColId2) :
    N0M(_N0M), I0M(_I0M), N0B(_N0B), I0B(_I0B), T0(_T0),
    BetaM(_BetaM), GammaM(_GammaM), BetaB(_BetaB), GammaB(_GammaB),
    BetaMB(_BetaMB), BetaBM(_BetaBM) {
  // remember the search range for each parameter
  ParamMnMnRngV.Add(N0MP);  ParamMnMnRngV.Add(I0MP);
  ParamMnMnRngV.Add(N0BP);  ParamMnMnRngV.Add(I0BP);
  ParamMnMnRngV.Add(T0P);
  ParamMnMnRngV.Add(BetaMP);  ParamMnMnRngV.Add(GammaMP);
  ParamMnMnRngV.Add(BetaBP);  ParamMnMnRngV.Add(GammaBP);
  ParamMnMnRngV.Add(BetaMBP); ParamMnMnRngV.Add(BetaBMP);
  // load media and blog volume time series, if a data file is given
  if (InFNm.Len() > 0 && TFile::Exists(InFNm)) {
    TEpidemModel::LoadTxt(InFNm, ColId1, MediaV);
    TEpidemModel::LoadTxt(InFNm, ColId2, BlogV);
  }
}
TTm TTm::GetTmFromWebLogTimeStr(const TStr& TimeStr,
    const char TimeSepCh, const char MSecSepCh) {
  int TimeStrLen = TimeStr.Len();
  // hour
  TChA ChA; int ChN = 0;
  while ((ChN < TimeStrLen) && (TimeStr[ChN] != TimeSepCh)) { ChA += TimeStr[ChN]; ChN++; }
  TStr HourStr = ChA;
  // minute
  ChA.Clr(); ChN++;
  while ((ChN < TimeStrLen) && (TimeStr[ChN] != TimeSepCh)) { ChA += TimeStr[ChN]; ChN++; }
  TStr MinStr = ChA;
  // second
  ChA.Clr(); ChN++;
  while ((ChN < TimeStrLen) && (TimeStr[ChN] != MSecSepCh)) { ChA += TimeStr[ChN]; ChN++; }
  TStr SecStr = ChA;
  // milli-second
  ChA.Clr(); ChN++;
  while (ChN < TimeStrLen) { ChA += TimeStr[ChN]; ChN++; }
  TStr MSecStr = ChA;
  // normalize milliseconds to exactly three digits
  if (MSecStr.Len() > 3) {
    MSecStr = MSecStr.GetSubStr(0, 2);
  } else if (MSecStr.Len() == 1) {
    MSecStr += "00";
  } else if (MSecStr.Len() == 2) {
    MSecStr += "0";
  }
  // transform to numbers
  int HourN = HourStr.GetInt(0);
  int MinN = MinStr.GetInt(0);
  int SecN = SecStr.GetInt(0);
  int MSecN = MSecStr.GetInt(0);
  // construct time
  TTm Tm(-1, -1, -1, -1, HourN, MinN, SecN, MSecN);
  // return time
  return Tm;
}
/////////////////////////////////////////////////
// Cyc-Base
int TCycBs::AddVNm(const TStr& VNm) {
  int VId;
  if (VNmToVrtxH.IsKey(VNm.CStr(), VId)) {
    return VId;
  } else {
    VId = VNmToVrtxH.AddKey(VNm);
    TCycVrtx& Vrtx = VNmToVrtxH[VId];
    Vrtx.PutVId(VId);
    // set flags based on vertex-name string
    if (VNm.IsPrefix("~")) {
      Vrtx.SetFlag(cvfBackLink);
    } else if (VNm.IsPrefix("#$") && (!VNm.IsChIn(' '))) {
      Vrtx.SetFlag(cvfCycL);
      if (VNm.Len() > 2) {
        char Ch = VNm[2];
        if (('a' <= Ch) && (Ch <= 'z')) {
          Vrtx.SetFlag(cvfCycLPred);
        } else {
          Vrtx.SetFlag(cvfCycLConst);
        }
      }
    } else if (VNm.IsPrefix("(#$")) {
      Vrtx.SetFlag(cvfCycL);
      Vrtx.SetFlag(cvfCycLExpr);
    } else {
      if (VNm.IsFlt()) {
        Vrtx.SetFlag(cvfNum);
      } else {
        Vrtx.SetFlag(cvfStr);
      }
    }
    return VId;
  }
}
TMIn::TMIn(const TStr& Str) :
    TSBase("Input-Memory"), TSIn("Input-Memory"), Bf(NULL), BfC(0), BfL(0) {
  BfL = Str.Len();
  Bf = new char[BfL];
  strncpy(Bf, Str.CStr(), BfL);
}
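// Usage sketch (illustration only): treating an in-memory string as an input
// stream via the TMIn constructor above and reading it back character by
// character. Eof() and GetCh() are the standard TSIn stream calls.
void CountLinesInStr(const TStr& Str) {
  TMIn MIn(Str);
  int Lines = 0;
  while (! MIn.Eof()) {
    if (MIn.GetCh() == '\n') { Lines++; }
  }
  printf("lines: %d\n", Lines);
}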
void TWebFetchSendBatchJson::Send() {
  TStr BodyStr = TJsonVal::GetStrFromVal(JsonArray);
  TMem BodyMem; BodyMem.AddBf(BodyStr.CStr(), BodyStr.Len());
  PHttpRq HttpRq = THttpRq::New(hrmPost, TUrl::New(UrlStr), THttp::AppJSonFldVal, BodyMem);
  FetchHttpRq(HttpRq);
}
void THttpChDef::SetLcCh(const TStr& Str) {
  for (int ChN = 1; ChN < Str.Len(); ChN++) { LcChV[Str[ChN]-TCh::Mn] = TCh(Str[0]); }
}
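// Illustration (not part of the original source) of the convention shared by
// the Set*Ch helpers above and below: the first character of the argument
// string is the target value, and every remaining character is mapped to it.
// A self-contained sketch with a plain array instead of the GLib tables:
static char LcTable[256];

void SetLcSketch(const char* Str) {
  // Str[0] is the lowercase value; Str[1..] are the characters that map to it
  for (int ChN = 1; Str[ChN] != '\0'; ChN++) {
    LcTable[(unsigned char)Str[ChN]] = Str[0];
  }
}

void BuildLcTableSketch() {
  SetLcSketch("aA");  // 'A' now lowercases to 'a'
  SetLcSketch("bB");  // 'B' now lowercases to 'b'
}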
void TBagOfWords::AddFtr(const TStr& Val, TIntFltKdV& SpV) const {
  // tokenize
  TStrV TokenStrV(Val.Len() / 5, 0);
  GetFtr(Val, TokenStrV);
  // create sparse vector
  AddFtr(TokenStrV, SpV);
}
void TSysConsole::Put(const TStr& Str) {
  DWORD ChsWritten;
  WriteConsole(hStdOut, Str.CStr(), Str.Len(), &ChsWritten, NULL);
  IAssert(ChsWritten == DWORD(Str.Len()));
}
void TLxChDef::SetChTy(const TLxChTy& ChTy, const TStr& Str) {
  for (int CC = 0; CC < Str.Len(); CC++) { ChTyV[Str[CC]-TCh::Mn] = TInt(ChTy); }
}
TStr TEnv::GetArgPostfix(const TStr& PrefixStr) const {
  int ArgN = GetPrefixArgN(PrefixStr);
  IAssert(ArgN != -1);
  TStr ArgStr = GetArg(ArgN);
  return ArgStr.GetSubStr(PrefixStr.Len(), ArgStr.Len());
}
/// MCMC fitting
void TAGMFit::RunMCMC(const int& MaxIter, const int& EvalLambdaIter, const TStr& PlotFPrx) {
  TExeTm IterTm, TotalTm;
  double PrevL = Likelihood(), DeltaL = 0;
  double BestL = PrevL;
  printf("initial likelihood = %f\n", PrevL);
  TIntFltPrV IterTrueLV, IterJoinV, IterLeaveV, IterAcceptV, IterSwitchV, IterLBV;
  TIntPrV IterTotMemV;
  TIntV IterV;
  TFltV BestLV;
  TVec<TIntSet> BestCmtySetV;
  int SwitchCnt = 0, LeaveCnt = 0, JoinCnt = 0, AcceptCnt = 0, ProbBinSz;
  int Nodes = G->GetNodes(), Edges = G->GetEdges();
  TExeTm PlotTm;
  ProbBinSz = TMath::Mx(1000, G->GetNodes() / 10); //bin to compute probabilities
  IterLBV.Add(TIntFltPr(1, BestL));

  for (int iter = 0; iter < MaxIter; iter++) {
    IterTm.Tick();
    int NID = -1;
    int JoinCID = -1, LeaveCID = -1;
    SampleTransition(NID, JoinCID, LeaveCID, DeltaL); //sample a move
    double OptL = PrevL;
    if (DeltaL > 0 || Rnd.GetUniDev() < exp(DeltaL)) { //if it is accepted
      IterTm.Tick();
      if (LeaveCID > -1 && LeaveCID != BaseCID) { LeaveCom(NID, LeaveCID); }
      if (JoinCID > -1 && JoinCID != BaseCID) { JoinCom(NID, JoinCID); }
      if (LeaveCID > -1 && JoinCID > -1 && JoinCID != BaseCID && LeaveCID != BaseCID) {
        SwitchCnt++;
      } else if (LeaveCID > -1 && LeaveCID != BaseCID) { LeaveCnt++; }
      else if (JoinCID > -1 && JoinCID != BaseCID) { JoinCnt++; }
      AcceptCnt++;
      if ((iter + 1) % EvalLambdaIter == 0) {
        IterTm.Tick();
        MLEGradAscentGivenCAG(0.01, 3);
        OptL = Likelihood();
      } else {
        OptL = PrevL + DeltaL;
      }
      if (BestL <= OptL && CIDNSetV.Len() > 0) {
        BestCmtySetV = CIDNSetV;
        BestLV = LambdaV;
        BestL = OptL;
      }
    }
    if (iter > 0 && (iter % ProbBinSz == 0) && PlotFPrx.Len() > 0) {
      IterLBV.Add(TIntFltPr(iter, OptL));
      IterSwitchV.Add(TIntFltPr(iter, (double) SwitchCnt / (double) AcceptCnt));
      IterLeaveV.Add(TIntFltPr(iter, (double) LeaveCnt / (double) AcceptCnt));
      IterJoinV.Add(TIntFltPr(iter, (double) JoinCnt / (double) AcceptCnt));
      IterAcceptV.Add(TIntFltPr(iter, (double) AcceptCnt / (double) ProbBinSz));
      SwitchCnt = JoinCnt = LeaveCnt = AcceptCnt = 0;
    }
    PrevL = OptL;
    if ((iter + 1) % 10000 == 0) {
      printf("\r%d iterations completed [%.2f]", iter, (double) iter / (double) MaxIter);
    }
  }

  // plot the likelihood and acceptance probabilities if the plot file name is given
  if (PlotFPrx.Len() > 0) {
    TGnuPlot GP1;
    GP1.AddPlot(IterLBV, gpwLinesPoints, "likelihood");
    GP1.SetDataPlotFNm(PlotFPrx + ".likelihood.tab", PlotFPrx + ".likelihood.plt");
    TStr TitleStr = TStr::Fmt(" N:%d E:%d", Nodes, Edges);
    GP1.SetTitle(PlotFPrx + ".likelihood" + TitleStr);
    GP1.SavePng(PlotFPrx + ".likelihood.png");

    TGnuPlot GP2;
    GP2.AddPlot(IterSwitchV, gpwLinesPoints, "Switch");
    GP2.AddPlot(IterLeaveV, gpwLinesPoints, "Leave");
    GP2.AddPlot(IterJoinV, gpwLinesPoints, "Join");
    GP2.AddPlot(IterAcceptV, gpwLinesPoints, "Accept");
    GP2.SetTitle(PlotFPrx + ".transition");
    GP2.SetDataPlotFNm(PlotFPrx + "transition_prob.tab", PlotFPrx + "transition_prob.plt");
    GP2.SavePng(PlotFPrx + "transition_prob.png");
  }
  CIDNSetV = BestCmtySetV;
  LambdaV = BestLV;
  InitNodeData();
  MLEGradAscentGivenCAG(0.001, 100);
  printf("\nMCMC completed (best likelihood: %.2f) [%s]\n", BestL, TotalTm.GetTmStr());
}
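// Usage sketch (illustration only), following the same call sequence as the
// cpm main() further below: fit an AGM model to a graph G with a given number
// of communities and extract the detected communities. G, NumComs, RndSeed and
// MaxIter are assumed to be set up by the caller.
void RunAgmFitSketch(const PUNGraph& G, const int NumComs, const int RndSeed, const int MaxIter) {
  TAGMFit AGMFit(G, NumComs, RndSeed);
  // EvalLambdaIter=10; an empty plot prefix disables the gnuplot output above
  AGMFit.RunMCMC(MaxIter, 10, "");
  TVec<TIntV> CmtyVV;
  AGMFit.GetCmtyVV(CmtyVV, 0.9999);
  printf("detected %d communities\n", CmtyVV.Len());
}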
/////////////////////////////////////////////////
// Time-Profiler - poor-man's profiler
int TTmProfiler::AddTimer(const TStr& TimerNm) {
  MxNmLen = TInt::GetMx(MxNmLen, TimerNm.Len());
  return TimerH.AddKey(TimerNm);
}
// load from allactors.zip that was prepared by Brad Malin in 2005
PImdbNet TImdbNet::LoadTxt(const TStr& ActorFNm) {
  PImdbNet Net = TImdbNet::New();
  TStrV ColV;
  char line [2024];
  int NLines = 0, DupEdge = 0, Year, Position, ActorNId, MovieNId;
  TIntH ActorNIdH;
  THash<TIntPr, TInt> MovieNIdH;
  FILE *F = fopen(ActorFNm.CStr(), "rt");
  fgets(line, 2024, F);
  while (! feof(F)) {
    memset(line, 0, 2024);
    fgets(line, 2024, F);
    if (strlen(line) == 0) break;
    TStr(line).SplitOnAllCh('|', ColV, false);
    IAssert(ColV.Len() == 7);
    const int NameStrId = Net->AddStr(ColV[0].GetTrunc().GetLc()+" "+ColV[1].GetTrunc().GetLc());
    const int MovieStrId = Net->AddStr(ColV[2].GetTrunc().GetLc());
    TStr YearStr = ColV[3].GetTrunc();
    if (YearStr.Len() > 4) YearStr = YearStr.GetSubStr(0, 3);
    Year = 1;
    YearStr.IsInt(Year);
    const TMovieTy MovieTy = TImdbNet::GetMovieTy(ColV[4]);
    Position = TInt::Mx;
    ColV[5].GetTrunc().IsInt(Position);
    IAssert(ColV[6].GetTrunc()[0] == 'M' || ColV[6].GetTrunc()[0] == 'F');
    const bool IsMale = ColV[6].GetTrunc()[0] == 'M';
    // create nodes
    if (ActorNIdH.IsKey(NameStrId)) {
      ActorNId = ActorNIdH.GetDat(NameStrId);
    } else {
      ActorNId = Net->AddNode(-1, TImdbNode(NameStrId, Year, Position, IsMale));
      ActorNIdH.AddDat(NameStrId, ActorNId);
    }
    if (MovieNIdH.IsKey(TIntPr(MovieStrId, Year))) {
      MovieNId = MovieNIdH.GetDat(TIntPr(MovieStrId, Year));
    } else {
      MovieNId = Net->AddNode(-1, TImdbNode(NameStrId, Year, MovieTy));
      MovieNIdH.AddDat(TIntPr(MovieStrId, Year), MovieNId);
    }
    if (! Net->IsEdge(ActorNId, MovieNId)) {
      Net->AddEdge(ActorNId, MovieNId);
    } else {
      DupEdge++;
    }
    if (++NLines % 100000 == 0) printf("\r %dk ", NLines/1000);
  }
  fclose(F);
  printf("duplicate edges: %d\n", DupEdge);
  printf("nodes: %d\n", Net->GetNodes());
  printf("edges: %d\n", Net->GetEdges());
  printf("actors: %d\n", ActorNIdH.Len());
  printf("movies: %d\n", MovieNIdH.Len());
  // set the actor year to the year of his first movie
  int NUpdates = 0;
  for (TNet::TNodeI NI = Net->BegNI(); NI < Net->EndNI(); NI++) {
    if (NI().IsActor()) {
      int MinYear = NI().GetYear();
      for (int e = 0; e < NI.GetOutDeg(); e++) {
        const TImdbNode& NodeDat = Net->GetNDat(NI.GetOutNId(e));
        if (NodeDat.IsMovie()) MinYear = TMath::Mn(MinYear, NodeDat.GetYear());
      }
      if (NI().Year != MinYear) NUpdates++;
      NI().Year = MinYear;
    }
  }
  printf("updated actor times: %d\n", NUpdates);
  return Net;
}
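// Usage sketch (illustration only): load the actor-movie network from the
// text dump (the file name is a placeholder) and print its size with the same
// counters the loader above reports.
void LoadImdbSketch() {
  PImdbNet Net = TImdbNet::LoadTxt("allactors.txt");
  printf("loaded network: %d nodes, %d edges\n", Net->GetNodes(), Net->GetEdges());
}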
PBowDocBs TBowFl::LoadSvmLightTxt(
    const TStr& DocDefFNm, const TStr& WordDefFNm,
    const TStr& TrainDataFNm, const TStr& TestDataFNm,
    const int& MxDocs) {
  //TODO: use MxDocs
  // prepare document set
  PBowDocBs BowDocBs = TBowDocBs::New();
  int MOneCId = BowDocBs->CatNmToFqH.AddKey("-1");
  int POneCId = BowDocBs->CatNmToFqH.AddKey("+1");
  // document definition
  bool DocDefP = false;
  if (!DocDefFNm.Empty() && (TFile::Exists(DocDefFNm))) {
    // (DId "DocNm"<eoln>)*
    PSIn SIn = TFIn::New(DocDefFNm);
    TILx Lx(SIn, TFSet()|iloRetEoln|iloSigNum|iloExcept);
    Lx.GetSym(syInt, syEof);
    while (Lx.Sym == syInt) {
      int DId = Lx.Int;
      Lx.GetSym(syColon);
      Lx.GetSym(syQStr); TStr DocNm = Lx.Str;
      Lx.GetSym(syEoln);
      Lx.GetSym(syInt, syEof);
      int NewDId = BowDocBs->DocNmToDescStrH.AddKey(DocNm);
      EAssertR(DId == NewDId, "Document-Ids don't match.");
    }
    DocDefP = true;
  }
  // word definition
  if (!WordDefFNm.Empty() && (TFile::Exists(WordDefFNm))) {
    BowDocBs->WordStrToDescH.AddDat("Undef").Fq = 0; // ... to have WId==0
    PSIn SIn = TFIn::New(WordDefFNm);
    TILx Lx(SIn, TFSet()|iloRetEoln|iloSigNum|iloExcept);
    Lx.GetSym(syQStr, syEof);
    while (Lx.Sym == syQStr) {
      TStr WordStr = Lx.Str;
      Lx.GetSym(syInt); int WId = Lx.Int;
      Lx.GetSym(syInt); int WordFq = Lx.Int;
      Lx.GetSym(syEoln);
      Lx.GetSym(syQStr, syEof);
      int NewWId = BowDocBs->WordStrToDescH.AddKey(WordStr);
      EAssertR(WId == NewWId, "Word-Ids don't match.");
      BowDocBs->WordStrToDescH[WId].Fq = WordFq;
    }
  }
  // train & test data
  int MxWId = -1;
  TIntIntH WIdToFqH;
  // train data
  if (!TrainDataFNm.Empty()) {
    PSIn SIn = TFIn::New(TrainDataFNm);
    TILx Lx(SIn, TFSet()|iloCmtAlw|iloRetEoln|iloSigNum|iloExcept);
    // skip comment lines
    while (Lx.GetSym(syInt, syEoln, syEof) == syEoln) {}
    // parse data lines
    while (Lx.Sym == syInt) {
      // document
      TStr DocNm = TInt::GetStr(BowDocBs->GetDocs());
      int DId;
      if (DocDefP) {
        DId = BowDocBs->DocNmToDescStrH.GetKeyId(DocNm);
      } else {
        DId = BowDocBs->DocNmToDescStrH.AddKey(DocNm);
      }
      BowDocBs->TrainDIdV.Add(DId);
      // category (class value)
      int CId = (Lx.Int == -1) ? MOneCId : POneCId;
      BowDocBs->DocCIdVV.Add();
      IAssert(DId == BowDocBs->DocCIdVV.Len()-1);
      BowDocBs->DocCIdVV.Last().Gen(1, 0);
      BowDocBs->DocCIdVV.Last().Add(CId);
      // words (attributes)
      PBowSpV SpV = TBowSpV::New(DId);
      BowDocBs->DocSpVV.Add(SpV);
      IAssert(DId == BowDocBs->DocSpVV.Len()-1);
      Lx.GetSym(syInt, syEoln);
      while (Lx.Sym == syInt) {
        int WId = Lx.Int;
        Lx.GetSym(syColon);
        Lx.GetSym(syFlt); double WordFq = Lx.Flt;
        Lx.GetSym(syInt, syEoln);
        SpV->AddWIdWgt(WId, WordFq);
        if (MxWId == -1) { MxWId = WId; } else { MxWId = TInt::GetMx(MxWId, WId); }
        WIdToFqH.AddDat(WId)++;
      }
      if (!Lx.CmtStr.Empty()) {
        // change document name to 'N' if comment 'docDesc=N'
        TStr CmtStr = Lx.CmtStr;
        static TStr DocNmPrefixStr = "docDesc=";
        if (CmtStr.IsPrefix(DocNmPrefixStr)) {
          TStr NewDocNm =
            TStr("D") + CmtStr.GetSubStr(DocNmPrefixStr.Len(), CmtStr.Len()-1);
          BowDocBs->DocNmToDescStrH.DelKey(DocNm);
          int NewDId = BowDocBs->DocNmToDescStrH.AddKey(NewDocNm);
          IAssert(DId == NewDId);
        }
      }
      SpV->Trunc();
      while (Lx.GetSym(syInt, syEoln, syEof) == syEoln) {}
    }
  }
  // test data
  if (!TestDataFNm.Empty()) {
    PSIn SIn = TFIn::New(TestDataFNm);
    TILx Lx(SIn, TFSet()|iloCmtAlw|iloRetEoln|iloSigNum|iloExcept);
    while (Lx.GetSym(syInt, syEoln, syEof) == syEoln) {}
    while (Lx.Sym == syInt) {
      // document
      TStr DocNm = TInt::GetStr(BowDocBs->GetDocs());
      int DId;
      if (DocDefP) {
        DId = BowDocBs->DocNmToDescStrH.GetKeyId(DocNm);
      } else {
        DId = BowDocBs->DocNmToDescStrH.AddKey(DocNm);
      }
      BowDocBs->TestDIdV.Add(DId);
      // category (class value)
      int CId = (Lx.Int == -1) ? MOneCId : POneCId;
      BowDocBs->DocCIdVV.Add();
      IAssert(DId == BowDocBs->DocCIdVV.Len()-1);
      BowDocBs->DocCIdVV.Last().Gen(1, 0);
      BowDocBs->DocCIdVV.Last().Add(CId);
      // words (attributes)
      PBowSpV SpV = TBowSpV::New(DId);
      BowDocBs->DocSpVV.Add(SpV);
      IAssert(DId == BowDocBs->DocSpVV.Len()-1);
      Lx.GetSym(syInt, syEoln);
      while (Lx.Sym == syInt) {
        int WId = Lx.Int;
        Lx.GetSym(syColon);
        Lx.GetSym(syFlt); double WordFq = Lx.Flt;
        Lx.GetSym(syInt, syEoln);
        SpV->AddWIdWgt(WId, WordFq);
        if (MxWId == -1) { MxWId = WId; } else { MxWId = TInt::GetMx(MxWId, WId); }
        WIdToFqH.AddDat(WId)++;
      }
      if (!Lx.CmtStr.Empty()) {
        // change document name to 'N' if comment 'docDesc=N'
        TStr CmtStr = Lx.CmtStr;
        static TStr DocNmPrefixStr = "docDesc=";
        if (CmtStr.IsPrefix(DocNmPrefixStr)) {
          TStr NewDocNm =
            TStr("D") + CmtStr.GetSubStr(DocNmPrefixStr.Len(), CmtStr.Len()-1);
          BowDocBs->DocNmToDescStrH.DelKey(DocNm);
          int NewDId = BowDocBs->DocNmToDescStrH.AddKey(NewDocNm);
          IAssert(DId == NewDId);
        }
      }
      SpV->Trunc();
      while (Lx.GetSym(syInt, syEoln, syEof) == syEoln) {}
    }
  }
  // add missing words
  for (int WId = 0; WId <= MxWId; WId++) {
    if (!BowDocBs->IsWId(WId)) {
      TStr WordStr = TInt::GetStr(WId, "W%d");
      int _WId = BowDocBs->AddWordStr(WordStr);
      IAssert(WId == _WId);
      TInt Fq;
      if (WIdToFqH.IsKeyGetDat(WId, Fq)) {
        BowDocBs->PutWordFq(WId, Fq);
      }
    }
  }
  BowDocBs->AssertOk();
  return BowDocBs;
}
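// Usage sketch (illustration only): build a bag-of-words document base from
// SVM-light train/test files using the loader above. The file names are
// placeholders; empty strings skip the optional document and word definition
// files, and the final argument (MxDocs) is currently unused per the TODO above.
void LoadSvmLightSketch() {
  PBowDocBs BowDocBs = TBowFl::LoadSvmLightTxt("", "", "train.dat", "test.dat", -1);
  printf("documents: %d\n", BowDocBs->GetDocs());
}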
////////////////////////////////////////////////
// Lexical-Chars
void TLxChDef::SetUcCh(const TStr& Str) {
  for (int CC = 1; CC < Str.Len(); CC++) { UcChV[Str[CC]-TCh::Mn] = TCh(Str[0]); }
}
/////////////////////////////////////////////////
// Tql-Lexical-Chars
void TTqlChDef::SetChTy(const TTqlLxChTy& ChTy, const TStr& Str) {
  for (int ChN = 0; ChN < Str.Len(); ChN++) { ChTyV[Str[ChN]-TCh::Mn] = TInt(ChTy); }
}
TStr TLxChDef::GetUcStr(const TStr& Str) const {
  TChA UcStr;
  for (int ChN = 0; ChN < Str.Len(); ChN++) { UcStr.AddCh(GetUc(Str.GetCh(ChN))); }
  return UcStr;
}
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("cpm. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  Try
  TStr OutFPrx = Env.GetIfArgPrefixStr("-o:", "", "Output file name prefix");
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "football.edgelist",
    "Input edgelist file name. DEMO: AGM with 2 communities");
  const TStr LabelFNm = Env.GetIfArgPrefixStr("-l:", "football.labels",
    "Input file name for node names (Node ID, Node label) ");
  const TInt RndSeed = Env.GetIfArgPrefixInt("-s:", 0, "Random seed for AGM");
  const TFlt Epsilon = Env.GetIfArgPrefixFlt("-e:", 0,
    "Edge probability between the nodes that do not share any community (default (0.0): set it to be 1 / N^2)");
  const TInt Coms = Env.GetIfArgPrefixInt("-c:", 0, "Number of communities (0: determine it by AGM)");

  PUNGraph G = TUNGraph::New();
  TVec<TIntV> CmtyVV;
  TIntStrH NIDNameH;
  if (InFNm == "DEMO") {
    TVec<TIntV> TrueCmtyVV;
    TRnd AGMRnd(RndSeed);
    //generate community bipartite affiliation
    const int ABegin = 0, AEnd = 70, BBegin = 30, BEnd = 100;
    TrueCmtyVV.Add(TIntV());
    TrueCmtyVV.Add(TIntV());
    for (int u = ABegin; u < AEnd; u++) { TrueCmtyVV[0].Add(u); }
    for (int u = BBegin; u < BEnd; u++) { TrueCmtyVV[1].Add(u); }
    G = TAGM::GenAGM(TrueCmtyVV, 0.0, 0.2, AGMRnd);
  } else if (LabelFNm.Len() > 0) {
    G = TSnap::LoadEdgeList<PUNGraph>(InFNm);
    TSsParser Ss(LabelFNm, ssfTabSep);
    while (Ss.Next()) {
      if (Ss.Len() > 0) { NIDNameH.AddDat(Ss.GetInt(0), Ss.GetFld(1)); }
    }
  } else {
    G = TAGMUtil::LoadEdgeListStr<PUNGraph>(InFNm, NIDNameH);
  }
  printf("Graph: %d Nodes %d Edges\n", G->GetNodes(), G->GetEdges());

  int MaxIter = 50 * G->GetNodes() * G->GetNodes();
  if (MaxIter < 0) { MaxIter = TInt::Mx; }
  int NumComs = Coms;
  if (NumComs < 2) {
    int InitComs;
    if (G->GetNodes() > 1000) {
      InitComs = G->GetNodes() / 5;
      NumComs = TAGMUtil::FindComsByAGM(G, InitComs, MaxIter, RndSeed, 1.5, Epsilon, OutFPrx);
    } else {
      InitComs = G->GetNodes() / 5;
      NumComs = TAGMUtil::FindComsByAGM(G, InitComs, MaxIter, RndSeed, 1.2, Epsilon, OutFPrx);
    }
  }
  TAGMFit AGMFit(G, NumComs, RndSeed);
  if (Epsilon > 0) { AGMFit.SetPNoCom(Epsilon); }
  AGMFit.RunMCMC(MaxIter, 10);
  AGMFit.GetCmtyVV(CmtyVV, 0.9999);
  TAGMUtil::DumpCmtyVV(OutFPrx + "cmtyvv.txt", CmtyVV, NIDNameH);
  TAGMUtil::SaveGephi(OutFPrx + "graph.gexf", G, CmtyVV, 1.5, 1.5, NIDNameH);
  AGMFit.PrintSummary();
  Catch
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
}
bool TBagOfWords::Update(const TStr& Val) {
  // tokenize given text (reserve space assuming 5 chars per word)
  TStrV TokenStrV(Val.Len() / 5, 0);
  GetFtr(Val, TokenStrV);
  // process
  return Update(TokenStrV);
}
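// Usage sketch (illustration only): a plausible two-pass use of the TBagOfWords
// calls shown in Update() above and AddFtr() further up - first Update() on the
// raw texts, then AddFtr() to obtain sparse feature vectors. The extractor
// instance is assumed to be constructed and configured elsewhere.
void ExtractBowFtrSketch(TBagOfWords& BagOfWords, const TStrV& TextV, TVec<TIntFltKdV>& SpVV) {
  // first pass: update the extractor's statistics from the raw texts
  for (int TextN = 0; TextN < TextV.Len(); TextN++) {
    BagOfWords.Update(TextV[TextN]);
  }
  // second pass: generate one sparse vector per text
  SpVV.Gen(TextV.Len(), 0);
  for (int TextN = 0; TextN < TextV.Len(); TextN++) {
    TIntFltKdV SpV;
    BagOfWords.AddFtr(TextV[TextN], SpV);
    SpVV.Add(SpV);
  }
}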
void TMongCliContext::Respond(const PHttpResp& HttpResp) {
  const TMem& Body = HttpResp->GetBodyAsMem();
  TStr ResponseHeader = HttpResp->GetHdStr();
  mg_write(Conn, ResponseHeader.CStr(), ResponseHeader.Len());
  mg_write(Conn, Body.GetBf(), Body.Len());
}
void THttpChDef::SetChTy(const THttpChTy& ChTy, const TStr& Str) {
  for (int ChN = 0; ChN < Str.Len(); ChN++) { SetChTy(ChTy, Str[ChN]); }
}
void TSqlDmChDef::SetChTy(const TSqlDmChTy& ChTy, const TStr& Str) {
  for (int CC = 0; CC < Str.Len(); CC++) {
    uchar Ch = Str[CC];
    ChTyV[Ch] = ChTy;
  }
}
TStr THttpChDef::GetLcStr(const TStr& Str) {
  TChA LcStr;
  for (int ChN = 0; ChN < Str.Len(); ChN++) { LcStr += GetLcCh(Str[ChN]); }
  return LcStr;
}
////////////////////////////////////////////////
// Lexical-Chars
void TSqlDmChDef::SetUcCh(const TStr& Str) {
  for (int CC = 1; CC < Str.Len(); CC++) {
    uchar Ch = Str[CC];
    UcChV[Ch] = Str[0];
  }
}
int TSOut::PutStr(const TStr& Str, const bool& ForceInLn) {
  int Cs = UpdateLnLen(Str.Len(), ForceInLn);
  return Cs + PutBf(Str.CStr(), Str.Len());
}
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("cesna. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  Try
  TStr OutFPrx = Env.GetIfArgPrefixStr("-o:", "", "Output Graph data prefix");
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "./1912.edges", "Input edgelist file name");
  const TStr LabelFNm = Env.GetIfArgPrefixStr("-l:", "", "Input file name for node names (Node ID, Node label) ");
  const TStr AttrFNm = Env.GetIfArgPrefixStr("-a:", "./1912.nodefeat", "Input node attribute file name");
  const TStr ANameFNm = Env.GetIfArgPrefixStr("-n:", "./1912.nodefeatnames", "Input file name for node attribute names");
  int OptComs = Env.GetIfArgPrefixInt("-c:", 10, "The number of communities to detect (-1: detect automatically)");
  const int MinComs = Env.GetIfArgPrefixInt("-mc:", 3, "Minimum number of communities to try");
  const int MaxComs = Env.GetIfArgPrefixInt("-xc:", 20, "Maximum number of communities to try");
  const int DivComs = Env.GetIfArgPrefixInt("-nc:", 5, "How many trials for the number of communities");
  const int NumThreads = Env.GetIfArgPrefixInt("-nt:", 4, "Number of threads for parallelization");
  const double AttrWeight = Env.GetIfArgPrefixFlt("-aw:", 0.5, "We maximize (1 - aw) P(Network) + aw * P(Attributes)");
  const double LassoWeight = Env.GetIfArgPrefixFlt("-lw:", 1.0, "Weight for l-1 regularization on learning the logistic model parameters");
  const double StepAlpha = Env.GetIfArgPrefixFlt("-sa:", 0.05, "Alpha for backtracking line search");
  const double StepBeta = Env.GetIfArgPrefixFlt("-sb:", 0.3, "Beta for backtracking line search");
  const double MinFeatFrac = Env.GetIfArgPrefixFlt("-mf:", 0.0, "If the fraction of nodes with positive values for an attribute is smaller than this, we ignore that attribute");

#ifdef USE_OPENMP
  omp_set_num_threads(NumThreads);
#endif

  PUNGraph G;
  TIntStrH NIDNameH;
  TStrHash<TInt> NodeNameH;
  TVec<TFltV> Wck;
  TVec<TIntV> EstCmtyVV;
  if (InFNm.IsStrIn(".ungraph")) {
    TFIn GFIn(InFNm);
    G = TUNGraph::Load(GFIn);
  } else {
    G = TAGMUtil::LoadEdgeListStr<PUNGraph>(InFNm, NodeNameH);
    NIDNameH.Gen(NodeNameH.Len());
    for (int s = 0; s < NodeNameH.Len(); s++) {
      NIDNameH.AddDat(s, NodeNameH.GetKey(s));
    }
  }
  if (LabelFNm.Len() > 0) {
    TSsParser Ss(LabelFNm, ssfTabSep);
    while (Ss.Next()) {
      if (Ss.Len() > 0) { NIDNameH.AddDat(Ss.GetInt(0), Ss.GetFld(1)); }
    }
  }
  printf("Graph: %d Nodes %d Edges\n", G->GetNodes(), G->GetEdges());

  //load attribute
  TIntV NIDV;
  G->GetNIdV(NIDV);
  THash<TInt, TIntV> RawNIDAttrH, NIDAttrH;
  TIntStrH RawFeatNameH, FeatNameH;
  if (ANameFNm.Len() > 0) {
    TSsParser Ss(ANameFNm, ssfTabSep);
    while (Ss.Next()) {
      if (Ss.Len() > 0) { RawFeatNameH.AddDat(Ss.GetInt(0), Ss.GetFld(1)); }
    }
  }
  TCesnaUtil::LoadNIDAttrHFromNIDKH(NIDV, AttrFNm, RawNIDAttrH, NodeNameH);
  TCesnaUtil::FilterLowEntropy(RawNIDAttrH, NIDAttrH, RawFeatNameH, FeatNameH, MinFeatFrac);

  TExeTm RunTm;
  TCesna CS(G, NIDAttrH, 10, 10);
  if (OptComs == -1) {
    printf("finding number of communities\n");
    OptComs = CS.FindComs(NumThreads, MaxComs, MinComs, DivComs, "", false, 0.1, StepAlpha, StepBeta);
  }
  CS.NeighborComInit(OptComs);
  CS.SetWeightAttr(AttrWeight);
  CS.SetLassoCoef(LassoWeight);
  if (NumThreads == 1 || G->GetEdges() < 1000) {
    CS.MLEGradAscent(0.0001, 1000 * G->GetNodes(), "", StepAlpha, StepBeta);
  } else {
    CS.MLEGradAscentParallel(0.0001, 1000, NumThreads, "", StepAlpha, StepBeta);
  }
  CS.GetCmtyVV(EstCmtyVV, Wck);
  TAGMUtil::DumpCmtyVV(OutFPrx + "cmtyvv.txt", EstCmtyVV, NIDNameH);

  // dump the learned logistic weights Wck to a tab-separated file
  FILE* F = fopen((OutFPrx + "weights.txt").CStr(), "wt");
  if (FeatNameH.Len() == Wck[0].Len()) {
    fprintf(F, "#");
    for (int k = 0; k < FeatNameH.Len(); k++) {
      fprintf(F, "%s", FeatNameH[k].CStr());
      if (k < FeatNameH.Len() - 1) { fprintf(F, "\t"); }
    }
    fprintf(F, "\n");
  }
  for (int c = 0; c < Wck.Len(); c++) {
    for (int k = 0; k < Wck[c].Len(); k++) {
      fprintf(F, "%f", Wck[c][k].Val);
      if (k < Wck[c].Len() - 1) { fprintf(F, "\t"); }
    }
    fprintf(F, "\n");
  }
  fclose(F);
  Catch
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
}
void TFRnd::PutStr(const TStr& Str){ PutBf(Str.CStr(), Str.Len()+1); }
TTm TTm::GetTmFromWebLogDateTimeStr(const TStr& DateTimeStr,
    const char DateSepCh, const char TimeSepCh, const char MSecSepCh) {
  int DateTimeStrLen = DateTimeStr.Len();
  // year
  TChA ChA; int ChN = 0;
  while ((ChN < DateTimeStrLen) && (DateTimeStr[ChN] != DateSepCh)) { ChA += DateTimeStr[ChN]; ChN++; }
  TStr YearStr = ChA;
  // month
  ChA.Clr(); ChN++;
  while ((ChN < DateTimeStrLen) && (DateTimeStr[ChN] != DateSepCh)) { ChA += DateTimeStr[ChN]; ChN++; }
  TStr MonthStr = ChA;
  // day
  ChA.Clr(); ChN++;
  while ((ChN < DateTimeStrLen) && (DateTimeStr[ChN] != ' ')) { ChA += DateTimeStr[ChN]; ChN++; }
  TStr DayStr = ChA;
  // hour
  ChA.Clr(); ChN++;
  while ((ChN < DateTimeStrLen) && (DateTimeStr[ChN] != TimeSepCh)) { ChA += DateTimeStr[ChN]; ChN++; }
  TStr HourStr = ChA;
  // minute
  ChA.Clr(); ChN++;
  while ((ChN < DateTimeStrLen) && (DateTimeStr[ChN] != TimeSepCh)) { ChA += DateTimeStr[ChN]; ChN++; }
  TStr MinStr = ChA;
  // second
  ChA.Clr(); ChN++;
  while ((ChN < DateTimeStrLen) && (DateTimeStr[ChN] != MSecSepCh)) { ChA += DateTimeStr[ChN]; ChN++; }
  TStr SecStr = ChA;
  // milli-second
  ChA.Clr(); ChN++;
  while (ChN < DateTimeStrLen) { ChA += DateTimeStr[ChN]; ChN++; }
  TStr MSecStr = ChA;
  // transform to numbers
  int YearN = YearStr.GetInt(-1);
  int MonthN = MonthStr.GetInt(-1);
  int DayN = DayStr.GetInt(-1);
  int HourN = HourStr.GetInt(0);
  int MinN = MinStr.GetInt(0);
  int SecN = SecStr.GetInt(0);
  int MSecN = MSecStr.GetInt(0);
  // construct time
  /* //!!peter: convert month name to number and flip date/day (oracle: 10-FEB-05)
  if ((MonthN==-1)&&(isalpha(MonthStr.CStr()[0]))){
    if ((MonthN=MonthParser.GetMonthN(MonthStr))!=-1){
      int Y=DayN; DayN=YearN; YearN=Y<100?Y+2000:Y; } } */
  TTm Tm;
  if ((YearN != -1) && (MonthN != -1) && (DayN != -1)) {
    Tm = TTm(YearN, MonthN, DayN, -1, HourN, MinN, SecN, MSecN);
  }
  // return time
  return Tm;
}
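// Usage sketch (illustration only): parsing a web-log style timestamp with the
// function above and reading the fields back. The getter names (GetYear etc.)
// are assumed to be the standard TTm accessors.
void ParseLogTimeSketch() {
  // e.g. "2007-08-14 12:30:45.123" with '-' as date, ':' as time and
  // '.' as millisecond separator
  TTm Tm = TTm::GetTmFromWebLogDateTimeStr("2007-08-14 12:30:45.123", '-', ':', '.');
  printf("%d-%02d-%02d %02d:%02d:%02d.%03d\n",
    Tm.GetYear(), Tm.GetMonth(), Tm.GetDay(),
    Tm.GetHour(), Tm.GetMin(), Tm.GetSec(), Tm.GetMSec());
}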