// IN-OUT edges are swapped (so that the prog runs faster) // Send message via IN edge proportional to the OUT edge weight void TWgtNet::ReinforceEdges(const int& NIters) { THash<TInt, TFlt> OutWgtSumH; for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { double wgt = 0; for (int e = 0; e < NI.GetOutDeg(); e++) { wgt += NI.GetOutEDat(e); } OutWgtSumH.AddDat(NI.GetId(), wgt); } printf("Reinforcing edges for %d iterations\n", NIters); // iterate TExeTm ExeTm; for (int iter = 0; iter < NIters; iter++) { for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { const double X = TInt::Rnd.GetUniDev() * OutWgtSumH.GetDat(NI.GetId()); double x = 0; int e = 0; for ( ; x + NI.GetOutEDat(e) < X; e++) { x += NI.GetOutEDat(e); } IAssert(IsEdge(NI.GetOutNId(e), NI.GetId())); GetEDat(NI.GetOutNId(e), NI.GetId()) += 1; // reinforce the edge OutWgtSumH.GetDat(NI.GetOutNId(e)) += 1; } if (iter % (NIters/100) == 0) { printf("\r%d [%s]", iter, ExeTm.GetStr()); } } printf(" done.\n"); }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("\nGenerate stochastic block model networks. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const int noNodes = Env.GetIfArgPrefixInt("-n:", 512, "Number of nodes in synthetic graph (default: 512)\n"); const double pIn = Env.GetIfArgPrefixFlt("-pIn:", 0.8, "pIn (default: 0.8)\n"); const double pOut = Env.GetIfArgPrefixFlt("-pOut:", 0.2, "pOut (default: 0.2)\n"); const int noCommunities = Env.GetIfArgPrefixInt("-k:", 2, "Number of communities in graph (default: 2)\n"); TGraphAlgo graphAlgo; graphAlgo.generateNetwork(noNodes, noCommunities, pIn, pOut); TStr networkFilename = TStr("test-network-sbm.txt"); TStr networkAdjacencyMatrixFilename = TStr("test-network-sbm-adjacency-matrix.txt"); TStr networkGexfFilename = TStr("test-network-sbm.gexf"); TStr networkLouvainFormatFilename = TStr("test-network-sbm-louvain.txt"); TStr louvainTreeFilename = TStr("test-network-sbm-louvain.tree"); TStr communityLabelsFilename = TStr("test-network-sbm-assignments.txt"); graphAlgo.saveGroundTruth(networkFilename); graphAlgo.saveGroundTruthAdjacencyMatrix(networkAdjacencyMatrixFilename); graphAlgo.saveGroundTruthGexf(networkGexfFilename); graphAlgo.convertGroundTruthToLouvainFormat(networkLouvainFormatFilename); graphAlgo.saveCommunityLabels(communityLabelsFilename, noNodes, noCommunities); Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
void TTop2FriendNet::PlotPick2VsProb2nd(const PWgtNet& Net, const int& NRuns, const double& StepP, const TStr& OutFNm, TStr Desc, bool PlotTop2, bool PlotBtm2, bool PlotRnd2) { TTop2FriendNet Top2(Net); Net->MulEdgeWgt(-1.0); TTop2FriendNet Btm2(Net); Net->MulEdgeWgt(-1.0); // change back THash<TFlt, TMom> Top2H, Btm2H, Rnd2H; for (int run = 0; run < NRuns; run++) { TExeTm ExeTm; printf("run %d\n", run); for (double p = 0; p <= 1; p += StepP) { if (PlotTop2) { Top2H.AddDat(p).Add(Top2.GetTop2WccSz(p)); } if (PlotBtm2) { Btm2H.AddDat(p).Add(Btm2.GetTop2WccSz(p)); } if (PlotRnd2) { Rnd2H.AddDat(p).Add(Top2.GetRnd2WccSz(p)); } printf("."); } printf("[%s]\n", ExeTm.GetStr()); TFltTrV Top2V, Btm2V, Rnd2V; GetAvgSDevV(Top2H, Top2V); GetAvgSDevV(Btm2H, Btm2V); GetAvgSDevV(Rnd2H, Rnd2V); TGnuPlot GP("ccVsP-"+OutFNm, TStr::Fmt("%s (%d, %d, %f)", Desc.CStr(), Net->GetNodes(), Net->GetEdges(), Net->GetEdgeWgt())); GP.SetXYLabel("Prob of taking 2nd edge", "Size of largest connected component"); if (! Top2V.Empty()) { GP.AddErrBar(Top2V, "TOP", ""); } if (! Rnd2V.Empty()) { GP.AddErrBar(Rnd2V, "RND", ""); } if (! Btm2V.Empty()) { GP.AddErrBar(Btm2V, "BTM", ""); } GP.SavePng(); } }
int TLogRegFit::MLENewton(const double& ChangeEps, const int& MaxStep, const TStr PlotNm) { TExeTm ExeTm; TFltV GradV(Theta.Len()), DeltaLV(Theta.Len()); TFltVV HVV(Theta.Len(), Theta.Len()); int iter = 0; double MinVal = -1e10, MaxVal = 1e10; for(iter = 0; iter < MaxStep; iter++) { Gradient(GradV); Hessian(HVV); GetNewtonStep(HVV, GradV, DeltaLV); double Increment = TLinAlg::DotProduct(GradV, DeltaLV); if (Increment <= ChangeEps) { break; } double LearnRate = GetStepSizeByLineSearch(DeltaLV, GradV, 0.15, 0.5);//InitLearnRate/double(0.01*(double)iter + 1); for(int i = 0; i < Theta.Len(); i++) { double Change = LearnRate * DeltaLV[i]; Theta[i] += Change; if(Theta[i] < MinVal) { Theta[i] = MinVal; } if(Theta[i] > MaxVal) { Theta[i] = MaxVal; } } } if (! PlotNm.Empty()) { printf("MLE with Newton method completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr()); } return iter; }
int main(int argc, char* argv[]) { // code needed for inputing parameters Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Network diversity. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; // for measuring execution time Try const TStr InFNmGraph = Env.GetIfArgPrefixStr("-i:", "artificial_intelligence_pub.txt", "Input graph (undirected graph)"); const TStr InFNmCat = Env.GetIfArgPrefixStr("-c:", "artificial_intelligence_cat_pub.txt", "Categories"); const TStr InFNmMat = Env.GetIfArgPrefixStr("-m:", "sciences.txt", "Matrix"); const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "diversity.txt", "Output file"); const int DivAlg = Env.GetIfArgPrefixInt("-a:", 1, "Measure: 1:Stirling"); const int Alpha = Env.GetIfArgPrefixInt("-alp:", 1, "alpha"); const int Beta = Env.GetIfArgPrefixInt("-bet:", 1, "beta"); const int Gamma = Env.GetIfArgPrefixInt("-gam:", 1, "gama"); // defining graph PUNGraph Graph = TSnap::LoadEdgeList<PUNGraph>(InFNmGraph, false); double D = 0.0; TStr DivAlgStr; // based on input parametr -a (variable DivAlg), diversity measure is choosen if (DivAlg == 1) { DivAlgStr = "Stirling"; D = TSnap::StirlingIndex(Graph,InFNmCat,InFNmMat, Alpha, Beta, Gamma);} else { Fail; } printf("\nDiversity: %f\nrun time: %s (%s)\n", D,ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); //print execution time Catch return 0; }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("ragm. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try TStr OutFPrx = Env.GetIfArgPrefixStr("-o:", "", "Output Graph data prefix"); const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input edgelist file name"); const TStr LabelFNm = Env.GetIfArgPrefixStr("-l:", "", "Input file name for node names (Node ID, Node label) "); int OptComs = Env.GetIfArgPrefixInt("-c:", -1, "The number of communities to detect (-1: detect automatically)"); const int MinComs = Env.GetIfArgPrefixInt("-mc:", 5, "Minimum number of communities to try"); const int MaxComs = Env.GetIfArgPrefixInt("-xc:", 100, "Maximum number of communities to try"); const int DivComs = Env.GetIfArgPrefixInt("-nc:", 10, "How many trials for the number of communities"); const int NumThreads = Env.GetIfArgPrefixInt("-nt:", 1, "Number of threads for parallelization"); const double StepAlpha = Env.GetIfArgPrefixFlt("-sa:", 0.3, "Alpha for backtracking line search"); const double StepBeta = Env.GetIfArgPrefixFlt("-sb:", 0.3, "Beta for backtracking line search"); PUNGraph G; TIntStrH NIDNameH; if (InFNm.IsStrIn(".ungraph")) { TFIn GFIn(InFNm); G = TUNGraph::Load(GFIn); } else { G = TAGMUtil::LoadEdgeListStr<PUNGraph>(InFNm, NIDNameH); } if (LabelFNm.Len() > 0) { TSsParser Ss(LabelFNm, ssfTabSep); while (Ss.Next()) { if (Ss.Len() > 0) { NIDNameH.AddDat(Ss.GetInt(0), Ss.GetFld(1)); } } } else { } printf("Graph: %d Nodes %d Edges\n", G->GetNodes(), G->GetEdges()); TVec<TIntV> EstCmtyVV; TExeTm RunTm; TAGMFast RAGM(G, 10, 10); if (OptComs == -1) { printf("finding number of communities\n"); OptComs = RAGM.FindComsByCV(NumThreads, MaxComs, MinComs, DivComs, OutFPrx, StepAlpha, StepBeta); } RAGM.NeighborComInit(OptComs); if (NumThreads == 1 || G->GetEdges() < 1000) { RAGM.MLEGradAscent(0.0001, 1000 * G->GetNodes(), "", StepAlpha, StepBeta); } else { 
RAGM.MLEGradAscentParallel(0.0001, 1000, NumThreads, "", StepAlpha, StepBeta); } RAGM.GetCmtyVV(EstCmtyVV); TAGMUtil::DumpCmtyVV(OutFPrx + "cmtyvv.txt", EstCmtyVV, NIDNameH); TAGMUtil::SaveGephi(OutFPrx + "graph.gexf", G, EstCmtyVV, 1.5, 1.5, NIDNameH); Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]) { printf("MemeWorks. build: %s, %s. Start time: %s\n\n", __TIME__, __DATE__, TExeTm::GetCurTm()); TExeTm ExeTm; TInt::Rnd.PutSeed(0); Try TSecTm BegTm = TSecTm::GetCurTm(); // char *ToDo [] = {"memeclust", "-do:memestoqtbs", "-i:201101.txt", "-w:F", "-o:1101", "-mint:20110101", "-maxt:20110106"}; BigMain(7, ToDo); // char *ToDo [] = {"memeclust", "-do:memestoqtbs", "-i:201101_201103.txt", "-w:F", "-o:11011103"}; BigMain(5, ToDo); // char *ToDo [] = {"memeclust", "-do:memestoqtbs", "-i:201104_201106.txt", "-w:F", "-o:11041106", "-mint:20110401", "-maxt:20110701"}; BigMain(7, ToDo); // char *ToDo [] = {"memeclust", "-do:memestoqtbs", "-i:201007_201107.txt", "-w:F", "-o:10071107"}; BigMain(5, ToDo); // char *ToDo [] = {"memeclust", "-do:memestoqtbs", "-i:201007_201107.txt", "-o:10071107", "-mint:20100714", "-maxt:20110728"}; BigMain(6, ToDo); // char *ToDo [] = {"memeclust", "-do:memestoqtbs", "-i:201101.txt", "-o:1101", "-mint:20110101", "-maxt:20110106"}; BigMain(6, ToDo); // char *ToDo [] = {"memeclust", "-do:mkclustnet", "-i:1101-w4mfq5.QtBs", "-o:1101", "-shglready:F", "-netready:F"}; BigMain(6, ToDo); // char *ToDo [] = {"memeclust", "-do:mkclustnet", "-i:qt08080902-w4mfq5.QtBs", "-o:0808", "-shglready:F", "-netready:F"}; BigMain(6, ToDo); // char *ToDo [] = {"memeclust", "-do:mkclustnet", "-i:11011103-w4mfq5.QtBs", "-o:11011103", "-shglready:F", "-netready:F"}; BigMain(6, ToDo); // char *ToDo [] = {"memeclust", "-do:mkclustnet", "-i:11041106-w4mfq5.QtBs", "-o:11041106", "-shglready:F", "-netready:F"}; BigMain(6, ToDo); // char *ToDo [] = {"memeclust", "-do:mkclustnet", "-i:10071107-w4mfq5.QtBs", "-o:10071107", "-shglready:F", "-netready:F"}; BigMain(6, ToDo); //char *ToDo [] = {"memeclust", "-do:memeclustzarya", "-i:201102.txt", "-o:201102", "-shglready:F", "-netready:F", "-mint:20110201", "-maxt:20110301"}; BigMain(8, ToDo); BigMain(argc, argv); TSecTm EndTm = TSecTm::GetCurTm(); double usedTime = EndTm.GetAbsSecs() - 
BegTm.GetAbsSecs(); printf("Total execution time : %02dh%02dm%02ds\n", int(usedTime)/3600, (int(usedTime)%3600)/60, int(usedTime)%60); return 0; CatchFull printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
// Grows Graph with the forest fire process until it has GraphNodes nodes.
// Every new node is either an orphan (no out-links) or picks one or two
// ambassador nodes uniformly at random, burns a fire from them and links to
// every burned node.
//   GraphNodes: target number of nodes
//   FloodStop:  when true, generation stops once the graph becomes too dense
// Returns srOk on completion, srFlood when the density limit is hit and
// srTimeLimit when TimeLimitSec is exceeded.
TFfGGen::TStopReason TFfGGen::AddNodes(const int& GraphNodes, const bool& FloodStop) {
  printf("\n***ForestFire: %s Nodes:%d StartNodes:%d Take2AmbProb:%g\n", BurnExpFire ? "ExpFire" : "GeoFire", GraphNodes, StartNodes(), Take2AmbProb());
  printf(" FwdBurnP:%g BckBurnP:%g ProbDecay:%g Orphan:%g\n", FwdBurnProb(), BckBurnProb(), ProbDecay(), OrphanProb());
  TExeTm ExeTm;
  // sizes of the three most recent fires (Burned3 is the latest)
  int Burned1 = 0, Burned2 = 0, Burned3 = 0;
  // make sure there is a graph holding the initial nodes
  if (Graph.Empty()) {
    Graph = PNGraph::New();
  }
  if (Graph->GetNodes() == 0) {
    for (int n = 0; n < StartNodes; n++) {
      Graph->AddNode();
    }
  }
  int NEdges = Graph->GetEdges();
  // fire simulator with a fixed random seed
  TRnd Rnd(0);
  TForestFire ForestFire(Graph, FwdBurnProb, BckBurnProb, ProbDecay, 0);
  // add the nodes one by one
  for (int NNodes = Graph->GetNodes() + 1; NNodes <= GraphNodes; NNodes++) {
    const int NewNId = Graph->AddNode(-1);
    IAssert(NewNId == Graph->GetNodes() - 1); // node ids have to be 0...N
    int FireSz = 0; // stays 0 for an orphan (zero out-links)
    const bool IsOrphan = !(OrphanProb == 0.0 || Rnd.GetUniDev() > OrphanProb);
    if (! IsOrphan) {
      // choose the ambassadors
      const bool TakeOne = Take2AmbProb == 0.0 || Rnd.GetUniDev() > Take2AmbProb || NewNId < 2;
      if (TakeOne) {
        ForestFire.Infect(Rnd.GetUniDevInt(NewNId)); // take 1 ambassador
      } else {
        const int AmbNId1 = Rnd.GetUniDevInt(NewNId);
        int AmbNId2;
        do {
          AmbNId2 = Rnd.GetUniDevInt(NewNId);
        } while (AmbNId2 == AmbNId1); // the two ambassadors must differ
        ForestFire.Infect(TIntV::GetV(AmbNId1, AmbNId2)); // take 2 ambassadors
      }
      // burn the fire
      if (BurnExpFire) {
        ForestFire.BurnExpFire();
      } else {
        ForestFire.BurnGeoFire();
      }
      // link the new node to everything that burned
      for (int e = 0; e < ForestFire.GetBurned(); e++) {
        Graph->AddEdge(NewNId, ForestFire.GetBurnedNId(e));
        NEdges++;
      }
      FireSz = ForestFire.GetBurned();
    }
    // shift the fire size history
    Burned1 = Burned2;
    Burned2 = Burned3;
    Burned3 = FireSz;
    if (NNodes % Kilo(1) == 0) {
      printf("(%d, %d) burned: [%d,%d,%d] [%s]\n", NNodes, NEdges, Burned1, Burned2, Burned3, ExeTm.GetStr());
    }
    // flood: more than 1000 edges per node on average
    if (FloodStop && NEdges>GraphNodes && (NEdges / double(NNodes)>1000.0)) {
      printf(". FLOOD. G(%6d, %6d)\n", NNodes, NEdges);
      return srFlood;
    }
    // the time limit is checked once every 1000 nodes
    if (NNodes % 1000 == 0 && TimeLimitSec > 0 && ExeTm.GetSecs() > TimeLimitSec) {
      printf(". TIME LIMIT. G(%d, %d)\n", Graph->GetNodes(), Graph->GetEdges());
      return srTimeLimit;
    }
  }
  IAssert(Graph->GetEdges() == NEdges);
  return srOk;
}
// get model graph according to args void GetModel(const TStr& Args, PNGraph& G){ Env = TEnv(Args, TNotify::NullNotify); const TStr Gen = Env.GetIfArgPrefixStr("-g:", "gen", "How to get model graph: read, gen, deg, genpy"); const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "", "Input graph file (single directed edge per line)"); TExeTm execTime; if (Gen == "gen") BasicGraphGen(Args, G); else if (Gen == "read") ReadPNGraphFromFile(InFNm, G); else if (Gen == "genpy") { PUNGraph GU; GenPy(GU, TFile, Args); G = TSnap::ConvertGraph<PNGraph>(GU); } TFile << "Time of getting model: " << execTime.GetTmStr() << endl; /*TFile << "Model graph: " << G->GetNodes() << " nodes, " << G->GetEdges() << " edges\n"; TIntV DegV; TSnap::GetDegSeqV(G, DegV); execTime.Tick(); PUNGraph Conf = TSnap::GenConfModel(DegV); TFile << "Time of getting configuration model: " << execTime.GetTmStr() << endl; cout << "Undirected configuration model: " << Conf->GetNodes() << " nodes, " << Conf->GetEdges() << " edges\n"; PNGraph ConfD = TSnap::ConvertGraph<PNGraph>(Conf); SaveAndPlot(ConfD, "conf", false); TFile << "Clustering coefficient of configuration model: " << TSnap::GetClustCf(ConfD) << endl; TSnap::PlotClustCf(ConfD,"conf");*/ }
/// Clique Percolation method communities void TCliqueOverlap::GetCPMCommunities(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& NIdCmtyVV) { printf("Clique Percolation Method\n"); TExeTm ExeTm; TVec<TIntV> MaxCliques; TCliqueOverlap::GetMaxCliques(G, MinMaxCliqueSize, MaxCliques); // op RS 2012/05/15, commented out next line, a parameter is missing, // creating a warning on OS X // printf("...%d cliques found\n"); // get clique overlap matrix (graph) PUNGraph OverlapGraph = TCliqueOverlap::CalculateOverlapMtx(MaxCliques, MinMaxCliqueSize-1); printf("...overlap matrix (%d, %d)\n", G->GetNodes(), G->GetEdges()); // connected components are communities TCnComV CnComV; TSnap::GetWccs(OverlapGraph, CnComV); NIdCmtyVV.Clr(false); TIntSet CmtySet; for (int c = 0; c < CnComV.Len(); c++) { CmtySet.Clr(false); for (int i = 0; i <CnComV[c].Len(); i++) { const TIntV& CliqueNIdV = MaxCliques[CnComV[c][i]]; CmtySet.AddKeyV(CliqueNIdV); } NIdCmtyVV.Add(); CmtySet.GetKeyV(NIdCmtyVV.Last()); NIdCmtyVV.Last().Sort(); } printf("done [%s].\n", ExeTm.GetStr()); }
// Generates a graph by calling a Python function and copies the result into
// an undirected SNAP graph.
//   res:        output graph; replaced by a newly created graph
//   TFile:      stream that receives the timing messages
//   parameters: argument string; -module: and -func: select the Python module
//               and function, the remaining arguments are handled by ParseArgs
// Returns 0 both on success and when the arguments cannot be parsed; a failing
// Python call terminates the process with exit(1).
int GenPy(PUNGraph &res, ofstream& TFile, const TStr& parameters) {
  Env = TEnv(parameters, TNotify::StdNotify);
  TStr mN = Env.GetIfArgPrefixStr("-module:", "random_graphs", "Module name");
  TStr fN = Env.GetIfArgPrefixStr("-func:", "fast_gnp_random_graph", "Function name");
  // one-element array that CallPyFunction is handed to store its result in
  // NOTE(review): never released with delete[] in this function
  PyObject **G = new PyObject*[1];
  char *moduleName = mN.CStr();
  char *funcName = fN.CStr();
  AddFuncInfo();
  TStrV args, argTypes;
  if (!ParseArgs(funcName, parameters, args, argTypes)) {
    printf("Fail to parse arguments for NetworkX generation...\n");
    return 0;
  };
  TExeTm execTime;
  if (!CallPyFunction(moduleName, funcName, args, argTypes, G)) {
    cout << "CallPyFunction() raised error. Execution terminated.\n";
    // NOTE(review): "pause" looks like the Windows shell command - confirm the target platform
    system("pause");
    exit(1);
  };
  TFile << "Time of generation of graph by NetworkX: " << execTime.GetTmStr() << endl;
  execTime.Tick();
  // fetch the node list
  PyObject*** nodes = new PyObject**[1];
  GetNodes(G, nodes);
  int nodesCount = PyList_Size(*(nodes[0]));
  //printf("nodesCount = %d, ", nodesCount);
  res = PUNGraph::TObj::New();
  // NOTE(review): nodesCount*nodesCount is an int product and can overflow for large graphs
  res->Reserve(nodesCount, nodesCount*nodesCount);
  // nodes get the ids 0..nodesCount-1 (index is size_t, count is int)
  for (size_t i = 0; i < nodesCount; i++)
    res->AddNode(i);
  // NOTE(review): nodes is a new[]-allocated PyObject*** and not a Python
  // object; applying Py_DECREF to it looks wrong - confirm against GetNodes
  Py_DECREF(nodes);
  // fetch the edge list
  PyObject*** edges = new PyObject**[1];
  GetEdges(G, edges);
  int edgesCount = PyList_Size(*(edges[0]));
  //printf("edgesCount = %d\n", edgesCount);
  for (size_t i = 0; i < edgesCount; i++) {
    // every edge item is read as a sequence (source id, destination id)
    PyObject* item = PySequence_Fast_GET_ITEM(*(edges[0]), i);
    int v1, v2;
    PyObject* node = PySequence_Fast_GET_ITEM(item,0);
    v1 = PyLong_AsLong(node);
    node = PySequence_Fast_GET_ITEM(item,1);
    v2 = PyLong_AsLong(node);
    res->AddEdge(v1,v2);
  }
  TFile << "Time of copying of graph from NetworkX representation: " << execTime.GetTmStr() << endl;
  // NOTE(review): G and edges are new[]-allocated pointer arrays as well; same
  // concern as for nodes above
  Py_DECREF(G);
  Py_DECREF(edges);
  //Py_Finalize(); // free the memory handed to the interpreter
  return 0;
}
int main(int argc, char* argv[]) { TExeTm ExeTm; try { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("\nPlotting Scatter For Twitter Cascades. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); // THash< TUInt , TSecTmV > twitterUrls = Tools::loadTwitter("DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED_HAVINGBOTH.rar"); // THash< TUInt , TSecTmV > twitterContents = Tools::loadTwitter("DATA/CascadesOnTwitterData_FINALFILTERED_HAVINGBOTH.rar"); THash< TUInt , TSecTmV > twitterUrls = Tools::loadTwitter("/NS/twitter-5/work/oaskaris/DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED_HAVINGBOTH.rar"); // CascadesFullUrlsOnTwitterData_FINALFILTERED THash< TUInt , TSecTmV > twitterContents = Tools::loadTwitter("/NS/twitter-5/work/oaskaris/DATA/CascadesOnTwitterData_FINALFILTERED_HAVINGBOTH.rar"); // CascadesOnTwitterData_FINALFILTERED THash< TUInt , TSecTmV > full_twitterUrls = Tools::loadTwitter("/NS/twitter-5/work/oaskaris/DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED.rar"); THash< TUInt , TSecTmV > full_twitterContents = Tools::loadTwitter("/NS/twitter-5/work/oaskaris/DATA/CascadesOnTwitterData_FINALFILTERED.rar"); // Scatter plot plotScatterLengthOfEachCascade(twitterUrls,twitterContents); // Percentage computation double cnt = 0; for(int i=0;i<full_twitterUrls.Len();i++) { if(full_twitterContents.GetKeyId(full_twitterUrls.GetKey(i)) != -1) { cnt++; } } cnt /= full_twitterUrls.Len(); // twitterUrls.Len() / full_twitterUrls.Len() printf("The percentage of Urls of quotes which have contents as well: %f\n", 100 * cnt); cnt = 0; for(int i=0;i<full_twitterContents.Len();i++) { if(full_twitterUrls.GetKeyId(full_twitterContents.GetKey(i)) != -1) { cnt++; } } cnt /= full_twitterContents.Len(); printf("The percentage of Contents of quotes which have urls as well: %f\n", 100 * cnt); printf("\nScatter Plot had been drawn successfully."); } catch(exception& ex) { printf("\nError1 happened, it was: %s\n\n",ex.what()); } catch(TPt<TExcept>& 
ex) { printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr()); } printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]) { // TFltPrV v; // v.Add(TFltPr(1,4)); // v.Add(TFltPr(5,5)); // v.Add(TFltPr(9,11)); // v.Add(TFltPr(20,8)); // v.Add(TFltPr(21,30)); // cout << "C: " << Tools::computeCorrelation(v,Pearson) << endl; // return 0; TExeTm ExeTm; try { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("\nPlotting Individually Memes-Twitter Cascades. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); // URLS THash< TStr , CascadeElementV > quotes = Tools::loadQuotes("DATA/QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_HAVINGBOTH.rar"); // QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_4URLS THash< TUInt , TSecTmV > twitterUrls = Tools::loadTwitter("DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED_HAVINGBOTH.rar"); // CascadesFullUrlsOnTwitterData_FINALFILTERED // CONTENTS //THash< TStr , CascadeElementV > quotes2 = Tools::loadQuotes("DATA/QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_HAVINGBOTH.rar"); // QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_4Contents THash< TUInt , TSecTmV > twitterContents = Tools::loadTwitter("DATA/CascadesOnTwitterData_FINALFILTERED_HAVINGBOTH.rar"); // CascadesOnTwitterData_FINALFILTERED // Plotting THash< TUInt , TSecTmV > twitterTotal; for(int i=0;i<twitterContents.Len();i++) { TSecTmV tmp; tmp.AddV(twitterContents[i]); tmp.AddV(twitterUrls[i]); twitterTotal.AddDat(i,tmp); } plotScatterLengthOfEachCascade(quotes,twitterUrls,"Urls"); plotScatterLengthOfEachCascade(quotes,twitterContents,"Contents"); plotScatterLengthOfEachCascade(quotes,twitterTotal,"Full"); printf("\nPlots had been drawn successfully."); } catch(exception& ex) { printf("\nError1 happened, it was: %s\n\n",ex.what()); } catch(TPt<TExcept>& ex) { printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr()); } printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int TLogRegFit::MLEGradient(const double& ChangeEps, const int& MaxStep, const TStr PlotNm) { TExeTm ExeTm; TFltV GradV(Theta.Len()); int iter = 0; TIntFltPrV IterLV, IterGradNormV; double MinVal = -1e10, MaxVal = 1e10; double GradCutOff = 100000; for(iter = 0; iter < MaxStep; iter++) { Gradient(GradV); //if gradient is going out of the boundary, cut off for(int i = 0; i < Theta.Len(); i++) { if (GradV[i] < -GradCutOff) { GradV[i] = -GradCutOff; } if (GradV[i] > GradCutOff) { GradV[i] = GradCutOff; } if (Theta[i] <= MinVal && GradV[i] < 0) { GradV[i] = 0.0; } if (Theta[i] >= MaxVal && GradV[i] > 0) { GradV[i] = 0.0; } } double Alpha = 0.15, Beta = 0.9; //double LearnRate = 0.1 / (0.1 * iter + 1); //GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta); double LearnRate = GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta); if (TLinAlg::Norm(GradV) < ChangeEps) { break; } for(int i = 0; i < Theta.Len(); i++) { double Change = LearnRate * GradV[i]; Theta[i] += Change; if(Theta[i] < MinVal) { Theta[i] = MinVal; } if(Theta[i] > MaxVal) { Theta[i] = MaxVal; } } if (! PlotNm.Empty()) { double L = Likelihood(); IterLV.Add(TIntFltPr(iter, L)); IterGradNormV.Add(TIntFltPr(iter, TLinAlg::Norm(GradV))); } } if (! PlotNm.Empty()) { TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q"); TGnuPlot::PlotValV(IterGradNormV, PlotNm + ".gradnorm_Q"); printf("MLE for Lambda completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr()); } return iter; }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Motifs. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input directed graph file (single directed edge per line)"); const int MotifSz = Env.GetIfArgPrefixInt("-m:", 3, "Motif size (has to be 3 or 4)"); const bool DrawMotifs = Env.GetIfArgPrefixBool("-d:", true, "Draw motif shapes (requires GraphViz)"); TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "Output file prefix"); if (OutFNm.Empty()) { OutFNm = InFNm.GetFMid(); } EAssert(MotifSz==3 || MotifSz==4); // load graph PNGraph G; if (InFNm.GetFExt().GetLc()==".ungraph") { TFIn FIn(InFNm); G=TSnap::ConvertGraph<PNGraph>(TUNGraph::Load(FIn), true); } else if (InFNm.GetFExt().GetLc()==".ngraph") { TFIn FIn(InFNm); G=TNGraph::Load(FIn); } else { G = TSnap::LoadEdgeList<PNGraph>(InFNm, 0, 1); } bool IsOk = true; for (int nid = 0; nid < G->GetNodes(); nid++) { if (! G->IsNode(nid)) { IsOk=false; break; } } if (! 
IsOk) { printf("Nodes of the input graph have to be numbered 0...N-1\nRenumbering nodes...\n"); PNGraph OG = G; G = TNGraph::New(); TGraphEnumUtils::GetNormalizedGraph(OG, G); } // G = TSnap::GenRndGnm<PNGraph>(100, Kilo(1)); // count frequency of connected subgraphs in G that have MotifSz nodes TD34GraphCounter GraphCounter(MotifSz); TSubGraphEnum<TD34GraphCounter> GraphEnum; GraphEnum.GetSubGraphs(G, MotifSz, GraphCounter); FILE *F = fopen(TStr::Fmt("%s-counts.tab", OutFNm.CStr()).CStr(), "wt"); fprintf(F, "MotifId\tNodes\tEdges\tCount\n"); for (int i = 0; i < GraphCounter.Len(); i++) { const int gid = GraphCounter.GetId(i); PNGraph SG = GraphCounter.GetGraph(gid); if (DrawMotifs) { TGraphViz::Plot(SG, gvlNeato, TStr::Fmt("%s-motif%03d.gif", OutFNm.CStr(), i), TStr::Fmt("GId:%d Count: %llu", gid, GraphCounter.GetCnt(gid))); } fprintf(F, "%d\t%d\t%d\t%llu\n", gid, SG->GetNodes(), SG->GetEdges(), GraphCounter.GetCnt(gid)); } printf("done."); fclose(F); Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Node Centrality. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input un/directed graph"); const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "node_centrality.tab", "Output file"); printf("Loading %s...", InFNm.CStr()); PNGraph Graph = TSnap::LoadEdgeList<PNGraph>(InFNm); //PNGraph Graph = TSnap::GenRndGnm<PNGraph>(10, 10); //TGraphViz::Plot(Graph, gvlNeato, InFNm+".gif", InFNm, true); printf("nodes:%d edges:%d\n", Graph->GetNodes(), Graph->GetEdges()); PUNGraph UGraph = TSnap::ConvertGraph<PUNGraph>(Graph); // undirected version of the graph TIntFltH BtwH, EigH, PRankH, CcfH, CloseH, HubH, AuthH; //printf("Computing...\n"); printf("Treat graph as DIRECTED: "); printf(" PageRank... "); TSnap::GetPageRank(Graph, PRankH, 0.85); printf(" Hubs&Authorities..."); TSnap::GetHits(Graph, HubH, AuthH); printf("\nTreat graph as UNDIRECTED: "); printf(" Eigenvector..."); TSnap::GetEigenVectorCentr(UGraph, EigH); printf(" Clustering..."); TSnap::GetNodeClustCf(UGraph, CcfH); printf(" Betweenness (SLOW!)..."); TSnap::GetBetweennessCentr(UGraph, BtwH, 1.0); printf(" Constraint (SLOW!)..."); TNetConstraint<PUNGraph> NetC(UGraph, true); printf(" Closeness (SLOW!)..."); for (TUNGraph::TNodeI NI = UGraph->BegNI(); NI < UGraph->EndNI(); NI++) { const int NId = NI.GetId(); CloseH.AddDat(NId, TSnap::GetClosenessCentr<PUNGraph>(UGraph, NId, false)); } printf("\nDONE! 
saving..."); FILE *F = fopen(OutFNm.CStr(), "wt"); fprintf(F,"#Network: %s\n", InFNm.CStr()); fprintf(F,"#Nodes: %d\tEdges: %d\n", Graph->GetNodes(), Graph->GetEdges()); fprintf(F,"#NodeId\tDegree\tCloseness\tBetweennes\tEigenVector\tNetworkConstraint\tClusteringCoefficient\tPageRank\tHubScore\tAuthorityScore\n"); for (TUNGraph::TNodeI NI = UGraph->BegNI(); NI < UGraph->EndNI(); NI++) { const int NId = NI.GetId(); const double DegCentr = UGraph->GetNI(NId).GetDeg(); const double CloCentr = CloseH.GetDat(NId); const double BtwCentr = BtwH.GetDat(NId); const double EigCentr = EigH.GetDat(NId); const double Constraint = NetC.GetNodeC(NId); const double ClustCf = CcfH.GetDat(NId); const double PgrCentr = PRankH.GetDat(NId); const double HubCentr = HubH.GetDat(NId); const double AuthCentr = AuthH.GetDat(NId); fprintf(F, "%d\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n", NId, DegCentr, CloCentr, BtwCentr, EigCentr, Constraint, ClustCf, PgrCentr, HubCentr, AuthCentr); } fclose(F); Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Rolx. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "graph.txt", "Input graph (one edge per line, tab/space separated)"); const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "roles.txt", "Output file name prefix"); const int MinRoles = Env.GetIfArgPrefixInt("-l:", 2, "Lower bound of the number of roles"); const int MaxRoles = Env.GetIfArgPrefixInt("-u:", 3, "Upper bound of the number of roles"); double Threshold = 1e-6; if (MinRoles > MaxRoles || MinRoles < 2) { printf("min roles and max roles should be integer and\n"); printf("2 <= min roles <= max roles\n"); exit(EXIT_SUCCESS); } printf("loading file...\n"); PNGraph Graph = TSnap::LoadEdgeList<PNGraph>(InFNm, 0, 1); printf("extracting features...\n"); TIntFtrH Features = ExtractFeatures(Graph); TIntIntH NodeIdMtxIdH = CreateNodeIdMtxIdxHash(Features); TFltVV V = ConvertFeatureToMatrix(Features, NodeIdMtxIdH); //printf("saving features...\n"); //FPrintMatrix(V, "v.txt"); printf("feature matrix is saved in v.txt\n"); TFlt MnError = TFlt::Mx; TFltVV FinalG, FinalF; int NumRoles = -1; for (int r = MinRoles; r <= MaxRoles; ++r) { TFltVV G, F; printf("factorizing for %d roles...\n", r); CalcNonNegativeFactorization(V, r, G, F, Threshold); //FPrintMatrix(G, "g.txt"); //FPrintMatrix(F, "f.txt"); TFlt Error = CalcDescriptionLength(V, G, F); if (Error < MnError) { MnError = Error; FinalG = G; FinalF = F; NumRoles = r; } } //FPrintMatrix(FinalG, "final_g.txt"); //FPrintMatrix(FinalF, "final_f.txt"); printf("using %d roles, min error: %f\n", NumRoles, MnError()); TIntIntH Roles = FindRoles(FinalG, NodeIdMtxIdH); FPrintRoles(Roles, OutFNm); //PlotRoles(Graph, Roles); Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
// Wraps GraphPt. When CheckNodeIds() rejects the node ids of the graph, a
// renumbered copy produced by TSnap::ConvertGraph (renumbering enabled) is
// stored instead of the original.
TUNGraphMtx::TUNGraphMtx(const PUNGraph& GraphPt) : Graph(GraphPt) {
  if (CheckNodeIds()) {
    return; // node ids are fine, keep the graph as it is
  }
  printf(" Renumbering %d nodes....", GraphPt->GetNodes());
  TExeTm RenumTm;
  Graph = TSnap::ConvertGraph<PUNGraph>(GraphPt, true);
  /*TIntSet NIdSet;
  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    NIdSet.AddKey(NI.GetId());
  }
  Graph = TUNGraph::New();  *Graph = *GraphPt; */
  printf("done [%s]\n", RenumTm.GetStr());
}
int TTrawling::GetNextFqItemSets(const int& FqItemsetLen) { TExeTm ExeTm; /* // slow GenCandidates(); // CurItemH --> CandItemH printf(" S[%d][%s]", CandItemH.Len(), ExeTm.GetStr()); CountSupport(); // set counters in CandItemH printf("T[%s]", ExeTm.GetStr()); ThresholdSupp(); // CandItemH --> CurItemH printf(" Items: %d\n", CurItemH.Len());*/ GenCandAndCntSupp(FqItemsetLen); printf(" cur: %d cand: %d [%s]", CurItemH.Len(), CandItemH.Len(), ExeTm.GetStr()); CurItemH.Swap(CandItemH); return CurItemH.Len(); }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Network community detection. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "graph.txt", "Input graph (undirected graph)"); const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "communities.txt", "Output file"); const int CmtyAlg = Env.GetIfArgPrefixInt("-a:", 2, "Algorithm: 1:Girvan-Newman, 2:Clauset-Newman-Moore, 3:Infomap"); PUNGraph Graph = TSnap::LoadEdgeList<PUNGraph>(InFNm, false); //PUNGraph Graph = TSnap::LoadEdgeList<PUNGraph>("../as20graph.txt", false); //PUNGraph Graph = TSnap::GenRndGnm<PUNGraph>(5000, 10000); // generate a random graph TSnap::DelSelfEdges(Graph); TCnComV CmtyV; double Q = 0.0; TStr CmtyAlgStr; if (CmtyAlg == 1) { CmtyAlgStr = "Girvan-Newman"; Q = TSnap::CommunityGirvanNewman(Graph, CmtyV); } else if (CmtyAlg == 2) { CmtyAlgStr = "Clauset-Newman-Moore"; Q = TSnap::CommunityCNM(Graph, CmtyV); } else if (CmtyAlg == 3) { CmtyAlgStr = "Infomap"; Q = TSnap::Infomap(Graph, CmtyV); } else { Fail; } FILE *F = fopen(OutFNm.CStr(), "wt"); fprintf(F, "# Input: %s\n", InFNm.CStr()); fprintf(F, "# Nodes: %d Edges: %d\n", Graph->GetNodes(), Graph->GetEdges()); fprintf(F, "# Algoritm: %s\n", CmtyAlgStr.CStr()); if (CmtyAlg!=3) { fprintf(F, "# Modularity: %f\n", Q); } else { fprintf(F, "# Average code length: %f\n", Q); } fprintf(F, "# Communities: %d\n", CmtyV.Len()); fprintf(F, "# NId\tCommunityId\n"); for (int c = 0; c < CmtyV.Len(); c++) { for (int i = 0; i < CmtyV[c].Len(); i++) { fprintf(F, "%d\t%d\n", CmtyV[c][i].Val, c); } } fclose(F); Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
void GenKron(const TStr& Args, TKronMtx& FitMtx, TFltPrV& KronDegAvgIn, TFltPrV& KronDegAvgOut){ Env = TEnv(Args, TNotify::NullNotify); TExeTm ExecTime; // number of Kronecker graphs to generate const TInt NKron = Env.GetIfArgPrefixInt("-n:", 1, "Number of generated Kronecker graphs"); // iterations of Kronecker product const TInt NIter = Env.GetIfArgPrefixInt("-i:", 10, "Iterations of Kronecker product"); // is graph directed? TStr IsDir = Env.GetIfArgPrefixStr("-isdir:", "false", "Produce directed graph (true, false)"); TFlt ExpectedNodes = FitMtx.GetNodes(NIter), ExpectedEdges = FitMtx.GetEdges(NIter); TFile << "Kronecker nodes: " << ExpectedNodes << ", expected Kronecker edges: " << ExpectedEdges << endl; double Sec = 0.0; int AvgMaxOutDeg = 0, AvgMaxInDeg = 0, MinMaxOutDeg = 0, MaxMaxOutDeg = 0, MinMaxInDeg = 0, MaxMaxInDeg = 0; bool Dir = IsDir == "true" ? true : false; for (int i = 0; i < NKron; i++){ ExecTime.Tick(); PNGraph Kron = TKronMtx::GenFastKronecker(FitMtx, NIter, Dir, 0); Sec += ExecTime.GetSecs(); printf("Calculating maximum degree...\n"); int MaxOutDeg = GetMaxMinDeg(Kron, IsDir, "false", "true"), MaxInDeg = GetMaxMinDeg(Kron, IsDir, "true", "true"); CompareDeg(i, MaxOutDeg, MinMaxOutDeg, MaxMaxOutDeg, AvgMaxOutDeg); CompareDeg(i, MaxInDeg, MinMaxInDeg, MaxMaxInDeg, AvgMaxInDeg); //printf("Nodes count: %d, nodes with non-zero degree %d, edges count %d\n max deg = %d\n", kron->GetNodes(), TSnap::CntNonZNodes(kron), kron->GetEdges(), MaxDeg); if (i == NKron - 1){ //TFile << "Clustering coefficient: " << TSnap::GetClustCf(kron) << endl; //TSnap::PlotClustCf(kron,"kronSingle"); //TSnap::PlotHops(kron, "kronSingle"); TFile << "Maximum output degree in kron graph: " << "from " << MinMaxOutDeg << " to " << MaxMaxOutDeg << " (average: " << (double)AvgMaxOutDeg / (double)NKron << ")" << endl; TFile << "Maximum input degree in kron graph: " << "from " << MinMaxInDeg << " to " << MaxMaxInDeg << " (average: " << (double)AvgMaxInDeg / (double)NKron << ")" << 
endl; } AddDegreesStat(KronDegAvgIn, Kron, true); AddDegreesStat(KronDegAvgOut, Kron, false); } Sec /= NKron; GetAvgDegreeStat(KronDegAvgIn, NKron); GetAvgDegreeStat(KronDegAvgOut, NKron); KronDegAvgIn.Sort(); KronDegAvgOut.Sort(); TFile << "Average time of generation of Kronecker product: " << Sec << endl; }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Clique Percolation Method. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input undirected graph file (single directed edge per line)"); const int OverlapSz = Env.GetIfArgPrefixInt("-k:", 2, "Min clique overlap"); TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "Output file prefix"); if (OutFNm.Empty()) { OutFNm = InFNm.GetFMid(); } PUNGraph G; if (InFNm == "DEMO") { // small demo graph G = TUNGraph::New(); for (int i = 1; i < 8; i++) { G->AddNode(i); } G->AddEdge(1,2); G->AddEdge(2,3); G->AddEdge(2,4); G->AddEdge(3,4); G->AddEdge(4,5); G->AddEdge(4,7); G->AddEdge(5,6); G->AddEdge(5,7); G->AddEdge(6,7); // draw the small graph using GraphViz TSnap::DrawGViz(G, gvlNeato, "small_graph.png", "", true); } // load graph else if (InFNm.GetFExt().GetLc()==".ungraph") { TFIn FIn(InFNm); G=TUNGraph::Load(FIn); } else if (InFNm.GetFExt().GetLc()==".ngraph") { TFIn FIn(InFNm); G=TSnap::ConvertGraph<PUNGraph>(TNGraph::Load(FIn), false); } else { G = TSnap::LoadEdgeList<PUNGraph>(InFNm, 0, 1); } // find communities TVec<TIntV> CmtyV; TCliqueOverlap::GetCPMCommunities(G, OverlapSz+1, CmtyV); // save result FILE *F = fopen(TStr::Fmt("cpm-%s.txt", OutFNm.CStr()).CStr(), "wt"); fprintf(F, "# %d Overlapping Clique Percolation Communities (min clique overlap %d)\n", CmtyV.Len(), OverlapSz); fprintf(F, "# Each line contains nodes belonging to the same community community\n"); for (int i = 0; i < CmtyV.Len(); i++) { fprintf(F, "%d", CmtyV[i][0].Val); for (int j = 1; j < CmtyV[i].Len(); j++) { fprintf(F, "\t%d", CmtyV[i][j].Val); } fprintf(F, "\n"); } Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("agmgen. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "DEMO", "Community affiliation data"); const TStr OutFPrx = Env.GetIfArgPrefixStr("-o:", "agm", "out file name prefix"); const int RndSeed = Env.GetIfArgPrefixInt("-rs:",10,"Rnd Seed"); const double DensityCoef= Env.GetIfArgPrefixFlt("-a:",0.6,"Power-law Coefficient a of density (density ~ N^(-a)"); const double ScaleCoef= Env.GetIfArgPrefixFlt("-c:",1.3,"Scaling Coefficient c of density (density ~ c"); TRnd Rnd(RndSeed); TVec<TIntV> CmtyVV; if(InFNm=="DEMO") { CmtyVV.Gen(2); TIntV NIdV; for(int i=0;i<25;i++) { TIntV& CmtyV = CmtyVV[0]; CmtyV.Add(i+1); } for(int i=15;i<40;i++) { TIntV& CmtyV = CmtyVV[1]; CmtyV.Add(i+1); } } else { TVec<TIntV> CmtyVV; TSsParser Ss(InFNm, ssfWhiteSep); while (Ss.Next()) { if(Ss.GetFlds()>0) { TIntV CmtyV; for(int i=0;i<Ss.GetFlds();i++) { if(Ss.IsInt(i)){CmtyV.Add(Ss.GetInt(i));} } CmtyVV.Add(CmtyV); } } printf("community loading completed (%d communities)\n",CmtyVV.Len()); } PUNGraph AG = TAGM::GenAGM(CmtyVV,DensityCoef,ScaleCoef,Rnd); TSnap::SaveEdgeList(AG,OutFPrx + ".edgelist.txt"); if(AG->GetNodes()<50) { TAGM::GVizComGraph(AG,CmtyVV,OutFPrx + ".graph.gif"); } Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
void PrintGraphStatTable(const PGraph& G, TStr OutFNm, TStr Desc="") { TFltPrV DegCCfV; int64 ClosedTriads, OpenTriads; int FullDiam; double EffDiam; TSnap::PrintInfo(G, OutFNm); TExeTm ExeTm; printf("C"); const double CCF = TSnap::GetClustCf(G, DegCCfV, ClosedTriads, OpenTriads); printf("[%s]D", ExeTm.GetStr()); TSnap::GetBfsEffDiam(G, 1000, false, EffDiam, FullDiam); printf("[%s]CC", ExeTm.GetStr()); PGraph WCC = TSnap::GetMxWcc(G); PGraph SCC = TSnap::GetMxScc(G); printf("[%s]\n", ExeTm.GetStr()); FILE* F = stdout; if (! OutFNm.Empty()) { F = fopen(TStr::Fmt("%s.html", OutFNm.CStr()).CStr(), "wt"); } fprintf(F, "\n"); fprintf(F, "<table id=\"datatab\" summary=\"Dataset statistics\">\n"); fprintf(F, " <tr> <th colspan=\"2\">Dataset statistics</th> </tr>\n"); fprintf(F, " <tr><td>Nodes</td> <td>%d</td></tr>\n", G->GetNodes()); fprintf(F, " <tr><td>Edges</td> <td>%d</td></tr>\n", G->GetEdges()); fprintf(F, " <tr><td>Nodes in largest WCC</td> <td>%d (%.3f)</td></tr>\n", WCC->GetNodes(), WCC->GetNodes()/double(G->GetNodes())); fprintf(F, " <tr><td>Edges in largest WCC</td> <td>%d (%.3f)</td></tr>\n", WCC->GetEdges(), WCC->GetEdges()/double(G->GetEdges())); fprintf(F, " <tr><td>Nodes in largest SCC</td> <td>%d (%.3f)</td></tr>\n", SCC->GetNodes(), SCC->GetNodes()/double(G->GetNodes())); fprintf(F, " <tr><td>Edges in largest SCC</td> <td>%d (%.3f)</td></tr>\n", SCC->GetEdges(), SCC->GetEdges()/double(G->GetEdges())); fprintf(F, " <tr><td>Average clustering coefficient</td> <td>%.4f</td></tr>\n", CCF); fprintf(F, " <tr><td>Number of triangles</td> <td>%s</td></tr>\n", TUInt64(ClosedTriads).GetStr().CStr()); fprintf(F, " <tr><td>Fraction of closed triangles</td> <td>%.4g</td></tr>\n", ClosedTriads/double(ClosedTriads+OpenTriads)); fprintf(F, " <tr><td>Diameter (longest shortest path)</td> <td>%d</td></tr>\n", FullDiam); fprintf(F, " <tr><td>90-percentile effective diameter</td> <td>%.2g</td></tr>\n", EffDiam); fprintf(F, "</table>\n"); fprintf(F, "<br>\n"); if (! 
OutFNm.Empty()) { fprintf(F, "\n<table id=\"datatab\" summary=\"Table of datasets\">\n"); fprintf(F, "<tr>\n"); fprintf(F, " <th>File</th>\n"); fprintf(F, " <th>Description</th>\n"); fprintf(F, "</tr>\n"); fprintf(F, "<tr>\n"); fprintf(F, " <td><a href=\"%s.txt.gz\">%s.txt.gz</a></td>\n", OutFNm.CStr(), OutFNm.CStr()); fprintf(F, " <td>%s</td>\n", Desc.CStr()); fprintf(F, "</tr>\n"); fprintf(F, "</table>\n"); fclose(F); TSnap::SaveEdgeList(G, OutFNm+".txt", Desc); } }
/// Grows the undirected forest-fire graph until it has GraphNodes nodes.
/// Each new node picks a uniformly random existing node as the ignition point,
/// runs BurnGeoFire from it and is linked to every burned node.
/// @param GraphNodes  Target number of nodes.
/// @param FloodStop   If true, abort once the graph has more than 1000 edges
///                    and more than 100 edges per node on average.
/// @return TFfGGen::srFlood when aborted by the flood check, else TFfGGen::srOk.
TFfGGen::TStopReason TUndirFFire::AddNodes(const int& GraphNodes, const bool& FloodStop) {
  // Create the graph before anything dereferences it. The original printed the
  // banner through Graph-> first and only afterwards checked Graph.Empty().
  if (Graph.Empty()) { Graph = PUNGraph::New(); }
  printf("\n***Undirected GEO ForestFire: graph(%d,%d) add %d nodes, burn prob %.3f\n", Graph->GetNodes(), Graph->GetEdges(), GraphNodes, BurnProb);
  TExeTm ExeTm;
  int Burned1 = 0, Burned2 = 0, Burned3 = 0; // last 3 fire sizes
  TIntPrV NodesEdgesV;
  // make sure there is at least one node to ignite from
  if (Graph->GetNodes() == 0) { Graph->AddNode(); }
  int NEdges = Graph->GetEdges();
  // forest fire
  for (int NNodes = Graph->GetNodes() + 1; NNodes <= GraphNodes; NNodes++) {
    const int NewNId = Graph->AddNode(-1);
    IAssert(NewNId == Graph->GetNodes() - 1); // node ids have to be 0...N
    const int StartNId = Rnd.GetUniDevInt(NewNId);
    const int NBurned = BurnGeoFire(StartNId);
    // add edges to burned nodes
    for (int e = 0; e < NBurned; e++) {
      Graph->AddEdge(NewNId, GetBurnedNId(e));
    }
    NEdges += NBurned;
    // shift the window holding the last three fire sizes
    Burned1 = Burned2;
    Burned2 = Burned3;
    Burned3 = NBurned;
    // progress report every 1000 nodes
    if (NNodes % Kilo(1) == 0) {
      printf("(%d, %d) burned: [%d,%d,%d] [%s]\n", NNodes, NEdges, Burned1, Burned2, Burned3, ExeTm.GetStr());
      NodesEdgesV.Add(TIntPr(NNodes, NEdges));
    }
    // flood: more than 1000 edges and more than 100 edges per node on average
    if (FloodStop && NEdges>1000 && NEdges / double(NNodes)>100.0) {
      printf("!!! FLOOD. G(%6d, %6d)\n", NNodes, NEdges);
      return TFfGGen::srFlood;
    }
  }
  printf("\n");
  IAssert(Graph->GetEdges() == NEdges);
  return TFfGGen::srOk;
}
void TSubGraphsEnum::RecurBfs(const int& MxDepth) { TExeTm ExeTm; SgV.Clr(true); for (TNGraph::TNodeI NI = NGraph->BegNI(); NI < NGraph->EndNI(); NI++) { TSimpleGraph SimpleG; RecurBfs(NI.GetId(), MxDepth, SimpleG); //NGraph->DelNode(NI.GetId()); printf("."); } printf("\ncandidates: %d\n", SgV.Len()); SgV.Sort(); int Cnt = 1; for (int i = 1; i < SgV.Len(); i++) { if (SgV[i-1] != SgV[i]) Cnt++; } printf("distinct: %d\t[%s]\n", Cnt, ExeTm.GetTmStr()); }
void TSubGraphsEnum::EnumSubGraphs(const int& MaxEdges) { TExeTm ExeTm; Gen2Graphs(); printf(" %2d edge graphs: %d\t[%s]\n", 2, SgV.Len(), ExeTm.GetTmStr()); ExeTm.Tick(); //for (int i = 0; i < SgV.Len(); i++) { SgV[i].Dump(TStr::Fmt(" %d", i+1)); } //printf("**************************************************************\n"); TSimpleGraph SimpleG; TIntPrV& EdgeV = SimpleG.GetEdgeV(); // multiple edge sub-graphs for (int edges = 3; edges <= MaxEdges; edges++) { EdgeV.Clr(); printf(" %2d edge graphs:", edges); for (int g1 = 0; g1 < SgV.Len()-1; g1++) { for (int g2 = g1+1; g2 < SgV.Len(); g2++) { if (SimpleG.Join(SgV[g1], SgV[g2])) { NextSgV.Add(SimpleG); } } } printf(" candidates: %8d [%s]", NextSgV.Len(), ExeTm.GetTmStr()); ExeTm.Tick(); NextSgV.Sort(); SgV.Gen(NextSgV.Len(), 0); SgV.Add(NextSgV[0]); for (int i = 1; i < NextSgV.Len(); i++) { if (SgV.Last() != NextSgV[i]) { SgV.Add(NextSgV[i]); } } NextSgV.Clr(false); printf(" total: %8d [%s]\n", SgV.Len(), ExeTm.GetTmStr()); ExeTm.Tick(); //for (int i = 0; i < SgV.Len(); i++) { SgV[i].Dump(TStr::Fmt(" %d", i+1)); } //printf("**************************************************************\n"); } }
/// Rewire the network. Keeps node degrees as is but randomly rewires the edges. /// Use this function to generate a random graph with the same degree sequence /// as the OrigGraph. /// See: On the uniform generation of random graphs with prescribed degree /// sequences by R. Milo, N. Kashtan, S. Itzkovitz, M. E. J. Newman, U. Alon /// URL: http://arxiv.org/abs/cond-mat/0312028 PUNGraph GenRewire(const PUNGraph& OrigGraph, const int& NSwitch, TRnd& Rnd) { const int Nodes = OrigGraph->GetNodes(); const int Edges = OrigGraph->GetEdges(); PUNGraph GraphPt = TUNGraph::New(); TUNGraph& Graph = *GraphPt; Graph.Reserve(Nodes, -1); TExeTm ExeTm; // generate a graph that satisfies the constraints printf("Randomizing edges (%d, %d)...\n", Nodes, Edges); TIntPrSet EdgeSet(Edges); for (TUNGraph::TNodeI NI = OrigGraph->BegNI(); NI < OrigGraph->EndNI(); NI++) { const int NId = NI.GetId(); for (int e = 0; e < NI.GetOutDeg(); e++) { if (NId <= NI.GetOutNId(e)) { continue; } EdgeSet.AddKey(TIntPr(NId, NI.GetOutNId(e))); } Graph.AddNode(NI.GetId()); } // edge switching uint skip=0; for (uint swps = 0; swps < 2*uint(Edges)*uint(NSwitch); swps++) { const int keyId1 = EdgeSet.GetRndKeyId(Rnd); const int keyId2 = EdgeSet.GetRndKeyId(Rnd); if (keyId1 == keyId2) { skip++; continue; } const TIntPr& E1 = EdgeSet[keyId1]; const TIntPr& E2 = EdgeSet[keyId2]; TIntPr NewE1(E1.Val1, E2.Val1), NewE2(E1.Val2, E2.Val2); if (NewE1.Val1 > NewE1.Val2) { Swap(NewE1.Val1, NewE1.Val2); } if (NewE2.Val1 > NewE2.Val2) { Swap(NewE2.Val1, NewE2.Val2); } if (NewE1!=NewE2 && NewE1.Val1!=NewE1.Val2 && NewE2.Val1!=NewE2.Val2 && ! EdgeSet.IsKey(NewE1) && ! 
EdgeSet.IsKey(NewE2)) { EdgeSet.DelKeyId(keyId1); EdgeSet.DelKeyId(keyId2); EdgeSet.AddKey(TIntPr(NewE1)); EdgeSet.AddKey(TIntPr(NewE2)); } else { skip++; } if (swps % Edges == 0) { printf("\r %uk/%uk: %uk skip [%s]", swps/1000u, 2*uint(Edges)*uint(NSwitch)/1000u, skip/1000u, ExeTm.GetStr()); if (ExeTm.GetSecs() > 2*3600) { printf(" *** Time limit!\n"); break; } // time limit 2 hours } } printf("\r total %uk switchings attempted, %uk skiped [%s]\n", 2*uint(Edges)*uint(NSwitch)/1000u, skip/1000u, ExeTm.GetStr()); for (int e = 0; e < EdgeSet.Len(); e++) { Graph.AddEdge(EdgeSet[e].Val1, EdgeSet[e].Val2); } return GraphPt; }
/// Runs em_sub on ExM.CPU worker threads and averages their results.
/// Each worker i fills its own Alphas[i] and ThVs[i] (a vector of ExM.W+1
/// values). After all workers have joined, the per-worker alphas and vectors
/// are averaged into slot 0, optionally trimmed via ExM.TrimTailNTh, and the
/// averaged vector is saved to the file named by ExM.GetBNTHFNm().
/// NOTE(review): assumes ExM.CPU >= 1; slot 0 is accessed unconditionally.
void em_multi(ExamMgr& ExM) {
  TExeTm tm;
  const int NThreads = ExM.CPU;
  TFltV Alphas(NThreads);
  // std::vector replaces the original variable-length array of TFltV, which
  // is a compiler extension rather than standard C++
  std::vector<TFltV> ThVs(static_cast<size_t>(NThreads), TFltV(ExM.W+1));

  std::vector<std::thread> threads;
  threads.reserve(static_cast<size_t>(NThreads));
  for (int i=0; i<NThreads; i++) {
    // each worker writes only to its own slot, so the slots need no locking
    threads.emplace_back([i, &ExM, &Alphas, &ThVs] { em_sub(i, ExM, Alphas[i], ThVs[i]); });
  }
  for (std::thread& t: threads) { t.join(); }

  // average the per-thread results into slot 0
  for (int n=1; n<NThreads; n++) { Alphas[0] += Alphas[n]; }
  Alphas[0] /= NThreads;
  for (int i=0; i<=ExM.W; i++) {
    for (int n=1; n<NThreads; n++) { ThVs[0][i] += ThVs[n][i]; }
    ThVs[0][i] /= NThreads;
  }

  if (ExM.TrimTail) { ExM.TrimTailNTh(ThVs[0], Alphas[0]); }
  const TStr OFnm = ExM.GetBNTHFNm();
  BIO::SaveFltVWithIdx(ThVs[0], OFnm, TStr::Fmt("# Nodes: %d\n# Repeated: %d\n# Avg time cost: %.2f secs.\n# Alpha: %.6e", ExM.N, ExM.GetRpt(), tm.GetSecs()/ExM.GetRpt(), Alphas[0].Val));
  printf("Saved to %s\n", OFnm.CStr());
}
int main(int argc, char* argv[]) { setbuf(stdout, NULL); // disables the buffer so that print statements are not buffered and display immediately (?) Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Node centrality. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "", "input network"); const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "output prefix (filename extensions added)"); const TStr BseFNm = OutFNm.RightOfLast('/'); const double eps = Env.GetIfArgPrefixFlt("-eps:", 1.0e-5, "minimum quality improvement threshold"); const double min_moves = Env.GetIfArgPrefixFlt("-moves:", 1.0e-2, "minimum number of moves required (proportional)"); const double max_iters = Env.GetIfArgPrefixFlt("-iters:", 1.0e+4, "maximum number of iterations"); // Load graph and create directed and undirected graphs (pointer to the same memory) printf("\nLoading %s...", InFNm.CStr()); PFltWNGraph WGraph = TSnap::LoadFltWEdgeList<TWNGraph>(InFNm); printf(" DONE\n"); printf(" nodes: %d\n", WGraph->GetNodes()); printf(" edges: %d\n", WGraph->GetEdges()); printf(" time elapsed: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); // Declare variables // COMMUNITY // TODO // Louvain method (modularity objective) Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }