// generates Kronecker model using configuration model of small model network // and compare it to big network void KroneckerBySample(vector<TStr> CommandLineArgs){ Try Env = TEnv(CommandLineArgs[KRONTEST], TNotify::NullNotify); // generation of big model and its Kronecker product is required const TStr Gen = Env.GetIfArgPrefixStr("-gen:", "model+kron", "Generation of sample or/and its Kronecker product (model, kron, model+kron)"); // plot type const TStr Plt = Env.GetIfArgPrefixStr("-plt:", "all", "Type of plots (cum, noncum, all)"); // time estimates file name const TStr StatFile = Env.GetIfArgPrefixStr("-ot:", "stat.tab", "Name of output file with statistics"); TFile = OpenFile(StatFile.CStr()); CheckParams(Gen, Plt); PyInit("PySettings.txt"); if (Gen != "none") { vector <TStr> Parameters; GetParameters(CommandLineArgs, Parameters); // name is empty Parameters.push_back(""); GetGraphs(Parameters, Gen, Plt); } Py_Finalize(); Catch }
int main(int argc, char* argv[]){ Try; // create environment Env=TEnv(argc, argv, TNotify::StdNotify); // command line parameters Env.PrepArgs("Web-Crawling into Text-Base"); TStr InWebFilterFNm=Env.GetIfArgPrefixStr("-i:", "", "Input-Web-Filter-File"); TStr OutTBsFNm=Env.GetIfArgPrefixStr("-o:", "", "Output-TextBase-FileName"); TStr OutLogFNm=Env.GetIfArgPrefixStr("-olog:", "Crawl2TBs.Log", "Output-Log-FileName"); bool IndexTxtBsP=Env.GetIfArgPrefixBool("-index:", false, "Create-TextBase-Index"); if (Env.IsEndOfRun()){return 0;} // get text-base names if (OutTBsFNm.Empty()){ OutTBsFNm=InWebFilterFNm.GetFPath()+InWebFilterFNm.GetFMid();} TStr TxtBsNm=OutTBsFNm.GetFBase(); TStr TxtBsFPath=OutTBsFNm.GetFPath(); // create web-text-base PWebTxtBs WebTxtBs=TWebTxtBs::New(TNotify::StdNotify); WebTxtBs->PutNotify(TWebTxtBsNotify::New(WebTxtBs, OutLogFNm)); // create text-base WebTxtBs->TxtBsNew(TxtBsNm, TxtBsFPath); // start fetching WebTxtBs->FetchStart(TxtBsNm, TxtBsFPath, InWebFilterFNm, IndexTxtBsP); // message loop TSysMsg::Loop(); return 0; Catch; return 1; }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("ragm. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try TStr OutFPrx = Env.GetIfArgPrefixStr("-o:", "", "Output Graph data prefix"); const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input edgelist file name"); const TStr LabelFNm = Env.GetIfArgPrefixStr("-l:", "", "Input file name for node names (Node ID, Node label) "); int OptComs = Env.GetIfArgPrefixInt("-c:", -1, "The number of communities to detect (-1: detect automatically)"); const int MinComs = Env.GetIfArgPrefixInt("-mc:", 5, "Minimum number of communities to try"); const int MaxComs = Env.GetIfArgPrefixInt("-xc:", 100, "Maximum number of communities to try"); const int DivComs = Env.GetIfArgPrefixInt("-nc:", 10, "How many trials for the number of communities"); const int NumThreads = Env.GetIfArgPrefixInt("-nt:", 1, "Number of threads for parallelization"); const double StepAlpha = Env.GetIfArgPrefixFlt("-sa:", 0.3, "Alpha for backtracking line search"); const double StepBeta = Env.GetIfArgPrefixFlt("-sb:", 0.3, "Beta for backtracking line search"); PUNGraph G; TIntStrH NIDNameH; if (InFNm.IsStrIn(".ungraph")) { TFIn GFIn(InFNm); G = TUNGraph::Load(GFIn); } else { G = TAGMUtil::LoadEdgeListStr<PUNGraph>(InFNm, NIDNameH); } if (LabelFNm.Len() > 0) { TSsParser Ss(LabelFNm, ssfTabSep); while (Ss.Next()) { if (Ss.Len() > 0) { NIDNameH.AddDat(Ss.GetInt(0), Ss.GetFld(1)); } } } else { } printf("Graph: %d Nodes %d Edges\n", G->GetNodes(), G->GetEdges()); TVec<TIntV> EstCmtyVV; TExeTm RunTm; TAGMFast RAGM(G, 10, 10); if (OptComs == -1) { printf("finding number of communities\n"); OptComs = RAGM.FindComsByCV(NumThreads, MaxComs, MinComs, DivComs, OutFPrx, StepAlpha, StepBeta); } RAGM.NeighborComInit(OptComs); if (NumThreads == 1 || G->GetEdges() < 1000) { RAGM.MLEGradAscent(0.0001, 1000 * G->GetNodes(), "", StepAlpha, StepBeta); } else { 
RAGM.MLEGradAscentParallel(0.0001, 1000, NumThreads, "", StepAlpha, StepBeta); } RAGM.GetCmtyVV(EstCmtyVV); TAGMUtil::DumpCmtyVV(OutFPrx + "cmtyvv.txt", EstCmtyVV, NIDNameH); TAGMUtil::SaveGephi(OutFPrx + "graph.gexf", G, EstCmtyVV, 1.5, 1.5, NIDNameH); Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("\nGenerate stochastic block model networks. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const int noNodes = Env.GetIfArgPrefixInt("-n:", 512, "Number of nodes in synthetic graph (default: 512)\n"); const double pIn = Env.GetIfArgPrefixFlt("-pIn:", 0.8, "pIn (default: 0.8)\n"); const double pOut = Env.GetIfArgPrefixFlt("-pOut:", 0.2, "pOut (default: 0.2)\n"); const int noCommunities = Env.GetIfArgPrefixInt("-k:", 2, "Number of communities in graph (default: 2)\n"); TGraphAlgo graphAlgo; graphAlgo.generateNetwork(noNodes, noCommunities, pIn, pOut); TStr networkFilename = TStr("test-network-sbm.txt"); TStr networkAdjacencyMatrixFilename = TStr("test-network-sbm-adjacency-matrix.txt"); TStr networkGexfFilename = TStr("test-network-sbm.gexf"); TStr networkLouvainFormatFilename = TStr("test-network-sbm-louvain.txt"); TStr louvainTreeFilename = TStr("test-network-sbm-louvain.tree"); TStr communityLabelsFilename = TStr("test-network-sbm-assignments.txt"); graphAlgo.saveGroundTruth(networkFilename); graphAlgo.saveGroundTruthAdjacencyMatrix(networkAdjacencyMatrixFilename); graphAlgo.saveGroundTruthGexf(networkGexfFilename); graphAlgo.convertGroundTruthToLouvainFormat(networkLouvainFormatFilename); graphAlgo.saveCommunityLabels(communityLabelsFilename, noNodes, noCommunities); Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]){ Try; // create environment Env=TEnv(argc, argv, TNotify::StdNotify); // get command line parameters Env.PrepArgs("Text-Base Dump"); TStr InTBsFNm=Env.GetIfArgPrefixStr("-i:", "", "Input-TextBase-FileName"); TStr OutDmpFNm=Env.GetIfArgPrefixStr("-o:", "TxtBs.Dmp", "Output-Dump-FileName"); if (Env.IsEndOfRun()){return 0;} // load text-base TStr TxtBsNm=InTBsFNm.GetFBase(); TStr TxtBsFPath=InTBsFNm.GetFPath(); printf("Loading Text-Base '%s' at '%s' ...", TxtBsNm.CStr(), TxtBsFPath.CStr()); PTxtBs TxtBs=TTxtBs::New(TxtBsNm, TxtBsFPath, faRdOnly); printf(" Done.\n"); // dump text-base printf("Dumping to '%s' ...", OutDmpFNm.CStr()); TxtBs->SaveTxt(OutDmpFNm); printf(" Done.\n"); return 0; Catch; return 1; }
int main(int argc, char* argv[]) { // code needed for inputing parameters Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Network diversity. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; // for measuring execution time Try const TStr InFNmGraph = Env.GetIfArgPrefixStr("-i:", "artificial_intelligence_pub.txt", "Input graph (undirected graph)"); const TStr InFNmCat = Env.GetIfArgPrefixStr("-c:", "artificial_intelligence_cat_pub.txt", "Categories"); const TStr InFNmMat = Env.GetIfArgPrefixStr("-m:", "sciences.txt", "Matrix"); const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "diversity.txt", "Output file"); const int DivAlg = Env.GetIfArgPrefixInt("-a:", 1, "Measure: 1:Stirling"); const int Alpha = Env.GetIfArgPrefixInt("-alp:", 1, "alpha"); const int Beta = Env.GetIfArgPrefixInt("-bet:", 1, "beta"); const int Gamma = Env.GetIfArgPrefixInt("-gam:", 1, "gama"); // defining graph PUNGraph Graph = TSnap::LoadEdgeList<PUNGraph>(InFNmGraph, false); double D = 0.0; TStr DivAlgStr; // based on input parametr -a (variable DivAlg), diversity measure is choosen if (DivAlg == 1) { DivAlgStr = "Stirling"; D = TSnap::StirlingIndex(Graph,InFNmCat,InFNmMat, Alpha, Beta, Gamma);} else { Fail; } printf("\nDiversity: %f\nrun time: %s (%s)\n", D,ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); //print execution time Catch return 0; }
// Parses the command line into the caller-supplied option variables.
// Every output parameter is overwritten, either with the value given on the
// command line or with the default listed in the corresponding call below.
// The global Env is re-created from argc/argv as a side effect.
void ParseArgs(int& argc, char* argv[], TStr& InFile, TStr& OutFile,
 int& Dimensions, int& WalkLen, int& NumWalks, int& WinSize, int& Iter,
 bool& Verbose, double& ParamP, double& ParamQ, bool& Directed, bool& Weighted,
 bool& OutputWalks) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("\nAn algorithmic framework for representational learning on graphs."));

  // input / output locations
  InFile = Env.GetIfArgPrefixStr("-i:", "graph/karate.edgelist", "Input graph path");
  OutFile = Env.GetIfArgPrefixStr("-o:", "emb/karate.emb", "Output graph path");

  // integer options
  Dimensions = Env.GetIfArgPrefixInt("-d:", 128, "Number of dimensions. Default is 128");
  WalkLen = Env.GetIfArgPrefixInt("-l:", 80, "Length of walk per source. Default is 80");
  NumWalks = Env.GetIfArgPrefixInt("-r:", 10, "Number of walks per source. Default is 10");
  WinSize = Env.GetIfArgPrefixInt("-k:", 10, "Context size for optimization. Default is 10");
  Iter = Env.GetIfArgPrefixInt("-e:", 1, "Number of epochs in SGD. Default is 1");

  // floating-point hyperparameters
  ParamP = Env.GetIfArgPrefixFlt("-p:", 1, "Return hyperparameter. Default is 1");
  ParamQ = Env.GetIfArgPrefixFlt("-q:", 1, "Inout hyperparameter. Default is 1");

  // boolean switches: set to the result of Env.IsArgStr for the given flag
  Verbose = Env.IsArgStr("-v", "Verbose output.");
  Directed = Env.IsArgStr("-dr", "Graph is directed.");
  Weighted = Env.IsArgStr("-w", "Graph is weighted.");
  OutputWalks = Env.IsArgStr("-ow", "Output random walks instead of embeddings.");
}
int main(int argc, char* argv[]){ Try; // create environment Env=TEnv(argc, argv, TNotify::StdNotify); // get command line parameters Env.PrepArgs("Text-Base Server"); TStr InTBsFNm=Env.GetIfArgPrefixStr("-i:", "", "Input-TextBase-FileName"); int SrvPortN=Env.GetIfArgPrefixInt("-port:", 8888, "Server-Port"); if (Env.IsEndOfRun()){return 0;} // notification PNotify Notify=TNotify::StdNotify; // load text-base TStr TxtBsNm=InTBsFNm.GetFBase(); TStr TxtBsFPath=InTBsFNm.GetFPath(); printf("Loading Web-Base...\r"); PWebTxtBs WebTxtBs=TWebTxtBs::New(Notify); WebTxtBs->TxtBsOpenForRdOnly(TxtBsNm, TxtBsFPath); // create & activate server PWebSrv WebSrv=TWebTxtBsSrv::New(WebTxtBs, SrvPortN, Notify); // message loop TSysMsg::Loop(); return 0; Catch; return 1; }
// get model graph according to args void GetModel(const TStr& Args, PNGraph& G){ Env = TEnv(Args, TNotify::NullNotify); const TStr Gen = Env.GetIfArgPrefixStr("-g:", "gen", "How to get model graph: read, gen, deg, genpy"); const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "", "Input graph file (single directed edge per line)"); TExeTm execTime; if (Gen == "gen") BasicGraphGen(Args, G); else if (Gen == "read") ReadPNGraphFromFile(InFNm, G); else if (Gen == "genpy") { PUNGraph GU; GenPy(GU, TFile, Args); G = TSnap::ConvertGraph<PNGraph>(GU); } TFile << "Time of getting model: " << execTime.GetTmStr() << endl; /*TFile << "Model graph: " << G->GetNodes() << " nodes, " << G->GetEdges() << " edges\n"; TIntV DegV; TSnap::GetDegSeqV(G, DegV); execTime.Tick(); PUNGraph Conf = TSnap::GenConfModel(DegV); TFile << "Time of getting configuration model: " << execTime.GetTmStr() << endl; cout << "Undirected configuration model: " << Conf->GetNodes() << " nodes, " << Conf->GetEdges() << " edges\n"; PNGraph ConfD = TSnap::ConvertGraph<PNGraph>(Conf); SaveAndPlot(ConfD, "conf", false); TFile << "Clustering coefficient of configuration model: " << TSnap::GetClustCf(ConfD) << endl; TSnap::PlotClustCf(ConfD,"conf");*/ }
bool CmdArgs::IsDir() const { Env = TEnv(CommandLineArgs[KRONGEN], TNotify::NullNotify); TStr IsDir = Env.GetIfArgPrefixStr("-isdir:", "false", "Produce directed graph (true, false)"); if (IsDir == "true") return true; return false; }
// Builds an undirected graph by calling a Python graph generator and copying
// the resulting nodes and edges into `res`.
//   res        - receives a newly created graph
//   TFile      - stream that receives the timing lines written below
//   parameters - argument string; -module: and -func: select the Python module
//                and function, and the same string is passed on to ParseArgs
// Returns 0 both when argument parsing fails and on success; if CallPyFunction
// reports failure the process is terminated with exit(1).
// The global Env is re-created from `parameters` as a side effect.
// NOTE(review): G, nodes and edges are allocated with new[] and never
// delete[]'d, and Py_DECREF is applied to these array pointers rather than to
// PyObject* values. Whether that is intended depends on how CallPyFunction,
// GetNodes and GetEdges fill them - confirm before changing.
int GenPy(PUNGraph &res, ofstream& TFile, const TStr& parameters)
{
  Env = TEnv(parameters, TNotify::StdNotify);
  TStr mN = Env.GetIfArgPrefixStr("-module:", "random_graphs", "Module name");
  TStr fN = Env.GetIfArgPrefixStr("-func:", "fast_gnp_random_graph", "Function name");

  // one-element holder that CallPyFunction fills with the generated graph object
  PyObject **G = new PyObject*[1];

  char *moduleName = mN.CStr();
  char *funcName = fN.CStr();
  AddFuncInfo();
  TStrV args, argTypes;
  if (!ParseArgs(funcName, parameters, args, argTypes)) {
    printf("Fail to parse arguments for NetworkX generation...\n");
    // NOTE(review): same return value as the success path; G is not released here
    return 0;
  };
  TExeTm execTime;
  if (!CallPyFunction(moduleName, funcName, args, argTypes, G))
  {
    cout << "CallPyFunction() raised error. Execution terminated.\n";
    system("pause");
    exit(1);
  };

  TFile << "Time of generation of graph by NetworkX: " << execTime.GetTmStr() << endl;

  // restart the timer for the copy phase
  execTime.Tick();
  PyObject*** nodes = new PyObject**[1];
  GetNodes(G, nodes);
  int nodesCount = PyList_Size(*(nodes[0]));
  //printf("nodesCount = %d, ", nodesCount);
  res = PUNGraph::TObj::New();
  // NOTE(review): nodesCount*nodesCount is an int multiplication - presumably
  // fine for small graphs, but verify it cannot overflow for the sizes used
  res->Reserve(nodesCount, nodesCount*nodesCount);
  // nodes are added with ids 0..nodesCount-1
  for (size_t i = 0; i < nodesCount; i++)
    res->AddNode(i);
  Py_DECREF(nodes);

  PyObject*** edges = new PyObject**[1];
  GetEdges(G, edges);
  int edgesCount = PyList_Size(*(edges[0]));
  //printf("edgesCount = %d\n", edgesCount);
  // each edge item is read as a sequence whose elements 0 and 1 are the endpoints
  for (size_t i = 0; i < edgesCount; i++)
  {
    PyObject* item = PySequence_Fast_GET_ITEM(*(edges[0]), i);
    int v1, v2;
    PyObject* node = PySequence_Fast_GET_ITEM(item,0);
    v1 = PyLong_AsLong(node);
    node = PySequence_Fast_GET_ITEM(item,1);
    v2 = PyLong_AsLong(node);
    res->AddEdge(v1,v2);
  }
  TFile << "Time of copying of graph from NetworkX representation: " << execTime.GetTmStr() << endl;
  Py_DECREF(G);
  Py_DECREF(edges);
  //Py_Finalize(); // frees the memory handed to the interpreter

  return 0;
}
void ReadPNGraphFromFile(const TStr args, PNGraph& G){ Try Env = TEnv(args, TNotify::StdNotify); const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input graph file (single directed edge per line)"); // load graph G = TSnap::LoadEdgeList<PNGraph>(InFNm, 0, 1); Catch }
int main(int argc, char* argv[]) { setbuf(stdout, NULL); // disables the buffer so that print statements are not buffered and display immediately (?) Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Backbone extractor (Vespignani). build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "", "input network"); const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "output prefix (alpha value and filename extensions added)"); const float alpha = Env.GetIfArgPrefixFlt("-a:", 0.01, "alpha significance level threshold"); // Load graph and create directed and undirected graphs (pointer to the same memory) printf("\nLoading %s...", InFNm.CStr()); PFltWNGraph WGraph = TSnap::LoadFltWEdgeList<TWNGraph>(InFNm); printf(" DONE\n"); printf(" nodes: %d\n", WGraph->GetNodes()); printf(" edges: %d\n", WGraph->GetEdges()); printf(" time elapsed: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); // Declare variables TIntFltH OutWDegH, InWDegH; TIntIntH OutDegH, InDegH; // Get degrees, weighted and unweighted printf("\nGetting initial distribution of binary in / out degrees..."); TSnap::GetInDegH(WGraph, InDegH); TSnap::GetOutDegH(WGraph, OutDegH); printf(" DONE (time elapsed: %s (%s))", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); printf("\nGetting initial distribution of weighted in / out degrees..."); TSnap::GetWInDegH(WGraph, InWDegH); TSnap::GetWOutDegH(WGraph, OutWDegH); printf(" DONE (time elapsed: %s (%s))", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); // Apply the disparity filter Vespignani method printf("\nApplying the disparity filter vespignani method..."); TSnap::FilterEdgesVespignani(WGraph, InWDegH, OutWDegH, InDegH, OutDegH, alpha); printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); printf("\nPruned graph:"); printf("\n nodes: %d", WGraph->GetNodes()); printf("\n edges: %d\n", 
WGraph->GetEdges()); // OUTPUTTING printf("\nSaving %s-%f.snap...", OutFNm.CStr(), alpha); TSnap::SaveFltWEdgeList(WGraph, TStr::Fmt("%s-%f.snap", OutFNm.CStr(), alpha), ""); printf(" DONE\n"); Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]) { TExeTm ExeTm; try { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("\nPlotting Scatter For Twitter Cascades. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); // THash< TUInt , TSecTmV > twitterUrls = Tools::loadTwitter("DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED_HAVINGBOTH.rar"); // THash< TUInt , TSecTmV > twitterContents = Tools::loadTwitter("DATA/CascadesOnTwitterData_FINALFILTERED_HAVINGBOTH.rar"); THash< TUInt , TSecTmV > twitterUrls = Tools::loadTwitter("/NS/twitter-5/work/oaskaris/DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED_HAVINGBOTH.rar"); // CascadesFullUrlsOnTwitterData_FINALFILTERED THash< TUInt , TSecTmV > twitterContents = Tools::loadTwitter("/NS/twitter-5/work/oaskaris/DATA/CascadesOnTwitterData_FINALFILTERED_HAVINGBOTH.rar"); // CascadesOnTwitterData_FINALFILTERED THash< TUInt , TSecTmV > full_twitterUrls = Tools::loadTwitter("/NS/twitter-5/work/oaskaris/DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED.rar"); THash< TUInt , TSecTmV > full_twitterContents = Tools::loadTwitter("/NS/twitter-5/work/oaskaris/DATA/CascadesOnTwitterData_FINALFILTERED.rar"); // Scatter plot plotScatterLengthOfEachCascade(twitterUrls,twitterContents); // Percentage computation double cnt = 0; for(int i=0;i<full_twitterUrls.Len();i++) { if(full_twitterContents.GetKeyId(full_twitterUrls.GetKey(i)) != -1) { cnt++; } } cnt /= full_twitterUrls.Len(); // twitterUrls.Len() / full_twitterUrls.Len() printf("The percentage of Urls of quotes which have contents as well: %f\n", 100 * cnt); cnt = 0; for(int i=0;i<full_twitterContents.Len();i++) { if(full_twitterUrls.GetKeyId(full_twitterContents.GetKey(i)) != -1) { cnt++; } } cnt /= full_twitterContents.Len(); printf("The percentage of Contents of quotes which have urls as well: %f\n", 100 * cnt); printf("\nScatter Plot had been drawn successfully."); } catch(exception& ex) { printf("\nError1 happened, it was: %s\n\n",ex.what()); } catch(TPt<TExcept>& 
ex) { printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr()); } printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]){ Try; // create environment Env=TEnv(argc, argv, TNotify::StdNotify); // get command line parameters Env.PrepArgs("DMoz-Topic To Text", -1); TStr InFPath=Env.GetIfArgPrefixStr("-i:", "", "Input-File-Path"); TStr OutFPath=Env.GetIfArgPrefixStr("-o:", "", "Output-File-Path"); TStr RootCatNm=Env.GetIfArgPrefixStr("-c:", "Top/Science", "Root-Category-Name"); if (Env.IsEndOfRun()){return 0;} // load DMoz-Base PDMozBs DMozBs=TDMozBs::LoadBin(TDMozInfo::BinFullFBase, InFPath); // assign root category name //RootCatNm="Top/Computers/Software/Databases/Data_Mining"; //RootCatNm="Top/Reference/Knowledge_Management/Knowledge_Discovery"; //RootCatNm="Top/Computers/Artificial_Intelligence/Machine_Learning"; //RootCatNm="Top/Computers/Artificial_Intelligence"; //RootCatNm="Top/Recreation/Travel"; // get root category-id int RootCatId=DMozBs->GetCatId(RootCatNm); // prepare external-url list TStr RootFBase=TStr::GetFNmStr(RootCatNm, true); TStr ExtUrlFNm=TStr::GetNrFPath(OutFPath)+RootFBase+"_ExternalUrlList.Txt"; TFOut ExtUrlSOut(ExtUrlFNm); FILE* fExtUrlOut=ExtUrlSOut.GetFileId(); // get topic categories TIntV TopicCatIdV; DMozBs->GetSubCatIdV(RootCatId, TopicCatIdV); for (int TopicCatIdN=0; TopicCatIdN<TopicCatIdV.Len(); TopicCatIdN++){ // get topic id & name int TopicCatId=TopicCatIdV[TopicCatIdN]; TStr TopicCatNm=DMozBs->GetCatNm(TopicCatId); // get subtopic subtrees and corresponding external-url-ids TIntV SubCatIdV; TIntV CatIdV; //DMozBs->GetSubTreeCatIdV(TopicCatId, SubCatIdV, CatIdV, true); TIntV ExtUrlIdV; DMozBs->GetExtUrlIdV(CatIdV, ExtUrlIdV); // output url/titles/descriptions TStr TopicFBase=TStr::GetFNmStr(TopicCatNm, true); TStr TopicFNm=TStr::GetNrFPath(OutFPath)+TopicFBase+".Txt"; printf("Saving %s\n", TopicFNm.CStr()); TFOut TopicSOut(TopicFNm); FILE* fTopicOut=TopicSOut.GetFileId(); for (int ExtUrlIdN=0; ExtUrlIdN<ExtUrlIdV.Len(); ExtUrlIdN++){ int ExtUrlId=ExtUrlIdV[ExtUrlIdN]; TStr UrlStr=DMozBs->GetExtUrlStr(ExtUrlId); TStr 
TitleStr=DMozBs->GetExtUrlTitleStr(ExtUrlId); TStr DescStr=DMozBs->GetExtUrlDescStr(ExtUrlId); fprintf(fExtUrlOut, "%s\n", UrlStr.CStr()); fprintf(fTopicOut, "%s - %s\n", TitleStr.CStr(), DescStr.CStr()); } } return 0; Catch; return 1; }
void GetParameters(const vector<TStr>& CommandLineArgs, vector<TStr>& Parameters){ Env = TEnv(CommandLineArgs[KRONTEST], TNotify::NullNotify); // to plot const TStr Plt = Env.GetIfArgPrefixStr("-plttype:", "model+kron", "Plotting of big model and/or its Kronecker product (model, kron, model+kron)"); // type of plots const TStr PltType = Env.GetIfArgPrefixStr("-plt:", "all", "Type of plots (cum, noncum, all)"); for (size_t i = 0; i < CommandLineArgs.size(); i++) Parameters.push_back(CommandLineArgs[i]); Parameters.push_back(Plt); Parameters.push_back(PltType); }
int main(int argc, char* argv[]) { // TFltPrV v; // v.Add(TFltPr(1,4)); // v.Add(TFltPr(5,5)); // v.Add(TFltPr(9,11)); // v.Add(TFltPr(20,8)); // v.Add(TFltPr(21,30)); // cout << "C: " << Tools::computeCorrelation(v,Pearson) << endl; // return 0; TExeTm ExeTm; try { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("\nPlotting Individually Memes-Twitter Cascades. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); // URLS THash< TStr , CascadeElementV > quotes = Tools::loadQuotes("DATA/QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_HAVINGBOTH.rar"); // QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_4URLS THash< TUInt , TSecTmV > twitterUrls = Tools::loadTwitter("DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED_HAVINGBOTH.rar"); // CascadesFullUrlsOnTwitterData_FINALFILTERED // CONTENTS //THash< TStr , CascadeElementV > quotes2 = Tools::loadQuotes("DATA/QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_HAVINGBOTH.rar"); // QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_4Contents THash< TUInt , TSecTmV > twitterContents = Tools::loadTwitter("DATA/CascadesOnTwitterData_FINALFILTERED_HAVINGBOTH.rar"); // CascadesOnTwitterData_FINALFILTERED // Plotting THash< TUInt , TSecTmV > twitterTotal; for(int i=0;i<twitterContents.Len();i++) { TSecTmV tmp; tmp.AddV(twitterContents[i]); tmp.AddV(twitterUrls[i]); twitterTotal.AddDat(i,tmp); } plotScatterLengthOfEachCascade(quotes,twitterUrls,"Urls"); plotScatterLengthOfEachCascade(quotes,twitterContents,"Contents"); plotScatterLengthOfEachCascade(quotes,twitterTotal,"Full"); printf("\nPlots had been drawn successfully."); } catch(exception& ex) { printf("\nError1 happened, it was: %s\n\n",ex.what()); } catch(TPt<TExcept>& ex) { printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr()); } printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Motifs. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input directed graph file (single directed edge per line)"); const int MotifSz = Env.GetIfArgPrefixInt("-m:", 3, "Motif size (has to be 3 or 4)"); const bool DrawMotifs = Env.GetIfArgPrefixBool("-d:", true, "Draw motif shapes (requires GraphViz)"); TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "Output file prefix"); if (OutFNm.Empty()) { OutFNm = InFNm.GetFMid(); } EAssert(MotifSz==3 || MotifSz==4); // load graph PNGraph G; if (InFNm.GetFExt().GetLc()==".ungraph") { TFIn FIn(InFNm); G=TSnap::ConvertGraph<PNGraph>(TUNGraph::Load(FIn), true); } else if (InFNm.GetFExt().GetLc()==".ngraph") { TFIn FIn(InFNm); G=TNGraph::Load(FIn); } else { G = TSnap::LoadEdgeList<PNGraph>(InFNm, 0, 1); } bool IsOk = true; for (int nid = 0; nid < G->GetNodes(); nid++) { if (! G->IsNode(nid)) { IsOk=false; break; } } if (! 
IsOk) { printf("Nodes of the input graph have to be numbered 0...N-1\nRenumbering nodes...\n"); PNGraph OG = G; G = TNGraph::New(); TGraphEnumUtils::GetNormalizedGraph(OG, G); } // G = TSnap::GenRndGnm<PNGraph>(100, Kilo(1)); // count frequency of connected subgraphs in G that have MotifSz nodes TD34GraphCounter GraphCounter(MotifSz); TSubGraphEnum<TD34GraphCounter> GraphEnum; GraphEnum.GetSubGraphs(G, MotifSz, GraphCounter); FILE *F = fopen(TStr::Fmt("%s-counts.tab", OutFNm.CStr()).CStr(), "wt"); fprintf(F, "MotifId\tNodes\tEdges\tCount\n"); for (int i = 0; i < GraphCounter.Len(); i++) { const int gid = GraphCounter.GetId(i); PNGraph SG = GraphCounter.GetGraph(gid); if (DrawMotifs) { TGraphViz::Plot(SG, gvlNeato, TStr::Fmt("%s-motif%03d.gif", OutFNm.CStr(), i), TStr::Fmt("GId:%d Count: %llu", gid, GraphCounter.GetCnt(gid))); } fprintf(F, "%d\t%d\t%d\t%llu\n", gid, SG->GetNodes(), SG->GetEdges(), GraphCounter.GetCnt(gid)); } printf("done."); fclose(F); Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Node Centrality. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input un/directed graph"); const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "node_centrality.tab", "Output file"); printf("Loading %s...", InFNm.CStr()); PNGraph Graph = TSnap::LoadEdgeList<PNGraph>(InFNm); //PNGraph Graph = TSnap::GenRndGnm<PNGraph>(10, 10); //TGraphViz::Plot(Graph, gvlNeato, InFNm+".gif", InFNm, true); printf("nodes:%d edges:%d\n", Graph->GetNodes(), Graph->GetEdges()); PUNGraph UGraph = TSnap::ConvertGraph<PUNGraph>(Graph); // undirected version of the graph TIntFltH BtwH, EigH, PRankH, CcfH, CloseH, HubH, AuthH; //printf("Computing...\n"); printf("Treat graph as DIRECTED: "); printf(" PageRank... "); TSnap::GetPageRank(Graph, PRankH, 0.85); printf(" Hubs&Authorities..."); TSnap::GetHits(Graph, HubH, AuthH); printf("\nTreat graph as UNDIRECTED: "); printf(" Eigenvector..."); TSnap::GetEigenVectorCentr(UGraph, EigH); printf(" Clustering..."); TSnap::GetNodeClustCf(UGraph, CcfH); printf(" Betweenness (SLOW!)..."); TSnap::GetBetweennessCentr(UGraph, BtwH, 1.0); printf(" Constraint (SLOW!)..."); TNetConstraint<PUNGraph> NetC(UGraph, true); printf(" Closeness (SLOW!)..."); for (TUNGraph::TNodeI NI = UGraph->BegNI(); NI < UGraph->EndNI(); NI++) { const int NId = NI.GetId(); CloseH.AddDat(NId, TSnap::GetClosenessCentr<PUNGraph>(UGraph, NId, false)); } printf("\nDONE! 
saving..."); FILE *F = fopen(OutFNm.CStr(), "wt"); fprintf(F,"#Network: %s\n", InFNm.CStr()); fprintf(F,"#Nodes: %d\tEdges: %d\n", Graph->GetNodes(), Graph->GetEdges()); fprintf(F,"#NodeId\tDegree\tCloseness\tBetweennes\tEigenVector\tNetworkConstraint\tClusteringCoefficient\tPageRank\tHubScore\tAuthorityScore\n"); for (TUNGraph::TNodeI NI = UGraph->BegNI(); NI < UGraph->EndNI(); NI++) { const int NId = NI.GetId(); const double DegCentr = UGraph->GetNI(NId).GetDeg(); const double CloCentr = CloseH.GetDat(NId); const double BtwCentr = BtwH.GetDat(NId); const double EigCentr = EigH.GetDat(NId); const double Constraint = NetC.GetNodeC(NId); const double ClustCf = CcfH.GetDat(NId); const double PgrCentr = PRankH.GetDat(NId); const double HubCentr = HubH.GetDat(NId); const double AuthCentr = AuthH.GetDat(NId); fprintf(F, "%d\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n", NId, DegCentr, CloCentr, BtwCentr, EigCentr, Constraint, ClustCf, PgrCentr, HubCentr, AuthCentr); } fclose(F); Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
void GetGraphs(const vector <TStr>& Parameters, const TStr& ModelGen, const TStr&ModelPlt) { PNGraph G; size_t PSize = Parameters.size(); if (GRAPHGEN >= PSize || MTXGEN >= PSize || KRONGEN >= PSize || KRONFIT >= PSize) Error("GetGraphs", "Wrong index in array of parameters"); GetModel(Parameters[GRAPHGEN], G); if (G->GetNodes() == 0) Error("GetGraphs", "Empty graph"); TFltPrV MDegIn, MDegOut; TSnap::GetInDegCnt(G, MDegIn); TSnap::GetOutDegCnt(G, MDegOut); PlotDegrees(Parameters, MDegIn, MDegOut, "model"); TFile << "Model nodes: " << G->GetNodes() << ", model edges: " << G->GetEdges() << endl; TFile << "Maximum output degree in model graph: " << MDegOut[MDegOut.Len()-1].GetVal1() << endl; TFile << "Maximum input degree in model graph: " << MDegIn[MDegIn.Len()-1].GetVal1() << endl; if (ModelGen == "model+kron"){ // generate (or read) Kronecker initiator matrix TKronMtx FitMtxM; if (!GetMtx(Parameters[MTXGEN], FitMtxM)) GenNewMtx(G, Parameters[KRONFIT], FitMtxM); PrintMtx(FitMtxM, TFile); TFile << "Scaling for the number of edges... " << endl; FitMtxM.SetForEdges(G->GetNodes(), G->GetEdges()); int ModelNodes = G->GetNodes(), ModelEdges = G->GetEdges(); Env = TEnv(Parameters[KRONGEN], TNotify::NullNotify); TStr IsDir = Env.GetIfArgPrefixStr("-isdir:", "false", "Produce directed graph (true, false)"); const TInt NIter = Env.GetIfArgPrefixInt("-i:", 1, "Number of iterations of Kronecker product"); if (pow(FitMtxM.GetDim(), static_cast<double>(NIter)) != ModelNodes) Error("GetGraphs", "Inconsistent value of -i: parameter, KronNodes != ModelNodes"); // in and out average degrees of Kronecker graphs TFltPrV KronDegAvgIn, KronDegAvgOut; GenKron(Parameters[KRONGEN], FitMtxM, KronDegAvgIn, KronDegAvgOut); PlotDegrees(Parameters, KronDegAvgIn, KronDegAvgOut, "kron"); } }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Rolx. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "graph.txt", "Input graph (one edge per line, tab/space separated)"); const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "roles.txt", "Output file name prefix"); const int MinRoles = Env.GetIfArgPrefixInt("-l:", 2, "Lower bound of the number of roles"); const int MaxRoles = Env.GetIfArgPrefixInt("-u:", 3, "Upper bound of the number of roles"); double Threshold = 1e-6; if (MinRoles > MaxRoles || MinRoles < 2) { printf("min roles and max roles should be integer and\n"); printf("2 <= min roles <= max roles\n"); exit(EXIT_SUCCESS); } printf("loading file...\n"); PNGraph Graph = TSnap::LoadEdgeList<PNGraph>(InFNm, 0, 1); printf("extracting features...\n"); TIntFtrH Features = ExtractFeatures(Graph); TIntIntH NodeIdMtxIdH = CreateNodeIdMtxIdxHash(Features); TFltVV V = ConvertFeatureToMatrix(Features, NodeIdMtxIdH); //printf("saving features...\n"); //FPrintMatrix(V, "v.txt"); printf("feature matrix is saved in v.txt\n"); TFlt MnError = TFlt::Mx; TFltVV FinalG, FinalF; int NumRoles = -1; for (int r = MinRoles; r <= MaxRoles; ++r) { TFltVV G, F; printf("factorizing for %d roles...\n", r); CalcNonNegativeFactorization(V, r, G, F, Threshold); //FPrintMatrix(G, "g.txt"); //FPrintMatrix(F, "f.txt"); TFlt Error = CalcDescriptionLength(V, G, F); if (Error < MnError) { MnError = Error; FinalG = G; FinalF = F; NumRoles = r; } } //FPrintMatrix(FinalG, "final_g.txt"); //FPrintMatrix(FinalF, "final_f.txt"); printf("using %d roles, min error: %f\n", NumRoles, MnError()); TIntIntH Roles = FindRoles(FinalG, NodeIdMtxIdH); FPrintRoles(Roles, OutFNm); //PlotRoles(Graph, Roles); Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Network community detection. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "graph.txt", "Input graph (undirected graph)"); const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "communities.txt", "Output file"); const int CmtyAlg = Env.GetIfArgPrefixInt("-a:", 2, "Algorithm: 1:Girvan-Newman, 2:Clauset-Newman-Moore, 3:Infomap"); PUNGraph Graph = TSnap::LoadEdgeList<PUNGraph>(InFNm, false); //PUNGraph Graph = TSnap::LoadEdgeList<PUNGraph>("../as20graph.txt", false); //PUNGraph Graph = TSnap::GenRndGnm<PUNGraph>(5000, 10000); // generate a random graph TSnap::DelSelfEdges(Graph); TCnComV CmtyV; double Q = 0.0; TStr CmtyAlgStr; if (CmtyAlg == 1) { CmtyAlgStr = "Girvan-Newman"; Q = TSnap::CommunityGirvanNewman(Graph, CmtyV); } else if (CmtyAlg == 2) { CmtyAlgStr = "Clauset-Newman-Moore"; Q = TSnap::CommunityCNM(Graph, CmtyV); } else if (CmtyAlg == 3) { CmtyAlgStr = "Infomap"; Q = TSnap::Infomap(Graph, CmtyV); } else { Fail; } FILE *F = fopen(OutFNm.CStr(), "wt"); fprintf(F, "# Input: %s\n", InFNm.CStr()); fprintf(F, "# Nodes: %d Edges: %d\n", Graph->GetNodes(), Graph->GetEdges()); fprintf(F, "# Algoritm: %s\n", CmtyAlgStr.CStr()); if (CmtyAlg!=3) { fprintf(F, "# Modularity: %f\n", Q); } else { fprintf(F, "# Average code length: %f\n", Q); } fprintf(F, "# Communities: %d\n", CmtyV.Len()); fprintf(F, "# NId\tCommunityId\n"); for (int c = 0; c < CmtyV.Len(); c++) { for (int i = 0; i < CmtyV[c].Len(); i++) { fprintf(F, "%d\t%d\n", CmtyV[c][i].Val, c); } } fclose(F); Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
// read or get random mtx bool GetMtx(const TStr& MtxArgs, TKronMtx& FitMtxModel){ Env = TEnv(MtxArgs, TNotify::StdNotify); // how to generate initiator matrix const TStr Mtx = Env.GetIfArgPrefixStr("-m:", "random", "Init Kronecker matrix"); // if matrix will be generated, its size is an argument of KRONFIT cmd line const TInt MtxSize = Env.GetIfArgPrefixInt("-rs:", 2, "Size of randomized Kronecker matrix"); // get Kronecker init matrix if (Mtx == "create") return false; if (Mtx == "random") GenRandomMtx(MtxSize, FitMtxModel); else ReadMtx(Mtx, MtxSize, FitMtxModel); return true; }
void GenKron(const TStr& Args, TKronMtx& FitMtx, TFltPrV& KronDegAvgIn, TFltPrV& KronDegAvgOut){ Env = TEnv(Args, TNotify::NullNotify); TExeTm ExecTime; // number of Kronecker graphs to generate const TInt NKron = Env.GetIfArgPrefixInt("-n:", 1, "Number of generated Kronecker graphs"); // iterations of Kronecker product const TInt NIter = Env.GetIfArgPrefixInt("-i:", 10, "Iterations of Kronecker product"); // is graph directed? TStr IsDir = Env.GetIfArgPrefixStr("-isdir:", "false", "Produce directed graph (true, false)"); TFlt ExpectedNodes = FitMtx.GetNodes(NIter), ExpectedEdges = FitMtx.GetEdges(NIter); TFile << "Kronecker nodes: " << ExpectedNodes << ", expected Kronecker edges: " << ExpectedEdges << endl; double Sec = 0.0; int AvgMaxOutDeg = 0, AvgMaxInDeg = 0, MinMaxOutDeg = 0, MaxMaxOutDeg = 0, MinMaxInDeg = 0, MaxMaxInDeg = 0; bool Dir = IsDir == "true" ? true : false; for (int i = 0; i < NKron; i++){ ExecTime.Tick(); PNGraph Kron = TKronMtx::GenFastKronecker(FitMtx, NIter, Dir, 0); Sec += ExecTime.GetSecs(); printf("Calculating maximum degree...\n"); int MaxOutDeg = GetMaxMinDeg(Kron, IsDir, "false", "true"), MaxInDeg = GetMaxMinDeg(Kron, IsDir, "true", "true"); CompareDeg(i, MaxOutDeg, MinMaxOutDeg, MaxMaxOutDeg, AvgMaxOutDeg); CompareDeg(i, MaxInDeg, MinMaxInDeg, MaxMaxInDeg, AvgMaxInDeg); //printf("Nodes count: %d, nodes with non-zero degree %d, edges count %d\n max deg = %d\n", kron->GetNodes(), TSnap::CntNonZNodes(kron), kron->GetEdges(), MaxDeg); if (i == NKron - 1){ //TFile << "Clustering coefficient: " << TSnap::GetClustCf(kron) << endl; //TSnap::PlotClustCf(kron,"kronSingle"); //TSnap::PlotHops(kron, "kronSingle"); TFile << "Maximum output degree in kron graph: " << "from " << MinMaxOutDeg << " to " << MaxMaxOutDeg << " (average: " << (double)AvgMaxOutDeg / (double)NKron << ")" << endl; TFile << "Maximum input degree in kron graph: " << "from " << MinMaxInDeg << " to " << MaxMaxInDeg << " (average: " << (double)AvgMaxInDeg / (double)NKron << ")" << 
endl; } AddDegreesStat(KronDegAvgIn, Kron, true); AddDegreesStat(KronDegAvgOut, Kron, false); } Sec /= NKron; GetAvgDegreeStat(KronDegAvgIn, NKron); GetAvgDegreeStat(KronDegAvgOut, NKron); KronDegAvgIn.Sort(); KronDegAvgOut.Sort(); TFile << "Average time of generation of Kronecker product: " << Sec << endl; }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Clique Percolation Method. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input undirected graph file (single directed edge per line)"); const int OverlapSz = Env.GetIfArgPrefixInt("-k:", 2, "Min clique overlap"); TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "Output file prefix"); if (OutFNm.Empty()) { OutFNm = InFNm.GetFMid(); } PUNGraph G; if (InFNm == "DEMO") { // small demo graph G = TUNGraph::New(); for (int i = 1; i < 8; i++) { G->AddNode(i); } G->AddEdge(1,2); G->AddEdge(2,3); G->AddEdge(2,4); G->AddEdge(3,4); G->AddEdge(4,5); G->AddEdge(4,7); G->AddEdge(5,6); G->AddEdge(5,7); G->AddEdge(6,7); // draw the small graph using GraphViz TSnap::DrawGViz(G, gvlNeato, "small_graph.png", "", true); } // load graph else if (InFNm.GetFExt().GetLc()==".ungraph") { TFIn FIn(InFNm); G=TUNGraph::Load(FIn); } else if (InFNm.GetFExt().GetLc()==".ngraph") { TFIn FIn(InFNm); G=TSnap::ConvertGraph<PUNGraph>(TNGraph::Load(FIn), false); } else { G = TSnap::LoadEdgeList<PUNGraph>(InFNm, 0, 1); } // find communities TVec<TIntV> CmtyV; TCliqueOverlap::GetCPMCommunities(G, OverlapSz+1, CmtyV); // save result FILE *F = fopen(TStr::Fmt("cpm-%s.txt", OutFNm.CStr()).CStr(), "wt"); fprintf(F, "# %d Overlapping Clique Percolation Communities (min clique overlap %d)\n", CmtyV.Len(), OverlapSz); fprintf(F, "# Each line contains nodes belonging to the same community community\n"); for (int i = 0; i < CmtyV.Len(); i++) { fprintf(F, "%d", CmtyV[i][0].Val); for (int j = 1; j < CmtyV[i].Len(); j++) { fprintf(F, "\t%d", CmtyV[i][j].Val); } fprintf(F, "\n"); } Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("agmgen. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "DEMO", "Community affiliation data"); const TStr OutFPrx = Env.GetIfArgPrefixStr("-o:", "agm", "out file name prefix"); const int RndSeed = Env.GetIfArgPrefixInt("-rs:",10,"Rnd Seed"); const double DensityCoef= Env.GetIfArgPrefixFlt("-a:",0.6,"Power-law Coefficient a of density (density ~ N^(-a)"); const double ScaleCoef= Env.GetIfArgPrefixFlt("-c:",1.3,"Scaling Coefficient c of density (density ~ c"); TRnd Rnd(RndSeed); TVec<TIntV> CmtyVV; if(InFNm=="DEMO") { CmtyVV.Gen(2); TIntV NIdV; for(int i=0;i<25;i++) { TIntV& CmtyV = CmtyVV[0]; CmtyV.Add(i+1); } for(int i=15;i<40;i++) { TIntV& CmtyV = CmtyVV[1]; CmtyV.Add(i+1); } } else { TVec<TIntV> CmtyVV; TSsParser Ss(InFNm, ssfWhiteSep); while (Ss.Next()) { if(Ss.GetFlds()>0) { TIntV CmtyV; for(int i=0;i<Ss.GetFlds();i++) { if(Ss.IsInt(i)){CmtyV.Add(Ss.GetInt(i));} } CmtyVV.Add(CmtyV); } } printf("community loading completed (%d communities)\n",CmtyVV.Len()); } PUNGraph AG = TAGM::GenAGM(CmtyVV,DensityCoef,ScaleCoef,Rnd); TSnap::SaveEdgeList(AG,OutFPrx + ".edgelist.txt"); if(AG->GetNodes()<50) { TAGM::GVizComGraph(AG,CmtyVV,OutFPrx + ".graph.gif"); } Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]){ Try; // create environment Env=TEnv(argc, argv, TNotify::StdNotify); // command line parameters Env.PrepArgs("Ontology-Light To Ontology-Classifier", 0); TStr InOntoLightFNm=Env.GetIfArgPrefixStr("-iol:", "f:/Data/OntoLight/EuroVoc.OntoLight", "Input-OntoLight-FileName"); TStr InBowFNm=Env.GetIfArgPrefixStr("-ibow:", "f:/Data/OntoLight/Acquis.Bow", "Input-BagOfWords-FileName"); TStr OutOntoCfierFNm=Env.GetIfArgPrefixStr("-oom:", "f:/Data/OntoLight/EuroVoc.OntoCfier", "Output-OntoClassifier-FileName"); TStr OutOntoCfierTxtFNm=Env.GetIfArgPrefixStr("-oom:", "f:/Data/OntoLight/EuroVoc.OntoCfier.Txt", "Output-OntoClassifier-Text-FileName"); TStr LangNm=Env.GetIfArgPrefixStr("-lang:", "EN", "Language-Name"); bool DocCatIsTermIdP=Env.GetIfArgPrefixBool("-catisid:", true, "DocumentCategory-Is-TermId"); double CutWordWgtSumPrc=Env.GetIfArgPrefixFlt("-cwwprc:", 0.33, "Cut-Word-Weight-Sum-Percent"); if (Env.IsEndOfRun()){return 0;} /* InOntoLightFNm="f:/Data/OntoLight/Asfa.OntoLight"; InBowFNm="f:/Data/OntoLight/Asfa.Bow"; OutOntoCfierFNm="f:/Data/OntoLight/Asfa.OntoCfier"; OutOntoCfierTxtFNm="f:/Data/OntoLight/Asfa.OntoCfier.Txt"; DocCatIsTermIdP=false;*/ printf("Loading Onto-Light from '%s' ...", InOntoLightFNm.CStr()); PLwOnto LwOnto=TLwOnto::LoadBin(InOntoLightFNm); printf(" Done.\n"); printf("Loading Bag-Of-Words from '%s' ...", InBowFNm.CStr()); PBowDocBs BowDocBs=TBowDocBs::LoadBin(InBowFNm); printf(" Done.\n"); // generate ontology-classifier PLwOntoCfier LwOntoCfier=TLwOntoCfier::GetOntoCfier( LwOnto, BowDocBs, LangNm, DocCatIsTermIdP, CutWordWgtSumPrc); printf("Saving Onto-Classifier to '%s' ...", OutOntoCfierFNm.CStr()); LwOntoCfier->SaveBin(OutOntoCfierFNm); printf(" Done.\n"); printf("Saving Text to '%s' ...", OutOntoCfierTxtFNm.CStr()); LwOntoCfier->SaveTxt(OutOntoCfierTxtFNm); printf(" Done.\n"); return 0; Catch; return 1; }
int main(int argc, char* argv[]){ Try; // create environment Env=TEnv(argc, argv, TNotify::StdNotify); // command line parameters Env.PrepArgs("Ontology-Classify", 0); TStr InOntoCfierFNm=Env.GetIfArgPrefixStr("-ioc:", "f:/Data/OntoLight/EuroVoc.OntoCfier", "Input-OntoClassifier-FileName"); TStr InQueryStr=Env.GetIfArgPrefixStr("-qs:", "Slovenia and Croatia are having a fishing industry.", "Input-Query-String"); TStr InQueryHtmlFNm=Env.GetIfArgPrefixStr("-qh:", "", "Input-Query-Html-File"); TStr InQueryCpdFNm=Env.GetIfArgPrefixStr("-qcpd:", "", "Input-Query-CompactDocument-FileName"); TStr InQueryUrlStr=Env.GetIfArgPrefixStr("-qu:", "", "Input-Query-Url"); TStr InQueryUrlStrVFNm=Env.GetIfArgPrefixStr("-quf:", "", "Input-Query-URL-Vector-FileName"); TStr OutXmlFNm=Env.GetIfArgPrefixStr("-ox:", "OntoCfy.Xml", "Output-Classification-Xml-File"); TStr OutTxtFNm=Env.GetIfArgPrefixStr("-ot:", "OntoCfy.Txt", "Output-Classification-Txt-File"); if (Env.IsEndOfRun()){return 0;} // InQueryStr="Slovenia and Croatia are having a fishing industry."; printf("Loading Onto-Classifier from '%s' ...", InOntoCfierFNm.CStr()); PLwOntoCfier OntoCfier=TLwOntoCfier::LoadBin(InOntoCfierFNm); printf(" Done.\n"); // process query TSimTermIdPrV SimTermIdPrV; if (!InQueryStr.Empty()){ OntoCfier->ClassifyStr(InQueryStr, SimTermIdPrV); } else if (!InQueryHtmlFNm.Empty()){ OntoCfier->ClassifyHtmlFNm(InQueryHtmlFNm, SimTermIdPrV); } else { TExcept::Throw("No Input-Query specified!"); } // save to xml {TFOut FXml(OutXmlFNm); FILE* fXml=FXml.GetFileId(); OntoCfier->SaveCfySetXml(fXml, SimTermIdPrV);} // save to txt {TFOut FTxt(OutTxtFNm); FILE* fTxt=FTxt.GetFileId(); OntoCfier->SaveCfySetTxt(fTxt, SimTermIdPrV);} return 0; Catch; return 1; }
int main(int argc, char* argv[]) { setbuf(stdout, NULL); // disables the buffer so that print statements are not buffered and display immediately (?) Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Node centrality. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "", "input network"); const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "output prefix (filename extensions added)"); const TStr BseFNm = OutFNm.RightOfLast('/'); const double eps = Env.GetIfArgPrefixFlt("-eps:", 1.0e-5, "minimum quality improvement threshold"); const double min_moves = Env.GetIfArgPrefixFlt("-moves:", 1.0e-2, "minimum number of moves required (proportional)"); const double max_iters = Env.GetIfArgPrefixFlt("-iters:", 1.0e+4, "maximum number of iterations"); // Load graph and create directed and undirected graphs (pointer to the same memory) printf("\nLoading %s...", InFNm.CStr()); PFltWNGraph WGraph = TSnap::LoadFltWEdgeList<TWNGraph>(InFNm); printf(" DONE\n"); printf(" nodes: %d\n", WGraph->GetNodes()); printf(" edges: %d\n", WGraph->GetEdges()); printf(" time elapsed: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); // Declare variables // COMMUNITY // TODO // Louvain method (modularity objective) Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]){ Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); const TStr GFNm = Env.GetIfArgPrefixStr("-i:", "", "Input graph"); const int W = Env.GetIfArgPrefixInt("-w:", 10000, "W"); const int BW = Env.GetIfArgPrefixInt("-bw:", 100, "W upper bound"); const int CPU = Env.GetIfArgPrefixInt("-cpu:", std::thread::hardware_concurrency(), "# of CPUs"); const int Rpt = Env.GetIfArgPrefixInt("-r:", 100/CPU, "Repeat times"); const double Pe = Env.GetIfArgPrefixFlt("-p:", 0.1, "Edge sampling rate"); const double alpha = Env.GetIfArgPrefixFlt("-alpha:", 0.0001, "alpha"); const bool TrimTail = Env.GetIfArgPrefixBool("-t:", false, "Trim tail"); if (Env.IsEndOfRun()) return 0; TExeTm2 tm; ExamMgr ExM; ExM.SetActionGraph(GFNm).SetW(W).SetPEdge(Pe).SetRepeat(Rpt).SetCPU(CPU).IsTrimTail(TrimTail).SetBoundW(BW).SetAlpha(alpha); em_multi(ExM); printf("Cost time: %s.\n", tm.GetStr()); return 0; }