Beispiel #1
0
// Driver for the "Kronecker by sample" experiment.
// Parses the KRONTEST argument string, opens the statistics file, calls
// PyInit() with the settings file and, unless generation is disabled with
// "-gen:none", collects the parameters and passes them to GetGraphs().
// Py_Finalize() is called at the end of the normal (non-throwing) path.
void KroneckerBySample(vector<TStr> CommandLineArgs){
    Try
    Env = TEnv(CommandLineArgs[KRONTEST], TNotify::NullNotify);

    // what to generate: the big model, its Kronecker product, or both
    const TStr GenMode = Env.GetIfArgPrefixStr("-gen:", "model+kron", "Generation of sample or/and its Kronecker product (model, kron, model+kron)");
    // which plot types are requested
    const TStr PlotMode = Env.GetIfArgPrefixStr("-plt:", "all", "Type of plots (cum, noncum, all)");
    // name of the statistics output file
    const TStr StatFNm = Env.GetIfArgPrefixStr("-ot:", "stat.tab", "Name of output file with statistics");

    TFile = OpenFile(StatFNm.CStr());
    CheckParams(GenMode, PlotMode);

    PyInit("PySettings.txt");

    const bool GenerationRequested = (GenMode != "none");
    if (GenerationRequested) {
        vector<TStr> GraphParams;
        GetParameters(CommandLineArgs, GraphParams);
        // the trailing "name" entry is intentionally left empty
        GraphParams.push_back("");
        GetGraphs(GraphParams, GenMode, PlotMode);
    }

    Py_Finalize();
    Catch
}
Beispiel #2
0
int main(int argc, char* argv[]){
  Try;
  // create environment
  Env=TEnv(argc, argv, TNotify::StdNotify);

  // command line parameters
  Env.PrepArgs("Web-Crawling into Text-Base");
  TStr InWebFilterFNm=Env.GetIfArgPrefixStr("-i:", "", "Input-Web-Filter-File");
  TStr OutTBsFNm=Env.GetIfArgPrefixStr("-o:", "", "Output-TextBase-FileName");
  TStr OutLogFNm=Env.GetIfArgPrefixStr("-olog:", "Crawl2TBs.Log", "Output-Log-FileName");
  bool IndexTxtBsP=Env.GetIfArgPrefixBool("-index:", false, "Create-TextBase-Index");
  if (Env.IsEndOfRun()){return 0;}

  // get text-base names
  if (OutTBsFNm.Empty()){
    OutTBsFNm=InWebFilterFNm.GetFPath()+InWebFilterFNm.GetFMid();}
  TStr TxtBsNm=OutTBsFNm.GetFBase();
  TStr TxtBsFPath=OutTBsFNm.GetFPath();

  // create web-text-base
  PWebTxtBs WebTxtBs=TWebTxtBs::New(TNotify::StdNotify);
  WebTxtBs->PutNotify(TWebTxtBsNotify::New(WebTxtBs, OutLogFNm));

  // create text-base
  WebTxtBs->TxtBsNew(TxtBsNm, TxtBsFPath);
  // start fetching
  WebTxtBs->FetchStart(TxtBsNm, TxtBsFPath, InWebFilterFNm, IndexTxtBsP);

  // message loop
  TSysMsg::Loop();

  return 0;
  Catch;
  return 1;
}
Beispiel #3
0
// Command-line driver for community detection with TAGMFast.
// Loads a graph (binary when the file name contains ".ungraph", otherwise a
// string edge list), optionally reads node labels, determines the number of
// communities by cross-validation when "-c:" is -1, fits the model and writes
// "<prefix>cmtyvv.txt" and "<prefix>graph.gexf".
// Always returns 0; exceptions are handled by the Catch macro.
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("ragm. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  Try
  TStr OutFPrx = Env.GetIfArgPrefixStr("-o:", "", "Output Graph data prefix");
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input edgelist file name");
  const TStr LabelFNm = Env.GetIfArgPrefixStr("-l:", "", "Input file name for node names (Node ID, Node label) ");
  int OptComs = Env.GetIfArgPrefixInt("-c:", -1, "The number of communities to detect (-1: detect automatically)");
  const int MinComs = Env.GetIfArgPrefixInt("-mc:", 5, "Minimum number of communities to try");
  const int MaxComs = Env.GetIfArgPrefixInt("-xc:", 100, "Maximum number of communities to try");
  const int DivComs = Env.GetIfArgPrefixInt("-nc:", 10, "How many trials for the number of communities");
  const int NumThreads = Env.GetIfArgPrefixInt("-nt:", 1, "Number of threads for parallelization");
  const double StepAlpha = Env.GetIfArgPrefixFlt("-sa:", 0.3, "Alpha for backtracking line search");
  const double StepBeta = Env.GetIfArgPrefixFlt("-sb:", 0.3, "Beta for backtracking line search");

  // load the graph; NIDNameH maps node ids to their names
  PUNGraph G;
  TIntStrH NIDNameH;
  if (InFNm.IsStrIn(".ungraph")) {
    TFIn GFIn(InFNm);
    G = TUNGraph::Load(GFIn);
  } else {
    G = TAGMUtil::LoadEdgeListStr<PUNGraph>(InFNm, NIDNameH);
  }

  // optional tab-separated label file: (node id, node label)
  if (LabelFNm.Len() > 0) {
    TSsParser Ss(LabelFNm, ssfTabSep);
    while (Ss.Next()) {
      // both columns are read below, so a row needs at least two fields;
      // rows with fewer fields are skipped instead of indexing out of range
      if (Ss.Len() > 1) { NIDNameH.AddDat(Ss.GetInt(0), Ss.GetFld(1)); }
    }
  }
  printf("Graph: %d Nodes %d Edges\n", G->GetNodes(), G->GetEdges());

  TVec<TIntV> EstCmtyVV;
  TExeTm RunTm;
  TAGMFast RAGM(G, 10, 10);

  // -1 means "choose the number of communities automatically"
  if (OptComs == -1) {
    printf("finding number of communities\n");
    OptComs = RAGM.FindComsByCV(NumThreads, MaxComs, MinComs, DivComs, OutFPrx, StepAlpha, StepBeta);
  }

  RAGM.NeighborComInit(OptComs);
  // single-threaded fit for one thread or graphs with fewer than 1000 edges
  if (NumThreads == 1 || G->GetEdges() < 1000) {
    RAGM.MLEGradAscent(0.0001, 1000 * G->GetNodes(), "", StepAlpha, StepBeta);
  } else {
    RAGM.MLEGradAscentParallel(0.0001, 1000, NumThreads, "", StepAlpha, StepBeta);
  }
  RAGM.GetCmtyVV(EstCmtyVV);
  TAGMUtil::DumpCmtyVV(OutFPrx + "cmtyvv.txt", EstCmtyVV, NIDNameH);
  TAGMUtil::SaveGephi(OutFPrx + "graph.gexf", G, EstCmtyVV, 1.5, 1.5, NIDNameH);

  Catch

  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());

  return 0;
}
// Command-line driver: generates a stochastic block model network with
// TGraphAlgo and saves it in several formats under fixed "test-network-sbm*"
// file names. Always returns 0; exceptions are handled by the Catch macro.
int main(int argc, char* argv[]) {
    Env = TEnv(argc, argv, TNotify::StdNotify);
    Env.PrepArgs(TStr::Fmt("\nGenerate stochastic block model networks. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
    TExeTm ExeTm;
    Try

    // model parameters
    const int noNodes = Env.GetIfArgPrefixInt("-n:", 512, "Number of nodes in synthetic graph (default: 512)\n");
    const double pIn = Env.GetIfArgPrefixFlt("-pIn:", 0.8, "pIn (default: 0.8)\n");
    const double pOut = Env.GetIfArgPrefixFlt("-pOut:", 0.2, "pOut (default: 0.2)\n");
    const int noCommunities = Env.GetIfArgPrefixInt("-k:", 2, "Number of communities in graph (default: 2)\n");

    TGraphAlgo graphAlgo;

    graphAlgo.generateNetwork(noNodes, noCommunities, pIn, pOut);

    // output file names (the previously declared but unused Louvain tree
    // file name has been dropped)
    TStr networkFilename("test-network-sbm.txt");
    TStr networkAdjacencyMatrixFilename("test-network-sbm-adjacency-matrix.txt");
    TStr networkGexfFilename("test-network-sbm.gexf");
    TStr networkLouvainFormatFilename("test-network-sbm-louvain.txt");
    TStr communityLabelsFilename("test-network-sbm-assignments.txt");

    graphAlgo.saveGroundTruth(networkFilename);
    graphAlgo.saveGroundTruthAdjacencyMatrix(networkAdjacencyMatrixFilename);
    graphAlgo.saveGroundTruthGexf(networkGexfFilename);
    graphAlgo.convertGroundTruthToLouvainFormat(networkLouvainFormatFilename);
    graphAlgo.saveCommunityLabels(communityLabelsFilename, noNodes, noCommunities);

    Catch
    printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());

    return 0;
}
Beispiel #5
0
int main(int argc, char* argv[]){
  Try;
  // create environment
  Env=TEnv(argc, argv, TNotify::StdNotify);

  // get command line parameters
  Env.PrepArgs("Text-Base Dump");
  TStr InTBsFNm=Env.GetIfArgPrefixStr("-i:", "", "Input-TextBase-FileName");
  TStr OutDmpFNm=Env.GetIfArgPrefixStr("-o:", "TxtBs.Dmp", "Output-Dump-FileName");
  if (Env.IsEndOfRun()){return 0;}

  // load text-base
  TStr TxtBsNm=InTBsFNm.GetFBase();
  TStr TxtBsFPath=InTBsFNm.GetFPath();
  printf("Loading Text-Base '%s' at '%s' ...", TxtBsNm.CStr(), TxtBsFPath.CStr());
  PTxtBs TxtBs=TTxtBs::New(TxtBsNm, TxtBsFPath, faRdOnly);
  printf(" Done.\n");

  // dump text-base
  printf("Dumping to '%s' ...", OutDmpFNm.CStr());
  TxtBs->SaveTxt(OutDmpFNm);
  printf(" Done.\n");

  return 0;
  Catch;
  return 1;
}
Beispiel #6
0
int main(int argc, char* argv[]) {

  // code needed for inputing parameters
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Network diversity. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));

  TExeTm ExeTm; // for measuring execution time
  Try
  const TStr InFNmGraph = Env.GetIfArgPrefixStr("-i:", "artificial_intelligence_pub.txt", "Input graph (undirected graph)");
  const TStr InFNmCat = Env.GetIfArgPrefixStr("-c:", "artificial_intelligence_cat_pub.txt", "Categories");
  const TStr InFNmMat = Env.GetIfArgPrefixStr("-m:", "sciences.txt", "Matrix");
  const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "diversity.txt", "Output file");
  const int DivAlg = Env.GetIfArgPrefixInt("-a:", 1, "Measure: 1:Stirling");
  const int Alpha = Env.GetIfArgPrefixInt("-alp:", 1, "alpha");
  const int Beta = Env.GetIfArgPrefixInt("-bet:", 1, "beta");
  const int Gamma = Env.GetIfArgPrefixInt("-gam:", 1, "gama");
  // defining graph
  PUNGraph Graph = TSnap::LoadEdgeList<PUNGraph>(InFNmGraph, false);

  double D = 0.0;
  TStr DivAlgStr;

  // based on input parametr -a (variable DivAlg), diversity measure is choosen
  if (DivAlg == 1) {
    DivAlgStr = "Stirling";
	D = TSnap::StirlingIndex(Graph,InFNmCat,InFNmMat, Alpha, Beta, Gamma);}
  else { Fail; }

  printf("\nDiversity: %f\nrun time: %s (%s)\n", D,ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); //print execution time
  
  Catch

  return 0;
}
Beispiel #7
0
// Reads every command-line option into the caller-supplied output
// parameters. Value options fall back to the defaults given in the calls
// below; the four switches (-v, -dr, -w, -ow) are set from Env.IsArgStr().
void ParseArgs(int& argc, char* argv[], TStr& InFile, TStr& OutFile,
 int& Dimensions, int& WalkLen, int& NumWalks, int& WinSize, int& Iter,
 bool& Verbose, double& ParamP, double& ParamQ, bool& Directed, bool& Weighted,
 bool& OutputWalks) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("\nAn algorithmic framework for representational learning on graphs."));

  // input / output paths
  InFile  = Env.GetIfArgPrefixStr("-i:", "graph/karate.edgelist", "Input graph path");
  OutFile = Env.GetIfArgPrefixStr("-o:", "emb/karate.emb", "Output graph path");

  // integer-valued settings
  Dimensions = Env.GetIfArgPrefixInt("-d:", 128, "Number of dimensions. Default is 128");
  WalkLen    = Env.GetIfArgPrefixInt("-l:", 80, "Length of walk per source. Default is 80");
  NumWalks   = Env.GetIfArgPrefixInt("-r:", 10, "Number of walks per source. Default is 10");
  WinSize    = Env.GetIfArgPrefixInt("-k:", 10, "Context size for optimization. Default is 10");
  Iter       = Env.GetIfArgPrefixInt("-e:", 1, "Number of epochs in SGD. Default is 1");

  // floating-point hyperparameters
  ParamP = Env.GetIfArgPrefixFlt("-p:", 1, "Return hyperparameter. Default is 1");
  ParamQ = Env.GetIfArgPrefixFlt("-q:", 1, "Inout hyperparameter. Default is 1");

  // boolean switches
  Verbose     = Env.IsArgStr("-v", "Verbose output.");
  Directed    = Env.IsArgStr("-dr", "Graph is directed.");
  Weighted    = Env.IsArgStr("-w", "Graph is weighted.");
  OutputWalks = Env.IsArgStr("-ow", "Output random walks instead of embeddings.");
}
Beispiel #8
0
int main(int argc, char* argv[]){
  Try;
  // create environment
  Env=TEnv(argc, argv, TNotify::StdNotify);

  // get command line parameters
  Env.PrepArgs("Text-Base Server");
  TStr InTBsFNm=Env.GetIfArgPrefixStr("-i:", "", "Input-TextBase-FileName");
  int SrvPortN=Env.GetIfArgPrefixInt("-port:", 8888, "Server-Port");
  if (Env.IsEndOfRun()){return 0;}

  // notification
  PNotify Notify=TNotify::StdNotify;

  // load text-base
  TStr TxtBsNm=InTBsFNm.GetFBase();
  TStr TxtBsFPath=InTBsFNm.GetFPath();
  printf("Loading Web-Base...\r");
  PWebTxtBs WebTxtBs=TWebTxtBs::New(Notify);
  WebTxtBs->TxtBsOpenForRdOnly(TxtBsNm, TxtBsFPath);

  // create & activate server
  PWebSrv WebSrv=TWebTxtBsSrv::New(WebTxtBs, SrvPortN, Notify);

  // message loop
  TSysMsg::Loop();

  return 0;
  Catch;
  return 1;
}
Beispiel #9
0
// Obtains the model graph G as requested by the "-g:" option in Args and
// writes the elapsed time to the global TFile stream.
//   Args - argument string parsed through the global Env
//   G    - receives the model graph
// NOTE(review): the help text advertises a "deg" mode, but only "gen",
// "read" and "genpy" have a branch here; for any other value G is not
// assigned by this function.
void GetModel(const TStr& Args, PNGraph& G){
	Env = TEnv(Args, TNotify::NullNotify);
	const TStr Gen = Env.GetIfArgPrefixStr("-g:", "gen", "How to get model graph: read, gen, deg, genpy");
	const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "", "Input graph file (single directed edge per line)");
	
	// timer covering whichever branch below runs
	TExeTm execTime;
	if (Gen == "gen")
		BasicGraphGen(Args, G);
	else if (Gen == "read")
		ReadPNGraphFromFile(InFNm, G);
	else if (Gen == "genpy")
	{
		// GenPy fills an undirected graph, which is then converted to PNGraph
		PUNGraph GU;
		GenPy(GU, TFile, Args);	
		G = TSnap::ConvertGraph<PNGraph>(GU);
	}
	TFile << "Time of getting model: " <<  execTime.GetTmStr() << endl;
	// Disabled code: built a configuration model from the degree sequence,
	// saved/plotted it and reported its clustering coefficient.
	/*TFile << "Model graph: " << G->GetNodes() << " nodes, " << G->GetEdges() << " edges\n";
	TIntV DegV;
	TSnap::GetDegSeqV(G, DegV);
	execTime.Tick();
	PUNGraph Conf = TSnap::GenConfModel(DegV);
	TFile << "Time of getting configuration model: " <<  execTime.GetTmStr() << endl;
	cout << "Undirected configuration model: " << Conf->GetNodes() << " nodes, " << Conf->GetEdges() << " edges\n";
	PNGraph ConfD = TSnap::ConvertGraph<PNGraph>(Conf);
	SaveAndPlot(ConfD, "conf", false);
	TFile << "Clustering coefficient of configuration model: " << TSnap::GetClustCf(ConfD) << endl;
	TSnap::PlotClustCf(ConfD,"conf");*/
}
Beispiel #10
0
bool CmdArgs::IsDir() const {
	Env = TEnv(CommandLineArgs[KRONGEN], TNotify::NullNotify);
	TStr IsDir = Env.GetIfArgPrefixStr("-isdir:", "false", "Produce directed graph (true, false)");
	if (IsDir == "true")
		return true;
	return false;
}
Beispiel #11
0
// Generates an undirected graph by calling a Python generator function and
// copies the result into a SNAP graph.
//   res        - receives the newly created graph
//   TFile      - stream that receives the timing messages
//   parameters - argument string; "-module:" and "-func:" select the Python
//                module and function, the remainder is handed to ParseArgs()
// Returns 0 in every case (both on success and on the failure paths below);
// terminates the process if CallPyFunction() fails.
int GenPy(PUNGraph &res, ofstream& TFile, const TStr& parameters)
{
	Env = TEnv(parameters, TNotify::StdNotify);
	TStr mN = Env.GetIfArgPrefixStr("-module:", "random_graphs", "Module name");
	TStr fN = Env.GetIfArgPrefixStr("-func:", "fast_gnp_random_graph", "Function name");

	char *moduleName = mN.CStr();
	char *funcName = fN.CStr();
	AddFuncInfo();
	TStrV args, argTypes;
	if (!ParseArgs(funcName, parameters, args, argTypes))
	{
		printf("Fail to parse arguments for NetworkX generation...\n");
		return 0;
	}

	// one-element holder for the Python graph object; allocated only after
	// argument parsing succeeded so the early return above cannot leak it
	PyObject **G = new PyObject*[1];

	TExeTm execTime;
	if (!CallPyFunction(moduleName, funcName, args, argTypes, G))
	{
		cout << "CallPyFunction() raised error. Execution terminated.\n";
		system("pause");
		exit(1);
	}

	TFile << "Time of generation of graph by NetworkX: " << execTime.GetTmStr() << endl;

	execTime.Tick();
	PyObject*** nodes = new PyObject**[1];
	GetNodes(G, nodes);
	int nodesCount = PyList_Size(*(nodes[0]));
	// PyList_Size reports failure with a negative value; the old size_t loop
	// index turned that into a practically endless loop
	if (nodesCount < 0)
	{
		printf("Fail to get list of nodes from NetworkX graph...\n");
		return 0;
	}
	//printf("nodesCount = %d, ", nodesCount);
	res = PUNGraph::TObj::New();
	// nodesCount*nodesCount overflows int above 46340 nodes, so the edge
	// hint is computed in 64 bits and clamped to the int range
	const long long maxEdges = static_cast<long long>(nodesCount) * nodesCount;
	const int edgesHint = maxEdges > 2147483647LL ? 2147483647 : static_cast<int>(maxEdges);
	res->Reserve(nodesCount, edgesHint);
	for (int i = 0; i < nodesCount; i++)
		res->AddNode(i);
	// NOTE(review): `nodes` is a PyObject*** (an array allocated with new[]),
	// not a Python object; Py_DECREF on it does not release the node list.
	// Left unchanged because the ownership rules of GetNodes() are not
	// visible here - confirm and release *(nodes[0]) / delete[] as appropriate.
	Py_DECREF(nodes);

	PyObject*** edges = new PyObject**[1];
	GetEdges(G, edges);
	int edgesCount = PyList_Size(*(edges[0]));
	//printf("edgesCount = %d\n", edgesCount);
	// a negative edgesCount (failure) simply skips the loop
	for (int i = 0; i < edgesCount; i++)
	{
		// each item is read as a pair (v1, v2) of integer node ids
		PyObject* item = PySequence_Fast_GET_ITEM(*(edges[0]), i);
		int v1, v2;
		PyObject* node = PySequence_Fast_GET_ITEM(item,0);
		v1 = PyLong_AsLong(node);
		node = PySequence_Fast_GET_ITEM(item,1);
		v2 = PyLong_AsLong(node);
		res->AddEdge(v1,v2);
	}
	TFile << "Time of copying of graph from NetworkX representation: " << execTime.GetTmStr() << endl;
	// NOTE(review): same concern as above - G and edges are new[] arrays of
	// pointers, not Python objects.
	Py_DECREF(G);
	Py_DECREF(edges);
	//Py_Finalize(); // release the memory handed over to the interpreter

	return 0;
}
Beispiel #12
0
void ReadPNGraphFromFile(const TStr args, PNGraph& G){
    Try
        Env = TEnv(args, TNotify::StdNotify);
    const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input graph file (single directed edge per line)");
    // load graph
    G = TSnap::LoadEdgeList<PNGraph>(InFNm, 0, 1);
    Catch
}
Beispiel #13
0
int main(int argc, char* argv[]) {
  
  setbuf(stdout, NULL); // disables the buffer so that print statements are not buffered and display immediately (?)
   
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Backbone extractor (Vespignani). build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  
  TExeTm ExeTm;
  
  Try
  
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "", "input network");
  const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "output prefix (alpha value and filename extensions added)");
  const float alpha = Env.GetIfArgPrefixFlt("-a:", 0.01, "alpha significance level threshold");

  // Load graph and create directed and undirected graphs (pointer to the same memory)
  printf("\nLoading %s...", InFNm.CStr());
  PFltWNGraph WGraph = TSnap::LoadFltWEdgeList<TWNGraph>(InFNm);
  printf(" DONE\n");
  printf("  nodes: %d\n", WGraph->GetNodes());
  printf("  edges: %d\n", WGraph->GetEdges());
  printf("  time elapsed: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());

  // Declare variables
  TIntFltH OutWDegH, InWDegH;
  TIntIntH OutDegH, InDegH;

  // Get degrees, weighted and unweighted
  printf("\nGetting initial distribution of binary in / out degrees...");
  TSnap::GetInDegH(WGraph, InDegH);
  TSnap::GetOutDegH(WGraph, OutDegH);
  printf(" DONE (time elapsed: %s (%s))", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());

  printf("\nGetting initial distribution of weighted in / out degrees...");
  TSnap::GetWInDegH(WGraph, InWDegH);
  TSnap::GetWOutDegH(WGraph, OutWDegH);
  printf(" DONE (time elapsed: %s (%s))", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());

  // Apply the disparity filter Vespignani method
  printf("\nApplying the disparity filter vespignani method...");
  TSnap::FilterEdgesVespignani(WGraph, InWDegH, OutWDegH, InDegH, OutDegH, alpha);
  printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  printf("\nPruned graph:");
  printf("\n  nodes: %d", WGraph->GetNodes());
  printf("\n  edges: %d\n", WGraph->GetEdges());

  // OUTPUTTING 
  printf("\nSaving %s-%f.snap...", OutFNm.CStr(), alpha);
  TSnap::SaveFltWEdgeList(WGraph, TStr::Fmt("%s-%f.snap", OutFNm.CStr(), alpha), "");
  printf(" DONE\n");

  Catch
  
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
  
}
int main(int argc, char* argv[])
{
	TExeTm ExeTm;
	try
	{
		Env = TEnv(argc, argv, TNotify::StdNotify);
		Env.PrepArgs(TStr::Fmt("\nPlotting Scatter For Twitter Cascades. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));

//		THash< TUInt , TSecTmV > twitterUrls = Tools::loadTwitter("DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED_HAVINGBOTH.rar");
//		THash< TUInt , TSecTmV > twitterContents = Tools::loadTwitter("DATA/CascadesOnTwitterData_FINALFILTERED_HAVINGBOTH.rar");
		THash< TUInt , TSecTmV > twitterUrls = Tools::loadTwitter("/NS/twitter-5/work/oaskaris/DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED_HAVINGBOTH.rar");     // CascadesFullUrlsOnTwitterData_FINALFILTERED
		THash< TUInt , TSecTmV > twitterContents = Tools::loadTwitter("/NS/twitter-5/work/oaskaris/DATA/CascadesOnTwitterData_FINALFILTERED_HAVINGBOTH.rar");    // CascadesOnTwitterData_FINALFILTERED

		THash< TUInt , TSecTmV > full_twitterUrls = Tools::loadTwitter("/NS/twitter-5/work/oaskaris/DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED.rar");
		THash< TUInt , TSecTmV > full_twitterContents = Tools::loadTwitter("/NS/twitter-5/work/oaskaris/DATA/CascadesOnTwitterData_FINALFILTERED.rar");


		// Scatter plot
		plotScatterLengthOfEachCascade(twitterUrls,twitterContents);

		// Percentage computation
		double cnt = 0;
		for(int i=0;i<full_twitterUrls.Len();i++)
		{
			if(full_twitterContents.GetKeyId(full_twitterUrls.GetKey(i)) != -1)
			{
				cnt++;
			}
		}
		cnt /= full_twitterUrls.Len();    // twitterUrls.Len() / full_twitterUrls.Len()
		printf("The percentage of Urls of quotes which have contents as well: %f\n", 100 * cnt);

		cnt = 0;
		for(int i=0;i<full_twitterContents.Len();i++)
		{
			if(full_twitterUrls.GetKeyId(full_twitterContents.GetKey(i)) != -1)
			{
				cnt++;
			}
		}
		cnt /= full_twitterContents.Len();
		printf("The percentage of Contents of quotes which have urls as well: %f\n", 100 * cnt);

		printf("\nScatter Plot had been drawn successfully.");
	}
	catch(exception& ex)
	{
		printf("\nError1 happened, it was: %s\n\n",ex.what());
	}
	catch(TPt<TExcept>& ex)
	{
		printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());
	}

	printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
	return 0;
}
Beispiel #15
0
// Command-line tool: loads a DMoz base and, for every sub-category of the
// root category, writes a "<topic>.Txt" file with "title - description" lines
// plus one combined "<root>_ExternalUrlList.Txt" file with the URLs.
// Returns 0 on success (or when only usage was requested) and 1 if an
// exception reached the Catch handler.
int main(int argc, char* argv[]){
  Try;
  // create environment
  Env=TEnv(argc, argv, TNotify::StdNotify);

  // get command line parameters
  Env.PrepArgs("DMoz-Topic To Text", -1);
  TStr InFPath=Env.GetIfArgPrefixStr("-i:", "", "Input-File-Path");
  TStr OutFPath=Env.GetIfArgPrefixStr("-o:", "", "Output-File-Path");
  TStr RootCatNm=Env.GetIfArgPrefixStr("-c:", "Top/Science", "Root-Category-Name");
  if (Env.IsEndOfRun()){return 0;}

  // load DMoz-Base
  PDMozBs DMozBs=TDMozBs::LoadBin(TDMozInfo::BinFullFBase, InFPath);

  // assign root category name (alternative roots kept for reference)
  //RootCatNm="Top/Computers/Software/Databases/Data_Mining";
  //RootCatNm="Top/Reference/Knowledge_Management/Knowledge_Discovery";
  //RootCatNm="Top/Computers/Artificial_Intelligence/Machine_Learning";
  //RootCatNm="Top/Computers/Artificial_Intelligence";
  //RootCatNm="Top/Recreation/Travel";

  // get root category-id
  // NOTE(review): the returned id is used without a validity check - confirm
  // what GetCatId() yields for an unknown category name.
  int RootCatId=DMozBs->GetCatId(RootCatNm);
  // prepare external-url list (one file shared by all topics)
  TStr RootFBase=TStr::GetFNmStr(RootCatNm, true);
  TStr ExtUrlFNm=TStr::GetNrFPath(OutFPath)+RootFBase+"_ExternalUrlList.Txt";
  TFOut ExtUrlSOut(ExtUrlFNm); FILE* fExtUrlOut=ExtUrlSOut.GetFileId();
  // get topic categories
  TIntV TopicCatIdV; DMozBs->GetSubCatIdV(RootCatId, TopicCatIdV);
  for (int TopicCatIdN=0; TopicCatIdN<TopicCatIdV.Len(); TopicCatIdN++){
    // get topic id & name
    int TopicCatId=TopicCatIdV[TopicCatIdN];
    TStr TopicCatNm=DMozBs->GetCatNm(TopicCatId);
    // get subtopic subtrees and corresponding external-url-ids
    // NOTE(review): the call that would fill CatIdV is commented out, so
    // CatIdV is passed to GetExtUrlIdV() empty and SubCatIdV is unused -
    // confirm whether this is intentional.
    TIntV SubCatIdV; TIntV CatIdV;
    //DMozBs->GetSubTreeCatIdV(TopicCatId, SubCatIdV, CatIdV, true);
    TIntV ExtUrlIdV; DMozBs->GetExtUrlIdV(CatIdV, ExtUrlIdV);
    // output url/titles/descriptions
    TStr TopicFBase=TStr::GetFNmStr(TopicCatNm, true);
    TStr TopicFNm=TStr::GetNrFPath(OutFPath)+TopicFBase+".Txt";
    printf("Saving %s\n", TopicFNm.CStr());
    TFOut TopicSOut(TopicFNm); FILE* fTopicOut=TopicSOut.GetFileId();
    for (int ExtUrlIdN=0; ExtUrlIdN<ExtUrlIdV.Len(); ExtUrlIdN++){
      int ExtUrlId=ExtUrlIdV[ExtUrlIdN];
      TStr UrlStr=DMozBs->GetExtUrlStr(ExtUrlId);
      TStr TitleStr=DMozBs->GetExtUrlTitleStr(ExtUrlId);
      TStr DescStr=DMozBs->GetExtUrlDescStr(ExtUrlId);
      // URL goes to the shared list, title/description to the topic file
      fprintf(fExtUrlOut, "%s\n", UrlStr.CStr());
      fprintf(fTopicOut, "%s - %s\n", TitleStr.CStr(), DescStr.CStr());
    }
  }
  return 0;
  Catch;
  return 1;
}
Beispiel #16
0
void GetParameters(const vector<TStr>& CommandLineArgs, vector<TStr>& Parameters){
    Env = TEnv(CommandLineArgs[KRONTEST], TNotify::NullNotify);
    // to plot
    const TStr Plt = Env.GetIfArgPrefixStr("-plttype:", "model+kron", "Plotting of big model and/or its Kronecker product (model, kron, model+kron)");
    // type of plots
    const TStr PltType = Env.GetIfArgPrefixStr("-plt:", "all", "Type of plots (cum, noncum, all)");
    
    for (size_t i = 0; i < CommandLineArgs.size(); i++)
        Parameters.push_back(CommandLineArgs[i]);
    Parameters.push_back(Plt); Parameters.push_back(PltType); 
}
Beispiel #17
0
// Command-line driver: loads the quote cascades and the Twitter URL/content
// cascades, builds a combined "total" Twitter hash and draws three scatter
// plots ("Urls", "Contents", "Full").
// Always returns 0; errors are reported from the catch handlers.
int main(int argc, char* argv[])
{
// Disabled ad-hoc check of Tools::computeCorrelation:
//	TFltPrV v;
//	v.Add(TFltPr(1,4));
//	v.Add(TFltPr(5,5));
//	v.Add(TFltPr(9,11));
//	v.Add(TFltPr(20,8));
//	v.Add(TFltPr(21,30));
//	cout << "C: " << Tools::computeCorrelation(v,Pearson) << endl;
//	return 0;


	TExeTm ExeTm;
	try
	{
		Env = TEnv(argc, argv, TNotify::StdNotify);
		Env.PrepArgs(TStr::Fmt("\nPlotting Individually Memes-Twitter Cascades. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));

		// URLS
		THash< TStr , CascadeElementV > quotes = Tools::loadQuotes("DATA/QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_HAVINGBOTH.rar");    // QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_4URLS
		THash< TUInt , TSecTmV > twitterUrls = Tools::loadTwitter("DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED_HAVINGBOTH.rar");     // CascadesFullUrlsOnTwitterData_FINALFILTERED

		// CONTENTS
		//THash< TStr , CascadeElementV > quotes2 = Tools::loadQuotes("DATA/QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_HAVINGBOTH.rar");    // QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_4Contents
		THash< TUInt , TSecTmV > twitterContents = Tools::loadTwitter("DATA/CascadesOnTwitterData_FINALFILTERED_HAVINGBOTH.rar");    // CascadesOnTwitterData_FINALFILTERED

		// Plotting
		// Build the combined hash: entry i holds the i-th content cascade
		// followed by the i-th URL cascade, stored under key i.
		// NOTE(review): entries are paired by position, not by key, and the
		// loop bound is twitterContents.Len() only - this assumes both hashes
		// have the same length and ordering; verify against the data files.
		THash< TUInt , TSecTmV > twitterTotal;
		for(int i=0;i<twitterContents.Len();i++)
		{
			TSecTmV tmp;
			tmp.AddV(twitterContents[i]);
			tmp.AddV(twitterUrls[i]);
			twitterTotal.AddDat(i,tmp);
		}

		plotScatterLengthOfEachCascade(quotes,twitterUrls,"Urls");
		plotScatterLengthOfEachCascade(quotes,twitterContents,"Contents");
		plotScatterLengthOfEachCascade(quotes,twitterTotal,"Full");

		printf("\nPlots had been drawn successfully.");
	}
	catch(exception& ex)
	{
		printf("\nError1 happened, it was: %s\n\n",ex.what());
	}
	catch(TPt<TExcept>& ex)
	{
		printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());
	}

	// printed on both the success and the error path
	printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
	return 0;
}
Beispiel #18
0
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Motifs. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  Try
  
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input directed graph file (single directed edge per line)");
  const int MotifSz = Env.GetIfArgPrefixInt("-m:", 3, "Motif size (has to be 3 or 4)");
  const bool DrawMotifs = Env.GetIfArgPrefixBool("-d:", true, "Draw motif shapes (requires GraphViz)");
  TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "Output file prefix");
  if (OutFNm.Empty()) { OutFNm = InFNm.GetFMid(); }
  EAssert(MotifSz==3 || MotifSz==4);

  // load graph
  PNGraph G;
  if (InFNm.GetFExt().GetLc()==".ungraph") {
    TFIn FIn(InFNm);  G=TSnap::ConvertGraph<PNGraph>(TUNGraph::Load(FIn), true); }
  else if (InFNm.GetFExt().GetLc()==".ngraph") {
    TFIn FIn(InFNm);  G=TNGraph::Load(FIn); }
  else {
    G = TSnap::LoadEdgeList<PNGraph>(InFNm, 0, 1); }
  bool IsOk = true;
  for (int nid = 0; nid < G->GetNodes(); nid++) {
    if (! G->IsNode(nid)) { IsOk=false; break; } }
  if (! IsOk) {
    printf("Nodes of the input graph have to be numbered 0...N-1\nRenumbering nodes...\n"); 
    PNGraph OG = G; G = TNGraph::New();
    TGraphEnumUtils::GetNormalizedGraph(OG, G);
  }
  // G = TSnap::GenRndGnm<PNGraph>(100, Kilo(1));
  
  // count frequency of connected subgraphs in G that have MotifSz nodes
  TD34GraphCounter GraphCounter(MotifSz);
  TSubGraphEnum<TD34GraphCounter> GraphEnum;
  GraphEnum.GetSubGraphs(G, MotifSz, GraphCounter);
  FILE *F = fopen(TStr::Fmt("%s-counts.tab", OutFNm.CStr()).CStr(), "wt");
  fprintf(F, "MotifId\tNodes\tEdges\tCount\n");
  for (int i = 0; i < GraphCounter.Len(); i++) {
    const int gid = GraphCounter.GetId(i);
    PNGraph SG = GraphCounter.GetGraph(gid);
    if (DrawMotifs) {
      TGraphViz::Plot(SG, gvlNeato, TStr::Fmt("%s-motif%03d.gif", OutFNm.CStr(), i), 
        TStr::Fmt("GId:%d  Count: %llu", gid, GraphCounter.GetCnt(gid)));
    }
    fprintf(F, "%d\t%d\t%d\t%llu\n", gid, SG->GetNodes(), SG->GetEdges(), GraphCounter.GetCnt(gid));
  }
  printf("done.");
  fclose(F); 
  
  Catch
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
}
Beispiel #19
0
// Command-line driver: computes several node centrality scores (PageRank and
// hubs/authorities on the directed graph; eigenvector, clustering,
// betweenness, constraint and closeness on its undirected version) and writes
// one tab-separated row per node to the output file.
// Returns 1 if the output file cannot be opened, otherwise 0; exceptions are
// handled by the Catch macro.
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Node Centrality. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  Try
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input un/directed graph");
  const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "node_centrality.tab", "Output file");
  printf("Loading %s...", InFNm.CStr());
  PNGraph Graph = TSnap::LoadEdgeList<PNGraph>(InFNm);
  //PNGraph Graph = TSnap::GenRndGnm<PNGraph>(10, 10);
  //TGraphViz::Plot(Graph, gvlNeato, InFNm+".gif", InFNm, true);
  printf("nodes:%d  edges:%d\n", Graph->GetNodes(), Graph->GetEdges());
  PUNGraph UGraph = TSnap::ConvertGraph<PUNGraph>(Graph); // undirected version of the graph
  // per-node score tables, keyed by node id
  TIntFltH BtwH, EigH, PRankH, CcfH, CloseH, HubH, AuthH;
  //printf("Computing...\n");
  printf("Treat graph as DIRECTED: ");
  printf(" PageRank... ");             TSnap::GetPageRank(Graph, PRankH, 0.85);
  printf(" Hubs&Authorities...");      TSnap::GetHits(Graph, HubH, AuthH);
  printf("\nTreat graph as UNDIRECTED: ");
  printf(" Eigenvector...");           TSnap::GetEigenVectorCentr(UGraph, EigH);
  printf(" Clustering...");            TSnap::GetNodeClustCf(UGraph, CcfH);
  printf(" Betweenness (SLOW!)...");   TSnap::GetBetweennessCentr(UGraph, BtwH, 1.0);
  printf(" Constraint (SLOW!)...");    TNetConstraint<PUNGraph> NetC(UGraph, true);
  printf(" Closeness (SLOW!)...");
  for (TUNGraph::TNodeI NI = UGraph->BegNI(); NI < UGraph->EndNI(); NI++) {
    const int NId = NI.GetId();
    CloseH.AddDat(NId, TSnap::GetClosenessCentr<PUNGraph>(UGraph, NId, false));
  }
  printf("\nDONE! saving...");
  // fopen() returns NULL on failure and writing through a null FILE* is
  // undefined, so bail out with a message instead
  FILE *F = fopen(OutFNm.CStr(), "wt");
  if (F == NULL) {
    printf("\nCannot open %s for writing\n", OutFNm.CStr());
    return 1;
  }
  fprintf(F,"#Network: %s\n", InFNm.CStr());
  fprintf(F,"#Nodes: %d\tEdges: %d\n", Graph->GetNodes(), Graph->GetEdges());
  fprintf(F,"#NodeId\tDegree\tCloseness\tBetweennes\tEigenVector\tNetworkConstraint\tClusteringCoefficient\tPageRank\tHubScore\tAuthorityScore\n");
  // one row per node, columns in the order of the header line above
  for (TUNGraph::TNodeI NI = UGraph->BegNI(); NI < UGraph->EndNI(); NI++) {
    const int NId = NI.GetId();
    const double DegCentr = UGraph->GetNI(NId).GetDeg();
    const double CloCentr = CloseH.GetDat(NId);
    const double BtwCentr = BtwH.GetDat(NId);
    const double EigCentr = EigH.GetDat(NId);
    const double Constraint = NetC.GetNodeC(NId);
    const double ClustCf = CcfH.GetDat(NId);
    const double PgrCentr = PRankH.GetDat(NId);
    const double HubCentr = HubH.GetDat(NId);
    const double AuthCentr = AuthH.GetDat(NId);
    fprintf(F, "%d\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n", NId, 
      DegCentr, CloCentr, BtwCentr, EigCentr, Constraint, ClustCf, PgrCentr, HubCentr, AuthCentr);
  }
  fclose(F);
  Catch
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
}
Beispiel #20
0
void GetGraphs(const vector <TStr>& Parameters, const TStr& ModelGen, const TStr&ModelPlt)
{
    PNGraph G;
    size_t PSize = Parameters.size();
    if (GRAPHGEN >= PSize || MTXGEN >= PSize || KRONGEN >= PSize || KRONFIT >= PSize)
        Error("GetGraphs", "Wrong index in array of parameters");

    GetModel(Parameters[GRAPHGEN], G);

    if (G->GetNodes() == 0)
        Error("GetGraphs", "Empty graph");

    TFltPrV MDegIn, MDegOut;
    TSnap::GetInDegCnt(G, MDegIn);
    TSnap::GetOutDegCnt(G, MDegOut);

    PlotDegrees(Parameters, MDegIn, MDegOut, "model");
    TFile << "Model nodes: " << G->GetNodes() << ", model edges: " << G->GetEdges() << endl;
    TFile << "Maximum output degree in model graph: " << MDegOut[MDegOut.Len()-1].GetVal1() << endl;
    TFile << "Maximum input degree in model graph: " << MDegIn[MDegIn.Len()-1].GetVal1() << endl;


    if (ModelGen == "model+kron"){
        // generate (or read) Kronecker initiator matrix
        TKronMtx FitMtxM;
        if (!GetMtx(Parameters[MTXGEN], FitMtxM))
            GenNewMtx(G, Parameters[KRONFIT], FitMtxM);
        PrintMtx(FitMtxM, TFile);
        TFile << "Scaling for the number of edges... " << endl;
        FitMtxM.SetForEdges(G->GetNodes(), G->GetEdges());

        int ModelNodes = G->GetNodes(), ModelEdges = G->GetEdges();

        Env = TEnv(Parameters[KRONGEN], TNotify::NullNotify);
        TStr IsDir = Env.GetIfArgPrefixStr("-isdir:", "false", "Produce directed graph (true, false)");
        const TInt NIter = Env.GetIfArgPrefixInt("-i:", 1, "Number of iterations of Kronecker product");
        
        if (pow(FitMtxM.GetDim(), static_cast<double>(NIter)) != ModelNodes)
            Error("GetGraphs", "Inconsistent value of -i: parameter, KronNodes != ModelNodes");
              

        // in and out average degrees of Kronecker graphs
        TFltPrV KronDegAvgIn, KronDegAvgOut;

       
        GenKron(Parameters[KRONGEN], FitMtxM, KronDegAvgIn, KronDegAvgOut);

        PlotDegrees(Parameters, KronDegAvgIn, KronDegAvgOut, "kron");

    }
}
Beispiel #21
0
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Rolx. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  Try
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "graph.txt", "Input graph (one edge per line, tab/space separated)");
  const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "roles.txt", "Output file name prefix");
  const int MinRoles = Env.GetIfArgPrefixInt("-l:", 2, "Lower bound of the number of roles");
  const int MaxRoles = Env.GetIfArgPrefixInt("-u:", 3, "Upper bound of the number of roles");
  double Threshold = 1e-6;
  if (MinRoles > MaxRoles || MinRoles < 2) {
    printf("min roles and max roles should be integer and\n");
    printf("2 <= min roles <= max roles\n");
    exit(EXIT_SUCCESS);
  }
  printf("loading file...\n");
  PNGraph Graph = TSnap::LoadEdgeList<PNGraph>(InFNm, 0, 1);
  printf("extracting features...\n");
  TIntFtrH Features = ExtractFeatures(Graph);
  TIntIntH NodeIdMtxIdH = CreateNodeIdMtxIdxHash(Features);
  TFltVV V = ConvertFeatureToMatrix(Features, NodeIdMtxIdH);
  //printf("saving features...\n");
  //FPrintMatrix(V, "v.txt");
  printf("feature matrix is saved in v.txt\n");
  TFlt MnError = TFlt::Mx;
  TFltVV FinalG, FinalF;
  int NumRoles = -1;
  for (int r = MinRoles; r <= MaxRoles; ++r) {
    TFltVV G, F;
    printf("factorizing for %d roles...\n", r);
    CalcNonNegativeFactorization(V, r, G, F, Threshold);
    //FPrintMatrix(G, "g.txt");
    //FPrintMatrix(F, "f.txt");
    TFlt Error = CalcDescriptionLength(V, G, F);
    if (Error < MnError) {
      MnError = Error;
      FinalG = G;
      FinalF = F;
      NumRoles = r;
    }
  }
  //FPrintMatrix(FinalG, "final_g.txt");
  //FPrintMatrix(FinalF, "final_f.txt");
  printf("using %d roles, min error: %f\n", NumRoles, MnError());
  TIntIntH Roles = FindRoles(FinalG, NodeIdMtxIdH);
  FPrintRoles(Roles, OutFNm);
  //PlotRoles(Graph, Roles);
  Catch
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
}
Beispiel #22
0
// Community detection driver: loads an undirected edge list ("-i:"), removes
// self edges, runs the algorithm selected by "-a:" (1 Girvan-Newman,
// 2 Clauset-Newman-Moore, 3 Infomap) and writes one "node id <tab> community
// id" line per node to the "-o:" file.
// Returns 1 when the output file cannot be opened, 0 otherwise.
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Network community detection. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  Try
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "graph.txt", "Input graph (undirected graph)");
  const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "communities.txt", "Output file");
  const int CmtyAlg = Env.GetIfArgPrefixInt("-a:", 2, "Algorithm: 1:Girvan-Newman, 2:Clauset-Newman-Moore, 3:Infomap");

  PUNGraph Graph = TSnap::LoadEdgeList<PUNGraph>(InFNm, false);
  //PUNGraph Graph = TSnap::LoadEdgeList<PUNGraph>("../as20graph.txt", false);
  //PUNGraph Graph = TSnap::GenRndGnm<PUNGraph>(5000, 10000); // generate a random graph

  TSnap::DelSelfEdges(Graph);
  TCnComV CmtyV;
  // Q holds the value returned by the chosen algorithm; it is labelled as
  // modularity for algorithms 1 and 2 and as average code length for Infomap
  double Q = 0.0;
  TStr CmtyAlgStr;
  if (CmtyAlg == 1) {
    CmtyAlgStr = "Girvan-Newman";
    Q = TSnap::CommunityGirvanNewman(Graph, CmtyV); }
  else if (CmtyAlg == 2) {
    CmtyAlgStr = "Clauset-Newman-Moore";
    Q = TSnap::CommunityCNM(Graph, CmtyV); }
  else if (CmtyAlg == 3) {
    CmtyAlgStr = "Infomap";
    Q = TSnap::Infomap(Graph, CmtyV); }
  else { Fail; }

  FILE *F = fopen(OutFNm.CStr(), "wt");
  if (F == NULL) {
    // writing through a NULL stream is undefined behaviour; stop with a clear message
    fprintf(stderr, "cannot open output file '%s' for writing\n", OutFNm.CStr());
    return 1;
  }
  fprintf(F, "# Input: %s\n", InFNm.CStr());
  fprintf(F, "# Nodes: %d    Edges: %d\n", Graph->GetNodes(), Graph->GetEdges());
  fprintf(F, "# Algoritm: %s\n", CmtyAlgStr.CStr());
  if (CmtyAlg!=3) {
    fprintf(F, "# Modularity: %f\n", Q);
  } else {
    fprintf(F, "# Average code length: %f\n", Q);
  }
  fprintf(F, "# Communities: %d\n", CmtyV.Len());
  fprintf(F, "# NId\tCommunityId\n");
  for (int c = 0; c < CmtyV.Len(); c++) {
    for (int i = 0; i < CmtyV[c].Len(); i++) {
      fprintf(F, "%d\t%d\n", CmtyV[c][i].Val, c);
    }
  }
  fclose(F);

  Catch
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
}
Beispiel #23
0
// read or get random mtx
bool GetMtx(const TStr& MtxArgs, TKronMtx& FitMtxModel){
    Env = TEnv(MtxArgs, TNotify::StdNotify);
    // how to generate initiator matrix
    const TStr Mtx = Env.GetIfArgPrefixStr("-m:", "random", "Init Kronecker matrix");
    // if matrix will be generated, its size is an argument of KRONFIT cmd line
    const TInt MtxSize = Env.GetIfArgPrefixInt("-rs:", 2, "Size of randomized Kronecker matrix");
    // get Kronecker init matrix
    if (Mtx == "create") return false;
    if (Mtx == "random")
        GenRandomMtx(MtxSize, FitMtxModel);
    else 
        ReadMtx(Mtx, MtxSize, FitMtxModel);
    return true;
}
Beispiel #24
0
void GenKron(const TStr& Args, TKronMtx& FitMtx, TFltPrV& KronDegAvgIn, TFltPrV& KronDegAvgOut){
	Env = TEnv(Args, TNotify::NullNotify);
	TExeTm ExecTime;
	// number of Kronecker graphs to generate
	const TInt NKron = Env.GetIfArgPrefixInt("-n:", 1, "Number of generated Kronecker graphs");
	// iterations of Kronecker product
	const TInt NIter = Env.GetIfArgPrefixInt("-i:", 10, "Iterations of Kronecker product");
	// is graph directed?
	TStr IsDir = Env.GetIfArgPrefixStr("-isdir:", "false", "Produce directed graph (true, false)");
	
	TFlt ExpectedNodes = FitMtx.GetNodes(NIter), ExpectedEdges = FitMtx.GetEdges(NIter);
	
	TFile << "Kronecker nodes: " << ExpectedNodes << ", expected Kronecker edges: " << ExpectedEdges << endl;
		
	double Sec = 0.0;
	int AvgMaxOutDeg = 0, AvgMaxInDeg = 0, MinMaxOutDeg = 0, MaxMaxOutDeg = 0, MinMaxInDeg = 0, MaxMaxInDeg = 0;
   bool Dir = IsDir == "true" ? true : false;

	for (int i = 0; i < NKron; i++){
		ExecTime.Tick();
      PNGraph Kron = TKronMtx::GenFastKronecker(FitMtx, NIter, Dir, 0);
		Sec += ExecTime.GetSecs();
		printf("Calculating maximum degree...\n");
		int MaxOutDeg = GetMaxMinDeg(Kron, IsDir, "false", "true"), MaxInDeg = GetMaxMinDeg(Kron, IsDir, "true", "true");
		CompareDeg(i, MaxOutDeg, MinMaxOutDeg, MaxMaxOutDeg, AvgMaxOutDeg);
		CompareDeg(i, MaxInDeg, MinMaxInDeg, MaxMaxInDeg, AvgMaxInDeg);

		//printf("Nodes count: %d, nodes with non-zero degree %d, edges count %d\n max deg = %d\n", kron->GetNodes(), TSnap::CntNonZNodes(kron), kron->GetEdges(), MaxDeg);
		if (i == NKron - 1){
			//TFile << "Clustering coefficient: " << TSnap::GetClustCf(kron) << endl;
			//TSnap::PlotClustCf(kron,"kronSingle");
			//TSnap::PlotHops(kron, "kronSingle");
			
			TFile << "Maximum output degree in kron graph: " << "from " << MinMaxOutDeg << " to " << MaxMaxOutDeg << " (average: " << (double)AvgMaxOutDeg / (double)NKron << ")" << endl;
			TFile << "Maximum input degree in kron graph: " << "from " << MinMaxInDeg << " to " << MaxMaxInDeg << " (average: " << (double)AvgMaxInDeg / (double)NKron << ")" << endl;
		}
		AddDegreesStat(KronDegAvgIn, Kron, true);
		AddDegreesStat(KronDegAvgOut, Kron, false);
	}
	Sec /= NKron;

	
    GetAvgDegreeStat(KronDegAvgIn, NKron);
    GetAvgDegreeStat(KronDegAvgOut, NKron);
	
	KronDegAvgIn.Sort();
	KronDegAvgOut.Sort();
	TFile << "Average time of generation of Kronecker product: " <<  Sec << endl;
}
Beispiel #25
0
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Clique Percolation Method. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  Try
  
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input undirected graph file (single directed edge per line)");
  const int OverlapSz = Env.GetIfArgPrefixInt("-k:", 2, "Min clique overlap");
  TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "Output file prefix");
  if (OutFNm.Empty()) { OutFNm = InFNm.GetFMid(); }

  PUNGraph G;
  if (InFNm == "DEMO") { // small demo graph
    G = TUNGraph::New();
    for (int i = 1; i < 8; i++) { G->AddNode(i); }
    G->AddEdge(1,2);
    G->AddEdge(2,3); G->AddEdge(2,4);
    G->AddEdge(3,4);
    G->AddEdge(4,5); G->AddEdge(4,7);
    G->AddEdge(5,6); G->AddEdge(5,7);
    G->AddEdge(6,7);
    // draw the small graph using GraphViz
    TSnap::DrawGViz(G, gvlNeato, "small_graph.png", "", true); 
  }
  // load graph
  else if (InFNm.GetFExt().GetLc()==".ungraph") {
    TFIn FIn(InFNm);  G=TUNGraph::Load(FIn); }
  else if (InFNm.GetFExt().GetLc()==".ngraph") {
    TFIn FIn(InFNm);  G=TSnap::ConvertGraph<PUNGraph>(TNGraph::Load(FIn), false); }
  else {
    G = TSnap::LoadEdgeList<PUNGraph>(InFNm, 0, 1); }
  // find communities
  TVec<TIntV> CmtyV;
  TCliqueOverlap::GetCPMCommunities(G, OverlapSz+1, CmtyV);
  // save result
  FILE *F = fopen(TStr::Fmt("cpm-%s.txt", OutFNm.CStr()).CStr(), "wt");
  fprintf(F, "# %d Overlapping Clique Percolation Communities (min clique overlap %d)\n", CmtyV.Len(), OverlapSz);
  fprintf(F, "# Each line contains nodes belonging to the same community community\n");
  for (int i = 0; i < CmtyV.Len(); i++) {
    fprintf(F, "%d", CmtyV[i][0].Val);
    for (int j = 1; j < CmtyV[i].Len(); j++) {
      fprintf(F, "\t%d", CmtyV[i][j].Val);
    }
    fprintf(F, "\n");
  }
  Catch
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
}
Beispiel #26
0
// agmgen driver: takes community memberships either from the built-in "DEMO"
// data (two overlapping communities: nodes 1..25 and 16..40) or from a
// whitespace-separated file with one community per line, generates a graph
// with TAGM::GenAGM and saves its edge list to "<prefix>.edgelist.txt".
// Graphs with fewer than 50 nodes are additionally drawn to "<prefix>.graph.gif".
int main(int argc, char* argv[]) {
	Env = TEnv(argc, argv, TNotify::StdNotify);
	Env.PrepArgs(TStr::Fmt("agmgen. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
	TExeTm ExeTm;
	Try
	const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "DEMO", "Community affiliation data");
	const TStr OutFPrx = Env.GetIfArgPrefixStr("-o:", "agm", "out file name prefix");
	const int RndSeed = Env.GetIfArgPrefixInt("-rs:",10,"Rnd Seed");
	const double DensityCoef= Env.GetIfArgPrefixFlt("-a:",0.6,"Power-law Coefficient a of density (density ~ N^(-a)");
	const double ScaleCoef= Env.GetIfArgPrefixFlt("-c:",1.3,"Scaling Coefficient c of density (density ~ c");

	TRnd Rnd(RndSeed);
	TVec<TIntV> CmtyVV;
	if(InFNm=="DEMO") {
		CmtyVV.Gen(2);
		for(int i=0;i<25;i++) {
			TIntV& CmtyV = CmtyVV[0];
			CmtyV.Add(i+1);
		}
		for(int i=15;i<40;i++) {
			TIntV& CmtyV = CmtyVV[1];
			CmtyV.Add(i+1);
		}
	}
	else {
		// Fill the outer CmtyVV directly. A second, local CmtyVV used to be
		// declared here, which shadowed the outer one and left GenAGM with an
		// empty community list.
		TSsParser Ss(InFNm, ssfWhiteSep);
		while (Ss.Next()) {
			if(Ss.GetFlds()>0) {
				TIntV CmtyV;
				// non-integer fields on a line are skipped
				for(int i=0;i<Ss.GetFlds();i++) {
					if(Ss.IsInt(i)){CmtyV.Add(Ss.GetInt(i));}
				}
				CmtyVV.Add(CmtyV);
			}
		}
		printf("community loading completed (%d communities)\n",CmtyVV.Len());
	}
	PUNGraph AG = TAGM::GenAGM(CmtyVV,DensityCoef,ScaleCoef,Rnd);
	TSnap::SaveEdgeList(AG,OutFPrx + ".edgelist.txt");
	if(AG->GetNodes()<50) {
		TAGM::GVizComGraph(AG,CmtyVV,OutFPrx + ".graph.gif");
	}
	Catch
	printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
	return 0;
}
Beispiel #27
0
// Builds an ontology classifier from a binary Onto-Light file ("-iol:") and a
// bag-of-words file ("-ibow:"), then saves it in binary form ("-oom:") and as
// text ("-oomt:").
// Returns 0 on success or when only usage was requested, 1 after an exception.
int main(int argc, char* argv[]){
  Try;
  // create environment
  Env=TEnv(argc, argv, TNotify::StdNotify);

  // command line parameters
  Env.PrepArgs("Ontology-Light To Ontology-Classifier", 0);
  TStr InOntoLightFNm=Env.GetIfArgPrefixStr("-iol:", "f:/Data/OntoLight/EuroVoc.OntoLight", "Input-OntoLight-FileName");
  TStr InBowFNm=Env.GetIfArgPrefixStr("-ibow:", "f:/Data/OntoLight/Acquis.Bow", "Input-BagOfWords-FileName");
  TStr OutOntoCfierFNm=Env.GetIfArgPrefixStr("-oom:", "f:/Data/OntoLight/EuroVoc.OntoCfier", "Output-OntoClassifier-FileName");
  // The text output has its own prefix. It used to share "-oom:" with the
  // binary output, so passing "-oom:" made both saves target one file and the
  // text dump overwrote the binary classifier.
  TStr OutOntoCfierTxtFNm=Env.GetIfArgPrefixStr("-oomt:", "f:/Data/OntoLight/EuroVoc.OntoCfier.Txt", "Output-OntoClassifier-Text-FileName");
  TStr LangNm=Env.GetIfArgPrefixStr("-lang:", "EN", "Language-Name");
  bool DocCatIsTermIdP=Env.GetIfArgPrefixBool("-catisid:", true, "DocumentCategory-Is-TermId");
  double CutWordWgtSumPrc=Env.GetIfArgPrefixFlt("-cwwprc:", 0.33, "Cut-Word-Weight-Sum-Percent");
  if (Env.IsEndOfRun()){return 0;}
/*  InOntoLightFNm="f:/Data/OntoLight/Asfa.OntoLight";
  InBowFNm="f:/Data/OntoLight/Asfa.Bow";
  OutOntoCfierFNm="f:/Data/OntoLight/Asfa.OntoCfier";
  OutOntoCfierTxtFNm="f:/Data/OntoLight/Asfa.OntoCfier.Txt";
  DocCatIsTermIdP=false;*/

  printf("Loading Onto-Light from '%s' ...", InOntoLightFNm.CStr());
  PLwOnto LwOnto=TLwOnto::LoadBin(InOntoLightFNm);
  printf(" Done.\n");

  printf("Loading Bag-Of-Words from '%s' ...", InBowFNm.CStr());
  PBowDocBs BowDocBs=TBowDocBs::LoadBin(InBowFNm);
  printf(" Done.\n");

  // generate ontology-classifier
  PLwOntoCfier LwOntoCfier=TLwOntoCfier::GetOntoCfier(
   LwOnto, BowDocBs, LangNm, DocCatIsTermIdP, CutWordWgtSumPrc);

  printf("Saving Onto-Classifier to '%s' ...", OutOntoCfierFNm.CStr());
  LwOntoCfier->SaveBin(OutOntoCfierFNm);
  printf(" Done.\n");

  printf("Saving Text to '%s' ...", OutOntoCfierTxtFNm.CStr());
  LwOntoCfier->SaveTxt(OutOntoCfierTxtFNm);
  printf(" Done.\n");

  return 0;
  Catch;
  return 1;
}
Beispiel #28
0
int main(int argc, char* argv[]){
  Try;
  // create environment
  Env=TEnv(argc, argv, TNotify::StdNotify);

  // command line parameters
  Env.PrepArgs("Ontology-Classify", 0);
  TStr InOntoCfierFNm=Env.GetIfArgPrefixStr("-ioc:", "f:/Data/OntoLight/EuroVoc.OntoCfier", "Input-OntoClassifier-FileName");
  TStr InQueryStr=Env.GetIfArgPrefixStr("-qs:", "Slovenia and Croatia are having a fishing industry.", "Input-Query-String");
  TStr InQueryHtmlFNm=Env.GetIfArgPrefixStr("-qh:", "", "Input-Query-Html-File");
  TStr InQueryCpdFNm=Env.GetIfArgPrefixStr("-qcpd:", "", "Input-Query-CompactDocument-FileName");
  TStr InQueryUrlStr=Env.GetIfArgPrefixStr("-qu:", "", "Input-Query-Url");
  TStr InQueryUrlStrVFNm=Env.GetIfArgPrefixStr("-quf:", "", "Input-Query-URL-Vector-FileName");
  TStr OutXmlFNm=Env.GetIfArgPrefixStr("-ox:", "OntoCfy.Xml", "Output-Classification-Xml-File");
  TStr OutTxtFNm=Env.GetIfArgPrefixStr("-ot:", "OntoCfy.Txt", "Output-Classification-Txt-File");
  if (Env.IsEndOfRun()){return 0;}
//  InQueryStr="Slovenia and Croatia are having a fishing industry.";

  printf("Loading Onto-Classifier from '%s' ...", InOntoCfierFNm.CStr());
  PLwOntoCfier OntoCfier=TLwOntoCfier::LoadBin(InOntoCfierFNm);
  printf(" Done.\n");

  // process query
  TSimTermIdPrV SimTermIdPrV;
  if (!InQueryStr.Empty()){
    OntoCfier->ClassifyStr(InQueryStr, SimTermIdPrV);
  } else
  if (!InQueryHtmlFNm.Empty()){
    OntoCfier->ClassifyHtmlFNm(InQueryHtmlFNm, SimTermIdPrV);
  } else {
    TExcept::Throw("No Input-Query specified!");
  }

  // save to xml
  {TFOut FXml(OutXmlFNm); FILE* fXml=FXml.GetFileId();
  OntoCfier->SaveCfySetXml(fXml, SimTermIdPrV);}

  // save to txt
  {TFOut FTxt(OutTxtFNm); FILE* fTxt=FTxt.GetFileId();
  OntoCfier->SaveCfySetTxt(fTxt, SimTermIdPrV);}

  return 0;
  Catch;
  return 1;
}
Beispiel #29
0
// Driver stub: parses its options, loads a float-weighted edge list ("-i:")
// and prints the node count, edge count and elapsed time. The remaining
// options are parsed but not used yet; the analysis itself is still a TODO.
int main(int argc, char* argv[]) {

  // switch stdout to unbuffered mode
  setbuf(stdout, NULL);

  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Node centrality. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));

  TExeTm ExeTm;

  Try

  // command line options
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "", "input network");
  const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "output prefix (filename extensions added)");
  // part of the output prefix after its last '/'
  const TStr OutBaseNm = OutFNm.RightOfLast('/');
  const double Eps = Env.GetIfArgPrefixFlt("-eps:", 1.0e-5, "minimum quality improvement threshold");
  const double MinMoves = Env.GetIfArgPrefixFlt("-moves:", 1.0e-2, "minimum number of moves required (proportional)");
  const double MaxIters = Env.GetIfArgPrefixFlt("-iters:", 1.0e+4, "maximum number of iterations");

  // load the weighted graph
  printf("\nLoading %s...", InFNm.CStr());
  PFltWNGraph WGraph = TSnap::LoadFltWEdgeList<TWNGraph>(InFNm);
  printf(" DONE\n");

  // report its size and how long loading took
  const int NodeCnt = WGraph->GetNodes();
  printf("  nodes: %d\n", NodeCnt);
  const int EdgeCnt = WGraph->GetEdges();
  printf("  edges: %d\n", EdgeCnt);
  printf("  time elapsed: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());

  // TODO: declare variables and run the community step
  // (the original notes mention the Louvain method with a modularity objective)

  Catch

  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;

}
Beispiel #30
0
// Parses the experiment options, configures an ExamMgr with them and runs
// em_multi, then prints the elapsed time.
// Returns 0 on success or when only usage was requested, 1 when "-cpu:" is
// smaller than 1.
int main(int argc, char* argv[]){
	Env = TEnv(argc, argv, TNotify::StdNotify);
	Env.PrepArgs(TStr::Fmt("Build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
	const TStr GFNm = Env.GetIfArgPrefixStr("-i:", "", "Input graph");
	const int W = Env.GetIfArgPrefixInt("-w:", 10000, "W");
	const int BW = Env.GetIfArgPrefixInt("-bw:", 100, "W upper bound");
	// hardware_concurrency() returns 0 when the thread count cannot be
	// determined; fall back to a single CPU in that case
	const unsigned HwThreads = std::thread::hardware_concurrency();
	const int DefCPU = HwThreads > 0 ? static_cast<int>(HwThreads) : 1;
	const int CPU = Env.GetIfArgPrefixInt("-cpu:", DefCPU, "# of CPUs");
	// the default repeat count is 100/CPU; guard the division against a
	// zero or negative "-cpu:" value, which is rejected below
	const int DefRpt = CPU > 0 ? 100/CPU : 100;
	const int Rpt = Env.GetIfArgPrefixInt("-r:", DefRpt, "Repeat times");
	const double Pe = Env.GetIfArgPrefixFlt("-p:", 0.1, "Edge sampling rate");
	const double alpha = Env.GetIfArgPrefixFlt("-alpha:", 0.0001, "alpha");
	const bool TrimTail = Env.GetIfArgPrefixBool("-t:", false, "Trim tail");
	if (Env.IsEndOfRun())  return 0;
	if (CPU < 1) {
		printf("-cpu: must be at least 1\n");
		return 1;
	}

	TExeTm2 tm;
	ExamMgr ExM;
	ExM.SetActionGraph(GFNm).SetW(W).SetPEdge(Pe).SetRepeat(Rpt).SetCPU(CPU).IsTrimTail(TrimTail).SetBoundW(BW).SetAlpha(alpha);
	em_multi(ExM);
	printf("Cost time: %s.\n", tm.GetStr());
	return 0;
}