// Reinforces edge weights by repeated weighted random picks.
// IN and OUT edges are stored swapped (so that the program runs faster):
// every node sends its message via an IN edge, chosen with probability
// proportional to the stored OUT edge weight.
//
// NIters: number of passes over all nodes. In each pass every node that has
//   at least one stored out-edge reinforces exactly one edge by +1.
// Progress is printed to stdout roughly every 1% of the passes.
void TWgtNet::ReinforceEdges(const int& NIters) {
  // cache the sum of the stored out-edge weights of every node
  THash<TInt, TFlt> OutWgtSumH;
  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
    double wgt = 0;
    for (int e = 0; e < NI.GetOutDeg(); e++) {
      wgt += NI.GetOutEDat(e); }
    OutWgtSumH.AddDat(NI.GetId(), wgt);
  }
  printf("Reinforcing edges for %d iterations\n", NIters);
  // progress interval: never 0, so the modulo below is defined for NIters < 100
  const int ReportStep = (NIters >= 100) ? NIters/100 : 1;
  // iterate
  TExeTm ExeTm;
  for (int iter = 0; iter < NIters; iter++) {
    for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
      const int OutDeg = NI.GetOutDeg();
      // nothing to pick from; reading GetOutEDat(0) would be out of range
      if (OutDeg == 0) { continue; }
      const double X = TInt::Rnd.GetUniDev() * OutWgtSumH.GetDat(NI.GetId());
      double x = 0;  int e = 0;
      // walk the cumulative weights until X is reached; stop at the last
      // edge so that rounding can never push e past the end
      for ( ; e + 1 < OutDeg && x + NI.GetOutEDat(e) < X; e++) {
        x += NI.GetOutEDat(e); }
      IAssert(IsEdge(NI.GetOutNId(e), NI.GetId()));
      GetEDat(NI.GetOutNId(e), NI.GetId()) += 1; // reinforce the edge
      OutWgtSumH.GetDat(NI.GetOutNId(e)) += 1; // keep the cached sum in sync
    }
    if (iter % ReportStep == 0) {
      printf("\r%d [%s]", iter, ExeTm.GetStr());
    }
  }
  printf(" done.\n");
}
// Entry point of the stochastic block model network generator.
// Command line: -n: number of nodes, -pIn: and -pOut: probabilities,
// -k: number of communities. Generates the network through TGraphAlgo,
// saves the community labels and prints the run time.
// NOTE(review): of the file names declared below only communityLabelsFilename
// is used in the statements visible here; confirm where the others are consumed.
int main(int argc, char* argv[]) {
    Env = TEnv(argc, argv, TNotify::StdNotify);
    Env.PrepArgs(TStr::Fmt("\nGenerate stochastic block model networks. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
    TExeTm ExeTm;
    // generator parameters
    const int noNodes = Env.GetIfArgPrefixInt("-n:", 512, "Number of nodes in synthetic graph (default: 512)\n");
    const double pIn = Env.GetIfArgPrefixFlt("-pIn:", 0.8, "pIn (default: 0.8)\n");
    const double pOut = Env.GetIfArgPrefixFlt("-pOut:", 0.2, "pOut (default: 0.2)\n");
    const int noCommunities = Env.GetIfArgPrefixInt("-k:", 2, "Number of communities in graph (default: 2)\n");
    TGraphAlgo graphAlgo;
    graphAlgo.generateNetwork(noNodes, noCommunities, pIn, pOut);
    // fixed output file names
    TStr networkFilename = TStr("test-network-sbm.txt");
    TStr networkAdjacencyMatrixFilename = TStr("test-network-sbm-adjacency-matrix.txt");
    TStr networkGexfFilename = TStr("test-network-sbm.gexf");
    TStr networkLouvainFormatFilename = TStr("test-network-sbm-louvain.txt");
    TStr louvainTreeFilename = TStr("test-network-sbm-louvain.tree");
    TStr communityLabelsFilename = TStr("test-network-sbm-assignments.txt");
    graphAlgo.saveCommunityLabels(communityLabelsFilename, noNodes, noCommunities);
    printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
    return 0;
// Plots the size of the largest connected component against the probability
// of taking the 2nd edge, with error bars, for the TOP, RND and BTM series.
// Net: weighted network; its edge weights are multiplied by -1 and then by -1
//   again while the two helper objects are constructed.
// NRuns: number of repetitions of the whole probability sweep.
// StepP: increment of the probability p, which runs from 0 up to 1.
// OutFNm: suffix of the plot name; Desc: prefix of the plot title.
// PlotTop2, PlotBtm2, PlotRnd2: select which series are measured.
void TTop2FriendNet::PlotPick2VsProb2nd(const PWgtNet& Net, const int& NRuns, const double& StepP, const TStr& OutFNm, 
                                        TStr Desc, bool PlotTop2, bool PlotBtm2, bool PlotRnd2) {
  // Btm2 is built while the weights are sign-flipped; the second call flips them back
  TTop2FriendNet Top2(Net);  Net->MulEdgeWgt(-1.0); 
  TTop2FriendNet Btm2(Net);  Net->MulEdgeWgt(-1.0); // change back
  // one TMom accumulator per probability value
  THash<TFlt, TMom> Top2H, Btm2H, Rnd2H;
  for (int run = 0; run < NRuns; run++) {
    TExeTm ExeTm;
    printf("run %d\n", run);
    // NOTE(review): p is accumulated in floating point and used as hash key;
    // confirm the keys of different runs coincide as intended
    for (double p = 0; p <= 1; p += StepP) {
      if (PlotTop2) { Top2H.AddDat(p).Add(Top2.GetTop2WccSz(p)); }
      if (PlotBtm2) { Btm2H.AddDat(p).Add(Btm2.GetTop2WccSz(p)); }
      if (PlotRnd2) { Rnd2H.AddDat(p).Add(Top2.GetRnd2WccSz(p)); }
    printf("[%s]\n", ExeTm.GetStr());
    // convert the accumulators to plottable vectors
    TFltTrV Top2V, Btm2V, Rnd2V;
    GetAvgSDevV(Top2H, Top2V);
    GetAvgSDevV(Btm2H, Btm2V);
    GetAvgSDevV(Rnd2H, Rnd2V);
    TGnuPlot GP("ccVsP-"+OutFNm, TStr::Fmt("%s (%d, %d, %f)", Desc.CStr(), Net->GetNodes(), 
      Net->GetEdges(), Net->GetEdgeWgt()));
    GP.SetXYLabel("Prob of taking 2nd edge", "Size of largest connected component");
    // only series that were actually measured are added
    if (! Top2V.Empty()) { GP.AddErrBar(Top2V, "TOP", ""); }
    if (! Rnd2V.Empty()) { GP.AddErrBar(Rnd2V, "RND", ""); }
    if (! Btm2V.Empty()) { GP.AddErrBar(Btm2V, "BTM", ""); }
// Fits Theta with Newton steps.
// ChangeEps: threshold compared against the dot product of gradient and step.
// MaxStep: upper bound on the number of iterations.
// PlotNm: when not empty, a completion message with the timing is printed.
// Returns the value of the iteration counter when the loop ends.
int TLogRegFit::MLENewton(const double& ChangeEps, const int& MaxStep, const TStr PlotNm) {
    TExeTm ExeTm;
    TFltV GradV(Theta.Len()), DeltaLV(Theta.Len());
    TFltVV HVV(Theta.Len(), Theta.Len());
    int iter = 0;
    // every component of Theta is kept inside [MinVal, MaxVal]
    double MinVal = -1e10, MaxVal = 1e10;
    for(iter = 0; iter < MaxStep; iter++) {
        GetNewtonStep(HVV, GradV, DeltaLV);
        double Increment = TLinAlg::DotProduct(GradV, DeltaLV);
        // NOTE(review): presumably the convergence test that leaves the loop;
        // the body of this branch is not visible here, confirm
        if (Increment <= ChangeEps) {
        double LearnRate = GetStepSizeByLineSearch(DeltaLV, GradV, 0.15, 0.5);//InitLearnRate/double(0.01*(double)iter + 1);
        // move along the step direction and clamp each coordinate
        for(int i = 0; i < Theta.Len(); i++) {
            double Change = LearnRate * DeltaLV[i];
            Theta[i] += Change;
            if(Theta[i] < MinVal) {
                Theta[i] = MinVal;
            if(Theta[i] > MaxVal) {
                Theta[i] = MaxVal;
    if (! PlotNm.Empty()) {
        printf("MLE with Newton method completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());

    return iter;
// Entry point of the network diversity tool.
// Loads an undirected edge list (-i:), then computes the diversity measure
// selected by -a: (only 1, Stirling, is handled) using the category file (-c:),
// the matrix file (-m:) and the integer parameters -alp:, -bet:, -gam:.
// Prints the diversity value and the run time.
// NOTE(review): OutFNm (-o:) is read but not used in the statements visible here.
int main(int argc, char* argv[]) {

  // code needed for inputting parameters
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Network diversity. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));

  TExeTm ExeTm; // for measuring execution time
  const TStr InFNmGraph = Env.GetIfArgPrefixStr("-i:", "artificial_intelligence_pub.txt", "Input graph (undirected graph)");
  const TStr InFNmCat = Env.GetIfArgPrefixStr("-c:", "artificial_intelligence_cat_pub.txt", "Categories");
  const TStr InFNmMat = Env.GetIfArgPrefixStr("-m:", "sciences.txt", "Matrix");
  const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "diversity.txt", "Output file");
  const int DivAlg = Env.GetIfArgPrefixInt("-a:", 1, "Measure: 1:Stirling");
  const int Alpha = Env.GetIfArgPrefixInt("-alp:", 1, "alpha");
  const int Beta = Env.GetIfArgPrefixInt("-bet:", 1, "beta");
  const int Gamma = Env.GetIfArgPrefixInt("-gam:", 1, "gama");
  // defining graph
  PUNGraph Graph = TSnap::LoadEdgeList<PUNGraph>(InFNmGraph, false);

  double D = 0.0;
  TStr DivAlgStr;

  // based on the input parameter -a (variable DivAlg), the diversity measure is chosen;
  // any other value ends in Fail
  if (DivAlg == 1) {
    DivAlgStr = "Stirling";
	D = TSnap::StirlingIndex(Graph,InFNmCat,InFNmMat, Alpha, Beta, Gamma);}
  else { Fail; }

  printf("\nDiversity: %f\nrun time: %s (%s)\n", D,ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); //print execution time

  return 0;
// Entry point of the ragm community detection tool.
// Loads a graph (binary .ungraph file or a text edge list), optionally loads
// node labels, optionally estimates the number of communities, fits the model
// by gradient ascent (single threaded or parallel) and writes the communities
// as text and as a Gephi file.
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("ragm. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  TStr OutFPrx = Env.GetIfArgPrefixStr("-o:", "", "Output Graph data prefix");
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input edgelist file name");
  const TStr LabelFNm = Env.GetIfArgPrefixStr("-l:", "", "Input file name for node names (Node ID, Node label) ");
  int OptComs = Env.GetIfArgPrefixInt("-c:", -1, "The number of communities to detect (-1: detect automatically)");
  const int MinComs = Env.GetIfArgPrefixInt("-mc:", 5, "Minimum number of communities to try");
  const int MaxComs = Env.GetIfArgPrefixInt("-xc:", 100, "Maximum number of communities to try");
  const int DivComs = Env.GetIfArgPrefixInt("-nc:", 10, "How many trials for the number of communities");
  const int NumThreads = Env.GetIfArgPrefixInt("-nt:", 1, "Number of threads for parallelization");
  const double StepAlpha = Env.GetIfArgPrefixFlt("-sa:", 0.3, "Alpha for backtracking line search");
  const double StepBeta = Env.GetIfArgPrefixFlt("-sb:", 0.3, "Beta for backtracking line search");

  PUNGraph G;
  TIntStrH NIDNameH;
  // the input format is chosen by looking for .ungraph in the file name
  if (InFNm.IsStrIn(".ungraph")) {
    TFIn GFIn(InFNm);
    G = TUNGraph::Load(GFIn);
  } else {
    G = TAGMUtil::LoadEdgeListStr<PUNGraph>(InFNm, NIDNameH);
  // optional tab separated label file: column 0 is the node id, column 1 the label
  if (LabelFNm.Len() > 0) {
    TSsParser Ss(LabelFNm, ssfTabSep);
    while (Ss.Next()) {
      if (Ss.Len() > 0) { NIDNameH.AddDat(Ss.GetInt(0), Ss.GetFld(1)); }
  else {
  printf("Graph: %d Nodes %d Edges\n", G->GetNodes(), G->GetEdges());
  TVec<TIntV> EstCmtyVV;
  TExeTm RunTm;
  TAGMFast RAGM(G, 10, 10);
  // -1 requests an automatic choice of the number of communities
  if (OptComs == -1) {
    printf("finding number of communities\n");
    OptComs = RAGM.FindComsByCV(NumThreads, MaxComs, MinComs, DivComs, OutFPrx, StepAlpha, StepBeta);

  // the parallel fit is used only with several threads and at least 1000 edges
  if (NumThreads == 1 || G->GetEdges() < 1000) {
    RAGM.MLEGradAscent(0.0001, 1000 * G->GetNodes(), "", StepAlpha, StepBeta);
  } else {
    RAGM.MLEGradAscentParallel(0.0001, 1000, NumThreads, "", StepAlpha, StepBeta);
  // NOTE(review): no statement visible here fills EstCmtyVV before it is written; confirm
   TAGMUtil::DumpCmtyVV(OutFPrx + "cmtyvv.txt", EstCmtyVV, NIDNameH);
  TAGMUtil::SaveGephi(OutFPrx + "graph.gexf", G, EstCmtyVV, 1.5, 1.5, NIDNameH);


  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());

  return 0;
// Entry point of MemeWorks: seeds the global random generator with 0,
// forwards the command line to BigMain and prints the total execution time
// as hours, minutes and seconds.
// The commented-out ToDo arrays below are canned argument lists for BigMain.
// NOTE(review): as written here the second printf and return follow a
// return statement; presumably they belong to a handler paired with Try
// that is not visible here, confirm.
int main(int argc, char* argv[]) {
  printf("MemeWorks. build: %s, %s. Start time: %s\n\n", __TIME__, __DATE__, TExeTm::GetCurTm());

  TExeTm ExeTm;  TInt::Rnd.PutSeed(0);  Try
  TSecTm BegTm = TSecTm::GetCurTm();

//	char *ToDo [] = {"memeclust", "-do:memestoqtbs", "-i:201101.txt", "-w:F", "-o:1101", "-mint:20110101", "-maxt:20110106"};  BigMain(7, ToDo);   
//  char *ToDo [] = {"memeclust", "-do:memestoqtbs", "-i:201101_201103.txt", "-w:F", "-o:11011103"};  BigMain(5, ToDo);   
//  char *ToDo [] = {"memeclust", "-do:memestoqtbs", "-i:201104_201106.txt", "-w:F", "-o:11041106", "-mint:20110401", "-maxt:20110701"};  BigMain(7, ToDo);   
//  char *ToDo [] = {"memeclust", "-do:memestoqtbs", "-i:201007_201107.txt", "-w:F", "-o:10071107"};  BigMain(5, ToDo);
//  char *ToDo [] = {"memeclust", "-do:memestoqtbs", "-i:201007_201107.txt", "-o:10071107", "-mint:20100714", "-maxt:20110728"};  BigMain(6, ToDo); 
//	char *ToDo [] = {"memeclust", "-do:memestoqtbs", "-i:201101.txt", "-o:1101", "-mint:20110101", "-maxt:20110106"};  BigMain(6, ToDo);   

//	char *ToDo [] = {"memeclust", "-do:mkclustnet", "-i:1101-w4mfq5.QtBs", "-o:1101", "-shglready:F", "-netready:F"};  BigMain(6, ToDo);  
//  char *ToDo [] = {"memeclust", "-do:mkclustnet", "-i:qt08080902-w4mfq5.QtBs", "-o:0808", "-shglready:F", "-netready:F"};  BigMain(6, ToDo);  
//  char *ToDo [] = {"memeclust", "-do:mkclustnet", "-i:11011103-w4mfq5.QtBs", "-o:11011103", "-shglready:F", "-netready:F"};  BigMain(6, ToDo);  
//	char *ToDo [] = {"memeclust", "-do:mkclustnet", "-i:11041106-w4mfq5.QtBs", "-o:11041106", "-shglready:F", "-netready:F"};  BigMain(6, ToDo);  
//  char *ToDo [] = {"memeclust", "-do:mkclustnet", "-i:10071107-w4mfq5.QtBs", "-o:10071107", "-shglready:F", "-netready:F"};  BigMain(6, ToDo);  
  //char *ToDo [] = {"memeclust", "-do:memeclustzarya", "-i:201102.txt", "-o:201102", "-shglready:F", "-netready:F", "-mint:20110201", "-maxt:20110301"};  BigMain(8, ToDo);  
  BigMain(argc, argv);

  // wall clock duration of BigMain in seconds
  TSecTm EndTm = TSecTm::GetCurTm();
  double usedTime = EndTm.GetAbsSecs() - BegTm.GetAbsSecs();
  printf("Total execution time : %02dh%02dm%02ds\n", int(usedTime)/3600, (int(usedTime)%3600)/60, int(usedTime)%60);
  return 0;
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
// Grows Graph node by node with the forest fire procedure until it has
// GraphNodes nodes.
// GraphNodes: target number of nodes.
// FloodStop: when true, generation stops with srFlood once the number of
//   edges exceeds GraphNodes and the edges per node exceed 1000.
// Returns srOk on completion, srFlood on flooding, or srTimeLimit when
// TimeLimitSec is positive and has been exceeded (checked every 1000 nodes).
// The random generator is seeded with 0 on every call.
TFfGGen::TStopReason TFfGGen::AddNodes(const int& GraphNodes, const bool& FloodStop) {
	printf("\n***ForestFire:  %s  Nodes:%d  StartNodes:%d  Take2AmbProb:%g\n", BurnExpFire ? "ExpFire" : "GeoFire", GraphNodes, StartNodes(), Take2AmbProb());
	printf("                FwdBurnP:%g  BckBurnP:%g  ProbDecay:%g  Orphan:%g\n", FwdBurnProb(), BckBurnProb(), ProbDecay(), OrphanProb());
	TExeTm ExeTm;
	int Burned1 = 0, Burned2 = 0, Burned3 = 0; // last 3 fire sizes
	// create initial set of nodes
	if (Graph.Empty()) { Graph = PNGraph::New(); }
	if (Graph->GetNodes() == 0) {
		for (int n = 0; n < StartNodes; n++) { Graph->AddNode(); }
	// NOTE(review): NEdges is asserted equal to Graph->GetEdges() at the end;
	// the statement that updates it is not visible here, confirm
	int NEdges = Graph->GetEdges();
	// forest fire
	TRnd Rnd(0);
	TForestFire ForestFire(Graph, FwdBurnProb, BckBurnProb, ProbDecay, 0);
	// add nodes
	for (int NNodes = Graph->GetNodes() + 1; NNodes <= GraphNodes; NNodes++) {
		const int NewNId = Graph->AddNode(-1);
		IAssert(NewNId == Graph->GetNodes() - 1); // node ids have to be 0...N
		// not an Orphan (burn fire)
		if (OrphanProb == 0.0 || Rnd.GetUniDev() > OrphanProb) {
			// infect ambassadors
			if (Take2AmbProb == 0.0 || Rnd.GetUniDev() > Take2AmbProb || NewNId < 2) {
				ForestFire.Infect(Rnd.GetUniDevInt(NewNId)); // take 1 ambassador
			else {
				// draw two distinct ambassadors among the existing node ids
				const int AmbassadorNId1 = Rnd.GetUniDevInt(NewNId);
				int AmbassadorNId2 = Rnd.GetUniDevInt(NewNId);
				while (AmbassadorNId1 == AmbassadorNId2) {
					AmbassadorNId2 = Rnd.GetUniDevInt(NewNId);
				ForestFire.Infect(TIntV::GetV(AmbassadorNId1, AmbassadorNId2)); // take 2 ambassadors
			// burn fire
			if (BurnExpFire) { ForestFire.BurnExpFire(); }
			else { ForestFire.BurnGeoFire(); }
			// add edges to burned nodes
			for (int e = 0; e < ForestFire.GetBurned(); e++) {
				Graph->AddEdge(NewNId, ForestFire.GetBurnedNId(e));
			Burned1 = Burned2;  Burned2 = Burned3;  Burned3 = ForestFire.GetBurned();
		else {
			// Orphan (zero out-links)
			Burned1 = Burned2;  Burned2 = Burned3;  Burned3 = 0;
		// progress report every 1000 nodes
		if (NNodes % Kilo(1) == 0) {
			printf("(%d, %d)  burned: [%d,%d,%d]  [%s]\n", NNodes, NEdges, Burned1, Burned2, Burned3, ExeTm.GetStr());
		if (FloodStop && NEdges>GraphNodes && (NEdges / double(NNodes)>1000.0)) { // more than 1000 edges per node on average
			printf(". FLOOD. G(%6d, %6d)\n", NNodes, NEdges);  return srFlood;
		if (NNodes % 1000 == 0 && TimeLimitSec > 0 && ExeTm.GetSecs() > TimeLimitSec) {
			printf(". TIME LIMIT. G(%d, %d)\n", Graph->GetNodes(), Graph->GetEdges());
			return srTimeLimit;
	IAssert(Graph->GetEdges() == NEdges);
	return srOk;
// Get the model graph according to Args.
// Args: argument string parsed through Env; -g: selects how the graph is
//   obtained (gen, read or genpy are handled below), -i: names the input file
//   used by read.
// G: receives the resulting directed graph.
// The time spent is written to TFile.
// NOTE(review): the block comment opened after the timing line is not closed
// within the lines visible here; confirm where it ends.
void GetModel(const TStr& Args, PNGraph& G){
	Env = TEnv(Args, TNotify::NullNotify);
	const TStr Gen = Env.GetIfArgPrefixStr("-g:", "gen", "How to get model graph: read, gen, deg, genpy");
	const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "", "Input graph file (single directed edge per line)");
	TExeTm execTime;
	if (Gen == "gen")
		BasicGraphGen(Args, G);
	else if (Gen == "read")
		ReadPNGraphFromFile(InFNm, G);
	else if (Gen == "genpy")
		// generate an undirected graph through GenPy, then convert it to directed
		PUNGraph GU;
		GenPy(GU, TFile, Args);	
		G = TSnap::ConvertGraph<PNGraph>(GU);
	TFile << "Time of getting model: " <<  execTime.GetTmStr() << endl;
	/*TFile << "Model graph: " << G->GetNodes() << " nodes, " << G->GetEdges() << " edges\n";
	TIntV DegV;
	TSnap::GetDegSeqV(G, DegV);
	PUNGraph Conf = TSnap::GenConfModel(DegV);
	TFile << "Time of getting configuration model: " <<  execTime.GetTmStr() << endl;
	cout << "Undirected configuration model: " << Conf->GetNodes() << " nodes, " << Conf->GetEdges() << " edges\n";
	PNGraph ConfD = TSnap::ConvertGraph<PNGraph>(Conf);
	SaveAndPlot(ConfD, "conf", false);
	TFile << "Clustering coefficient of configuration model: " << TSnap::GetClustCf(ConfD) << endl;
/// Clique Percolation method communities
/// G: input undirected graph.
/// MinMaxCliqueSize: passed to GetMaxCliques; the overlap graph is built with
///   MinMaxCliqueSize-1 as its second argument.
/// NIdCmtyVV: output parameter for the communities; the statements that fill
///   it are not visible here.
void TCliqueOverlap::GetCPMCommunities(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& NIdCmtyVV) {
  printf("Clique Percolation Method\n");
  TExeTm ExeTm;
  TVec<TIntV> MaxCliques;
  TCliqueOverlap::GetMaxCliques(G, MinMaxCliqueSize, MaxCliques);
  // op RS 2012/05/15, commented out next line, a parameter is missing,
  //   creating a warning on OS X
  // printf("...%d cliques found\n");
  // get clique overlap matrix (graph)
  PUNGraph OverlapGraph = TCliqueOverlap::CalculateOverlapMtx(MaxCliques, MinMaxCliqueSize-1);
  // NOTE(review): the message says overlap matrix but the numbers printed are
  // those of the input graph G, not of OverlapGraph; confirm the intent
  printf("...overlap matrix (%d, %d)\n", G->GetNodes(), G->GetEdges());
  // connected components are communities
  TCnComV CnComV;
  TSnap::GetWccs(OverlapGraph, CnComV);
  TIntSet CmtySet;
  // each component entry indexes into MaxCliques
  for (int c = 0; c < CnComV.Len(); c++) {
    for (int i = 0; i <CnComV[c].Len(); i++) {
      const TIntV& CliqueNIdV = MaxCliques[CnComV[c][i]];
  printf("done [%s].\n", ExeTm.GetStr());
// Generates a graph by calling a Python function and copies it into res.
// res: receives the new undirected graph.
// TFile: stream that receives the timing messages.
// parameters: argument string; -module: and -func: name the Python module and
//   function (defaults random_graphs and fast_gnp_random_graph).
// Returns 0 on every path visible here, including the argument parsing failure.
// NOTE(review): the arrays allocated with new below are not released in the
// statements visible here; confirm ownership.
int GenPy(PUNGraph &res, ofstream& TFile, const TStr& parameters)
	Env = TEnv(parameters, TNotify::StdNotify);
	TStr mN = Env.GetIfArgPrefixStr("-module:", "random_graphs", "Module name");
	TStr fN = Env.GetIfArgPrefixStr("-func:", "fast_gnp_random_graph", "Function name");
	PyObject **G = new PyObject*[1];
	char *moduleName = mN.CStr();
	char *funcName = fN.CStr();
	TStrV args, argTypes;
	if (!ParseArgs(funcName, parameters, args, argTypes))
		printf("Fail to parse arguments for NetworkX generation...\n");
		return 0;
	TExeTm execTime;
	if (!CallPyFunction(moduleName, funcName, args, argTypes, G))
		cout << "CallPyFunction() raised error. Execution terminated.\n";
	TFile << "Time of generation of graph by NetworkX: " << execTime.GetTmStr() << endl; 

	// node list of the Python graph
	PyObject*** nodes = new PyObject**[1];
	GetNodes(G, nodes);
	int nodesCount = PyList_Size(*(nodes[0]));
	//printf("nodesCount = %d, ", nodesCount);
	res = PUNGraph::TObj::New();
	// reserves room for nodesCount nodes and nodesCount*nodesCount edges
    res->Reserve(nodesCount, nodesCount*nodesCount);
	for (size_t i = 0; i < nodesCount; i++)

	// edge list of the Python graph; each item is read as a pair of node ids
	PyObject*** edges = new PyObject**[1];
	GetEdges(G, edges);
	int edgesCount = PyList_Size(*(edges[0]));
	//printf("edgesCount = %d\n", edgesCount);
	for (size_t i = 0; i < edgesCount; i++)
		PyObject* item = PySequence_Fast_GET_ITEM(*(edges[0]), i);
		int v1, v2;
		PyObject* node = PySequence_Fast_GET_ITEM(item,0);
		v1 = PyLong_AsLong(node);
		node = PySequence_Fast_GET_ITEM(item,1);
		v2 = PyLong_AsLong(node);
	TFile << "Time of copying of graph from NetworkX representation: " << execTime.GetTmStr() << endl; 
	//Py_Finalize(); // release the memory handed to the interpreter
	return 0;
// Entry point of the Twitter cascades scatter tool.
// Loads four cascade tables keyed by TUInt from fixed absolute paths and
// prints which share of the url keys also appears among the content keys,
// and the other way round. Errors are reported by the two catch handlers
// and the run time is printed at the end.
// NOTE(review): the statements that increment cnt inside the two loops are
// not visible here; confirm.
int main(int argc, char* argv[])
	TExeTm ExeTm;
		Env = TEnv(argc, argv, TNotify::StdNotify);
		Env.PrepArgs(TStr::Fmt("\nPlotting Scatter For Twitter Cascades. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));

//		THash< TUInt , TSecTmV > twitterUrls = Tools::loadTwitter("DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED_HAVINGBOTH.rar");
//		THash< TUInt , TSecTmV > twitterContents = Tools::loadTwitter("DATA/CascadesOnTwitterData_FINALFILTERED_HAVINGBOTH.rar");
		THash< TUInt , TSecTmV > twitterUrls = Tools::loadTwitter("/NS/twitter-5/work/oaskaris/DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED_HAVINGBOTH.rar");     // CascadesFullUrlsOnTwitterData_FINALFILTERED
		THash< TUInt , TSecTmV > twitterContents = Tools::loadTwitter("/NS/twitter-5/work/oaskaris/DATA/CascadesOnTwitterData_FINALFILTERED_HAVINGBOTH.rar");    // CascadesOnTwitterData_FINALFILTERED

		THash< TUInt , TSecTmV > full_twitterUrls = Tools::loadTwitter("/NS/twitter-5/work/oaskaris/DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED.rar");
		THash< TUInt , TSecTmV > full_twitterContents = Tools::loadTwitter("/NS/twitter-5/work/oaskaris/DATA/CascadesOnTwitterData_FINALFILTERED.rar");

		// Scatter plot

		// Percentage computation
		// url keys that are also present among the content keys
		double cnt = 0;
		for(int i=0;i<full_twitterUrls.Len();i++)
			if(full_twitterContents.GetKeyId(full_twitterUrls.GetKey(i)) != -1)
		cnt /= full_twitterUrls.Len();    // twitterUrls.Len() / full_twitterUrls.Len()
		printf("The percentage of Urls of quotes which have contents as well: %f\n", 100 * cnt);

		// content keys that are also present among the url keys
		cnt = 0;
		for(int i=0;i<full_twitterContents.Len();i++)
			if(full_twitterUrls.GetKeyId(full_twitterContents.GetKey(i)) != -1)
		cnt /= full_twitterContents.Len();
		printf("The percentage of Contents of quotes which have urls as well: %f\n", 100 * cnt);

		printf("\nScatter Plot had been drawn successfully.");
	catch(exception& ex)
		printf("\nError1 happened, it was: %s\n\n",ex.what());
	catch(TPt<TExcept>& ex)
		printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());

	printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
	return 0;
// Entry point of the tool that plots Memes and Twitter cascades individually.
// Loads the quotes table and the two Twitter cascade tables from relative
// DATA paths, then iterates over the content cascades. Errors are reported
// by the two catch handlers and the run time is printed at the end.
// NOTE(review): the body of the loop over twitterContents is mostly not
// visible here, so how twitterTotal is filled cannot be stated; confirm.
int main(int argc, char* argv[])
//	TFltPrV v;
//	v.Add(TFltPr(1,4));
//	v.Add(TFltPr(5,5));
//	v.Add(TFltPr(9,11));
//	v.Add(TFltPr(20,8));
//	v.Add(TFltPr(21,30));
//	cout << "C: " << Tools::computeCorrelation(v,Pearson) << endl;
//	return 0;

	TExeTm ExeTm;
		Env = TEnv(argc, argv, TNotify::StdNotify);
		Env.PrepArgs(TStr::Fmt("\nPlotting Individually Memes-Twitter Cascades. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));

		// URLS
		THash< TStr , CascadeElementV > quotes = Tools::loadQuotes("DATA/QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_HAVINGBOTH.rar");    // QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_4URLS
		THash< TUInt , TSecTmV > twitterUrls = Tools::loadTwitter("DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED_HAVINGBOTH.rar");     // CascadesFullUrlsOnTwitterData_FINALFILTERED

		//THash< TStr , CascadeElementV > quotes2 = Tools::loadQuotes("DATA/QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_HAVINGBOTH.rar");    // QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_4Contents
		THash< TUInt , TSecTmV > twitterContents = Tools::loadTwitter("DATA/CascadesOnTwitterData_FINALFILTERED_HAVINGBOTH.rar");    // CascadesOnTwitterData_FINALFILTERED

		// Plotting
		THash< TUInt , TSecTmV > twitterTotal;
		for(int i=0;i<twitterContents.Len();i++)
			TSecTmV tmp;


		printf("\nPlots had been drawn successfully.");
	catch(exception& ex)
		printf("\nError1 happened, it was: %s\n\n",ex.what());
	catch(TPt<TExcept>& ex)
		printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());

	printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
	return 0;
// Fits Theta by gradient steps with a line-searched step size.
// ChangeEps: threshold compared against the norm of the gradient.
// MaxStep: upper bound on the number of iterations.
// PlotNm: when not empty, likelihood and gradient norm are recorded per
//   iteration, plotted under this prefix, and a completion message is printed.
// Returns the value of the iteration counter when the loop ends.
int TLogRegFit::MLEGradient(const double& ChangeEps, const int& MaxStep, const TStr PlotNm) {
    TExeTm ExeTm;
    TFltV GradV(Theta.Len());
    int iter = 0;
    TIntFltPrV IterLV, IterGradNormV;
    // every component of Theta is kept inside [MinVal, MaxVal]
    double MinVal = -1e10, MaxVal = 1e10;
    // gradient components are limited to [-GradCutOff, GradCutOff]
    double GradCutOff = 100000;
    for(iter = 0; iter < MaxStep; iter++) {
        Gradient(GradV);    //if gradient is going out of the boundary, cut off
        for(int i = 0; i < Theta.Len(); i++) {
            if (GradV[i] < -GradCutOff) {
                GradV[i] = -GradCutOff;
            if (GradV[i] > GradCutOff) {
                GradV[i] = GradCutOff;
            // a coordinate sitting on a bound must not be pushed further outside
            if (Theta[i] <= MinVal && GradV[i] < 0) {
                GradV[i] = 0.0;
            if (Theta[i] >= MaxVal && GradV[i] > 0) {
                GradV[i] = 0.0;
        double Alpha = 0.15, Beta = 0.9;
        //double LearnRate = 0.1 / (0.1 * iter + 1); //GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
        double LearnRate = GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta);
        // NOTE(review): presumably the convergence test that leaves the loop;
        // the body of this branch is not visible here, confirm
        if (TLinAlg::Norm(GradV) < ChangeEps) {
        // move along the gradient and clamp each coordinate
        for(int i = 0; i < Theta.Len(); i++) {
            double Change = LearnRate * GradV[i];
            Theta[i] += Change;
            if(Theta[i] < MinVal) {
                Theta[i] = MinVal;
            if(Theta[i] > MaxVal) {
                Theta[i] = MaxVal;
        // per iteration trace, recorded only when plotting is requested
        if (! PlotNm.Empty()) {
            double L = Likelihood();
            IterLV.Add(TIntFltPr(iter, L));
            IterGradNormV.Add(TIntFltPr(iter, TLinAlg::Norm(GradV)));

    if (! PlotNm.Empty()) {
        TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
        TGnuPlot::PlotValV(IterGradNormV, PlotNm + ".gradnorm_Q");
        printf("MLE for Lambda completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr());
    return iter;
// Entry point of the motif counting tool.
// Loads a directed graph (-i:), renumbers its nodes if the ids are not
// 0...N-1, counts connected subgraphs with MotifSz (-m:, 3 or 4) nodes and
// writes one line per motif to a tab separated counts file under the output
// prefix (-o:, defaults to the input file name middle part). With -d: each
// motif is also drawn through TGraphViz.
// NOTE(review): the result of fopen is used without a check in the
// statements visible here; confirm that a failure is handled.
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Motifs. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input directed graph file (single directed edge per line)");
  const int MotifSz = Env.GetIfArgPrefixInt("-m:", 3, "Motif size (has to be 3 or 4)");
  const bool DrawMotifs = Env.GetIfArgPrefixBool("-d:", true, "Draw motif shapes (requires GraphViz)");
  TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "Output file prefix");
  if (OutFNm.Empty()) { OutFNm = InFNm.GetFMid(); }
  EAssert(MotifSz==3 || MotifSz==4);

  // load graph
  // the loader is chosen by the lower-cased file extension
  PNGraph G;
  if (InFNm.GetFExt().GetLc()==".ungraph") {
    TFIn FIn(InFNm);  G=TSnap::ConvertGraph<PNGraph>(TUNGraph::Load(FIn), true); }
  else if (InFNm.GetFExt().GetLc()==".ngraph") {
    TFIn FIn(InFNm);  G=TNGraph::Load(FIn); }
  else {
    G = TSnap::LoadEdgeList<PNGraph>(InFNm, 0, 1); }
  // node ids must be exactly 0...N-1, otherwise the graph is renumbered
  bool IsOk = true;
  for (int nid = 0; nid < G->GetNodes(); nid++) {
    if (! G->IsNode(nid)) { IsOk=false; break; } }
  if (! IsOk) {
    printf("Nodes of the input graph have to be numbered 0...N-1\nRenumbering nodes...\n"); 
    PNGraph OG = G; G = TNGraph::New();
    TGraphEnumUtils::GetNormalizedGraph(OG, G);
  // G = TSnap::GenRndGnm<PNGraph>(100, Kilo(1));
  // count frequency of connected subgraphs in G that have MotifSz nodes
  TD34GraphCounter GraphCounter(MotifSz);
  TSubGraphEnum<TD34GraphCounter> GraphEnum;
  GraphEnum.GetSubGraphs(G, MotifSz, GraphCounter);
  FILE *F = fopen(TStr::Fmt("%s-counts.tab", OutFNm.CStr()).CStr(), "wt");
  fprintf(F, "MotifId\tNodes\tEdges\tCount\n");
  // one output row (and optionally one picture) per counted motif
  for (int i = 0; i < GraphCounter.Len(); i++) {
    const int gid = GraphCounter.GetId(i);
    PNGraph SG = GraphCounter.GetGraph(gid);
    if (DrawMotifs) {
      TGraphViz::Plot(SG, gvlNeato, TStr::Fmt("%s-motif%03d.gif", OutFNm.CStr(), i), 
        TStr::Fmt("GId:%d  Count: %llu", gid, GraphCounter.GetCnt(gid)));
    fprintf(F, "%d\t%d\t%d\t%llu\n", gid, SG->GetNodes(), SG->GetEdges(), GraphCounter.GetCnt(gid));
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
// Entry point of the node centrality tool.
// Loads a directed edge list (-i:), computes PageRank and hubs/authorities on
// the directed graph and eigenvector, clustering, betweenness, constraint and
// closeness values on its undirected version, then writes one tab separated
// row per node to the output file (-o:).
// Column order of each row: node id, degree, closeness, betweenness,
// eigenvector, constraint, clustering coefficient, PageRank, hub, authority.
// NOTE(review): the result of fopen is used without a check in the
// statements visible here; confirm that a failure is handled.
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Node Centrality. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input un/directed graph");
  const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "node_centrality.tab", "Output file");
  printf("Loading %s...", InFNm.CStr());
  PNGraph Graph = TSnap::LoadEdgeList<PNGraph>(InFNm);
  //PNGraph Graph = TSnap::GenRndGnm<PNGraph>(10, 10);
  //TGraphViz::Plot(Graph, gvlNeato, InFNm+".gif", InFNm, true);
  printf("nodes:%d  edges:%d\n", Graph->GetNodes(), Graph->GetEdges());
  PUNGraph UGraph = TSnap::ConvertGraph<PUNGraph>(Graph); // undirected version of the graph
  // one hash per measure, keyed by node id
  TIntFltH BtwH, EigH, PRankH, CcfH, CloseH, HubH, AuthH;
  printf("Treat graph as DIRECTED: ");
  printf(" PageRank... ");             TSnap::GetPageRank(Graph, PRankH, 0.85);
  printf(" Hubs&Authorities...");      TSnap::GetHits(Graph, HubH, AuthH);
  printf("\nTreat graph as UNDIRECTED: ");
  printf(" Eigenvector...");           TSnap::GetEigenVectorCentr(UGraph, EigH);
  printf(" Clustering...");            TSnap::GetNodeClustCf(UGraph, CcfH);
  printf(" Betweenness (SLOW!)...");   TSnap::GetBetweennessCentr(UGraph, BtwH, 1.0);
  printf(" Constraint (SLOW!)...");    TNetConstraint<PUNGraph> NetC(UGraph, true);
  printf(" Closeness (SLOW!)...");
  // closeness is computed node by node
  for (TUNGraph::TNodeI NI = UGraph->BegNI(); NI < UGraph->EndNI(); NI++) {
    const int NId = NI.GetId();
    CloseH.AddDat(NId, TSnap::GetClosenessCentr<PUNGraph>(UGraph, NId, false));
  printf("\nDONE! saving...");
  FILE *F = fopen(OutFNm.CStr(), "wt");
  fprintf(F,"#Network: %s\n", InFNm.CStr());
  fprintf(F,"#Nodes: %d\tEdges: %d\n", Graph->GetNodes(), Graph->GetEdges());
  // one row per node of the undirected graph
  for (TUNGraph::TNodeI NI = UGraph->BegNI(); NI < UGraph->EndNI(); NI++) {
    const int NId = NI.GetId();
    const double DegCentr = UGraph->GetNI(NId).GetDeg();
    const double CloCentr = CloseH.GetDat(NId);
    const double BtwCentr = BtwH.GetDat(NId);
    const double EigCentr = EigH.GetDat(NId);
    const double Constraint = NetC.GetNodeC(NId);
    const double ClustCf = CcfH.GetDat(NId);
    const double PgrCentr = PRankH.GetDat(NId);
    const double HubCentr = HubH.GetDat(NId);
    const double AuthCentr = AuthH.GetDat(NId);
    fprintf(F, "%d\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n", NId, 
      DegCentr, CloCentr, BtwCentr, EigCentr, Constraint, ClustCf, PgrCentr, HubCentr, AuthCentr);
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
// Entry point of the Rolx role extraction tool.
// Loads a directed edge list (-i:), extracts node features, factorizes the
// feature matrix for every number of roles between -l: and -u:, keeps the
// factorization with the smallest description length and writes the role of
// each node under the output name (-o:).
// NOTE(review): what happens after the range validation message is not
// visible here; confirm that the program stops on invalid bounds.
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Rolx. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "graph.txt", "Input graph (one edge per line, tab/space separated)");
  const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "roles.txt", "Output file name prefix");
  const int MinRoles = Env.GetIfArgPrefixInt("-l:", 2, "Lower bound of the number of roles");
  const int MaxRoles = Env.GetIfArgPrefixInt("-u:", 3, "Upper bound of the number of roles");
  // threshold handed to the factorization
  double Threshold = 1e-6;
  if (MinRoles > MaxRoles || MinRoles < 2) {
    printf("min roles and max roles should be integer and\n");
    printf("2 <= min roles <= max roles\n");
  printf("loading file...\n");
  PNGraph Graph = TSnap::LoadEdgeList<PNGraph>(InFNm, 0, 1);
  printf("extracting features...\n");
  TIntFtrH Features = ExtractFeatures(Graph);
  TIntIntH NodeIdMtxIdH = CreateNodeIdMtxIdxHash(Features);
  TFltVV V = ConvertFeatureToMatrix(Features, NodeIdMtxIdH);
  //printf("saving features...\n");
  //FPrintMatrix(V, "v.txt");
  // NOTE(review): the message below is printed although the call that saves
  // the matrix is commented out above
  printf("feature matrix is saved in v.txt\n");
  // model selection: keep the factorization with the smallest error
  TFlt MnError = TFlt::Mx;
  TFltVV FinalG, FinalF;
  int NumRoles = -1;
  for (int r = MinRoles; r <= MaxRoles; ++r) {
    TFltVV G, F;
    printf("factorizing for %d roles...\n", r);
    CalcNonNegativeFactorization(V, r, G, F, Threshold);
    //FPrintMatrix(G, "g.txt");
    //FPrintMatrix(F, "f.txt");
    TFlt Error = CalcDescriptionLength(V, G, F);
    if (Error < MnError) {
      MnError = Error;
      FinalG = G;
      FinalF = F;
      NumRoles = r;
  //FPrintMatrix(FinalG, "final_g.txt");
  //FPrintMatrix(FinalF, "final_f.txt");
  printf("using %d roles, min error: %f\n", NumRoles, MnError());
  TIntIntH Roles = FindRoles(FinalG, NodeIdMtxIdH);
  FPrintRoles(Roles, OutFNm);
  //PlotRoles(Graph, Roles);
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
// Wraps GraphPt. When CheckNodeIds() fails, Graph is replaced by the result
// of TSnap::ConvertGraph with the renumbering flag set, and the time taken
// is printed; otherwise Graph refers to GraphPt itself.
TUNGraphMtx::TUNGraphMtx(const PUNGraph& GraphPt) : Graph() { 
  Graph = GraphPt;
  if (! CheckNodeIds()) {
    printf("  Renumbering %d nodes....", GraphPt->GetNodes());
    TExeTm ExeTm;
    Graph = TSnap::ConvertGraph<PUNGraph>(GraphPt, true);
    /*TIntSet NIdSet;
    for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    Graph = TUNGraph::New();  *Graph = *GraphPt; */
    printf("done [%s]\n", ExeTm.GetStr());
// Prints the sizes of CurItemH and CandItemH together with the elapsed time
// and returns the number of entries in CurItemH.
// FqItemsetLen is not used by the statements visible here.
// NOTE(review): the commented-out block is marked as the slow variant; the
// statements that replace it are not visible here, confirm.
int TTrawling::GetNextFqItemSets(const int& FqItemsetLen) {
  TExeTm ExeTm;
  /* // slow
  GenCandidates(); // CurItemH --> CandItemH
  printf(" S[%d][%s]", CandItemH.Len(), ExeTm.GetStr());
  CountSupport();  // set counters in CandItemH
  printf("T[%s]", ExeTm.GetStr());
  ThresholdSupp(); // CandItemH --> CurItemH
  printf("  Items:  %d\n", CurItemH.Len());*/
  printf("  cur: %d cand: %d [%s]", CurItemH.Len(), CandItemH.Len(), ExeTm.GetStr());
  return CurItemH.Len();
// Entry point of the network community detection tool.
// Loads an undirected edge list (-i:), runs the algorithm selected by -a:
// (1 Girvan-Newman, 2 Clauset-Newman-Moore, 3 Infomap; anything else ends in
// Fail) and writes a commented header followed by one line per node with its
// community index to the output file (-o:).
// Q holds the modularity for algorithms 1 and 2 and is reported as the
// average code length for algorithm 3.
// NOTE(review): the result of fopen is used without a check in the
// statements visible here; confirm that a failure is handled.
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Network community detection. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "graph.txt", "Input graph (undirected graph)");
  const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "communities.txt", "Output file");
  const int CmtyAlg = Env.GetIfArgPrefixInt("-a:", 2, "Algorithm: 1:Girvan-Newman, 2:Clauset-Newman-Moore, 3:Infomap");

  PUNGraph Graph = TSnap::LoadEdgeList<PUNGraph>(InFNm, false);
  //PUNGraph Graph = TSnap::LoadEdgeList<PUNGraph>("../as20graph.txt", false);
  //PUNGraph Graph = TSnap::GenRndGnm<PUNGraph>(5000, 10000); // generate a random graph

  TCnComV CmtyV;
  double Q = 0.0;
  TStr CmtyAlgStr;
  // dispatch on the requested algorithm
  if (CmtyAlg == 1) {
    CmtyAlgStr = "Girvan-Newman";
    Q = TSnap::CommunityGirvanNewman(Graph, CmtyV); }
  else if (CmtyAlg == 2) {
    CmtyAlgStr = "Clauset-Newman-Moore";
    Q = TSnap::CommunityCNM(Graph, CmtyV); }
  else if (CmtyAlg == 3) {
    CmtyAlgStr = "Infomap";
    Q = TSnap::Infomap(Graph, CmtyV); }
  else { Fail; }

  // header lines start with #, data lines are node id and community index
  FILE *F = fopen(OutFNm.CStr(), "wt");
  fprintf(F, "# Input: %s\n", InFNm.CStr());
  fprintf(F, "# Nodes: %d    Edges: %d\n", Graph->GetNodes(), Graph->GetEdges());
  fprintf(F, "# Algoritm: %s\n", CmtyAlgStr.CStr());
  if (CmtyAlg!=3) {
    fprintf(F, "# Modularity: %f\n", Q);
  } else {
    fprintf(F, "# Average code length: %f\n", Q);
  fprintf(F, "# Communities: %d\n", CmtyV.Len());
  fprintf(F, "# NId\tCommunityId\n");
  for (int c = 0; c < CmtyV.Len(); c++) {
    for (int i = 0; i < CmtyV[c].Len(); i++) {
      fprintf(F, "%d\t%d\n", CmtyV[c][i].Val, c);

  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
// Generates NKron Kronecker graphs from FitMtx and collects degree statistics.
//   Args          - option string parsed for -n: (graph count), -i: (Kronecker
//                   iterations) and -isdir: ("true"/"false").
//   FitMtx        - initiator matrix handed to TKronMtx::GenFastKronecker.
//   KronDegAvgIn  - in/out: passed to AddDegreesStat(..., true) per graph and to
//                   GetAvgDegreeStat after the loop.
//   KronDegAvgOut - in/out: same, with AddDegreesStat(..., false).
// Summary lines are written to TFile, a stream declared outside this function.
// NOTE(review): this overwrites the global Env with one built from Args.
void GenKron(const TStr& Args, TKronMtx& FitMtx, TFltPrV& KronDegAvgIn, TFltPrV& KronDegAvgOut){
	Env = TEnv(Args, TNotify::NullNotify);
	TExeTm ExecTime;
	// number of Kronecker graphs to generate
	const TInt NKron = Env.GetIfArgPrefixInt("-n:", 1, "Number of generated Kronecker graphs");
	// iterations of Kronecker product
	const TInt NIter = Env.GetIfArgPrefixInt("-i:", 10, "Iterations of Kronecker product");
	// is graph directed?
	TStr IsDir = Env.GetIfArgPrefixStr("-isdir:", "false", "Produce directed graph (true, false)");
	TFlt ExpectedNodes = FitMtx.GetNodes(NIter), ExpectedEdges = FitMtx.GetEdges(NIter);
	TFile << "Kronecker nodes: " << ExpectedNodes << ", expected Kronecker edges: " << ExpectedEdges << endl;
	double Sec = 0.0;
	// Running extremes and sums of the per-graph maximum degree; the Avg* values
	// are divided by NKron only when printed on the last iteration.
	int AvgMaxOutDeg = 0, AvgMaxInDeg = 0, MinMaxOutDeg = 0, MaxMaxOutDeg = 0, MinMaxInDeg = 0, MaxMaxInDeg = 0;
   bool Dir = IsDir == "true" ? true : false;

	for (int i = 0; i < NKron; i++){
      PNGraph Kron = TKronMtx::GenFastKronecker(FitMtx, NIter, Dir, 0);
		// NOTE(review): ExecTime is not reset anywhere in the visible code, so this
		// presumably adds the time elapsed since construction on every pass
		// (which also includes the statistics work below) -- confirm intent.
		Sec += ExecTime.GetSecs();
		printf("Calculating maximum degree...\n");
		// The string flags of GetMaxMinDeg are defined elsewhere; judging by the
		// variable names, the third selects in- vs out-degree -- TODO confirm.
		int MaxOutDeg = GetMaxMinDeg(Kron, IsDir, "false", "true"), MaxInDeg = GetMaxMinDeg(Kron, IsDir, "true", "true");
		CompareDeg(i, MaxOutDeg, MinMaxOutDeg, MaxMaxOutDeg, AvgMaxOutDeg);
		CompareDeg(i, MaxInDeg, MinMaxInDeg, MaxMaxInDeg, AvgMaxInDeg);

		//printf("Nodes count: %d, nodes with non-zero degree %d, edges count %d\n max deg = %d\n", kron->GetNodes(), TSnap::CntNonZNodes(kron), kron->GetEdges(), MaxDeg);
		// Report the max-degree summary once, after the last graph was processed.
		if (i == NKron - 1){
			//TFile << "Clustering coefficient: " << TSnap::GetClustCf(kron) << endl;
			//TSnap::PlotHops(kron, "kronSingle");
			TFile << "Maximum output degree in kron graph: " << "from " << MinMaxOutDeg << " to " << MaxMaxOutDeg << " (average: " << (double)AvgMaxOutDeg / (double)NKron << ")" << endl;
			TFile << "Maximum input degree in kron graph: " << "from " << MinMaxInDeg << " to " << MaxMaxInDeg << " (average: " << (double)AvgMaxInDeg / (double)NKron << ")" << endl;
		AddDegreesStat(KronDegAvgIn, Kron, true);
		AddDegreesStat(KronDegAvgOut, Kron, false);
	// NOTE(review): divides by NKron with no guard against -n:0.
	Sec /= NKron;

    GetAvgDegreeStat(KronDegAvgIn, NKron);
    GetAvgDegreeStat(KronDegAvgOut, NKron);
	TFile << "Average time of generation of Kronecker product: " <<  Sec << endl;
// Entry point of the Clique Percolation Method tool.
// Options: -i: input graph ("DEMO", *.ungraph, *.ngraph, or an edge list),
// -k: minimum clique overlap, -o: output prefix (defaults to the input file's
// middle name). Writes the communities, one tab-separated line of node ids per
// community, to "cpm-<prefix>.txt", prints the run time and returns 0.
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Clique Percolation Method. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "../as20graph.txt", "Input undirected graph file (single directed edge per line)");
  const int OverlapSz = Env.GetIfArgPrefixInt("-k:", 2, "Min clique overlap");
  TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "Output file prefix");
  if (OutFNm.Empty()) { OutFNm = InFNm.GetFMid(); }

  PUNGraph G;
  if (InFNm == "DEMO") { // small demo graph
    // Nodes 1..7; node 1 gets no edge in the visible code.
    G = TUNGraph::New();
    for (int i = 1; i < 8; i++) { G->AddNode(i); }
    G->AddEdge(2,3); G->AddEdge(2,4);
    G->AddEdge(4,5); G->AddEdge(4,7);
    G->AddEdge(5,6); G->AddEdge(5,7);
    // draw the small graph using GraphViz
    TSnap::DrawGViz(G, gvlNeato, "small_graph.png", "", true); 
  // load graph
  // The loader is chosen by the lower-cased file extension.
  else if (InFNm.GetFExt().GetLc()==".ungraph") {
    TFIn FIn(InFNm);  G=TUNGraph::Load(FIn); }
  else if (InFNm.GetFExt().GetLc()==".ngraph") {
    // A stored directed graph is converted to an undirected one.
    TFIn FIn(InFNm);  G=TSnap::ConvertGraph<PUNGraph>(TNGraph::Load(FIn), false); }
  else {
    // Presumably reads source/destination ids from columns 0 and 1 -- TODO confirm.
    G = TSnap::LoadEdgeList<PUNGraph>(InFNm, 0, 1); }
  // find communities
  // The clique-size argument is the requested overlap plus one.
  TVec<TIntV> CmtyV;
  TCliqueOverlap::GetCPMCommunities(G, OverlapSz+1, CmtyV);
  // save result
  // NOTE(review): the fopen() result is not checked and no fclose(F) is visible.
  FILE *F = fopen(TStr::Fmt("cpm-%s.txt", OutFNm.CStr()).CStr(), "wt");
  fprintf(F, "# %d Overlapping Clique Percolation Communities (min clique overlap %d)\n", CmtyV.Len(), OverlapSz);
  fprintf(F, "# Each line contains nodes belonging to the same community community\n");
  for (int i = 0; i < CmtyV.Len(); i++) {
    // First id without a leading tab, the rest tab-prefixed.
    // NOTE(review): assumes every community has at least one node.
    fprintf(F, "%d", CmtyV[i][0].Val);
    for (int j = 1; j < CmtyV[i].Len(); j++) {
      fprintf(F, "\t%d", CmtyV[i][j].Val);
    fprintf(F, "\n");
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
// Entry point of the AGM graph generator ("agmgen").
// Options: -i: community affiliation file or "DEMO", -o: output prefix,
// -rs: random seed, -a: and -c: density coefficients forwarded to TAGM::GenAGM.
// Saves the generated graph to "<prefix>.edgelist.txt"; graphs with fewer than
// 50 nodes are additionally drawn to "<prefix>.graph.gif". Returns 0.
int main(int argc, char* argv[]) {
	Env = TEnv(argc, argv, TNotify::StdNotify);
	Env.PrepArgs(TStr::Fmt("agmgen. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
	TExeTm ExeTm;
	const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "DEMO", "Community affiliation data");
	const TStr OutFPrx = Env.GetIfArgPrefixStr("-o:", "agm", "out file name prefix");
	const int RndSeed = Env.GetIfArgPrefixInt("-rs:",10,"Rnd Seed");
	const double DensityCoef= Env.GetIfArgPrefixFlt("-a:",0.6,"Power-law Coefficient a of density (density ~ N^(-a)");
	const double ScaleCoef= Env.GetIfArgPrefixFlt("-c:",1.3,"Scaling Coefficient c of density (density ~ c");

	TRnd Rnd(RndSeed);
	// Community memberships handed to TAGM::GenAGM below.
	TVec<TIntV> CmtyVV;
	if(InFNm=="DEMO") {
		// Two demo communities iterated over 0..24 and 15..39.
		// NOTE(review): the statements that size CmtyVV and fill the two
		// communities are not visible here; NIdV is unused in the visible code.
		TIntV NIdV;
		for(int i=0;i<25;i++) {
			TIntV& CmtyV = CmtyVV[0];
		for(int i=15;i<40;i++) {
			TIntV& CmtyV = CmtyVV[1];
	else {
		// NOTE(review): this declaration appears to shadow the CmtyVV declared
		// above, so communities loaded in this branch would not reach GenAGM --
		// confirm against the full file.
		TVec<TIntV> CmtyVV;
		// Whitespace-separated input; each non-empty line yields one TIntV.
	  TSsParser Ss(InFNm, ssfWhiteSep);
	  while (Ss.Next()) {
			if(Ss.GetFlds()>0) {
				TIntV CmtyV;
				for(int i=0;i<Ss.GetFlds();i++) {
		printf("community loading completed (%d communities)\n",CmtyVV.Len());
	PUNGraph AG = TAGM::GenAGM(CmtyVV,DensityCoef,ScaleCoef,Rnd);
	TSnap::SaveEdgeList(AG,OutFPrx + ".edgelist.txt");
	// Only small graphs are rendered.
	if(AG->GetNodes()<50) {
		TAGM::GVizComGraph(AG,CmtyVV,OutFPrx + ".graph.gif");
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
// Computes summary statistics of G and emits them as an HTML table.
//   G      - graph to analyse.
//   OutFNm - output prefix; when empty the HTML goes to stdout, otherwise to
//            "<OutFNm>.html", and the graph is also saved to "<OutFNm>.txt".
//   Desc   - description shown in the file table and passed to SaveEdgeList.
// Progress markers ("C", "D", "CC") with timings are printed to stdout while
// the clustering, diameter and component computations run.
// NOTE(review): the fopen() result is not checked and no fclose(F) is visible.
void PrintGraphStatTable(const PGraph& G, TStr OutFNm, TStr Desc="") {
  TFltPrV DegCCfV;
  int64 ClosedTriads, OpenTriads;
  int FullDiam;
  double EffDiam;
  TSnap::PrintInfo(G, OutFNm);
  TExeTm ExeTm; printf("C");
  const double CCF = TSnap::GetClustCf(G, DegCCfV, ClosedTriads, OpenTriads);
  printf("[%s]D", ExeTm.GetStr());
  // EffDiam and FullDiam are output arguments filled by this call.
  TSnap::GetBfsEffDiam(G, 1000, false, EffDiam, FullDiam);
  printf("[%s]CC", ExeTm.GetStr());
  PGraph WCC = TSnap::GetMxWcc(G);
  PGraph SCC = TSnap::GetMxScc(G);
  printf("[%s]\n", ExeTm.GetStr());
  // Default to stdout; switch to an .html file when a prefix was given.
  FILE* F = stdout;
  if (! OutFNm.Empty()) {
    F = fopen(TStr::Fmt("%s.html", OutFNm.CStr()).CStr(), "wt"); }
  fprintf(F, "\n");
  fprintf(F, "<table id=\"datatab\" summary=\"Dataset statistics\">\n");
  fprintf(F, "  <tr> <th colspan=\"2\">Dataset statistics</th> </tr>\n");
  fprintf(F, "  <tr><td>Nodes</td> <td>%d</td></tr>\n", G->GetNodes());
  fprintf(F, "  <tr><td>Edges</td> <td>%d</td></tr>\n", G->GetEdges());
  // Component sizes are shown as absolute counts and as fractions of G.
  // NOTE(review): the ratios below are not guarded against a graph with zero
  // nodes/edges or against ClosedTriads+OpenTriads == 0.
  fprintf(F, "  <tr><td>Nodes in largest WCC</td> <td>%d (%.3f)</td></tr>\n", WCC->GetNodes(), WCC->GetNodes()/double(G->GetNodes()));
  fprintf(F, "  <tr><td>Edges in largest WCC</td> <td>%d (%.3f)</td></tr>\n", WCC->GetEdges(), WCC->GetEdges()/double(G->GetEdges()));
  fprintf(F, "  <tr><td>Nodes in largest SCC</td> <td>%d (%.3f)</td></tr>\n", SCC->GetNodes(), SCC->GetNodes()/double(G->GetNodes()));
  fprintf(F, "  <tr><td>Edges in largest SCC</td> <td>%d (%.3f)</td></tr>\n", SCC->GetEdges(), SCC->GetEdges()/double(G->GetEdges()));
  fprintf(F, "  <tr><td>Average clustering coefficient</td> <td>%.4f</td></tr>\n", CCF);
  fprintf(F, "  <tr><td>Number of triangles</td> <td>%s</td></tr>\n", TUInt64(ClosedTriads).GetStr().CStr());
  fprintf(F, "  <tr><td>Fraction of closed triangles</td> <td>%.4g</td></tr>\n", ClosedTriads/double(ClosedTriads+OpenTriads));
  fprintf(F, "  <tr><td>Diameter (longest shortest path)</td> <td>%d</td></tr>\n", FullDiam);
  fprintf(F, "  <tr><td>90-percentile effective diameter</td> <td>%.2g</td></tr>\n", EffDiam);
  fprintf(F, "</table>\n");
  fprintf(F, "<br>\n");
  // With an output prefix, also emit a one-row file table and save the graph.
  // NOTE(review): the table links to "<OutFNm>.txt.gz" while the file written
  // below is "<OutFNm>.txt"; presumably compressed in a separate step.
  if (! OutFNm.Empty()) {
    fprintf(F, "\n<table id=\"datatab\" summary=\"Table of datasets\">\n");
    fprintf(F, "<tr>\n");
	  fprintf(F, "  <th>File</th>\n");
	  fprintf(F, "  <th>Description</th>\n");
    fprintf(F, "</tr>\n");
    fprintf(F, "<tr>\n");
	  fprintf(F, "  <td><a href=\"%s.txt.gz\">%s.txt.gz</a></td>\n", OutFNm.CStr(), OutFNm.CStr());
	  fprintf(F, "  <td>%s</td>\n", Desc.CStr());
    fprintf(F, "</tr>\n");
    fprintf(F, "</table>\n");
    TSnap::SaveEdgeList(G, OutFNm+".txt", Desc);
// Grows Graph to GraphNodes nodes: each new node picks a random existing start
// node, runs BurnGeoFire from it and is linked to every burned node.
//   GraphNodes - target node count.
//   FloodStop  - when true, abort once the graph has more than 1000 edges and
//                more than 100 edges per node.
// Returns TFfGGen::srFlood on such an abort, TFfGGen::srOk otherwise.
// NOTE(review): the printf below dereferences Graph before the Graph.Empty()
// check a few lines later -- confirm Graph is always set on entry.
TFfGGen::TStopReason TUndirFFire::AddNodes(const int& GraphNodes, const bool& FloodStop) {
	printf("\n***Undirected GEO ForestFire: graph(%d,%d) add %d nodes, burn prob %.3f\n",
		Graph->GetNodes(), Graph->GetEdges(), GraphNodes, BurnProb);
	TExeTm ExeTm;
	int Burned1 = 0, Burned2 = 0, Burned3 = 0; // last 3 fire sizes
	// (NNodes, NEdges) snapshots; only appended to in the visible code.
	TIntPrV NodesEdgesV;
	// create initial set of nodes
	if (Graph.Empty()) { Graph = PUNGraph::New(); }
	if (Graph->GetNodes() == 0) { Graph->AddNode(); }
	// Local edge counter, cross-checked against the graph at the end.
	int NEdges = Graph->GetEdges();
	// forest fire
	for (int NNodes = Graph->GetNodes() + 1; NNodes <= GraphNodes; NNodes++) {
		const int NewNId = Graph->AddNode(-1);
		IAssert(NewNId == Graph->GetNodes() - 1); // node ids must be dense: the new id equals the node count minus one
		// Start node drawn from the ids below NewNId (presumably uniform over
		// [0, NewNId) -- confirm against TRnd::GetUniDevInt).
		const int StartNId = Rnd.GetUniDevInt(NewNId);
		const int NBurned = BurnGeoFire(StartNId);
		// add edges to burned nodes
		for (int e = 0; e < NBurned; e++) {
			Graph->AddEdge(NewNId, GetBurnedNId(e));
		NEdges += NBurned;
		// Shift the three-entry history of fire sizes.
		Burned1 = Burned2;  Burned2 = Burned3;  Burned3 = NBurned;
		// Periodic progress report (Kilo(1) is presumably 1000 -- confirm).
		if (NNodes % Kilo(1) == 0) {
			printf("(%d, %d)    burned: [%d,%d,%d]  [%s]\n", NNodes, NEdges, Burned1, Burned2, Burned3, ExeTm.GetStr());
			NodesEdgesV.Add(TIntPr(NNodes, NEdges));
		if (FloodStop && NEdges>1000 && NEdges / double(NNodes)>100.0) { // more than 100 edges per node on average
			printf("!!! FLOOD. G(%6d, %6d)\n", NNodes, NEdges);  return TFfGGen::srFlood;
	// Every AddEdge above is expected to have created a new edge.
	IAssert(Graph->GetEdges() == NEdges);
	return TFfGGen::srOk;
// Runs the per-node RecurBfs overload from every node of NGraph with depth
// limit MxDepth, then prints how many candidate sub-graphs are in SgV and how
// many positions in SgV differ from their predecessor.
// NOTE(review): the second figure equals the number of distinct entries only
// if SgV is sorted at that point, which is not visible here; Cnt also starts
// at 1, so an empty SgV is reported as 1 distinct.
void TSubGraphsEnum::RecurBfs(const int& MxDepth) {
  TExeTm ExeTm;
  for (TNGraph::TNodeI NI = NGraph->BegNI(); NI < NGraph->EndNI(); NI++) {
    // Fresh scratch graph for each start node.
    TSimpleGraph SimpleG;
    RecurBfs(NI.GetId(), MxDepth, SimpleG);
  printf("\ncandidates: %d\n", SgV.Len());
  // Count runs of equal neighbours in SgV.
  int Cnt = 1;
  for (int i = 1; i < SgV.Len(); i++) {
    if (SgV[i-1] != SgV[i]) Cnt++;
  printf("distinct:   %d\t[%s]\n", Cnt, ExeTm.GetTmStr());
// Grows the sub-graph set SgV level by level, from 3 up to MaxEdges edges.
// At each level every pair of current sub-graphs is combined with
// TSimpleGraph::Join; successful joins are collected in NextSgV and SgV is then
// rebuilt from NextSgV. Counts and timings are printed per level.
// The first printf reports SgV as the "2 edge graphs", so SgV is presumably
// pre-populated by the caller -- TODO confirm.
// NOTE(review): NextSgV is not declared in this function and is not visibly
// cleared per level; the body of the de-duplication `if` and any seeding of SgV
// before SgV.Last() is read are not visible here; EdgeV is unused in the
// visible code.
void TSubGraphsEnum::EnumSubGraphs(const int& MaxEdges) {
  TExeTm ExeTm;
  printf("  %2d edge graphs:  %d\t[%s]\n", 2, SgV.Len(), ExeTm.GetTmStr());  ExeTm.Tick();
  //for (int i = 0; i < SgV.Len(); i++) { SgV[i].Dump(TStr::Fmt("  %d", i+1)); }
  // Scratch graph that receives the result of each Join.
  TSimpleGraph SimpleG;
  TIntPrV& EdgeV = SimpleG.GetEdgeV();
  // multiple edge sub-graphs
  for (int edges = 3; edges <= MaxEdges; edges++) {
    printf("  %2d edge graphs:", edges);
    // All unordered pairs (g1 < g2) of the current level.
    for (int g1 = 0; g1 < SgV.Len()-1; g1++) {
      for (int g2 = g1+1; g2 < SgV.Len(); g2++) {
        if (SimpleG.Join(SgV[g1], SgV[g2])) { NextSgV.Add(SimpleG); }
    printf("  candidates: %8d [%s]", NextSgV.Len(), ExeTm.GetTmStr());  ExeTm.Tick();
    // Rebuild SgV from the candidates, comparing each one with the last kept
    // entry (Gen presumably reserves NextSgV.Len() slots with length 0 -- confirm).
    SgV.Gen(NextSgV.Len(), 0);
    for (int i = 1; i < NextSgV.Len(); i++) {
      if (SgV.Last() != NextSgV[i]) {
    printf("  total: %8d [%s]\n", SgV.Len(), ExeTm.GetTmStr());  ExeTm.Tick();
    //for (int i = 0; i < SgV.Len(); i++) { SgV[i].Dump(TStr::Fmt("  %d", i+1)); }
/// Rewires the network: node degrees are kept while edges are randomly switched.
/// Use this function to generate a random graph with the same degree sequence
/// as OrigGraph.
/// See: On the uniform generation of random graphs with prescribed degree
/// sequences by R. Milo, N. Kashtan, S. Itzkovitz, M. E. J. Newman, U. Alon
/// URL: http://arxiv.org/abs/cond-mat/0312028
///
/// OrigGraph: graph whose edges seed the edge set.
/// NSwitch:   multiplier; 2*Edges*NSwitch switch attempts are made.
/// Rnd:       random generator used to pick the edge pairs.
/// Returns a new graph built from the final edge set.
///
/// NOTE(review): in the visible code the two selected edges are deleted on a
/// successful switch but no insertion of NewE1/NewE2 is shown, and no nodes are
/// added to Graph before AddEdge -- confirm against the full file.
PUNGraph GenRewire(const PUNGraph& OrigGraph, const int& NSwitch, TRnd& Rnd) {
  const int Nodes = OrigGraph->GetNodes();
  const int Edges = OrigGraph->GetEdges();
  PUNGraph GraphPt = TUNGraph::New();
  TUNGraph& Graph = *GraphPt;
  Graph.Reserve(Nodes, -1);
  TExeTm ExeTm;
  // generate a graph that satisfies the constraints
  printf("Randomizing edges (%d, %d)...\n", Nodes, Edges);
  TIntPrSet EdgeSet(Edges);
  for (TUNGraph::TNodeI NI = OrigGraph->BegNI(); NI < OrigGraph->EndNI(); NI++) {
    const int NId = NI.GetId();
    for (int e = 0; e < NI.GetOutDeg(); e++) {
      // Keep each undirected edge once and drop self-loops: only pairs with
      // NId > neighbour are stored, i.e. as (larger id, smaller id).
      if (NId <= NI.GetOutNId(e)) { continue; }
      EdgeSet.AddKey(TIntPr(NId, NI.GetOutNId(e)));
  // edge switching
  // skip counts attempts that did not produce a switch.
  // NOTE(review): the attempt count is computed in unsigned arithmetic and can
  // wrap for large Edges*NSwitch.
  uint skip=0;
  for (uint swps = 0; swps < 2*uint(Edges)*uint(NSwitch); swps++) {
    const int keyId1 = EdgeSet.GetRndKeyId(Rnd);
    const int keyId2 = EdgeSet.GetRndKeyId(Rnd);
    if (keyId1 == keyId2) { skip++; continue; }
    const TIntPr& E1 = EdgeSet[keyId1];
    const TIntPr& E2 = EdgeSet[keyId2];
    // Candidate replacement edges pair up the first endpoints and the second
    // endpoints of the two picked edges.
    TIntPr NewE1(E1.Val1, E2.Val1), NewE2(E1.Val2, E2.Val2);
    // Normalise candidates to (smaller id, larger id).
    // NOTE(review): this is the opposite orientation to the keys stored above,
    // so the IsKey() checks below may not detect an existing edge -- confirm.
    if (NewE1.Val1 > NewE1.Val2) { Swap(NewE1.Val1, NewE1.Val2); }
    if (NewE2.Val1 > NewE2.Val2) { Swap(NewE2.Val1, NewE2.Val2); }
    // Accept only if the candidates differ, are not self-loops and are not
    // already in the set.
    if (NewE1!=NewE2 && NewE1.Val1!=NewE1.Val2 && NewE2.Val1!=NewE2.Val2 && ! EdgeSet.IsKey(NewE1) && ! EdgeSet.IsKey(NewE2)) {
      EdgeSet.DelKeyId(keyId1);  EdgeSet.DelKeyId(keyId2);
    } else { skip++; }
    // Progress line once every Edges attempts, plus a wall-clock cut-off.
    if (swps % Edges == 0) {
      printf("\r  %uk/%uk: %uk skip [%s]", swps/1000u, 2*uint(Edges)*uint(NSwitch)/1000u, skip/1000u, ExeTm.GetStr());
      if (ExeTm.GetSecs() > 2*3600) { printf(" *** Time limit!\n"); break; } // time limit 2 hours
  printf("\r  total %uk switchings attempted, %uk skiped  [%s]\n", 2*uint(Edges)*uint(NSwitch)/1000u, skip/1000u, ExeTm.GetStr());
  // Materialise the final edge set into the result graph.
  for (int e = 0; e < EdgeSet.Len(); e++) {
    Graph.AddEdge(EdgeSet[e].Val1, EdgeSet[e].Val2); }
  return GraphPt;
// Runs em_sub on ExM.CPU threads, averages the per-thread results and saves them.
// Each thread i gets its own Alphas[i] and ThVs[i] (a vector of ExM.W+1 values).
// After joining, Alphas and every ThVs entry are summed into slot 0 and divided
// by ExM.CPU; the averaged vector is optionally passed to ExM.TrimTailNTh and
// then written to the file named by ExM.GetBNTHFNm() with a header comment.
// NOTE(review): ThVs is an array of class type whose size is a runtime value,
// which is a compiler extension rather than standard C++; the divisions by
// ExM.CPU and ExM.GetRpt() are not guarded against zero.
void em_multi(ExamMgr& ExM) {
	// Timer for the "Avg time cost" figure in the saved header.
	TExeTm tm;
	TFltV Alphas(ExM.CPU), ThVs[ExM.CPU];
	for (int i=0; i<ExM.CPU; i++) ThVs[i] = TFltV(ExM.W+1);
	std::vector<std::thread> threads;
	// The index is captured by value; the shared containers by reference. Each
	// thread is handed only its own slot i.
	for (int i=0; i<ExM.CPU; i++) threads.emplace_back([i, &ExM, &Alphas, &ThVs] { em_sub(i, ExM, Alphas[i], ThVs[i]); });
	for(std::thread& t: threads) t.join();
	// Reduce into slot 0: mean of the per-thread alphas.
	for (int n=1; n<ExM.CPU; n++) Alphas[0] += Alphas[n];
	Alphas[0] /= ExM.CPU;
	// Element-wise mean of the per-thread vectors.
	for (int i=0; i<=ExM.W; i++) {
		for (int n=1; n<ExM.CPU; n++) ThVs[0][i] += ThVs[n][i];
		ThVs[0][i] /= ExM.CPU;
	if (ExM.TrimTail) ExM.TrimTailNTh(ThVs[0], Alphas[0]);
	const TStr OFnm = ExM.GetBNTHFNm();
	BIO::SaveFltVWithIdx(ThVs[0], OFnm, TStr::Fmt("# Nodes: %d\n# Repeated: %d\n# Avg time cost: %.2f secs.\n# Alpha: %.6e",
			ExM.N, ExM.GetRpt(), tm.GetSecs()/ExM.GetRpt(), Alphas[0].Val));
	printf("Saved to %s\n", OFnm.CStr());
// Entry point of an unfinished tool: parses the options, loads a float-weighted
// edge list, prints node/edge counts and timings, and returns 0. The algorithm
// itself is still a TODO in the visible code.
// NOTE(review): the banner text says "Node centrality" while the comments below
// refer to the Louvain method; OutFNm, BseFNm, eps, min_moves and max_iters are
// parsed but not used in the visible code.
int main(int argc, char* argv[]) {
  setbuf(stdout, NULL); // make stdout unbuffered so progress output appears immediately
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Node centrality. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "", "input network");
  const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "output prefix (filename extensions added)");
  // Part of the output prefix after the last '/'.
  const TStr BseFNm = OutFNm.RightOfLast('/');
  const double eps = Env.GetIfArgPrefixFlt("-eps:", 1.0e-5, "minimum quality improvement threshold");
  const double min_moves = Env.GetIfArgPrefixFlt("-moves:", 1.0e-2, "minimum number of moves required (proportional)");
  const double max_iters = Env.GetIfArgPrefixFlt("-iters:", 1.0e+4, "maximum number of iterations");
  // Load the weighted graph from the input edge list
  printf("\nLoading %s...", InFNm.CStr());
  PFltWNGraph WGraph = TSnap::LoadFltWEdgeList<TWNGraph>(InFNm);
  printf(" DONE\n");
  printf("  nodes: %d\n", WGraph->GetNodes());
  printf("  edges: %d\n", WGraph->GetEdges());
  printf("  time elapsed: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  // Declare variables
  // TODO
  // Louvain method (modularity objective)
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;