Example #1
0
File: gstat.cpp Project: Accio/snap
void TGStat::TakeSpectral(const PNGraph& Graph, TFSet StatFSet, int _TakeSngVals) {
  if (_TakeSngVals == -1) { _TakeSngVals = TakeSngVals; }
  // singular values, vectors
  if (StatFSet.In(gsdSngVal)) {
    const int SngVals = TMath::Mn(_TakeSngVals, Graph->GetNodes()/2);
    TFltV SngValV1;
    TSnap::GetSngVals(Graph, SngVals, SngValV1);
    SngValV1.Sort(false);
    TFltPrV& SngValV = DistrStatH.AddDat(gsdSngVal);
    SngValV.Gen(SngValV1.Len(), 0);
    for (int i = 0; i < SngValV1.Len(); i++) {
      SngValV.Add(TFltPr(i+1, SngValV1[i]));
    }
  }
  if (StatFSet.In(gsdSngVec)) {
    TFltV LeftV, RightV;
    TSnap::GetSngVec(Graph, LeftV, RightV);
    LeftV.Sort(false);
    TFltPrV& SngVec = DistrStatH.AddDat(gsdSngVec);
    SngVec.Gen(LeftV.Len(), 0);
    for (int i = 0; i < TMath::Mn(Kilo(10), LeftV.Len()/2); i++) {
      if (LeftV[i] > 0) { SngVec.Add(TFltPr(i+1, LeftV[i])); }
    }
  }
}
Example #2
0
void plotParitialDegDistribution(const PNGraph& graph, std::vector<int>& nodeList) {
	std::map<int, int> inDegDistMap;
	std::map<int, int> outDegDistMap;
	
	for (int i = 0; i < nodeList.size(); ++i) {
		int curNodeId = nodeList[i];
		if (!graph->IsNode(curNodeId)) continue;
		TNGraph::TNodeI ni = graph->GetNI(curNodeId);

		int curNodeInDeg = ni.GetInDeg();
		if (inDegDistMap.find(curNodeInDeg) == inDegDistMap.end()) {
			inDegDistMap.insert(std::pair<int, int>(curNodeInDeg, 0));
		}
		inDegDistMap[curNodeInDeg]++;

		int curNodeOutDeg = ni.GetOutDeg();
		if (outDegDistMap.find(curNodeOutDeg) == outDegDistMap.end()) {
			outDegDistMap.insert(std::pair<int, int>(curNodeOutDeg, 0));
		}
		outDegDistMap[curNodeOutDeg]++;
		
	}
	
	TFltPrV inDegDist;
	for (std::map<int, int>::iterator itr = inDegDistMap.begin(); itr != inDegDistMap.end(); itr++) {
		inDegDist.Add(TFltPr(itr->first, itr->second));
	}

	TFltPrV outDegDist;
	for (std::map<int, int>::iterator itr = outDegDistMap.begin(); itr != outDegDistMap.end(); itr++) {
		outDegDist.Add(TFltPr(itr->first, itr->second));
	}
	
	TGnuPlot plot1("inDegDistParitial", "");
	plot1.AddPlot(inDegDist, gpwPoints, "");
	plot1.SetScale(gpsLog10XY);
	plot1.SavePng();

	TGnuPlot plot2("outDegDistParitial", "");
	plot2.AddPlot(outDegDist, gpwPoints, "");
	plot2.SetScale(gpsLog10XY);
	plot2.SavePng();

	TGnuPlot plot3("DegDistParitial", "");
	plot3.AddCmd("set key right top");
	plot3.AddPlot(inDegDist, gpwPoints, "In Degree");
	plot3.AddPlot(outDegDist, gpwPoints, "Out Degree");
	plot3.SetScale(gpsLog10XY);
	plot3.SavePng();
}
Example #3
0
// Drops every id from `ids` that is not a node of `graph`, preserving order.
static void keepExistingNodes(const PNGraph& graph, std::vector<int>& ids) {
	std::vector<int>::size_type kept = 0;
	for (std::vector<int>::size_type i = 0; i < ids.size(); ++i) {
		if (graph->IsNode(ids[i])) { ids[kept++] = ids[i]; }
	}
	ids.resize(kept);
}

// Samples `sampleSize` random (source, destination) pairs and appends a
// 20-bucket histogram of their shortest-path lengths to `ret` as
// (distance, count) pairs for distance = 0..19.
//
// srcIds / dstIds are taken by value, so the caller's vectors are untouched.
// Ids that are not nodes of `graph` are filtered out up front; if either list
// has no usable id, no sampling is done and the histogram is all zeros.
// A pair whose distance falls outside [0, 20) still counts as one of the
// `sampleSize` samples but is not recorded in the histogram.
void getSampledDistance(const PNGraph& graph, std::vector<int> srcIds, std::vector<int> dstIds, int sampleSize, TFltPrV& ret) {
	const int kMaxDist = 20;
	int distance[kMaxDist] = {0};

	std::random_shuffle(srcIds.begin(), srcIds.end());
	std::random_shuffle(dstIds.begin(), dstIds.end());

	// Filtering first (instead of rejecting inside the loop) guarantees the
	// sampling loop terminates even when none of the ids exist in the graph.
	keepExistingNodes(graph, srcIds);
	keepExistingNodes(graph, dstIds);

	// rand() % 0 is undefined, so only sample when both lists are non-empty.
	if (!srcIds.empty() && !dstIds.empty()) {
		for (int i = 0; i < sampleSize; ++i) {
			const int srcNodeId = srcIds[rand() % srcIds.size()];
			const int dstNodeId = dstIds[rand() % dstIds.size()];

			const int shortDist = TSnap::GetShortPath(graph, srcNodeId, dstNodeId, true);
			// Guard the fixed-size histogram: a negative result or a path of
			// 20+ hops must not write outside the array.
			if (shortDist >= 0 && shortDist < kMaxDist) {
				distance[shortDist]++;
			}
			printIntArray(distance, kMaxDist);
		}
	}

	for (int i = 0; i < kMaxDist; ++i) {
		ret.Add(TFltPr(i, distance[i]));
	}
}
Example #4
0
int TGnuPlot::AddLogFit(const int& PlotId, const TGpSeriesTy& SeriesTy, const TStr& Style) {
  const TGpSeries& Plot = SeriesV[PlotId];
  if(Plot.XYValV.Empty()) return -1;
  const TFltKdV& XY = Plot.XYValV;
  double A, B, R2, SigA, SigB, Chi2;
  // power fit
  TFltPrV XYPr;
  int s;
  for (s = 0; s < XY.Len(); s++) {
    if (XY[s].Key > 0) {
      XYPr.Add(TFltPr(XY[s].Key, XY[s].Dat)); } //!!! skip zero values
  }
  TSpecFunc::LogFit(XYPr, A, B, SigA, SigB, Chi2, R2);
  TStr StyleStr=Style;
  if (StyleStr.Empty()) { StyleStr = "linewidth 3"; }
  const int FitId = AddFunc(TStr::Fmt("%f+%f*log(x)", A, B),
    SeriesTy, TStr::Fmt("%.4g + %.4g log(x)  R^2:%.2g", A, B, R2), StyleStr);
  return FitId;
  /*SeriesV.Add();
  TGpSeries& NewPlot = SeriesV.Last();
  TFltKdV& EstXY = NewPlot.XYValV;
  for (s = 0; s < XYPr.Len(); s++) {
    EstXY.Add(TFltKd(XYPr[s].Val1, A+B*log((double)XYPr[s].Val1)));
  }
  NewPlot.Label = TStr::Fmt("%.4g + %.4g log(x)  R^2:%.2g", A, B, R2);
  NewPlot.SeriesTy = SeriesTy;
  if (Style.Empty()) { NewPlot.WithStyle = "linewidth 3"; }
  else { NewPlot.WithStyle = Style; }
  return SeriesV.Len() - 1;*/
}
Example #5
0
int TGnuPlot::AddExpFit(const int& PlotId, const TGpSeriesTy& SeriesTy, const double& FitXOffset, const TStr& Style) {
  const TGpSeries& Plot = SeriesV[PlotId];
  if(Plot.XYValV.Empty()) return -1;
  const TFltKdV& XY = Plot.XYValV;
  double A, B, R2, SigA, SigB, Chi2;
  // power fit
  TFltPrV XYPr;
  int s;
  for (s = 0; s < XY.Len(); s++) {
    if (XY[s].Key-FitXOffset > 0) {
      XYPr.Add(TFltPr(XY[s].Key-FitXOffset, XY[s].Dat)); } //!!! skip zero values
  }
  TSpecFunc::ExpFit(XYPr, A, B, SigA, SigB, Chi2, R2);
  TStr Label, StyleStr=Style;
  if (FitXOffset == 0) { Label = TStr::Fmt("%.4g exp(%.4g x)  R^2:%.2g", A, B, R2); }
  else { Label = TStr::Fmt("%.4g exp(%.4g x - %g)  R^2:%.2g", A, B, FitXOffset, R2); }
  if (StyleStr.Empty()) { StyleStr = "linewidth 3"; }
  const int FitId = AddFunc(TStr::Fmt("%f*exp(%f*x-%f)", A, B, FitXOffset),
    SeriesTy, Label, StyleStr);
  return FitId;
  /*SeriesV.Add();
  TGpSeries& NewPlot = SeriesV.Last();
  TFltKdV& EstXY = NewPlot.XYValV;
  for (s = 0; s < XYPr.Len(); s++) {
    EstXY.Add(TFltKd(XYPr[s].Val1+FitXOffset, A*exp(B*XYPr[s].Val1)));
  }
  NewPlot.SeriesTy = SeriesTy;
  if (Style.Empty()) { NewPlot.WithStyle = "linewidth 3"; }
  else { NewPlot.WithStyle = Style; }
  return SeriesV.Len() - 1;*/
}
Example #6
0
/// Least-squares power-law fit that keeps cutting the tail until the fit is
/// stable: after each TSpecFunc::PowerFit, points whose fitted value A*x^B
/// drops below MinY are removed and the fit is repeated on the remainder.
/// @param PlotId     index into SeriesV of the series to fit.
/// @param SeriesTy   series type used for the fitted curve.
/// @param MinX       only points with x > 0 and x >= MinX are fitted.
/// @param Style      gnuplot style string; "linewidth 3" is used when empty.
/// @param Intercept  out: coefficient A of the final fit A*x^B.
/// @param Slope      out: exponent B of the final fit.
/// @param R2         out: R^2 as reported by the last PowerFit call.
/// @return id returned by AddFunc for the fitted curve, or -1 when PlotId is
///         out of range, the series is empty, or no point passes the filter
///         (the out-parameters are left untouched in those cases).
int TGnuPlot::AddPwrFit3(const int& PlotId, const TGpSeriesTy& SeriesTy, const double& MinX, const TStr& Style, double& Intercept, double& Slope, double& R2) {
  if (PlotId < 0 || PlotId >= SeriesV.Len()) return -1;
  const TGpSeries& Plot = SeriesV[PlotId];
  if(Plot.XYValV.Empty()) return -1;
  double A, B, SigA, SigB, Chi2, MinY=TFlt::Mx;
  const TFltKdV& XY = Plot.XYValV;
  TFltPrV FitXY, NewFitXY;
  for (int s = 0; s < XY.Len(); s++) {
    if (XY[s].Key > 0 && XY[s].Key >= MinX) {
      FitXY.Add(TFltPr(XY[s].Key, XY[s].Dat)); // non-positive x is skipped
      MinY = TMath::Mn(MinY, XY[s].Dat());
    }
  }
  // nothing to fit once the x filter has been applied
  if (FitXY.Empty()) return -1;
  MinY = TMath::Mn(1.0, MinY);
  // power fit (if tail is too fat, cut everything where
  // extrapolation sets the value < MinY)
  while (true) {
    TSpecFunc::PowerFit(FitXY, A, B, SigA, SigB, Chi2, R2);
    NewFitXY.Clr(false);
    for (int s = 0; s < FitXY.Len(); s++) {
      const double YVal = A*pow(FitXY[s].Val1(), B);
      if (YVal < MinY) continue;
      NewFitXY.Add(TFltPr(FitXY[s].Val1, FitXY[s].Val2));
    }
    // stop when too few points remain or the last kept x is within a factor
    // of 1.2 of the previous last x; otherwise refit on the trimmed set
    if (NewFitXY.Len() < 10 || FitXY.Last().Val1 < 1.2 * NewFitXY.Last().Val1) { break; }
    else { FitXY.Swap(NewFitXY); }
  }
  // report the fitted coefficients; previously these assignments were only
  // present in unreachable code, so callers never received them
  Intercept = A;
  Slope = B;
  TStr StyleStr=Style;
  if (StyleStr.Empty()) { StyleStr = "linewidth 3"; }
  const int FitId = AddFunc(TStr::Fmt("%f*x**%f", A, B),
    SeriesTy, TStr::Fmt("%.1g * x^{%.4g}  R^2:%.2g", A, B, R2), StyleStr);
  return FitId;
}
Example #7
0
File: gstat.cpp Project: Accio/snap
void TGStatVec::GetValV(const TGStatVal& XVal, const TGStatVal& YVal, TFltPrV& ValV) const {
  ValV.Gen(Len(), 0);
  double x;
  for (int t = 0; t < Len(); t++) {
    if (XVal == gsvTime) { x = t+1; }
    else { x = At(t)->GetVal(XVal); }
    ValV.Add(TFltPr(x, At(t)->GetVal(YVal)));
  }
}
Example #8
0
// Inverse participation ratio: normalize EigVec to have L2=1 and then I=sum_k EigVec[i]^4
// see Spectra of "real-world" graphs: Beyond the semicircle law by Farkas, Derenyi, Barabasi and Vicsek
void PlotInvParticipRat(const PUNGraph& Graph, const int& MaxEigVecs, const int& TimeLimit, const TStr& FNmPref, TStr DescStr) {
  TFltPrV EigIprV;
  GetInvParticipRat(Graph, MaxEigVecs, TimeLimit, EigIprV);
  if (DescStr.Empty()) { DescStr = FNmPref; }
  if (EigIprV.Empty()) { DescStr+=". FAIL"; EigIprV.Add(TFltPr(-1,-1)); return; }
  TGnuPlot::PlotValV(EigIprV, "eigIPR."+FNmPref, TStr::Fmt("%s. G(%d, %d). Largest eig val = %f (%d values)",
    DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges(), EigIprV.Last().Val1(), EigIprV.Len()),
    "Eigenvalue", "Inverse Participation Ratio of corresponding Eigenvector", gpsLog10Y, false, gpwPoints);
}
Example #9
0
void TGnuPlot::MakeExpBins(const TFltPrV& XYValV, TFltPrV& ExpXYValV, const double& BinFactor, const double& MinYVal) {
  TFltKdV KdV(XYValV.Len(), 0), OutV;
  for (int i = 0; i < XYValV.Len(); i++) {
    KdV.Add(TFltKd(XYValV[i].Val1, XYValV[i].Val2)); }
  KdV.Sort();
  TGnuPlot::MakeExpBins(KdV, OutV, BinFactor, MinYVal);
  ExpXYValV.Gen(OutV.Len(), 0);
  for (int i = 0; i < OutV.Len(); i++) {
    ExpXYValV.Add(TFltPr(OutV[i].Key, OutV[i].Dat)); }
}
Example #10
0
void TFfGGen::PlotFireSize(const TStr& FNmPref, const TStr& DescStr) {
  TGnuPlot GnuPlot("fs."+FNmPref, TStr::Fmt("%s. Fire size. G(%d, %d)",
    DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges()));
  GnuPlot.SetXYLabel("Vertex id (iterations)", "Fire size (node out-degree)");
  TFltPrV IdToOutDegV;
  for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    IdToOutDegV.Add(TFltPr(NI.GetId(), NI.GetOutDeg())); }
  IdToOutDegV.Sort();
  GnuPlot.AddPlot(IdToOutDegV, gpwImpulses, "Node out-degree");
  GnuPlot.SavePng();
}
Example #11
0
void TGnuPlot::Test() {
  TFltV DeltaY;
  TFltPrV ValV1, ValV2, ValV3;
  for (int i = 1; i < 30; i++) {
    ValV1.Add(TFltPr(i, pow(double(i), 1.2)));
    DeltaY.Add(5*TInt::Rnd.GetUniDev());
    ValV2.Add(TFltPr(i, 5*i-1));
  }
  for (int i = -10; i < 20; i++) {
    ValV3.Add(TFltPr(i, 2*i + 2 + TInt::Rnd.GetUniDev()));
  }
  TGnuPlot GnuPlot("testDat", "TestPlot", true);
  GnuPlot.SetXYLabel("X", "Y");
  const int id2 = GnuPlot.AddPlot(ValV2, gpwPoints, "y=5*x-1");
  const int id3 = GnuPlot.AddPlot(ValV3, gpwPoints, "y=2*x+2");
  GnuPlot.AddErrBar(ValV1, DeltaY, "y=x^2", "Error bar");
  GnuPlot.AddLinFit(id2, gpwLines);
  GnuPlot.AddLinFit(id3, gpwLines);
  GnuPlot.Plot();
  GnuPlot.SavePng("testPlot.png");
}
Example #12
0
// Reads `fileName` line by line and appends one (lineNumber, value) pair per
// line to `ret`, where lineNumber is 1-based and value is the leading integer
// of the line. A line that does not start with an integer contributes 0.
// If the file cannot be opened no line is read and `ret` is left unchanged.
void plotpaths(char* fileName, TFltPrV& ret) {
	int lineCount = 1;
	std::ifstream inputFile(fileName);
	for (std::string line; std::getline(inputFile, line);) {
		std::istringstream fields(line);
		// Initialised so a failed extraction yields 0 under every language
		// standard instead of reading an indeterminate value.
		int a = 0;
		fields >> a;
		ret.Add(TFltPr(lineCount++, a));
	}
}
Example #13
0
// MLE power-coefficient
int TGnuPlot::AddPwrFit2(const int& PlotId, const TGpSeriesTy& SeriesTy, const double& MinX, const TStr& Style) {
  const TGpSeries& Plot = SeriesV[PlotId];
  if(Plot.XYValV.Empty()) return -1;
  const TFltKdV& XY = Plot.XYValV;
  // power fit
  TFltPrV XYPr;
  double MinY = TFlt::Mx;
  for (int s = 0; s < XY.Len(); s++) {
    if (XY[s].Key > 0.0) {
      XYPr.Add(TFltPr(XY[s].Key, XY[s].Dat));
      MinY = TMath::Mn(MinY, XY[s].Dat());
    }
  }
  if (XYPr.Empty()) return -1;
  MinY = TMath::Mn(1.0, MinY);
  // determine the sign of power coefficient
  double CoefSign = 0.0;
  { double A, B, R2, SigA, SigB, Chi2;
  TSpecFunc::PowerFit(XYPr, A, B, SigA, SigB, Chi2, R2);
  CoefSign = B > 0.0 ? +1.0 : -1.0; }
  const double PowerCf = CoefSign * TSpecFunc::GetPowerCoef(XYPr, MinX);
  int Mid = (int) exp(log((double)XYPr.Len())/2.0);
  if (Mid >= XYPr.Len()) { Mid = XYPr.Len()-1; }
  const double MidX = XYPr[Mid].Val1();
  const double MidY = XYPr[Mid].Val2();
  const double B = MidY / pow(MidX, PowerCf);
  TStr StyleStr=Style;
  if (StyleStr.Empty()) { StyleStr = "linewidth 3"; }
  const int FitId = AddFunc(TStr::Fmt("%f*x**%f", B, PowerCf),
    SeriesTy, TStr::Fmt("MLE = x^{%.4g}", PowerCf), StyleStr);
  return FitId;
  /*SeriesV.Add();
  TGpSeries& NewPlot = SeriesV.Last();
  TFltKdV& XYFit = NewPlot.XYValV;
  XYFit.Gen(XYPr.Len(), 0);
  for (int s = 0; s < XYPr.Len(); s++) {
    const double XVal = XYPr[s].Val1;
    const double YVal = B * pow(XYPr[s].Val1(), PowerCf);
    if (YVal < MinY || XVal < MinX) continue;
    XYFit.Add(TFltKd(XVal, YVal));
  }
  NewPlot.Label = TStr::Fmt("PowerFit: %g", PowerCf);
  NewPlot.SeriesTy = SeriesTy;
  if (Style.Empty()) { NewPlot.WithStyle = "linewidth 3"; }
  else { NewPlot.WithStyle = Style; }
  return SeriesV.Len() - 1;*/
}
// Sorts arr1[0 .. leng1) in place (ascending) and returns, for every element
// whose value lies in [min, max], the pair (value, 1 - rank/leng1), where
// rank is the element's 0-based position in the sorted array.
// Note: the caller's array is reordered by this call.
TFltPrV mygetCCDFYAxis(double* arr1, int leng1, int min, int max)
{
	TFltPrV ccdfPoints;
	sort(arr1, arr1 + leng1);
	for (int rank = 0; rank < leng1; rank++)
	{
		const double value = arr1[rank];
		// values outside the requested window are not reported
		if (!(value >= min && value <= max)) continue;
		const double fraction = 1.0 - (1.0/leng1)*rank;
		ccdfPoints.Add(TFltPr(value, fraction));
	}
	return ccdfPoints;
}
Example #15
0
/// Power-law fit (linear fit on log-log scales): adds the function A*x^B with
/// A and B obtained from TSpecFunc::PowerFit over the points of series PlotId
/// that have x > 0.
/// @param PlotId    index into SeriesV of the series to fit.
/// @param SeriesTy  series type used for the fitted curve.
/// @param Style     gnuplot style string; "linewidth 3" is used when empty.
/// @return id returned by AddFunc for the fitted curve, or -1 when PlotId is
///         out of range, the series is empty, or it has no point with x > 0.
int TGnuPlot::AddPwrFit1(const int& PlotId, const TGpSeriesTy& SeriesTy, const TStr& Style) {
  if (PlotId < 0 || PlotId >= SeriesV.Len()) return -1;
  const TGpSeries& Plot = SeriesV[PlotId];
  if(Plot.XYValV.Empty()) return -1;
  const TFltKdV& XY = Plot.XYValV;
  // power fit: non-positive x values are skipped
  TFltPrV XYPr;
  for (int s = 0; s < XY.Len(); s++) {
    if (XY[s].Key > 0) {
      XYPr.Add(TFltPr(XY[s].Key, XY[s].Dat));
    }
  }
  // nothing left to fit after filtering (same guard as AddPwrFit2)
  if (XYPr.Empty()) return -1;
  double A, B, R2, SigA, SigB, Chi2;
  TSpecFunc::PowerFit(XYPr, A, B, SigA, SigB, Chi2, R2);
  TStr StyleStr=Style;
  if (StyleStr.Empty()) { StyleStr = "linewidth 3"; }
  const int FitId = AddFunc(TStr::Fmt("%f*x**%f", A, B),
    SeriesTy, TStr::Fmt("%.1g * x^{%.4g}  R^2:%.2g", A, B, R2), StyleStr);
  return FitId;
}
Example #16
0
void GetInvParticipRat(const PUNGraph& Graph, int MaxEigVecs, int TimeLimit, TFltPrV& EigValIprV) {
  TUNGraphMtx GraphMtx(Graph);
  TFltVV EigVecVV;
  TFltV EigValV;
  TExeTm ExeTm;
  if (MaxEigVecs<=1) { MaxEigVecs=1000; }
  int EigVecs = TMath::Mn(Graph->GetNodes(), MaxEigVecs);
  printf("start %d vecs...", EigVecs);
  try {
    TSparseSVD::Lanczos2(GraphMtx, EigVecs, TimeLimit, ssotFull, EigValV, EigVecVV, false);
  } catch(...) {
    printf("\n  ***EXCEPTION:  TRIED %d GOT %d values** \n", EigVecs, EigValV.Len()); }
  printf("  ***TRIED %d GOT %d values in %s\n", EigVecs, EigValV.Len(), ExeTm.GetStr());
  TFltV EigVec;
  EigValIprV.Clr();
  if (EigValV.Empty()) { return; }
  for (int v = 0; v < EigVecVV.GetCols(); v++) {
    EigVecVV.GetCol(v, EigVec);
    EigValIprV.Add(TFltPr(EigValV[v], GetInvParticipRat(EigVec)));
  }
  EigValIprV.Sort();
}
Example #17
0
// Reads `fileName` line by line; each line is expected to hold four
// whitespace-separated fields (int, double, int, double). The fourth field is
// scaled by 1000, truncated, rounded down to a multiple of 100 and counted in
// a histogram with indices 0..9999. Every non-empty bucket is appended to
// `ret` as (bucket, count) in increasing bucket order.
//
// Lines that do not parse, and values whose scaled form falls outside
// [0, 10000), are skipped. Each accepted bucket value is also printed to
// stdout, one per line.
void plotPR(char* fileName, TFltPrV& ret) {
	const int kBuckets = 10000;
	int distance[kBuckets] = {0};

	std::ifstream inputFile(fileName);
	for (std::string line; std::getline(inputFile, line);) {
		std::istringstream fields(line);
		int a, c;
		double b, d;
		// A failed extraction would leave d indeterminate, so skip the line.
		if (!(fields >> a >> b >> c >> d)) continue;

		// Range-check while still in floating point: this rejects negative
		// values (which would index before the array), NaN, and values too
		// large to convert to int.
		const double scaled = d * 1000;
		if (!(scaled >= 0.0 && scaled < kBuckets)) continue;

		int val = (int)scaled;
		val -= (val % 100);
		printf("%d\n", val);
		distance[val]++;
	}

	for (int i = 0; i < kBuckets; ++i) {
		if (distance[i] == 0) continue;
		ret.Add(TFltPr(i, distance[i]));
	}
}
Example #18
0
/// Estimates the number of communities in Graph using AGM.
///
/// Outline, as implemented below:
///  1. Fit an AGM with InitComs communities via MCMC.
///  2. For a geometric sequence of regularization coefficients, run gradient
///     ascent, count the communities returned by GetCmtyVV, and record the
///     likelihood and a BIC score of a refit on those communities.
///  3. Normalize the likelihood curve to 0..100, fit a logistic regression of
///     it against log(regularization), and read the community count at the
///     regularization value where the fitted curve crosses a fixed threshold
///     (linear interpolation between the two neighbouring coefficients).
///
/// @param InitComs  number of communities for the initial fit.
/// @param MaxIter   iteration count passed to RunMCMC.
/// @param RndSeed   seed for the local random generator and the initial fit.
/// @param RegGap    multiplicative step between successive regularization coefficients.
/// @param PNoCom    when > 0, passed to SetPNoCom on the initial fit.
/// @param PltFPrx   when non-empty, prefix for a diagnostic plot (also passed to the logistic fit).
/// @return estimated number of communities, never less than 2.
int TAGMUtil::FindComsByAGM(const PUNGraph& Graph, const int InitComs, const int MaxIter, const int RndSeed, const double RegGap, const double PNoCom, const TStr PltFPrx) {
    TRnd Rnd(RndSeed);
    // LambdaIter is 100 unless the graph has fewer than 200 nodes and at most 2000 edges, in which case it is 1.
    int LambdaIter = 100;
    if (Graph->GetNodes() < 200) {
        LambdaIter = 1;
    }
    if (Graph->GetNodes() < 200 && Graph->GetEdges() > 2000) {
        LambdaIter = 100;
    }

    //Find coms with large C
    TAGMFit AGMFitM(Graph, InitComs, RndSeed);
    if (PNoCom > 0.0) {
        AGMFitM.SetPNoCom(PNoCom);
    }
    AGMFitM.RunMCMC(MaxIter, LambdaIter, "");

    // Regularization coefficients: 26 values starting at 0.3 * #edges, each RegGap times the previous one.
    int TE = Graph->GetEdges();
    TFltV RegV;
    RegV.Add(0.3 * TE);
    for (int r = 0; r < 25; r++) {
        RegV.Add(RegV.Last() * RegGap);
    }
    TFltPrV RegComsV, RegLV, RegBICV;
    TFltV LV, BICV;
    //record likelihood and number of communities with nonzero P_c
    for (int r = 0; r < RegV.Len(); r++) {
        double RegCoef = RegV[r];
        // regularization is only active during this gradient ascent; it is reset to 0 right after
        AGMFitM.SetRegCoef(RegCoef);
        AGMFitM.MLEGradAscentGivenCAG(0.01, 1000);
        AGMFitM.SetRegCoef(0.0);

        TVec<TIntV> EstCmtyVV;
        AGMFitM.GetCmtyVV(EstCmtyVV, 0.99);
        int NumLowQ = EstCmtyVV.Len();
        // RegComsV receives an entry even for the iteration that breaks out below,
        // so on an early break it holds one more entry than LV / BICV
        RegComsV.Add(TFltPr(RegCoef, (double) NumLowQ));

        if (EstCmtyVV.Len() > 0) {
            // refit on the surviving communities to get a likelihood for this coefficient
            TAGMFit AFTemp(Graph, EstCmtyVV, Rnd);
            AFTemp.MLEGradAscentGivenCAG(0.001, 1000);
            double CurL = AFTemp.Likelihood();
            LV.Add(CurL);
            // BIC = -2 * L + (#communities) * log(N * (N - 1) / 2)
            BICV.Add(-2.0 * CurL + (double) EstCmtyVV.Len() * log((double) Graph->GetNodes() * (Graph->GetNodes() - 1) / 2.0));
        }
        else {
            // no community left at this coefficient: stop the sweep
            break;
        }
    }
    // if likelihood does not exist or does not change at all, report the smallest number of communities or 2
    if (LV.Len() == 0) {
        return 2;
    }
    else if (LV[0] == LV.Last()) {
        return (int) TMath::Mx<TFlt>(2.0, RegComsV[LV.Len() - 1].Val2);
    }


    //normalize likelihood and BIC to 0~100
    int MaxL = 100;
    {
        TFltV& ValueV = LV;
        TFltPrV& RegValueV = RegLV;
        double MinValue = TFlt::Mx, MaxValue = TFlt::Mn;
        for (int l = 0; l < ValueV.Len(); l++) {
            if (ValueV[l] < MinValue) {
                MinValue = ValueV[l];
            }
            if (ValueV[l] > MaxValue) {
                MaxValue = ValueV[l];
            }
        }
        // pad the likelihood curve with its minimum for coefficients skipped by an early break
        while (ValueV.Len() < RegV.Len()) {
            ValueV.Add(MinValue);
        }
        double RangeVal = MaxValue - MinValue;
        for (int l = 0; l < ValueV.Len(); l++) {
            RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal));
        }

    }
    {
        TFltV& ValueV = BICV;
        TFltPrV& RegValueV = RegBICV;
        double MinValue = TFlt::Mx, MaxValue = TFlt::Mn;
        for (int l = 0; l < ValueV.Len(); l++) {
            if (ValueV[l] < MinValue) {
                MinValue = ValueV[l];
            }
            if (ValueV[l] > MaxValue) {
                MaxValue = ValueV[l];
            }
        }
        // pad the BIC curve with its maximum for coefficients skipped by an early break
        while (ValueV.Len() < RegV.Len()) {
            ValueV.Add(MaxValue);
        }
        // NOTE(review): RangeVal is 0 when all BIC values are equal, which makes the
        // division below produce inf/NaN; nothing visible here rules that out -- confirm.
        double RangeVal = MaxValue - MinValue;
        for (int l = 0; l < ValueV.Len(); l++) {
            RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal));
        }
    }

    //fit logistic regression to normalized likelihood.
    // feature: log of the regularization coefficient; target: normalized likelihood rescaled to 0..1
    TVec<TFltV> XV(RegLV.Len());
    TFltV YV (RegLV.Len());
    for (int l = 0; l < RegLV.Len(); l++) {
        XV[l] = TFltV::GetV(log(RegLV[l].Val1));
        YV[l] = RegLV[l].Val2 / (double) MaxL;
    }
    TFltPrV LRVScaled, LRV;
    TLogRegFit LRFit;
    PLogRegPredict LRMd = LRFit.CalcLogRegNewton(XV, YV, PltFPrx);
    for (int l = 0; l < RegLV.Len(); l++) {
        LRV.Add(TFltPr(RegV[l], LRMd->GetCfy(XV[l])));
        LRVScaled.Add(TFltPr(RegV[l], double(MaxL) * LRV.Last().Val2));
    }

    //estimate # communities from fitted logistic regression
    int NumComs = 0, IdxRegDrop = 0;
    double LRThres = 1.1, RegDrop; // 1 / (1 + exp(1.1)) = 0.25
    double LeftReg = 0.0, RightReg = 0.0;
    TFltV Theta;
    LRMd->GetTheta(Theta);
    // RegDrop: log-regularization value x solving Theta[0] * x + Theta[1] = -LRThres
    RegDrop = (- Theta[1] - LRThres) / Theta[0];
    if (RegDrop <= XV[0][0]) {
        NumComs = (int) RegComsV[0].Val2;
    }
    else if (RegDrop >= XV.Last()[0]) {
        NumComs = (int) RegComsV.Last().Val2;
    }
    else {  //interpolate for RegDrop
        // first coefficient whose log value exceeds RegDrop
        for (int i = 0; i < XV.Len(); i++) {
            if (XV[i][0] > RegDrop) {
                IdxRegDrop = i;
                break;
            }
        }

        if (IdxRegDrop == 0) {
            printf("Error!! RegDrop:%f, Theta[0]:%f, Theta[1]:%f\n", RegDrop, Theta[0].Val, Theta[1].Val);
            for (int l = 0; l < RegLV.Len(); l++) {
                printf("X[%d]:%f, Y[%d]:%f\n", l, XV[l][0].Val, l, YV[l].Val);
            }
        }
        IAssert(IdxRegDrop > 0);
        // linear interpolation of the community count between the two neighbouring coefficients
        // NOTE(review): XV has RegV.Len() entries after padding, while RegComsV can be shorter
        // when the sweep broke early, so RegComsV[IdxRegDrop] may be out of range -- confirm.
        LeftReg = RegDrop - XV[IdxRegDrop - 1][0];
        RightReg = XV[IdxRegDrop][0] - RegDrop;
        NumComs = (int) TMath::Round( (RightReg * RegComsV[IdxRegDrop - 1].Val2 + LeftReg * RegComsV[IdxRegDrop].Val2) / (LeftReg + RightReg));

    }
    //printf("Interpolation coeff: %f, %f, index at drop:%d (%f), Left-Right Vals: %f, %f\n", LeftReg, RightReg, IdxRegDrop, RegDrop, RegComsV[IdxRegDrop - 1].Val2, RegComsV[IdxRegDrop].Val2);
    printf("Num Coms:%d\n", NumComs);
    // the printed value is the raw estimate; the returned value is clamped to at least 2
    if (NumComs < 2) {
        NumComs = 2;
    }

    // optional diagnostic plot: community count, normalized likelihood, normalized BIC and scaled sigmoid
    if (PltFPrx.Len() > 0) {
        TStr PlotTitle = TStr::Fmt("N:%d, E:%d ", Graph->GetNodes(), TE);
        TGnuPlot GPC(PltFPrx + ".l");
        GPC.AddPlot(RegComsV, gpwLinesPoints, "C");
        GPC.AddPlot(RegLV, gpwLinesPoints, "likelihood");
        GPC.AddPlot(RegBICV, gpwLinesPoints, "BIC");
        GPC.AddPlot(LRVScaled, gpwLinesPoints, "Sigmoid (scaled)");
        GPC.SetScale(gpsLog10X);
        GPC.SetTitle(PlotTitle);
        GPC.SavePng(PltFPrx + ".l.png");
    }

    return NumComs;
}
Example #19
0
// Command-line driver for NETINF: parses the options below, loads the input
// cascades (and, when requested, a ground-truth network), runs the greedy
// optimization for the requested number of iterations, and writes the
// inferred network plus the optional outputs selected with -s:.
// The body between Try and Catch is wrapped by the project's error-handling
// macros; the run time is printed afterwards and 0 is always returned.
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("\nNETINF. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  Try
  const TStr InFNm  = Env.GetIfArgPrefixStr("-i:", "example-cascades.txt", "Input cascades (one file)");
  const TStr GroundTruthFNm = Env.GetIfArgPrefixStr("-n:", "example-network.txt", "Input ground-truth network (one file)");
  const TStr OutFNm  = Env.GetIfArgPrefixStr("-o:", "network", "Output file name(s) prefix");
  // the iteration count is read as a string and converted with GetInt() where it is used
  const TStr Iters  = Env.GetIfArgPrefixStr("-e:", "5", "Number of iterations");
  const double alpha = Env.GetIfArgPrefixFlt("-a:", 1.0, "Alpha for transmission model");
  const int Model = Env.GetIfArgPrefixInt("-m:", 0, "0:exponential, 1:power law, 2:rayleigh");
  const int Top =Env.GetIfArgPrefixInt("-t:", 10, "select top k as friends");
  const int TakeAdditional = Env.GetIfArgPrefixInt("-s:", 1, "How much additional files to create?\n\
    1:info about each edge, 2:objective function value (+upper bound), 3:Precision-recall plot, 4:all-additional-files (default:1)\n");

  // translate the -s: value into the individual output switches
  bool ComputeBound = false, ComputeInfo = false; bool CompareGroundTruth = false;
  switch (TakeAdditional) {
     case 1 : ComputeInfo = true; break;
     case 2 : ComputeBound = true; break;
     case 3 : CompareGroundTruth = true; break;
     case 4 :
    	 ComputeInfo = true;
    	 // NOTE(review): the help text calls option 4 "all-additional-files", but the
    	 // bound computation is disabled here -- confirm whether that is intended.
    	 // ComputeBound = true;
    	 CompareGroundTruth = true; break;
     default: FailR("Bad -s: parameter.");
  }

  TNetInfBs NIB(ComputeBound, CompareGroundTruth, Top);
  printf("\nLoading input cascades: %s\n", InFNm.CStr());

  // load cascade from file
  TFIn FIn(InFNm);
  NIB.LoadCascadesTxt(FIn, Model, alpha);

  // load ground truth network
  if (CompareGroundTruth) {
	  TFIn FInG(GroundTruthFNm);
	  NIB.LoadGroundTruthTxt(FInG);
  }

  NIB.Init();
  printf("cascades:%d nodes:%d potential edges:%d\nRunning NETINF...\n", NIB.GetCascs(), NIB.GetNodes(), NIB.CascPerEdge.Len());
  NIB.GreedyOpt(Iters.GetInt());

  // plot showing precision/recall using groundtruth
  if (CompareGroundTruth)
	  TGnuPlot::PlotValV(NIB.PrecisionRecall, TStr::Fmt("%s-precision-recall", OutFNm.CStr()), "Precision Recall", "Recall",
						 "Precision", gpsAuto, false, gpwLinesPoints, false);

  // plot objective function
  if (ComputeBound) {
	  // one (1-based index, marginal gain) point per entry of EdgeInfoH
	  TFltPrV Gains;
	  for (int i=0; i<NIB.EdgeInfoH.Len(); i++)
		  Gains.Add(TFltPr((double)(i+1), NIB.EdgeInfoH[i].MarginalGain));

	  TGnuPlot::PlotValV(Gains, TStr::Fmt("%s-objective", OutFNm.CStr()), "Objective Function", "Iters", "Objective Function");
  }

  // save network in plain text
  NIB.SavePlaneTextNet(TStr::Fmt("%s.txt", OutFNm.CStr()));

  // save edge info
  if (ComputeInfo)
	  NIB.SaveEdgeInfo(TStr::Fmt("%s-edge.info", OutFNm.CStr()));

  // save obj+bound info
  if (ComputeBound)
	  NIB.SaveObjInfo(TStr::Fmt("%s-obj", OutFNm.CStr()));

  Catch
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
}