void TGStat::TakeSpectral(const PNGraph& Graph, TFSet StatFSet, int _TakeSngVals) { if (_TakeSngVals == -1) { _TakeSngVals = TakeSngVals; } // singular values, vectors if (StatFSet.In(gsdSngVal)) { const int SngVals = TMath::Mn(_TakeSngVals, Graph->GetNodes()/2); TFltV SngValV1; TSnap::GetSngVals(Graph, SngVals, SngValV1); SngValV1.Sort(false); TFltPrV& SngValV = DistrStatH.AddDat(gsdSngVal); SngValV.Gen(SngValV1.Len(), 0); for (int i = 0; i < SngValV1.Len(); i++) { SngValV.Add(TFltPr(i+1, SngValV1[i])); } } if (StatFSet.In(gsdSngVec)) { TFltV LeftV, RightV; TSnap::GetSngVec(Graph, LeftV, RightV); LeftV.Sort(false); TFltPrV& SngVec = DistrStatH.AddDat(gsdSngVec); SngVec.Gen(LeftV.Len(), 0); for (int i = 0; i < TMath::Mn(Kilo(10), LeftV.Len()/2); i++) { if (LeftV[i] > 0) { SngVec.Add(TFltPr(i+1, LeftV[i])); } } } }
void plotParitialDegDistribution(const PNGraph& graph, std::vector<int>& nodeList) { std::map<int, int> inDegDistMap; std::map<int, int> outDegDistMap; for (int i = 0; i < nodeList.size(); ++i) { int curNodeId = nodeList[i]; if (!graph->IsNode(curNodeId)) continue; TNGraph::TNodeI ni = graph->GetNI(curNodeId); int curNodeInDeg = ni.GetInDeg(); if (inDegDistMap.find(curNodeInDeg) == inDegDistMap.end()) { inDegDistMap.insert(std::pair<int, int>(curNodeInDeg, 0)); } inDegDistMap[curNodeInDeg]++; int curNodeOutDeg = ni.GetOutDeg(); if (outDegDistMap.find(curNodeOutDeg) == outDegDistMap.end()) { outDegDistMap.insert(std::pair<int, int>(curNodeOutDeg, 0)); } outDegDistMap[curNodeOutDeg]++; } TFltPrV inDegDist; for (std::map<int, int>::iterator itr = inDegDistMap.begin(); itr != inDegDistMap.end(); itr++) { inDegDist.Add(TFltPr(itr->first, itr->second)); } TFltPrV outDegDist; for (std::map<int, int>::iterator itr = outDegDistMap.begin(); itr != outDegDistMap.end(); itr++) { outDegDist.Add(TFltPr(itr->first, itr->second)); } TGnuPlot plot1("inDegDistParitial", ""); plot1.AddPlot(inDegDist, gpwPoints, ""); plot1.SetScale(gpsLog10XY); plot1.SavePng(); TGnuPlot plot2("outDegDistParitial", ""); plot2.AddPlot(outDegDist, gpwPoints, ""); plot2.SetScale(gpsLog10XY); plot2.SavePng(); TGnuPlot plot3("DegDistParitial", ""); plot3.AddCmd("set key right top"); plot3.AddPlot(inDegDist, gpwPoints, "In Degree"); plot3.AddPlot(outDegDist, gpwPoints, "Out Degree"); plot3.SetScale(gpsLog10XY); plot3.SavePng(); }
void getSampledDistance(const PNGraph& graph, std::vector<int> srcIds, std::vector<int> dstIds, int sampleSize, TFltPrV& ret) { std::random_shuffle(srcIds.begin(), srcIds.end()); std::random_shuffle(dstIds.begin(), dstIds.end()); int distance[20]; for (int i = 0; i < 20; distance[i++] = 0); int sampleCount = 0; for (int i = 0; i < sampleSize; ) { int srcNodeId = srcIds[rand() % srcIds.size()]; int dstNodeId = dstIds[rand() % dstIds.size()]; if (!graph->IsNode(srcNodeId)) continue; if (!graph->IsNode(dstNodeId)) continue; int shortDist = TSnap::GetShortPath(graph, srcNodeId, dstNodeId, true); distance[shortDist]++; sampleCount++; printIntArray(distance, 20); ++i; } for (int i = 0; i < 20; ++i) { ret.Add(TFltPr(i, distance[i])); } }
int TGnuPlot::AddLogFit(const int& PlotId, const TGpSeriesTy& SeriesTy, const TStr& Style) { const TGpSeries& Plot = SeriesV[PlotId]; if(Plot.XYValV.Empty()) return -1; const TFltKdV& XY = Plot.XYValV; double A, B, R2, SigA, SigB, Chi2; // power fit TFltPrV XYPr; int s; for (s = 0; s < XY.Len(); s++) { if (XY[s].Key > 0) { XYPr.Add(TFltPr(XY[s].Key, XY[s].Dat)); } //!!! skip zero values } TSpecFunc::LogFit(XYPr, A, B, SigA, SigB, Chi2, R2); TStr StyleStr=Style; if (StyleStr.Empty()) { StyleStr = "linewidth 3"; } const int FitId = AddFunc(TStr::Fmt("%f+%f*log(x)", A, B), SeriesTy, TStr::Fmt("%.4g + %.4g log(x) R^2:%.2g", A, B, R2), StyleStr); return FitId; /*SeriesV.Add(); TGpSeries& NewPlot = SeriesV.Last(); TFltKdV& EstXY = NewPlot.XYValV; for (s = 0; s < XYPr.Len(); s++) { EstXY.Add(TFltKd(XYPr[s].Val1, A+B*log((double)XYPr[s].Val1))); } NewPlot.Label = TStr::Fmt("%.4g + %.4g log(x) R^2:%.2g", A, B, R2); NewPlot.SeriesTy = SeriesTy; if (Style.Empty()) { NewPlot.WithStyle = "linewidth 3"; } else { NewPlot.WithStyle = Style; } return SeriesV.Len() - 1;*/ }
int TGnuPlot::AddExpFit(const int& PlotId, const TGpSeriesTy& SeriesTy, const double& FitXOffset, const TStr& Style) { const TGpSeries& Plot = SeriesV[PlotId]; if(Plot.XYValV.Empty()) return -1; const TFltKdV& XY = Plot.XYValV; double A, B, R2, SigA, SigB, Chi2; // power fit TFltPrV XYPr; int s; for (s = 0; s < XY.Len(); s++) { if (XY[s].Key-FitXOffset > 0) { XYPr.Add(TFltPr(XY[s].Key-FitXOffset, XY[s].Dat)); } //!!! skip zero values } TSpecFunc::ExpFit(XYPr, A, B, SigA, SigB, Chi2, R2); TStr Label, StyleStr=Style; if (FitXOffset == 0) { Label = TStr::Fmt("%.4g exp(%.4g x) R^2:%.2g", A, B, R2); } else { Label = TStr::Fmt("%.4g exp(%.4g x - %g) R^2:%.2g", A, B, FitXOffset, R2); } if (StyleStr.Empty()) { StyleStr = "linewidth 3"; } const int FitId = AddFunc(TStr::Fmt("%f*exp(%f*x-%f)", A, B, FitXOffset), SeriesTy, Label, StyleStr); return FitId; /*SeriesV.Add(); TGpSeries& NewPlot = SeriesV.Last(); TFltKdV& EstXY = NewPlot.XYValV; for (s = 0; s < XYPr.Len(); s++) { EstXY.Add(TFltKd(XYPr[s].Val1+FitXOffset, A*exp(B*XYPr[s].Val1))); } NewPlot.SeriesTy = SeriesTy; if (Style.Empty()) { NewPlot.WithStyle = "linewidth 3"; } else { NewPlot.WithStyle = Style; } return SeriesV.Len() - 1;*/ }
// some kind of least squares power-law fitting that cutts the tail until the fit is good int TGnuPlot::AddPwrFit3(const int& PlotId, const TGpSeriesTy& SeriesTy, const double& MinX, const TStr& Style, double& Intercept, double& Slope, double& R2) { if (PlotId < 0 || PlotId >= SeriesV.Len()) return -1; const TGpSeries& Plot = SeriesV[PlotId]; if(Plot.XYValV.Empty()) return -1; double A, B, SigA, SigB, Chi2, MinY=TFlt::Mx; const TFltKdV& XY = Plot.XYValV; //SeriesV.Add(); //TGpSeries& NewPlot = SeriesV.Last(); //TFltKdV& EstXY = NewPlot.XYValV; TFltPrV FitXY, NewFitXY; for (int s = 0; s < XY.Len(); s++) { if (XY[s].Key > 0 && XY[s].Key >= MinX) { FitXY.Add(TFltPr(XY[s].Key, XY[s].Dat)); //!!! skip zero values MinY = TMath::Mn(MinY, XY[s].Dat()); } } MinY = TMath::Mn(1.0, MinY); // power fit (if tail is too fat, cut everything where // extrapolation sets the value < MinY while (true) { TSpecFunc::PowerFit(FitXY, A, B, SigA, SigB, Chi2, R2); NewFitXY.Clr(false); //EstXY.Clr(false); for (int s = 0; s < FitXY.Len(); s++) { const double YVal = A*pow(FitXY[s].Val1(), B); if (YVal < MinY) continue; //EstXY.Add(TFltKd(FitXY[s].Val1, YVal)); NewFitXY.Add(TFltPr(FitXY[s].Val1, FitXY[s].Val2)); } if (NewFitXY.Len() < 10 || FitXY.Last().Val1 < 1.2 * NewFitXY.Last().Val1) { break; } else { FitXY.Swap(NewFitXY); } } TStr StyleStr=Style; if (StyleStr.Empty()) { StyleStr = "linewidth 3"; } const int FitId = AddFunc(TStr::Fmt("%f*x**%f", A, B), SeriesTy, TStr::Fmt("%.1g * x^{%.4g} R^2:%.2g", A, B, R2), StyleStr); return FitId; /*NewPlot.Label = TStr::Fmt("%.1g * x^{%.4g} R^2:%.2g", A, B, R2); Intercept = A; Slope = B; NewPlot.SeriesTy = SeriesTy; if (Style.Empty()) { NewPlot.WithStyle = "linewidth 3"; } else { NewPlot.WithStyle = Style; } return SeriesV.Len() - 1;*/ }
void TGStatVec::GetValV(const TGStatVal& XVal, const TGStatVal& YVal, TFltPrV& ValV) const { ValV.Gen(Len(), 0); double x; for (int t = 0; t < Len(); t++) { if (XVal == gsvTime) { x = t+1; } else { x = At(t)->GetVal(XVal); } ValV.Add(TFltPr(x, At(t)->GetVal(YVal))); } }
// Inverse participation ratio: normalize EigVec to have L2=1 and then I=sum_k EigVec[i]^4 // see Spectra of "real-world" graphs: Beyond the semicircle law by Farkas, Derenyi, Barabasi and Vicsek void PlotInvParticipRat(const PUNGraph& Graph, const int& MaxEigVecs, const int& TimeLimit, const TStr& FNmPref, TStr DescStr) { TFltPrV EigIprV; GetInvParticipRat(Graph, MaxEigVecs, TimeLimit, EigIprV); if (DescStr.Empty()) { DescStr = FNmPref; } if (EigIprV.Empty()) { DescStr+=". FAIL"; EigIprV.Add(TFltPr(-1,-1)); return; } TGnuPlot::PlotValV(EigIprV, "eigIPR."+FNmPref, TStr::Fmt("%s. G(%d, %d). Largest eig val = %f (%d values)", DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges(), EigIprV.Last().Val1(), EigIprV.Len()), "Eigenvalue", "Inverse Participation Ratio of corresponding Eigenvector", gpsLog10Y, false, gpwPoints); }
void TGnuPlot::MakeExpBins(const TFltPrV& XYValV, TFltPrV& ExpXYValV, const double& BinFactor, const double& MinYVal) { TFltKdV KdV(XYValV.Len(), 0), OutV; for (int i = 0; i < XYValV.Len(); i++) { KdV.Add(TFltKd(XYValV[i].Val1, XYValV[i].Val2)); } KdV.Sort(); TGnuPlot::MakeExpBins(KdV, OutV, BinFactor, MinYVal); ExpXYValV.Gen(OutV.Len(), 0); for (int i = 0; i < OutV.Len(); i++) { ExpXYValV.Add(TFltPr(OutV[i].Key, OutV[i].Dat)); } }
void TFfGGen::PlotFireSize(const TStr& FNmPref, const TStr& DescStr) { TGnuPlot GnuPlot("fs."+FNmPref, TStr::Fmt("%s. Fire size. G(%d, %d)", DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges())); GnuPlot.SetXYLabel("Vertex id (iterations)", "Fire size (node out-degree)"); TFltPrV IdToOutDegV; for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { IdToOutDegV.Add(TFltPr(NI.GetId(), NI.GetOutDeg())); } IdToOutDegV.Sort(); GnuPlot.AddPlot(IdToOutDegV, gpwImpulses, "Node out-degree"); GnuPlot.SavePng(); }
void TGnuPlot::Test() { TFltV DeltaY; TFltPrV ValV1, ValV2, ValV3; for (int i = 1; i < 30; i++) { ValV1.Add(TFltPr(i, pow(double(i), 1.2))); DeltaY.Add(5*TInt::Rnd.GetUniDev()); ValV2.Add(TFltPr(i, 5*i-1)); } for (int i = -10; i < 20; i++) { ValV3.Add(TFltPr(i, 2*i + 2 + TInt::Rnd.GetUniDev())); } TGnuPlot GnuPlot("testDat", "TestPlot", true); GnuPlot.SetXYLabel("X", "Y"); const int id2 = GnuPlot.AddPlot(ValV2, gpwPoints, "y=5*x-1"); const int id3 = GnuPlot.AddPlot(ValV3, gpwPoints, "y=2*x+2"); GnuPlot.AddErrBar(ValV1, DeltaY, "y=x^2", "Error bar"); GnuPlot.AddLinFit(id2, gpwLines); GnuPlot.AddLinFit(id3, gpwLines); GnuPlot.Plot(); GnuPlot.SavePng("testPlot.png"); }
/// Reads fileName line by line and appends one point per line to ret:
/// (1-based line number, first integer on the line).
///
/// A line that does not start with a parsable integer still produces a point;
/// its value is then whatever the stream extraction leaves in a
/// zero-initialised int (0 for non-numeric text), so line numbers on the
/// x axis stay aligned with the file. A file that cannot be opened yields
/// no points.
void plotpaths(char* fileName, TFltPrV& ret) {
  int lineCount = 1;
  std::ifstream inputFile(fileName);
  for (std::string line; std::getline(inputFile, line);) {
    std::istringstream isss(line);
    // Initialised so a failed extraction never exposes an indeterminate value.
    int a = 0;
    isss >> a;
    ret.Add(TFltPr(lineCount++, a));
  }
}
// MLE power-coefficient int TGnuPlot::AddPwrFit2(const int& PlotId, const TGpSeriesTy& SeriesTy, const double& MinX, const TStr& Style) { const TGpSeries& Plot = SeriesV[PlotId]; if(Plot.XYValV.Empty()) return -1; const TFltKdV& XY = Plot.XYValV; // power fit TFltPrV XYPr; double MinY = TFlt::Mx; for (int s = 0; s < XY.Len(); s++) { if (XY[s].Key > 0.0) { XYPr.Add(TFltPr(XY[s].Key, XY[s].Dat)); MinY = TMath::Mn(MinY, XY[s].Dat()); } } if (XYPr.Empty()) return -1; MinY = TMath::Mn(1.0, MinY); // determine the sign of power coefficient double CoefSign = 0.0; { double A, B, R2, SigA, SigB, Chi2; TSpecFunc::PowerFit(XYPr, A, B, SigA, SigB, Chi2, R2); CoefSign = B > 0.0 ? +1.0 : -1.0; } const double PowerCf = CoefSign * TSpecFunc::GetPowerCoef(XYPr, MinX); int Mid = (int) exp(log((double)XYPr.Len())/2.0); if (Mid >= XYPr.Len()) { Mid = XYPr.Len()-1; } const double MidX = XYPr[Mid].Val1(); const double MidY = XYPr[Mid].Val2(); const double B = MidY / pow(MidX, PowerCf); TStr StyleStr=Style; if (StyleStr.Empty()) { StyleStr = "linewidth 3"; } const int FitId = AddFunc(TStr::Fmt("%f*x**%f", B, PowerCf), SeriesTy, TStr::Fmt("MLE = x^{%.4g}", PowerCf), StyleStr); return FitId; /*SeriesV.Add(); TGpSeries& NewPlot = SeriesV.Last(); TFltKdV& XYFit = NewPlot.XYValV; XYFit.Gen(XYPr.Len(), 0); for (int s = 0; s < XYPr.Len(); s++) { const double XVal = XYPr[s].Val1; const double YVal = B * pow(XYPr[s].Val1(), PowerCf); if (YVal < MinY || XVal < MinX) continue; XYFit.Add(TFltKd(XVal, YVal)); } NewPlot.Label = TStr::Fmt("PowerFit: %g", PowerCf); NewPlot.SeriesTy = SeriesTy; if (Style.Empty()) { NewPlot.WithStyle = "linewidth 3"; } else { NewPlot.WithStyle = Style; } return SeriesV.Len() - 1;*/ }
/// Builds complementary-CDF points from arr1[0..leng1).
/// The array is sorted in place (the caller's data is reordered). For the
/// value at sorted position i the y coordinate is 1 - i/leng1; only values
/// within [min, max] produce a point.
TFltPrV mygetCCDFYAxis(double* arr1, int leng1, int min, int max) {
  TFltPrV ccdf;
  sort(arr1, arr1 + leng1);
  for (int rank = 0; rank < leng1; ++rank) {
    const double value = arr1[rank];
    const bool inRange = (value >= min && value <= max);
    if (!inRange) {
      continue;
    }
    const double survival = 1.0 - (1.0/leng1)*rank;
    ccdf.Add(TFltPr(value, survival));
  }
  return ccdf;
}
// linear fit on log-log scales{% int TGnuPlot::AddPwrFit1(const int& PlotId, const TGpSeriesTy& SeriesTy, const TStr& Style) { if (PlotId < 0 || PlotId >= SeriesV.Len()) return -1; const TGpSeries& Plot = SeriesV[PlotId]; if(Plot.XYValV.Empty()) return -1; const TFltKdV& XY = Plot.XYValV; double A, B, R2, SigA, SigB, Chi2, MinY = TFlt::Mx, MinX = TFlt::Mx; // power fit TFltPrV XYPr; int s; for (s = 0; s < XY.Len(); s++) { if (XY[s].Key > 0) { XYPr.Add(TFltPr(XY[s].Key, XY[s].Dat)); //!!! skip zero values MinX = TMath::Mn(MinX, XY[s].Key()); MinY = TMath::Mn(MinY, XY[s].Dat()); } } MinY = TMath::Mn(1.0, MinY); TSpecFunc::PowerFit(XYPr, A, B, SigA, SigB, Chi2, R2); TStr StyleStr=Style; if (StyleStr.Empty()) { StyleStr = "linewidth 3"; } const int FitId = AddFunc(TStr::Fmt("%f*x**%f", A, B), SeriesTy, TStr::Fmt("%.1g * x^{%.4g} R^2:%.2g", A, B, R2), StyleStr); return FitId; /*SeriesV.Add(); TGpSeries& NewPlot = SeriesV.Last(); const int FitId = SeriesV.Len() - 1; NewPlot.DataFNm = ; TFltKdV& EstXY = NewPlot.XYValV; for (s = 0; s < XYPr.Len(); s++) { const double YVal = A*pow(XYPr[s].Val1(), B); if (YVal < MinY) continue; EstXY.Add(TFltKd(XYPr[s].Val1, YVal)); } NewPlot.Label = ; NewPlot.SeriesTy = SeriesTy; if (Style.Empty()) { NewPlot.WithStyle = "linewidth 3"; } else { NewPlot.WithStyle = Style; } //if (MinX < 5.0) MinX = 5.0; //AddPwrFit2(PlotId, SeriesTy, MinX);*/ }
void GetInvParticipRat(const PUNGraph& Graph, int MaxEigVecs, int TimeLimit, TFltPrV& EigValIprV) { TUNGraphMtx GraphMtx(Graph); TFltVV EigVecVV; TFltV EigValV; TExeTm ExeTm; if (MaxEigVecs<=1) { MaxEigVecs=1000; } int EigVecs = TMath::Mn(Graph->GetNodes(), MaxEigVecs); printf("start %d vecs...", EigVecs); try { TSparseSVD::Lanczos2(GraphMtx, EigVecs, TimeLimit, ssotFull, EigValV, EigVecVV, false); } catch(...) { printf("\n ***EXCEPTION: TRIED %d GOT %d values** \n", EigVecs, EigValV.Len()); } printf(" ***TRIED %d GOT %d values in %s\n", EigVecs, EigValV.Len(), ExeTm.GetStr()); TFltV EigVec; EigValIprV.Clr(); if (EigValV.Empty()) { return; } for (int v = 0; v < EigVecVV.GetCols(); v++) { EigVecVV.GetCol(v, EigVec); EigValIprV.Add(TFltPr(EigValV[v], GetInvParticipRat(EigVec))); } EigValIprV.Sort(); }
void plotPR(char* fileName, TFltPrV& ret) { int distance[10000]; for (int i = 0; i < 10000; distance[i++] = 0); std::ifstream inputFile(fileName); for (std::string line; std::getline(inputFile, line);) { std::istringstream isss(line); int a, c; double b, d; isss >> a >> b >> c >> d; int val = (int)(d * 1000); val -= (val % 100); if (val >= 10000) continue; //double idd = std::stold(line); printf("%d\n", val); distance[val]++; } for (int i = 0; i < 10000; ++i) { if (distance[i] == 0) continue; ret.Add(TFltPr(i, distance[i])); } }
/// estimate number of communities using AGM
///
/// Outline, as implemented below:
///  1. Fit an AGM with InitComs communities using MCMC (MaxIter iterations).
///  2. Sweep 26 regularization coefficients, starting at 0.3 * #edges and
///     multiplying by RegGap each step. For every coefficient record the
///     number of communities returned by GetCmtyVV and, for a model re-fitted
///     on those communities, its likelihood and BIC. The sweep stops at the
///     first coefficient that yields no communities.
///  3. Normalize likelihood and BIC to 0..100, fit a logistic regression of
///     the normalized likelihood against log(coefficient), and read the
///     number of communities at the point where the fitted curve drops.
///
/// Parameters:
///  Graph     graph to analyse
///  InitComs  number of communities for the initial fit
///  MaxIter   iteration count passed to RunMCMC
///  RndSeed   seed for the local TRnd and for the initial TAGMFit
///  RegGap    multiplicative step between consecutive coefficients
///  PNoCom    passed to SetPNoCom when > 0.0
///  PltFPrx   file prefix; when non-empty a diagnostic plot is saved to
///            PltFPrx + ".l.png" (also passed to CalcLogRegNewton)
///
/// Returns the estimated number of communities, never less than 2.
int TAGMUtil::FindComsByAGM(const PUNGraph& Graph, const int InitComs, const int MaxIter, const int RndSeed, const double RegGap, const double PNoCom, const TStr PltFPrx) {
  TRnd Rnd(RndSeed);
  // LambdaIter is 100 unless the graph has fewer than 200 nodes and at most
  // 2000 edges, in which case it is 1.
  int LambdaIter = 100;
  if (Graph->GetNodes() < 200) { LambdaIter = 1; }
  if (Graph->GetNodes() < 200 && Graph->GetEdges() > 2000) { LambdaIter = 100; }

  //Find coms with large C
  TAGMFit AGMFitM(Graph, InitComs, RndSeed);
  if (PNoCom > 0.0) { AGMFitM.SetPNoCom(PNoCom); }
  AGMFitM.RunMCMC(MaxIter, LambdaIter, "");

  // Regularization schedule: 0.3*TE, then 25 further values, each RegGap
  // times the previous one.
  int TE = Graph->GetEdges();
  TFltV RegV;
  RegV.Add(0.3 * TE);
  for (int r = 0; r < 25; r++) {
    RegV.Add(RegV.Last() * RegGap);
  }
  // RegComsV: (coefficient, #communities); RegLV / RegBICV are filled later
  // with the normalized likelihood / BIC per coefficient.
  TFltPrV RegComsV, RegLV, RegBICV;
  TFltV LV, BICV;
  //record likelihood and number of communities with nonzero P_c
  for (int r = 0; r < RegV.Len(); r++) {
    double RegCoef = RegV[r];
    // Optimize with the regularizer switched on, then switch it off again
    // before extracting communities.
    AGMFitM.SetRegCoef(RegCoef);
    AGMFitM.MLEGradAscentGivenCAG(0.01, 1000);
    AGMFitM.SetRegCoef(0.0);

    TVec<TIntV> EstCmtyVV;
    AGMFitM.GetCmtyVV(EstCmtyVV, 0.99);
    int NumLowQ = EstCmtyVV.Len();
    RegComsV.Add(TFltPr(RegCoef, (double) NumLowQ));

    if (EstCmtyVV.Len() > 0) {
      // Re-fit a fresh model on the extracted communities and score it.
      TAGMFit AFTemp(Graph, EstCmtyVV, Rnd);
      AFTemp.MLEGradAscentGivenCAG(0.001, 1000);
      double CurL = AFTemp.Likelihood();
      LV.Add(CurL);
      // BIC = -2*L + (#communities) * log(N*(N-1)/2)
      BICV.Add(-2.0 * CurL + (double) EstCmtyVV.Len() * log((double) Graph->GetNodes() * (Graph->GetNodes() - 1) / 2.0));
    }
    else {
      // No communities left: RegComsV has one more entry than LV / BICV.
      break;
    }
  }
  // if likelihood does not exist or does not change at all, report the smallest number of communities or 2
  if (LV.Len() == 0) { return 2; }
  else if (LV[0] == LV.Last()) { return (int) TMath::Mx<TFlt>(2.0, RegComsV[LV.Len() - 1].Val2); }

  //normalize likelihood and BIC to 0~100
  int MaxL = 100;
  {
    // Likelihood: missing tail entries (after an early break) are padded
    // with the minimum, so they normalize to 0.
    TFltV& ValueV = LV;
    TFltPrV& RegValueV = RegLV;
    double MinValue = TFlt::Mx, MaxValue = TFlt::Mn;
    for (int l = 0; l < ValueV.Len(); l++) {
      if (ValueV[l] < MinValue) { MinValue = ValueV[l]; }
      if (ValueV[l] > MaxValue) { MaxValue = ValueV[l]; }
    }
    while (ValueV.Len() < RegV.Len()) { ValueV.Add(MinValue); }
    double RangeVal = MaxValue - MinValue;
    for (int l = 0; l < ValueV.Len(); l++) {
      RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal));
    }
  }
  {
    // BIC: missing tail entries are padded with the maximum, so they
    // normalize to 100.
    // NOTE(review): RangeVal is not checked for zero here (all BIC values
    // equal) — confirm that cannot happen for inputs reaching this point.
    TFltV& ValueV = BICV;
    TFltPrV& RegValueV = RegBICV;
    double MinValue = TFlt::Mx, MaxValue = TFlt::Mn;
    for (int l = 0; l < ValueV.Len(); l++) {
      if (ValueV[l] < MinValue) { MinValue = ValueV[l]; }
      if (ValueV[l] > MaxValue) { MaxValue = ValueV[l]; }
    }
    while (ValueV.Len() < RegV.Len()) { ValueV.Add(MaxValue); }
    double RangeVal = MaxValue - MinValue;
    for (int l = 0; l < ValueV.Len(); l++) {
      RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal));
    }
  }

  //fit logistic regression to normalized likelihood.
  // Feature: log(coefficient); target: normalized likelihood scaled to 0..1.
  TVec<TFltV> XV(RegLV.Len());
  TFltV YV (RegLV.Len());
  for (int l = 0; l < RegLV.Len(); l++) {
    XV[l] = TFltV::GetV(log(RegLV[l].Val1));
    YV[l] = RegLV[l].Val2 / (double) MaxL;
  }

  TFltPrV LRVScaled, LRV;
  TLogRegFit LRFit;
  PLogRegPredict LRMd = LRFit.CalcLogRegNewton(XV, YV, PltFPrx);
  // Fitted curve per coefficient, raw (LRV) and scaled to 0..100 (LRVScaled,
  // used only for the diagnostic plot).
  for (int l = 0; l < RegLV.Len(); l++) {
    LRV.Add(TFltPr(RegV[l], LRMd->GetCfy(XV[l])));
    LRVScaled.Add(TFltPr(RegV[l], double(MaxL) * LRV.Last().Val2));
  }

  //estimate # communities from fitted logistic regression
  int NumComs = 0, IdxRegDrop = 0;
  double LRThres = 1.1, RegDrop; // 1 / (1 + exp(1.1)) = 0.25
  double LeftReg = 0.0, RightReg = 0.0;
  TFltV Theta;
  LRMd->GetTheta(Theta);
  // Solve Theta[0]*x + Theta[1] = -LRThres for x (x is a log coefficient).
  // NOTE(review): Theta[0] == 0 would divide by zero — confirm the fit
  // cannot return that here.
  RegDrop = (- Theta[1] - LRThres) / Theta[0];
  if (RegDrop <= XV[0][0]) { NumComs = (int) RegComsV[0].Val2; }
  else if (RegDrop >= XV.Last()[0]) { NumComs = (int) RegComsV.Last().Val2; }
  else {  //interpolate for RegDrop
    // First grid point strictly to the right of RegDrop.
    for (int i = 0; i < XV.Len(); i++) {
      if (XV[i][0] > RegDrop) { IdxRegDrop = i; break; }
    }

    // Index 0 is not expected in this branch (RegDrop > XV[0][0]); dump the
    // fit inputs before the assertion below fires.
    if (IdxRegDrop == 0) {
      printf("Error!! RegDrop:%f, Theta[0]:%f, Theta[1]:%f\n", RegDrop, Theta[0].Val, Theta[1].Val);
      for (int l = 0; l < RegLV.Len(); l++) {
        printf("X[%d]:%f, Y[%d]:%f\n", l, XV[l][0].Val, l, YV[l].Val);
      }
    }
    IAssert(IdxRegDrop > 0);
    // Linear interpolation of the community count between the two grid
    // points around RegDrop, weighted by distance to the opposite point.
    LeftReg = RegDrop - XV[IdxRegDrop - 1][0];
    RightReg = XV[IdxRegDrop][0] - RegDrop;
    NumComs = (int) TMath::Round( (RightReg * RegComsV[IdxRegDrop - 1].Val2 + LeftReg * RegComsV[IdxRegDrop].Val2) / (LeftReg + RightReg));
  }
  //printf("Interpolation coeff: %f, %f, index at drop:%d (%f), Left-Right Vals: %f, %f\n", LeftReg, RightReg, IdxRegDrop, RegDrop, RegComsV[IdxRegDrop - 1].Val2, RegComsV[IdxRegDrop].Val2);
  printf("Num Coms:%d\n", NumComs);
  if (NumComs < 2) { NumComs = 2; }

  // Optional diagnostic plot of all recorded curves against the coefficient.
  if (PltFPrx.Len() > 0) {
    TStr PlotTitle = TStr::Fmt("N:%d, E:%d ", Graph->GetNodes(), TE);
    TGnuPlot GPC(PltFPrx + ".l");
    GPC.AddPlot(RegComsV, gpwLinesPoints, "C");
    GPC.AddPlot(RegLV, gpwLinesPoints, "likelihood");
    GPC.AddPlot(RegBICV, gpwLinesPoints, "BIC");
    GPC.AddPlot(LRVScaled, gpwLinesPoints, "Sigmoid (scaled)");
    GPC.SetScale(gpsLog10X);
    GPC.SetTitle(PlotTitle);
    GPC.SavePng(PltFPrx + ".l.png");
  }

  return NumComs;
}
// NETINF command-line driver.
// Reads cascades (and optionally a ground-truth network), runs the greedy
// NETINF optimization for the requested number of iterations and writes the
// inferred network plus the additional outputs selected with -s:.
// The body between Try and Catch is wrapped by the project's error-handling
// macros; the run time is printed after it and the function returns 0.
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("\nNETINF. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  Try

  // Command-line arguments (prefix, default, help text).
  const TStr InFNm  = Env.GetIfArgPrefixStr("-i:", "example-cascades.txt", "Input cascades (one file)");
  const TStr GroundTruthFNm = Env.GetIfArgPrefixStr("-n:", "example-network.txt", "Input ground-truth network (one file)");
  const TStr OutFNm  = Env.GetIfArgPrefixStr("-o:", "network", "Output file name(s) prefix");
  // The iteration count is read as a string and converted with GetInt() below.
  const TStr Iters  = Env.GetIfArgPrefixStr("-e:", "5", "Number of iterations");
  const double alpha = Env.GetIfArgPrefixFlt("-a:", 1.0, "Alpha for transmission model");
  const int Model = Env.GetIfArgPrefixInt("-m:", 0, "0:exponential, 1:power law, 2:rayleigh");
  const int Top =Env.GetIfArgPrefixInt("-t:", 10, "select top k as friends");
  const int TakeAdditional = Env.GetIfArgPrefixInt("-s:", 1, "How much additional files to create?\n\
 1:info about each edge, 2:objective function value (+upper bound), 3:Precision-recall plot, 4:all-additional-files (default:1)\n");

  // Translate the -s: level into the individual output switches.
  bool ComputeBound = false, ComputeInfo = false;
  bool CompareGroundTruth = false;

  switch (TakeAdditional) {
    case 1 :
      ComputeInfo = true;
      break;
    case 2 :
      ComputeBound = true;
      break;
    case 3 :
      CompareGroundTruth = true;
      break;
    case 4 :
      // Bound computation is disabled for level 4.
      ComputeInfo = true;
      // ComputeBound = true;
      CompareGroundTruth = true;
      break;
    default:
      FailR("Bad -s: parameter.");
  }

  TNetInfBs NIB(ComputeBound, CompareGroundTruth, Top);
  printf("\nLoading input cascades: %s\n", InFNm.CStr());

  // load cascade from file
  TFIn FIn(InFNm);
  NIB.LoadCascadesTxt(FIn, Model, alpha);

  // load ground truth network
  if (CompareGroundTruth) {
    TFIn FInG(GroundTruthFNm);
    NIB.LoadGroundTruthTxt(FInG);
  }

  NIB.Init();
  printf("cascades:%d nodes:%d potential edges:%d\nRunning NETINF...\n", NIB.GetCascs(), NIB.GetNodes(), NIB.CascPerEdge.Len());
  NIB.GreedyOpt(Iters.GetInt());

  // plot showing precision/recall using groundtruth
  if (CompareGroundTruth)
    TGnuPlot::PlotValV(NIB.PrecisionRecall, TStr::Fmt("%s-precision-recall", OutFNm.CStr()), "Precision Recall", "Recall", "Precision", gpsAuto, false, gpwLinesPoints, false);

  // plot objective function
  if (ComputeBound) {
    // One point per entry of EdgeInfoH: (1-based position, marginal gain).
    TFltPrV Gains;
    for (int i=0; i<NIB.EdgeInfoH.Len(); i++)
      Gains.Add(TFltPr((double)(i+1), NIB.EdgeInfoH[i].MarginalGain));

    TGnuPlot::PlotValV(Gains, TStr::Fmt("%s-objective", OutFNm.CStr()), "Objective Function", "Iters", "Objective Function");
  }

  // save network in plain text
  NIB.SavePlaneTextNet(TStr::Fmt("%s.txt", OutFNm.CStr()));

  // save edge info
  if (ComputeInfo)
    NIB.SaveEdgeInfo(TStr::Fmt("%s-edge.info", OutFNm.CStr()));

  // save obj+bound info
  if (ComputeBound)
    NIB.SaveObjInfo(TStr::Fmt("%s-obj", OutFNm.CStr()));

  Catch
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
}