void plotParitialDegDistribution(const PNGraph& graph, std::vector<int>& nodeList) { std::map<int, int> inDegDistMap; std::map<int, int> outDegDistMap; for (int i = 0; i < nodeList.size(); ++i) { int curNodeId = nodeList[i]; if (!graph->IsNode(curNodeId)) continue; TNGraph::TNodeI ni = graph->GetNI(curNodeId); int curNodeInDeg = ni.GetInDeg(); if (inDegDistMap.find(curNodeInDeg) == inDegDistMap.end()) { inDegDistMap.insert(std::pair<int, int>(curNodeInDeg, 0)); } inDegDistMap[curNodeInDeg]++; int curNodeOutDeg = ni.GetOutDeg(); if (outDegDistMap.find(curNodeOutDeg) == outDegDistMap.end()) { outDegDistMap.insert(std::pair<int, int>(curNodeOutDeg, 0)); } outDegDistMap[curNodeOutDeg]++; } TFltPrV inDegDist; for (std::map<int, int>::iterator itr = inDegDistMap.begin(); itr != inDegDistMap.end(); itr++) { inDegDist.Add(TFltPr(itr->first, itr->second)); } TFltPrV outDegDist; for (std::map<int, int>::iterator itr = outDegDistMap.begin(); itr != outDegDistMap.end(); itr++) { outDegDist.Add(TFltPr(itr->first, itr->second)); } TGnuPlot plot1("inDegDistParitial", ""); plot1.AddPlot(inDegDist, gpwPoints, ""); plot1.SetScale(gpsLog10XY); plot1.SavePng(); TGnuPlot plot2("outDegDistParitial", ""); plot2.AddPlot(outDegDist, gpwPoints, ""); plot2.SetScale(gpsLog10XY); plot2.SavePng(); TGnuPlot plot3("DegDistParitial", ""); plot3.AddCmd("set key right top"); plot3.AddPlot(inDegDist, gpwPoints, "In Degree"); plot3.AddPlot(outDegDist, gpwPoints, "Out Degree"); plot3.SetScale(gpsLog10XY); plot3.SavePng(); }
void getSampledDistance(const PNGraph& graph, std::vector<int> srcIds, std::vector<int> dstIds, int sampleSize, TFltPrV& ret) { std::random_shuffle(srcIds.begin(), srcIds.end()); std::random_shuffle(dstIds.begin(), dstIds.end()); int distance[20]; for (int i = 0; i < 20; distance[i++] = 0); int sampleCount = 0; for (int i = 0; i < sampleSize; ) { int srcNodeId = srcIds[rand() % srcIds.size()]; int dstNodeId = dstIds[rand() % dstIds.size()]; if (!graph->IsNode(srcNodeId)) continue; if (!graph->IsNode(dstNodeId)) continue; int shortDist = TSnap::GetShortPath(graph, srcNodeId, dstNodeId, true); distance[shortDist]++; sampleCount++; printIntArray(distance, 20); ++i; } for (int i = 0; i < 20; ++i) { ret.Add(TFltPr(i, distance[i])); } }
int TGnuPlot::AddExpFit(const int& PlotId, const TGpSeriesTy& SeriesTy, const double& FitXOffset, const TStr& Style) { const TGpSeries& Plot = SeriesV[PlotId]; if(Plot.XYValV.Empty()) return -1; const TFltKdV& XY = Plot.XYValV; double A, B, R2, SigA, SigB, Chi2; // power fit TFltPrV XYPr; int s; for (s = 0; s < XY.Len(); s++) { if (XY[s].Key-FitXOffset > 0) { XYPr.Add(TFltPr(XY[s].Key-FitXOffset, XY[s].Dat)); } //!!! skip zero values } TSpecFunc::ExpFit(XYPr, A, B, SigA, SigB, Chi2, R2); TStr Label, StyleStr=Style; if (FitXOffset == 0) { Label = TStr::Fmt("%.4g exp(%.4g x) R^2:%.2g", A, B, R2); } else { Label = TStr::Fmt("%.4g exp(%.4g x - %g) R^2:%.2g", A, B, FitXOffset, R2); } if (StyleStr.Empty()) { StyleStr = "linewidth 3"; } const int FitId = AddFunc(TStr::Fmt("%f*exp(%f*x-%f)", A, B, FitXOffset), SeriesTy, Label, StyleStr); return FitId; /*SeriesV.Add(); TGpSeries& NewPlot = SeriesV.Last(); TFltKdV& EstXY = NewPlot.XYValV; for (s = 0; s < XYPr.Len(); s++) { EstXY.Add(TFltKd(XYPr[s].Val1+FitXOffset, A*exp(B*XYPr[s].Val1))); } NewPlot.SeriesTy = SeriesTy; if (Style.Empty()) { NewPlot.WithStyle = "linewidth 3"; } else { NewPlot.WithStyle = Style; } return SeriesV.Len() - 1;*/ }
int TGnuPlot::AddLogFit(const int& PlotId, const TGpSeriesTy& SeriesTy, const TStr& Style) { const TGpSeries& Plot = SeriesV[PlotId]; if(Plot.XYValV.Empty()) return -1; const TFltKdV& XY = Plot.XYValV; double A, B, R2, SigA, SigB, Chi2; // power fit TFltPrV XYPr; int s; for (s = 0; s < XY.Len(); s++) { if (XY[s].Key > 0) { XYPr.Add(TFltPr(XY[s].Key, XY[s].Dat)); } //!!! skip zero values } TSpecFunc::LogFit(XYPr, A, B, SigA, SigB, Chi2, R2); TStr StyleStr=Style; if (StyleStr.Empty()) { StyleStr = "linewidth 3"; } const int FitId = AddFunc(TStr::Fmt("%f+%f*log(x)", A, B), SeriesTy, TStr::Fmt("%.4g + %.4g log(x) R^2:%.2g", A, B, R2), StyleStr); return FitId; /*SeriesV.Add(); TGpSeries& NewPlot = SeriesV.Last(); TFltKdV& EstXY = NewPlot.XYValV; for (s = 0; s < XYPr.Len(); s++) { EstXY.Add(TFltKd(XYPr[s].Val1, A+B*log((double)XYPr[s].Val1))); } NewPlot.Label = TStr::Fmt("%.4g + %.4g log(x) R^2:%.2g", A, B, R2); NewPlot.SeriesTy = SeriesTy; if (Style.Empty()) { NewPlot.WithStyle = "linewidth 3"; } else { NewPlot.WithStyle = Style; } return SeriesV.Len() - 1;*/ }
// CHECK void ExpBinning(const TFltPrV& deg, TFltPrV& degSparse, const int& BinRadix){ TFlt maxDeg(deg[deg.Len()-1].Val1.Val), minDeg(deg[0].Val1.Val); bool maxPowerReached = false; // idx - index of border, previdx - index of previous border int power = 0, previdx = 0, idx, binSize; TFltPr val; double binBorder = 0.0; while (binBorder <= minDeg) binBorder = pow(static_cast<double>(BinRadix), power++); TFltPr v(minDeg, deg[0].Val2.Val); degSparse.Add(v); bool isExact = false; while (!maxPowerReached){ if (binBorder >= maxDeg){ // when last element of deg was previous bin border if (previdx == deg.Len() - 1) break; // if we have another elements binBorder = maxDeg; maxPowerReached = true; } // find next element idx = FindVal1Elem(deg, binBorder, isExact); // if bin size == 0 if (previdx + 1 == idx && !isExact) continue; if (!isExact) idx = idx - 1; double sum = 0.0; binSize = idx - previdx; for (int i = previdx + 1; i <= idx; i++){ sum += deg[i].Val2.Val; } sum /= binSize; // if prevBinBorder was the smallest degree, it can be more than binBorder / BinRadix double SumBinBorder = previdx > 0 ? binBorder + static_cast<double>(binBorder) / BinRadix : binBorder + static_cast<double>(minDeg); double avgDeg = SumBinBorder / 2.0; val.Val1 = avgDeg; val.Val2 = sum; degSparse.Add(val); previdx = idx; binBorder = pow(static_cast<double>(BinRadix), power++); } }
// Inverse participation ratio: normalize EigVec to have L2=1 and then I=sum_k EigVec[i]^4 // see Spectra of "real-world" graphs: Beyond the semicircle law by Farkas, Derenyi, Barabasi and Vicsek void PlotInvParticipRat(const PUNGraph& Graph, const int& MaxEigVecs, const int& TimeLimit, const TStr& FNmPref, TStr DescStr) { TFltPrV EigIprV; GetInvParticipRat(Graph, MaxEigVecs, TimeLimit, EigIprV); if (DescStr.Empty()) { DescStr = FNmPref; } if (EigIprV.Empty()) { DescStr+=". FAIL"; EigIprV.Add(TFltPr(-1,-1)); return; } TGnuPlot::PlotValV(EigIprV, "eigIPR."+FNmPref, TStr::Fmt("%s. G(%d, %d). Largest eig val = %f (%d values)", DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges(), EigIprV.Last().Val1(), EigIprV.Len()), "Eigenvalue", "Inverse Participation Ratio of corresponding Eigenvector", gpsLog10Y, false, gpwPoints); }
void TGStatVec::GetValV(const TGStatVal& XVal, const TGStatVal& YVal, TFltPrV& ValV) const { ValV.Gen(Len(), 0); double x; for (int t = 0; t < Len(); t++) { if (XVal == gsvTime) { x = t+1; } else { x = At(t)->GetVal(XVal); } ValV.Add(TFltPr(x, At(t)->GetVal(YVal))); } }
void TGnuPlot::MakeExpBins(const TFltPrV& XYValV, TFltPrV& ExpXYValV, const double& BinFactor, const double& MinYVal) { TFltKdV KdV(XYValV.Len(), 0), OutV; for (int i = 0; i < XYValV.Len(); i++) { KdV.Add(TFltKd(XYValV[i].Val1, XYValV[i].Val2)); } KdV.Sort(); TGnuPlot::MakeExpBins(KdV, OutV, BinFactor, MinYVal); ExpXYValV.Gen(OutV.Len(), 0); for (int i = 0; i < OutV.Len(); i++) { ExpXYValV.Add(TFltPr(OutV[i].Key, OutV[i].Dat)); } }
void TFfGGen::PlotFireSize(const TStr& FNmPref, const TStr& DescStr) { TGnuPlot GnuPlot("fs."+FNmPref, TStr::Fmt("%s. Fire size. G(%d, %d)", DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges())); GnuPlot.SetXYLabel("Vertex id (iterations)", "Fire size (node out-degree)"); TFltPrV IdToOutDegV; for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { IdToOutDegV.Add(TFltPr(NI.GetId(), NI.GetOutDeg())); } IdToOutDegV.Sort(); GnuPlot.AddPlot(IdToOutDegV, gpwImpulses, "Node out-degree"); GnuPlot.SavePng(); }
void GetCumDistr(const TFltPrV& nonCum, TFltPrV& res){ for (int i = nonCum.Len() - 1; i >=0; i--){ TFlt count; if (i == nonCum.Len() - 1) count = nonCum[i].Val2.Val; else count = nonCum[i].Val2.Val + res[res.Len()-1].Val2.Val; TFltPr val(nonCum[i].Val1, count); res.Add(val); } res.Sort(); }
// Scatter-plots the cascade lengths of `c1` against those of `c2`.
// For every key id 0 .. c1.Len()-1 one point (c1[id].Len(), c2[id].Len()) is
// produced, i.e. the two tables are paired by key id, and `c2` is expected to
// contain at least as many entries as `c1`.
// The points are handed to Tools::plotScatter under the name
// "TwitterUrlsOverContents".
void plotScatterLengthOfEachCascade(THash<TUInt,TSecTmV>& c1, THash<TUInt,TSecTmV>& c2) {
  printf("\n\nPlotting ...\n");

  TFltPrV lengthPairs;
  const int cascadeCnt = c1.Len();
  for (int keyId = 0; keyId < cascadeCnt; ++keyId) {
    TFltPr point;
    point.Val1 = c1[keyId].Len();
    point.Val2 = c2[keyId].Len();
    lengthPairs.Add(point);
  }

  Tools::plotScatter(lengthPairs, "TwitterUrlsOverContents", "Urls on Twitter", "Contents on Twitter");
}
// Scatter-plots the cascade lengths of `quotes` against those of `twitter`.
// For every key id 0 .. quotes.Len()-1 one point
// (quotes[id].Len(), twitter[id].Len()) is produced, i.e. the two tables are
// paired by key id, and `twitter` is expected to contain at least as many
// entries as `quotes`.
// `name` is used as the plot name and in the y-axis label "<name> on Twitter".
void plotScatterLengthOfEachCascade(THash<TStr,CascadeElementV>& quotes, THash<TUInt,TSecTmV>& twitter, char* name) {
  printf("\n\nPlotting ...\n");

  TFltPrV lengthPairs;
  const int cascadeCnt = quotes.Len();
  for (int keyId = 0; keyId < cascadeCnt; ++keyId) {
    TFltPr point;
    point.Val1 = quotes[keyId].Len();
    point.Val2 = twitter[keyId].Len();
    lengthPairs.Add(point);
  }

  // keep the formatted label alive in a named object while its C string is used
  const TStr twitterLabel = TStr::Fmt("%s on Twitter",name);
  Tools::plotScatter(lengthPairs, name, "Blogs/News", twitterLabel.CStr());
}
void GetPoints(const TFlt& maxDegLog, const TFlt& minDegLog, const int& NInt, const TFltPrV& base, TFltPrV& points){ int beginIndex = 0; // ignore nodes with zero degree (for Kronecker graphs) /*if (base[0].Val1.Val != 0) points.Add(base[beginIndex]); else { points.Add(base[++beginIndex]); }*/ points.Add(base[beginIndex]); TFlt baseMaxDeg = base[base.Len()-1].Val1.Val, baseMinDeg = base[beginIndex].Val1.Val; for (int i = beginIndex + 1; i < NInt; i++){ // deg - degree to be found in base TFlt degRound (pow (10, minDegLog.Val + i * (maxDegLog.Val - minDegLog.Val) / NInt)); TInt degInt(static_cast<int>(degRound.Val)); TFlt deg(degInt); // if deg < baseMinDeg (for cases when baseMinDeg > minDeg) if (deg.Val <= baseMinDeg) continue; // if deg > baseMaxDeg, add last point and finish if (deg.Val >= baseMaxDeg){ points.Add(base[base.Len()-1]); break; } // we have two cases: when we can find an exact value of deg, or when we have not this value bool isExact = false; int index = FindVal1Elem(base, deg, isExact); if (isExact){ points.Add(base[index]); } else { TFltPr x; x.Val1.Val = deg; x.Val2.Val = ( base[index].Val2.Val + base [index + 1].Val2.Val ) / 2; points.Add(x); } } }
// Reads the text file `fileName` line by line and appends one point
// (line number, first integer on the line) to `ret` for every line that
// starts with a parsable integer. Line numbers are 1-based and count every
// line read, so a skipped line leaves a gap in the x coordinates.
//
// Lines that are blank or do not start with an integer are skipped; the
// earlier code did not check the extraction and could append an
// uninitialised value for them. Nothing is appended when the file cannot be
// opened.
void plotpaths(char* fileName, TFltPrV& ret) {
  std::ifstream inputFile(fileName);
  int lineNumber = 0;
  for (std::string line; std::getline(inputFile, line);) {
    ++lineNumber;
    std::istringstream fields(line);
    int value;
    if (!(fields >> value)) continue;
    ret.Add(TFltPr(lineNumber, value));
  }
}
// MLE power-coefficient int TGnuPlot::AddPwrFit2(const int& PlotId, const TGpSeriesTy& SeriesTy, const double& MinX, const TStr& Style) { const TGpSeries& Plot = SeriesV[PlotId]; if(Plot.XYValV.Empty()) return -1; const TFltKdV& XY = Plot.XYValV; // power fit TFltPrV XYPr; double MinY = TFlt::Mx; for (int s = 0; s < XY.Len(); s++) { if (XY[s].Key > 0.0) { XYPr.Add(TFltPr(XY[s].Key, XY[s].Dat)); MinY = TMath::Mn(MinY, XY[s].Dat()); } } if (XYPr.Empty()) return -1; MinY = TMath::Mn(1.0, MinY); // determine the sign of power coefficient double CoefSign = 0.0; { double A, B, R2, SigA, SigB, Chi2; TSpecFunc::PowerFit(XYPr, A, B, SigA, SigB, Chi2, R2); CoefSign = B > 0.0 ? +1.0 : -1.0; } const double PowerCf = CoefSign * TSpecFunc::GetPowerCoef(XYPr, MinX); int Mid = (int) exp(log((double)XYPr.Len())/2.0); if (Mid >= XYPr.Len()) { Mid = XYPr.Len()-1; } const double MidX = XYPr[Mid].Val1(); const double MidY = XYPr[Mid].Val2(); const double B = MidY / pow(MidX, PowerCf); TStr StyleStr=Style; if (StyleStr.Empty()) { StyleStr = "linewidth 3"; } const int FitId = AddFunc(TStr::Fmt("%f*x**%f", B, PowerCf), SeriesTy, TStr::Fmt("MLE = x^{%.4g}", PowerCf), StyleStr); return FitId; /*SeriesV.Add(); TGpSeries& NewPlot = SeriesV.Last(); TFltKdV& XYFit = NewPlot.XYValV; XYFit.Gen(XYPr.Len(), 0); for (int s = 0; s < XYPr.Len(); s++) { const double XVal = XYPr[s].Val1; const double YVal = B * pow(XYPr[s].Val1(), PowerCf); if (YVal < MinY || XVal < MinX) continue; XYFit.Add(TFltKd(XVal, YVal)); } NewPlot.Label = TStr::Fmt("PowerFit: %g", PowerCf); NewPlot.SeriesTy = SeriesTy; if (Style.Empty()) { NewPlot.WithStyle = "linewidth 3"; } else { NewPlot.WithStyle = Style; } return SeriesV.Len() - 1;*/ }
// linear fit on log-log scales{% int TGnuPlot::AddPwrFit1(const int& PlotId, const TGpSeriesTy& SeriesTy, const TStr& Style) { if (PlotId < 0 || PlotId >= SeriesV.Len()) return -1; const TGpSeries& Plot = SeriesV[PlotId]; if(Plot.XYValV.Empty()) return -1; const TFltKdV& XY = Plot.XYValV; double A, B, R2, SigA, SigB, Chi2, MinY = TFlt::Mx, MinX = TFlt::Mx; // power fit TFltPrV XYPr; int s; for (s = 0; s < XY.Len(); s++) { if (XY[s].Key > 0) { XYPr.Add(TFltPr(XY[s].Key, XY[s].Dat)); //!!! skip zero values MinX = TMath::Mn(MinX, XY[s].Key()); MinY = TMath::Mn(MinY, XY[s].Dat()); } } MinY = TMath::Mn(1.0, MinY); TSpecFunc::PowerFit(XYPr, A, B, SigA, SigB, Chi2, R2); TStr StyleStr=Style; if (StyleStr.Empty()) { StyleStr = "linewidth 3"; } const int FitId = AddFunc(TStr::Fmt("%f*x**%f", A, B), SeriesTy, TStr::Fmt("%.1g * x^{%.4g} R^2:%.2g", A, B, R2), StyleStr); return FitId; /*SeriesV.Add(); TGpSeries& NewPlot = SeriesV.Last(); const int FitId = SeriesV.Len() - 1; NewPlot.DataFNm = ; TFltKdV& EstXY = NewPlot.XYValV; for (s = 0; s < XYPr.Len(); s++) { const double YVal = A*pow(XYPr[s].Val1(), B); if (YVal < MinY) continue; EstXY.Add(TFltKd(XYPr[s].Val1, YVal)); } NewPlot.Label = ; NewPlot.SeriesTy = SeriesTy; if (Style.Empty()) { NewPlot.WithStyle = "linewidth 3"; } else { NewPlot.WithStyle = Style; } //if (MinX < 5.0) MinX = 5.0; //AddPwrFit2(PlotId, SeriesTy, MinX);*/ }
void GetInvParticipRat(const PUNGraph& Graph, int MaxEigVecs, int TimeLimit, TFltPrV& EigValIprV) { TUNGraphMtx GraphMtx(Graph); TFltVV EigVecVV; TFltV EigValV; TExeTm ExeTm; if (MaxEigVecs<=1) { MaxEigVecs=1000; } int EigVecs = TMath::Mn(Graph->GetNodes(), MaxEigVecs); printf("start %d vecs...", EigVecs); try { TSparseSVD::Lanczos2(GraphMtx, EigVecs, TimeLimit, ssotFull, EigValV, EigVecVV, false); } catch(...) { printf("\n ***EXCEPTION: TRIED %d GOT %d values** \n", EigVecs, EigValV.Len()); } printf(" ***TRIED %d GOT %d values in %s\n", EigVecs, EigValV.Len(), ExeTm.GetStr()); TFltV EigVec; EigValIprV.Clr(); if (EigValV.Empty()) { return; } for (int v = 0; v < EigVecVV.GetCols(); v++) { EigVecVV.GetCol(v, EigVec); EigValIprV.Add(TFltPr(EigValV[v], GetInvParticipRat(EigVec))); } EigValIprV.Sort(); }
// get degrees from current and add it to degrees void AddDegreeStat(const TFltPrV& current, TFltPrV& degrees, TIntPrV& samples){ for (int j = 0; j < current.Len(); j++){ const TFltPr& elem = current[j]; const double& deg = elem.Val1.Val, &nodesCount = elem.Val2.Val; bool wasFound = false; // silly search for (int k = 0; k < degrees.Len(); k++){ if (degrees[k].Val1.Val == deg){ degrees[k].Val2.Val += nodesCount; samples[k].Val2.Val++; wasFound = true; break; } } if (!wasFound){ TFlt d(deg), n(nodesCount); TFltPr val(d,n); degrees.Add(val); TInt di(static_cast<int>(deg)); TIntPr valI(di, 1); samples.Add(valI); } } }
void plotPR(char* fileName, TFltPrV& ret) { int distance[10000]; for (int i = 0; i < 10000; distance[i++] = 0); std::ifstream inputFile(fileName); for (std::string line; std::getline(inputFile, line);) { std::istringstream isss(line); int a, c; double b, d; isss >> a >> b >> c >> d; int val = (int)(d * 1000); val -= (val % 100); if (val >= 10000) continue; //double idd = std::stold(line); printf("%d\n", val); distance[val]++; } for (int i = 0; i < 10000; ++i) { if (distance[i] == 0) continue; ret.Add(TFltPr(i, distance[i])); } }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("\nNETINF. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "example-cascades.txt", "Input cascades (one file)"); const TStr GroundTruthFNm = Env.GetIfArgPrefixStr("-n:", "example-network.txt", "Input ground-truth network (one file)"); const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "network", "Output file name(s) prefix"); const TStr Iters = Env.GetIfArgPrefixStr("-e:", "5", "Number of iterations"); const double alpha = Env.GetIfArgPrefixFlt("-a:", 1.0, "Alpha for transmission model"); const int Model = Env.GetIfArgPrefixInt("-m:", 0, "0:exponential, 1:power law, 2:rayleigh"); const int Top =Env.GetIfArgPrefixInt("-t:", 10, "select top k as friends"); const int TakeAdditional = Env.GetIfArgPrefixInt("-s:", 1, "How much additional files to create?\n\ 1:info about each edge, 2:objective function value (+upper bound), 3:Precision-recall plot, 4:all-additional-files (default:1)\n"); bool ComputeBound = false, ComputeInfo = false; bool CompareGroundTruth = false; switch (TakeAdditional) { case 1 : ComputeInfo = true; break; case 2 : ComputeBound = true; break; case 3 : CompareGroundTruth = true; break; case 4 : ComputeInfo = true; // ComputeBound = true; CompareGroundTruth = true; break; default: FailR("Bad -s: parameter."); } TNetInfBs NIB(ComputeBound, CompareGroundTruth, Top); printf("\nLoading input cascades: %s\n", InFNm.CStr()); // load cascade from file TFIn FIn(InFNm); NIB.LoadCascadesTxt(FIn, Model, alpha); // load ground truth network if (CompareGroundTruth) { TFIn FInG(GroundTruthFNm); NIB.LoadGroundTruthTxt(FInG); } NIB.Init(); printf("cascades:%d nodes:%d potential edges:%d\nRunning NETINF...\n", NIB.GetCascs(), NIB.GetNodes(), NIB.CascPerEdge.Len()); NIB.GreedyOpt(Iters.GetInt()); // plot showing precision/recall using groundtruth if (CompareGroundTruth) 
TGnuPlot::PlotValV(NIB.PrecisionRecall, TStr::Fmt("%s-precision-recall", OutFNm.CStr()), "Precision Recall", "Recall", "Precision", gpsAuto, false, gpwLinesPoints, false); // plot objective function if (ComputeBound) { TFltPrV Gains; for (int i=0; i<NIB.EdgeInfoH.Len(); i++) Gains.Add(TFltPr((double)(i+1), NIB.EdgeInfoH[i].MarginalGain)); TGnuPlot::PlotValV(Gains, TStr::Fmt("%s-objective", OutFNm.CStr()), "Objective Function", "Iters", "Objective Function"); } // save network in plain text NIB.SavePlaneTextNet(TStr::Fmt("%s.txt", OutFNm.CStr())); // save edge info if (ComputeInfo) NIB.SaveEdgeInfo(TStr::Fmt("%s-edge.info", OutFNm.CStr())); // save obj+bound info if (ComputeBound) NIB.SaveObjInfo(TStr::Fmt("%s-obj", OutFNm.CStr())); Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }