// to get first few eigenvectors void GetEigVec(const PUNGraph& Graph, const int& EigVecs, TFltV& EigValV, TVec<TFltV>& EigVecV) { const int Nodes = Graph->GetNodes(); // Lanczos TUNGraphMtx GraphMtx(Graph); int CalcVals = int(2*EigVecs); if (CalcVals > Nodes) { CalcVals = Nodes; } TFltVV EigVecVV; //while (EigValV.Len() < EigVecs && CalcVals < 10*EigVecs) { try { TSparseSVD::Lanczos(GraphMtx, EigVecs, 2*EigVecs, ssotFull, EigValV, EigVecVV, false); } catch(...) { printf("\n ***EXCEPTION: TRIED %d GOT %d values** \n", CalcVals, EigValV.Len()); } if (EigValV.Len() < EigVecs) { printf(" ***TRIED %d GOT %d values** \n", CalcVals, EigValV.Len()); } // CalcVals += EigVecs; //} TFltIntPrV EigValIdV; for (int i = 0; i < EigValV.Len(); i++) { EigValIdV.Add(TFltIntPr(EigValV[i], i)); } EigValIdV.Sort(false); EigValV.Sort(false); for (int v = 0; v < EigValIdV.Len(); v++) { // vector components are not sorted!!! EigVecV.Add(); EigVecVV.GetCol(EigValIdV[v].Val2, EigVecV.Last()); } IsAllValVNeg(EigVecV[0], true); }
void GetEigVals(const PUNGraph& Graph, const int& EigVals, TFltV& EigValV) { // Lanczos TUNGraphMtx GraphMtx(Graph); //const int Nodes = Graph->GetNodes(); //int CalcVals = int(2*EigVals); //if (CalcVals > Nodes) { CalcVals = Nodes; } //while (EigValV.Len() < EigVals && CalcVals < 3*EigVals) { try { if (EigVals > 4) { TSparseSVD::SimpleLanczos(GraphMtx, 2*EigVals, EigValV, false); } else { TFltVV EigVecVV; // this is much more precise, but also much slower TSparseSVD::Lanczos(GraphMtx, EigVals, 3*EigVals, ssotFull, EigValV, EigVecVV, false); } } catch(...) { printf("\n ***EXCEPTION: TRIED %d GOT %d values** \n", 2*EigVals, EigValV.Len()); } if (EigValV.Len() < EigVals) { printf(" ***TRIED %d GOT %d values** \n", 2*EigVals, EigValV.Len()); } // CalcVals += EigVals; //} EigValV.Sort(false); /*if (EigValV.Len() > EigVals) { EigValV.Del(EigVals, EigValV.Len()-1); } else { while (EigValV.Len() < EigVals) EigValV.Add(1e-6); } IAssert(EigValV.Len() == EigVals);*/ }
void PlotSngValRank(const PNGraph& Graph, const int& SngVals, const TStr& FNmPref, TStr DescStr) { TFltV SngValV; TSnap::GetSngVals(Graph, SngVals, SngValV); SngValV.Sort(false); if (DescStr.Empty()) { DescStr = FNmPref; } TGnuPlot::PlotValV(SngValV, "sngVal."+FNmPref, TStr::Fmt("%s. G(%d, %d). Largest eig val = %f", DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges(), SngValV[0].Val), "Rank", "Singular value", gpsLog10XY, false, gpwLinesPoints); }
void TNearestNeighbor::UpdateThreshold() { ThresholdV.Gen(RateV.Len(), 0); // sort distances TFltV SortedV = DistV; SortedV.Sort(true); // establish thrashold for each rate for (const double Rate : RateV) { // element Id corresponding to Rate-th percentile const int Elt = (int)floor((1.0 - Rate) * SortedV.Len()); // remember the distance as threshold ThresholdV.Add(SortedV[Elt]); } }
void TGraphKey::TakeSig(const PNGraph& Graph, const int& MnSvdGraph, const int& MxSvdGraph) { const int Edges = Graph->GetEdges(); Nodes = Graph->GetNodes(); VariantId = 0; SigV.Gen(2+Nodes, 0); // degree sequence TIntPrV DegV(Nodes, 0); for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { DegV.Add(TIntPr(NodeI.GetInDeg(), NodeI.GetOutDeg())); } DegV.Sort(false); SigV.Add(TFlt(Nodes)); SigV.Add(TFlt(Edges)); for (int i = 0; i < DegV.Len(); i++) { SigV.Add(DegV[i].Val1()); SigV.Add(DegV[i].Val2()); } // singular values signature // it turns out that it is cheaper to do brute force isomorphism // checking than to calculate SVD and then check isomorphism if (Nodes >= MnSvdGraph && Nodes < MxSvdGraph) { // perform full SVD TFltVV AdjMtx(Nodes+1, Nodes+1); TFltV SngValV; TFltVV LSingV, RSingV; TIntH NodeIdH; // create adjecency matrix for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { NodeIdH.AddKey(NodeI.GetId()); } for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { const int NodeId = NodeIdH.GetKeyId(NodeI.GetId()) + 1; for (int e = 0; e < NodeI.GetOutDeg(); e++) { const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e)) + 1; // no self edges if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1; } } try { // can fail to converge but results seem to be good TSvd::Svd(AdjMtx, LSingV, SngValV, RSingV); } catch(...) { printf("\n***No SVD convergence: G(%d, %d): SngValV.Len():%d\n", Nodes(), Graph->GetEdges(), SngValV.Len()); } // round singular values SngValV.Sort(false); for (int i = 0; i < SngValV.Len(); i++) { SigV.Add(TMath::Round(SngValV[i], RoundTo)); } } //printf("SIG:\n"); for (int i = 0; i < SigV.Len(); i++) { printf("\t%f\n", SigV[i]); } SigV.Pack(); }
void GetSngVals(const PNGraph& Graph, const int& SngVals, TFltV& SngValV) { const int Nodes = Graph->GetNodes(); IAssert(SngVals > 0); if (Nodes < 100) { // perform full SVD TFltVV AdjMtx(Nodes+1, Nodes+1); TFltVV LSingV, RSingV; TIntH NodeIdH; // create adjecency matrix for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { NodeIdH.AddKey(NodeI.GetId()); } for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { const int NodeId = NodeIdH.GetKeyId(NodeI.GetId()) + 1; for (int e = 0; e < NodeI.GetOutDeg(); e++) { const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e)) + 1; // no self edges if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1; } } try { // can fail to converge but results seem to be good TSvd::Svd1Based(AdjMtx, LSingV, SngValV, RSingV); } catch(...) { printf("\n***No SVD convergence: G(%d, %d)\n", Nodes, Graph->GetEdges()); } } else { // Lanczos TNGraphMtx GraphMtx(Graph); int CalcVals = int(2*SngVals); //if (CalcVals > Nodes) { CalcVals = int(2*Nodes); } //if (CalcVals > Nodes) { CalcVals = Nodes; } //while (SngValV.Len() < SngVals && CalcVals < 10*SngVals) { try { if (SngVals > 4) { TSparseSVD::SimpleLanczosSVD(GraphMtx, 2*SngVals, SngValV, false); } else { TFltVV LSingV, RSingV; // this is much more precise, but also much slower TSparseSVD::LanczosSVD(GraphMtx, SngVals, 3*SngVals, ssotFull, SngValV, LSingV, RSingV); } } catch(...) { printf("\n ***EXCEPTION: TRIED %d GOT %d values** \n", 2*SngVals, SngValV.Len()); } if (SngValV.Len() < SngVals) { printf(" ***TRIED %d GOT %d values** \n", CalcVals, SngValV.Len()); } // CalcVals += SngVals; //} } SngValV.Sort(false); //if (SngValV.Len() > SngVals) { // SngValV.Del(SngVals, SngValV.Len()-1); } //else { // while (SngValV.Len() < SngVals) SngValV.Add(1e-6); } //IAssert(SngValV.Len() == SngVals); }
double TNetInfBs::GetBound(const TIntPr& Edge, double& CurProb) { double Bound = 0; TFltV Bounds; // bound could be computed faster (using lazy evaluation, as in the optimization procedure) for (int e=0; e < EdgeGainV.Len(); e++) { const TIntPr& EE = EdgeGainV[e].Val2; if (EE != Edge && !Graph->IsEdge(EE.Val1, EE.Val2)) { const double EProb = GetAllCascProb(EE.Val1, EE.Val2); if (EProb > CurProb) Bounds.Add(EProb - CurProb); } } Bounds.Sort(false); for (int i=0; i<Graph->GetEdges() && i<Bounds.Len(); i++) Bound += Bounds[i]; return Bound; }
void GetSngVec(const PNGraph& Graph, const int& SngVecs, TFltV& SngValV, TVec<TFltV>& LeftSV, TVec<TFltV>& RightSV) { const int Nodes = Graph->GetNodes(); SngValV.Clr(); LeftSV.Clr(); RightSV.Clr(); TFltVV LSingV, RSingV; if (Nodes < 100) { // perform full SVD TFltVV AdjMtx(Nodes+1, Nodes+1); TIntH NodeIdH; // create adjecency matrix (1-based) for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { NodeIdH.AddKey(NodeI.GetId()); } for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { const int NodeId = NodeIdH.GetKeyId(NodeI.GetId())+1; for (int e = 0; e < NodeI.GetOutDeg(); e++) { const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e))+1; // no self edges if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1; } } try { // can fail to converge but results seem to be good TSvd::Svd1Based(AdjMtx, LSingV, SngValV, RSingV); } catch(...) { printf("\n***No SVD convergence: G(%d, %d)\n", Nodes, Graph->GetEdges()); } } else { // Lanczos TNGraphMtx GraphMtx(Graph); TSparseSVD::LanczosSVD(GraphMtx, SngVecs, 2*SngVecs, ssotFull, SngValV, LSingV, RSingV); //TGAlg::SaveFullMtx(Graph, "adj_mtx.txt"); //TLAMisc::DumpTFltVVMjrSubMtrx(LSingV, LSingV.GetRows(), LSingV.GetCols(), "LSingV2.txt"); // save MTX } TFltIntPrV SngValIdV; for (int i = 0; i < SngValV.Len(); i++) { SngValIdV.Add(TFltIntPr(SngValV[i], i)); } SngValIdV.Sort(false); SngValV.Sort(false); for (int v = 0; v < SngValIdV.Len(); v++) { LeftSV.Add(); LSingV.GetCol(SngValIdV[v].Val2, LeftSV.Last()); RightSV.Add(); RSingV.GetCol(SngValIdV[v].Val2, RightSV.Last()); } IsAllValVNeg(LeftSV[0], true); IsAllValVNeg(RightSV[0], true); }
// Computes GINI coefficient of egonet as a subset of the parent graph (edges into and out of the egonet ARE considered) double TSnap::GetGiniCoefficient(const TIntFltH DegH, const TIntV NIdV) { typename TIntV::TIter VI; typename TFltV::TIter DI; TFltV DegV; const int n = NIdV.Len(); // DegV.Gen(n); // NOTE: don't use Gen() and Sort() on the same object (!) for (VI = NIdV.BegI(); VI < NIdV.EndI(); VI++) { DegV.Add(DegH.GetDat(VI->Val)); // might need to change this (in / out / undirected) } DegV.Sort(); int i = 0; double numerator = 0.0, denominator = 0.0; for (DI = DegV.BegI(); DI < DegV.EndI(); DI++, i++) { numerator += (i + 1)*DegV[i]; denominator += DegV[i]; } return(double(2*numerator) / double(n*denominator) - double(n + 1) / double(n)); }
void PlotSngValDistr(const PNGraph& Graph, const int& SngVals, const TStr& FNmPref, TStr DescStr) { const int NBuckets = 50; TFltV SngValV; for (int f = 1; SngValV.Empty() && f < 4; f++) { TSnap::GetSngVals(Graph, f*SngVals, SngValV); } SngValV.Sort(true); THash<TFlt, TFlt> BucketCntH; double Step = (SngValV.Last()-SngValV[0]) / double(NBuckets-1); for (int i = 0; i < NBuckets; i++) { BucketCntH.AddDat(SngValV[0]+Step*(i+0.5), 0); } for (int i = 0; i < SngValV.Len(); i++) { const int Bucket = (int) floor((SngValV[i]-SngValV[0]) / Step); BucketCntH[Bucket] += 1; } TFltPrV EigCntV; BucketCntH.GetKeyDatPrV(EigCntV); if (DescStr.Empty()) { DescStr = FNmPref; } TGnuPlot::PlotValV(EigCntV, "sngDistr."+FNmPref, TStr::Fmt("%s. G(%d, %d). Largest eig val = %f", DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges(), SngValV.Last().Val), "Singular value", "Count", gpsAuto, false, gpwLinesPoints); }
void TNetInfBs::GreedyOpt(const int& MxEdges) { double CurProb = GetAllCascProb(-1, -1); double LastGain = TFlt::Mx; int attempts = 0; bool msort = false; for (int k = 0; k < MxEdges && EdgeGainV.Len() > 0; k++) { double prev = CurProb; const TIntPr BestE = GetBestEdge(CurProb, LastGain, msort, attempts); if (BestE == TIntPr(-1, -1)) // if we cannot add more edges, we stop break; if (CompareGroundTruth) { double precision = 0, recall = 0; if (PrecisionRecall.Len() > 1) { precision = PrecisionRecall[PrecisionRecall.Len()-1].Val2.Val; recall = PrecisionRecall[PrecisionRecall.Len()-1].Val1.Val; } if (GroundTruth->IsEdge(BestE.Val1, BestE.Val2)) { recall++; } else { precision++; } PrecisionRecall.Add(TPair<TFlt, TFlt>(recall, precision)); } Graph->AddEdge(BestE.Val1, BestE.Val2); // add edge to network double Bound = 0; if (BoundOn) Bound = GetBound(BestE, prev); // localized update! TIntV &CascsEdge = CascPerEdge.GetDat(BestE); // only check cascades that contain the edge for (int c = 0; c < CascsEdge.Len(); c++) { CascV[CascsEdge[c]].UpdateProb(BestE.Val1, BestE.Val2, true); // update probabilities } // some extra info for the added edge TInt Vol; TFlt AverageTimeDiff; TFltV TimeDiffs; Vol = 0; AverageTimeDiff = 0; for (int i=0; i< CascV.Len(); i++) { if (CascV[i].IsNode(BestE.Val2) && CascV[i].GetParent(BestE.Val2) == BestE.Val1) { Vol += 1; TimeDiffs.Add(CascV[i].GetTm(BestE.Val2)-CascV[i].GetTm(BestE.Val1)); AverageTimeDiff += TimeDiffs[TimeDiffs.Len()-1]; } } AverageTimeDiff /= Vol; if (TimeDiffs.Len() > 0) TimeDiffs.Sort(); else TimeDiffs.Add(0); // compute bound only if explicitly required EdgeInfoH.AddDat(BestE) = TEdgeInfo(Vol, LastGain, Bound, TimeDiffs[(int)(TimeDiffs.Len()/2)], AverageTimeDiff); } if (CompareGroundTruth) { for (int i=0; i<PrecisionRecall.Len(); i++) { PrecisionRecall[i].Val2 = 1.0 - PrecisionRecall[i].Val2/(PrecisionRecall[i].Val2+PrecisionRecall[i].Val1); PrecisionRecall[i].Val1 /= (double)GroundTruth->GetEdges(); } } }