///////////////////////////////////////////////// // Trawling the web for emerging communities // graph, left points to right TTrawling::TTrawling(const PNGraph& Graph, const int& MinSupport) : MinSup(MinSupport) { TIntH ItemCntH; for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { IAssert(NI.GetOutDeg()==0 || NI.GetInDeg()==0); // edges only point from left to right if (NI.GetOutDeg()==0) { continue; } for (int e = 0; e < NI.GetOutDeg(); e++) { ItemCntH.AddDat(NI.GetOutNId(e)) += 1; } } TIntV RightV; for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { IAssert(NI.GetOutDeg()==0 || NI.GetInDeg()==0); // edges only point from left to right if (NI.GetOutDeg()==0) { continue; } RightV.Clr(false); for (int e = 0; e < NI.GetOutDeg(); e++) { const int itm = NI.GetOutNId(e); // only include items that already are above minimum support if (ItemCntH.GetDat(itm) >= MinSup) { RightV.Add(itm); } } if (! RightV.Empty()) { NIdSetH.AddDat(NI.GetId(), RightV); } } // for (int n = 0; n < NIdSetH.Len(); n++) { const TIntV& Set = NIdSetH[n]; for (int s = 0; s < Set.Len(); s++) { SetNIdH.AddDat(Set[s]).Add(n); } } }
void TSubGraphsEnum::RecurBfs1(const int& NId, const int& Depth) { if (Depth == 0) { TIntPrV EdgeV; EdgeH.GetKeyV(EdgeV); EdgeV.Sort(); SgV.Add(EdgeV); return; } const TNGraph::TNodeI NI = NGraph ->GetNI(NId); for (int e = 0; e < NI.GetOutDeg(); e++) { const TIntPr Edge(NId, NI.GetOutNId(e)); if (! EdgeH.IsKey(Edge)) { EdgeH.AddKey(Edge); RecurBfs1(NI.GetOutNId(e), Depth-1); EdgeH.DelKey(Edge); } } for (int e = 0; e < NI.GetInDeg(); e++) { const TIntPr Edge(NI.GetInNId(e), NId); if (! EdgeH.IsKey(Edge)) { EdgeH.AddKey(Edge); RecurBfs1(NI.GetInNId(e), Depth-1); EdgeH.DelKey(Edge); } } }
// improved version void GetMergeSortedV1(TIntV& NeighbourV, TNGraph::TNodeI NI) { int j = 0; int k = 0; int prev = -1; int indeg = NI.GetInDeg(); int outdeg = NI.GetOutDeg(); //while (j < NI.GetInDeg() && k < NI.GetOutDeg()) { if (indeg > 0 && outdeg > 0) { int v1 = NI.GetInNId(j); int v2 = NI.GetOutNId(k); while (1) { if (v1 <= v2) { if (prev != v1) { NeighbourV.Add(v1); prev = v1; } j += 1; if (j >= indeg) { break; } v1 = NI.GetInNId(j); } else { if (prev != v2) { NeighbourV.Add(v2); prev = v2; } k += 1; if (k >= outdeg) { break; } v2 = NI.GetOutNId(k); } } } while (j < indeg) { int v = NI.GetInNId(j); if (prev != v) { NeighbourV.Add(v); prev = v; } j += 1; } while (k < outdeg) { int v = NI.GetOutNId(k); if (prev != v) { NeighbourV.Add(v); prev = v; } k += 1; } }
void TIncrementalClustering::KeepAtMostOneChildPerNode(PNGraph& G, TQuoteBase *QB, TDocBase *DB) { TIntSet::TIter EndNode = AffectedNodes.EndI(); for (TIntSet::TIter NodeId = AffectedNodes.BegI(); NodeId < EndNode; NodeId++) { TNGraph::TNodeI Node = G->GetNI(NodeId.GetKey()); TQuote SourceQuote; if (QB->GetQuote(Node.GetId(), SourceQuote)) { TInt NodeDegree = Node.GetOutDeg(); if (NodeDegree > 1) { TFlt MaxScore = 0; TInt MaxNodeId = 0; TIntV NodeV; // first pass: check to see if we are pointing to any old nodes - if so, they get higher // priority over the new ones for edge selection. bool ContainsOldNode = false; for (int i = 0; i < NodeDegree; ++i) { if (!NewQuotes.IsKey(Node.GetOutNId(i))) { ContainsOldNode = true; } } // modified edge selection: filter out new nodes if old ones exist. for (int i = 0; i < NodeDegree; ++i) { TInt CurNode = Node.GetOutNId(i); NodeV.Add(CurNode); TQuote DestQuote; if (QB->GetQuote(CurNode, DestQuote)) { TFlt EdgeScore = 0; if (!ContainsOldNode || !NewQuotes.IsKey(Node.GetOutNId(i))) { EdgeScore = ComputeEdgeScore(SourceQuote, DestQuote, DB); } if (EdgeScore > MaxScore) { MaxScore = EdgeScore; MaxNodeId = CurNode; } } } // remove all other edges, backwards to prevent indexing fail for (int i = 0; i < NodeV.Len(); i++) { if (NodeV[i] != MaxNodeId) { G->DelEdge(Node.GetId(), NodeV[i]); } } //printf("Out degree: %d out of %d\n", Node.GetOutDeg(), NodeDegree.Val); } } } fprintf(stderr, "finished deleting edges\n"); }
// Builds a TIntNNet containing every node whose mode is listed in ModeIds
// (the mode id is stored as node data) and every edge of each per-mode-pair
// graph found in Graphs for an ordered pair of the requested modes.
// Prints the resulting node and edge counts to stdout.
TIntNNet TMultimodalGraphImplB::GetSubGraph(const TIntV ModeIds) const {
  TIntNNet SubGraph = TIntNNet();

  // nodes: keep those whose mode was requested
  for (THash<TInt,TInt>::TIter MapI = NodeToModeMapping.BegI(); MapI < NodeToModeMapping.EndI(); MapI++) {
    if (!ModeIds.IsIn(MapI.GetDat())) { continue; }
    SubGraph.AddNode(MapI.GetKey(), MapI.GetDat());
  }

  // edges: visit every ordered pair of requested modes
  const int ModeCnt = ModeIds.Len();
  for (int a = 0; a < ModeCnt; a++) {
    const int ModeA = ModeIds.GetVal(a);
    for (int b = 0; b < ModeCnt; b++) {
      const int ModeB = ModeIds.GetVal(b);
      TPair<TInt,TInt> PairKey = GetModeIdsKey(ModeA, ModeB);
      if (Graphs.IsKey(PairKey)) {
        const TNGraph& PairGraph = Graphs.GetDat(PairKey);
        for (TNGraph::TNodeI SrcI = PairGraph.BegNI(); SrcI < PairGraph.EndNI(); SrcI++) {
          const int SrcNId = SrcI.GetId();
          for (int e = 0; e < SrcI.GetOutDeg(); e++) {
            SubGraph.AddEdge(SrcNId, SrcI.GetOutNId(e));
          }
        }
      }
    }
  }

  printf("Number of nodes in SubGraph: %d...\n", SubGraph.GetNodes());
  printf("Number of edges in SubGraph: %d...\n", SubGraph.GetEdges());
  return SubGraph;
}
// Computes the modularity of a partition of a directed graph:
//   Q = (1/m) * [ #(edges inside a community)
//                 - sum over communities c of outdeg_sum(c) * indeg_sum(c) / m ]
// where m is the number of edges.
//
// graph        directed graph; node ids are used as indices into communities
// communities  communities[id] is the community label of node id; must have
//              exactly one entry per node
// Returns the modularity score, or 0.0 for a graph without edges (the
// expression is otherwise 0/0).
// Throws std::logic_error if the sizes do not match, and std::out_of_range
// (a std::logic_error) if a node id is not a valid index into communities.
double DirectedModularity(PNGraph& graph, std::vector<int>& communities) {
  if (static_cast<size_t>(graph->GetNodes()) != communities.size()) {
    throw std::logic_error("Number of nodes does not match community size.");
  }
  const int num_edges = graph->GetEdges();
  if (num_edges == 0) {
    return 0.0;  // avoid dividing by zero below
  }

  // Per-community sums of out- and in-degrees.
  std::map<int, double> outdeg_sums;
  std::map<int, double> indeg_sums;
  for (TNGraph::TNodeI node = graph->BegNI(); node < graph->EndNI(); node++) {
    const int comm = communities.at(node.GetId());
    outdeg_sums[comm] += node.GetOutDeg();
    indeg_sums[comm] += node.GetInDeg();
  }

  // Expected number of intra-community edges (null-model term).
  double score = 0.0;
  for (const auto& kv : outdeg_sums) {
    score -= (kv.second / num_edges) * indeg_sums[kv.first];
  }

  // Observed number of intra-community edges.
  for (TNGraph::TNodeI node = graph->BegNI(); node < graph->EndNI(); node++) {
    const int src_comm = communities.at(node.GetId());
    for (int e = 0; e < node.GetOutDeg(); ++e) {
      if (src_comm == communities.at(node.GetOutNId(e))) {
        score += 1.0;
      }
    }
  }
  return score / num_edges;
}
void GetSngVec(const PNGraph& Graph, TFltV& LeftSV, TFltV& RightSV) { const int Nodes = Graph->GetNodes(); TFltVV LSingV, RSingV; TFltV SngValV; if (Nodes < 500) { // perform full SVD TFltVV AdjMtx(Nodes+1, Nodes+1); TIntH NodeIdH; // create adjecency matrix for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { NodeIdH.AddKey(NodeI.GetId()); } for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { const int NodeId = NodeIdH.GetKeyId(NodeI.GetId()) + 1; for (int e = 0; e < NodeI.GetOutDeg(); e++) { const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e)) + 1; // no self edges if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1; } } try { // can fail to converge but results seem to be good TSvd::Svd1Based(AdjMtx, LSingV, SngValV, RSingV); } catch(...) { printf("\n***No SVD convergence: G(%d, %d)\n", Nodes, Graph->GetEdges()); } } else { // Lanczos TNGraphMtx GraphMtx(Graph); TSparseSVD::LanczosSVD(GraphMtx, 1, 8, ssotFull, SngValV, LSingV, RSingV); } TFlt MxSngVal = TFlt::Mn; int ValN = 0; for (int i = 0; i < SngValV.Len(); i++) { if (MxSngVal < SngValV[i]) { MxSngVal = SngValV[i]; ValN = i; } } LSingV.GetCol(ValN, LeftSV); RSingV.GetCol(ValN, RightSV); IsAllValVNeg(LeftSV, true); IsAllValVNeg(RightSV, true); }
void TSubGraphsEnum::Gen2Graphs() { // singe edge sub-graphs SgV.Gen(NGraph->GetEdges(), 0); TSimpleGraph SimpleG; TIntPrV& EdgeV = SimpleG.GetEdgeV(); EdgeV.Gen(1); for (TNGraph::TNodeI NI = NGraph->BegNI(); NI < NGraph->EndNI(); NI++) { for (int e = 0; e < NI.GetOutDeg(); e++) { EdgeV[0] = TIntPr(NI.GetId(), NI.GetOutNId(e)); SgV.Add(SimpleG); } } SgV.Sort(); // two edge sub-graphs EdgeV.Gen(2); for (int g1 = 0; g1 < SgV.Len()-1; g1++) { const TIntPr& E1 = SgV[g1].GetEdgeV()[0]; for (int g2 = g1+1; g2 < SgV.Len(); g2++) { const TIntPr& E2 = SgV[g2].GetEdgeV()[0]; if (E1.Val2 == E2.Val1 || E1.Val1 == E2.Val2 || E1.Val1 == E2.Val1 || E1.Val2 == E2.Val2) { EdgeV[0] = TMath::Mn(E1, E2); EdgeV[1] = TMath::Mx(E1, E2); SimpleG.Dump(); NextSgV.Add(SimpleG); } } } SgV.MoveFrom(NextSgV); }
int TMultimodalGraphImplB::GetSubGraphMocked(const TIntV ModeIds) const { int NumVerticesAndEdges = 0; for (THash<TInt,TInt>::TIter CurI = NodeToModeMapping.BegI(); CurI < NodeToModeMapping.EndI(); CurI++) { if (ModeIds.IsIn(CurI.GetDat())) { NumVerticesAndEdges++; } } for (int ModeIdx1 = 0; ModeIdx1 < ModeIds.Len(); ModeIdx1++) { int ModeId1 = ModeIds.GetVal(ModeIdx1); for (int ModeIdx2 = 0; ModeIdx2 < ModeIds.Len(); ModeIdx2++) { int ModeId2 = ModeIds.GetVal(ModeIdx2); TPair<TInt,TInt> ModeIdsKey = GetModeIdsKey(ModeId1, ModeId2); if (!Graphs.IsKey(ModeIdsKey)) { continue; } const TNGraph& Graph = Graphs.GetDat(ModeIdsKey); for (TNGraph::TNodeI it = Graph.BegNI(); it < Graph.EndNI(); it++) { for (int e = 0; e < it.GetOutDeg(); e++) { NumVerticesAndEdges += it.GetOutNId(e); } } } } return NumVerticesAndEdges; }
void TGraphCascade::PruneGraph() { // iterate over nodes int Nodes = NodeNmIdH.Len(); TIntV NodeIdV; NodeNmIdH.GetDatV(NodeIdV); TStrV NodeNmV; NodeNmIdH.GetKeyV(NodeNmV); for (int NodeN = 0; NodeN < Nodes; NodeN++) { int NodeId = NodeIdV[NodeN]; if (!EnabledNodeIdH.IsKey(NodeId)) { // if a node is not enabled: // - connect its parents to its children TNGraph::TNodeI NI = Graph.GetNI(NodeId); for (int ParentN = 0; ParentN < NI.GetInDeg(); ParentN++) { for (int ChildN = 0; ChildN < NI.GetOutDeg(); ChildN++) { if (!Graph.IsEdge(NI.GetInNId(ParentN), NI.GetOutNId(ChildN))) { Graph.AddEdge(NI.GetInNId(ParentN), NI.GetOutNId(ChildN)); } } } //printf("deleting node %s %d\n", NodeNmV[NodeN].CStr(), NodeId); // - delete it (deletes edges) Graph.DelNode(NodeId); } } // generate search sequence from sinks to sources TopologicalSort(NIdSweep); //Print(NIdSweep); }
// burn each link independently (forward with FwdBurnProb, backward with BckBurnProb) void TForestFire::BurnExpFire() { const double OldFwdBurnProb = FwdBurnProb; const double OldBckBurnProb = BckBurnProb; const int NInfect = InfectNIdV.Len(); const TNGraph& G = *Graph; TIntH BurnedNIdH; // burned nodes TIntV BurningNIdV = InfectNIdV; // currently burning nodes TIntV NewBurnedNIdV; // nodes newly burned in current step bool HasAliveNbrs; // has unburned neighbors int NBurned = NInfect, NDiedFire=0; for (int i = 0; i < InfectNIdV.Len(); i++) { BurnedNIdH.AddDat(InfectNIdV[i]); } NBurnedTmV.Clr(false); NBurningTmV.Clr(false); NewBurnedTmV.Clr(false); for (int time = 0; ; time++) { NewBurnedNIdV.Clr(false); // for each burning node for (int node = 0; node < BurningNIdV.Len(); node++) { const int& BurningNId = BurningNIdV[node]; const TNGraph::TNodeI Node = G.GetNI(BurningNId); HasAliveNbrs = false; NDiedFire = 0; // burn forward links (out-links) for (int e = 0; e < Node.GetOutDeg(); e++) { const int OutNId = Node.GetOutNId(e); if (! BurnedNIdH.IsKey(OutNId)) { // not yet burned HasAliveNbrs = true; if (Rnd.GetUniDev() < FwdBurnProb) { BurnedNIdH.AddDat(OutNId); NewBurnedNIdV.Add(OutNId); NBurned++; } } } // burn backward links (in-links) if (BckBurnProb > 0.0) { for (int e = 0; e < Node.GetInDeg(); e++) { const int InNId = Node.GetInNId(e); if (! BurnedNIdH.IsKey(InNId)) { // not yet burned HasAliveNbrs = true; if (Rnd.GetUniDev() < BckBurnProb) { BurnedNIdH.AddDat(InNId); NewBurnedNIdV.Add(InNId); NBurned++; } } } } if (! 
HasAliveNbrs) { NDiedFire++; } } NBurnedTmV.Add(NBurned); NBurningTmV.Add(BurningNIdV.Len() - NDiedFire); NewBurnedTmV.Add(NewBurnedNIdV.Len()); //BurningNIdV.AddV(NewBurnedNIdV); // node is burning eternally BurningNIdV.Swap(NewBurnedNIdV); // node is burning just 1 time step if (BurningNIdV.Empty()) break; FwdBurnProb = FwdBurnProb * ProbDecay; BckBurnProb = BckBurnProb * ProbDecay; } BurnedNIdV.Gen(BurnedNIdH.Len(), 0); for (int i = 0; i < BurnedNIdH.Len(); i++) { BurnedNIdV.Add(BurnedNIdH.GetKey(i)); } FwdBurnProb = OldFwdBurnProb; BckBurnProb = OldBckBurnProb; }
void TempMotifCounter::GetAllNeighbors(int node, TIntV& nbrs) { nbrs = TIntV(); TNGraph::TNodeI NI = static_graph_->GetNI(node); for (int i = 0; i < NI.GetOutDeg(); i++) { nbrs.Add(NI.GetOutNId(i)); } for (int i = 0; i < NI.GetInDeg(); i++) { int nbr = NI.GetInNId(i); if (!NI.IsOutNId(nbr)) { nbrs.Add(nbr); } } }
// Jaccard similarity of the out-neighbour sets of two nodes:
//   |A intersect B| / |A union B|
// The intersection is counted with a two-pointer sweep, which relies on
// GetOutNId returning ids in ascending order -- TODO confirm.
// Returns 0 when both nodes have no out-neighbours (the ratio would be 0/0).
float JaccardSim(TNGraph::TNodeI NI1, TNGraph::TNodeI NI2) {
  const int lenA = NI1.GetOutDeg();
  const int lenB = NI2.GetOutDeg();
  int ct = 0;  // size of the intersection
  int i = 0;
  int j = 0;
  while (i < lenA && j < lenB) {
    const int a = NI1.GetOutNId(i);
    const int b = NI2.GetOutNId(j);
    if (a == b) {
      ct++; i++; j++;
    } else if (a > b) {
      j++;
    } else {
      i++;
    }
  }
  const int unionSize = lenA + lenB - ct;
  if (unionSize == 0) {
    return 0.0f;
  }
  return static_cast<float>(ct * 1.0 / unionSize);
}
// Fills SortedNIdV with all node ids of Graph, ordered by an iterative
// depth-first search: a node is appended once none of its children is
// unvisited, and the vector is reversed at the end.
// Raises (via EAssertR) when a child of the node being visited is currently
// temp-marked, i.e. when the graph contains a cycle.
void TGraphCascade::TopologicalSort(TIntV& SortedNIdV) {
    int Nodes = Graph.GetNodes();

    SortedNIdV.Gen(Nodes, 0); // result
    THash<TInt, TBool> Marks(Nodes); // nodeid -> mark map (node has been visited)
    THash<TInt,TBool> TempMarks(Nodes); // nodeid -> temp mark map (visited but not yet finished)
    THash<TInt, TBool> Added(Nodes); // nodeid -> already appended to SortedNIdV
    TIntV NIdV;  Graph.GetNIdV(NIdV); // all node ids

    // set marks
    for (int NodeN = 0; NodeN < Nodes; NodeN++) {
        int NodeId = NIdV[NodeN];
        Marks.AddDat(NodeId, false);
        TempMarks.AddDat(NodeId, false);
        Added.AddDat(NodeId, false);
    }

    TSStack<TInt> Stack;
    for (int NodeN = 0; NodeN < Nodes; NodeN++) {
        int NodeId = NIdV[NodeN];
        // select an unmarked node
        if (!Marks.GetDat(NodeId)) {
            Stack.Push(NodeId);
            while (!Stack.Empty()) {
                // visit TopNode (it stays on the stack until it is final)
                int TopNodeId = Stack.Top();
                Marks.GetDat(TopNodeId) = true;
                TempMarks.GetDat(TopNodeId) = true;
                // push the unvisited children; a temp-marked child means a cycle
                TNGraph::TNodeI NI = Graph.GetNI(TopNodeId);
                int Children = NI.GetOutDeg();
                bool IsFinal = true;
                for (int ChildN = 0; ChildN < Children; ChildN++) {
                    int ChildId = NI.GetOutNId(ChildN);
                    EAssertR(!TempMarks.GetDat(ChildId), "TGraphCascade::TopologicalSort: the graph is not a DAG!");
                    if (!Marks.GetDat(ChildId)) {
                        // unvisited node
                        IsFinal = false;
                        Stack.Push(ChildId);
                    }
                }
                if (IsFinal) {
                    // push TopNode to tail; the Added check skips nodes that were
                    // pushed on the stack more than once
                    if (!Added.GetDat(TopNodeId)) {
                        SortedNIdV.Add(TopNodeId);
                        Added.GetDat(TopNodeId) = true;
                    }
                    TempMarks.GetDat(TopNodeId) = false;
                    Stack.Pop();
                }
            }
        }
    }
    // nodes were appended after their children; reverse the order
    SortedNIdV.Reverse();
}
// RenumberNodes ... Renumber node ids in the subgraph to 0...N-1 PNGraph GetSubGraph(const PNGraph& Graph, const TIntV& NIdV, const bool& RenumberNodes) { //if (! RenumberNodes) { return TSnap::GetSubGraph(Graph, NIdV); } PNGraph NewGraphPt = TNGraph::New(); TNGraph& NewGraph = *NewGraphPt; NewGraph.Reserve(NIdV.Len(), -1); TIntSet NIdSet(NIdV.Len()); for (int n = 0; n < NIdV.Len(); n++) { if (Graph->IsNode(NIdV[n])) { NIdSet.AddKey(NIdV[n]); if (! RenumberNodes) { NewGraph.AddNode(NIdV[n]); } else { NewGraph.AddNode(NIdSet.GetKeyId(NIdV[n])); } } } if (! RenumberNodes) { for (int n = 0; n < NIdSet.Len(); n++) { const int SrcNId = NIdSet[n]; const TNGraph::TNodeI NI = Graph->GetNI(SrcNId); for (int edge = 0; edge < NI.GetOutDeg(); edge++) { const int OutNId = NI.GetOutNId(edge); if (NIdSet.IsKey(OutNId)) { NewGraph.AddEdge(SrcNId, OutNId); } } } } else { for (int n = 0; n < NIdSet.Len(); n++) { const int SrcNId = NIdSet[n]; const TNGraph::TNodeI NI = Graph->GetNI(SrcNId); for (int edge = 0; edge < NI.GetOutDeg(); edge++) { const int OutNId = NI.GetOutNId(edge); if (NIdSet.IsKey(OutNId)) { NewGraph.AddEdge(NIdSet.GetKeyId(SrcNId), NIdSet.GetKeyId(OutNId)); } } } } return NewGraphPt; }
void TSubGraphsEnum::RecurBfs(const int& NId, const int& Depth, TSimpleGraph& PrevG) { if (Depth == 0) { TIntPrV& EdgeV = PrevG(); EdgeV.Sort(); for (int i = 1; i < EdgeV.Len(); i++) { if (EdgeV[i-1] == EdgeV[i]) { return; } } SgV.Add(PrevG); return; } const TNGraph::TNodeI NI = NGraph ->GetNI(NId); for (int e = 0; e < NI.GetOutDeg(); e++) { TSimpleGraph CurG = PrevG; CurG.AddEdge(NI.GetId(), NI.GetOutNId(e)); RecurBfs(NI.GetOutNId(e), Depth-1, CurG); } for (int e = 0; e < NI.GetInDeg(); e++) { TSimpleGraph CurG = PrevG; CurG.AddEdge(NI.GetInNId(e), NI.GetId()); RecurBfs(NI.GetInNId(e), Depth-1, CurG); } }
// Computes the signature vector SigV of Graph and resets VariantId to 0.
// SigV holds, in this order: the node count, the edge count, the
// (in-degree, out-degree) pairs of all nodes sorted in descending order, and
// -- only when MnSvdGraph <= Nodes < MxSvdGraph -- the singular values of the
// adjacency matrix (self edges ignored), sorted descending and rounded with
// RoundTo.
void TGraphKey::TakeSig(const PNGraph& Graph, const int& MnSvdGraph, const int& MxSvdGraph) {
  const int Edges = Graph->GetEdges();
  Nodes = Graph->GetNodes();
  VariantId = 0;
  SigV.Gen(2+Nodes, 0);
  // degree sequence
  TIntPrV DegV(Nodes, 0);
  for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
    DegV.Add(TIntPr(NodeI.GetInDeg(), NodeI.GetOutDeg()));
  }
  DegV.Sort(false);
  SigV.Add(TFlt(Nodes));
  SigV.Add(TFlt(Edges));
  for (int i = 0; i < DegV.Len(); i++) {
    SigV.Add(DegV[i].Val1());
    SigV.Add(DegV[i].Val2());
  }
  // singular values signature
  //   it turns out that it is cheaper to do brute force isomorphism
  //   checking than to calculate SVD and then check isomorphism
  if (Nodes >= MnSvdGraph && Nodes < MxSvdGraph) {
    // perform full SVD
    // matrix indices are shifted by one, so row/column 0 stay unused
    TFltVV AdjMtx(Nodes+1, Nodes+1);
    TFltV SngValV;
    TFltVV LSingV, RSingV;
    TIntH NodeIdH; // node id -> consecutive key id
    // create adjacency matrix
    for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
      NodeIdH.AddKey(NodeI.GetId());
    }
    for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
      const int NodeId = NodeIdH.GetKeyId(NodeI.GetId()) + 1;
      for (int e = 0; e < NodeI.GetOutDeg(); e++) {
        const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e)) + 1;  // no self edges
        if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1;
      }
    }
    // NOTE(review): the matrix is filled with 1-based indices but TSvd::Svd is
    // called here, while the GetSngVals/GetSngVec functions call
    // TSvd::Svd1Based on the same layout -- confirm which one is intended.
    try { // can fail to converge but results seem to be good
      TSvd::Svd(AdjMtx, LSingV, SngValV, RSingV);
    } catch(...) {
      printf("\n***No SVD convergence: G(%d, %d): SngValV.Len():%d\n", Nodes(), Graph->GetEdges(), SngValV.Len());
    }
    // round singular values
    SngValV.Sort(false);
    for (int i = 0; i < SngValV.Len(); i++) {
      SigV.Add(TMath::Round(SngValV[i], RoundTo));
    }
  }
  //printf("SIG:\n");  for (int i = 0; i < SigV.Len(); i++) { printf("\t%f\n", SigV[i]); }
  SigV.Pack();
}
// initial version void GetMergeSortedV(TIntV& NeighbourV, TNGraph::TNodeI NI) { int ind, j, k; ind = j = k = 0; while (j < NI.GetInDeg() && k < NI.GetOutDeg()) { int v1 = NI.GetInNId(j); int v2 = NI.GetOutNId(k); if (v1 <= v2) { if ((ind == 0) || (NeighbourV[ind-1] != v1)) { NeighbourV.Add(v1); ind += 1; } j += 1; } else { if ((ind == 0) || (NeighbourV[ind-1] != v2)) { NeighbourV.Add(v2); ind += 1; } k += 1; } } while (j < NI.GetInDeg()) { int v = NI.GetInNId(j); if ((ind == 0) || (NeighbourV[ind-1] != v)) { NeighbourV.Add(v); ind += 1; } j += 1; } while (k < NI.GetOutDeg()) { int v = NI.GetOutNId(k); if ((ind == 0) || (NeighbourV[ind-1] != v)) { NeighbourV.Add(v); ind += 1; } k += 1; } }
// Rok #5 void GetMergeSortedV(TIntV& NeighbourV, TNGraph::TNodeI NI) { int j = 0; int k = 0; int prev = -1; while (j < NI.GetInDeg() && k < NI.GetOutDeg()) { int v1 = NI.GetInNId(j); int v2 = NI.GetOutNId(k); if (v1 <= v2) { if (prev != v1) { NeighbourV.Add(v1); prev = v1; } j += 1; } else { if (prev != v2) { NeighbourV.Add(v2); prev = v2; } k += 1; } } while (j < NI.GetInDeg()) { int v = NI.GetInNId(j); if (prev != v) { NeighbourV.Add(v); prev = v; } j += 1; } while (k < NI.GetOutDeg()) { int v = NI.GetOutNId(k); if (prev != v) { NeighbourV.Add(v); prev = v; } k += 1; } }
// Computes singular values of the adjacency matrix of Graph and returns them
// in SngValV, sorted in descending order.
//   SngVals  number of singular values requested; must be > 0. It only
//            controls the Lanczos path -- the result is not truncated or
//            padded to this length (that code is commented out below).
// Graphs with fewer than 100 nodes use a dense SVD (self edges ignored);
// larger graphs use Lanczos on TNGraphMtx. Failures of either solver are
// caught and reported on stdout.
void GetSngVals(const PNGraph& Graph, const int& SngVals, TFltV& SngValV) {
  const int Nodes = Graph->GetNodes();
  IAssert(SngVals > 0);
  if (Nodes < 100) {
    // perform full SVD
    // matrix indices are shifted by one, so row/column 0 stay unused
    TFltVV AdjMtx(Nodes+1, Nodes+1);
    TFltVV LSingV, RSingV;
    TIntH NodeIdH; // node id -> consecutive key id
    // create adjacency matrix
    for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
      NodeIdH.AddKey(NodeI.GetId());
    }
    for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
      const int NodeId = NodeIdH.GetKeyId(NodeI.GetId()) + 1;
      for (int e = 0; e < NodeI.GetOutDeg(); e++) {
        const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e)) + 1;  // no self edges
        if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1;
      }
    }
    try { // can fail to converge but results seem to be good
      TSvd::Svd1Based(AdjMtx, LSingV, SngValV, RSingV);
    } catch(...) {
      printf("\n***No SVD convergence: G(%d, %d)\n", Nodes, Graph->GetEdges());
    }
  } else {
    // Lanczos
    TNGraphMtx GraphMtx(Graph);
    // CalcVals is only used in the diagnostic message below
    int CalcVals = int(2*SngVals);
    //if (CalcVals > Nodes) { CalcVals = int(2*Nodes); }
    //if (CalcVals > Nodes) { CalcVals = Nodes; }
    //while (SngValV.Len() < SngVals && CalcVals < 10*SngVals) {
    try {
      if (SngVals > 4) {
        TSparseSVD::SimpleLanczosSVD(GraphMtx, 2*SngVals, SngValV, false);
      } else {
        TFltVV LSingV, RSingV; // this is much more precise, but also much slower
        TSparseSVD::LanczosSVD(GraphMtx, SngVals, 3*SngVals, ssotFull, SngValV, LSingV, RSingV);
      }
    } catch(...) {
      printf("\n ***EXCEPTION: TRIED %d GOT %d values** \n", 2*SngVals, SngValV.Len());
    }
    if (SngValV.Len() < SngVals) {
      printf(" ***TRIED %d GOT %d values** \n", CalcVals, SngValV.Len());
    }
    //  CalcVals += SngVals;
    //}
  }
  SngValV.Sort(false);
  //if (SngValV.Len() > SngVals) {
  //  SngValV.Del(SngVals, SngValV.Len()-1); }
  //else {
  //  while (SngValV.Len() < SngVals) SngValV.Add(1e-6); }
  //IAssert(SngValV.Len() == SngVals);
}
// renumbers nodes void TGraphKey::TakeGraph(const PNGraph& Graph) { TIntH NodeIdH; for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { NodeIdH.AddKey(NI.GetId()); } Nodes = Graph->GetNodes(); EdgeV.Gen(Nodes, 0); for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { const int NewNId = NodeIdH.GetKeyId(NI.GetId()); for (int i = 0; i < NI.GetOutDeg(); i++) { EdgeV.Add(TIntPr(NewNId, NodeIdH.GetKeyId(NI.GetOutNId(i)))); } } EdgeV.Sort(true); EdgeV.Pack(); }
void GetSngVec(const PNGraph& Graph, const int& SngVecs, TFltV& SngValV, TVec<TFltV>& LeftSV, TVec<TFltV>& RightSV) { const int Nodes = Graph->GetNodes(); SngValV.Clr(); LeftSV.Clr(); RightSV.Clr(); TFltVV LSingV, RSingV; if (Nodes < 100) { // perform full SVD TFltVV AdjMtx(Nodes+1, Nodes+1); TIntH NodeIdH; // create adjecency matrix (1-based) for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { NodeIdH.AddKey(NodeI.GetId()); } for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { const int NodeId = NodeIdH.GetKeyId(NodeI.GetId())+1; for (int e = 0; e < NodeI.GetOutDeg(); e++) { const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e))+1; // no self edges if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1; } } try { // can fail to converge but results seem to be good TSvd::Svd1Based(AdjMtx, LSingV, SngValV, RSingV); } catch(...) { printf("\n***No SVD convergence: G(%d, %d)\n", Nodes, Graph->GetEdges()); } } else { // Lanczos TNGraphMtx GraphMtx(Graph); TSparseSVD::LanczosSVD(GraphMtx, SngVecs, 2*SngVecs, ssotFull, SngValV, LSingV, RSingV); //TGAlg::SaveFullMtx(Graph, "adj_mtx.txt"); //TLAMisc::DumpTFltVVMjrSubMtrx(LSingV, LSingV.GetRows(), LSingV.GetCols(), "LSingV2.txt"); // save MTX } TFltIntPrV SngValIdV; for (int i = 0; i < SngValV.Len(); i++) { SngValIdV.Add(TFltIntPr(SngValV[i], i)); } SngValIdV.Sort(false); SngValV.Sort(false); for (int v = 0; v < SngValIdV.Len(); v++) { LeftSV.Add(); LSingV.GetCol(SngValIdV[v].Val2, LeftSV.Last()); RightSV.Add(); RSingV.GetCol(SngValIdV[v].Val2, RightSV.Last()); } IsAllValVNeg(LeftSV[0], true); IsAllValVNeg(RightSV[0], true); }
// Copies dir_graph into undir_graph, dropping edge direction: every node is
// added with the same id, and every directed edge (u, v) becomes the
// undirected edge {u, v} unless that edge is already present.
void UndirCopy(PNGraph& dir_graph, PUNGraph& undir_graph) {
  // Add all of the nodes into the new graph
  for (TNGraph::TNodeI it = dir_graph->BegNI(); it < dir_graph->EndNI(); it++) {
    undir_graph->AddNode(it.GetId());
  }
  // Add the edges; reciprocal pairs collapse into one undirected edge
  for (TNGraph::TNodeI it = dir_graph->BegNI(); it < dir_graph->EndNI(); it++) {
    const int src = it.GetId();
    const int out_deg = it.GetOutDeg();
    for (int e = 0; e < out_deg; ++e) {
      const int dst = it.GetOutNId(e);
      if (undir_graph->IsEdge(src, dst)) { continue; }
      undir_graph->AddEdge(src, dst);
    }
  }
}
/// Generates a random scale-free network using the Copying Model. /// The generating process operates as follows: Node u is added to a graph, it /// selects a random node v, and with prob Beta it links to v, with 1-Beta /// links u links to neighbor of v. The power-law degree exponent is -1/(1-Beta). /// See: Stochastic models for the web graph. /// Kumar, Raghavan, Rajagopalan, Sivakumar, Tomkins, Upfal. /// URL: http://snap.stanford.edu/class/cs224w-readings/kumar00stochastic.pdf PNGraph GenCopyModel(const int& Nodes, const double& Beta, TRnd& Rnd) { PNGraph GraphPt = TNGraph::New(); TNGraph& Graph = *GraphPt; Graph.Reserve(Nodes, Nodes); const int startNId = Graph.AddNode(); Graph.AddEdge(startNId, startNId); for (int n = 1; n < Nodes; n++) { const int rnd = Graph.GetRndNId(); const int NId = Graph.AddNode(); if (Rnd.GetUniDev() < Beta) { Graph.AddEdge(NId, rnd); } else { const TNGraph::TNodeI NI = Graph.GetNI(rnd); const int rnd2 = Rnd.GetUniDevInt(NI.GetOutDeg()); Graph.AddEdge(NId, NI.GetOutNId(rnd2)); } } return GraphPt; }
void getOutNeighborNodeIDs(const PNGraph& graph, int srcNodeID, std::set<int>& nodeIdSet) { std::queue<int> q; q.push(srcNodeID); nodeIdSet.insert(srcNodeID); for (int level = 0; level < 2; ++level) { int levelCount = q.size(); for (int i = 0; i < levelCount; ++i) { int curNodeId = q.front(); q.pop(); // Scan neigbors; TNGraph::TNodeI curNode = graph->GetNI(curNodeId); int outDeg = curNode.GetOutDeg(); for (int j = 0; j < outDeg; ++j) { int curNeighborNodeID = curNode.GetOutNId(j); q.push(curNeighborNodeID); nodeIdSet.insert(curNeighborNodeID); } } } }
/// Rewire the network. Keeps node degrees as is but randomly rewires the edges.
/// Use this function to generate a random graph with the same degree sequence
/// as the OrigGraph.
/// Attempts 2 * Edges * NSwitch pairwise edge switches (stopping early after
/// two hours) and prints progress to stdout.
/// See:  On the uniform generation of random graphs with prescribed degree
/// sequences by R. Milo, N. Kashtan, S. Itzkovitz, M. E. J. Newman, U. Alon.
/// URL: http://arxiv.org/abs/cond-mat/0312028
/// NOTE(review): a switch replaces (a,b),(c,d) with (a,c),(b,d). For a
/// directed graph this keeps each node's total degree, but it is not obvious
/// that in- and out-degree are kept separately -- confirm the intended swap.
PNGraph GenRewire(const PNGraph& OrigGraph, const int& NSwitch, TRnd& Rnd) {
  const int Nodes = OrigGraph->GetNodes();
  const int Edges = OrigGraph->GetEdges();
  PNGraph GraphPt = TNGraph::New();
  TNGraph& Graph = *GraphPt;
  Graph.Reserve(Nodes, -1);
  TExeTm ExeTm;
  // generate a graph that satisfies the constraints
  printf("Randomizing edges (%d, %d)...\n", Nodes, Edges);
  // collect all edges in a set and copy the nodes into the new graph
  TIntPrSet EdgeSet(Edges);
  for (TNGraph::TNodeI NI = OrigGraph->BegNI(); NI < OrigGraph->EndNI(); NI++) {
    const int NId = NI.GetId();
    for (int e = 0; e < NI.GetOutDeg(); e++) {
      EdgeSet.AddKey(TIntPr(NId, NI.GetOutNId(e)));
    }
    Graph.AddNode(NI);
  }
  // edge switching
  uint skip=0; // number of attempts that did not change the edge set
  for (uint swps = 0; swps < 2*uint(Edges)*uint(NSwitch); swps++) {
    const int keyId1 = EdgeSet.GetRndKeyId(Rnd);
    const int keyId2 = EdgeSet.GetRndKeyId(Rnd);
    if (keyId1 == keyId2) { skip++; continue; }
    const TIntPr& E1 = EdgeSet[keyId1];
    const TIntPr& E2 = EdgeSet[keyId2];
    // the new edges are built as copies, so deleting E1/E2 below is safe
    TIntPr NewE1(E1.Val1, E2.Val1), NewE2(E1.Val2, E2.Val2);
    // NOTE(review): the test NewE1.Val2!=NewE2.Val1 appears twice below; one
    // of them was probably meant to be a different comparison (for example
    // NewE1.Val1!=NewE2.Val2) -- confirm before changing, since it alters
    // which switches are accepted.
    if (NewE1.Val1!=NewE2.Val1 && NewE1.Val2!=NewE2.Val1 && NewE1.Val2!=NewE2.Val1 && NewE1.Val2!=NewE2.Val2 && ! EdgeSet.IsKey(NewE1) && ! EdgeSet.IsKey(NewE2)) {
      EdgeSet.DelKeyId(keyId1);  EdgeSet.DelKeyId(keyId2);
      EdgeSet.AddKey(TIntPr(NewE1));
      EdgeSet.AddKey(TIntPr(NewE2));
    } else { skip++; }
    // progress report once every Edges attempts
    if (swps % Edges == 0) {
      printf("\r %uk/%uk: %uk skip [%s]", swps/1000u, 2*uint(Edges)*uint(NSwitch)/1000u, skip/1000u, ExeTm.GetStr());
      if (ExeTm.GetSecs() > 2*3600) { printf(" *** Time limit!\n"); break; } // time limit 2 hours
    }
  }
  printf("\r total %uk switchings attempted, %uk skiped [%s]\n", 2*uint(Edges)*uint(NSwitch)/1000u, skip/1000u, ExeTm.GetStr());
  // materialise the rewired edge set in the new graph
  for (int e = 0; e < EdgeSet.Len(); e++) {
    Graph.AddEdge(EdgeSet[e].Val1, EdgeSet[e].Val2); }
  return GraphPt;
}
int main(int argc, char* argv[]) { // create a graph and save it { PNGraph Graph = TNGraph::New(); for (int i = 0; i < 10; i++) { Graph->AddNode(i); } for (int i = 0; i < 10; i++) { Graph->AddEdge(i, TInt::Rnd.GetUniDevInt(10)); } TSnap::SaveEdgeList(Graph, "graph.txt", "Edge list format"); } // load a graph PNGraph Graph; Graph = TSnap::LoadEdgeList<PNGraph>("graph.txt", 0, 1); // traverse nodes for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { printf("NodeId: %d, InDegree: %d, OutDegree: %d\n", NI.GetId(), NI.GetInDeg(), NI.GetOutDeg()); printf("OutNodes: "); for (int e = 0; e < NI.GetOutDeg(); e++) { printf(" %d", NI.GetOutNId(e)); } printf("\nInNodes: "); for (int e = 0; e < NI.GetInDeg(); e++) { printf(" %d", NI.GetInNId(e)); } printf("\n\n"); } // graph statistic TSnap::PrintInfo(Graph, "Graph info"); PNGraph MxWcc = TSnap::GetMxWcc(Graph); TSnap::PrintInfo(MxWcc, "Largest Weakly connected component"); // random graph PNGraph RndGraph = TSnap::GenRndGnm<PNGraph>(100, 1000); TGStat GraphStat(RndGraph, TSecTm(1), TGStat::AllStat(), "Gnm graph"); GraphStat.PlotAll("RndGraph", "Random graph on 1000 nodes"); // Forest Fire graph { TFfGGen ForestFire(false, 1, 0.35, 0.30, 1.0, 0.0, 0.0); ForestFire.GenGraph(100); PNGraph FfGraph = ForestFire.GetGraph(); } // network TPt<TNodeEDatNet<TStr, TStr> > Net = TNodeEDatNet<TStr, TStr>::New(); Net->AddNode(0, "zero"); Net->AddNode(1, "one"); Net->AddEdge(0, 1, "zero to one"); return 0; }
// Builds a K-nearest-neighbour network based on Jaccard similarity.
// For every node of Graph that has out-edges and no in-edges, the candidates
// are collected by merging neighbour lists of its out-neighbours (via
// MergeNbrs); each candidate is scored with JaccardSim against the node and
// the K best-scoring candidates are kept. The result contains all nodes of
// Graph and, for every kept pair, an edge carrying the score in the float
// edge attribute "sim". Only candidates with a score above 0 are kept.
// The per-node work runs in an OpenMP parallel loop; per-thread results are
// merged in a critical section.
// Returns the network with nodes only when K <= 0.
PNEANet KNNJaccardParallel(PNGraph Graph,int K) {
  PNEANet KNN = TNEANet::New();
  TIntV NIdV;
  Graph->GetNIdV (NIdV);
  int size = NIdV.Len();
  for (int ind = 0; ind < size; ind++) {
    KNN->AddNode(NIdV[ind]);
  }
  KNN->AddFltAttrE("sim");
  // the top-K list below is indexed at K-1, so there is nothing to do for K <= 0
  if (K <= 0) { return KNN; }

  TVec<TVec<TPair<TFlt, TInt>, int >, int > TopKList;
  TVec<TVec<TPair<TFlt, TInt>, int >, int > ThTopK; // for each thread
  TIntV NodeList;
  TIntV ThNodeList; // for each thread
  int NumThreads = omp_get_max_threads();
  omp_set_num_threads(NumThreads);
  #pragma omp parallel private(ThNodeList, ThTopK)
  {
    // Two scratch buffers per thread, used alternately as merge input and
    // output. They are automatic objects, so they are released when the
    // thread leaves the parallel region (they used to be heap-allocated and
    // never freed).
    TIntV NbrBufA;
    TIntV NbrBufB;
    TIntV* Neighbors_old = &NbrBufA;
    TIntV* Neighbors = &NbrBufB;
    TIntV* temp;

    #pragma omp for schedule(dynamic,1000)
    for (int ind = 0; ind < size; ind++) {
      TNGraph::TNodeI NI = Graph->GetNI(NIdV[ind]);
      if (NI.GetInDeg() > 0) { continue; }
      if (NI.GetOutDeg() == 0) { continue; }

      // top-K list, best first; (0.0, -1) marks an unused slot
      TVec<TPair<TFlt, TInt>, int > TopK;
      for (int i = 0; i < K; i++) {
        TopK.Add(TPair<TFlt,TInt>(0.0, -1));
      }

      Neighbors->Clr(false);
      Neighbors_old->Clr(false);

      // accumulate the candidate list one out-neighbour at a time
      for (int i = 0; i < NI.GetOutDeg(); i++) {
        TNGraph::TNodeI Inst_NI = Graph->GetNI(NI.GetOutNId(i));
        MergeNbrs(Neighbors, Neighbors_old, Inst_NI);
        // the merged result becomes the input of the next round
        temp = Neighbors_old;
        temp->Clr(false);
        Neighbors_old = Neighbors;
        Neighbors = temp;
      }
      // Swap neighbors and Neighbors_old so that Neighbors is the final list
      temp = Neighbors_old;
      Neighbors_old = Neighbors;
      Neighbors = temp;

      for (int j = 0; j < Neighbors->Len(); j++) {
        TNGraph::TNodeI Auth_NI = Graph->GetNI((*Neighbors)[j]);
        float similarity = JaccardSim(NI, Auth_NI);
        if (TopK[K-1].GetVal1() < similarity) {
          // shift weaker entries down and insert at the freed position
          int index = 0;
          for (int i = K-2; i >= 0; i--) {
            if (TopK[i].GetVal1() < similarity) {
              TopK.SetVal(i+1, TopK[i]);
            } else {
              index = i+1;
              break;
            }
          }
          TopK.SetVal(index, TPair<TFlt, TInt>(similarity, (*Neighbors)[j]));
        }
      }

      ThTopK.Add(TopK);
      ThNodeList.Add(NIdV[ind]);
      // if (ct%10000 == 0)
      //   cout<<ct<<" avg neighbor degree = "<<sum_neighbors*1.0/ct<<" "<<currentDateTime()<<endl;
    }

    // merge this thread's results into the shared lists
    #pragma omp critical
    {
      for (int j = 0; j < ThTopK.Len(); j++) {
        TopKList.Add(ThTopK[j]);
        NodeList.Add(ThNodeList[j]);
      }
    }
  }

  // turn the collected top-K lists into edges
  int size2 = NodeList.Len();
  for (int i = 0; i < size2; i++) {
    for (int j = 0; j < K; j++) {
      if (TopKList[i][j].GetVal2() <= -1) { break; } // unused slot: list ends here
      int EId = KNN->AddEdge(NodeList[i], TopKList[i][j].GetVal2());
      KNN->AddFltAttrDatE(EId, TopKList[i][j].GetVal1(), "sim");
    }
  }
  return KNN;
}
// Simulates one cascade over the GroundTruth network and stores it in C.
// The simulation is repeated from a new random start node until the cascade
// has at least two entries. Does nothing when GroundTruth has no nodes.
//   C                 out: the generated cascade (node id, infection time), sorted
//   TModel            transmission-time model: 0 exponential, 1 power-law,
//                     2 Rayleigh, anything else a constant delay of 1
//   window            observation window; nodes whose time is >= window are not added
//   EdgesUsed         in/out: per-edge counter, incremented for every
//                     (infector, infected) pair recorded in this cascade
//   delta             lower bound for the power-law delay (model 1 resamples below it)
//   std_waiting_time  if > 0, std. deviation of Gaussian noise added to the delay
//   std_beta          std. deviation of Gaussian noise added to the edge's beta
void TNetInfBs::GenCascade(TCascade& C, const int& TModel, const double &window, TIntPrIntH& EdgesUsed, const double& delta,
                           const double& std_waiting_time, const double& std_beta) {
  TIntFltH InfectedNIdH; // node id -> infection time (set to window once processed)
  TIntH InfectedBy;      // node id -> id of the node that infected it
  double GlobalTime; int StartNId;
  double alpha, beta;

  if (GroundTruth->GetNodes() == 0)
    return;

  while (C.Len() < 2) {
    C.Clr();
    InfectedNIdH.Clr();
    InfectedBy.Clr();
    GlobalTime = 0;

    StartNId = GroundTruth->GetRndNId();
    InfectedNIdH.AddDat(StartNId) = GlobalTime;

    while (true) {
      // sort by time & get the oldest node that did not run infection
      InfectedNIdH.SortByDat(true);
      const int& NId = InfectedNIdH.BegI().GetKey();
      GlobalTime = InfectedNIdH.BegI().GetDat();

      // all the nodes has run infection
      if (GlobalTime >= window)
        break;

      // add current oldest node to the network and set its time
      C.Add(NId, GlobalTime);

      // run infection from the current oldest node
      const TNGraph::TNodeI NI = GroundTruth->GetNI(NId);
      for (int e = 0; e < NI.GetOutDeg(); e++) {
        const int DstNId = NI.GetOutNId(e);

        beta = Betas.GetDat(TIntPr(NId, DstNId));

        // flip biased coin (set by beta)
        if (TInt::Rnd.GetUniDev() > beta+std_beta*TFlt::Rnd.GetNrmDev())
          continue;

        alpha = Alphas.GetDat(TIntPr(NId, DstNId));

        // not infecting the parent
        if (InfectedBy.IsKey(NId) && InfectedBy.GetDat(NId).Val == DstNId)
          continue;

        // draw the transmission delay for this edge
        double sigmaT;
        switch (TModel) {
        case 0:
          // exponential with alpha parameter
          sigmaT = TInt::Rnd.GetExpDev(alpha);
          break;
        case 1:
          // power-law with alpha parameter
          sigmaT = TInt::Rnd.GetPowerDev(alpha);
          while (sigmaT < delta) { sigmaT = TInt::Rnd.GetPowerDev(alpha); }
          break;
        case 2:
          // rayleigh with alpha parameter
          sigmaT = TInt::Rnd.GetRayleigh(1/sqrt(alpha));
          break;
        default:
          sigmaT = 1;
          break;
        }

        // avoid negative time diffs in case of noise
        if (std_waiting_time > 0)
          sigmaT = TFlt::GetMx(0.0, sigmaT + std_waiting_time*TFlt::Rnd.GetNrmDev());

        double t1 = GlobalTime + sigmaT;

        // keep the earliest infection time; entries equal to window have
        // already been processed and are not updated
        if (InfectedNIdH.IsKey(DstNId)) {
          double t2 = InfectedNIdH.GetDat(DstNId);
          if (t2 > t1 && t2 != window) {
            InfectedNIdH.GetDat(DstNId) = t1;
            InfectedBy.GetDat(DstNId) = NId;
          }
        } else {
          InfectedNIdH.AddDat(DstNId) = t1;
          InfectedBy.AddDat(DstNId) = NId;
        }
      }

      // we cannot delete key (otherwise, we cannot sort), so we assign a big time (window cut-off)
      InfectedNIdH.GetDat(NId) = window;
    }
  }

  C.Sort();

  // count every (infector -> infected) edge recorded for this cascade
  for (TIntH::TIter EI = InfectedBy.BegI(); EI < InfectedBy.EndI(); EI++) {
    TIntPr Edge(EI.GetDat().Val, EI.GetKey().Val);

    if (!EdgesUsed.IsKey(Edge)) EdgesUsed.AddDat(Edge) = 0;

    EdgesUsed.GetDat(Edge) += 1;
  }
}
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Inverse PageRank. Build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr Iput = Env.GetIfArgPrefixStr("-i:", "Input.txt", "Input File" ); const TStr Oput = Env.GetIfArgPrefixStr("-o:", "Output.txt", "Output File"); FILE* fpI = fopen(Iput.CStr(), "r"); FILE* fpO = fopen(Oput.CStr(), "w"); const double C = 0.85; const int MaxIter = 50; const double Eps = 1e-9; PNGraph Graph = TSnap::LoadEdgeList< PNGraph > (Iput); fprintf(fpO, "\nNodes: %d, Edges: %d\n\n", Graph->GetNodes(), Graph->GetEdges()); const int NNodes = Graph->GetNodes(); const double OneOver = (double) 1.0 / (double) NNodes; TIntFltH PRankH; PRankH.Gen(NNodes); for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) PRankH.AddDat(NI.GetId(), OneOver); TFltV TmpV(NNodes); for (int iter = 0; iter < MaxIter; iter++) { int j = 0; for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) { TmpV[j] = 0; for (int e = 0; e < NI.GetOutDeg(); e++) { const int OutNId = NI.GetOutNId(e); const int InDeg = Graph->GetNI(OutNId).GetInDeg(); if (InDeg > 0) TmpV[j] += PRankH.GetDat(OutNId) / InDeg; } TmpV[j] = C * TmpV[j]; } for (int i = 0; i < PRankH.Len(); i++) PRankH[i] = TmpV[i]; /* double diff = 0, sum = 0, NewVal; for (int i = 0; i < TmpV.Len(); i++) sum += TmpV[i]; const double Leaked = (double) (1.0 - sum) / (double) NNodes; for (int i = 0; i < PRankH.Len(); i++) { NewVal = TmpV[i] + Leaked; diff += fabs(NewVal - PRankH[i]); PRankH[i] = NewVal; } if (diff < Eps) break; */ } fprintf(fpO, "Node ID\t\tInverse PageRank\n"); for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++){ int Id = NI.GetId(); double ipr = PRankH.GetDat(Id); fprintf(fpO, "%d\t\t\t%.5lf\n", Id, ipr); } Catch printf("\nRun Time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }