/// Clique Percolation method communities void TCliqueOverlap::GetCPMCommunities(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& NIdCmtyVV) { printf("Clique Percolation Method\n"); TExeTm ExeTm; TVec<TIntV> MaxCliques; TCliqueOverlap::GetMaxCliques(G, MinMaxCliqueSize, MaxCliques); // op RS 2012/05/15, commented out next line, a parameter is missing, // creating a warning on OS X // printf("...%d cliques found\n"); // get clique overlap matrix (graph) PUNGraph OverlapGraph = TCliqueOverlap::CalculateOverlapMtx(MaxCliques, MinMaxCliqueSize-1); printf("...overlap matrix (%d, %d)\n", G->GetNodes(), G->GetEdges()); // connected components are communities TCnComV CnComV; TSnap::GetWccs(OverlapGraph, CnComV); NIdCmtyVV.Clr(false); TIntSet CmtySet; for (int c = 0; c < CnComV.Len(); c++) { CmtySet.Clr(false); for (int i = 0; i <CnComV[c].Len(); i++) { const TIntV& CliqueNIdV = MaxCliques[CnComV[c][i]]; CmtySet.AddKeyV(CliqueNIdV); } NIdCmtyVV.Add(); CmtySet.GetKeyV(NIdCmtyVV.Last()); NIdCmtyVV.Last().Sort(); } printf("done [%s].\n", ExeTm.GetStr()); }
void run_graph_long ( const char * input_name) { char line[1024]; sprintf (line, "g-%s.out", input_name); ofstream outfile (line); cout << "outfile: " << line << endl; outfile << "id clustercf comps apl netsize" << endl; int sizes [] = {1000, 715, 525}; for (int i = 0; i < 6; i++) { sprintf( line, "g-%d-%s.graph", sizes[i], input_name); ifstream file (line); cerr << "infile: " << line << endl; string s; int numedges, numnodes; file >> s >> numedges; cerr << s << "\t" << numedges << endl; while (numedges > 0) { int edges [2*numedges]; get_edges (file, edges, numedges); file >> s >> numnodes; int inf[numnodes]; get_nodes (file, inf, numnodes); PUNGraph g = get_PUNGraph (edges, numedges, numnodes); TCnComV convec; TSnap::GetWccs(g, convec); outfile << graph_num << " " << TSnap::GetClustCf(g, -1) << " " << convec.Len() << " " << " " << ave_path_length (g) << " " << numnodes << endl; graph_num ++; file >> s >> numedges; } } }
///////////////////////////////////////////////// // Top2 Friends network void TTop2FriendNet::SetTop2() { Top2NIdH.Gen(Net->GetNodes()); TFltIntPrV WgtNIdV; for (TWgtNet::TNodeI NI = Net->BegNI(); NI < Net->EndNI(); NI++) { WgtNIdV.Clr(false); for (int e = 0; e < NI.GetOutDeg(); e++) { WgtNIdV.Add(TFltIntPr(NI.GetOutEDat(e), NI.GetOutNId(e))); } WgtNIdV.Shuffle(TInt::Rnd); // so that ties are broken randomly WgtNIdV.Sort(false); if (WgtNIdV.Len() == 0) { Top2NIdH.AddDat(NI.GetId(), TIntPr(-1, -1)); } else if (WgtNIdV.Len() == 1) { Top2NIdH.AddDat(NI.GetId(), TIntPr(WgtNIdV[0].Val2, -1)); } else if (WgtNIdV.Len() >= 2) { Top2NIdH.AddDat(NI.GetId(), TIntPr(WgtNIdV[0].Val2, WgtNIdV[1].Val2)); } } // create union find structure PNGraph Top1Net = GetTop1Net(); Top1UF = TUnionFind(Top1Net->GetNodes()); TCnComV CnComV; TCnCom::GetWccs(Top1Net, CnComV); for (TWgtNet::TNodeI NI = Net->BegNI(); NI < Net->EndNI(); NI++) { Top1UF.Add(NI.GetId()); } for (int c = 0; c < CnComV.Len(); c++) { for (int i = 1; i < CnComV[c].Len(); i++) { Top1UF.Union(CnComV[c][0], CnComV[c][i]); } } }
// get the node ids in 1-connected components void Get1CnCom(const PUNGraph& Graph, TCnComV& Cn1ComV) { //TCnCom::GetWccCnt(Graph, SzCntV); IAssertR(SzCntV.Len() == 1, "Graph is not connected."); TIntPrV EdgeV; GetEdgeBridges(Graph, EdgeV); if (EdgeV.Empty()) { Cn1ComV.Clr(false); return; } PUNGraph TmpG = TUNGraph::New(); *TmpG = *Graph; for (int e = 0; e < EdgeV.Len(); e++) { TmpG->DelEdge(EdgeV[e].Val1, EdgeV[e].Val2); } TCnComV CnComV; GetWccs(TmpG, CnComV); IAssert(CnComV.Len() >= 2); const TIntV& MxWcc = CnComV[0].NIdV; TIntSet MxCcSet(MxWcc.Len()); for (int i = 0; i < MxWcc.Len(); i++) { MxCcSet.AddKey(MxWcc[i]); } // create new graph: bridges not touching MxCc of G with no bridges for (int e = 0; e < EdgeV.Len(); e++) { if (! MxCcSet.IsKey(EdgeV[e].Val1) && ! MxCcSet.IsKey(EdgeV[e].Val2)) { TmpG->AddEdge(EdgeV[e].Val1, EdgeV[e].Val2); } } GetWccs(TmpG, Cn1ComV); // remove the largest component of G for (int c = 0; c < Cn1ComV.Len(); c++) { if (MxCcSet.IsKey(Cn1ComV[c].NIdV[0])) { Cn1ComV.Del(c); break; } } }
void GetBiConSzCnt(const PUNGraph& Graph, TIntPrV& SzCntV) { TCnComV BiCnComV; GetBiCon(Graph, BiCnComV); TIntH SzCntH; for (int c =0; c < BiCnComV.Len(); c++) { SzCntH.AddDat(BiCnComV[c].Len()) += 1; } SzCntH.GetKeyDatPrV(SzCntV); SzCntV.Sort(); }
// bridges are edges in the size 2 biconnected components void GetEdgeBridges(const PUNGraph& Graph, TIntPrV& EdgeV) { TCnComV BiCnComV; GetBiCon(Graph, BiCnComV); TIntPrSet EdgeSet; for (int c = 0; c < BiCnComV.Len(); c++) { const TIntV& NIdV = BiCnComV[c].NIdV; if (NIdV.Len() == 2) { EdgeSet.AddKey(TIntPr(TMath::Mn(NIdV[0], NIdV[1]), TMath::Mx(NIdV[0], NIdV[1]))); } } EdgeSet.GetKeyV(EdgeV); }
/// Writes connected components to a text file.
///
/// The file starts with '#'-prefixed header lines (the optional description,
/// the number of components and a format note), followed by one line per
/// component: its size and then its node ids, all tab separated.
///
/// @param CnComV  Components to write.
/// @param FNm     Output file name; the file is opened with mode "wt".
/// @param Desc    Optional description written as the first header line.
///
/// If the file cannot be opened, an error is printed to stderr and nothing is
/// written.
void TCnCom::SaveTxt(const TCnComV& CnComV, const TStr& FNm, const TStr& Desc) {
  FILE *F = fopen(FNm.CStr(), "wt");
  // fopen returns NULL on failure; writing through it would be undefined behavior.
  if (F == NULL) {
    fprintf(stderr, "TCnCom::SaveTxt: cannot open '%s' for writing\n", FNm.CStr());
    return;
  }
  if (! Desc.Empty()) {
    fprintf(F, "# %s\n", Desc.CStr());
  }
  fprintf(F, "# Connected Components:\t%d\n", CnComV.Len());
  fprintf(F, "# Connected components (format: <Size>\\t<NodeId1>\\t<NodeId2>...)\n");
  for (int cc = 0; cc < CnComV.Len(); cc++) {
    const TIntV& NIdV = CnComV[cc].NIdV;
    fprintf(F, "%d", NIdV.Len());
    for (int i = 0; i < NIdV.Len(); i++) {
      fprintf(F, "\t%d", NIdV[i].Val);
    }
    fprintf(F, "\n");
  }
  fclose(F);
}
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Network community detection. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "graph.txt", "Input graph (undirected graph)"); const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "communities.txt", "Output file"); const int CmtyAlg = Env.GetIfArgPrefixInt("-a:", 2, "Algorithm: 1:Girvan-Newman, 2:Clauset-Newman-Moore, 3:Infomap"); PUNGraph Graph = TSnap::LoadEdgeList<PUNGraph>(InFNm, false); //PUNGraph Graph = TSnap::LoadEdgeList<PUNGraph>("../as20graph.txt", false); //PUNGraph Graph = TSnap::GenRndGnm<PUNGraph>(5000, 10000); // generate a random graph TSnap::DelSelfEdges(Graph); TCnComV CmtyV; double Q = 0.0; TStr CmtyAlgStr; if (CmtyAlg == 1) { CmtyAlgStr = "Girvan-Newman"; Q = TSnap::CommunityGirvanNewman(Graph, CmtyV); } else if (CmtyAlg == 2) { CmtyAlgStr = "Clauset-Newman-Moore"; Q = TSnap::CommunityCNM(Graph, CmtyV); } else if (CmtyAlg == 3) { CmtyAlgStr = "Infomap"; Q = TSnap::Infomap(Graph, CmtyV); } else { Fail; } FILE *F = fopen(OutFNm.CStr(), "wt"); fprintf(F, "# Input: %s\n", InFNm.CStr()); fprintf(F, "# Nodes: %d Edges: %d\n", Graph->GetNodes(), Graph->GetEdges()); fprintf(F, "# Algoritm: %s\n", CmtyAlgStr.CStr()); if (CmtyAlg!=3) { fprintf(F, "# Modularity: %f\n", Q); } else { fprintf(F, "# Average code length: %f\n", Q); } fprintf(F, "# Communities: %d\n", CmtyV.Len()); fprintf(F, "# NId\tCommunityId\n"); for (int c = 0; c < CmtyV.Len(); c++) { for (int i = 0; i < CmtyV[c].Len(); i++) { fprintf(F, "%d\t%d\n", CmtyV[c][i].Val, c); } } fclose(F); Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
// Returns the subgraph of Graph induced by its largest bi-connected component
// (the first one in case of ties). RenumberNodes is forwarded to
// TSnap::GetSubGraph. An empty PUNGraph is returned when GetBiCon finds no
// component.
PUNGraph GetMxBiCon(const PUNGraph& Graph, const bool& RenumberNodes) {
  TCnComV BiConV;
  GetBiCon(Graph, BiConV);
  if (BiConV.Empty()) {
    return PUNGraph();
  }

  // index and size of the largest component seen so far
  int BestN = 0;
  int BestSz = 0;
  for (int CompN = 0; CompN < BiConV.Len(); CompN++) {
    const int CompSz = BiConV[CompN].Len();
    if (CompSz > BestSz) {
      BestSz = CompSz;
      BestN = CompN;
    }
  }
  return TSnap::GetSubGraph(Graph, BiConV[BestN](), RenumberNodes);
}
///////////////////////////////////////////////// // Connected Components void TCnCom::Dump(const TCnComV& CnComV, const TStr& Desc) { if (! Desc.Empty()) { printf("%s:\n", Desc.CStr()); } for (int cc = 0; cc < CnComV.Len(); cc++) { const TIntV& NIdV = CnComV[cc].NIdV; printf("%d : ", NIdV.Len()); for (int i = 0; i < NIdV.Len(); i++) { printf(" %d", NIdV[i].Val); } printf("\n"); } }
void TakeStat(const PGraph& InfG, const PGraph& NetG, const TIntH& NIdInfTmH, const double& P, const bool& DivByM=true) { const double M = DivByM ? InfG->GetNodes() : 1; IAssert(M>=1); PGraph CcInf, CcNet; // largest connected component // connected components and sizes { TCnComV CnComV; TSnap::GetWccs(InfG, CnComV); NCascInf.AddDat(P).Add(CnComV.Len()/M); MxSzInf.AddDat(P).Add(CnComV[0].Len()/M); { int a=0; for (int i=0; i<CnComV.Len(); i++) { a+=CnComV[i].Len(); } AvgSzInf.AddDat(P).Add(a/double(CnComV.Len()*M)); } CcInf = TSnap::GetSubGraph(InfG, CnComV[0].NIdV); TSnap::GetWccs(NetG, CnComV); NCascNet.AddDat(P).Add(CnComV.Len()/M); MxSzNet.AddDat(P).Add(CnComV[0].Len()/M); { int a=0; for (int i=0; i<CnComV.Len(); i++) { a+=CnComV[i].Len(); } AvgSzNet.AddDat(P).Add(a/double(CnComV.Len()*M)); } CcNet = TSnap::GetSubGraph(NetG, CnComV[0].NIdV); } // count isolated nodes and leaves; average in- and out-degree (skip leaves) { int i1=0, i2=0,l1=0,l2=0,r1=0,r2=0,ENet=0,EInf=0; double ci1=0,ci2=0,co1=0,co2=0; for (typename PGraph::TObj::TNodeI NI = InfG->BegNI(); NI < InfG->EndNI(); NI++) { if (NI.GetOutDeg()==0 && NI.GetInDeg()>0) { l1++; } if (NI.GetOutDeg()>0 && NI.GetInDeg()==0) { r1++; } if (NI.GetDeg()==0) { i1++; } if (NI.GetInDeg()>0) { ci1+=1; } if (NI.GetOutDeg()>0) { co1+=1; } EInf+=NI.GetOutDeg(); } for (typename PGraph::TObj::TNodeI NI = NetG->BegNI(); NI < NetG->EndNI(); NI++) { if (NI.GetOutDeg()==0 && NI.GetInDeg()>0) { l2++; } if (NI.GetOutDeg()>0 && NI.GetInDeg()==0) { r2++; } if (NI.GetDeg()==0) { i2++; } if (NI.GetInDeg()>0) { ci2+=1; } if (NI.GetOutDeg()>0) { co2+=1; } ENet+=NI.GetOutDeg(); } if(ci1>0)InDegInf.AddDat(P).Add(EInf/ci1); if(ci2>0)InDegNet.AddDat(P).Add(ENet/ci2); if(co1>0)OutDegInf.AddDat(P).Add(EInf/co1); if(co2>0)OutDegNet.AddDat(P).Add(ENet/co2); NLfInf.AddDat(P).Add(l1/M); NLfNet.AddDat(P).Add(l2/M); NRtInf.AddDat(P).Add(r1/M); NRtNet.AddDat(P).Add(r2/M); NIsoInf.AddDat(P).Add(i1/M); NIsoNet.AddDat(P).Add(i2/M); } // cascade depth 
{ const double M1 = DivByM ? CcNet->GetNodes() : 1; IAssert(M1>=1); int Root=FindCascadeRoot(CcInf, NIdInfTmH); TIntPrV HopCntV; TSnap::GetNodesAtHops(CcInf, Root, HopCntV, true); int MxN=0, Lev=0, IncL=0; for (int i = 0; i < HopCntV.Len(); i++) { if (MxN<HopCntV[i].Val2) { MxN=HopCntV[i].Val2; Lev=HopCntV[i].Val1; } if (i > 0 && HopCntV[i-1].Val2<=HopCntV[i].Val2) { IncL++; } } double D=0; int c=0; TIntH DistH; D = HopCntV.Last().Val1; c=1; // maximum depth if (c!=0 && D!=0) { D = D/c; DepthInf.AddDat(P).Add(D/M1); MxWidInf.AddDat(P).Add(MxN/M1); MxLevInf.AddDat(P).Add(Lev/D); IncLevInf.AddDat(P).Add(IncL/D); } Root=FindCascadeRoot(CcNet, NIdInfTmH); TSnap::GetNodesAtHops(CcNet, Root, HopCntV, true); MxN=0; Lev=0; IncL=0; D=0; c=0; for (int i = 0; i < HopCntV.Len(); i++) { if (MxN<HopCntV[i].Val2) { MxN=HopCntV[i].Val2; Lev=HopCntV[i].Val1; } if (i > 0 && HopCntV[i-1].Val2<=HopCntV[i].Val2) { IncL++; } } D = HopCntV.Last().Val1; c=1; // maximum depth if (c!=0 && D!=0) { D = D/c; DepthNet.AddDat(P).Add(D/M1); MxWidNet.AddDat(P).Add(MxN/M1); MxLevNet.AddDat(P).Add(Lev/D); IncLevNet.AddDat(P).Add(IncL/D); } } }
// Process the strongly connected components of the graph. We only work // on the largest SCC. void CommDetection::ProcessSCCs() { Network& net = CurrNetwork(); TCnComV components; TSnap::GetSccs(net.graph(), components); if (components.Len() == 1) { return; } int num_scc = components.Len(); std::vector< std::vector<int> > cuts(num_scc); for (int j = 0; j < num_scc; ++j) { TCnCom comp = components[j]; std::vector<int>& curr_cut = cuts[j]; for (int i = 0; i < comp.Len(); ++i) { curr_cut.push_back(comp[i].Val); } } // Find the largest remaining component int argmax_ind = -1; int max_val = -1; for (int i = 0; i < cuts.size(); ++i) { if (max_val == -1 || cuts[i].size() > max_val) { argmax_ind = i; max_val = cuts[i].size(); } } // Now we cut out each component std::cout << "Removing SCC" << std::endl; Cutter cutter(net, algorithm_, cut_type_, ""); RemoveCutFromCurrNetwork(cutter, cuts[argmax_ind]); // TODO(arbenson): this is kindof a hack for dealing with SCCs. We don't // want to process all of them because there are many isolated nodes. // Instead, we continue processing until the largest community (in terms // of the number of nodes) is a SCC. ProcessSCCs(); }
// Maximum modularity clustering by Girvan-Newman algorithm (slow) // Girvan M. and Newman M. E. J., Community structure in social and biological networks, Proc. Natl. Acad. Sci. USA 99, 7821–7826 (2002) double CommunityGirvanNewman(PUNGraph& Graph, TCnComV& CmtyV) { TIntH OutDegH; const int NEdges = Graph->GetEdges(); for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { OutDegH.AddDat(NI.GetId(), NI.GetOutDeg()); } double BestQ = -1; // modularity TCnComV CurCmtyV; CmtyV.Clr(); TIntV Cmty1, Cmty2; while (true) { CmtyGirvanNewmanStep(Graph, Cmty1, Cmty2); const double Q = _GirvanNewmanGetModularity(Graph, OutDegH, NEdges, CurCmtyV); //printf("current modularity: %f\n", Q); if (Q > BestQ) { BestQ = Q; CmtyV.Swap(CurCmtyV); } if (Cmty1.Len()==0 || Cmty2.Len() == 0) { break; } } return BestQ; }
static double CmtyCMN(const PUNGraph& Graph, TCnComV& CmtyV) { TCNMQMatrix QMatrix(Graph); // maximize modularity while (QMatrix.MergeBestQ()) { } // reconstruct communities THash<TInt, TIntV> IdCmtyH; for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { IdCmtyH.AddDat(QMatrix.CmtyIdUF.Find(NI.GetId())).Add(NI.GetId()); } CmtyV.Gen(IdCmtyH.Len()); for (int j = 0; j < IdCmtyH.Len(); j++) { CmtyV[j].NIdV.Swap(IdCmtyH[j]); } return QMatrix.Q; }
// Connected components of a graph define clusters // OutDegH and OrigEdges stores node degrees and number of edges in the original graph double _GirvanNewmanGetModularity(const PUNGraph& G, const TIntH& OutDegH, const int& OrigEdges, TCnComV& CnComV) { TSnap::GetWccs(G, CnComV); // get communities double Mod = 0; for (int c = 0; c < CnComV.Len(); c++) { const TIntV& NIdV = CnComV[c](); double EIn=0, EEIn=0; for (int i = 0; i < NIdV.Len(); i++) { TUNGraph::TNodeI NI = G->GetNI(NIdV[i]); EIn += NI.GetOutDeg(); EEIn += OutDegH.GetDat(NIdV[i]); } Mod += (EIn-EEIn*EEIn/(2.0*OrigEdges)); } if (Mod == 0) { return 0; } else { return Mod/(2.0*OrigEdges); } }
void sample (const int *m, const int *n, const int *h, const int *ns, const int *in, const int *infection_state, const int *mde, const int *bi, const int *br, double * result) { const int nodes = *h; const int nval = (*n)/2; int num_seeds = *ns; int infect_type = *in; int mode = *mde; int burnin = *bi; int branch = *br; PUNGraph g = get_PUNGraph (m, nval, nodes); THash<TInt, TInt> * visited = choose_seeds (g, num_seeds, infection_state, infect_type); TVec <VisitedNode *> queue; TIntV qids; for (THash<TInt, TInt>::TIter n = visited->BegI(); n != visited->EndI(); n++) { queue = queue + new VisitedNode (n->Key); qids = qids + n->Key; //cerr << "enqueued " << n->Key << endl; } TInt counted = 0; TInt first_unprocessed = 0; TFlt infected_mass = 0.0; TFlt total_mass = 0.0; TFlt revisits = 0.0; TFlt trehits = 0.0; //cerr << "nodeId\tneigh\tnbh_size\tinfected?\tinfected_mass\ttotal_mass" << endl; while (counted < 500 && first_unprocessed < queue.Len()) { VisitedNode * current_node = queue [first_unprocessed]; first_unprocessed++; TUNGraph::TNodeI NI = g->GetNI (current_node->id); TInt neighborhood_size = NI.GetDeg(); // cerr << counted << " " << current_node->id << endl; if (counted >= burnin) { if (infection_state[(current_node->id) - 1] == 1) infected_mass += 1.0/TFlt(neighborhood_size); total_mass += 1.0/TFlt(neighborhood_size); } //cerr << current_node->id << "\t" << neighborhood_size << "\t" << (1.0/TFlt(neighborhood_size)) // << "\t" << infection_state[(current_node->id) - 1] << "\t" << infected_mass << "\t" << total_mass << endl; // build list of unvisited neighbors TVec<TInt> neighbors; for (int i = 0; i < neighborhood_size; i++) { TInt neighbor = NI.GetNbrNId(i); if (mode == 0 && visited->IsKey(neighbor)) continue; else if (mode == 2 && isChild (current_node, neighbor)) continue; else if (mode == 3 && current_node-> previous != NULL && current_node->previous->id == neighbor) continue; else neighbors = neighbors + neighbor; } TInt num_legal_neighbors = 
neighbors.Len(); TInt sample_size = TMath::Mn<TInt> (branch, num_legal_neighbors); THash <TInt, TInt> * choices = choose (num_legal_neighbors, sample_size); for (THash<TInt, TInt>::TIter n = choices->BegI(); n != choices->EndI(); n++) { if (queue.Len() >= 500) break; queue = queue + new VisitedNode (neighbors[n->Key], current_node); if (visited->IsKey(neighbors[n->Key])) revisits++; if (isChild(current_node, neighbors[n->Key])) trehits++; if (!visited->IsKey(neighbors[n->Key])) qids = qids + neighbors[n->Key]; visited->AddDat(neighbors[n->Key], 1); } counted++; } // cout << (infected_mass / total_mass) << endl; delete (visited); result[0] = (infected_mass / total_mass); result[1] = revisits; result[2] = trehits; result[3] = counted; //PUNGraph p (&g); PUNGraph p = TSnap:: GetSubGraph (g, qids, false); TCnComV convec; result[4] = TSnap::GetClustCf(p, -1); TSnap::GetWccs(p, convec); result[5] = convec.Len(); result[6] = ave_path_length (p); }