Esempio n. 1
0
// get the node ids in 1-connected components 
void Get1CnCom(const PUNGraph& Graph, TCnComV& Cn1ComV) {
  //TCnCom::GetWccCnt(Graph, SzCntV);  IAssertR(SzCntV.Len() == 1, "Graph is not connected.");
  TIntPrV EdgeV;
  GetEdgeBridges(Graph, EdgeV);
  if (EdgeV.Empty()) { Cn1ComV.Clr(false); return; }
  PUNGraph TmpG = TUNGraph::New();
  *TmpG = *Graph;
  for (int e = 0; e < EdgeV.Len(); e++) {
    TmpG->DelEdge(EdgeV[e].Val1, EdgeV[e].Val2);  }
  TCnComV CnComV;  GetWccs(TmpG, CnComV);
  IAssert(CnComV.Len() >= 2);
  const TIntV& MxWcc = CnComV[0].NIdV;
  TIntSet MxCcSet(MxWcc.Len());
  for (int i = 0; i < MxWcc.Len(); i++) { 
    MxCcSet.AddKey(MxWcc[i]); }
  // create new graph: bridges not touching MxCc of G with no bridges
  for (int e = 0; e < EdgeV.Len(); e++) {
    if (! MxCcSet.IsKey(EdgeV[e].Val1) &&  ! MxCcSet.IsKey(EdgeV[e].Val2)) {
      TmpG->AddEdge(EdgeV[e].Val1, EdgeV[e].Val2); }
  }
  GetWccs(TmpG, Cn1ComV);
  // remove the largest component of G
  for (int c = 0; c < Cn1ComV.Len(); c++) {
    if (MxCcSet.IsKey(Cn1ComV[c].NIdV[0])) {
      Cn1ComV.Del(c);  break; }
  }
}
Esempio n. 2
0
/// Writes the connected components in CnComV to the text file FNm.
/// The file starts with '#'-prefixed header lines (the optional description
/// Desc, the number of components, and a format note), followed by one line
/// per component: the component size and then its node ids, tab-separated.
/// If the file cannot be opened for writing, an error is printed to stderr
/// and nothing is written.
void TCnCom::SaveTxt(const TCnComV& CnComV, const TStr& FNm, const TStr& Desc) {
  FILE *F = fopen(FNm.CStr(), "wt");
  // fopen returns NULL on failure; passing that to fprintf/fclose is undefined
  if (F == NULL) {
    fprintf(stderr, "TCnCom::SaveTxt: cannot open '%s' for writing.\n", FNm.CStr());
    return;
  }
  if (! Desc.Empty()) { fprintf(F, "# %s\n", Desc.CStr()); }
  fprintf(F, "# Connected Components:\t%d\n", CnComV.Len());
  fprintf(F, "# Connected components (format: <Size>\\t<NodeId1>\\t<NodeId2>...)\n");
  for (int cc = 0; cc < CnComV.Len(); cc++) {
    const TIntV& NIdV = CnComV[cc].NIdV;
    fprintf(F, "%d", NIdV.Len());
    for (int i = 0; i < NIdV.Len(); i++) { fprintf(F, "\t%d", NIdV[i].Val); }
    fprintf(F, "\n");
  }
  fclose(F);
}
Esempio n. 3
0
/// Command-line driver for network community detection.
/// Arguments (read through Env):
///   -i:  input edge list of an undirected graph (default "graph.txt")
///   -o:  output file (default "communities.txt")
///   -a:  algorithm: 1 Girvan-Newman, 2 Clauset-Newman-Moore, 3 Infomap
/// Self-edges are removed, the selected algorithm is run, and the output file
/// receives a '#'-prefixed header followed by one "<NId>\t<CommunityId>" line
/// per node. Returns 0 on success and 1 if the output file cannot be opened.
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Network community detection. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  Try
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "graph.txt", "Input graph (undirected graph)");
  const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "communities.txt", "Output file");
  const int CmtyAlg = Env.GetIfArgPrefixInt("-a:", 2, "Algorithm: 1:Girvan-Newman, 2:Clauset-Newman-Moore, 3:Infomap");

  PUNGraph Graph = TSnap::LoadEdgeList<PUNGraph>(InFNm, false);
  //PUNGraph Graph = TSnap::LoadEdgeList<PUNGraph>("../as20graph.txt", false);
  //PUNGraph Graph = TSnap::GenRndGnm<PUNGraph>(5000, 10000); // generate a random graph

  TSnap::DelSelfEdges(Graph);
  TCnComV CmtyV;
  // Q holds the modularity for algorithms 1 and 2, and the average code
  // length for Infomap (see the header lines written below)
  double Q = 0.0;
  TStr CmtyAlgStr;
  if (CmtyAlg == 1) {
    CmtyAlgStr = "Girvan-Newman";
    Q = TSnap::CommunityGirvanNewman(Graph, CmtyV); }
  else if (CmtyAlg == 2) {
    CmtyAlgStr = "Clauset-Newman-Moore";
    Q = TSnap::CommunityCNM(Graph, CmtyV); }
  else if (CmtyAlg == 3) {
    CmtyAlgStr = "Infomap";
    Q = TSnap::Infomap(Graph, CmtyV); }
  else { Fail; }

  FILE *F = fopen(OutFNm.CStr(), "wt");
  // fopen returns NULL on failure; writing through it would be undefined behavior
  if (F == NULL) {
    fprintf(stderr, "Error: cannot open output file '%s' for writing.\n", OutFNm.CStr());
    return 1;
  }
  fprintf(F, "# Input: %s\n", InFNm.CStr());
  fprintf(F, "# Nodes: %d    Edges: %d\n", Graph->GetNodes(), Graph->GetEdges());
  fprintf(F, "# Algoritm: %s\n", CmtyAlgStr.CStr());
  if (CmtyAlg!=3) {
    fprintf(F, "# Modularity: %f\n", Q);
  } else {
    fprintf(F, "# Average code length: %f\n", Q);
  }
  fprintf(F, "# Communities: %d\n", CmtyV.Len());
  fprintf(F, "# NId\tCommunityId\n");
  // one line per node; the community id is the index of the community in CmtyV
  for (int c = 0; c < CmtyV.Len(); c++) {
    for (int i = 0; i < CmtyV[c].Len(); i++) {
      fprintf(F, "%d\t%d\n", CmtyV[c][i].Val, c);
    }
  }
  fclose(F);

  Catch
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
}
Esempio n. 4
0
/// Clique Percolation method communities
void TCliqueOverlap::GetCPMCommunities(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& NIdCmtyVV) {
  printf("Clique Percolation Method\n");
  TExeTm ExeTm;
  TVec<TIntV> MaxCliques;
  TCliqueOverlap::GetMaxCliques(G, MinMaxCliqueSize, MaxCliques);
  // op RS 2012/05/15, commented out next line, a parameter is missing,
  //   creating a warning on OS X
  // printf("...%d cliques found\n");
  // get clique overlap matrix (graph)
  PUNGraph OverlapGraph = TCliqueOverlap::CalculateOverlapMtx(MaxCliques, MinMaxCliqueSize-1);
  printf("...overlap matrix (%d, %d)\n", G->GetNodes(), G->GetEdges());
  // connected components are communities
  TCnComV CnComV;
  TSnap::GetWccs(OverlapGraph, CnComV);
  NIdCmtyVV.Clr(false);
  TIntSet CmtySet;
  for (int c = 0; c < CnComV.Len(); c++) {
    CmtySet.Clr(false);
    for (int i = 0; i <CnComV[c].Len(); i++) {
      const TIntV& CliqueNIdV = MaxCliques[CnComV[c][i]];
      CmtySet.AddKeyV(CliqueNIdV);
    }
    NIdCmtyVV.Add();
    CmtySet.GetKeyV(NIdCmtyVV.Last());
    NIdCmtyVV.Last().Sort();
  }
  printf("done [%s].\n", ExeTm.GetStr());
}
Esempio n. 5
0
/////////////////////////////////////////////////
// Top2 Friends network
void TTop2FriendNet::SetTop2() {
  Top2NIdH.Gen(Net->GetNodes());
  TFltIntPrV WgtNIdV;
  for (TWgtNet::TNodeI NI = Net->BegNI(); NI < Net->EndNI(); NI++) {
    WgtNIdV.Clr(false);
    for (int e = 0; e < NI.GetOutDeg(); e++) {
      WgtNIdV.Add(TFltIntPr(NI.GetOutEDat(e), NI.GetOutNId(e)));
    }
    WgtNIdV.Shuffle(TInt::Rnd); // so that ties are broken randomly
    WgtNIdV.Sort(false);
    if (WgtNIdV.Len() == 0) { Top2NIdH.AddDat(NI.GetId(), TIntPr(-1, -1)); }
    else if (WgtNIdV.Len() == 1) { Top2NIdH.AddDat(NI.GetId(), TIntPr(WgtNIdV[0].Val2, -1)); } 
    else if (WgtNIdV.Len() >= 2) {
      Top2NIdH.AddDat(NI.GetId(), TIntPr(WgtNIdV[0].Val2, WgtNIdV[1].Val2)); }
  }
  // create union find structure
  PNGraph Top1Net = GetTop1Net();
  Top1UF = TUnionFind(Top1Net->GetNodes());
  TCnComV CnComV; 
  TCnCom::GetWccs(Top1Net, CnComV);
  for (TWgtNet::TNodeI NI = Net->BegNI(); NI < Net->EndNI(); NI++) {
    Top1UF.Add(NI.GetId());
  }
  for (int c = 0; c < CnComV.Len(); c++) {
    for (int i = 1; i < CnComV[c].Len(); i++) {
      Top1UF.Union(CnComV[c][0], CnComV[c][i]); }
  }
}
Esempio n. 6
0
/// Computes per-graph statistics for the graph files belonging to input_name.
/// For every entry of the built-in size list, the file
/// "g-<size>-<input_name>.graph" is read as a sequence of records
/// (label + edge count, edges, label + node count, node data). For each record
/// a line "<graph_num> <clustering coeff> <#components>  <avg path length>
/// <#nodes>" is appended to "g-<input_name>.out". A record with an edge count
/// of 0 or less, or a failed read, ends the processing of that file.
void run_graph_long ( const char * input_name) {

  char line[1024];
  // snprintf bounds the write; a long input_name can no longer overflow 'line'
  snprintf (line, sizeof(line), "g-%s.out", input_name);
  ofstream outfile (line);
  cout << "outfile: " << line << endl;
  outfile << "id clustercf comps apl netsize" << endl;

  const int sizes [] = {1000, 715, 525};
  // iterate over exactly the entries of 'sizes' (the loop used to run to 6,
  // reading past the end of the 3-element array)
  const int num_sizes = sizeof(sizes) / sizeof(sizes[0]);
  for (int i = 0; i < num_sizes; i++) {
    snprintf( line, sizeof(line), "g-%d-%s.graph", sizes[i], input_name);
    ifstream file (line);
    cerr << "infile: " << line << endl;
    string s;
    // initialized so that a missing or empty file yields zero records instead
    // of reading an indeterminate value
    int numedges = 0, numnodes = 0;
    file >> s >> numedges;
    cerr << s  << "\t" << numedges << endl;

    while (numedges > 0) {
      int edges [2*numedges];
      get_edges (file, edges, numedges);
      // stop on a truncated or malformed record rather than sizing the array
      // below from a stale or negative value
      if (!(file >> s >> numnodes) || numnodes < 0) break;
      int inf[numnodes];
      get_nodes (file, inf, numnodes);
    
      PUNGraph g = get_PUNGraph (edges, numedges, numnodes);
      TCnComV convec;
      TSnap::GetWccs(g, convec);
      
      outfile << graph_num << " " << TSnap::GetClustCf(g, -1) << " " << convec.Len() << " " << " " << ave_path_length (g) << " " << numnodes << endl;
      graph_num ++;
      // a failed read leaves numedges unchanged, which used to loop forever at
      // end of file when no terminating record was present
      if (!(file >> s >> numedges)) break;
    }
  }
}
Esempio n. 7
0
/////////////////////////////////////////////////
// Connected Components
void TCnCom::Dump(const TCnComV& CnComV, const TStr& Desc) {
  if (! Desc.Empty()) { printf("%s:\n", Desc.CStr()); }
  for (int cc = 0; cc < CnComV.Len(); cc++) {
    const TIntV& NIdV = CnComV[cc].NIdV;
    printf("%d : ", NIdV.Len());
    for (int i = 0; i < NIdV.Len(); i++) { printf(" %d", NIdV[i].Val); }
    printf("\n");
  }
}
Esempio n. 8
0
void GetBiConSzCnt(const PUNGraph& Graph, TIntPrV& SzCntV) {
  TCnComV BiCnComV;
  GetBiCon(Graph, BiCnComV);
  TIntH SzCntH;
  for (int c =0; c < BiCnComV.Len(); c++) {
    SzCntH.AddDat(BiCnComV[c].Len()) += 1;
  }
  SzCntH.GetKeyDatPrV(SzCntV);
  SzCntV.Sort();
}
Esempio n. 9
0
// bridges are edges in the size 2 biconnected components
void GetEdgeBridges(const PUNGraph& Graph, TIntPrV& EdgeV) {
  TCnComV BiCnComV;
  GetBiCon(Graph, BiCnComV);
  TIntPrSet EdgeSet;
  for (int c = 0; c < BiCnComV.Len(); c++) {
    const TIntV& NIdV = BiCnComV[c].NIdV; 
    if (NIdV.Len() == 2) {
      EdgeSet.AddKey(TIntPr(TMath::Mn(NIdV[0], NIdV[1]), TMath::Mx(NIdV[0], NIdV[1]))); 
    }
  }
  EdgeSet.GetKeyV(EdgeV);
}
Esempio n. 10
0
 /// Records one sample of cascade statistics for the infection graph InfG and
 /// the network graph NetG, filed under the key P in the member accumulators
 /// (NCascInf, MxSzInf, AvgSzInf, InDegInf, DepthInf, ... and their *Net twins).
 /// InfG      graph whose statistics go into the *Inf accumulators
 /// NetG      graph whose statistics go into the *Net accumulators
 /// NIdInfTmH passed through to FindCascadeRoot to pick the cascade root
 /// P         key under which every value is stored (AddDat(P))
 /// DivByM    when true, counts are divided by the number of nodes of InfG
 ///           (and depth/width values by the number of nodes of CcNet);
 ///           when false, the divisors are 1
 void TakeStat(const PGraph& InfG, const PGraph& NetG, const TIntH& NIdInfTmH, const double& P, const bool& DivByM=true) {
   // normalization constant for the component and node-count statistics
   const double M = DivByM ? InfG->GetNodes() : 1;  IAssert(M>=1);
   PGraph CcInf, CcNet; // largest connected component
   // connected components and sizes
   // For each graph: number of components, size of component 0 and mean
   // component size (all divided by M); the subgraph induced by component 0
   // is kept for the depth computation below.
   // NOTE(review): CnComV[0] is read without checking that CnComV is non-empty.
   { TCnComV CnComV;  TSnap::GetWccs(InfG, CnComV);
   NCascInf.AddDat(P).Add(CnComV.Len()/M);
   MxSzInf.AddDat(P).Add(CnComV[0].Len()/M);
   { int a=0; for (int i=0; i<CnComV.Len(); i++) { a+=CnComV[i].Len(); }
   AvgSzInf.AddDat(P).Add(a/double(CnComV.Len()*M)); }
   CcInf = TSnap::GetSubGraph(InfG, CnComV[0].NIdV);
   TSnap::GetWccs(NetG, CnComV);
   NCascNet.AddDat(P).Add(CnComV.Len()/M);
   MxSzNet.AddDat(P).Add(CnComV[0].Len()/M);
   { int a=0; for (int i=0; i<CnComV.Len(); i++) { a+=CnComV[i].Len(); }
   AvgSzNet.AddDat(P).Add(a/double(CnComV.Len()*M)); }
   CcNet = TSnap::GetSubGraph(NetG, CnComV[0].NIdV); }
   // count isolated nodes and leaves; average in- and out-degree (skip leaves)
   // Counters (suffix 1 = InfG, 2 = NetG):
   //   l*  nodes with out-degree 0 and in-degree > 0   (stored in NLf*)
   //   r*  nodes with out-degree > 0 and in-degree 0   (stored in NRt*)
   //   i*  nodes with degree 0                         (stored in NIso*)
   //   ci* / co*  nodes with in-degree > 0 / out-degree > 0
   //   EInf / ENet  sum of out-degrees over all nodes
   { int i1=0, i2=0,l1=0,l2=0,r1=0,r2=0,ENet=0,EInf=0; double ci1=0,ci2=0,co1=0,co2=0;
   for (typename PGraph::TObj::TNodeI NI = InfG->BegNI(); NI < InfG->EndNI(); NI++) {
     if (NI.GetOutDeg()==0 && NI.GetInDeg()>0) { l1++; }
     if (NI.GetOutDeg()>0 && NI.GetInDeg()==0) { r1++; }
     if (NI.GetDeg()==0) { i1++; }  if (NI.GetInDeg()>0) { ci1+=1; }
     if (NI.GetOutDeg()>0) { co1+=1; }  EInf+=NI.GetOutDeg(); }
   for (typename PGraph::TObj::TNodeI NI = NetG->BegNI(); NI < NetG->EndNI(); NI++) {
     if (NI.GetOutDeg()==0 && NI.GetInDeg()>0) { l2++; }
     if (NI.GetOutDeg()>0 && NI.GetInDeg()==0) { r2++; }
     if (NI.GetDeg()==0) { i2++; }  if (NI.GetInDeg()>0) { ci2+=1; }
     if (NI.GetOutDeg()>0) { co2+=1; }  ENet+=NI.GetOutDeg(); }
   // average degrees are only recorded when the denominator is non-zero
   if(ci1>0)InDegInf.AddDat(P).Add(EInf/ci1);  if(ci2>0)InDegNet.AddDat(P).Add(ENet/ci2);
   if(co1>0)OutDegInf.AddDat(P).Add(EInf/co1); if(co2>0)OutDegNet.AddDat(P).Add(ENet/co2);
   NLfInf.AddDat(P).Add(l1/M);  NLfNet.AddDat(P).Add(l2/M);
   NRtInf.AddDat(P).Add(r1/M);  NRtNet.AddDat(P).Add(r2/M);
   NIsoInf.AddDat(P).Add(i1/M); NIsoNet.AddDat(P).Add(i2/M); }
   // cascade depth
   // NOTE(review): M1 is taken from CcNet and is used to normalize both the
   // Inf and the Net values below -- confirm this is intended.
   { const double M1 = DivByM ? CcNet->GetNodes() : 1;  IAssert(M1>=1);
   int Root=FindCascadeRoot(CcInf, NIdInfTmH);  TIntPrV HopCntV;
   TSnap::GetNodesAtHops(CcInf, Root, HopCntV, true);
   // MxN: largest Val2 seen in HopCntV; Lev: the Val1 of that entry;
   // IncL: number of entries whose Val2 is not smaller than the previous one.
   // Presumably HopCntV holds (hop distance, node count) pairs -- verify
   // against TSnap::GetNodesAtHops.
   int MxN=0, Lev=0, IncL=0;
   for (int i = 0; i < HopCntV.Len(); i++) {
     if (MxN<HopCntV[i].Val2) { MxN=HopCntV[i].Val2; Lev=HopCntV[i].Val1; }
     if (i > 0 && HopCntV[i-1].Val2<=HopCntV[i].Val2) { IncL++; } }
   // DistH is declared but not used in this function; c is always 1 here, so
   // D stays equal to the Val1 of the last HopCntV entry.
   // NOTE(review): HopCntV.Last() is called without checking for an empty vector.
   double D=0; int c=0; TIntH DistH;
   D = HopCntV.Last().Val1; c=1; // maximum depth
   if (c!=0 && D!=0) { D = D/c;
     DepthInf.AddDat(P).Add(D/M1); MxWidInf.AddDat(P).Add(MxN/M1);
     MxLevInf.AddDat(P).Add(Lev/D); IncLevInf.AddDat(P).Add(IncL/D);
   }
   // same computation for the network component
   Root=FindCascadeRoot(CcNet, NIdInfTmH);
   TSnap::GetNodesAtHops(CcNet, Root, HopCntV, true);
   MxN=0; Lev=0; IncL=0; D=0; c=0;
   for (int i = 0; i < HopCntV.Len(); i++) {
     if (MxN<HopCntV[i].Val2) { MxN=HopCntV[i].Val2; Lev=HopCntV[i].Val1; }
     if (i > 0 && HopCntV[i-1].Val2<=HopCntV[i].Val2) { IncL++; } }
   D = HopCntV.Last().Val1; c=1; // maximum depth
   if (c!=0 && D!=0) { D = D/c;
     DepthNet.AddDat(P).Add(D/M1); MxWidNet.AddDat(P).Add(MxN/M1);
     MxLevNet.AddDat(P).Add(Lev/D); IncLevNet.AddDat(P).Add(IncL/D); }
   }
 }
Esempio n. 11
0
// Process the strongly connected components of the graph.  We only work
// on the largest SCC.
void CommDetection::ProcessSCCs() {
    Network& net = CurrNetwork();
    TCnComV components;
    TSnap::GetSccs(net.graph(), components);
    if (components.Len() == 1) {
        return;
    }
    int num_scc = components.Len();

    std::vector< std::vector<int> > cuts(num_scc);
    for (int j = 0; j < num_scc; ++j) {
        TCnCom comp = components[j];
        std::vector<int>& curr_cut = cuts[j];
        for (int i = 0; i < comp.Len(); ++i) {
            curr_cut.push_back(comp[i].Val);
        }
    }

    // Find the largest remaining component
    int argmax_ind = -1;
    int max_val = -1;
    for (int i = 0; i < cuts.size(); ++i) {
        if (max_val == -1 || cuts[i].size() > max_val) {
            argmax_ind = i;
            max_val = cuts[i].size();
        }
    }

    // Now we cut out each component
    std::cout << "Removing SCC" << std::endl;
    Cutter cutter(net, algorithm_, cut_type_, "");
    RemoveCutFromCurrNetwork(cutter, cuts[argmax_ind]);

    // TODO(arbenson): this is kindof a hack for dealing with SCCs.  We don't
    // want to process all of them because there are many isolated nodes.
    // Instead, we continue processing until the largest community (in terms
    // of the number of nodes) is a SCC.
    ProcessSCCs();
}
Esempio n. 12
0
// Returns the subgraph of Graph induced by its largest bi-connected component
// (the first one on ties). RenumberNodes is forwarded to TSnap::GetSubGraph.
// When GetBiCon finds no component at all, a default-constructed PUNGraph is
// returned.
PUNGraph GetMxBiCon(const PUNGraph& Graph, const bool& RenumberNodes) {
  TCnComV BiConV;
  GetBiCon(Graph, BiConV);
  if (BiConV.Empty()) { return PUNGraph(); }
  // linear scan for the component with the most nodes
  int BestId = 0;
  int BestSz = 0;
  for (int b = 0; b < BiConV.Len(); b++) {
    const int Sz = BiConV[b].Len();
    if (Sz > BestSz) {
      BestSz = Sz;
      BestId = b;
    }
  }
  const TIntV& BestNIdV = BiConV[BestId]();
  return TSnap::GetSubGraph(Graph, BestNIdV, RenumberNodes);
}
Esempio n. 13
0
File: cmty.cpp Progetto: pikma/Snap
// Connected components of a graph define clusters
// OutDegH and OrigEdges stores node degrees and number of edges in the original graph
double _GirvanNewmanGetModularity(const PUNGraph& G, const TIntH& OutDegH, const int& OrigEdges, TCnComV& CnComV) {
  TSnap::GetWccs(G, CnComV); // get communities
  double Mod = 0;
  for (int c = 0; c < CnComV.Len(); c++) {
    const TIntV& NIdV = CnComV[c]();
    double EIn=0, EEIn=0;
    for (int i = 0; i < NIdV.Len(); i++) {
      TUNGraph::TNodeI NI = G->GetNI(NIdV[i]);
      EIn += NI.GetOutDeg();
      EEIn += OutDegH.GetDat(NIdV[i]);
    }
    Mod += (EIn-EEIn*EEIn/(2.0*OrigEdges));
  }
  if (Mod == 0) { return 0; }
  else { return Mod/(2.0*OrigEdges); }
}
Esempio n. 14
0
void sample (const int *m, const int *n, const int *h, const int *ns, const int *in, const int *infection_state, const int *mde, const int *bi, const int *br, double * result) {
  const int nodes = *h;
  const int nval = (*n)/2;
  int num_seeds = *ns;
  int infect_type = *in;
  int mode = *mde;
  int burnin = *bi;
  int branch = *br;

  PUNGraph g = get_PUNGraph (m, nval, nodes);

  THash<TInt, TInt> * visited = choose_seeds (g, num_seeds, infection_state, infect_type);
  TVec <VisitedNode *>  queue;
  TIntV qids;
  

  for (THash<TInt, TInt>::TIter n = visited->BegI(); n != visited->EndI(); n++) {
    queue = queue + new VisitedNode (n->Key);
    qids = qids + n->Key;
    //cerr << "enqueued " << n->Key << endl;
  }
  TInt counted = 0;
  TInt first_unprocessed = 0;
  TFlt infected_mass = 0.0;
  TFlt total_mass = 0.0;
  TFlt revisits = 0.0;
  TFlt trehits = 0.0;
  //cerr << "nodeId\tneigh\tnbh_size\tinfected?\tinfected_mass\ttotal_mass" << endl;
  while (counted < 500 && first_unprocessed < queue.Len()) {
    VisitedNode * current_node = queue [first_unprocessed];
    first_unprocessed++;
    TUNGraph::TNodeI NI = g->GetNI (current_node->id);
    TInt neighborhood_size = NI.GetDeg();
    //  cerr << counted << " " << current_node->id << endl;
    if (counted >= burnin) {
      if (infection_state[(current_node->id) - 1] == 1)
       infected_mass += 1.0/TFlt(neighborhood_size);
      total_mass += 1.0/TFlt(neighborhood_size);
    }
    //cerr << current_node->id << "\t" << neighborhood_size << "\t" << (1.0/TFlt(neighborhood_size)) 
    //	 << "\t" << infection_state[(current_node->id) - 1] << "\t" << infected_mass << "\t" << total_mass << endl;
    
    // build list of unvisited neighbors
    TVec<TInt> neighbors;
    for (int i = 0; i < neighborhood_size; i++) {
      TInt neighbor = NI.GetNbrNId(i);
      if (mode == 0 && visited->IsKey(neighbor)) continue;
      else if (mode == 2 && isChild (current_node, neighbor)) continue;
      else if (mode == 3 && current_node-> previous != NULL && current_node->previous->id == neighbor) continue;
      else neighbors = neighbors + neighbor;									
    }
    TInt num_legal_neighbors = neighbors.Len();
    TInt sample_size = TMath::Mn<TInt> (branch, num_legal_neighbors);
    THash <TInt, TInt> * choices = choose (num_legal_neighbors, sample_size);
    for (THash<TInt, TInt>::TIter n = choices->BegI(); n != choices->EndI(); n++) {
      if (queue.Len() >= 500) break;
      queue = queue + new VisitedNode (neighbors[n->Key], current_node);
      if (visited->IsKey(neighbors[n->Key])) revisits++;
      if (isChild(current_node, neighbors[n->Key])) trehits++;
      if (!visited->IsKey(neighbors[n->Key])) qids = qids + neighbors[n->Key];
      visited->AddDat(neighbors[n->Key], 1);
    }
    counted++;
  }
    
  // cout << (infected_mass / total_mass) << endl;
  delete (visited);
  result[0] = (infected_mass / total_mass);
  result[1] = revisits;
  result[2] = trehits;
  result[3] = counted;
  //PUNGraph p (&g);
  PUNGraph p = TSnap:: GetSubGraph (g, qids, false);
  TCnComV convec;
  result[4] = TSnap::GetClustCf(p, -1);
  TSnap::GetWccs(p, convec);
  result[5] = convec.Len();
  
  result[6] = ave_path_length (p);
}