Ejemplo n.º 1
0
/// Writes the block-length table to the stream FBlobBs: first the BlockLenVNm
/// tag, then the number of entries, then every entry of BlockLenV in order,
/// and finally a trailing -1.
void TBlobBs::PutBlockLenV(const PFRnd& FBlobBs, const TIntV& BlockLenV){
  const int BlockLens=BlockLenV.Len();
  // header: tag followed by the entry count
  FBlobBs->PutStr(BlockLenVNm);
  FBlobBs->PutInt(BlockLens);
  // payload: one integer per block length
  int EntryN=0;
  while (EntryN<BlockLens){
    FBlobBs->PutInt(BlockLenV[EntryN]);
    EntryN++;
  }
  // trailer: closing -1 marker
  FBlobBs->PutInt(-1);
}
Ejemplo n.º 2
0
/// Builds an inverted index from character shingles to the quotes that
/// contain them.
///
/// For every quote id returned by QuoteBase->GetAllQuoteIds, each window of
/// ShingleLen consecutive characters of the quote's parsed content string is
/// turned into an MD5 signature, and the quote id is added to the set stored
/// under that signature in ShingleToQuoteIds. Content strings shorter than
/// ShingleLen contribute no shingles. Entries already present in
/// ShingleToQuoteIds are kept and extended.
///
/// Progress is reported on stderr every 1000 quotes.
void LSH::ElCheapoHashing(TQuoteBase *QuoteBase, TInt ShingleLen,
    THash<TMd5Sig, TIntSet>& ShingleToQuoteIds) {
  fprintf(stderr, "Hashing shingles the el cheapo way...\n");
  TIntV QuoteIds;
  QuoteBase->GetAllQuoteIds(QuoteIds);
  for (int qt = 0; qt < QuoteIds.Len(); qt++) {
    if (qt % 1000 == 0) {
      fprintf(stderr, "%d out of %d completed\n", qt, QuoteIds.Len());
    }
    TQuote Q;
    QuoteBase->GetQuote(QuoteIds[qt], Q);

    // Put ShingleLen-character shingles into the hash table
    TStr QContentStr;
    Q.GetParsedContentString(QContentStr);
    TChA QContentChA = TChA(QContentStr);

    for (int i = 0; i < QContentChA.Len() - ShingleLen + 1; i++) {
      // assemble the shingle starting at character i
      TChA ShingleChA = TChA();
      for (int j = 0; j < ShingleLen; j++) {
        ShingleChA.AddCh(QContentChA.GetCh(i + j));
      }
      TStr Shingle = TStr(ShingleChA);
      const TMd5Sig ShingleMd5(Shingle);

      // AddDat(key) yields a reference to the set stored under the key
      // (creating an empty one if the key is new), so the quote id is added
      // in place instead of copying the whole set out of the table and back.
      ShingleToQuoteIds.AddDat(ShingleMd5).AddKey(QuoteIds[qt]);
    }
  }
  Err("Done with el cheapo hashing!\n");
}
Ejemplo n.º 3
0
void TNmObjBs::GetNmObjDIdV(
 const PBowDocBs& BowDocBs, TIntV& BowDIdV, 
 const TStr& NmObjStr1, const TStr& NmObjStr2) const {
  // get first named-object-id
  int NmObjId1=GetNmObjId(NmObjStr1);
  TIntV NmObjDocIdV1; GetNmObjDocIdV(NmObjId1, NmObjDocIdV1);
  NmObjDocIdV1.Sort();
  // get second named-object-id
  TIntV NmObjDocIdV2;
  if (!NmObjStr2.Empty()){
    int NmObjId2=GetNmObjId(NmObjStr2);
    GetNmObjDocIdV(NmObjId2, NmObjDocIdV2);
    NmObjDocIdV2.Sort();
  }
  // create joint doc-id-vector
  TIntV NmObjDocIdV;
  if (NmObjDocIdV2.Empty()){
    NmObjDocIdV=NmObjDocIdV1;
  } else {
    NmObjDocIdV1.Intrs(NmObjDocIdV2, NmObjDocIdV);
  }
  // traverse named-object-documents to collect bow-document-ids
  BowDIdV.Gen(NmObjDocIdV.Len(), 0);
  for (int NmObjDocIdN=0; NmObjDocIdN<NmObjDocIdV.Len(); NmObjDocIdN++){
    TStr DocNm=GetDocNm(NmObjDocIdV[NmObjDocIdN]);
    int DId=BowDocBs->GetDId(DocNm);
    if (DId!=-1){
      BowDIdV.Add(DId);
    } 
  }
}
Ejemplo n.º 4
0
int TGnuPlot::AddPlot(const TIntV& YValV, const TGpSeriesTy& SeriesTy, const TStr& Label, const TStr& Style) {
  TFltKdV XYValV(YValV.Len(), 0);
  for (int i = 0; i < YValV.Len(); i++) {
    XYValV.Add(TFltKd(TFlt(i+1), TFlt(YValV[i])));
  }
  return AddPlot(XYValV, SeriesTy, Label, Style);
}
Ejemplo n.º 5
0
// Estimates epsilon, the empirical probability of an edge between two nodes
// that share no community under the current affiliations (NIDComVH), stores
// it in PNoCom and returns it.
// Node pairs are scanned in the order given by G->GetNIdV; when SamplePairs
// is positive the scan stops as soon as that many community-free pairs have
// been seen. If no edge is found among the examined pairs, the fallback
// 1 / (nodes * nodes) is used instead.
double TAGMFit::CalcPNoComByCmtyVV(const int& SamplePairs) {
  TIntV NIdV;
  G->GetNIdV(NIdV);
  const int NodeCnt = NIdV.Len();
  uint64 PairNoCom = 0, EdgesNoCom = 0;
  bool QuotaReached = false;  // set once SamplePairs community-free pairs were seen
  for (int SrcN = 0; SrcN < NodeCnt && !QuotaReached; SrcN++) {
    for (int DstN = SrcN + 1; DstN < NodeCnt; DstN++) {
      const int SrcNID = NIdV[SrcN];
      const int DstNID = NIdV[DstN];
      TIntSet JointCom;
      TAGMUtil::GetIntersection(NIDComVH.GetDat(SrcNID),NIDComVH.GetDat(DstNID),JointCom);
      if (JointCom.Len() != 0) { continue; }  // pair shares a community
      PairNoCom++;
      if (G->IsEdge(SrcNID, DstNID)) { EdgesNoCom++; }
      if (SamplePairs > 0 && PairNoCom >= (uint64) SamplePairs) {
        QuotaReached = true;
        break;
      }
    }
  }
  const double GraphNodes = (double)G->GetNodes();
  const double DefaultVal = 1.0 / GraphNodes / GraphNodes;
  PNoCom = (EdgesNoCom > 0) ? ((double) EdgesNoCom / (double) PairNoCom) : DefaultVal;
  printf("%s / %s edges without joint com detected (PNoCom = %f)\n", TUInt64::GetStr(EdgesNoCom).CStr(), TUInt64::GetStr(PairNoCom).CStr(), PNoCom.Val);
  return PNoCom;
}
Ejemplo n.º 6
0
// Eric #4
//Count Triangles time (elapsed): 166.162323, cpu: 2048.942704
//Count Triangles time (elapsed): 159.984497, cpu: 1769.572704
//Count Triangles time (elapsed): 167.080368, cpu: 1727.222704
// Counts the values the two vectors have in common using a single merge-style
// scan. The scan only advances forward, so both vectors are expected to be
// sorted in ascending order. Each matching pair of positions is counted once
// and then both positions move on.
int GetCommon(TIntV& A, TIntV& B) {
  const int ALen = A.Len();
  const int BLen = B.Len();
  int Common = 0;
  for (int AIdx = 0, BIdx = 0; AIdx < ALen && BIdx < BLen; ) {
    if (A[AIdx] < B[BIdx]) {
      AIdx++;            // A is behind: catch up
    } else if (A[AIdx] > B[BIdx]) {
      BIdx++;            // B is behind: catch up
    } else {
      Common++;          // same value on both sides
      AIdx++;
      BIdx++;
    }
  }
  return Common;
}
Ejemplo n.º 7
0
int TMultimodalGraphImplB::GetSubGraphMocked(const TIntV ModeIds) const {
  int NumVerticesAndEdges = 0;

  for (THash<TInt,TInt>::TIter CurI = NodeToModeMapping.BegI(); CurI < NodeToModeMapping.EndI(); CurI++) {
    if (ModeIds.IsIn(CurI.GetDat())) {
      NumVerticesAndEdges++;
    }
  }

  for (int ModeIdx1 = 0; ModeIdx1 < ModeIds.Len(); ModeIdx1++) {
    int ModeId1 = ModeIds.GetVal(ModeIdx1);
    for (int ModeIdx2 = 0; ModeIdx2 < ModeIds.Len(); ModeIdx2++) {
      int ModeId2 = ModeIds.GetVal(ModeIdx2);
      TPair<TInt,TInt> ModeIdsKey = GetModeIdsKey(ModeId1, ModeId2);
      if (!Graphs.IsKey(ModeIdsKey)) { continue; }
      const TNGraph& Graph = Graphs.GetDat(ModeIdsKey);
      for (TNGraph::TNodeI it = Graph.BegNI(); it < Graph.EndNI(); it++) {
        for (int e = 0; e < it.GetOutDeg(); e++) {
          NumVerticesAndEdges += it.GetOutNId(e);
        }
      }
    }
  }

  return NumVerticesAndEdges;
}
Ejemplo n.º 8
0
// Applies a merge map to the named-object base.
// NewNmObjIdV is indexed by the current named-object id and gives, for each
// object, the id it is merged into:
//   - NewNmObjIdV[Id]==Id : the object survives as its own entry,
//   - NewNmObjIdV[Id]==-1 : the object gets no alias and is dropped from the
//                           per-document frequency lists,
//   - any other value     : the object becomes an alias of that id.
// The function rebuilds the named-object table from the surviving objects,
// records aliases in NmObjWordStrVToNrH, and rewrites every document's
// (named-object-id, frequency) list in terms of the new ids.
// NOTE(review): the code looks up the merge target's word-vector in the new
// table with GetKeyId; this presumably requires every merge target to be a
// surviving object itself -- confirm with callers.
void TNmObjBs::PutMergedNmObj(const TIntV& NewNmObjIdV){
  // create temporary table of new named-objects
  TStrVIntVH NewNmObjWordStrVToDocIdVH;
  for (int NmObjId=0; NmObjId<NewNmObjIdV.Len(); NmObjId++){
    // only objects that map to themselves are carried over
    if (NewNmObjIdV[NmObjId]!=NmObjId){continue;}
    // take data for new named-object from old definition
    const TStrV& WordStrV=NmObjWordStrVToDocIdVH.GetKey(NmObjId);
    // define new named-object
    NewNmObjWordStrVToDocIdVH.AddDat(WordStrV);
  }
  //printf("Old Named-Objects: %d\n", NmObjWordStrVToDocIdVH.Len());
  //printf("New Named-Objects: %d\n", NewNmObjWordStrVToDocIdVH.Len());
  // define obsolete named-objects as aliases
  {for (int NmObjId=0; NmObjId<NewNmObjIdV.Len(); NmObjId++){
    // surviving objects need no alias
    if (NewNmObjIdV[NmObjId]==NmObjId){continue;}
    // take data for obsolete named-object from old definition
    const TStrV& WordStrV=NmObjWordStrVToDocIdVH.GetKey(NmObjId);
    // define alias for obsolete named-object
    int NrNmObjId=NewNmObjIdV[NmObjId];
    if (NrNmObjId!=-1){
      // map the obsolete word-vector to the word-vector of its merge target
      const TStrV& NrWordStrV=NmObjWordStrVToDocIdVH.GetKey(NrNmObjId);
      NmObjWordStrVToNrH.AddDat(WordStrV, NrWordStrV);
    }
  }}
  // redefine documents
  int Docs=GetDocs();
  for (int DocId=0; DocId<Docs; DocId++){
    // reference into the document: it is rewritten in place below
    TIntPrV& NmObjIdFqPrV=GetDoc_NmObjIdFqPrV(DocId);
    // create temporary-document: new-named-object to frequency table
    TIntIntH NewNmObjIdToFqH(NmObjIdFqPrV.Len());
    for (int NmObjN=0; NmObjN<NmObjIdFqPrV.Len(); NmObjN++){
      // get obsolete named-object data
      int NmObjId=NmObjIdFqPrV[NmObjN].Val1;
      int Fq=NmObjIdFqPrV[NmObjN].Val2;
      // get named-document-id for normalized named-object
      int NrNmObjId=NewNmObjIdV[NmObjId];
      if (NrNmObjId!=-1){
        // get normalized version of word-vector
        const TStrV& NrWordStrV=NmObjWordStrVToDocIdVH.GetKey(NrNmObjId);
        // get new named-object-id
        int NewNmObjId=NewNmObjWordStrVToDocIdVH.GetKeyId(NrWordStrV);
        // add new named-object-id and term-frequency to temporary-document
        // (frequencies of objects merged into the same target are summed)
        NewNmObjIdToFqH.AddDat(NewNmObjId)+=Fq;
      }
    }
    // transfer new-named-object data to document
    NmObjIdFqPrV.Gen(NewNmObjIdToFqH.Len(), 0);
    for (int NmObjP=0; NmObjP<NewNmObjIdToFqH.Len(); NmObjP++){
      int NewNmObjId=NewNmObjIdToFqH.GetKey(NmObjP);
      int Fq=NewNmObjIdToFqH[NmObjP];
      // add named-object and increment by term-frequency
      NmObjIdFqPrV.Add(TIntPr(NewNmObjId, Fq));
      // merge document-ids
      NewNmObjWordStrVToDocIdVH[NewNmObjId].Add(DocId);
    }
    NmObjIdFqPrV.Sort();
  }
  // assign new named-objects
  NmObjWordStrVToDocIdVH=NewNmObjWordStrVToDocIdVH;
}
Ejemplo n.º 9
0
double TStringKernel::KTrie2(const TIntV& s, const TIntV& t, const double& lb, const int& p, int m, const int& AlphN) {
    int ls = s.Len(), lt = t.Len();
    if (ls < p || lt < p) return 0.0;
    m = TInt::GetMn(m, ls-p, lt-p);

    TVec<TVec<TTrieNodeP> > LsV(AlphN), LtV(AlphN); 
    TIntV v(p), x(p+m); double Kern = 0.0;

    // precalculate weights
    TFltV lbV(m+1); lbV[0] = 1;
    for (int i = 0; i < p; i++) lbV[0] *= lb;
    for (int i = 1; i <= m; i++) lbV[i] = lb * lbV[i-1];
    
    for (int i = 0; i <= ls - p; i++) {
        int j = TInt::GetMn(ls, i+p+m);
        LsV[s[i]].Add(TTrieNodeP(TIntPr(i, j-i), 0, 0)); // i == 0 becasue strings start with 0 (not 1 as in Matlab!)
    }
    for (int i = 0; i <= lt - p; i++) {
        int j = TInt::GetMn(lt, i+p+m);
        LtV[t[i]].Add(TTrieNodeP(TIntPr(i, j-i), 0, 0)); // i == 0 becasue strings start with 0 (not 1 as in Matlab!)
    }

    for (int AlphC = 0; AlphC < AlphN; AlphC++) {
        v[0] = AlphC; 
        KTrieR2(s, t, LsV[AlphC], LtV[AlphC], v, 1, Kern, lbV, p, m, AlphN); //depth == 1, not 0 !!!!
    }

    return Kern;
}
Ejemplo n.º 10
0
/// Builds the sub-network induced by the modes listed in ModeIds.
/// Every node whose mode is in ModeIds is added with its mode as node data;
/// then, for each ordered pair of requested modes that has a stored graph,
/// all out-edges of that graph are added. The node and edge counts of the
/// result are printed to stdout before it is returned.
TIntNNet TMultimodalGraphImplB::GetSubGraph(const TIntV ModeIds) const {
  TIntNNet SubGraph;

  // nodes
  THash<TInt,TInt>::TIter NodeModeI = NodeToModeMapping.BegI();
  while (NodeModeI < NodeToModeMapping.EndI()) {
    if (ModeIds.IsIn(NodeModeI.GetDat())) {
      SubGraph.AddNode(NodeModeI.GetKey(), NodeModeI.GetDat());
    }
    NodeModeI++;
  }

  // edges
  const int Modes = ModeIds.Len();
  for (int SrcModeN = 0; SrcModeN < Modes; SrcModeN++) {
    const int SrcModeId = ModeIds.GetVal(SrcModeN);
    for (int DstModeN = 0; DstModeN < Modes; DstModeN++) {
      const int DstModeId = ModeIds.GetVal(DstModeN);
      TPair<TInt,TInt> PairKey = GetModeIdsKey(SrcModeId, DstModeId);
      if (Graphs.IsKey(PairKey)) {
        const TNGraph& PairGraph = Graphs.GetDat(PairKey);
        for (TNGraph::TNodeI NodeI = PairGraph.BegNI(); NodeI < PairGraph.EndNI(); NodeI++) {
          for (int EdgeN = 0; EdgeN < NodeI.GetOutDeg(); EdgeN++) {
            SubGraph.AddEdge(NodeI.GetId(), NodeI.GetOutNId(EdgeN));
          }
        }
      }
    }
  }

  printf("Number of nodes in SubGraph: %d...\n", SubGraph.GetNodes());
  printf("Number of edges in SubGraph: %d...\n", SubGraph.GetEdges());

  return SubGraph;
}
Ejemplo n.º 11
0
/// Writes a plain-text summary of the given clusters to
/// <Directory>/text/top/topclusters_<date>.txt, where <date> is
/// PresentTime.GetDtYmdStr(). Does nothing when logging is disabled.
///
/// For each cluster one header line is written (number of unique sources,
/// number of quotes, representative quote), followed by one tab-indented line
/// per quote that can be found in QB (its number of sources and its text).
/// If the output file cannot be opened, a message is printed to stderr and
/// nothing is written.
void LogOutput::PrintClusterInformationToText(TDocBase *DB, TQuoteBase *QB, TClusterBase *CB, TIntV& ClusterIds, TSecTm PresentTime) {
  if (!ShouldLog) return;

  TStr CurDateString = PresentTime.GetDtYmdStr();
  TStr TopFileName = Directory + "/text/top/topclusters_" + CurDateString + ".txt";
  FILE *T = fopen(TopFileName.CStr(), "w");
  if (T == NULL) {
    // fopen fails e.g. when the directory is missing; writing through a NULL
    // stream would crash, so report the problem and bail out instead
    fprintf(stderr, "Could not open %s for writing\n", TopFileName.CStr());
    return;
  }

  for (int i = 0; i < ClusterIds.Len(); i++) {
    TCluster C;
    CB->GetCluster(ClusterIds[i], C);
    TStr CRepQuote;
    C.GetRepresentativeQuoteString(CRepQuote, QB);

    TIntV CQuoteIds;
    TVec<TUInt> CUniqueSources;
    C.GetQuoteIds(CQuoteIds);
    TCluster::GetUniqueSources(CUniqueSources, CQuoteIds, QB);

    // cluster header line
    fprintf(T, "%d\t%d\t%s\n", CUniqueSources.Len(), CQuoteIds.Len(), CRepQuote.CStr());

    // one line per quote of the cluster; unknown quote ids are skipped
    for (int j = 0; j < CQuoteIds.Len(); j++) {
      TQuote Q;
      if (QB->GetQuote(CQuoteIds[j], Q)) {
        TStr QuoteStr;
        Q.GetContentString(QuoteStr);
        fprintf(T, "\t%d\t%s\n", Q.GetNumSources().Val, QuoteStr.CStr());
      }
    }
  }
  fclose(T);
}
Ejemplo n.º 12
0
/// Builds an inverted index from hashed shingles to cluster ids (shingles by
/// words).
///
/// For every cluster id in ClusterIds, the cluster is loaded from ClusterBase,
/// its set of hashed shingles is obtained from GetHashedShinglesOfCluster
/// (with ShingleLen as the shingle length), and the cluster id is appended to
/// the vector stored under each of those signatures in ShingleToClusterIds.
/// Entries already present in ShingleToClusterIds are kept and extended.
///
/// Progress is reported on stderr every 1000 clusters.
void LSH::HashShinglesOfClusters(TQuoteBase *QuoteBase,
    TClusterBase *ClusterBase, TIntV& ClusterIds, TInt ShingleLen,
    THash<TMd5Sig, TIntV>& ShingleToClusterIds) {
  Err("Hashing shingles of clusters...\n");
  for (int i = 0; i < ClusterIds.Len(); i++) {
    if (i % 1000 == 0) {
      fprintf(stderr, "%d out of %d completed\n", i, ClusterIds.Len());
    }
    TCluster C;
    ClusterBase->GetCluster(ClusterIds[i], C);
    //fprintf(stderr, "%d vs. %d\n", ClusterIds[i].Val, C.GetId().Val);

    // Put x-word shingles into hash table; x is specified by ShingleLen parameter
    THashSet < TMd5Sig > CHashedShingles;
    GetHashedShinglesOfCluster(QuoteBase, C, ShingleLen, CHashedShingles);
    for (THashSet<TMd5Sig>::TIter Hash = CHashedShingles.BegI();
        Hash < CHashedShingles.EndI(); Hash++) {
      // AddDat(key) yields a reference to the vector stored under the key
      // (creating an empty one if the key is new), so the id is appended in
      // place instead of copying the whole vector out of the table and back.
      ShingleToClusterIds.AddDat(*Hash).Add(ClusterIds[i]);
    }
  }
  Err("Done hashing!\n");
}
Ejemplo n.º 13
0
/// Writes the JSON output for the given clusters; does nothing when logging
/// is disabled.
///
/// One JSON file per cluster is produced under <Directory>/web/json/clusters/
/// via TPrintJson::PrintClusterJSON, and a table for all clusters is written
/// to <Directory>/web/json/daily/<date>.json via
/// TPrintJson::PrintClusterTableJSON. For the table, each cluster gets a rank
/// string computed by ComputeOldRankString from its current 1-based position
/// and its 1-based position in OldTopClusters.
void LogOutput::PrintClusterInformation(TDocBase *DB, TQuoteBase *QB, TClusterBase *CB, PNGraph& QGraph, TIntV& ClusterIds, TSecTm PresentTime, TIntV &OldTopClusters) {
  if (!ShouldLog) { return; }
  TStr CurDateString = PresentTime.GetDtYmdStr();
  Err("Writing cluster information...\n");

  // previous ranking: cluster id -> 1-based position in OldTopClusters
  THash<TInt, TInt> OldRankings;
  const int NumOldTop = OldTopClusters.Len();
  for (int OldRank = 1; OldRank <= NumOldTop; OldRank++) {
    OldRankings.AddDat(OldTopClusters[OldRank - 1], OldRank);
  }

  TStrV RankStr;
  TStr ClusterJSONDirectory = Directory + "/web/json/clusters/";
  for (int ClusterN = 0; ClusterN < ClusterIds.Len(); ClusterN++) {
    // rank string for the table
    TStr OldRankStr;
    ComputeOldRankString(OldRankings, ClusterIds[ClusterN], ClusterN+1, OldRankStr);
    RankStr.Add(OldRankStr);

    // one JSON file per cluster
    TPrintJson::PrintClusterJSON(QB, DB, CB, QGraph, ClusterJSONDirectory, ClusterIds[ClusterN], PresentTime);
  }
  Err("JSON Files for individual written!\n");

  TStr JSONTableFileName = Directory + "/web/json/daily/" + CurDateString + ".json";
  TPrintJson::PrintClusterTableJSON(QB, DB, CB, JSONTableFileName, ClusterIds, RankStr);
  Err("JSON Files for the cluster table written!\n");
}
Ejemplo n.º 14
0
/// Returns every pair of data vectors that fall into the same bucket in at
/// least one band.
/// Pairs are first collected as (smaller id, larger id) in a hash set, so a
/// pair sharing buckets in several bands is reported once; the ids are then
/// replaced by the corresponding entries of DataV.
TVec<TPair<TFltV, TFltV> > TLSHash::GetAllCandidatePairs() {
  THashSet<TPair<TInt, TInt> > CandidateIdPairs;
  for (int BandN=0; BandN<Bands; BandN++) {
    TVec<TIntV> BucketVV;
    SigBucketVHV[BandN].GetDatV(BucketVV);
    for (int BucketN=0; BucketN<BucketVV.Len(); BucketN++) {
      const TIntV& BucketV = BucketVV[BucketN];
      const int Members = BucketV.Len();
      // all unordered pairs of bucket members
      for (int MemberA=0; MemberA<Members; MemberA++) {
        for (int MemberB=MemberA+1; MemberB<Members; MemberB++) {
          const int IdA = BucketV[MemberA];
          const int IdB = BucketV[MemberB];
          // normalize so that the smaller id comes first
          const int LoId = (IdA > IdB) ? IdB : IdA;
          const int HiId = (IdA > IdB) ? IdA : IdB;
          CandidateIdPairs.AddKey(TPair<TInt, TInt> (LoId, HiId));
        }
      }
    }
  }

  // translate id pairs into pairs of data vectors
  TVec<TPair<TFltV, TFltV> > CandidatePairs;
  for (int KeyId = CandidateIdPairs.FFirstKeyId(); CandidateIdPairs.FNextKeyId(KeyId); ) {
    TPair<TInt, TInt> IdPair = CandidateIdPairs[KeyId];
    CandidatePairs.Add(
      TPair<TFltV, TFltV>(DataV[IdPair.GetVal1()], DataV[IdPair.GetVal2()]));
  }
  return CandidatePairs;
}
Ejemplo n.º 15
0
// Sweeps over the events once, maintaining a "pre" window of events in
// [t - delta, t) and a "post" window of events in (t, t + delta] around the
// current event, and lets ProcessCurrent update the counters for each event.
// The window boundaries only move forward, so timestamps are expected to be
// in non-decreasing order. Throws if events and timestamps differ in length.
void StarTriad3TEdgeCounter<EdgeData>::Count(const TVec<EdgeData>& events,
                                             const TIntV& timestamps, double delta) {
  InitializeCounters();
  if (events.Len() != timestamps.Len()) {
    TExcept::Throw("Number of events must match number of timestamps.");
  }
  const int num_events = timestamps.Len();
  int pre_begin = 0;  // first event still inside the pre-window
  int pos_end = 0;    // one past the last event pushed to the post-window
  for (int cur = 0; cur < num_events; cur++) {
    const double t_cur = double(timestamps[cur]);
    // Drop events that fell out of the pre-window [t_cur - delta, t_cur)
    for (; pre_begin < num_events && double(timestamps[pre_begin]) < t_cur - delta;
         pre_begin++) {
      PopPre(events[pre_begin]);
    }
    // Pull events into the post-window (t_cur, t_cur + delta]
    for (; pos_end < num_events && double(timestamps[pos_end]) <= t_cur + delta;
         pos_end++) {
      PushPos(events[pos_end]);
    }
    // The current event leaves the post-window, is processed, and then
    // joins the pre-window
    PopPos(events[cur]);
    ProcessCurrent(events[cur]);
    PushPre(events[cur]);
  }
}
Ejemplo n.º 16
0
/// Builds a matrix whose columns are the sparse vectors of the documents
/// listed in DIdV (in that order), taken from BowDocWgtBs; the number of rows
/// is the number of words in BowDocWgtBs.
TBowMatrix::TBowMatrix(PBowDocWgtBs BowDocWgtBs, const TIntV& DIdV): TMatrix() {
    const int Docs = DIdV.Len();
    RowN = BowDocWgtBs->GetWords();
    ColSpVV.Gen(Docs, 0);
    int DocN = 0;
    while (DocN < Docs) {
        ColSpVV.Add(BowDocWgtBs->GetSpV(DIdV[DocN]));
        DocN++;
    }
}
Ejemplo n.º 17
0
/// Generates the next level of candidate item sets from the current ones.
///
/// CandItemH is cleared and then filled with every join of two item sets from
/// CurItemH whose result is exactly one item longer than the inputs; each
/// candidate is inserted with the value 0.
///  - If the current sets have a single item, all pairs are joined.
///  - Otherwise CurItemH is sorted by key and a set is only joined with the
///    sets following it that have the same second-to-last item.
/// If CurItemH is empty there is nothing to join and CandItemH stays empty.
void TTrawling::GenCandidates() {
  CandItemH.Clr(false);
  // GetKey(0) below requires at least one current item set
  if (CurItemH.Len() == 0) { return; }
  TIntV JoinItem;
  if (CurItemH.GetKey(0).Len() == 1) {
    // join 1-items into 2-items
    for (int i = 0; i < CurItemH.Len(); i++) {
      for (int j = i+1; j < CurItemH.Len(); j++) {
        JoinItems(CurItemH.GetKey(i), CurItemH.GetKey(j), JoinItem);
        if (JoinItem.Len() == CurItemH.GetKey(i).Len()+1) {
          CandItemH.AddDat(JoinItem, 0);
        }
      }
    }
  } else {
    // join longer item sets
    CurItemH.SortByKey();
    for (int i = 0; i < CurItemH.Len(); i++) {
      const TIntV& Set = CurItemH.GetKey(i);
      const int Val = Set[Set.Len()-2];
      // only the run of following sets with the same second-to-last item
      for (int j=i+1; j < CurItemH.Len() && CurItemH.GetKey(j)[CurItemH.GetKey(j).Len()-2] == Val; j++) {
        JoinItems(CurItemH.GetKey(i), CurItemH.GetKey(j), JoinItem);
        if (JoinItem.Len() == CurItemH.GetKey(i).Len()+1) {
          CandItemH.AddDat(JoinItem, 0);
        }
      }
    }
  }
}
Ejemplo n.º 18
0
// Enumerates the triangles of the static graph.
// On return Us[k], Vs[k], Ws[k] are the three node ids of the k-th triangle
// found; the three output vectors are cleared first. Each triangle is
// reported from its node that comes first in a degree-based ordering, so it
// is emitted once. The loops run under OpenMP and results are appended inside
// a critical section, so the order of the reported triangles depends on
// thread scheduling.
void TempMotifCounter::GetAllStaticTriangles(TIntV& Us, TIntV& Vs, TIntV& Ws) {
  Us.Clr();
  Vs.Clr();
  Ws.Clr();
  // Get degree ordering of the graph
  // (tables below are indexed directly by node id, hence sized by GetMxNId())
  int max_nodes = static_graph_->GetMxNId();
  TVec<TIntPair> degrees(max_nodes);
  degrees.PutAll(TIntPair(0, 0));
  // Set the degree of a node to be the number of nodes adjacent to the node in
  // the undirected graph.
  TIntV nodes;
  GetAllNodes(nodes);
  #pragma omp parallel for schedule(dynamic)  
  for (int node_id = 0; node_id < nodes.Len(); node_id++) {
    int src = nodes[node_id];
    TIntV nbrs;
    GetAllNeighbors(src, nbrs);
    // (degree, node id) pair stored at the node's own index
    degrees[src] = TIntPair(nbrs.Len(), src);
  }
  degrees.Sort();
  // order[node id] = position of the node in the sorted degree list
  // NOTE(review): presumably TIntPair sorts by its first component (the
  // degree) and .Dat is the second component (the node id) -- confirm.
  TIntV order = TIntV(max_nodes);
  #pragma omp parallel for schedule(dynamic)  
  for (int i = 0; i < order.Len(); i++) {
    order[degrees[i].Dat] = i;
  }

  // Get triangles centered at a given node where that node is the smallest in
  // the degree ordering.
  #pragma omp parallel for schedule(dynamic)  
  for (int node_id = 0; node_id < nodes.Len(); node_id++) {
    int src = nodes[node_id];
    int src_pos = order[src];
    
    // Get all neighbors who come later in the ordering
    TIntV nbrs;
    GetAllNeighbors(src, nbrs);    
    TIntV neighbors_higher;
    for (int i = 0; i < nbrs.Len(); i++) {
      int nbr = nbrs[i];
      if (order[nbr] > src_pos) { neighbors_higher.Add(nbr); }
    }

    // Every pair of later neighbors that is itself connected closes a triangle
    for (int ind1 = 0; ind1 < neighbors_higher.Len(); ind1++) {
      for (int ind2 = ind1 + 1; ind2 < neighbors_higher.Len(); ind2++) {
        int dst1 = neighbors_higher[ind1];
        int dst2 = neighbors_higher[ind2];
        // Check for triangle formation (an edge in either direction counts)
        if (static_graph_->IsEdge(dst1, dst2) || static_graph_->IsEdge(dst2, dst1)) {
          // The three output vectors are shared between threads; append the
          // triple under the critical section so the entries stay aligned
          #pragma omp critical
          {
            Us.Add(src);
            Vs.Add(dst1);
            Ws.Add(dst2);
          }
        }
      }
    }
  }
}
Ejemplo n.º 19
0
/// Finds the first entry of BlockLenV that can hold a buffer of length BfL.
/// On return MxBfL is that block length and FFreeBlobPtN is its index in
/// BlockLenV. Asserts (EAssert) if no entry is large enough.
void TBlobBs::GetAllocInfo(
 const int& BfL, const TIntV& BlockLenV, int& MxBfL, int& FFreeBlobPtN){
  const int BlockLens=BlockLenV.Len();
  // scan for the first block length that is not smaller than BfL
  int FitN=0;
  for (; FitN<BlockLens; FitN++){
    if (!(BfL>BlockLenV[FitN])){break;}
  }
  EAssert(FitN<BlockLens);
  MxBfL=BlockLenV[FitN];
  FFreeBlobPtN=FitN;
}
Ejemplo n.º 20
0
// Simulates a fire in which every link is burned independently: forward
// (out-) links with probability FwdBurnProb, backward (in-) links with
// probability BckBurnProb.
// The fire starts at the nodes in InfectNIdV. In every time step each burning
// node tries to ignite its not-yet-burned neighbors; the newly ignited nodes
// are the burning nodes of the next step (a node burns for one step only).
// After every step both probabilities are multiplied by ProbDecay; they are
// restored to their original values before returning.
// Outputs:
//   BurnedNIdV   all nodes burned, including the initial ones
//   NBurnedTmV   cumulative number of burned nodes after each step
//   NBurningTmV  per step, number of burning nodes that still had at least
//                one unburned neighbor
//   NewBurnedTmV number of nodes newly burned in each step
void TForestFire::BurnExpFire() {
  const double OldFwdBurnProb = FwdBurnProb;
  const double OldBckBurnProb = BckBurnProb;
  const int NInfect = InfectNIdV.Len();
  const TNGraph& G = *Graph;
  TIntH BurnedNIdH;               // burned nodes
  TIntV BurningNIdV = InfectNIdV; // currently burning nodes
  TIntV NewBurnedNIdV;            // nodes newly burned in current step
  bool HasAliveNbrs;              // has unburned neighbors
  int NBurned = NInfect, NDiedFire=0;
  for (int i = 0; i < InfectNIdV.Len(); i++) {
    BurnedNIdH.AddDat(InfectNIdV[i]); }
  NBurnedTmV.Clr(false);  NBurningTmV.Clr(false);  NewBurnedTmV.Clr(false);
  for (int time = 0; ; time++) {
    NewBurnedNIdV.Clr(false);
    // Count the fires that die out once per time step. Resetting the counter
    // for every burning node (as before) made it reflect only the last node
    // of the step, so NBurningTmV was off whenever several nodes burned.
    NDiedFire = 0;
    // for each burning node
    for (int node = 0; node < BurningNIdV.Len(); node++) {
      const int& BurningNId = BurningNIdV[node];
      const TNGraph::TNodeI Node = G.GetNI(BurningNId);
      HasAliveNbrs = false;
      // burn forward links  (out-links)
      for (int e = 0; e < Node.GetOutDeg(); e++) {
        const int OutNId = Node.GetOutNId(e);
        if (! BurnedNIdH.IsKey(OutNId)) { // not yet burned
          HasAliveNbrs = true;
          if (Rnd.GetUniDev() < FwdBurnProb) {
            BurnedNIdH.AddDat(OutNId);  NewBurnedNIdV.Add(OutNId);  NBurned++; }
        }
      }
      // burn backward links (in-links)
      if (BckBurnProb > 0.0) {
        for (int e = 0; e < Node.GetInDeg(); e++) {
          const int InNId = Node.GetInNId(e);
          if (! BurnedNIdH.IsKey(InNId)) { // not yet burned
            HasAliveNbrs = true;
            if (Rnd.GetUniDev() < BckBurnProb) {
              BurnedNIdH.AddDat(InNId);  NewBurnedNIdV.Add(InNId);  NBurned++; }
          }
        }
      }
      // a node without unburned neighbors cannot spread the fire any further
      if (! HasAliveNbrs) { NDiedFire++; }
    }
    NBurnedTmV.Add(NBurned);
    NBurningTmV.Add(BurningNIdV.Len() - NDiedFire);
    NewBurnedTmV.Add(NewBurnedNIdV.Len());
    //BurningNIdV.AddV(NewBurnedNIdV);   // node is burning eternally
    BurningNIdV.Swap(NewBurnedNIdV);    // node is burning just 1 time step
    if (BurningNIdV.Empty()) break;
    FwdBurnProb = FwdBurnProb * ProbDecay;
    BckBurnProb = BckBurnProb * ProbDecay;
  }
  // export the burned set as a vector
  BurnedNIdV.Gen(BurnedNIdH.Len(), 0);
  for (int i = 0; i < BurnedNIdH.Len(); i++) {
    BurnedNIdV.Add(BurnedNIdH.GetKey(i)); }
  // undo the decay applied during the simulation
  FwdBurnProb = OldFwdBurnProb;
  BckBurnProb = OldBckBurnProb;
}
Ejemplo n.º 21
0
/// Saves the bipartite node-community affiliation graph as a GEXF file.
///
/// OutFNm     output file name
/// NIDV       node ids; written as one row of nodes along the bottom (y=0.1)
/// CmtyVV     communities; community c is written as node 'C<c>' in a row
///            along the top (y=30). Nothing is written if CmtyVV is empty
/// MaxSz      size used for community nodes
/// MinSz      size used for ordinary nodes
/// NIDNameH   optional node labels (empty label if a node has no entry)
/// NIDColorH  optional node colors (grey 120,120,120 if missing)
/// CIDColorH  optional community colors (grey 120,120,120 if missing)
///
/// One directed edge 'C<c>' -> node is written for every membership returned
/// by TAGMUtil::GetNodeMembership. If the file cannot be opened, a message is
/// printed to stderr and nothing is written.
void TAGMUtil::SaveBipartiteGephi(const TStr& OutFNm, const TIntV& NIDV, const TVec<TIntV>& CmtyVV, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH, const THash<TInt, TIntTr>& CIDColorH ) {
    if (CmtyVV.Len() == 0) {
        return;
    }
    // layout: nodes are spread over [NXMin, NXMax], communities over the
    // middle 40% of that range
    double NXMin = 0.1, YMin = 0.1, NXMax = 250.00, YMax = 30.0;
    double CXMin = 0.3 * NXMax, CXMax = 0.7 * NXMax;
    double CStep = (CXMax - CXMin) / (double) CmtyVV.Len(), NStep = (NXMax - NXMin) / (double) NIDV.Len();
    THash<TInt,TIntV> NIDComVH;
    TAGMUtil::GetNodeMembership(NIDComVH, CmtyVV);

    FILE* F = fopen(OutFNm.CStr(), "wt");
    if (F == NULL) {
        // writing through a NULL stream would crash; report and give up
        fprintf(stderr, "Could not open %s for writing\n", OutFNm.CStr());
        return;
    }
    fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n");
    fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n");
    fprintf(F, "\t<graph mode='static' defaultedgetype='directed'>\n");
    fprintf(F, "\t\t<nodes>\n");
    // community nodes
    for (int c = 0; c < CmtyVV.Len(); c++) {
        int CID = c;
        double XPos = c * CStep + CXMin;
        TIntTr Color = CIDColorH.IsKey(CID)? CIDColorH.GetDat(CID) : TIntTr(120, 120, 120);
        fprintf(F, "\t\t\t<node id='C%d' label='C%d'>\n", CID, CID);
        fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val);
        fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", MaxSz);
        fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
        fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMax);
        fprintf(F, "\t\t\t</node>\n");
    }

    // ordinary nodes
    for (int u = 0; u < NIDV.Len(); u++) {
        int NID = NIDV[u];
        TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
        double Size = MinSz;
        double XPos = NXMin + u * NStep;
        TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);
        double Alpha = 1.0;
        fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
        fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha);
        fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size);
        fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
        fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMin);
        fprintf(F, "\t\t\t</node>\n");
    }
    fprintf(F, "\t\t</nodes>\n");
    // membership edges, numbered consecutively from 0
    fprintf(F, "\t\t<edges>\n");
    int EID = 0;
    for (int u = 0; u < NIDV.Len(); u++) {
        int NID = NIDV[u];
        if (NIDComVH.IsKey(NID)) {
            for (int c = 0; c < NIDComVH.GetDat(NID).Len(); c++) {
                int CID = NIDComVH.GetDat(NID)[c];
                fprintf(F, "\t\t\t<edge id='%d' source='C%d' target='%d'/>\n", EID++, CID, NID);
            }
        }
    }
    fprintf(F, "\t\t</edges>\n");
    fprintf(F, "\t</graph>\n");
    fprintf(F, "</gexf>\n");
    // release the handle so buffered output is flushed and nothing leaks
    fclose(F);
}
Ejemplo n.º 22
0
void TNEANetMP::Dump(FILE *OutF) const {
  const int NodePlaces = (int) ceil(log10((double) GetNodes()));
  const int EdgePlaces = (int) ceil(log10((double) GetEdges()));
  fprintf(OutF, "-------------------------------------------------\nDirected Node-Edge Network: nodes: %d, edges: %d\n", GetNodes(), GetEdges());
  for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
    fprintf(OutF, "  %*d]\n", NodePlaces, NodeI.GetId());
    // load node attributes
    TIntV IntAttrN;
    IntAttrValueNI(NodeI.GetId(), IntAttrN);
    fprintf(OutF, "    nai[%d]", IntAttrN.Len());
    for (int i = 0; i < IntAttrN.Len(); i++) {
      fprintf(OutF, " %*i", NodePlaces, IntAttrN[i]()); }
    TStrV StrAttrN;
    StrAttrValueNI(NodeI.GetId(), StrAttrN);
    fprintf(OutF, "    nas[%d]", StrAttrN.Len());
    for (int i = 0; i < StrAttrN.Len(); i++) {
      fprintf(OutF, " %*s", NodePlaces, StrAttrN[i]()); }
    TFltV FltAttrN;
    FltAttrValueNI(NodeI.GetId(), FltAttrN);
    fprintf(OutF, "    naf[%d]", FltAttrN.Len());
    for (int i = 0; i < FltAttrN.Len(); i++) {
      fprintf(OutF, " %*f", NodePlaces, FltAttrN[i]()); }

    fprintf(OutF, "    in[%d]", NodeI.GetInDeg());
    for (int edge = 0; edge < NodeI.GetInDeg(); edge++) {
      fprintf(OutF, " %*d", EdgePlaces, NodeI.GetInEId(edge)); }
    fprintf(OutF, "\n");
    fprintf(OutF, "    out[%d]", NodeI.GetOutDeg());
    for (int edge = 0; edge < NodeI.GetOutDeg(); edge++) {
      fprintf(OutF, " %*d", EdgePlaces, NodeI.GetOutEId(edge)); }
    fprintf(OutF, "\n");
  }
  for (TEdgeI EdgeI = BegEI(); EdgeI < EndEI(); EdgeI++) {
    fprintf(OutF, "  %*d]  %*d  ->  %*d", EdgePlaces, EdgeI.GetId(), NodePlaces, EdgeI.GetSrcNId(), NodePlaces, EdgeI.GetDstNId());

    // load edge attributes
    TIntV IntAttrE;
    IntAttrValueEI(EdgeI.GetId(), IntAttrE);
    fprintf(OutF, "    eai[%d]", IntAttrE.Len());
    for (int i = 0; i < IntAttrE.Len(); i++) {
      fprintf(OutF, " %*i", EdgePlaces, IntAttrE[i]());
    }
    TStrV StrAttrE;
    StrAttrValueEI(EdgeI.GetId(), StrAttrE);
    fprintf(OutF, "    eas[%d]", StrAttrE.Len());
    for (int i = 0; i < StrAttrE.Len(); i++) {
      fprintf(OutF, " %*s", EdgePlaces, StrAttrE[i]());
    }
    TFltV FltAttrE;
    FltAttrValueEI(EdgeI.GetId(), FltAttrE);
    fprintf(OutF, "    eaf[%d]", FltAttrE.Len());
    for (int i = 0; i < FltAttrE.Len(); i++) {
      fprintf(OutF, " %*f", EdgePlaces, FltAttrE[i]());
    }
    fprintf(OutF, "\n");
  }
  fprintf(OutF, "\n");
}
Ejemplo n.º 23
0
int main(int argc, char* argv[]){
  Try;
  // create environment
  Env=TEnv(argc, argv, TNotify::StdNotify);

  // get command line parameters
  Env.PrepArgs("DMoz-Topic To Text", -1);
  TStr InFPath=Env.GetIfArgPrefixStr("-i:", "", "Input-File-Path");
  TStr OutFPath=Env.GetIfArgPrefixStr("-o:", "", "Output-File-Path");
  TStr RootCatNm=Env.GetIfArgPrefixStr("-c:", "Top/Science", "Root-Category-Name");
  if (Env.IsEndOfRun()){return 0;}

  // load DMoz-Base
  PDMozBs DMozBs=TDMozBs::LoadBin(TDMozInfo::BinFullFBase, InFPath);

  // assign root category name
  //RootCatNm="Top/Computers/Software/Databases/Data_Mining";
  //RootCatNm="Top/Reference/Knowledge_Management/Knowledge_Discovery";
  //RootCatNm="Top/Computers/Artificial_Intelligence/Machine_Learning";
  //RootCatNm="Top/Computers/Artificial_Intelligence";
  //RootCatNm="Top/Recreation/Travel";

  // get root category-id
  int RootCatId=DMozBs->GetCatId(RootCatNm);
  // prepare external-url list
  TStr RootFBase=TStr::GetFNmStr(RootCatNm, true);
  TStr ExtUrlFNm=TStr::GetNrFPath(OutFPath)+RootFBase+"_ExternalUrlList.Txt";
  TFOut ExtUrlSOut(ExtUrlFNm); FILE* fExtUrlOut=ExtUrlSOut.GetFileId();
  // get topic categories
  TIntV TopicCatIdV; DMozBs->GetSubCatIdV(RootCatId, TopicCatIdV);
  for (int TopicCatIdN=0; TopicCatIdN<TopicCatIdV.Len(); TopicCatIdN++){
    // get topic id & name
    int TopicCatId=TopicCatIdV[TopicCatIdN];
    TStr TopicCatNm=DMozBs->GetCatNm(TopicCatId);
    // get subtopic subtrees and corresponding external-url-ids
    TIntV SubCatIdV; TIntV CatIdV;
    //DMozBs->GetSubTreeCatIdV(TopicCatId, SubCatIdV, CatIdV, true);
    TIntV ExtUrlIdV; DMozBs->GetExtUrlIdV(CatIdV, ExtUrlIdV);
    // output url/titles/descriptions
    TStr TopicFBase=TStr::GetFNmStr(TopicCatNm, true);
    TStr TopicFNm=TStr::GetNrFPath(OutFPath)+TopicFBase+".Txt";
    printf("Saving %s\n", TopicFNm.CStr());
    TFOut TopicSOut(TopicFNm); FILE* fTopicOut=TopicSOut.GetFileId();
    for (int ExtUrlIdN=0; ExtUrlIdN<ExtUrlIdV.Len(); ExtUrlIdN++){
      int ExtUrlId=ExtUrlIdV[ExtUrlIdN];
      TStr UrlStr=DMozBs->GetExtUrlStr(ExtUrlId);
      TStr TitleStr=DMozBs->GetExtUrlTitleStr(ExtUrlId);
      TStr DescStr=DMozBs->GetExtUrlDescStr(ExtUrlId);
      fprintf(fExtUrlOut, "%s\n", UrlStr.CStr());
      fprintf(fTopicOut, "%s - %s\n", TitleStr.CStr(), DescStr.CStr());
    }
  }
  return 0;
  Catch;
  return 1;
}
Ejemplo n.º 24
0
void TNGramBs::GetNGramStrV(
 const TStr& HtmlStr, TStrV& NGramStrV, TIntPrV& NGramBEChXPrV) const {
  TIntV NGramIdV; NGramStrV.Clr(); NGramBEChXPrV.Clr();
  TNGramBs::GetNGramIdV(HtmlStr, NGramIdV, NGramBEChXPrV);
  NGramStrV.Gen(NGramIdV.Len(), 0);
  for (int NGramIdN=0; NGramIdN<NGramIdV.Len(); NGramIdN++){
    TStr NGramStr=GetNGramStr(NGramIdV[NGramIdN]);
    NGramStrV.Add(NGramStr);
  }
}
Ejemplo n.º 25
0
// Counts three-edge, three-node temporal star motifs by brute force.
// For every node taken as the center and every unordered pair of its
// neighbors, all temporal edges between the center and the two neighbors are
// gathered, ordered, and handed to a ThreeTEdgeMotifCounter; the resulting
// local counts are folded into the three 2x2x2 output counters.
// delta is passed through to the motif counter (presumably the time window
// within which the three edges must occur -- confirm).
// The loop over centers runs under OpenMP; the output counters are updated
// inside a critical section.
void TempMotifCounter::Count3TEdge3NodeStarsNaive(
        double delta, Counter3D& pre_counts, Counter3D& pos_counts,
        Counter3D& mid_counts) {
  TIntV centers;
  GetAllNodes(centers);
  // Outputs are reset to fresh 2x2x2 counters
  pre_counts = Counter3D(2, 2, 2);
  pos_counts = Counter3D(2, 2, 2);
  mid_counts = Counter3D(2, 2, 2);
  // Get counts for each node as the center
  #pragma omp parallel for schedule(dynamic)
  for (int c = 0; c < centers.Len(); c++) {
    // Gather all adjacent events
    int center = centers[c];
    TIntV nbrs;
    GetAllNeighbors(center, nbrs);
    for (int i = 0; i < nbrs.Len(); i++) {
      for (int j = i + 1; j < nbrs.Len(); j++) {
        int nbr1 = nbrs[i];
        int nbr2 = nbrs[j];
        // Label the four directed edge kinds of this star 0..3:
        //   0: center -> nbr1   1: nbr1 -> center
        //   2: center -> nbr2   3: nbr2 -> center
        // NOTE(review): presumably AddStarEdges appends (timestamp, label)
        // pairs for the edges from its 2nd to its 3rd argument -- confirm.
        TVec<TIntPair> combined;
        AddStarEdges(combined, center, nbr1, 0);
        AddStarEdges(combined, nbr1, center, 1);
        AddStarEdges(combined, center, nbr2, 2);
        AddStarEdges(combined, nbr2, center, 3);
        combined.Sort();
        ThreeTEdgeMotifCounter counter(4);
        // Split the sorted pairs into parallel label / timestamp vectors
        TIntV edge_id(combined.Len());
        TIntV timestamps(combined.Len());
        for (int k = 0; k < combined.Len(); k++) {
          edge_id[k] = combined[k].Dat;
          timestamps[k] = combined[k].Key;
        }
        Counter3D local;
        counter.Count(edge_id, timestamps, delta, local);

        // The output counters are shared between threads
        #pragma omp critical
        {  // Update with local counts
          // Labels d and d + 2 denote the same direction relative to the
          // center, towards nbr1 and nbr2 respectively. Each output adds the
          // two label sequences in which exactly one of the three edges goes
          // to the other neighbor: the third edge for pre_counts, the first
          // for pos_counts, the second for mid_counts.
          for (int dir1 = 0; dir1 < 2; ++dir1) {
            for (int dir2 = 0; dir2 < 2; ++dir2) {
              for (int dir3 = 0; dir3 < 2; ++dir3) {
                pre_counts(dir1, dir2, dir3) +=
                  local(dir1, dir2, dir3 + 2) + local(dir1 + 2, dir2 + 2, dir3);
                pos_counts(dir1, dir2, dir3) +=
                  local(dir1, dir2 + 2, dir3 + 2) + local(dir1 + 2, dir2, dir3);
                mid_counts(dir1, dir2, dir3) +=
                  local(dir1, dir2 + 2, dir3) + local(dir1 + 2, dir2, dir3 + 2);
              }
            }
          }
        }
      }
    }
  }
}
Ejemplo n.º 26
0
void TBowFl::SaveSparseMatlabTxt(const PBowDocBs& BowDocBs,
    const PBowDocWgtBs& BowDocWgtBs, const TStr& FNm,
    const TStr& CatFNm, const TIntV& _DIdV) {

  TIntV DIdV;
  if (_DIdV.Empty()) {
      BowDocBs->GetAllDIdV(DIdV);
  } else {
      DIdV = _DIdV;
  }
  // generate map of row-ids to words
  TFOut WdMapSOut(TStr::PutFExt(FNm, ".row-to-word-map.dat"));
  for (int WId = 0; WId < BowDocWgtBs->GetWords(); WId++) {
    TStr WdStr = BowDocBs->GetWordStr(WId);
    WdMapSOut.PutStrLn(TStr::Fmt("%d %s", WId+1,  WdStr.CStr()));
  }
  WdMapSOut.Flush();
  // generate map of col-ids to document names
  TFOut DocMapSOut(TStr::PutFExt(FNm, ".col-to-docName-map.dat"));
  for (int DocN = 0; DocN < DIdV.Len(); DocN++) {
    const int DId = DIdV[DocN];
    TStr DocNm = BowDocBs->GetDocNm(DId);
    DocMapSOut.PutStrLn(TStr::Fmt("%d %d %s", DocN, DId,  DocNm.CStr()));
  }
  DocMapSOut.Flush();
  // save documents' sparse vectors
  TFOut SOut(FNm);
  for (int DocN = 0; DocN < DIdV.Len(); DocN++){
    const int DId = DIdV[DocN];
    PBowSpV DocSpV = BowDocWgtBs->GetSpV(DId);
    const int DocWIds = DocSpV->GetWIds();
    for (int DocWIdN=0; DocWIdN<DocWIds; DocWIdN++){
      const int WId = DocSpV->GetWId(DocWIdN);
      const double WordWgt = DocSpV->GetWgt(DocWIdN);
      SOut.PutStrLn(TStr::Fmt("%d %d %.16f", WId+1, DocN+1, WordWgt));
    }
  }
  SOut.Flush();
  // save documents' category sparse vectors
  if (!CatFNm.Empty()) {
    TFOut CatSOut(CatFNm);
    for (int DocN = 0; DocN < DIdV.Len(); DocN++){
      const int DId = DIdV[DocN];
      const int DocCIds = BowDocBs->GetDocCIds(DId);
      for (int DocCIdN=0; DocCIdN<DocCIds; DocCIdN++){
        const int CId = BowDocBs->GetDocCId(DId, DocCIdN);
        const double CatWgt = 1.0;
        CatSOut.PutStrLn(TStr::Fmt("%d %d %.16f", CId+1, DocN+1, CatWgt));
      }
    }
    CatSOut.Flush();
  }
}
Ejemplo n.º 27
0
// Fills Tb with the sub-table made of tuples TupNV and variables VarNV of
// this table and returns it. Variables are added first, then one tuple per
// entry of TupNV with its values for the selected variables, and finally
// Tb->DefVarTypes() is called.
// Returns NULL, leaving Tb untouched, when either selection is empty.
PTb TTb::GetSubTb(const TIntV& TupNV, const TIntV& VarNV, const PTb& Tb){
  const int SelTups=TupNV.Len();
  const int SelVars=VarNV.Len();
  if ((SelTups==0)||(SelVars==0)){return NULL;}
  // declare the selected variables in the destination table
  for (int SelVarN=0; SelVarN<SelVars; SelVarN++){
    Tb->AddVar(GetVar(VarNV[SelVarN]));
  }
  // copy tuples; column SelVarN of Tb holds source variable VarNV[SelVarN]
  for (int SelTupN=0; SelTupN<SelTups; SelTupN++){
    const int SrcTupN=TupNV[SelTupN];
    const int DstTupN=Tb->AddTup(GetTupNm(SrcTupN));
    for (int SelVarN=0; SelVarN<SelVars; SelVarN++){
      Tb->PutVal(DstTupN, SelVarN, GetVal(SrcTupN, VarNV[SelVarN]));
    }
  }
  Tb->DefVarTypes();
  return Tb;
}
Ejemplo n.º 28
0
// Builds a matrix with one sparse column per document in DIdV (taken from
// BowDocWgtBs) and BowDocBs->GetWords() rows.
// ClsV is regenerated to hold one label per document, in DIdV order:
// 0.99 when the document belongs to category CatNm, -0.99 otherwise.
// CatNm must be a known category name (checked with IAssert).
TBowMatrix::TBowMatrix(PBowDocBs BowDocBs, PBowDocWgtBs BowDocWgtBs,
        const TStr& CatNm,  const TIntV& DIdV, TFltV& ClsV): TMatrix() {

    const int Docs = DIdV.Len();
    RowN = BowDocBs->GetWords();
    // reserve room for Docs entries, starting from empty vectors
    ClsV.Gen(Docs, 0);
    ColSpVV.Gen(Docs, 0);
    IAssert(BowDocBs->IsCatNm(CatNm));
    const int CatId = BowDocBs->GetCId(CatNm);
    for (int DocN = 0; DocN < Docs; DocN++) {
        const int DId = DIdV[DocN];
        ColSpVV.Add(BowDocWgtBs->GetSpV(DId));
        if (BowDocBs->IsCatInDoc(DId, CatId)) {
            ClsV.Add(0.99);
        } else {
            ClsV.Add(-0.99);
        }
    }
}
Ejemplo n.º 29
0
/// Computes banded min-hash signatures for all quotes and buckets the quotes
/// by band signature.
///
/// For each of the NumBands bands, a signature of BandSize entries is built
/// per quote. Entry j is the position, within the j-th random permutation of
/// the shingle keys, of the first shingle whose quote set contains the quote.
/// Quotes sharing an identical band signature are placed in the same bucket.
///
/// @param ShingleToQuoteIds     shingle hash -> ids of quotes containing it
///                              (read only; it is not modified here)
/// @param SignatureBandBuckets  receives one (band signature -> quote ids)
///                              table per band, appended in band order
void LSH::MinHash(THash<TMd5Sig, TIntSet>& ShingleToQuoteIds,
    TVec<THash<TIntV, TIntSet> >& SignatureBandBuckets) {
  TRnd RandomGenerator; // TODO: make this "more random" by incorporating time
  for (int i = 0; i < NumBands; ++i) {
    THash < TInt, TIntV > Inverted; // (QuoteID, QuoteSignatureForBand)
    THash < TIntV, TIntSet > BandBuckets; // (BandSignature, QuoteIDs)
    for (int j = 0; j < BandSize; ++j) {
      // Fresh permutation of the shingle keys. The key vector is re-fetched
      // before every shuffle on purpose, so the sequence of permutations
      // stays exactly what it was before this rewrite.
      TVec < TMd5Sig > Signature;
      ShingleToQuoteIds.GetKeyV(Signature);
      Signature.Shuffle(RandomGenerator);

      const int SigLen = Signature.Len();
      for (int k = 0; k < SigLen; ++k) {
        // Reference, not a copy: the shingle table is not modified while we
        // iterate, and copying the whole set per shingle dominated run time.
        const TIntSet& CurSet = ShingleToQuoteIds.GetDat(Signature[k]);
        for (TIntSet::TIter l = CurSet.BegI(); l < CurSet.EndI(); l++) {
          // AddDat(Key) returns the stored signature, inserting an empty one
          // the first time a quote is seen; update it in place.
          TIntV& CurSignature = Inverted.AddDat(l.GetKey());
          // Only the first hit in permutation j is recorded: once entry j
          // exists the signature already has more than j elements.
          if (CurSignature.Len() <= j) {
            CurSignature.Add(k);
          }
        }
      }
    }

    // Group quotes by their complete band signature, updating each bucket in
    // place instead of copying it out and writing it back.
    TIntV InvertedKeys;
    Inverted.GetKeyV(InvertedKeys);
    const int InvertedLen = InvertedKeys.Len();
    for (int k = 0; k < InvertedLen; ++k) {
      const TInt& QuoteId = InvertedKeys[k];
      BandBuckets.AddDat(Inverted.GetDat(QuoteId)).AddKey(QuoteId);
    }

    SignatureBandBuckets.Add(BandBuckets);
    Err("%d out of %d band signatures computed\n", i + 1, NumBands);
  }
  Err("Minhash step complete!\n");
}
Ejemplo n.º 30
0
/// Generates a random graph with exact degree sequence DegSeqV.
/// The generated graph has no self loops. The graph generation process
/// simulates the Configuration Model but if a duplicate edge occurs, we find a
/// random edge, break it and reconnect it with the duplicate.
///
/// @param DegSeqV  target degree of node i at position i; must be sorted in
///                 descending order and have an even sum (both asserted)
/// @param Rnd      random generator used to pick the nodes to connect
/// @return         graph with node ids 0..DegSeqV.Len()-1
///
/// Nodes with degree 0 are added to the graph but never take part in edge
/// creation.
PUNGraph GenDegSeq(const TIntV& DegSeqV, TRnd& Rnd) {
  const int Nodes = DegSeqV.Len();
  PUNGraph GraphPt = TUNGraph::New();
  TUNGraph& Graph = *GraphPt;
  Graph.Reserve(Nodes, -1);
  // DegH maps node id -> number of edge endpoints still to be placed.
  TIntH DegH(DegSeqV.Len(), true);
  
  IAssertR(DegSeqV.IsSorted(false), "DegSeqV must be sorted in descending order.");
  int DegSum=0, edge=0;
  for (int node = 0; node < Nodes; node++) {
    IAssert(Graph.AddNode(node) == node);
    const int Deg = DegSeqV[node];
    // A node with no endpoints left must not enter the pool: its counter
    // would be decremented below zero and never removed, so the loop below
    // could not terminate.
    if (Deg > 0) { DegH.AddDat(node, Deg); }
    DegSum += Deg;
  }
  IAssert(DegSum % 2 == 0);
  while (! DegH.Empty()) {
    // pick random nodes and connect (using the caller-supplied generator)
    const int NId1 = DegH.GetKey(DegH.GetRndKeyId(Rnd, 0.5));
    const int NId2 = DegH.GetKey(DegH.GetRndKeyId(Rnd, 0.5));
    IAssert(DegH.IsKey(NId1) && DegH.IsKey(NId2));
    if (NId1 == NId2) {
      // a self loop needs two free endpoints on the same node
      if (DegH.GetDat(NId1) == 1) { continue; }
      // find rnd edge, break it, and connect the endpoints to the nodes
      const TIntPr Edge = TSnapDetail::GetRndEdgeNonAdjNode(GraphPt, NId1, -1);
      if (Edge.Val1==-1) { continue; }
      Graph.DelEdge(Edge.Val1, Edge.Val2);
      Graph.AddEdge(Edge.Val1, NId1);
      Graph.AddEdge(NId1, Edge.Val2);
      if (DegH.GetDat(NId1) == 2) { DegH.DelKey(NId1); }
      else { DegH.GetDat(NId1) -= 2; }
    } else {
      if (! Graph.IsEdge(NId1, NId2)) {
        Graph.AddEdge(NId1, NId2); }  // good edge
      else {
        // find rnd edge, break and cross-connect
        const TIntPr Edge = TSnapDetail::GetRndEdgeNonAdjNode(GraphPt, NId1, NId2);
        if (Edge.Val1==-1) {continue; }
        Graph.DelEdge(Edge.Val1, Edge.Val2);
        Graph.AddEdge(NId1, Edge.Val1);
        Graph.AddEdge(NId2, Edge.Val2);
      }
      // each endpoint consumed one stub; retire nodes that are now complete
      if (DegH.GetDat(NId1)==1) { DegH.DelKey(NId1); }
      else { DegH.GetDat(NId1) -= 1; }
      if (DegH.GetDat(NId2)==1) { DegH.DelKey(NId2); }
      else { DegH.GetDat(NId2) -= 1; }
    }
    // progress report every 1000 placement steps
    if (++edge % 1000 == 0) {
      printf("\r %dk / %dk", edge/1000, DegSum/2000); }
  }
  return GraphPt;
}