// Writes the block-length table to the blob-base stream: the BlockLenVNm tag,
// the number of entries, each entry in order, and finally a -1 marker.
void TBlobBs::PutBlockLenV(const PFRnd& FBlobBs, const TIntV& BlockLenV){
  const int BlockLens=BlockLenV.Len();
  FBlobBs->PutStr(BlockLenVNm);
  FBlobBs->PutInt(BlockLens);
  int BlockLenN=0;
  while (BlockLenN<BlockLens){
    FBlobBs->PutInt(BlockLenV[BlockLenN]);
    BlockLenN++;
  }
  // closing marker written after the last entry
  FBlobBs->PutInt(-1);
}
void LSH::ElCheapoHashing(TQuoteBase *QuoteBase, TInt ShingleLen, THash<TMd5Sig, TIntSet>& ShingleToQuoteIds) { fprintf(stderr, "Hashing shingles the el cheapo way...\n"); TIntV QuoteIds; QuoteBase->GetAllQuoteIds(QuoteIds); for (int qt = 0; qt < QuoteIds.Len(); qt++) { if (qt % 1000 == 0) { fprintf(stderr, "%d out of %d completed\n", qt, QuoteIds.Len()); } TQuote Q; QuoteBase->GetQuote(QuoteIds[qt], Q); // Put x-character (or x-word) shingles into hash table; x is specified by ShingleLen parameter TStr QContentStr; Q.GetParsedContentString(QContentStr); TChA QContentChA = TChA(QContentStr); for (int i = 0; i < QContentChA.Len() - ShingleLen + 1; i++) { TChA ShingleChA = TChA(); for (int j = 0; j < ShingleLen; j++) { ShingleChA.AddCh(QContentChA.GetCh(i + j)); } TStr Shingle = TStr(ShingleChA); const TMd5Sig ShingleMd5(Shingle); TIntSet ShingleQuoteIds; if (ShingleToQuoteIds.IsKey(ShingleMd5)) { ShingleQuoteIds = ShingleToQuoteIds.GetDat(ShingleMd5); } ShingleQuoteIds.AddKey(QuoteIds[qt]); ShingleToQuoteIds.AddDat(ShingleMd5, ShingleQuoteIds); } } Err("Done with el cheapo hashing!\n"); }
void TNmObjBs::GetNmObjDIdV( const PBowDocBs& BowDocBs, TIntV& BowDIdV, const TStr& NmObjStr1, const TStr& NmObjStr2) const { // get first named-object-id int NmObjId1=GetNmObjId(NmObjStr1); TIntV NmObjDocIdV1; GetNmObjDocIdV(NmObjId1, NmObjDocIdV1); NmObjDocIdV1.Sort(); // get second named-object-id TIntV NmObjDocIdV2; if (!NmObjStr2.Empty()){ int NmObjId2=GetNmObjId(NmObjStr2); GetNmObjDocIdV(NmObjId2, NmObjDocIdV2); NmObjDocIdV2.Sort(); } // create joint doc-id-vector TIntV NmObjDocIdV; if (NmObjDocIdV2.Empty()){ NmObjDocIdV=NmObjDocIdV1; } else { NmObjDocIdV1.Intrs(NmObjDocIdV2, NmObjDocIdV); } // traverse named-object-documents to collect bow-document-ids BowDIdV.Gen(NmObjDocIdV.Len(), 0); for (int NmObjDocIdN=0; NmObjDocIdN<NmObjDocIdV.Len(); NmObjDocIdN++){ TStr DocNm=GetDocNm(NmObjDocIdV[NmObjDocIdN]); int DId=BowDocBs->GetDId(DocNm); if (DId!=-1){ BowDIdV.Add(DId); } } }
// Adds a series whose y-values come from YValV and whose x-values are the
// 1-based positions of those values; forwards to the (x,y)-pair overload
// and returns its result.
int TGnuPlot::AddPlot(const TIntV& YValV, const TGpSeriesTy& SeriesTy, const TStr& Label, const TStr& Style) {
  const int Vals = YValV.Len();
  TFltKdV XYValV(Vals, 0);
  int ValN = 0;
  while (ValN < Vals) {
    const int XPos = ValN + 1;  // x axis starts at 1
    XYValV.Add(TFltKd(TFlt(XPos), TFlt(YValV[ValN])));
    ValN++;
  }
  return AddPlot(XYValV, SeriesTy, Label, Style);
}
// Compute the empirical edge probability between a pair of nodes who share no community (epsilon), based on current community affiliations. double TAGMFit::CalcPNoComByCmtyVV(const int& SamplePairs) { TIntV NIdV; G->GetNIdV(NIdV); uint64 PairNoCom = 0, EdgesNoCom = 0; for (int u = 0; u < NIdV.Len(); u++) { for (int v = u + 1; v < NIdV.Len(); v++) { int SrcNID = NIdV[u], DstNID = NIdV[v]; TIntSet JointCom; TAGMUtil::GetIntersection(NIDComVH.GetDat(SrcNID),NIDComVH.GetDat(DstNID),JointCom); if(JointCom.Len() == 0) { PairNoCom++; if (G->IsEdge(SrcNID, DstNID)) { EdgesNoCom++; } if (SamplePairs > 0 && PairNoCom >= (uint64) SamplePairs) { break; } } } if (SamplePairs > 0 && PairNoCom >= (uint64) SamplePairs) { break; } } double DefaultVal = 1.0 / (double)G->GetNodes() / (double)G->GetNodes(); if (EdgesNoCom > 0) { PNoCom = (double) EdgesNoCom / (double) PairNoCom; } else { PNoCom = DefaultVal; } printf("%s / %s edges without joint com detected (PNoCom = %f)\n", TUInt64::GetStr(EdgesNoCom).CStr(), TUInt64::GetStr(PairNoCom).CStr(), PNoCom.Val); return PNoCom; }
// Eric #4 //Count Triangles time (elapsed): 166.162323, cpu: 2048.942704 //Count Triangles time (elapsed): 159.984497, cpu: 1769.572704 //Count Triangles time (elapsed): 167.080368, cpu: 1727.222704 int GetCommon(TIntV& A, TIntV& B) { int ret = 0; int i = 0; int j = 0; int alen, blen; alen = A.Len(); blen = B.Len(); while (i < alen && j < blen) { while (i < alen && A[i] < B[j]) { i++; } // Optional check if (i == alen) { break; } while (j < blen && A[i] > B[j]) { j++; } // Optional check if (j == blen) { break; } if (A[i] == B[j]) { ret++; i++; j++; } } return ret; }
int TMultimodalGraphImplB::GetSubGraphMocked(const TIntV ModeIds) const { int NumVerticesAndEdges = 0; for (THash<TInt,TInt>::TIter CurI = NodeToModeMapping.BegI(); CurI < NodeToModeMapping.EndI(); CurI++) { if (ModeIds.IsIn(CurI.GetDat())) { NumVerticesAndEdges++; } } for (int ModeIdx1 = 0; ModeIdx1 < ModeIds.Len(); ModeIdx1++) { int ModeId1 = ModeIds.GetVal(ModeIdx1); for (int ModeIdx2 = 0; ModeIdx2 < ModeIds.Len(); ModeIdx2++) { int ModeId2 = ModeIds.GetVal(ModeIdx2); TPair<TInt,TInt> ModeIdsKey = GetModeIdsKey(ModeId1, ModeId2); if (!Graphs.IsKey(ModeIdsKey)) { continue; } const TNGraph& Graph = Graphs.GetDat(ModeIdsKey); for (TNGraph::TNodeI it = Graph.BegNI(); it < Graph.EndNI(); it++) { for (int e = 0; e < it.GetOutDeg(); e++) { NumVerticesAndEdges += it.GetOutNId(e); } } } } return NumVerticesAndEdges; }
void TNmObjBs::PutMergedNmObj(const TIntV& NewNmObjIdV){ // create temporary table of new named-objects TStrVIntVH NewNmObjWordStrVToDocIdVH; for (int NmObjId=0; NmObjId<NewNmObjIdV.Len(); NmObjId++){ if (NewNmObjIdV[NmObjId]!=NmObjId){continue;} // take data for new named-object from old definition const TStrV& WordStrV=NmObjWordStrVToDocIdVH.GetKey(NmObjId); // define new named-object NewNmObjWordStrVToDocIdVH.AddDat(WordStrV); } //printf("Old Named-Objects: %d\n", NmObjWordStrVToDocIdVH.Len()); //printf("New Named-Objects: %d\n", NewNmObjWordStrVToDocIdVH.Len()); // obsolete named-object define as aliases {for (int NmObjId=0; NmObjId<NewNmObjIdV.Len(); NmObjId++){ if (NewNmObjIdV[NmObjId]==NmObjId){continue;} // take data for obsolete named-object from old definition const TStrV& WordStrV=NmObjWordStrVToDocIdVH.GetKey(NmObjId); // define alias for obsolete named-object int NrNmObjId=NewNmObjIdV[NmObjId]; if (NrNmObjId!=-1){ const TStrV& NrWordStrV=NmObjWordStrVToDocIdVH.GetKey(NrNmObjId); NmObjWordStrVToNrH.AddDat(WordStrV, NrWordStrV); } }} // redefine documents int Docs=GetDocs(); for (int DocId=0; DocId<Docs; DocId++){ TIntPrV& NmObjIdFqPrV=GetDoc_NmObjIdFqPrV(DocId); // create temporary-document: new-named-object to frequency table TIntIntH NewNmObjIdToFqH(NmObjIdFqPrV.Len()); for (int NmObjN=0; NmObjN<NmObjIdFqPrV.Len(); NmObjN++){ // get obsolete named-object data int NmObjId=NmObjIdFqPrV[NmObjN].Val1; int Fq=NmObjIdFqPrV[NmObjN].Val2; // get named-document-id for normalized named-object int NrNmObjId=NewNmObjIdV[NmObjId]; if (NrNmObjId!=-1){ // get normalized version of word-vector const TStrV& NrWordStrV=NmObjWordStrVToDocIdVH.GetKey(NrNmObjId); // get new named-object-id int NewNmObjId=NewNmObjWordStrVToDocIdVH.GetKeyId(NrWordStrV); // add new named-object-id and term-frequency to temporary-document NewNmObjIdToFqH.AddDat(NewNmObjId)+=Fq; } } // transfere new-named-object data to document NmObjIdFqPrV.Gen(NewNmObjIdToFqH.Len(), 0); for (int NmObjP=0; 
NmObjP<NewNmObjIdToFqH.Len(); NmObjP++){ int NewNmObjId=NewNmObjIdToFqH.GetKey(NmObjP); int Fq=NewNmObjIdToFqH[NmObjP]; // add named-object and increment by term-frequency NmObjIdFqPrV.Add(TIntPr(NewNmObjId, Fq)); // merge document-ids NewNmObjWordStrVToDocIdVH[NewNmObjId].Add(DocId); } NmObjIdFqPrV.Sort(); } // assign new named-objects NmObjWordStrVToDocIdVH=NewNmObjWordStrVToDocIdVH; }
double TStringKernel::KTrie2(const TIntV& s, const TIntV& t, const double& lb, const int& p, int m, const int& AlphN) { int ls = s.Len(), lt = t.Len(); if (ls < p || lt < p) return 0.0; m = TInt::GetMn(m, ls-p, lt-p); TVec<TVec<TTrieNodeP> > LsV(AlphN), LtV(AlphN); TIntV v(p), x(p+m); double Kern = 0.0; // precalculate weights TFltV lbV(m+1); lbV[0] = 1; for (int i = 0; i < p; i++) lbV[0] *= lb; for (int i = 1; i <= m; i++) lbV[i] = lb * lbV[i-1]; for (int i = 0; i <= ls - p; i++) { int j = TInt::GetMn(ls, i+p+m); LsV[s[i]].Add(TTrieNodeP(TIntPr(i, j-i), 0, 0)); // i == 0 becasue strings start with 0 (not 1 as in Matlab!) } for (int i = 0; i <= lt - p; i++) { int j = TInt::GetMn(lt, i+p+m); LtV[t[i]].Add(TTrieNodeP(TIntPr(i, j-i), 0, 0)); // i == 0 becasue strings start with 0 (not 1 as in Matlab!) } for (int AlphC = 0; AlphC < AlphN; AlphC++) { v[0] = AlphC; KTrieR2(s, t, LsV[AlphC], LtV[AlphC], v, 1, Kern, lbV, p, m, AlphN); //depth == 1, not 0 !!!! } return Kern; }
// Builds a node network restricted to the given modes. Every node whose mode
// is in ModeIds is added with its mode id as node data; then, for every
// ordered pair of modes that has a stored graph, each out-edge of that graph
// is added. Prints the resulting node and edge counts and returns the network.
TIntNNet TMultimodalGraphImplB::GetSubGraph(const TIntV ModeIds) const {
  TIntNNet SubGraph;

  // nodes
  for (THash<TInt,TInt>::TIter NodeI = NodeToModeMapping.BegI(); NodeI < NodeToModeMapping.EndI(); NodeI++) {
    if (ModeIds.IsIn(NodeI.GetDat())) {
      SubGraph.AddNode(NodeI.GetKey(), NodeI.GetDat());
    }
  }

  // edges of every per-mode-pair graph that exists
  const int Modes = ModeIds.Len();
  for (int SrcModeN = 0; SrcModeN < Modes; SrcModeN++) {
    const int SrcModeId = ModeIds.GetVal(SrcModeN);
    for (int DstModeN = 0; DstModeN < Modes; DstModeN++) {
      const int DstModeId = ModeIds.GetVal(DstModeN);
      TPair<TInt,TInt> ModeIdsKey = GetModeIdsKey(SrcModeId, DstModeId);
      if (Graphs.IsKey(ModeIdsKey)) {
        const TNGraph& Graph = Graphs.GetDat(ModeIdsKey);
        for (TNGraph::TNodeI NI = Graph.BegNI(); NI < Graph.EndNI(); NI++) {
          const int OutDeg = NI.GetOutDeg();
          for (int EdgeN = 0; EdgeN < OutDeg; EdgeN++) {
            SubGraph.AddEdge(NI.GetId(), NI.GetOutNId(EdgeN));
          }
        }
      }
    }
  }

  printf("Number of nodes in SubGraph: %d...\n", SubGraph.GetNodes());
  printf("Number of edges in SubGraph: %d...\n", SubGraph.GetEdges());
  return SubGraph;
}
void LogOutput::PrintClusterInformationToText(TDocBase *DB, TQuoteBase *QB, TClusterBase *CB, TIntV& ClusterIds, TSecTm PresentTime) { if (!ShouldLog) return; TStr CurDateString = PresentTime.GetDtYmdStr(); TStr TopFileName = Directory + "/text/top/topclusters_" + CurDateString + ".txt"; FILE *T = fopen(TopFileName.CStr(), "w"); for (int i = 0; i < ClusterIds.Len(); i++) { TCluster C; CB->GetCluster(ClusterIds[i], C); TStr CRepQuote; C.GetRepresentativeQuoteString(CRepQuote, QB); TIntV CQuoteIds; TVec<TUInt> CUniqueSources; C.GetQuoteIds(CQuoteIds); TCluster::GetUniqueSources(CUniqueSources, CQuoteIds, QB); fprintf(T, "%d\t%d\t%s\n", CUniqueSources.Len(), CQuoteIds.Len(), CRepQuote.CStr()); for (int j = 0; j < CQuoteIds.Len(); j++) { TQuote Q; if (QB->GetQuote(CQuoteIds[j], Q)) { TStr QuoteStr; Q.GetContentString(QuoteStr); fprintf(T, "\t%d\t%s\n", Q.GetNumSources().Val, QuoteStr.CStr()); } } } fclose(T); }
/// Shingles by words void LSH::HashShinglesOfClusters(TQuoteBase *QuoteBase, TClusterBase *ClusterBase, TIntV& ClusterIds, TInt ShingleLen, THash<TMd5Sig, TIntV>& ShingleToClusterIds) { Err("Hashing shingles of clusters...\n"); for (int i = 0; i < ClusterIds.Len(); i++) { if (i % 1000 == 0) { fprintf(stderr, "%d out of %d completed\n", i, ClusterIds.Len()); } TCluster C; ClusterBase->GetCluster(ClusterIds[i], C); //fprintf(stderr, "%d vs. %d\n", ClusterIds[i].Val, C.GetId().Val); // Put x-word shingles into hash table; x is specified by ShingleLen parameter THashSet < TMd5Sig > CHashedShingles; GetHashedShinglesOfCluster(QuoteBase, C, ShingleLen, CHashedShingles); for (THashSet<TMd5Sig>::TIter Hash = CHashedShingles.BegI(); Hash < CHashedShingles.EndI(); Hash++) { TIntV ShingleClusterIds; if (ShingleToClusterIds.IsKey(*Hash)) { ShingleClusterIds = ShingleToClusterIds.GetDat(*Hash); } ShingleClusterIds.Add(ClusterIds[i]); ShingleToClusterIds.AddDat(*Hash, ShingleClusterIds); } } Err("Done hashing!\n"); }
void LogOutput::PrintClusterInformation(TDocBase *DB, TQuoteBase *QB, TClusterBase *CB, PNGraph& QGraph, TIntV& ClusterIds, TSecTm PresentTime, TIntV &OldTopClusters) { if (!ShouldLog) return; TStr CurDateString = PresentTime.GetDtYmdStr(); Err("Writing cluster information...\n"); // PREVIOUS RANKING SETUP THash<TInt, TInt> OldRankings; if (OldTopClusters.Len() > 0) { for (int i = 0; i < OldTopClusters.Len(); i++) { OldRankings.AddDat(OldTopClusters[i], i + 1); } } TStrV RankStr; TStr ClusterJSONDirectory = Directory + "/web/json/clusters/"; for (int i = 0; i < ClusterIds.Len(); i++) { TStr OldRankStr; ComputeOldRankString(OldRankings, ClusterIds[i], i+1, OldRankStr); RankStr.Add(OldRankStr); // JSON file for each cluster! TPrintJson::PrintClusterJSON(QB, DB, CB, QGraph, ClusterJSONDirectory, ClusterIds[i], PresentTime); } Err("JSON Files for individual written!\n"); TStr JSONTableFileName = Directory + "/web/json/daily/" + CurDateString + ".json"; TPrintJson::PrintClusterTableJSON(QB, DB, CB, JSONTableFileName, ClusterIds, RankStr); Err("JSON Files for the cluster table written!\n"); }
// Returns the data vectors of every pair of items that share a bucket in at
// least one band. Pairs are de-duplicated by their (smaller id, larger id)
// index pair before the corresponding entries of DataV are emitted.
TVec<TPair<TFltV, TFltV> > TLSHash::GetAllCandidatePairs() {
  THashSet<TPair<TInt, TInt> > CandidateIdPairs;
  for (int i=0; i<Bands; i++) {
    TVec<TIntV> BucketVV;
    SigBucketVHV[i].GetDatV(BucketVV);
    for (int j=0; j<BucketVV.Len(); j++) {
      // bind by reference: the bucket is only read, no need to copy it
      const TIntV& BucketV = BucketVV[j];
      const int BucketLen = BucketV.Len();
      for (int k=0; k<BucketLen; k++) {
        for (int l=k+1; l<BucketLen; l++) {
          int First = BucketV[k], Second = BucketV[l];
          // normalize so that (a,b) and (b,a) collapse into one key
          if (First > Second) {
            const int Temp = First;
            First = Second;
            Second = Temp;
          }
          CandidateIdPairs.AddKey(TPair<TInt, TInt> (First, Second));
        }
      }
    }
  }

  TVec<TPair<TFltV, TFltV> > CandidatePairs;
  // the final size is known, so reserve it up front
  CandidatePairs.Gen(CandidateIdPairs.Len(), 0);
  int Ind = CandidateIdPairs.FFirstKeyId();
  while (CandidateIdPairs.FNextKeyId(Ind)) {
    const TPair<TInt, TInt>& IdPair = CandidateIdPairs[Ind];
    CandidatePairs.Add(TPair<TFltV, TFltV>(DataV[IdPair.GetVal1()], DataV[IdPair.GetVal2()]));
  }
  return CandidatePairs;
}
// Sweeps the events in index order while maintaining two sliding windows
// around the current event j: a pre-window [tj - delta, tj) and a
// post-window (tj, tj + delta]. The Push*/Pop*/ProcessCurrent hooks are
// invoked as events enter and leave those windows. Counters are reset first;
// throws if events and timestamps differ in length.
void StarTriad3TEdgeCounter<EdgeData>::Count(const TVec<EdgeData>& events,
                                             const TIntV& timestamps,
                                             double delta) {
  InitializeCounters();
  const int L = timestamps.Len();
  if (events.Len() != L) {
    TExcept::Throw("Number of events must match number of timestamps.");
  }
  int start = 0;  // first event still inside the pre-window
  int end = 0;    // first event not yet pushed into the post-window
  for (int j = 0; j < L; j++) {
    const double tj = double(timestamps[j]);
    const double window_lo = tj - delta;
    const double window_hi = tj + delta;
    // Drop events that fell out of the pre-window [tj - delta, tj)
    for (; start < L && double(timestamps[start]) < window_lo; start++) {
      PopPre(events[start]);
    }
    // Pull events into the post-window (tj, tj + delta]
    for (; end < L && double(timestamps[end]) <= window_hi; end++) {
      PushPos(events[end]);
    }
    // The current event leaves the post-window, is processed, and then
    // becomes part of the pre-window for later events
    PopPos(events[j]);
    ProcessCurrent(events[j]);
    PushPre(events[j]);
  }
}
// Builds a matrix with one column per document id in DIdV (in that order),
// each column being the document's sparse vector from BowDocWgtBs; the row
// count is the number of words reported by BowDocWgtBs.
TBowMatrix::TBowMatrix(PBowDocWgtBs BowDocWgtBs, const TIntV& DIdV): TMatrix() {
  RowN = BowDocWgtBs->GetWords();
  const int Docs = DIdV.Len();
  ColSpVV.Gen(Docs, 0);
  int DocN = 0;
  while (DocN < Docs) {
    ColSpVV.Add(BowDocWgtBs->GetSpV(DIdV[DocN]));
    DocN++;
  }
}
void TTrawling::GenCandidates() { CandItemH.Clr(false); TIntV JoinItem; if (CurItemH.GetKey(0).Len() == 1) { // join 1-items into 2-items for (int i = 0; i < CurItemH.Len(); i++) { for (int j = i+1; j < CurItemH.Len(); j++) { JoinItems(CurItemH.GetKey(i), CurItemH.GetKey(j), JoinItem); if (JoinItem.Len() == CurItemH.GetKey(i).Len()+1) { CandItemH.AddDat(JoinItem, 0); } } } } else { // join longer item sets CurItemH.SortByKey(); for (int i = 0; i < CurItemH.Len(); i++) { const TIntV& Set = CurItemH.GetKey(i); const int Val = Set[Set.Len()-2]; for (int j=i+1; j < CurItemH.Len() && CurItemH.GetKey(j)[CurItemH.GetKey(j).Len()-2] == Val; j++) { JoinItems(CurItemH.GetKey(i), CurItemH.GetKey(j), JoinItem); if (JoinItem.Len() == CurItemH.GetKey(i).Len()+1) { CandItemH.AddDat(JoinItem, 0); } } } }//*/ }
void TempMotifCounter::GetAllStaticTriangles(TIntV& Us, TIntV& Vs, TIntV& Ws) { Us.Clr(); Vs.Clr(); Ws.Clr(); // Get degree ordering of the graph int max_nodes = static_graph_->GetMxNId(); TVec<TIntPair> degrees(max_nodes); degrees.PutAll(TIntPair(0, 0)); // Set the degree of a node to be the number of nodes adjacent to the node in // the undirected graph. TIntV nodes; GetAllNodes(nodes); #pragma omp parallel for schedule(dynamic) for (int node_id = 0; node_id < nodes.Len(); node_id++) { int src = nodes[node_id]; TIntV nbrs; GetAllNeighbors(src, nbrs); degrees[src] = TIntPair(nbrs.Len(), src); } degrees.Sort(); TIntV order = TIntV(max_nodes); #pragma omp parallel for schedule(dynamic) for (int i = 0; i < order.Len(); i++) { order[degrees[i].Dat] = i; } // Get triangles centered at a given node where that node is the smallest in // the degree ordering. #pragma omp parallel for schedule(dynamic) for (int node_id = 0; node_id < nodes.Len(); node_id++) { int src = nodes[node_id]; int src_pos = order[src]; // Get all neighbors who come later in the ordering TIntV nbrs; GetAllNeighbors(src, nbrs); TIntV neighbors_higher; for (int i = 0; i < nbrs.Len(); i++) { int nbr = nbrs[i]; if (order[nbr] > src_pos) { neighbors_higher.Add(nbr); } } for (int ind1 = 0; ind1 < neighbors_higher.Len(); ind1++) { for (int ind2 = ind1 + 1; ind2 < neighbors_higher.Len(); ind2++) { int dst1 = neighbors_higher[ind1]; int dst2 = neighbors_higher[ind2]; // Check for triangle formation if (static_graph_->IsEdge(dst1, dst2) || static_graph_->IsEdge(dst2, dst1)) { #pragma omp critical { Us.Add(src); Vs.Add(dst1); Ws.Add(dst2); } } } } } }
// Finds the first entry of BlockLenV that is at least BfL. Its value is
// returned in MxBfL and its index in FFreeBlobPtN. Fails the EAssert when
// no entry is large enough.
void TBlobBs::GetAllocInfo(
 const int& BfL, const TIntV& BlockLenV, int& MxBfL, int& FFreeBlobPtN){
  int BlockLenN=0;
  for (; BlockLenN<BlockLenV.Len(); BlockLenN++){
    if (BfL<=BlockLenV[BlockLenN]){break;} // first block length that fits
  }
  EAssert(BlockLenN<BlockLenV.Len());
  MxBfL=BlockLenV[BlockLenN];
  FFreeBlobPtN=BlockLenN;
}
// burn each link independently (forward with FwdBurnProb, backward with BckBurnProb) void TForestFire::BurnExpFire() { const double OldFwdBurnProb = FwdBurnProb; const double OldBckBurnProb = BckBurnProb; const int NInfect = InfectNIdV.Len(); const TNGraph& G = *Graph; TIntH BurnedNIdH; // burned nodes TIntV BurningNIdV = InfectNIdV; // currently burning nodes TIntV NewBurnedNIdV; // nodes newly burned in current step bool HasAliveNbrs; // has unburned neighbors int NBurned = NInfect, NDiedFire=0; for (int i = 0; i < InfectNIdV.Len(); i++) { BurnedNIdH.AddDat(InfectNIdV[i]); } NBurnedTmV.Clr(false); NBurningTmV.Clr(false); NewBurnedTmV.Clr(false); for (int time = 0; ; time++) { NewBurnedNIdV.Clr(false); // for each burning node for (int node = 0; node < BurningNIdV.Len(); node++) { const int& BurningNId = BurningNIdV[node]; const TNGraph::TNodeI Node = G.GetNI(BurningNId); HasAliveNbrs = false; NDiedFire = 0; // burn forward links (out-links) for (int e = 0; e < Node.GetOutDeg(); e++) { const int OutNId = Node.GetOutNId(e); if (! BurnedNIdH.IsKey(OutNId)) { // not yet burned HasAliveNbrs = true; if (Rnd.GetUniDev() < FwdBurnProb) { BurnedNIdH.AddDat(OutNId); NewBurnedNIdV.Add(OutNId); NBurned++; } } } // burn backward links (in-links) if (BckBurnProb > 0.0) { for (int e = 0; e < Node.GetInDeg(); e++) { const int InNId = Node.GetInNId(e); if (! BurnedNIdH.IsKey(InNId)) { // not yet burned HasAliveNbrs = true; if (Rnd.GetUniDev() < BckBurnProb) { BurnedNIdH.AddDat(InNId); NewBurnedNIdV.Add(InNId); NBurned++; } } } } if (! 
HasAliveNbrs) { NDiedFire++; } } NBurnedTmV.Add(NBurned); NBurningTmV.Add(BurningNIdV.Len() - NDiedFire); NewBurnedTmV.Add(NewBurnedNIdV.Len()); //BurningNIdV.AddV(NewBurnedNIdV); // node is burning eternally BurningNIdV.Swap(NewBurnedNIdV); // node is burning just 1 time step if (BurningNIdV.Empty()) break; FwdBurnProb = FwdBurnProb * ProbDecay; BckBurnProb = BckBurnProb * ProbDecay; } BurnedNIdV.Gen(BurnedNIdH.Len(), 0); for (int i = 0; i < BurnedNIdH.Len(); i++) { BurnedNIdV.Add(BurnedNIdH.GetKey(i)); } FwdBurnProb = OldFwdBurnProb; BckBurnProb = OldBckBurnProb; }
/// save bipartite community affiliation into gexf file void TAGMUtil::SaveBipartiteGephi(const TStr& OutFNm, const TIntV& NIDV, const TVec<TIntV>& CmtyVV, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH, const THash<TInt, TIntTr>& CIDColorH ) { /// Plot bipartite graph if (CmtyVV.Len() == 0) { return; } double NXMin = 0.1, YMin = 0.1, NXMax = 250.00, YMax = 30.0; double CXMin = 0.3 * NXMax, CXMax = 0.7 * NXMax; double CStep = (CXMax - CXMin) / (double) CmtyVV.Len(), NStep = (NXMax - NXMin) / (double) NIDV.Len(); THash<TInt,TIntV> NIDComVH; TAGMUtil::GetNodeMembership(NIDComVH, CmtyVV); FILE* F = fopen(OutFNm.CStr(), "wt"); fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n"); fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n"); fprintf(F, "\t<graph mode='static' defaultedgetype='directed'>\n"); fprintf(F, "\t\t<nodes>\n"); for (int c = 0; c < CmtyVV.Len(); c++) { int CID = c; double XPos = c * CStep + CXMin; TIntTr Color = CIDColorH.IsKey(CID)? CIDColorH.GetDat(CID) : TIntTr(120, 120, 120); fprintf(F, "\t\t\t<node id='C%d' label='C%d'>\n", CID, CID); fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val); fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", MaxSz); fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n"); fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMax); fprintf(F, "\t\t\t</node>\n"); } for (int u = 0; u < NIDV.Len(); u++) { int NID = NIDV[u]; TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): ""; double Size = MinSz; double XPos = NXMin + u * NStep; TIntTr Color = NIDColorH.IsKey(NID)? 
NIDColorH.GetDat(NID) : TIntTr(120, 120, 120); double Alpha = 1.0; fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr()); fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha); fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size); fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n"); fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMin); fprintf(F, "\t\t\t</node>\n"); } fprintf(F, "\t\t</nodes>\n"); fprintf(F, "\t\t<edges>\n"); int EID = 0; for (int u = 0; u < NIDV.Len(); u++) { int NID = NIDV[u]; if (NIDComVH.IsKey(NID)) { for (int c = 0; c < NIDComVH.GetDat(NID).Len(); c++) { int CID = NIDComVH.GetDat(NID)[c]; fprintf(F, "\t\t\t<edge id='%d' source='C%d' target='%d'/>\n", EID++, CID, NID); } } } fprintf(F, "\t\t</edges>\n"); fprintf(F, "\t</graph>\n"); fprintf(F, "</gexf>\n"); }
void TNEANetMP::Dump(FILE *OutF) const { const int NodePlaces = (int) ceil(log10((double) GetNodes())); const int EdgePlaces = (int) ceil(log10((double) GetEdges())); fprintf(OutF, "-------------------------------------------------\nDirected Node-Edge Network: nodes: %d, edges: %d\n", GetNodes(), GetEdges()); for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) { fprintf(OutF, " %*d]\n", NodePlaces, NodeI.GetId()); // load node attributes TIntV IntAttrN; IntAttrValueNI(NodeI.GetId(), IntAttrN); fprintf(OutF, " nai[%d]", IntAttrN.Len()); for (int i = 0; i < IntAttrN.Len(); i++) { fprintf(OutF, " %*i", NodePlaces, IntAttrN[i]()); } TStrV StrAttrN; StrAttrValueNI(NodeI.GetId(), StrAttrN); fprintf(OutF, " nas[%d]", StrAttrN.Len()); for (int i = 0; i < StrAttrN.Len(); i++) { fprintf(OutF, " %*s", NodePlaces, StrAttrN[i]()); } TFltV FltAttrN; FltAttrValueNI(NodeI.GetId(), FltAttrN); fprintf(OutF, " naf[%d]", FltAttrN.Len()); for (int i = 0; i < FltAttrN.Len(); i++) { fprintf(OutF, " %*f", NodePlaces, FltAttrN[i]()); } fprintf(OutF, " in[%d]", NodeI.GetInDeg()); for (int edge = 0; edge < NodeI.GetInDeg(); edge++) { fprintf(OutF, " %*d", EdgePlaces, NodeI.GetInEId(edge)); } fprintf(OutF, "\n"); fprintf(OutF, " out[%d]", NodeI.GetOutDeg()); for (int edge = 0; edge < NodeI.GetOutDeg(); edge++) { fprintf(OutF, " %*d", EdgePlaces, NodeI.GetOutEId(edge)); } fprintf(OutF, "\n"); } for (TEdgeI EdgeI = BegEI(); EdgeI < EndEI(); EdgeI++) { fprintf(OutF, " %*d] %*d -> %*d", EdgePlaces, EdgeI.GetId(), NodePlaces, EdgeI.GetSrcNId(), NodePlaces, EdgeI.GetDstNId()); // load edge attributes TIntV IntAttrE; IntAttrValueEI(EdgeI.GetId(), IntAttrE); fprintf(OutF, " eai[%d]", IntAttrE.Len()); for (int i = 0; i < IntAttrE.Len(); i++) { fprintf(OutF, " %*i", EdgePlaces, IntAttrE[i]()); } TStrV StrAttrE; StrAttrValueEI(EdgeI.GetId(), StrAttrE); fprintf(OutF, " eas[%d]", StrAttrE.Len()); for (int i = 0; i < StrAttrE.Len(); i++) { fprintf(OutF, " %*s", EdgePlaces, StrAttrE[i]()); } TFltV 
FltAttrE; FltAttrValueEI(EdgeI.GetId(), FltAttrE); fprintf(OutF, " eaf[%d]", FltAttrE.Len()); for (int i = 0; i < FltAttrE.Len(); i++) { fprintf(OutF, " %*f", EdgePlaces, FltAttrE[i]()); } fprintf(OutF, "\n"); } fprintf(OutF, "\n"); }
int main(int argc, char* argv[]){ Try; // create environment Env=TEnv(argc, argv, TNotify::StdNotify); // get command line parameters Env.PrepArgs("DMoz-Topic To Text", -1); TStr InFPath=Env.GetIfArgPrefixStr("-i:", "", "Input-File-Path"); TStr OutFPath=Env.GetIfArgPrefixStr("-o:", "", "Output-File-Path"); TStr RootCatNm=Env.GetIfArgPrefixStr("-c:", "Top/Science", "Root-Category-Name"); if (Env.IsEndOfRun()){return 0;} // load DMoz-Base PDMozBs DMozBs=TDMozBs::LoadBin(TDMozInfo::BinFullFBase, InFPath); // assign root category name //RootCatNm="Top/Computers/Software/Databases/Data_Mining"; //RootCatNm="Top/Reference/Knowledge_Management/Knowledge_Discovery"; //RootCatNm="Top/Computers/Artificial_Intelligence/Machine_Learning"; //RootCatNm="Top/Computers/Artificial_Intelligence"; //RootCatNm="Top/Recreation/Travel"; // get root category-id int RootCatId=DMozBs->GetCatId(RootCatNm); // prepare external-url list TStr RootFBase=TStr::GetFNmStr(RootCatNm, true); TStr ExtUrlFNm=TStr::GetNrFPath(OutFPath)+RootFBase+"_ExternalUrlList.Txt"; TFOut ExtUrlSOut(ExtUrlFNm); FILE* fExtUrlOut=ExtUrlSOut.GetFileId(); // get topic categories TIntV TopicCatIdV; DMozBs->GetSubCatIdV(RootCatId, TopicCatIdV); for (int TopicCatIdN=0; TopicCatIdN<TopicCatIdV.Len(); TopicCatIdN++){ // get topic id & name int TopicCatId=TopicCatIdV[TopicCatIdN]; TStr TopicCatNm=DMozBs->GetCatNm(TopicCatId); // get subtopic subtrees and corresponding external-url-ids TIntV SubCatIdV; TIntV CatIdV; //DMozBs->GetSubTreeCatIdV(TopicCatId, SubCatIdV, CatIdV, true); TIntV ExtUrlIdV; DMozBs->GetExtUrlIdV(CatIdV, ExtUrlIdV); // output url/titles/descriptions TStr TopicFBase=TStr::GetFNmStr(TopicCatNm, true); TStr TopicFNm=TStr::GetNrFPath(OutFPath)+TopicFBase+".Txt"; printf("Saving %s\n", TopicFNm.CStr()); TFOut TopicSOut(TopicFNm); FILE* fTopicOut=TopicSOut.GetFileId(); for (int ExtUrlIdN=0; ExtUrlIdN<ExtUrlIdV.Len(); ExtUrlIdN++){ int ExtUrlId=ExtUrlIdV[ExtUrlIdN]; TStr UrlStr=DMozBs->GetExtUrlStr(ExtUrlId); TStr 
TitleStr=DMozBs->GetExtUrlTitleStr(ExtUrlId); TStr DescStr=DMozBs->GetExtUrlDescStr(ExtUrlId); fprintf(fExtUrlOut, "%s\n", UrlStr.CStr()); fprintf(fTopicOut, "%s - %s\n", TitleStr.CStr(), DescStr.CStr()); } } return 0; Catch; return 1; }
void TNGramBs::GetNGramStrV( const TStr& HtmlStr, TStrV& NGramStrV, TIntPrV& NGramBEChXPrV) const { TIntV NGramIdV; NGramStrV.Clr(); NGramBEChXPrV.Clr(); TNGramBs::GetNGramIdV(HtmlStr, NGramIdV, NGramBEChXPrV); NGramStrV.Gen(NGramIdV.Len(), 0); for (int NGramIdN=0; NGramIdN<NGramIdV.Len(); NGramIdN++){ TStr NGramStr=GetNGramStr(NGramIdV[NGramIdN]); NGramStrV.Add(NGramStr); } }
void TempMotifCounter::Count3TEdge3NodeStarsNaive( double delta, Counter3D& pre_counts, Counter3D& pos_counts, Counter3D& mid_counts) { TIntV centers; GetAllNodes(centers); pre_counts = Counter3D(2, 2, 2); pos_counts = Counter3D(2, 2, 2); mid_counts = Counter3D(2, 2, 2); // Get counts for each node as the center #pragma omp parallel for schedule(dynamic) for (int c = 0; c < centers.Len(); c++) { // Gather all adjacent events int center = centers[c]; TIntV nbrs; GetAllNeighbors(center, nbrs); for (int i = 0; i < nbrs.Len(); i++) { for (int j = i + 1; j < nbrs.Len(); j++) { int nbr1 = nbrs[i]; int nbr2 = nbrs[j]; TVec<TIntPair> combined; AddStarEdges(combined, center, nbr1, 0); AddStarEdges(combined, nbr1, center, 1); AddStarEdges(combined, center, nbr2, 2); AddStarEdges(combined, nbr2, center, 3); combined.Sort(); ThreeTEdgeMotifCounter counter(4); TIntV edge_id(combined.Len()); TIntV timestamps(combined.Len()); for (int k = 0; k < combined.Len(); k++) { edge_id[k] = combined[k].Dat; timestamps[k] = combined[k].Key; } Counter3D local; counter.Count(edge_id, timestamps, delta, local); #pragma omp critical { // Update with local counts for (int dir1 = 0; dir1 < 2; ++dir1) { for (int dir2 = 0; dir2 < 2; ++dir2) { for (int dir3 = 0; dir3 < 2; ++dir3) { pre_counts(dir1, dir2, dir3) += local(dir1, dir2, dir3 + 2) + local(dir1 + 2, dir2 + 2, dir3); pos_counts(dir1, dir2, dir3) += local(dir1, dir2 + 2, dir3 + 2) + local(dir1 + 2, dir2, dir3); mid_counts(dir1, dir2, dir3) += local(dir1, dir2 + 2, dir3) + local(dir1 + 2, dir2, dir3 + 2); } } } } } } } }
void TBowFl::SaveSparseMatlabTxt(const PBowDocBs& BowDocBs, const PBowDocWgtBs& BowDocWgtBs, const TStr& FNm, const TStr& CatFNm, const TIntV& _DIdV) { TIntV DIdV; if (_DIdV.Empty()) { BowDocBs->GetAllDIdV(DIdV); } else { DIdV = _DIdV; } // generate map of row-ids to words TFOut WdMapSOut(TStr::PutFExt(FNm, ".row-to-word-map.dat")); for (int WId = 0; WId < BowDocWgtBs->GetWords(); WId++) { TStr WdStr = BowDocBs->GetWordStr(WId); WdMapSOut.PutStrLn(TStr::Fmt("%d %s", WId+1, WdStr.CStr())); } WdMapSOut.Flush(); // generate map of col-ids to document names TFOut DocMapSOut(TStr::PutFExt(FNm, ".col-to-docName-map.dat")); for (int DocN = 0; DocN < DIdV.Len(); DocN++) { const int DId = DIdV[DocN]; TStr DocNm = BowDocBs->GetDocNm(DId); DocMapSOut.PutStrLn(TStr::Fmt("%d %d %s", DocN, DId, DocNm.CStr())); } DocMapSOut.Flush(); // save documents' sparse vectors TFOut SOut(FNm); for (int DocN = 0; DocN < DIdV.Len(); DocN++){ const int DId = DIdV[DocN]; PBowSpV DocSpV = BowDocWgtBs->GetSpV(DId); const int DocWIds = DocSpV->GetWIds(); for (int DocWIdN=0; DocWIdN<DocWIds; DocWIdN++){ const int WId = DocSpV->GetWId(DocWIdN); const double WordWgt = DocSpV->GetWgt(DocWIdN); SOut.PutStrLn(TStr::Fmt("%d %d %.16f", WId+1, DocN+1, WordWgt)); } } SOut.Flush(); // save documents' category sparse vectors if (!CatFNm.Empty()) { TFOut CatSOut(CatFNm); for (int DocN = 0; DocN < DIdV.Len(); DocN++){ const int DId = DIdV[DocN]; const int DocCIds = BowDocBs->GetDocCIds(DId); for (int DocCIdN=0; DocCIdN<DocCIds; DocCIdN++){ const int CId = BowDocBs->GetDocCId(DId, DocCIdN); const double CatWgt = 1.0; CatSOut.PutStrLn(TStr::Fmt("%d %d %.16f", CId+1, DocN+1, CatWgt)); } } CatSOut.Flush(); } }
// Copies the selected tuples (TupNV) and variables (VarNV) of this table into
// Tb, in the given order, then lets Tb define its variable types. Returns Tb,
// or NULL (leaving Tb untouched) when either selection is empty.
PTb TTb::GetSubTb(const TIntV& TupNV, const TIntV& VarNV, const PTb& Tb){
  const int Tups=TupNV.Len();
  const int Vars=VarNV.Len();
  if ((Tups==0)||(Vars==0)){return NULL;}
  // variables first, so that column VarNN of Tb is variable VarNV[VarNN]
  for (int VarNN=0; VarNN<Vars; VarNN++){
    Tb->AddVar(GetVar(VarNV[VarNN]));
  }
  // then tuples with their values for the selected variables
  for (int TupNN=0; TupNN<Tups; TupNN++){
    const int TupN=TupNV[TupNN];
    const int NewTupN=Tb->AddTup(GetTupNm(TupN));
    int VarNN=0;
    while (VarNN<Vars){
      Tb->PutVal(NewTupN, VarNN, GetVal(TupN, VarNV[VarNN]));
      VarNN++;
    }
  }
  Tb->DefVarTypes();
  return Tb;
}
// Builds a matrix with one column per document id in DIdV (sparse vectors
// from BowDocWgtBs) and fills ClsV with a label per document: 0.99 when the
// document belongs to category CatNm, -0.99 otherwise. The row count is the
// number of words reported by BowDocBs. CatNm must be a known category.
TBowMatrix::TBowMatrix(PBowDocBs BowDocBs, PBowDocWgtBs BowDocWgtBs,
    const TStr& CatNm, const TIntV& DIdV, TFltV& ClsV): TMatrix() {
  RowN = BowDocBs->GetWords();
  const int Docs = DIdV.Len();
  ClsV.Gen(Docs, 0);
  ColSpVV.Gen(Docs, 0);
  IAssert(BowDocBs->IsCatNm(CatNm));
  const int CatId = BowDocBs->GetCId(CatNm);
  for (int DocN = 0; DocN < Docs; DocN++) {
    ColSpVV.Add(BowDocWgtBs->GetSpV(DIdV[DocN]));
    if (BowDocBs->IsCatInDoc(DIdV[DocN], CatId)) {
      ClsV.Add(0.99);
    } else {
      ClsV.Add(-0.99);
    }
  }
}
// YES I COPIED AND PASTED CODE my section leader would be so ashamed :D void LSH::MinHash(THash<TMd5Sig, TIntSet>& ShingleToQuoteIds, TVec<THash<TIntV, TIntSet> >& SignatureBandBuckets) { TRnd RandomGenerator; // TODO: make this "more random" by incorporating time for (int i = 0; i < NumBands; ++i) { THash < TInt, TIntV > Inverted; // (QuoteID, QuoteSignatureForBand) THash < TIntV, TIntSet > BandBuckets; // (BandSignature, QuoteIDs) for (int j = 0; j < BandSize; ++j) { // Create new signature TVec < TMd5Sig > Signature; ShingleToQuoteIds.GetKeyV(Signature); Signature.Shuffle(RandomGenerator); // Place in bucket - not very efficient int SigLen = Signature.Len(); for (int k = 0; k < SigLen; ++k) { TIntSet CurSet = ShingleToQuoteIds.GetDat(Signature[k]); for (TIntSet::TIter l = CurSet.BegI(); l < CurSet.EndI(); l++) { TInt Key = l.GetKey(); if (Inverted.IsKey(Key)) { TIntV CurSignature = Inverted.GetDat(Key); if (CurSignature.Len() <= j) { CurSignature.Add(k); Inverted.AddDat(Key, CurSignature); } } else { TIntV NewSignature; NewSignature.Add(k); Inverted.AddDat(Key, NewSignature); } } } } TIntV InvertedKeys; Inverted.GetKeyV(InvertedKeys); TInt InvertedLen = InvertedKeys.Len(); for (int k = 0; k < InvertedLen; ++k) { TIntSet Bucket; TIntV Signature = Inverted.GetDat(InvertedKeys[k]); if (BandBuckets.IsKey(Signature)) { Bucket = BandBuckets.GetDat(Signature); } Bucket.AddKey(InvertedKeys[k]); BandBuckets.AddDat(Signature, Bucket); } SignatureBandBuckets.Add(BandBuckets); Err("%d out of %d band signatures computed\n", i + 1, NumBands); } Err("Minhash step complete!\n"); }
/// Generates a random graph with exact degree sequence DegSeqV. /// The generated graph has no self loops. The graph generation process /// simulates the Configuration Model but if a duplicate edge occurs, we find a /// random edge, break it and reconnect it with the duplicate. PUNGraph GenDegSeq(const TIntV& DegSeqV, TRnd& Rnd) { const int Nodes = DegSeqV.Len(); PUNGraph GraphPt = TUNGraph::New(); TUNGraph& Graph = *GraphPt; Graph.Reserve(Nodes, -1); TIntH DegH(DegSeqV.Len(), true); IAssertR(DegSeqV.IsSorted(false), "DegSeqV must be sorted in descending order."); int DegSum=0, edge=0; for (int node = 0; node < Nodes; node++) { IAssert(Graph.AddNode(node) == node); DegH.AddDat(node, DegSeqV[node]); DegSum += DegSeqV[node]; } IAssert(DegSum % 2 == 0); while (! DegH.Empty()) { // pick random nodes and connect const int NId1 = DegH.GetKey(DegH.GetRndKeyId(TInt::Rnd, 0.5)); const int NId2 = DegH.GetKey(DegH.GetRndKeyId(TInt::Rnd, 0.5)); IAssert(DegH.IsKey(NId1) && DegH.IsKey(NId2)); if (NId1 == NId2) { if (DegH.GetDat(NId1) == 1) { continue; } // find rnd edge, break it, and connect the endpoints to the nodes const TIntPr Edge = TSnapDetail::GetRndEdgeNonAdjNode(GraphPt, NId1, -1); if (Edge.Val1==-1) { continue; } Graph.DelEdge(Edge.Val1, Edge.Val2); Graph.AddEdge(Edge.Val1, NId1); Graph.AddEdge(NId1, Edge.Val2); if (DegH.GetDat(NId1) == 2) { DegH.DelKey(NId1); } else { DegH.GetDat(NId1) -= 2; } } else { if (! 
Graph.IsEdge(NId1, NId2)) { Graph.AddEdge(NId1, NId2); } // good edge else { // find rnd edge, break and cross-connect const TIntPr Edge = TSnapDetail::GetRndEdgeNonAdjNode(GraphPt, NId1, NId2); if (Edge.Val1==-1) {continue; } Graph.DelEdge(Edge.Val1, Edge.Val2); Graph.AddEdge(NId1, Edge.Val1); Graph.AddEdge(NId2, Edge.Val2); } if (DegH.GetDat(NId1)==1) { DegH.DelKey(NId1); } else { DegH.GetDat(NId1) -= 1; } if (DegH.GetDat(NId2)==1) { DegH.DelKey(NId2); } else { DegH.GetDat(NId2) -= 1; } } if (++edge % 1000 == 0) { printf("\r %dk / %dk", edge/1000, DegSum/2000); } } return GraphPt; }