void GetBetweennessCentr(const PUNGraph& Graph, TIntFltH& NodeBtwH, TIntPrFltH& EdgeBtwH, const double& NodeFrac) { TIntV NIdV; Graph->GetNIdV(NIdV); if (NodeFrac < 1.0) { // calculate beetweenness centrality for a subset of nodes NIdV.Shuffle(TInt::Rnd); for (int i = int((1.0-NodeFrac)*NIdV.Len()); i > 0; i--) { NIdV.DelLast(); } } GetBetweennessCentr(Graph, NIdV, NodeBtwH, true, EdgeBtwH, true); }
void TSkyGridEnt::GetEntClustV(const TSkyGridBs* SkyGridBs, const uint64& MnTm, const int& MnDocs, const int& MxDocs, const int& Clusts, TVec<TStrFltPrV>& EntNmWgtPrVV) const { EntNmWgtPrVV.Clr(); // create bow PBowDocBs BowDocBs=TBowDocBs::New(); // collect documents TIntV DocIdV; GetDocIdV(SkyGridBs, MnTm, 0, DocIdV); DocIdV.Reverse(); DocIdV.Shuffle(TRnd(1)); DocIdV.Trunc(MxDocs); if (DocIdV.Len()<MnDocs){return;} for (int DocN=0; DocN<DocIdV.Len(); DocN++){ int DocId=DocIdV[DocN]; PSkyGridDoc Doc=SkyGridBs->GetDoc(DocId); // create vector of entity-weights TIntFltPrV WIdWgtPrV; for (int EntN=0; EntN<Doc->GetEnts(); EntN++){ int EntId; int EntFq; Doc->GetEntNmFq(EntN, EntId, EntFq); TStr EntNm=SkyGridBs->GetEntNm(EntId); int EntWId=BowDocBs->AddWordStr(EntNm); WIdWgtPrV.Add(TIntFltPr(EntWId, EntFq)); } // create bow-document int DId=BowDocBs->AddDoc(TInt::GetStr(DocId), TStrV(), WIdWgtPrV); TStr DocDescStr=Doc->GetTitleStr(); BowDocBs->PutDocDescStr(DId, DocDescStr); } // k-means clustering PBowSim BowSim=TBowSim::New(bstCos); // similarity object TBowWordWgtType WordWgtType=bwwtNrmTFIDF; // define weighting PBowDocPart BowDocPart=TBowClust::GetKMeansPart( TNotify::StdNotify, // log output BowDocBs, // document data BowSim, // similarity function TRnd(1), // random generator Clusts, // number of clusters 1, // trials per k-means 1, // convergence epsilon for k-means 1, // min. documents per cluster WordWgtType, // word weighting 0, // cut-word-weights percentage 0); // minimal word frequency EntNmWgtPrVV.Clr(); for (int ClustN=0; ClustN<BowDocPart->GetClusts(); ClustN++){ PBowDocPartClust Clust=BowDocPart->GetClust(ClustN); TStrFltPrV WordStrWgtPrV; Clust->GetTopWordStrWgtPrV(BowDocBs, 25, 0.5, WordStrWgtPrV); EntNmWgtPrVV.Add(WordStrWgtPrV); } //BowDocPart->SaveTxt("Clusts.Txt", BowDocBs, true, 25, 0.5, false); }
void node2vec(PWNet& InNet, double& ParamP, double& ParamQ, int& Dimensions, int& WalkLen, int& NumWalks, int& WinSize, int& Iter, bool& Verbose, bool& OutputWalks, TVVec<TInt, int64>& WalksVV, TIntFltVH& EmbeddingsHV) { //Preprocess transition probabilities PreprocessTransitionProbs(InNet, ParamP, ParamQ, Verbose); TIntV NIdsV; for (TWNet::TNodeI NI = InNet->BegNI(); NI < InNet->EndNI(); NI++) { NIdsV.Add(NI.GetId()); } //Generate random walks int64 AllWalks = (int64)NumWalks * NIdsV.Len(); WalksVV = TVVec<TInt, int64>(AllWalks,WalkLen); TRnd Rnd(time(NULL)); int64 WalksDone = 0; for (int64 i = 0; i < NumWalks; i++) { NIdsV.Shuffle(Rnd); #pragma omp parallel for schedule(dynamic) for (int64 j = 0; j < NIdsV.Len(); j++) { if ( Verbose && WalksDone%10000 == 0 ) { printf("\rWalking Progress: %.2lf%%",(double)WalksDone*100/(double)AllWalks);fflush(stdout); } TIntV WalkV; SimulateWalk(InNet, NIdsV[j], WalkLen, Rnd, WalkV); for (int64 k = 0; k < WalkV.Len(); k++) { WalksVV.PutXY(i*NIdsV.Len()+j, k, WalkV[k]); } WalksDone++; } } if (Verbose) { printf("\n"); fflush(stdout); } //Learning embeddings if (!OutputWalks) { LearnEmbeddings(WalksVV, Dimensions, WinSize, Iter, Verbose, EmbeddingsHV); } }
// Node selects N~geometric(1.0-FwdBurnProb)-1 out-links and burns them. Then same for in-links. // geometirc(p) has mean 1/(p), so for given FwdBurnProb, we burn 1/(1-FwdBurnProb) void TForestFire::BurnGeoFire() { const double OldFwdBurnProb = FwdBurnProb; const double OldBckBurnProb = BckBurnProb; const int& NInfect = InfectNIdV.Len(); const TNGraph& G = *Graph; TIntH BurnedNIdH; // burned nodes TIntV BurningNIdV = InfectNIdV; // currently burning nodes TIntV NewBurnedNIdV; // nodes newly burned in current step bool HasAliveInNbrs, HasAliveOutNbrs; // has unburned neighbors TIntV AliveNIdV; // NIds of alive neighbors int NBurned = NInfect, time; for (int i = 0; i < InfectNIdV.Len(); i++) { BurnedNIdH.AddDat(InfectNIdV[i]); } NBurnedTmV.Clr(false); NBurningTmV.Clr(false); NewBurnedTmV.Clr(false); for (time = 0;; time++) { NewBurnedNIdV.Clr(false); for (int node = 0; node < BurningNIdV.Len(); node++) { const int& BurningNId = BurningNIdV[node]; const TNGraph::TNodeI Node = G.GetNI(BurningNId); // find unburned links HasAliveOutNbrs = false; AliveNIdV.Clr(false); // unburned links for (int e = 0; e < Node.GetOutDeg(); e++) { const int OutNId = Node.GetOutNId(e); if (!BurnedNIdH.IsKey(OutNId)) { HasAliveOutNbrs = true; AliveNIdV.Add(OutNId); } } // number of links to burn (geometric coin). Can also burn 0 links const int BurnNFwdLinks = Rnd.GetGeoDev(1.0 - FwdBurnProb) - 1; if (HasAliveOutNbrs && BurnNFwdLinks > 0) { AliveNIdV.Shuffle(Rnd); for (int i = 0; i < TMath::Mn(BurnNFwdLinks, AliveNIdV.Len()); i++) { BurnedNIdH.AddDat(AliveNIdV[i]); NewBurnedNIdV.Add(AliveNIdV[i]); NBurned++; } } // backward links if (BckBurnProb > 0.0) { // find unburned links HasAliveInNbrs = false; AliveNIdV.Clr(false); for (int e = 0; e < Node.GetInDeg(); e++) { const int InNId = Node.GetInNId(e); if (!BurnedNIdH.IsKey(InNId)) { HasAliveInNbrs = true; AliveNIdV.Add(InNId); } } // number of links to burn (geometric coin). Can also burn 0 links const int BurnNBckLinks = Rnd.GetGeoDev(1.0 - BckBurnProb) - 1; if (HasAliveInNbrs && BurnNBckLinks > 0) { AliveNIdV.Shuffle(Rnd); for (int i = 0; i < TMath::Mn(BurnNBckLinks, AliveNIdV.Len()); i++) { BurnedNIdH.AddDat(AliveNIdV[i]); NewBurnedNIdV.Add(AliveNIdV[i]); NBurned++; } } } } NBurnedTmV.Add(NBurned); NBurningTmV.Add(BurningNIdV.Len()); NewBurnedTmV.Add(NewBurnedNIdV.Len()); // BurningNIdV.AddV(NewBurnedNIdV); // node is burning eternally BurningNIdV.Swap(NewBurnedNIdV); // node is burning just 1 time step if (BurningNIdV.Empty()) break; FwdBurnProb = FwdBurnProb * ProbDecay; BckBurnProb = BckBurnProb * ProbDecay; } BurnedNIdV.Gen(BurnedNIdH.Len(), 0); for (int i = 0; i < BurnedNIdH.Len(); i++) { BurnedNIdV.Add(BurnedNIdH.GetKey(i)); } FwdBurnProb = OldFwdBurnProb; BckBurnProb = OldBckBurnProb; }
/// Newton method: DEPRECATED int TAGMFast::MLENewton(const double& Thres, const int& MaxIter, const TStr PlotNm) { TExeTm ExeTm; int iter = 0, PrevIter = 0; TIntFltPrV IterLV; double PrevL = TFlt::Mn, CurL; TUNGraph::TNodeI UI; TIntV NIdxV; G->GetNIdV(NIdxV); int CID, UID, NewtonIter; double Fuc, PrevFuc, Grad, H; while(iter < MaxIter) { NIdxV.Shuffle(Rnd); for (int ui = 0; ui < F.Len(); ui++, iter++) { if (! PlotNm.Empty() && iter % G->GetNodes() == 0) { IterLV.Add(TIntFltPr(iter, Likelihood(false))); } UID = NIdxV[ui]; //find set of candidate c (we only need to consider c to which a neighbor of u belongs to) TIntSet CIDSet; UI = G->GetNI(UID); if (UI.GetDeg() == 0) { //if the node is isolated, clear its membership and skip if (! F[UID].Empty()) { F[UID].Clr(); } continue; } for (int e = 0; e < UI.GetDeg(); e++) { if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; } TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)]; for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) { CIDSet.AddKey(CI.GetKey()); } } for (TIntFltH::TIter CI = F[UID].BegI(); CI < F[UID].EndI(); CI++) { //remove the community membership which U does not share with its neighbors if (! CIDSet.IsKey(CI.GetKey())) { DelCom(UID, CI.GetKey()); } } if (CIDSet.Empty()) { continue; } for (TIntSet::TIter CI = CIDSet.BegI(); CI < CIDSet.EndI(); CI++) { CID = CI.GetKey(); //optimize for UID, CID //compute constants TFltV AlphaKV(UI.GetDeg()); for (int e = 0; e < UI.GetDeg(); e++) { if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; } AlphaKV[e] = (1 - PNoCom) * exp(- DotProduct(UID, UI.GetNbrNId(e)) + GetCom(UI.GetNbrNId(e), CID) * GetCom(UID, CID)); IAssertR(AlphaKV[e] <= 1.0, TStr::Fmt("AlphaKV=%f, %f, %f", AlphaKV[e].Val, PNoCom.Val, GetCom(UI.GetNbrNId(e), CID))); } Fuc = GetCom(UID, CID); PrevFuc = Fuc; Grad = GradientForOneVar(AlphaKV, UID, CID, Fuc), H = 0.0; if (Grad <= 1e-3 && Grad >= -0.1) { continue; } NewtonIter = 0; while (NewtonIter++ < 10) { Grad = GradientForOneVar(AlphaKV, UID, CID, Fuc), H = 0.0; H = HessianForOneVar(AlphaKV, UID, CID, Fuc); if (Fuc == 0.0 && Grad <= 0.0) { Grad = 0.0; } if (fabs(Grad) < 1e-3) { break; } if (H == 0.0) { Fuc = 0.0; break; } double NewtonStep = - Grad / H; if (NewtonStep < -0.5) { NewtonStep = - 0.5; } Fuc += NewtonStep; if (Fuc < 0.0) { Fuc = 0.0; } } if (Fuc == 0.0) { DelCom(UID, CID); } else { AddCom(UID, CID, Fuc); } } } if (iter - PrevIter >= 2 * G->GetNodes() && iter > 10000) { PrevIter = iter; CurL = Likelihood(); if (PrevL > TFlt::Mn && ! PlotNm.Empty()) { printf("\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL, CurL - PrevL); } fflush(stdout); if (CurL - PrevL <= Thres * fabs(PrevL)) { break; } else { PrevL = CurL; } } } if (! PlotNm.Empty()) { printf("\nMLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.GetTmStr()); TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q"); } return iter; }