Пример #1
0
void GetBetweennessCentr(const PUNGraph& Graph, TIntFltH& NodeBtwH, TIntPrFltH& EdgeBtwH, const double& NodeFrac) {
  TIntV NIdV;  Graph->GetNIdV(NIdV);
  if (NodeFrac < 1.0) { // calculate beetweenness centrality for a subset of nodes
    NIdV.Shuffle(TInt::Rnd);
    for (int i = int((1.0-NodeFrac)*NIdV.Len()); i > 0; i--) { 
      NIdV.DelLast(); }
  }
  GetBetweennessCentr(Graph, NIdV, NodeBtwH, true, EdgeBtwH, true);
}
Пример #2
0
void TSkyGridEnt::GetEntClustV(const TSkyGridBs* SkyGridBs,
 const uint64& MnTm, const int& MnDocs, const int& MxDocs, const int& Clusts,
 TVec<TStrFltPrV>& EntNmWgtPrVV) const {
  EntNmWgtPrVV.Clr();
  // create bow
  PBowDocBs BowDocBs=TBowDocBs::New();
  // collect documents
  TIntV DocIdV; GetDocIdV(SkyGridBs, MnTm, 0, DocIdV);
  DocIdV.Reverse(); DocIdV.Shuffle(TRnd(1)); DocIdV.Trunc(MxDocs);
  if (DocIdV.Len()<MnDocs){return;}
  for (int DocN=0; DocN<DocIdV.Len(); DocN++){
    int DocId=DocIdV[DocN];
    PSkyGridDoc Doc=SkyGridBs->GetDoc(DocId);
    // create vector of entity-weights
    TIntFltPrV WIdWgtPrV;
    for (int EntN=0; EntN<Doc->GetEnts(); EntN++){
      int EntId; int EntFq; Doc->GetEntNmFq(EntN, EntId, EntFq);
      TStr EntNm=SkyGridBs->GetEntNm(EntId);
      int EntWId=BowDocBs->AddWordStr(EntNm);
      WIdWgtPrV.Add(TIntFltPr(EntWId, EntFq));
    }
    // create bow-document
    int DId=BowDocBs->AddDoc(TInt::GetStr(DocId), TStrV(), WIdWgtPrV);
    TStr DocDescStr=Doc->GetTitleStr();
    BowDocBs->PutDocDescStr(DId, DocDescStr);
  }
  // k-means clustering
  PBowSim BowSim=TBowSim::New(bstCos); // similarity object
  TBowWordWgtType WordWgtType=bwwtNrmTFIDF; // define weighting
  PBowDocPart BowDocPart=TBowClust::GetKMeansPart(
   TNotify::StdNotify, // log output
   BowDocBs, // document data
   BowSim, // similarity function
   TRnd(1), // random generator
   Clusts, // number of clusters
   1, // trials per k-means
   1, // convergence epsilon for k-means
   1, // min. documents per cluster
   WordWgtType, // word weighting
   0, // cut-word-weights percentage
   0); // minimal word frequency
  EntNmWgtPrVV.Clr();
  for (int ClustN=0; ClustN<BowDocPart->GetClusts(); ClustN++){
    PBowDocPartClust Clust=BowDocPart->GetClust(ClustN);
    TStrFltPrV WordStrWgtPrV;
    Clust->GetTopWordStrWgtPrV(BowDocBs, 25, 0.5, WordStrWgtPrV);
    EntNmWgtPrVV.Add(WordStrWgtPrV);
  }
  //BowDocPart->SaveTxt("Clusts.Txt", BowDocBs, true, 25, 0.5, false);
}
Пример #3
0
void node2vec(PWNet& InNet, double& ParamP, double& ParamQ, int& Dimensions,
 int& WalkLen, int& NumWalks, int& WinSize, int& Iter, bool& Verbose,
 bool& OutputWalks, TVVec<TInt, int64>& WalksVV, TIntFltVH& EmbeddingsHV) {
  //Preprocess transition probabilities
  PreprocessTransitionProbs(InNet, ParamP, ParamQ, Verbose);
  TIntV NIdsV;
  for (TWNet::TNodeI NI = InNet->BegNI(); NI < InNet->EndNI(); NI++) {
    NIdsV.Add(NI.GetId());
  }
  //Generate random walks
  int64 AllWalks = (int64)NumWalks * NIdsV.Len();
  WalksVV = TVVec<TInt, int64>(AllWalks,WalkLen);
  TRnd Rnd(time(NULL));
  int64 WalksDone = 0;
  for (int64 i = 0; i < NumWalks; i++) {
    NIdsV.Shuffle(Rnd);
#pragma omp parallel for schedule(dynamic)
    for (int64 j = 0; j < NIdsV.Len(); j++) {
      if ( Verbose && WalksDone%10000 == 0 ) {
        printf("\rWalking Progress: %.2lf%%",(double)WalksDone*100/(double)AllWalks);fflush(stdout);
      }
      TIntV WalkV;
      SimulateWalk(InNet, NIdsV[j], WalkLen, Rnd, WalkV);
      for (int64 k = 0; k < WalkV.Len(); k++) { 
        WalksVV.PutXY(i*NIdsV.Len()+j, k, WalkV[k]);
      }
      WalksDone++;
    }
  }
  if (Verbose) {
    printf("\n");
    fflush(stdout);
  }
  //Learning embeddings
  if (!OutputWalks) {
    LearnEmbeddings(WalksVV, Dimensions, WinSize, Iter, Verbose, EmbeddingsHV);
  }
}
Пример #4
0
// Node selects N~geometric(1.0-FwdBurnProb)-1 out-links and burns them. Then same for in-links.
// geometirc(p) has mean 1/(p), so for given FwdBurnProb, we burn 1/(1-FwdBurnProb)
void TForestFire::BurnGeoFire() {
	const double OldFwdBurnProb = FwdBurnProb;
	const double OldBckBurnProb = BckBurnProb;
	const int& NInfect = InfectNIdV.Len();
	const TNGraph& G = *Graph;
	TIntH BurnedNIdH;               // burned nodes
	TIntV BurningNIdV = InfectNIdV; // currently burning nodes
	TIntV NewBurnedNIdV;            // nodes newly burned in current step
	bool HasAliveInNbrs, HasAliveOutNbrs; // has unburned neighbors
	TIntV AliveNIdV;                // NIds of alive neighbors
	int NBurned = NInfect, time;
	for (int i = 0; i < InfectNIdV.Len(); i++) {
		BurnedNIdH.AddDat(InfectNIdV[i]);
	}
	NBurnedTmV.Clr(false);  NBurningTmV.Clr(false);  NewBurnedTmV.Clr(false);
	for (time = 0;; time++) {
		NewBurnedNIdV.Clr(false);
		for (int node = 0; node < BurningNIdV.Len(); node++) {
			const int& BurningNId = BurningNIdV[node];
			const TNGraph::TNodeI Node = G.GetNI(BurningNId);
			// find unburned links
			HasAliveOutNbrs = false;
			AliveNIdV.Clr(false); // unburned links
			for (int e = 0; e < Node.GetOutDeg(); e++) {
				const int OutNId = Node.GetOutNId(e);
				if (!BurnedNIdH.IsKey(OutNId)) {
					HasAliveOutNbrs = true;  AliveNIdV.Add(OutNId);
				}
			}
			// number of links to burn (geometric coin). Can also burn 0 links
			const int BurnNFwdLinks = Rnd.GetGeoDev(1.0 - FwdBurnProb) - 1;
			if (HasAliveOutNbrs && BurnNFwdLinks > 0) {
				AliveNIdV.Shuffle(Rnd);
				for (int i = 0; i < TMath::Mn(BurnNFwdLinks, AliveNIdV.Len()); i++) {
					BurnedNIdH.AddDat(AliveNIdV[i]);
					NewBurnedNIdV.Add(AliveNIdV[i]);  NBurned++;
				}
			}
			// backward links
			if (BckBurnProb > 0.0) {
				// find unburned links
				HasAliveInNbrs = false;
				AliveNIdV.Clr(false);
				for (int e = 0; e < Node.GetInDeg(); e++) {
					const int InNId = Node.GetInNId(e);
					if (!BurnedNIdH.IsKey(InNId)) {
						HasAliveInNbrs = true;  AliveNIdV.Add(InNId);
					}
				}
				// number of links to burn (geometric coin). Can also burn 0 links
				const int BurnNBckLinks = Rnd.GetGeoDev(1.0 - BckBurnProb) - 1;
				if (HasAliveInNbrs && BurnNBckLinks > 0) {
					AliveNIdV.Shuffle(Rnd);
					for (int i = 0; i < TMath::Mn(BurnNBckLinks, AliveNIdV.Len()); i++) {
						BurnedNIdH.AddDat(AliveNIdV[i]);
						NewBurnedNIdV.Add(AliveNIdV[i]);  NBurned++;
					}
				}
			}
		}
		NBurnedTmV.Add(NBurned);  NBurningTmV.Add(BurningNIdV.Len());  NewBurnedTmV.Add(NewBurnedNIdV.Len());
		// BurningNIdV.AddV(NewBurnedNIdV);   // node is burning eternally
		BurningNIdV.Swap(NewBurnedNIdV);   // node is burning just 1 time step
		if (BurningNIdV.Empty()) break;
		FwdBurnProb = FwdBurnProb * ProbDecay;
		BckBurnProb = BckBurnProb * ProbDecay;
	}
	BurnedNIdV.Gen(BurnedNIdH.Len(), 0);
	for (int i = 0; i < BurnedNIdH.Len(); i++) {
		BurnedNIdV.Add(BurnedNIdH.GetKey(i));
	}
	FwdBurnProb = OldFwdBurnProb;
	BckBurnProb = OldBckBurnProb;
}
Пример #5
0
/// Newton method: DEPRECATED
int TAGMFast::MLENewton(const double& Thres, const int& MaxIter, const TStr PlotNm) {
  TExeTm ExeTm;
  int iter = 0, PrevIter = 0;
  TIntFltPrV IterLV;
  double PrevL = TFlt::Mn, CurL;
  TUNGraph::TNodeI UI;
  TIntV NIdxV;
  G->GetNIdV(NIdxV);
  int CID, UID, NewtonIter;
  double Fuc, PrevFuc, Grad, H;
  while(iter < MaxIter) {
    NIdxV.Shuffle(Rnd);
    for (int ui = 0; ui < F.Len(); ui++, iter++) {
      if (! PlotNm.Empty() && iter % G->GetNodes() == 0) {
        IterLV.Add(TIntFltPr(iter, Likelihood(false)));
      }
      UID = NIdxV[ui];
      //find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
      TIntSet CIDSet;
      UI = G->GetNI(UID);
      if (UI.GetDeg() == 0) { //if the node is isolated, clear its membership and skip
        if (! F[UID].Empty()) { F[UID].Clr(); }
        continue;
      }
      for (int e = 0; e < UI.GetDeg(); e++) {
        if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; }
        TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)];
        for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
          CIDSet.AddKey(CI.GetKey());
        }
      }
      for (TIntFltH::TIter CI = F[UID].BegI(); CI < F[UID].EndI(); CI++) { //remove the community membership which U does not share with its neighbors
        if (! CIDSet.IsKey(CI.GetKey())) {
          DelCom(UID, CI.GetKey());
        }
      }
      if (CIDSet.Empty()) { continue; }
      for (TIntSet::TIter CI = CIDSet.BegI(); CI < CIDSet.EndI(); CI++) {
        CID = CI.GetKey();
        //optimize for UID, CID
        //compute constants
        TFltV AlphaKV(UI.GetDeg());
        for (int e = 0; e < UI.GetDeg(); e++) {
          if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; }
          AlphaKV[e] = (1 - PNoCom) * exp(- DotProduct(UID, UI.GetNbrNId(e)) + GetCom(UI.GetNbrNId(e), CID) * GetCom(UID, CID));
          IAssertR(AlphaKV[e] <= 1.0, TStr::Fmt("AlphaKV=%f, %f, %f", AlphaKV[e].Val, PNoCom.Val, GetCom(UI.GetNbrNId(e), CID)));
        }
        Fuc = GetCom(UID, CID);
        PrevFuc = Fuc;
        Grad = GradientForOneVar(AlphaKV, UID, CID, Fuc), H = 0.0;
        if (Grad <= 1e-3 && Grad >= -0.1) { continue; }
        NewtonIter = 0;
        while (NewtonIter++ < 10) {
          Grad = GradientForOneVar(AlphaKV, UID, CID, Fuc), H = 0.0;
          H = HessianForOneVar(AlphaKV, UID, CID, Fuc);
          if (Fuc == 0.0 && Grad <= 0.0) { Grad = 0.0; }
          if (fabs(Grad) < 1e-3) { break; }
          if (H == 0.0) { Fuc = 0.0; break; }
          double NewtonStep = - Grad / H;
          if (NewtonStep < -0.5) { NewtonStep = - 0.5; }
          Fuc += NewtonStep;
          if (Fuc < 0.0) { Fuc = 0.0; }
        }
        if (Fuc == 0.0) {
          DelCom(UID, CID);
        }
        else {
          AddCom(UID, CID, Fuc);
        }
      }
    }
    if (iter - PrevIter >= 2 * G->GetNodes() && iter > 10000) {
      PrevIter = iter;
      CurL = Likelihood();
      if (PrevL > TFlt::Mn && ! PlotNm.Empty()) {
        printf("\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL,  CurL - PrevL);
      }
      fflush(stdout);
      if (CurL - PrevL <= Thres * fabs(PrevL)) { break; }
      else { PrevL = CurL; }
    }
    
  }
  if (! PlotNm.Empty()) {
    printf("\nMLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.GetTmStr());
    TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
  }
  return iter;
}