Ejemplo n.º 1
0
/// estimate number of communities using cross validation
int TAGMFast::FindComsByCV(const int NumThreads, const int MaxComs, const int MinComs, const int DivComs, const TStr OutFNm, const double StepAlpha, const double StepBeta) {
    double ComsGap = exp(TMath::Log((double) MaxComs / (double) MinComs) / (double) DivComs);
    TIntV ComsV;
    ComsV.Add(MinComs);
    while (ComsV.Len() < DivComs) {
      int NewComs = int(ComsV.Last() * ComsGap);
      if (NewComs == ComsV.Last().Val) { NewComs++; }
      ComsV.Add(NewComs);
    }
    if (ComsV.Last() < MaxComs) { ComsV.Add(MaxComs); }
    return FindComsByCV(ComsV, 0.1, NumThreads, OutFNm + ".CV.likelihood", StepAlpha, StepBeta);
}
Ejemplo n.º 2
0
/// rewire bipartite community affiliation graphs
void TAGMUtil::RewireCmtyNID(THash<TInt,TIntV >& CmtyVH, TRnd& Rnd) {
    THash<TInt,TIntV > NewCmtyVH(CmtyVH.Len());
    TIntV NDegV;
    TIntV CDegV;
    for (int i = 0; i < CmtyVH.Len(); i++) {
        int CID = CmtyVH.GetKey(i);
        for (int j = 0; j < CmtyVH[i].Len(); j++) {
            int NID = CmtyVH[i][j];
            NDegV.Add(NID);
            CDegV.Add(CID);
        }
    }
    TIntPrSet CNIDSet(CDegV.Len());
    int c=0;
    while (c++ < 15 && CDegV.Len() > 1) {
        for (int i = 0; i < CDegV.Len(); i++) {
            int u = Rnd.GetUniDevInt(CDegV.Len());
            int v = Rnd.GetUniDevInt(NDegV.Len());
            if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) {
                continue;
            }
            CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v]));
            if (u == CDegV.Len() - 1) {
                CDegV.DelLast();
            }  else {
                CDegV[u] = CDegV.Last();
                CDegV.DelLast();
            }
            if ( v == NDegV.Len() - 1) {
                NDegV.DelLast();
            }  else {
                NDegV[v] = NDegV.Last();
                NDegV.DelLast();
            }
        }
    }
    for (int i = 0; i < CNIDSet.Len(); i++) {
        TIntPr CNIDPr = CNIDSet[i];
        IAssert(CmtyVH.IsKey(CNIDPr.Val1));
        NewCmtyVH.AddDat(CNIDPr.Val1);
        NewCmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2);
    }
    CmtyVH = NewCmtyVH;
}
Ejemplo n.º 3
0
//Precompute unigram table using alias sampling method
void InitUnigramTable(TIntV& Vocab, TIntV& KTable, TFltV& UTable) {
  double TrainWordsPow = 0;
  double Pwr = 0.75;
  TFltV ProbV(Vocab.Len());
  for (int64 i = 0; i < Vocab.Len(); i++) {
    ProbV[i]=TMath::Power(Vocab[i],Pwr);
    TrainWordsPow += ProbV[i];
    KTable[i]=0;
    UTable[i]=0;
  }
  for (int64 i = 0; i < ProbV.Len(); i++) {
    ProbV[i] /= TrainWordsPow;
  }
  TIntV UnderV;
  TIntV OverV;
  for (int64 i = 0; i < ProbV.Len(); i++) {
    UTable[i] = ProbV[i] * ProbV.Len();
    if ( UTable[i] < 1 ) {
      UnderV.Add(i);
    } else {
      OverV.Add(i);
    }
  }
  while(UnderV.Len() > 0 && OverV.Len() > 0) {
    int64 Small = UnderV.Last();
    int64 Large = OverV.Last();
    UnderV.DelLast();
    OverV.DelLast();
    KTable[Small] = Large;
    UTable[Large] = UTable[Large] + UTable[Small] - 1;
    if (UTable[Large] < 1) {
      UnderV.Add(Large);
    } else {
      OverV.Add(Large);
    }
  }
}
Ejemplo n.º 4
0
void TBlobBs::GenBlockLenV(TIntV& BlockLenV){
  BlockLenV.Clr();
  for (int P2Exp=0; P2Exp<TB4Def::MxP2Exp; P2Exp++){
    BlockLenV.Add(TInt(TB4Def::GetP2(P2Exp)));}
  EAssert(int(BlockLenV.Last())<2000000000);

  {for (int Len=10; Len<100; Len+=10){BlockLenV.Add(Len);}}
  {for (int Len=100; Len<10000; Len+=100){BlockLenV.Add(Len);}}
  {for (int Len=10000; Len<100000; Len+=1000){BlockLenV.Add(Len);}}
  {for (int Len=100000; Len<1000000; Len+=25000){BlockLenV.Add(Len);}}
  {for (int Len=1000000; Len<10000000; Len+=1000000){BlockLenV.Add(Len);}}
  {for (int Len=10000000; Len<100000000; Len+=10000000){BlockLenV.Add(Len);}}

  BlockLenV.Sort();
}
Ejemplo n.º 5
0
int TAGMFast::FindComsByCV(TIntV& ComsV, const double HOFrac, const int NumThreads, const TStr PlotLFNm, const double StepAlpha, const double StepBeta) {
  if (ComsV.Len() == 0) {
    int MaxComs = G->GetNodes() / 5;
    ComsV.Add(2);
    while(ComsV.Last() < MaxComs) { ComsV.Add(ComsV.Last() * 2); }
  }
  TIntPrV EdgeV(G->GetEdges(), 0);
  for (TUNGraph::TEdgeI EI = G->BegEI(); EI < G->EndEI(); EI++) {
    EdgeV.Add(TIntPr(EI.GetSrcNId(), EI.GetDstNId()));
  }
  EdgeV.Shuffle(Rnd);
  int MaxIterCV = 3;

  TVec<TVec<TIntSet> > HoldOutSets(MaxIterCV);
  if (EdgeV.Len() > 50) { //if edges are many enough, use CV
    printf("generating hold out set\n");
    TIntV NIdV1, NIdV2;
    G->GetNIdV(NIdV1);
    G->GetNIdV(NIdV2);
    for (int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
      // generate holdout sets
      HoldOutSets[IterCV].Gen(G->GetNodes());
      const int HOTotal = int(HOFrac * G->GetNodes() * (G->GetNodes() - 1) / 2.0);
      int HOCnt = 0;
      int HOEdges = (int) TMath::Round(HOFrac * G->GetEdges());
      printf("holding out %d edges...\n", HOEdges);
      for (int he = 0; he < (int) HOEdges; he++) {
        HoldOutSets[IterCV][EdgeV[he].Val1].AddKey(EdgeV[he].Val2);
        HoldOutSets[IterCV][EdgeV[he].Val2].AddKey(EdgeV[he].Val1);
        HOCnt++;
      }
      printf("%d Edges hold out\n", HOCnt);
      while(HOCnt++ < HOTotal) {
        int SrcNID = Rnd.GetUniDevInt(G->GetNodes());
        int DstNID = Rnd.GetUniDevInt(G->GetNodes());
        HoldOutSets[IterCV][SrcNID].AddKey(DstNID);
        HoldOutSets[IterCV][DstNID].AddKey(SrcNID);
      }
    }
    printf("hold out set generated\n");
  }

  TFltV HOLV(ComsV.Len());
  TIntFltPrV ComsLV;
  for (int c = 0; c < ComsV.Len(); c++) {
    const int Coms = ComsV[c];
    printf("Try number of Coms:%d\n", Coms);
    NeighborComInit(Coms);
    printf("Initialized\n");

    if (EdgeV.Len() > 50) { //if edges are many enough, use CV
      for (int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
        HOVIDSV = HoldOutSets[IterCV];

        if (NumThreads == 1) {
          printf("MLE without parallelization begins\n");
          MLEGradAscent(0.05, 10 * G->GetNodes(), "", StepAlpha, StepBeta);
        } else {
          printf("MLE with parallelization begins\n");
          MLEGradAscentParallel(0.05, 100, NumThreads, "", StepAlpha, StepBeta);
        }
        double HOL = LikelihoodHoldOut();
        HOL = HOL < 0? HOL: TFlt::Mn;
        HOLV[c] += HOL;
      }
    }
    else {
      HOVIDSV.Gen(G->GetNodes());
      MLEGradAscent(0.0001, 100 * G->GetNodes(), "");
      double BIC = 2 * Likelihood() - (double) G->GetNodes() * Coms * 2.0 * log ( (double) G->GetNodes());
      HOLV[c] = BIC;
    }
  }
  int EstComs = 2;
  double MaxL = TFlt::Mn;
  printf("\n");
  for (int c = 0; c < ComsV.Len(); c++) {
    ComsLV.Add(TIntFltPr(ComsV[c].Val, HOLV[c].Val));
    printf("%d(%f)\t", ComsV[c].Val, HOLV[c].Val);
    if (MaxL < HOLV[c]) {
      MaxL = HOLV[c];
      EstComs = ComsV[c];
    }
  }
  printf("\n");
  RandomInit(EstComs);
  HOVIDSV.Gen(G->GetNodes());
  if (! PlotLFNm.Empty()) {
    TGnuPlot::PlotValV(ComsLV, PlotLFNm, "hold-out likelihood", "communities", "likelihood");
  }
  return EstComs;
}