Пример #1
0
double TAGMFast::LikelihoodForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val) {
  TUNGraph::TNodeI UI = G->GetNI(UID);
  double L = 0.0, PNoEdge;
  int VID = 0;
  for (int e = 0; e < UI.GetDeg(); e++) {
    VID = UI.GetNbrNId(e);
    if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; }
    if (! F[VID].IsKey(CID)) { 
      PNoEdge = AlphaKV[e];
    } else {
      PNoEdge = AlphaKV[e] * exp (- F[VID].GetDat(CID) * Val);
    }
    IAssert(PNoEdge <= 1.0 && PNoEdge >= 0.0);
    //PNoEdge = PNoEdge >= 1.0 - PNoCom? 1 - PNoCom: PNoEdge;
    L += log(1.0 - PNoEdge) + NegWgt * GetCom(VID, CID) * Val;

    //  += ((PNoEdge * F[VID].GetDat(CID)) / (1.0 - PNoEdge) + NegWgt * F[VID].GetDat(CID));
  }
  L -= NegWgt * (SumFV[CID] - GetCom(UID, CID)) * Val;
  //add regularization
  if (RegCoef > 0.0) { //L1
    L -= RegCoef * Val;
  }
  if (RegCoef < 0.0) { //L2
    L += RegCoef * Val * Val; 
  }

  return L;
}
Пример #2
0
double TAGMFast::LikelihoodForRow(const int UID, const TIntFltH& FU) {
  double L = 0.0;
  TFltV HOSumFV; //adjust for Fv of v hold out
  if (HOVIDSV[UID].Len() > 0) {
    HOSumFV.Gen(SumFV.Len());
    
    for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
      for (int c = 0; c < SumFV.Len(); c++) {
        HOSumFV[c] += GetCom(HOVIDSV[UID][e], c);
      }
    }
  }

  TUNGraph::TNodeI NI = G->GetNI(UID);
  if (DoParallel && NI.GetDeg() > 10) {
#pragma omp parallel for schedule(static, 1)
    for (int e = 0; e < NI.GetDeg(); e++) {
      int v = NI.GetNbrNId(e);
      if (v == UID) { continue; }
      if (HOVIDSV[UID].IsKey(v)) { continue; }
      double LU = log (1.0 - Prediction(FU, F[v])) + NegWgt * DotProduct(FU, F[v]);
#pragma omp atomic
      L += LU;
    }
    for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) {
      double HOSum = HOVIDSV[UID].Len() > 0?  HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
      double LU = NegWgt * (SumFV[HI.GetKey()] - HOSum - GetCom(UID, HI.GetKey())) * HI.GetDat();
      L -= LU;
    }
  } else {
    for (int e = 0; e < NI.GetDeg(); e++) {
      int v = NI.GetNbrNId(e);
      if (v == UID) { continue; }
      if (HOVIDSV[UID].IsKey(v)) { continue; }
      L += log (1.0 - Prediction(FU, F[v])) + NegWgt * DotProduct(FU, F[v]);
    }
    for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) {
      double HOSum = HOVIDSV[UID].Len() > 0?  HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
      L -= NegWgt * (SumFV[HI.GetKey()] - HOSum - GetCom(UID, HI.GetKey())) * HI.GetDat();
    }
  }
  //add regularization
  if (RegCoef > 0.0) { //L1
    L -= RegCoef * Sum(FU);
  }
  if (RegCoef < 0.0) { //L2
    L += RegCoef * Norm2(FU);
  }

  return L;
}
Пример #3
0
// Compute the change in likelihood (Delta) if node UID switches from CurCID to NewCID.
double TAGMFit::SeekSwitch(const int& UID, const int& CurCID, const int& NewCID) {
  IAssert(! CIDNSetV[NewCID].IsKey(UID));
  IAssert(CIDNSetV[CurCID].IsKey(UID));
  double Delta = SeekJoin(UID, NewCID) + SeekLeave(UID, CurCID);
  //correct only for intersection between new com and current com
  TUNGraph::TNodeI NI = G->GetNI(UID);
  for (int e = 0; e < NI.GetDeg(); e++) {
    const int VID = NI.GetNbrNId(e);
    if (! NIDComVH.GetDat(VID).IsKey(CurCID) || ! NIDComVH.GetDat(VID).IsKey(NewCID)) {continue;}
    TIntPr SrcDstNIDPr(TMath::Mn(UID,VID), TMath::Mx(UID,VID));
    TIntSet& JointCom = EdgeComVH.GetDat(SrcDstNIDPr);
    double CurPuv, NewPuvAfterJoin, NewPuvAfterLeave, NewPuvAfterSwitch, LambdaSum = SelectLambdaSum(JointCom);
    CurPuv = 1 - exp(- LambdaSum);
    NewPuvAfterLeave = 1 - exp(- LambdaSum + LambdaV[CurCID]);
    NewPuvAfterJoin = 1 - exp(- LambdaSum - LambdaV[NewCID]);
    NewPuvAfterSwitch = 1 - exp(- LambdaSum - LambdaV[NewCID] + LambdaV[CurCID]);
    if (JointCom.Len() == 1 || NewPuvAfterLeave == 0.0) {
      NewPuvAfterLeave = PNoCom;
    }
    Delta += (log(NewPuvAfterSwitch) + log(CurPuv) - log(NewPuvAfterLeave) - log(NewPuvAfterJoin));
    if (_isnan(Delta)) {
      printf("NS:%f C:%f NL:%f NJ:%f PNoCom:%f", NewPuvAfterSwitch, CurPuv, NewPuvAfterLeave, NewPuvAfterJoin, PNoCom.Val);
    }
    IAssert(!_isnan(Delta));
  }
  return Delta;
}
Пример #4
0
int Intersect(TUNGraph::TNodeI Node, int *NNodes, int NNodes_br){
  int br = 0;
  int neig;
  for (int i = 0; i<Node.GetDeg(); i++)
  {
    neig = Node.GetNbrNId(i);
    for (int j = 0; j<NNodes_br; j++)
    {
      if (neig == NNodes[j])
      {
        br++;
        j = NNodes_br;
      }
    }
  }

  neig = Node.GetId();
  for (int j = 0; j<NNodes_br; j++)
  {
    if (neig == NNodes[j])
    {
      br++;
      j = NNodes_br;
    }
  }

  return br;
}
Пример #5
0
// Compute the change in likelihood (Delta) if node UID leaves community CID.
double TAGMFit::SeekLeave(const int& UID, const int& CID) {
  IAssert(CIDNSetV[CID].IsKey(UID));
  IAssert(G->IsNode(UID));
  double Delta = 0.0;
  TUNGraph::TNodeI NI = G->GetNI(UID);
  int NbhsInC = 0;
  for (int e = 0; e < NI.GetDeg(); e++) {
    const int VID = NI.GetNbrNId(e);
    if (! NIDComVH.GetDat(VID).IsKey(CID)) { continue; }
    TIntPr SrcDstNIDPr(TMath::Mn(UID,VID), TMath::Mx(UID,VID));
    TIntSet& JointCom = EdgeComVH.GetDat(SrcDstNIDPr);
    double CurPuv, NewPuv, LambdaSum = SelectLambdaSum(JointCom);
    CurPuv = 1 - exp(- LambdaSum);
    NewPuv = 1 - exp(- LambdaSum + LambdaV[CID]);
    IAssert(JointCom.Len() > 0);
    if (JointCom.Len() == 1) {
      NewPuv = PNoCom;
    }
    Delta += (log(NewPuv) - log(CurPuv));
    IAssert(!_isnan(Delta));
    NbhsInC++;
  }
  Delta += LambdaV[CID] * (CIDNSetV[CID].Len() - 1 - NbhsInC);
  return Delta;
}
Пример #6
0
void TAGMUtil::GetNbhCom(const PUNGraph& Graph, const int NID, TIntSet& NBCmtyS) {
    TUNGraph::TNodeI NI = Graph->GetNI(NID);
    NBCmtyS.Gen(NI.GetDeg());
    NBCmtyS.AddKey(NID);
    for (int e = 0; e < NI.GetDeg(); e++) {
        NBCmtyS.AddKey(NI.GetNbrNId(e));
    }
}
Пример #7
0
double GetGroupDegreeCentr(const PUNGraph& Graph, const TIntH& GroupNodes) {
  int deg;
  TIntH NN;
  TIntH GroupNodes1;

  for (THashKeyDatI<TInt, TInt> NI = GroupNodes.BegI(); NI < GroupNodes.EndI(); NI++)
    GroupNodes1.AddDat(NI.GetDat(), NI.GetDat());

  for (THashKeyDatI<TInt, TInt> NI = GroupNodes1.BegI(); NI < GroupNodes1.EndI(); NI++){
    TUNGraph::TNodeI node = Graph->GetNI(NI.GetKey());
    deg = node.GetDeg();
    for (int j = 0; j < deg; j++){
      if (GroupNodes1.IsKey(node.GetNbrNId(j)) == 0 && NN.IsKey(node.GetNbrNId(j)) == 0)
        NN.AddDat(node.GetNbrNId(j), NI.GetKey());
    }
  }

  return (double)NN.Len();
}
Пример #8
0
double TAGMFast::HessianForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val) {
  TUNGraph::TNodeI UI = G->GetNI(UID);
  double H = 0.0, PNoEdge;
  int VID = 0;
  for (int e = 0; e < UI.GetDeg(); e++) {
    VID = UI.GetNbrNId(e);
    if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; }
    if (! F[VID].IsKey(CID)) { continue; }
    PNoEdge = AlphaKV[e] * exp (- F[VID].GetDat(CID) * Val);
    IAssert(PNoEdge <= 1.0 && PNoEdge >= 0.0);
    //PNoEdge = PNoEdge == 1.0? 1 - PNoCom: PNoEdge;
    H += (- PNoEdge * F[VID].GetDat(CID) * F[VID].GetDat(CID)) / (1.0 - PNoEdge) / (1.0 - PNoEdge);
  }
  //add regularization
  if (RegCoef < 0.0) { //L2
    H += 2 * RegCoef; 
  }
  IAssert (H <= 0.0);
  return H;
}
Пример #9
0
int Intersect(TUNGraph::TNodeI Node, TIntH NNodes){
  int br = 0;
  for (int i = 0; i<Node.GetDeg(); i++)
  {
    if (NNodes.IsKey(Node.GetNbrNId(i)))
      br++;
  }
  if (NNodes.IsKey(Node.GetId()))
    br++;

  return br;
}
Пример #10
0
// After MCMC, NID joins community CID.
void TAGMFit::JoinCom(const int& NID, const int& JoinCID) {
  TUNGraph::TNodeI NI = G->GetNI(NID);
  for (int e = 0; e < NI.GetDeg(); e++) {
    int VID = NI.GetNbrNId(e);
    if (NIDComVH.GetDat(VID).IsKey(JoinCID)) {
      TIntPr SrcDstNIDPr = TIntPr(TMath::Mn(NID,VID), TMath::Mx(NID,VID));
      EdgeComVH.GetDat(SrcDstNIDPr).AddKey(JoinCID);
      ComEdgesV[JoinCID]++;
    }
  }
  CIDNSetV[JoinCID].AddKey(NID);
  NIDComVH.GetDat(NID).AddKey(JoinCID);
  NIDCIDPrS.AddKey(TIntPr(NID, JoinCID));
}
Пример #11
0
void TAGMFit::RandomInit(const int& MaxK) {
  CIDNSetV.Clr();
  for (int c = 0; c < MaxK; c++) {
    CIDNSetV.Add();
    int NC = Rnd.GetUniDevInt(G -> GetNodes());
    TUNGraph::TNodeI NI = G -> GetRndNI();
    CIDNSetV.Last().AddKey(NI.GetId());
    for (int v = 0; v < NC; v++) {
      NI = G->GetNI(NI.GetNbrNId(Rnd.GetUniDevInt(NI.GetDeg())));
      CIDNSetV.Last().AddKey(NI.GetId());
    }
  }
  InitNodeData();
  SetDefaultPNoCom();
}
Пример #12
0
double TAGMFast::GradientForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val) {
  TUNGraph::TNodeI UI = G->GetNI(UID);
  double Grad = 0.0, PNoEdge;
  int VID = 0;
  for (int e = 0; e < UI.GetDeg(); e++) {
    VID = UI.GetNbrNId(e);
    if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; }
    if (! F[VID].IsKey(CID)) { continue; }
    PNoEdge = AlphaKV[e] * exp (- F[VID].GetDat(CID) * Val);
    IAssert(PNoEdge <= 1.0 && PNoEdge >= 0.0);
    //PNoEdge = PNoEdge >= 1.0 - PNoCom? 1 - PNoCom: PNoEdge;
    Grad += ((PNoEdge * F[VID].GetDat(CID)) / (1.0 - PNoEdge) + NegWgt * F[VID].GetDat(CID));
  }
  Grad -= NegWgt * (SumFV[CID] - GetCom(UID, CID));
  //add regularization
  if (RegCoef > 0.0) { //L1
    Grad -= RegCoef; 
  }
  if (RegCoef < 0.0) { //L2
    Grad += 2 * RegCoef * Val; 
  }

  return Grad;
}
Пример #13
0
int TCliqueOverlap::MaxNbrsInCANDNodeId(const THashSet<TInt>& SUBG, const THashSet<TInt>& CAND) const{
	int id = -1;
	int maxIntersection = -1;
	//
	for (THashSetKeyI<TInt> it=SUBG.BegI(); it<SUBG.EndI(); it++) {
		int nId = it.GetKey();
		TUNGraph::TNodeI nIt = m_G->GetNI(nId);
		int deg = nIt.GetDeg();
		//
		int curIntersection = 0;
		for (int i=0; i<deg; i++) {
			int nbrId = nIt.GetNbrNId(i);
			if (CAND.IsKey(nbrId)) curIntersection++;
		}
		//
		if (maxIntersection < curIntersection) { maxIntersection=curIntersection; id=nId; }
	}
	return id;
}
Пример #14
0
int Intersect1(TUNGraph::TNodeI Node, TStr NNodes){
  int br = 0;
  for (int i = 0; i<Node.GetDeg(); i++)
  {
    TInt digi = Node.GetNbrNId(i);
    TStr buf = "";
    buf = digi.GetStr();

    if (NNodes.SearchStr(buf.CStr()) != -1)
      br++;
  }

  TInt digi = Node.GetId();
  TStr buf = digi.GetStr();

  if (NNodes.SearchStr(buf.CStr()) != -1)
    br++;

  return br;
}
Пример #15
0
// For each (u, v) in edges, precompute C_uv (the set of communities u and v share).
void TAGMFit::GetEdgeJointCom() {
  ComEdgesV.Gen(CIDNSetV.Len());
  EdgeComVH.Gen(G->GetEdges());
  for (TUNGraph::TNodeI SrcNI = G->BegNI(); SrcNI < G->EndNI(); SrcNI++) {
    int SrcNID = SrcNI.GetId();
    for (int v = 0; v < SrcNI.GetDeg(); v++) {
      int DstNID = SrcNI.GetNbrNId(v);
      if (SrcNID >= DstNID) { continue; }
      TIntSet JointCom;
      IAssert(NIDComVH.IsKey(SrcNID));
      IAssert(NIDComVH.IsKey(DstNID));
      TAGMUtil::GetIntersection(NIDComVH.GetDat(SrcNID), NIDComVH.GetDat(DstNID), JointCom);
      EdgeComVH.AddDat(TIntPr(SrcNID,DstNID),JointCom);
      for (int k = 0; k < JointCom.Len(); k++) {
        ComEdgesV[JointCom[k]]++;
      }
    }
  }
  IAssert(EdgeComVH.Len() == G->GetEdges());
}
Пример #16
0
int Intersect(TUNGraph::TNodeI Node, TStr NNodes){
  int br = 0;

  TInt digi = -1;
  TStr buf = "";

  for (int i = 0; i<Node.GetDeg(); i++)
  {
    digi = Node.GetNbrNId(i);
    TStr buf = digi.GetStr();

    if (NNodes.IsStrIn(buf.CStr()))
      br++;
  }

  digi = Node.GetId();
  buf = digi.GetStr();

  if (NNodes.IsStrIn(buf.CStr()))
    br++;

  return br;
}
Пример #17
0
/// Newton method: DEPRECATED
int TAGMFast::MLENewton(const double& Thres, const int& MaxIter, const TStr PlotNm) {
  TExeTm ExeTm;
  int iter = 0, PrevIter = 0;
  TIntFltPrV IterLV;
  double PrevL = TFlt::Mn, CurL;
  TUNGraph::TNodeI UI;
  TIntV NIdxV;
  G->GetNIdV(NIdxV);
  int CID, UID, NewtonIter;
  double Fuc, PrevFuc, Grad, H;
  while(iter < MaxIter) {
    NIdxV.Shuffle(Rnd);
    for (int ui = 0; ui < F.Len(); ui++, iter++) {
      if (! PlotNm.Empty() && iter % G->GetNodes() == 0) {
        IterLV.Add(TIntFltPr(iter, Likelihood(false)));
      }
      UID = NIdxV[ui];
      //find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
      TIntSet CIDSet;
      UI = G->GetNI(UID);
      if (UI.GetDeg() == 0) { //if the node is isolated, clear its membership and skip
        if (! F[UID].Empty()) { F[UID].Clr(); }
        continue;
      }
      for (int e = 0; e < UI.GetDeg(); e++) {
        if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; }
        TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)];
        for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
          CIDSet.AddKey(CI.GetKey());
        }
      }
      for (TIntFltH::TIter CI = F[UID].BegI(); CI < F[UID].EndI(); CI++) { //remove the community membership which U does not share with its neighbors
        if (! CIDSet.IsKey(CI.GetKey())) {
          DelCom(UID, CI.GetKey());
        }
      }
      if (CIDSet.Empty()) { continue; }
      for (TIntSet::TIter CI = CIDSet.BegI(); CI < CIDSet.EndI(); CI++) {
        CID = CI.GetKey();
        //optimize for UID, CID
        //compute constants
        TFltV AlphaKV(UI.GetDeg());
        for (int e = 0; e < UI.GetDeg(); e++) {
          if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; }
          AlphaKV[e] = (1 - PNoCom) * exp(- DotProduct(UID, UI.GetNbrNId(e)) + GetCom(UI.GetNbrNId(e), CID) * GetCom(UID, CID));
          IAssertR(AlphaKV[e] <= 1.0, TStr::Fmt("AlphaKV=%f, %f, %f", AlphaKV[e].Val, PNoCom.Val, GetCom(UI.GetNbrNId(e), CID)));
        }
        Fuc = GetCom(UID, CID);
        PrevFuc = Fuc;
        Grad = GradientForOneVar(AlphaKV, UID, CID, Fuc), H = 0.0;
        if (Grad <= 1e-3 && Grad >= -0.1) { continue; }
        NewtonIter = 0;
        while (NewtonIter++ < 10) {
          Grad = GradientForOneVar(AlphaKV, UID, CID, Fuc), H = 0.0;
          H = HessianForOneVar(AlphaKV, UID, CID, Fuc);
          if (Fuc == 0.0 && Grad <= 0.0) { Grad = 0.0; }
          if (fabs(Grad) < 1e-3) { break; }
          if (H == 0.0) { Fuc = 0.0; break; }
          double NewtonStep = - Grad / H;
          if (NewtonStep < -0.5) { NewtonStep = - 0.5; }
          Fuc += NewtonStep;
          if (Fuc < 0.0) { Fuc = 0.0; }
        }
        if (Fuc == 0.0) {
          DelCom(UID, CID);
        }
        else {
          AddCom(UID, CID, Fuc);
        }
      }
    }
    if (iter - PrevIter >= 2 * G->GetNodes() && iter > 10000) {
      PrevIter = iter;
      CurL = Likelihood();
      if (PrevL > TFlt::Mn && ! PlotNm.Empty()) {
        printf("\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL,  CurL - PrevL);
      }
      fflush(stdout);
      if (CurL - PrevL <= Thres * fabs(PrevL)) { break; }
      else { PrevL = CurL; }
    }
    
  }
  if (! PlotNm.Empty()) {
    printf("\nMLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.GetTmStr());
    TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
  }
  return iter;
}
Пример #18
0
void TCliqueOverlap::GetNbrs(int NId, THashSet<TInt>& Nbrs) const{
	TUNGraph::TNodeI node = m_G->GetNI(NId);
	int deg = node.GetDeg();
	for (int i=0; i<deg; i++) Nbrs.AddKey(node.GetNbrNId(i));
}
Пример #19
0
// Initialize node community memberships using best neighborhood communities (see D. Gleich et al. KDD'12).
void TAGMFit::NeighborComInit(const int InitComs) {
  CIDNSetV.Gen(InitComs);
  const int Edges = G->GetEdges();
  TFltIntPrV NIdPhiV(G->GetNodes(), 0);
  TIntSet InvalidNIDS(G->GetNodes());
  TIntV ChosenNIDV(InitComs, 0); //FOR DEBUG
  TExeTm RunTm;
  //compute conductance of neighborhood community
  TIntV NIdV;
  G->GetNIdV(NIdV);
  for (int u = 0; u < NIdV.Len(); u++) {
    TIntSet NBCmty(G->GetNI(NIdV[u]).GetDeg() + 1);
    double Phi;
    if (G->GetNI(NIdV[u]).GetDeg() < 5) { //do not include nodes with too few degree
      Phi = 1.0; 
    } else {
      TAGMUtil::GetNbhCom(G, NIdV[u], NBCmty);
      IAssert(NBCmty.Len() == G->GetNI(NIdV[u]).GetDeg() + 1);
      Phi = TAGMUtil::GetConductance(G, NBCmty, Edges);
    }
    NIdPhiV.Add(TFltIntPr(Phi, NIdV[u]));
  }
  NIdPhiV.Sort(true);
  printf("conductance computation completed [%s]\n", RunTm.GetTmStr());
  fflush(stdout);
  //choose nodes with local minimum in conductance
  int CurCID = 0;
  for (int ui = 0; ui < NIdPhiV.Len(); ui++) {
    int UID = NIdPhiV[ui].Val2;
    fflush(stdout);
    if (InvalidNIDS.IsKey(UID)) { continue; }
    ChosenNIDV.Add(UID); //FOR DEBUG
    //add the node and its neighbors to the current community
    CIDNSetV[CurCID].AddKey(UID);
    TUNGraph::TNodeI NI = G->GetNI(UID);
    fflush(stdout);
    for (int e = 0; e < NI.GetDeg(); e++) {
      CIDNSetV[CurCID].AddKey(NI.GetNbrNId(e));
    }
    //exclude its neighbors from the next considerations
    for (int e = 0; e < NI.GetDeg(); e++) {
      InvalidNIDS.AddKey(NI.GetNbrNId(e));
    }
    CurCID++;
    fflush(stdout);
    if (CurCID >= InitComs) { break;  }
  }
  if (InitComs > CurCID) {
    printf("%d communities needed to fill randomly\n", InitComs - CurCID);
  }
  //assign a member to zero-member community (if any)
  for (int c = 0; c < CIDNSetV.Len(); c++) {
    if (CIDNSetV[c].Len() == 0) {
      int ComSz = 10;
      for (int u = 0; u < ComSz; u++) {
        int UID = G->GetRndNI().GetId();
        CIDNSetV[c].AddKey(UID);
      }
    }
  }
  InitNodeData();
  SetDefaultPNoCom();
}
Пример #20
0
int TAGMFast::MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha, const double StepBeta) {
  //parallel
  time_t InitTime = time(NULL);
  uint64 StartTm = TSecTm::GetCurTm().GetAbsSecs();
  TExeTm ExeTm, CheckTm;
  double PrevL = Likelihood(true);
  TIntFltPrV IterLV;
  int PrevIter = 0;
  int iter = 0;
  TIntV NIdxV(F.Len(), 0);
  for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); }
  TIntV NIDOPTV(F.Len()); //check if a node needs optimization or not 1: does not require optimization
  NIDOPTV.PutAll(0);
  TVec<TIntFltH> NewF(ChunkNum * ChunkSize);
  TIntV NewNIDV(ChunkNum * ChunkSize);
  for (iter = 0; iter < MaxIter; iter++) {
    NIdxV.Clr(false);
    for (int i = 0; i < F.Len(); i++) { 
      if (NIDOPTV[i] == 0) {  NIdxV.Add(i); }
    }
    IAssert (NIdxV.Len() <= F.Len());
    NIdxV.Shuffle(Rnd);
    // compute gradient for chunk of nodes
#pragma omp parallel for schedule(static, 1)
    for (int TIdx = 0; TIdx < ChunkNum; TIdx++) {
      TIntFltH GradV;
      for (int ui = TIdx * ChunkSize; ui < (TIdx + 1) * ChunkSize; ui++) {
        NewNIDV[ui] = -1;
        if (ui > NIdxV.Len()) { continue; }
        int u = NIdxV[ui]; //
        //find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
        TUNGraph::TNodeI UI = G->GetNI(u);
        TIntSet CIDSet(5 * UI.GetDeg());
        TIntFltH CurFU = F[u];
        for (int e = 0; e < UI.GetDeg(); e++) {
          if (HOVIDSV[u].IsKey(UI.GetNbrNId(e))) { continue; }
          TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)];
          for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
            CIDSet.AddKey(CI.GetKey());
          }
        }
        if (CIDSet.Empty()) { 
          CurFU.Clr();
        }
        else {
          for (TIntFltH::TIter CI = CurFU.BegI(); CI < CurFU.EndI(); CI++) { //remove the community membership which U does not share with its neighbors
            if (! CIDSet.IsKey(CI.GetKey())) {
              CurFU.DelIfKey(CI.GetKey());
            }
          }
          GradientForRow(u, GradV, CIDSet);
          if (Norm2(GradV) < 1e-4) { NIDOPTV[u] = 1; continue; }
          double LearnRate = GetStepSizeByLineSearch(u, GradV, GradV, StepAlpha, StepBeta, 5);
          if (LearnRate <= 1e-5) { NewNIDV[ui] = -2; continue; }
          for (int ci = 0; ci < GradV.Len(); ci++) {
            int CID = GradV.GetKey(ci);
            double Change = LearnRate * GradV.GetDat(CID);
            double NewFuc = CurFU.IsKey(CID)? CurFU.GetDat(CID) + Change : Change;
            if (NewFuc <= 0.0) {
              CurFU.DelIfKey(CID);
            } else {
              CurFU.AddDat(CID) = NewFuc;
            }
          }
          CurFU.Defrag();
        }
        //store changes
        NewF[ui] = CurFU;
        NewNIDV[ui] = u;
      }
    }
    int NumNoChangeGrad = 0;
    int NumNoChangeStepSize = 0;
    for (int ui = 0; ui < NewNIDV.Len(); ui++) {
      int NewNID = NewNIDV[ui];
      if (NewNID == -1) { NumNoChangeGrad++; continue; }
      if (NewNID == -2) { NumNoChangeStepSize++; continue; }
      for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
        SumFV[CI.GetKey()] -= CI.GetDat();
      }
    }
#pragma omp parallel for
    for (int ui = 0; ui < NewNIDV.Len(); ui++) {
      int NewNID = NewNIDV[ui];
      if (NewNID < 0) { continue; }
      F[NewNID] = NewF[ui];
    }
    for (int ui = 0; ui < NewNIDV.Len(); ui++) {
      int NewNID = NewNIDV[ui];
      if (NewNID < 0) { continue; }
      for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
        SumFV[CI.GetKey()] += CI.GetDat();
      }
    }
    // update the nodes who are optimal
    for (int ui = 0; ui < NewNIDV.Len(); ui++) {
      int NewNID = NewNIDV[ui];
      if (NewNID < 0) { continue; }
      TUNGraph::TNodeI UI = G->GetNI(NewNID);
      NIDOPTV[NewNID] = 0;
      for (int e = 0; e < UI.GetDeg(); e++) {
        NIDOPTV[UI.GetNbrNId(e)] = 0;
      }
    }
    int OPTCnt = 0;
    for (int i = 0; i < NIDOPTV.Len(); i++) { if (NIDOPTV[i] == 1) { OPTCnt++; } }
    if (! PlotNm.Empty()) {
      printf("\r%d iterations [%s] %d secs", iter * ChunkSize * ChunkNum, ExeTm.GetTmStr(), int(TSecTm::GetCurTm().GetAbsSecs() - StartTm));
      if (PrevL > TFlt::Mn) { printf(" (%f) %d g %d s %d OPT", PrevL, NumNoChangeGrad, NumNoChangeStepSize, OPTCnt); }
      fflush(stdout);
    }
    if ((iter - PrevIter) * ChunkSize * ChunkNum >= G->GetNodes()) {
      PrevIter = iter;
      double CurL = Likelihood(true);
      IterLV.Add(TIntFltPr(iter * ChunkSize * ChunkNum, CurL));
      printf("\r%d iterations, Likelihood: %f, Diff: %f [%d secs]", iter, CurL,  CurL - PrevL, int(time(NULL) - InitTime));
       fflush(stdout);
      if (CurL - PrevL <= Thres * fabs(PrevL)) { 
        break;
      }
      else {
        PrevL = CurL;
      }
    }
  }
  if (! PlotNm.Empty()) {
    printf("\nMLE completed with %d iterations(%s secs)\n", iter, int(TSecTm::GetCurTm().GetAbsSecs() - StartTm));
    TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");[]
  } else {
Пример #21
0
int TAGMFast::MLEGradAscent(const double& Thres, const int& MaxIter, const TStr PlotNm, const double StepAlpha, const double StepBeta) {
  time_t InitTime = time(NULL);
  TExeTm ExeTm, CheckTm;
  int iter = 0, PrevIter = 0;
  TIntFltPrV IterLV;
  TUNGraph::TNodeI UI;
  double PrevL = TFlt::Mn, CurL = 0.0;
  TIntV NIdxV(F.Len(), 0);
  for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); }
  IAssert(NIdxV.Len() == F.Len());
  TIntFltH GradV;
  while(iter < MaxIter) {
    NIdxV.Shuffle(Rnd);
    for (int ui = 0; ui < F.Len(); ui++, iter++) {
      int u = NIdxV[ui]; //
      //find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
      UI = G->GetNI(u);
      TIntSet CIDSet(5 * UI.GetDeg());
      for (int e = 0; e < UI.GetDeg(); e++) {
        if (HOVIDSV[u].IsKey(UI.GetNbrNId(e))) { continue; }
        TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)];
        for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
          CIDSet.AddKey(CI.GetKey());
        }
      }
      for (TIntFltH::TIter CI = F[u].BegI(); CI < F[u].EndI(); CI++) { //remove the community membership which U does not share with its neighbors
        if (! CIDSet.IsKey(CI.GetKey())) {
          DelCom(u, CI.GetKey());
        }
      }
      if (CIDSet.Empty()) { continue; }
      GradientForRow(u, GradV, CIDSet);
      if (Norm2(GradV) < 1e-4) { continue; }
      double LearnRate = GetStepSizeByLineSearch(u, GradV, GradV, StepAlpha, StepBeta);
      if (LearnRate == 0.0) { continue; }
      for (int ci = 0; ci < GradV.Len(); ci++) {
        int CID = GradV.GetKey(ci);
        double Change = LearnRate * GradV.GetDat(CID);
        double NewFuc = GetCom(u, CID) + Change;
        if (NewFuc <= 0.0) {
          DelCom(u, CID);
        } else {
          AddCom(u, CID, NewFuc);
        }
      }
      if (! PlotNm.Empty() && (iter + 1) % G->GetNodes() == 0) {
        IterLV.Add(TIntFltPr(iter, Likelihood(false)));
      }
    }
    printf("\r%d iterations (%f) [%lu sec]", iter, CurL, time(NULL) - InitTime);
    fflush(stdout);
    if (iter - PrevIter >= 2 * G->GetNodes() && iter > 10000) {
      PrevIter = iter;
      CurL = Likelihood();
      if (PrevL > TFlt::Mn && ! PlotNm.Empty()) {
        printf("\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL,  CurL - PrevL);
      }
      fflush(stdout);
      if (CurL - PrevL <= Thres * fabs(PrevL)) { break; }
      else { PrevL = CurL; }
    }
    
  }
  printf("\n");
  printf("MLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.GetTmStr());
  if (! PlotNm.Empty()) {
    TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
  }
  return iter;
}
Пример #22
0
void TAGMFast::NeighborComInit(const int InitComs) {
  //initialize with best neighborhood communities (Gleich et.al. KDD'12)
  F.Gen(G->GetNodes());
  SumFV.Gen(InitComs);
  NumComs = InitComs;
  const int Edges = G->GetEdges();
  //TIntFltH NCPhiH(F.Len());
  TFltIntPrV NIdPhiV(F.Len(), 0);
  TIntSet InvalidNIDS(F.Len());
  TIntV ChosenNIDV(InitComs, 0); //FOR DEBUG
  TExeTm RunTm;
  //compute conductance of neighborhood community
  for (int u = 0; u < F.Len(); u++) {
    TIntSet NBCmty(G->GetNI(u).GetDeg() + 1);
    double Phi;
    if (G->GetNI(u).GetDeg() < 5) { //do not include nodes with too few degree
      Phi = 1.0; 
    } else {
      TAGMUtil::GetNbhCom(G, u, NBCmty);
      IAssert(NBCmty.Len() == G->GetNI(u).GetDeg() + 1);
      Phi = TAGMUtil::GetConductance(G, NBCmty, Edges);
    }
    //NCPhiH.AddDat(u, Phi);
    NIdPhiV.Add(TFltIntPr(Phi, u));
  }
  NIdPhiV.Sort(true);
  printf("conductance computation completed [%s]\n", RunTm.GetTmStr());
  fflush(stdout);
  //choose nodes with local minimum in conductance
  int CurCID = 0;
  for (int ui = 0; ui < NIdPhiV.Len(); ui++) {
    int UID = NIdPhiV[ui].Val2;
    fflush(stdout);
    if (InvalidNIDS.IsKey(UID)) { continue; }
    ChosenNIDV.Add(UID); //FOR DEBUG
    //add the node and its neighbors to the current community
    AddCom(UID, CurCID, 1.0);
    TUNGraph::TNodeI NI = G->GetNI(UID);
    fflush(stdout);
    for (int e = 0; e < NI.GetDeg(); e++) {
      AddCom(NI.GetNbrNId(e), CurCID, 1.0);
    }
    //exclude its neighbors from the next considerations
    for (int e = 0; e < NI.GetDeg(); e++) {
      InvalidNIDS.AddKey(NI.GetNbrNId(e));
    }
    CurCID++;
    fflush(stdout);
    if (CurCID >= NumComs) { break;  }
  }
  if (NumComs > CurCID) {
    printf("%d communities needed to fill randomly\n", NumComs - CurCID);
  }
  //assign a member to zero-member community (if any)
  for (int c = 0; c < SumFV.Len(); c++) {
    if (SumFV[c] == 0.0) {
      int ComSz = 10;
      for (int u = 0; u < ComSz; u++) {
        int UID = Rnd.GetUniDevInt(G->GetNodes());
        AddCom(UID, c, Rnd.GetUniDev());
      }
    }
  }
}
Пример #23
0
void TAGMFast::GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet) {
  GradU.Gen(CIDSet.Len());

  TFltV HOSumFV; //adjust for Fv of v hold out
  if (HOVIDSV[UID].Len() > 0) {
    HOSumFV.Gen(SumFV.Len());
    
    for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
      for (int c = 0; c < SumFV.Len(); c++) {
        HOSumFV[c] += GetCom(HOVIDSV[UID][e], c);
      }
    }
  }
    
  TUNGraph::TNodeI NI = G->GetNI(UID);
  int Deg = NI.GetDeg();
  TFltV PredV(Deg), GradV(CIDSet.Len());
  TIntV CIDV(CIDSet.Len());
  if (DoParallel && Deg + CIDSet.Len() > 10) {
#pragma omp parallel for schedule(static, 1)
    for (int e = 0; e < Deg; e++) {
      if (NI.GetNbrNId(e) == UID) { continue; }
      if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; }
      PredV[e] = Prediction(UID, NI.GetNbrNId(e));
    }
  
#pragma omp parallel for schedule(static, 1)
    for (int c = 0; c < CIDSet.Len(); c++) {
      int CID = CIDSet.GetKey(c);
      double Val = 0.0;
      for (int e = 0; e < Deg; e++) {
        int VID = NI.GetNbrNId(e);
        if (VID == UID) { continue; }
        if (HOVIDSV[UID].IsKey(VID)) { continue; }
        Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID);
      }
      double HOSum = HOVIDSV[UID].Len() > 0?  HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
      Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID));
      CIDV[c] = CID;
      GradV[c] = Val;
    }
  } 
  else {
    for (int e = 0; e < Deg; e++) {
      if (NI.GetNbrNId(e) == UID) { continue; }
      if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; }
      PredV[e] = Prediction(UID, NI.GetNbrNId(e));
    }
  
    for (int c = 0; c < CIDSet.Len(); c++) {
      int CID = CIDSet.GetKey(c);
      double Val = 0.0;
      for (int e = 0; e < Deg; e++) {
        int VID = NI.GetNbrNId(e);
        if (VID == UID) { continue; }
        if (HOVIDSV[UID].IsKey(VID)) { continue; }
        Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID);
      }
      double HOSum = HOVIDSV[UID].Len() > 0?  HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
      Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID));
      CIDV[c] = CID;
      GradV[c] = Val;
    }
  }
  //add regularization
  if (RegCoef > 0.0) { //L1
    for (int c = 0; c < GradV.Len(); c++) {
      GradV[c] -= RegCoef; 
    }
  }
  if (RegCoef < 0.0) { //L2
    for (int c = 0; c < GradV.Len(); c++) {
      GradV[c] += 2 * RegCoef * GetCom(UID, CIDV[c]); 
    }
  }


  for (int c = 0; c < GradV.Len(); c++) {
    if (GetCom(UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; }
    if (fabs(GradV[c]) < 0.0001) { continue; }
    GradU.AddDat(CIDV[c], GradV[c]);
  }
  for (int c = 0; c < GradU.Len(); c++) {
    if (GradU[c] >= 10) { GradU[c] = 10; }
    if (GradU[c] <= -10) { GradU[c] = -10; }
    IAssert(GradU[c] >= -10);
  }
}
Пример #24
0
void sample (const int *m, const int *n, const int *h, const int *ns, const int *in, const int *infection_state, const int *mde, const int *bi, const int *br, double * result) {
  const int nodes = *h;
  const int nval = (*n)/2;
  int num_seeds = *ns;
  int infect_type = *in;
  int mode = *mde;
  int burnin = *bi;
  int branch = *br;

  PUNGraph g = get_PUNGraph (m, nval, nodes);

  THash<TInt, TInt> * visited = choose_seeds (g, num_seeds, infection_state, infect_type);
  TVec <VisitedNode *>  queue;
  TIntV qids;
  

  for (THash<TInt, TInt>::TIter n = visited->BegI(); n != visited->EndI(); n++) {
    queue = queue + new VisitedNode (n->Key);
    qids = qids + n->Key;
    //cerr << "enqueued " << n->Key << endl;
  }
  TInt counted = 0;
  TInt first_unprocessed = 0;
  TFlt infected_mass = 0.0;
  TFlt total_mass = 0.0;
  TFlt revisits = 0.0;
  TFlt trehits = 0.0;
  //cerr << "nodeId\tneigh\tnbh_size\tinfected?\tinfected_mass\ttotal_mass" << endl;
  while (counted < 500 && first_unprocessed < queue.Len()) {
    VisitedNode * current_node = queue [first_unprocessed];
    first_unprocessed++;
    TUNGraph::TNodeI NI = g->GetNI (current_node->id);
    TInt neighborhood_size = NI.GetDeg();
    //  cerr << counted << " " << current_node->id << endl;
    if (counted >= burnin) {
      if (infection_state[(current_node->id) - 1] == 1)
       infected_mass += 1.0/TFlt(neighborhood_size);
      total_mass += 1.0/TFlt(neighborhood_size);
    }
    //cerr << current_node->id << "\t" << neighborhood_size << "\t" << (1.0/TFlt(neighborhood_size)) 
    //	 << "\t" << infection_state[(current_node->id) - 1] << "\t" << infected_mass << "\t" << total_mass << endl;
    
    // build list of unvisited neighbors
    TVec<TInt> neighbors;
    for (int i = 0; i < neighborhood_size; i++) {
      TInt neighbor = NI.GetNbrNId(i);
      if (mode == 0 && visited->IsKey(neighbor)) continue;
      else if (mode == 2 && isChild (current_node, neighbor)) continue;
      else if (mode == 3 && current_node-> previous != NULL && current_node->previous->id == neighbor) continue;
      else neighbors = neighbors + neighbor;									
    }
    TInt num_legal_neighbors = neighbors.Len();
    TInt sample_size = TMath::Mn<TInt> (branch, num_legal_neighbors);
    THash <TInt, TInt> * choices = choose (num_legal_neighbors, sample_size);
    for (THash<TInt, TInt>::TIter n = choices->BegI(); n != choices->EndI(); n++) {
      if (queue.Len() >= 500) break;
      queue = queue + new VisitedNode (neighbors[n->Key], current_node);
      if (visited->IsKey(neighbors[n->Key])) revisits++;
      if (isChild(current_node, neighbors[n->Key])) trehits++;
      if (!visited->IsKey(neighbors[n->Key])) qids = qids + neighbors[n->Key];
      visited->AddDat(neighbors[n->Key], 1);
    }
    counted++;
  }
    
  // cout << (infected_mass / total_mass) << endl;
  delete (visited);
  result[0] = (infected_mass / total_mass);
  result[1] = revisits;
  result[2] = trehits;
  result[3] = counted;
  //PUNGraph p (&g);
  PUNGraph p = TSnap:: GetSubGraph (g, qids, false);
  TCnComV convec;
  result[4] = TSnap::GetClustCf(p, -1);
  TSnap::GetWccs(p, convec);
  result[5] = convec.Len();
  
  result[6] = ave_path_length (p);
}