double TAGMFast::LikelihoodForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val) { TUNGraph::TNodeI UI = G->GetNI(UID); double L = 0.0, PNoEdge; int VID = 0; for (int e = 0; e < UI.GetDeg(); e++) { VID = UI.GetNbrNId(e); if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; } if (! F[VID].IsKey(CID)) { PNoEdge = AlphaKV[e]; } else { PNoEdge = AlphaKV[e] * exp (- F[VID].GetDat(CID) * Val); } IAssert(PNoEdge <= 1.0 && PNoEdge >= 0.0); //PNoEdge = PNoEdge >= 1.0 - PNoCom? 1 - PNoCom: PNoEdge; L += log(1.0 - PNoEdge) + NegWgt * GetCom(VID, CID) * Val; // += ((PNoEdge * F[VID].GetDat(CID)) / (1.0 - PNoEdge) + NegWgt * F[VID].GetDat(CID)); } L -= NegWgt * (SumFV[CID] - GetCom(UID, CID)) * Val; //add regularization if (RegCoef > 0.0) { //L1 L -= RegCoef * Val; } if (RegCoef < 0.0) { //L2 L += RegCoef * Val * Val; } return L; }
double TAGMFast::LikelihoodForRow(const int UID, const TIntFltH& FU) { double L = 0.0; TFltV HOSumFV; //adjust for Fv of v hold out if (HOVIDSV[UID].Len() > 0) { HOSumFV.Gen(SumFV.Len()); for (int e = 0; e < HOVIDSV[UID].Len(); e++) { for (int c = 0; c < SumFV.Len(); c++) { HOSumFV[c] += GetCom(HOVIDSV[UID][e], c); } } } TUNGraph::TNodeI NI = G->GetNI(UID); if (DoParallel && NI.GetDeg() > 10) { #pragma omp parallel for schedule(static, 1) for (int e = 0; e < NI.GetDeg(); e++) { int v = NI.GetNbrNId(e); if (v == UID) { continue; } if (HOVIDSV[UID].IsKey(v)) { continue; } double LU = log (1.0 - Prediction(FU, F[v])) + NegWgt * DotProduct(FU, F[v]); #pragma omp atomic L += LU; } for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) { double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist double LU = NegWgt * (SumFV[HI.GetKey()] - HOSum - GetCom(UID, HI.GetKey())) * HI.GetDat(); L -= LU; } } else { for (int e = 0; e < NI.GetDeg(); e++) { int v = NI.GetNbrNId(e); if (v == UID) { continue; } if (HOVIDSV[UID].IsKey(v)) { continue; } L += log (1.0 - Prediction(FU, F[v])) + NegWgt * DotProduct(FU, F[v]); } for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) { double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist L -= NegWgt * (SumFV[HI.GetKey()] - HOSum - GetCom(UID, HI.GetKey())) * HI.GetDat(); } } //add regularization if (RegCoef > 0.0) { //L1 L -= RegCoef * Sum(FU); } if (RegCoef < 0.0) { //L2 L += RegCoef * Norm2(FU); } return L; }
// Compute the change in likelihood (Delta) if node UID switches from CurCID to NewCID. double TAGMFit::SeekSwitch(const int& UID, const int& CurCID, const int& NewCID) { IAssert(! CIDNSetV[NewCID].IsKey(UID)); IAssert(CIDNSetV[CurCID].IsKey(UID)); double Delta = SeekJoin(UID, NewCID) + SeekLeave(UID, CurCID); //correct only for intersection between new com and current com TUNGraph::TNodeI NI = G->GetNI(UID); for (int e = 0; e < NI.GetDeg(); e++) { const int VID = NI.GetNbrNId(e); if (! NIDComVH.GetDat(VID).IsKey(CurCID) || ! NIDComVH.GetDat(VID).IsKey(NewCID)) {continue;} TIntPr SrcDstNIDPr(TMath::Mn(UID,VID), TMath::Mx(UID,VID)); TIntSet& JointCom = EdgeComVH.GetDat(SrcDstNIDPr); double CurPuv, NewPuvAfterJoin, NewPuvAfterLeave, NewPuvAfterSwitch, LambdaSum = SelectLambdaSum(JointCom); CurPuv = 1 - exp(- LambdaSum); NewPuvAfterLeave = 1 - exp(- LambdaSum + LambdaV[CurCID]); NewPuvAfterJoin = 1 - exp(- LambdaSum - LambdaV[NewCID]); NewPuvAfterSwitch = 1 - exp(- LambdaSum - LambdaV[NewCID] + LambdaV[CurCID]); if (JointCom.Len() == 1 || NewPuvAfterLeave == 0.0) { NewPuvAfterLeave = PNoCom; } Delta += (log(NewPuvAfterSwitch) + log(CurPuv) - log(NewPuvAfterLeave) - log(NewPuvAfterJoin)); if (_isnan(Delta)) { printf("NS:%f C:%f NL:%f NJ:%f PNoCom:%f", NewPuvAfterSwitch, CurPuv, NewPuvAfterLeave, NewPuvAfterJoin, PNoCom.Val); } IAssert(!_isnan(Delta)); } return Delta; }
/// Counts how many members of the closed neighborhood of Node (its neighbors
/// plus Node itself) occur in the array NNodes of length NNodes_br.
/// Each neighborhood member is counted at most once.
int Intersect(TUNGraph::TNodeI Node, int *NNodes, int NNodes_br){
  int Hits = 0;
  const int Deg = Node.GetDeg();
  // Positions 0..Deg-1 are the neighbors; position Deg stands for the node itself.
  for (int Pos = 0; Pos <= Deg; Pos++) {
    const int Candidate = (Pos < Deg) ? Node.GetNbrNId(Pos) : Node.GetId();
    for (int k = 0; k < NNodes_br; k++) {
      if (NNodes[k] == Candidate) {
        Hits++;
        break;
      }
    }
  }
  return Hits;
}
// Compute the change in likelihood (Delta) if node UID leaves community CID. double TAGMFit::SeekLeave(const int& UID, const int& CID) { IAssert(CIDNSetV[CID].IsKey(UID)); IAssert(G->IsNode(UID)); double Delta = 0.0; TUNGraph::TNodeI NI = G->GetNI(UID); int NbhsInC = 0; for (int e = 0; e < NI.GetDeg(); e++) { const int VID = NI.GetNbrNId(e); if (! NIDComVH.GetDat(VID).IsKey(CID)) { continue; } TIntPr SrcDstNIDPr(TMath::Mn(UID,VID), TMath::Mx(UID,VID)); TIntSet& JointCom = EdgeComVH.GetDat(SrcDstNIDPr); double CurPuv, NewPuv, LambdaSum = SelectLambdaSum(JointCom); CurPuv = 1 - exp(- LambdaSum); NewPuv = 1 - exp(- LambdaSum + LambdaV[CID]); IAssert(JointCom.Len() > 0); if (JointCom.Len() == 1) { NewPuv = PNoCom; } Delta += (log(NewPuv) - log(CurPuv)); IAssert(!_isnan(Delta)); NbhsInC++; } Delta += LambdaV[CID] * (CIDNSetV[CID].Len() - 1 - NbhsInC); return Delta; }
void TAGMUtil::GetNbhCom(const PUNGraph& Graph, const int NID, TIntSet& NBCmtyS) { TUNGraph::TNodeI NI = Graph->GetNI(NID); NBCmtyS.Gen(NI.GetDeg()); NBCmtyS.AddKey(NID); for (int e = 0; e < NI.GetDeg(); e++) { NBCmtyS.AddKey(NI.GetNbrNId(e)); } }
double GetGroupDegreeCentr(const PUNGraph& Graph, const TIntH& GroupNodes) { int deg; TIntH NN; TIntH GroupNodes1; for (THashKeyDatI<TInt, TInt> NI = GroupNodes.BegI(); NI < GroupNodes.EndI(); NI++) GroupNodes1.AddDat(NI.GetDat(), NI.GetDat()); for (THashKeyDatI<TInt, TInt> NI = GroupNodes1.BegI(); NI < GroupNodes1.EndI(); NI++){ TUNGraph::TNodeI node = Graph->GetNI(NI.GetKey()); deg = node.GetDeg(); for (int j = 0; j < deg; j++){ if (GroupNodes1.IsKey(node.GetNbrNId(j)) == 0 && NN.IsKey(node.GetNbrNId(j)) == 0) NN.AddDat(node.GetNbrNId(j), NI.GetKey()); } } return (double)NN.Len(); }
double TAGMFast::HessianForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val) { TUNGraph::TNodeI UI = G->GetNI(UID); double H = 0.0, PNoEdge; int VID = 0; for (int e = 0; e < UI.GetDeg(); e++) { VID = UI.GetNbrNId(e); if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; } if (! F[VID].IsKey(CID)) { continue; } PNoEdge = AlphaKV[e] * exp (- F[VID].GetDat(CID) * Val); IAssert(PNoEdge <= 1.0 && PNoEdge >= 0.0); //PNoEdge = PNoEdge == 1.0? 1 - PNoCom: PNoEdge; H += (- PNoEdge * F[VID].GetDat(CID) * F[VID].GetDat(CID)) / (1.0 - PNoEdge) / (1.0 - PNoEdge); } //add regularization if (RegCoef < 0.0) { //L2 H += 2 * RegCoef; } IAssert (H <= 0.0); return H; }
/// Counts how many members of the closed neighborhood of Node (its neighbors
/// plus Node itself) are keys of the hash table NNodes.
int Intersect(TUNGraph::TNodeI Node, TIntH NNodes){
  const int Deg = Node.GetDeg();
  // Start with the node itself, then add every neighbor found in NNodes.
  int Hits = NNodes.IsKey(Node.GetId()) ? 1 : 0;
  for (int Pos = 0; Pos < Deg; Pos++) {
    const int NbrId = Node.GetNbrNId(Pos);
    if (NNodes.IsKey(NbrId)) {
      Hits++;
    }
  }
  return Hits;
}
// After MCMC, NID joins community CID. void TAGMFit::JoinCom(const int& NID, const int& JoinCID) { TUNGraph::TNodeI NI = G->GetNI(NID); for (int e = 0; e < NI.GetDeg(); e++) { int VID = NI.GetNbrNId(e); if (NIDComVH.GetDat(VID).IsKey(JoinCID)) { TIntPr SrcDstNIDPr = TIntPr(TMath::Mn(NID,VID), TMath::Mx(NID,VID)); EdgeComVH.GetDat(SrcDstNIDPr).AddKey(JoinCID); ComEdgesV[JoinCID]++; } } CIDNSetV[JoinCID].AddKey(NID); NIDComVH.GetDat(NID).AddKey(JoinCID); NIDCIDPrS.AddKey(TIntPr(NID, JoinCID)); }
/// Randomly initializes MaxK communities.
/// Each community is seeded with a random node and then grown by a random walk
/// of a random number of steps (uniform in [0, number of nodes)); every visited
/// node is added to the community. Node data and the default PNoCom are
/// (re)initialized afterwards.
void TAGMFit::RandomInit(const int& MaxK) {
  CIDNSetV.Clr();
  for (int c = 0; c < MaxK; c++) {
    CIDNSetV.Add();
    int NC = Rnd.GetUniDevInt(G -> GetNodes());
    TUNGraph::TNodeI NI = G -> GetRndNI();
    CIDNSetV.Last().AddKey(NI.GetId());
    for (int v = 0; v < NC; v++) {
      // An isolated start node has no neighbor to step to. Drawing an index
      // with range 0 would yield an out-of-range neighbor position, so the
      // walk ends here and the community keeps only the nodes collected so far.
      if (NI.GetDeg() == 0) { break; }
      NI = G->GetNI(NI.GetNbrNId(Rnd.GetUniDevInt(NI.GetDeg())));
      CIDNSetV.Last().AddKey(NI.GetId());
    }
  }
  InitNodeData();
  SetDefaultPNoCom();
}
double TAGMFast::GradientForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val) { TUNGraph::TNodeI UI = G->GetNI(UID); double Grad = 0.0, PNoEdge; int VID = 0; for (int e = 0; e < UI.GetDeg(); e++) { VID = UI.GetNbrNId(e); if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; } if (! F[VID].IsKey(CID)) { continue; } PNoEdge = AlphaKV[e] * exp (- F[VID].GetDat(CID) * Val); IAssert(PNoEdge <= 1.0 && PNoEdge >= 0.0); //PNoEdge = PNoEdge >= 1.0 - PNoCom? 1 - PNoCom: PNoEdge; Grad += ((PNoEdge * F[VID].GetDat(CID)) / (1.0 - PNoEdge) + NegWgt * F[VID].GetDat(CID)); } Grad -= NegWgt * (SumFV[CID] - GetCom(UID, CID)); //add regularization if (RegCoef > 0.0) { //L1 Grad -= RegCoef; } if (RegCoef < 0.0) { //L2 Grad += 2 * RegCoef * Val; } return Grad; }
int TCliqueOverlap::MaxNbrsInCANDNodeId(const THashSet<TInt>& SUBG, const THashSet<TInt>& CAND) const{ int id = -1; int maxIntersection = -1; // for (THashSetKeyI<TInt> it=SUBG.BegI(); it<SUBG.EndI(); it++) { int nId = it.GetKey(); TUNGraph::TNodeI nIt = m_G->GetNI(nId); int deg = nIt.GetDeg(); // int curIntersection = 0; for (int i=0; i<deg; i++) { int nbrId = nIt.GetNbrNId(i); if (CAND.IsKey(nbrId)) curIntersection++; } // if (maxIntersection < curIntersection) { maxIntersection=curIntersection; id=nId; } } return id; }
/// Counts how many members of the closed neighborhood of Node (its neighbors
/// plus Node itself) have their decimal id found in NNodes via SearchStr.
/// NOTE(review): the lookup is a plain string search on the id text, so an id
/// may presumably match inside a longer id - confirm the format of NNodes.
int Intersect1(TUNGraph::TNodeI Node, TStr NNodes){
  int Hits = 0;
  const int Deg = Node.GetDeg();
  // Positions 0..Deg-1 are the neighbors; position Deg stands for the node itself.
  for (int Pos = 0; Pos <= Deg; Pos++) {
    TInt Id = (Pos < Deg) ? Node.GetNbrNId(Pos) : Node.GetId();
    TStr IdStr = Id.GetStr();
    if (NNodes.SearchStr(IdStr.CStr()) != -1) {
      Hits++;
    }
  }
  return Hits;
}
// For each (u, v) in edges, precompute C_uv (the set of communities u and v share). void TAGMFit::GetEdgeJointCom() { ComEdgesV.Gen(CIDNSetV.Len()); EdgeComVH.Gen(G->GetEdges()); for (TUNGraph::TNodeI SrcNI = G->BegNI(); SrcNI < G->EndNI(); SrcNI++) { int SrcNID = SrcNI.GetId(); for (int v = 0; v < SrcNI.GetDeg(); v++) { int DstNID = SrcNI.GetNbrNId(v); if (SrcNID >= DstNID) { continue; } TIntSet JointCom; IAssert(NIDComVH.IsKey(SrcNID)); IAssert(NIDComVH.IsKey(DstNID)); TAGMUtil::GetIntersection(NIDComVH.GetDat(SrcNID), NIDComVH.GetDat(DstNID), JointCom); EdgeComVH.AddDat(TIntPr(SrcNID,DstNID),JointCom); for (int k = 0; k < JointCom.Len(); k++) { ComEdgesV[JointCom[k]]++; } } } IAssert(EdgeComVH.Len() == G->GetEdges()); }
/// Counts how many members of the closed neighborhood of Node (its neighbors
/// plus Node itself) have their decimal id reported by NNodes.IsStrIn().
int Intersect(TUNGraph::TNodeI Node, TStr NNodes){
  int Hits = 0;
  const int Deg = Node.GetDeg();
  // Positions 0..Deg-1 are the neighbors; position Deg stands for the node itself.
  for (int Pos = 0; Pos <= Deg; Pos++) {
    TInt Id = (Pos < Deg) ? Node.GetNbrNId(Pos) : Node.GetId();
    TStr IdStr = Id.GetStr();
    if (NNodes.IsStrIn(IdStr.CStr())) {
      Hits++;
    }
  }
  return Hits;
}
/// Newton method: DEPRECATED int TAGMFast::MLENewton(const double& Thres, const int& MaxIter, const TStr PlotNm) { TExeTm ExeTm; int iter = 0, PrevIter = 0; TIntFltPrV IterLV; double PrevL = TFlt::Mn, CurL; TUNGraph::TNodeI UI; TIntV NIdxV; G->GetNIdV(NIdxV); int CID, UID, NewtonIter; double Fuc, PrevFuc, Grad, H; while(iter < MaxIter) { NIdxV.Shuffle(Rnd); for (int ui = 0; ui < F.Len(); ui++, iter++) { if (! PlotNm.Empty() && iter % G->GetNodes() == 0) { IterLV.Add(TIntFltPr(iter, Likelihood(false))); } UID = NIdxV[ui]; //find set of candidate c (we only need to consider c to which a neighbor of u belongs to) TIntSet CIDSet; UI = G->GetNI(UID); if (UI.GetDeg() == 0) { //if the node is isolated, clear its membership and skip if (! F[UID].Empty()) { F[UID].Clr(); } continue; } for (int e = 0; e < UI.GetDeg(); e++) { if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; } TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)]; for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) { CIDSet.AddKey(CI.GetKey()); } } for (TIntFltH::TIter CI = F[UID].BegI(); CI < F[UID].EndI(); CI++) { //remove the community membership which U does not share with its neighbors if (! 
CIDSet.IsKey(CI.GetKey())) { DelCom(UID, CI.GetKey()); } } if (CIDSet.Empty()) { continue; } for (TIntSet::TIter CI = CIDSet.BegI(); CI < CIDSet.EndI(); CI++) { CID = CI.GetKey(); //optimize for UID, CID //compute constants TFltV AlphaKV(UI.GetDeg()); for (int e = 0; e < UI.GetDeg(); e++) { if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; } AlphaKV[e] = (1 - PNoCom) * exp(- DotProduct(UID, UI.GetNbrNId(e)) + GetCom(UI.GetNbrNId(e), CID) * GetCom(UID, CID)); IAssertR(AlphaKV[e] <= 1.0, TStr::Fmt("AlphaKV=%f, %f, %f", AlphaKV[e].Val, PNoCom.Val, GetCom(UI.GetNbrNId(e), CID))); } Fuc = GetCom(UID, CID); PrevFuc = Fuc; Grad = GradientForOneVar(AlphaKV, UID, CID, Fuc), H = 0.0; if (Grad <= 1e-3 && Grad >= -0.1) { continue; } NewtonIter = 0; while (NewtonIter++ < 10) { Grad = GradientForOneVar(AlphaKV, UID, CID, Fuc), H = 0.0; H = HessianForOneVar(AlphaKV, UID, CID, Fuc); if (Fuc == 0.0 && Grad <= 0.0) { Grad = 0.0; } if (fabs(Grad) < 1e-3) { break; } if (H == 0.0) { Fuc = 0.0; break; } double NewtonStep = - Grad / H; if (NewtonStep < -0.5) { NewtonStep = - 0.5; } Fuc += NewtonStep; if (Fuc < 0.0) { Fuc = 0.0; } } if (Fuc == 0.0) { DelCom(UID, CID); } else { AddCom(UID, CID, Fuc); } } } if (iter - PrevIter >= 2 * G->GetNodes() && iter > 10000) { PrevIter = iter; CurL = Likelihood(); if (PrevL > TFlt::Mn && ! PlotNm.Empty()) { printf("\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL, CurL - PrevL); } fflush(stdout); if (CurL - PrevL <= Thres * fabs(PrevL)) { break; } else { PrevL = CurL; } } } if (! PlotNm.Empty()) { printf("\nMLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.GetTmStr()); TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q"); } return iter; }
void TCliqueOverlap::GetNbrs(int NId, THashSet<TInt>& Nbrs) const{ TUNGraph::TNodeI node = m_G->GetNI(NId); int deg = node.GetDeg(); for (int i=0; i<deg; i++) Nbrs.AddKey(node.GetNbrNId(i)); }
// Initialize node community memberships using best neighborhood communities (see D. Gleich et al. KDD'12). void TAGMFit::NeighborComInit(const int InitComs) { CIDNSetV.Gen(InitComs); const int Edges = G->GetEdges(); TFltIntPrV NIdPhiV(G->GetNodes(), 0); TIntSet InvalidNIDS(G->GetNodes()); TIntV ChosenNIDV(InitComs, 0); //FOR DEBUG TExeTm RunTm; //compute conductance of neighborhood community TIntV NIdV; G->GetNIdV(NIdV); for (int u = 0; u < NIdV.Len(); u++) { TIntSet NBCmty(G->GetNI(NIdV[u]).GetDeg() + 1); double Phi; if (G->GetNI(NIdV[u]).GetDeg() < 5) { //do not include nodes with too few degree Phi = 1.0; } else { TAGMUtil::GetNbhCom(G, NIdV[u], NBCmty); IAssert(NBCmty.Len() == G->GetNI(NIdV[u]).GetDeg() + 1); Phi = TAGMUtil::GetConductance(G, NBCmty, Edges); } NIdPhiV.Add(TFltIntPr(Phi, NIdV[u])); } NIdPhiV.Sort(true); printf("conductance computation completed [%s]\n", RunTm.GetTmStr()); fflush(stdout); //choose nodes with local minimum in conductance int CurCID = 0; for (int ui = 0; ui < NIdPhiV.Len(); ui++) { int UID = NIdPhiV[ui].Val2; fflush(stdout); if (InvalidNIDS.IsKey(UID)) { continue; } ChosenNIDV.Add(UID); //FOR DEBUG //add the node and its neighbors to the current community CIDNSetV[CurCID].AddKey(UID); TUNGraph::TNodeI NI = G->GetNI(UID); fflush(stdout); for (int e = 0; e < NI.GetDeg(); e++) { CIDNSetV[CurCID].AddKey(NI.GetNbrNId(e)); } //exclude its neighbors from the next considerations for (int e = 0; e < NI.GetDeg(); e++) { InvalidNIDS.AddKey(NI.GetNbrNId(e)); } CurCID++; fflush(stdout); if (CurCID >= InitComs) { break; } } if (InitComs > CurCID) { printf("%d communities needed to fill randomly\n", InitComs - CurCID); } //assign a member to zero-member community (if any) for (int c = 0; c < CIDNSetV.Len(); c++) { if (CIDNSetV[c].Len() == 0) { int ComSz = 10; for (int u = 0; u < ComSz; u++) { int UID = G->GetRndNI().GetId(); CIDNSetV[c].AddKey(UID); } } } InitNodeData(); SetDefaultPNoCom(); }
int TAGMFast::MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha, const double StepBeta) { //parallel time_t InitTime = time(NULL); uint64 StartTm = TSecTm::GetCurTm().GetAbsSecs(); TExeTm ExeTm, CheckTm; double PrevL = Likelihood(true); TIntFltPrV IterLV; int PrevIter = 0; int iter = 0; TIntV NIdxV(F.Len(), 0); for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); } TIntV NIDOPTV(F.Len()); //check if a node needs optimization or not 1: does not require optimization NIDOPTV.PutAll(0); TVec<TIntFltH> NewF(ChunkNum * ChunkSize); TIntV NewNIDV(ChunkNum * ChunkSize); for (iter = 0; iter < MaxIter; iter++) { NIdxV.Clr(false); for (int i = 0; i < F.Len(); i++) { if (NIDOPTV[i] == 0) { NIdxV.Add(i); } } IAssert (NIdxV.Len() <= F.Len()); NIdxV.Shuffle(Rnd); // compute gradient for chunk of nodes #pragma omp parallel for schedule(static, 1) for (int TIdx = 0; TIdx < ChunkNum; TIdx++) { TIntFltH GradV; for (int ui = TIdx * ChunkSize; ui < (TIdx + 1) * ChunkSize; ui++) { NewNIDV[ui] = -1; if (ui > NIdxV.Len()) { continue; } int u = NIdxV[ui]; // //find set of candidate c (we only need to consider c to which a neighbor of u belongs to) TUNGraph::TNodeI UI = G->GetNI(u); TIntSet CIDSet(5 * UI.GetDeg()); TIntFltH CurFU = F[u]; for (int e = 0; e < UI.GetDeg(); e++) { if (HOVIDSV[u].IsKey(UI.GetNbrNId(e))) { continue; } TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)]; for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) { CIDSet.AddKey(CI.GetKey()); } } if (CIDSet.Empty()) { CurFU.Clr(); } else { for (TIntFltH::TIter CI = CurFU.BegI(); CI < CurFU.EndI(); CI++) { //remove the community membership which U does not share with its neighbors if (! 
CIDSet.IsKey(CI.GetKey())) { CurFU.DelIfKey(CI.GetKey()); } } GradientForRow(u, GradV, CIDSet); if (Norm2(GradV) < 1e-4) { NIDOPTV[u] = 1; continue; } double LearnRate = GetStepSizeByLineSearch(u, GradV, GradV, StepAlpha, StepBeta, 5); if (LearnRate <= 1e-5) { NewNIDV[ui] = -2; continue; } for (int ci = 0; ci < GradV.Len(); ci++) { int CID = GradV.GetKey(ci); double Change = LearnRate * GradV.GetDat(CID); double NewFuc = CurFU.IsKey(CID)? CurFU.GetDat(CID) + Change : Change; if (NewFuc <= 0.0) { CurFU.DelIfKey(CID); } else { CurFU.AddDat(CID) = NewFuc; } } CurFU.Defrag(); } //store changes NewF[ui] = CurFU; NewNIDV[ui] = u; } } int NumNoChangeGrad = 0; int NumNoChangeStepSize = 0; for (int ui = 0; ui < NewNIDV.Len(); ui++) { int NewNID = NewNIDV[ui]; if (NewNID == -1) { NumNoChangeGrad++; continue; } if (NewNID == -2) { NumNoChangeStepSize++; continue; } for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) { SumFV[CI.GetKey()] -= CI.GetDat(); } } #pragma omp parallel for for (int ui = 0; ui < NewNIDV.Len(); ui++) { int NewNID = NewNIDV[ui]; if (NewNID < 0) { continue; } F[NewNID] = NewF[ui]; } for (int ui = 0; ui < NewNIDV.Len(); ui++) { int NewNID = NewNIDV[ui]; if (NewNID < 0) { continue; } for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) { SumFV[CI.GetKey()] += CI.GetDat(); } } // update the nodes who are optimal for (int ui = 0; ui < NewNIDV.Len(); ui++) { int NewNID = NewNIDV[ui]; if (NewNID < 0) { continue; } TUNGraph::TNodeI UI = G->GetNI(NewNID); NIDOPTV[NewNID] = 0; for (int e = 0; e < UI.GetDeg(); e++) { NIDOPTV[UI.GetNbrNId(e)] = 0; } } int OPTCnt = 0; for (int i = 0; i < NIDOPTV.Len(); i++) { if (NIDOPTV[i] == 1) { OPTCnt++; } } if (! 
PlotNm.Empty()) { printf("\r%d iterations [%s] %d secs", iter * ChunkSize * ChunkNum, ExeTm.GetTmStr(), int(TSecTm::GetCurTm().GetAbsSecs() - StartTm)); if (PrevL > TFlt::Mn) { printf(" (%f) %d g %d s %d OPT", PrevL, NumNoChangeGrad, NumNoChangeStepSize, OPTCnt); } fflush(stdout); } if ((iter - PrevIter) * ChunkSize * ChunkNum >= G->GetNodes()) { PrevIter = iter; double CurL = Likelihood(true); IterLV.Add(TIntFltPr(iter * ChunkSize * ChunkNum, CurL)); printf("\r%d iterations, Likelihood: %f, Diff: %f [%d secs]", iter, CurL, CurL - PrevL, int(time(NULL) - InitTime)); fflush(stdout); if (CurL - PrevL <= Thres * fabs(PrevL)) { break; } else { PrevL = CurL; } } } if (! PlotNm.Empty()) { printf("\nMLE completed with %d iterations(%s secs)\n", iter, int(TSecTm::GetCurTm().GetAbsSecs() - StartTm)); TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");[] } else {
int TAGMFast::MLEGradAscent(const double& Thres, const int& MaxIter, const TStr PlotNm, const double StepAlpha, const double StepBeta) { time_t InitTime = time(NULL); TExeTm ExeTm, CheckTm; int iter = 0, PrevIter = 0; TIntFltPrV IterLV; TUNGraph::TNodeI UI; double PrevL = TFlt::Mn, CurL = 0.0; TIntV NIdxV(F.Len(), 0); for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); } IAssert(NIdxV.Len() == F.Len()); TIntFltH GradV; while(iter < MaxIter) { NIdxV.Shuffle(Rnd); for (int ui = 0; ui < F.Len(); ui++, iter++) { int u = NIdxV[ui]; // //find set of candidate c (we only need to consider c to which a neighbor of u belongs to) UI = G->GetNI(u); TIntSet CIDSet(5 * UI.GetDeg()); for (int e = 0; e < UI.GetDeg(); e++) { if (HOVIDSV[u].IsKey(UI.GetNbrNId(e))) { continue; } TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)]; for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) { CIDSet.AddKey(CI.GetKey()); } } for (TIntFltH::TIter CI = F[u].BegI(); CI < F[u].EndI(); CI++) { //remove the community membership which U does not share with its neighbors if (! CIDSet.IsKey(CI.GetKey())) { DelCom(u, CI.GetKey()); } } if (CIDSet.Empty()) { continue; } GradientForRow(u, GradV, CIDSet); if (Norm2(GradV) < 1e-4) { continue; } double LearnRate = GetStepSizeByLineSearch(u, GradV, GradV, StepAlpha, StepBeta); if (LearnRate == 0.0) { continue; } for (int ci = 0; ci < GradV.Len(); ci++) { int CID = GradV.GetKey(ci); double Change = LearnRate * GradV.GetDat(CID); double NewFuc = GetCom(u, CID) + Change; if (NewFuc <= 0.0) { DelCom(u, CID); } else { AddCom(u, CID, NewFuc); } } if (! PlotNm.Empty() && (iter + 1) % G->GetNodes() == 0) { IterLV.Add(TIntFltPr(iter, Likelihood(false))); } } printf("\r%d iterations (%f) [%lu sec]", iter, CurL, time(NULL) - InitTime); fflush(stdout); if (iter - PrevIter >= 2 * G->GetNodes() && iter > 10000) { PrevIter = iter; CurL = Likelihood(); if (PrevL > TFlt::Mn && ! 
PlotNm.Empty()) { printf("\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL, CurL - PrevL); } fflush(stdout); if (CurL - PrevL <= Thres * fabs(PrevL)) { break; } else { PrevL = CurL; } } } printf("\n"); printf("MLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.GetTmStr()); if (! PlotNm.Empty()) { TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q"); } return iter; }
void TAGMFast::NeighborComInit(const int InitComs) { //initialize with best neighborhood communities (Gleich et.al. KDD'12) F.Gen(G->GetNodes()); SumFV.Gen(InitComs); NumComs = InitComs; const int Edges = G->GetEdges(); //TIntFltH NCPhiH(F.Len()); TFltIntPrV NIdPhiV(F.Len(), 0); TIntSet InvalidNIDS(F.Len()); TIntV ChosenNIDV(InitComs, 0); //FOR DEBUG TExeTm RunTm; //compute conductance of neighborhood community for (int u = 0; u < F.Len(); u++) { TIntSet NBCmty(G->GetNI(u).GetDeg() + 1); double Phi; if (G->GetNI(u).GetDeg() < 5) { //do not include nodes with too few degree Phi = 1.0; } else { TAGMUtil::GetNbhCom(G, u, NBCmty); IAssert(NBCmty.Len() == G->GetNI(u).GetDeg() + 1); Phi = TAGMUtil::GetConductance(G, NBCmty, Edges); } //NCPhiH.AddDat(u, Phi); NIdPhiV.Add(TFltIntPr(Phi, u)); } NIdPhiV.Sort(true); printf("conductance computation completed [%s]\n", RunTm.GetTmStr()); fflush(stdout); //choose nodes with local minimum in conductance int CurCID = 0; for (int ui = 0; ui < NIdPhiV.Len(); ui++) { int UID = NIdPhiV[ui].Val2; fflush(stdout); if (InvalidNIDS.IsKey(UID)) { continue; } ChosenNIDV.Add(UID); //FOR DEBUG //add the node and its neighbors to the current community AddCom(UID, CurCID, 1.0); TUNGraph::TNodeI NI = G->GetNI(UID); fflush(stdout); for (int e = 0; e < NI.GetDeg(); e++) { AddCom(NI.GetNbrNId(e), CurCID, 1.0); } //exclude its neighbors from the next considerations for (int e = 0; e < NI.GetDeg(); e++) { InvalidNIDS.AddKey(NI.GetNbrNId(e)); } CurCID++; fflush(stdout); if (CurCID >= NumComs) { break; } } if (NumComs > CurCID) { printf("%d communities needed to fill randomly\n", NumComs - CurCID); } //assign a member to zero-member community (if any) for (int c = 0; c < SumFV.Len(); c++) { if (SumFV[c] == 0.0) { int ComSz = 10; for (int u = 0; u < ComSz; u++) { int UID = Rnd.GetUniDevInt(G->GetNodes()); AddCom(UID, c, Rnd.GetUniDev()); } } } }
void TAGMFast::GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet) { GradU.Gen(CIDSet.Len()); TFltV HOSumFV; //adjust for Fv of v hold out if (HOVIDSV[UID].Len() > 0) { HOSumFV.Gen(SumFV.Len()); for (int e = 0; e < HOVIDSV[UID].Len(); e++) { for (int c = 0; c < SumFV.Len(); c++) { HOSumFV[c] += GetCom(HOVIDSV[UID][e], c); } } } TUNGraph::TNodeI NI = G->GetNI(UID); int Deg = NI.GetDeg(); TFltV PredV(Deg), GradV(CIDSet.Len()); TIntV CIDV(CIDSet.Len()); if (DoParallel && Deg + CIDSet.Len() > 10) { #pragma omp parallel for schedule(static, 1) for (int e = 0; e < Deg; e++) { if (NI.GetNbrNId(e) == UID) { continue; } if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; } PredV[e] = Prediction(UID, NI.GetNbrNId(e)); } #pragma omp parallel for schedule(static, 1) for (int c = 0; c < CIDSet.Len(); c++) { int CID = CIDSet.GetKey(c); double Val = 0.0; for (int e = 0; e < Deg; e++) { int VID = NI.GetNbrNId(e); if (VID == UID) { continue; } if (HOVIDSV[UID].IsKey(VID)) { continue; } Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID); } double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID)); CIDV[c] = CID; GradV[c] = Val; } } else { for (int e = 0; e < Deg; e++) { if (NI.GetNbrNId(e) == UID) { continue; } if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; } PredV[e] = Prediction(UID, NI.GetNbrNId(e)); } for (int c = 0; c < CIDSet.Len(); c++) { int CID = CIDSet.GetKey(c); double Val = 0.0; for (int e = 0; e < Deg; e++) { int VID = NI.GetNbrNId(e); if (VID == UID) { continue; } if (HOVIDSV[UID].IsKey(VID)) { continue; } Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID); } double HOSum = HOVIDSV[UID].Len() > 0? 
HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID)); CIDV[c] = CID; GradV[c] = Val; } } //add regularization if (RegCoef > 0.0) { //L1 for (int c = 0; c < GradV.Len(); c++) { GradV[c] -= RegCoef; } } if (RegCoef < 0.0) { //L2 for (int c = 0; c < GradV.Len(); c++) { GradV[c] += 2 * RegCoef * GetCom(UID, CIDV[c]); } } for (int c = 0; c < GradV.Len(); c++) { if (GetCom(UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; } if (fabs(GradV[c]) < 0.0001) { continue; } GradU.AddDat(CIDV[c], GradV[c]); } for (int c = 0; c < GradU.Len(); c++) { if (GradU[c] >= 10) { GradU[c] = 10; } if (GradU[c] <= -10) { GradU[c] = -10; } IAssert(GradU[c] >= -10); } }
void sample (const int *m, const int *n, const int *h, const int *ns, const int *in, const int *infection_state, const int *mde, const int *bi, const int *br, double * result) { const int nodes = *h; const int nval = (*n)/2; int num_seeds = *ns; int infect_type = *in; int mode = *mde; int burnin = *bi; int branch = *br; PUNGraph g = get_PUNGraph (m, nval, nodes); THash<TInt, TInt> * visited = choose_seeds (g, num_seeds, infection_state, infect_type); TVec <VisitedNode *> queue; TIntV qids; for (THash<TInt, TInt>::TIter n = visited->BegI(); n != visited->EndI(); n++) { queue = queue + new VisitedNode (n->Key); qids = qids + n->Key; //cerr << "enqueued " << n->Key << endl; } TInt counted = 0; TInt first_unprocessed = 0; TFlt infected_mass = 0.0; TFlt total_mass = 0.0; TFlt revisits = 0.0; TFlt trehits = 0.0; //cerr << "nodeId\tneigh\tnbh_size\tinfected?\tinfected_mass\ttotal_mass" << endl; while (counted < 500 && first_unprocessed < queue.Len()) { VisitedNode * current_node = queue [first_unprocessed]; first_unprocessed++; TUNGraph::TNodeI NI = g->GetNI (current_node->id); TInt neighborhood_size = NI.GetDeg(); // cerr << counted << " " << current_node->id << endl; if (counted >= burnin) { if (infection_state[(current_node->id) - 1] == 1) infected_mass += 1.0/TFlt(neighborhood_size); total_mass += 1.0/TFlt(neighborhood_size); } //cerr << current_node->id << "\t" << neighborhood_size << "\t" << (1.0/TFlt(neighborhood_size)) // << "\t" << infection_state[(current_node->id) - 1] << "\t" << infected_mass << "\t" << total_mass << endl; // build list of unvisited neighbors TVec<TInt> neighbors; for (int i = 0; i < neighborhood_size; i++) { TInt neighbor = NI.GetNbrNId(i); if (mode == 0 && visited->IsKey(neighbor)) continue; else if (mode == 2 && isChild (current_node, neighbor)) continue; else if (mode == 3 && current_node-> previous != NULL && current_node->previous->id == neighbor) continue; else neighbors = neighbors + neighbor; } TInt num_legal_neighbors = 
neighbors.Len(); TInt sample_size = TMath::Mn<TInt> (branch, num_legal_neighbors); THash <TInt, TInt> * choices = choose (num_legal_neighbors, sample_size); for (THash<TInt, TInt>::TIter n = choices->BegI(); n != choices->EndI(); n++) { if (queue.Len() >= 500) break; queue = queue + new VisitedNode (neighbors[n->Key], current_node); if (visited->IsKey(neighbors[n->Key])) revisits++; if (isChild(current_node, neighbors[n->Key])) trehits++; if (!visited->IsKey(neighbors[n->Key])) qids = qids + neighbors[n->Key]; visited->AddDat(neighbors[n->Key], 1); } counted++; } // cout << (infected_mass / total_mass) << endl; delete (visited); result[0] = (infected_mass / total_mass); result[1] = revisits; result[2] = trehits; result[3] = counted; //PUNGraph p (&g); PUNGraph p = TSnap:: GetSubGraph (g, qids, false); TCnComV convec; result[4] = TSnap::GetClustCf(p, -1); TSnap::GetWccs(p, convec); result[5] = convec.Len(); result[6] = ave_path_length (p); }