// Test GetNodeClustCf (Vector) TEST(triad, TestGetNodeClustCfVector) { // Test TUNGraph PUNGraph GraphTUN = TriadGetTestTUNGraph(); TIntFltH NIdCCfH; TSnap::GetNodeClustCf(GraphTUN, NIdCCfH); for (int i = 0; i < GraphTUN->GetNodes(); i++) { double ClustCf = NIdCCfH.GetDat(i); VerifyNodeClustCf(i, ClustCf); } // TNGraph should be treated as TUNGraph for calculations PNGraph GraphTN = TriadGetTestTNGraph(); NIdCCfH.Clr(); TSnap::GetNodeClustCf(GraphTN, NIdCCfH); for (int i = 0; i < GraphTN->GetNodes(); i++) { double ClustCf = NIdCCfH.GetDat(i); VerifyNodeClustCf(i, ClustCf); } // TNEGraph should be treated as TUNGraph for calculations PNEGraph GraphTNE = TriadGetTestTNEGraph(); NIdCCfH.Clr(); TSnap::GetNodeClustCf(GraphTNE, NIdCCfH); for (int i = 0; i < GraphTNE->GetNodes(); i++) { double ClustCf = NIdCCfH.GetDat(i); VerifyNodeClustCf(i, ClustCf); } }
void GetEigenVectorCentr(const PUNGraph& Graph, TIntFltH& EigenH, const double& Eps, const int& MaxIter) { const int NNodes = Graph->GetNodes(); EigenH.Gen(NNodes); for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { EigenH.AddDat(NI.GetId(), 1.0/NNodes); IAssert(NI.GetId() == EigenH.GetKey(EigenH.Len()-1)); } TFltV TmpV(NNodes); double diff = TFlt::Mx; for (int iter = 0; iter < MaxIter; iter++) { int j = 0; for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) { TmpV[j] = 0; for (int e = 0; e < NI.GetOutDeg(); e++) { TmpV[j] += EigenH.GetDat(NI.GetOutNId(e)); } } double sum = 0; for (int i = 0; i < TmpV.Len(); i++) { EigenH[i] = TmpV[i]; sum += EigenH[i]; } for (int i = 0; i < EigenH.Len(); i++) { EigenH[i] /= sum; } if (fabs(diff-sum) < Eps) { break; } //printf("\tdiff:%f\tsum:%f\n", fabs(diff-sum), sum); diff = sum; } }
void GetEigenVectorCentr(const PUNGraph& Graph, TIntFltH& NIdEigenH, const double& Eps, const int& MaxIter) { const int NNodes = Graph->GetNodes(); NIdEigenH.Gen(NNodes); // initialize vector values for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { NIdEigenH.AddDat(NI.GetId(), 1.0 / NNodes); IAssert(NI.GetId() == NIdEigenH.GetKey(NIdEigenH.Len() - 1)); } TFltV TmpV(NNodes); for (int iter = 0; iter < MaxIter; iter++) { int j = 0; // add neighbor values for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) { TmpV[j] = 0; for (int e = 0; e < NI.GetOutDeg(); e++) { TmpV[j] += NIdEigenH.GetDat(NI.GetOutNId(e)); } } // normalize double sum = 0; for (int i = 0; i < TmpV.Len(); i++) { sum += (TmpV[i] * TmpV[i]); } sum = sqrt(sum); for (int i = 0; i < TmpV.Len(); i++) { TmpV[i] /= sum; } // compute difference double diff = 0.0; j = 0; for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) { diff += fabs(NIdEigenH.GetDat(NI.GetId()) - TmpV[j]); } // set new values j = 0; for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) { NIdEigenH.AddDat(NI.GetId(), TmpV[j]); } if (diff < Eps) { break; } } }
int GetWeightedPageRankMP1(const PNEANet Graph, TIntFltH& PRankH, const TStr& Attr, const double& C, const double& Eps, const int& MaxIter) { if (!Graph->IsFltAttrE(Attr)) return -1; TFltV Weights = Graph->GetFltAttrVecE(Attr); int mxid = Graph->GetMxNId(); TFltV OutWeights(mxid); Graph->GetWeightOutEdgesV(OutWeights, Weights); /*for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { OutWeights[NI.GetId()] = Graph->GetWeightOutEdges(NI, Attr); }*/ /*TIntFltH Weights; for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { Weights.AddDat(NI.GetId(), Graph->GetWeightOutEdges(NI, Attr)); }*/ const int NNodes = Graph->GetNodes(); TVec<TNEANet::TNodeI> NV; //const double OneOver = 1.0/double(NNodes); PRankH.Gen(NNodes); for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { NV.Add(NI); PRankH.AddDat(NI.GetId(), 1.0/NNodes); //IAssert(NI.GetId() == PRankH.GetKey(PRankH.Len()-1)); } TFltV TmpV(NNodes); for (int iter = 0; iter < MaxIter; iter++) { #pragma omp parallel for schedule(dynamic,10000) for (int j = 0; j < NNodes; j++) { TNEANet::TNodeI NI = NV[j]; TmpV[j] = 0; for (int e = 0; e < NI.GetInDeg(); e++) { const int InNId = NI.GetInNId(e); const TFlt OutWeight = OutWeights[InNId]; int EId = Graph->GetEId(InNId, NI.GetId()); const TFlt Weight = Weights[Graph->GetFltKeyIdE(EId)]; if (OutWeight > 0) { TmpV[j] += PRankH.GetDat(InNId) * Weight / OutWeight; } } TmpV[j] = C*TmpV[j]; // Berkhin (the correct way of doing it) //TmpV[j] = C*TmpV[j] + (1.0-C)*OneOver; // iGraph } double diff=0, sum=0, NewVal; #pragma omp parallel for reduction(+:sum) schedule(dynamic,10000) for (int i = 0; i < TmpV.Len(); i++) { sum += TmpV[i]; } const double Leaked = (1.0-sum) / double(NNodes); #pragma omp parallel for reduction(+:diff) schedule(dynamic,10000) for (int i = 0; i < PRankH.Len(); i++) { // re-instert leaked PageRank NewVal = TmpV[i] + Leaked; // Berkhin //NewVal = TmpV[i] / sum; // iGraph diff += fabs(NewVal-PRankH[i]); PRankH[i] = NewVal; } if (diff < Eps) { break; } } return 0; }
// Computes GINI coefficient of egonet as a subset of the parent graph (edges into and out of the egonet ARE considered) double TSnap::GetGiniCoefficient(const TIntFltH DegH, const TIntV NIdV) { typename TIntV::TIter VI; typename TFltV::TIter DI; TFltV DegV; const int n = NIdV.Len(); // DegV.Gen(n); // NOTE: don't use Gen() and Sort() on the same object (!) for (VI = NIdV.BegI(); VI < NIdV.EndI(); VI++) { DegV.Add(DegH.GetDat(VI->Val)); // might need to change this (in / out / undirected) } DegV.Sort(); int i = 0; double numerator = 0.0, denominator = 0.0; for (DI = DegV.BegI(); DI < DegV.EndI(); DI++, i++) { numerator += (i + 1)*DegV[i]; denominator += DegV[i]; } return(double(2*numerator) / double(n*denominator) - double(n + 1) / double(n)); }
void getPageRankFromVect(const PNGraph& graph, std::vector<int> srcIds, std::vector<int> dstIds, int sampleSz, char* fileName) { std::random_shuffle(srcIds.begin(), srcIds.end()); std::random_shuffle(dstIds.begin(), dstIds.end()); std::ofstream outputFile; outputFile.open(fileName); for (int i = 0; i < sampleSz; ) { int srcNodeId = srcIds[rand() % srcIds.size()]; int dstNodeId = dstIds[rand() % dstIds.size()]; if (!graph->IsNode(srcNodeId)) continue; if (!graph->IsNode(dstNodeId)) continue; int shortPath = TSnap::GetShortPath(graph, srcNodeId, dstNodeId, true); if (shortPath > 4 || shortPath <= 2) continue; PNGraph subgraph = getFourHopGraph(graph, srcNodeId, dstNodeId); TIntFltH pageRankScores; TSnap::GetPageRank(subgraph, pageRankScores); // Calculate total PR score; /* double totalPR = 0.0; for (TIntFltH::TIter itr = pageRankScores.BegI(); itr != pageRankScores.EndI(); itr++) { totalPR += itr.GetDat(); }*/ int numOfNodesInSubGraph = subgraph->GetNodes(); double normalizedSrcPR = pageRankScores.GetDat(srcNodeId) * numOfNodesInSubGraph; double normalizedDstPR = pageRankScores.GetDat(dstNodeId) * numOfNodesInSubGraph; char buffer[100]; printf("%d, %d\n", i, numOfNodesInSubGraph); sprintf(buffer, "%d\t%f\t%d\t%f", srcNodeId, normalizedSrcPR, dstNodeId, normalizedDstPR); std::cout << buffer << std::endl; outputFile << buffer << std::endl; ++i; } outputFile.close(); }
double TAGMFast::GetStepSizeByLineSearch(const int UID, const TIntFltH& DeltaV, const TIntFltH& GradV, const double& Alpha, const double& Beta, const int MaxIter) { double StepSize = 1.0; double InitLikelihood = LikelihoodForRow(UID); TIntFltH NewVarV(DeltaV.Len()); for(int iter = 0; iter < MaxIter; iter++) { for (int i = 0; i < DeltaV.Len(); i++){ int CID = DeltaV.GetKey(i); double NewVal = GetCom(UID, CID) + StepSize * DeltaV.GetDat(CID); if (NewVal < MinVal) { NewVal = MinVal; } if (NewVal > MaxVal) { NewVal = MaxVal; } NewVarV.AddDat(CID, NewVal); } if (LikelihoodForRow(UID, NewVarV) < InitLikelihood + Alpha * StepSize * DotProduct(GradV, DeltaV)) { StepSize *= Beta; } else { break; } if (iter == MaxIter - 1) { StepSize = 0.0; break; } } return StepSize; }
void TNetInfBs::GenCascade(TCascade& C, const int& TModel, const double &window, TIntPrIntH& EdgesUsed, const double& delta, const double& std_waiting_time, const double& std_beta) { TIntFltH InfectedNIdH; TIntH InfectedBy; double GlobalTime; int StartNId; double alpha, beta; if (GroundTruth->GetNodes() == 0) return; while (C.Len() < 2) { C.Clr(); InfectedNIdH.Clr(); InfectedBy.Clr(); GlobalTime = 0; StartNId = GroundTruth->GetRndNId(); InfectedNIdH.AddDat(StartNId) = GlobalTime; while (true) { // sort by time & get the oldest node that did not run infection InfectedNIdH.SortByDat(true); const int& NId = InfectedNIdH.BegI().GetKey(); GlobalTime = InfectedNIdH.BegI().GetDat(); // all the nodes has run infection if (GlobalTime >= window) break; // add current oldest node to the network and set its time C.Add(NId, GlobalTime); // run infection from the current oldest node const TNGraph::TNodeI NI = GroundTruth->GetNI(NId); for (int e = 0; e < NI.GetOutDeg(); e++) { const int DstNId = NI.GetOutNId(e); beta = Betas.GetDat(TIntPr(NId, DstNId)); // flip biased coin (set by beta) if (TInt::Rnd.GetUniDev() > beta+std_beta*TFlt::Rnd.GetNrmDev()) continue; alpha = Alphas.GetDat(TIntPr(NId, DstNId)); // not infecting the parent if (InfectedBy.IsKey(NId) && InfectedBy.GetDat(NId).Val == DstNId) continue; double sigmaT; switch (TModel) { case 0: // exponential with alpha parameter sigmaT = TInt::Rnd.GetExpDev(alpha); break; case 1: // power-law with alpha parameter sigmaT = TInt::Rnd.GetPowerDev(alpha); while (sigmaT < delta) { sigmaT = TInt::Rnd.GetPowerDev(alpha); } break; case 2: // rayleigh with alpha parameter sigmaT = TInt::Rnd.GetRayleigh(1/sqrt(alpha)); break; default: sigmaT = 1; break; } // avoid negative time diffs in case of noise if (std_waiting_time > 0) sigmaT = TFlt::GetMx(0.0, sigmaT + std_waiting_time*TFlt::Rnd.GetNrmDev()); double t1 = GlobalTime + sigmaT; if (InfectedNIdH.IsKey(DstNId)) { double t2 = InfectedNIdH.GetDat(DstNId); if (t2 > t1 && t2 != window) { InfectedNIdH.GetDat(DstNId) = t1; InfectedBy.GetDat(DstNId) = NId; } } else { InfectedNIdH.AddDat(DstNId) = t1; InfectedBy.AddDat(DstNId) = NId; } } // we cannot delete key (otherwise, we cannot sort), so we assign a big time (window cut-off) InfectedNIdH.GetDat(NId) = window; } } C.Sort(); for (TIntH::TIter EI = InfectedBy.BegI(); EI < InfectedBy.EndI(); EI++) { TIntPr Edge(EI.GetDat().Val, EI.GetKey().Val); if (!EdgesUsed.IsKey(Edge)) EdgesUsed.AddDat(Edge) = 0; EdgesUsed.GetDat(Edge) += 1; } }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Inverse PageRank. Build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr Iput = Env.GetIfArgPrefixStr("-i:", "Input.txt", "Input File" ); const TStr Oput = Env.GetIfArgPrefixStr("-o:", "Output.txt", "Output File"); FILE* fpI = fopen(Iput.CStr(), "r"); FILE* fpO = fopen(Oput.CStr(), "w"); const double C = 0.85; const int MaxIter = 50; const double Eps = 1e-9; PNGraph Graph = TSnap::LoadEdgeList< PNGraph > (Iput); fprintf(fpO, "\nNodes: %d, Edges: %d\n\n", Graph->GetNodes(), Graph->GetEdges()); const int NNodes = Graph->GetNodes(); const double OneOver = (double) 1.0 / (double) NNodes; TIntFltH PRankH; PRankH.Gen(NNodes); for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) PRankH.AddDat(NI.GetId(), OneOver); TFltV TmpV(NNodes); for (int iter = 0; iter < MaxIter; iter++) { int j = 0; for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) { TmpV[j] = 0; for (int e = 0; e < NI.GetOutDeg(); e++) { const int OutNId = NI.GetOutNId(e); const int InDeg = Graph->GetNI(OutNId).GetInDeg(); if (InDeg > 0) TmpV[j] += PRankH.GetDat(OutNId) / InDeg; } TmpV[j] = C * TmpV[j]; } for (int i = 0; i < PRankH.Len(); i++) PRankH[i] = TmpV[i]; /* double diff = 0, sum = 0, NewVal; for (int i = 0; i < TmpV.Len(); i++) sum += TmpV[i]; const double Leaked = (double) (1.0 - sum) / (double) NNodes; for (int i = 0; i < PRankH.Len(); i++) { NewVal = TmpV[i] + Leaked; diff += fabs(NewVal - PRankH[i]); PRankH[i] = NewVal; } if (diff < Eps) break; */ } fprintf(fpO, "Node ID\t\tInverse PageRank\n"); for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++){ int Id = NI.GetId(); double ipr = PRankH.GetDat(Id); fprintf(fpO, "%d\t\t\t%.5lf\n", Id, ipr); } Catch printf("\nRun Time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int GetWeightedPageRankMP2(const PNEANet Graph, TIntFltH& PRankH, const TStr& Attr, const double& C, const double& Eps, const int& MaxIter) { if (!Graph->IsFltAttrE(Attr)) return -1; const int NNodes = Graph->GetNodes(); TVec<TNEANet::TNodeI> NV; //const double OneOver = 1.0/double(NNodes); PRankH.Gen(NNodes); int MxId; for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { NV.Add(NI); PRankH.AddDat(NI.GetId(), 1.0/NNodes); int Id = NI.GetId(); if (Id > MxId) { MxId = Id; } } TFltV PRankV(MxId+1); TFltV OutWeights(MxId+1); TFltV Weights = Graph->GetFltAttrVecE(Attr); #pragma omp parallel for schedule(dynamic,10000) for (int j = 0; j < NNodes; j++) { TNEANet::TNodeI NI = NV[j]; int Id = NI.GetId(); OutWeights[Id] = Graph->GetWeightOutEdges(NI, Attr); PRankV[Id] = 1/NNodes; } TFltV TmpV(NNodes); for (int iter = 0; iter < MaxIter; iter++) { #pragma omp parallel for schedule(dynamic,10000) for (int j = 0; j < NNodes; j++) { TNEANet::TNodeI NI = NV[j]; TFlt Tmp = 0; for (int e = 0; e < NI.GetInDeg(); e++) { const int InNId = NI.GetInNId(e); const TFlt OutWeight = OutWeights[InNId]; int EId = Graph->GetEId(InNId, NI.GetId()); const TFlt Weight = Weights[Graph->GetFltKeyIdE(EId)]; if (OutWeight > 0) { Tmp += PRankH.GetDat(InNId) * Weight / OutWeight; } } TmpV[j] = C*Tmp; // Berkhin (the correct way of doing it) //TmpV[j] = C*TmpV[j] + (1.0-C)*OneOver; // iGraph } double sum = 0; #pragma omp parallel for reduction(+:sum) schedule(dynamic,10000) for (int i = 0; i < TmpV.Len(); i++) { sum += TmpV[i]; } const double Leaked = (1.0-sum) / double(NNodes); double diff = 0; #pragma omp parallel for reduction(+:diff) schedule(dynamic,10000) for (int i = 0; i < NNodes; i++) { TNEANet::TNodeI NI = NV[i]; double NewVal = TmpV[i] + Leaked; // Berkhin //NewVal = TmpV[i] / sum; // iGraph int Id = NI.GetId(); diff += fabs(NewVal-PRankV[Id]); PRankV[Id] = NewVal; } if (diff < Eps) { break; } } #pragma omp parallel for schedule(dynamic,10000) for (int i = 0; i < NNodes; i++) { TNEANet::TNodeI NI = NV[i]; PRankH[i] = PRankV[NI.GetId()]; } return 0; }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Trust Rank. Build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr Gnod = Env.GetIfArgPrefixStr("-g:", "Gnode.txt", "Good Nodes"); const TStr Bnod = Env.GetIfArgPrefixStr("-b:", "Bnode.txt", "Bad Nodes" ); const TStr Iput = Env.GetIfArgPrefixStr("-i:", "Input.txt", "Input File"); const TStr Oput = Env.GetIfArgPrefixStr("-o:", "Output.txt", "Output File"); const double C = 0.85; const int MaxIter = 50; const double Eps = 1e-9; FILE* fpO = fopen(Oput.CStr(), "w"); PNGraph Graph = TSnap::LoadEdgeList< PNGraph > (Iput); fprintf(fpO, "\nNodes: %d, Edges: %d\n\n", Graph->GetNodes(), Graph->GetEdges()); const int NNodes = Graph->GetNodes(); TIntFltH TRankH; TRankH.Gen(NNodes); int maxNId = 0, NId = 0, ret = 0; for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) maxNId = max(maxNId, NI.GetId()); TFltV initialTrustScore(maxNId + 1); for (int i = 0; i < initialTrustScore.Len(); i++) initialTrustScore[i] = 0.5; FILE* fpI = fopen(Gnod.CStr(), "r"); while (true) { ret = fscanf(fpI, "%d", &NId); if (ret == EOF) break; if (Graph->IsNode(NId)) initialTrustScore[NId] = 1.0; } fclose(fpI); fpI = fopen(Bnod.CStr(), "r"); while (true) { ret = fscanf(fpI, "%d", &NId); if (ret == EOF) break; if (Graph->IsNode(NId)) initialTrustScore[NId] = 0.0; } fclose(fpI); double Tot = 0.0; for(int i = 0; i < initialTrustScore.Len(); i++) Tot += initialTrustScore[i]; for(int i = 0; i < initialTrustScore.Len(); i++) initialTrustScore[i] /= Tot; for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) TRankH.AddDat( NI.GetId(), initialTrustScore[NI.GetId()] ); TFltV TmpV(NNodes); for (int iter = 0; iter < MaxIter; iter++) { int j = 0; for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) { TmpV[j] = 0; for (int e = 0; e < NI.GetOutDeg(); e++) { const int OutNId = NI.GetOutNId(e); const int InDeg = Graph->GetNI(InNId).GetInDeg(); if (InDeg > 0) TmpV[j] += (double) TRankH.GetDat(OutNId) / (double) InDeg; } TmpV[j] = C * TmpV[j] + (1.0 - C) * initialTrustScore[NI.GetId()]; } for (int i = 0; i < TRankH.Len(); i++) TRankH[i] = TmpV[i]; } fprintf(fpO, "Node ID\t\tTrustRank\n"); for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++){ int Id = NI.GetId(); double tr = TRankH.GetDat(Id); fprintf(fpO, "%d\t\t\t%.5lf\n", Id, tr); } fclose(fpO); Catch printf("\nRun Time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int TAGMFast::MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha, const double StepBeta) { //parallel time_t InitTime = time(NULL); uint64 StartTm = TSecTm::GetCurTm().GetAbsSecs(); TExeTm ExeTm, CheckTm; double PrevL = Likelihood(true); TIntFltPrV IterLV; int PrevIter = 0; int iter = 0; TIntV NIdxV(F.Len(), 0); for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); } TIntV NIDOPTV(F.Len()); //check if a node needs optimization or not 1: does not require optimization NIDOPTV.PutAll(0); TVec<TIntFltH> NewF(ChunkNum * ChunkSize); TIntV NewNIDV(ChunkNum * ChunkSize); for (iter = 0; iter < MaxIter; iter++) { NIdxV.Clr(false); for (int i = 0; i < F.Len(); i++) { if (NIDOPTV[i] == 0) { NIdxV.Add(i); } } IAssert (NIdxV.Len() <= F.Len()); NIdxV.Shuffle(Rnd); // compute gradient for chunk of nodes #pragma omp parallel for schedule(static, 1) for (int TIdx = 0; TIdx < ChunkNum; TIdx++) { TIntFltH GradV; for (int ui = TIdx * ChunkSize; ui < (TIdx + 1) * ChunkSize; ui++) { NewNIDV[ui] = -1; if (ui > NIdxV.Len()) { continue; } int u = NIdxV[ui]; // //find set of candidate c (we only need to consider c to which a neighbor of u belongs to) TUNGraph::TNodeI UI = G->GetNI(u); TIntSet CIDSet(5 * UI.GetDeg()); TIntFltH CurFU = F[u]; for (int e = 0; e < UI.GetDeg(); e++) { if (HOVIDSV[u].IsKey(UI.GetNbrNId(e))) { continue; } TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)]; for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) { CIDSet.AddKey(CI.GetKey()); } } if (CIDSet.Empty()) { CurFU.Clr(); } else { for (TIntFltH::TIter CI = CurFU.BegI(); CI < CurFU.EndI(); CI++) { //remove the community membership which U does not share with its neighbors if (! CIDSet.IsKey(CI.GetKey())) { CurFU.DelIfKey(CI.GetKey()); } } GradientForRow(u, GradV, CIDSet); if (Norm2(GradV) < 1e-4) { NIDOPTV[u] = 1; continue; } double LearnRate = GetStepSizeByLineSearch(u, GradV, GradV, StepAlpha, StepBeta, 5); if (LearnRate <= 1e-5) { NewNIDV[ui] = -2; continue; } for (int ci = 0; ci < GradV.Len(); ci++) { int CID = GradV.GetKey(ci); double Change = LearnRate * GradV.GetDat(CID); double NewFuc = CurFU.IsKey(CID)? CurFU.GetDat(CID) + Change : Change; if (NewFuc <= 0.0) { CurFU.DelIfKey(CID); } else { CurFU.AddDat(CID) = NewFuc; } } CurFU.Defrag(); } //store changes NewF[ui] = CurFU; NewNIDV[ui] = u; } } int NumNoChangeGrad = 0; int NumNoChangeStepSize = 0; for (int ui = 0; ui < NewNIDV.Len(); ui++) { int NewNID = NewNIDV[ui]; if (NewNID == -1) { NumNoChangeGrad++; continue; } if (NewNID == -2) { NumNoChangeStepSize++; continue; } for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) { SumFV[CI.GetKey()] -= CI.GetDat(); } } #pragma omp parallel for for (int ui = 0; ui < NewNIDV.Len(); ui++) { int NewNID = NewNIDV[ui]; if (NewNID < 0) { continue; } F[NewNID] = NewF[ui]; } for (int ui = 0; ui < NewNIDV.Len(); ui++) { int NewNID = NewNIDV[ui]; if (NewNID < 0) { continue; } for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) { SumFV[CI.GetKey()] += CI.GetDat(); } } // update the nodes who are optimal for (int ui = 0; ui < NewNIDV.Len(); ui++) { int NewNID = NewNIDV[ui]; if (NewNID < 0) { continue; } TUNGraph::TNodeI UI = G->GetNI(NewNID); NIDOPTV[NewNID] = 0; for (int e = 0; e < UI.GetDeg(); e++) { NIDOPTV[UI.GetNbrNId(e)] = 0; } } int OPTCnt = 0; for (int i = 0; i < NIDOPTV.Len(); i++) { if (NIDOPTV[i] == 1) { OPTCnt++; } } if (! PlotNm.Empty()) { printf("\r%d iterations [%s] %d secs", iter * ChunkSize * ChunkNum, ExeTm.GetTmStr(), int(TSecTm::GetCurTm().GetAbsSecs() - StartTm)); if (PrevL > TFlt::Mn) { printf(" (%f) %d g %d s %d OPT", PrevL, NumNoChangeGrad, NumNoChangeStepSize, OPTCnt); } fflush(stdout); } if ((iter - PrevIter) * ChunkSize * ChunkNum >= G->GetNodes()) { PrevIter = iter; double CurL = Likelihood(true); IterLV.Add(TIntFltPr(iter * ChunkSize * ChunkNum, CurL)); printf("\r%d iterations, Likelihood: %f, Diff: %f [%d secs]", iter, CurL, CurL - PrevL, int(time(NULL) - InitTime)); fflush(stdout); if (CurL - PrevL <= Thres * fabs(PrevL)) { break; } else { PrevL = CurL; } } } if (! PlotNm.Empty()) { printf("\nMLE completed with %d iterations(%s secs)\n", iter, int(TSecTm::GetCurTm().GetAbsSecs() - StartTm)); TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");[] } else {
int TAGMFast::MLEGradAscent(const double& Thres, const int& MaxIter, const TStr PlotNm, const double StepAlpha, const double StepBeta) { time_t InitTime = time(NULL); TExeTm ExeTm, CheckTm; int iter = 0, PrevIter = 0; TIntFltPrV IterLV; TUNGraph::TNodeI UI; double PrevL = TFlt::Mn, CurL = 0.0; TIntV NIdxV(F.Len(), 0); for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); } IAssert(NIdxV.Len() == F.Len()); TIntFltH GradV; while(iter < MaxIter) { NIdxV.Shuffle(Rnd); for (int ui = 0; ui < F.Len(); ui++, iter++) { int u = NIdxV[ui]; // //find set of candidate c (we only need to consider c to which a neighbor of u belongs to) UI = G->GetNI(u); TIntSet CIDSet(5 * UI.GetDeg()); for (int e = 0; e < UI.GetDeg(); e++) { if (HOVIDSV[u].IsKey(UI.GetNbrNId(e))) { continue; } TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)]; for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) { CIDSet.AddKey(CI.GetKey()); } } for (TIntFltH::TIter CI = F[u].BegI(); CI < F[u].EndI(); CI++) { //remove the community membership which U does not share with its neighbors if (! CIDSet.IsKey(CI.GetKey())) { DelCom(u, CI.GetKey()); } } if (CIDSet.Empty()) { continue; } GradientForRow(u, GradV, CIDSet); if (Norm2(GradV) < 1e-4) { continue; } double LearnRate = GetStepSizeByLineSearch(u, GradV, GradV, StepAlpha, StepBeta); if (LearnRate == 0.0) { continue; } for (int ci = 0; ci < GradV.Len(); ci++) { int CID = GradV.GetKey(ci); double Change = LearnRate * GradV.GetDat(CID); double NewFuc = GetCom(u, CID) + Change; if (NewFuc <= 0.0) { DelCom(u, CID); } else { AddCom(u, CID, NewFuc); } } if (! PlotNm.Empty() && (iter + 1) % G->GetNodes() == 0) { IterLV.Add(TIntFltPr(iter, Likelihood(false))); } } printf("\r%d iterations (%f) [%lu sec]", iter, CurL, time(NULL) - InitTime); fflush(stdout); if (iter - PrevIter >= 2 * G->GetNodes() && iter > 10000) { PrevIter = iter; CurL = Likelihood(); if (PrevL > TFlt::Mn && ! PlotNm.Empty()) { printf("\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL, CurL - PrevL); } fflush(stdout); if (CurL - PrevL <= Thres * fabs(PrevL)) { break; } else { PrevL = CurL; } } } printf("\n"); printf("MLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.GetTmStr()); if (! PlotNm.Empty()) { TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q"); } return iter; }
int main(int argc, char* argv[]) { setbuf(stdout, NULL); // disables the buffer so that print statements are not buffered and display immediately (?) Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Node centrality. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "", "input network"); const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "output prefix (filename extensions added)"); const TStr BseFNm = OutFNm.RightOfLast('/'); const int k = Env.GetIfArgPrefixInt("-k:", 1, "depth of weighted degree distributions (1 / 2 / ...)"); const bool c = Env.GetIfArgPrefixBool("-c:", false, "collate centralities into matrix (T / F)"); // Load graph and create directed and undirected graphs (pointer to the same memory) printf("\nLoading %s...", InFNm.CStr()); PFltWNGraph WGraph = TSnap::LoadFltWEdgeList<TWNGraph>(InFNm); printf(" DONE\n"); printf(" nodes: %d\n", WGraph->GetNodes()); printf(" edges: %d\n", WGraph->GetEdges()); printf(" time elapsed: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); // Declare variables TIntFltVH FirstWDegVH; TIntFltVH kWInDegVH, kWOutDegVH, kWDegVH; TIntFltVH WDegCentrVH, WEigCentrVH; TFltV WEigDiffV; TIntFltH WPgRH; double WPgRDiff; TFltWNGraph::TNodeI NI; TFltV::TIter VI; // CENTRALITY (computations) // Weighted first degree distributions printf("\nComputing weighted degree distributions..."); TSnap::GetWDegVH(WGraph, FirstWDegVH); printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); // 1:k degree distributions printf("Computing egonet degrees for k = 1 to %d (in / out / undirected)\n", k); TSnap::TFixedMemorykWDeg<TFlt, TWNGraph> FixedMemorykWDeg(WGraph, k); printf(" ..."); FixedMemorykWDeg.GetkWInDegSeqH(kWInDegVH); printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); printf(" ..."); FixedMemorykWDeg.GetkWOutDegSeqH(kWOutDegVH); printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); printf(" ..."); FixedMemorykWDeg.GetkWDegSeqH(kWDegVH); printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); // Centrality measures printf("Computing weighted degree centrality..."); TSnap::GetWDegreeCentrVH(WGraph, WDegCentrVH, 0.5); printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); printf("Computing weighted eigenvector centrality..."); WEigDiffV = TSnap::GetWEigenVectorCentrVH<TFlt>(WGraph, WEigCentrVH, 1e-4, 1000); printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); printf(" convergence differences (in / out / undirected)\n"); printf(" %f\n", double(WEigDiffV[0])); printf(" %f\n", double(WEigDiffV[1])); printf(" %f\n", double(WEigDiffV[2])); printf("Computing weighted PageRank centrality..."); WPgRDiff = TSnap::GetWPageRank<TFlt>(WGraph, WPgRH, 0.85, 1e-4, 1000); printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); printf(" convergence difference: %f\n", double(WPgRDiff)); // OUTPUTTING (mostly verbose printing statements, don't get scared) if (c) { printf("\nSaving %s.wcentr...", BseFNm.CStr()); const TStr AggFNm = TStr::Fmt("%s.wcentr", OutFNm.CStr()); FILE *F = fopen(AggFNm.CStr(), "wt"); fprintf(F,"# Node centrality distributions on the directed / undirected graph (as applicable)\n"); fprintf(F,"# Nodes: %d\tEdges: %d\n", WGraph->GetNodes(), WGraph->GetEdges()); fprintf(F,"# NodeId\tWInDegCentr\tWOutDegCentr\tWDegCentr\tWInEigCentr\tWOutEigCentr\tWEigCentr\tWPgRCentr\n"); for (NI = WGraph->BegNI(); NI < WGraph->EndNI(); NI++) { const int NId = NI.GetId(); fprintf(F, "%d", NId); const TFltV WDegCentrV = WDegCentrVH.GetDat(NId); for (VI = WDegCentrV.BegI(); VI < WDegCentrV.EndI(); VI++) { fprintf(F, "\t%f", VI->Val); } const TFltV WEigCentrV = WEigCentrVH.GetDat(NId); for (VI = WEigCentrV.BegI(); VI < WEigCentrV.EndI(); VI++) { fprintf(F, "\t%f", VI->Val); } const double WPgRCentr = WPgRH.GetDat(NId); fprintf(F, "\t%f", WPgRCentr); fprintf(F, "\n"); } printf(" DONE\n"); } else { printf("\nSaving %s.wdeg.centr...", BseFNm.CStr()); TSnap::SaveTxt(WDegCentrVH, TStr::Fmt("%s.wdeg.centr", OutFNm.CStr()), "Weighted degree centrality (in / out / undirected)", "NodeId", "WInDegCentr\tWOutDegCentr\tWDegCentr"); printf(" DONE\n"); printf("Saving %s.weig...", BseFNm.CStr()); TSnap::SaveTxt(WEigCentrVH, TStr::Fmt("%s.weig", OutFNm.CStr()), "Weighted eigenvector centrality (in / out / undirected)", "NodeId", "WInEigCentr\tWOutEigCentr\tWEigCentr"); printf(" DONE\n"); printf("Saving %s.wpgr...", BseFNm.CStr()); TSnap::SaveTxt(WPgRH, TStr::Fmt("%s.wpgr", OutFNm.CStr()), "Weighted PageRank centrality (wpgr)", "NodeId", "WPageRank"); printf(" DONE\n"); } printf("Saving %s.wdeg...", BseFNm.CStr()); TSnap::SaveTxt(FirstWDegVH, TStr::Fmt("%s.wdeg", OutFNm.CStr()), "Weighted degree distributions (in / out / undirected)", "NodeId", "WInDeg\tWOutDeg\tWDeg"); printf(" DONE\n"); printf("Saving %s.kwdeg.IN...", BseFNm.CStr()); TSnap::SaveTxt(kWInDegVH, TStr::Fmt("%s.kwdeg.IN", OutFNm.CStr()), TStr::Fmt("1 to %d weighted in degree distributions (kdeg.IN)", k), "NodeId", "kWInDegH"); printf(" DONE\n"); printf("Saving %s.kwdeg.OUT...", BseFNm.CStr()); TSnap::SaveTxt(kWOutDegVH, TStr::Fmt("%s.kwdeg.OUT", OutFNm.CStr()), TStr::Fmt("1 to %d weighted out degree distributions (kdeg.OUT)", k), "NodeId", "kWOutDegH"); printf(" DONE\n"); printf("Saving %s.kwdeg...", BseFNm.CStr()); TSnap::SaveTxt(kWDegVH, TStr::Fmt("%s.kwdeg", OutFNm.CStr()), TStr::Fmt("1 to %d weighted degree distributions (kdeg)", k), "NodeId", "kWDegH"); printf(" DONE\n"); Catch printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }