void GetEigenVectorCentr(const PUNGraph& Graph, TIntFltH& EigenH, const double& Eps, const int& MaxIter) { const int NNodes = Graph->GetNodes(); EigenH.Gen(NNodes); for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { EigenH.AddDat(NI.GetId(), 1.0/NNodes); IAssert(NI.GetId() == EigenH.GetKey(EigenH.Len()-1)); } TFltV TmpV(NNodes); double diff = TFlt::Mx; for (int iter = 0; iter < MaxIter; iter++) { int j = 0; for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) { TmpV[j] = 0; for (int e = 0; e < NI.GetOutDeg(); e++) { TmpV[j] += EigenH.GetDat(NI.GetOutNId(e)); } } double sum = 0; for (int i = 0; i < TmpV.Len(); i++) { EigenH[i] = TmpV[i]; sum += EigenH[i]; } for (int i = 0; i < EigenH.Len(); i++) { EigenH[i] /= sum; } if (fabs(diff-sum) < Eps) { break; } //printf("\tdiff:%f\tsum:%f\n", fabs(diff-sum), sum); diff = sum; } }
void GetEigenVectorCentr(const PUNGraph& Graph, TIntFltH& NIdEigenH, const double& Eps, const int& MaxIter) { const int NNodes = Graph->GetNodes(); NIdEigenH.Gen(NNodes); // initialize vector values for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { NIdEigenH.AddDat(NI.GetId(), 1.0 / NNodes); IAssert(NI.GetId() == NIdEigenH.GetKey(NIdEigenH.Len() - 1)); } TFltV TmpV(NNodes); for (int iter = 0; iter < MaxIter; iter++) { int j = 0; // add neighbor values for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) { TmpV[j] = 0; for (int e = 0; e < NI.GetOutDeg(); e++) { TmpV[j] += NIdEigenH.GetDat(NI.GetOutNId(e)); } } // normalize double sum = 0; for (int i = 0; i < TmpV.Len(); i++) { sum += (TmpV[i] * TmpV[i]); } sum = sqrt(sum); for (int i = 0; i < TmpV.Len(); i++) { TmpV[i] /= sum; } // compute difference double diff = 0.0; j = 0; for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) { diff += fabs(NIdEigenH.GetDat(NI.GetId()) - TmpV[j]); } // set new values j = 0; for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) { NIdEigenH.AddDat(NI.GetId(), TmpV[j]); } if (diff < Eps) { break; } } }
int GetWeightedPageRankMP1(const PNEANet Graph, TIntFltH& PRankH, const TStr& Attr, const double& C, const double& Eps, const int& MaxIter) { if (!Graph->IsFltAttrE(Attr)) return -1; TFltV Weights = Graph->GetFltAttrVecE(Attr); int mxid = Graph->GetMxNId(); TFltV OutWeights(mxid); Graph->GetWeightOutEdgesV(OutWeights, Weights); /*for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { OutWeights[NI.GetId()] = Graph->GetWeightOutEdges(NI, Attr); }*/ /*TIntFltH Weights; for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { Weights.AddDat(NI.GetId(), Graph->GetWeightOutEdges(NI, Attr)); }*/ const int NNodes = Graph->GetNodes(); TVec<TNEANet::TNodeI> NV; //const double OneOver = 1.0/double(NNodes); PRankH.Gen(NNodes); for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { NV.Add(NI); PRankH.AddDat(NI.GetId(), 1.0/NNodes); //IAssert(NI.GetId() == PRankH.GetKey(PRankH.Len()-1)); } TFltV TmpV(NNodes); for (int iter = 0; iter < MaxIter; iter++) { #pragma omp parallel for schedule(dynamic,10000) for (int j = 0; j < NNodes; j++) { TNEANet::TNodeI NI = NV[j]; TmpV[j] = 0; for (int e = 0; e < NI.GetInDeg(); e++) { const int InNId = NI.GetInNId(e); const TFlt OutWeight = OutWeights[InNId]; int EId = Graph->GetEId(InNId, NI.GetId()); const TFlt Weight = Weights[Graph->GetFltKeyIdE(EId)]; if (OutWeight > 0) { TmpV[j] += PRankH.GetDat(InNId) * Weight / OutWeight; } } TmpV[j] = C*TmpV[j]; // Berkhin (the correct way of doing it) //TmpV[j] = C*TmpV[j] + (1.0-C)*OneOver; // iGraph } double diff=0, sum=0, NewVal; #pragma omp parallel for reduction(+:sum) schedule(dynamic,10000) for (int i = 0; i < TmpV.Len(); i++) { sum += TmpV[i]; } const double Leaked = (1.0-sum) / double(NNodes); #pragma omp parallel for reduction(+:diff) schedule(dynamic,10000) for (int i = 0; i < PRankH.Len(); i++) { // re-instert leaked PageRank NewVal = TmpV[i] + Leaked; // Berkhin //NewVal = TmpV[i] / sum; // iGraph diff += fabs(NewVal-PRankH[i]); PRankH[i] = 
NewVal; } if (diff < Eps) { break; } } return 0; }
void GetBetweennessCentr(const PUNGraph& Graph, const TIntV& BtwNIdV, TIntFltH& NodeBtwH, const bool& DoNodeCent, TIntPrFltH& EdgeBtwH, const bool& DoEdgeCent) { if (DoNodeCent) { NodeBtwH.Clr(); } if (DoEdgeCent) { EdgeBtwH.Clr(); } const int nodes = Graph->GetNodes(); TIntS S(nodes); TIntQ Q(nodes); TIntIntVH P(nodes); // one vector for every node TIntFltH delta(nodes); TIntH sigma(nodes), d(nodes); // init for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { if (DoNodeCent) { NodeBtwH.AddDat(NI.GetId(), 0); } if (DoEdgeCent) { for (int e = 0; e < NI.GetOutDeg(); e++) { if (NI.GetId() < NI.GetOutNId(e)) { EdgeBtwH.AddDat(TIntPr(NI.GetId(), NI.GetOutNId(e)), 0); } } } sigma.AddDat(NI.GetId(), 0); d.AddDat(NI.GetId(), -1); P.AddDat(NI.GetId(), TIntV()); delta.AddDat(NI.GetId(), 0); } // calc betweeness for (int k = 0; k < BtwNIdV.Len(); k++) { const TUNGraph::TNodeI NI = Graph->GetNI(BtwNIdV[k]); // reset for (int i = 0; i < sigma.Len(); i++) { sigma[i] = 0; d[i] = -1; delta[i] = 0; P[i].Clr(false); } S.Clr(false); Q.Clr(false); sigma.AddDat(NI.GetId(), 1); d.AddDat(NI.GetId(), 0); Q.Push(NI.GetId()); while (!Q.Empty()) { const int v = Q.Top(); Q.Pop(); const TUNGraph::TNodeI NI2 = Graph->GetNI(v); S.Push(v); const int VDat = d.GetDat(v); for (int e = 0; e < NI2.GetOutDeg(); e++) { const int w = NI2.GetOutNId(e); if (d.GetDat(w) < 0) { // find w for the first time Q.Push(w); d.AddDat(w, VDat + 1); } //shortest path to w via v ? 
if (d.GetDat(w) == VDat + 1) { sigma.AddDat(w) += sigma.GetDat(v); P.GetDat(w).Add(v); } } } while (!S.Empty()) { const int w = S.Top(); const double SigmaW = sigma.GetDat(w); const double DeltaW = delta.GetDat(w); const TIntV NIdV = P.GetDat(w); S.Pop(); for (int i = 0; i < NIdV.Len(); i++) { const int nid = NIdV[i]; const double c = (sigma.GetDat(nid)*1.0 / SigmaW) * (1 + DeltaW); delta.AddDat(nid) += c; if (DoEdgeCent) { EdgeBtwH.AddDat(TIntPr(TMath::Mn(nid, w), TMath::Mx(nid, w))) += c; } } if (DoNodeCent && w != NI.GetId()) { NodeBtwH.AddDat(w) += delta.GetDat(w) / 2.0; } } } }
void TNetInfBs::GenCascade(TCascade& C, const int& TModel, const double &window, TIntPrIntH& EdgesUsed, const double& delta, const double& std_waiting_time, const double& std_beta) { TIntFltH InfectedNIdH; TIntH InfectedBy; double GlobalTime; int StartNId; double alpha, beta; if (GroundTruth->GetNodes() == 0) return; while (C.Len() < 2) { C.Clr(); InfectedNIdH.Clr(); InfectedBy.Clr(); GlobalTime = 0; StartNId = GroundTruth->GetRndNId(); InfectedNIdH.AddDat(StartNId) = GlobalTime; while (true) { // sort by time & get the oldest node that did not run infection InfectedNIdH.SortByDat(true); const int& NId = InfectedNIdH.BegI().GetKey(); GlobalTime = InfectedNIdH.BegI().GetDat(); // all the nodes has run infection if (GlobalTime >= window) break; // add current oldest node to the network and set its time C.Add(NId, GlobalTime); // run infection from the current oldest node const TNGraph::TNodeI NI = GroundTruth->GetNI(NId); for (int e = 0; e < NI.GetOutDeg(); e++) { const int DstNId = NI.GetOutNId(e); beta = Betas.GetDat(TIntPr(NId, DstNId)); // flip biased coin (set by beta) if (TInt::Rnd.GetUniDev() > beta+std_beta*TFlt::Rnd.GetNrmDev()) continue; alpha = Alphas.GetDat(TIntPr(NId, DstNId)); // not infecting the parent if (InfectedBy.IsKey(NId) && InfectedBy.GetDat(NId).Val == DstNId) continue; double sigmaT; switch (TModel) { case 0: // exponential with alpha parameter sigmaT = TInt::Rnd.GetExpDev(alpha); break; case 1: // power-law with alpha parameter sigmaT = TInt::Rnd.GetPowerDev(alpha); while (sigmaT < delta) { sigmaT = TInt::Rnd.GetPowerDev(alpha); } break; case 2: // rayleigh with alpha parameter sigmaT = TInt::Rnd.GetRayleigh(1/sqrt(alpha)); break; default: sigmaT = 1; break; } // avoid negative time diffs in case of noise if (std_waiting_time > 0) sigmaT = TFlt::GetMx(0.0, sigmaT + std_waiting_time*TFlt::Rnd.GetNrmDev()); double t1 = GlobalTime + sigmaT; if (InfectedNIdH.IsKey(DstNId)) { double t2 = InfectedNIdH.GetDat(DstNId); if (t2 > t1 && t2 != window) { 
InfectedNIdH.GetDat(DstNId) = t1; InfectedBy.GetDat(DstNId) = NId; } } else { InfectedNIdH.AddDat(DstNId) = t1; InfectedBy.AddDat(DstNId) = NId; } } // we cannot delete key (otherwise, we cannot sort), so we assign a big time (window cut-off) InfectedNIdH.GetDat(NId) = window; } } C.Sort(); for (TIntH::TIter EI = InfectedBy.BegI(); EI < InfectedBy.EndI(); EI++) { TIntPr Edge(EI.GetDat().Val, EI.GetKey().Val); if (!EdgesUsed.IsKey(Edge)) EdgesUsed.AddDat(Edge) = 0; EdgesUsed.GetDat(Edge) += 1; } }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Inverse PageRank. Build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr Iput = Env.GetIfArgPrefixStr("-i:", "Input.txt", "Input File" ); const TStr Oput = Env.GetIfArgPrefixStr("-o:", "Output.txt", "Output File"); FILE* fpI = fopen(Iput.CStr(), "r"); FILE* fpO = fopen(Oput.CStr(), "w"); const double C = 0.85; const int MaxIter = 50; const double Eps = 1e-9; PNGraph Graph = TSnap::LoadEdgeList< PNGraph > (Iput); fprintf(fpO, "\nNodes: %d, Edges: %d\n\n", Graph->GetNodes(), Graph->GetEdges()); const int NNodes = Graph->GetNodes(); const double OneOver = (double) 1.0 / (double) NNodes; TIntFltH PRankH; PRankH.Gen(NNodes); for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) PRankH.AddDat(NI.GetId(), OneOver); TFltV TmpV(NNodes); for (int iter = 0; iter < MaxIter; iter++) { int j = 0; for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) { TmpV[j] = 0; for (int e = 0; e < NI.GetOutDeg(); e++) { const int OutNId = NI.GetOutNId(e); const int InDeg = Graph->GetNI(OutNId).GetInDeg(); if (InDeg > 0) TmpV[j] += PRankH.GetDat(OutNId) / InDeg; } TmpV[j] = C * TmpV[j]; } for (int i = 0; i < PRankH.Len(); i++) PRankH[i] = TmpV[i]; /* double diff = 0, sum = 0, NewVal; for (int i = 0; i < TmpV.Len(); i++) sum += TmpV[i]; const double Leaked = (double) (1.0 - sum) / (double) NNodes; for (int i = 0; i < PRankH.Len(); i++) { NewVal = TmpV[i] + Leaked; diff += fabs(NewVal - PRankH[i]); PRankH[i] = NewVal; } if (diff < Eps) break; */ } fprintf(fpO, "Node ID\t\tInverse PageRank\n"); for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++){ int Id = NI.GetId(); double ipr = PRankH.GetDat(Id); fprintf(fpO, "%d\t\t\t%.5lf\n", Id, ipr); } Catch printf("\nRun Time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int GetWeightedPageRankMP2(const PNEANet Graph, TIntFltH& PRankH, const TStr& Attr, const double& C, const double& Eps, const int& MaxIter) { if (!Graph->IsFltAttrE(Attr)) return -1; const int NNodes = Graph->GetNodes(); TVec<TNEANet::TNodeI> NV; //const double OneOver = 1.0/double(NNodes); PRankH.Gen(NNodes); int MxId; for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { NV.Add(NI); PRankH.AddDat(NI.GetId(), 1.0/NNodes); int Id = NI.GetId(); if (Id > MxId) { MxId = Id; } } TFltV PRankV(MxId+1); TFltV OutWeights(MxId+1); TFltV Weights = Graph->GetFltAttrVecE(Attr); #pragma omp parallel for schedule(dynamic,10000) for (int j = 0; j < NNodes; j++) { TNEANet::TNodeI NI = NV[j]; int Id = NI.GetId(); OutWeights[Id] = Graph->GetWeightOutEdges(NI, Attr); PRankV[Id] = 1/NNodes; } TFltV TmpV(NNodes); for (int iter = 0; iter < MaxIter; iter++) { #pragma omp parallel for schedule(dynamic,10000) for (int j = 0; j < NNodes; j++) { TNEANet::TNodeI NI = NV[j]; TFlt Tmp = 0; for (int e = 0; e < NI.GetInDeg(); e++) { const int InNId = NI.GetInNId(e); const TFlt OutWeight = OutWeights[InNId]; int EId = Graph->GetEId(InNId, NI.GetId()); const TFlt Weight = Weights[Graph->GetFltKeyIdE(EId)]; if (OutWeight > 0) { Tmp += PRankH.GetDat(InNId) * Weight / OutWeight; } } TmpV[j] = C*Tmp; // Berkhin (the correct way of doing it) //TmpV[j] = C*TmpV[j] + (1.0-C)*OneOver; // iGraph } double sum = 0; #pragma omp parallel for reduction(+:sum) schedule(dynamic,10000) for (int i = 0; i < TmpV.Len(); i++) { sum += TmpV[i]; } const double Leaked = (1.0-sum) / double(NNodes); double diff = 0; #pragma omp parallel for reduction(+:diff) schedule(dynamic,10000) for (int i = 0; i < NNodes; i++) { TNEANet::TNodeI NI = NV[i]; double NewVal = TmpV[i] + Leaked; // Berkhin //NewVal = TmpV[i] / sum; // iGraph int Id = NI.GetId(); diff += fabs(NewVal-PRankV[Id]); PRankV[Id] = NewVal; } if (diff < Eps) { break; } } #pragma omp parallel for schedule(dynamic,10000) for (int i = 0; i < NNodes; i++) 
{ TNEANet::TNodeI NI = NV[i]; PRankH[i] = PRankV[NI.GetId()]; } return 0; }
int main(int argc, char* argv[]) { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("Trust Rank. Build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TExeTm ExeTm; Try const TStr Gnod = Env.GetIfArgPrefixStr("-g:", "Gnode.txt", "Good Nodes"); const TStr Bnod = Env.GetIfArgPrefixStr("-b:", "Bnode.txt", "Bad Nodes" ); const TStr Iput = Env.GetIfArgPrefixStr("-i:", "Input.txt", "Input File"); const TStr Oput = Env.GetIfArgPrefixStr("-o:", "Output.txt", "Output File"); const double C = 0.85; const int MaxIter = 50; const double Eps = 1e-9; FILE* fpO = fopen(Oput.CStr(), "w"); PNGraph Graph = TSnap::LoadEdgeList< PNGraph > (Iput); fprintf(fpO, "\nNodes: %d, Edges: %d\n\n", Graph->GetNodes(), Graph->GetEdges()); const int NNodes = Graph->GetNodes(); TIntFltH TRankH; TRankH.Gen(NNodes); int maxNId = 0, NId = 0, ret = 0; for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) maxNId = max(maxNId, NI.GetId()); TFltV initialTrustScore(maxNId + 1); for (int i = 0; i < initialTrustScore.Len(); i++) initialTrustScore[i] = 0.5; FILE* fpI = fopen(Gnod.CStr(), "r"); while (true) { ret = fscanf(fpI, "%d", &NId); if (ret == EOF) break; if (Graph->IsNode(NId)) initialTrustScore[NId] = 1.0; } fclose(fpI); fpI = fopen(Bnod.CStr(), "r"); while (true) { ret = fscanf(fpI, "%d", &NId); if (ret == EOF) break; if (Graph->IsNode(NId)) initialTrustScore[NId] = 0.0; } fclose(fpI); double Tot = 0.0; for(int i = 0; i < initialTrustScore.Len(); i++) Tot += initialTrustScore[i]; for(int i = 0; i < initialTrustScore.Len(); i++) initialTrustScore[i] /= Tot; for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) TRankH.AddDat( NI.GetId(), initialTrustScore[NI.GetId()] ); TFltV TmpV(NNodes); for (int iter = 0; iter < MaxIter; iter++) { int j = 0; for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) { TmpV[j] = 0; for (int e = 0; e < NI.GetOutDeg(); e++) { const int OutNId = NI.GetOutNId(e); const int InDeg = 
Graph->GetNI(InNId).GetInDeg(); if (InDeg > 0) TmpV[j] += (double) TRankH.GetDat(OutNId) / (double) InDeg; } TmpV[j] = C * TmpV[j] + (1.0 - C) * initialTrustScore[NI.GetId()]; } for (int i = 0; i < TRankH.Len(); i++) TRankH[i] = TmpV[i]; } fprintf(fpO, "Node ID\t\tTrustRank\n"); for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++){ int Id = NI.GetId(); double tr = TRankH.GetDat(Id); fprintf(fpO, "%d\t\t\t%.5lf\n", Id, tr); } fclose(fpO); Catch printf("\nRun Time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int TAGMFast::MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha, const double StepBeta) { //parallel time_t InitTime = time(NULL); uint64 StartTm = TSecTm::GetCurTm().GetAbsSecs(); TExeTm ExeTm, CheckTm; double PrevL = Likelihood(true); TIntFltPrV IterLV; int PrevIter = 0; int iter = 0; TIntV NIdxV(F.Len(), 0); for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); } TIntV NIDOPTV(F.Len()); //check if a node needs optimization or not 1: does not require optimization NIDOPTV.PutAll(0); TVec<TIntFltH> NewF(ChunkNum * ChunkSize); TIntV NewNIDV(ChunkNum * ChunkSize); for (iter = 0; iter < MaxIter; iter++) { NIdxV.Clr(false); for (int i = 0; i < F.Len(); i++) { if (NIDOPTV[i] == 0) { NIdxV.Add(i); } } IAssert (NIdxV.Len() <= F.Len()); NIdxV.Shuffle(Rnd); // compute gradient for chunk of nodes #pragma omp parallel for schedule(static, 1) for (int TIdx = 0; TIdx < ChunkNum; TIdx++) { TIntFltH GradV; for (int ui = TIdx * ChunkSize; ui < (TIdx + 1) * ChunkSize; ui++) { NewNIDV[ui] = -1; if (ui > NIdxV.Len()) { continue; } int u = NIdxV[ui]; // //find set of candidate c (we only need to consider c to which a neighbor of u belongs to) TUNGraph::TNodeI UI = G->GetNI(u); TIntSet CIDSet(5 * UI.GetDeg()); TIntFltH CurFU = F[u]; for (int e = 0; e < UI.GetDeg(); e++) { if (HOVIDSV[u].IsKey(UI.GetNbrNId(e))) { continue; } TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)]; for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) { CIDSet.AddKey(CI.GetKey()); } } if (CIDSet.Empty()) { CurFU.Clr(); } else { for (TIntFltH::TIter CI = CurFU.BegI(); CI < CurFU.EndI(); CI++) { //remove the community membership which U does not share with its neighbors if (! 
CIDSet.IsKey(CI.GetKey())) { CurFU.DelIfKey(CI.GetKey()); } } GradientForRow(u, GradV, CIDSet); if (Norm2(GradV) < 1e-4) { NIDOPTV[u] = 1; continue; } double LearnRate = GetStepSizeByLineSearch(u, GradV, GradV, StepAlpha, StepBeta, 5); if (LearnRate <= 1e-5) { NewNIDV[ui] = -2; continue; } for (int ci = 0; ci < GradV.Len(); ci++) { int CID = GradV.GetKey(ci); double Change = LearnRate * GradV.GetDat(CID); double NewFuc = CurFU.IsKey(CID)? CurFU.GetDat(CID) + Change : Change; if (NewFuc <= 0.0) { CurFU.DelIfKey(CID); } else { CurFU.AddDat(CID) = NewFuc; } } CurFU.Defrag(); } //store changes NewF[ui] = CurFU; NewNIDV[ui] = u; } } int NumNoChangeGrad = 0; int NumNoChangeStepSize = 0; for (int ui = 0; ui < NewNIDV.Len(); ui++) { int NewNID = NewNIDV[ui]; if (NewNID == -1) { NumNoChangeGrad++; continue; } if (NewNID == -2) { NumNoChangeStepSize++; continue; } for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) { SumFV[CI.GetKey()] -= CI.GetDat(); } } #pragma omp parallel for for (int ui = 0; ui < NewNIDV.Len(); ui++) { int NewNID = NewNIDV[ui]; if (NewNID < 0) { continue; } F[NewNID] = NewF[ui]; } for (int ui = 0; ui < NewNIDV.Len(); ui++) { int NewNID = NewNIDV[ui]; if (NewNID < 0) { continue; } for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) { SumFV[CI.GetKey()] += CI.GetDat(); } } // update the nodes who are optimal for (int ui = 0; ui < NewNIDV.Len(); ui++) { int NewNID = NewNIDV[ui]; if (NewNID < 0) { continue; } TUNGraph::TNodeI UI = G->GetNI(NewNID); NIDOPTV[NewNID] = 0; for (int e = 0; e < UI.GetDeg(); e++) { NIDOPTV[UI.GetNbrNId(e)] = 0; } } int OPTCnt = 0; for (int i = 0; i < NIDOPTV.Len(); i++) { if (NIDOPTV[i] == 1) { OPTCnt++; } } if (! 
PlotNm.Empty()) { printf("\r%d iterations [%s] %d secs", iter * ChunkSize * ChunkNum, ExeTm.GetTmStr(), int(TSecTm::GetCurTm().GetAbsSecs() - StartTm)); if (PrevL > TFlt::Mn) { printf(" (%f) %d g %d s %d OPT", PrevL, NumNoChangeGrad, NumNoChangeStepSize, OPTCnt); } fflush(stdout); } if ((iter - PrevIter) * ChunkSize * ChunkNum >= G->GetNodes()) { PrevIter = iter; double CurL = Likelihood(true); IterLV.Add(TIntFltPr(iter * ChunkSize * ChunkNum, CurL)); printf("\r%d iterations, Likelihood: %f, Diff: %f [%d secs]", iter, CurL, CurL - PrevL, int(time(NULL) - InitTime)); fflush(stdout); if (CurL - PrevL <= Thres * fabs(PrevL)) { break; } else { PrevL = CurL; } } } if (! PlotNm.Empty()) { printf("\nMLE completed with %d iterations(%s secs)\n", iter, int(TSecTm::GetCurTm().GetAbsSecs() - StartTm)); TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");[] } else {
void TAGMFast::GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet) { GradU.Gen(CIDSet.Len()); TFltV HOSumFV; //adjust for Fv of v hold out if (HOVIDSV[UID].Len() > 0) { HOSumFV.Gen(SumFV.Len()); for (int e = 0; e < HOVIDSV[UID].Len(); e++) { for (int c = 0; c < SumFV.Len(); c++) { HOSumFV[c] += GetCom(HOVIDSV[UID][e], c); } } } TUNGraph::TNodeI NI = G->GetNI(UID); int Deg = NI.GetDeg(); TFltV PredV(Deg), GradV(CIDSet.Len()); TIntV CIDV(CIDSet.Len()); if (DoParallel && Deg + CIDSet.Len() > 10) { #pragma omp parallel for schedule(static, 1) for (int e = 0; e < Deg; e++) { if (NI.GetNbrNId(e) == UID) { continue; } if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; } PredV[e] = Prediction(UID, NI.GetNbrNId(e)); } #pragma omp parallel for schedule(static, 1) for (int c = 0; c < CIDSet.Len(); c++) { int CID = CIDSet.GetKey(c); double Val = 0.0; for (int e = 0; e < Deg; e++) { int VID = NI.GetNbrNId(e); if (VID == UID) { continue; } if (HOVIDSV[UID].IsKey(VID)) { continue; } Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID); } double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID)); CIDV[c] = CID; GradV[c] = Val; } } else { for (int e = 0; e < Deg; e++) { if (NI.GetNbrNId(e) == UID) { continue; } if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; } PredV[e] = Prediction(UID, NI.GetNbrNId(e)); } for (int c = 0; c < CIDSet.Len(); c++) { int CID = CIDSet.GetKey(c); double Val = 0.0; for (int e = 0; e < Deg; e++) { int VID = NI.GetNbrNId(e); if (VID == UID) { continue; } if (HOVIDSV[UID].IsKey(VID)) { continue; } Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID); } double HOSum = HOVIDSV[UID].Len() > 0? 
HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID)); CIDV[c] = CID; GradV[c] = Val; } } //add regularization if (RegCoef > 0.0) { //L1 for (int c = 0; c < GradV.Len(); c++) { GradV[c] -= RegCoef; } } if (RegCoef < 0.0) { //L2 for (int c = 0; c < GradV.Len(); c++) { GradV[c] += 2 * RegCoef * GetCom(UID, CIDV[c]); } } for (int c = 0; c < GradV.Len(); c++) { if (GetCom(UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; } if (fabs(GradV[c]) < 0.0001) { continue; } GradU.AddDat(CIDV[c], GradV[c]); } for (int c = 0; c < GradU.Len(); c++) { if (GradU[c] >= 10) { GradU[c] = 10; } if (GradU[c] <= -10) { GradU[c] = -10; } IAssert(GradU[c] >= -10); } }
///////////////////////////////////////////////// // NIST-score double TEvalScoreNist::Eval(const PTransCorpus& TransCorpus, const TIntV& _SentIdV) { // check if the corpus has translations IAssert(TransCorpus->IsTrans()); // ngram counts (cliped and full) TIntH ClipCountNGramH, CountNGramH; // ngram info score TIntFltH NGramInfoH; // candidate and effective reference length double FullTransLen = 0.0, FullRefLen = 0.0; // iterate over sentences TIntV SentIdV = _SentIdV; if (SentIdV.Empty()) { TransCorpus->GetSentIdV(SentIdV); } const int Sents = SentIdV.Len(); for (int SentIdN = 0; SentIdN < Sents; SentIdN++) { const int SentId = SentIdV[SentIdN]; // tokenize translation TIntV TransWIdV; Parse(TransCorpus->GetTransStr(SentId), TransWIdV); TIntH TransNGramH; GetNGramH(TransWIdV, MxNGramLen, TransNGramH); TIntH FreeTransNGramH = TransNGramH; // number of non-matched ngrams // counters for getting the closest length of reference sentences const int TransLen = TransWIdV.Len(); int RefLenSum = 0; // go over reference translations and count ngram matches TStrV RefTransStrV = TransCorpus->GetRefTransStrV(SentId); // we assume that there is at least one reference translation IAssert(!RefTransStrV.Empty()); for (int RefN = 0; RefN < RefTransStrV.Len(); RefN++) { // parse reference translation sentence TIntV RefWIdV; Parse(RefTransStrV[RefN], RefWIdV); TIntH RefNGramH; GetNGramH(RefWIdV, MxNGramLen, RefNGramH); // check for matches int TransNGramKeyId = TransNGramH.FFirstKeyId(); while(TransNGramH.FNextKeyId(TransNGramKeyId)) { const int NGramId = TransNGramH.GetKey(TransNGramKeyId); const int FreeTransNGrams = FreeTransNGramH(NGramId); if (RefNGramH.IsKey(NGramId) && (FreeTransNGrams>0)) { // ngram match and still some free ngrams left to clip const int RefNGrams = RefNGramH(NGramId); FreeTransNGramH(NGramId) = TInt::GetMx(0, FreeTransNGrams - RefNGrams); } } // check the length difference const int RefLen = RefWIdV.Len(); RefLenSum += RefLen; } // count ngrams int 
TransNGramKeyId = TransNGramH.FFirstKeyId(); while(TransNGramH.FNextKeyId(TransNGramKeyId)) { // get ngram const int NGramId = TransNGramH.GetKey(TransNGramKeyId); IAssert(NGramId != -1); // check if two hash tables are aligned (should be...) const int FreeNGramId = FreeTransNGramH.GetKey(TransNGramKeyId); IAssert(NGramId == FreeNGramId); // get ngram count and clip-count const int Count = TransNGramH[TransNGramKeyId]; const int ClipCount = Count - FreeTransNGramH[TransNGramKeyId]; // add ngram to the coprus ngram counts CountNGramH.AddDat(NGramId) += Count; ClipCountNGramH.AddDat(NGramId) += ClipCount; } // count length FullTransLen += double(TransLen); FullRefLen += double(RefLenSum) / double(RefTransStrV.Len()); } // calculate ngram info scores int CountKeyId = CountNGramH.FFirstKeyId(); while (CountNGramH.FNextKeyId(CountKeyId)) { // get the n-gram const int NGramId = CountNGramH.GetKey(CountKeyId); TIntV NGram = GetNGram(NGramId); // prepare counts if (NGram.Len() == 1) { // n-gram is a word const int WordCount = CountNGramH[CountKeyId]; const double NGramInfoScore = TMath::Log2(FullTransLen / double(WordCount)); NGramInfoH.AddDat(NGramId, NGramInfoScore); } else { // more then one word in the n-gram // get a n-gram with removed last element TIntV N1Gram = NGram; N1Gram.DelLast(); const int N1GramId = NGramH.GetKeyId(N1Gram); // get the counts const int NGramCount = CountNGramH(NGramId); const int N1GramCount = CountNGramH(N1GramId); // get the score const double NGramInfoScore = TMath::Log2(double(N1GramCount) / double(NGramCount)); NGramInfoH.AddDat(NGramId, NGramInfoScore); } } // calcualte ngram precisions TFltV ClipCountV(MxNGramLen); ClipCountV.PutAll(0); int ClipCountKeyId = ClipCountNGramH.FFirstKeyId(); while (ClipCountNGramH.FNextKeyId(ClipCountKeyId)) { const int NGramId = ClipCountNGramH.GetKey(ClipCountKeyId); const int NGramLen = GetNGramLen(NGramId); const double NGramInfo = NGramInfoH(NGramId); IAssert(0 < NGramLen && NGramLen <= MxNGramLen); 
const int ClipCountNGram = ClipCountNGramH[ClipCountKeyId]; ClipCountV[NGramLen-1] += double(ClipCountNGram) * NGramInfo; } TIntV CountV(MxNGramLen); CountV.PutAll(0); CountKeyId = CountNGramH.FFirstKeyId(); while (CountNGramH.FNextKeyId(CountKeyId)) { const int NGramId = CountNGramH.GetKey(CountKeyId); const int NGramLen = GetNGramLen(NGramId); IAssert(0 < NGramLen && NGramLen <= MxNGramLen); CountV[NGramLen-1] += CountNGramH[CountKeyId]; } TFltV PrecV(MxNGramLen, 0); for (int NGramLen = 0; NGramLen < MxNGramLen; NGramLen++) { const double ClipCount = ClipCountV[NGramLen]; const int Count = CountV[NGramLen]; const double Prec = (Count > 0) ? ClipCount / double(Count) : 0.0; PrecV.Add(Prec); } // calcualte brevity penalty const double LenFrac = double(FullTransLen)/double(FullRefLen); double BP = 0.0; if (LenFrac >= 1.0) { BP = 1.0; } else if (LenFrac <= 0.0) { BP = 0.0; } else { // calculate beta const double LenFracX = 1.5, BPX = 0.5; const double Beta = log(BPX) / TMath::Sqr(log(LenFracX)); // calculate BP score BP = exp(Beta * TMath::Sqr(log(LenFrac))); } // calculate full NIST score double NistScore = 0.0; for (int NGramLen = 0; NGramLen < MxNGramLen; NGramLen++) { NistScore += PrecV[NGramLen]; } NistScore *= BP; printf("NIST Score: %.5f\n", NistScore); // done! return NistScore; }
/// Stores value Q for node NId in NIdQH and updates MxQId, the key id of the entry that is
/// tracked as the current maximum: it is moved to NId's entry when nothing is tracked yet
/// (MxQId == -1) or when the tracked entry's value is smaller than Q.
void AddQ(const int& NId, const double& Q) {
  NIdQH.AddDat(NId, Q);
  // keep the tracked maximum if there is one and it is not smaller than Q
  if (MxQId != -1 && !(NIdQH[MxQId] < Q)) { return; }
  MxQId = NIdQH.GetKeyId(NId);
}