/// Fills WeightV with one log-ratio weight per alphabet entry, scaled by the largest weight.
///
/// Entry AlphC is first set to log(DocsParsed / WordToIdH[AlphC]); every entry is
/// then divided by the maximum entry, so the largest weight becomes 1.
/// An empty alphabet yields an empty vector, and the scaling step is skipped when
/// the maximum is exactly 0 (dividing would turn every weight into NaN).
/// @param WeightV Output vector; resized to GetAlphabetSize() and overwritten.
void TStrParser::GetIDFWeightV(TFltV& WeightV) {
  const int AlphN = GetAlphabetSize();
  WeightV.Gen(AlphN);
  // An empty vector has no valid "index of maximum", so there is nothing to scale.
  if (AlphN == 0) { return; }
  for (int AlphC = 0; AlphC < AlphN; AlphC++) {
    WeightV[AlphC] = log((double)DocsParsed / WordToIdH[AlphC]);
  }
  const double MaxVal = WeightV[WeightV.GetMxValN()];
  // 0/0 would poison the whole vector with NaN; leave the raw weights instead.
  if (MaxVal == 0.0) { return; }
  for (int AlphC = 0; AlphC < AlphN; AlphC++) {
    WeightV[AlphC] /= MaxVal;
  }
}
void TBowLinAlg::GetDual(const PBowDocWgtBs& X, const TFltV& x, TFltV& y, const int& _Docs) { const int Docs = (_Docs == -1) ? X->GetDocs() : _Docs; y.Gen(Docs, 0); // prepare space for (int DId = 0; DId < Docs; DId++) { y.Add(TBowLinAlg::DotProduct(x, X->GetSpV(DId))); } }
static void ConjugGrad(const TMatrix& Matrix, const TFltV& b, TFltV& x, const int& CGMxIter, const double& RelErr, const TFltV& x0) { // prepare start vector x.Gen(Matrix.GetCols()); if (x0.Empty()) { x.PutAll(0.0); } else { x = x0; } // do the magic }
/// Sample random point from the surface of a Dim-dimensional unit sphere. void GetSphereDev(const int& Dim, TRnd& Rnd, TFltV& ValV) { if (ValV.Len() != Dim) { ValV.Gen(Dim); } double Length = 0.0; for (int i = 0; i < Dim; i++) { ValV[i] = Rnd.GetNrmDev(); Length += TMath::Sqr(ValV[i]); } Length = 1.0 / sqrt(Length); for (int i = 0; i < Dim; i++) { ValV[i] *= Length; } }
/// Accumulates GradV[m] = sum over rows r of (Y[r] - OutV[r]) * X[r][m],
/// where OutV is obtained from TLogRegPredict::GetCfy(X, OutV, Theta).
/// @param GradV Output; re-generated with M entries before accumulation
///              (the += below relies on Gen(M) producing zeroed entries).
void TLogRegFit::Gradient(TFltV& GradV) {
  TFltV OutV;
  TLogRegPredict::GetCfy(X, OutV, Theta);
  GradV.Gen(M);
  const int Rows = X.Len();
  for (int RowN = 0; RowN < Rows; RowN++) {
    const TFltV& Row = X[RowN];
    for (int AttrN = 0; AttrN < M; AttrN++) {
      GradV[AttrN] += (Y[RowN] - OutV[RowN]) * Row[AttrN];
    }
  }
}
/// Builds the matrix columns from the documents listed in DIdV and emits a class
/// target for each of them.
///
/// Column i is BowDocWgtBs->GetSpV(DIdV[i]); ClsV[i] is 0.99 when document DIdV[i]
/// is in category CatNm and -0.99 otherwise. RowN is set to BowDocBs->GetWords().
/// Asserts that CatNm is a known category name.
TBowMatrix::TBowMatrix(PBowDocBs BowDocBs, PBowDocWgtBs BowDocWgtBs,
    const TStr& CatNm, const TIntV& DIdV, TFltV& ClsV): TMatrix() {
  RowN = BowDocBs->GetWords();
  const int Docs = DIdV.Len();
  ClsV.Gen(Docs, 0);
  ColSpVV.Gen(Docs, 0);
  IAssert(BowDocBs->IsCatNm(CatNm));
  const int CatId = BowDocBs->GetCId(CatNm);
  for (int DocN = 0; DocN < Docs; DocN++) {
    const int DId = DIdV[DocN];
    ColSpVV.Add(BowDocWgtBs->GetSpV(DId));
    double Cls = -0.99;
    if (BowDocBs->IsCatInDoc(DId, CatId)) { Cls = 0.99; }
    ClsV.Add(Cls);
  }
}
double TAGMFast::LikelihoodForRow(const int UID, const TIntFltH& FU) { double L = 0.0; TFltV HOSumFV; //adjust for Fv of v hold out if (HOVIDSV[UID].Len() > 0) { HOSumFV.Gen(SumFV.Len()); for (int e = 0; e < HOVIDSV[UID].Len(); e++) { for (int c = 0; c < SumFV.Len(); c++) { HOSumFV[c] += GetCom(HOVIDSV[UID][e], c); } } } TUNGraph::TNodeI NI = G->GetNI(UID); if (DoParallel && NI.GetDeg() > 10) { #pragma omp parallel for schedule(static, 1) for (int e = 0; e < NI.GetDeg(); e++) { int v = NI.GetNbrNId(e); if (v == UID) { continue; } if (HOVIDSV[UID].IsKey(v)) { continue; } double LU = log (1.0 - Prediction(FU, F[v])) + NegWgt * DotProduct(FU, F[v]); #pragma omp atomic L += LU; } for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) { double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist double LU = NegWgt * (SumFV[HI.GetKey()] - HOSum - GetCom(UID, HI.GetKey())) * HI.GetDat(); L -= LU; } } else { for (int e = 0; e < NI.GetDeg(); e++) { int v = NI.GetNbrNId(e); if (v == UID) { continue; } if (HOVIDSV[UID].IsKey(v)) { continue; } L += log (1.0 - Prediction(FU, F[v])) + NegWgt * DotProduct(FU, F[v]); } for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) { double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist L -= NegWgt * (SumFV[HI.GetKey()] - HOSum - GetCom(UID, HI.GetKey())) * HI.GetDat(); } } //add regularization if (RegCoef > 0.0) { //L1 L -= RegCoef * Sum(FU); } if (RegCoef < 0.0) { //L2 L += RegCoef * Norm2(FU); } return L; }
void TLogReg::IRLS(const TMatrix& Matrix, TFltV& y, TFltV& bb, const double& ChangeEps, const int& MaxStep, const int& Verb) { IAssert(Matrix.GetCols() == y.Len()); int M = Matrix.GetRows(), R = Matrix.GetCols(), i; if (bb.Len() != M+1) { bb.Gen(M+1); bb.PutAll(0.0); } TFltV mu(R), w(R), z(R), delta; // adjust y for (i = 0; i < R; i++) { if (y[i] >= 1.0) y[i] = 0.999; else if (y[i] <= 0.0) y[i] = 0.001; } //const double eps = 0.01; double NewDEV = 0.0, OldDEV = -100.0; forever { Matrix.MultiplyT(bb, z); for (i = 0; i < R; i++) { z[i] += bb[M]; // evaluate current model mu[i] = 1/(1 + exp(-z[i])); // calculate weights w[i] = mu[i] * (1 - mu[i]); // calculate adjusted dependent variables z[i] += (y[i] - mu[i]) / w[i]; } // get new aproximation for bb CG(Matrix, w, z, bb, MaxStep, Verb); // calculate deviance (error measurement) NewDEV = 0.0; for (i = 0; i < R; i++) { double yi = y[i], mui = mu[i]; NewDEV += yi*log(yi / mui) + (1 - yi)*log((1 - yi)/(1 - mui)); } if (Verb == 1) printf(" -> %.5f\n", NewDEV); else if (Verb > 1) printf("NewDEV = %.5f\n", NewDEV); // do we stop? if (fabs(NewDEV - OldDEV) < ChangeEps) break; OldDEV = NewDEV; } }
// Gradient of likelihood for P_c. void TAGMFit::GradLogLForLambda(TFltV& GradV) { GradV.Gen(LambdaV.Len()); TFltV SumEdgeProbsV(LambdaV.Len()); for (int e = 0; e < EdgeComVH.Len(); e++) { TIntSet& JointCom = EdgeComVH[e]; double LambdaSum = SelectLambdaSum(JointCom); double Puv = 1 - exp(- LambdaSum); if (JointCom.Len() == 0) { Puv = PNoCom; } for (TIntSet::TIter SI = JointCom.BegI(); SI < JointCom.EndI(); SI++) { SumEdgeProbsV[SI.GetKey()] += (1 - Puv) / Puv; } } for (int k = 0; k < LambdaV.Len(); k++) { int MaxEk = CIDNSetV[k].Len() * (CIDNSetV[k].Len() - 1) / 2; int NotEdgesInCom = MaxEk - ComEdgesV[k]; GradV[k] = SumEdgeProbsV[k] - (double) NotEdgesInCom; if (LambdaV[k] > 0.0 && RegCoef > 0.0) { //if regularization exists GradV[k] -= RegCoef; } } }
/// Collects the member lists of the communities together with Q = exp(-lambda) for each.
///
/// Communities are visited in the order produced by sorting (community id -> lambda)
/// with SortByDat(false) -- presumably descending lambda; confirm against THash.
/// A community is skipped when its Q exceeds QMax or when it has no members. The base
/// community (BaseCID) is never emitted; it is only checked to contain every graph node.
/// @param CmtyVV Output; one node-id vector per emitted community.
/// @param QV     Output; Q value of each emitted community, parallel to CmtyVV.
/// @param QMax   Upper bound on Q for a community to be emitted.
void TAGMFit::GetCmtyVV(TVec<TIntV>& CmtyVV, TFltV& QV, const double QMax) {
  const int Coms = CIDNSetV.Len();
  CmtyVV.Gen(Coms, 0);
  QV.Gen(Coms, 0);
  TIntFltH CIDLambdaH(Coms);
  for (int CID = 0; CID < Coms; CID++) {
    CIDLambdaH.AddDat(CID, LambdaV[CID]);
  }
  CIDLambdaH.SortByDat(false);
  for (int Rank = 0; Rank < Coms; Rank++) {
    const int CID = CIDLambdaH.GetKey(Rank);
    IAssert(LambdaV[CID] >= MinLambda);
    const double Q = exp( - (double) LambdaV[CID]);
    if (Q > QMax) { continue; }
    TIntV CmtyV;
    CIDNSetV[CID].GetKeyV(CmtyV);
    if (CmtyV.Len() == 0) { continue; }
    if (CID != BaseCID) {
      CmtyVV.Add(CmtyV);
      QV.Add(Q);
    } else {
      // if the community is the base community (epsilon community), discard
      IAssert(CmtyV.Len() == G->GetNodes());
    }
  }
}
void TAGMFast::GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet) { GradU.Gen(CIDSet.Len()); TFltV HOSumFV; //adjust for Fv of v hold out if (HOVIDSV[UID].Len() > 0) { HOSumFV.Gen(SumFV.Len()); for (int e = 0; e < HOVIDSV[UID].Len(); e++) { for (int c = 0; c < SumFV.Len(); c++) { HOSumFV[c] += GetCom(HOVIDSV[UID][e], c); } } } TUNGraph::TNodeI NI = G->GetNI(UID); int Deg = NI.GetDeg(); TFltV PredV(Deg), GradV(CIDSet.Len()); TIntV CIDV(CIDSet.Len()); if (DoParallel && Deg + CIDSet.Len() > 10) { #pragma omp parallel for schedule(static, 1) for (int e = 0; e < Deg; e++) { if (NI.GetNbrNId(e) == UID) { continue; } if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; } PredV[e] = Prediction(UID, NI.GetNbrNId(e)); } #pragma omp parallel for schedule(static, 1) for (int c = 0; c < CIDSet.Len(); c++) { int CID = CIDSet.GetKey(c); double Val = 0.0; for (int e = 0; e < Deg; e++) { int VID = NI.GetNbrNId(e); if (VID == UID) { continue; } if (HOVIDSV[UID].IsKey(VID)) { continue; } Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID); } double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID)); CIDV[c] = CID; GradV[c] = Val; } } else { for (int e = 0; e < Deg; e++) { if (NI.GetNbrNId(e) == UID) { continue; } if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; } PredV[e] = Prediction(UID, NI.GetNbrNId(e)); } for (int c = 0; c < CIDSet.Len(); c++) { int CID = CIDSet.GetKey(c); double Val = 0.0; for (int e = 0; e < Deg; e++) { int VID = NI.GetNbrNId(e); if (VID == UID) { continue; } if (HOVIDSV[UID].IsKey(VID)) { continue; } Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID); } double HOSum = HOVIDSV[UID].Len() > 0? 
HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID)); CIDV[c] = CID; GradV[c] = Val; } } //add regularization if (RegCoef > 0.0) { //L1 for (int c = 0; c < GradV.Len(); c++) { GradV[c] -= RegCoef; } } if (RegCoef < 0.0) { //L2 for (int c = 0; c < GradV.Len(); c++) { GradV[c] += 2 * RegCoef * GetCom(UID, CIDV[c]); } } for (int c = 0; c < GradV.Len(); c++) { if (GetCom(UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; } if (fabs(GradV[c]) < 0.0001) { continue; } GradU.AddDat(CIDV[c], GradV[c]); } for (int c = 0; c < GradU.Len(); c++) { if (GradU[c] >= 10) { GradU[c] = 10; } if (GradU[c] <= -10) { GradU[c] = -10; } IAssert(GradU[c] >= -10); } }
// Returns \v QV, a vector of (1 - p_c) for each community c. void TAGMFit::GetQV(TFltV& OutV) { OutV.Gen(LambdaV.Len()); for (int i = 0; i < LambdaV.Len(); i++) { OutV[i] = exp(- LambdaV[i]); } }
/// Applies the single-row GetCfy(X[r], NewTheta) to every row of X.
/// @param X        Input rows.
/// @param OutV     Output; re-generated with X.Len() entries, one result per row.
/// @param NewTheta Parameter vector forwarded to the single-row overload.
void TLogRegPredict::GetCfy(const TVec<TFltV>& X, TFltV& OutV, const TFltV& NewTheta) {
  const int Rows = X.Len();
  OutV.Gen(Rows);
  int RowN = 0;
  while (RowN < Rows) {
    OutV[RowN] = GetCfy(X[RowN], NewTheta);
    RowN++;
  }
}