Exemplo n.º 1
0
void TStrParser::GetIDFWeightV(TFltV& WeightV) {
    int AlphN = GetAlphabetSize();
    WeightV.Gen(AlphN);
    for (int AlphC = 0; AlphC < AlphN; AlphC++) 
        WeightV[AlphC] = log((double)DocsParsed / WordToIdH[AlphC]);
    double MaxVal = WeightV[WeightV.GetMxValN()];
    for (int AlphC = 0; AlphC < AlphN; AlphC++) 
        WeightV[AlphC] /= MaxVal;
}
Exemplo n.º 2
0
void TBowLinAlg::GetDual(const PBowDocWgtBs& X,
        const TFltV& x, TFltV& y, const int& _Docs) {

    const int Docs = (_Docs == -1) ? X->GetDocs() : _Docs;
    y.Gen(Docs, 0); // prepare space
    for (int DId = 0; DId < Docs; DId++) {
        y.Add(TBowLinAlg::DotProduct(x, X->GetSpV(DId)));
    }   
}
Exemplo n.º 3
0
Arquivo: mkcca.cpp Projeto: Accio/snap
static void ConjugGrad(const TMatrix& Matrix, const TFltV& b, TFltV& x, 
        const int& CGMxIter, const double& RelErr, const TFltV& x0) {

    // prepare start vector
    x.Gen(Matrix.GetCols());
    if (x0.Empty()) { x.PutAll(0.0); }
    else { x = x0; }
    // do the magic
}
Exemplo n.º 4
0
/// Sample random point from the surface of a Dim-dimensional unit sphere.
void GetSphereDev(const int& Dim, TRnd& Rnd, TFltV& ValV) {
  if (ValV.Len() != Dim) { ValV.Gen(Dim); }
  double Length = 0.0;
  for (int i = 0; i < Dim; i++) {
    ValV[i] = Rnd.GetNrmDev();
    Length += TMath::Sqr(ValV[i]); }
  Length = 1.0 / sqrt(Length);
  for (int i = 0; i < Dim; i++) {
    ValV[i] *= Length;
  }
}
Exemplo n.º 5
0
void TLogRegFit::Gradient(TFltV& GradV) {
    TFltV OutV;
    TLogRegPredict::GetCfy(X, OutV, Theta);
    GradV.Gen(M);
    for (int r = 0; r < X.Len(); r++) {
        //printf("Y[%d] = %f, Out[%d] = %f\n", r, Y[r].Val, r, OutV[r].Val);
        for (int m = 0; m < M; m++) {
            GradV[m] += (Y[r] - OutV[r]) * X[r][m];
        }
    }
    //for (int m = 0; m < M; m++) {  printf("Theta[%d] = %f, GradV[%d] = %f\n", m, Theta[m].Val, m, GradV[m].Val); }
}
Exemplo n.º 6
0
TBowMatrix::TBowMatrix(PBowDocBs BowDocBs, PBowDocWgtBs BowDocWgtBs,
        const TStr& CatNm,  const TIntV& DIdV, TFltV& ClsV): TMatrix() {

    RowN = BowDocBs->GetWords();
    ClsV.Gen(DIdV.Len(), 0);
    ColSpVV.Gen(DIdV.Len(), 0);
    IAssert(BowDocBs->IsCatNm(CatNm));
    int CatId = BowDocBs->GetCId(CatNm);
    for (int i = 0; i < DIdV.Len(); i++) {
        ColSpVV.Add(BowDocWgtBs->GetSpV(DIdV[i]));
        ClsV.Add(BowDocBs->IsCatInDoc(DIdV[i], CatId) ? 0.99 : -0.99);
    }
}
Exemplo n.º 7
0
double TAGMFast::LikelihoodForRow(const int UID, const TIntFltH& FU) {
  double L = 0.0;
  TFltV HOSumFV; //adjust for Fv of v hold out
  if (HOVIDSV[UID].Len() > 0) {
    HOSumFV.Gen(SumFV.Len());
    
    for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
      for (int c = 0; c < SumFV.Len(); c++) {
        HOSumFV[c] += GetCom(HOVIDSV[UID][e], c);
      }
    }
  }

  TUNGraph::TNodeI NI = G->GetNI(UID);
  if (DoParallel && NI.GetDeg() > 10) {
#pragma omp parallel for schedule(static, 1)
    for (int e = 0; e < NI.GetDeg(); e++) {
      int v = NI.GetNbrNId(e);
      if (v == UID) { continue; }
      if (HOVIDSV[UID].IsKey(v)) { continue; }
      double LU = log (1.0 - Prediction(FU, F[v])) + NegWgt * DotProduct(FU, F[v]);
#pragma omp atomic
      L += LU;
    }
    for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) {
      double HOSum = HOVIDSV[UID].Len() > 0?  HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
      double LU = NegWgt * (SumFV[HI.GetKey()] - HOSum - GetCom(UID, HI.GetKey())) * HI.GetDat();
      L -= LU;
    }
  } else {
    for (int e = 0; e < NI.GetDeg(); e++) {
      int v = NI.GetNbrNId(e);
      if (v == UID) { continue; }
      if (HOVIDSV[UID].IsKey(v)) { continue; }
      L += log (1.0 - Prediction(FU, F[v])) + NegWgt * DotProduct(FU, F[v]);
    }
    for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) {
      double HOSum = HOVIDSV[UID].Len() > 0?  HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
      L -= NegWgt * (SumFV[HI.GetKey()] - HOSum - GetCom(UID, HI.GetKey())) * HI.GetDat();
    }
  }
  //add regularization
  if (RegCoef > 0.0) { //L1
    L -= RegCoef * Sum(FU);
  }
  if (RegCoef < 0.0) { //L2
    L += RegCoef * Norm2(FU);
  }

  return L;
}
Exemplo n.º 8
0
void TLogReg::IRLS(const TMatrix& Matrix, TFltV& y, TFltV& bb, 
                   const double& ChangeEps, const int& MaxStep, const int& Verb) {
    IAssert(Matrix.GetCols() == y.Len());

    int M = Matrix.GetRows(), R = Matrix.GetCols(), i;
    if (bb.Len() != M+1) { bb.Gen(M+1); bb.PutAll(0.0); }

    TFltV mu(R), w(R), z(R), delta;

    // adjust y
    for (i = 0; i < R; i++) {
        if (y[i] >= 1.0)
            y[i] = 0.999;
        else if (y[i] <= 0.0)
            y[i] = 0.001;
    }

    //const double eps = 0.01;
    double NewDEV = 0.0, OldDEV = -100.0;
    forever {
        Matrix.MultiplyT(bb, z);
        for (i = 0; i < R; i++) {
            z[i] += bb[M];
            // evaluate current model
            mu[i] = 1/(1 + exp(-z[i]));
            // calculate weights
            w[i] = mu[i] * (1 - mu[i]);
            // calculate adjusted dependent variables
            z[i] += (y[i] - mu[i]) / w[i];
        }
        // get new aproximation for bb
        CG(Matrix, w, z, bb, MaxStep, Verb);
        // calculate deviance (error measurement)
        NewDEV = 0.0;
        for (i = 0; i < R; i++) {
            double yi = y[i], mui = mu[i];
            NewDEV += yi*log(yi / mui) + (1 - yi)*log((1 - yi)/(1 - mui));
        }
        
        if (Verb == 1) printf(" -> %.5f\n", NewDEV);
        else if (Verb > 1) printf("NewDEV = %.5f\n", NewDEV);

        // do we stop?
        if (fabs(NewDEV - OldDEV) < ChangeEps) break;
        OldDEV = NewDEV;
    }
}
Exemplo n.º 9
0
// Gradient of likelihood for P_c.
void TAGMFit::GradLogLForLambda(TFltV& GradV) {
  GradV.Gen(LambdaV.Len());
  TFltV SumEdgeProbsV(LambdaV.Len());
  for (int e = 0; e < EdgeComVH.Len(); e++) {
    TIntSet& JointCom = EdgeComVH[e];
    double LambdaSum = SelectLambdaSum(JointCom);
    double Puv = 1 - exp(- LambdaSum);
    if (JointCom.Len() == 0) {  Puv = PNoCom;  }
    for (TIntSet::TIter SI = JointCom.BegI(); SI < JointCom.EndI(); SI++) {
      SumEdgeProbsV[SI.GetKey()] += (1 - Puv) / Puv;
    }
  }
  for (int k = 0; k < LambdaV.Len(); k++) {
    int MaxEk = CIDNSetV[k].Len() * (CIDNSetV[k].Len() - 1) / 2;
    int NotEdgesInCom = MaxEk - ComEdgesV[k];
    GradV[k] = SumEdgeProbsV[k] - (double) NotEdgesInCom;
    if (LambdaV[k] > 0.0 && RegCoef > 0.0) { //if regularization exists
      GradV[k] -= RegCoef;
    }
  }
}
Exemplo n.º 10
0
void TAGMFit::GetCmtyVV(TVec<TIntV>& CmtyVV, TFltV& QV, const double QMax) {
  CmtyVV.Gen(CIDNSetV.Len(), 0);
  QV.Gen(CIDNSetV.Len(), 0);
  TIntFltH CIDLambdaH(CIDNSetV.Len());
  for (int c = 0; c < CIDNSetV.Len(); c++) {
    CIDLambdaH.AddDat(c, LambdaV[c]);
  }
  CIDLambdaH.SortByDat(false);
  for (int c = 0; c < CIDNSetV.Len(); c++) {
    int CID = CIDLambdaH.GetKey(c);
    IAssert(LambdaV[CID] >= MinLambda);
    double Q = exp( - (double) LambdaV[CID]);
    if (Q > QMax) { continue; }
    TIntV CmtyV;
    CIDNSetV[CID].GetKeyV(CmtyV);
    if (CmtyV.Len() == 0) { continue; }
    if (CID == BaseCID) { //if the community is the base community(epsilon community), discard
      IAssert(CmtyV.Len() == G->GetNodes());
    } else {
      CmtyVV.Add(CmtyV);
      QV.Add(Q);
    }
  }
}
Exemplo n.º 11
0
void TAGMFast::GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet) {
  GradU.Gen(CIDSet.Len());

  TFltV HOSumFV; //adjust for Fv of v hold out
  if (HOVIDSV[UID].Len() > 0) {
    HOSumFV.Gen(SumFV.Len());
    
    for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
      for (int c = 0; c < SumFV.Len(); c++) {
        HOSumFV[c] += GetCom(HOVIDSV[UID][e], c);
      }
    }
  }
    
  TUNGraph::TNodeI NI = G->GetNI(UID);
  int Deg = NI.GetDeg();
  TFltV PredV(Deg), GradV(CIDSet.Len());
  TIntV CIDV(CIDSet.Len());
  if (DoParallel && Deg + CIDSet.Len() > 10) {
#pragma omp parallel for schedule(static, 1)
    for (int e = 0; e < Deg; e++) {
      if (NI.GetNbrNId(e) == UID) { continue; }
      if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; }
      PredV[e] = Prediction(UID, NI.GetNbrNId(e));
    }
  
#pragma omp parallel for schedule(static, 1)
    for (int c = 0; c < CIDSet.Len(); c++) {
      int CID = CIDSet.GetKey(c);
      double Val = 0.0;
      for (int e = 0; e < Deg; e++) {
        int VID = NI.GetNbrNId(e);
        if (VID == UID) { continue; }
        if (HOVIDSV[UID].IsKey(VID)) { continue; }
        Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID);
      }
      double HOSum = HOVIDSV[UID].Len() > 0?  HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
      Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID));
      CIDV[c] = CID;
      GradV[c] = Val;
    }
  } 
  else {
    for (int e = 0; e < Deg; e++) {
      if (NI.GetNbrNId(e) == UID) { continue; }
      if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; }
      PredV[e] = Prediction(UID, NI.GetNbrNId(e));
    }
  
    for (int c = 0; c < CIDSet.Len(); c++) {
      int CID = CIDSet.GetKey(c);
      double Val = 0.0;
      for (int e = 0; e < Deg; e++) {
        int VID = NI.GetNbrNId(e);
        if (VID == UID) { continue; }
        if (HOVIDSV[UID].IsKey(VID)) { continue; }
        Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID);
      }
      double HOSum = HOVIDSV[UID].Len() > 0?  HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
      Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID));
      CIDV[c] = CID;
      GradV[c] = Val;
    }
  }
  //add regularization
  if (RegCoef > 0.0) { //L1
    for (int c = 0; c < GradV.Len(); c++) {
      GradV[c] -= RegCoef; 
    }
  }
  if (RegCoef < 0.0) { //L2
    for (int c = 0; c < GradV.Len(); c++) {
      GradV[c] += 2 * RegCoef * GetCom(UID, CIDV[c]); 
    }
  }


  for (int c = 0; c < GradV.Len(); c++) {
    if (GetCom(UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; }
    if (fabs(GradV[c]) < 0.0001) { continue; }
    GradU.AddDat(CIDV[c], GradV[c]);
  }
  for (int c = 0; c < GradU.Len(); c++) {
    if (GradU[c] >= 10) { GradU[c] = 10; }
    if (GradU[c] <= -10) { GradU[c] = -10; }
    IAssert(GradU[c] >= -10);
  }
}
Exemplo n.º 12
0
// Returns \v QV, a vector of (1 - p_c) for each community c.
void TAGMFit::GetQV(TFltV& OutV) {
  OutV.Gen(LambdaV.Len());
  for (int i = 0; i < LambdaV.Len(); i++) {
    OutV[i] = exp(- LambdaV[i]);
  }
}
Exemplo n.º 13
0
void TLogRegPredict::GetCfy(const TVec<TFltV>& X, TFltV& OutV, const TFltV& NewTheta) {
    OutV.Gen(X.Len());
    for (int r = 0; r < X.Len(); r++) {
        OutV[r] = GetCfy(X[r], NewTheta);
    }
}