Example #1
0
/**
 * Compute the log-likelihood of a set of observations under a mixture whose
 * components are described by the given means, covariances and weights.
 *
 * For every observation the weighted component densities are summed and the
 * logarithm of that sum is added to the total.  An observation whose summed
 * likelihood is exactly 0 is reported through Log::Info; its log() term is
 * still added, so the returned total becomes -infinity in that case.
 *
 * @param observations Data matrix; each column is treated as one point.
 * @param means Per-component mean vectors (means.size() components).
 * @param covariances Per-component covariance matrices, indexed like means.
 * @param weights Per-component mixture weights, indexed like means.
 * @return Sum over all points of log(sum_i weights(i) * phi_i(point)).
 */
double EMFit<InitialClusteringType, CovarianceConstraintPolicy>::LogLikelihood(
    const arma::mat& observations,
    const std::vector<arma::vec>& means,
    const std::vector<arma::mat>& covariances,
    const arma::vec& weights) const
{
  double logLikelihood = 0;

  // Row i holds component i's weighted density at every observation.  phi()
  // is used as an out-parameter call: phis is read right after it returns.
  arma::vec phis;
  arma::mat likelihoods(means.size(), observations.n_cols);
  for (size_t i = 0; i < means.size(); ++i)
  {
    phi(observations, means[i], covariances[i], phis);
    likelihoods.row(i) = weights(i) * trans(phis);
  }

  // Now sum over every point.
  for (size_t j = 0; j < observations.n_cols; ++j)
  {
    // Reduce the column once and reuse the value for both the zero check and
    // the logarithm instead of running accu() twice per point.
    const double pointLikelihood = accu(likelihoods.col(j));

    if (pointLikelihood == 0)
      Log::Info << "Likelihood of point " << j << " is 0!  It is probably an "
          << "outlier." << std::endl;
    logLikelihood += log(pointLikelihood);
  }

  return logLikelihood;
}
Example #2
0
// Runs the per-document Infer() overload on every document of `test` under
// model `m` (in parallel when OpenMP is enabled), adds up the returned values
// and returns exp(-total / test.TWordsNum()).
// NOTE(review): this looks like a corpus perplexity, assuming the
// per-document value is a log-likelihood -- confirm against the per-document
// overload.
double VarMGCTM::Infer(CorpusC &test, MGCTMC &m) {
  // One slot per document so the parallel loop writes to disjoint elements.
  VReal docScores(test.Len());
  #pragma omp parallel for
  for (size_t idx = 0; idx < test.Len(); ++idx) {
    MGVar scratch;  // per-document variational state, discarded afterwards
    docScores[idx] = Infer(test.docs[idx], m, &scratch);
  }

  // Serial reduction in document order.
  double total = 0.0;
  for (size_t idx = 0; idx < test.Len(); ++idx) {
    total += docScores[idx];
  }

  const double negMean = -total / test.TWordsNum();
  return exp(negMean);
}
Example #3
0
// Runs converged_.em_max_iter_ rounds of EM on the training corpus cor_,
// updating the model *m in place.
//
// Each round:
//   1. runs per-document inference (in parallel when OpenMP is enabled) to
//      fill one MGVar per document;
//   2. resets the sufficient statistics and re-accumulates them from those
//      per-document variables;
//   3. calls MStep() to update the model, logs m->pi and writes the model
//      plus the per-document eta strings through OutputFile().
//
// `test` is currently unused: its only reference is the commented-out
// perplexity log at the end of the loop.
void VarMGCTM::RunEM(CorpusC &test, MGCTM* m) {
  MGSS ss;
  ss.CorpusInit(cor_, *m);
  MStep(ss, m);
  LOG(INFO) << m->pi.transpose();
  for (int i = 0; i < converged_.em_max_iter_; i++) {
    // E-step. Only the variational state written into vars[d] is needed; the
    // value returned by Infer() was previously summed into a local that was
    // never read, so it is no longer stored.
    std::vector<MGVar> vars(cor_.Len());
    #pragma omp parallel for
    for (size_t d = 0; d < cor_.Len(); d++) {
      Infer(cor_.docs[d], *m, &vars[d]);
    }

    VStr etas(cor_.Len());
    ss.SetZero(m->GTopicNum(), m->LTopicNum1(), m->LTopicNum2(), m->TermNum());
    for (size_t d = 0; d < cor_.Len(); d++) {
      DocC &doc = cor_.docs[d];
      MGVar &var = vars[d];

      // Global-topic statistics: the same contribution goes to the per-word
      // cell and to the per-topic total, so compute it once.
      for (size_t n = 0; n < doc.ULen(); n++) {
        for (int k = 0; k < m->GTopicNum(); k++) {
          const double g_mass = doc.Count(n)*var.g_z(k, n)*(1 - var.delta[n]);
          ss.g_topic(k, doc.Word(n)) += g_mass;
          ss.g_topic_sum[k] += g_mass;
        }
      }

      // Local-topic statistics for each of the LTopicNum1() groups, again
      // sharing one product between the per-word cell and the total.
      for (int j = 0; j < m->LTopicNum1(); j++) {
        for (size_t n = 0; n < doc.ULen(); n++) {
          for (int k = 0; k < m->LTopicNum2(); k++) {
            const double l_mass = doc.Count(n)*var.l_z[j](k, n)
                                  *var.delta[n]*var.eta[j];
            ss.l_topic[j](k, doc.Word(n)) += l_mass;
            ss.l_topic_sum(k, j) += l_mass;
          }
        }
      }

      // Group-proportion statistics.
      for (int j = 0; j < m->LTopicNum1(); j++) {
        ss.pi[j] += var.eta[j];
      }

      etas[d] = EVecToStr(var.eta);
    }

    // M-step and per-iteration output.
    MStep(ss, m);
    LOG(INFO) << m->pi.transpose();
    OutputFile(*m, Join(etas,"\n"), i);
//    LOG(INFO) <<"perplexity: " <<Infer(test,*m);
  }
}
Example #4
0
/**
 * Log-likelihood of `data` under a mixture of `gaussians` components with the
 * supplied parameters.
 *
 * @param data Data matrix; each column is treated as one point.
 * @param meansL Per-component mean vectors.
 * @param covariancesL Per-component covariance matrices.
 * @param weightsL Per-component mixture weights.
 * @return Sum over all points of log(sum_g weightsL(g) * phi_g(point)).
 */
double GMM<FittingType>::LogLikelihood(
    const arma::mat& data,
    const std::vector<arma::vec>& meansL,
    const std::vector<arma::mat>& covariancesL,
    const arma::vec& weightsL) const
{
  // weighted(g, p): weight of component g times its density at point p.
  arma::mat weighted(gaussians, data.n_cols);
  arma::vec density;
  for (size_t g = 0; g < gaussians; ++g)
  {
    // phi() is used as an out-parameter call: density is read right after.
    phi(data, meansL[g], covariancesL[g], density);
    weighted.row(g) = weightsL(g) * trans(density);
  }

  // Accumulate the log of each point's total mixture likelihood.
  double total = 0;
  for (size_t point = 0; point < data.n_cols; ++point)
  {
    const double mixture = accu(weighted.col(point));
    total += log(mixture);
  }

  return total;
}
Example #5
0
// E-step accumulation over response patterns for a model whose quadrature
// grid is split into nbquad "between" nodes (Priorbetween) and npquad
// "within" nodes (prior), with sfact = sitems.ncol() specific factors.
//
// For every pattern `pat` (rows of `data`, one count r[pat] each):
//   * builds, per specific factor, the product of itemtrace(k, item) over the
//     items with data(pat,item) != 0 and sitems(item,fact) != 0;
//   * weights those by prior[], sums over the npquad nodes (Plk), multiplies
//     across factors (Pls) and adds sum_i Pls[i]*Priorbetween[i] to
//     expected[pat];
//   * adds the pattern's contribution to ri[] and to the per-item table.
//
// Outputs:
//   expected  accumulated with `+=`, so the value passed in is part of the
//             result.
//   ri        accumulated with `+=`, length >= nbquad.
//   r1        overwritten at q + nquad*item for every (item, fact) pair with
//             sitems(item,fact) != 0; if several factors are flagged for an
//             item, the last one wins.
//
// ncores is only used when SUPPORT_OPENMP is defined.
void _Estepbfactor(vector<double> &expected, vector<double> &r1, vector<double> &ri,
    const NumericMatrix &itemtrace, const vector<double> &prior, const vector<double> &Priorbetween,
    const vector<int> &r, const int &ncores, const IntegerMatrix &data, const IntegerMatrix &sitems,
    const vector<double> &Prior)
{
    #ifdef SUPPORT_OPENMP
    omp_set_num_threads(ncores);
    #endif
    const int sfact = sitems.ncol();
    const int nitems = data.ncol();
    const int npquad = prior.size();
    const int nbquad = Priorbetween.size();
    const int nquad = nbquad * npquad;
    const int npat = r.size();
    // Shared across all patterns (and therefore across threads); laid out as
    // [fact][item][quadrature node].
    vector<double> r1vec(nquad*nitems*sfact, 0.0);

#pragma omp parallel for
    for (int pat = 0; pat < npat; ++pat){
        // Everything declared inside the loop body is private to the iteration.
        vector<double> L(nquad), Elk(nbquad*sfact), posterior(nquad*sfact);
        vector<double> likelihoods(nquad*sfact, 1.0);
        for (int fact = 0; fact < sfact; ++fact){
            for (int item = 0; item < nitems; ++item){
                if (data(pat,item) && sitems(item,fact))
                    for (int k = 0; k < nquad; ++k)
                        likelihoods[k + nquad*fact] = likelihoods[k + nquad*fact] * itemtrace(k,item);
            }
        }
        vector<double> Plk(nbquad*sfact);
        for (int fact = 0; fact < sfact; ++fact){
            // Node k = q*nbquad + i: within-node q, between-node i.
            int k = 0;
            for (int q = 0; q < npquad; ++q){
                for (int i = 0; i < nbquad; ++i){
                    L[k] = likelihoods[k + nquad*fact] * prior[q];
                    ++k;
                }
            }
            // Sum out the within nodes, leaving one value per between node.
            vector<double> tempsum(nbquad, 0.0);
            for (int i = 0; i < npquad; ++i)
                for (int q = 0; q < nbquad; ++q)
                    tempsum[q] += L[q + i*nbquad];
            for (int i = 0; i < nbquad; ++i)
                Plk[i + fact*nbquad] = tempsum[i];
        }
        // Product across factors at each between node; expected[pat] is only
        // touched by this iteration, so no synchronization is needed for it.
        vector<double> Pls(nbquad, 1.0);
        for (int i = 0; i < nbquad; ++i){
            for(int fact = 0; fact < sfact; ++fact)
                Pls[i] = Pls[i] * Plk[i + fact*nbquad];
            expected[pat] += Pls[i] * Priorbetween[i];
        }
        // Elk: product over all factors except `fact` (Pls with Plk divided out).
        for (int fact = 0; fact < sfact; ++fact)
            for (int i = 0; i < nbquad; ++i)
                Elk[i + fact*nbquad] = Pls[i] / Plk[i + fact*nbquad];
        for (int fact = 0; fact < sfact; ++fact)
            for (int i = 0; i < nquad; ++i)
                posterior[i + nquad*fact] = likelihoods[i + nquad*fact] * r[pat] * Elk[i % nbquad + fact*nbquad] /
                                            expected[pat];
        // ri and r1vec are shared by every thread and several patterns add to
        // the same elements. A bare `#pragma omp critical` protects only the
        // single statement that follows it, which previously left the r1vec
        // update unsynchronized; the braces put both accumulations inside the
        // critical region.
        #pragma omp critical
        {
            for (int i = 0; i < nbquad; ++i)
                ri[i] += Pls[i] * r[pat] * Priorbetween[i] / expected[pat];
            for (int item = 0; item < nitems; ++item)
                if (data(pat,item))
                    for (int fact = 0; fact < sfact; ++fact)
                        for(int q = 0; q < nquad; ++q)
                            r1vec[q + fact*nquad*nitems + nquad*item] += posterior[q + fact*nquad];
        }
    }   //end main
    // Copy out, for each item, the slice of the factor(s) flagged in sitems,
    // scaled by Prior[q].
    for (int item = 0; item < nitems; ++item)
        for (int fact = 0; fact < sfact; ++fact)
            if(sitems(item, fact))
                for(int q = 0; q < nquad; ++q)
                    r1[q + nquad*item] = r1vec[q + nquad*item + nquad*nitems*fact] * Prior[q];
}