/**
 * Compute the log-likelihood of the observations under the given mixture
 * model (component means, covariances, and mixing weights).
 *
 * @param observations Matrix of observations (one column per point).
 * @param means Mean vector of each Gaussian component.
 * @param covariances Covariance matrix of each Gaussian component.
 * @param weights Mixing weight of each component.
 * @return Sum over all points of log of the weighted mixture density.
 */
double EMFit<InitialClusteringType, CovarianceConstraintPolicy>::LogLikelihood(
    const arma::mat& observations,
    const std::vector<arma::vec>& means,
    const std::vector<arma::mat>& covariances,
    const arma::vec& weights) const
{
  double logLikelihood = 0;

  arma::vec phis;
  arma::mat likelihoods(means.size(), observations.n_cols);
  // Row i holds the weighted density of every observation under component i.
  for (size_t i = 0; i < means.size(); ++i)
  {
    phi(observations, means[i], covariances[i], phis);
    likelihoods.row(i) = weights(i) * trans(phis);
  }

  // Now sum over every point.
  for (size_t j = 0; j < observations.n_cols; ++j)
  {
    // Evaluate the mixture likelihood of this point once (the original
    // recomputed accu() for both the zero check and the log()).
    const double pointLikelihood = accu(likelihoods.col(j));
    if (pointLikelihood == 0)
      Log::Info << "Likelihood of point " << j << " is 0! It is probably an "
          << "outlier." << std::endl;
    // log(0) yields -inf here; the warning above flags the offending point.
    logLikelihood += log(pointLikelihood);
  }

  return logLikelihood;
}
/**
 * Run variational inference on every document of the test corpus and return
 * the corpus perplexity, exp(-total_log_likelihood / total_word_count).
 */
double VarMGCTM::Infer(CorpusC &test, MGCTMC &m) {
  // Each document is inferred independently, so the parallel loop carries no
  // shared state: every iteration writes only its own slot.
  VReal doc_likelihoods(test.Len());
  #pragma omp parallel for
  for (size_t d = 0; d < test.Len(); d++) {
    MGVar scratch;
    doc_likelihoods[d] = Infer(test.docs[d], m, &scratch);
  }

  // Serial reduction of the per-document log-likelihoods.
  double total = 0.0;
  for (size_t d = 0; d < test.Len(); d++)
    total += doc_likelihoods[d];

  return exp(- total / test.TWordsNum());
}
// Run EM training on the training corpus cor_.
// Each iteration: (E) parallel per-document variational inference, then a
// serial pass accumulating sufficient statistics; (M) MStep re-estimates the
// model from those statistics. `test` is only referenced by the commented-out
// perplexity log at the bottom.
void VarMGCTM::RunEM(CorpusC &test, MGCTM* m) {
  // Bootstrap: corpus-initialized sufficient statistics plus one M-step
  // before the first EM iteration.
  MGSS ss;
  ss.CorpusInit(cor_, *m);
  MStep(ss, m);
  LOG(INFO) << m->pi.transpose();
  for (int i = 0; i < converged_.em_max_iter_; i++) {
    // E-step: infer variational parameters for every document in parallel.
    // vars[d] and likelihoods[d] are written only by iteration d, so the
    // loop is race-free.
    std::vector<MGVar> vars(cor_.Len());
    VReal likelihoods(cor_.Len());
    #pragma omp parallel for
    for (size_t d = 0; d < cor_.Len(); d++) {
      likelihoods[d] = Infer(cor_.docs[d], *m, &vars[d]);
    }
    double likelihood = 0;
    VStr etas(cor_.Len());
    // Reset accumulators before gathering this iteration's statistics.
    ss.SetZero(m->GTopicNum(), m->LTopicNum1(), m->LTopicNum2(), m->TermNum());
    // Serial accumulation of sufficient statistics over all documents.
    for (size_t d = 0; d < cor_.Len(); d++) {
      DocC &doc = cor_.docs[d];
      // Global-topic statistics, weighted by (1 - delta[n]): the probability
      // mass assigned to the global topic mixture for word n.
      for (size_t n = 0; n < doc.ULen(); n++) {
        for (int k = 0; k < m->GTopicNum(); k++) {
          ss.g_topic(k, doc.Word(n)) += doc.Count(n)*vars[d].g_z(k, n)*
                                        (1 - vars[d].delta[n]);
          ss.g_topic_sum[k] += doc.Count(n)*vars[d].g_z(k, n)*(1 - vars[d].delta[n]);
        }
      }
      // Local-topic statistics for each group j, weighted by delta[n]
      // (local-topic mass) and eta[j] (group membership weight).
      for (int j = 0; j < m->LTopicNum1(); j++) {
        for (size_t n = 0; n < doc.ULen(); n++) {
          for (int k = 0; k < m->LTopicNum2(); k++) {
            ss.l_topic[j](k, doc.Word(n)) += doc.Count(n)*vars[d].l_z[j](k, n)
                                             *vars[d].delta[n]*vars[d].eta[j];
            ss.l_topic_sum(k, j) += doc.Count(n)*vars[d].l_z[j](k, n) *
                                    vars[d].delta[n] * vars[d].eta[j];
          }
        }
      }
      // Group-proportion statistics: expected membership of doc d in group j.
      for (int j = 0; j < m->LTopicNum1(); j++) {
        ss.pi[j] += vars[d].eta[j];
      }
      etas[d] = EVecToStr(vars[d].eta);
      likelihood += likelihoods[d];
      // NOTE(review): `likelihood` is accumulated but never used for a
      // convergence check — the loop always runs em_max_iter_ times.
    }
    // M-step: update model parameters from the accumulated statistics,
    // then persist the per-document eta assignments for this iteration.
    MStep(ss, m);
    LOG(INFO) << m->pi.transpose();
    OutputFile(*m, Join(etas,"\n"), i);
    // LOG(INFO) <<"perplexity: " <<Infer(test,*m);
  }
}
/**
 * Compute the log-likelihood of the data under the given mixture model
 * (component means, covariances, and mixing weights).
 *
 * @param data Matrix of observations (one column per point).
 * @param meansL Mean vector of each Gaussian component.
 * @param covariancesL Covariance matrix of each Gaussian component.
 * @param weightsL Mixing weight of each component.
 * @return Sum over all points of log of the weighted mixture density.
 */
double GMM<FittingType>::LogLikelihood(
    const arma::mat& data,
    const std::vector<arma::vec>& meansL,
    const std::vector<arma::mat>& covariancesL,
    const arma::vec& weightsL) const
{
  double loglikelihood = 0;

  arma::vec phis;
  arma::mat likelihoods(gaussians, data.n_cols);
  // Row i holds the weighted density of every point under component i.
  for (size_t i = 0; i < gaussians; i++)
  {
    phi(data, meansL[i], covariancesL[i], phis);
    likelihoods.row(i) = weightsL(i) * trans(phis);
  }

  // Now sum over every point.
  for (size_t j = 0; j < data.n_cols; j++)
  {
    const double pointLikelihood = accu(likelihoods.col(j));
    // Warn instead of silently taking log(0) (-inf), matching the behavior
    // of EMFit::LogLikelihood for zero-likelihood outlier points.
    if (pointLikelihood == 0)
      Log::Info << "Likelihood of point " << j << " is 0! It is probably an "
          << "outlier." << std::endl;
    loglikelihood += log(pointLikelihood);
  }

  return loglikelihood;
}
void _Estepbfactor(vector<double> &expected, vector<double> &r1, vector<double> &ri, const NumericMatrix &itemtrace, const vector<double> &prior, const vector<double> &Priorbetween, const vector<int> &r, const int &ncores, const IntegerMatrix &data, const IntegerMatrix &sitems, const vector<double> &Prior) { #ifdef SUPPORT_OPENMP omp_set_num_threads(ncores); #endif const int sfact = sitems.ncol(); const int nitems = data.ncol(); const int npquad = prior.size(); const int nbquad = Priorbetween.size(); const int nquad = nbquad * npquad; const int npat = r.size(); vector<double> r1vec(nquad*nitems*sfact, 0.0); #pragma omp parallel for for (int pat = 0; pat < npat; ++pat){ vector<double> L(nquad), Elk(nbquad*sfact), posterior(nquad*sfact); vector<double> likelihoods(nquad*sfact, 1.0); for (int fact = 0; fact < sfact; ++fact){ for (int item = 0; item < nitems; ++item){ if (data(pat,item) && sitems(item,fact)) for (int k = 0; k < nquad; ++k) likelihoods[k + nquad*fact] = likelihoods[k + nquad*fact] * itemtrace(k,item); } } vector<double> Plk(nbquad*sfact); for (int fact = 0; fact < sfact; ++fact){ int k = 0; for (int q = 0; q < npquad; ++q){ for (int i = 0; i < nbquad; ++i){ L[k] = likelihoods[k + nquad*fact] * prior[q]; ++k; } } vector<double> tempsum(nbquad, 0.0); for (int i = 0; i < npquad; ++i) for (int q = 0; q < nbquad; ++q) tempsum[q] += L[q + i*nbquad]; for (int i = 0; i < nbquad; ++i) Plk[i + fact*nbquad] = tempsum[i]; } vector<double> Pls(nbquad, 1.0); for (int i = 0; i < nbquad; ++i){ for(int fact = 0; fact < sfact; ++fact) Pls[i] = Pls[i] * Plk[i + fact*nbquad]; expected[pat] += Pls[i] * Priorbetween[i]; } for (int fact = 0; fact < sfact; ++fact) for (int i = 0; i < nbquad; ++i) Elk[i + fact*nbquad] = Pls[i] / Plk[i + fact*nbquad]; for (int fact = 0; fact < sfact; ++fact) for (int i = 0; i < nquad; ++i) posterior[i + nquad*fact] = likelihoods[i + nquad*fact] * r[pat] * Elk[i % nbquad + fact*nbquad] / expected[pat]; #pragma omp critical for (int i = 0; i < 
nbquad; ++i) ri[i] += Pls[i] * r[pat] * Priorbetween[i] / expected[pat]; for (int item = 0; item < nitems; ++item) if (data(pat,item)) for (int fact = 0; fact < sfact; ++fact) for(int q = 0; q < nquad; ++q) r1vec[q + fact*nquad*nitems + nquad*item] += posterior[q + fact*nquad]; } //end main for (int item = 0; item < nitems; ++item) for (int fact = 0; fact < sfact; ++fact) if(sitems(item, fact)) for(int q = 0; q < nquad; ++q) r1[q + nquad*item] = r1vec[q + nquad*item + nquad*nitems*fact] * Prior[q]; }