/*
 * M-step for the topic matrix: rebuilds every column of model->topics from
 * the matching column of the sufficient statistics ss->topics_ss.
 *
 * When LDA_USE_VAR_BAYES == 0 the statistics column is copied into the topic
 * column and then passed through normalize() and vct_log() (project helpers;
 * presumably sum-to-one scaling followed by an element-wise log -- confirm).
 * Nothing is added to the returned value on this path.
 *
 * Otherwise, with eta = LDA_TOPIC_DIR_PARAM, entry w of the topic column is
 * set to psi(ss_w + eta) - psi(sum(ss) + nterms * eta), and for each topic
 *   sum_w [ (eta - (ss_w + eta)) * entry_w + lngamma(ss_w + eta) ]
 *     - lngamma(sum_w (ss_w + eta))
 * is accumulated into the return value.
 *
 * Returns the accumulated value (0 if the second path is never taken).
 */
double lda_m_step(lda* model, lda_suff_stats* ss) {
    double bound = 0;
    for (int topic = 0; topic < model->ntopics; topic++)
    {
        // Copies of the column-view descriptors; they still alias the matrix
        // storage, so writes through `target` land in model->topics.
        gsl_vector counts = gsl_matrix_column(ss->topics_ss, topic).vector;
        gsl_vector target = gsl_matrix_column(model->topics, topic).vector;

        if (LDA_USE_VAR_BAYES != 0)
        {
            // Digamma of the column total plus nterms * eta, shared by every
            // entry of this topic.
            const double total = sum(&counts) + model->nterms * LDA_TOPIC_DIR_PARAM;
            const double psi_total = gsl_sf_psi(total);

            double posterior_mass = 0;
            for (int term = 0; term < model->nterms; term++)
            {
                const double posterior = vget(&counts, term) + LDA_TOPIC_DIR_PARAM;
                const double expected_log = gsl_sf_psi(posterior) - psi_total;
                posterior_mass += posterior;
                vset(&target, term, expected_log);
                bound += (LDA_TOPIC_DIR_PARAM - posterior) * expected_log + gsl_sf_lngamma(posterior);
            }
            bound -= gsl_sf_lngamma(posterior_mass);
        }
        else
        {
            // Point-estimate path: copy, normalize, take logs in place.
            gsl_blas_dcopy(&counts, &target);
            normalize(&target);
            vct_log(&target);
        }
    }
    return bound;
}
// Example #2
// 0
// File: ctr.cpp  Project: anukat2015/ctr
double c_ctr::doc_inference(const c_document* doc, const gsl_vector* theta_v, 
                            const gsl_matrix* log_beta, gsl_matrix* phi,
                            gsl_vector* gamma, gsl_matrix* word_ss, 
                            bool update_word_ss) {

  double pseudo_count = 1.0;
  double likelihood = 0;
  gsl_vector* log_theta_v = gsl_vector_alloc(theta_v->size);
  gsl_vector_memcpy(log_theta_v, theta_v);
  vct_log(log_theta_v);

  int n, k, w;
  double x;
  for (n = 0; n < doc->m_length; n ++) {
    w = doc->m_words[n]; 
    for (k = 0; k < m_num_factors; k ++)
      mset(phi, n, k, vget(theta_v, k) * mget(m_beta, k, w));

    gsl_vector_view row =  gsl_matrix_row(phi, n);
    vnormalize(&row.vector);

    for (k = 0; k < m_num_factors; k ++) {
      x = mget(phi, n, k);
      if (x > 0) 
        likelihood += x*(vget(log_theta_v, k) + mget(log_beta, k, w) - log(x));
    }
  }

  if (pseudo_count > 0) {
    likelihood += pseudo_count * vsum(log_theta_v);
  }

  gsl_vector_set_all(gamma, pseudo_count); // smoothing with small pseudo counts
  for (n = 0; n < doc->m_length; n ++) {
    for (k = 0; k < m_num_factors; k ++) {
      x = doc->m_counts[n] * mget(phi, n, k);
      vinc(gamma, k, x);      
      if (update_word_ss) minc(word_ss, k, doc->m_words[n], x);
    }
  }

  gsl_vector_free(log_theta_v);
  return likelihood;
}