/*
 * Accumulate the column sums of a matrix.
 *
 * m:   input matrix; every entry (i, j) is read through mget().
 * val: output vector; it is first set to 0 everywhere and then entry j is
 *      incremented (via vinc()) by each m(i, j). It is indexed at every j
 *      in [0, m->size2), so it needs at least that many elements.
 */
void col_sum(gsl_matrix* m, gsl_vector* val) {
    gsl_vector_set_all(val, 0);

    /* size_t matches the type of the GSL dimension fields, so the loop
     * bounds are compared without a signed/unsigned conversion. */
    for (size_t i = 0; i < m->size1; i++) {
        for (size_t j = 0; j < m->size2; j++) {
            vinc(val, j, mget(m, i, j));
        }
    }
}
/*
 * Column sums of a matrix.
 *
 * val is reset to 0 and then, walking the matrix row by row, entry `col`
 * of val is incremented (via vinc()) by m(row, col).
 */
void col_sum(gsl_matrix* m, gsl_vector* val) {
    unsigned int row = 0;

    gsl_vector_set_all(val, 0);

    while (row < m->size1) {
        unsigned int col = 0;
        while (col < m->size2) {
            vinc(val, col, mget(m, row, col));
            ++col;
        }
        ++row;
    }
}
/*
 * Recompute p->gamma from the current p->phi.
 *
 * p->gamma is overwritten with a copy of p->model->alpha; then, for every
 * term n of the document and every topic k, entry k of gamma is
 * incremented (via vinc()) by phi(n, k) * count[n].
 */
void update_gamma(lda_post* p) {
    const int num_topics = p->model->ntopics;
    const int num_terms = p->doc->nterms;

    /* Start from alpha. */
    gsl_vector_memcpy(p->gamma, p->model->alpha);

    for (int term = 0; term < num_terms; term++) {
        /* Row `term` of phi, held as a vector view struct. */
        gsl_vector term_phi = gsl_matrix_row(p->phi, term).vector;
        const int term_count = p->doc->count[term];

        for (int topic = 0; topic < num_topics; topic++) {
            vinc(p->gamma, topic, vget(&term_phi, topic) * term_count);
        }
    }
}
/*
 * Seed the topic sufficient statistics from randomly chosen documents.
 *
 * For every column k of ss->topics_ss:
 *   1. LDA_SEED_INIT documents are drawn uniformly at random from `data`,
 *      and for each term of a drawn document the column entry at that
 *      term's word index is incremented by the term's count;
 *   2. every entry of the column is then incremented by
 *      LDA_INIT_SMOOTH plus a fresh uniform draw.
 * Increments are applied on top of whatever the column already holds.
 *
 * data: corpus to sample from. data->doc[d] is read with
 *       d = floor(u * data->ndocs), so the corpus must contain at least
 *       one document.
 * ss:   sufficient statistics whose topics_ss matrix is updated in place.
 */
void initialize_lda_ss_from_data(corpus_t* data, lda_suff_stats* ss) {
    /* NOTE(review): r is never released in this function. If
     * new_random_number_generator() hands ownership to the caller, it
     * should be freed before returning -- confirm against its definition. */
    gsl_rng* r = new_random_number_generator();

    for (size_t k = 0; k < ss->topics_ss->size2; k++) {
        gsl_vector topic = gsl_matrix_column(ss->topics_ss, k).vector;

        for (int n = 0; n < LDA_SEED_INIT; n++) {
            int d = floor(gsl_rng_uniform(r) * data->ndocs);
            doc_t* doc = data->doc[d];

            /* Walk the document's terms with i. Indexing with the seed
             * counter n would add the same term nterms times and read
             * past the arrays whenever n >= doc->nterms. */
            for (int i = 0; i < doc->nterms; i++) {
                vinc(&topic, doc->word[i], doc->count[i]);
            }
        }

        /* Smoothing plus random jitter on every entry of the column. */
        for (size_t w = 0; w < topic.size; w++) {
            vinc(&topic, w, LDA_INIT_SMOOTH + gsl_rng_uniform(r));
        }
    }
}
double c_ctr::doc_inference(const c_document* doc, const gsl_vector* theta_v, const gsl_matrix* log_beta, gsl_matrix* phi, gsl_vector* gamma, gsl_matrix* word_ss, bool update_word_ss) { double pseudo_count = 1.0; double likelihood = 0; gsl_vector* log_theta_v = gsl_vector_alloc(theta_v->size); gsl_vector_memcpy(log_theta_v, theta_v); vct_log(log_theta_v); int n, k, w; double x; for (n = 0; n < doc->m_length; n ++) { w = doc->m_words[n]; for (k = 0; k < m_num_factors; k ++) mset(phi, n, k, vget(theta_v, k) * mget(m_beta, k, w)); gsl_vector_view row = gsl_matrix_row(phi, n); vnormalize(&row.vector); for (k = 0; k < m_num_factors; k ++) { x = mget(phi, n, k); if (x > 0) likelihood += x*(vget(log_theta_v, k) + mget(log_beta, k, w) - log(x)); } } if (pseudo_count > 0) { likelihood += pseudo_count * vsum(log_theta_v); } gsl_vector_set_all(gamma, pseudo_count); // smoothing with small pseudo counts for (n = 0; n < doc->m_length; n ++) { for (k = 0; k < m_num_factors; k ++) { x = doc->m_counts[n] * mget(phi, n, k); vinc(gamma, k, x); if (update_word_ss) minc(word_ss, k, doc->m_words[n], x); } } gsl_vector_free(log_theta_v); return likelihood; }