Example #1
/* column sums: val[j] = sum_i m(i, j) */
void col_sum(gsl_matrix* m, gsl_vector* val)
{
    int i, j;
    gsl_vector_set_all(val, 0);

    for (i = 0; i < m->size1; i++)
        for (j = 0; j < m->size2; j++)
            vinc(val, j, mget(m, i, j));
}

Example #2

/*
 * compute the column sums of a matrix
 */
void col_sum(gsl_matrix* m, gsl_vector* val) {
	gsl_vector_set_all(val, 0);
	
	for (unsigned int i = 0; i < m->size1; i++) {
		for (unsigned int j = 0; j < m->size2; j++) {
			vinc(val, j, mget(m, i, j));
		}
	}
}
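
Neither version uses raw GSL accessors directly; vinc and mget (and the related vget/vset/mset/minc seen in the later examples) are small wrappers that these topic-model codebases keep in a local utils header. The block below is a hedged reconstruction of those wrappers from their call sites, not the projects' actual header, followed by a tiny check that compiles together with either col_sum above.

#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>

/* assumed semantics of the accessor wrappers, inferred from the call sites */
static double vget(const gsl_vector* v, int i)            { return gsl_vector_get(v, i); }
static void   vset(gsl_vector* v, int i, double x)        { gsl_vector_set(v, i, x); }
static void   vinc(gsl_vector* v, int i, double x)        { vset(v, i, vget(v, i) + x); }
static double mget(const gsl_matrix* m, int i, int j)     { return gsl_matrix_get(m, i, j); }
static void   mset(gsl_matrix* m, int i, int j, double x) { gsl_matrix_set(m, i, j, x); }
static void   minc(gsl_matrix* m, int i, int j, double x) { mset(m, i, j, mget(m, i, j) + x); }

/* quick check (uses the col_sum definition above): column sums of a
 * 2x3 matrix of ones are (2, 2, 2) */
int main(void) {
    gsl_matrix* m = gsl_matrix_alloc(2, 3);
    gsl_vector* s = gsl_vector_alloc(3);
    gsl_matrix_set_all(m, 1.0);
    col_sum(m, s);
    gsl_vector_free(s);
    gsl_matrix_free(m);
    return 0;
}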

Example #3

/* variational update: gamma[k] = alpha[k] + sum_n count[n] * phi(n, k) */
void update_gamma(lda_post* p) {
    int k, n, K = p->model->ntopics, N = p->doc->nterms;

    gsl_vector_memcpy(p->gamma, p->model->alpha);
    for (n = 0; n < N; n++)
    {
        gsl_vector phi_row = gsl_matrix_row(p->phi, n).vector;
        int count = p->doc->count[n];
        for (k = 0; k < K; k++)
            vinc(p->gamma, k, vget(&phi_row, k) * count);
    }
}
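
update_gamma is the standard LDA variational update, gamma[k] = alpha[k] + sum_n count[n] * phi(n, k). The lda_post struct is specific to this project; as a point of comparison, here is a minimal stand-alone sketch of the same accumulation written against raw GSL calls only (the name update_gamma_plain and its argument list are illustrative, not from the source).

#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>

/* illustrative: gamma[k] = alpha[k] + sum_n count[n] * phi(n, k) */
void update_gamma_plain(const gsl_vector* alpha, const gsl_matrix* phi,
                        const int* count, int nterms, gsl_vector* gamma)
{
    gsl_vector_memcpy(gamma, alpha);
    for (int n = 0; n < nterms; n++)
        for (size_t k = 0; k < phi->size2; k++)
            gsl_vector_set(gamma, k, gsl_vector_get(gamma, k)
                                     + count[n] * gsl_matrix_get(phi, n, k));
}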

Example #4

/* seed each topic's sufficient statistics from randomly chosen documents */
void initialize_lda_ss_from_data(corpus_t* data, lda_suff_stats* ss) {
    int k, n, i, w;
    gsl_rng * r = new_random_number_generator();

    for (k = 0; k < ss->topics_ss->size2; k++)
    {
        gsl_vector topic = gsl_matrix_column(ss->topics_ss, k).vector;
        for (n = 0; n < LDA_SEED_INIT; n++)
        {
            int d = floor(gsl_rng_uniform(r) * data->ndocs);
            doc_t* doc = data->doc[d];
            for (i = 0; i < doc->nterms; i++)
            {
                vinc(&topic, doc->word[i], doc->count[i]);
            }
        }
        for (w = 0; w < topic.size; w++)   /* smooth each entry and add a bit of random noise */
        {
            vinc(&topic, w, LDA_INIT_SMOOTH + gsl_rng_uniform(r));
        }
    }
}
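
new_random_number_generator is another project-local helper rather than a GSL function. A minimal sketch of what it plausibly does is below, assuming it simply allocates a GSL RNG and seeds it; the actual implementation may pick the generator type and seed differently. Note that the example above never frees the generator, so a real caller would want a matching gsl_rng_free.

#include <time.h>
#include <gsl/gsl_rng.h>

/* hypothetical sketch: allocate a Tausworthe RNG and seed it from the clock */
gsl_rng* new_random_number_generator(void)
{
    gsl_rng_env_setup();
    gsl_rng* r = gsl_rng_alloc(gsl_rng_taus);
    gsl_rng_set(r, (unsigned long) time(NULL));
    return r;
}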
Example #5
File: ctr.cpp  Project: anukat2015/ctr
// variational inference for a single document: fills phi (per-word topic
// responsibilities) and gamma, optionally accumulates word sufficient
// statistics, and returns the variational likelihood bound
double c_ctr::doc_inference(const c_document* doc, const gsl_vector* theta_v,
                            const gsl_matrix* log_beta, gsl_matrix* phi,
                            gsl_vector* gamma, gsl_matrix* word_ss, 
                            bool update_word_ss) {

  double pseudo_count = 1.0;
  double likelihood = 0;
  gsl_vector* log_theta_v = gsl_vector_alloc(theta_v->size);
  gsl_vector_memcpy(log_theta_v, theta_v);
  vct_log(log_theta_v);

  int n, k, w;
  double x;
  for (n = 0; n < doc->m_length; n ++) {
    w = doc->m_words[n]; 
    for (k = 0; k < m_num_factors; k ++)
      mset(phi, n, k, vget(theta_v, k) * mget(m_beta, k, w));

    gsl_vector_view row =  gsl_matrix_row(phi, n);
    vnormalize(&row.vector);

    for (k = 0; k < m_num_factors; k ++) {
      x = mget(phi, n, k);
      if (x > 0) 
        likelihood += x*(vget(log_theta_v, k) + mget(log_beta, k, w) - log(x));
    }
  }

  if (pseudo_count > 0) {
    likelihood += pseudo_count * vsum(log_theta_v);
  }

  gsl_vector_set_all(gamma, pseudo_count); // smoothing with small pseudo counts
  for (n = 0; n < doc->m_length; n ++) {
    for (k = 0; k < m_num_factors; k ++) {
      x = doc->m_counts[n] * mget(phi, n, k);
      vinc(gamma, k, x);      
      if (update_word_ss) minc(word_ss, k, doc->m_words[n], x);
    }
  }

  gsl_vector_free(log_theta_v);
  return likelihood;
}
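
Besides the vget/mget-style accessors, doc_inference relies on a few small vector helpers from the ctr utilities: vct_log, vnormalize, and vsum. Their behaviour can be inferred from how they are used above; the sketch below captures that presumed behaviour and is not the project's actual code.

#include <math.h>
#include <gsl/gsl_vector.h>

/* presumed: elementwise natural log, in place */
void vct_log(gsl_vector* v)
{
    for (size_t i = 0; i < v->size; i++)
        gsl_vector_set(v, i, log(gsl_vector_get(v, i)));
}

/* presumed: sum of all elements */
double vsum(const gsl_vector* v)
{
    double s = 0.0;
    for (size_t i = 0; i < v->size; i++)
        s += gsl_vector_get(v, i);
    return s;
}

/* presumed: rescale so the elements sum to 1 (each phi row is a distribution) */
void vnormalize(gsl_vector* v)
{
    double s = vsum(v);
    if (s > 0)
        gsl_vector_scale(v, 1.0 / s);
}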