void update_phi(int doc_number, int time,
                lda_post* p, lda_seq* var,
                gsl_matrix* g) {
    int i, k, n, K = p->model->ntopics, N = p->doc->nterms;
    double dig[p->model->ntopics];

    // Digamma of each variational Dirichlet parameter.
    for (k = 0; k < K; k++) {
      dig[k] = gsl_sf_psi(vget(p->gamma, k));
    }

    for (n = 0; n < N; n++) {
      // Compute log phi up to an additive constant.
      int w = p->doc->word[n];
      for (k = 0; k < K; k++) {
        mset(p->log_phi, n, k,
             dig[k] + mget(p->model->topics, w, k));
      }

      // Normalize in log space, then exponentiate into phi.
      gsl_vector log_phi_row = gsl_matrix_row(p->log_phi, n).vector;
      gsl_vector phi_row = gsl_matrix_row(p->phi, n).vector;
      log_normalize(&log_phi_row);
      for (i = 0; i < K; i++) {
        vset(&phi_row, i, exp(vget(&log_phi_row, i)));
      }
    }
}
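This function finishes by shifting each row of log phi so that its exponentials sum to one. The `log_normalize` helper itself is not shown on this page; the sketch below assumes a standard log-sum-exp implementation matching the `gsl_vector*` call sites here, and is illustrative rather than the project's actual code. (Note that Examples #2, #4, and #5 call an unrelated zero-argument `log_normalize` from a logging library that merely shares the name.)

#include <gsl/gsl_vector.h>
#include <math.h>

// Shift v in place so that the exponentials of its entries sum to 1.
static void log_normalize(gsl_vector* v) {
  // Subtract the max before exponentiating, for numerical stability.
  double max = gsl_vector_max(v);
  double sum = 0.0;
  for (size_t i = 0; i < v->size; i++) {
    sum += exp(gsl_vector_get(v, i) - max);
  }
  gsl_vector_add_constant(v, -(max + log(sum)));
}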
Example #2
/**
   @return Tcl error code
*/
static int
log_setup(int rank)
{
  log_init();
  log_normalize();

  // Did the user disable logging?
  int enabled;
  getenv_integer("TURBINE_LOG", 1, &enabled);
  if (enabled)
  {
    // Should we use a specific log file?
    char* filename = getenv("TURBINE_LOG_FILE");
    if (filename != NULL && strlen(filename) > 0)
    {
      bool b = log_file_set(filename);
      if (!b)
      {
        printf("Could not set log file: %s", filename);
        return TCL_ERROR;
      }
    }
    // Should we prepend the MPI rank (emulate "mpiexec -l")?
    int log_rank_enabled;
    getenv_integer("TURBINE_LOG_RANKS", 0, &log_rank_enabled);
    if (log_rank_enabled)
      log_rank_set(rank);
  }
  else
    log_enabled(false);

  return TCL_OK;
}
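`getenv_integer` is not defined in these snippets. Judging from the call sites (name, default value, output pointer), it parses an integer environment variable and falls back to the default when the variable is unset; a minimal sketch under that assumption, not the project's actual helper:

#include <stdbool.h>
#include <stdlib.h>

// Parse integer environment variable `name` into *result, using dflt
// when unset or empty.  Returns false if the value is set but is not
// a valid integer.
static bool getenv_integer(const char* name, int dflt, int* result)
{
  const char* s = getenv(name);
  if (s == NULL || *s == '\0')
  {
    *result = dflt;
    return true;
  }
  char* end;
  long v = strtol(s, &end, 10);
  if (*end != '\0')
    return false;
  *result = (int) v;
  return true;
}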
Example #3
  /*********************************************
   * Decode the latent value sequence for a
   * given document by greedy argmax
   *
   * doc: the document whose sentences are decoded
   *********************************************/
  LatentSeq DecodeGraph(const Doc doc){
    // init
    int nsent = doc.size();
    LatentSeq latseq;
    // For each sentence in doc and each latent value, compute
    // the posterior prob p(R | cvec, sent).
    vector<float> U;
    for (int sidx = 0; sidx < nsent; sidx++){
      final_hlist.clear();
      for (int val = 0; val < nlatvar; val++){
        ComputationGraph cg;
        BuildSentGraph(doc[sidx], sidx, cg, val);
        float prob = as_scalar(cg.forward());
        U.push_back(prob);
        cg.clear();
      }
      // Normalize in log space and greedily take the argmax.
      log_normalize(U);
      int max_idx = argmax(U);
      // Keep the context vector for the winning latent value.
      final_h = final_hlist[max_idx];
      U.clear();
      latseq.push_back(max_idx);
    }
    return latseq;
  }
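Here `log_normalize` operates on a `std::vector<float>` of per-latent-value scores and `argmax` picks the winner. Neither helper is shown on this page; plausible implementations, assuming log-sum-exp normalization, might look like this:

#include <algorithm>
#include <cmath>
#include <vector>

// Shift log scores so their exponentials sum to 1 (log-sum-exp trick).
void log_normalize(std::vector<float>& u) {
  float max = *std::max_element(u.begin(), u.end());
  float sum = 0.0f;
  for (float x : u) sum += std::exp(x - max);
  float log_sum = max + std::log(sum);
  for (float& x : u) x -= log_sum;
}

// Index of the largest element.
int argmax(const std::vector<float>& u) {
  return std::max_element(u.begin(), u.end()) - u.begin();
}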
Example #4
static int
Turbine_Normalize_Cmd(ClientData cdata, Tcl_Interp *interp,
                      int objc, Tcl_Obj *const objv[])
{
  TCL_ARGS(1);
  log_normalize();
  return TCL_OK;
}
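A Tcl object command like this one is typically registered with the interpreter through the standard Tcl C API, as sketched below. The command name "turbine::normalize" is a guess based on the function name, not confirmed by these snippets.

#include <tcl.h>

static void
register_commands(Tcl_Interp* interp)
{
  Tcl_CreateObjCommand(interp, "turbine::normalize",
                       Turbine_Normalize_Cmd, NULL, NULL);
}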
Example #5
#include <unistd.h>  // sleep()
// Assumed project logging header providing log_init, log_printf,
// log_normalize, and log_finalize; the actual header name may differ.
#include "log.h"

int
main()
{
  log_init();

  log_printf("hi");

  log_normalize();

  log_printf("ok");
  sleep(2);
  log_printf("bye");

  log_finalize();
  return 0;
}
Example #6
void update_phi_fixed(int doc_number, int time,
                      lda_post* p, lda_seq* var,
                      gsl_matrix* g3_matrix,
                      gsl_matrix* g4_matrix,
                      gsl_matrix* g5_matrix) {
    // Hate to do this, but I had problems allocating this data
    // structure.
    if (scaled_influence == NULL) {
      scaled_influence = NewScaledInfluence(FLAGS_max_number_time_points);
    }

    int i, k, n, K = p->model->ntopics, N = p->doc->nterms;
    double dig[p->model->ntopics];

    double k_sum = 0.0;
    for (k = 0; k < K; k++) {
        double gamma_k = vget(p->gamma, k);
        dig[k] = gsl_sf_psi(gamma_k);
        k_sum += gamma_k;
    }
    double dig_sum = gsl_sf_psi(k_sum);

    gsl_vector_view document_weights;
    if (var && var->influence) {
      document_weights = gsl_matrix_row(
          var->influence->doc_weights[time], doc_number);
    }

    for (n = 0; n < N; ++n) {
      int w = p->doc->word[n];
      // We have information about the topics, so use it. We try two
      // alternate approaches and compare results of the new algorithm
      // with the old to make sure we're not doing anything silly.

      for (k = 0; k < K; ++k) {
	// Find an estimate for log_phi_nk.
	double doc_weight = 0.0;
	sslm_var* topic = var->topic[k];
	const double chain_variance = topic->chain_variance;

	// These matrices are already set up for the correct time.
	double g3 = mget(g3_matrix, w, k);
	double g4 = mget(g4_matrix, w, k);
	double g5 = mget(g5_matrix, w, k);
	double w_phi_sum = gsl_matrix_get(
	    var->topic[k]->w_phi_sum, w, time);

	// Only set these variables if we are within the correct
	// time window.
	if (time < var->nseq) {
	  doc_weight = gsl_vector_get(&document_weights.vector, k);
	}
	
	double term_weight;
	if (FLAGS_normalize_docs == "normalize") {
	  term_weight = ((double) p->doc->count[n]
			 / (double) p->doc->total);
	} else if (FLAGS_normalize_docs == "log") {
	  term_weight = log(p->doc->count[n] + 1.0);
	} else if (FLAGS_normalize_docs == "log_norm") {
	  term_weight = log(p->doc->count[n] / p->doc->total);
	} else if (FLAGS_normalize_docs == "identity") {
	  term_weight = p->doc->count[n];
	} else if (FLAGS_normalize_docs == "occurrence") {
	  term_weight = ((double) p->doc->count[n]
			 / (double) p->doc->total);
	} else {
	  assert(0);
	}

        assert(var != NULL);

        double total, term1, term2, term3, term4;
        double phi_last = 0.0;

        // It's unnecessary to always multiply by 1/chain_variance
        // this deep in a loop, but it's likely not a major
        // bottleneck.
        term1 = dig[k] + mget(p->model->topics, w, k);
        term2 = (g3
                 * term_weight
                 * doc_weight
                 / chain_variance);
        term3 = (term_weight
                 * doc_weight
                 * g4
                 / chain_variance);
        term4 = (term_weight * term_weight
                 * (phi_last * (doc_weight * doc_weight)
                    - (doc_weight * doc_weight
                       + FLAGS_sigma_l * FLAGS_sigma_l))
                 * g5
                 / chain_variance);

        // Note: we're ignoring term3.  sgerrish: 18 July 2010:
        // Changing term2 to have a positive coefficient (instead of
        // negative) to be consistent with parallel version.
        // sgerrish: 23 July 2010: changing term2 back to negative,
        // to try to reproduce earlier results.
        total = term1 - term2 - term3 + term4;
        mset(p->log_phi, n, k, total);
      }
      
      // Normalize in log space.
      gsl_vector log_phi_row = gsl_matrix_row(p->log_phi, n).vector;
      gsl_vector phi_row = gsl_matrix_row(p->phi, n).vector;
      log_normalize(&log_phi_row);

      for (i = 0; i < K; i++) {
        vset(&phi_row, i, exp(vget(&log_phi_row, i)));
      }
    }
}
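The `vget`/`vset`/`mget`/`mset` helpers used throughout the two update_phi functions are not shown on this page either. Judging from their call sites they behave like thin wrappers over the GSL element accessors; a minimal sketch under that assumption:

#include <gsl/gsl_matrix.h>
#include <gsl/gsl_vector.h>

// Assumed convenience wrappers; the project's real versions may add
// bounds checks or use different integer types.
static inline double vget(const gsl_vector* v, int i)
{ return gsl_vector_get(v, i); }

static inline void vset(gsl_vector* v, int i, double x)
{ gsl_vector_set(v, i, x); }

static inline double mget(const gsl_matrix* m, int i, int j)
{ return gsl_matrix_get(m, i, j); }

static inline void mset(gsl_matrix* m, int i, int j, double x)
{ gsl_matrix_set(m, i, j, x); }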