void update_phi(int doc_number, int time,
                lda_post* p, lda_seq* var,
                gsl_matrix* g) {
    int i, k, n, K = p->model->ntopics, N = p->doc->nterms;
    double dig[p->model->ntopics];

    for (k = 0; k < K; k++) {
        dig[k] = gsl_sf_psi(vget(p->gamma, k));
    }

    for (n = 0; n < N; n++) {
        // compute log phi up to a constant
        int w = p->doc->word[n];
        for (k = 0; k < K; k++) {
            mset(p->log_phi, n, k,
                 dig[k] + mget(p->model->topics, w, k));
        }

        // normalize in log space
        gsl_vector log_phi_row = gsl_matrix_row(p->log_phi, n).vector;
        gsl_vector phi_row = gsl_matrix_row(p->phi, n).vector;
        log_normalize(&log_phi_row);
        for (i = 0; i < K; i++) {
            vset(&phi_row, i, exp(vget(&log_phi_row, i)));
        }
    }
}
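/* The log_normalize() call above is what turns the unnormalized log-phi
 * row into log probabilities; the row itself is the standard LDA update
 * log phi_nk = Psi(gamma_k) + log beta_{k,w_n} + const.  Below is a
 * minimal sketch of such a log-space normalizer for a gsl_vector,
 * assuming the usual log-sum-exp trick; the project's actual
 * log_normalize() may differ in detail. */
#include <math.h>
#include <gsl/gsl_vector.h>

static void log_normalize_sketch(gsl_vector* v) {
    // Subtract the max before exponentiating for numerical stability.
    double max = gsl_vector_max(v);
    double sum = 0.0;
    for (size_t i = 0; i < v->size; i++)
        sum += exp(gsl_vector_get(v, i) - max);
    double log_z = max + log(sum);        // log of sum_i exp(v_i)
    gsl_vector_add_constant(v, -log_z);   // exp(v) now sums to 1
}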
/** @return Tcl error code */
static int
log_setup(int rank)
{
  log_init();
  log_normalize();

  // Did the user disable logging?
  int enabled;
  getenv_integer("TURBINE_LOG", 1, &enabled);
  if (enabled)
  {
    // Should we use a specific log file?
    char* filename = getenv("TURBINE_LOG_FILE");
    if (filename != NULL && strlen(filename) > 0)
    {
      bool b = log_file_set(filename);
      if (!b)
      {
        printf("Could not set log file: %s\n", filename);
        return TCL_ERROR;
      }
    }
    // Should we prepend the MPI rank (emulate "mpiexec -l")?
    int log_rank_enabled;
    getenv_integer("TURBINE_LOG_RANKS", 0, &log_rank_enabled);
    if (log_rank_enabled)
      log_rank_set(rank);
  }
  else
    log_enabled(false);

  return TCL_OK;
}
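/* getenv_integer() is used above with (name, default, &out) semantics.
 * A hedged sketch of such a helper follows, assuming it falls back to
 * the default when the variable is unset or empty; the project's real
 * implementation may handle malformed values differently. */
#include <stdbool.h>
#include <stdlib.h>

static bool
getenv_integer_sketch(const char* name, int dflt, int* result)
{
  const char* s = getenv(name);
  if (s == NULL || s[0] == '\0')
  {
    *result = dflt;      // unset or empty: use the default
    return true;
  }
  char* end;
  long v = strtol(s, &end, 10);
  if (end == s || *end != '\0')
    return false;        // not a plain integer
  *result = (int) v;
  return true;
}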
/*********************************************
 * Sample particles for a given document
 *
 * doc:
 *********************************************/
LatentSeq DecodeGraph(const Doc doc){
  // ----------------------------------------
  // init
  unsigned nsent = doc.size();
  LatentSeq latseq;
  // ----------------------------------------
  // for each sentence in doc, each latval, compute
  // the posterior prob p(R|cvec, sent)
  for (unsigned sidx = 0; sidx < nsent; sidx ++){
    // scores must be reset per sentence, otherwise argmax would
    // index across sentences and overrun final_hlist
    vector<float> U;
    final_hlist.clear();
    for (int val = 0; val < nlatvar; val ++){
      ComputationGraph cg;
      BuildSentGraph(doc[sidx], sidx, cg, val);
      float prob = as_scalar(cg.forward());
      U.push_back(prob);
      cg.clear();
    }
    // normalize and get the argmax
    log_normalize(U);
    // greedy decoding
    int max_idx = argmax(U);
    // get the corresponding context vector
    final_h = final_hlist[max_idx];
    // cerr << "max_latval = " << max_idx << endl;
    latseq.push_back(max_idx);
  }
  // cerr << "====" << endl;
  return latseq;
}
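// Hedged sketch of the two helpers used above: log_normalize() is
// assumed to convert the raw scores in U into normalized log
// probabilities via log-sum-exp, and argmax() to return the index of
// the largest element.  The project's own versions may differ.
#include <algorithm>
#include <cmath>
#include <vector>
using namespace std;

static void log_normalize(vector<float>& v) {
  // Subtract the max before exponentiating for numerical stability.
  float mx = *max_element(v.begin(), v.end());
  float sum = 0.0f;
  for (float x : v) sum += exp(x - mx);
  float log_z = mx + log(sum);
  for (float& x : v) x -= log_z;   // exp(v) now sums to 1
}

static int argmax(const vector<float>& v) {
  return (int) distance(v.begin(), max_element(v.begin(), v.end()));
}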
static int
Turbine_Normalize_Cmd(ClientData cdata, Tcl_Interp *interp,
                      int objc, Tcl_Obj *const objv[])
{
  TCL_ARGS(1);
  log_normalize();
  return TCL_OK;
}
// Smoke test for the log module.
// (The header name "log.h" is an assumption for this snippet.)
#include <unistd.h>  // sleep()
#include "log.h"

int
main()
{
  log_init();
  log_printf("hi");
  log_normalize();
  log_printf("ok");
  sleep(2);
  log_printf("bye");
  log_finalize();
  return 0;
}
void update_phi_fixed(int doc_number, int time,
                      lda_post* p, lda_seq* var,
                      gsl_matrix* g3_matrix,
                      gsl_matrix* g4_matrix,
                      gsl_matrix* g5_matrix) {
  // Hate to do this, but I had problems allocating this data
  // structure.
  if (scaled_influence == NULL) {
    scaled_influence = NewScaledInfluence(FLAGS_max_number_time_points);
  }

  int i, k, n, K = p->model->ntopics, N = p->doc->nterms;
  double dig[p->model->ntopics];

  double k_sum = 0.0;
  for (k = 0; k < K; k++) {
    double gamma_k = vget(p->gamma, k);
    dig[k] = gsl_sf_psi(gamma_k);
    k_sum += gamma_k;
  }
  double dig_sum = gsl_sf_psi(k_sum);  // (computed but not used below)

  gsl_vector_view document_weights;
  if (var && var->influence) {
    document_weights = gsl_matrix_row(
        var->influence->doc_weights[time], doc_number);
  }

  for (n = 0; n < N; ++n) {
    int w = p->doc->word[n];
    // We have info. about the topics. Use them!
    // Try two alternate approaches. We compare results of the new
    // algorithm with the old to make sure we're not doing anything
    // silly.
    for (k = 0; k < K; ++k) {
      // Find an estimate for log_phi_nk.
      double doc_weight = 0.0;
      sslm_var* topic = var->topic[k];
      const double chain_variance = topic->chain_variance;

      // These matrices are already set up for the correct time.
      double g3 = mget(g3_matrix, w, k);
      double g4 = mget(g4_matrix, w, k);
      double g5 = mget(g5_matrix, w, k);
      double w_phi_sum = gsl_matrix_get(
          var->topic[k]->w_phi_sum, w, time);  // (unused below)

      // Only set these variables if we are within the correct
      // time window.
      if (time < var->nseq) {
        doc_weight = gsl_vector_get(&document_weights.vector, k);
      }

      double term_weight;
      if (FLAGS_normalize_docs == "normalize") {
        term_weight = ((double) p->doc->count[n]
                       / (double) p->doc->total);
      } else if (FLAGS_normalize_docs == "log") {
        term_weight = log(p->doc->count[n] + 1.0);
      } else if (FLAGS_normalize_docs == "log_norm") {
        // Cast before dividing: count and total are integers, and
        // integer division here would truncate toward zero.
        term_weight = log((double) p->doc->count[n]
                          / (double) p->doc->total);
      } else if (FLAGS_normalize_docs == "identity") {
        term_weight = p->doc->count[n];
      } else if (FLAGS_normalize_docs == "occurrence") {
        term_weight = ((double) p->doc->count[n]
                       / (double) p->doc->total);
      } else {
        assert(0);
      }

      assert(var != NULL);
      double total, term1, term2, term3, term4;
      double phi_last = 0.0;

      // It's unnecessary to always multiply by 1/chain_variance
      // this deep in a loop, but it's likely not a major
      // bottleneck.
      term1 = dig[k] + mget(p->model->topics, w, k);
      term2 = (g3 * term_weight * doc_weight / chain_variance);
      term3 = (term_weight * doc_weight * g4 / chain_variance);
      term4 = (term_weight * term_weight
               * (phi_last * (doc_weight * doc_weight)
                  - (doc_weight * doc_weight
                     + FLAGS_sigma_l * FLAGS_sigma_l))
               * g5 / chain_variance);

      // sgerrish: 18 July 2010: Changing term2 to have a positive
      // coefficient (instead of negative) to be consistent with
      // parallel version.
      // sgerrish: 23 July 2010: changing term2 back to negative,
      // to try to reproduce earlier results.
      total = term1 - term2 - term3 + term4;
      mset(p->log_phi, n, k, total);
    }

    // Normalize in log space.
    gsl_vector log_phi_row = gsl_matrix_row(p->log_phi, n).vector;
    gsl_vector phi_row = gsl_matrix_row(p->phi, n).vector;
    log_normalize(&log_phi_row);
    for (i = 0; i < K; i++) {
      vset(&phi_row, i, exp(vget(&log_phi_row, i)));
    }
  }
}
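// Hedged sketch: the term-weight dispatch above, factored into a
// standalone helper.  The modes and formulas are taken directly from
// the code ("normalize" and "occurrence" are identical there); the
// casts are written out so "log_norm" cannot fall into integer
// division.  The helper name and signature are illustrative only.
#include <cassert>
#include <cmath>
#include <string>

static double term_weight_for(const std::string& mode, int count, int total) {
  if (mode == "normalize" || mode == "occurrence")
    return (double) count / (double) total;            // relative frequency
  if (mode == "log")
    return std::log(count + 1.0);                      // damped raw count
  if (mode == "log_norm")
    return std::log((double) count / (double) total);  // log frequency
  if (mode == "identity")
    return count;                                      // raw count
  assert(0 && "unknown normalization mode");
  return 0.0;
}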