int full_variances(uint32 ts, vector_t **mean, vector_t ***var, uint32 n_density, uint32 n_stream, uint32 *veclen, uint32 blksize, uint32 n_in_frame, codew_t *label) { uint32 *n_obs; float64 term; uint32 s, i, l, m, k, n_frame; vector_t c; E_INFO("Initializing full covariances\n"); for (s = 0; s < n_stream; s++) { n_obs = ckd_calloc(n_density, sizeof(uint32)); n_frame = setup_obs(ts, s, n_in_frame, n_stream, veclen, blksize); for (i = 0; i < n_frame; i++) { k = label[i]; /* best codeword */ n_obs[k]++; c = get_obs(i); for (l = 0; l < veclen[s]; l++) { for (m = 0; m < veclen[s]; m++) { var[s][k][l][m] += (c[l] - mean[s][k][l]) * (c[m] - mean[s][k][m]); } } } for (k = 0; k < n_density; k++) { term = 1.0 / (float64)n_obs[k]; for (l = 0; l < veclen[s]; l++) { for (m = 0; m < veclen[s]; m++) { var[s][k][l][m] *= term; } } } ckd_free(n_obs); } return 0; }
float64 cluster(uint32 ts, uint32 n_stream, uint32 n_in_frame, const uint32 *veclen, vector_t **mean, uint32 n_density, codew_t **out_label) { float64 sum_sqerr, sqerr=0; uint32 s, n_frame; const char *meth; *out_label = NULL; blksize = feat_blksize(); k_means_set_get_obs(&get_obs); for (s = 0, sum_sqerr = 0; s < n_stream; s++, sum_sqerr += sqerr) { meth = (const char *)cmd_ln_access("-method"); n_frame = setup_obs(ts, s, n_in_frame, veclen); if (strcmp(meth, "rkm") == 0) { sqerr = random_kmeans(*(uint32 *)cmd_ln_access("-ntrial"), n_frame, veclen[s], mean[s], n_density, *(float32 *)cmd_ln_access("-minratio"), *(uint32 *)cmd_ln_access("-maxiter"), out_label); if (sqerr < 0) { E_ERROR("Too few observations for kmeans\n"); return -1.0; } } else if (strcmp(meth, "fnkm") == 0) { sqerr = furthest_neighbor_kmeans(n_frame, veclen[s], mean[s], n_density, *(float32 *)cmd_ln_access("-minratio"), *(uint32 *)cmd_ln_access("-maxiter")); } else { E_ERROR("I don't know how to do method '%s'. Sorry.\n", meth); } } return sum_sqerr; }
/*
 * Estimate diagonal variances per (stream, density) from labelled frames.
 *
 * For every stream s, each frame i is assigned to density k = label[i] and
 * the squared deviation of each component from mean[s][k] is added to
 * var[s][k]; afterwards each variance vector is divided by the number of
 * frames that were assigned to that density.
 *
 * ts, n_in_frame, veclen - forwarded to setup_obs(), which returns the
 *                          number of frames to process for the stream.
 * mean       - mean[s][k][l]: mean vectors, read only.
 * var        - var[s][k][l]: accumulated in place (+=), so the caller
 *              presumably passes it zero-initialised -- TODO confirm.
 * n_density  - number of densities (codewords) per stream.
 * veclen     - veclen[s] is the vector length of stream s.
 * n_stream   - number of feature streams.
 * label      - label[i] is the density index of frame i; the same labels
 *              are used for every stream.
 *
 * Always returns 0.
 */
int
variances(uint32 ts,
          vector_t **mean,
          vector_t **var,
          uint32 n_density,
          const uint32 *veclen,
          uint32 n_in_frame,
          uint32 n_stream,
          codew_t *label)
{
    uint32 *n_obs;
    float64 term;
    uint32 s, i, l, k, n_frame;
    vector_t c;

    E_INFO("Initializing variances\n");

    for (s = 0; s < n_stream; s++) {
        /* per-density frame counts for this stream */
        n_obs = (uint32 *)ckd_calloc(n_density, sizeof(uint32));

        n_frame = setup_obs(ts, s, n_in_frame, veclen);

        for (i = 0; i < n_frame; i++) {
            k = label[i];       /* best codeword */
            n_obs[k]++;

            c = get_obs(i);

            for (l = 0; l < veclen[s]; l++) {
                term = c[l] - mean[s][k][l];
                term *= term;
                var[s][k][l] += term;
            }
        }

        for (k = 0; k < n_density; k++) {
            if (n_obs[k] == 0) {
                /* Nothing was accumulated for this density; dividing by a
                 * zero count would turn its entries into NaN/Inf. */
                E_INFO("Stream %u density %u has no observations; variance left unnormalized\n",
                       s, k);
                continue;
            }

            term = 1.0 / (float64)n_obs[k];

            for (l = 0; l < veclen[s]; l++) {
                var[s][k][l] *= term;
            }
        }

        ckd_free(n_obs);
    }

    return 0;
}
float64 reest_sum(uint32 ts, vector_t **mean, vector_t **var, float32 **mixw, uint32 n_density, uint32 n_stream, uint32 n_in_obs, const uint32 *veclen, uint32 n_iter, uint32 twopassvar, uint32 vartiethr) { uint32 o, i, j, k, l; float32 *mixw_acc; float32 *cb_acc; vector_t **mean_acc_xx; vector_t **var_acc_xx; vector_t *mean_acc; vector_t *var_acc; float64 ol, ttt, diff, log_tot_ol = 0, p_log_tot_ol = 0; float64 **norm, *den; float64 log_a_den=0; float32 mixw_norm; vector_t obs; uint32 n_obs; vector_t ***n_mean_xx = NULL; vector_t *n_mean = NULL; float64 avg_lik=0, p_avg_lik=0; uint32 tievar = FALSE; E_INFO("EM reestimation of mixw/means/vars\n"); if (twopassvar) { n_mean_xx = gauden_alloc_param(1, 1, n_density, veclen); n_mean = n_mean_xx[0][0]; } /* allocate mixing weight accumulators */ mixw_acc = (float32 *)ckd_calloc(n_density, sizeof(float32)); cb_acc = (float32 *)ckd_calloc(n_density, sizeof(float32)); mean_acc_xx = (vector_t **)alloc_gau_acc(1, n_density, veclen, feat_blksize()); mean_acc = mean_acc_xx[0]; var_acc_xx = (vector_t **)alloc_gau_acc(1, n_density, veclen, feat_blksize()); var_acc = var_acc_xx[0]; den = (float64 *)ckd_calloc(n_density, sizeof(float64)); norm = (float64 **)ckd_calloc_2d(n_stream, n_density, sizeof(float64)); for (j = 0; j < n_stream; j++) { n_obs = setup_obs(ts, j, n_in_obs, veclen); if (n_obs < vartiethr) tievar = TRUE; for (i = 0; i < n_iter; i++) { p_log_tot_ol = log_tot_ol; log_tot_ol = 0; for (k = 0; k < n_density; k++) { /* floor variances */ for (l = 0; l < veclen[j]; l++) if (var[j][k][l] < 1e-4) var[j][k][l] = 1e-4; /* compute normalization factors for Gaussian densities */ norm[j][k] = diag_norm(var[j][k], veclen[j]); /* precompute 1/(2sigma^2) terms */ diag_eval_precomp(var[j][k], veclen[j]); } if (twopassvar) { /* do a pass over the corpus to compute reestimated means */ for (o = 0; o < n_obs; o++) { float64 mx; obs = get_obs(o); mx = MIN_NEG_FLOAT64; for (k = 0; k < n_density; k++) { /* really log(den) for the moment 
*/ den[k] = log_diag_eval(obs, norm[j][k], mean[j][k], var[j][k], veclen[j]); if (mx < den[k]) mx = den[k]; } for (k = 0, ol = 0; k < n_density; k++) { den[k] = exp(log_a_den - mx); ol += mixw[j][k] * den[k]; } for (k = 0; k < n_density; k++) { ttt = mixw[j][k] * den[k] / ol; cb_acc[k] += ttt; for (l = 0; l < veclen[j]; l++) { mean_acc[k][l] += obs[l] * ttt; } } } cb_acc[0] = 1.0 / cb_acc[0]; for (k = 1; k < n_density; k++) { cb_acc[k] = 1.0 / cb_acc[k]; } /* compute the reestimated mean value to be used in next pass */ for (k = 0; k < n_density; k++) { for (l = 0; l < veclen[j]; l++) { n_mean[k][l] = mean_acc[k][l] * cb_acc[k]; mean_acc[k][l] = 0; } cb_acc[k] = 0; } } else { n_mean = mean[j]; } for (o = 0; o < n_obs; o++) { float64 mx; /* Do a pass over the data to accumulate reestimation sums * for the remaining parameters (including means * if not a 2-pass config) */ /* Get the next observation */ obs = get_obs(o); mx = MIN_NEG_FLOAT64; /* Compute the mixture density value given the * observation and the model parameters */ for (k = 0; k < n_density; k++) { /* really log(den) for the moment */ den[k] = log_diag_eval(obs, norm[j][k], mean[j][k], var[j][k], veclen[j]); if (mx < den[k]) mx = den[k]; } for (k = 0, ol = 0; k < n_density; k++) { den[k] = exp(den[k] - mx); ol += mixw[j][k] * den[k]; } log_tot_ol += log(ol) + mx; /* Compute the reestimation sum terms for each * of the component densities */ for (k = 0; k < n_density; k++) { ttt = mixw[j][k] * den[k] / ol; mixw_acc[k] += ttt; cb_acc[k] += ttt; for (l = 0; l < veclen[j]; l++) { /* if not doing two-pass variance computation * n_mean <- mean above. 
*/ diff = obs[l] - n_mean[k][l]; if (!twopassvar) { mean_acc[k][l] += ttt * obs[l]; } var_acc[k][l] += ttt * diff * diff; } } } avg_lik = exp(log_tot_ol / n_obs); if (p_log_tot_ol != 0) p_avg_lik = exp(p_log_tot_ol / n_obs); else p_avg_lik = 0.5 * avg_lik; E_INFO("EM stream %u: [%u] avg_lik %e conv_ratio %e\n", j, i, avg_lik, (avg_lik - p_avg_lik) / p_avg_lik); /* normalize after iteration */ if (tievar) { /* create a sum over all densities in entry 0 */ for (k = 1; k < n_density; k++) { for (l = 0; l < veclen[j]; l++) { var[j][0][l] += var[j][k][l]; } cb_acc[0] += cb_acc[k]; } /* copy entry 0 back to remaining entries */ for (k = 1; k < n_density; k++) { for (l = 0; l < veclen[j]; l++) { var[j][k][l] = var[j][0][l]; } cb_acc[k] = cb_acc[0]; } } for (k = 0, mixw_norm = 0; k < n_density; k++) { /* norm for per density expectations */ cb_acc[k] = 1.0 / cb_acc[k]; mixw_norm += mixw_acc[k]; } mixw_norm = 1.0 / mixw_norm; if (!twopassvar) { for (k = 0; k < n_density; k++) { mixw[j][k] = mixw_acc[k] * mixw_norm; mixw_acc[k] = 0; for (l = 0; l < veclen[j]; l++) { mean[j][k][l] = mean_acc[k][l] * cb_acc[k]; mean_acc[k][l] = 0; var[j][k][l] = var_acc[k][l] * cb_acc[k]; var_acc[k][l] = 0; } cb_acc[k] = 0; } } else { for (k = 0; k < n_density; k++) { mixw[j][k] = mixw_acc[k] * mixw_norm; mixw_acc[k] = 0; for (l = 0; l < veclen[j]; l++) { /* already computed in first pass */ mean[j][k][l] = n_mean[k][l]; var[j][k][l] = var_acc[k][l] * cb_acc[k]; var_acc[k][l] = 0; } cb_acc[k] = 0; } } } /* end of EM iteration loop */ E_INFO("EM stream %u: [final] n_obs %u avg_lik %e conv_ratio %e\n", j, n_obs, avg_lik, (avg_lik - p_avg_lik) / p_avg_lik); } /* end of feature stream loop */ ckd_free((void *)mixw_acc); ckd_free((void *)cb_acc); ckd_free((void *)&mean_acc_xx[0][0][0]); ckd_free_2d((void **)mean_acc_xx); ckd_free((void *)&var_acc_xx[0][0][0]); ckd_free_2d((void **)var_acc_xx); if (n_mean_xx) { ckd_free((void *)&n_mean_xx[0][0][0]); ckd_free_2d((void **)n_mean); } ckd_free_2d((void 
**)norm); ckd_free((void *)den); return log_tot_ol; }
/***********************************************************************//** * @brief Generate the model map(s) * * This method reads the task parameters from the parfile, sets up the * observation container, loops over all CTA observations in the container * and generates a model map for each CTA observation. ***************************************************************************/ void ctmodel::run(void) { // If we're in debug mode then all output is also dumped on the screen if (logDebug()) { log.cout(true); } // Get task parameters get_parameters(); // Setup observation container setup_obs(); // Write parameters into logger if (logTerse()) { log_parameters(); log << std::endl; } // Write observation(s) into logger if (logTerse()) { log << std::endl; if (m_obs.size() > 1) { log.header1("Observations"); } else { log.header1("Observation"); } log << m_obs << std::endl; } // Write header if (logTerse()) { log << std::endl; if (m_obs.size() > 1) { log.header1("Generate model maps"); } else { log.header1("Generate model map"); } } // Initialise observation counter int n_observations = 0; // Loop over all observations in the container for (int i = 0; i < m_obs.size(); ++i) { // Initialise event input and output filenames m_infiles.push_back(""); // Get CTA observation GCTAObservation* obs = dynamic_cast<GCTAObservation*>(m_obs[i]); // Continue only if observation is a CTA observation if (obs != NULL) { // Write header for observation if (logTerse()) { if (obs->name().length() > 1) { log.header3("Observation "+obs->name()); } else { log.header3("Observation"); } } // Increment number of observations n_observations++; // Save event file name (for possible saving) m_infiles[i] = obs->eventfile(); // Generate model map model_map(obs, m_obs.models()); } // endif: CTA observation found } // endfor: looped over observations // If more than a single observation has been handled then make sure // that an XML file will be used for storage if (n_observations > 1) { m_use_xml 
= true; } // Write observation(s) into logger if (logTerse()) { log << std::endl; if (m_obs.size() > 1) { log.header1("Observations after model map generation"); } else { log.header1("Observation after model map generation"); } log << m_obs << std::endl; } // Return return; }