float64
find_farthest_neigh(uint32 *obs_subset,
                    uint32 n_obs_subset,
                    uint32 veclen,
                    vector_t neigh_a,
                    vector_t neigh_b)
{
    uint32 i, j;
    uint32 i_corp, j_corp;
    uint32 i_corp_max = 0, j_corp_max = 0;
    float64 d;
    float64 d_max = 0;
    float64 diff;
    uint32 l;
    vector_t c_i, c_j;

    /* Exhaustive O(n^2) scan for the pair of observations in the
     * subset with the largest squared Euclidean distance */
    for (i = 0; i < n_obs_subset - 1; i++) {
        for (j = i + 1; j < n_obs_subset; j++) {
            i_corp = obs_subset[i];
            j_corp = obs_subset[j];

            c_i = get_obs(i_corp);
            c_j = get_obs(j_corp);

            for (l = 0, d = 0; l < veclen; l++) {
                diff = c_i[l] - c_j[l];
                d += diff * diff;
            }

            if (d > d_max) {
                d_max = d;
                i_corp_max = i_corp;
                j_corp_max = j_corp;
            }
        }
    }

    /* Copy the farthest pair out to the caller */
    c_i = get_obs(i_corp_max);
    c_j = get_obs(j_corp_max);

    for (l = 0; l < veclen; l++) {
        neigh_a[l] = c_i[l];
        neigh_b[l] = c_j[l];
    }

    return d_max;
}
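/* Usage sketch (an addition, not part of the original source): the farthest
 * pair returned by find_farthest_neigh() is a natural choice of seeds when
 * splitting one cluster into two.  This assumes vector_t is float32*, as
 * the ckd_calloc_2d(..., sizeof(float32)) allocations elsewhere in this
 * file suggest. */
static void
split_seed_example(uint32 *obs_subset, uint32 n_obs_subset, uint32 veclen)
{
    float32 *a = (float32 *)ckd_calloc(veclen, sizeof(float32));
    float32 *b = (float32 *)ckd_calloc(veclen, sizeof(float32));
    float64 sep;

    /* a and b are filled with the two most distant observations */
    sep = find_farthest_neigh(obs_subset, n_obs_subset, veclen, a, b);
    E_INFO("split seeds separated by squared distance %e\n", sep);

    ckd_free(a);
    ckd_free(b);
}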
int
full_variances(uint32 ts,
               vector_t **mean,
               vector_t ***var,
               uint32 n_density,
               uint32 n_stream,
               uint32 *veclen,
               uint32 blksize,
               uint32 n_in_frame,
               codew_t *label)
{
    uint32 *n_obs;
    float64 term;
    uint32 s, i, l, m, k, n_frame;
    vector_t c;

    E_INFO("Initializing full covariances\n");

    for (s = 0; s < n_stream; s++) {
        n_obs = ckd_calloc(n_density, sizeof(uint32));

        n_frame = setup_obs(ts, s, n_in_frame, n_stream, veclen, blksize);

        /* Accumulate the outer products (x - mean)(x - mean)^T, assigning
         * each frame to its best codeword */
        for (i = 0; i < n_frame; i++) {
            k = label[i];       /* best codeword */
            n_obs[k]++;

            c = get_obs(i);
            for (l = 0; l < veclen[s]; l++) {
                for (m = 0; m < veclen[s]; m++) {
                    var[s][k][l][m] +=
                        (c[l] - mean[s][k][l]) * (c[m] - mean[s][k][m]);
                }
            }
        }

        /* Normalize each accumulated covariance by its observation count
         * (assumes every codeword was the best label for at least one
         * frame, otherwise n_obs[k] is zero) */
        for (k = 0; k < n_density; k++) {
            term = 1.0 / (float64)n_obs[k];
            for (l = 0; l < veclen[s]; l++) {
                for (m = 0; m < veclen[s]; m++) {
                    var[s][k][l][m] *= term;
                }
            }
        }

        ckd_free(n_obs);
    }

    return 0;
}
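/* Minimal standalone illustration (added for clarity, not original source)
 * of the accumulation step above: one observation's contribution to a full
 * covariance matrix is the outer product (x - mu)(x - mu)^T.  Plain C
 * doubles are used here in place of the project's vector_t/float64 types. */
static void
accum_outer_product(const double *x, const double *mu,
                    double *cov /* veclen x veclen, row-major */,
                    unsigned veclen)
{
    unsigned l, m;

    for (l = 0; l < veclen; l++)
        for (m = 0; m < veclen; m++)
            cov[l * veclen + m] += (x[l] - mu[l]) * (x[m] - mu[m]);
}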
int
variances(uint32 ts,
          vector_t **mean,
          vector_t **var,
          uint32 n_density,
          const uint32 *veclen,
          uint32 n_in_frame,
          uint32 n_stream,
          codew_t *label)
{
    uint32 *n_obs;
    float64 term;
    uint32 s, i, l, k, n_frame;
    vector_t c;

    E_INFO("Initializing variances\n");

    for (s = 0; s < n_stream; s++) {
        n_obs = ckd_calloc(n_density, sizeof(uint32));

        n_frame = setup_obs(ts, s, n_in_frame, veclen);

        /* Accumulate squared deviations from the mean, assigning each
         * frame to its best codeword */
        for (i = 0; i < n_frame; i++) {
            k = label[i];       /* best codeword */
            n_obs[k]++;

            c = get_obs(i);
            for (l = 0; l < veclen[s]; l++) {
                term = c[l] - mean[s][k][l];
                term *= term;

                var[s][k][l] += term;
            }
        }

        /* Normalize each accumulated variance by its observation count
         * (assumes every codeword labeled at least one frame) */
        for (k = 0; k < n_density; k++) {
            term = 1.0 / (float64)n_obs[k];
            for (l = 0; l < veclen[s]; l++) {
                var[s][k][l] *= term;
            }
        }

        ckd_free(n_obs);
    }

    return 0;
}
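/* Companion sketch (added, not original source): the diagonal case above
 * keeps only per-dimension variances, i.e. for each dimension l it computes
 * the maximum-likelihood estimate sigma_l^2 = (1/N) * sum_i (x_i[l] - mu[l])^2.
 * Shown here for a single dimension with plain C types. */
static double
sample_variance_1d(const double *x, unsigned n, double mu)
{
    unsigned i;
    double d, acc = 0.0;

    for (i = 0; i < n; i++) {
        d = x[i] - mu;
        acc += d * d;
    }

    return acc / (double)n;     /* biased (ML) estimate, as in variances() */
}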
float64
reest_sum(uint32 ts,
          vector_t **mean,
          vector_t **var,
          float32 **mixw,
          uint32 n_density,
          uint32 n_stream,
          uint32 n_in_obs,
          const uint32 *veclen,
          uint32 n_iter,
          uint32 twopassvar,
          uint32 vartiethr)
{
    uint32 o, i, j, k, l;
    float32 *mixw_acc;
    float32 *cb_acc;
    vector_t **mean_acc_xx;
    vector_t **var_acc_xx;
    vector_t *mean_acc;
    vector_t *var_acc;
    float64 ol, ttt, diff, log_tot_ol = 0, p_log_tot_ol = 0;
    float64 **norm, *den;
    float32 mixw_norm;
    vector_t obs;
    uint32 n_obs;
    vector_t ***n_mean_xx = NULL;
    vector_t *n_mean = NULL;
    float64 avg_lik = 0, p_avg_lik = 0;
    uint32 tievar = FALSE;

    E_INFO("EM reestimation of mixw/means/vars\n");

    if (twopassvar) {
        n_mean_xx = gauden_alloc_param(1, 1, n_density, veclen);
        n_mean = n_mean_xx[0][0];
    }

    /* Allocate mixing weight accumulators */
    mixw_acc = (float32 *)ckd_calloc(n_density, sizeof(float32));
    cb_acc = (float32 *)ckd_calloc(n_density, sizeof(float32));

    mean_acc_xx = (vector_t **)alloc_gau_acc(1, n_density, veclen, feat_blksize());
    mean_acc = mean_acc_xx[0];
    var_acc_xx = (vector_t **)alloc_gau_acc(1, n_density, veclen, feat_blksize());
    var_acc = var_acc_xx[0];

    den = (float64 *)ckd_calloc(n_density, sizeof(float64));
    norm = (float64 **)ckd_calloc_2d(n_stream, n_density, sizeof(float64));

    for (j = 0; j < n_stream; j++) {
        n_obs = setup_obs(ts, j, n_in_obs, veclen);

        if (n_obs < vartiethr)
            tievar = TRUE;

        for (i = 0; i < n_iter; i++) {
            p_log_tot_ol = log_tot_ol;
            log_tot_ol = 0;

            for (k = 0; k < n_density; k++) {
                /* Floor the variances */
                for (l = 0; l < veclen[j]; l++)
                    if (var[j][k][l] < 1e-4)
                        var[j][k][l] = 1e-4;

                /* Compute the normalization factor for each Gaussian density */
                norm[j][k] = diag_norm(var[j][k], veclen[j]);

                /* Precompute the 1/(2 sigma^2) terms */
                diag_eval_precomp(var[j][k], veclen[j]);
            }

            if (twopassvar) {
                /* Do a pass over the corpus to compute the reestimated means */
                for (o = 0; o < n_obs; o++) {
                    float64 mx;

                    obs = get_obs(o);
                    mx = MIN_NEG_FLOAT64;

                    for (k = 0; k < n_density; k++) {
                        /* really log(den) for the moment */
                        den[k] = log_diag_eval(obs, norm[j][k],
                                               mean[j][k], var[j][k],
                                               veclen[j]);
                        if (mx < den[k])
                            mx = den[k];
                    }

                    /* Scale by the max log density before exponentiating */
                    for (k = 0, ol = 0; k < n_density; k++) {
                        den[k] = exp(den[k] - mx);
                        ol += mixw[j][k] * den[k];
                    }

                    /* Accumulate the posterior-weighted observations */
                    for (k = 0; k < n_density; k++) {
                        ttt = mixw[j][k] * den[k] / ol;

                        cb_acc[k] += ttt;
                        for (l = 0; l < veclen[j]; l++) {
                            mean_acc[k][l] += obs[l] * ttt;
                        }
                    }
                }

                for (k = 0; k < n_density; k++) {
                    cb_acc[k] = 1.0 / cb_acc[k];
                }

                /* Compute the reestimated mean values to be used in the next pass */
                for (k = 0; k < n_density; k++) {
                    for (l = 0; l < veclen[j]; l++) {
                        n_mean[k][l] = mean_acc[k][l] * cb_acc[k];
                        mean_acc[k][l] = 0;
                    }
                    cb_acc[k] = 0;
                }
            }
            else {
                n_mean = mean[j];
            }

            /* Do a pass over the data to accumulate the reestimation sums
             * for the remaining parameters (including the means if not a
             * 2-pass config) */
            for (o = 0; o < n_obs; o++) {
                float64 mx;

                /* Get the next observation */
                obs = get_obs(o);

                mx = MIN_NEG_FLOAT64;

                /* Compute the mixture density value given the observation
                 * and the model parameters */
                for (k = 0; k < n_density; k++) {
                    /* really log(den) for the moment */
                    den[k] = log_diag_eval(obs, norm[j][k],
                                           mean[j][k], var[j][k],
                                           veclen[j]);
                    if (mx < den[k])
                        mx = den[k];
                }
                for (k = 0, ol = 0; k < n_density; k++) {
                    den[k] = exp(den[k] - mx);
                    ol += mixw[j][k] * den[k];
                }

                log_tot_ol += log(ol) + mx;

                /* Compute the reestimation sum terms for each of the
                 * component densities */
                for (k = 0; k < n_density; k++) {
                    ttt = mixw[j][k] * den[k] / ol;

                    mixw_acc[k] += ttt;
                    cb_acc[k] += ttt;

                    for (l = 0; l < veclen[j]; l++) {
                        /* If not doing two-pass variance computation,
                         * n_mean == mean[j] (see above) */
                        diff = obs[l] - n_mean[k][l];

                        if (!twopassvar) {
                            mean_acc[k][l] += ttt * obs[l];
                        }
                        var_acc[k][l] += ttt * diff * diff;
                    }
                }
            }

            avg_lik = exp(log_tot_ol / n_obs);
            if (p_log_tot_ol != 0)
                p_avg_lik = exp(p_log_tot_ol / n_obs);
            else
                p_avg_lik = 0.5 * avg_lik;

            E_INFO("EM stream %u: [%u] avg_lik %e conv_ratio %e\n",
                   j, i, avg_lik, (avg_lik - p_avg_lik) / p_avg_lik);

            /* Normalize after the iteration */

            if (tievar) {
                /* Create a sum over all densities in entry 0 */
                for (k = 1; k < n_density; k++) {
                    for (l = 0; l < veclen[j]; l++) {
                        var[j][0][l] += var[j][k][l];
                    }
                    cb_acc[0] += cb_acc[k];
                }
                /* Copy entry 0 back to the remaining entries */
                for (k = 1; k < n_density; k++) {
                    for (l = 0; l < veclen[j]; l++) {
                        var[j][k][l] = var[j][0][l];
                    }
                    cb_acc[k] = cb_acc[0];
                }
            }

            for (k = 0, mixw_norm = 0; k < n_density; k++) {
                /* Norm for the per-density expectations */
                cb_acc[k] = 1.0 / cb_acc[k];
                mixw_norm += mixw_acc[k];
            }
            mixw_norm = 1.0 / mixw_norm;

            if (!twopassvar) {
                for (k = 0; k < n_density; k++) {
                    mixw[j][k] = mixw_acc[k] * mixw_norm;
                    mixw_acc[k] = 0;

                    for (l = 0; l < veclen[j]; l++) {
                        mean[j][k][l] = mean_acc[k][l] * cb_acc[k];
                        mean_acc[k][l] = 0;

                        var[j][k][l] = var_acc[k][l] * cb_acc[k];
                        var_acc[k][l] = 0;
                    }
                    cb_acc[k] = 0;
                }
            }
            else {
                for (k = 0; k < n_density; k++) {
                    mixw[j][k] = mixw_acc[k] * mixw_norm;
                    mixw_acc[k] = 0;

                    for (l = 0; l < veclen[j]; l++) {
                        /* Means already computed in the first pass */
                        mean[j][k][l] = n_mean[k][l];

                        var[j][k][l] = var_acc[k][l] * cb_acc[k];
                        var_acc[k][l] = 0;
                    }
                    cb_acc[k] = 0;
                }
            }
        } /* end of the EM iteration loop */

        E_INFO("EM stream %u: [final] n_obs %u avg_lik %e conv_ratio %e\n",
               j, n_obs, avg_lik, (avg_lik - p_avg_lik) / p_avg_lik);
    } /* end of the feature stream loop */

    ckd_free((void *)mixw_acc);
    ckd_free((void *)cb_acc);

    ckd_free((void *)&mean_acc_xx[0][0][0]);
    ckd_free_2d((void **)mean_acc_xx);
    ckd_free((void *)&var_acc_xx[0][0][0]);
    ckd_free_2d((void **)var_acc_xx);

    if (n_mean_xx) {
        ckd_free((void *)&n_mean_xx[0][0][0]);
        ckd_free_2d((void **)n_mean_xx);
    }

    ckd_free_2d((void **)norm);
    ckd_free((void *)den);

    return log_tot_ol;
}
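/* Standalone sketch (added, not part of the original source) of the E-step
 * scaling trick used twice in reest_sum() above: log densities are shifted
 * by their maximum before exponentiation so exp() cannot underflow for all
 * components at once, and the shift is added back when accumulating the log
 * likelihood.  Plain C types stand in for the project's float32/float64. */
#include <math.h>

static double
mixture_posteriors(const double *log_den,   /* per-component log densities */
                   const float *mixw,       /* mixing weights */
                   unsigned n_density,
                   double *post)            /* out: responsibilities */
{
    unsigned k;
    double mx, ol = 0.0;

    mx = log_den[0];
    for (k = 1; k < n_density; k++)
        if (log_den[k] > mx)
            mx = log_den[k];

    for (k = 0; k < n_density; k++)
        ol += mixw[k] * exp(log_den[k] - mx);

    for (k = 0; k < n_density; k++)
        post[k] = mixw[k] * exp(log_den[k] - mx) / ol;

    return log(ol) + mx;    /* log mixture likelihood of this observation */
}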
static float32
random_kmeans(uint32 n_trial,
              uint32 n_obs,
              uint32 veclen,
              vector_t *bst_mean,
              uint32 n_mean,
              float32 min_ratio,
              uint32 max_iter,
              codew_t **out_label)
{
    uint32 t, k, kk;
    float32 rr;
    uint32 cc;
    codew_t *label = NULL, *b_label = NULL;
    vector_t *tmp_mean;
    float64 sqerr, b_sqerr = MAX_POS_FLOAT64;
    vector_t c;
    uint32 n_aborts;

    tmp_mean = (vector_t *)ckd_calloc_2d(n_mean, veclen, sizeof(float32));

    E_INFO("Initializing means using random k-means\n");

    for (t = 0; t < n_trial; t++) {
        E_INFO("Trial %u: %u means\n", t, n_mean);

        n_aborts = 100;         /* # of aborts to allow */

        do {
            /* Pick a (pseudo-)random set of initial means from the corpus */
            for (k = 0; k < n_mean; k++) {
                rr = drand48(); /* random numbers in the interval [0, 1) */
                cc = rr * n_obs;
                assert(cc < n_obs);     /* cc is unsigned, so cc >= 0 always holds */

                c = get_obs(cc);
                for (kk = 0; kk < veclen; kk++) {
                    tmp_mean[k][kk] = c[kk];
                }
            }

            if (n_mean > 1) {
                sqerr = k_means_trineq(tmp_mean, n_mean,
                                       n_obs, veclen,
                                       min_ratio, max_iter,
                                       &label);
            }
            else {
                sqerr = k_means(tmp_mean, n_mean,
                                n_obs, veclen,
                                min_ratio, max_iter,
                                &label);
            }

            if (sqerr < 0) {
                E_INFO("\t-> Aborting k-means, bad initialization\n");
                --n_aborts;
            }
        } while ((sqerr < 0) && (n_aborts > 0));

        if (sqerr < b_sqerr) {
            /* Best trial so far; keep its means and labeling */
            b_sqerr = sqerr;

            E_INFO("\tbest-so-far sqerr = %e\n", b_sqerr);

            if (b_label)
                ckd_free(b_label);
            b_label = label;

            for (k = 0; k < n_mean; k++) {
                for (kk = 0; kk < veclen; kk++) {
                    bst_mean[k][kk] = tmp_mean[k][kk];
                }
            }
        }
        else if (label) {
            ckd_free(label);
            label = NULL;
        }
    }

    *out_label = b_label;

    ckd_free_2d((void **)tmp_mean);

    return b_sqerr;
}
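/* Usage sketch (an assumption about the calling convention, not original
 * source): run several random restarts and keep the best labeling.  The
 * trial count, convergence ratio, and iteration cap below are illustrative
 * values; `means' is assumed to be allocated like tmp_mean above. */
static void
random_kmeans_example(uint32 n_obs, uint32 veclen,
                      vector_t *means, uint32 n_mean)
{
    codew_t *label = NULL;
    float32 sqerr;

    /* 5 restarts, convergence ratio 0.01, at most 100 k-means iterations */
    sqerr = random_kmeans(5, n_obs, veclen, means, n_mean,
                          0.01, 100, &label);
    E_INFO("best of 5 trials: sqerr = %e\n", sqerr);

    ckd_free(label);    /* the caller owns the winning label array */
}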
void obs_vector_iget_observations(const obs_vector_type * obs_vector,
                                  int report_step,
                                  obs_data_type * obs_data,
                                  const active_list_type * active_list)
{
  void * obs_node = vector_iget( obs_vector->nodes , report_step );
  if (obs_node != NULL)
    obs_vector->get_obs( obs_node , obs_data , report_step , active_list );
}
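/* Illustration (added; hypothetical types) of the dispatch pattern above:
 * the observation vector carries a get_obs function pointer so one
 * container can hold nodes of different concrete observation types and
 * still route each node to its own implementation. */
typedef struct {
  void (*get_obs)(void *node, double *data, int report_step);
  void **nodes;                 /* one node per report step, possibly NULL */
} poly_vector_example_type;

static void poly_vector_example_iget(const poly_vector_example_type * vec,
                                     int report_step,
                                     double * data)
{
  void * node = vec->nodes[report_step];
  if (node != NULL)
    vec->get_obs( node , data , report_step );  /* virtual-style call */
}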
/***********************************************************************//**
 * @brief Get application parameters
 *
 * Get all task parameters from the parameter file or (if required) by
 * querying the user. The parameters are read in the correct order.
 ***************************************************************************/
void ctmodel::get_parameters(void)
{
    // Reset cube append flag
    m_append_cube = false;

    // If there are no observations in the container then load them via
    // user parameters
    if (m_obs.size() == 0) {
        get_obs();
    }

    // If we now have exactly one CTA observation (but no cube has yet been
    // appended to the observation) then check whether this observation
    // is a binned observation, and if yes, extract the counts cube for
    // model generation
    if ((m_obs.size() == 1) && (m_append_cube == false)) {

        // Get CTA observation
        GCTAObservation* obs = dynamic_cast<GCTAObservation*>(m_obs[0]);

        // Continue only if observation is a CTA observation
        if (obs != NULL) {

            // Check for binned observation
            if (obs->eventtype() == "CountsCube") {

                // Set cube from binned observation
                GCTAEventCube* evtcube = dynamic_cast<GCTAEventCube*>(const_cast<GEvents*>(obs->events()));
                cube(*evtcube);

                // Signal that cube has been set
                m_has_cube = true;

                // Signal that we are in binned mode
                m_binned = true;

            } // endif: observation was binned

        } // endif: observation was CTA

    } // endif: had exactly one observation

    // Read model definition file if required
    if (m_obs.models().size() == 0) {

        // Get model filename
        std::string inmodel = (*this)["inmodel"].filename();

        // Load models from file
        m_obs.models(inmodel);

    } // endif: there were no models

    // Get energy dispersion flag parameter
    m_apply_edisp = (*this)["edisp"].boolean();

    // If we do not yet have a counts cube for model computation then check
    // whether we should read it from the "incube" parameter or whether we
    // should create it from scratch using the task parameters
    if (!m_has_cube) {

        // Read cube definition file
        std::string incube = (*this)["incube"].filename();

        // If no cube file has been specified then create a cube from
        // the task parameters ...
        if ((incube == "NONE") ||
            (gammalib::strip_whitespace(incube) == "")) {

            // Create cube from scratch
            m_cube = create_cube(m_obs);

        }

        // ... otherwise load the cube from file and reset all bins
        // to zero
        else {

            // Load cube from given file
            m_cube.load(incube);

            // Set all cube bins to zero
            for (int i = 0; i < m_cube.size(); ++i) {
                m_cube[i]->counts(0.0);
            }
        }

        // Signal that cube has been set
        m_has_cube = true;

    } // endif: we had no cube yet

    // Optionally read the output cube filename
    if (read_ahead()) {
        m_outcube = (*this)["outcube"].filename();
    }

    // If the cube should be appended to the first observation then do that
    // now. This is a kluge that makes sure that the cube is passed as part
    // of the observation in case that a cube response is used. The kluge
    // is needed because the GCTACubeSourceDiffuse::set method needs to get
    // the full event cube from the observation. It is also at this step
    // that the GTI, which may just be a dummy GTI when create_cube() has
    // been used, will be set.
    if (m_append_cube) {

        //TODO: Check that energy boundaries are compatible

        // Attach GTI of observations to model cube
        m_cube.gti(m_obs[0]->events()->gti());

        // Attach model cube to observations
        m_obs[0]->events(m_cube);

    } // endif: cube was scheduled for appending

    // Return
    return;
}