Example #1
float64
find_farthest_neigh(uint32 *obs_subset,
		    uint32 n_obs_subset,
		    uint32 veclen,
		    vector_t neigh_a,
		    vector_t neigh_b)
{
    uint32 i, j;
    uint32 i_corp, j_corp;
    uint32 i_corp_max=0, j_corp_max=0;
    float64 d;
    float64 d_max = 0;
    float64 diff;
    uint32 l;
    vector_t c_i, c_j;

    d = 0;

    for (i = 0; i < n_obs_subset-1; i++) {
	for (j = i+1; j < n_obs_subset; j++) {
	    i_corp = obs_subset[i];
	    j_corp = obs_subset[j];

	    c_i = get_obs(i_corp);
	    c_j = get_obs(j_corp);
	    for (l = 0, d = 0; l < veclen; l++) {
		diff = c_i[l] - c_j[l];
		
		d += diff * diff;
	    }

	    if (d > d_max) {
		d_max = d;
		i_corp_max = i_corp;
		j_corp_max = j_corp;
	    }
	}
    }

    c_i = get_obs(i_corp_max);
    c_j = get_obs(j_corp_max);
    for (l = 0; l < veclen; l++) {
	neigh_a[l] = c_i[l];
	neigh_b[l] = c_j[l];
    }

    return d_max;
}
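
The example above scans every pair of observations in the subset and keeps the pair with the largest squared Euclidean distance. Below is a minimal standalone sketch of the same O(n^2) scan over a small local array; it is illustrative only and does not use the corpus accessors (get_obs, vector_t) from the example.

#include <stdio.h>

#define N_PTS  4
#define VECLEN 2

int
main(void)
{
    /* a few hand-picked 2-D points standing in for the observation subset */
    static const double pts[N_PTS][VECLEN] = {
        {0.0, 0.0}, {1.0, 1.0}, {4.0, 0.0}, {2.0, 3.0}
    };
    double d_max = 0.0;
    unsigned i_max = 0, j_max = 0;
    unsigned i, j, l;

    /* same pairwise loop structure as find_farthest_neigh() */
    for (i = 0; i < N_PTS - 1; i++) {
        for (j = i + 1; j < N_PTS; j++) {
            double d = 0.0;

            for (l = 0; l < VECLEN; l++) {
                double diff = pts[i][l] - pts[j][l];
                d += diff * diff;
            }
            if (d > d_max) {
                d_max = d;
                i_max = i;
                j_max = j;
            }
        }
    }
    printf("farthest pair: %u and %u, squared distance %g\n",
           i_max, j_max, d_max);
    return 0;
}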
Example #2
int
full_variances(uint32 ts,
	  vector_t **mean,
	  vector_t ***var,
	  uint32 n_density,

	  uint32 n_stream,
	  uint32 *veclen,
	  uint32 blksize,
	  
	  uint32 n_in_frame,

	  codew_t *label)
{
    uint32 *n_obs;
    float64 term;
    uint32 s, i, l, m, k, n_frame;
    vector_t c;
    
    E_INFO("Initializing full covariances\n");
    for (s = 0; s < n_stream; s++) {
	n_obs = ckd_calloc(n_density, sizeof(uint32));

	n_frame = setup_obs(ts, s, n_in_frame, n_stream, veclen, blksize);

	for (i = 0; i < n_frame; i++) {
	    k = label[i];	/* best codeword */
	    n_obs[k]++;

	    c = get_obs(i);

	    for (l = 0; l < veclen[s]; l++) {
		for (m = 0; m < veclen[s]; m++) {
		    var[s][k][l][m] +=
			(c[l] - mean[s][k][l])
			* (c[m] - mean[s][k][m]);
		}
	    }
	}

	for (k = 0; k < n_density; k++) {
	    term = 1.0 / (float64)n_obs[k];
	    for (l = 0; l < veclen[s]; l++) {
		for (m = 0; m < veclen[s]; m++) {
		    var[s][k][l][m] *= term;
		}
	    }
	}

	ckd_free(n_obs);
    }
    return 0;
}
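
For each codeword, the example accumulates the outer product of the deviation from the mean, (c - mean)(c - mean)^T, over all frames labeled with that codeword and then divides by the frame count. The sketch below, a simplified illustration only, performs the same accumulation for a single codeword on a small local data set; plain double arrays stand in for vector_t and the var[s][k] matrices, and the label/get_obs machinery is omitted.

#include <stdio.h>

#define N_OBS  3
#define VECLEN 2

int
main(void)
{
    static const double obs[N_OBS][VECLEN] = {
        {1.0, 2.0}, {2.0, 0.0}, {3.0, 4.0}
    };
    static const double mean[VECLEN] = {2.0, 2.0};
    double var[VECLEN][VECLEN] = {{0.0}};
    unsigned i, l, m;

    /* accumulate outer products of the deviations from the mean */
    for (i = 0; i < N_OBS; i++)
        for (l = 0; l < VECLEN; l++)
            for (m = 0; m < VECLEN; m++)
                var[l][m] += (obs[i][l] - mean[l]) * (obs[i][m] - mean[m]);

    /* normalize by the observation count, as in the second loop above */
    for (l = 0; l < VECLEN; l++)
        for (m = 0; m < VECLEN; m++)
            var[l][m] /= (double)N_OBS;

    for (l = 0; l < VECLEN; l++)
        printf("%8.4f %8.4f\n", var[l][0], var[l][1]);
    return 0;
}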
Example #3
int
variances(uint32 ts,
	  vector_t **mean,
	  vector_t **var,
	  uint32 n_density,
	  const uint32 *veclen,
	  
	  uint32 n_in_frame,
	  uint32 n_stream,

	  codew_t *label)
{
    uint32 *n_obs;
    float64 term;
    uint32 s, i, l, k, n_frame;
    vector_t c;
    
    E_INFO("Initializing variances\n");
    for (s = 0; s < n_stream; s++) {
	n_obs = ckd_calloc(n_density, sizeof(uint32));

	n_frame = setup_obs(ts, s, n_in_frame, veclen);

	for (i = 0; i < n_frame; i++) {
	    k = label[i];	/* best codeword */
	    n_obs[k]++;

	    c = get_obs(i);

	    for (l = 0; l < veclen[s]; l++) {
		term = c[l] - mean[s][k][l];
		term *= term;
		var[s][k][l] += term;
	    }
	}

	for (k = 0; k < n_density; k++) {
	    term = 1.0 / (float64)n_obs[k];
	    for (l = 0; l < veclen[s]; l++) {
		var[s][k][l] *= term;
	    }
	}

	ckd_free(n_obs);
    }
    return 0;
}
Example #4
float64
reest_sum(uint32 ts,
	  vector_t **mean,
	  vector_t **var,
	  float32 **mixw,
	  uint32 n_density,
	  uint32 n_stream,
	  uint32 n_in_obs,
	  const uint32 *veclen,
	  uint32 n_iter,
	  uint32 twopassvar,
	  uint32 vartiethr)
{
    uint32 o, i, j, k, l;
    float32  *mixw_acc;
    float32  *cb_acc;
    vector_t  **mean_acc_xx;
    vector_t  **var_acc_xx;
    vector_t  *mean_acc;
    vector_t  *var_acc;
    float64 ol, ttt, diff, log_tot_ol = 0, p_log_tot_ol = 0;
    float64 **norm, *den;
    float32 mixw_norm;

    vector_t obs;
    uint32 n_obs;

    vector_t ***n_mean_xx = NULL;
    vector_t *n_mean = NULL;

    float64 avg_lik=0, p_avg_lik=0;
    uint32 tievar = FALSE;

    E_INFO("EM reestimation of mixw/means/vars\n");
    
    if (twopassvar) {
	n_mean_xx = gauden_alloc_param(1, 1, n_density, veclen);
	n_mean = n_mean_xx[0][0];
    }
    
    /* allocate mixing weight accumulators */
    mixw_acc = (float32 *)ckd_calloc(n_density, sizeof(float32));

    cb_acc = (float32 *)ckd_calloc(n_density, sizeof(float32));
    mean_acc_xx = (vector_t **)alloc_gau_acc(1, n_density, veclen, feat_blksize());
    mean_acc = mean_acc_xx[0];
    var_acc_xx = (vector_t **)alloc_gau_acc(1, n_density, veclen, feat_blksize());
    var_acc = var_acc_xx[0];

    den = (float64 *)ckd_calloc(n_density, sizeof(float64));
    norm = (float64 **)ckd_calloc_2d(n_stream, n_density, sizeof(float64));

    for (j = 0; j < n_stream; j++) {
	n_obs = setup_obs(ts, j, n_in_obs, veclen);

	if (n_obs < vartiethr) tievar = TRUE;

	for (i = 0; i < n_iter; i++) {
	    p_log_tot_ol = log_tot_ol;
	    log_tot_ol = 0;

	    for (k = 0; k < n_density; k++) {
		/* floor variances */
		for (l = 0; l < veclen[j]; l++)
		    if (var[j][k][l] < 1e-4) var[j][k][l] = 1e-4; 

		/* compute normalization factors for Gaussian
		   densities */
		norm[j][k] = diag_norm(var[j][k], veclen[j]);

		/* precompute 1/(2sigma^2) terms */
		diag_eval_precomp(var[j][k], veclen[j]);
	    }

	    if (twopassvar) {
		/* do a pass over the corpus to compute reestimated means */
		for (o = 0; o < n_obs; o++) {
		    float64 mx;

		    obs = get_obs(o);
		
		    mx = MIN_NEG_FLOAT64;

		    for (k = 0; k < n_density; k++) {
			/* really log(den) for the moment */
			den[k] = log_diag_eval(obs, norm[j][k], mean[j][k], var[j][k], veclen[j]);
			if (mx < den[k]) mx = den[k];
		    }
		    for (k = 0, ol = 0; k < n_density; k++) {
			den[k] = exp(den[k] - mx);
			ol += mixw[j][k] * den[k];
		    }

		    for (k = 0; k < n_density; k++) {
			ttt = mixw[j][k] * den[k] / ol;
		    
			cb_acc[k] += ttt;
			for (l = 0; l < veclen[j]; l++) {
			    mean_acc[k][l] += obs[l] * ttt;
			}
		    }
		}
	    
		cb_acc[0] = 1.0 / cb_acc[0];
		for (k = 1; k < n_density; k++) {
		    cb_acc[k] = 1.0 / cb_acc[k];
		}

		/* compute the reestimated mean value to be used in next pass */
		for (k = 0; k < n_density; k++) {
		    for (l = 0; l < veclen[j]; l++) {
			n_mean[k][l] = mean_acc[k][l] * cb_acc[k];
			mean_acc[k][l] = 0;
		    }
		    cb_acc[k] = 0;
		}
	    }
	    else {
		n_mean = mean[j];
	    }
		
	    for (o = 0; o < n_obs; o++) {
		float64 mx;

		/* Do a pass over the data to accumulate reestimation sums
		 * for the remaining parameters (including means
		 * if not a 2-pass config) */

		/* Get the next observation */
		obs = get_obs(o);

		mx = MIN_NEG_FLOAT64;

		/* Compute the mixture density value given the
		 * observation and the model parameters */
		for (k = 0; k < n_density; k++) {
		    /* really log(den) for the moment */
		    den[k] = log_diag_eval(obs, norm[j][k], mean[j][k], var[j][k], veclen[j]);
		    if (mx < den[k]) mx = den[k];
		}
		for (k = 0, ol = 0; k < n_density; k++) {
		    den[k] = exp(den[k] - mx);
		    ol += mixw[j][k] * den[k];
		}

		log_tot_ol += log(ol) + mx;

		/* Compute the reestimation sum terms for each
		 * of the component densities */
		for (k = 0; k < n_density; k++) {
		    ttt = mixw[j][k] * den[k] / ol;

		    mixw_acc[k] += ttt;

		    cb_acc[k] += ttt;
		    
		    for (l = 0; l < veclen[j]; l++) {
			/* if not doing two-pass variance computation
			 * n_mean <- mean above. */
			diff = obs[l] - n_mean[k][l];
			
			if (!twopassvar) {
			    mean_acc[k][l] += ttt * obs[l];
			}

			var_acc[k][l] += ttt * diff * diff;
		    }
		}
	    }

	    avg_lik = exp(log_tot_ol / n_obs);

	    if (p_log_tot_ol != 0)
		p_avg_lik = exp(p_log_tot_ol / n_obs);
	    else
		p_avg_lik = 0.5 * avg_lik;

	    E_INFO("EM stream %u: [%u] avg_lik %e conv_ratio %e\n",
		   j, i, avg_lik, (avg_lik - p_avg_lik) / p_avg_lik);
	    
	    /* normalize after iteration */

	    if (tievar) {
		/* create a sum over all densities in entry 0 */
		for (k = 1; k < n_density; k++) {
		    for (l = 0; l < veclen[j]; l++) {
			var_acc[0][l] += var_acc[k][l];
		    }
		    cb_acc[0] += cb_acc[k];
		}
		/* copy entry 0 back to remaining entries */
		for (k = 1; k < n_density; k++) {
		    for (l = 0; l < veclen[j]; l++) {
			var_acc[k][l] = var_acc[0][l];
		    }
		    cb_acc[k] = cb_acc[0];
		}		
	    }
		
	    for (k = 0, mixw_norm = 0; k < n_density; k++) {
		/* norm for per density expectations */
		cb_acc[k] = 1.0 / cb_acc[k];

		mixw_norm += mixw_acc[k];
	    }
	    mixw_norm = 1.0 / mixw_norm;

	    if (!twopassvar) {
		for (k = 0; k < n_density; k++) {
		    mixw[j][k] = mixw_acc[k] * mixw_norm;
		    mixw_acc[k] = 0;

		    for (l = 0; l < veclen[j]; l++) {
			mean[j][k][l] = mean_acc[k][l] * cb_acc[k];
			mean_acc[k][l] = 0;
			var[j][k][l]  = var_acc[k][l] * cb_acc[k];
			var_acc[k][l] = 0;
		    }
		    cb_acc[k] = 0;
		}
	    }
	    else {
		for (k = 0; k < n_density; k++) {
		    mixw[j][k] = mixw_acc[k] * mixw_norm;
		    mixw_acc[k] = 0;

		    for (l = 0; l < veclen[j]; l++) {
			/* already computed in first pass */
			mean[j][k][l] = n_mean[k][l];
			var[j][k][l]  = var_acc[k][l] * cb_acc[k];
			var_acc[k][l] = 0;
		    }
		    cb_acc[k] = 0;
		}
	    }
	}	/* end of EM iteration loop */

	E_INFO("EM stream %u: [final] n_obs %u avg_lik %e conv_ratio %e\n",
	       j, n_obs, avg_lik, (avg_lik - p_avg_lik) / p_avg_lik);

    }	/* end of feature stream loop */

    ckd_free((void *)mixw_acc);
    ckd_free((void *)cb_acc);
    ckd_free((void *)&mean_acc_xx[0][0][0]);
    ckd_free_2d((void **)mean_acc_xx);
    ckd_free((void *)&var_acc_xx[0][0][0]);
    ckd_free_2d((void **)var_acc_xx);
    if (n_mean_xx) {
	ckd_free((void *)n_mean_xx[0][0][0]);
	ckd_free_3d((void ***)n_mean_xx);
    }

    ckd_free_2d((void **)norm);
    ckd_free((void *)den);

    return log_tot_ol;
}
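
Inside the EM loop above, the component log densities are shifted by their maximum before exponentiation so that exp() stays in range; the mixture likelihood ol and the posteriors ttt = mixw[j][k] * den[k] / ol are formed from the shifted values, and the shift mx is added back when log(ol) is accumulated into log_tot_ol. The sketch below reproduces just that step with made-up log densities and mixture weights; log_diag_eval() and the accumulators are not reproduced.

#include <math.h>
#include <stdio.h>

#define N_DENSITY 3

int
main(void)
{
    /* illustrative log densities and mixture weights */
    static const double log_den[N_DENSITY] = {-110.0, -112.5, -109.2};
    static const double mixw[N_DENSITY] = {0.5, 0.3, 0.2};
    double den[N_DENSITY];
    double mx = log_den[0], ol = 0.0;
    unsigned k;

    /* find the largest log density */
    for (k = 1; k < N_DENSITY; k++)
        if (log_den[k] > mx) mx = log_den[k];

    /* shift, exponentiate and form the mixture likelihood */
    for (k = 0; k < N_DENSITY; k++) {
        den[k] = exp(log_den[k] - mx);
        ol += mixw[k] * den[k];
    }

    /* the unscaled log likelihood, as accumulated into log_tot_ol */
    printf("log likelihood = %f\n", log(ol) + mx);

    /* per-component posteriors; these sum to one */
    for (k = 0; k < N_DENSITY; k++)
        printf("posterior[%u] = %f\n", k, mixw[k] * den[k] / ol);
    return 0;
}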
Example #5
static float32
random_kmeans(uint32 n_trial,
	      uint32 n_obs,
	      uint32 veclen,
	      vector_t *bst_mean,
	      uint32 n_mean,
	      float32 min_ratio,
	      uint32 max_iter,
	      codew_t **out_label)
{
    uint32 t, k, kk;
    float32 rr;
    uint32 cc;
    codew_t *label = NULL, *b_label = NULL;
    vector_t *tmp_mean;
    float64 sqerr, b_sqerr = MAX_POS_FLOAT64;
    vector_t c;
    uint32 n_aborts;

    tmp_mean = (vector_t *)ckd_calloc_2d(n_mean, veclen, sizeof(float32));

    E_INFO("Initializing means using random k-means\n");

    for (t = 0; t < n_trial; t++) {
	E_INFO("Trial %u: %u means\n", t, n_mean);

	n_aborts = 100;		/* # of aborts to allow */
	do {
	    /* pick a (pseudo-)random set of initial means from the corpus */
	    for (k = 0; k < n_mean; k++) {
		rr = drand48();	/* random numbers in the interval [0, 1) */
		cc = rr * n_obs;
		assert((cc >= 0) && (cc < n_obs));
		c = get_obs(cc);
		for (kk = 0; kk < veclen; kk++) {
		    tmp_mean[k][kk] = c[kk];
		}
	    }

	    if (n_mean > 1) {
		sqerr = k_means_trineq(tmp_mean, n_mean,
				       n_obs,
				       veclen,
				       min_ratio,
				       max_iter,
				       &label);
	    }
	    else {
		sqerr = k_means(tmp_mean, n_mean,
				n_obs,
				veclen,
				min_ratio,
				max_iter,
				&label);
	    }

	    if (sqerr < 0) {
		E_INFO("\t-> Aborting k-means, bad initialization\n");
		--n_aborts;
	    }
	} while ((sqerr < 0) && (n_aborts > 0));
	    

	if (sqerr < b_sqerr) {
	    b_sqerr = sqerr;

	    E_INFO("\tbest-so-far sqerr = %e\n", b_sqerr);
	    if (b_label)
		ckd_free(b_label);
	    b_label = label;
	    for (k = 0; k < n_mean; k++) {
		for (kk = 0; kk < veclen; kk++) {
		    bst_mean[k][kk] = tmp_mean[k][kk];
		}
	    }
	}
	else {
	    if (label) {
		ckd_free(label);
		label = NULL;
	    }
	}
    }

    *out_label = b_label;

    ckd_free_2d((void **)tmp_mean);

    return b_sqerr;
}
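
The initial means are picked by scaling drand48(), which returns a value in [0, 1), by the number of observations and truncating to an index. The sketch below shows only that selection step and assumes the POSIX drand48()/srand48() functions; the corpus access and the subsequent k-means refinement are left out.

#include <stdio.h>
#include <stdlib.h>

#define N_OBS  100
#define N_MEAN 4

int
main(void)
{
    unsigned k;

    srand48(1234);      /* fixed seed so the example is repeatable */
    for (k = 0; k < N_MEAN; k++) {
        /* drand48() < 1, so the index is always in [0, N_OBS) */
        unsigned cc = (unsigned)(drand48() * N_OBS);
        printf("mean %u initialized from observation %u\n", k, cc);
    }
    return 0;
}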
Example #6
void obs_vector_iget_observations(const obs_vector_type * obs_vector , int report_step , obs_data_type * obs_data, const active_list_type * active_list) {
  void * obs_node = vector_iget( obs_vector->nodes , report_step );
  if ( obs_node != NULL) 
    obs_vector->get_obs(obs_node , obs_data , report_step , active_list);
}
Example #7
/***********************************************************************//**
 * @brief Get application parameters
 *
 * Get all task parameters from parameter file or (if required) by querying
 * the user. The parameters are read in the correct order.
 ***************************************************************************/
void ctmodel::get_parameters(void)
{
    // Reset cube append flag
    m_append_cube = false;

    // If there are no observations in container then load them via user
    // parameters.
    if (m_obs.size() == 0) {
        get_obs();
    }

    // If we now have exactly one CTA observation (and no cube has yet been
    // appended to the observation) then check whether this observation
    // is a binned observation and, if so, extract the counts cube for
    // model generation
    if ((m_obs.size() == 1) && (m_append_cube == false)) {

        // Get CTA observation
        GCTAObservation* obs = dynamic_cast<GCTAObservation*>(m_obs[0]);

        // Continue only if observation is a CTA observation
        if (obs != NULL) {

            // Check for binned observation
            if (obs->eventtype() == "CountsCube") {

                // Set cube from binned observation
                GCTAEventCube* evtcube = dynamic_cast<GCTAEventCube*>(const_cast<GEvents*>(obs->events()));

                cube(*evtcube);

                // Signal that cube has been set
                m_has_cube = true;

                // Signal that we are in binned mode
                m_binned = true;

            } // endif: observation was binned

        } // endif: observation was CTA

    } // endif: had exactly one observation

    // Read model definition file if required
    if (m_obs.models().size() == 0) {

        // Get model filename
        std::string inmodel = (*this)["inmodel"].filename();

        // Load models from file
        m_obs.models(inmodel);

    } // endif: there were no models

    // Get energy dispersion flag parameters
    m_apply_edisp = (*this)["edisp"].boolean();

    // If we do not yet have a counts cube for model computation, check
    // whether we should read it from the "incube" parameter or create
    // it from scratch using the task parameters
    if (!m_has_cube) {

        // Read cube definition file
        std::string incube = (*this)["incube"].filename();

        // If no cube file has been specified then create a cube from
        // the task parameters ...
        if ((incube == "NONE") ||
            (gammalib::strip_whitespace(incube) == "")) {
            
            // Create cube from scratch
            m_cube = create_cube(m_obs);

        }

        // ... otherwise load the cube from file and reset all bins
        // to zero
        else {

            // Load cube from given file
            m_cube.load(incube);

            // Set all cube bins to zero
            for (int i = 0; i < m_cube.size(); ++i) {
                m_cube[i]->counts(0.0);
            }
        }

        // Signal that cube has been set
        m_has_cube = true;

    } // endif: we had no cube yet

    // Optionally read the output cube filename
    if (read_ahead()) {
        m_outcube = (*this)["outcube"].filename();
    }

    // If cube should be appended to first observation then do that now.
    // This is a kluge that makes sure that the cube is passed as part
    // of the observation in case that a cube response is used. The kluge
    // is needed because the GCTACubeSourceDiffuse::set method needs to
    // get the full event cube from the observation. It is also at this
    // step that the GTI, which may just be a dummy GTI when create_cube()
    // has been used, will be set.
    if (m_append_cube) {

        //TODO: Check that energy boundaries are compatible

        // Attach GTI of observations to model cube
        m_cube.gti(m_obs[0]->events()->gti());
    
        // Attach model cube to observations
        m_obs[0]->events(m_cube);

    } // endif: cube was scheduled for appending

    // Return
    return;
}