Beispiel #1
0
/*
 * Estimate full covariance matrices from labeled observations.
 *
 * For every feature stream s, each frame i is attributed to the density
 * label[i].  The outer product (obs - mean)(obs - mean)^T is accumulated
 * into var[s][label[i]] and afterwards divided by the number of frames
 * attributed to that density.
 *
 * ts, n_in_frame, n_stream, veclen and blksize are handed to setup_obs(),
 * which returns the number of frames to process for the stream.
 * mean[s][k] is the mean vector of density k in stream s.
 * var[s][k] is accumulated with "+=", so it is presumably expected to be
 * zeroed by the caller -- TODO confirm.
 * label[i] must be smaller than n_density; it indexes a count array of
 * n_density entries.
 *
 * A density that received no frame is left unnormalized instead of being
 * scaled by 1/0 (which would turn its entries into NaN/inf).
 *
 * Always returns 0.
 */
int
full_variances(uint32 ts,
	  vector_t **mean,
	  vector_t ***var,
	  uint32 n_density,

	  uint32 n_stream,
	  uint32 *veclen,
	  uint32 blksize,
	  
	  uint32 n_in_frame,

	  codew_t *label)
{
    uint32 *n_obs;
    float64 term;
    uint32 s, i, l, m, k, n_frame;
    vector_t c;
    
    E_INFO("Initializing full covariances\n");
    for (s = 0; s < n_stream; s++) {
	/* per-density frame counts for this stream */
	n_obs = ckd_calloc(n_density, sizeof(uint32));

	n_frame = setup_obs(ts, s, n_in_frame, n_stream, veclen, blksize);

	for (i = 0; i < n_frame; i++) {
	    k = label[i];	/* best codeword */
	    n_obs[k]++;

	    c = get_obs(i);

	    /* accumulate the outer product of the deviation from the mean */
	    for (l = 0; l < veclen[s]; l++) {
		for (m = 0; m < veclen[s]; m++) {
		    var[s][k][l][m] +=
			(c[l] - mean[s][k][l])
			* (c[m] - mean[s][k][m]);
		}
	    }
	}

	for (k = 0; k < n_density; k++) {
	    if (n_obs[k] == 0) {
		/* 1/0 would give inf and 0 * inf would fill the
		 * matrix with NaN; leave the accumulator untouched */
		E_INFO("Stream %u density %u has no observations; covariance not normalized\n",
		       s, k);
		continue;
	    }

	    term = 1.0 / (float64)n_obs[k];
	    for (l = 0; l < veclen[s]; l++) {
		for (m = 0; m < veclen[s]; m++) {
		    var[s][k][l][m] *= term;
		}
	    }
	}

	ckd_free(n_obs);
    }
    return 0;
}
Beispiel #2
0
/*
 * Run k-means clustering on every feature stream and return the summed
 * squared error.
 *
 * The clustering method is taken from the "-method" command line
 * argument: "rkm" calls random_kmeans(), "fnkm" calls
 * furthest_neighbor_kmeans().  The "-ntrial", "-minratio" and "-maxiter"
 * arguments are forwarded to those routines.
 *
 * ts, n_in_frame and veclen are handed to setup_obs(), which returns the
 * number of frames for the stream.  mean[s] receives the codebook of
 * stream s; n_density is the number of codewords requested.
 *
 * *out_label is set to NULL on entry.  Only the "rkm" path passes
 * out_label on to the clustering routine; with "fnkm" it stays NULL.
 *
 * Returns the sum of the per-stream squared errors, or -1.0 if
 * random_kmeans() reports a negative error or the method name is not
 * recognized.
 */
float64
cluster(uint32 ts,
	uint32 n_stream,
	uint32 n_in_frame,
	const uint32 *veclen,
	vector_t **mean,
	uint32 n_density,
	codew_t **out_label)
{
    float64 sum_sqerr = 0, sqerr = 0;
    uint32 s, n_frame;
    const char *meth;
    
    *out_label = NULL;

    blksize = feat_blksize();
    k_means_set_get_obs(&get_obs);

    /* the method is the same for every stream, so look it up once */
    meth = (const char *)cmd_ln_access("-method");

    for (s = 0; s < n_stream; s++) {
	n_frame = setup_obs(ts, s, n_in_frame, veclen);

	if (strcmp(meth, "rkm") == 0) {
	    sqerr = random_kmeans(*(uint32 *)cmd_ln_access("-ntrial"),
				  n_frame,
				  veclen[s],
				  mean[s],
				  n_density,
				  *(float32 *)cmd_ln_access("-minratio"),
				  *(uint32 *)cmd_ln_access("-maxiter"),
				  out_label);
	    if (sqerr < 0) {
		E_ERROR("Too few observations for kmeans\n");
		
		return -1.0;
	    }
	}
	else if (strcmp(meth, "fnkm") == 0) {
	    sqerr = furthest_neighbor_kmeans(n_frame,
					     veclen[s],
					     mean[s],
					     n_density,
					     *(float32 *)cmd_ln_access("-minratio"),
					     *(uint32 *)cmd_ln_access("-maxiter"));
	}
	else {
	    E_ERROR("I don't know how to do method '%s'.  Sorry.\n", meth);

	    /* Nothing was clustered; report failure the same way the
	     * "rkm" path does rather than adding a stale error value
	     * to the sum and returning as if the call had succeeded. */
	    return -1.0;
	}

	sum_sqerr += sqerr;
    }

    return sum_sqerr;
}
Beispiel #3
0
/*
 * Estimate diagonal variances from labeled observations.
 *
 * For every feature stream s, each frame i is attributed to the density
 * label[i].  The squared deviation of every vector component from the
 * density mean is accumulated into var[s][label[i]] and afterwards
 * divided by the number of frames attributed to that density.
 *
 * ts, n_in_frame and veclen are handed to setup_obs(), which returns the
 * number of frames to process for the stream.
 * var[s][k] is accumulated with "+=", so it is presumably expected to be
 * zeroed by the caller -- TODO confirm.
 * label[i] must be smaller than n_density; it indexes a count array of
 * n_density entries.
 *
 * A density that received no frame is left unnormalized instead of being
 * scaled by 1/0 (which would turn its entries into NaN/inf).
 *
 * Always returns 0.
 */
int
variances(uint32 ts,
	  vector_t **mean,
	  vector_t **var,
	  uint32 n_density,
	  const uint32 *veclen,
	  
	  uint32 n_in_frame,
	  uint32 n_stream,

	  codew_t *label)
{
    uint32 *n_obs;
    float64 term;
    uint32 s, i, l, k, n_frame;
    vector_t c;
    
    E_INFO("Initializing variances\n");
    for (s = 0; s < n_stream; s++) {
	/* per-density frame counts for this stream */
	n_obs = ckd_calloc(n_density, sizeof(uint32));

	n_frame = setup_obs(ts, s, n_in_frame, veclen);

	for (i = 0; i < n_frame; i++) {
	    k = label[i];	/* best codeword */
	    n_obs[k]++;

	    c = get_obs(i);

	    /* accumulate the squared deviation per component */
	    for (l = 0; l < veclen[s]; l++) {
		term = c[l] - mean[s][k][l];
		term *= term;
		var[s][k][l] += term;
	    }
	}

	for (k = 0; k < n_density; k++) {
	    if (n_obs[k] == 0) {
		/* 1/0 would give inf and 0 * inf would fill the
		 * vector with NaN; leave the accumulator untouched */
		E_INFO("Stream %u density %u has no observations; variance not normalized\n",
		       s, k);
		continue;
	    }

	    term = 1.0 / (float64)n_obs[k];
	    for (l = 0; l < veclen[s]; l++) {
		var[s][k][l] *= term;
	    }
	}

	ckd_free(n_obs);
    }
    return 0;
}
Beispiel #4
0
/*
 * EM reestimation of mixing weights, means and diagonal variances.
 *
 * For each feature stream j, n_iter EM iterations are run over the
 * observations made available by setup_obs()/get_obs().  mean[j],
 * var[j] and mixw[j] are read as the current model and overwritten with
 * the reestimated parameters at the end of every iteration.
 *
 * ts, n_in_obs, veclen: handed to setup_obs(), which returns the number
 *                       of observations for the stream.
 * n_density:            number of mixture components per stream.
 * n_iter:               number of EM iterations per stream.
 * twopassvar:           if non-zero, each iteration first makes a pass to
 *                       reestimate the means and then accumulates the
 *                       variances around those new means; otherwise the
 *                       variances are accumulated around the current
 *                       means in a single pass.
 * vartiethr:            if a stream has fewer observations than this,
 *                       the variances of all its densities are tied
 *                       (pooled).  NOTE(review): the flag is never
 *                       reset, so once set it also applies to all later
 *                       streams.
 *
 * Returns the total log likelihood accumulated in the last iteration of
 * the last stream (0 if no iteration ran).
 */
float64
reest_sum(uint32 ts,
	  vector_t **mean,
	  vector_t **var,
	  float32 **mixw,
	  uint32 n_density,
	  uint32 n_stream,
	  uint32 n_in_obs,
	  const uint32 *veclen,
	  uint32 n_iter,
	  uint32 twopassvar,
	  uint32 vartiethr)
{
    uint32 o, i, j, k, l;
    float32  *mixw_acc;
    float32  *cb_acc;
    vector_t  **mean_acc_xx;
    vector_t  **var_acc_xx;
    vector_t  *mean_acc;
    vector_t  *var_acc;
    float64 ol, ttt, diff, log_tot_ol = 0, p_log_tot_ol = 0;
    float64 **norm, *den;
    float32 mixw_norm;

    vector_t obs;
    uint32 n_obs;

    vector_t ***n_mean_xx = NULL;
    vector_t *n_mean = NULL;

    float64 avg_lik=0, p_avg_lik=0;
    uint32 tievar = FALSE;

    E_INFO("EM reestimation of mixw/means/vars\n");
    
    if (twopassvar) {
	/* storage for the means reestimated in the first pass */
	n_mean_xx = gauden_alloc_param(1, 1, n_density, veclen);
	n_mean = n_mean_xx[0][0];
    }
    
    /* allocate mixing weight accumulators */
    mixw_acc = (float32 *)ckd_calloc(n_density, sizeof(float32));

    /* per-density posterior mass and mean/variance accumulators */
    cb_acc = (float32 *)ckd_calloc(n_density, sizeof(float32));
    mean_acc_xx = (vector_t **)alloc_gau_acc(1, n_density, veclen, feat_blksize());
    mean_acc = mean_acc_xx[0];
    var_acc_xx = (vector_t **)alloc_gau_acc(1, n_density, veclen, feat_blksize());
    var_acc = var_acc_xx[0];

    den = (float64 *)ckd_calloc(n_density, sizeof(float64));
    norm = (float64 **)ckd_calloc_2d(n_stream, n_density, sizeof(float64));

    for (j = 0; j < n_stream; j++) {
	n_obs = setup_obs(ts, j, n_in_obs, veclen);

	if (n_obs < vartiethr) tievar = TRUE;

	for (i = 0; i < n_iter; i++) {
	    p_log_tot_ol = log_tot_ol;
	    log_tot_ol = 0;

	    for (k = 0; k < n_density; k++) {
		/* floor variances */
		for (l = 0; l < veclen[j]; l++)
		    if (var[j][k][l] < 1e-4) var[j][k][l] = 1e-4; 

		/* compute normalization factors for Gaussian
		   densities */
		norm[j][k] = diag_norm(var[j][k], veclen[j]);

		/* precompute 1/(2sigma^2) terms */
		diag_eval_precomp(var[j][k], veclen[j]);
	    }

	    if (twopassvar) {
		/* do a pass over the corpus to compute reestimated means */
		for (o = 0; o < n_obs; o++) {
		    float64 mx;

		    obs = get_obs(o);
		
		    mx = MIN_NEG_FLOAT64;

		    for (k = 0; k < n_density; k++) {
			/* really log(den) for the moment */
			den[k] = log_diag_eval(obs, norm[j][k], mean[j][k], var[j][k], veclen[j]);
			if (mx < den[k]) mx = den[k];
		    }
		    for (k = 0, ol = 0; k < n_density; k++) {
			/* scale by the largest log density before
			 * exponentiating, exactly as the second pass
			 * below does */
			den[k] = exp(den[k] - mx);
			ol += mixw[j][k] * den[k];
		    }

		    for (k = 0; k < n_density; k++) {
			/* posterior of density k given this observation */
			ttt = mixw[j][k] * den[k] / ol;
		    
			cb_acc[k] += ttt;
			for (l = 0; l < veclen[j]; l++) {
			    mean_acc[k][l] += obs[l] * ttt;
			}
		    }
		}
	    
		/* turn the posterior masses into normalizers */
		for (k = 0; k < n_density; k++) {
		    cb_acc[k] = 1.0 / cb_acc[k];
		}

		/* compute the reestimated mean value to be used in next pass */
		for (k = 0; k < n_density; k++) {
		    for (l = 0; l < veclen[j]; l++) {
			n_mean[k][l] = mean_acc[k][l] * cb_acc[k];
			mean_acc[k][l] = 0;
		    }
		    cb_acc[k] = 0;
		}
	    }
	    else {
		n_mean = mean[j];
	    }
		
	    for (o = 0; o < n_obs; o++) {
		float64 mx;

		/* Do a pass over the data to accumulate reestimation sums
		 * for the remaining parameters (including means
		 * if not a 2-pass config) */

		/* Get the next observation */
		obs = get_obs(o);

		mx = MIN_NEG_FLOAT64;

		/* Compute the mixture density value given the
		 * observation and the model parameters */
		for (k = 0; k < n_density; k++) {
		    /* really log(den) for the moment */
		    den[k] = log_diag_eval(obs, norm[j][k], mean[j][k], var[j][k], veclen[j]);
		    if (mx < den[k]) mx = den[k];
		}
		for (k = 0, ol = 0; k < n_density; k++) {
		    den[k] = exp(den[k] - mx);
		    ol += mixw[j][k] * den[k];
		}

		/* add mx back: ol was computed from scaled densities */
		log_tot_ol += log(ol) + mx;

		/* Compute the reestimation sum terms for each
		 * of the component densities */
		for (k = 0; k < n_density; k++) {
		    ttt = mixw[j][k] * den[k] / ol;

		    mixw_acc[k] += ttt;

		    cb_acc[k] += ttt;
		    
		    for (l = 0; l < veclen[j]; l++) {
			/* if not doing two-pass variance computation
			 * n_mean <- mean above. */
			diff = obs[l] - n_mean[k][l];
			
			if (!twopassvar) {
			    mean_acc[k][l] += ttt * obs[l];
			}

			var_acc[k][l] += ttt * diff * diff;
		    }
		}
	    }

	    avg_lik = exp(log_tot_ol / n_obs);

	    if (p_log_tot_ol != 0)
		p_avg_lik = exp(p_log_tot_ol / n_obs);
	    else
		p_avg_lik = 0.5 * avg_lik;

	    E_INFO("EM stream %u: [%u] avg_lik %e conv_ratio %e\n",
		   j, i, avg_lik, (avg_lik - p_avg_lik) / p_avg_lik);
	    
	    /* normalize after iteration */

	    if (tievar) {
		/* Pool the variance ACCUMULATORS (not var[j], which is
		 * overwritten from var_acc below) so that every density
		 * ends up with the same, pooled variance.
		 *
		 * NOTE(review): cb_acc is pooled as well, and in the
		 * single-pass configuration the means below are
		 * normalized by that pooled value -- confirm this is
		 * intended. */

		/* create a sum over all densities in entry 0 */
		for (k = 1; k < n_density; k++) {
		    for (l = 0; l < veclen[j]; l++) {
			var_acc[0][l] += var_acc[k][l];
		    }
		    cb_acc[0] += cb_acc[k];
		}
		/* copy entry 0 back to remaining entries */
		for (k = 1; k < n_density; k++) {
		    for (l = 0; l < veclen[j]; l++) {
			var_acc[k][l] = var_acc[0][l];
		    }
		    cb_acc[k] = cb_acc[0];
		}		
	    }
		
	    for (k = 0, mixw_norm = 0; k < n_density; k++) {
		/* norm for per density expectations */
		cb_acc[k] = 1.0 / cb_acc[k];

		mixw_norm += mixw_acc[k];
	    }
	    mixw_norm = 1.0 / mixw_norm;

	    if (!twopassvar) {
		for (k = 0; k < n_density; k++) {
		    mixw[j][k] = mixw_acc[k] * mixw_norm;
		    mixw_acc[k] = 0;

		    for (l = 0; l < veclen[j]; l++) {
			mean[j][k][l] = mean_acc[k][l] * cb_acc[k];
			mean_acc[k][l] = 0;
			var[j][k][l]  = var_acc[k][l] * cb_acc[k];
			var_acc[k][l] = 0;
		    }
		    cb_acc[k] = 0;
		}
	    }
	    else {
		for (k = 0; k < n_density; k++) {
		    mixw[j][k] = mixw_acc[k] * mixw_norm;
		    mixw_acc[k] = 0;

		    for (l = 0; l < veclen[j]; l++) {
			/* already computed in first pass */
			mean[j][k][l] = n_mean[k][l];
			var[j][k][l]  = var_acc[k][l] * cb_acc[k];
			var_acc[k][l] = 0;
		    }
		    cb_acc[k] = 0;
		}
	    }
	}	/* end of EM iteration loop */

	E_INFO("EM stream %u: [final] n_obs %u avg_lik %e conv_ratio %e\n",
	       j, n_obs, avg_lik, (avg_lik - p_avg_lik) / p_avg_lik);

    }	/* end of feature stream loop */

    ckd_free((void *)mixw_acc);
    ckd_free((void *)cb_acc);
    ckd_free((void *)&mean_acc_xx[0][0][0]);
    ckd_free_2d((void **)mean_acc_xx);
    ckd_free((void *)&var_acc_xx[0][0][0]);
    ckd_free_2d((void **)var_acc_xx);
    if (n_mean_xx) {
	/* &n_mean_xx[0][0][0] is the same address as n_mean in the
	 * two-pass case, so n_mean must not be touched after this free;
	 * the remaining levels are released through n_mean_xx.
	 * NOTE(review): whether the underlying float buffer is released
	 * as well depends on gauden_alloc_param()'s layout -- confirm. */
	ckd_free((void *)&n_mean_xx[0][0][0]);
	ckd_free_2d((void **)n_mean_xx);
    }

    ckd_free_2d((void **)norm);
    ckd_free((void *)den);

    return log_tot_ol;
}
Beispiel #5
0
/***********************************************************************//**
 * @brief Generate the model map(s)
 *
 * Reads the task parameters, sets up the observation container and then
 * walks through the container. For every entry that is a CTA observation
 * the event file name is recorded and a model map is generated; entries of
 * any other type are skipped. If more than one CTA observation was handled,
 * storage via an XML file is enforced.
 ***************************************************************************/
void ctmodel::run(void)
{
    // In debug mode, mirror all log output to the screen
    if (logDebug()) {
        log.cout(true);
    }

    // Read the task parameters, then build the observation container
    get_parameters();
    setup_obs();

    // Log the task parameters
    if (logTerse()) {
        log_parameters();
        log << std::endl;
    }

    // Log the observation(s) as they are before processing
    if (logTerse()) {
        log << std::endl;
        log.header1((m_obs.size() > 1) ? "Observations" : "Observation");
        log << m_obs << std::endl;
    }

    // Log the section header
    if (logTerse()) {
        log << std::endl;
        log.header1((m_obs.size() > 1) ? "Generate model maps"
                                       : "Generate model map");
    }

    // Number of CTA observations handled so far
    int n_cta_obs = 0;

    // Walk through all observations in the container
    for (int i = 0; i < m_obs.size(); i++) {

        // Reserve an (initially empty) input file name for this entry
        m_infiles.push_back("");

        // Skip anything that is not a CTA observation
        GCTAObservation* obs = dynamic_cast<GCTAObservation*>(m_obs[i]);
        if (obs == NULL) {
            continue;
        }

        // Log a header for this observation
        if (logTerse()) {
            if (obs->name().length() <= 1) {
                log.header3("Observation");
            }
            else {
                log.header3("Observation "+obs->name());
            }
        }

        // Count the observation
        n_cta_obs++;

        // Remember the event file name (for possible saving)
        m_infiles[i] = obs->eventfile();

        // Generate the model map
        model_map(obs, m_obs.models());

    } // endfor: looped over observations

    // More than one handled observation requires XML file storage
    if (n_cta_obs > 1) {
        m_use_xml = true;
    }

    // Log the observation(s) as they are after processing
    if (logTerse()) {
        log << std::endl;
        log.header1((m_obs.size() > 1)
                    ? "Observations after model map generation"
                    : "Observation after model map generation");
        log << m_obs << std::endl;
    }

    // Return
    return;
}