Пример #1
0
int
main(int argc, char *argv[])
{
    if (initialize(argc, argv) != S3_SUCCESS) {
        E_ERROR("errors initializing.\n");
        return 1;
    }

    if (init_mixw() != S3_SUCCESS) {
        return 1;
    }

    return 0;
}
Пример #2
0
static int
init_state(const char *obsdmp,
	   const char *obsidx,
	   uint32 n_density,
	   uint32 n_stream,
	   const uint32 *veclen,
	   int reest,
	   const char *mixwfn,
	   const char *meanfn,
	   const char *varfn,
	   uint32 ts_off,
	   uint32 ts_cnt,
	   uint32 n_ts,
	   uint32 n_d_ts)
{
    uint32 blksz;
    vector_t ***mean;
    vector_t ***var = NULL;
    vector_t ****fullvar = NULL;
    float32  ***mixw = NULL;
    uint32 n_frame;
    uint32 ignore = 0;
    codew_t *label;
    uint32 n_corpus = 0;
    float64 sqerr;
    float64 tot_sqerr;
    segdmp_type_t t;
    uint32 i, j, ts, n;
    timing_t *km_timer;
    timing_t *var_timer;
    timing_t *em_timer;
    int32 full_covar;

    km_timer = timing_get("km");
    var_timer = timing_get("var");
    em_timer = timing_get("em");
    
    blksz = feat_blksize();

    full_covar = cmd_ln_int32("-fullvar");
    /* fully-continuous for now */
    mean = gauden_alloc_param(ts_cnt, n_stream, n_density, veclen);
    if (full_covar)
	    fullvar  = gauden_alloc_param_full(ts_cnt, n_stream, n_density, veclen);
    else
	    var  = gauden_alloc_param(ts_cnt, n_stream, n_density, veclen);
    if (mixwfn)
	mixw = (float32 ***)ckd_calloc_3d(ts_cnt,
					  n_stream,
					  n_density,
					  sizeof(float32));

    if ((const char *)cmd_ln_access("-segidxfn")) {
	E_INFO("Multi-class dump\n");
	if (segdmp_open_read((const char **)cmd_ln_access("-segdmpdirs"),
			     (const char *)cmd_ln_access("-segdmpfn"),
			     (const char *)cmd_ln_access("-segidxfn"),
			     &n,
			     &t) != S3_SUCCESS) {
	    E_FATAL("Unable to open dumps\n");
	}

	if (n != n_d_ts) {
	    E_FATAL("Expected %u tied-states in dump, but apparently %u\n",
		    n_d_ts, n);
	}
	if (t != SEGDMP_TYPE_FEAT) {
	    E_FATAL("Expected feature dump, but instead saw %u\n", t);
	}
	
	multiclass = TRUE;
    }
    else {
	E_INFO("1-class dump file\n");
	
	multiclass = FALSE;
	
	dmp_fp = s3open((const char *)cmd_ln_access("-segdmpfn"), "rb",
			&dmp_swp);
	if (dmp_fp == NULL) {
	    E_ERROR_SYSTEM("Unable to open dump file %s for reading\n",
			   (const char *)cmd_ln_access("-segdmpfn"));

	    return S3_ERROR;
	}

	if (s3read(&n_frame, sizeof(uint32), 1, dmp_fp, dmp_swp, &ignore) != 1) {
	    E_ERROR_SYSTEM("Unable to open dump file %s for reading\n",
			   (const char *)cmd_ln_access("-segdmpfn"));

	    return S3_ERROR;
	}

	data_offset = ftell(dmp_fp);
    }

    tot_sqerr = 0;
    for (i = 0; i < ts_cnt; i++) {
	ts = ts_off + i;

	/* stride not accounted for yet */
	if (o2d == NULL) {
	    if (multiclass)
		n_frame = segdmp_n_seg(ts);
	}
	else {
	    for (j = 0, n_frame = 0; j < n_o2d[ts]; j++) {
		n_frame += segdmp_n_seg(o2d[ts][j]);
	    }
	}
    
	E_INFO("Corpus %u: sz==%u frames%s\n",
	       ts, n_frame,
	       (n_frame > *(uint32 *)cmd_ln_access("-vartiethr") ? "" : " tied var"));

	if (n_frame == 0) {
	    continue;
	}


	E_INFO("Convergence ratios are abs(cur - prior) / abs(prior)\n");
	/* Do some variety of k-means clustering */
	if (km_timer)
	    timing_start(km_timer);
	sqerr = cluster(ts, n_stream, n_frame, veclen, mean[i], n_density, &label);
	if (km_timer)
	    timing_stop(km_timer);

	if (sqerr < 0) {
	    E_ERROR("Unable to do k-means for state %u; skipping...\n", ts);

	    continue;
	}

	/* Given the k-means and assuming equal prior liklihoods
	 * compute the variances */
	if (var_timer)
	    timing_start(var_timer);
	if (full_covar)
		full_variances(ts, mean[i], fullvar[i], n_density, veclen,
			       n_frame, n_stream, label);
	else
		variances(ts, mean[i], var[i], n_density, veclen, n_frame, n_stream, label);
	if (var_timer)
	    timing_stop(var_timer);

	if (mixwfn) {
	    /* initialize the mixing weights by counting # of occurrances
	     * of the top codeword over the corpus and normalizing */
	    init_mixw(mixw[i], mean[i], n_density, veclen, n_frame, n_stream, label);

	    ckd_free(label);

	    if (reest == TRUE && full_covar)
		E_ERROR("EM re-estimation is not yet supported for full covariances\n");
	    else if (reest == TRUE) {
		if (em_timer)
		    timing_start(em_timer);
		/* Do iterations of EM to estimate the mixture densities */
		reest_sum(ts, mean[i], var[i], mixw[i], n_density, n_stream,
			  n_frame, veclen,
			  *(uint32 *)cmd_ln_access("-niter"),
			  FALSE,
			  *(uint32 *)cmd_ln_access("-vartiethr"));
		if (em_timer)
		    timing_stop(em_timer);
	    }
	}
	
	++n_corpus;
	tot_sqerr += sqerr;
	    
	E_INFO("sqerr [%u] == %e\n", ts, sqerr);
    }

    if (n_corpus > 0) {
	E_INFO("sqerr = %e tot %e rms\n", tot_sqerr, sqrt(tot_sqerr/n_corpus));
    }

    if (!multiclass)
	s3close(dmp_fp);
    
    if (meanfn) {
	if (s3gau_write(meanfn,
			(const vector_t ***)mean,
			ts_cnt,
			n_stream,
			n_density,
			veclen) != S3_SUCCESS) {
	    return S3_ERROR;
	}
    }
    else {
	E_INFO("No mean file given; none written\n");
    }
		    
    if (varfn) {
	if (full_covar) {
	    if (s3gau_write_full(varfn,
				 (const vector_t ****)fullvar,
				 ts_cnt,
				 n_stream,
				 n_density,
				 veclen) != S3_SUCCESS)
		return S3_ERROR;
	}
	else {
	    if (s3gau_write(varfn,
				 (const vector_t ***)var,
				 ts_cnt,
				 n_stream,
				 n_density,
				 veclen) != S3_SUCCESS)
		return S3_ERROR;
	}
    }
    else {
	E_INFO("No variance file given; none written\n");
    }

    if (mixwfn) {
	if (s3mixw_write(mixwfn,
			 mixw,
			 ts_cnt,
			 n_stream,
			 n_density) != S3_SUCCESS) {
	    return S3_ERROR;
	}
    }
    else {
	E_INFO("No mixing weight file given; none written\n");
    }

    return S3_SUCCESS;
}