/*
 * Program entry point.
 *
 * Runs initialize() on the command-line arguments and then init_mixw().
 * Returns 0 when both report S3_SUCCESS and 1 otherwise; a failure in
 * initialize() is additionally reported through E_ERROR and skips the
 * second step.
 *
 * NOTE(review): init_mixw is invoked without arguments here -- confirm
 * this matches its declaration.
 */
int
main(int argc, char *argv[])
{
    int exit_code = 0;

    if (initialize(argc, argv) != S3_SUCCESS) {
        E_ERROR("errors initializing.\n");
        exit_code = 1;
    }
    else if (init_mixw() != S3_SUCCESS) {
        exit_code = 1;
    }

    return exit_code;
}
/*
 * Estimate initial Gaussian parameters (and optionally mixing weights)
 * for the tied states ts_off .. ts_off + ts_cnt - 1 and write them out.
 *
 * For each tied state that has at least one frame, the function runs
 * cluster() to obtain means and per-frame labels, computes diagonal or
 * full variances from those labels (full when the "-fullvar" option is
 * non-zero), and, when mixwfn is given, initializes mixing weights with
 * init_mixw() and optionally re-estimates with reest_sum().
 *
 * Parameters:
 *   obsdmp, obsidx  Not referenced by this function.
 *   n_density       Number of densities per stream.
 *   n_stream        Number of feature streams.
 *   veclen          Per-stream vector lengths, passed through to the
 *                   allocation, estimation and write routines.
 *   reest           When TRUE (and mixwfn is given), run EM re-estimation;
 *                   not supported together with full covariances.
 *   mixwfn          Mixing weight output file, or NULL to skip mixing
 *                   weight initialization, re-estimation and output.
 *   meanfn          Mean output file, or NULL to skip writing means.
 *   varfn           Variance output file, or NULL to skip writing variances.
 *   ts_off          First tied-state id to process.
 *   ts_cnt          Number of tied states to process.
 *   n_ts            Not referenced by this function.
 *   n_d_ts          Number of tied states expected in a multi-class dump.
 *
 * Returns S3_SUCCESS on success.  Returns S3_ERROR when the single-class
 * dump file cannot be opened or its frame count cannot be read, or when
 * writing any requested output file fails.  Problems with a multi-class
 * dump are reported through E_FATAL.
 *
 * Uses the file-level variables multiclass, dmp_fp, dmp_swp, data_offset,
 * o2d and n_o2d.
 */
static int
init_state(const char *obsdmp,
           const char *obsidx,
           uint32 n_density,
           uint32 n_stream,
           const uint32 *veclen,
           int reest,
           const char *mixwfn,
           const char *meanfn,
           const char *varfn,
           uint32 ts_off,
           uint32 ts_cnt,
           uint32 n_ts,
           uint32 n_d_ts)
{
    vector_t ***mean;
    vector_t ***var = NULL;
    vector_t ****fullvar = NULL;
    float32 ***mixw = NULL;
    uint32 n_frame = 0;
    uint32 ignore = 0;
    codew_t *label = NULL;
    uint32 n_corpus = 0;
    float64 sqerr;
    float64 tot_sqerr;
    segdmp_type_t t;
    uint32 i, j, ts, n;
    timing_t *km_timer;
    timing_t *var_timer;
    timing_t *em_timer;
    int32 full_covar;

    km_timer = timing_get("km");
    var_timer = timing_get("var");
    em_timer = timing_get("em");

    /* The result was never used by this function.  The call itself is kept
     * because feat_blksize() is defined elsewhere -- TODO confirm it has no
     * required side effect and drop it. */
    (void)feat_blksize();

    full_covar = cmd_ln_int32("-fullvar");

    /* fully-continuous for now */
    mean = gauden_alloc_param(ts_cnt, n_stream, n_density, veclen);
    if (full_covar)
        fullvar = gauden_alloc_param_full(ts_cnt, n_stream, n_density, veclen);
    else
        var = gauden_alloc_param(ts_cnt, n_stream, n_density, veclen);
    if (mixwfn)
        mixw = (float32 ***)ckd_calloc_3d(ts_cnt, n_stream, n_density,
                                          sizeof(float32));

    if ((const char *)cmd_ln_access("-segidxfn")) {
        E_INFO("Multi-class dump\n");
        if (segdmp_open_read((const char **)cmd_ln_access("-segdmpdirs"),
                             (const char *)cmd_ln_access("-segdmpfn"),
                             (const char *)cmd_ln_access("-segidxfn"),
                             &n, &t) != S3_SUCCESS) {
            E_FATAL("Unable to open dumps\n");
        }

        if (n != n_d_ts) {
            E_FATAL("Expected %u tied-states in dump, but apparently %u\n",
                    n_d_ts, n);
        }
        if (t != SEGDMP_TYPE_FEAT) {
            /* Cast so the enum matches the %u conversion. */
            E_FATAL("Expected feature dump, but instead saw %u\n",
                    (unsigned int)t);
        }

        multiclass = TRUE;
    }
    else {
        const char *dmp_fn = (const char *)cmd_ln_access("-segdmpfn");

        E_INFO("1-class dump file\n");

        multiclass = FALSE;

        dmp_fp = s3open(dmp_fn, "rb", &dmp_swp);
        if (dmp_fp == NULL) {
            E_ERROR_SYSTEM("Unable to open dump file %s for reading\n",
                           dmp_fn);
            return S3_ERROR;
        }

        /* The dump begins with the frame count. */
        if (s3read(&n_frame, sizeof(uint32), 1, dmp_fp, dmp_swp, &ignore) != 1) {
            /* Report first (the message uses the system error state),
             * then release the file so it is not leaked on this path. */
            E_ERROR_SYSTEM("Unable to read frame count from dump file %s\n",
                           dmp_fn);
            s3close(dmp_fp);
            return S3_ERROR;
        }

        data_offset = ftell(dmp_fp);
    }

    tot_sqerr = 0;
    for (i = 0; i < ts_cnt; i++) {
        uint32 var_tie_thr;

        ts = ts_off + i;

        /* stride not accounted for yet */
        if (o2d == NULL) {
            /* In the single-class case n_frame keeps the count read from
             * the dump file header above. */
            if (multiclass)
                n_frame = segdmp_n_seg(ts);
        }
        else {
            /* Sum the segment counts of every dump state mapped to ts. */
            for (j = 0, n_frame = 0; j < n_o2d[ts]; j++) {
                n_frame += segdmp_n_seg(o2d[ts][j]);
            }
        }

        var_tie_thr = *(uint32 *)cmd_ln_access("-vartiethr");

        E_INFO("Corpus %u: sz==%u frames%s\n",
               ts, n_frame,
               (n_frame > var_tie_thr ? "" : " tied var"));

        if (n_frame == 0) {
            continue;
        }

        E_INFO("Convergence ratios are abs(cur - prior) / abs(prior)\n");

        /* Do some variety of k-means clustering */
        label = NULL;
        if (km_timer)
            timing_start(km_timer);
        sqerr = cluster(ts, n_stream, n_frame, veclen, mean[i], n_density,
                        &label);
        if (km_timer)
            timing_stop(km_timer);

        if (sqerr < 0) {
            E_ERROR("Unable to do k-means for state %u; skipping...\n", ts);
            /* NOTE(review): whether cluster() leaves an allocation in
             * label on failure is not visible here, so nothing is freed. */
            continue;
        }

        /* Given the k-means and assuming equal prior likelihoods
         * compute the variances */
        if (var_timer)
            timing_start(var_timer);
        if (full_covar)
            full_variances(ts, mean[i], fullvar[i], n_density, veclen,
                           n_frame, n_stream, label);
        else
            variances(ts, mean[i], var[i], n_density, veclen,
                      n_frame, n_stream, label);
        if (var_timer)
            timing_stop(var_timer);

        if (mixwfn) {
            /* initialize the mixing weights by counting # of occurrences
             * of the top codeword over the corpus and normalizing */
            init_mixw(mixw[i], mean[i], n_density, veclen, n_frame, n_stream,
                      label);
        }

        /* The labels are not needed past this point.  Release them on
         * every successful pass, not only when mixing weights were
         * requested, so they do not leak once per tied state. */
        ckd_free(label);
        label = NULL;

        if (mixwfn) {
            if (reest == TRUE && full_covar) {
                E_ERROR("EM re-estimation is not yet supported for full covariances\n");
            }
            else if (reest == TRUE) {
                if (em_timer)
                    timing_start(em_timer);
                /* Do iterations of EM to estimate the mixture densities */
                reest_sum(ts, mean[i], var[i], mixw[i], n_density, n_stream,
                          n_frame, veclen,
                          *(uint32 *)cmd_ln_access("-niter"),
                          FALSE,
                          var_tie_thr);
                if (em_timer)
                    timing_stop(em_timer);
            }
        }

        ++n_corpus;
        tot_sqerr += sqerr;

        E_INFO("sqerr [%u] == %e\n", ts, sqerr);
    }

    /* Guarded so the RMS is never computed as a division by zero. */
    if (n_corpus > 0) {
        E_INFO("sqerr = %e tot %e rms\n", tot_sqerr,
               sqrt(tot_sqerr/n_corpus));
    }

    if (!multiclass)
        s3close(dmp_fp);

    if (meanfn) {
        if (s3gau_write(meanfn,
                        (const vector_t ***)mean,
                        ts_cnt,
                        n_stream,
                        n_density,
                        veclen) != S3_SUCCESS) {
            return S3_ERROR;
        }
    }
    else {
        E_INFO("No mean file given; none written\n");
    }

    if (varfn) {
        if (full_covar) {
            if (s3gau_write_full(varfn,
                                 (const vector_t ****)fullvar,
                                 ts_cnt,
                                 n_stream,
                                 n_density,
                                 veclen) != S3_SUCCESS)
                return S3_ERROR;
        }
        else {
            if (s3gau_write(varfn,
                            (const vector_t ***)var,
                            ts_cnt,
                            n_stream,
                            n_density,
                            veclen) != S3_SUCCESS)
                return S3_ERROR;
        }
    }
    else {
        E_INFO("No variance file given; none written\n");
    }

    if (mixwfn) {
        if (s3mixw_write(mixwfn,
                         mixw,
                         ts_cnt,
                         n_stream,
                         n_density) != S3_SUCCESS) {
            return S3_ERROR;
        }
    }
    else {
        E_INFO("No mixing weight file given; none written\n");
    }

    return S3_SUCCESS;
}