/**
 * Create the FSG search object for a search structure, attach it to the
 * structure's graph handle, then load the grammar named by "-fsg" and make
 * it the current grammar.
 *
 * Returns SRCH_SUCCESS when the grammar was read and selected, and
 * SRCH_FAILURE when either step fails.  The kb argument is not used here.
 */
int
srch_FSG_init(kb_t * kb,        /**< The KB */
              void *srch        /**< The pointer to a search structure */
    )
{
    srch_t *search = (srch_t *) srch;
    fsg_search_t *fsg_search;
    word_fsg_t *grammar;

    /* This is very strange */
    fsg_search = fsg_search_init(NULL, search);

    /* The graph handle is filled in before the grammar is read. */
    search->grh->graph_struct = fsg_search;
    search->grh->graph_type = GRAPH_STRUCT_GENGRAPH;

    grammar = srch_FSG_read_fsgfile(search, cmd_ln_str("-fsg"));
    if (grammar == NULL) {
        E_INFO("Could not read wordfsg with file name %s\n",
               cmd_ln_str("-fsg"));
        return SRCH_FAILURE;
    }

    if (fsg_search_set_current_fsg(fsg_search, grammar->name)) {
        return SRCH_SUCCESS;
    }

    E_INFO("Could not set the current fsg with name %s\n", grammar->name);
    return SRCH_FAILURE;
}
/*
 * GObject property getter: copies the command-line option that backs the
 * requested property into `value`.
 *
 * The switch only selects which option name to look up; the single lookup
 * and g_value_set_* call happen afterwards.  Unknown property ids are
 * reported through G_OBJECT_WARN_INVALID_PROPERTY_ID and leave `value`
 * untouched.
 */
static void
gst_pocketsphinx_get_property(GObject * object, guint prop_id,
                              GValue * value, GParamSpec * pspec)
{
    const char *string_option = NULL;   /* set for string-valued properties */
    const char *boolean_option = NULL;  /* set for boolean-valued properties */

    switch (prop_id) {
    case PROP_HMM_DIR:
        string_option = "-hmm";
        break;
    case PROP_LM_FILE:
        string_option = "-lm";
        break;
    case PROP_DICT_FILE:
        string_option = "-dict";
        break;
    case PROP_FSG_FILE:
        string_option = "-fsg";
        break;
    case PROP_FWDFLAT:
        boolean_option = "-fwdflat";
        break;
    case PROP_BESTPATH:
        boolean_option = "-bestpath";
        break;
    default:
        G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
        return;
    }

    if (string_option != NULL) {
        g_value_set_string(value, cmd_ln_str(string_option));
    }
    else {
        g_value_set_boolean(value, cmd_ln_boolean(boolean_option));
    }
}
/*
 * Parse the argument definitions twice -- first against orig_argc/orig_argv
 * (strict), then against the real command line (non-strict) -- and print
 * the resulting values of -a, -b, -c and -d on one line.  A missing -b is
 * printed as "(null)".
 */
int
main(int argc, char *argv[])
{
    const char *b_text;

    cmd_ln_parse(defs, orig_argc, orig_argv, TRUE);
    cmd_ln_parse(defs, argc, argv, FALSE);

    b_text = cmd_ln_str("-b");
    if (b_text == NULL) {
        b_text = "(null)";
    }

    printf("%d %s %d %f\n",
           cmd_ln_int32("-a"),
           b_text,
           cmd_ln_boolean("-c"),
           cmd_ln_float64("-d"));

    return 0;
}
static int wr_parm() { if (omixw) { if(cmd_ln_str("-omixwfn") == NULL) { E_INFO("Please specify -omixwfn\n"); return S3_ERROR; } wr_mixw(cmd_ln_str("-omixwfn")); } if (ogau) { if(cmd_ln_str("-ogaufn") == NULL) { E_INFO("Please specify -ogaufn\n"); return S3_ERROR; } wr_gau(cmd_ln_str("-ogaufn")); } if (ogau_full) { if(cmd_ln_str("-ofullgaufn") == NULL) { E_INFO("Please specify -ofullgaufn\n"); return S3_ERROR; } wr_gau_full(cmd_ln_str("-ofullgaufn")); } if (otmat) { if(cmd_ln_str("-otmatfn") == NULL) { E_INFO("Please specify -otmatfn\n"); return S3_ERROR; } wr_tmat(cmd_ln_str("-otmatfn")); } return S3_SUCCESS; }
int main(int argc, char *argv[]) { int32 i; int32 n_map=0; int32 n_class=0; int32 *mllr_map; char line[128]; parse_cmd_ln(argc, argv); if (cmd_ln_str("-nmap")) { n_map = cmd_ln_int32("-nmap"); } else { E_FATAL("Specify # of state -> MLLR class mappings using -nmap\n"); } if (cmd_ln_str("-nclass")) { n_class = cmd_ln_int32("-nclass"); } else { E_FATAL("Specify # of MLLR class mappings using -nclass\n"); } if (cmd_ln_str("-cb2mllrfn") == NULL) { E_FATAL("Specify output file using -cb2mllrfn\n"); } mllr_map = (int32 *)ckd_calloc(n_map, sizeof(int32)); for (i = 0; i < n_map; i++) { if (fgets(line, 128, stdin) == NULL) { E_FATAL("Ran out of mappings at %d, but expected %d\n", i, n_map); } mllr_map[i] = atoi(line); } if (fgets(line, 128, stdin) != NULL) { E_WARN("Expected EOF after %d mappings, but still more data\n", n_map); } if (s3cb2mllr_write(cmd_ln_str("-cb2mllrfn"), mllr_map, n_map, n_class) != S3_SUCCESS) { return 1; } return 0; }
/*
 * Pitch extraction entry point.
 *
 * With -c, the named control file is processed via run_control_file();
 * otherwise a single file pair (-i, -o) is processed via extract_pitch().
 * Exit status is 1 when the selected operation reports a negative result,
 * 0 otherwise.
 *
 * The parsed command line is released with cmd_ln_free() on every path,
 * including the failure paths.
 */
int
main(int argc, char *argv[])
{
    int status = 0;

    cmd_ln_parse(defn, argc, argv, TRUE);

    /* Run a control file if requested. */
    if (cmd_ln_str("-c")) {
        if (run_control_file(cmd_ln_str("-c")) < 0) {
            status = 1;
        }
    }
    else {
        if (extract_pitch(cmd_ln_str("-i"), cmd_ln_str("-o")) < 0) {
            status = 1;
        }
    }

    cmd_ln_free();
    return status;
}
float64 cluster(int32 ts, uint32 n_stream, uint32 n_in_frame, uint32 *veclen, uint32 blksize, vector_t **mean, uint32 n_density, codew_t **out_label) { float64 sum_sqerr, sqerr=0; uint32 s, n_frame; const char *meth; *out_label = NULL; k_means_set_get_obs(&get_obs); for (s = 0, sum_sqerr = 0; s < n_stream; s++, sum_sqerr += sqerr) { meth = cmd_ln_str("-method"); n_frame = setup_obs(ts, s, n_in_frame, n_stream, veclen, blksize); if (strcmp(meth, "rkm") == 0) { sqerr = random_kmeans(cmd_ln_int32("-ntrial"), n_frame, veclen[s], mean[s], n_density, cmd_ln_float32("-minratio"), cmd_ln_int32("-maxiter"), out_label); if (sqerr < 0) { E_ERROR("Too few observations for kmeans\n"); return -1.0; } } else if (strcmp(meth, "fnkm") == 0) { sqerr = furthest_neighbor_kmeans(n_frame, veclen[s], mean[s], n_density, cmd_ln_float32("-minratio"), cmd_ln_int32("-maxiter")); } else { E_ERROR("I don't know how to do method '%s'. Sorry.\n", meth); } } return sum_sqerr; }
int main(int argc, char *argv[]) { uint32 *spd; model_def_t *mdef; const char *tying_type; uint32 i; uint32 n_cb=0; parse_cmd_ln(argc, argv); E_INFO("Reading model definition file %s\n", cmd_ln_str("-moddeffn")); if (model_def_read(&mdef, cmd_ln_str("-moddeffn")) != S3_SUCCESS) { exit(1); } E_INFO("%d tied states defined\n", mdef->n_tied_state); tying_type = cmd_ln_str("-tyingtype"); E_INFO("Generating state parameter definitions for %s tying\n", tying_type); spd = ckd_calloc(mdef->n_tied_state, sizeof(uint32)); if (strcmp(tying_type, "semi") == 0) { n_cb = 1; } else if (strcmp(tying_type, "pd") == 0) { E_INFO("Phone dependent codebooks not yet implemented\n"); exit(1); } else if (strcmp(tying_type, "cont") == 0) { n_cb = mdef->n_tied_state; for (i = 0; i < mdef->n_tied_state; i++) spd[i] = i; } else { E_FATAL("Unknown tying type %s given\n", tying_type); } E_INFO("Writing %s\n", cmd_ln_str("-ts2cbfn")); if (s3ts2cb_write(cmd_ln_str("-ts2cbfn"), spd, mdef->n_tied_state, n_cb) != S3_SUCCESS) { E_FATAL_SYSTEM("Unable to write %s\n", cmd_ln_str("-ts2cbfn")); } return 0; }
int srch_FSG_dump_vithist(void *srch) { FILE *latfp; char file[8192]; srch_t *s; fsg_search_t *fsgsrch; s = (srch_t *) srch; fsgsrch = (fsg_search_t *) s->grh->graph_struct; sprintf(file, "%s/%s.hist", cmd_ln_str("-bptbldir"), fsgsrch->uttid); if ((latfp = fopen(file, "w")) == NULL) E_ERROR("fopen(%s,w) failed\n", file); else { fsg_history_dump(fsgsrch->history, fsgsrch->uttid, latfp, fsgsrch->dict); fclose(latfp); } return SRCH_SUCCESS; }
/* lgalescu 2004/08/22 */
/*
 * Write the first `ceplen` cepstral means to the file named by -cmsave,
 * one "cur_mean[i] = value" line per coefficient.
 *
 * Returns 1 when the file was written, 0 when -cmsave is not set or the
 * file could not be opened (the latter is also logged as a warning).
 */
int32
save_cm_to_file(float32 *cep_means, int32 ceplen)
{
    char *path = cmd_ln_str("-cmsave");
    FILE *out;
    int32 idx;

    /* be sure we only call this function when it makes sense */
    if (path == NULL) {
        return 0;
    }

    out = fopen(path, "w");
    if (out == NULL) {
        E_WARN("Could not open file %s. Cepstral means not saved.\n", path);
        return 0;
    }

    idx = 0;
    while (idx < ceplen) {
        fprintf(out, "cur_mean[%d] = %.2f\n", idx, cep_means[idx]);
        idx++;
    }

    fclose(out);
    return 1;
}
static int rd_parm() { if(cmd_ln_str("-imixwfn") ==NULL&& cmd_ln_str("-igaufn") ==NULL&& cmd_ln_str("-ifullgaufn")==NULL&& cmd_ln_str("-itmatfn") ==NULL ) { E_INFO("Please specify one of the following: -imixwfn, -igaufn, -ifullgaufn, -itmatfn\n"); return S3_ERROR; } if (cmd_ln_str("-imixwfn")) { if(cmd_ln_str("-nmixwout")==NULL){ E_INFO("Please specify -nmixwout\n"); return S3_ERROR; } rd_mixw(cmd_ln_str("-imixwfn"), cmd_ln_int32("-nmixwout")); } if (cmd_ln_str("-igaufn")) { if(cmd_ln_str("-ncbout")==NULL){ E_INFO("Please specify -ncbout\n"); return S3_ERROR; } rd_gau(cmd_ln_str("-igaufn"), cmd_ln_int32("-ncbout")); } if (cmd_ln_str("-ifullgaufn")) { if(cmd_ln_str("-ncbout")==NULL){ E_INFO("Please specify -ncbout\n"); return S3_ERROR; } rd_gau_full(cmd_ln_str("-ifullgaufn"), cmd_ln_int32("-ncbout")); } if (cmd_ln_str("-itmatfn")) { if(cmd_ln_str("-ntmatout")==NULL){ E_INFO("Please specify -ntmatout\n"); return S3_ERROR; } rd_tmat(cmd_ln_str("-itmatfn"), cmd_ln_int32("-ntmatout")); } return S3_SUCCESS; }
/*
 * Fill in one decision-tree node from the models listed in id[0..n_id-1].
 *
 * What the code does, in order:
 *  - Reads -ts2cbfn; only the literal values ".semi." and ".cont." are
 *    accepted (anything else is fatal).  ".cont." selects the "continuous"
 *    path below.
 *  - Sums mixw[m][s][j][k] over the listed models into a freshly allocated
 *    mixw_occ[s][j][k].
 *  - Continuous path only: accumulates mixw[..][0]-weighted first and
 *    second moments into lmeans/lvars, then normalises them by
 *    mixw_occ[s][j][0] and floors each variance at -varfloor.
 *  - Picks the state with the largest stwt[] entry and sums its stream-0
 *    mixw_occ row to get `occ`.
 *  - Computes wt_ent as sum over states of stwt[s] * sum over streams of
 *    dnom[j] * entropy, using ent_d() on the floored, normalised
 *    distribution (non-continuous) or ent_cont() on the merged
 *    mean/variance (continuous).
 *  - Stores node_id, a private copy of id[], n_id, mixw_occ, occ, wt_ent
 *    and (continuous only) means/vars in *node.
 *
 * Always returns S3_SUCCESS.  The n_model parameter is not used.
 *
 * NOTE(review): `type` is passed to strcmp() without a NULL check, and
 * `norm = 1.0 / dnom[j]` is computed even when dnom[j] is 0 -- confirm
 * callers guarantee both cannot happen.
 */
int mk_node(dtree_node_t *node, uint32 node_id, uint32 *id, uint32 n_id, float32 ****mixw, float32 ****means, float32 ****vars, uint32 *veclen, uint32 n_model, uint32 n_state, uint32 n_stream, uint32 n_density, float32 *stwt, float32 mwfloor)
{
    float32 ***mixw_occ, **dist;
    uint32 mm, m, s, j, k;
    float64 *dnom, norm, wt_ent, s_wt_ent, occ;
    float32 mx_wt;
    uint32 *l_id;
    float32 ***lmeans=0,***lvars=0;
    float32 varfloor=0;
    uint32 continuous, sumveclen;
    char* type;

    type = (char *)cmd_ln_str("-ts2cbfn");
    if (strcmp(type,".semi.")!=0 && strcmp(type,".cont.") != 0)
        E_FATAL("Type %s unsupported; trees can only be built on types .semi. or .cont.\n",type);
    if (strcmp(type,".cont.") == 0)
        continuous = 1;
    else
        continuous = 0;

    if (continuous == 1) {
        varfloor = cmd_ln_float32("-varfloor");
        /* Sumveclen is overallocation, but coding is simpler */
        for (j=0,sumveclen=0; j < n_stream; j++) sumveclen += veclen[j];

        lmeans = (float32 ***) ckd_calloc_3d(n_state,n_stream,sumveclen,sizeof(float32));
        lvars = (float32 ***) ckd_calloc_3d(n_state,n_stream,sumveclen,sizeof(float32));
    }

    /* mixw_occ is handed to the node at the end; dist and dnom are
     * scratch space released before returning. */
    mixw_occ = (float32 ***)ckd_calloc_3d(n_state, n_stream, n_density, sizeof(float32));
    dist = (float32 **)ckd_calloc_2d(n_stream, n_density, sizeof(float32));
    dnom = (float64 *)ckd_calloc(n_stream, sizeof(float64));

    /* Merge distributions of all the elements in a cluster for combined distribution */
    for (s = 0; s < n_state; s++) {
        for (j = 0; j < n_stream; j++) {
            float32 *lmeanvec=0, *lvarvec=0;

            if (continuous == 1) {
                lmeanvec = lmeans[s][j];
                lvarvec = lvars[s][j];
            }
            for (mm = 0; mm < n_id; mm++) {
                m = id[mm];
                for (k = 0; k < n_density; k++) {
                    mixw_occ[s][j][k] += mixw[m][s][j][k];
                }
                /* For continuous hmms we have only one gaussian per state */
                if (continuous == 1) {
                    /* Accumulate count-weighted sum(x) and sum(x^2). */
                    for (k = 0; k < veclen[j]; k++) {
                        lmeanvec[k] += mixw[m][s][j][0] * means[m][s][j][k];
                        lvarvec[k] += mixw[m][s][j][0] * (vars[m][s][j][k] + means[m][s][j][k] * means[m][s][j][k]);
                    }
                }
            }
            if (continuous == 1) {
                if (mixw_occ[s][j][0] != 0) {
                    /* Turn the sums into mean and variance; floor the variance. */
                    for (k = 0; k < veclen[j]; k++) {
                        lmeanvec[k] /= mixw_occ[s][j][0];
                        lvarvec[k] = lvarvec[k]/mixw_occ[s][j][0] - lmeanvec[k]*lmeanvec[k];
                        if (lvarvec[k] < varfloor) lvarvec[k] = varfloor;
                    }
                }
                else {
                    /* Zero count: any non-zero accumulated mean is inconsistent. */
                    for (k = 0; k < veclen[j]; k++)
                        if (lmeanvec[k] != 0)
                            E_FATAL("denominator = 0, but numerator = %f at k = %d\n",lmeanvec[k],k);
                }
            }
        }
    }

    /* Find out which state is under consideration */
    for (j = 0, mx_wt = 0, s = 0; s < n_state; s++) {
        if (stwt[s] > mx_wt) {
            mx_wt = stwt[s];
            j = s;
        }
    }

    /* occ is the same for each independent feature, so just choose 0 */
    for (k = 0, occ = 0; k < n_density; k++) {
        occ += mixw_occ[j][0][k];
    }

    for (s = 0, wt_ent = 0; s < n_state; s++) {
        /* dnom[j]: total count of stream j in state s. */
        for (j = 0; j < n_stream; j++) {
            for (k = 0, dnom[j] = 0; k < n_density; k++) {
                dnom[j] += mixw_occ[s][j][k];
            }
        }
        for (j = 0, s_wt_ent = 0; j < n_stream; j++) {
            norm = 1.0 / dnom[j];
            /* discrete_entropy for discrete case, continuous entropy for continuous HMMs */
            if (continuous != 1) {
                for (k = 0; k < n_density; k++) {
                    dist[j][k] = mixw_occ[s][j][k] * norm;
                    if (dist[j][k] < mwfloor) dist[j][k] = mwfloor;
                }
                s_wt_ent += dnom[j] * ent_d(dist[j], n_density);
            }
            else {
                s_wt_ent += dnom[j] * ent_cont(lmeans[s][j], lvars[s][j], veclen[j]);
            }
        }
        wt_ent += stwt[s] * s_wt_ent;
    }

    node->node_id = node_id;

    /* The node keeps its own copy of the id list. */
    l_id = ckd_calloc(n_id, sizeof(uint32));
    for (j = 0; j < n_id; j++) {
        l_id[j] = id[j];
    }
    node->id = l_id;
    node->n_id = n_id;
    node->mixw_occ = mixw_occ;
    if (continuous == 1) {
        node->means = lmeans;
        node->vars = lvars;
    }
    node->occ = occ;
    node->wt_ent = wt_ent;

    ckd_free_2d((void **)dist);
    ckd_free((void *)dnom);

    return S3_SUCCESS;
}
int main(int argc, char *argv[]) { vector_t ***mean; vector_t ***var = NULL; vector_t ****fullvar = NULL; vector_t ***new_mean; vector_t ***new_var = NULL; vector_t ****new_fullvar = NULL; float32 ***dnom; float32 ***mixw; float32 ***new_mixw; uint32 n_mixw; uint32 n_mgau; uint32 n_dnom; uint32 n_feat; uint32 n_density; uint32 n_inc; uint32 *veclen; int32 var_is_full; parse_cmd_ln(argc, argv); E_INFO("Reading mixing weight file %s.\n", cmd_ln_str("-inmixwfn")); if (s3mixw_read(cmd_ln_str("-inmixwfn"), &mixw, &n_mixw, &n_feat, &n_density) != S3_SUCCESS) { return 1; } n_inc = cmd_ln_int32("-ninc"); if (n_inc > n_density) { E_WARN("# of densities to split (== %u) > total # of densities/mixture (== %u); # split <- %u # den/mix\n", n_inc, n_density, n_density); n_inc = n_density; } if (s3gau_read(cmd_ln_str("-inmeanfn"), &mean, &n_mgau, &n_feat, &n_density, &veclen) != S3_SUCCESS) { return 1; } var_is_full = cmd_ln_int32("-fullvar"); if (var_is_full) { if (s3gau_read_full(cmd_ln_str("-invarfn"), &fullvar, &n_mgau, &n_feat, &n_density, &veclen) != S3_SUCCESS) { return 1; } } else { if (s3gau_read(cmd_ln_str("-invarfn"), &var, &n_mgau, &n_feat, &n_density, &veclen) != S3_SUCCESS) { return 1; } } if (s3gaudnom_read(cmd_ln_str("-dcountfn"), &dnom, &n_dnom, &n_feat, &n_density) != S3_SUCCESS) { return 1; } new_mean = gauden_alloc_param(n_mgau, n_feat, n_density+n_inc, veclen); if (var_is_full) new_fullvar = gauden_alloc_param_full(n_mgau, n_feat, n_density+n_inc, veclen); else new_var = gauden_alloc_param(n_mgau, n_feat, n_density+n_inc, veclen); new_mixw = (float32 ***)ckd_calloc_3d(n_mixw, n_feat, n_density+n_inc, sizeof(float32)); E_INFO("output n_density == %u\n", n_density+n_inc); inc_densities(new_mixw, new_mean, new_var, new_fullvar, mixw, mean, var, fullvar, dnom, n_mixw, n_mgau, n_dnom, n_feat, n_density, veclen, n_inc); if (cmd_ln_str("-outmixwfn") != NULL) { if (s3mixw_write(cmd_ln_str("-outmixwfn"), new_mixw, n_mixw, n_feat, n_density+n_inc) != S3_SUCCESS) { 
return 1; } } else { E_FATAL("You must use the -outmixwfn argument\n"); } if (cmd_ln_str("-outmeanfn") != NULL) { if (s3gau_write(cmd_ln_str("-outmeanfn"), (const vector_t ***)new_mean, n_mgau, n_feat, n_density+n_inc, veclen) != S3_SUCCESS) { return 1; } } else { E_FATAL("You must use the -outmeanfn argument\n"); } if (cmd_ln_str("-outvarfn") != NULL) { if (var_is_full) { if (s3gau_write_full(cmd_ln_str("-outvarfn"), (const vector_t ****)new_fullvar, n_mgau, n_feat, n_density+n_inc, veclen) != S3_SUCCESS) { return 1; } } else { if (s3gau_write(cmd_ln_str("-outvarfn"), (const vector_t ***)new_var, n_mgau, n_feat, n_density+n_inc, veclen) != S3_SUCCESS) { return 1; } } } else { E_FATAL("You must use the -outvarfn argument\n"); } return 0; }
/*
 * Load everything needed to tie states:
 *  - the input model definition (-imoddeffn)      -> *out_imdef
 *  - the phone-set file (-psetfn)                 -> *out_pset, *out_n_pset
 *  - one decision tree per (phone, state), read from
 *    "<-treedir>/<phone>-<state>.dtree"           -> *out_tree
 *  - the number of leaves over all trees          -> *out_n_seno
 *
 * With -allphones a single tree set named "ALLPHONES" is read into
 * tree[0]; otherwise one set per CI phone, skipping phones that carry the
 * "filler" attribute (their tree[p] stays NULL).  Trees are read for
 * states 0 .. n_state-2.  Leaves are numbered by label_leaves() starting
 * at imdef->n_tied_ci_state.
 *
 * Returns S3_SUCCESS, or S3_ERROR when the model definition cannot be
 * read.  Missing -imoddeffn / -psetfn / -treedir, an over-long tree file
 * name and an unreadable tree file are fatal.
 */
int
init(model_def_t **out_imdef,
     pset_t **out_pset,
     uint32 *out_n_pset,
     dtree_t ****out_tree,
     uint32 *out_n_seno)
{
    model_def_t *imdef;
    uint32 p, s;
    uint32 n_ci, n_state;
    char fn[MAXPATHLEN+1];
    int fn_len;
    const char *a_fn;
    FILE *fp;
    dtree_t ***tree, *tr;
    pset_t *pset;
    uint32 n_pset;
    uint32 n_seno;
    const char *treedir;
    uint32 ts_id;
    int allphones;

    a_fn = cmd_ln_str("-imoddeffn");
    if (a_fn == NULL)
        E_FATAL("Specify -imoddeffn\n");
    if (model_def_read(&imdef, a_fn) != S3_SUCCESS) {
        return S3_ERROR;
    }
    *out_imdef = imdef;

    a_fn = cmd_ln_str("-psetfn");
    /* Checked because the name is printed with "%s" and then opened. */
    if (a_fn == NULL)
        E_FATAL("Specify -psetfn\n");
    E_INFO("Reading: %s\n", a_fn);
    *out_pset = pset = read_pset_file(a_fn, imdef->acmod_set, &n_pset);
    *out_n_pset = n_pset;

    allphones = cmd_ln_int32("-allphones");
    if (allphones)
        n_ci = 1;
    else
        n_ci = acmod_set_n_ci(imdef->acmod_set);

    treedir = cmd_ln_str("-treedir");
    /* Checked because it is formatted with "%s" for every tree file. */
    if (treedir == NULL)
        E_FATAL("Specify -treedir\n");

    tree = (dtree_t ***)ckd_calloc(n_ci, sizeof(dtree_t **));
    *out_tree = tree;

    ts_id = imdef->n_tied_ci_state;
    for (p = 0, n_seno = 0; p < n_ci; p++) {
        if (allphones || !acmod_set_has_attrib(imdef->acmod_set, p, "filler")) {
            const char *pname;

            if (allphones) {
                n_state = imdef->defn[acmod_set_n_ci(imdef->acmod_set)].n_state;
                pname = "ALLPHONES";
            }
            else {
                n_state = imdef->defn[p].n_state;
                pname = acmod_set_id2name(imdef->acmod_set, p);
            }

            tree[p] = (dtree_t **)ckd_calloc(n_state, sizeof(dtree_t *));

            /* "s + 1 < n_state" visits states 0..n_state-2 like the old
             * "s < n_state-1" but cannot wrap around when n_state is 0. */
            for (s = 0; s + 1 < n_state; s++) {
                E_INFO("%s-%u: offset %u\n", pname, s, ts_id);

                fn_len = snprintf(fn, sizeof(fn), "%s/%s-%u.dtree",
                                  treedir, pname, s);
                if (fn_len < 0 || (size_t)fn_len >= sizeof(fn)) {
                    E_FATAL("Tree file name too long: %s/%s-%u.dtree\n",
                            treedir, pname, s);
                }

                fp = fopen(fn, "r");
                if (fp == NULL) {
                    E_FATAL_SYSTEM("Unable to open %s for reading", fn);
                }
                tree[p][s] = tr = read_final_tree(fp, pset, n_pset);
                label_leaves(&tr->node[0], &ts_id);
                fclose(fp);

                n_seno += cnt_leaf(&tr->node[0]);
            }
        }
    }

    /* Every labelled leaf must have been counted exactly once. */
    assert(n_seno == (ts_id - imdef->n_tied_ci_state));

    E_INFO("n_seno= %u\n", ts_id);

    *out_n_seno = n_seno;

    return S3_SUCCESS;
}
void cluster_leaves(dtree_t *tr, uint32 *veclen, float64 *wt_ent_dec, uint32 *out_n_a, uint32 *out_n_b, pset_t *pset, uint32 n_state, uint32 n_stream, uint32 n_density, float32 *stwt, float32 mwfloor) { uint32 n_leaf; float32 ****mixw_occ; uint32 *clust, n_a, n_b; uint32 *node_id; dtree_node_t *root; uint32 i; float32 ****means=0; float32 ****vars=0; const char* type; uint32 continuous, sumveclen; type = cmd_ln_str("-ts2cbfn"); if (strcmp(type,".semi.")!=0 && strcmp(type,".cont.") != 0) E_FATAL("Type %s unsupported; trees can only be built on types .semi. or .cont.\n",type); if (strcmp(type,".cont.") == 0) continuous = 1; else continuous = 0; root = &tr->node[0]; /* determine the # of leaf nodes in the simple tree */ n_leaf = cnt_leaf(root); if (continuous == 1) { for (i=0,sumveclen=0; i < n_stream; i++) sumveclen += veclen[i]; means = (float32 ****)ckd_calloc_4d(n_leaf, n_state, n_stream, sumveclen, sizeof(float32)); vars = (float32 ****)ckd_calloc_4d(n_leaf, n_state, n_stream, sumveclen, sizeof(float32)); } /* Alloc space for: * - leaf node distribution array * - leaf node cluster id array * - leaf node id array */ mixw_occ = (float32 ****)ckd_calloc_4d(n_leaf, n_state, n_stream, n_density, sizeof(float32)); clust = (uint32 *)ckd_calloc(n_leaf, sizeof(uint32)); node_id = (uint32 *)ckd_calloc(n_leaf, sizeof(uint32)); /* compute the density occupancies of the leaves */ leaf_mixw_occ(root, pset, mixw_occ, node_id, n_state, n_stream, n_density, 0); if (continuous == 1) { /* compute means and variances of the leaves */ leaf_mean_vars(root, pset, means, vars, node_id, n_state, n_stream, veclen, 0); } /* Cluster the leaf nodes into two classes */ *wt_ent_dec = two_class(mixw_occ, means, vars, veclen, n_leaf, n_state, n_stream, n_density, stwt, clust, mwfloor); for (i = 0; i < n_leaf; i++) { tr->node[node_id[i]].clust = clust[i]; } /* Simplify the tree based on the two classes * (i.e. 
if siblings belong to the same class, * delete the node) */ prune_leaves(root, pset); /* Determine how many leaf nodes in class A and B * in the simplified tree */ n_a = n_b = 0; cnt_class(root, &n_a, &n_b); #if 0 fprintf(stderr, "Pruned tree %u/%u:\n", n_a, n_b); print_tree(stderr, "|", root, pset, 1); fprintf(stderr, "\n"); #endif *out_n_a = n_a; *out_n_b = n_b; }
/*
 * Viterbi-path accumulation for MMI training of one utterance.
 *
 * Outline of what the code does:
 *  - Runs forward() over the utterance to obtain, per frame, the active
 *    states and their back-pointers.
 *  - Locates state n_state-1 among the active states of the last frame;
 *    if it is absent the alignment is treated as failed (S3_ERROR).
 *  - Walks the back-pointers from the last frame to the first.  For each
 *    frame it skips non-emitting states, evaluates the densities of the
 *    state's codebook (and of its CI codebook when different), derives
 *    per-density terms and, when mean_reest or var_reest is set, passes
 *    them to mmi_accum_gauden() together with arc_gamma.
 *  - After a complete walk, accum_global() folds the utterance
 *    accumulators into the global ones.
 *  - All per-utterance storage is released at all_done on every path.
 *
 * Returns S3_SUCCESS, or the failing status from forward() /
 * gauden_scale_densities_bwd(), or S3_ERROR for a failed alignment.
 * Any non-success status is also logged with the utterance name.
 *
 * Setting -outphsegdir is fatal here; note the check happens after
 * forward() has already run.
 *
 * NOTE(review): p_op, p_ci_op, d_term and d_term_ci are static scratch
 * buffers sized from n_feat / n_top on the first call only and never
 * freed -- presumably those dimensions are constant for the whole run;
 * confirm.  n_cb is assigned but not read afterwards.
 */
int32 mmi_viterbi_update(vector_t **feature, uint32 n_obs, state_t *state_seq, uint32 n_state, model_inventory_t *inv, float64 a_beam, int32 mean_reest, int32 var_reest, float64 arc_gamma, feat_t *fcb)
{
    float64 *scale = NULL;
    float64 **dscale = NULL;
    float64 **active_alpha;
    uint32 **active_astate;
    uint32 **bp;
    uint32 *n_active_astate;
    gauden_t *g;/* Gaussian density parameters and reestimation sums */
    float32 ***mixw;/* all mixing weights */
    float64 ***now_den = NULL;/* Short for den[t] */
    uint32 ***now_den_idx = NULL;/* Short for den_idx[t] */
    uint32 *active_cb;
    uint32 n_active_cb;
    float32 ***denacc = NULL;/* mean/var reestimation accumulators for time t */
    size_t denacc_size;/* Total size of data references in denacc. Allows for quick clears between time frames */
    uint32 n_lcl_cb;
    uint32 *cb_inv;
    uint32 i, j, q;
    int32 t;
    uint32 n_feat;
    uint32 n_density;
    uint32 n_top;
    int ret;
    uint32 n_cb;
    /* Scratch buffers kept across calls (allocated on first use below). */
    static float64 *p_op = NULL;
    static float64 *p_ci_op = NULL;
    static float64 **d_term = NULL;
    static float64 **d_term_ci = NULL;

    /* caller must ensure that there is some non-zero amount of work to be done here */
    assert(n_obs > 0);
    assert(n_state > 0);

    g = inv->gauden;
    n_feat = gauden_n_feat(g);
    n_density = gauden_n_density(g);
    n_top = gauden_n_top(g);
    n_cb = gauden_n_mgau(g);

    if (p_op == NULL) {
        p_op = ckd_calloc(n_feat, sizeof(float64));
        p_ci_op = ckd_calloc(n_feat, sizeof(float64));
    }
    if (d_term == NULL) {
        d_term = (float64 **)ckd_calloc_2d(n_feat, n_top, sizeof(float64));
        d_term_ci = (float64 **)ckd_calloc_2d(n_feat, n_top, sizeof(float64));
    }

    /* Per-utterance arrays, one slot per frame; released at all_done. */
    scale = (float64 *)ckd_calloc(n_obs, sizeof(float64));
    dscale = (float64 **)ckd_calloc(n_obs, sizeof(float64 *));
    n_active_astate = (uint32 *)ckd_calloc(n_obs, sizeof(uint32));
    active_alpha = (float64 **)ckd_calloc(n_obs, sizeof(float64 *));
    active_astate = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *));
    active_cb = ckd_calloc(2*n_state, sizeof(uint32));
    bp = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *));

    /* Run forward algorithm, which has embedded Viterbi. */
    ret = forward(active_alpha, active_astate, n_active_astate, bp, scale, dscale, feature, n_obs, state_seq, n_state, inv, a_beam, NULL, 1);

    if (cmd_ln_str("-outphsegdir")) {
        E_FATAL("current MMI implementation don't support -outphsegdir\n");
    }

    if (ret != S3_SUCCESS) {
        /* Some problem with the utterance, release per utterance storage and
         * forget about adding the utterance accumulators to the global accumulators */
        goto all_done;
    }

    mixw = inv->mixw;
    n_lcl_cb = inv->n_cb_inverse;
    cb_inv = inv->cb_inverse;

    /* Allocate local accumulators for mean, variance reestimation sums if necessary */
    gauden_alloc_l_acc(g, n_lcl_cb, mean_reest, var_reest, FALSE);

    n_active_cb = 0;
    now_den = (float64 ***)ckd_calloc_3d(n_lcl_cb, n_feat, n_top, sizeof(float64));
    now_den_idx = (uint32 ***)ckd_calloc_3d(n_lcl_cb, n_feat, n_top, sizeof(uint32));

    if (mean_reest || var_reest) {
        /* allocate space for the per frame density counts */
        denacc = (float32 ***)ckd_calloc_3d(n_lcl_cb, n_feat, n_density, sizeof(float32));
        /* # of bytes required to store all weighted vectors */
        denacc_size = n_lcl_cb * n_feat * n_density * sizeof(float32);
    }
    else {
        denacc = NULL;
        denacc_size = 0;
    }

    /* Okay now run through the backtrace and accumulate counts. */
    /* Find the non-emitting ending state */
    for (q = 0; q < n_active_astate[n_obs-1]; ++q) {
        if (active_astate[n_obs-1][q] == n_state-1)
            break;
    }
    if (q == n_active_astate[n_obs-1]) {
        E_ERROR("Failed to align audio to trancript: final state of the search is not reached\n");
        ret = S3_ERROR;
        goto all_done;
    }

    /* q indexes the active-state list of frame t; j is the state id there. */
    for (t = n_obs-1; t >= 0; --t) {
        uint32 l_cb;
        uint32 l_ci_cb;
        float64 op, p_reest_term;
        uint32 prev;

        j = active_astate[t][q];

        /* Follow any non-emitting states at time t first. */
        while (state_seq[j].mixw == TYING_NON_EMITTING) {
            prev = active_astate[t][bp[t][q]];
            q = bp[t][q];
            j = prev;
        }

        /* Now accumulate statistics for the real state. */
        l_cb = state_seq[j].l_cb;
        l_ci_cb = state_seq[j].l_ci_cb;
        n_active_cb = 0;

        gauden_compute_log(now_den[l_cb], now_den_idx[l_cb], feature[t], g, state_seq[j].cb, NULL);
        active_cb[n_active_cb++] = l_cb;

        if (l_cb != l_ci_cb) {
            gauden_compute_log(now_den[l_ci_cb], now_den_idx[l_ci_cb], feature[t], g, state_seq[j].ci_cb, NULL);
            active_cb[n_active_cb++] = l_ci_cb;
        }

        ret = gauden_scale_densities_bwd(now_den, now_den_idx, &dscale[t], active_cb, n_active_cb, g);
        if (ret != S3_SUCCESS)
            goto all_done;

        assert(state_seq[j].mixw != TYING_NON_EMITTING);

        /* Now calculate mixture densities. */
        /* This is the normalizer sum_m c_{jm} p(o_t|\lambda_{jm}) */
        op = gauden_mixture(now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], g);

        /* Make up this bogus value to be consistent with backward.c */
        p_reest_term = 1.0 / op;

        /* Compute the output probability excluding the contribution
         * of each feature stream.  i.e. p_op[0] is the output
         * probability excluding feature stream 0 */
        partial_op(p_op, op, now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], n_feat, n_top);

        /* compute the probability of each (of possibly topn) density */
        den_terms(d_term, p_reest_term, p_op, now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], n_feat, n_top);

        if (l_cb != l_ci_cb) {
            /* For each feature stream f, compute:
             *     sum_k(mixw[f][k] den[f][k])
             * and store the results in p_ci_op */
            partial_ci_op(p_ci_op, now_den[l_ci_cb], now_den_idx[l_ci_cb], mixw[state_seq[j].ci_mixw], n_feat, n_top);

            /* For each feature stream and density compute the terms:
             *   w[f][k] den[f][k] / sum_k(w[f][k] den[f][k]) * post_j
             * and store results in d_term_ci */
            den_terms_ci(d_term_ci, 1.0, /* post_j = 1.0 */ p_ci_op, now_den[l_ci_cb], now_den_idx[l_ci_cb], mixw[state_seq[j].ci_mixw], n_feat, n_top);
        }

        /* accumulate the probability for each density in the
         * density reestimation accumulators */
        if (mean_reest || var_reest) {
            accum_den_terms(denacc[l_cb], d_term, now_den_idx[l_cb], n_feat, n_top);
            if (l_cb != l_ci_cb) {
                accum_den_terms(denacc[l_ci_cb], d_term_ci, now_den_idx[l_ci_cb], n_feat, n_top);
            }
        }

        /* Note that there is only one state/frame so this is kind of redundant */
        if (mean_reest || var_reest) {
            /* Update the mean and variance reestimation accumulators */
            mmi_accum_gauden(denacc, cb_inv, n_lcl_cb, feature[t], now_den_idx, g, mean_reest, var_reest, arc_gamma, fcb);
            /* Clear the per-frame counts for the next frame. */
            memset(&denacc[0][0][0], 0, denacc_size);
        }

        /* Step back one frame along the back-pointer. */
        if (t > 0) {
            prev = active_astate[t-1][bp[t][q]];
            q = bp[t][q];
            j = prev;
        }
    }

    /* If no error was found, add the resulting utterance reestimation
     * accumulators to the global reestimation accumulators */
    accum_global(inv, state_seq, n_state, FALSE, FALSE, mean_reest, var_reest, FALSE);

all_done:
    ckd_free((void *)scale);
    for (i = 0; i < n_obs; i++) {
        if (dscale[i])
            ckd_free((void *)dscale[i]);
    }
    ckd_free((void **)dscale);
    ckd_free(n_active_astate);
    for (i = 0; i < n_obs; i++) {
        ckd_free((void *)active_alpha[i]);
        ckd_free((void *)active_astate[i]);
        ckd_free((void *)bp[i]);
    }
    ckd_free((void *)active_alpha);
    ckd_free((void *)active_astate);
    ckd_free((void *)active_cb);
    ckd_free((void **)bp);

    /* These three are only allocated once forward() has succeeded. */
    if (denacc)
        ckd_free_3d((void ***)denacc);
    if (now_den)
        ckd_free_3d((void ***)now_den);
    if (now_den_idx)
        ckd_free_3d((void ***)now_den_idx);

    if (ret != S3_SUCCESS)
        E_ERROR("viterbi update error in sentence %s\n", corpus_utt_brief_name());

    return ret;
}
int main(int argc, char *argv[]) { model_def_t *imdef; model_def_t *omdef; pset_t *pset; uint32 n_pset; dtree_t ***tree; uint32 n_seno; uint32 n_ci; uint32 n_acmod; uint32 p; uint32 s; model_def_entry_t *idefn, *odefn; acmod_id_t b, l, r; word_posn_t wp; int allphones; parse_cmd_ln(argc, argv); if (init(&imdef, &pset, &n_pset, &tree, &n_seno) != S3_SUCCESS) return 1; omdef = (model_def_t *)ckd_calloc(1, sizeof(model_def_t)); omdef->acmod_set = imdef->acmod_set; /* same set of acoustic models */ omdef->n_total_state = imdef->n_total_state; omdef->n_tied_ci_state = imdef->n_tied_ci_state; omdef->n_tied_state = imdef->n_tied_ci_state + n_seno; omdef->n_tied_tmat = imdef->n_tied_tmat; omdef->defn = (model_def_entry_t *)ckd_calloc(imdef->n_defn, sizeof(model_def_entry_t)); /* * Define the context-independent models */ n_ci = acmod_set_n_ci(imdef->acmod_set); for (p = 0; p < n_ci; p++) { idefn = &imdef->defn[p]; odefn = &omdef->defn[p]; odefn->p = idefn->p; odefn->tmat = idefn->tmat; odefn->state = ckd_calloc(idefn->n_state, sizeof(uint32)); odefn->n_state = idefn->n_state; for (s = 0; s < idefn->n_state; s++) { if (idefn->state[s] == NO_ID) odefn->state[s] = NO_ID; else { odefn->state[s] = idefn->state[s]; } } } /* * Define the rest of the models */ allphones = cmd_ln_int32("-allphones"); n_acmod = acmod_set_n_acmod(omdef->acmod_set); for (; p < n_acmod; p++) { b = acmod_set_base_phone(omdef->acmod_set, p); assert(p != b); idefn = &imdef->defn[p]; odefn = &omdef->defn[p]; odefn->p = idefn->p; odefn->tmat = idefn->tmat; odefn->state = ckd_calloc(idefn->n_state, sizeof(uint32)); odefn->n_state = idefn->n_state; for (s = 0; s < idefn->n_state; s++) { if (idefn->state[s] == NO_ID) /* Non-emitting state */ odefn->state[s] = NO_ID; else { uint32 bb; /* emitting state: find the tied state */ acmod_set_id2tri(omdef->acmod_set, &b, &l, &r, &wp, p); #ifdef HORRIBLY_VERBOSE fprintf(stderr, "%s %u ", acmod_set_id2name(omdef->acmod_set, p), s); #endif bb = allphones ? 
0 : b; odefn->state[s] = tied_state(&tree[bb][s]->node[0], b, l, r, wp, pset); #ifdef HORRIBLY_VERBOSE fprintf(stderr, "\t-> %u\n", odefn->state[s]); fprintf(stderr, "\n"); #endif } } } if (model_def_write(omdef, cmd_ln_str("-omoddeffn")) != S3_SUCCESS) { return 1; } return 0; }
int main(int argc, char *argv[]) { lexicon_t *lex; model_def_t *omdef; model_def_t *dmdef; feat_t *feat; uint32 n_stream, blksize; uint32 *veclen; uint32 ts_off; uint32 ts_cnt; FILE *fp; if (main_initialize(argc, argv, &lex, &omdef, &dmdef, &feat) != S3_SUCCESS) { return -1; } n_stream = feat_dimension1(feat); veclen = feat_stream_lengths(feat); blksize = feat_dimension(feat); if (strcmp(cmd_ln_str("-gthobj"), "state") == 0) { ts_off = cmd_ln_int32("-tsoff"); if (cmd_ln_str("-tscnt") == NULL) { ts_cnt = omdef->n_tied_state - ts_off; } else { ts_cnt = cmd_ln_int32("-tscnt"); } if (ts_off + ts_cnt > omdef->n_tied_state) { E_FATAL("Too many tied states specified\n"); } n_tot_frame = 0; ptmr_reset(&all_timer); ptmr_reset(&km_timer); ptmr_reset(&var_timer); ptmr_reset(&em_timer); ptmr_start(&all_timer); if (init_state(cmd_ln_str("-segdmpfn"), cmd_ln_str("-segidxfn"), cmd_ln_int32("-ndensity"), n_stream, veclen, blksize, cmd_ln_int32("-reest"), cmd_ln_str("-mixwfn"), cmd_ln_str("-meanfn"), cmd_ln_str("-varfn"), ts_off, ts_cnt, omdef->n_tied_state, (dmdef != NULL ? dmdef->n_tied_state : omdef->n_tied_state)) != S3_SUCCESS) { E_ERROR("Unable to train [%u %u]\n", ts_off, ts_off+ts_cnt-1); } ptmr_stop(&all_timer); if (n_tot_frame > 0) { E_INFO("TOTALS:"); E_INFOCONT(" km %4.3fx %4.3e", km_timer.t_cpu / (n_tot_frame * 0.01), (km_timer.t_cpu > 0 ? km_timer.t_elapsed / km_timer.t_cpu : 0.0)); E_INFOCONT(" var %4.3fx %4.3e", var_timer.t_cpu / (n_tot_frame * 0.01), (var_timer.t_cpu > 0 ? var_timer.t_elapsed / var_timer.t_cpu : 0.0)); E_INFOCONT(" em %4.3fx %4.3e", em_timer.t_cpu / (n_tot_frame * 0.01), (em_timer.t_cpu > 0 ? em_timer.t_elapsed / em_timer.t_cpu : 0.0)); E_INFOCONT(" all %4.3fx %4.3e", all_timer.t_cpu / (n_tot_frame * 0.01), (all_timer.t_cpu > 0 ? 
all_timer.t_elapsed / all_timer.t_cpu : 0.0)); E_INFOCONT("\n"); } if (cmd_ln_str("-tsrngfn") != NULL) { fp = fopen(cmd_ln_str("-tsrngfn"), "w"); if (fp == NULL) { E_FATAL_SYSTEM("Unable to open %s for reading", cmd_ln_str("-tsrngfn")); } fprintf(fp, "%d %d\n", ts_off, ts_cnt); } else if (ts_cnt != omdef->n_tied_state) { E_WARN("Subset of tied states specified, but no -tsrngfn arg"); } } else if (strcmp(cmd_ln_str("-gthobj"), "single") == 0) { n_tot_frame = 0; ptmr_reset(&all_timer); ptmr_reset(&km_timer); ptmr_reset(&var_timer); ptmr_reset(&em_timer); ptmr_start(&all_timer); if (init_state(cmd_ln_str("-segdmpfn"), NULL, /* No index -> single class dump file */ cmd_ln_int32("-ndensity"), n_stream, veclen, blksize, cmd_ln_int32("-reest"), cmd_ln_str("-mixwfn"), cmd_ln_str("-meanfn"), cmd_ln_str("-varfn"), 0, 1, 1, 1) != S3_SUCCESS) { E_ERROR("Unable to train\n"); } ptmr_stop(&all_timer); if (n_tot_frame > 0) { E_INFO("TOTALS:"); E_INFOCONT(" km %4.3fx %4.3e", km_timer.t_cpu / (n_tot_frame * 0.01), (km_timer.t_cpu > 0 ? km_timer.t_elapsed / km_timer.t_cpu : 0.0)); E_INFOCONT(" var %4.3fx %4.3e", var_timer.t_cpu / (n_tot_frame * 0.01), (var_timer.t_cpu > 0 ? var_timer.t_elapsed / var_timer.t_cpu : 0.0)); E_INFOCONT(" em %4.3fx %4.3e", em_timer.t_cpu / (n_tot_frame * 0.01), (em_timer.t_cpu > 0 ? em_timer.t_elapsed / em_timer.t_cpu : 0.0)); E_INFOCONT(" all %4.3fx %4.3e", all_timer.t_cpu / (n_tot_frame * 0.01), (all_timer.t_cpu > 0 ? all_timer.t_elapsed / all_timer.t_cpu : 0.0)); E_INFOCONT("\n"); } } return 0; }
/*
 * Parse the command line and build everything the trainer needs.
 *
 *   argc, argv  - command line, handed to parse_cmd_ln()
 *   out_lex     - receives the lexicon read from -dictfn / -fdictfn, or NULL
 *   out_omdef   - receives the output model definition (-omoddeffn), or NULL
 *   out_dmdef   - receives the dump model definition (-dmoddeffn), or NULL
 *   out_feat    - receives the feature computation object
 *
 * Also sets the file-level `stride` from -stride.
 *
 * Returns S3_SUCCESS, or S3_ERROR when a component cannot be set up or read.
 * out_feat is written once feature setup has succeeded; out_omdef, out_dmdef
 * and out_lex are only written when the corresponding stage is reached.
 */
int
main_initialize(int argc,
                char *argv[],
                lexicon_t **out_lex,
                model_def_t **out_omdef,
                model_def_t **out_dmdef,
                feat_t** out_feat)
{
    model_def_t *dmdef = NULL;
    model_def_t *omdef = NULL;
    lexicon_t *lex = NULL;
    feat_t *feat;
    const char *fn;
    uint32 n_ts;
    uint32 n_cb;
    const char *ts2cbfn;

    parse_cmd_ln(argc, argv);

    feat = feat_init(cmd_ln_str("-feat"),
                     cmn_type_from_str(cmd_ln_str("-cmn")),
                     cmd_ln_boolean("-varnorm"),
                     agc_type_from_str(cmd_ln_str("-agc")),
                     1, cmd_ln_int32("-ceplen"));
    /* Everything below dereferences feat, so fail cleanly if it is missing. */
    if (feat == NULL) {
        E_INFO("Unable to initialize feature type %s\n", cmd_ln_str("-feat"));
        return S3_ERROR;
    }

    if (cmd_ln_str("-lda")) {
        E_INFO("Reading linear feature transformation from %s\n",
               cmd_ln_str("-lda"));
        if (feat_read_lda(feat,
                          cmd_ln_str("-lda"),
                          cmd_ln_int32("-ldadim")) < 0)
            return S3_ERROR;
    }

    if (cmd_ln_str("-svspec")) {
        int32 **subvecs;

        E_INFO("Using subvector specification %s\n",
               cmd_ln_str("-svspec"));
        if ((subvecs = parse_subvecs(cmd_ln_str("-svspec"))) == NULL)
            return S3_ERROR;
        if ((feat_set_subvecs(feat, subvecs)) < 0)
            return S3_ERROR;
    }

    if (cmd_ln_exists("-agcthresh")
        && 0 != strcmp(cmd_ln_str("-agc"), "none")) {
        agc_set_threshold(feat->agc_struct,
                          cmd_ln_float32("-agcthresh"));
    }

    if (feat->cmn_struct
        && cmd_ln_exists("-cmninit")) {
        char *c, *cc, *vallist;
        int32 nvals;

        /* Parse a comma-separated list of initial means on a private copy. */
        vallist = ckd_salloc(cmd_ln_str("-cmninit"));
        c = vallist;
        nvals = 0;
        while (nvals < feat->cmn_struct->veclen
               && (cc = strchr(c, ',')) != NULL) {
            *cc = '\0';
            feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
            c = cc + 1;
            ++nvals;
        }
        /* Last value: not followed by a comma. */
        if (nvals < feat->cmn_struct->veclen && *c != '\0') {
            feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
        }
        ckd_free(vallist);
    }
    *out_feat = feat;

    if (cmd_ln_str("-omoddeffn")) {
        E_INFO("Reading output model definitions: %s\n", cmd_ln_str("-omoddeffn"));

        /* Read in the model definitions.  Defines the set of
           CI phones and context dependent phones.  Defines the
           transition matrix tying and state level tying. */
        if (model_def_read(&omdef,
                           cmd_ln_str("-omoddeffn")) != S3_SUCCESS) {
            return S3_ERROR;
        }

        if (cmd_ln_str("-dmoddeffn")) {
            E_INFO("Reading dump model definitions: %s\n", cmd_ln_str("-dmoddeffn"));

            if (model_def_read(&dmdef,
                               cmd_ln_str("-dmoddeffn")) != S3_SUCCESS) {
                return S3_ERROR;
            }
            setup_d2o_map(dmdef, omdef);
        }
        else {
            E_INFO("Assuming dump and output model definitions are identical\n");
        }

        ts2cbfn = cmd_ln_str("-ts2cbfn");
        if (ts2cbfn) {
            /* Either one of the built-in tying labels or a mapping file. */
            if (strcmp(SEMI_LABEL, ts2cbfn) == 0) {
                omdef->cb = semi_ts2cb(omdef->n_tied_state);
                n_ts = omdef->n_tied_state;
                n_cb = 1;
            }
            else if (strcmp(CONT_LABEL, ts2cbfn) == 0) {
                omdef->cb = cont_ts2cb(omdef->n_tied_state);
                n_ts = omdef->n_tied_state;
                n_cb = omdef->n_tied_state;
            }
            else if (strcmp(PTM_LABEL, ts2cbfn) == 0) {
                omdef->cb = ptm_ts2cb(omdef);
                n_ts = omdef->n_tied_state;
                n_cb = omdef->acmod_set->n_ci;
            }
            else if (s3ts2cb_read(cmd_ln_str("-ts2cbfn"),
                                  &omdef->cb,
                                  &n_ts,
                                  &n_cb) != S3_SUCCESS) {
                return S3_ERROR;
            }

            if (omdef->n_tied_state != n_ts) {
                E_FATAL("Model definition file n_tied_state = %u, but %u mappings in ts2cb\n",
                        omdef->n_tied_state, n_ts);
            }
        }
    }
    else {
        E_INFO("No mdef files.  Assuming 1-class init\n");
    }

    *out_omdef = omdef;
    *out_dmdef = dmdef;

    /* Lexicons are read against the phone set of the output model definition. */
    if ((cmd_ln_str("-dictfn") || cmd_ln_str("-fdictfn")) && omdef == NULL) {
        E_FATAL("-dictfn/-fdictfn require a model definition (-omoddeffn)\n");
    }

    fn = cmd_ln_str("-dictfn");
    if (fn) {
        E_INFO("Reading main lexicon: %s\n", fn);

        lex = lexicon_read(NULL, fn, omdef->acmod_set);
        if (lex == NULL)
            return S3_ERROR;
    }

    fn = cmd_ln_str("-fdictfn");
    if (fn) {
        E_INFO("Reading filler lexicon: %s\n", fn);

        /* NOTE(review): the result is discarded, so a filler lexicon given
         * without -dictfn never reaches *out_lex -- confirm intent. */
        (void)lexicon_read(lex, fn, omdef->acmod_set);
    }

    *out_lex = lex;

    stride = cmd_ln_int32("-stride");

    return S3_SUCCESS;
}
/*
 * ARCHAN: to allow backward compatibility, -lm and -lmctlfn coexist.  This
 * makes the current implementation more complicated than necessary.
 *
 * Initialize the knowledge base `kb` from the command-line configuration:
 *   - zero the structure and build the kbcore,
 *   - check that <s> and </s> are in the dictionary and in every LM,
 *   - check the transition-matrix topology,
 *   - unlink <s> and </s> between the dictionary and the LM(s),
 *   - allocate the activity flags and build the unigram and filler lextrees
 *     (one set per LM when an LM set is in use),
 *   - set up beams, pruning and look-ahead parameters, the feature buffer,
 *     the Viterbi history, timers, the HMM histogram and the hypothesis
 *     output files (-hypseg, -hyp).
 *
 * Unrecoverable configuration problems are reported through E_FATAL.
 */
void
kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    lmset_t *lmset;
    s3cipid_t sil, ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;
    char *str;
    int32 cisencnt;
    int32 j;

    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));
    kb->kbcore = NULL;

    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
                              cmd_ln_str("-feat"),
                              cmd_ln_str("-cmn"),
                              cmd_ln_str("-varnorm"),
                              cmd_ln_str("-agc"),
                              cmd_ln_str("-mdef"),
                              cmd_ln_str("-dict"),
                              cmd_ln_str("-fdict"),
                              "",       /* Hack!! Hardwired constant for -compsep argument */
                              cmd_ln_str("-lm"),
                              cmd_ln_str("-lmctlfn"),
                              cmd_ln_str("-lmdumpdir"),
                              cmd_ln_str("-fillpen"),
                              cmd_ln_str("-senmgau"),
                              cmd_ln_float32("-silprob"),
                              cmd_ln_float32("-fillprob"),
                              cmd_ln_float32("-lw"),
                              cmd_ln_float32("-wip"),
                              cmd_ln_float32("-uw"),
                              cmd_ln_str("-mean"),
                              cmd_ln_str("-var"),
                              cmd_ln_float32("-varfloor"),
                              cmd_ln_str("-mixw"),
                              cmd_ln_float32("-mixwfloor"),
                              cmd_ln_str("-subvq"),
                              cmd_ln_str("-gs"),
                              cmd_ln_str("-tmat"),
                              cmd_ln_float32("-tmatfloor"));
    if(kb->kbcore==NULL){
        E_FATAL("Initialization of kb failed\n");
    }

    kbcore = kb->kbcore;

    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    lmset=kbcore_lmset(kbcore);
    d2p = kbcore_dict2pid(kbcore);

    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
        E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);

    if(lmset){
        for(i=0;i<kbcore_nlm(kbcore);i++){
            if (NOT_S3LMWID(lm_startwid(lmset[i].lm)) || NOT_S3LMWID(lm_finishwid(lmset[i].lm)))
                E_FATAL("%s or %s not in LM %s\n", S3_START_WORD, S3_FINISH_WORD,lmset[i].name);
        }
    }else if(lm){
        if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
            E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    }

    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0)
        E_FATAL("Tmat contains arcs skipping more than 1 state\n");

    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their
     * recognition.  They are merely dummy words (anchors) at the beginning
     * and end of each utterance.
     */
    if(lmset){
        for(i=0;i<kbcore_nlm(kbcore);i++){
            lm_lmwid2dictwid(lmset[i].lm, lm_startwid(lmset[i].lm)) = BAD_S3WID;
            lm_lmwid2dictwid(lmset[i].lm, lm_finishwid(lmset[i].lm)) = BAD_S3WID;

            for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
                lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
            for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
                lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
        }
    }else if(lm){ /* No LM is set at this point*/
        lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
        lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;
        for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
        for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
    }

    sil = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil))
        E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);

    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->rec_sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32));

    /* Build set of all possible left contexts: last phone of every word
     * (fillers excluded), plus silence; terminated by BAD_S3CIPID. */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
        ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
        if (! mdef_is_fillerphone (mdef, (int)ci))
            bitvec_set (lc_active, ci);
    }
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
        if (bitvec_is_set (lc_active, ci))
            lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;

    E_INFO("Building lextrees\n");
    /* Get the number of lexical tree*/
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
        E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree);
        kb->n_lextree = 1;
    }

    /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */
    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));

    if(lmset){
        /* One block of n_lextree trees per LM, indexed [lm * n_lextree + tree]. */
        kb->ugtreeMulti = (lextree_t **) ckd_calloc (kbcore_nlm(kbcore)*kb->n_lextree, sizeof(lextree_t *));
        /* Just allocate pointers*/
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));

        for(i=0;i<kbcore_nlm(kbcore);i++){
            E_INFO("Creating Unigram Table for lm %d name %s\n",i,lmset[i].name);
            for(j=0;j<dict_size(dict);j++){
                /*try to be very careful again */
                wp[j].wid=-1;
                wp[j].prob=-1;
            }
            n = lm_ug_wordprob (lmset[i].lm, dict,MAX_NEG_INT32, wp);
            E_INFO("Size of word table after unigram + words in class: %d.\n",n);
            if (n < 1)
                E_FATAL("%d active words in %s\n", n,lmset[i].name);
            n = wid_wordprob2alt(dict,wp,n);
            E_INFO("Size of word table after adding alternative prons: %d.\n",n);

            if (cmd_ln_int32("-treeugprob") == 0) {
                /* Must not use i here: i is the index of the LM being built. */
                for (j = 0; j < n; j++)
                    wp[j].prob = -1;    /* Flatten all initial probabilities */
            }

            for (j = 0; j < kb->n_lextree; j++) {
                kb->ugtreeMulti[i*kb->n_lextree+j] = lextree_build (kbcore, wp, n, lc);
                lextree_type (kb->ugtreeMulti[i*kb->n_lextree+j]) = 0;
                E_INFO("Lextrees (%d) for lm %d name %s, %d nodes(ug)\n",
                       kb->n_lextree, i, lmset[i].name,lextree_n_node(kb->ugtreeMulti[i*kb->n_lextree+j]));
            }
        }
    }else if (lm){
        E_INFO("Creating Unigram Table\n");
        n = lm_ug_wordprob (lm, dict,MAX_NEG_INT32, wp);
        E_INFO("Size of word table after unigram + words in class: %d\n",n);
        if (n < 1)
            E_FATAL("%d active words\n", n);
        n = wid_wordprob2alt (dict, wp, n);     /* Add alternative pronunciations */

        /* Retain or remove unigram probs from lextree, depending on option */
        if (cmd_ln_int32("-treeugprob") == 0) {
            for (i = 0; i < n; i++)
                wp[i].prob = -1;        /* Flatten all initial probabilities */
        }

        /* Create the desired no. of unigram lextrees */
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
        for (i = 0; i < kb->n_lextree; i++) {
            kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
            lextree_type (kb->ugtree[i]) = 0;
        }
        E_INFO("Lextrees(%d), %d nodes(ug)\n",
               kb->n_lextree, lextree_n_node(kb->ugtree[0]));
    }

    /* Create filler lextrees */
    /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
        if (dict_filler_word(dict, i)) {
            wp[n].wid = i;
            wp[n].prob = fillpen (kbcore->fillpen, i);
            n++;
        }
    }

    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
        kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
        lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);
    ckd_free ((void *) lc);
    bitvec_free (lc_active);

    E_INFO("Lextrees(%d), %d nodes(filler)\n",
           kb->n_lextree, lextree_n_node(kb->fillertree[0]));

    if (cmd_ln_int32("-lextreedump")) {
        if(lmset){
            E_FATAL("Currently, doesn't support -lextreedump for multiple-LMs\n");
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "UGTREE %d\n", i);
            lextree_dump (kb->ugtree[i], dict, stderr);
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "FILLERTREE %d\n", i);
            lextree_dump (kb->fillertree[i], dict, stderr);
        }
        fflush (stderr);
    }

    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
                          cmd_ln_float64("-beam"),
                          cmd_ln_float64("-pbeam"),
                          cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
           kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);

    /*Sections of optimization related parameters*/
    kb->ds_ratio=cmd_ln_int32("-ds");
    E_INFO("Down Sampling Ratio = %d\n",kb->ds_ratio);

    kb->rec_bstcid=-1;
    kb->skip_count=0;

    kb->cond_ds=cmd_ln_int32("-cond_ds");
    E_INFO("Conditional Down Sampling Parameter = %d\n",kb->cond_ds);

    if(kb->cond_ds>0&&kb->kbcore->gs==NULL)
        E_FATAL("Conditional Down Sampling require the use of Gaussian Selection map\n");

    kb->gs4gs=cmd_ln_int32("-gs4gs");
    E_INFO("GS map would be used for Gaussian Selection? = %d\n",kb->gs4gs);

    kb->svq4svq=cmd_ln_int32("-svq4svq");
    E_INFO("SVQ would be used as Gaussian Score ?= %d\n",kb->svq4svq);

    kb->ci_pbeam=-1*logs3(cmd_ln_float32("-ci_pbeam"));
    E_INFO("CI phone beam to prune the number of parent CI phones in CI-base GMM Selection = %d\n",kb->ci_pbeam);
    if(kb->ci_pbeam>10000000){
        E_INFO("Virtually no CI phone beam is applied now. (ci_pbeam>1000000)\n");
    }

    kb->wend_beam=-1*logs3(cmd_ln_float32("-wend_beam"));
    E_INFO("Word-end pruning beam: %d\n",kb->wend_beam);

    kb->pl_window=cmd_ln_int32("-pl_window");
    E_INFO("Phoneme look-ahead window size = %d\n",kb->pl_window);

    kb->pl_window_start=0;

    kb->pl_beam=logs3(cmd_ln_float32("-pl_beam"));
    E_INFO("Phoneme look-ahead beam = %d\n",kb->pl_beam);

    /* Count the leading senones that map to themselves in cd2cisen; bounded
     * by the senone count so the scan cannot run off the end of the map. */
    for(cisencnt=0;
        cisencnt<mdef_n_sen(mdef) && cisencnt==mdef->cd2cisen[cisencnt];
        cisencnt++)
        ;

    kb->cache_ci_senscr=(int32**)ckd_calloc_2d(kb->pl_window,cisencnt,sizeof(int32));
    kb->cache_best_list=(int32*)ckd_calloc(kb->pl_window,sizeof(int32));
    kb->phn_heur_list=(int32*)ckd_calloc(mdef_n_ciphone (mdef),sizeof(int32));

    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL)
        E_FATAL("feat_array_alloc() failed\n");

    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));

    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    ptmr_init (&(kb->tm_ovrhd));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;

    /* Size the HMM histogram from the node count of the first unigram tree
     * plus the first filler tree, over all lextrees. */
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");

    if(lmset)
        n = ((kb->ugtreeMulti[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    else
        n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;

    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));   /* Really no need for +1 */

    /* Open hypseg file if specified */
    str = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
        if ((kb->matchsegfp = fopen(str, "wt")) == NULL)
#else
        if ((kb->matchsegfp = fopen(str, "w")) == NULL)
#endif
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }

    /* Open hyp file if specified */
    str = cmd_ln_str("-hyp");
    kb->matchfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
        if ((kb->matchfp = fopen(str, "wt")) == NULL)
#else
        if ((kb->matchfp = fopen(str, "w")) == NULL)
#endif
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }
}
/*
 * Initialize Gaussian density parameters for a range of tied states by
 * clustering the observations in a segment dump.
 *
 *   obsdmp     - dump file name
 *   obsidx     - dump index file name; NULL selects a 1-class dump file
 *   n_density  - number of densities per mixture
 *   n_stream   - number of feature streams
 *   veclen     - per-stream vector lengths
 *   blksize    - passed through to the dump reader and the estimators
 *   reest      - TRUE to run EM re-estimation after the k-means pass
 *   mixwfn     - output mixing weight file, or NULL to skip mixing weights
 *                (and EM, which is only run when mixing weights are kept)
 *   meanfn     - output mean file, or NULL
 *   varfn      - output variance file, or NULL
 *   ts_off     - first tied state to train
 *   ts_cnt     - number of tied states to train
 *   n_ts       - not used by this function
 *   n_d_ts     - number of tied states expected in a multi-class dump
 *
 * Sets the file-level `multiclass` flag and, for 1-class dumps, `dmp_fp`,
 * `dmp_swp` and `data_offset`.  Full covariances are used when -fullvar is
 * set.
 *
 * Returns S3_SUCCESS, or S3_ERROR if the 1-class dump cannot be opened or
 * read, or if an output file cannot be written.
 *
 * NOTE(review): the mean/var/mixw arrays are never released here; no
 * matching free routines are visible from this block.
 */
static int
init_state(const char *obsdmp,
           const char *obsidx,
           uint32 n_density,
           uint32 n_stream,
           uint32 *veclen,
           uint32 blksize,
           int reest,
           const char *mixwfn,
           const char *meanfn,
           const char *varfn,
           uint32 ts_off,
           uint32 ts_cnt,
           uint32 n_ts,
           uint32 n_d_ts)
{
    vector_t ***mean;
    vector_t ***var = NULL;
    vector_t ****fullvar = NULL;
    float32 ***mixw = NULL;
    uint32 n_frame;
    uint32 ignore = 0;
    codew_t *label = NULL;
    uint32 n_corpus = 0;
    float64 sqerr;
    float64 tot_sqerr;
    segdmp_type_t t;
    uint32 i, j, ts, n;
    int32 full_covar;

    full_covar = cmd_ln_int32("-fullvar");

    /* fully-continuous for now */
    mean = gauden_alloc_param(ts_cnt, n_stream, n_density, veclen);
    if (full_covar)
        fullvar = gauden_alloc_param_full(ts_cnt, n_stream, n_density, veclen);
    else
        var = gauden_alloc_param(ts_cnt, n_stream, n_density, veclen);
    if (mixwfn)
        mixw = (float32 ***)ckd_calloc_3d(ts_cnt,
                                          n_stream,
                                          n_density,
                                          sizeof(float32));

    /* The caller decides between multi-class and 1-class through obsidx. */
    if (obsidx) {
        E_INFO("Multi-class dump\n");
        if (segdmp_open_read(cmd_ln_str_list("-segdmpdirs"),
                             obsdmp,
                             obsidx,
                             &n,
                             &t,
                             n_stream,
                             veclen,
                             blksize) != S3_SUCCESS) {
            E_FATAL("Unable to open dumps\n");
        }

        if (n != n_d_ts) {
            E_FATAL("Expected %u tied-states in dump, but apparently %u\n",
                    n_d_ts, n);
        }
        if (t != SEGDMP_TYPE_FEAT) {
            E_FATAL("Expected feature dump, but instead saw %u\n", t);
        }

        multiclass = TRUE;
    }
    else {
        E_INFO("1-class dump file\n");

        multiclass = FALSE;

        dmp_fp = s3open(obsdmp, "rb", &dmp_swp);
        if (dmp_fp == NULL) {
            E_ERROR_SYSTEM("Unable to open dump file %s for reading\n", obsdmp);

            return S3_ERROR;
        }

        /* The file starts with the frame count; data follows it. */
        if (s3read(&n_frame, sizeof(uint32), 1, dmp_fp, dmp_swp, &ignore) != 1) {
            E_ERROR_SYSTEM("Unable to read frame count from dump file %s\n", obsdmp);

            s3close(dmp_fp);
            return S3_ERROR;
        }

        data_offset = ftell(dmp_fp);
    }

    tot_sqerr = 0;
    for (i = 0; i < ts_cnt; i++) {
        ts = ts_off + i;

        /* stride not accounted for yet */
        if (o2d == NULL) {
            if (multiclass)
                n_frame = segdmp_n_seg(ts);
        }
        else {
            /* Sum the frames of every dump state mapped to this output state. */
            for (j = 0, n_frame = 0; j < n_o2d[ts]; j++) {
                n_frame += segdmp_n_seg(o2d[ts][j]);
            }
        }

        E_INFO("Corpus %u: sz==%u frames%s\n",
               ts, n_frame,
               (n_frame > cmd_ln_int32("-vartiethr") ? "" : " tied var"));

        if (n_frame == 0) {
            continue;
        }

        E_INFO("Convergence ratios are abs(cur - prior) / abs(prior)\n");

        /* Do some variety of k-means clustering */
        ptmr_start(&km_timer);
        sqerr = cluster(ts, n_stream, n_frame, veclen, blksize, mean[i], n_density, &label);
        ptmr_stop(&km_timer);

        if (sqerr < 0) {
            E_ERROR("Unable to do k-means for state %u; skipping...\n", ts);

            continue;
        }

        /* Given the k-means and assuming equal prior liklihoods
         * compute the variances */
        ptmr_start(&var_timer);
        if (full_covar)
            full_variances(ts, mean[i], fullvar[i], n_density, n_stream,
                           veclen, blksize, n_frame, label);
        else
            variances(ts, mean[i], var[i], n_density, n_stream, veclen, blksize, n_frame, label);
        ptmr_stop(&var_timer);

        if (mixwfn) {
            /* initialize the mixing weights by counting # of occurrances
             * of the top codeword over the corpus and normalizing */
            init_mixw(mixw[i], mean[i], n_density, veclen, n_frame, n_stream, label);
        }

        /* The labels are not needed past this point; release them whether or
         * not mixing weights were requested. */
        ckd_free(label);
        label = NULL;

        if (mixwfn && reest == TRUE) {
            if (full_covar) {
                E_ERROR("EM re-estimation is not yet supported for full covariances\n");
            }
            else {
                ptmr_start(&em_timer);
                /* Do iterations of EM to estimate the mixture densities */
                reest_sum(ts, mean[i], var[i], mixw[i], n_density, n_stream,
                          n_frame, veclen, blksize,
                          cmd_ln_int32("-niter"),
                          FALSE,
                          cmd_ln_int32("-vartiethr"));
                ptmr_stop(&em_timer);
            }
        }

        ++n_corpus;
        tot_sqerr += sqerr;

        E_INFO("sqerr [%u] == %e\n", ts, sqerr);
    }

    if (n_corpus > 0) {
        E_INFO("sqerr = %e tot %e rms\n", tot_sqerr, sqrt(tot_sqerr/n_corpus));
    }

    if (!multiclass)
        s3close(dmp_fp);

    if (meanfn) {
        if (s3gau_write(meanfn,
                        (const vector_t ***)mean,
                        ts_cnt,
                        n_stream,
                        n_density,
                        veclen) != S3_SUCCESS) {
            return S3_ERROR;
        }
    }
    else {
        E_INFO("No mean file given; none written\n");
    }

    if (varfn) {
        if (full_covar) {
            if (s3gau_write_full(varfn,
                                 (const vector_t ****)fullvar,
                                 ts_cnt,
                                 n_stream,
                                 n_density,
                                 veclen) != S3_SUCCESS)
                return S3_ERROR;
        }
        else {
            if (s3gau_write(varfn,
                            (const vector_t ***)var,
                            ts_cnt,
                            n_stream,
                            n_density,
                            veclen) != S3_SUCCESS)
                return S3_ERROR;
        }
    }
    else {
        E_INFO("No variance file given; none written\n");
    }

    if (mixwfn) {
        if (s3mixw_write(mixwfn,
                         mixw,
                         ts_cnt,
                         n_stream,
                         n_density) != S3_SUCCESS) {
            return S3_ERROR;
        }
    }
    else {
        E_INFO("No mixing weight file given; none written\n");
    }

    return S3_SUCCESS;
}
/*
 * Parse the command line and load the model definitions and lexicons.
 *
 *   argc, argv  - command line, handed to parse_cmd_ln()
 *   out_lex     - receives the lexicon read from -dictfn / -fdictfn, or NULL
 *   out_omdef   - receives the output model definition (-omoddeffn), or NULL
 *   out_dmdef   - receives the dump model definition (-dmoddeffn), or NULL
 *
 * Also registers the "km", "var", "em" and "all" timers, configures the
 * feature module from -feat / -ceplen / -svspec (and -ldafn when given), and
 * sets the file-level `stride` from -stride.
 *
 * Returns S3_SUCCESS, or S3_ERROR when a model definition, state-to-codebook
 * mapping or the main lexicon cannot be read.  A missing -feat or an
 * unreadable LDA matrix is fatal.
 */
int
main_initialize(int argc,
                char *argv[],
                lexicon_t **out_lex,
                model_def_t **out_omdef,
                model_def_t **out_dmdef)
{
    model_def_t *dmdef = NULL;
    model_def_t *omdef = NULL;
    lexicon_t *lex = NULL;
    const char *fn;
    uint32 n_ts;
    uint32 n_cb;
    const char *ts2cbfn;

    parse_cmd_ln(argc, argv);

    timing_bind_name("km", timing_new());
    timing_bind_name("var", timing_new());
    timing_bind_name("em", timing_new());
    timing_bind_name("all", timing_new());

    if (cmd_ln_access("-feat") != NULL) {
        feat_set(cmd_ln_str("-feat"));
        feat_set_in_veclen(cmd_ln_int32("-ceplen"));
        feat_set_subvecs(cmd_ln_str("-svspec"));
    }
    else {
        E_FATAL("You need to set a feature extraction config using -feat\n");
    }

    if (cmd_ln_access("-ldafn") != NULL) {
        if (feat_read_lda(cmd_ln_access("-ldafn"), cmd_ln_int32("-ldadim"))) {
            E_FATAL("Failed to read LDA matrix\n");
        }
    }

    if (cmd_ln_access("-omoddeffn")) {
        E_INFO("Reading output model definitions: %s\n", cmd_ln_access("-omoddeffn"));

        /* Read in the model definitions.  Defines the set of
           CI phones and context dependent phones.  Defines the
           transition matrix tying and state level tying. */
        if (model_def_read(&omdef,
                           cmd_ln_access("-omoddeffn")) != S3_SUCCESS) {
            return S3_ERROR;
        }

        if (cmd_ln_access("-dmoddeffn")) {
            E_INFO("Reading dump model definitions: %s\n", cmd_ln_access("-dmoddeffn"));

            if (model_def_read(&dmdef,
                               cmd_ln_access("-dmoddeffn")) != S3_SUCCESS) {
                return S3_ERROR;
            }
            setup_d2o_map(dmdef, omdef);
        }
        else {
            E_INFO("Assuming dump and output model definitions are identical\n");
        }

        ts2cbfn = cmd_ln_access("-ts2cbfn");
        if (ts2cbfn) {
            /* Either one of the built-in tying labels or a mapping file. */
            if (strcmp(SEMI_LABEL, ts2cbfn) == 0) {
                omdef->cb = semi_ts2cb(omdef->n_tied_state);
                n_ts = omdef->n_tied_state;
                n_cb = 1;
            }
            else if (strcmp(CONT_LABEL, ts2cbfn) == 0) {
                omdef->cb = cont_ts2cb(omdef->n_tied_state);
                n_ts = omdef->n_tied_state;
                n_cb = omdef->n_tied_state;
            }
            else if (strcmp(PTM_LABEL, ts2cbfn) == 0) {
                omdef->cb = ptm_ts2cb(omdef);
                n_ts = omdef->n_tied_state;
                n_cb = omdef->acmod_set->n_ci;
            }
            else if (s3ts2cb_read(cmd_ln_access("-ts2cbfn"),
                                  &omdef->cb,
                                  &n_ts,
                                  &n_cb) != S3_SUCCESS) {
                return S3_ERROR;
            }

            if (omdef->n_tied_state != n_ts) {
                E_FATAL("Model definition file n_tied_state = %u, but %u mappings in ts2cb\n",
                        omdef->n_tied_state, n_ts);
            }
        }
    }
    else {
        E_INFO("No mdef files.  Assuming 1-class init\n");
    }

    *out_omdef = omdef;
    *out_dmdef = dmdef;

    /* Lexicons are read against the phone set of the output model definition. */
    if ((cmd_ln_access("-dictfn") || cmd_ln_access("-fdictfn")) && omdef == NULL) {
        E_FATAL("-dictfn/-fdictfn require a model definition (-omoddeffn)\n");
    }

    fn = cmd_ln_access("-dictfn");
    if (fn) {
        E_INFO("Reading main lexicon: %s\n", fn);

        lex = lexicon_read(NULL, fn, omdef->acmod_set);
        if (lex == NULL)
            return S3_ERROR;
    }

    fn = cmd_ln_access("-fdictfn");
    if (fn) {
        E_INFO("Reading filler lexicon: %s\n", fn);

        /* NOTE(review): the result is discarded, so a filler lexicon given
         * without -dictfn never reaches *out_lex -- confirm intent. */
        (void)lexicon_read(lex, fn, omdef->acmod_set);
    }

    *out_lex = lex;

    stride = *(int32 *)cmd_ln_access("-stride");

    return S3_SUCCESS;
}
/* the following function is used for MMIE training lqin 2010-03 */ static int mmi_normalize() { uint32 i; uint32 n_mgau; uint32 n_stream; uint32 n_density; vector_t ***in_mean = NULL; vector_t ***in_var = NULL; vector_t ***wt_mean = NULL; vector_t ***wt_var = NULL; const uint32 *veclen = NULL; const char **accum_dir; const char *in_mean_fn; const char *out_mean_fn; const char *in_var_fn; const char *out_var_fn; vector_t ***wt_num_mean = NULL; vector_t ***wt_den_mean = NULL; vector_t ***wt_num_var = NULL; vector_t ***wt_den_var = NULL; float32 ***num_dnom = NULL; float32 ***den_dnom = NULL; uint32 n_num_mgau; uint32 n_den_mgau; uint32 n_num_stream; uint32 n_den_stream; uint32 n_num_density; uint32 n_den_density; float32 constE; uint32 n_temp_mgau; uint32 n_temp_stream; uint32 n_temp_density; const uint32 *temp_veclen = NULL; accum_dir = cmd_ln_str_list("-accumdir"); /* the following variables are used for mmie training */ out_mean_fn = cmd_ln_str("-meanfn"); out_var_fn = cmd_ln_str("-varfn"); in_mean_fn = cmd_ln_str("-inmeanfn"); in_var_fn = cmd_ln_str("-invarfn"); constE = cmd_ln_float32("-constE"); /* get rid of some unnecessary parameters */ if (cmd_ln_int32("-fullvar")) { E_FATAL("Current MMIE training can not be done for full variance, set -fulllvar as no\n"); } if (cmd_ln_int32("-tiedvar")) { E_FATAL("Current MMIE training can not be done for tied variance, set -tiedvar as no\n"); } if (cmd_ln_str("-mixwfn")) { E_FATAL("Current MMIE training does not support mixture weight update, remove -mixwfn \n"); } if (cmd_ln_str("-inmixwfn")) { E_FATAL("Current MMIE training does not support mixture weight update, remove -inmixwfn \n"); } if (cmd_ln_str("-tmatfn")) { E_FATAL("Current MMIE training does not support transition matrix update, remove -tmatfn \n"); } if (cmd_ln_str("-regmatfn")) { E_FATAL("Using norm for computing regression matrix is obsolete, please use mllr_transform \n"); } /* must be at least one accum dir */ if (accum_dir[0] == NULL) { E_FATAL("No 
accumulated reestimation path is specified, use -accumdir \n"); } /* at least update mean or variance parameters */ if (out_mean_fn == NULL && out_var_fn == NULL) { E_FATAL("Neither -meanfn nor -varfn is specified, at least do mean or variance update \n"); } else if (out_mean_fn == NULL) { E_INFO("No -meanfn specified, will skip if any\n"); } else if (out_var_fn == NULL) { E_INFO("No -varfn specified, will skip if any\n"); } /* read input mean */ if (in_mean_fn != NULL) { E_INFO("read original density mean parameters from %s\n", in_mean_fn); if (s3gau_read(in_mean_fn, &in_mean, &n_mgau, &n_stream, &n_density, &veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_mean_fn); } ckd_free((void *)veclen); veclen = NULL; } /* read input variance */ if (in_var_fn != NULL) { E_INFO("read original density variance parameters from %s\n", in_var_fn); if (s3gau_read(in_var_fn, &in_var, &n_mgau, &n_stream, &n_density, &veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_var_fn); } ckd_free((void *)veclen); veclen = NULL; } /* read accumulated numerator and denominator counts */ for (i = 0; accum_dir[i]; i++) { E_INFO("Reading and accumulating counts from %s\n", accum_dir[i]); rdacc_mmie_den(accum_dir[i], "numlat", &wt_num_mean, &wt_num_var, &num_dnom, &n_num_mgau, &n_num_stream, &n_num_density, &veclen); rdacc_mmie_den(accum_dir[i], "denlat", &wt_den_mean, &wt_den_var, &den_dnom, &n_den_mgau, &n_den_stream, &n_den_density, &veclen); if (n_num_mgau != n_den_mgau) E_FATAL("number of gaussians inconsistent between num and den lattice\n"); else if (n_num_mgau != n_mgau) E_FATAL("number of gaussians inconsistent between imput model and accumulator (%u != %u)\n", n_mgau, n_num_mgau); if (n_num_stream != n_den_stream) E_FATAL("number of gaussian streams inconsistent between num and den lattice\n"); else if (n_num_stream != n_stream) E_FATAL("number of gaussian streams inconsistent between imput model and accumulator (%u != %u)\n", n_stream, n_num_stream); if 
(n_num_density != n_den_density) E_FATAL("number of gaussian densities inconsistent between num and den lattice\n"); else if (n_num_density != n_density) E_FATAL("number of gaussian densities inconsistent between imput model and accumulator (%u != %u)\n", n_density, n_num_density); } /* initialize update parameters as the input parameters */ if (out_mean_fn) { if (s3gau_read(in_mean_fn, &wt_mean, &n_temp_mgau, &n_temp_stream, &n_temp_density, &temp_veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_mean_fn); } ckd_free((void *)temp_veclen); temp_veclen = NULL; } if (out_var_fn) { if (s3gau_read(in_var_fn, &wt_var, &n_temp_mgau, &n_temp_stream, &n_temp_density, &temp_veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_var_fn); } ckd_free((void *)temp_veclen); temp_veclen = NULL; } /* update mean parameters */ if (wt_mean) { if (out_mean_fn) { E_INFO("Normalizing mean for n_mgau= %u, n_stream= %u, n_density= %u\n", n_mgau, n_stream, n_density); gauden_norm_wt_mmie_mean(in_mean, wt_mean, wt_num_mean, wt_den_mean, in_var, wt_num_var, wt_den_var, num_dnom, den_dnom, n_mgau, n_stream, n_density, veclen, constE); } else { E_INFO("Ignoring means since -meanfn not specified\n"); } } else { E_INFO("No means to normalize\n"); } /* update variance parameters */ if (wt_var) { if (out_var_fn) { E_INFO("Normalizing variance for n_mgau= %u, n_stream= %u, n_density= %u\n", n_mgau, n_stream, n_density); gauden_norm_wt_mmie_var(in_var, wt_var, wt_num_var, wt_den_var, num_dnom, den_dnom, in_mean, wt_mean, wt_num_mean, wt_den_mean, n_mgau, n_stream, n_density, veclen, constE); } else { E_INFO("Ignoring variances since -varfn not specified\n"); } } else { E_INFO("No variances to normalize\n"); } /* write the updated mean parameters to files */ if (out_mean_fn) { if (wt_mean) { if (s3gau_write(out_mean_fn, (const vector_t ***)wt_mean, n_mgau, n_stream, n_density, veclen) != S3_SUCCESS) { return S3_ERROR; } } else { E_WARN("NO reestimated means seen, but -meanfn 
specified\n"); } } else { if (wt_mean) { E_INFO("Reestimated means seen, but -meanfn NOT specified\n"); } } /* write the updated variance parameters to files */ if (out_var_fn) { if (wt_var) { if (s3gau_write(out_var_fn, (const vector_t ***)wt_var, n_mgau, n_stream, n_density, veclen) != S3_SUCCESS) { return S3_ERROR; } } else { E_WARN("NO reestimated variances seen, but -varfn specified\n"); } } else { if (wt_var) { E_INFO("Reestimated variances seen, but -varfn NOT specified\n"); } } if (veclen) ckd_free((void *)veclen); if (temp_veclen) ckd_free((void *)temp_veclen); return S3_SUCCESS; }
/*
 * Build a sub-vector quantized model from a Gaussian mixture model.
 *
 * Loads the means, variances and mixture weights (-mean, -var, -mixw), floors
 * the variances, parses the subvector specification (-svspec), and for every
 * subvector clusters the concatenated (mean, variance) values of all non-zero
 * Gaussian components into -svqrows codewords with vector_vqgen().  With
 * -stdev, standard deviations are clustered instead and squared on output.
 *
 * The command line, the configuration, every codebook and every
 * component-to-codeword map are written to -subvq, or to stdout if -subvq is
 * not given.  The program exits with status 0, or 1 if the subvector
 * specification cannot be parsed.
 */
int32
main (int32 argc, char *argv[])
{
    FILE *fpout;
    mgau_model_t *mgau;
    int32 **subvec;
    int32 max_datarows, datarows, datacols, svqrows, svqcols;
    float32 **data, **vqmean;
    int32 *datamap, *vqmap;
    float64 sqerr;
    int32 stdev;
    int32 i, j, v, m, c;

    cmd_ln_parse (arg, argc, argv);

    logs3_init ((float64) 1.0003);

    /* Load means/vars but DO NOT precompute variance inverses or determinants */
    mgau = mgau_init (cmd_ln_str("-mean"),
                      cmd_ln_str("-var"), 0.0 /* no varfloor */,
                      cmd_ln_str("-mixw"), cmd_ln_float64 ("-mixwfloor"),
                      FALSE);
    mgau_var_nzvec_floor (mgau, cmd_ln_float64 ("-varfloor"));

    /* Parse subvector spec argument; subvec is null terminated; subvec[x] is -1 terminated */
    subvec = parse_subvecs (cmd_ln_str("-svspec"));
    /* Everything below walks subvec, so stop here if parsing failed. */
    if (subvec == NULL) {
        fprintf (stderr, "Unable to parse subvector specification (-svspec)\n");
        exit(1);
    }

    if (cmd_ln_str ("-subvq"))
        fpout = myfopen (cmd_ln_str ("-subvq"), "w");
    else
        fpout = stdout;

    /* Echo command line to output file */
    for (i = 0; i < argc-1; i++)
        fprintf (fpout, "# %s \\\n", argv[i]);
    fprintf (fpout, "# %s\n#\n", argv[argc-1]);

    /* Print input and output configurations to output file */
    for (v = 0; subvec[v]; v++);        /* No. of subvectors */
    svqrows = cmd_ln_int32 ("-svqrows");
    fprintf (fpout, "VQParam %d %d -> %d %d\n",
             mgau_n_mgau(mgau), mgau_max_comp(mgau), v, svqrows);
    for (v = 0; subvec[v]; v++) {
        for (i = 0; subvec[v][i] >= 0; i++);
        fprintf (fpout, "Subvector %d length %d ", v, i);
        for (i = 0; subvec[v][i] >= 0; i++)
            fprintf (fpout, " %2d", subvec[v][i]);
        fprintf (fpout, "\n");
    }
    fflush (fpout);

    /*
     * datamap[] for identifying non-0 input vectors that take part in the clustering process:
     *     datamap[m*max_mean + c] = row index of data[][] containing the copy.
     * vqmap[] for mapping vq input data to vq output.
     */
    max_datarows = mgau_n_mgau(mgau) * mgau_max_comp(mgau);
    datamap = (int32 *) ckd_calloc (max_datarows, sizeof(int32));
    vqmap = (int32 *) ckd_calloc (max_datarows, sizeof(int32));

    stdev = cmd_ln_int32 ("-stdev");

    /* Copy and cluster each subvector */
    for (v = 0; subvec[v]; v++) {
        E_INFO("Clustering subvector %d\n", v);

        for (datacols = 0; subvec[v][datacols] >= 0; datacols++);       /* Input subvec length */
        svqcols = datacols * 2;         /* subvec length after concatenating mean + var */

        /* Allocate input/output data areas */
        data = (float32 **) ckd_calloc_2d (max_datarows, svqcols, sizeof(float32));
        vqmean = (float32 **) ckd_calloc_2d (svqrows, svqcols, sizeof(float32));

        /* Make a copy of the subvectors from the input data, and initialize maps */
        for (i = 0; i < max_datarows; i++)
            datamap[i] = -1;
        datarows = 0;
        for (m = 0; m < mgau_n_mgau(mgau); m++) {               /* For each mixture m */
            for (c = 0; c < mgau_n_comp(mgau, m); c++) {        /* For each component c in m */
                /* Components with an all-zero variance vector are left out. */
                if (vector_is_zero (mgau_var(mgau, m, c), mgau_veclen(mgau)))
                    continue;

                for (i = 0; i < datacols; i++) {        /* Copy specified dimensions, mean+var */
                    data[datarows][i*2] = mgau->mgau[m].mean[c][subvec[v][i]];
                    data[datarows][i*2+1] = (! stdev) ?
                        mgau->mgau[m].var[c][subvec[v][i]] :
                        sqrt(mgau->mgau[m].var[c][subvec[v][i]]);
                }
                datamap[m * mgau_max_comp(mgau) + c] = datarows++;
            }
        }

        E_INFO("Sanity check: input data[0]:\n");
        vector_print (stderr, data[0], svqcols);

        for (i = 0; i < max_datarows; i++)
            vqmap[i] = -1;

        /* VQ the subvector copy built above */
        sqerr = vector_vqgen (data, datarows, svqcols, svqrows,
                              cmd_ln_float64("-eps"), cmd_ln_int32("-iter"),
                              vqmean, vqmap);

        /* Output VQ */
        fprintf (fpout, "Codebook %d Sqerr %e\n", v, sqerr);
        for (i = 0; i < svqrows; i++) {
            if (stdev) {
                /* Convert clustered stdev back to var */
                for (j = 1; j < svqcols; j += 2)
                    vqmean[i][j] *= vqmean[i][j];
            }
            vector_print (fpout, vqmean[i], svqcols);
        }

        /* One output line per mixture; -1 marks components that were skipped. */
        fprintf (fpout, "Map %d\n", v);
        for (i = 0; i < max_datarows; i += mgau_max_comp(mgau)) {
            for (j = 0; j < mgau_max_comp(mgau); j++) {
                if (datamap[i+j] < 0)
                    fprintf (fpout, " -1");
                else
                    fprintf (fpout, " %d", vqmap[datamap[i+j]]);
            }
            fprintf (fpout, "\n");
        }
        fflush (fpout);

        /* Cleanup */
        ckd_free_2d ((void **) data);
        ckd_free_2d ((void **) vqmean);
    }

    fprintf (fpout, "End\n");
    fclose (fpout);

    exit(0);
}
static int cp_parm() { FILE *fp; uint32 i, o; uint32 max=0; /* Open the file first to see whether command-line parameters match */ if(cmd_ln_str("-cpopsfn")==NULL) { E_INFO("Please specify -cpopsfn\n"); return S3_ERROR; } fp = fopen(cmd_ln_str("-cpopsfn"), "r"); if (fp == NULL) { E_INFO("Unable to open cpops file\n"); return S3_ERROR; } while (fscanf(fp, "%u %u", &o, &i) == 2) { if(o+1>max) { max=o+1; } } if (omixw) { if(max != n_mixw_o) { E_INFO("Mismatch between cp operation file (max out %d) and -nmixout (%d)\n",max, n_mixw_o); return S3_ERROR; } } if (ogau) { if(max != n_cb_o) { E_INFO("Mismatch between cp operation file (max out %d) and -ncbout (%d)\n",max, n_cb_o); return S3_ERROR; } } if (ogau_full) { if(max != n_cb_o) { E_INFO("Mismatch between cp operation file (max out %d) and -ncbout (%d)\n",max, n_cb_o); return S3_ERROR; } } if (otmat) { if(max != n_tmat_o) { E_INFO("Mismatch between cp operation file (max out %d) and -ntmatout (%d)\n",max, n_tmat_o); return S3_ERROR; } } fclose(fp); fp = fopen(cmd_ln_str("-cpopsfn"), "r"); while (fscanf(fp, "%u %u", &o, &i) == 2) { if (omixw) { cp_mixw(o, i); } if (ogau) { cp_gau(o, i); } if (ogau_full) { cp_gau_full(o, i); } if (otmat) { cp_tmat(o, i); } } fclose(fp); return S3_SUCCESS; }
/*
 * Make the language model called `lmname` the current one for `kb`.
 *
 * Looks `lmname` up in kb->kbcore->lmset, points kbcore->lm at the matching
 * LM and kb->ugtree[] at that LM's slice of kb->ugtreeMulti[].  If no entry
 * matches (or the matching entry has no LM), entry 0 of the set is used
 * instead and an error is logged.  kb->hmm_hist is then resized from the
 * node counts of the selected unigram tree and the filler tree.
 *
 * If the LM set is missing or empty, an error is logged and `kb` is left
 * untouched; there is nothing that could be selected or fallen back to.
 */
void
kb_setlm(char* lmname, kb_t* kb)
{
    lmset_t *lms;
    kbcore_t *kbc = NULL;
    int sel;            /* index into lms[] of the LM actually in use */
    int j;
    int n;

    kbc = kb->kbcore;
    lms = kbc->lmset;

    E_INFO("Inside kb_setlm\n");

    /* Everything below indexes lms[]; bail out before resetting any state
     * if there is no entry to index. */
    if (lms == NULL || kbc->n_lm < 1) {
        E_ERROR("No LM set available; cannot switch to LM %s\n", lmname);
        return;
    }

    kbc->lm = NULL;
    for (j = 0; j < kb->n_lextree; j++) {
        kb->ugtree[j] = NULL;
    }

    E_INFO("Inside kb_setlm\n");

    for (sel = 0; sel < kbc->n_lm; sel++) {
        if (!strcmp(lmname, lms[sel].name)) {
            /* Point the current lm to a particular lm */
            kbc->lm = lms[sel].lm;
            for (j = 0; j < kb->n_lextree; j++) {
                kb->ugtree[j] = kb->ugtreeMulti[sel * kb->n_lextree + j];
            }
            break;
        }
    }

    if (kbc->lm == NULL) {
        E_ERROR("LM name %s cannot be found! Fall back to use base LM model\n",lmname);
        /* Keep the index in step with the LM in use: after an unsuccessful
         * search it equals n_lm, one past the end of lms[]. */
        sel = 0;
        kbc->lm = lms[0].lm;
        for (j = 0; j < kb->n_lextree; j++) {
            kb->ugtree[j] = kb->ugtreeMulti[j];
        }
    }

    E_INFO("Current LM name %s.\n",lms[sel].name);

    /* Resize the HMM histogram: one bin per hmm_hist_binsize nodes over all
     * lextree copies of the unigram and filler trees. */
    n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_realloc (kb->hmm_hist,(n+1)*sizeof(int32));    /* Really no need for +1 */

    E_INFO("Current LM name %s\n",lms[sel].name);
    E_INFO("LM ug size %d\n",kbc->lm->n_ug);
    E_INFO("LM bg size %d\n",kbc->lm->n_bg);
    E_INFO("LM tg size %d\n",kbc->lm->n_tg);
    E_INFO("HMM history bin size %d\n", n+1);

    for (j = 0; j < kb->n_lextree; j++) {
        E_INFO("Lextrees(%d), %d nodes(ug)\n", kb->n_lextree, lextree_n_node(kb->ugtree[j]));
    }
}
static int normalize() { char file_name[MAXPATHLEN+1]; float32 ***mixw_acc = NULL; float32 ***in_mixw = NULL; float64 s; uint32 n_mixw; uint32 n_stream; uint32 n_mllr_class; uint32 n_density; float32 ***tmat_acc = NULL; uint32 n_tmat; uint32 n_state_pm; uint32 i, j, k; vector_t ***in_mean = NULL; vector_t ***wt_mean = NULL; vector_t ***in_var = NULL; vector_t ***wt_var = NULL; vector_t ****in_fullvar = NULL; vector_t ****wt_fullvar = NULL; int32 pass2var = FALSE; int32 var_is_full = FALSE; float32 ***dnom = NULL; uint32 n_mgau; uint32 n_gau_stream; uint32 n_gau_density; const uint32 *veclen = NULL; const char **accum_dir; const char *oaccum_dir; const char *in_mixw_fn; const char *out_mixw_fn; const char *out_tmat_fn; const char *in_mean_fn; const char *out_mean_fn; const char *in_var_fn; const char *out_var_fn; const char *out_dcount_fn; int err; uint32 mllr_mult; uint32 mllr_add; float32 *****regl = NULL; float32 ****regr = NULL; uint32 no_retries=0; accum_dir = cmd_ln_str_list("-accumdir"); oaccum_dir = cmd_ln_str("-oaccumdir"); out_mixw_fn = cmd_ln_str("-mixwfn"); out_tmat_fn = cmd_ln_str("-tmatfn"); out_mean_fn = cmd_ln_str("-meanfn"); out_var_fn = cmd_ln_str("-varfn"); in_mixw_fn = cmd_ln_str("-inmixwfn"); in_mean_fn = cmd_ln_str("-inmeanfn"); in_var_fn = cmd_ln_str("-invarfn"); out_dcount_fn = cmd_ln_str("-dcountfn"); var_is_full = cmd_ln_int32("-fullvar"); /* must be at least one accum dir */ assert(accum_dir[0] != NULL); if (out_mixw_fn == NULL) { E_INFO("No -mixwfn specified, will skip if any\n"); } if (out_tmat_fn == NULL) { E_INFO("No -tmatfn specified, will skip if any\n"); } if (out_mean_fn == NULL) { E_INFO("No -meanfn specified, will skip if any\n"); } if (out_var_fn == NULL) { E_INFO("No -varfn specified, will skip if any\n"); } if (in_mixw_fn != NULL) { E_INFO("Selecting unseen mixing weight parameters from %s\n", in_mixw_fn); } if (in_mean_fn != NULL) { E_INFO("Selecting unseen density mean parameters from %s\n", in_mean_fn); if 
(s3gau_read(in_mean_fn, &in_mean, &n_mgau, &n_gau_stream, &n_gau_density, &veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_mean_fn); } ckd_free((void *)veclen); veclen = NULL; } if (in_var_fn != NULL) { E_INFO("Selecting unseen density variance parameters from %s\n", in_var_fn); if (var_is_full) { if (s3gau_read_full(in_var_fn, &in_fullvar, &n_mgau, &n_gau_stream, &n_gau_density, &veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_var_fn); } } else { if (s3gau_read(in_var_fn, &in_var, &n_mgau, &n_gau_stream, &n_gau_density, &veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_var_fn); } } ckd_free((void *)veclen); veclen = NULL; } n_stream = 0; for (i = 0; accum_dir[i]; i++) { E_INFO("Reading and accumulating counts from %s\n", accum_dir[i]); if (out_mixw_fn) { rdacc_mixw(accum_dir[i], &mixw_acc, &n_mixw, &n_stream, &n_density); } if (out_tmat_fn) { rdacc_tmat(accum_dir[i], &tmat_acc, &n_tmat, &n_state_pm); } if (out_mean_fn || out_var_fn) { if (var_is_full) rdacc_den_full(accum_dir[i], &wt_mean, &wt_fullvar, &pass2var, &dnom, &n_mgau, &n_gau_stream, &n_gau_density, &veclen); else rdacc_den(accum_dir[i], &wt_mean, &wt_var, &pass2var, &dnom, &n_mgau, &n_gau_stream, &n_gau_density, &veclen); if (out_mixw_fn) { if (n_stream != n_gau_stream) { E_ERROR("mixw inconsistent w/ densities WRT # " "streams (%u != %u)\n", n_stream, n_gau_stream); } if (n_density != n_gau_density) { E_ERROR("mixw inconsistent w/ densities WRT # " "den/mix (%u != %u)\n", n_density, n_gau_density); } } else { n_stream = n_gau_stream; n_density = n_gau_density; } } } if (oaccum_dir && mixw_acc) { /* write the total mixing weight reest. 
accumulators */ err = 0; sprintf(file_name, "%s/mixw_counts", oaccum_dir); if (in_mixw_fn) { if (s3mixw_read(in_mixw_fn, &in_mixw, &i, &j, &k) != S3_SUCCESS) { E_FATAL_SYSTEM("Unable to read %s", in_mixw_fn); } if (i != n_mixw) { E_FATAL("# mixw in input mixw file != # mixw in output mixw file\n"); } if (j != n_stream) { E_FATAL("# stream in input mixw file != # stream in output mixw file\n"); } if (k != n_density) { E_FATAL("# density in input mixw file != # density in output mixw file\n"); } for (i = 0; i < n_mixw; i++) { for (j = 0; j < n_stream; j++) { for (k = 0, s = 0; k < n_density; k++) { s += mixw_acc[i][j][k]; } if ((s == 0) && in_mixw) { for (k = 0, s = 0; k < n_density; k++) { mixw_acc[i][j][k] = in_mixw[i][j][k]; } E_INFO("set mixw %u stream %u to input mixw value\n", i, j); } } } } do { /* Write out the accumulated reestimation sums */ if (s3mixw_write(file_name, mixw_acc, n_mixw, n_stream, n_density) != S3_SUCCESS) { if (err == 0) { E_ERROR("Unable to write %s; Retrying...\n", file_name); } ++err; sleep(3); no_retries++; if(no_retries>10){ E_FATAL("Failed to get the files after 10 retries(about 30 seconds).\n "); } } } while (err > 1); } if (pass2var) E_INFO("-2passvar yes\n"); if (oaccum_dir && (wt_mean || wt_var || wt_fullvar)) { /* write the total mixing Gau. den reest. accumulators */ err = 0; sprintf(file_name, "%s/gauden_counts", oaccum_dir); do { int32 rv; if (var_is_full) rv = s3gaucnt_write_full(file_name, wt_mean, wt_fullvar, pass2var, dnom, n_mgau, n_gau_stream, n_gau_density, veclen); else rv = s3gaucnt_write(file_name, wt_mean, wt_var, pass2var, dnom, n_mgau, n_gau_stream, n_gau_density, veclen); if (rv != S3_SUCCESS) { if (err == 0) { E_ERROR("Unable to write %s; Retrying...\n", file_name); } ++err; sleep(3); no_retries++; if(no_retries>10){ E_FATAL("Failed to get the files after 10 retries(about 5 minutes).\n "); } } } while (err > 1); } if (oaccum_dir && tmat_acc) { /* write the total transition matrix reest. 
accumulators */ err = 0; sprintf(file_name, "%s/tmat_counts", oaccum_dir); do { if (s3tmat_write(file_name, tmat_acc, n_tmat, n_state_pm) != S3_SUCCESS) { if (err == 0) { E_ERROR("Unable to write %s; Retrying...\n", file_name); } ++err; sleep(3); no_retries++; if(no_retries>10){ E_FATAL("Failed to get the files after 10 retries(about 5 minutes).\n "); } } } while (err > 1); } if (oaccum_dir && regr && regl) { /* write the total MLLR regression matrix accumulators */ err = 0; sprintf(file_name, "%s/regmat_counts", oaccum_dir); do { if (s3regmatcnt_write(file_name, regr, regl, n_mllr_class, n_stream, veclen, mllr_mult, mllr_add) != S3_SUCCESS) { if (err == 0) { E_ERROR("Unable to write %s; Retrying...\n", file_name); } ++err; sleep(3); no_retries++; if(no_retries>10){ E_FATAL("Failed to get the files after 10 retries(about 5 minutes).\n "); } } } while (err > 1); } if (wt_mean || wt_var || wt_fullvar) { if (out_mean_fn) { E_INFO("Normalizing mean for n_mgau= %u, n_stream= %u, n_density= %u\n", n_mgau, n_stream, n_density); gauden_norm_wt_mean(in_mean, wt_mean, dnom, n_mgau, n_stream, n_density, veclen); } else { if (wt_mean) { E_INFO("Ignoring means since -meanfn not specified\n"); } } if (out_var_fn) { if (var_is_full) { if (wt_fullvar) { E_INFO("Normalizing fullvar\n"); gauden_norm_wt_fullvar(in_fullvar, wt_fullvar, pass2var, dnom, wt_mean, /* wt_mean now just mean */ n_mgau, n_stream, n_density, veclen, cmd_ln_boolean("-tiedvar")); } } else { if (wt_var) { E_INFO("Normalizing var\n"); gauden_norm_wt_var(in_var, wt_var, pass2var, dnom, wt_mean, /* wt_mean now just mean */ n_mgau, n_stream, n_density, veclen, cmd_ln_boolean("-tiedvar")); } } } else { if (wt_var || wt_fullvar) { E_INFO("Ignoring variances since -varfn not specified\n"); } } } else { E_INFO("No means or variances to normalize\n"); } /* * Write the parameters to files */ if (out_mixw_fn) { if (mixw_acc) { if (s3mixw_write(out_mixw_fn, mixw_acc, n_mixw, n_stream, n_density) != S3_SUCCESS) { return 
S3_ERROR; } } else { E_WARN("NO mixing weight accumulators seen, but -mixwfn specified.\n"); } } else { if (mixw_acc) { E_INFO("Mixing weight accumulators seen, but -mixwfn NOT specified.\n"); } } if (out_tmat_fn) { if (tmat_acc) { if (s3tmat_write(out_tmat_fn, tmat_acc, n_tmat, n_state_pm) != S3_SUCCESS) { return S3_ERROR; } } else { E_WARN("NO transition matrix accumulators seen, but -tmatfn specified.\n"); } } else { if (tmat_acc) E_INFO("Transition matrix accumulators seen, but -tmatfn NOT specified\n"); } if (out_mean_fn) { if (wt_mean) { if (s3gau_write(out_mean_fn, (const vector_t ***)wt_mean, n_mgau, n_stream, n_density, veclen) != S3_SUCCESS) return S3_ERROR; if (out_dcount_fn) { if (s3gaudnom_write(out_dcount_fn, dnom, n_mgau, n_stream, n_density) != S3_SUCCESS) return S3_ERROR; } } else E_WARN("NO reestimated means seen, but -meanfn specified\n"); } else { if (wt_mean) { E_INFO("Reestimated means seen, but -meanfn NOT specified\n"); } } if (out_var_fn) { if (var_is_full) { if (wt_fullvar) { if (s3gau_write_full(out_var_fn, (const vector_t ****)wt_fullvar, n_mgau, n_stream, n_density, veclen) != S3_SUCCESS) return S3_ERROR; } else E_WARN("NO reestimated variances seen, but -varfn specified\n"); } else { if (wt_var) { if (s3gau_write(out_var_fn, (const vector_t ***)wt_var, n_mgau, n_stream, n_density, veclen) != S3_SUCCESS) return S3_ERROR; } else E_WARN("NO reestimated variances seen, but -varfn specified\n"); } } else { if (wt_var) { E_INFO("Reestimated variances seen, but -varfn NOT specified\n"); } } if (veclen) ckd_free((void *)veclen); return S3_SUCCESS; }
int32 viterbi_update(float64 *log_forw_prob, vector_t **feature, uint32 n_obs, state_t *state_seq, uint32 n_state, model_inventory_t *inv, float64 a_beam, float32 spthresh, s3phseg_t *phseg, int32 mixw_reest, int32 tmat_reest, int32 mean_reest, int32 var_reest, int32 pass2var, int32 var_is_full, FILE *pdumpfh, feat_t *fcb) { float64 *scale = NULL; float64 **dscale = NULL; float64 **active_alpha; uint32 **active_astate; uint32 **bp; uint32 *n_active_astate; gauden_t *g; /* Gaussian density parameters and reestimation sums */ float32 ***mixw; /* all mixing weights */ float64 ***now_den = NULL; /* Short for den[t] */ uint32 ***now_den_idx = NULL;/* Short for den_idx[t] */ uint32 *active_cb; uint32 n_active_cb; float32 **tacc; /* Transition matrix reestimation sum accumulators for the utterance. */ float32 ***wacc; /* mixing weight reestimation sum accumulators for the utterance. */ float32 ***denacc = NULL; /* mean/var reestimation accumulators for time t */ size_t denacc_size; /* Total size of data references in denacc. 
Allows for quick clears between time frames */ uint32 n_lcl_cb; uint32 *cb_inv; uint32 i, j, q; int32 t; uint32 n_feat; uint32 n_density; uint32 n_top; int ret; timing_t *fwd_timer = NULL; timing_t *rstu_timer = NULL; timing_t *gau_timer = NULL; timing_t *rsts_timer = NULL; timing_t *rstf_timer = NULL; float64 log_fp; /* accumulator for the log of the probability * of observing the input given the model */ uint32 max_n_next = 0; uint32 n_cb; static float64 *p_op = NULL; static float64 *p_ci_op = NULL; static float64 **d_term = NULL; static float64 **d_term_ci = NULL; /* caller must ensure that there is some non-zero amount of work to be done here */ assert(n_obs > 0); assert(n_state > 0); /* Get the forward estimation CPU timer */ fwd_timer = timing_get("fwd"); /* Get the per utterance reestimation CPU timer */ rstu_timer = timing_get("rstu"); /* Get the Gaussian density evaluation CPU timer */ gau_timer = timing_get("gau"); /* Get the per state reestimation CPU timer */ rsts_timer = timing_get("rsts"); /* Get the per frame reestimation CPU timer */ rstf_timer = timing_get("rstf"); g = inv->gauden; n_feat = gauden_n_feat(g); n_density = gauden_n_density(g); n_top = gauden_n_top(g); n_cb = gauden_n_mgau(g); if (p_op == NULL) { p_op = ckd_calloc(n_feat, sizeof(float64)); p_ci_op = ckd_calloc(n_feat, sizeof(float64)); } if (d_term == NULL) { d_term = (float64 **)ckd_calloc_2d(n_feat, n_top, sizeof(float64)); d_term_ci = (float64 **)ckd_calloc_2d(n_feat, n_top, sizeof(float64)); } scale = (float64 *)ckd_calloc(n_obs, sizeof(float64)); dscale = (float64 **)ckd_calloc(n_obs, sizeof(float64 *)); n_active_astate = (uint32 *)ckd_calloc(n_obs, sizeof(uint32)); active_alpha = (float64 **)ckd_calloc(n_obs, sizeof(float64 *)); active_astate = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *)); active_cb = ckd_calloc(2*n_state, sizeof(uint32)); bp = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *)); /* Run forward algorithm, which has embedded Viterbi. 
*/ if (fwd_timer) timing_start(fwd_timer); ret = forward(active_alpha, active_astate, n_active_astate, bp, scale, dscale, feature, n_obs, state_seq, n_state, inv, a_beam, phseg, 0); /* Dump a phoneme segmentation if requested */ if (cmd_ln_str("-outphsegdir")) { const char *phsegdir; char *segfn, *uttid; phsegdir = cmd_ln_str("-outphsegdir"); uttid = (cmd_ln_int32("-outputfullpath") ? corpus_utt_full_name() : corpus_utt()); segfn = ckd_calloc(strlen(phsegdir) + 1 + strlen(uttid) + strlen(".phseg") + 1, 1); strcpy(segfn, phsegdir); strcat(segfn, "/"); strcat(segfn, uttid); strcat(segfn, ".phseg"); write_phseg(segfn, inv, state_seq, active_astate, n_active_astate, n_state, n_obs, active_alpha, scale, bp); ckd_free(segfn); } if (fwd_timer) timing_stop(fwd_timer); if (ret != S3_SUCCESS) { /* Some problem with the utterance, release per utterance storage and * forget about adding the utterance accumulators to the global accumulators */ goto all_done; } mixw = inv->mixw; if (mixw_reest) { /* Need to reallocate mixing accumulators for utt */ if (inv->l_mixw_acc) { ckd_free_3d((void ***)inv->l_mixw_acc); inv->l_mixw_acc = NULL; } inv->l_mixw_acc = (float32 ***)ckd_calloc_3d(inv->n_mixw_inverse, n_feat, n_density, sizeof(float32)); } wacc = inv->l_mixw_acc; n_lcl_cb = inv->n_cb_inverse; cb_inv = inv->cb_inverse; /* Allocate local accumulators for mean, variance reestimation sums if necessary */ gauden_alloc_l_acc(g, n_lcl_cb, mean_reest, var_reest, var_is_full); if (tmat_reest) { if (inv->l_tmat_acc) { ckd_free_2d((void **)inv->l_tmat_acc); inv->l_tmat_acc = NULL; } for (i = 0; i < n_state; i++) { if (state_seq[i].n_next > max_n_next) max_n_next = state_seq[i].n_next; } inv->l_tmat_acc = (float32 **)ckd_calloc_2d(n_state, max_n_next, sizeof(float32)); } /* transition matrix reestimation sum accumulators for the utterance */ tacc = inv->l_tmat_acc; n_active_cb = 0; now_den = (float64 ***)ckd_calloc_3d(n_lcl_cb, n_feat, n_top, sizeof(float64)); now_den_idx = (uint32 
***)ckd_calloc_3d(n_lcl_cb, n_feat, n_top, sizeof(uint32)); if (mean_reest || var_reest) { /* allocate space for the per frame density counts */ denacc = (float32 ***)ckd_calloc_3d(n_lcl_cb, n_feat, n_density, sizeof(float32)); /* # of bytes required to store all weighted vectors */ denacc_size = n_lcl_cb * n_feat * n_density * sizeof(float32); } else { denacc = NULL; denacc_size = 0; } /* Okay now run through the backtrace and accumulate counts. */ /* Find the non-emitting ending state */ for (q = 0; q < n_active_astate[n_obs-1]; ++q) { if (active_astate[n_obs-1][q] == n_state-1) break; } if (q == n_active_astate[n_obs-1]) { E_ERROR("Failed to align audio to trancript: final state of the search is not reached\n"); ret = S3_ERROR; goto all_done; } for (t = n_obs-1; t >= 0; --t) { uint32 l_cb; uint32 l_ci_cb; float64 op, p_reest_term; uint32 prev; j = active_astate[t][q]; /* Follow any non-emitting states at time t first. */ while (state_seq[j].mixw == TYING_NON_EMITTING) { prev = active_astate[t][bp[t][q]]; #if VITERBI_DEBUG printf("Following non-emitting state at time %d, %u => %u\n", t, j, prev); #endif /* Backtrace and accumulate transition counts. */ if (tmat_reest) { assert(tacc != NULL); tacc[prev][j - prev] += 1.0; } q = bp[t][q]; j = prev; } /* Now accumulate statistics for the real state. */ l_cb = state_seq[j].l_cb; l_ci_cb = state_seq[j].l_ci_cb; n_active_cb = 0; if (gau_timer) timing_start(gau_timer); gauden_compute_log(now_den[l_cb], now_den_idx[l_cb], feature[t], g, state_seq[j].cb, NULL); active_cb[n_active_cb++] = l_cb; if (l_cb != l_ci_cb) { gauden_compute_log(now_den[l_ci_cb], now_den_idx[l_ci_cb], feature[t], g, state_seq[j].ci_cb, NULL); active_cb[n_active_cb++] = l_ci_cb; } gauden_scale_densities_bwd(now_den, now_den_idx, &dscale[t], active_cb, n_active_cb, g); assert(state_seq[j].mixw != TYING_NON_EMITTING); /* Now calculate mixture densities. 
*/ /* This is the normalizer sum_m c_{jm} p(o_t|\lambda_{jm}) */ op = gauden_mixture(now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], g); if (gau_timer) timing_stop(gau_timer); if (rsts_timer) timing_start(rsts_timer); /* Make up this bogus value to be consistent with backward.c */ p_reest_term = 1.0 / op; /* Compute the output probability excluding the contribution * of each feature stream. i.e. p_op[0] is the output * probability excluding feature stream 0 */ partial_op(p_op, op, now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], n_feat, n_top); /* compute the probability of each (of possibly topn) density */ den_terms(d_term, p_reest_term, p_op, now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], n_feat, n_top); if (l_cb != l_ci_cb) { /* For each feature stream f, compute: * sum_k(mixw[f][k] den[f][k]) * and store the results in p_ci_op */ partial_ci_op(p_ci_op, now_den[l_ci_cb], now_den_idx[l_ci_cb], mixw[state_seq[j].ci_mixw], n_feat, n_top); /* For each feature stream and density compute the terms: * w[f][k] den[f][k] / sum_k(w[f][k] den[f][k]) * post_j * and store results in d_term_ci */ den_terms_ci(d_term_ci, 1.0, /* post_j = 1.0 */ p_ci_op, now_den[l_ci_cb], now_den_idx[l_ci_cb], mixw[state_seq[j].ci_mixw], n_feat, n_top); } /* accumulate the probability for each density in the mixing * weight reestimation accumulators */ if (mixw_reest) { accum_den_terms(wacc[state_seq[j].l_mixw], d_term, now_den_idx[l_cb], n_feat, n_top); /* check if mixw and ci_mixw are different to avoid * doubling the EM counts in a CI run. 
*/ if (state_seq[j].mixw != state_seq[j].ci_mixw) { if (n_cb < inv->n_mixw) { /* semi-continuous, tied mixture, and discrete case */ accum_den_terms(wacc[state_seq[j].l_ci_mixw], d_term, now_den_idx[l_cb], n_feat, n_top); } else { /* continuous case */ accum_den_terms(wacc[state_seq[j].l_ci_mixw], d_term_ci, now_den_idx[l_ci_cb], n_feat, n_top); } } } /* accumulate the probability for each density in the * density reestimation accumulators */ if (mean_reest || var_reest) { accum_den_terms(denacc[l_cb], d_term, now_den_idx[l_cb], n_feat, n_top); if (l_cb != l_ci_cb) { accum_den_terms(denacc[l_ci_cb], d_term_ci, now_den_idx[l_ci_cb], n_feat, n_top); } } if (rsts_timer) timing_stop(rsts_timer); /* Note that there is only one state/frame so this is kind of redundant */ if (rstf_timer) timing_start(rstf_timer); if (mean_reest || var_reest) { /* Update the mean and variance reestimation accumulators */ if (pdumpfh) fprintf(pdumpfh, "time %d:\n", t); accum_gauden(denacc, cb_inv, n_lcl_cb, feature[t], now_den_idx, g, mean_reest, var_reest, pass2var, inv->l_mixw_acc, var_is_full, pdumpfh, fcb); memset(&denacc[0][0][0], 0, denacc_size); } if (rstf_timer) timing_stop(rstf_timer); if (t > 0) { prev = active_astate[t-1][bp[t][q]]; #if VITERBI_DEBUG printf("Backtrace at time %d, %u => %u\n", t, j, prev); #endif /* Backtrace and accumulate transition counts. 
*/ if (tmat_reest) { assert(tacc != NULL); tacc[prev][j-prev] += 1.0; } q = bp[t][q]; j = prev; } } /* If no error was found, add the resulting utterance reestimation * accumulators to the global reestimation accumulators */ if (rstu_timer) timing_start(rstu_timer); accum_global(inv, state_seq, n_state, mixw_reest, tmat_reest, mean_reest, var_reest, var_is_full); if (rstu_timer) timing_stop(rstu_timer); /* Find the final state */ for (i = 0; i < n_active_astate[n_obs-1]; ++i) { if (active_astate[n_obs-1][i] == n_state-1) break; } /* Calculate log[ p( O | \lambda ) ] */ assert(active_alpha[n_obs-1][i] > 0); log_fp = log(active_alpha[n_obs-1][i]); for (t = 0; t < n_obs; t++) { assert(scale[t] > 0); log_fp -= log(scale[t]); for (j = 0; j < inv->gauden->n_feat; j++) { log_fp += dscale[t][j]; } } *log_forw_prob = log_fp; all_done: ckd_free((void *)scale); for (i = 0; i < n_obs; i++) { if (dscale[i]) ckd_free((void *)dscale[i]); } ckd_free((void **)dscale); ckd_free(n_active_astate); for (i = 0; i < n_obs; i++) { ckd_free((void *)active_alpha[i]); ckd_free((void *)active_astate[i]); ckd_free((void *)bp[i]); } ckd_free((void *)active_alpha); ckd_free((void *)active_astate); ckd_free((void *)active_cb); if (denacc) ckd_free_3d((void ***)denacc); if (now_den) ckd_free_3d((void ***)now_den); if (now_den_idx) ckd_free_3d((void ***)now_den_idx); if (ret != S3_SUCCESS) E_ERROR("%s ignored\n", corpus_utt_brief_name()); return ret; }
int main(int argc, char *argv[]) { int i, j, offset; int32 noframe, vsize, dsize, column; int32 frm_begin, frm_end; int is_header, is_describe; float *z, **cep; char const *cepfile; print_appl_info(argv[0]); cmd_ln_appl_enter(argc, argv, "default.arg", arg); vsize = cmd_ln_int32("-i"); dsize = cmd_ln_int32("-d"); frm_begin = cmd_ln_int32("-b"); frm_end = cmd_ln_int32("-e"); is_header = cmd_ln_int32("-header"); is_describe = cmd_ln_int32("-describe"); if (vsize < 0) E_FATAL("-i : Input vector size should be larger than 0.\n"); if (dsize < 0) E_FATAL("-d : Column size should be larger than 0\n"); if (frm_begin < 0) E_FATAL("-b : Beginning frame should be larger than 0\n"); /* The following condition is redundant * if (frm_end < 0) E_FATAL("-e : Ending frame should be larger than 0\n"); */ if (frm_begin >= frm_end) E_FATAL ("Ending frame (-e) should be larger than beginning frame (-b).\n"); if ((cepfile = cmd_ln_str("-f")) == NULL) { E_FATAL("Input file was not specified with (-f)\n"); } if (read_cep(cepfile, &cep, &noframe, vsize) == IO_ERR) E_FATAL("ERROR opening %s for reading\n", cepfile); z = cep[0]; offset = 0; column = (vsize > dsize) ? dsize : vsize; frm_end = (frm_end > noframe) ? noframe : frm_end; E_INFO("Displaying %d out of %d columns per frame\n", column, vsize); E_INFO("Total %d frames\n\n", noframe); /* This part should be moved to a special library if this file is longer than 300 lines. */ if (is_header) { if (is_describe) { printf("\n%6s", "frame#:"); } for (j = 0; j < column; ++j) { printf("%3s%3d%s ", "c[", j, "]"); } printf("\n"); } offset += frm_begin * vsize; for (i = frm_begin; i < frm_end; ++i) { if (is_describe) { printf("%6d:", i); } for (j = 0; j < column; ++j) printf("%7.3f ", z[offset + j]); printf("\n"); offset += vsize; } fflush(stdout); cmd_ln_appl_exit(); ckd_free_2d(cep); return (IO_SUCCESS); }
int main(int argc, char *argv[]) { model_def_t *mdef; uint32 n_tmat; uint32 n_tied_state; uint32 n_state_pm; uint32 n_stream; uint32 n_density; float32 ***tmat; float32 **proto_tmat; float32 ***mixw; uint32 i, j, k; float32 mixw_ini; int retval = 0; parse_cmd_ln(argc, argv); printf("%s(%d): Reading model definition file %s\n", __FILE__, __LINE__, cmd_ln_str("-moddeffn")); if (model_def_read(&mdef, cmd_ln_str("-moddeffn")) != S3_SUCCESS) { return 1; } printf("%s(%d): %d models defined\n", __FILE__, __LINE__, mdef->n_defn); if (!cmd_ln_str("-tmatfn") && ! cmd_ln_str("-mixwfn")){ E_FATAL("Both -tmatfn and -mixwfn were not specified, forced exit\n"); } if (cmd_ln_str("-tmatfn")) { if (topo_read(&proto_tmat, &n_state_pm, cmd_ln_str("-topo")) != S3_SUCCESS) return 1; /* proto_tmat is normalized */ n_tmat = mdef->n_tied_tmat; tmat = (float32 ***)ckd_calloc_3d(n_tmat, n_state_pm-1, n_state_pm, sizeof(float32)); for (k = 0; k < n_tmat; k++) { for (i = 0; i < n_state_pm-1; i++) { for (j = 0; j < n_state_pm; j++) { /* perhaps this could be replaced with a block copy per tmat */ tmat[k][i][j] = proto_tmat[i][j]; } } } if (s3tmat_write(cmd_ln_str("-tmatfn"), tmat, n_tmat, n_state_pm) != S3_SUCCESS) { retval = 1; } ckd_free_3d((void ***)tmat); } else { E_INFO("No tmat file given; none generated\n"); } n_tied_state = mdef->n_tied_state; n_stream = cmd_ln_int32("-nstream"); n_density = cmd_ln_int32("-ndensity"); mixw = (float32 ***)ckd_calloc_3d(n_tied_state, n_stream, n_density, sizeof(float32)); /* weight each density uniformly */ mixw_ini = 1.0 / (float)n_density; for (i = 0; i < n_tied_state; i++) { for (j = 0; j < n_stream; j++) { for (k = 0; k < n_density; k++) { mixw[i][j][k] = mixw_ini; } } } if (cmd_ln_str("-mixwfn")) { if (s3mixw_write(cmd_ln_str("-mixwfn"), mixw, n_tied_state, n_stream, n_density) != S3_SUCCESS) { retval = 2; } } else { E_INFO("No mixw file given; none generated\n"); } ckd_free_3d((void ***)mixw); return retval; }