/*
 * Hand one transition matrix per context-independent phone to put_sdm(),
 * together with out_dir_name and the phone's name as returned by
 * acmod_set_id2name().
 *
 * Every phone is attempted even after an earlier failure.  Returns
 * S3_SUCCESS only if every put_sdm() call reported S3_SUCCESS, otherwise
 * S3_ERROR.
 */
int
s2_write_hmm(float32 ***tmat,
             acmod_set_t *acmod_set,
             const char *out_dir_name)
{
    uint32 n_ci = acmod_set_n_ci(acmod_set);
    uint32 ci = 0;
    int failed = 0;

    E_INFO("Writing %d tied CI transition matrices to %s\n", n_ci, out_dir_name);

    while (ci < n_ci) {
        if (put_sdm(tmat[ci],
                    out_dir_name,
                    acmod_set_id2name(acmod_set, ci)) != S3_SUCCESS) {
            failed = 1;
        }

#ifdef S2_WRITE_HMM_VERBOSE
        /* The matrix is dumped only while no write has failed so far. */
        if (failed == 0) {
            print_tmat(stdout, tmat[ci], S2_N_STATE);
            fflush(stdout);
        }
#endif

        ++ci;
    }

    return failed ? S3_ERROR : S3_SUCCESS;
}
int32 model_def_write(model_def_t *mdef, const char *fn) { FILE *fp; uint32 n_ci; uint32 n_acmod; acmod_set_t *acmod_set; const char **attrib; acmod_id_t b; acmod_id_t l; acmod_id_t r; word_posn_t wp; uint32 p; char *wp2c = WORD_POSN_CHAR_MAP; uint32 i; fp = fopen(fn, "w"); if (fp == NULL) { E_ERROR_SYSTEM("Unable to open %s for writing", fn); return S3_ERROR; } acmod_set = mdef->acmod_set; fprintf(fp, "%s\n", MODEL_DEF_VERSION); n_ci = acmod_set_n_ci(acmod_set); fprintf(fp, "%u n_base\n", n_ci); fprintf(fp, "%u n_tri\n", acmod_set_n_multi(acmod_set)); fprintf(fp, "%u n_state_map\n", mdef->n_total_state); fprintf(fp, "%u n_tied_state\n", mdef->n_tied_state); fprintf(fp, "%u n_tied_ci_state\n", mdef->n_tied_ci_state); fprintf(fp, "%u n_tied_tmat\n", mdef->n_tied_tmat); fprintf(fp, "#\n# Columns definitions\n"); fprintf(fp, "#%4s %3s %3s %1s %6s %4s %s\n", "base", "lft", "rt", "p", "attrib", "tmat", " ... state id's ..."); n_acmod = acmod_set_n_acmod(acmod_set); for (p = 0; p < n_ci; p++) { fprintf(fp, "%5s %3s %3s %1s", acmod_set_id2name(acmod_set, p), "-", "-", "-"); attrib = acmod_set_attrib(acmod_set, p); if ((attrib == NULL) || (attrib[0] == NULL)) { fprintf(fp, " %6s", "n/a"); } else { fprintf(fp, " %6s", attrib[0]); for (i = 1; attrib[i]; i++) { fprintf(fp, ",%s", attrib[i]); } } fprintf(fp, " %4d", mdef->defn[p].tmat); for (i = 0; i < mdef->defn[p].n_state; i++) { if (mdef->defn[p].state[i] == NO_ID) { fprintf(fp, " N"); } else { fprintf(fp, " %6u", mdef->defn[p].state[i]); } } fprintf(fp, "\n"); } for (; p < n_acmod; p++) { acmod_set_id2tri(acmod_set, &b, &l, &r, &wp, p); fprintf(fp, "%5s %3s %3s %c", acmod_set_id2name(acmod_set, b), acmod_set_id2name(acmod_set, l), acmod_set_id2name(acmod_set, r), wp2c[(uint32)wp]); attrib = acmod_set_attrib(acmod_set, p); if ((attrib == NULL) || (attrib[0] == NULL)) { fprintf(fp, " %6s", "n/a"); } else { fprintf(fp, " %6s", attrib[0]); for (i = 1; attrib[i]; i++) { fprintf(fp, ",%s", attrib[i]); } } fprintf(fp, " %4d", 
mdef->defn[p].tmat); for (i = 0; i < mdef->defn[p].n_state; i++) { if (mdef->defn[p].state[i] == NO_ID) { fprintf(fp, " N"); } else { fprintf(fp, " %6u", mdef->defn[p].state[i]); } } fprintf(fp, "\n"); } fclose(fp); return S3_SUCCESS; }
/*
 * Load everything the tying step needs, as named on the command line:
 * the input model definition (-imoddeffn), the phone sets (-psetfn) and
 * one decision tree file per (phone, state) under -treedir.
 *
 * With -allphones set, a single set of trees named "ALLPHONES" is read
 * and stored at tree[0].  Otherwise one set is read per
 * context-independent phone that does not carry the "filler" attribute;
 * the tree[] entries of the other phones stay NULL.
 *
 * Outputs:
 *   *out_imdef   the model definition read by model_def_read()
 *   *out_pset    the phone sets returned by read_pset_file()
 *   *out_n_pset  number of phone sets
 *   *out_tree    tree[phone][state] array of the trees read
 *   *out_n_seno  sum of cnt_leaf() over all trees read
 *
 * Returns S3_SUCCESS, or S3_ERROR if the model definition cannot be read.
 * Missing required options, an over-long tree path or an unreadable tree
 * file are reported through E_FATAL / E_FATAL_SYSTEM.
 */
int
init(model_def_t **out_imdef,
     pset_t **out_pset,
     uint32 *out_n_pset,
     dtree_t ****out_tree,
     uint32 *out_n_seno)
{
    model_def_t *imdef;
    uint32 p, s;
    uint32 n_ci, n_state;
    char fn[MAXPATHLEN+1];
    const char *a_fn;
    FILE *fp;
    dtree_t ***tree, *tr;
    pset_t *pset;
    uint32 n_pset;
    uint32 n_seno;
    const char *treedir;
    uint32 ts_id;
    int allphones;
    int len;

    a_fn = cmd_ln_str("-imoddeffn");
    if (a_fn == NULL)
        E_FATAL("Specify -imoddeffn\n");
    if (model_def_read(&imdef, a_fn) != S3_SUCCESS) {
        return S3_ERROR;
    }
    *out_imdef = imdef;

    a_fn = cmd_ln_str("-psetfn");
    if (a_fn == NULL)
        E_FATAL("Specify -psetfn\n");
    E_INFO("Reading: %s\n", a_fn);
    *out_pset = pset = read_pset_file(a_fn, imdef->acmod_set, &n_pset);
    *out_n_pset = n_pset;

    allphones = cmd_ln_int32("-allphones");
    if (allphones)
        n_ci = 1;
    else
        n_ci = acmod_set_n_ci(imdef->acmod_set);

    treedir = cmd_ln_str("-treedir");
    if (treedir == NULL)
        E_FATAL("Specify -treedir\n");

    tree = (dtree_t ***)ckd_calloc(n_ci, sizeof(dtree_t **));
    *out_tree = tree;

    /* Leaf labels are handed out starting right after the CI tied states. */
    ts_id = imdef->n_tied_ci_state;

    for (p = 0, n_seno = 0; p < n_ci; p++) {
        const char *pname;

        if (!allphones && acmod_set_has_attrib(imdef->acmod_set, p, "filler"))
            continue;

        if (allphones) {
            /* State count is taken from the first model after the CI phones. */
            n_state = imdef->defn[acmod_set_n_ci(imdef->acmod_set)].n_state;
            pname = "ALLPHONES";
        }
        else {
            n_state = imdef->defn[p].n_state;
            pname = acmod_set_id2name(imdef->acmod_set, p);
        }

        tree[p] = (dtree_t **)ckd_calloc(n_state, sizeof(dtree_t *));

        /* Trees exist for all but the last state.  "s + 1 < n_state"
         * (rather than "s < n_state - 1") keeps the loop empty instead of
         * wrapping around when n_state is 0. */
        for (s = 0; s + 1 < n_state; s++) {
            E_INFO("%s-%u: offset %u\n", pname, s, ts_id);

            len = snprintf(fn, sizeof(fn), "%s/%s-%u.dtree", treedir, pname, s);
            if (len < 0 || (size_t)len >= sizeof(fn)) {
                E_FATAL("Decision tree path too long: %s/%s-%u.dtree\n",
                        treedir, pname, s);
            }

            fp = fopen(fn, "r");
            if (fp == NULL) {
                E_FATAL_SYSTEM("Unable to open %s for reading", fn);
            }

            tree[p][s] = tr = read_final_tree(fp, pset, n_pset);
            label_leaves(&tr->node[0], &ts_id);
            fclose(fp);

            n_seno += cnt_leaf(&tr->node[0]);
        }
    }

    assert(n_seno == (ts_id - imdef->n_tied_ci_state));

    /* NOTE(review): the value logged here is ts_id (n_tied_ci_state +
     * n_seno), not n_seno itself -- confirm this is intended. */
    E_INFO("n_seno= %u\n", ts_id);

    *out_n_seno = n_seno;

    return S3_SUCCESS;
}
int main(int argc, char *argv[]) { model_def_t *imdef; model_def_t *omdef; pset_t *pset; uint32 n_pset; dtree_t ***tree; uint32 n_seno; uint32 n_ci; uint32 n_acmod; uint32 p; uint32 s; model_def_entry_t *idefn, *odefn; acmod_id_t b, l, r; word_posn_t wp; int allphones; parse_cmd_ln(argc, argv); if (init(&imdef, &pset, &n_pset, &tree, &n_seno) != S3_SUCCESS) return 1; omdef = (model_def_t *)ckd_calloc(1, sizeof(model_def_t)); omdef->acmod_set = imdef->acmod_set; /* same set of acoustic models */ omdef->n_total_state = imdef->n_total_state; omdef->n_tied_ci_state = imdef->n_tied_ci_state; omdef->n_tied_state = imdef->n_tied_ci_state + n_seno; omdef->n_tied_tmat = imdef->n_tied_tmat; omdef->defn = (model_def_entry_t *)ckd_calloc(imdef->n_defn, sizeof(model_def_entry_t)); /* * Define the context-independent models */ n_ci = acmod_set_n_ci(imdef->acmod_set); for (p = 0; p < n_ci; p++) { idefn = &imdef->defn[p]; odefn = &omdef->defn[p]; odefn->p = idefn->p; odefn->tmat = idefn->tmat; odefn->state = ckd_calloc(idefn->n_state, sizeof(uint32)); odefn->n_state = idefn->n_state; for (s = 0; s < idefn->n_state; s++) { if (idefn->state[s] == NO_ID) odefn->state[s] = NO_ID; else { odefn->state[s] = idefn->state[s]; } } } /* * Define the rest of the models */ allphones = cmd_ln_int32("-allphones"); n_acmod = acmod_set_n_acmod(omdef->acmod_set); for (; p < n_acmod; p++) { b = acmod_set_base_phone(omdef->acmod_set, p); assert(p != b); idefn = &imdef->defn[p]; odefn = &omdef->defn[p]; odefn->p = idefn->p; odefn->tmat = idefn->tmat; odefn->state = ckd_calloc(idefn->n_state, sizeof(uint32)); odefn->n_state = idefn->n_state; for (s = 0; s < idefn->n_state; s++) { if (idefn->state[s] == NO_ID) /* Non-emitting state */ odefn->state[s] = NO_ID; else { uint32 bb; /* emitting state: find the tied state */ acmod_set_id2tri(omdef->acmod_set, &b, &l, &r, &wp, p); #ifdef HORRIBLY_VERBOSE fprintf(stderr, "%s %u ", acmod_set_id2name(omdef->acmod_set, p), s); #endif bb = allphones ? 
0 : b; odefn->state[s] = tied_state(&tree[bb][s]->node[0], b, l, r, wp, pset); #ifdef HORRIBLY_VERBOSE fprintf(stderr, "\t-> %u\n", odefn->state[s]); fprintf(stderr, "\n"); #endif } } } if (model_def_write(omdef, cmd_ln_str("-omoddeffn")) != S3_SUCCESS) { return 1; } return 0; }
int main(int argc, char *argv[]) { model_def_t *mdef; model_def_entry_t *defn; uint32 n_defn; uint32 *cluster_offset; uint32 max_int; uint32 *state_of; uint32 max_state; uint32 sstate; int32 i; uint32 j; uint32 n_base_phone; acmod_id_t base; acmod_id_t p; float32 ***out; uint32 **smap; char comment[4192]; time_t t; parse_cmd_ln(argc, argv); printf("%s(%d): Reading model definition file %s\n", __FILE__, __LINE__, (const char *)cmd_ln_access("-moddeffn")); if (model_def_read(&mdef, cmd_ln_access("-moddeffn")) != S3_SUCCESS) { exit(1); } defn = mdef->defn; n_defn = mdef->n_defn; printf("%s(%d): %d models defined\n", __FILE__, __LINE__, n_defn); smap = ckd_calloc(n_defn, sizeof(uint32 *)); n_base_phone = acmod_set_n_ci(mdef->acmod_set); cluster_offset = ckd_calloc(n_base_phone+1, sizeof(uint32)); max_int = 0; --max_int; /* underflow offset values to max value */ for (i = 0; i < n_base_phone; i++) { cluster_offset[i] = max_int; } for (i = 0, max_state = 0; i < n_defn; i++) { for (j = 0; j < defn[i].n_state; j++) { sstate = defn[i].state[j]; if ((sstate != TYING_NON_EMITTING) && (defn[i].state[j] > max_state)) max_state = defn[i].state[j]; } } /* record the total # of senones */ cluster_offset[n_base_phone] = max_state+1; state_of = ckd_calloc(max_state+1, sizeof(uint32)); for (i = 0; i <= max_state; i++) state_of[i] = NO_STATE; for (i = 0; i < n_defn; i++) { p = defn[i].p; base = acmod_set_base_phone(mdef->acmod_set, defn[i].p); smap[i] = defn[i].state; for (j = 0; j < defn[i].n_state; j++) { sstate = defn[i].state[j]; if (sstate != TYING_NON_EMITTING) { if (state_of[sstate] == NO_STATE) state_of[sstate] = j; else if (state_of[sstate] != j) { printf("%s %d appears as %d%s and %d%s model states\n", acmod_set_id2name(mdef->acmod_set, acmod_set_base_phone(mdef->acmod_set, defn[i].p)), sstate, state_of[sstate], ord_suff(state_of[sstate]), j, ord_suff(j)); } if ((p != base) && (cluster_offset[base] > sstate)) { cluster_offset[base] = sstate; } } } } /* any untouched 
CLUSTER_OFFSET's implies a base phone without any CD states. So offset is same as next one */ for (i = (n_base_phone - 1); i >= 0 ; i--) { if (cluster_offset[i] == max_int) cluster_offset[i] = cluster_offset[i+1]; } fflush(stdout); for (i = 0; i < n_base_phone; i++) { if (cluster_offset[i] != max_int) { fprintf(stderr, "%s(%d): %s offset %d\n", __FILE__, __LINE__, acmod_set_id2name(mdef->acmod_set, i), cluster_offset[i]); } else { fprintf(stderr, "%s(%d): %s <no CD states>\n", __FILE__, __LINE__, acmod_set_id2name(mdef->acmod_set, i)); } } fflush(stderr); printf("%s(%d): Reading senone weights in %s with floor %e\n", __FILE__, __LINE__, (const char *)cmd_ln_access("-hmmdir"), *(float32 *)cmd_ln_access("-floor")); out = s2_read_seno_3(mdef->acmod_set, cluster_offset, cmd_ln_access("-hmmdir"), (*(int32 *)cmd_ln_access("-ci2cd") ? NULL : smap), *(float32 *)cmd_ln_access("-floor"), state_of); t = time(NULL); sprintf(comment, "Generated on %s\n\tmoddeffn: %s\n\tfloor: %e\n\thmmdir: %s\n\n\n\n\n\n\n\n\n", ctime(&t), (const char *)cmd_ln_access("-moddeffn"), *(float32 *)cmd_ln_access("-floor"), (const char *)cmd_ln_access("-hmmdir")); fflush(stdout); fprintf(stderr, "%s(%d): writing %s\n", __FILE__, __LINE__, (const char *)cmd_ln_access("-mixwfn")); fflush(stderr); if (s3mixw_write(cmd_ln_access("-mixwfn"), out, cluster_offset[n_base_phone], /* total # states */ S2_N_FEATURE, S2_N_CODEWORD) != S3_SUCCESS) { fflush(stdout); fprintf(stderr, "%s(%d): couldn't write mixture weight file\n", __FILE__, __LINE__); perror(cmd_ln_access("-mixwfn")); fflush(stderr); } ckd_free(state_of); ckd_free(cluster_offset); return 0; }
/*
 * Rewrite the state map, smap, in place so that model states map
 * one-to-one onto consecutive integers starting at 0.
 *
 * On entry cluster_size[b] holds a size for each CI phone b; on return it
 * has been turned into an offset table with one extra trailing entry,
 * cluster_size[n_ci], holding the total number of states.  The caller
 * must therefore provide room for n_ci + 1 entries.
 *
 * *out_state_of receives a newly allocated array that gives, for every
 * global id assigned here, the model-state index it was assigned at.
 *
 * Always returns S3_SUCCESS.
 */
int
s2_convert_smap_to_global(acmod_set_t *acmod_set,
                          uint32 **smap,
                          uint32 **out_state_of,
                          uint32 *cluster_size)
{
    uint32 n_ci = acmod_set_n_ci(acmod_set);
    uint32 n_model = acmod_set_n_multi(acmod_set) + n_ci;
    uint32 n_state = S2_N_STATE - 1;    /* states mapped per model */
    uint32 running;                     /* running offset / state total */
    uint32 size;
    uint32 next_ci_id;
    uint32 ci;
    uint32 st;
    uint32 *state_of;
    acmod_id_t id;
    acmod_id_t base;

    /* The CI states occupy the first n_ci * (S2_N_STATE-1) ids. */
    running = n_ci * n_state;

    E_INFO("|CI states| == %d\n", running);

    /* Turn the per-phone sizes into cumulative offsets. */
    for (ci = 0; ci < n_ci; ci++) {
        size = cluster_size[ci];
        cluster_size[ci] = running;
        running += size;

        E_INFO("|%s| == %d (%d)\n",
               acmod_set_id2name(acmod_set, ci), size, running);
    }
    cluster_size[n_ci] = running;       /* total # of states */

    state_of = ckd_calloc(running, sizeof(uint32));

    /* CI phones: each state gets its own id, numbered consecutively. */
    next_ci_id = 0;
    for (id = 0; id < n_ci; id++) {
        for (st = 0; st < n_state; st++) {
            smap[id][st] = next_ci_id;
            state_of[next_ci_id] = st;
            next_ci_id++;
        }
    }

    /* Remaining models: shift each base-phone-relative id by the base
     * phone's offset to obtain the global id. */
    for (; id < n_model; id++) {
        for (st = 0; st < n_state; st++) {
            base = acmod_set_base_phone(acmod_set, id);

            if (smap[id][st] != TYING_NO_ID) {
                smap[id][st] += cluster_size[base];
                state_of[smap[id][st]] = st;
                continue;
            }

            E_WARN("%s<%d> is unmapped, approximating with CI state <%d>.\n",
                   acmod_set_id2name(acmod_set, id), st, st);

            /* Fall back to the base phone's state; its state_of[] entry
             * was already filled in by the CI pass above. */
            smap[id][st] = smap[base][st];
        }
    }

    *out_state_of = state_of;

    return S3_SUCCESS;
}