/**
 * Create a state-alignment search over an existing alignment.
 *
 * Allocates the search object, initialises its base search with the
 * dictionary and dict2pid taken from the alignment, creates an HMM
 * context from the acoustic model, and sets up one HMM per phone entry
 * of the alignment.
 *
 * @param name   Name handed to ps_search_init().
 * @param config Configuration handed to ps_search_init().
 * @param acmod  Acoustic model; its mdef and tmat feed the HMM context.
 * @param al     Alignment to search over.  The pointer is stored in the
 *               search object, not copied.
 * @return ps_search_base() of the new object, or NULL if
 *         hmm_context_init() fails.
 */
ps_search_t *
state_align_search_init(const char *name,
                        cmd_ln_t *config,
                        acmod_t *acmod,
                        ps_alignment_t *al)
{
    state_align_search_t *sas;
    ps_alignment_iter_t *phone_it;
    hmm_t *cur;

    sas = ckd_calloc(1, sizeof(*sas));
    ps_search_init(ps_search_base(sas), &state_align_search_funcs,
                   PS_SEARCH_TYPE_STATE_ALIGN, name,
                   config, acmod, al->d2p->dict, al->d2p);

    sas->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
                                   acmod->tmat->tp, NULL,
                                   acmod->mdef->sseq);
    if (sas->hmmctx == NULL) {
        /* NOTE(review): only the container is released here; whatever
         * ps_search_init() set up in the base search is not torn down
         * on this path -- confirm whether that leaks. */
        ckd_free(sas);
        return NULL;
    }
    sas->al = al;

    /* Size the HMM array from the phone level of the alignment. */
    sas->n_phones = ps_alignment_n_phones(al);
    sas->n_emit_state = ps_alignment_n_states(al);
    sas->hmms = ckd_calloc(sas->n_phones, sizeof(*sas->hmms));

    /* Walk the phone entries and the HMM array in lock step. */
    cur = sas->hmms;
    phone_it = ps_alignment_phones(al);
    while (phone_it) {
        ps_alignment_entry_t *phone = ps_alignment_iter_get(phone_it);

        hmm_init(sas->hmmctx, cur, FALSE,
                 phone->id.pid.ssid, phone->id.pid.tmatid);
        ++cur;
        phone_it = ps_alignment_iter_next(phone_it);
    }

    return ps_search_base(sas);
}
/**
 * Re-initialise a phone loop search for a (possibly new) dictionary.
 *
 * Rebuilds the HMM context, the penalty storage and the per-CI-phone
 * HMM array, then re-reads the -pl_* parameters from the search's
 * configuration.
 *
 * @param search Phone loop search (a phone_loop_search_t viewed through
 *               its base type).
 * @param dict   Dictionary passed on to ps_search_base_reinit().
 * @param d2p    dict2pid passed on to ps_search_base_reinit().
 * @return 0 on success, -1 if the HMM context cannot be created.
 */
static int
phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
{
    phone_loop_search_t *pls = (phone_loop_search_t *)search;
    cmd_ln_t *config = ps_search_config(search);
    acmod_t *acmod = ps_search_acmod(search);
    int ph;

    /* Let the base class swap in the new dict/dict2pid. */
    ps_search_base_reinit(search, dict, d2p);

    /* Replace the HMM context. */
    if (pls->hmmctx != NULL)
        hmm_context_free(pls->hmmctx);
    pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
                                   acmod->tmat->tp, NULL,
                                   acmod->mdef->sseq);
    if (!pls->hmmctx)
        return -1;

    /* Replace the penalty storage: one flat vector plus a window of them. */
    pls->n_phones = bin_mdef_n_ciphone(acmod->mdef);
    pls->window = cmd_ln_int32_r(config, "-pl_window");
    if (pls->penalties != NULL)
        ckd_free(pls->penalties);
    pls->penalties = (int32 *)ckd_calloc(pls->n_phones,
                                         sizeof(*pls->penalties));
    if (pls->pen_buf != NULL)
        ckd_free_2d(pls->pen_buf);
    pls->pen_buf = (int32 **)ckd_calloc_2d(pls->window, pls->n_phones,
                                           sizeof(**pls->pen_buf));

    /* Drop any previous phone HMMs.
     * NOTE(review): this runs after the old context has been freed and
     * iterates up to the n_phones assigned just above, not the count the
     * old array was allocated with -- confirm both are harmless. */
    if (pls->hmms != NULL) {
        ph = 0;
        while (ph < pls->n_phones) {
            hmm_deinit((hmm_t *)(pls->hmms + ph));
            ++ph;
        }
        ckd_free(pls->hmms);
    }

    /* One HMM per CI phone. */
    pls->hmms = (hmm_t *)ckd_calloc(pls->n_phones, sizeof(*pls->hmms));
    ph = 0;
    while (ph < pls->n_phones) {
        hmm_init(pls->hmmctx, (hmm_t *)(pls->hmms + ph), FALSE,
                 bin_mdef_pid2ssid(acmod->mdef, ph),
                 bin_mdef_pid2tmatid(acmod->mdef, ph));
        ++ph;
    }

    /* Search parameters; the log-domain ones are scaled by SENSCR_SHIFT. */
    pls->penalty_weight = cmd_ln_float64_r(config, "-pl_weight");
    pls->beam =
        logmath_log(acmod->lmath,
                    cmd_ln_float64_r(config, "-pl_beam")) >> SENSCR_SHIFT;
    pls->pbeam =
        logmath_log(acmod->lmath,
                    cmd_ln_float64_r(config, "-pl_pbeam")) >> SENSCR_SHIFT;
    pls->pip =
        logmath_log(acmod->lmath,
                    cmd_ln_float32_r(config, "-pl_pip")) >> SENSCR_SHIFT;
    E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n",
           pls->beam, pls->pbeam, pls->pip);

    return 0;
}
/**
 * Write a text dump of a dict2pid structure to a stream.
 *
 * Three sections are emitted, each introduced by a "#" header line:
 *  - INTERNAL: for every dictionary word, dict2pid_internal() for each
 *    position of its pronunciation;
 *  - LDIPH_LC: every (base, right, left) entry of the ldiph_lc table for
 *    which IS_S3SSID() holds;
 *  - SSEQ: every row of the model definition's sseq table.
 * The stream is flushed before returning.
 *
 * @param fp  Output stream.
 * @param d2p Structure to dump; its mdef and dict members are read.
 */
void
dict2pid_dump(FILE * fp, dict2pid_t * d2p)
{
    bin_mdef_t *mdef = d2p->mdef;
    dict_t *dict = d2p->dict;
    int32 wid, pos, n_pos;
    int32 base, right, left;
    int32 seq, st;

    /* Word-internal senone sequence ids. */
    fprintf(fp, "# INTERNAL (wd comssid ssid ssid ... ssid comssid)\n");
    wid = 0;
    while (wid < dict_size(dict)) {
        fprintf(fp, "%30s ", dict_wordstr(dict, wid));

        n_pos = dict_pronlen(dict, wid);
        pos = 0;
        while (pos < n_pos) {
            fprintf(fp, " %5d", dict2pid_internal(d2p, wid, pos));
            pos++;
        }
        fprintf(fp, "\n");
        wid++;
    }
    fprintf(fp, "#\n");

    /* Left-context diphone table; entries failing IS_S3SSID are skipped. */
    fprintf(fp, "# LDIPH_LC (b r l ssid)\n");
    for (base = 0; base < bin_mdef_n_ciphone(mdef); base++) {
        for (right = 0; right < bin_mdef_n_ciphone(mdef); right++) {
            for (left = 0; left < bin_mdef_n_ciphone(mdef); left++) {
                if (!IS_S3SSID(d2p->ldiph_lc[base][right][left]))
                    continue;
                fprintf(fp, "%6s %6s %6s %5d\n",
                        bin_mdef_ciphone_str(mdef, (s3cipid_t) base),
                        bin_mdef_ciphone_str(mdef, (s3cipid_t) right),
                        bin_mdef_ciphone_str(mdef, (s3cipid_t) left),
                        d2p->ldiph_lc[base][right][left]);
            }
        }
    }
    fprintf(fp, "#\n");

    /* Senone sequence table, one row per sequence id. */
    fprintf(fp, "# SSEQ %d (senid senid ...)\n", mdef->n_sseq);
    seq = 0;
    while (seq < mdef->n_sseq) {
        fprintf(fp, "%5d ", seq);
        st = 0;
        while (st < bin_mdef_n_emit_state(mdef)) {
            fprintf(fp, " %5d", mdef->sseq[seq][st]);
            st++;
        }
        fprintf(fp, "\n");
        seq++;
    }
    fprintf(fp, "#\n");

    fprintf(fp, "# END\n");
    fflush(fp);
}
int main(int argc, char *argv[]) { dict_t *dict; dict2pid_t *d2p; bin_mdef_t *mdef; glextree_t *tree; hmm_context_t *ctx; logmath_t *lmath; tmat_t *tmat; int i; TEST_ASSERT(mdef = bin_mdef_read(NULL, MODELDIR "/hmm/en_US/hub4wsj_sc_8k/mdef")); TEST_ASSERT(dict = dict_init(cmd_ln_init(NULL, NULL, FALSE, "-dict", DATADIR "/turtle.dic", "-dictcase", "no", NULL), mdef)); TEST_ASSERT(d2p = dict2pid_build(mdef, dict)); lmath = logmath_init(1.0001, 0, TRUE); tmat = tmat_init(MODELDIR "/hmm/en_US/hub4wsj_sc_8k/transition_matrices", lmath, 1e-5, TRUE); ctx = hmm_context_init(bin_mdef_n_emit_state(mdef), tmat->tp, NULL, mdef->sseq); TEST_ASSERT(tree = glextree_build(ctx, dict, d2p, NULL, NULL)); /* Check that a path exists for all dictionary words. */ for (i = 0; i < dict_size(dict); ++i) TEST_ASSERT(glextree_has_word(tree, i)); dict_free(dict); dict2pid_free(d2p); bin_mdef_free(mdef); tmat_free(tmat); hmm_context_free(ctx); glextree_free(tree); return 0; }
/* FIXME: Largely duplicates ps_alignment_populate(); needs refactoring. */
/**
 * Rebuild the phone and state levels of an alignment from its word
 * level, using context-independent phones only.
 *
 * The existing phone (sseq) and state vectors are emptied.  Every word is
 * then expanded to one phone entry per position of its pronunciation,
 * and every phone entry to one state entry per emitting state.  New
 * entries inherit start and duration from their parent and record the
 * parent's index; each parent records the index of its first child.
 *
 * @param al Alignment whose word level is already filled in.
 * @return 0 on success, -1 if a phone or state entry cannot be added.
 */
int
ps_alignment_populate_ci(ps_alignment_t *al)
{
    dict2pid_t *d2p;
    dict_t *dict;
    bin_mdef_t *mdef;
    int i;

    /* Clear phone and state sequences. */
    ps_alignment_vector_empty(&al->sseq);
    ps_alignment_vector_empty(&al->state);

    /* For each word, expand to phones/senone sequences. */
    d2p = al->d2p;
    dict = d2p->dict;
    mdef = d2p->mdef;
    for (i = 0; i < al->word.n_ent; ++i) {
        ps_alignment_entry_t *went = al->word.seq + i;
        ps_alignment_entry_t *sent;
        int wid = went->id.wid;
        int len = dict_pronlen(dict, wid);
        int j;

        for (j = 0; j < len; ++j) {
            if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
                E_ERROR("Failed to add phone entry!\n");
                return -1;
            }
            sent->id.pid.cipid = dict_pron(dict, wid, j);
            sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
            sent->id.pid.ssid = bin_mdef_pid2ssid(mdef, sent->id.pid.cipid);
            oe_assert(sent->id.pid.ssid != BAD_SSID);
            sent->start = went->start;
            sent->duration = went->duration;
            sent->parent = i;
            /* Link the word to its first phone.  The phone vector was
             * just rebuilt, so any previous child index is stale; this
             * mirrors what ps_alignment_populate() does for the first
             * phone of each word. */
            if (j == 0)
                went->child = (uint16)(sent - al->sseq.seq);
        }
    }

    /* For each senone sequence, expand to senones.  (we could do this
     * nested above but this makes it more clear and easier to
     * refactor) */
    for (i = 0; i < al->sseq.n_ent; ++i) {
        ps_alignment_entry_t *pent = al->sseq.seq + i;
        ps_alignment_entry_t *sent;
        int j;

        for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
            if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
                E_ERROR("Failed to add state entry!\n");
                return -1;
            }
            sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
            oe_assert(sent->id.senid != BAD_SENID);
            sent->start = pent->start;
            sent->duration = pent->duration;
            sent->parent = i;
            /* Link the phone to its first state. */
            if (j == 0)
                pent->child = (uint16)(sent - al->state.seq);
        }
    }

    return 0;
}
/**
 * Rebuild the phone and state levels of an alignment from its word
 * level, using context-dependent senone sequences.
 *
 * The existing phone (sseq) and state vectors are emptied.  For each
 * word, the left context is the last phone of the previous word and the
 * right context is the first phone of the next word; the model's silence
 * phone is used at either end of the word sequence.  The first, internal
 * and last phones of a word obtain their senone sequence ids from
 * different dict2pid lookups.  Every phone is then expanded to one state
 * entry per emitting state.
 *
 * @param al Alignment whose word level is already filled in.
 * @return 0 on success, -1 if a phone or state entry cannot be added.
 */
int
ps_alignment_populate(ps_alignment_t *al)
{
    dict2pid_t *d2p;
    dict_t *dict;
    bin_mdef_t *mdef;
    int w, p, left_ctx;

    /* Start from empty phone and state levels. */
    ps_alignment_vector_empty(&al->sseq);
    ps_alignment_vector_empty(&al->state);

    d2p = al->d2p;
    dict = d2p->dict;
    mdef = d2p->mdef;

    /* Words -> phones.  The first word has silence on its left. */
    left_ctx = bin_mdef_silphone(mdef);
    for (w = 0; w < al->word.n_ent; ++w) {
        ps_alignment_entry_t *word_ent = &al->word.seq[w];
        ps_alignment_entry_t *ph_ent;
        int wid = word_ent->id.wid;
        int n_ph = dict_pronlen(dict, wid);
        int pos, right_ctx;

        /* Right context: silence after the final word, otherwise the
         * first phone of the following word. */
        if (!(w < al->word.n_ent - 1))
            right_ctx = bin_mdef_silphone(mdef);
        else
            right_ctx = dict_first_phone(dict, al->word.seq[w + 1].id.wid);

        /* --- First phone of the word. --- */
        ph_ent = ps_alignment_vector_grow_one(&al->sseq);
        if (ph_ent == NULL) {
            E_ERROR("Failed to add phone entry!\n");
            return -1;
        }
        ph_ent->id.pid.cipid = dict_first_phone(dict, wid);
        ph_ent->id.pid.tmatid =
            bin_mdef_pid2tmatid(mdef, ph_ent->id.pid.cipid);
        ph_ent->start = word_ent->start;
        ph_ent->duration = word_ent->duration;
        ph_ent->parent = w;
        word_ent->child = (uint16)(ph_ent - al->sseq.seq);
        if (n_ph != 1) {
            /* Multi-phone word: only the left context crosses the word
             * boundary. */
            ph_ent->id.pid.ssid =
                dict2pid_ldiph_lc(d2p, ph_ent->id.pid.cipid,
                                  dict_second_phone(dict, wid), left_ctx);
        }
        else {
            /* Single-phone word: both contexts come from neighbours. */
            ph_ent->id.pid.ssid =
                dict2pid_lrdiph_rc(d2p, ph_ent->id.pid.cipid,
                                   left_ctx, right_ctx);
        }
        oe_assert(ph_ent->id.pid.ssid != BAD_SSID);

        /* --- Word-internal phones. --- */
        pos = 1;
        while (pos < n_ph - 1) {
            ph_ent = ps_alignment_vector_grow_one(&al->sseq);
            if (ph_ent == NULL) {
                E_ERROR("Failed to add phone entry!\n");
                return -1;
            }
            ph_ent->id.pid.cipid = dict_pron(dict, wid, pos);
            ph_ent->id.pid.tmatid =
                bin_mdef_pid2tmatid(mdef, ph_ent->id.pid.cipid);
            ph_ent->id.pid.ssid = dict2pid_internal(d2p, wid, pos);
            oe_assert(ph_ent->id.pid.ssid != BAD_SSID);
            ph_ent->start = word_ent->start;
            ph_ent->duration = word_ent->duration;
            ph_ent->parent = w;
            ++pos;
        }

        /* --- Last phone; pos < n_ph only when the word has one. --- */
        if (pos < n_ph) {
            xwdssid_t *right_ssids;

            oe_assert(pos == n_ph - 1);
            ph_ent = ps_alignment_vector_grow_one(&al->sseq);
            if (ph_ent == NULL) {
                E_ERROR("Failed to add phone entry!\n");
                return -1;
            }
            ph_ent->id.pid.cipid = dict_last_phone(dict, wid);
            ph_ent->id.pid.tmatid =
                bin_mdef_pid2tmatid(mdef, ph_ent->id.pid.cipid);
            right_ssids = dict2pid_rssid(d2p, ph_ent->id.pid.cipid,
                                         dict_second_last_phone(dict, wid));
            /* cimap translates the right-context phone into an index
             * into the ssid list. */
            ph_ent->id.pid.ssid =
                right_ssids->ssid[right_ssids->cimap[right_ctx]];
            oe_assert(ph_ent->id.pid.ssid != BAD_SSID);
            ph_ent->start = word_ent->start;
            ph_ent->duration = word_ent->duration;
            ph_ent->parent = w;
        }

        /* The next word sees this word's last phone on its left. */
        left_ctx = dict_last_phone(dict, wid);
    }

    /* Phones -> states.  Kept as a separate pass rather than nested in
     * the loop above. */
    for (p = 0; p < al->sseq.n_ent; ++p) {
        ps_alignment_entry_t *phone = &al->sseq.seq[p];
        ps_alignment_entry_t *st_ent;
        int st = 0;

        while (st < bin_mdef_n_emit_state(mdef)) {
            st_ent = ps_alignment_vector_grow_one(&al->state);
            if (st_ent == NULL) {
                E_ERROR("Failed to add state entry!\n");
                return -1;
            }
            st_ent->id.senid =
                bin_mdef_sseq2sen(mdef, phone->id.pid.ssid, st);
            oe_assert(st_ent->id.senid != BAD_SENID);
            st_ent->start = phone->start;
            st_ent->duration = phone->duration;
            st_ent->parent = p;
            /* Link the phone to its first state. */
            if (st == 0)
                phone->child = (uint16)(st_ent - al->state.seq);
            ++st;
        }
    }

    return 0;
}