/**
 * Rebuild the phone and state levels of an alignment from its word
 * level using context-independent phones only.
 *
 * The phone (sseq) and state vectors are emptied first.  Each word is
 * then expanded into one phone entry per dictionary phone, and each
 * phone entry into one state entry per emitting state.  Every child
 * entry inherits the start and duration of its parent.
 *
 * FIXME: Largely duplicates ps_alignment_populate(); needs refactoring.
 *
 * @param al Alignment whose word level has already been filled in.
 * @return 0 on success, -1 if a phone or state entry could not be added.
 */
int
ps_alignment_populate_ci(ps_alignment_t *al)
{
    dict2pid_t *d2p;
    dict_t *dict;
    bin_mdef_t *mdef;
    int i;

    /* Clear phone and state sequences. */
    ps_alignment_vector_empty(&al->sseq);
    ps_alignment_vector_empty(&al->state);

    /* For each word, expand to phones/senone sequences. */
    d2p = al->d2p;
    dict = d2p->dict;
    mdef = d2p->mdef;
    for (i = 0; i < al->word.n_ent; ++i) {
        ps_alignment_entry_t *went = al->word.seq + i;
        ps_alignment_entry_t *sent;
        int wid = went->id.wid;
        int len = dict_pronlen(dict, wid);
        int j;

        for (j = 0; j < len; ++j) {
            if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
                E_ERROR("Failed to add phone entry!\n");
                return -1;
            }
            /* The senone sequence is looked up from the CI phone alone. */
            sent->id.pid.cipid = dict_pron(dict, wid, j);
            sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
            sent->id.pid.ssid = bin_mdef_pid2ssid(mdef, sent->id.pid.cipid);
            oe_assert(sent->id.pid.ssid != BAD_SSID);
            sent->start = went->start;
            sent->duration = went->duration;
            sent->parent = i;
            /* Link the word to its first phone, exactly as the state
             * loop below links each phone to its first state.  Without
             * this the word entries keep a stale child index. */
            if (j == 0)
                went->child = (uint16)(sent - al->sseq.seq);
        }
    }

    /* For each senone sequence, expand to senones.  (we could do this
     * nested above but this makes it more clear and easier to
     * refactor) */
    for (i = 0; i < al->sseq.n_ent; ++i) {
        ps_alignment_entry_t *pent = al->sseq.seq + i;
        ps_alignment_entry_t *sent;
        int j;

        for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
            if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
                E_ERROR("Failed to add state entry!\n");
                return -1;
            }
            sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
            oe_assert(sent->id.senid != BAD_SENID);
            sent->start = pent->start;
            sent->duration = pent->duration;
            sent->parent = i;
            /* Remember where this phone's states begin. */
            if (j == 0)
                pent->child = (uint16)(sent - al->state.seq);
        }
    }

    return 0;
}
/**
 * Read a model definition from a file.
 *
 * The file is first offered to bin_mdef_read_text(); if that returns a
 * model it is returned unchanged.  Otherwise the file is opened as a
 * binary model definition: the byte-order marker, format version and
 * header length are read, the format descriptor is skipped, and the
 * ten 32-bit header fields are loaded.  The remaining data is either
 * memory-mapped or read into a single heap buffer, and the CI name,
 * CD tree, phone and senone-sequence tables are set up as pointers
 * into that data.  Finally the CD-to-CI senone mappings are built.
 *
 * @param config   Configuration consulted for "-mmap"; when NULL,
 *                 memory-mapping is attempted by default.  It is passed
 *                 as-is to bin_mdef_read_text().
 * @param filename Path of the model definition file.
 * @return The newly read model (refcnt 1 for the binary path), or NULL
 *         if the file cannot be opened, is newer than the library, or
 *         its header cannot be read.  A short read of the data section
 *         in the non-mmap path is reported with E_FATAL.
 */
bin_mdef_t *
bin_mdef_read(cmd_ln_t *config, const char *filename)
{
    bin_mdef_t *m;
    FILE *fh;
    size_t tree_start;
    int32 val, i, swap, pos, end;
    int32 *sseq_size;
    int do_mmap;

    /* Try to read it as text first. */
    if ((m = bin_mdef_read_text(config, filename)) != NULL)
        return m;

    E_INFO("Reading binary model definition: %s\n", filename);
    if ((fh = fopen(filename, "rb")) == NULL)
        return NULL;

    if (fread(&val, 4, 1, fh) != 1) {
        fclose(fh);
        E_ERROR_SYSTEM("Failed to read byte-order marker from %s\n",
                       filename);
        return NULL;
    }
    swap = 0;
    if (val == BIN_MDEF_OTHER_ENDIAN) {
        swap = 1;
        E_INFO("Must byte-swap %s\n", filename);
    }
    if (fread(&val, 4, 1, fh) != 1) {
        fclose(fh);
        E_ERROR_SYSTEM("Failed to read version from %s\n", filename);
        return NULL;
    }
    if (swap)
        SWAP_INT32(&val);
    if (val > BIN_MDEF_FORMAT_VERSION) {
        E_ERROR("File format version %d for %s is newer than library\n",
                val, filename);
        fclose(fh);
        return NULL;
    }
    if (fread(&val, 4, 1, fh) != 1) {
        fclose(fh);
        E_ERROR_SYSTEM("Failed to read header length from %s\n", filename);
        return NULL;
    }
    if (swap)
        SWAP_INT32(&val);
    /* Skip format descriptor.  A negative length or a failed seek
     * means the header is unusable, so give up rather than parse the
     * fields that follow from the wrong offset. */
    if (val < 0 || fseek(fh, val, SEEK_CUR) != 0) {
        fclose(fh);
        E_ERROR("Failed to skip format descriptor in %s\n", filename);
        return NULL;
    }

    /* Finally allocate it. */
    m = ckd_calloc(1, sizeof(*m));
    m->refcnt = 1;

    /* Check these, to make gcc/glibc shut up. */
#define FREAD_SWAP32_CHK(dest)                                          \
    if (fread((dest), 4, 1, fh) != 1) {                                 \
        fclose(fh);                                                     \
        ckd_free(m);                                                    \
        E_ERROR_SYSTEM("Failed to read %s from %s\n", #dest, filename); \
        return NULL;                                                    \
    }                                                                   \
    if (swap) SWAP_INT32(dest);

    FREAD_SWAP32_CHK(&m->n_ciphone);
    FREAD_SWAP32_CHK(&m->n_phone);
    FREAD_SWAP32_CHK(&m->n_emit_state);
    FREAD_SWAP32_CHK(&m->n_ci_sen);
    FREAD_SWAP32_CHK(&m->n_sen);
    FREAD_SWAP32_CHK(&m->n_tmat);
    FREAD_SWAP32_CHK(&m->n_sseq);
    FREAD_SWAP32_CHK(&m->n_ctx);
    FREAD_SWAP32_CHK(&m->n_cd_tree);
    FREAD_SWAP32_CHK(&m->sil);

    /* CI names are first in the file. */
    m->ciname = ckd_calloc(m->n_ciphone, sizeof(*m->ciname));

    /* Decide whether to read in the whole file or mmap it. */
    do_mmap = config ? cmd_ln_boolean_r(config, "-mmap") : TRUE;
    /* The tables are byte-swapped in place below, so an other-endian
     * file cannot be mapped.  Only warn when mmap was actually
     * requested. */
    if (do_mmap && swap) {
        E_WARN("-mmap specified, but mdef is other-endian. Will not memory-map.\n");
        do_mmap = FALSE;
    }
    /* Actually try to mmap it. */
    if (do_mmap) {
        m->filemap = mmio_file_read(filename);
        if (m->filemap == NULL)
            do_mmap = FALSE;
    }
    pos = ftell(fh);
    if (do_mmap) {
        /* Get the base pointer from the memory map. */
        m->ciname[0] = (char *)mmio_file_ptr(m->filemap) + pos;
        /* Success! */
        m->alloc_mode = BIN_MDEF_ON_DISK;
    }
    else {
        /* Read everything into memory. */
        m->alloc_mode = BIN_MDEF_IN_MEMORY;
        fseek(fh, 0, SEEK_END);
        end = ftell(fh);
        fseek(fh, pos, SEEK_SET);
        m->ciname[0] = ckd_malloc(end - pos);
        if (fread(m->ciname[0], 1, end - pos, fh) != end - pos)
            E_FATAL("Failed to read %d bytes of data from %s\n", end - pos, filename);
    }

    /* The names are consecutive NUL-terminated strings. */
    for (i = 1; i < m->n_ciphone; ++i)
        m->ciname[i] = m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1;

    /* Skip past the padding (round up to a multiple of 4 bytes). */
    tree_start =
        m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1 - m->ciname[0];
    tree_start = (tree_start + 3) & ~3;
    m->cd_tree = (cd_tree_t *) (m->ciname[0] + tree_start);
    if (swap) {
        for (i = 0; i < m->n_cd_tree; ++i) {
            SWAP_INT16(&m->cd_tree[i].ctx);
            SWAP_INT16(&m->cd_tree[i].n_down);
            SWAP_INT32(&m->cd_tree[i].c.down);
        }
    }
    /* The phone table follows the CD tree. */
    m->phone = (mdef_entry_t *) (m->cd_tree + m->n_cd_tree);
    if (swap) {
        for (i = 0; i < m->n_phone; ++i) {
            SWAP_INT32(&m->phone[i].ssid);
            SWAP_INT32(&m->phone[i].tmat);
        }
    }
    /* Then the senone-sequence size word, followed by the sequences. */
    sseq_size = (int32 *) (m->phone + m->n_phone);
    if (swap)
        SWAP_INT32(sseq_size);
    m->sseq = ckd_calloc(m->n_sseq, sizeof(*m->sseq));
    m->sseq[0] = (uint16 *) (sseq_size + 1);
    if (swap) {
        for (i = 0; i < *sseq_size; ++i)
            SWAP_INT16(m->sseq[0] + i);
    }
    if (m->n_emit_state) {
        /* Fixed number of states: rows are evenly spaced. */
        for (i = 1; i < m->n_sseq; ++i)
            m->sseq[i] = m->sseq[0] + i * m->n_emit_state;
    }
    else {
        /* Variable number of states: per-sequence lengths follow the
         * sequence data. */
        m->sseq_len = (uint8 *) (m->sseq[0] + *sseq_size);
        for (i = 1; i < m->n_sseq; ++i)
            m->sseq[i] = m->sseq[i - 1] + m->sseq_len[i - 1];
    }

    /* Now build the CD-to-CI mappings using the senone sequences.
     * This is the only really accurate way to do it, though it is
     * still inaccurate in the case of heterogeneous topologies or
     * cross-state tying. */
    m->cd2cisen = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->cd2cisen));
    m->sen2cimap = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->sen2cimap));

    /* Default mappings (identity, none) */
    for (i = 0; i < m->n_ci_sen; ++i)
        m->cd2cisen[i] = i;
    for (; i < m->n_sen; ++i)
        m->cd2cisen[i] = -1;
    for (i = 0; i < m->n_sen; ++i)
        m->sen2cimap[i] = -1;
    for (i = 0; i < m->n_phone; ++i) {
        int32 j, ssid = m->phone[i].ssid;

        for (j = 0; j < bin_mdef_n_emit_state_phone(m, i); ++j) {
            int s = bin_mdef_sseq2sen(m, ssid, j);
            int ci = bin_mdef_pid2ci(m, i);
            /* Take the first one and warn if we have cross-state tying. */
            if (m->sen2cimap[s] == -1)
                m->sen2cimap[s] = ci;
            if (m->sen2cimap[s] != ci)
                E_WARN
                    ("Senone %d is shared between multiple base phones\n",
                     s);

            /* State index j is only valid for the CI phone when it is
             * strictly below that phone's state count; j equal to the
             * count would index one past its senone sequence. */
            if (j >= bin_mdef_n_emit_state_phone(m, ci))
                E_WARN("CD phone %d has fewer states than CI phone %d\n",
                       i, ci);
            else
                m->cd2cisen[s] =
                    bin_mdef_sseq2sen(m, m->phone[ci].ssid, j);
        }
    }

    /* Set the silence phone. */
    m->sil = bin_mdef_ciphone_id(m, S3_SILENCE_CIPHONE);

    E_INFO
        ("%d CI-phone, %d CD-phone, %d emitstate/phone, %d CI-sen, %d Sen, %d Sen-Seq\n",
         m->n_ciphone, m->n_phone - m->n_ciphone, m->n_emit_state,
         m->n_ci_sen, m->n_sen, m->n_sseq);
    fclose(fh);
    return m;
}
/**
 * Rebuild the phone and state levels of an alignment from its word
 * level, choosing senone sequences according to the neighbouring
 * phones.
 *
 * The phone (sseq) and state vectors are emptied first.  For every
 * word, the first phone uses the left context (the silence phone for
 * the first word, otherwise the last phone of the previous word),
 * internal phones use the word-internal lookup, and the last phone
 * uses the right context (the first phone of the next word, or the
 * silence phone after the final word).  Each phone is then expanded
 * into one state entry per emitting state.  Children inherit the start
 * and duration of their parent.
 *
 * @param al Alignment whose word level has already been filled in.
 * @return 0 on success, -1 if a phone or state entry could not be added.
 */
int
ps_alignment_populate(ps_alignment_t *al)
{
    dict2pid_t *d2p;
    dict_t *dict;
    bin_mdef_t *mdef;
    int widx, pidx, left_ci;

    /* Start from empty phone and state levels. */
    ps_alignment_vector_empty(&al->sseq);
    ps_alignment_vector_empty(&al->state);

    d2p = al->d2p;
    dict = d2p->dict;
    mdef = d2p->mdef;

    /* Words -> phones.  The utterance starts with silence on the left. */
    left_ci = bin_mdef_silphone(mdef);
    for (widx = 0; widx < al->word.n_ent; ++widx) {
        ps_alignment_entry_t *word = &al->word.seq[widx];
        ps_alignment_entry_t *phone;
        int wid = word->id.wid;
        int n_phones = dict_pronlen(dict, wid);
        int k, right_ci;

        /* Right context: next word's first phone, or silence at the end. */
        right_ci = (widx < al->word.n_ent - 1)
            ? dict_first_phone(dict, al->word.seq[widx + 1].id.wid)
            : bin_mdef_silphone(mdef);

        /* Word-initial phone; the word points at it as its first child. */
        phone = ps_alignment_vector_grow_one(&al->sseq);
        if (phone == NULL) {
            E_ERROR("Failed to add phone entry!\n");
            return -1;
        }
        word->child = (uint16)(phone - al->sseq.seq);
        phone->parent = widx;
        phone->start = word->start;
        phone->duration = word->duration;
        phone->id.pid.cipid = dict_first_phone(dict, wid);
        phone->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, phone->id.pid.cipid);
        if (n_phones != 1) {
            /* Multi-phone word: only the left context crosses the word. */
            phone->id.pid.ssid =
                dict2pid_ldiph_lc(d2p, phone->id.pid.cipid,
                                  dict_second_phone(dict, wid), left_ci);
        }
        else {
            /* Single-phone word: both contexts come from neighbours. */
            phone->id.pid.ssid =
                dict2pid_lrdiph_rc(d2p, phone->id.pid.cipid,
                                   left_ci, right_ci);
        }
        oe_assert(phone->id.pid.ssid != BAD_SSID);

        /* Word-internal phones. */
        for (k = 1; k < n_phones - 1; ++k) {
            phone = ps_alignment_vector_grow_one(&al->sseq);
            if (phone == NULL) {
                E_ERROR("Failed to add phone entry!\n");
                return -1;
            }
            phone->parent = widx;
            phone->start = word->start;
            phone->duration = word->duration;
            phone->id.pid.cipid = dict_pron(dict, wid, k);
            phone->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, phone->id.pid.cipid);
            phone->id.pid.ssid = dict2pid_internal(d2p, wid, k);
            oe_assert(phone->id.pid.ssid != BAD_SSID);
        }

        /* Word-final phone, present only when k stopped short of the
         * pronunciation length. */
        if (k < n_phones) {
            xwdssid_t *rssid;

            oe_assert(k == n_phones - 1);
            phone = ps_alignment_vector_grow_one(&al->sseq);
            if (phone == NULL) {
                E_ERROR("Failed to add phone entry!\n");
                return -1;
            }
            phone->parent = widx;
            phone->start = word->start;
            phone->duration = word->duration;
            phone->id.pid.cipid = dict_last_phone(dict, wid);
            phone->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, phone->id.pid.cipid);
            rssid = dict2pid_rssid(d2p, phone->id.pid.cipid,
                                   dict_second_last_phone(dict, wid));
            phone->id.pid.ssid = rssid->ssid[rssid->cimap[right_ci]];
            oe_assert(phone->id.pid.ssid != BAD_SSID);
        }

        /* This word's last phone is the next word's left context. */
        left_ci = dict_last_phone(dict, wid);
    }

    /* Phones -> states, kept as a separate pass for clarity. */
    for (pidx = 0; pidx < al->sseq.n_ent; ++pidx) {
        ps_alignment_entry_t *phone = &al->sseq.seq[pidx];
        ps_alignment_entry_t *state;
        int s;

        for (s = 0; s < bin_mdef_n_emit_state(mdef); ++s) {
            state = ps_alignment_vector_grow_one(&al->state);
            if (state == NULL) {
                E_ERROR("Failed to add state entry!\n");
                return -1;
            }
            state->id.senid = bin_mdef_sseq2sen(mdef, phone->id.pid.ssid, s);
            oe_assert(state->id.senid != BAD_SENID);
            state->parent = pidx;
            state->start = phone->start;
            state->duration = phone->duration;
            /* The phone points at its first state. */
            if (s == 0)
                phone->child = (uint16)(state - al->state.seq);
        }
    }

    return 0;
}