Beispiel #1
0
/*
 * Hand the transition matrix of every context-independent (CI) phone in
 * acmod_set to put_sdm(), together with out_dir_name and the phone name.
 *
 * tmat         - tmat[i] is the matrix passed to put_sdm() for CI phone i
 * acmod_set    - supplies the number of CI phones and their names
 * out_dir_name - destination forwarded unchanged to put_sdm()
 *
 * Every phone is attempted even after an earlier failure.  Returns
 * S3_SUCCESS when all put_sdm() calls succeeded, S3_ERROR otherwise.
 */
int
s2_write_hmm(float32 ***tmat,
             acmod_set_t *acmod_set,
             const char *out_dir_name)
{
    uint32 n_phone = acmod_set_n_ci(acmod_set);
    uint32 ph;
    int failed = 0;

    E_INFO("Writing %d tied CI transition matrices to %s\n",
           n_phone, out_dir_name);

    for (ph = 0; ph < n_phone; ph++) {
        if (S3_SUCCESS != put_sdm(tmat[ph],
                                  out_dir_name,
                                  acmod_set_id2name(acmod_set, ph))) {
            failed = 1;
        }

#ifdef S2_WRITE_HMM_VERBOSE
        /* The failure flag is sticky, so dumping stops after the first
           failed write. */
        if (failed == 0) {
            print_tmat(stdout, tmat[ph], S2_N_STATE);
            fflush(stdout);
        }
#endif
    }

    return failed ? S3_ERROR : S3_SUCCESS;
}
Beispiel #2
0
int32
model_def_write(model_def_t *mdef,
		const char *fn)
{
    FILE *fp;
    uint32 n_ci;
    uint32 n_acmod;
    acmod_set_t *acmod_set;
    const char **attrib;
    acmod_id_t b;
    acmod_id_t l;
    acmod_id_t r;
    word_posn_t wp;
    uint32 p;
    char *wp2c = WORD_POSN_CHAR_MAP;
    uint32 i;

    fp = fopen(fn, "w");
    if (fp == NULL) {
	E_ERROR_SYSTEM("Unable to open %s for writing", fn);
	return S3_ERROR;
    }

    acmod_set = mdef->acmod_set;

    fprintf(fp, "%s\n", MODEL_DEF_VERSION);
    n_ci = acmod_set_n_ci(acmod_set);
    fprintf(fp, "%u n_base\n", n_ci);
    fprintf(fp, "%u n_tri\n", acmod_set_n_multi(acmod_set));
    fprintf(fp, "%u n_state_map\n", mdef->n_total_state);
    fprintf(fp, "%u n_tied_state\n", mdef->n_tied_state);
    fprintf(fp, "%u n_tied_ci_state\n", mdef->n_tied_ci_state);
    fprintf(fp, "%u n_tied_tmat\n", mdef->n_tied_tmat);
    fprintf(fp, "#\n# Columns definitions\n");
    fprintf(fp, "#%4s %3s %3s %1s %6s %4s %s\n",
	    "base", "lft", "rt", "p", "attrib", "tmat",
	    "     ... state id's ...");

    n_acmod = acmod_set_n_acmod(acmod_set);
    for (p = 0; p < n_ci; p++) {
	fprintf(fp, "%5s %3s %3s %1s",
		acmod_set_id2name(acmod_set, p),
		"-", "-", "-");

	attrib = acmod_set_attrib(acmod_set, p);
	if ((attrib == NULL) || (attrib[0] == NULL)) {
	    fprintf(fp, " %6s", "n/a");
	}
	else {
	    fprintf(fp, " %6s", attrib[0]);

	    for (i = 1; attrib[i]; i++) {
		fprintf(fp, ",%s", attrib[i]);
	    }
	}
	fprintf(fp, " %4d", mdef->defn[p].tmat);

	for (i = 0; i < mdef->defn[p].n_state; i++) {
	    if (mdef->defn[p].state[i] == NO_ID) {
		fprintf(fp, " N");
	    }
	    else {
		fprintf(fp, " %6u", mdef->defn[p].state[i]);
	    }
	}
	fprintf(fp, "\n");
    }

    for (; p < n_acmod; p++) {
	acmod_set_id2tri(acmod_set,
			 &b, &l, &r, &wp,
			 p);
	
	fprintf(fp, "%5s %3s %3s %c",
		acmod_set_id2name(acmod_set, b),
		acmod_set_id2name(acmod_set, l),
		acmod_set_id2name(acmod_set, r),
		wp2c[(uint32)wp]);

	attrib = acmod_set_attrib(acmod_set, p);
	if ((attrib == NULL) || (attrib[0] == NULL)) {
	    fprintf(fp, " %6s", "n/a");
	}
	else {
	    fprintf(fp, " %6s", attrib[0]);

	    for (i = 1; attrib[i]; i++) {
		fprintf(fp, ",%s", attrib[i]);
	    }
	}
	fprintf(fp, " %4d", mdef->defn[p].tmat);

	for (i = 0; i < mdef->defn[p].n_state; i++) {
	    if (mdef->defn[p].state[i] == NO_ID) {
		fprintf(fp, " N");
	    }
	    else {
		fprintf(fp, " %6u", mdef->defn[p].state[i]);
	    }
	}
	fprintf(fp, "\n");
    }

    fclose(fp);

    return S3_SUCCESS;
}
Beispiel #3
0
/*
 * Load everything the state-tying step needs from the files named on
 * the command line.
 *
 * out_imdef  - receives the model definition read from -imoddeffn
 * out_pset   - receives the phone sets read from -psetfn
 * out_n_pset - receives the number of phone sets
 * out_tree   - receives tree[p][s], the decision tree read from
 *              "<-treedir>/<phone>-<s>.dtree"; with -allphones there is
 *              a single entry (p == 0) read from "ALLPHONES-<s>.dtree"
 * out_n_seno - receives the total number of leaves over all trees read
 *
 * Phones carrying the "filler" attribute are skipped unless -allphones
 * is set.  Trees are read for states 0 .. n_state-2 only.  Leaves are
 * labelled through label_leaves() with a counter that starts at the
 * input definition's n_tied_ci_state.
 *
 * Returns S3_SUCCESS, or S3_ERROR when the model definition cannot be
 * read.  Missing options, an over-long tree path and unreadable tree
 * files are reported through E_FATAL / E_FATAL_SYSTEM.
 */
int
init(model_def_t **out_imdef,
     pset_t **out_pset,
     uint32 *out_n_pset,
     dtree_t ****out_tree,
     uint32 *out_n_seno)
{
    model_def_t *imdef;
    uint32 p, s;
    uint32 n_ci, n_state;
    char fn[MAXPATHLEN+1];
    const char *a_fn;
    FILE *fp;
    dtree_t ***tree, *tr;
    pset_t *pset;
    uint32 n_pset;
    uint32 n_seno;
    const char *treedir;
    uint32 ts_id;
    int allphones;
    int fn_len;

    a_fn = cmd_ln_str("-imoddeffn");
    if (a_fn == NULL)
        E_FATAL("Specify -imoddeffn\n");
    if (model_def_read(&imdef, a_fn) != S3_SUCCESS) {
        return S3_ERROR;
    }
    *out_imdef = imdef;

    a_fn = cmd_ln_str("-psetfn");
    if (a_fn == NULL)
        E_FATAL("Specify -psetfn\n");
    E_INFO("Reading: %s\n", a_fn);
    *out_pset = pset = read_pset_file(a_fn, imdef->acmod_set, &n_pset);
    *out_n_pset = n_pset;

    /* With -allphones a single set of trees is shared by all phones. */
    allphones = cmd_ln_int32("-allphones");
    if (allphones)
        n_ci = 1;
    else
        n_ci = acmod_set_n_ci(imdef->acmod_set);

    treedir = cmd_ln_str("-treedir");
    if (treedir == NULL)
        E_FATAL("Specify -treedir\n");
    tree = (dtree_t ***)ckd_calloc(n_ci, sizeof(dtree_t **));
    *out_tree = tree;

    /* Leaf ids continue after the tied CI states. */
    ts_id = imdef->n_tied_ci_state;
    for (p = 0, n_seno = 0; p < n_ci; p++) {
        if (allphones || !acmod_set_has_attrib(imdef->acmod_set, p, "filler")) {
            const char *pname;

            if (allphones) {
                /* state count taken from the first model after the CI
                   models */
                n_state = imdef->defn[acmod_set_n_ci(imdef->acmod_set)].n_state;
                pname = "ALLPHONES";
            }
            else {
                n_state = imdef->defn[p].n_state;
                pname = acmod_set_id2name(imdef->acmod_set, p);
            }
            tree[p] = (dtree_t **)ckd_calloc(n_state, sizeof(dtree_t *));

            /* NOTE(review): the last state gets no tree; presumably it is
               the non-emitting final state -- confirm. */
            for (s = 0; s < n_state-1; s++) {
                E_INFO("%s-%u: offset %u\n",
                       pname, s, ts_id);

                /* treedir comes from the command line, so bound the
                   write into fn and refuse truncated paths. */
                fn_len = snprintf(fn, sizeof(fn), "%s/%s-%u.dtree",
                                  treedir, pname, s);
                if ((fn_len < 0) || ((size_t)fn_len >= sizeof(fn))) {
                    E_FATAL("Tree file name too long: %s/%s-%u.dtree\n",
                            treedir, pname, s);
                }
                fp = fopen(fn, "r");
                if (fp == NULL) {
                    E_FATAL_SYSTEM("Unable to open %s for reading", fn);
                }
                tree[p][s] = tr = read_final_tree(fp, pset, n_pset);

                label_leaves(&tr->node[0], &ts_id);

                fclose(fp);

                n_seno += cnt_leaf(&tr->node[0]);
            }
        }
    }

    /* every counted leaf must have received exactly one id */
    assert(n_seno == (ts_id - imdef->n_tied_ci_state));

    E_INFO("n_seno= %u\n", ts_id);

    *out_n_seno = n_seno;

    return S3_SUCCESS;
}
Beispiel #4
0
/*
 * Program entry point: build an output model definition whose
 * non-CI models have their emitting states tied through the decision
 * trees loaded by init(), then write it to the file named by
 * -omoddeffn.
 *
 * The output definition shares the input's acoustic model set and
 * copies its total-state, tied-CI-state and tmat counts; n_tied_state
 * becomes n_tied_ci_state plus the number of leaves reported by init().
 *
 * Returns 0 on success, 1 when init() or model_def_write() fails.
 */
int
main(int argc, char *argv[])
{
    model_def_t *in_mdef;
    model_def_t *out_mdef;
    pset_t *pset;
    uint32 n_pset;
    dtree_t ***tree;
    uint32 n_seno;
    uint32 n_ci;
    uint32 n_acmod;
    uint32 m;
    uint32 st;
    model_def_entry_t *src, *dst;
    acmod_id_t b, l, r;
    word_posn_t wp;
    int allphones;

    parse_cmd_ln(argc, argv);

    if (init(&in_mdef, &pset, &n_pset, &tree, &n_seno) != S3_SUCCESS)
        return 1;

    out_mdef = (model_def_t *)ckd_calloc(1, sizeof(model_def_t));

    /* the acoustic model set is shared, not copied */
    out_mdef->acmod_set       = in_mdef->acmod_set;
    out_mdef->n_total_state   = in_mdef->n_total_state;
    out_mdef->n_tied_ci_state = in_mdef->n_tied_ci_state;
    out_mdef->n_tied_state    = in_mdef->n_tied_ci_state + n_seno;
    out_mdef->n_tied_tmat     = in_mdef->n_tied_tmat;

    out_mdef->defn = (model_def_entry_t *)ckd_calloc(in_mdef->n_defn,
                                                     sizeof(model_def_entry_t));

    /*
     * Context-independent models: state ids are carried over verbatim
     * (NO_ID entries included).
     */
    n_ci = acmod_set_n_ci(in_mdef->acmod_set);
    for (m = 0; m < n_ci; m++) {
        src = &in_mdef->defn[m];
        dst = &out_mdef->defn[m];

        dst->p       = src->p;
        dst->tmat    = src->tmat;
        dst->state   = ckd_calloc(src->n_state, sizeof(uint32));
        dst->n_state = src->n_state;

        for (st = 0; st < src->n_state; st++) {
            dst->state[st] = src->state[st];
        }
    }

    /*
     * Remaining models (m carries on from n_ci): each emitting state is
     * looked up in the decision tree for its base phone and state index.
     */
    allphones = cmd_ln_int32("-allphones");
    n_acmod = acmod_set_n_acmod(out_mdef->acmod_set);
    for (; m < n_acmod; m++) {
        b = acmod_set_base_phone(out_mdef->acmod_set, m);

        assert(m != b);

        src = &in_mdef->defn[m];
        dst = &out_mdef->defn[m];

        dst->p       = src->p;
        dst->tmat    = src->tmat;
        dst->state   = ckd_calloc(src->n_state, sizeof(uint32));
        dst->n_state = src->n_state;

        for (st = 0; st < src->n_state; st++) {
            uint32 tree_idx;

            if (src->state[st] == NO_ID) {
                /* Non-emitting state */
                dst->state[st] = NO_ID;
                continue;
            }

            /* emitting state: find the tied state */
            acmod_set_id2tri(out_mdef->acmod_set,
                             &b, &l, &r, &wp,
                             m);
#ifdef HORRIBLY_VERBOSE
            fprintf(stderr, "%s %u ",
                    acmod_set_id2name(out_mdef->acmod_set, m), st);
#endif

            /* with -allphones every phone uses the trees stored at
               index 0 */
            if (allphones)
                tree_idx = 0;
            else
                tree_idx = b;

            dst->state[st] = tied_state(&tree[tree_idx][st]->node[0],
                                        b, l, r, wp,
                                        pset);

#ifdef HORRIBLY_VERBOSE
            fprintf(stderr, "\t-> %u\n", dst->state[st]);

            fprintf(stderr, "\n");
#endif
        }
    }

    return (model_def_write(out_mdef, cmd_ln_str("-omoddeffn")) != S3_SUCCESS) ? 1 : 0;
}
Beispiel #5
0
/*
 * Program entry point: read the model definition named by -moddeffn,
 * derive per-base-phone senone offsets from it, read senone weights
 * from -hmmdir through s2_read_seno_3() and write them to -mixwfn
 * through s3mixw_write().
 *
 * Exits with status 1 when the model definition cannot be read.
 * Returns 0 on success and 1 when the mixture weight file could not be
 * written.
 */
int
main(int argc, char *argv[])
{
    model_def_t *mdef;
    model_def_entry_t *defn;
    uint32 n_defn;
    uint32 *cluster_offset;
    uint32 max_int;
    uint32 *state_of;
    uint32 max_state;
    uint32 sstate;
    int32 i;            /* signed: the backward fill loop below tests i >= 0 */
    uint32 j;
    uint32 n_base_phone;
    acmod_id_t base;
    acmod_id_t p;
    float32 ***out;
    uint32 **smap;
    char comment[4192];
    time_t t;
    int rc = 0;

    parse_cmd_ln(argc, argv);

    printf("%s(%d): Reading model definition file %s\n",
           __FILE__, __LINE__, (const char *)cmd_ln_access("-moddeffn"));

    if (model_def_read(&mdef, cmd_ln_access("-moddeffn")) != S3_SUCCESS) {
        exit(1);
    }

    defn = mdef->defn;
    n_defn = mdef->n_defn;

    printf("%s(%d): %d models defined\n",
           __FILE__, __LINE__, n_defn);

    smap = ckd_calloc(n_defn, sizeof(uint32 *));

    n_base_phone = acmod_set_n_ci(mdef->acmod_set);

    /* one extra slot at the end holds the total number of senones */
    cluster_offset = ckd_calloc(n_base_phone+1, sizeof(uint32));

    max_int = 0;
    --max_int;	/* underflow offset values to max value */
    for (i = 0; i < n_base_phone; i++) {
        cluster_offset[i] = max_int;
    }

    /* find the largest emitting state id over all models */
    for (i = 0, max_state = 0; i < n_defn; i++) {
        for (j = 0; j < defn[i].n_state; j++) {
            sstate = defn[i].state[j];

            if ((sstate != TYING_NON_EMITTING) &&
                (defn[i].state[j] > max_state)) max_state = defn[i].state[j];
        }
    }

    /* record the total # of senones */
    cluster_offset[n_base_phone] = max_state+1;

    state_of = ckd_calloc(max_state+1, sizeof(uint32));

    for (i = 0; i <= max_state; i++)
        state_of[i] = NO_STATE;

    for (i = 0; i < n_defn; i++) {
        p = defn[i].p;
        base = acmod_set_base_phone(mdef->acmod_set, defn[i].p);

        /* the state map rows alias the definition's own state arrays */
        smap[i] = defn[i].state;

        for (j = 0; j < defn[i].n_state; j++) {
            sstate = defn[i].state[j];

            if (sstate != TYING_NON_EMITTING) {
                /* remember the model state position each senone is used
                   at; report senones used at more than one position */
                if (state_of[sstate] == NO_STATE)
                    state_of[sstate] = j;
                else if (state_of[sstate] != j) {
                    printf("%s %d appears as %d%s and %d%s model states\n",
                           acmod_set_id2name(mdef->acmod_set, acmod_set_base_phone(mdef->acmod_set, defn[i].p)),
                           sstate,
                           state_of[sstate],
                           ord_suff(state_of[sstate]),
                           j,
                           ord_suff(j));
                }

                /* smallest senone id among the non-base models of each
                   base phone */
                if ((p != base) && (cluster_offset[base] > sstate)) {
                    cluster_offset[base] = sstate;
                }
            }
        }
    }

    /* any untouched CLUSTER_OFFSET's implies a base phone
       without any CD states.  So offset is same as next
       one */
    for (i = (n_base_phone - 1); i >= 0 ; i--) {
        if (cluster_offset[i] == max_int)
            cluster_offset[i] = cluster_offset[i+1];
    }

    fflush(stdout);
    for (i = 0; i < n_base_phone; i++) {
        if (cluster_offset[i] != max_int) {
            fprintf(stderr, "%s(%d): %s offset %d\n",
                    __FILE__, __LINE__,
                    acmod_set_id2name(mdef->acmod_set, i), cluster_offset[i]);
        }
        else {
            fprintf(stderr, "%s(%d): %s <no CD states>\n",
                    __FILE__, __LINE__,
                    acmod_set_id2name(mdef->acmod_set, i));
        }
    }
    fflush(stderr);

    printf("%s(%d): Reading senone weights in %s with floor %e\n",
           __FILE__, __LINE__, (const char *)cmd_ln_access("-hmmdir"),
           *(float32 *)cmd_ln_access("-floor"));

    /* with -ci2cd set, no state map is handed to the reader */
    out = s2_read_seno_3(mdef->acmod_set, cluster_offset,
                         cmd_ln_access("-hmmdir"),
                         (*(int32 *)cmd_ln_access("-ci2cd") ? NULL : smap),
                         *(float32 *)cmd_ln_access("-floor"),
                         state_of);

    /* The option strings have arbitrary length, so the write into the
       fixed-size buffer is bounded.  The text is not passed on to
       s3mixw_write() below. */
    t = time(NULL);
    snprintf(comment, sizeof(comment),
             "Generated on %s\n\tmoddeffn: %s\n\tfloor: %e\n\thmmdir: %s\n\n\n\n\n\n\n\n\n",
             ctime(&t),
             (const char *)cmd_ln_access("-moddeffn"),
             *(float32 *)cmd_ln_access("-floor"),
             (const char *)cmd_ln_access("-hmmdir"));

    fflush(stdout);
    fprintf(stderr, "%s(%d): writing %s\n",
            __FILE__, __LINE__,
            (const char *)cmd_ln_access("-mixwfn"));
    fflush(stderr);

    if (s3mixw_write(cmd_ln_access("-mixwfn"),
                     out,
                     cluster_offset[n_base_phone],	/* total # states */
                     S2_N_FEATURE,
                     S2_N_CODEWORD) != S3_SUCCESS) {
        fflush(stdout);
        fprintf(stderr, "%s(%d): couldn't write mixture weight file\n",
                __FILE__, __LINE__);
        perror(cmd_ln_access("-mixwfn"));
        fflush(stderr);

        /* make the failure visible in the exit status */
        rc = 1;
    }

    ckd_free(state_of);
    ckd_free(cluster_offset);

    return rc;
}
Beispiel #6
0
/*
 * Rewrite the state map smap in place so that every model state maps to
 * a global id, and build the reverse table from global id to model
 * state index.
 *
 * acmod_set    - supplies the CI phone count, the number of remaining
 *                models, base phones and names
 * smap         - smap[model][state]; on entry the non-CI rows hold ids
 *                relative to their base phone (or TYING_NO_ID), on
 *                return all rows hold global ids
 * out_state_of - receives a newly allocated table: global id -> state
 *                index within the model
 * cluster_size - on entry the per-CI-phone cluster sizes; on return the
 *                per-CI-phone offsets, with the grand total stored at
 *                index n_ci (so n_ci+1 entries are written)
 *
 * CI phone states occupy ids 0 .. n_ci*(S2_N_STATE-1)-1.  An unmapped
 * (TYING_NO_ID) state of any other model falls back to the matching
 * state of its base phone, with a warning.  Always returns S3_SUCCESS.
 */
int
s2_convert_smap_to_global(acmod_set_t *acmod_set,
                          uint32 **smap,
                          uint32 **out_state_of,
                          uint32 *cluster_size)
{
    uint32 n_base = acmod_set_n_ci(acmod_set);
    uint32 n_model = acmod_set_n_multi(acmod_set) + n_base;
    uint32 ci;
    uint32 running;
    uint32 st;
    uint32 *rev;
    acmod_id_t model;
    acmod_id_t base;

    /* the CI states come first */
    running = n_base * (S2_N_STATE-1);

    E_INFO("|CI states| == %d\n", running);

    /* turn the table of sizes into a table of starting offsets */
    for (ci = 0; ci < n_base; ci++) {
        uint32 size = cluster_size[ci];

        cluster_size[ci] = running;
        running += size;

        E_INFO("|%s| == %d (%d)\n",
               acmod_set_id2name(acmod_set, ci),
               size, running);
    }

    /* one past the last phone: total # of states */
    cluster_size[n_base] = running;

    rev = ckd_calloc(running, sizeof(uint32));

    /* each CI phone state gets its own, unshared id */
    for (model = 0; model < n_base; model++) {
        for (st = 0; st < S2_N_STATE-1; st++) {
            uint32 seno = model * (S2_N_STATE-1) + st;

            smap[model][st] = seno;
            rev[seno] = st;
        }
    }

    /* shift the base-phone-relative ids of the remaining models by the
       offset of their base phone */
    for (; model < n_model; model++) {
        for (st = 0; st < S2_N_STATE-1; st++) {
            base = acmod_set_base_phone(acmod_set, model);

            if (smap[model][st] != TYING_NO_ID) {
                smap[model][st] += cluster_size[base];
                rev[ smap[model][st] ] = st;
                continue;
            }

            E_WARN("%s<%d> is unmapped, approximating with CI state <%d>.\n",
                   acmod_set_id2name(acmod_set, model),
                   st, st);

            /* the reverse entry for the CI state was already filled in
               by the loop over the CI phones */
            smap[model][st] = smap[base][st];
        }
    }

    *out_state_of = rev;

    return S3_SUCCESS;
}