Ejemplo n.º 1
0
/*
 * Release all per-LM-word state hanging off vh->lms2vh_root and empty
 * vh->lwidlist.
 *
 * For every word ID stored in vh->lwidlist, the root entry
 * vh->lms2vh_root[id] is torn down: each structure referenced from its
 * children list is freed, then the list nodes, then the root entry itself,
 * and the slot is set to NULL.  Only the direct children are freed here;
 * nothing below them is visited.  Finally the word-ID list is freed and
 * reset to NULL.
 */
static void
vithist_lmstate_reset(vithist_t * vh)
{
    gnode_t *wid_node;

    for (wid_node = vh->lwidlist; wid_node != NULL; wid_node = gnode_next(wid_node)) {
        int32 lwid = (int32) gnode_int32(wid_node);
        vh_lms2vh_t *root = vh->lms2vh_root[lwid];
        gnode_t *child_node;

        /* Free the payload of every child node before the list itself. */
        for (child_node = root->children; child_node != NULL;
             child_node = gnode_next(child_node)) {
            vh_lms2vh_t *child = (vh_lms2vh_t *) gnode_ptr(child_node);

            ckd_free((void *) child);
        }
        glist_free(root->children);

        ckd_free((void *) root);
        vh->lms2vh_root[lwid] = NULL;
    }

    glist_free(vh->lwidlist);
    vh->lwidlist = NULL;
}
Ejemplo n.º 2
0
/*
 * Write a textual dump of the LM-state structure of vh to fp.
 *
 * Output is bracketed by "LMSTATE" and "END_LMSTATE" lines.  For each word
 * ID in vh->lwidlist, every entry of vh->lmstate_root[id] is printed as
 * "<word>.<ciphone> -> <vhid>" followed by its subtree (printed by
 * vithist_lmstate_subtree_dump at level 1).  The stream is flushed before
 * returning.
 */
static void vithist_lmstate_dump (vithist_t *vh, kbcore_t *kbc, FILE *fp)
{
    mdef_t *mdef = kbcore_mdef (kbc);
    lm_t *lm = kbcore_lm (kbc);
    gnode_t *wid_node;

    fprintf (fp, "LMSTATE\n");

    for (wid_node = vh->lwidlist; wid_node != NULL; wid_node = gnode_next(wid_node)) {
        int32 lwid = (int32) gnode_int32 (wid_node);
        glist_t states = vh->lmstate_root[lwid];
        gnode_t *state_node;

        /* A word ID listed in lwidlist is expected to have a non-empty root list. */
        assert (states);

        for (state_node = states; state_node != NULL; state_node = gnode_next(state_node)) {
            vh_lmstate2vithist_t *entry =
                (vh_lmstate2vithist_t *) gnode_ptr (state_node);

            fprintf (fp, "\t%s.%s -> %d\n",
                     lm_wordstr(lm, lwid),
                     mdef_ciphone_str (mdef, entry->state),
                     entry->vhid);
            vithist_lmstate_subtree_dump (vh, kbc, entry, 1, fp);
        }
    }

    fprintf (fp, "END_LMSTATE\n");
    fflush (fp);
}
Ejemplo n.º 3
0
/*
 * Pick the unigram of the G2P model that best continues word_grapheme at
 * word_offset, given the word IDs already chosen.
 *
 * model          - n-gram model queried via ngram_word() / ngram_ng_prob().
 * word_grapheme  - full grapheme string being converted.
 * history_list   - list of previously selected word IDs; at most
 *                  ngram_model_get_size(model) entries are read from its head.
 * total_unigrams - number of word IDs (0 .. total_unigrams-1) to scan.
 * word_offset    - offset into word_grapheme where matching starts.
 *
 * Each candidate is split with dict_split_unigram(); candidates accepted by
 * dict_starts_with(unigram.word, word_grapheme + word_offset) are scored and
 * the one with the strictly highest score wins (the first one on ties).
 *
 * Returns a winner_t describing the best candidate.  When no candidate
 * matches, every field of the result is zero, so callers can test
 * length_match == 0 to detect failure.
 */
struct winner_t
dict_get_winner_wid(ngram_model_t *model, const char * word_grapheme, glist_t history_list, const int32 total_unigrams,
                    int word_offset)
{
    int32 current_prob = -2147483647;   /* any winner must score strictly above this */
    struct winner_t winner;
    int32 i = 0, j = 0;
    int nused;
    int32 ngram_order = ngram_model_get_size(model);
    int32 *history = ckd_calloc((size_t)ngram_order+1, sizeof(int32));
    gnode_t *gn;
    const char *vocab;
    const char *sub = word_grapheme + word_offset;  /* same for every candidate */
    int32 prob;
    unigram_t unigram;

    /*
     * Without this the struct would be returned with indeterminate contents
     * whenever no unigram matches, and the caller reads length_match.
     */
    memset(&winner, 0, sizeof(winner));

    /*
     * Copy up to ngram_order history IDs, filling slots from index
     * ngram_order downwards (hence the +1 in the allocation).
     * NOTE(review): ngram_ng_prob() below is handed the base of this array
     * with a count of j, while the filled slots sit at its tail - confirm
     * this layout against what ngram_ng_prob() expects.
     */
    for (gn = history_list; gn; gn = gnode_next(gn)) {
        history[ngram_order-j] = gnode_int32(gn);
        j++;
        if (j >= ngram_order)
            break;
    }

    for (i = 0; i < total_unigrams; i++) {
        vocab = ngram_word(model, i);
        unigram  = dict_split_unigram(vocab);
        if (dict_starts_with(unigram.word, sub)){
            prob = ngram_ng_prob(model, i, history, j, &nused);
            if (current_prob < prob) {
                current_prob = prob;
                winner.winner_wid = i;
                winner.length_match = strlen(unigram.word);
                winner.len_phoneme = strlen(unigram.phone);
            }
        }

        /* The split produced owned copies; release them for every candidate. */
        if (unigram.word)
            ckd_free(unigram.word);
        if (unigram.phone)
            ckd_free(unigram.phone);
    }

    if (history)
        ckd_free(history);

    return winner;
}
Ejemplo n.º 4
0
/*
 * Convert a grapheme string to a space-separated phone string using an
 * n-gram grapheme-to-phoneme model.
 *
 * The word is consumed left to right: at each position
 * dict_get_winner_wid() selects the best matching unigram, whose word ID is
 * pushed onto a history list and whose matched length advances the position.
 * Afterwards the selected units are visited in selection order and their
 * phone parts are concatenated, each followed by a single space.  Units
 * whose phone part is "_" (or missing) contribute nothing.
 *
 * word_grapheme   - NUL-terminated grapheme string to convert.
 * ngram_g2p_model - model used for lookup and scoring.
 *
 * Returns a buffer allocated with ckd_calloc() that the caller must release,
 * or NULL (after logging an error) when no unigram matches at some position.
 * An empty input yields an empty string.
 */
char *
dict_g2p(char const *word_grapheme, ngram_model_t *ngram_g2p_model) 
{
    char *final_phone = NULL;
    char *cursor;
    size_t increment = 1;
    int word_offset = 0;
    size_t j;
    size_t grapheme_len = 0, final_phoneme_len = 0, n_units = 0;
    glist_t history_list = NULL;
    gnode_t *gn;
    int first = 0;
    const int32 *total_unigrams;
    int32 wid_sentence;
    struct winner_t winner;
    const char *word;
    unigram_t unigram;

    total_unigrams = ngram_model_get_counts(ngram_g2p_model);
    wid_sentence = ngram_wid(ngram_g2p_model,"<s>"); /* start with sentence */
    history_list = glist_add_int32(history_list, wid_sentence);
    grapheme_len = strlen(word_grapheme);
    for (j = 0; j < grapheme_len; j += increment) {
        winner = dict_get_winner_wid(ngram_g2p_model, word_grapheme, history_list, *total_unigrams, word_offset);
        increment = winner.length_match;
        if (increment == 0) {
            E_ERROR("Error trying to find matching phoneme (%s) Exiting.. \n" , word_grapheme);
            /* Do not leak the history accumulated so far. */
            glist_free(history_list);
            return NULL;
        }
        history_list = glist_add_int32(history_list, winner.winner_wid);
        word_offset += winner.length_match;
        final_phoneme_len += winner.len_phoneme;
        n_units++;
    }

    history_list = glist_reverse(history_list);

    /*
     * Worst case: every unit emits its phone plus one separating space,
     * plus the terminating NUL.
     */
    final_phone = ckd_calloc(1, final_phoneme_len + n_units + 1);
    cursor = final_phone;
    for (gn = history_list; gn; gn = gnode_next(gn)) {
        /* The first entry after reversal is the "<s>" marker, not a unit. */
        if (!first) {
            first = 1;
            continue;
        }
        word = ngram_word(ngram_g2p_model, gnode_int32(gn));

        if (!word)
            continue;

        unigram  = dict_split_unigram(word);

        /* "_" marks a unit without a phone; a missing phone is skipped too. */
        if (unigram.phone && strcmp(unigram.phone, "_") != 0) {
            size_t phone_len = strlen(unigram.phone);

            memcpy(cursor, unigram.phone, phone_len);
            cursor += phone_len;
            *cursor++ = ' ';
        }

        if (unigram.word)
            ckd_free(unigram.word);
        if (unigram.phone)
            ckd_free(unigram.phone);
    }
    *cursor = '\0';

    if (history_list)
        glist_free(history_list);

    return final_phone;
}
/*
 * Convert the glist of ssids to a composite sseq id.  Return the composite ID.
 *
 * For each emitting state position, the distinct senones found at that
 * position across all ssids in g are collected into a BAD_S3SENID-terminated
 * array.  That array is entered as a binary key into hs with
 * dict2pid->n_comstate as the proposed value; the returned value is the
 * composite state ID for that position.  The per-position composite IDs are
 * in turn entered into hp (proposed value dict2pid->n_comsseq) to obtain the
 * composite sseq ID.
 *
 * In both cases a return equal to the proposed value is treated as "new
 * entry": the counter is incremented and the key buffer is kept (presumably
 * retained by the hash table - confirm against hash_enter_bkey).  Otherwise
 * the key buffer is freed here.
 *
 * Aborts via E_FATAL if g is empty or if n_comsseq reaches MAX_S3SENID.
 */
static s3ssid_t ssidlist2comsseq (glist_t g, mdef_t *mdef, dict2pid_t *dict2pid,
				  hash_table_t *hs,	/* For composite states */
				  hash_table_t *hp)	/* For composite senone seq */
{
    int32 st, k, n_ssid, senid, ssid, id;
    s3senid_t **state_sen;
    s3senid_t *comsenid;
    gnode_t *node;

    n_ssid = glist_count (g);
    if (n_ssid <= 0)
	E_FATAL("Panic: length(ssidlist)= %d\n", n_ssid);

    /* One senone list per state position; room for n_ssid entries plus sentinel */
    state_sen = (s3senid_t **) ckd_calloc (mdef_n_emit_state (mdef), sizeof(s3senid_t *));
    st = 0;
    while (st < mdef_n_emit_state (mdef)) {
	state_sen[st] = (s3senid_t *) ckd_calloc (n_ssid+1, sizeof(s3senid_t));
	state_sen[st][0] = BAD_S3SENID;	/* Sentinel: list starts out empty */
	st++;
    }

    /* One composite senone ID per state position */
    comsenid = (s3senid_t *) ckd_calloc (mdef_n_emit_state (mdef), sizeof(s3senid_t));

    for (node = g; node != NULL; node = gnode_next(node)) {
	ssid = gnode_int32 (node);

	/* Spread this ssid over the state positions, appending unseen senones */
	for (st = 0; st < mdef_n_emit_state (mdef); st++) {
	    s3senid_t *row = state_sen[st];

	    senid = mdef->sseq[ssid][st];

	    /* Stop at the first match or at the sentinel, whichever comes first */
	    k = 0;
	    while ((IS_S3SENID(row[k])) && (row[k] != senid))
		k++;

	    if (NOT_S3SENID(row[k])) {
		row[k] = senid;
		row[k+1] = BAD_S3SENID;
	    }
	}
    }

    /* Map each position's senone list to a composite state ID */
    for (st = 0; st < mdef_n_emit_state (mdef); st++) {
	s3senid_t *row = state_sen[st];

	k = 0;
	while (IS_S3SENID(row[k]))
	    k++;
	assert (k > 0);

	id = hash_enter_bkey (hs, (char *)row, k*sizeof(s3senid_t), dict2pid->n_comstate);
	if (id != dict2pid->n_comstate)
	    ckd_free ((void *) row);	/* Already known; our copy is not needed */
	else
	    dict2pid->n_comstate++;	/* New composite state */

	comsenid[st] = id;
    }
    ckd_free (state_sen);

    /* Map the sequence of composite state IDs to a composite sseq ID */
    id = hash_enter_bkey (hp, (char *)comsenid, mdef->n_emit_state * sizeof(s3senid_t),
			  dict2pid->n_comsseq);
    if (id != dict2pid->n_comsseq) {
	ckd_free ((void *) comsenid);
    } else {
	dict2pid->n_comsseq++;
	if (dict2pid->n_comsseq >= MAX_S3SENID)
	    E_FATAL("#Composite sseq limit(%d) reached; increase MAX_S3SENID\n",
		    dict2pid->n_comsseq);
    }

    return ((s3ssid_t)id);
}