示例#1
0
/*
 * Return the CI phone id for the phone name str.
 *
 * When the dictionary has a model definition attached, the lookup is delegated
 * to mdef_ciphone_id().  Otherwise the dictionary's own phone table (d->pht) is
 * consulted; a name that is not yet present is given the next free id, its
 * string is copied into d->ciphone_str[] and the pair is entered in the table.
 * Running out of ids or failing to enter the new name is reported via E_FATAL.
 */
static s3cipid_t
dict_ciphone_id(dict_t * d, const char *str)
{
    void *found;
    void *boxed_id;
    s3cipid_t new_id;

    /* A model definition, when present, answers the lookup. */
    if (d->mdef)
        return mdef_ciphone_id(d->mdef, str);

    /* Known name: the id is stored directly in the table's value pointer. */
    if (hash_table_lookup(d->pht, str, &found) >= 0)
        return (s3cipid_t)(long)found;

    /* Unknown name: claim the next id and register it. */
    new_id = (d->n_ciphone)++;
    if (new_id >= MAX_S3CIPID)
        E_FATAL("Too many CIphones in dictionary; increase MAX_S3CIPID\n");

    d->ciphone_str[new_id] = (char *) ckd_salloc(str);  /* Freed in dict_free() */

    /* hash_table_enter() is expected to hand back the value that was stored. */
    boxed_id = (void *)(long)new_id;
    if (hash_table_enter(d->pht, d->ciphone_str[new_id], boxed_id) != boxed_id)
        E_FATAL("hash_table_enter(local-phonetable, %s) failed\n", str);

    return new_id;
}
示例#2
0
文件: mdef.c 项目: 10v/cmusphinx
/*
 * Parse one base (context-independent) phone line of a model definition file
 * and register the phone in m.
 *
 * m:      model definition being filled in.
 * line:   text of the line; it is scanned word by word with nextword().  Each
 *         word's following character is saved in tmp and written back once the
 *         word has been used.
 * lineno: line number, used only in diagnostics.
 * p:      phone id assigned to this phone; it is also used as its CI phone id.
 *
 * Expected layout: <name> - - - <filler|n/a> followed by the transition matrix
 * and state->senone fields, which are handed to parse_tmat_senmap().
 * Any malformed field is reported through E_FATAL.  Returns 0.
 */
static int32 parse_base_line (mdef_t *m, char *line, int32 lineno, s3pid_t p)
{
    int32 wlen, n;
    char *word;
    s3cipid_t ci;
    char tmp;

    /* Read base phone name */
    if ((wlen = nextword (line, delim, &word, &tmp)) < 0)    /* Empty line */
        E_FATAL("Line %d: Incomplete base phone line\n", lineno);

    /* Make sure it's not a duplicate */
    ci = mdef_ciphone_id (m, word);
    if (IS_CIPID(ci))
        E_FATAL("Line %d: Duplicate base phone: %s\n", lineno, word);

    /* Add ciphone to ciphone table with id p */
    ciphone_add (m, word, p);
    ci = p;

    /* Restore original delimiter to word */
    line = word+wlen;
    word[wlen] = tmp;

    /* Read and skip "-" for lc, rc, wpos */
    for (n = 0; n < 3; n++) {
        if ((wlen = nextword (line, delim, &word, &tmp)) < 0)
            E_FATAL("Line %d: Incomplete base phone line\n", lineno);
        /* Arguments follow the format order: line number (%d) first, word (%s) second */
        if ((wlen != 1) || (word[0] != '-'))
            E_FATAL("Line %d: %s instead of '-' in base phone line\n", lineno, word);
        line = word+wlen;
        word[wlen] = tmp;
    }

    /* Read filler attribute; only "filler" and "n/a" are accepted */
    if ((wlen = nextword (line, delim, &word, &tmp)) < 0)
        E_FATAL("Line %d: Incomplete base phone line\n", lineno);
    if (strcmp (word, "filler") == 0)
        m->ciphone[ci].filler = 1;
    else if (strcmp (word, "n/a") == 0)
        m->ciphone[ci].filler = 0;
    else
        E_FATAL("Line %d: Illegal attribute string: %s\n", lineno, word);
    line = word+wlen;
    word[wlen] = tmp;

    /* A base phone is entered in the phone table with no left/right context */
    if (triphone_add (m, ci, BAD_CIPID, BAD_CIPID, WORD_POSN_UNDEFINED, p) < 0)
        E_FATAL("Line %d: Duplicate/Bad triphone\n", lineno);

    /* Parse remainder of line: transition matrix and state->senone mappings */
    parse_tmat_senmap (m, line, lineno, p);

    return 0;
}
示例#3
0
文件: mdef.c 项目: Ankit77/cmusphinx
/*
 * Parse one base (context-independent) phone line of a text model definition
 * and add the phone to m under id p.
 *
 * m:    model definition being filled in.
 * line: the complete text line; it is only read, never modified.
 * p:    phone id assigned to this phone; it is also used as its CI phone id.
 *
 * Expected layout: <name> - - - <filler|n/a> followed by the transition matrix
 * and state->senone fields.  The offset of the first unread character is passed
 * on to parse_tmat_senmap().  Malformed input is reported through E_FATAL.
 */
static void
parse_base_line(mdef_t * m, const char *line, s3pid_t p)
{
    int32 wlen, n;
    char word[1024];
    const char *lp;
    s3cipid_t ci;

    lp = line;

    /*
     * Every token is read with a field width of sizeof(word) - 1 so that an
     * over-long token cannot write past the end of word[].
     */

    /* Read base phone name */
    if (sscanf(lp, "%1023s%n", word, &wlen) != 1)
        E_FATAL("Missing base phone name: %s\n", line);
    lp += wlen;

    /* Make sure it's not a duplicate */
    ci = mdef_ciphone_id(m, word);
    if (IS_S3CIPID(ci))
        E_FATAL("Duplicate base phone: %s\n", line);

    /* Add ciphone to ciphone table with id p */
    ciphone_add(m, word, p);
    ci = (s3cipid_t) p;

    /* Read and skip "-" for lc, rc, wpos */
    for (n = 0; n < 3; n++) {
        if ((sscanf(lp, "%1023s%n", word, &wlen) != 1)
            || (strcmp(word, "-") != 0))
            E_FATAL("Bad context info for base phone: %s\n", line);
        lp += wlen;
    }

    /* Read filler attribute; only "filler" and "n/a" are accepted */
    if (sscanf(lp, "%1023s%n", word, &wlen) != 1)
        E_FATAL("Missing filler atribute field: %s\n", line);
    lp += wlen;
    if (strcmp(word, "filler") == 0)
        m->ciphone[(int) ci].filler = 1;
    else if (strcmp(word, "n/a") == 0)
        m->ciphone[(int) ci].filler = 0;
    else
        E_FATAL("Bad filler attribute field: %s\n", line);

    /* A base phone is entered in the phone table with no left/right context */
    triphone_add(m, ci, BAD_S3CIPID, BAD_S3CIPID, WORD_POSN_UNDEFINED, p);

    /* Parse remainder of line: transition matrix and state->senone mappings */
    parse_tmat_senmap(m, line, lp - line, p);
}
示例#4
0
文件: dict.c 项目: 10v/cmusphinx
/*
 * Return the CI phone id for the phone name str.
 *
 * With a model definition attached to the dictionary the lookup is delegated to
 * mdef_ciphone_id().  Without one, the dictionary's local phone table (d->pht)
 * is used: an unknown name receives the next free id, a copy of the name is
 * stored in d->ciphone_str[] and the pair is entered in the table.  Exhausting
 * the id range or failing to enter the name is reported via E_FATAL.
 */
static s3cipid_t dict_ciphone_id (dict_t *d, char *str)
{
    int32 cipid;

    /* A model definition, when present, answers the lookup. */
    if (d->mdef)
        return mdef_ciphone_id (d->mdef, str);

    /* Name already in the local table: hash_lookup() filled in its id. */
    if (hash_lookup (d->pht, str, &cipid) >= 0)
        return cipid;

    /* New name: take the next id, then advance the counter. */
    cipid = d->n_ciphone;
    d->n_ciphone++;

    if (cipid >= MAX_CIPID)
        E_FATAL("Too many CIphones in dictionary; increase MAX_CIPID\n");

    d->ciphone_str[cipid] = (char *) ckd_salloc(str);

    /* hash_enter() is expected to hand back the value that was stored. */
    if (hash_enter (d->pht, d->ciphone_str[cipid], cipid) != cipid)
        E_FATAL("hash_enter(local-phonetable, %s) failed\n", str);

    return cipid;
}
示例#5
0
/*
 * Initialize phones (ci and triphones) and state->senone mappings from .mdef file.
 *
 * mdeffile: path of the text model definition file; a NULL path is reported
 *           through E_FATAL.
 * breport:  when non-zero, an E_INFO line naming the file is logged first.
 *
 * Returns the newly allocated mdef_t, or NULL when the first non-comment line
 * starts with "BMDF" or "FDMB" (taken to be a binary mdef file; the stream is
 * closed and the structure is freed before returning).  Every other problem is
 * reported through E_FATAL / E_FATAL_SYSTEM; the statements following each such
 * call are written on the assumption that it does not return.
 */
mdef_t *
mdef_init(char *mdeffile, int32 breport)
{
    FILE *fp;
    int32 n_ci, n_tri, n_map, n;
    __BIGSTACKVARIABLE__ char tag[1024], buf[1024];
    uint16 **senmap;
    int p;
    int32 s, ci, cd;
    mdef_t *m;

    if (!mdeffile)
        E_FATAL("No mdef-file\n");

    if (breport)
        E_INFO("Reading model definition: %s\n", mdeffile);

    m = (mdef_t *) ckd_calloc(1, sizeof(mdef_t));       /* freed in mdef_free */

    if ((fp = fopen(mdeffile, "r")) == NULL)
        E_FATAL_SYSTEM("Failed to open mdef file '%s' for reading", mdeffile);

    if (noncomment_line(buf, sizeof(buf), fp) < 0)
        E_FATAL("Empty file: %s\n", mdeffile);

    /* A byte-order mark in either byte order means this is not a text mdef file */
    if (strncmp(buf, "BMDF", 4) == 0 || strncmp(buf, "FDMB", 4) == 0) {
        E_INFO
            ("Found byte-order mark %.4s, assuming this is a binary mdef file\n",
             buf);
        fclose(fp);
        ckd_free(m);
        return NULL;
    }
    /* Only the leading strlen(MODEL_DEF_VERSION) characters are compared */
    if (strncmp(buf, MODEL_DEF_VERSION, strlen(MODEL_DEF_VERSION)) != 0)
        E_FATAL("Version error: Expecing %s, but read %s\n",
                MODEL_DEF_VERSION, buf);

    /* Read #base phones, #triphones, #senone mappings defined in header */
    /* -1 marks "not seen yet"; the loop below runs until all six counts are set */
    n_ci = -1;
    n_tri = -1;
    n_map = -1;
    m->n_ci_sen = -1;
    m->n_sen = -1;
    m->n_tmat = -1;
    do {
        if (noncomment_line(buf, sizeof(buf), fp) < 0)
            E_FATAL("Incomplete header\n");

        /* Each header line is "<count> <tag>"; negative counts are rejected */
        if ((sscanf(buf, "%d %s", &n, tag) != 2) || (n < 0))
            E_FATAL("Error in header: %s\n", buf);

        if (strcmp(tag, "n_base") == 0)
            n_ci = n;
        else if (strcmp(tag, "n_tri") == 0)
            n_tri = n;
        else if (strcmp(tag, "n_state_map") == 0)
            n_map = n;
        else if (strcmp(tag, "n_tied_ci_state") == 0)
            m->n_ci_sen = n;
        else if (strcmp(tag, "n_tied_state") == 0)
            m->n_sen = n;
        else if (strcmp(tag, "n_tied_tmat") == 0)
            m->n_tmat = n;
        else
            E_FATAL("Unknown header line: %s\n", buf);
    } while ((n_ci < 0) || (n_tri < 0) || (n_map < 0) ||
             (m->n_ci_sen < 0) || (m->n_sen < 0) || (m->n_tmat < 0));

    /* n_tri may legitimately be 0; the other counts checked here may not */
    if ((n_ci == 0) || (m->n_ci_sen == 0) || (m->n_tmat == 0)
        || (m->n_ci_sen > m->n_sen))
        E_FATAL("%s: Error in header\n", mdeffile);

    /* Check typesize limits */
    if (n_ci >= MAX_INT16)
        E_FATAL("%s: #CI phones (%d) exceeds limit (%d)\n", mdeffile, n_ci,
                MAX_INT16);
    if (n_ci + n_tri >= MAX_INT32) /* Comparison is always false... */
        E_FATAL("%s: #Phones (%d) exceeds limit (%d)\n", mdeffile,
                n_ci + n_tri, MAX_INT32);
    if (m->n_sen >= MAX_INT16)
        E_FATAL("%s: #senones (%d) exceeds limit (%d)\n", mdeffile,
                m->n_sen, MAX_INT16);
    if (m->n_tmat >= MAX_INT32) /* Comparison is always false... */
        E_FATAL("%s: #tmats (%d) exceeds limit (%d)\n", mdeffile,
                m->n_tmat, MAX_INT32);

    /*
     * n_state_map counts (emitting states + 1) entries per phone, so the number
     * of emitting states is the per-phone share minus one.
     * NOTE(review): with n_state_map == 0 this yields -1 and the consistency
     * check below still passes (0 * x == 0); the negative value then reaches
     * ckd_calloc_2d() before the CI-senone check further down can reject it.
     * NOTE(review): the message says "n_ci*n_tri" while the test uses n_ci+n_tri.
     */
    m->n_emit_state = (n_map / (n_ci + n_tri)) - 1;
    if ((m->n_emit_state + 1) * (n_ci + n_tri) != n_map)
        E_FATAL
            ("Header error: n_state_map not a multiple of n_ci*n_tri\n");

    /* Initialize ciphone info */
    m->n_ciphone = n_ci;
    /* NOTE(review): the original remark said "case-insensitive" while the flag passed is HASH_CASE_YES -- confirm which is intended */
    m->ciphone_ht = hash_table_new(n_ci, HASH_CASE_YES);  /* freed in mdef_free */
    m->ciphone = (ciphone_t *) ckd_calloc(n_ci, sizeof(ciphone_t));     /* freed in mdef_free */

    /* Initialize phones info (ciphones + triphones) */
    m->n_phone = n_ci + n_tri;
    m->phone = (phone_t *) ckd_calloc(m->n_phone, sizeof(phone_t));     /* freed in mdef_free */

    /* Allocate space for state->senone map for each phone */
    senmap = ckd_calloc_2d(m->n_phone, m->n_emit_state, sizeof(**senmap));      /* freed in mdef_free */
    m->sseq = senmap;           /* TEMPORARY; until it is compressed into just the unique ones */

    /* Allocate initial space for <ci,lc,rc,wpos> -> pid mapping */
    m->wpos_ci_lclist = (ph_lc_t ***) ckd_calloc_2d(N_WORD_POSN, m->n_ciphone, sizeof(ph_lc_t *));      /* freed in mdef_free */

    /*
     * Read base phones and triphones.  They'll simply be assigned a running sequence
     * number as their "phone-id".  If the phone-id < n_ci, it's a ciphone.
     */

    /* Read base phones */
    for (p = 0; p < n_ci; p++) {
        if (noncomment_line(buf, sizeof(buf), fp) < 0)
            E_FATAL("Premature EOF reading CIphone %d\n", p);
        parse_base_line(m, buf, p);
    }
    /* The silence phone can only be looked up once all base phones are known */
    m->sil = mdef_ciphone_id(m, S3_SILENCE_CIPHONE);

    /* Read triphones, if any */
    /* p continues from n_ci, so triphone ids follow the CI phone ids */
    for (; p < m->n_phone; p++) {
        if (noncomment_line(buf, sizeof(buf), fp) < 0)
            E_FATAL("Premature EOF reading phone %d\n", p);
        parse_tri_line(m, buf, p);
    }

    /* Extra content is only reported (E_ERROR); loading continues */
    if (noncomment_line(buf, sizeof(buf), fp) >= 0)
        E_ERROR("Non-empty file beyond expected #phones (%d)\n",
                m->n_phone);

    /* Build CD senones to CI senones map */
    if (m->n_ciphone * m->n_emit_state != m->n_ci_sen)
        E_FATAL
            ("#CI-senones(%d) != #CI-phone(%d) x #emitting-states(%d)\n",
             m->n_ci_sen, m->n_ciphone, m->n_emit_state);
    m->cd2cisen = (int16 *) ckd_calloc(m->n_sen, sizeof(*m->cd2cisen)); /* freed in mdef_free */

    m->sen2cimap = (int16 *) ckd_calloc(m->n_sen, sizeof(*m->sen2cimap)); /* freed in mdef_free */

    /* -1 marks senones that no phone has claimed */
    for (s = 0; s < m->n_sen; s++)
        m->sen2cimap[s] = -1;
    /* The first n_ci_sen senones map to themselves; their owner is s / n_emit_state */
    for (s = 0; s < m->n_ci_sen; s++) { /* CI senones */
        m->cd2cisen[s] = s;
        m->sen2cimap[s] = s / m->n_emit_state;
    }
    /* Each triphone state is tied to the same-numbered state of its base phone */
    for (p = n_ci; p < m->n_phone; p++) {       /* CD senones */
        for (s = 0; s < m->n_emit_state; s++) {
            cd = m->sseq[p][s];
            ci = m->sseq[m->phone[p].ci][s];
            m->cd2cisen[cd] = ci;
            m->sen2cimap[cd] = m->phone[p].ci;
        }
    }

    /* Runs after the maps are built, since the loops above index m->sseq by phone id */
    sseq_compress(m);
    fclose(fp);

    return m;
}
示例#6
0
/*
 * Parse one triphone line of a text model definition and add the triphone to m
 * under phone id p.
 *
 * m:    model definition being filled in; all three phone names on the line
 *       must already be known to mdef_ciphone_id().
 * line: the complete text line; it is read through lp and never written to.
 * p:    phone id assigned to this triphone.
 *
 * Expected layout: <base> <lc> <rc> <b|e|s|i> <filler|n/a> followed by the
 * transition matrix and state->senone fields.  The offset of the first unread
 * character is passed on to parse_tmat_senmap().  Malformed input is reported
 * through E_FATAL.
 */
static void
parse_tri_line(mdef_t * m, char *line, int p)
{
    int32 wlen;
    __BIGSTACKVARIABLE__ char word[1024], *lp;
    int ci, lc, rc;
    word_posn_t wpos = WORD_POSN_BEGIN;

    lp = line;

    /*
     * Every token is read with a field width of sizeof(word) - 1 so that an
     * over-long token cannot write past the end of word[].
     */

    /* Read base phone name */
    if (sscanf(lp, "%1023s%n", word, &wlen) != 1)
        E_FATAL("Missing base phone name: %s\n", line);
    lp += wlen;

    ci = mdef_ciphone_id(m, word);
    if (ci < 0)
        E_FATAL("Unknown base phone: %s\n", line);

    /* Read lc */
    if (sscanf(lp, "%1023s%n", word, &wlen) != 1)
        E_FATAL("Missing left context: %s\n", line);
    lp += wlen;
    lc = mdef_ciphone_id(m, word);
    if (lc < 0)
        E_FATAL("Unknown left context: %s\n", line);

    /* Read rc */
    if (sscanf(lp, "%1023s%n", word, &wlen) != 1)
        E_FATAL("Missing right context: %s\n", line);
    lp += wlen;
    rc = mdef_ciphone_id(m, word);
    if (rc < 0)
        E_FATAL("Unknown right  context: %s\n", line);

    /* Read triphone word-position within word; it must be a single character */
    if ((sscanf(lp, "%1023s%n", word, &wlen) != 1) || (word[1] != '\0'))
        E_FATAL("Missing or bad word-position spec: %s\n", line);
    lp += wlen;
    switch (word[0]) {
    case 'b':
        wpos = WORD_POSN_BEGIN;
        break;
    case 'e':
        wpos = WORD_POSN_END;
        break;
    case 's':
        wpos = WORD_POSN_SINGLE;
        break;
    case 'i':
        wpos = WORD_POSN_INTERNAL;
        break;
    default:
        E_FATAL("Bad word-position spec: %s\n", line);
    }

    /* Read filler attribute, if present.  Must match base phone attribute */
    if (sscanf(lp, "%1023s%n", word, &wlen) != 1)
        E_FATAL("Missing filler attribute field: %s\n", line);
    lp += wlen;
    if (((strcmp(word, "filler") == 0) && (m->ciphone[(int) ci].filler)) ||
        ((strcmp(word, "n/a") == 0) && (!m->ciphone[(int) ci].filler))) {
        /* Everything is fine */
    }
    else
        E_FATAL("Bad filler attribute field: %s\n", line);

    triphone_add(m, ci, lc, rc, wpos, p);

    /* Parse remainder of line: transition matrix and state->senone mappings */
    parse_tmat_senmap(m, line, lp - line, p);
}
示例#7
0
文件: mdef.c 项目: 10v/cmusphinx
/*
 * Initialize phones (ci and triphones) and state->senone mappings from .mdef file.
 *
 * mdeffile: path of the text model definition file.
 *
 * The result is built in `mdef`, which is not declared in this function (it is
 * asserted to be unset on entry), and that same pointer is returned.
 * `max_ciphone` and `max_phone` are likewise declared outside this function.
 * Every problem found in the file is reported through E_FATAL; the statements
 * following each such call are written on the assumption that it does not return.
 */
mdef_t *mdef_init (char *mdeffile)
{
    FILE *fp;
    int32 n_ci, n_tri, n_map, n;
    char tag[1024], buf[1024];
    int32 n_read;
    s3senid_t *senmap;
    s3pid_t p;
    int32 s, ci, cd;
    
    E_INFO("Reading model definition: %s\n", mdeffile);

    /* Only one model definition may be loaded through this function */
    assert (! mdef);
    mdef = (mdef_t *) ckd_calloc (1, sizeof(mdef_t));
    
    if ((fp = fopen(mdeffile, "r")) == NULL)
        E_FATAL("fopen(%s,r) failed\n", mdeffile);

    /* n_read is handed to noncomment_line() by address and used as the line number in diagnostics */
    n_read = 0;

    if (noncomment_line(buf, sizeof(buf), &n_read, fp) < 0)
        E_FATAL("Empty file: %s\n", mdeffile);

    /* Only the leading strlen(MODEL_DEF_VERSION) characters are compared */
    if (strncmp(buf, MODEL_DEF_VERSION, strlen(MODEL_DEF_VERSION)) != 0)
        E_FATAL("Line %d: Version error.  Expecing %s, but read %s\n",
		n_read, MODEL_DEF_VERSION, buf);

    /* Read #base phones, #triphones, #senone mappings defined in header */
    /* -1 marks "not seen yet"; the loop below runs until all six counts are set */
    n_ci = -1;
    n_tri = -1;
    n_map = -1;
    mdef->n_ci_sen = -1;
    mdef->n_sen = -1;
    mdef->n_tmat = -1;
    do {
	if (noncomment_line(buf, sizeof(buf), &n_read, fp) < 0)
	    E_FATAL("Incomplete header\n");

	/* Each header line is "<count> <tag>"; negative counts are rejected */
	if ((sscanf(buf, "%d %s", &n, tag) != 2) || (n < 0))
	    E_FATAL("Line %d: Error in reading header\n", n_read);

	if (strcmp(tag, "n_base") == 0)
	    n_ci = n;
	else if (strcmp(tag, "n_tri") == 0)
	    n_tri = n;
	else if (strcmp(tag, "n_state_map") == 0)
	    n_map = n;
	else if (strcmp(tag, "n_tied_ci_state") == 0)
	    mdef->n_ci_sen = n;
	else if (strcmp(tag, "n_tied_state") == 0)
	    mdef->n_sen = n;
	else if (strcmp(tag, "n_tied_tmat") == 0)
	    mdef->n_tmat = n;
	else
	    E_FATAL("Line %d: Unknown tag: %s\n", n_read, tag);
    } while ((n_ci < 0) || (n_tri < 0) || (n_map < 0) ||
	     (mdef->n_ci_sen < 0) || (mdef->n_sen < 0) || (mdef->n_tmat < 0));
    /* n_tri may legitimately be 0; the other counts checked here may not */
    if ((n_ci == 0) || (mdef->n_ci_sen == 0) || (mdef->n_tmat == 0) ||
	(mdef->n_ci_sen > mdef->n_sen))
        E_FATAL("Incorrect information in header\n");
    
    /*
     * n_state_map counts (emitting states + 1) entries per phone, so the number
     * of emitting states is the per-phone share minus one.
     * NOTE(review): with n_state_map == 0 this yields -1 and the consistency
     * check below still passes (0 * x == 0); nothing later in this function
     * rejects that value before it is used as an allocation size.
     * NOTE(review): the message says "n_ci*n_tri" while the test uses n_ci+n_tri.
     */
    mdef->n_emit_state = (n_map / (n_ci+n_tri)) - 1;
    if ((mdef->n_emit_state+1) * (n_ci+n_tri) != n_map)
        E_FATAL("Header error: n_state_map not a multiple of n_ci*n_tri\n");

    /* Initialize ciphone info */
    max_ciphone = n_ci;
    mdef->ciphone_ht = hash_new ("ciphone", n_ci);
    mdef->ciphone = (ciphone_t *) ckd_calloc (n_ci, sizeof(ciphone_t));
    /* Starts at 0; presumably advanced as phones are added while parsing -- TODO confirm in ciphone_add */
    mdef->n_ciphone = 0;

    /* Initialize phones info (ciphones + triphones) */
    max_phone = n_ci + n_tri;
    mdef->phone = (phone_t *) ckd_calloc (max_phone, sizeof(phone_t));
    /* Starts at 0; presumably advanced as phones are added while parsing -- TODO confirm in triphone_add */
    mdef->n_phone = 0;
    
    /* Allocate space for state->senone map for each phone */
    /* One contiguous array; each phone's state pointer is a slice of n_emit_state entries */
    senmap = (s3senid_t *) ckd_calloc (max_phone * mdef->n_emit_state, sizeof(s3senid_t));
    for (p = 0; p < max_phone; p++)
        mdef->phone[p].state = senmap + (p * mdef->n_emit_state);

    /* Allocate initial space for <ci,lc,rc,wpos> -> pid mapping */
    mdef->wpos_ci_lclist = (ph_lc_t ***) ckd_calloc_2d (N_WORD_POSN,
							max_ciphone,
							sizeof(ph_lc_t *));

    /*
     * Read base phones and triphones.  They'll simply be assigned a running sequence
     * number as their "phone-id".  If the phone-id < n_ci, it's a ciphone.
     */

    /* Read base phones */
    for (p = 0; p < n_ci; p++) {
        if (noncomment_line(buf, sizeof(buf), &n_read, fp) < 0)
	    E_FATAL("Premature EOF(%s)\n", mdeffile);
        parse_base_line (mdef, buf, n_read, p);
    }
    /* The silence phone can only be looked up once all base phones are known */
    mdef->sil = mdef_ciphone_id (mdef, SILENCE_CIPHONE);
    
    /* Read triphones, if any */
    /* p continues from n_ci, so triphone ids follow the CI phone ids */
    for (; p < max_phone; p++) {
        if (noncomment_line(buf, sizeof(buf), &n_read, fp) < 0)
	    E_FATAL("Premature EOF(%s)\n", mdeffile);
        parse_tri_line (mdef, buf, n_read, p);
    }

    /* Extra content is only reported (E_ERROR); loading continues */
    if (noncomment_line(buf, sizeof(buf), &n_read, fp) >= 0)
	E_ERROR("Line %d: File continues beyond expected size\n", n_read);

    /* Build CD senones to CI senones map */
    mdef->cd2cisen = (s3senid_t *) ckd_calloc (mdef->n_sen, sizeof(s3senid_t));
    /* The first n_ci_sen senones map to themselves */
    for (s = 0; s < mdef->n_ci_sen; s++)		/* CI senones */
	mdef->cd2cisen[s] = (s3senid_t) s;
    /* Each triphone state is tied to the same-numbered state of its base phone */
    for (p = n_ci; p < max_phone; p++) {		/* CD senones */
	for (s = 0; s < mdef->n_emit_state; s++) {
	    cd = mdef->phone[p].state[s];
	    ci = mdef->phone[mdef->phone[p].ci].state[s];
	    mdef->cd2cisen[cd] = (s3senid_t) ci;
	}
    }

    E_INFO("%d CI-phones, %d CD-phones, %d emitting states/phone, %d sen, %d CI-sen\n",
	   mdef->n_ciphone, mdef->n_phone - mdef->n_ciphone, mdef->n_emit_state,
	   mdef->n_sen, mdef->n_ci_sen);

    fclose (fp);

    return mdef;
}
示例#8
0
文件: mdef.c 项目: 10v/cmusphinx
/*
 * Parse one triphone line of a model definition file and add the triphone to m
 * under phone id p.
 *
 * m:      model definition being filled in; all three phone names on the line
 *         must already be known to mdef_ciphone_id().
 * line:   text of the line; it is scanned word by word with nextword().  Each
 *         word's following character is saved in tmp and written back once the
 *         word has been used.
 * lineno: line number, used only in diagnostics.
 * p:      phone id assigned to this triphone.
 *
 * Expected layout: <base> <lc> <rc> <b|e|s|i> <filler|n/a> followed by the
 * transition matrix and state->senone fields, which are handed to
 * parse_tmat_senmap().  Any malformed field is reported through E_FATAL.
 * Returns 0.
 */
static int32 parse_tri_line (mdef_t *m, char *line, int32 lineno, s3pid_t p)
{
    int32 wlen;
    char *word;
    s3cipid_t ci, lc, rc;
    word_posn_t wpos;
    char tmp;

    /* Read base phone name */
    if ((wlen = nextword (line, delim, &word, &tmp)) < 0)    /* Empty line */
        E_FATAL("Line %d: Incomplete triphone line\n", lineno);
    ci = mdef_ciphone_id (m, word);
    if (NOT_CIPID(ci))
        E_FATAL("Line %d: Unknown base phone in triphone: %s\n", lineno, word);
    line = word+wlen;
    word[wlen] = tmp;

    /* Read lc */
    if ((wlen = nextword (line, delim, &word, &tmp)) < 0)
        E_FATAL("Line %d: Incomplete triphone line\n", lineno);
    lc = mdef_ciphone_id (m, word);
    if (NOT_CIPID(lc))
        E_FATAL("Line %d: Unknown left context in triphone: %s\n", lineno, word);
    line = word+wlen;
    word[wlen] = tmp;

    /* Read rc; the diagnostic names the right context, which is the field that failed */
    if ((wlen = nextword (line, delim, &word, &tmp)) < 0)
        E_FATAL("Line %d: Incomplete triphone line\n", lineno);
    rc = mdef_ciphone_id (m, word);
    if (NOT_CIPID(rc))
        E_FATAL("Line %d: Unknown right context in triphone: %s\n", lineno, word);
    line = word+wlen;
    word[wlen] = tmp;

    /* Read triphone word-position within word; it must be a single character */
    if (((wlen = nextword (line, delim, &word, &tmp)) < 0) ||
        (word[1] != '\0'))
        E_FATAL("Line %d: Missing or bad triphone word-position spec\n", lineno);
    switch (word[0]) {
    case 'b': wpos = WORD_POSN_BEGIN; break;
    case 'e': wpos = WORD_POSN_END; break;
    case 's': wpos = WORD_POSN_SINGLE; break;
    case 'i': wpos = WORD_POSN_INTERNAL; break;
    default: E_FATAL("Line %d: Bad word-position spec: %s\n", lineno, word);
    }
    line = word+wlen;
    word[wlen] = tmp;

    /* Read filler attribute, if present.  Must match base phone attribute */
    if ((wlen = nextword (line, delim, &word, &tmp)) < 0)
        E_FATAL("Line %d: Incomplete triphone line\n", lineno);
    if (((strcmp (word, "filler") == 0) && (m->ciphone[ci].filler)) ||
        ((strcmp (word, "n/a") == 0) && (! m->ciphone[ci].filler))) {
        /* Attribute agrees with the base phone: nothing to do */
    } else
        E_FATAL("Line %d: Bad attribute string: %s\n", lineno, word);
    line = word+wlen;
    word[wlen] = tmp;

    if (triphone_add (m, ci, lc, rc, wpos, p) < 0)
        E_FATAL("Line %d: Duplicate/Bad triphone\n", lineno);

    /* Parse remainder of line: transition matrix and state->senone mappings */
    parse_tmat_senmap (m, line, lineno, p);

    return 0;
}
示例#9
0
文件: pronerr.c 项目: 10v/cmusphinx
/*
 * Parse one annotated phone-sequence line and align it against the reference
 * word sequence.
 *
 * line:   text to parse; whitespace-separated tokens, ended by a token that
 *         is_uttid() accepts (which also fills in the local uttid).
 * ref:    reference word ids; the entry after the last word must satisfy NOT_WID.
 * ap:     output, the CI phone ids of the hypothesis side, ended with BAD_CIPID.
 * ap_err: output, parallel to ap: 0 for phones read in the CORR state, 1 otherwise
 *         (the terminating entry is set to 1).
 * aplen:  limit on the number of phones stored in ap/ap_err.
 *         NOTE(review): the terminator is written at index n_hypci, which can
 *         equal aplen, so the arrays seem to need aplen+1 entries -- confirm
 *         with the caller.
 * id:     expected utterance id; used in diagnostics and compared with the
 *         uttid found on the line.
 *
 * Token grammar: "[[" opens an error region (reference side), "=>" switches to
 * the hypothesis side, "]]" closes the region; every other token is a CI phone
 * name looked up in `mdef`.  `mdef` and `dict` are not declared in this function.
 *
 * Returns a pointer to a function-static array that is allocated on the first
 * call and overwritten by each later call.  For each reference word it holds the
 * start (s) and end (e) hypothesis phone index (-1 when the boundary lies inside
 * an error region) and an err flag; one extra entry follows the last word.
 * Violations are reported through E_FATAL or assert.
 */
static wseg_t *line2wseg (char *line, s3wid_t *ref,
			  s3cipid_t *ap, int8 *ap_err, int32 aplen, char *id)
{
    char word[1024], uttid[1024], *lp;
    /* i is used only by the disabled debug block at the end */
    int32 i, k, n_hypci, n_refwd, n_refci, pronlen;
    s3cipid_t ci;
    /* CORR: outside any error region; REFERR: between "[[" and "=>"; HYPERR: between "=>" and "]]" */
    typedef enum {CORR=0, REFERR=1, HYPERR=2} state_t;
    state_t state;
    static wseg_t *wseg = NULL;
    
    /* NOTE(review): n_refwd is never checked against MAX_UTT_LEN below */
    if (! wseg)
	wseg = (wseg_t *) ckd_calloc (MAX_UTT_LEN, sizeof(wseg_t));

    lp = line;
    n_hypci = n_refci = pronlen = 0;
    /* -1: no reference word entered yet; the first phone advances it to 0 */
    n_refwd = -1;
    uttid[0] = '\0';
    state = CORR;
    
    /* %n stores the number of characters consumed so lp can step past the token */
    while (sscanf (lp, "%s%n", word, &k) == 1) {
	lp += k;
	
	if (is_uttid (word, uttid))
	    break;
	
	if (strcmp (word, "[[") == 0) {
	    if (state != CORR)
		E_FATAL("%s: Illegal [[\n", id);
	    state = REFERR;
	    /* The region opens in the middle of a reference word: flag that word */
	    if (n_refci < pronlen)
		wseg[n_refwd].err = 1;
	} else if (strcmp (word, "]]") == 0) {
	    if (state != HYPERR)
		E_FATAL("%s: Illegal ]]\n", id);
	    state = CORR;
	} else if (strcmp (word, "=>") == 0) {
	    if (state != REFERR)
		E_FATAL("%s: Illegal =>\n", id);
	    state = HYPERR;
	} else {
	    ci = mdef_ciphone_id (mdef, word);
	    if (NOT_CIPID(ci))
		E_FATAL("%s: Unknown CIphone %s\n", id, word);
	    
	    if (state != HYPERR) {	/* Check if matches next pron in ref word */
		/* Previous reference word fully consumed: move on to the next one */
		if (n_refci >= pronlen) {
		    assert (n_refci == pronlen);
		    n_refwd++;
		    pronlen = dict->word[ref[n_refwd]].pronlen;
		    assert (pronlen > 0);

		    /* Start index is known only when the word begins outside an error region */
		    wseg[n_refwd].s = (state == CORR) ? n_hypci : -1;
		    wseg[n_refwd].e = -1;
		    wseg[n_refwd].err = 0;
		    
		    n_refci = 0;
		}
		/* NOTE(review): ref[n_refwd] has already been used as a dict->word index above when a new word was entered */
		if (NOT_WID(ref[n_refwd]))
		    E_FATAL("%s: Premature end of ref wid\n", id);

		if (dict->word[ref[n_refwd]].ciphone[n_refci] != ci)
		    E_FATAL("%s: CIphone mismatch at word %d, ciphone %d\n",
			    id, n_refwd, n_refci);
		n_refci++;
		/* End index is recorded only when the word ends outside an error region */
		if ((n_refci == pronlen) && (state == CORR))
		    wseg[n_refwd].e = n_hypci;

		if (state != CORR)
		    wseg[n_refwd].err = 1;
	    }
	    
	    /* Phones outside the reference side of an error region go to the hypothesis output */
	    if (state != REFERR) {
		if (n_hypci >= aplen)
		    E_FATAL("%s: Too many CIphones: >%d\n", id, aplen);
		ap[n_hypci] = ci;
		ap_err[n_hypci] = (state == CORR) ? 0 : 1;
		n_hypci++;
	    }
	}
    }
    /* The line must end on a reference word boundary, with no reference words left over */
    assert (n_refci == pronlen);
    n_refwd++;
    assert (NOT_WID(ref[n_refwd]));
    /* Extra entry after the last word: both boundaries are the final phone count */
    wseg[n_refwd].s = wseg[n_refwd].e = n_hypci;
    wseg[n_refwd].err = 0;
    
    /* Terminate the hypothesis phone list */
    ap[n_hypci] = BAD_CIPID;
    ap_err[n_hypci] = 1;
    
    /* uttid stays empty if no token was accepted by is_uttid() */
    if (strcmp (uttid, id) != 0)
	E_FATAL("Uttid mismatch: %s expected, %s found\n", id, uttid);

#if 0
    for (i = 0; IS_WID(ref[i]); i++) {
	printf ("%s: %4d %4d %d %s\n", id, wseg[i].s, wseg[i].e, wseg[i].err,
		dict_wordstr (dict, ref[i]));
    }
#endif

    return wseg;
}
示例#10
0
文件: mdef.c 项目: Ankit77/cmusphinx
/*
 * Initialize phones (ci and triphones) and state->senone mappings from .mdef file.
 *
 * mdeffile: path of the text model definition file; a NULL path is reported
 *           through E_FATAL.
 * breport:  when non-zero, an E_INFO line naming the file is logged first.
 *
 * Returns the newly allocated mdef_t.  Every problem is reported through
 * E_FATAL / E_FATAL_SYSTEM; the statements following each such call are written
 * on the assumption that it does not return.
 */
mdef_t *
mdef_init(const char *mdeffile, int32 breport)
{
    FILE *fp;
    int32 n_ci, n_tri, n_map, n;
    char tag[1024], buf[1024];
    s3senid_t **senmap;
    /*    s3senid_t *tempsenmap; */

    s3pid_t p;
    int32 s, ci, cd;
    mdef_t *m;

    if (!mdeffile)
        E_FATAL("No mdef-file\n");

    if (breport)
        E_INFO("Reading model definition: %s\n", mdeffile);

    m = (mdef_t *) ckd_calloc(1, sizeof(mdef_t));       /* freed in mdef_free */

    if ((fp = fopen(mdeffile, "r")) == NULL)
        E_FATAL_SYSTEM("fopen(%s,r) failed\n", mdeffile);

    if (noncomment_line(buf, sizeof(buf), fp) < 0)
        E_FATAL("Empty file: %s\n", mdeffile);

    /* Only the leading strlen(MODEL_DEF_VERSION) characters are compared */
    if (strncmp(buf, MODEL_DEF_VERSION, strlen(MODEL_DEF_VERSION)) != 0)
        E_FATAL("Version error: Expecing %s, but read %s\n",
                MODEL_DEF_VERSION, buf);

    /* Read #base phones, #triphones, #senone mappings defined in header */
    /* -1 marks "not seen yet"; the loop below runs until all six counts are set */
    n_ci = -1;
    n_tri = -1;
    n_map = -1;
    m->n_ci_sen = -1;
    m->n_sen = -1;
    m->n_tmat = -1;
    do {
        if (noncomment_line(buf, sizeof(buf), fp) < 0)
            E_FATAL("Incomplete header\n");

        /* Each header line is "<count> <tag>"; negative counts are rejected */
        if ((sscanf(buf, "%d %s", &n, tag) != 2) || (n < 0))
            E_FATAL("Error in header: %s\n", buf);

        if (strcmp(tag, "n_base") == 0)
            n_ci = n;
        else if (strcmp(tag, "n_tri") == 0)
            n_tri = n;
        else if (strcmp(tag, "n_state_map") == 0)
            n_map = n;
        else if (strcmp(tag, "n_tied_ci_state") == 0)
            m->n_ci_sen = n;
        else if (strcmp(tag, "n_tied_state") == 0)
            m->n_sen = n;
        else if (strcmp(tag, "n_tied_tmat") == 0)
            m->n_tmat = n;
        else
            E_FATAL("Unknown header line: %s\n", buf);
    } while ((n_ci < 0) || (n_tri < 0) || (n_map < 0) ||
             (m->n_ci_sen < 0) || (m->n_sen < 0) || (m->n_tmat < 0));

    /* n_tri may legitimately be 0; the other counts checked here may not */
    if ((n_ci == 0) || (m->n_ci_sen == 0) || (m->n_tmat == 0)
        || (m->n_ci_sen > m->n_sen))
        E_FATAL("%s: Error in header\n", mdeffile);

    /* Check typesize limits */
    if (n_ci >= MAX_S3CIPID)
        E_FATAL("%s: #CI phones (%d) exceeds limit (%d)\n", mdeffile, n_ci,
                MAX_S3CIPID);
    if (n_ci + n_tri >= MAX_S3PID)
        E_FATAL("%s: #Phones (%d) exceeds limit (%d)\n", mdeffile,
                n_ci + n_tri, MAX_S3PID);
    if (m->n_sen >= MAX_S3SENID)
        E_FATAL("%s: #senones (%d) exceeds limit (%d)\n", mdeffile,
                m->n_sen, MAX_S3SENID);
    if (m->n_tmat >= MAX_S3TMATID)
        E_FATAL("%s: #tmats (%d) exceeds limit (%d)\n", mdeffile,
                m->n_tmat, MAX_S3TMATID);

    /*
     * n_state_map counts (emitting states + 1) entries per phone, so the number
     * of emitting states is the per-phone share minus one.
     * NOTE(review): with n_state_map == 0 this yields -1 and the consistency
     * check below still passes (0 * x == 0); the negative value then reaches
     * the allocations before the CI-senone check further down can reject it.
     * NOTE(review): the message says "n_ci*n_tri" while the test uses n_ci+n_tri.
     */
    m->n_emit_state = (n_map / (n_ci + n_tri)) - 1;
    if ((m->n_emit_state + 1) * (n_ci + n_tri) != n_map)
        E_FATAL
            ("Header error: n_state_map not a multiple of n_ci*n_tri\n");

    /* Initialize ciphone info */
    m->n_ciphone = n_ci;
    /* NOTE(review): the original remark said "case-insensitive" while the flag passed is HASH_CASE_YES -- confirm which is intended */
    m->ciphone_ht = hash_table_new(n_ci, HASH_CASE_YES);  /* freed in mdef_free */
    m->ciphone = (ciphone_t *) ckd_calloc(n_ci, sizeof(ciphone_t));     /* freed in mdef_free */

    /* Initialize phones info (ciphones + triphones) */
    m->n_phone = n_ci + n_tri;
    m->phone = (phone_t *) ckd_calloc(m->n_phone, sizeof(phone_t));     /* freed in mdef_free */

    /* Allocate space for state->senone map for each phone */
    senmap = (s3senid_t **) ckd_calloc_2d(m->n_phone, m->n_emit_state, sizeof(s3senid_t));      /* freed in mdef_free */
    m->sseq = senmap;           /* TEMPORARY; until it is compressed into just the unique ones */


    /**CODE DUPLICATION!*****************************************************************************************************/
    /* Flat decoder-specific */
    /* Allocate space for state->senone map for each phone */

    /* ARCHAN 20040820: this sacrifices readability and may cause pointer
       problems in the future.  However, it is a lesser evil than
       duplicating code.  The trick is to point every phone's state mapping
       into one global array and so avoid duplicated memory.
     */

    /* S3 xwdpid_compress will compress the phone list below.
     */

    /* ARCHAN: this part should not be used when one of the recognizers is used. */
    /* One contiguous array; each phone's state pointer is a slice of n_emit_state entries */
    m->st2senmap =
        (s3senid_t *) ckd_calloc(m->n_phone * m->n_emit_state,
                                 sizeof(s3senid_t));
    for (p = 0; p < m->n_phone; p++)
        m->phone[p].state = m->st2senmap + (p * m->n_emit_state);
    /******************************************************************************************************/


    /* Allocate initial space for <ci,lc,rc,wpos> -> pid mapping */
    m->wpos_ci_lclist = (ph_lc_t ***) ckd_calloc_2d(N_WORD_POSN, m->n_ciphone, sizeof(ph_lc_t *));      /* freed in mdef_free */

    /*
     * Read base phones and triphones.  They'll simply be assigned a running sequence
     * number as their "phone-id".  If the phone-id < n_ci, it's a ciphone.
     */

    /* Read base phones */
    for (p = 0; p < n_ci; p++) {
        if (noncomment_line(buf, sizeof(buf), fp) < 0)
            E_FATAL("Premature EOF reading CIphone %d\n", p);
        parse_base_line(m, buf, p);
    }
    /* The silence phone can only be looked up once all base phones are known */
    m->sil = mdef_ciphone_id(m, S3_SILENCE_CIPHONE);

    /* Read triphones, if any */
    /* p continues from n_ci, so triphone ids follow the CI phone ids */
    for (; p < m->n_phone; p++) {
        if (noncomment_line(buf, sizeof(buf), fp) < 0)
            E_FATAL("Premature EOF reading phone %d\n", p);
        parse_tri_line(m, buf, p);
    }

    /* Extra content is only reported (E_ERROR); loading continues */
    if (noncomment_line(buf, sizeof(buf), fp) >= 0)
        E_ERROR("Non-empty file beyond expected #phones (%d)\n",
                m->n_phone);

    /* Build CD senones to CI senones map */
    if (m->n_ciphone * m->n_emit_state != m->n_ci_sen)
        E_FATAL
            ("#CI-senones(%d) != #CI-phone(%d) x #emitting-states(%d)\n",
             m->n_ci_sen, m->n_ciphone, m->n_emit_state);
    m->cd2cisen = (s3senid_t *) ckd_calloc(m->n_sen, sizeof(s3senid_t));        /* freed in mdef_free */

    m->sen2cimap = (s3cipid_t *) ckd_calloc(m->n_sen, sizeof(s3cipid_t));       /* freed in mdef_free */

    /* BAD_S3CIPID marks senones that no phone has claimed */
    for (s = 0; s < m->n_sen; s++)
        m->sen2cimap[s] = BAD_S3CIPID;
    /* The first n_ci_sen senones map to themselves; their owner is s / n_emit_state */
    for (s = 0; s < m->n_ci_sen; s++) { /* CI senones */
        m->cd2cisen[s] = (s3senid_t) s;
        m->sen2cimap[s] = s / m->n_emit_state;
    }
    /* Each triphone state is tied to the same-numbered state of its base phone */
    for (p = n_ci; p < m->n_phone; p++) {       /* CD senones */
        for (s = 0; s < m->n_emit_state; s++) {
            cd = m->sseq[p][s];
            ci = m->sseq[(int) m->phone[p].ci][s];
            m->cd2cisen[cd] = (s3senid_t) ci;
            m->sen2cimap[cd] = m->phone[p].ci;
        }
    }

    /* Runs after the maps are built, since the loops above index m->sseq by phone id */
    sseq_compress(m);
    fclose(fp);

    return m;
}