예제 #1
0
/**
 * Parse a textual FSG description from an open stream and build a model.
 *
 * The stream is consumed line by line through nextline_str2words().  The
 * expected order is: the FSG_MODEL_BEGIN_DECL line (optionally carrying the
 * grammar name), the number-of-states line, the start-state line, the
 * final-state line, then any number of transition lines terminated by
 * FSG_MODEL_END_DECL.
 *
 * @param fp    Stream to read the grammar from.
 * @param lmath Log-math object used to convert transition probabilities.
 * @param lw    Language weight handed to fsg_model_init().
 * @return The newly built model, or NULL if the input is missing a
 *         declaration or contains a malformed line (everything allocated
 *         here is released before returning NULL).
 */
fsg_model_t *
fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw)
{
    fsg_model_t *fsg = NULL;
    hash_table_t *vocab = hash_table_new(32, FALSE);
    hash_iter_t *it;
    glist_t null_list = NULL;
    char **words = NULL;
    char *line = NULL;
    char *name = NULL;
    int32 line_no = 0;
    int32 next_wid;
    int32 n_words, from_state, to_state;
    int n_state, n_trans, n_null_trans;
    int decl_ok;
    float32 prob;

    /* Skip everything up to (and including) the FSG_BEGIN header. */
    do {
        n_words = nextline_str2words(fp, &line_no, &line, &words);
        if (n_words < 0) {
            E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
            goto parse_error;
        }
    } while (strcmp(words[0], FSG_MODEL_BEGIN_DECL) != 0);

    if (n_words > 2) {
        E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n", line_no);
        goto parse_error;
    }

    /* Copy the grammar name now: the word array is reused by the next
     * read.  Fall back to a placeholder when the header carries no name. */
    if (n_words != 2)
        E_WARN("FSG name is missing\n");
    name = ckd_salloc((n_words == 2) ? words[1] : "unknown");

    /* Number of states. */
    n_words = nextline_str2words(fp, &line_no, &line, &words);
    decl_ok = (n_words == 2)
        && ((strcmp(words[0], FSG_MODEL_N_DECL) == 0)
            || (strcmp(words[0], FSG_MODEL_NUM_STATES_DECL) == 0))
        && (sscanf(words[1], "%d", &n_state) == 1)
        && (n_state > 0);
    if (!decl_ok) {
        E_ERROR
            ("Line[%d]: #states declaration line missing or malformed\n",
             line_no);
        goto parse_error;
    }

    /* The state count is known, so the model itself can be allocated. */
    fsg = fsg_model_init(name, lmath, lw, n_state);
    ckd_free(name);
    name = NULL;

    /* Start state (parsed directly into the model). */
    n_words = nextline_str2words(fp, &line_no, &line, &words);
    decl_ok = (n_words == 2)
        && ((strcmp(words[0], FSG_MODEL_S_DECL) == 0)
            || (strcmp(words[0], FSG_MODEL_START_STATE_DECL) == 0))
        && (sscanf(words[1], "%d", &(fsg->start_state)) == 1)
        && (fsg->start_state >= 0)
        && (fsg->start_state < fsg->n_state);
    if (!decl_ok) {
        E_ERROR
            ("Line[%d]: start state declaration line missing or malformed\n",
             line_no);
        goto parse_error;
    }

    /* Final state (parsed directly into the model). */
    n_words = nextline_str2words(fp, &line_no, &line, &words);
    decl_ok = (n_words == 2)
        && ((strcmp(words[0], FSG_MODEL_F_DECL) == 0)
            || (strcmp(words[0], FSG_MODEL_FINAL_STATE_DECL) == 0))
        && (sscanf(words[1], "%d", &(fsg->final_state)) == 1)
        && (fsg->final_state >= 0)
        && (fsg->final_state < fsg->n_state);
    if (!decl_ok) {
        E_ERROR
            ("Line[%d]: final state declaration line missing or malformed\n",
             line_no);
        goto parse_error;
    }

    /* Transition lines, up to the FSG_END marker. */
    next_wid = 0;
    n_trans = 0;
    n_null_trans = 0;
    for (;;) {
        int32 wid, tprob;

        n_words = nextline_str2words(fp, &line_no, &line, &words);
        if (n_words <= 0) {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    line_no);
            goto parse_error;
        }

        if (strcmp(words[0], FSG_MODEL_END_DECL) == 0)
            break;

        /* Anything that is not a transition keyword is an error here. */
        if ((strcmp(words[0], FSG_MODEL_T_DECL) != 0)
            && (strcmp(words[0], FSG_MODEL_TRANSITION_DECL) != 0)) {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    line_no);
            goto parse_error;
        }

        /* keyword from-state to-state prob [word] => 4 or 5 tokens. */
        decl_ok = ((n_words == 4) || (n_words == 5))
            && (sscanf(words[1], "%d", &from_state) == 1)
            && (sscanf(words[2], "%d", &to_state) == 1)
            && (from_state >= 0) && (from_state < fsg->n_state)
            && (to_state >= 0) && (to_state < fsg->n_state);
        if (!decl_ok) {
            E_ERROR
                ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
                 line_no);
            goto parse_error;
        }

        prob = atof_c(words[3]);
        if ((prob <= 0.0) || (prob > 1.0)) {
            E_ERROR
                ("Line[%d]: transition spec malformed; Expecting float as transition probability\n",
                 line_no);
            goto parse_error;
        }

        /* Log-domain probability scaled by the model's language weight. */
        tprob = (int32) (logmath_log(lmath, prob) * fsg->lw);

        if (n_words <= 4) {
            /* No word token: this is a null transition.  Only those the
             * model reports as added (return value 1) are counted and
             * remembered for the closure step. */
            if (fsg_model_null_trans_add(fsg, from_state, to_state, tprob)
                == 1) {
                ++n_null_trans;
                null_list =
                    glist_add_ptr(null_list,
                                  fsg_model_null_trans(fsg, from_state,
                                                       to_state));
            }
            continue;
        }

        /* Word transition: give unseen words the next free word ID. */
        if (hash_table_lookup_int32(vocab, words[4], &wid) < 0) {
            wid = next_wid++;
            (void) hash_table_enter_int32(vocab, ckd_salloc(words[4]), wid);
        }
        fsg_model_trans_add(fsg, from_state, to_state, tprob, wid);
        ++n_trans;
    }

    E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n",
           fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans);

    /* Turn the temporary word->ID table into the model's ID->word array.
     * The key strings are handed over to the model as-is, so only the
     * table itself is released afterwards. */
    fsg->n_word = hash_table_inuse(vocab);
    fsg->n_word_alloc = fsg->n_word + 10;       /* A little headroom. */
    fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab));
    it = hash_table_iter(vocab);
    while (it != NULL) {
        int32 id = (int32) (long) hash_entry_val(it->ent);

        fsg->vocab[id] = (char *) hash_entry_key(it->ent);
        it = hash_table_iter_next(it);
    }
    hash_table_free(vocab);

    /* Compute the transitive closure over the null transitions. */
    null_list = fsg_model_null_trans_closure(fsg, null_list);
    glist_free(null_list);

    ckd_free(line);
    ckd_free(words);

    return fsg;

  parse_error:
    /* On failure the word strings are still owned by the hash table, so
     * free them before the table. */
    it = hash_table_iter(vocab);
    while (it != NULL) {
        ckd_free((char *) hash_entry_key(it->ent));
        it = hash_table_iter_next(it);
    }
    glist_free(null_list);
    hash_table_free(vocab);
    ckd_free(name);
    ckd_free(line);
    ckd_free(words);
    fsg_model_free(fsg);
    return NULL;
}
예제 #2
0
/**
 * Read a word-level FSG description from an open stream and compile it.
 *
 * The stream is consumed line by line through nextline_str2words().  The
 * expected order is: a WORD_FSG_BEGIN_DECL / WORD_FST_BEGIN_DECL line
 * (optionally carrying the grammar name), the number-of-states line, the
 * start-state line, the final-state line, then any number of transition
 * lines terminated by WORD_FSG_END_DECL / WORD_FST_END_DECL.  The parsed
 * "external" structure is passed to word_fsg_load() and then freed.
 *
 * @param fp          Stream to read the grammar from.
 * @param use_altpron Forwarded unchanged to word_fsg_load().
 * @param use_filler  Forwarded unchanged to word_fsg_load().
 * @param kbc         Forwarded unchanged to word_fsg_load().
 * @return Whatever word_fsg_load() returns for the parsed grammar, or NULL
 *         if a declaration is missing or a line is malformed.
 */
word_fsg_t *
word_fsg_read(FILE * fp,
              int use_altpron, int use_filler,
	      kbcore_t *kbc)
{
    s2_fsg_t *fsg;              /* "External" FSG structure */
    s2_fsg_trans_t *trans;
    word_fsg_t *cfsg;           /* "Compiled" FSG structure */
    char *wordptr[WORD_FSG_MAX_WORDPTR];        /* ptrs to words in an input line */
    char line[WORD_FSG_MAX_LINE];
    int32 lineno;
    int32 n;
    int32 i = 0, j = 0;
    float32 p = 0.0f;

    lineno = 0;

    /* Scan upto FSG_BEGIN header */
    for (;;) {
        n = nextline_str2words(fp, &lineno, line, WORD_FSG_MAX_LINE,
			       wordptr, WORD_FSG_MAX_WORDPTR);
        if (n < 0) {
            E_ERROR("%s declaration missing\n", WORD_FSG_BEGIN_DECL);
            return NULL;
        }

        if ((strcmp(wordptr[0], WORD_FSG_BEGIN_DECL) == 0)
            || (strcmp(wordptr[0], WORD_FST_BEGIN_DECL) == 0)) {
            if (n > 2) {
                E_ERROR("Line[%d]: malformed FSG_BEGIN delcaration\n",
                        lineno);
                return NULL;
            }
            break;
        }
    }

    /* FSG_BEGIN found; note FSG name (NULL when the header has none) */
    fsg = (s2_fsg_t *) ckd_calloc(1, sizeof(s2_fsg_t));
    fsg->name = (n == 2) ? ckd_salloc(wordptr[1]) : NULL;
    fsg->trans_list = NULL;


    /* Read #states */
    n = nextline_str2words(fp, &lineno, line, WORD_FSG_MAX_LINE,
			   wordptr, WORD_FSG_MAX_WORDPTR);
    if ((n != 2)
        || ((strcmp(wordptr[0], WORD_FSG_N_DECL) != 0)
            && (strcmp(wordptr[0], WORD_FSG_NUM_STATES_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &(fsg->n_state)) != 1)
        || (fsg->n_state <= 0)) {
        E_ERROR
            ("Line[%d]: #states declaration line missing or malformed\n",
             lineno);

        goto parse_error;
    }

    /* Read start state */
    n = nextline_str2words(fp, &lineno, line, WORD_FSG_MAX_LINE,
			   wordptr, WORD_FSG_MAX_WORDPTR);
    if ((n != 2)
        || ((strcmp(wordptr[0], WORD_FSG_S_DECL) != 0)
            && (strcmp(wordptr[0], WORD_FSG_START_STATE_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1)
        || (fsg->start_state < 0)
        || (fsg->start_state >= fsg->n_state)) {
        E_ERROR
            ("Line[%d]: start state declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Read final state */
    n = nextline_str2words(fp, &lineno, line, WORD_FSG_MAX_LINE,
			   wordptr, WORD_FSG_MAX_WORDPTR);
    if ((n != 2)
        || ((strcmp(wordptr[0], WORD_FSG_F_DECL) != 0)
            && (strcmp(wordptr[0], WORD_FSG_FINAL_STATE_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1)
        || (fsg->final_state < 0)
        || (fsg->final_state >= fsg->n_state)) {
        E_ERROR
            ("Line[%d]: final state declaration line missing or malformed\n",
             lineno);

        goto parse_error;
    }

    /* Read transitions */
    for (;;) {
        /* Which fields of the current line were actually parsed.  They
         * gate every later read of i, j and p so that a value that was
         * never assigned for this line is never inspected. */
        int from_ok = 0, to_ok = 0, prob_ok = 0;

        n = nextline_str2words(fp, &lineno, line, WORD_FSG_MAX_LINE,
			   wordptr, WORD_FSG_MAX_WORDPTR);
        if (n <= 0) {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        if ((strcmp(wordptr[0], WORD_FSG_END_DECL) == 0)
            || (strcmp(wordptr[0], WORD_FST_END_DECL) == 0)) {
            break;
        }

        if ((strcmp(wordptr[0], WORD_FSG_T_DECL) == 0)
            || (strcmp(wordptr[0], WORD_FSG_TRANSITION_DECL) == 0)) {
            /* keyword from-state to-state prob [word] => 4 or 5 tokens.
             * Only touch wordptr[1..3] once the token count is known to
             * be large enough. */
            if ((n == 4) || (n == 5)) {
                from_ok = (sscanf(wordptr[1], "%d", &i) == 1);
                to_ok = (sscanf(wordptr[2], "%d", &j) == 1);
                prob_ok = (sscanf(wordptr[3], "%f", &p) == 1);
            }

            if (!from_ok || !to_ok || !prob_ok
                || (i < 0) || (i >= fsg->n_state)
                || (j < 0) || (j >= fsg->n_state)
                || (p <= 0.0) || (p > 1.0)) {
                E_ERROR
                    ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
                     lineno);
                /* Extra hints are only meaningful for fields that were
                 * successfully read from this line. */
                if (prob_ok && (p <= 0.0)) {
                    E_ERROR
                        ("Probability couldn't be zero in the current format. \n");
                }
                if (to_ok && (j >= fsg->n_state)) {
                    E_ERROR
                        ("Number of state (%d) is more than expected (from the FINAL_STATE: %d)\n",
                         j + 1, fsg->n_state);
                }

                goto parse_error;
            }
        }
        else {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        /* Add transition to fsg (prepended to the list; word is NULL when
         * the line has no fifth token) */
        trans = (s2_fsg_trans_t *) ckd_calloc(1, sizeof(s2_fsg_trans_t));
        trans->from_state = i;
        trans->to_state = j;
        trans->prob = p;
        trans->word = (n > 4) ? ckd_salloc(wordptr[4]) : NULL;
        trans->next = fsg->trans_list;
        fsg->trans_list = trans;
    }

    cfsg = word_fsg_load(fsg, use_altpron, use_filler, kbc);

    /* The external structure is no longer needed once it has been loaded. */
    s2_fsg_free(fsg);

    return cfsg;

  parse_error:
    s2_fsg_free(fsg);
    return NULL;
}