/**
 * Parse one finite-state grammar description from an open text stream.
 *
 * The checks below enforce this layout:
 *   1. a line whose first word is FSG_MODEL_BEGIN_DECL, optionally followed
 *      by the grammar name (anything before that line is ignored);
 *   2. a "#states" line, a start-state line and a final-state line, each
 *      made of a keyword and one integer;
 *   3. any number of transition lines of the form
 *      "keyword from-state to-state trans-prob [word]";
 *   4. a line whose first word is FSG_MODEL_END_DECL.
 *
 * @param fp    Stream to read from; it is not closed here.
 * @param lmath Log-math object handed to fsg_model_init() and used to turn
 *              each transition probability into a log value.
 * @param lw    Weight handed to fsg_model_init(); every log transition
 *              probability is multiplied by the model's lw field.
 * @return The newly built model, or NULL when the input is malformed (an
 *         error has been logged and all temporaries released).
 */
fsg_model_t *
fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw)
{
    fsg_model_t *model = NULL;
    hash_table_t *word_ids = hash_table_new(32, FALSE);
    hash_iter_t *it;
    glist_t null_links = NULL;
    char **words = NULL;
    char *linebuf = NULL;
    char *name = NULL;
    int32 next_wid;
    int32 lineno = 0;
    int32 nwords, from, to;
    int n_state, n_trans, n_null_trans;
    int decl_ok;
    float32 prob;

    /* Skip everything up to the begin declaration. */
    do {
        nwords = nextline_str2words(fp, &lineno, &linebuf, &words);
        if (nwords < 0) {
            E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
            goto parse_error;
        }
    } while (strcmp(words[0], FSG_MODEL_BEGIN_DECL) != 0);

    if (nwords > 2) {
        E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n", lineno);
        goto parse_error;
    }

    /* Copy the name now: the word pointers are reused by the next read.
     * Fall back to a default when the declaration carries no name. */
    if (nwords != 2) {
        E_WARN("FSG name is missing\n");
        name = ckd_salloc("unknown");
    }
    else {
        name = ckd_salloc(words[1]);
    }

    /* Number of states. */
    nwords = nextline_str2words(fp, &lineno, &linebuf, &words);
    decl_ok = (nwords == 2)
        && ((strcmp(words[0], FSG_MODEL_N_DECL) == 0)
            || (strcmp(words[0], FSG_MODEL_NUM_STATES_DECL) == 0))
        && (sscanf(words[1], "%d", &n_state) == 1)
        && (n_state > 0);
    if (!decl_ok) {
        E_ERROR
            ("Line[%d]: #states declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* The state count is known, so the model itself can be created. */
    model = fsg_model_init(name, lmath, lw, n_state);
    ckd_free(name);
    name = NULL;

    /* Start state. */
    nwords = nextline_str2words(fp, &lineno, &linebuf, &words);
    decl_ok = (nwords == 2)
        && ((strcmp(words[0], FSG_MODEL_S_DECL) == 0)
            || (strcmp(words[0], FSG_MODEL_START_STATE_DECL) == 0))
        && (sscanf(words[1], "%d", &(model->start_state)) == 1)
        && (model->start_state >= 0)
        && (model->start_state < model->n_state);
    if (!decl_ok) {
        E_ERROR
            ("Line[%d]: start state declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Final state. */
    nwords = nextline_str2words(fp, &lineno, &linebuf, &words);
    decl_ok = (nwords == 2)
        && ((strcmp(words[0], FSG_MODEL_F_DECL) == 0)
            || (strcmp(words[0], FSG_MODEL_FINAL_STATE_DECL) == 0))
        && (sscanf(words[1], "%d", &(model->final_state)) == 1)
        && (model->final_state >= 0)
        && (model->final_state < model->n_state);
    if (!decl_ok) {
        E_ERROR
            ("Line[%d]: final state declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Transitions, up to the end declaration. */
    next_wid = 0;
    n_trans = 0;
    n_null_trans = 0;
    for (;;) {
        int32 wid, tprob;
        int is_trans, spec_ok;

        nwords = nextline_str2words(fp, &lineno, &linebuf, &words);
        if (nwords <= 0) {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        if (strcmp(words[0], FSG_MODEL_END_DECL) == 0) {
            break;
        }

        is_trans = (strcmp(words[0], FSG_MODEL_T_DECL) == 0)
            || (strcmp(words[0], FSG_MODEL_TRANSITION_DECL) == 0);
        if (!is_trans) {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        spec_ok = ((nwords == 4) || (nwords == 5))
            && (sscanf(words[1], "%d", &from) == 1)
            && (sscanf(words[2], "%d", &to) == 1)
            && (from >= 0) && (from < model->n_state)
            && (to >= 0) && (to < model->n_state);
        if (!spec_ok) {
            E_ERROR
                ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
                 lineno);
            goto parse_error;
        }

        prob = atof_c(words[3]);
        if ((prob <= 0.0) || (prob > 1.0)) {
            E_ERROR
                ("Line[%d]: transition spec malformed; Expecting float as transition probability\n",
                 lineno);
            goto parse_error;
        }

        tprob = (int32) (logmath_log(lmath, prob) * model->lw);

        if (nwords <= 4) {
            /* No word on the line: this is a null transition.  Only those
             * for which the add call returns 1 are counted and remembered
             * for the closure step. */
            if (fsg_model_null_trans_add(model, from, to, tprob) == 1) {
                ++n_null_trans;
                null_links =
                    glist_add_ptr(null_links,
                                  fsg_model_null_trans(model, from, to));
            }
            continue;
        }

        /* Word transition: give each distinct word the next free id. */
        if (hash_table_lookup_int32(word_ids, words[4], &wid) < 0) {
            (void) hash_table_enter_int32(word_ids, ckd_salloc(words[4]),
                                          next_wid);
            wid = next_wid;
            ++next_wid;
        }
        fsg_model_trans_add(model, from, to, tprob, wid);
        ++n_trans;
    }

    E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n",
           model->n_state, hash_table_inuse(word_ids), n_trans,
           n_null_trans);

    /* Turn the word -> id table into the model's id -> word array.  The
     * key strings are handed over to the array rather than copied. */
    model->n_word = hash_table_inuse(word_ids);
    model->n_word_alloc = model->n_word + 10;   /* Pad it a bit. */
    model->vocab = ckd_calloc(model->n_word_alloc, sizeof(*model->vocab));
    it = hash_table_iter(word_ids);
    while (it) {
        char const *word = hash_entry_key(it->ent);
        int32 wid = (int32) (long) hash_entry_val(it->ent);

        model->vocab[wid] = (char *) word;
        it = hash_table_iter_next(it);
    }
    hash_table_free(word_ids);

    /* Transitive closure over the collected null transitions. */
    null_links = fsg_model_null_trans_closure(model, null_links);
    glist_free(null_links);

    ckd_free(linebuf);
    ckd_free(words);

    return model;

  parse_error:
    /* The word strings are still owned by the hash table here, so they
     * must be released before the table itself. */
    it = hash_table_iter(word_ids);
    while (it) {
        ckd_free((char *) hash_entry_key(it->ent));
        it = hash_table_iter_next(it);
    }
    glist_free(null_links);
    hash_table_free(word_ids);
    ckd_free(name);
    ckd_free(linebuf);
    ckd_free(words);
    fsg_model_free(model);
    return NULL;
}
/**
 * Read one FSG description from an open text stream and compile it.
 *
 * The checks below enforce this layout:
 *   - a begin declaration (WORD_FSG_BEGIN_DECL or WORD_FST_BEGIN_DECL),
 *     optionally followed by the FSG name; lines before it are ignored;
 *   - a #states line, a start-state line and a final-state line, each made
 *     of a keyword and one integer;
 *   - any number of transition lines
 *     "keyword from-state to-state trans-prob [word]";
 *   - an end declaration (WORD_FSG_END_DECL or WORD_FST_END_DECL).
 *
 * @param fp           Stream to read from; it is not closed here.
 * @param use_altpron  Passed through unchanged to word_fsg_load().
 * @param use_filler   Passed through unchanged to word_fsg_load().
 * @param kbc          Passed through unchanged to word_fsg_load().
 * @return The result of word_fsg_load() on the parsed description, or NULL
 *         when the input is malformed (an error has been logged).  The
 *         intermediate s2_fsg_t is always freed before returning.
 */
word_fsg_t *
word_fsg_read(FILE * fp, int use_altpron, int use_filler, kbcore_t *kbc)
{
    s2_fsg_t *fsg;              /* "External" FSG structure */
    s2_fsg_trans_t *trans;
    word_fsg_t *cfsg;           /* "Compiled" FSG structure */
    char *wordptr[WORD_FSG_MAX_WORDPTR];        /* ptrs to words in an input line */
    char line[WORD_FSG_MAX_LINE];
    int32 lineno;
    int32 n, i, j;
    float32 p;

    lineno = 0;

    /* Scan upto FSG_BEGIN header */
    for (;;) {
        n = nextline_str2words(fp, &lineno, line, WORD_FSG_MAX_LINE,
                               wordptr, WORD_FSG_MAX_WORDPTR);
        if (n < 0) {
            E_ERROR("%s declaration missing\n", WORD_FSG_BEGIN_DECL);
            return NULL;
        }

        /* With a zero word count there is no wordptr[0] to inspect, and
         * wordptr is an uninitialised stack array at this point.  Skip the
         * line like any other line that precedes the header. */
        if (n == 0) {
            continue;
        }

        if ((strcmp(wordptr[0], WORD_FSG_BEGIN_DECL) == 0) ||
            (strcmp(wordptr[0], WORD_FST_BEGIN_DECL) == 0)) {
            if (n > 2) {
                E_ERROR("Line[%d]: malformed FSG_BEGIN delcaration\n",
                        lineno);
                return NULL;
            }
            break;
        }
    }

    /* FSG_BEGIN found; note FSG name */
    fsg = ckd_calloc(1, sizeof(*fsg));
    fsg->name = (n == 2) ? ckd_salloc(wordptr[1]) : NULL;
    fsg->trans_list = NULL;

    /* Read #states */
    n = nextline_str2words(fp, &lineno, line, WORD_FSG_MAX_LINE,
                           wordptr, WORD_FSG_MAX_WORDPTR);
    if ((n != 2)
        || ((strcmp(wordptr[0], WORD_FSG_N_DECL) != 0)
            && (strcmp(wordptr[0], WORD_FSG_NUM_STATES_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &(fsg->n_state)) != 1)
        || (fsg->n_state <= 0)) {
        E_ERROR
            ("Line[%d]: #states declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Read start state */
    n = nextline_str2words(fp, &lineno, line, WORD_FSG_MAX_LINE,
                           wordptr, WORD_FSG_MAX_WORDPTR);
    if ((n != 2)
        || ((strcmp(wordptr[0], WORD_FSG_S_DECL) != 0)
            && (strcmp(wordptr[0], WORD_FSG_START_STATE_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1)
        || (fsg->start_state < 0)
        || (fsg->start_state >= fsg->n_state)) {
        E_ERROR
            ("Line[%d]: start state declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Read final state */
    n = nextline_str2words(fp, &lineno, line, WORD_FSG_MAX_LINE,
                           wordptr, WORD_FSG_MAX_WORDPTR);
    if ((n != 2)
        || ((strcmp(wordptr[0], WORD_FSG_F_DECL) != 0)
            && (strcmp(wordptr[0], WORD_FSG_FINAL_STATE_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1)
        || (fsg->final_state < 0)
        || (fsg->final_state >= fsg->n_state)) {
        E_ERROR
            ("Line[%d]: final state declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Read transitions */
    for (;;) {
        n = nextline_str2words(fp, &lineno, line, WORD_FSG_MAX_LINE,
                               wordptr, WORD_FSG_MAX_WORDPTR);
        if (n <= 0) {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        if ((strcmp(wordptr[0], WORD_FSG_END_DECL) == 0) ||
            (strcmp(wordptr[0], WORD_FST_END_DECL) == 0)) {
            break;
        }

        if ((strcmp(wordptr[0], WORD_FSG_T_DECL) != 0)
            && (strcmp(wordptr[0], WORD_FSG_TRANSITION_DECL) != 0)) {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        /* Check field count and numeric syntax first.  If any of this
         * fails, i, j or p may not have been assigned for this line, so
         * the value-specific diagnostics further down must not run. */
        if (((n != 4) && (n != 5))
            || (sscanf(wordptr[1], "%d", &i) != 1)
            || (sscanf(wordptr[2], "%d", &j) != 1)
            || (sscanf(wordptr[3], "%f", &p) != 1)) {
            E_ERROR
                ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
                 lineno);
            goto parse_error;
        }

        /* All three values are now known to be parsed, so range-check
         * them.  The probability test is written in positive form so that
         * a NaN, for which every comparison is false, is rejected too. */
        if ((i < 0) || (i >= fsg->n_state)
            || (j < 0) || (j >= fsg->n_state)
            || !((p > 0.0) && (p <= 1.0))) {
            E_ERROR
                ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
                 lineno);

            if (p <= 0.0) {
                E_ERROR
                    ("Probability couldn't be zero in the current format. \n");
            }
            if (j >= fsg->n_state) {
                E_ERROR
                    ("Number of state (%d) is more than expected (from the FINAL_STATE: %d)\n",
                     j + 1, fsg->n_state);
            }
            goto parse_error;
        }

        /* Add transition to fsg (prepended, so the list ends up in reverse
         * file order). */
        trans = ckd_calloc(1, sizeof(*trans));
        trans->from_state = i;
        trans->to_state = j;
        trans->prob = p;
        trans->word = (n > 4) ? ckd_salloc(wordptr[4]) : NULL;
        trans->next = fsg->trans_list;
        fsg->trans_list = trans;
    }

    /* Compile the description; the external form is released afterwards. */
    cfsg = word_fsg_load(fsg, use_altpron, use_filler, kbc);

    s2_fsg_free(fsg);

    return cfsg;

  parse_error:
    s2_fsg_free(fsg);
    return NULL;
}