/**
 * Create a rule and register it in the grammar's rule table.
 *
 * @param jsgf      Grammar whose rule table receives the rule.
 * @param name      Rule name, or NULL to generate one of the form
 *                  "<GRAMMAR.gNNNNN>" from the grammar name and the
 *                  current number of entries in the rule table.  A
 *                  non-NULL name is expanded with jsgf_fullname(); the
 *                  caller's pointer itself is never stored.
 * @param rhs       Right-hand side, stored in the rule as given.
 * @param is_public Stored in the rule's is_public field.
 * @return The newly allocated rule, with refcnt set to 1.  The rule is
 *         returned even when the name was already present in the table;
 *         that case is only reported with a warning.
 */
jsgf_rule_t *
jsgf_define_rule(jsgf_t * jsgf, char *name, jsgf_rhs_t * rhs, int is_public)
{
    jsgf_rule_t *rule;
    void *val;

    if (name == NULL) {
        /* Besides the grammar name the format emits '<', ".g", '>' and
         * the terminating NUL (5 bytes), which leaves 11 of the 16
         * extra bytes for the formatted counter. */
        name = ckd_malloc(strlen(jsgf->name) + 16);
        sprintf(name, "<%s.g%05d>", jsgf->name,
                hash_table_inuse(jsgf->rules));
    }
    else {
        name = jsgf_fullname(jsgf, name);
    }

    rule = ckd_calloc(1, sizeof(*rule));
    rule->refcnt = 1;
    /* The rule keeps its own copy; `name` itself becomes the table key. */
    rule->name = ckd_salloc(name);
    rule->rhs = rhs;
    rule->is_public = is_public;

    E_INFO("Defined rule: %s%s\n",
           rule->is_public ? "PUBLIC " : "", rule->name);

    val = hash_table_enter(jsgf->rules, name, rule);
    if (val != (void *) rule) {
        E_WARN("Multiply defined symbol: %s\n", name);
        /* The table handed back a different value, i.e. our entry was
         * not stored, so nothing else references the key string we
         * built above.  Release it instead of leaking it.
         * NOTE(review): assumes jsgf_fullname() returns a ckd-allocated
         * string, as its use as an owned table key suggests -- confirm. */
        ckd_free(name);
    }
    return rule;
}
/*
 * (Re)build the merged vocabulary of a language model set together with
 * the table mapping each merged word ID to the word ID in every submodel.
 *
 * base  - the model set, viewed through its base-model header.
 * lmath - log-math object forwarded to ngram_model_init().
 * n     - value forwarded to ngram_model_init() (fourth argument).
 *
 * On return base->word_str holds the merged words sorted with
 * my_compare, base->wid maps each word to its index in that array, and
 * set->widmap[w][m] is ngram_wid() of merged word w in submodel m.
 */
static void
build_widmap(ngram_model_t * base, logmath_t * lmath, int32 n)
{
    ngram_model_set_t *set = (ngram_model_set_t *) base;
    ngram_model_t **lms = set->lms;
    hash_table_t *merged;
    glist_t entries;
    gnode_t *node;
    int32 m, w, n_filled;

    /* Gather the words of every submodel into one table.  The stored
     * value is irrelevant here and duplicate words are simply ignored. */
    merged = hash_table_new(lms[0]->n_words, FALSE);
    for (m = 0; m < set->n_models; ++m) {
        ngram_model_t *lm = lms[m];

        for (w = 0; w < lm->n_words; ++w)
            (void) hash_table_enter_int32(merged, lm->word_str[w], w);
    }

    /* Guarantee that the unknown word is part of the merged set. */
    if (hash_table_lookup(merged, "<UNK>", NULL) != 0)
        (void) hash_table_enter_int32(merged, "<UNK>", 0);

    /* The unigram count is now known, so the base model can be set up. */
    ngram_model_init(base, &ngram_model_set_funcs, lmath, n,
                     hash_table_inuse(merged));
    /* We will reuse the pointers from the submodels. */
    base->writable = FALSE;

    /* Copy the key pointers into the word array, then sort the array. */
    entries = hash_table_tolist(merged, NULL);
    n_filled = 0;
    node = entries;
    while (node != NULL) {
        hash_entry_t *entry = gnode_ptr(node);

        base->word_str[n_filled] = (char *) entry->key;
        ++n_filled;
        node = gnode_next(node);
    }
    glist_free(entries);
    qsort(base->word_str, base->n_words, sizeof(*base->word_str),
          my_compare);

    /* Throw away any previous mapping and allocate a fresh one. */
    if (set->widmap != NULL)
        ckd_free_2d((void **) set->widmap);
    set->widmap = (int32 **) ckd_calloc_2d(base->n_words, set->n_models,
                                           sizeof(**set->widmap));

    for (w = 0; w < base->n_words; ++w) {
        int32 *row = set->widmap[w];

        /* Master mapping: word string -> merged word ID. */
        (void) hash_table_enter_int32(base->wid, base->word_str[w], w);

        /* Per-submodel mapping: merged word ID -> submodel word ID. */
        for (m = 0; m < set->n_models; ++m)
            row[m] = ngram_wid(lms[m], base->word_str[w]);
    }

    hash_table_free(merged);
}
/*
 * Parse a textual finite-state grammar from an already opened stream.
 *
 * The reader expects, in this order:
 *   - a FSG_MODEL_BEGIN_DECL line with an optional grammar name
 *     (lines before it are skipped);
 *   - a state-count line (FSG_MODEL_N_DECL or FSG_MODEL_NUM_STATES_DECL);
 *   - a start-state line (FSG_MODEL_S_DECL or FSG_MODEL_START_STATE_DECL);
 *   - a final-state line (FSG_MODEL_F_DECL or FSG_MODEL_FINAL_STATE_DECL);
 *   - any number of transition lines (FSG_MODEL_T_DECL or
 *     FSG_MODEL_TRANSITION_DECL): from-state to-state trans-prob [word];
 *   - a FSG_MODEL_END_DECL line.
 *
 * fp    - stream to read from; it is not closed here.
 * lmath - log-math object used to convert transition probabilities.
 * lw    - language weight handed to fsg_model_init().
 *
 * Returns the new model, or NULL after logging an error if the input is
 * malformed.  All temporary storage is released on both paths.
 */
fsg_model_t *
fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw)
{
    fsg_model_t *fsg = NULL;
    hash_table_t *words = hash_table_new(32, FALSE);
    hash_iter_t *it;
    glist_t null_links = NULL;
    char **tok = NULL;
    char *line = NULL;
    char *name = NULL;
    int32 lineno = 0;
    int32 next_wid;
    int32 ntok, from, to;
    int n_state, n_trans, n_null_trans;
    int ok;
    float32 prob;

    /* Skip everything up to the begin declaration. */
    for (;;) {
        ntok = nextline_str2words(fp, &lineno, &line, &tok);
        if (ntok < 0) {
            E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
            goto parse_error;
        }
        if (strcmp(tok[0], FSG_MODEL_BEGIN_DECL) != 0)
            continue;
        if (ntok > 2) {
            E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n", lineno);
            goto parse_error;
        }
        break;
    }

    /* Copy the grammar name now: the token buffer is reused by the next
     * line read.  Fall back to a default when no name was given. */
    if (ntok != 2) {
        E_WARN("FSG name is missing\n");
        name = ckd_salloc("unknown");
    }
    else {
        name = ckd_salloc(tok[1]);
    }

    /* Number of states. */
    ntok = nextline_str2words(fp, &lineno, &line, &tok);
    ok = (ntok == 2);
    if (ok)
        ok = (strcmp(tok[0], FSG_MODEL_N_DECL) == 0)
            || (strcmp(tok[0], FSG_MODEL_NUM_STATES_DECL) == 0);
    if (ok)
        ok = (sscanf(tok[1], "%d", &n_state) == 1);
    if (ok)
        ok = (n_state > 0);
    if (!ok) {
        E_ERROR
            ("Line[%d]: #states declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* The model can be created now; the name copy is no longer needed. */
    fsg = fsg_model_init(name, lmath, lw, n_state);
    ckd_free(name);
    name = NULL;

    /* Start state, which must lie in [0, n_state). */
    ntok = nextline_str2words(fp, &lineno, &line, &tok);
    ok = (ntok == 2);
    if (ok)
        ok = (strcmp(tok[0], FSG_MODEL_S_DECL) == 0)
            || (strcmp(tok[0], FSG_MODEL_START_STATE_DECL) == 0);
    if (ok)
        ok = (sscanf(tok[1], "%d", &(fsg->start_state)) == 1);
    if (ok)
        ok = (fsg->start_state >= 0) && (fsg->start_state < fsg->n_state);
    if (!ok) {
        E_ERROR
            ("Line[%d]: start state declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Final state, which must lie in [0, n_state). */
    ntok = nextline_str2words(fp, &lineno, &line, &tok);
    ok = (ntok == 2);
    if (ok)
        ok = (strcmp(tok[0], FSG_MODEL_F_DECL) == 0)
            || (strcmp(tok[0], FSG_MODEL_FINAL_STATE_DECL) == 0);
    if (ok)
        ok = (sscanf(tok[1], "%d", &(fsg->final_state)) == 1);
    if (ok)
        ok = (fsg->final_state >= 0) && (fsg->final_state < fsg->n_state);
    if (!ok) {
        E_ERROR
            ("Line[%d]: final state declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Transitions, up to the end declaration. */
    next_wid = 0;
    n_trans = 0;
    n_null_trans = 0;
    for (;;) {
        int32 wid, tprob;

        ntok = nextline_str2words(fp, &lineno, &line, &tok);
        if (ntok <= 0) {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        if (strcmp(tok[0], FSG_MODEL_END_DECL) == 0)
            break;

        if ((strcmp(tok[0], FSG_MODEL_T_DECL) != 0)
            && (strcmp(tok[0], FSG_MODEL_TRANSITION_DECL) != 0)) {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        /* Four tokens is a null transition, five carries a word. */
        ok = (ntok == 4) || (ntok == 5);
        if (ok)
            ok = (sscanf(tok[1], "%d", &from) == 1);
        if (ok)
            ok = (sscanf(tok[2], "%d", &to) == 1);
        if (ok)
            ok = (from >= 0) && (from < fsg->n_state)
                && (to >= 0) && (to < fsg->n_state);
        if (!ok) {
            E_ERROR
                ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
                 lineno);
            goto parse_error;
        }

        prob = atof_c(tok[3]);
        if ((prob <= 0.0) || (prob > 1.0)) {
            E_ERROR
                ("Line[%d]: transition spec malformed; Expecting float as transition probability\n",
                 lineno);
            goto parse_error;
        }

        tprob = (int32) (logmath_log(lmath, prob) * fsg->lw);

        if (ntok > 4) {
            /* Word transition: give unseen words the next free ID.  The
             * table owns a private copy of the word string. */
            if (hash_table_lookup_int32(words, tok[4], &wid) < 0) {
                wid = next_wid++;
                (void) hash_table_enter_int32(words, ckd_salloc(tok[4]),
                                              wid);
            }
            fsg_model_trans_add(fsg, from, to, tprob, wid);
            ++n_trans;
        }
        else if (fsg_model_null_trans_add(fsg, from, to, tprob) == 1) {
            /* Remember the null link for the closure step below. */
            ++n_null_trans;
            null_links = glist_add_ptr(null_links,
                                       fsg_model_null_trans(fsg, from, to));
        }
    }

    E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n",
           fsg->n_state, hash_table_inuse(words), n_trans, n_null_trans);

    /* Turn the word table into the model's string table, indexed by
     * word ID.  The key strings are handed over to the model. */
    fsg->n_word = hash_table_inuse(words);
    fsg->n_word_alloc = fsg->n_word + 10;       /* Pad it a bit. */
    fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab));
    it = hash_table_iter(words);
    while (it != NULL) {
        int32 id = (int32) (long) hash_entry_val(it->ent);

        fsg->vocab[id] = (char *) hash_entry_key(it->ent);
        it = hash_table_iter_next(it);
    }
    hash_table_free(words);

    /* Do transitive closure on null transitions. */
    null_links = fsg_model_null_trans_closure(fsg, null_links);
    glist_free(null_links);

    ckd_free(line);
    ckd_free(tok);

    return fsg;

  parse_error:
    /* The word strings were never handed to the model, so free them
     * here before the table itself goes away. */
    it = hash_table_iter(words);
    while (it != NULL) {
        ckd_free((char *) hash_entry_key(it->ent));
        it = hash_table_iter_next(it);
    }
    glist_free(null_links);
    hash_table_free(words);
    ckd_free(name);
    ckd_free(line);
    ckd_free(tok);
    fsg_model_free(fsg);
    return NULL;
}