/**
 * Free a JSGF grammar object and everything it owns.
 *
 * Only a top-level grammar (parent == NULL) owns the rule table, the
 * import table, the search path and the link list; imported
 * sub-grammars share those with their parent, so for them only the
 * scalar string fields and the struct itself are released.
 */
void
jsgf_grammar_free(jsgf_t *jsgf)
{
    /* FIXME: Probably should just use refcounting instead. */
    if (jsgf->parent == NULL) {
        hash_iter_t *itor;
        gnode_t *gn;

        /* Rule table owns both its keys and its rule objects. */
        for (itor = hash_table_iter(jsgf->rules); itor;
             itor = hash_table_iter_next(itor)) {
            ckd_free((char *) itor->ent->key);
            jsgf_rule_free((jsgf_rule_t *) itor->ent->val);
        }
        hash_table_free(jsgf->rules);

        /* Imported grammars are freed recursively. */
        for (itor = hash_table_iter(jsgf->imports); itor;
             itor = hash_table_iter_next(itor)) {
            ckd_free((char *) itor->ent->key);
            jsgf_grammar_free((jsgf_t *) itor->ent->val);
        }
        hash_table_free(jsgf->imports);

        /* Search-path entries are heap-allocated path strings. */
        for (gn = jsgf->searchpath; gn; gn = gnode_next(gn))
            ckd_free(gnode_ptr(gn));
        glist_free(jsgf->searchpath);

        /* FSG links created during rule expansion. */
        for (gn = jsgf->links; gn; gn = gnode_next(gn))
            ckd_free(gnode_ptr(gn));
        glist_free(jsgf->links);
    }
    ckd_free(jsgf->name);
    ckd_free(jsgf->version);
    ckd_free(jsgf->charset);
    ckd_free(jsgf->locale);
    ckd_free(jsgf);
}
/**
 * Tear down the per-utterance LM-state lookup structures so the next
 * utterance starts with an empty lmstate table.
 */
static void
vithist_lmstate_reset(vithist_t * vh)
{
    gnode_t *lgn, *gn;
    int32 i;
    vh_lms2vh_t *lms2vh, *child;

    /* lwidlist holds the LM word-IDs that have active lmstate roots. */
    for (lgn = vh->lwidlist; lgn; lgn = gnode_next(lgn)) {
        i = (int32) gnode_int32(lgn);
        lms2vh = vh->lms2vh_root[i];

        /* Free each child mapping node, then the list that held them. */
        for (gn = lms2vh->children; gn; gn = gnode_next(gn)) {
            child = (vh_lms2vh_t *) gnode_ptr(gn);
            ckd_free((void *) child);
        }
        glist_free(lms2vh->children);

        ckd_free((void *) lms2vh);
        vh->lms2vh_root[i] = NULL;
    }
    glist_free(vh->lwidlist);
    vh->lwidlist = NULL;
}
/**
 * Build an fsg_model_t from an expanded JSGF rule (variant in which
 * expand_rule() returns the exit state and takes explicit entry/exit
 * node arguments).
 *
 * The grammar's link list is consumed: links are turned into FSG word
 * or epsilon transitions.  Optionally computes the transitive closure
 * of the null transitions.
 */
static fsg_model_t *
jsgf_build_fsg_internal(jsgf_t * grammar, jsgf_rule_t * rule,
                        logmath_t * lmath, float32 lw, int do_closure)
{
    fsg_model_t *fsg;
    glist_t nulls;
    gnode_t *gn;
    int rule_entry, rule_exit;

    /* Clear previous links */
    for (gn = grammar->links; gn; gn = gnode_next(gn)) {
        ckd_free(gnode_ptr(gn));
    }
    glist_free(grammar->links);
    grammar->links = NULL;
    grammar->nstate = 0;

    /* Create the top-level entry state, and expand the top-level rule. */
    rule_entry = grammar->nstate++;
    rule_exit = expand_rule(grammar, rule, rule_entry, NO_NODE);

    /* If no exit-state was created, create one. */
    if (rule_exit == NO_NODE) {
        rule_exit = grammar->nstate++;
        jsgf_add_link(grammar, NULL, rule_entry, rule_exit);
    }

    fsg = fsg_model_init(rule->name, lmath, lw, grammar->nstate);
    fsg->start_state = rule_entry;
    fsg->final_state = rule_exit;
    /* Links were accumulated in reverse; restore creation order. */
    grammar->links = glist_reverse(grammar->links);
    for (gn = grammar->links; gn; gn = gnode_next(gn)) {
        jsgf_link_t *link = gnode_ptr(gn);

        if (link->atom) {
            if (jsgf_atom_is_rule(link->atom)) {
                /* Rule reference: weighted epsilon transition. */
                fsg_model_null_trans_add(fsg, link->from, link->to,
                                         logmath_log(lmath,
                                                     link->atom->weight));
            }
            else {
                /* Terminal word: register (or look up) the word and add
                 * a weighted word transition. */
                int wid = fsg_model_word_add(fsg, link->atom->name);
                fsg_model_trans_add(fsg, link->from, link->to,
                                    logmath_log(lmath, link->atom->weight),
                                    wid);
            }
        }
        else {
            /* Bare epsilon link (no atom attached). */
            fsg_model_null_trans_add(fsg, link->from, link->to, 0);
        }
    }
    if (do_closure) {
        nulls = fsg_model_null_trans_closure(fsg, NULL);
        glist_free(nulls);
    }

    return fsg;
}
/**
 * Dispose of a word-level FSG: all per-state-pair transition lists,
 * the context tables, and the FSG object itself.
 */
void
word_fsg_free(word_fsg_t * fsg)
{
    int32 from, to;
    gnode_t *node;

    for (from = 0; from < fsg->n_state; from++) {
        for (to = 0; to < fsg->n_state; to++) {
            /* Free all non-null transitions between states from and to. */
            for (node = fsg->trans[from][to]; node; node = gnode_next(node))
                ckd_free(gnode_ptr(node));
            glist_free(fsg->trans[from][to]);

            /* Free any null transition from -> to. */
            ckd_free((void *) fsg->null_trans[from][to]);
        }
    }

    ctxt_table_free(fsg->ctxt);
    ckd_free_2d((void **) fsg->trans);
    ckd_free_2d((void **) fsg->null_trans);
    ckd_free((void *) fsg->name);
    if (fsg->lc)
        ckd_free_2d((void **) fsg->lc);
    if (fsg->rc)
        ckd_free_2d((void **) fsg->rc);
    ckd_free((void *) fsg);
}
/**
 * Release one reference to a command-line object.  On the last
 * reference, free the value table, the argv copy parsed from file, and
 * the object itself.
 *
 * @return remaining reference count (0 once fully freed, and also 0
 *         for a NULL argument).
 */
int
cmd_ln_free_r(cmd_ln_t *cmdln)
{
    if (cmdln == NULL)
        return 0;
    if (--cmdln->refcount > 0)
        return cmdln->refcount;

    if (cmdln->ht) {
        glist_t entries;
        gnode_t *gn;
        int32 n;

        entries = hash_table_tolist(cmdln->ht, &n);
        for (gn = entries; gn; gn = gnode_next(gn)) {
            hash_entry_t *e = gnode_ptr(gn);
            /* NOTE(review): only the values are freed here; the keys
             * are presumably owned elsewhere (e.g. the static argument
             * definitions) — confirm before changing. */
            cmd_ln_val_free((cmd_ln_val_t *) e->val);
        }
        glist_free(entries);
        hash_table_free(cmdln->ht);
        cmdln->ht = NULL;
    }

    if (cmdln->f_argv) {
        int32 i;

        /* argv strings parsed from an argument file are owned here. */
        for (i = 0; i < cmdln->f_argc; ++i) {
            ckd_free(cmdln->f_argv[i]);
        }
        ckd_free(cmdln->f_argv);
        cmdln->f_argv = NULL;
        cmdln->f_argc = 0;
    }
    ckd_free(cmdln);
    return 0;
}
/**
 * Build an fsg_model_t from an expanded JSGF rule (variant in which
 * expand_rule() records the entry/exit states on the rule itself
 * rather than returning them).
 */
static fsg_model_t *
jsgf_build_fsg_internal(jsgf_t *grammar, jsgf_rule_t *rule,
                        logmath_t *lmath, float32 lw, int do_closure)
{
    fsg_model_t *fsg;
    glist_t nulls;
    gnode_t *gn;

    /* Clear previous links */
    for (gn = grammar->links; gn; gn = gnode_next(gn)) {
        ckd_free(gnode_ptr(gn));
    }
    glist_free(grammar->links);
    grammar->links = NULL;
    rule->entry = rule->exit = 0;
    grammar->nstate = 0;
    expand_rule(grammar, rule);

    fsg = fsg_model_init(rule->name, lmath, lw, grammar->nstate);
    fsg->start_state = rule->entry;
    fsg->final_state = rule->exit;
    /* Links were accumulated in reverse; restore creation order. */
    grammar->links = glist_reverse(grammar->links);
    for (gn = grammar->links; gn; gn = gnode_next(gn)) {
        jsgf_link_t *link = gnode_ptr(gn);

        if (link->atom) {
            if (jsgf_atom_is_rule(link->atom)) {
                /* Rule reference: weighted epsilon transition. */
                fsg_model_null_trans_add(fsg, link->from, link->to,
                                         logmath_log(lmath,
                                                     link->atom->weight));
            }
            else {
                /* Terminal word transition carrying the atom's weight. */
                int wid = fsg_model_word_add(fsg, link->atom->name);
                fsg_model_trans_add(fsg, link->from, link->to,
                                    logmath_log(lmath, link->atom->weight),
                                    wid);
            }
        }
        else {
            /* Bare epsilon link. */
            fsg_model_null_trans_add(fsg, link->from, link->to, 0);
        }
    }
    if (do_closure) {
        nulls = fsg_model_null_trans_closure(fsg, NULL);
        glist_free(nulls);
    }

    return fsg;
}
/**
 * Free an entire linked list of glist containers: each node's glist
 * (when present) and the node itself.
 */
void
fsg_glist_linklist_free(fsg_glist_linklist_t *glist)
{
    fsg_glist_linklist_t *node = glist;

    while (node) {
        fsg_glist_linklist_t *following = node->next;

        if (node->glist)
            glist_free(node->glist);
        ckd_free(node);
        node = following;
    }
}
/**
 * Release all accumulated renormalization records and clear the list.
 */
static void
phone_loop_search_free_renorm(phone_loop_search_t *pls)
{
    gnode_t *node = pls->renorm;

    while (node) {
        ckd_free(gnode_ptr(node));
        node = gnode_next(node);
    }
    glist_free(pls->renorm);
    pls->renorm = NULL;
}
/**
 * Build the merged vocabulary for a language-model set and the
 * per-model word-ID mapping table (set->widmap).
 *
 * Word strings are shared (not copied) from the submodels, so the base
 * model is marked non-writable.
 */
static void
build_widmap(ngram_model_t * base, logmath_t * lmath, int32 n)
{
    ngram_model_set_t *set = (ngram_model_set_t *) base;
    ngram_model_t **models = set->lms;
    hash_table_t *vocab;
    glist_t hlist;
    gnode_t *gn;
    int32 i;

    /* Construct a merged vocabulary and a set of word-ID mappings. */
    vocab = hash_table_new(models[0]->n_words, FALSE);
    /* Create the set of merged words. */
    for (i = 0; i < set->n_models; ++i) {
        int32 j;
        for (j = 0; j < models[i]->n_words; ++j) {
            /* Ignore collisions. */
            (void) hash_table_enter_int32(vocab, models[i]->word_str[j],
                                          j);
        }
    }
    /* Create the array of words, then sort it. */
    if (hash_table_lookup(vocab, "<UNK>", NULL) != 0)
        (void) hash_table_enter_int32(vocab, "<UNK>", 0);
    /* Now we know the number of unigrams, initialize the base model. */
    ngram_model_init(base, &ngram_model_set_funcs, lmath, n,
                     hash_table_inuse(vocab));
    base->writable = FALSE;     /* We will reuse the pointers from the
                                 * submodels. */

    /* Copy the (shared) word-string pointers out of the hash table and
     * sort them to fix the base word IDs. */
    i = 0;
    hlist = hash_table_tolist(vocab, NULL);
    for (gn = hlist; gn; gn = gnode_next(gn)) {
        hash_entry_t *ent = gnode_ptr(gn);
        base->word_str[i++] = (char *) ent->key;
    }
    glist_free(hlist);
    qsort(base->word_str, base->n_words, sizeof(*base->word_str),
          my_compare);

    /* Now create the word ID mappings. */
    if (set->widmap)
        ckd_free_2d((void **) set->widmap);
    set->widmap = (int32 **) ckd_calloc_2d(base->n_words, set->n_models,
                                           sizeof(**set->widmap));
    for (i = 0; i < base->n_words; ++i) {
        int32 j;
        /* Also create the master wid mapping. */
        (void) hash_table_enter_int32(base->wid, base->word_str[i], i);
        /* printf("%s: %d => ", base->word_str[i], i); */
        for (j = 0; j < set->n_models; ++j) {
            set->widmap[i][j] = ngram_wid(models[j], base->word_str[i]);
            /* printf("%d ", set->widmap[i][j]); */
        }
        /* printf("\n"); */
    }
    hash_table_free(vocab);
}
/**
 * Discard all recorded keyword detections, leaving an empty list.
 */
void
kws_detections_reset(kws_detections_t *detections)
{
    gnode_t *node;

    if (detections->detect_list == NULL)
        return;

    for (node = detections->detect_list; node != NULL;
         node = gnode_next(node))
        ckd_free(gnode_ptr(node));
    glist_free(detections->detect_list);
    detections->detect_list = NULL;
}
/**
 * Destroy every search module attached to the decoder and reset both
 * the search list and the active-search pointer.
 */
static void
ps_free_searches(ps_decoder_t *ps)
{
    gnode_t *node;

    if (ps->searches == NULL)
        return;

    for (node = ps->searches; node != NULL; node = gnode_next(node))
        ps_search_free(gnode_ptr(node));
    glist_free(ps->searches);
    ps->searches = NULL;
    ps->search = NULL;
}
/**
 * Add a word class to the language model.
 *
 * The class name itself becomes a word (the class tag, weighted by
 * classweight), and each of the n_words member words is added in-class
 * with its corresponding weight.
 *
 * @return the new class ID, or -1 on failure.
 */
int32
ngram_model_add_class(ngram_model_t * model,
                      const char *classname,
                      float32 classweight,
                      char **words,
                      const float32 * weights, int32 n_words)
{
    ngram_class_t *lmclass;
    glist_t classwords = NULL;
    int32 i, start_wid = -1;
    int32 classid, tag_wid;

    /* Check if classname already exists in model.  If not, add it. */
    if ((tag_wid =
         ngram_wid(model, classname)) == ngram_unknown_wid(model)) {
        tag_wid = ngram_model_add_word(model, classname, classweight);
        if (tag_wid == NGRAM_INVALID_WID)
            return -1;
    }

    if (model->n_classes == 128) {
        E_ERROR("Number of classes cannot exceed 128 (sorry)\n");
        return -1;
    }
    classid = model->n_classes;
    for (i = 0; i < n_words; ++i) {
        int32 wid;

        wid = ngram_add_word_internal(model, words[i], classid);
        if (wid == NGRAM_INVALID_WID)
            return -1;
        /* Remember the base word ID of the first in-class word. */
        if (start_wid == -1)
            start_wid = NGRAM_BASEWID(wid);
        classwords = glist_add_float32(classwords, weights[i]);
    }
    /* Weights were pushed in reverse; restore input order. */
    classwords = glist_reverse(classwords);
    lmclass = ngram_class_new(model, tag_wid, start_wid, classwords);
    glist_free(classwords);
    if (lmclass == NULL)
        return -1;

    /* Grow the classes array to hold the new entry. */
    ++model->n_classes;
    if (model->classes == NULL)
        model->classes = ckd_calloc(1, sizeof(*model->classes));
    else
        model->classes = ckd_realloc(model->classes,
                                     model->n_classes
                                     * sizeof(*model->classes));
    model->classes[classid] = lmclass;
    return classid;
}
/**
 * Free a right-hand side and every alternative chained off it, along
 * with all atoms each alternative holds.  Iterates over the alt chain
 * instead of recursing, which bounds stack use.
 */
static void
jsgf_rhs_free(jsgf_rhs_t *rhs)
{
    while (rhs != NULL) {
        jsgf_rhs_t *alt = rhs->alt;
        gnode_t *node;

        for (node = rhs->atoms; node; node = gnode_next(node))
            jsgf_atom_free(gnode_ptr(node));
        glist_free(rhs->atoms);
        ckd_free(rhs);
        rhs = alt;
    }
}
/**
 * Copy the current (partial or final) hypothesis into the module-level
 * parthyp[] array and return the number of words recorded.
 *
 * NOTE(review): kb and parthyp are globals of the enclosing module;
 * this assumes parthyp has room for the hypothesis plus one terminator
 * slot — confirm at the caller.
 */
int32
live_get_partialhyp(int32 endutt)
{
    int32 id, nwds;
    glist_t hyp;
    gnode_t *gn;
    hyp_t *h;
    dict_t *dict;

    dict = kbcore_dict (kb->kbcore);
    /* Final utterances use the full backtrace end point, partial ones
     * the best path so far. */
    if (endutt)
        id = vithist_utt_end(kb->vithist, kb->kbcore);
    else
        id = vithist_partialutt_end(kb->vithist, kb->kbcore);

    if (id > 0) {
        hyp = vithist_backtrace(kb->vithist,id);

        /* Overwrite parthyp[] entry by entry, releasing any word string
         * left over from the previous call. */
        for (gn = hyp,nwds=0; gn; gn = gnode_next(gn),nwds++) {
            h = (hyp_t *) gnode_ptr (gn);
            if (parthyp[nwds].word != NULL) {
                ckd_free(parthyp[nwds].word);
                parthyp[nwds].word = NULL;
            }
            parthyp[nwds].word = strdup(dict_wordstr(dict, h->id));
            parthyp[nwds].sf = h->sf;
            parthyp[nwds].ef = h->ef;
            parthyp[nwds].ascr = h->ascr;
            parthyp[nwds].lscr = h->lscr;
        }
        /* Clear the terminator slot just past the last word. */
        if (parthyp[nwds].word != NULL){
            ckd_free(parthyp[nwds].word);
            parthyp[nwds].word = NULL;
        }

        /* Free hyplist */
        /* NOTE(review): this loop deliberately stops before the final
         * node, so the last hyp_t is not freed here — presumably it is
         * owned by the viterbi history; verify before "fixing". */
        for (gn = hyp; gn && (gnode_next(gn)); gn = gnode_next(gn)) {
            h = (hyp_t *) gnode_ptr (gn);
            ckd_free ((void *) h);
        }
        glist_free (hyp);
    }
    else {
        /* No hypothesis available: report zero words and clear the
         * first slot. */
        nwds = 0;
        if (parthyp[nwds].word != NULL) {
            ckd_free(parthyp[nwds].word);
            parthyp[nwds].word = NULL;
        }
    }

    return(nwds);
}
/**
 * Compress the per-phone senone-sequence table to a table of unique
 * sequences, rewriting each phone's ssid to index the compact table.
 */
static void
sseq_compress(mdef_t * m)
{
    hash_table_t *h;
    s3senid_t **sseq;
    int32 n_sseq;
    int32 p, j, k;
    glist_t g;
    gnode_t *gn;
    hash_entry_t *he;

    /* Byte size of one senone sequence; used as the binary hash key
     * length. */
    k = m->n_emit_state * sizeof(s3senid_t);
    h = hash_table_new(m->n_phone, HASH_CASE_YES);
    n_sseq = 0;

    /* Identify unique senone-sequence IDs.  BUG: tmat-id not being
     * considered!! */
    for (p = 0; p < m->n_phone; p++) {
        /* Add senone sequence to hash table; a first-time entry returns
         * the fresh ID n_sseq, an existing one returns its old ID. */
        if ((j = (long) hash_table_enter_bkey(h, (char *) (m->sseq[p]), k,
                                              (void *)(long)n_sseq))
            == n_sseq)
            n_sseq++;

        m->phone[p].ssid = j;
    }

    /* Generate compacted sseq table */
    sseq = (s3senid_t **) ckd_calloc_2d(n_sseq, m->n_emit_state,
                                        sizeof(s3senid_t));     /* freed in
                                                                 * mdef_free() */

    g = hash_table_tolist(h, &j);
    assert(j == n_sseq);

    for (gn = g; gn; gn = gnode_next(gn)) {
        he = (hash_entry_t *) gnode_ptr(gn);
        j = (int32)(long)hash_entry_val(he);
        memcpy(sseq[j], hash_entry_key(he), k);
    }
    glist_free(g);

    /* Free the old, temporary senone sequence table, replace with
     * compacted one */
    ckd_free_2d((void **) m->sseq);
    m->sseq = sseq;
    m->n_sseq = n_sseq;

    hash_table_free(h);
}
/**
 * Free the transition tables hanging off state i.
 *
 * FIXME (maybe): FSG links will all get freed when we call
 * listelem_alloc_free() so don't bother freeing them explicitly here.
 */
static void
trans_list_free(fsg_model_t * fsg, int32 i)
{
    hash_table_t *trans = fsg->trans[i].trans;

    if (trans != NULL) {
        hash_iter_t *itor;

        /* Each entry's value is a glist of links; free the list nodes. */
        for (itor = hash_table_iter(trans); itor;
             itor = hash_table_iter_next(itor))
            glist_free((glist_t) hash_entry_val(itor->ent));
    }
    hash_table_free(trans);
    hash_table_free(fsg->trans[i].null_trans);
}
/*
 * Transfer the surviving history entries for this frame into the permanent
 * history table.
 */
void
fsg_history_end_frame(fsg_history_t *h)
{
    int32 state, lc;
    int32 n_state, n_ci;
    gnode_t *node;

    n_state = word_fsg_n_state(h->fsg);
    n_ci = phoneCiCount();

    for (state = 0; state < n_state; state++) {
        for (lc = 0; lc < n_ci; lc++) {
            /* Append every surviving entry, then drop the frame list. */
            for (node = h->frame_entries[state][lc]; node;
                 node = gnode_next(node))
                blkarray_list_append(h->entries, gnode_ptr(node));
            glist_free(h->frame_entries[state][lc]);
            h->frame_entries[state][lc] = NULL;
        }
    }
}
/**
 * Drop one reference to the decoder.  On the last reference, tear down
 * the attached search modules and all owned subsystems, then free the
 * decoder itself.
 *
 * Returns the remaining reference count (0 once fully freed).
 */
int
ps_free(ps_decoder_t *ps)
{
    gnode_t *node;

    if (ps == NULL)
        return 0;
    if (--ps->refcount > 0)
        return ps->refcount;

    /* Last reference: destroy search modules first. */
    for (node = ps->searches; node != NULL; node = gnode_next(node))
        ps_search_free(gnode_ptr(node));
    glist_free(ps->searches);

    dict_free(ps->dict);
    dict2pid_free(ps->d2p);
    acmod_free(ps->acmod);
    logmath_free(ps->lmath);
    cmd_ln_free_r(ps->config);
    ckd_free(ps->uttid);
    ckd_free(ps);
    return 0;
}
/**
 * Read a class-definition file and add each class it defines to the
 * language model.
 *
 * @return 0 on success, -1 on failure.
 */
int32
ngram_model_read_classdef(ngram_model_t *model, const char *file_name)
{
    hash_table_t *classes;
    glist_t hl = NULL;
    gnode_t *gn;
    int32 rv = -1;

    classes = hash_table_new(0, FALSE);
    if (read_classdef_file(classes, file_name) < 0) {
        hash_table_free(classes);
        return -1;
    }

    /* Create a new class in the language model for each classdef. */
    hl = hash_table_tolist(classes, NULL);
    for (gn = hl; gn; gn = gnode_next(gn)) {
        hash_entry_t *he = gnode_ptr(gn);
        classdef_t *classdef = he->val;

        if (ngram_model_add_class(model, he->key, 1.0,
                                  classdef->words,
                                  classdef->weights,
                                  classdef->n_words) < 0)
            goto error_out;
    }
    rv = 0;

error_out:
    /* Success or failure, the classdef table is consumed here: free
     * keys and values before the table itself. */
    for (gn = hl; gn; gn = gnode_next(gn)) {
        hash_entry_t *he = gnode_ptr(gn);
        ckd_free((char *)he->key);
        classdef_free(he->val);
    }
    glist_free(hl);
    hash_table_free(classes);
    return rv;
}
void fsg_history_free(fsg_history_t *h) { int32 s, lc, ns, np; gnode_t *gn; if (h->fsg) { ns = fsg_model_n_state(h->fsg); np = h->n_ciphone; for (s = 0; s < ns; s++) { for (lc = 0; lc < np; lc++) { for (gn = h->frame_entries[s][lc]; gn; gn = gnode_next(gn)) { ckd_free(gnode_ptr(gn)); } glist_free(h->frame_entries[s][lc]); } } } ckd_free_2d(h->frame_entries); blkarray_list_free(h->entries); ckd_free(h); }
/**
 * Read a finite-state grammar from a text stream.
 *
 * Expects an FSG_BEGIN [name] header, #states, start-state and
 * final-state declarations, a series of transition lines, and an
 * FSG_END footer.  Word transitions build up a private vocabulary;
 * null transitions are collected and transitively closed at the end.
 *
 * @return a new fsg_model_t, or NULL on any parse error (in which case
 *         all partially built state is released).
 */
fsg_model_t *
fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw)
{
    fsg_model_t *fsg;
    hash_table_t *vocab;
    hash_iter_t *itor;
    int32 lastwid;
    char **wordptr;
    char *lineptr;
    char *fsgname;
    int32 lineno;
    int32 n, i, j;
    int n_state, n_trans, n_null_trans;
    glist_t nulls;
    float32 p;

    lineno = 0;
    vocab = hash_table_new(32, FALSE);
    wordptr = NULL;
    lineptr = NULL;
    nulls = NULL;
    fsgname = NULL;
    fsg = NULL;

    /* Scan upto FSG_BEGIN header */
    for (;;) {
        n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
        if (n < 0) {
            E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
            goto parse_error;
        }

        if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) {
            if (n > 2) {
                E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n",
                        lineno);
                goto parse_error;
            }
            break;
        }
    }

    /* Save FSG name, or it will get clobbered below :(.
     * If name is missing, try the default. */
    if (n == 2) {
        fsgname = ckd_salloc(wordptr[1]);
    }
    else {
        E_WARN("FSG name is missing\n");
        fsgname = ckd_salloc("unknown");
    }

    /* Read #states */
    n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
    if ((n != 2)
        || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0)
            && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &n_state) != 1)
        || (n_state <= 0)) {
        E_ERROR
            ("Line[%d]: #states declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Now create the FSG. */
    fsg = fsg_model_init(fsgname, lmath, lw, n_state);
    ckd_free(fsgname);
    fsgname = NULL;

    /* Read start state */
    n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
    if ((n != 2)
        || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0)
            && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1)
        || (fsg->start_state < 0)
        || (fsg->start_state >= fsg->n_state)) {
        E_ERROR
            ("Line[%d]: start state declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Read final state */
    n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
    if ((n != 2)
        || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0)
            && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1)
        || (fsg->final_state < 0)
        || (fsg->final_state >= fsg->n_state)) {
        E_ERROR
            ("Line[%d]: final state declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Read transitions */
    lastwid = 0;
    n_trans = n_null_trans = 0;
    for (;;) {
        int32 wid, tprob;

        n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
        if (n <= 0) {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) {
            break;
        }

        if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0)
            || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) {
            /* A transition line is: from-state to-state prob [word];
             * 4 fields for a null transition, 5 for a word transition. */
            if (((n != 4) && (n != 5))
                || (sscanf(wordptr[1], "%d", &i) != 1)
                || (sscanf(wordptr[2], "%d", &j) != 1)
                || (i < 0) || (i >= fsg->n_state)
                || (j < 0) || (j >= fsg->n_state)) {
                E_ERROR
                    ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
                     lineno);
                goto parse_error;
            }
            p = atof_c(wordptr[3]);
            if ((p <= 0.0) || (p > 1.0)) {
                E_ERROR
                    ("Line[%d]: transition spec malformed; Expecting float as transition probability\n",
                     lineno);
                goto parse_error;
            }
        }
        else {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        tprob = (int32) (logmath_log(lmath, p) * fsg->lw);
        /* Add word to "dictionary". */
        if (n > 4) {
            if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) {
                (void) hash_table_enter_int32(vocab,
                                              ckd_salloc(wordptr[4]),
                                              lastwid);
                wid = lastwid;
                ++lastwid;
            }
            fsg_model_trans_add(fsg, i, j, tprob, wid);
            ++n_trans;
        }
        else {
            /* Null (epsilon) transition; remember it for closure. */
            if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) {
                ++n_null_trans;
                nulls =
                    glist_add_ptr(nulls, fsg_model_null_trans(fsg, i, j));
            }
        }
    }

    E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n",
           fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans);

    /* Now create a string table from the "dictionary"; the FSG takes
     * ownership of the word strings allocated above. */
    fsg->n_word = hash_table_inuse(vocab);
    fsg->n_word_alloc = fsg->n_word + 10;       /* Pad it a bit. */
    fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab));
    for (itor = hash_table_iter(vocab); itor;
         itor = hash_table_iter_next(itor)) {
        char const *word = hash_entry_key(itor->ent);
        int32 wid = (int32) (long) hash_entry_val(itor->ent);
        fsg->vocab[wid] = (char *) word;
    }
    hash_table_free(vocab);

    /* Do transitive closure on null transitions */
    nulls = fsg_model_null_trans_closure(fsg, nulls);
    glist_free(nulls);

    ckd_free(lineptr);
    ckd_free(wordptr);

    return fsg;

parse_error:
    /* On failure the vocab still owns its keys; free them here. */
    for (itor = hash_table_iter(vocab); itor;
         itor = hash_table_iter_next(itor))
        ckd_free((char *) hash_entry_key(itor->ent));
    glist_free(nulls);
    hash_table_free(vocab);
    ckd_free(fsgname);
    ckd_free(lineptr);
    ckd_free(wordptr);
    fsg_model_free(fsg);
    return NULL;
}
/**
 * Convert a grapheme (spelling) string into a phoneme string using a
 * trained G2P n-gram model.
 *
 * @return a newly allocated, space-separated phoneme string which the
 *         caller must free with ckd_free(), or NULL if no matching
 *         phoneme sequence could be found.
 */
char *
dict_g2p(char const *word_grapheme, ngram_model_t *ngram_g2p_model)
{
    char *final_phone = NULL;
    size_t increment = 1;
    int word_offset = 0;
    size_t j;
    size_t grapheme_len = 0, final_phoneme_len = 0;
    glist_t history_list = NULL;
    gnode_t *gn;
    int first = 0;
    const int32 *total_unigrams;
    struct winner_t winner;
    const char *word;
    unigram_t unigram;

    total_unigrams = ngram_model_get_counts(ngram_g2p_model);
    /* Seed the decoding history with the sentence-start symbol. */
    int32 wid_sentence = ngram_wid(ngram_g2p_model, "<s>"); // start with sentence
    history_list = glist_add_int32(history_list, wid_sentence);
    grapheme_len = strlen(word_grapheme);
    /* Greedily consume graphemes, extending the history with the best
     * matching unigram at each step. */
    for (j = 0; j < grapheme_len; j += increment) {
        winner = dict_get_winner_wid(ngram_g2p_model, word_grapheme,
                                     history_list, *total_unigrams,
                                     word_offset);
        increment = winner.length_match;
        if (increment == 0) {
            /* BUGFIX: free the partially built history on the error
             * path instead of leaking it. */
            glist_free(history_list);
            E_ERROR("Error trying to find matching phoneme (%s) Exiting.. \n" , word_grapheme);
            return NULL;
        }
        history_list = glist_add_int32(history_list, winner.winner_wid);
        word_offset += winner.length_match;
        final_phoneme_len += winner.len_phoneme;
    }
    history_list = glist_reverse(history_list);
    /* Worst case output: every phone is a single character followed by
     * a space, plus the terminating NUL.  BUGFIX: the previous size of
     * final_phoneme_len * 2 was one byte short in that case. */
    final_phone = ckd_calloc(1, final_phoneme_len * 2 + 1);
    for (gn = history_list; gn; gn = gnode_next(gn)) {
        /* Skip the leading <s> history entry. */
        if (!first) {
            first = 1;
            continue;
        }
        word = ngram_word(ngram_g2p_model, gnode_int32(gn));
        if (!word)
            continue;
        unigram = dict_split_unigram(word);
        /* "_" marks a silent grapheme; emit nothing for it. */
        if (strcmp(unigram.phone, "_") == 0) {
            if (unigram.word)
                ckd_free(unigram.word);
            if (unigram.phone)
                ckd_free(unigram.phone);
            continue;
        }
        strcat(final_phone, unigram.phone);
        strcat(final_phone, " ");
        if (unigram.word)
            ckd_free(unigram.word);
        if (unigram.phone)
            ckd_free(unigram.phone);
    }

    if (history_list)
        glist_free(history_list);

    return final_phone;
}
/**
 * Record the most recent hypothesis in the decoder object, both as an
 * array of segments (hyp_segs) and as a whitespace-delimited string
 * (hyp_str).  Filler words and the finish word are excluded from the
 * string but retained as segments.
 *
 * @return S3_DECODE_SUCCESS, or an S3_DECODE_ERROR_* code on failure.
 */
int
s3_decode_record_hyps(s3_decode_t * _decode, int _end_utt)
{
    int32 i = 0;
    glist_t hyp_list;
    gnode_t *node;
    srch_hyp_t *hyp;
    char *hyp_strptr = 0;
    char *hyp_str = 0;
    srch_t *srch;
    srch_hyp_t **hyp_segs = 0;
    int hyp_seglen = 0;
    int hyp_strlen = 0;
    int finish_wid = 0;
    kb_t *kb = 0;
    dict_t *dict;
    int rv;

    if (_decode == NULL)
        return S3_DECODE_ERROR_NULL_POINTER;

    /* Drop any hypothesis recorded for a previous utterance. */
    s3_decode_free_hyps(_decode);

    kb = &_decode->kb;
    dict = kbcore_dict(_decode->kbcore);
    srch = (srch_t *) _decode->kb.srch;
    hyp_list = srch_get_hyp(srch);
    if (hyp_list == NULL) {
        E_WARN("Failed to retrieve viterbi history.\n");
        return S3_DECODE_ERROR_INTERNAL;
    }

    /** record the segment length and the overall string length */
    finish_wid = dict_finishwid(dict);
    for (node = hyp_list; node != NULL; node = gnode_next(node)) {
        hyp = (srch_hyp_t *) gnode_ptr(node);
        hyp_seglen++;
        /* Fillers and </s> do not contribute to the output string. */
        if (!dict_filler_word(dict, hyp->id) && hyp->id != finish_wid) {
            hyp_strlen +=
                strlen(dict_wordstr(dict, dict_basewid(dict, hyp->id))) +
                1;
        }
    }
    /* Guarantee at least one byte so the final NUL store is safe even
     * for an all-filler hypothesis. */
    if (hyp_strlen == 0) {
        hyp_strlen = 1;
    }

    /** allocate array to hold the segments and/or decoded string */
    hyp_str = (char *) ckd_calloc(hyp_strlen, sizeof(char));
    hyp_segs =
        (srch_hyp_t **) ckd_calloc(hyp_seglen + 1, sizeof(srch_hyp_t *));
    if (hyp_segs == NULL || hyp_str == NULL) {
        E_WARN("Failed to allocate storage for hypothesis.\n");
        rv = S3_DECODE_ERROR_OUT_OF_MEMORY;
        goto s3_decode_record_hyps_cleanup;
    }

    /** iterate thru to fill in the array of segments and/or decoded string */
    i = 0;
    hyp_strptr = hyp_str;
    for (node = hyp_list; node != NULL; node = gnode_next(node), i++) {
        hyp = (srch_hyp_t *) gnode_ptr(node);
        hyp_segs[i] = hyp;
        hyp->word = dict_wordstr(dict, dict_basewid(dict, hyp->id));
        if (!dict_filler_word(dict, hyp->id) && hyp->id != finish_wid) {
            strcat(hyp_strptr,
                   dict_wordstr(dict, dict_basewid(dict, hyp->id)));
            hyp_strptr += strlen(hyp_strptr);
            *hyp_strptr = ' ';
            hyp_strptr += 1;
        }
    }
    glist_free(hyp_list);

    /* Replace the trailing space with the string terminator. */
    hyp_str[hyp_strlen - 1] = '\0';
    hyp_segs[hyp_seglen] = 0;
    _decode->hyp_frame_num = _decode->num_frames_decoded;
    _decode->hyp_segs = hyp_segs;
    _decode->hyp_str = hyp_str;

    return S3_DECODE_SUCCESS;

s3_decode_record_hyps_cleanup:
    if (hyp_segs != NULL) {
        ckd_free(hyp_segs);
    }
    if (hyp_str != NULL) {
        ckd_free(hyp_str);
    }
    if (hyp_list != NULL) {
        for (node = hyp_list; node != NULL; node = gnode_next(node)) {
            if ((hyp = (srch_hyp_t *) gnode_ptr(node)) != NULL) {
                ckd_free(hyp);
            }
        }
        glist_free(hyp_list);
    }
    return rv;
}
/**
 * Parse a class-definition file into the given hash table.
 *
 * The file contains blocks of the form:
 *   LMCLASS <name>
 *   <word> [weight]
 *   ...
 *   END <name>
 * Each completed block becomes a classdef_t entered under its class
 * name; the table takes ownership of both name and definition.
 *
 * @return 0 on success, -1 on any parse or I/O failure.
 */
int32
read_classdef_file(hash_table_t * classes, const char *file_name)
{
    FILE *fp;
    int32 is_pipe;
    int inclass;  /**< Are we currently reading a list of class words? */
    int32 rv = -1;
    gnode_t *gn;
    glist_t classwords = NULL;
    glist_t classprobs = NULL;
    char *classname = NULL;

    if ((fp = fopen_comp(file_name, "r", &is_pipe)) == NULL) {
        E_ERROR("File %s not found\n", file_name);
        return -1;
    }

    inclass = FALSE;
    while (!feof(fp)) {
        char line[512];
        char *wptr[2];
        int n_words;

        if (fgets(line, sizeof(line), fp) == NULL)
            break;

        n_words = str2words(line, wptr, 2);
        if (n_words <= 0)
            continue;

        if (inclass) {
            /* Look for an end of class marker. */
            if (n_words == 2 && 0 == strcmp(wptr[0], "END")) {
                classdef_t *classdef;
                gnode_t *word, *weight;
                int32 i;

                /* The END tag must name the class it closes. */
                if (classname == NULL || 0 != strcmp(wptr[1], classname))
                    goto error_out;
                inclass = FALSE;

                /* Construct a class from the list of words collected. */
                classdef = ckd_calloc(1, sizeof(*classdef));
                classwords = glist_reverse(classwords);
                classprobs = glist_reverse(classprobs);
                classdef->n_words = glist_count(classwords);
                classdef->words = ckd_calloc(classdef->n_words,
                                             sizeof(*classdef->words));
                classdef->weights = ckd_calloc(classdef->n_words,
                                               sizeof(*classdef->weights));
                word = classwords;
                weight = classprobs;
                for (i = 0; i < classdef->n_words; ++i) {
                    /* Word strings move into classdef; only the list
                     * nodes are freed below. */
                    classdef->words[i] = gnode_ptr(word);
                    classdef->weights[i] = gnode_float32(weight);
                    word = gnode_next(word);
                    weight = gnode_next(weight);
                }

                /* Add this class to the hash table. */
                if (hash_table_enter(classes, classname, classdef) !=
                    classdef) {
                    classdef_free(classdef);
                    goto error_out;
                }

                /* Reset everything. */
                glist_free(classwords);
                glist_free(classprobs);
                classwords = NULL;
                classprobs = NULL;
                classname = NULL;
            }
            else {
                float32 fprob;

                /* A missing weight defaults to 1.0. */
                if (n_words == 2)
                    fprob = atof_c(wptr[1]);
                else
                    fprob = 1.0f;
                /* Add it to the list of words for this class. */
                classwords = glist_add_ptr(classwords, ckd_salloc(wptr[0]));
                classprobs = glist_add_float32(classprobs, fprob);
            }
        }
        else {
            /* Start a new LM class if the LMCLASS marker is seen */
            if (n_words == 2 && 0 == strcmp(wptr[0], "LMCLASS")) {
                if (inclass)
                    goto error_out;
                inclass = TRUE;
                classname = ckd_salloc(wptr[1]);
            }
            /* Otherwise, just ignore whatever junk we got */
        }
    }
    rv = 0;                     /* Success. */

error_out:
    /* Free all the stuff we might have allocated. */
    fclose_comp(fp, is_pipe);
    for (gn = classwords; gn; gn = gnode_next(gn))
        ckd_free(gnode_ptr(gn));
    glist_free(classwords);
    glist_free(classprobs);
    ckd_free(classname);

    return rv;
}
/* * Add the word emitted by the given transition (fsglink) to the given lextree * (rooted at root), and return the new lextree root. (There may actually be * several root nodes, maintained in a linked list via fsg_pnode_t.sibling. * "root" is the head of this list.) * lclist, rclist: sets of left and right context phones for this link. * alloc_head: head of a linear list of all allocated pnodes for the parent * FSG state, kept elsewhere and updated by this routine. */ static fsg_pnode_t * psubtree_add_trans(fsg_lextree_t *lextree, fsg_pnode_t * root, fsg_glist_linklist_t **curglist, fsg_link_t * fsglink, int16 *lclist, int16 *rclist, fsg_pnode_t ** alloc_head) { int32 silcipid; /* Silence CI phone ID */ int32 pronlen; /* Pronunciation length */ int32 wid; /* FSG (not dictionary!!) word ID */ int32 dictwid; /* Dictionary (not FSG!!) word ID */ int32 ssid; /* Senone Sequence ID */ gnode_t *gn; fsg_pnode_t *pnode, *pred, *head; int32 n_ci, p, lc, rc; glist_t lc_pnodelist; /* Temp pnodes list for different left contexts */ glist_t rc_pnodelist; /* Temp pnodes list for different right contexts */ int32 i, j; silcipid = bin_mdef_silphone(lextree->mdef); n_ci = bin_mdef_n_ciphone(lextree->mdef); wid = fsg_link_wid(fsglink); assert(wid >= 0); /* Cannot be a null transition */ dictwid = dict_wordid(lextree->dict, fsg_model_word_str(lextree->fsg, wid)); pronlen = dict_pronlen(lextree->dict, dictwid); assert(pronlen >= 1); assert(lclist[0] >= 0); /* At least one phonetic context provided */ assert(rclist[0] >= 0); head = *alloc_head; pred = NULL; if (pronlen == 1) { /* Single-phone word */ int ci = dict_first_phone(lextree->dict, dictwid); /* Only non-filler words are mpx */ if (dict_filler_word(lextree->dict, dictwid)) { /* * Left diphone ID for single-phone words already assumes SIL is right * context; only left contexts need to be handled. 
*/ lc_pnodelist = NULL; for (i = 0; lclist[i] >= 0; i++) { lc = lclist[i]; ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid); /* Check if this ssid already allocated for some other context */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pnode = (fsg_pnode_t *) gnode_ptr(gn); if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) { /* already allocated; share it for this context phone */ fsg_pnode_add_ctxt(pnode, lc); break; } } if (!gn) { /* ssid not already allocated */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->next.fsglink = fsglink; pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); pnode->ppos = 0; pnode->leaf = TRUE; pnode->sibling = root; /* All root nodes linked together */ fsg_pnode_add_ctxt(pnode, lc); /* Initially zeroed by calloc above */ pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); lc_pnodelist = glist_add_ptr(lc_pnodelist, (void *) pnode); } } glist_free(lc_pnodelist); } else { /* Filler word; no context modelled */ ssid = bin_mdef_pid2ssid(lextree->mdef, ci); /* probably the same... 
*/ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->next.fsglink = fsglink; pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; pnode->ci_ext = silcipid; /* Presents SIL as context to neighbors */ pnode->ppos = 0; pnode->leaf = TRUE; pnode->sibling = root; fsg_pnode_add_all_ctxt(&(pnode->ctxt)); pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); } } else { /* Multi-phone word */ fsg_pnode_t **ssid_pnode_map; /* Temp array of ssid->pnode mapping */ ssid_pnode_map = (fsg_pnode_t **) ckd_calloc(n_ci, sizeof(fsg_pnode_t *)); lc_pnodelist = NULL; rc_pnodelist = NULL; for (p = 0; p < pronlen; p++) { int ci = dict_pron(lextree->dict, dictwid, p); if (p == 0) { /* Root phone, handle required left contexts */ /* Find if we already have an lc_pnodelist for the first phone of this word */ fsg_glist_linklist_t *predglist=*curglist; fsg_glist_linklist_t *glist=*curglist; rc = dict_pron(lextree->dict, dictwid, 1); while (glist && glist->glist && glist->ci != ci && glist->rc != rc){ glist = glist->next; } if (glist && glist->ci == ci && glist->rc == rc && glist->glist) { /* We've found a valid glist. Hook to it and move to next phoneme */ lc_pnodelist = glist->glist; /* Set the predecessor node for the future tree first */ pred = (fsg_pnode_t *) gnode_ptr(lc_pnodelist); continue; } else { /* Two cases that can bring us here * a. glist == NULL, i.e. end of current list. Create new entry. * b. glist->glist == NULL, i.e. first entry into list. 
*/ if (!glist) { /* Case a; reduce it to case b by allocing glist */ glist = (fsg_glist_linklist_t*) ckd_calloc(1, sizeof(fsg_glist_linklist_t)); glist->next = predglist; *curglist = glist; } glist->ci = ci; glist->rc = rc; glist->lc = -1; lc_pnodelist = glist->glist = NULL; /* Gets created below */ } for (i = 0; lclist[i] >= 0; i++) { lc = lclist[i]; ssid = dict2pid_ldiph_lc(lextree->d2p, ci, rc, lc); /* Compression is not done by d2p, so we do it * here. This might be slow, but it might not * be... we'll see. */ pnode = ssid_pnode_map[0]; for (j = 0; j < n_ci && ssid_pnode_map[j] != NULL; ++j) { pnode = ssid_pnode_map[j]; if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) break; } assert(j < n_ci); if (!pnode) { /* Allocate pnode for this new ssid */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof (fsg_pnode_t)); pnode->ctx = lextree->ctx; /* This bit is tricky! For now we'll put the prob in the final link only */ /* pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; */ pnode->logs2prob = lextree->wip + lextree->pip; pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); pnode->ppos = 0; pnode->leaf = FALSE; pnode->sibling = root; /* All root nodes linked together */ pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); lc_pnodelist = glist_add_ptr(lc_pnodelist, (void *) pnode); ssid_pnode_map[j] = pnode; } fsg_pnode_add_ctxt(pnode, lc); } /* Put the lc_pnodelist back into glist */ glist->glist = lc_pnodelist; /* The predecessor node for the future tree is the root */ pred = root; } else if (p != pronlen - 1) { /* Word internal phone */ fsg_pnode_t *pnodeyoungest; ssid = dict2pid_internal(lextree->d2p, dictwid, p); /* First check if we already have this ssid in our tree */ pnode = pred->next.succ; pnodeyoungest = pnode; /* The youngest sibling */ while (pnode && (hmm_nonmpx_ssid(&pnode->hmm) != ssid || pnode->leaf)) { pnode = pnode->sibling; } if (pnode && 
(hmm_nonmpx_ssid(&pnode->hmm) == ssid && !pnode->leaf)) { /* Found the ssid; go to next phoneme */ pred = pnode; continue; } /* pnode not found, allocate it */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->logs2prob = lextree->pip; pnode->ci_ext = dict_pron(lextree->dict, dictwid, p); pnode->ppos = p; pnode->leaf = FALSE; pnode->sibling = pnodeyoungest; /* May be NULL */ if (p == 1) { /* Predecessor = set of root nodes for left ctxts */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pred = (fsg_pnode_t *) gnode_ptr(gn); pred->next.succ = pnode; } } else { /* Predecessor = word internal node */ pred->next.succ = pnode; } pnode->alloc_next = head; head = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); pred = pnode; } else { /* Leaf phone, handle required right contexts */ /* Note, leaf phones are not part of the tree */ xwdssid_t *rssid; memset((void *) ssid_pnode_map, 0, n_ci * sizeof(fsg_pnode_t *)); lc = dict_pron(lextree->dict, dictwid, p-1); rssid = dict2pid_rssid(lextree->d2p, ci, lc); for (i = 0; rclist[i] >= 0; i++) { rc = rclist[i]; j = rssid->cimap[rc]; ssid = rssid->ssid[j]; pnode = ssid_pnode_map[j]; if (!pnode) { /* Allocate pnode for this new ssid */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof (fsg_pnode_t)); pnode->ctx = lextree->ctx; /* We are plugging the word prob here. Ugly */ /* pnode->logs2prob = lextree->pip; */ pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->pip; pnode->ci_ext = dict_pron(lextree->dict, dictwid, p); pnode->ppos = p; pnode->leaf = TRUE; pnode->sibling = rc_pnodelist ? 
(fsg_pnode_t *) gnode_ptr(rc_pnodelist) : NULL; pnode->next.fsglink = fsglink; pnode->alloc_next = head; head = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); rc_pnodelist = glist_add_ptr(rc_pnodelist, (void *) pnode); ssid_pnode_map[j] = pnode; } else { assert(hmm_nonmpx_ssid(&pnode->hmm) == ssid); } fsg_pnode_add_ctxt(pnode, rc); } if (p == 1) { /* Predecessor = set of root nodes for left ctxts */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pred = (fsg_pnode_t *) gnode_ptr(gn); if (!pred->next.succ) pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); else { /* Link to the end of the sibling chain */ fsg_pnode_t *succ = pred->next.succ; while (succ->sibling) succ = succ->sibling; succ->sibling = (fsg_pnode_t*) gnode_ptr(rc_pnodelist); /* Since all entries of lc_pnodelist point to the same array, sufficient to update it once */ break; } } } else { /* Predecessor = word internal node */ if (!pred->next.succ) pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); else { /* Link to the end of the sibling chain */ fsg_pnode_t *succ = pred->next.succ; while (succ->sibling) succ = succ->sibling; succ->sibling = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); } } } } ckd_free((void *) ssid_pnode_map); /* glist_free(lc_pnodelist); Nope; this gets freed outside */ glist_free(rc_pnodelist); } *alloc_head = head; return root; }
/**
 * Retrieve the current hypothesis for the utterance being decoded.
 *
 * If the utterance has already ended (LD_STATE_ENDED) the hypotheses cached
 * in the kb are returned as-is.  Otherwise any previously cached hypotheses
 * are released and a fresh partial backtrace is taken from the Viterbi
 * history.
 *
 * @param decoder  Live decoder handle (must be initialized).
 * @param hyp_str  If non-NULL, receives a pointer to the decoded string
 *                 (owned by the kb; caller must not free).
 * @param hyp_segs If non-NULL, receives a pointer to the NULL-free array of
 *                 kb->hyp_seglen segment pointers (owned by the kb).
 * @return 0 always.
 *
 * NOTE(review): when hyp_segs is NULL the hyp_t entries from the backtrace
 * are not stored anywhere and are never freed here — presumably freed
 * elsewhere or leaked; verify against kb_freehyps() and callers.
 */
int
ld_utt_hyps(live_decoder_t *decoder, char **hyp_str, hyp_t ***hyp_segs)
{
    int32 id;
    int32 i = 0;
    glist_t hyp_list;
    gnode_t *node;
    hyp_t *hyp;
    dict_t *dict;
    char *hyp_strptr;
    kb_t *kb = &decoder->kb;

    /* After the utterance ends, the final hypotheses are already cached in
     * the kb; just hand those pointers back. */
    if (decoder->ld_state == LD_STATE_ENDED) {
        if (hyp_segs) {
            *hyp_segs = kb->hyp_segs;
        }
        if (hyp_str) {
            *hyp_str = kb->hyp_str;
        }
        return 0;
    }
    else {
        /* Mid-utterance: discard any previously cached hypotheses. */
        kb_freehyps(kb);
    }

    dict = kbcore_dict(decoder->kbcore);
    id = vithist_partialutt_end(kb->vithist, decoder->kbcore);
    if (id >= 0) {
        hyp_list = vithist_backtrace(kb->vithist, id);

        /* First pass: record the segment count and the overall string
         * length.  Filler words and </s> are excluded from the string. */
        for (node = hyp_list; node; node = gnode_next(node)) {
            hyp = (hyp_t *) gnode_ptr(node);
            if (hyp_segs) {
                kb->hyp_seglen++;
            }
            if (hyp_str) {
                if (!dict_filler_word(dict, hyp->id) &&
                    hyp->id != dict_finishwid(dict)) {
                    /* +1 for the separating space appended below */
                    kb->hyp_strlen +=
                        strlen(dict_wordstr(dict,
                                            dict_basewid(dict, hyp->id))) + 1;
                }
            }
        }

        /* Allocate array to hold the segments and/or decoded string. */
        if (hyp_segs) {
            kb->hyp_segs = (hyp_t **) ckd_calloc(kb->hyp_seglen,
                                                 sizeof(hyp_t *));
        }
        if (hyp_str) {
            kb->hyp_str = (char *) ckd_calloc(kb->hyp_strlen + 1,
                                              sizeof(char));
        }

        /* Second pass: fill in the array of segments and/or the string. */
        i = 0;
        if (hyp_str) {
            hyp_strptr = kb->hyp_str;
        }
        for (node = hyp_list; node; node = gnode_next(node), i++) {
            hyp = (hyp_t *) gnode_ptr(node);
            if (hyp_segs) {
                kb->hyp_segs[i] = hyp;
            }
            /* FIX: skip the same words the sizing pass skipped.  The old
             * code concatenated every word, overflowing kb->hyp_str
             * whenever the hypothesis contained fillers or </s>. */
            if (hyp_str &&
                !dict_filler_word(dict, hyp->id) &&
                hyp->id != dict_finishwid(dict)) {
                strcat(hyp_strptr,
                       dict_wordstr(dict, dict_basewid(dict, hyp->id)));
                hyp_strptr += strlen(hyp_strptr);
                strcat(hyp_strptr, " ");
                hyp_strptr += 1;
            }
        }
        glist_free(hyp_list);

        /* Replace the trailing space with a terminator.  FIX: guard
         * against hyp_strlen == 0 (e.g. every word was a filler), which
         * previously wrote one byte before the start of the buffer. */
        if (hyp_str && kb->hyp_strlen > 0) {
            kb->hyp_str[kb->hyp_strlen - 1] = '\0';
        }
    }

    if (hyp_segs) {
        *hyp_segs = kb->hyp_segs;
    }
    if (hyp_str) {
        *hyp_str = kb->hyp_str;
    }
    return 0;
}
/**
 * Build a word_fsg_t from a parsed s2_fsg_t description.
 *
 * Validates the FSG (state indices, transition probabilities), allocates the
 * per-state-pair transition matrices, converts each transition probability
 * to a language-weighted logs3 score, and maps words to dictionary IDs.
 * Optionally adds alternative-pronunciation transitions and silence/noise
 * filler transitions, then computes the epsilon-transition closure and the
 * left/right context CIphone lists.
 *
 * @param fsg          Input FSG description (must be non-NULL).
 * @param use_altpron  If TRUE, add parallel transitions for every
 *                     alternative pronunciation of each word.
 * @param use_filler   If TRUE, add silence/noise filler self-transitions
 *                     (requires silprob/fillprob >= 0).
 * @param kbc          Knowledge-base core supplying dict/mdef/tmat/fillpen.
 * @return Newly allocated word_fsg_t, or NULL on validation failure.
 *         Ownership transfers to the caller.
 */
word_fsg_t *
word_fsg_load(s2_fsg_t * fsg, int use_altpron, int use_filler, kbcore_t *kbc)
{
    float32 silprob = kbc->fillpen->silprob;
    float32 fillprob = kbc->fillpen->fillerprob;
    float32 lw = kbc->fillpen->lw;
    word_fsg_t *word_fsg;
    s2_fsg_trans_t *trans;
    int32 n_trans, n_null_trans, n_alt_trans, n_filler_trans, n_unk;
    int32 wid;
    int32 logp;
    glist_t nulls;              /* epsilon links created, for closure below */
    int32 i, j;

    assert(fsg);

    /* Some error checking */
    if (lw <= 0.0)
        E_WARN("Unusual language-weight value: %.3e\n", lw);
    if (use_filler && ((silprob < 0.0) || (fillprob < 0.0))) {
        E_ERROR("silprob/fillprob must be >= 0\n");
        return NULL;
    }
    if ((fsg->n_state <= 0)
        || ((fsg->start_state < 0) || (fsg->start_state >= fsg->n_state))
        || ((fsg->final_state < 0) || (fsg->final_state >= fsg->n_state))) {
        E_ERROR("Bad #states/start_state/final_state values: %d/%d/%d\n",
                fsg->n_state, fsg->start_state, fsg->final_state);
        return NULL;
    }
    /* Every transition must stay inside the state range and carry a
     * probability in (0, 1]. */
    for (trans = fsg->trans_list; trans; trans = trans->next) {
        if ((trans->from_state < 0) || (trans->from_state >= fsg->n_state)
            || (trans->to_state < 0) || (trans->to_state >= fsg->n_state)
            || (trans->prob <= 0) || (trans->prob > 1.0)) {
            E_ERROR("Bad transition: P(%d -> %d) = %e\n",
                    trans->from_state, trans->to_state, trans->prob);
            return NULL;
        }
    }

    word_fsg = (word_fsg_t *) ckd_calloc(1, sizeof(word_fsg_t));
    word_fsg->name = ckd_salloc(fsg->name ? fsg->name : "");
    word_fsg->n_state = fsg->n_state;
    word_fsg->start_state = fsg->start_state;
    word_fsg->final_state = fsg->final_state;
    word_fsg->use_altpron = use_altpron;
    word_fsg->use_filler = use_filler;
    word_fsg->lw = lw;
    word_fsg->lc = NULL;
    word_fsg->rc = NULL;
    word_fsg->dict = kbc->dict;
    word_fsg->mdef = kbc->mdef;
    word_fsg->tmat = kbc->tmat;
    word_fsg->n_ciphone = mdef_n_ciphone(kbc->mdef);

    /* Allocate non-epsilon transition matrix array */
    word_fsg->trans = (glist_t **) ckd_calloc_2d(word_fsg->n_state,
                                                 word_fsg->n_state,
                                                 sizeof(glist_t));
    /* Allocate epsilon transition matrix array */
    word_fsg->null_trans = (word_fsglink_t ***)
        ckd_calloc_2d(word_fsg->n_state, word_fsg->n_state,
                      sizeof(word_fsglink_t *));

    /* Process transitions */
    n_null_trans = 0;
    n_alt_trans = 0;
    n_filler_trans = 0;
    n_unk = 0;
    nulls = NULL;

    for (trans = fsg->trans_list, n_trans = 0; trans;
         trans = trans->next, n_trans++) {
        /* Convert prob to logs2prob and apply language weight */
        logp = (int32) (logs3(kbcore_logmath(kbc), trans->prob) * lw);

        /* Check if word is in dictionary */
        if (trans->word) {
            wid = dict_wordid(kbc->dict, trans->word);
            if (wid < 0) {
                /* NOTE(review): despite "ignored", wid stays < 0 so this
                 * transition is added as an epsilon transition below —
                 * confirm this is the intended handling of unknown words. */
                E_ERROR("Unknown word '%s'; ignored\n", trans->word);
                n_unk++;
            }
            else if (use_altpron) {
                wid = dict_basewid(kbc->dict, wid);
                assert(wid >= 0);
            }
        }
        else
            wid = -1;           /* Null transition */

        /* Add transition to word_fsg structure */
        i = trans->from_state;
        j = trans->to_state;
        if (wid < 0) {
            /* Only the first epsilon link per (i,j) pair is recorded in
             * nulls (return value 1); duplicates just update the score. */
            if (word_fsg_null_trans_add(word_fsg, i, j, logp) == 1) {
                n_null_trans++;
                nulls = glist_add_ptr(nulls,
                                      (void *) word_fsg->null_trans[i][j]);
            }
        }
        else {
            word_fsg_trans_add(word_fsg, i, j, logp, wid);

            /* Add transitions for alternative pronunciations, if any */
            if (use_altpron) {
                for (wid = dict_nextalt(kbc->dict, wid);
                     wid >= 0; wid = dict_nextalt(kbc->dict, wid)) {
                    word_fsg_trans_add(word_fsg, i, j, logp, wid);
                    n_alt_trans++;
                    n_trans++;
                }
            }
        }
    }

    /* Add silence and noise filler word transitions if specified */
    if (use_filler) {
        n_filler_trans = word_fsg_add_filler(word_fsg, silprob, fillprob,
                                             kbcore_logmath(kbc));
        n_trans += n_filler_trans;
    }

    E_INFO
        ("FSG: %d states, %d transitions (%d null, %d alt, %d filler, %d unknown)\n",
         word_fsg->n_state, n_trans, n_null_trans, n_alt_trans,
         n_filler_trans, n_unk);

#if __FSG_DBG__
    E_INFO("FSG before NULL closure:\n");
    word_fsg_write(word_fsg, stdout);
#endif

    /* Null transitions closure */
    nulls = word_fsg_null_trans_closure(word_fsg, nulls);
    glist_free(nulls);

#if __FSG_DBG__
    E_INFO("FSG after NULL closure:\n");
    word_fsg_write(word_fsg, stdout);
#endif

    /* Compute left and right context CIphone lists for each state */
    word_fsg_lc_rc(word_fsg);

#if __FSG_DBG__
    E_INFO("FSG after lc/rc:\n");
    word_fsg_write(word_fsg, stdout);
#endif

    return word_fsg;
}
/**
 * Prune an s2_fsg_t in place by removing two classes of states:
 *
 *  1. "Passable" states: states with exactly one outgoing edge that is an
 *     epsilon (word == NULL).  Transitions out of such states are deleted
 *     and transitions into them are redirected to the end of the epsilon
 *     chain.
 *  2. "Dead" states: states from which the final state is unreachable
 *     (determined by mark_dead_state() walking incoming edges backward
 *     from the final state).  Their transitions are deleted and the
 *     surviving states are renumbered densely; n_state is updated.
 *
 * @param _fsg  FSG to prune (must be non-NULL).  Modified in place.
 *
 * NOTE(review): the passable-state chase assumes epsilon chains are
 * acyclic; an epsilon cycle would loop forever — confirm upstream
 * construction guarantees this.
 */
static void
prune_states(s2_fsg_t *_fsg)
{
    s2_fsg_trans_t **edges;
    s2_fsg_trans_t *trans, *prev;
    glist_t *adj;
    int *states;
    int i, j, count;

    assert(_fsg != NULL);

    states = (int *) ckd_calloc(_fsg->n_state, sizeof(int));
    edges = (s2_fsg_trans_t **) ckd_calloc(_fsg->n_state,
                                           sizeof(s2_fsg_trans_t *));

    /* Check and remove passable states (states with only one out-going
     * epsilon edge).  states[i] keeps track of out-degree and edges[i]
     * keeps track of the last out-going edge from state i. */
    for (i = _fsg->n_state - 1; i >= 0; i--) {
        edges[i] = NULL;
        states[i] = 0;
    }
    for (trans = _fsg->trans_list; trans != NULL; trans = trans->next) {
        edges[trans->from_state] = trans;
        states[trans->from_state]++;
    }
    count = 0;
    for (i = _fsg->n_state - 1; i >= 0; i--) {
        if (states[i] == 1 && edges[i]->word == NULL) {
            /* Chase the epsilon chain to its end; states[i] becomes the
             * redirect target for edges into state i. */
            j = i;
            while (states[j] == 1 && edges[j]->word == NULL)
                j = edges[j]->to_state;
            states[i] = j;
            count++;
        }
        else
            states[i] = -1;     /* not passable */
    }

    /* Delete transitions leaving passable states; redirect transitions
     * entering them. */
    trans = _fsg->trans_list;
    prev = NULL;
    while (trans) {
        if (states[trans->from_state] != -1) {
            if (prev == NULL) {
                trans = _fsg->trans_list;
                _fsg->trans_list = trans->next;
                ckd_free(trans->word);
                ckd_free(trans);
                trans = _fsg->trans_list;
            }
            else {
                prev->next = trans->next;
                ckd_free(trans->word);
                ckd_free(trans);
                trans = prev->next;
            }
        }
        else {
            if (states[trans->to_state] != -1)
                trans->to_state = states[trans->to_state];
            prev = trans;
            trans = trans->next;
        }
    }

    /* Check and remove dead states (states that cannot reach the final
     * state).  adj[s] lists the transitions entering s, used to walk
     * backward from the final state. */
    adj = (glist_t *) ckd_calloc(_fsg->n_state, sizeof(glist_t));
    for (i = _fsg->n_state - 1; i >= 0; i--) {
        adj[i] = NULL;
        states[i] = -1;
    }
    for (trans = _fsg->trans_list; trans; trans = trans->next)
        adj[trans->to_state] = glist_add_ptr(adj[trans->to_state], trans);
    mark_dead_state(_fsg, _fsg->final_state, states, adj);

    /* Renumber surviving states densely. */
    count = 0;
    for (i = 0; i < _fsg->n_state; i++) {
        glist_free(adj[i]);
        if (states[i] != -1)
            states[i] = count++;
    }
    _fsg->n_state = count;

    /* Delete transitions touching dead states; remap the rest. */
    trans = _fsg->trans_list;
    prev = NULL;
    while (trans) {
        if (states[trans->from_state] == -1 || states[trans->to_state] == -1) {
            if (prev == NULL) {
                trans = _fsg->trans_list;
                _fsg->trans_list = trans->next;
                ckd_free(trans->word);
                ckd_free(trans);
                trans = _fsg->trans_list;
            }
            else {
                prev->next = trans->next;
                ckd_free(trans->word);
                ckd_free(trans);
                trans = prev->next;
            }
        }
        else {
            trans->from_state = states[trans->from_state];
            trans->to_state = states[trans->to_state];
            prev = trans;
            trans = trans->next;
        }
    }

    ckd_free(states);
    ckd_free(edges);
    /* FIX: the adj array itself was never freed (only its element lists),
     * leaking n_state * sizeof(glist_t) per call. */
    ckd_free(adj);
}
/**
 * Refresh the global partial-hypothesis array (parthyp) from the Viterbi
 * history, either at utterance end (endutt != 0) or mid-utterance.
 *
 * Each word slot gets a freshly strdup'd word string (fillers optionally
 * replaced by silence per -reportfill; alternative-pronunciation suffixes
 * optionally stripped per -reportpron) plus start/end frames and scores.
 *
 * @param endutt  Non-zero to finalize the utterance, zero for a partial.
 * @return Number of words currently in parthyp (parthyplen).
 *
 * NOTE(review): assumes parthyp has room for nwds+1 entries — the bound is
 * not checked here; verify against the array's declaration.
 */
int32
live_get_partialhyp(int32 endutt)
{
    int32 id, nwds;
    glist_t hyp;
    gnode_t *gn;
    hyp_t *h;
    dict_t *dict;

    dict = kbcore_dict(kb->kbcore);
    if (endutt)
        id = vithist_utt_end(kb->vithist, kb->kbcore);
    else
        id = vithist_partialutt_end(kb->vithist, kb->kbcore);

    if (id > 0) {
        hyp = vithist_backtrace(kb->vithist, id);
        for (gn = hyp, nwds = 0; gn; gn = gnode_next(gn), nwds++) {
            h = (hyp_t *) gnode_ptr(gn);
            /* Release the word string left over from the previous call. */
            if (parthyp[nwds].word != NULL) {
                ckd_free(parthyp[nwds].word);
                parthyp[nwds].word = NULL;
            }
            /* 20040905 L Galescu <*****@*****.**>
             * Report noise? If not, replace with silence word. */
            if ((cmd_ln_int32("-reportfill") == 0)
                && dict_filler_word(dict, h->id))
                parthyp[nwds].word = strdup(dict_wordstr(dict, dict->silwid));
            else
                parthyp[nwds].word = strdup(dict_wordstr(dict, h->id));
            /* 20040901 L Galescu <*****@*****.**>
             * Choice to report alternative pronunciations or not. */
            if (cmd_ln_int32("-reportpron") == 0)
                dict_word2basestr(parthyp[nwds].word);
            parthyp[nwds].sf = h->sf;
            parthyp[nwds].ef = h->ef;
            parthyp[nwds].ascr = h->ascr;
            parthyp[nwds].lscr = h->lscr;
        }
        /* Clear the slot just past the last word so stale entries from a
         * longer previous hypothesis are not reported. */
        if (parthyp[nwds].word != NULL) {
            ckd_free(parthyp[nwds].word);
            parthyp[nwds].word = NULL;
        }
        parthyplen = nwds;

        /* Free hyplist.  FIX: free every entry — the previous loop
         * condition (gn && gnode_next(gn)) stopped before the last node
         * and leaked one hyp_t per call.  All fields needed above were
         * copied (strdup / by value), so freeing all entries is safe. */
        for (gn = hyp; gn; gn = gnode_next(gn)) {
            h = (hyp_t *) gnode_ptr(gn);
            ckd_free((void *) h);
        }
        glist_free(hyp);
    }
    else if (id == 0) {
        /* Empty hypothesis: clear the first slot and report zero words. */
        nwds = 0;
        if (parthyp[nwds].word != NULL) {
            ckd_free(parthyp[nwds].word);
            parthyp[nwds].word = NULL;
        }
        parthyplen = nwds;
    }
    else {
        /* lgalescu 2004/10/13: nothing changed; we return the same hyp
         * that was there before */
    }

    return (parthyplen);
}
/**
 * Read a set of language models from an LM control file and build an
 * ngram_model_set from them.
 *
 * The control file optionally starts with a brace-delimited list of class
 * definition files, then alternates LM file names and LM names; each LM may
 * be followed by a brace-delimited list of class names to attach to it.
 * Relative paths are resolved against the control file's directory.
 *
 * @param config     Command-line configuration passed through to the LMs.
 * @param lmctlfile  Path to the LM control file.
 * @param lmath      Log-math computation object.
 * @return New ngram_model_set, or NULL on failure.  All intermediate
 *         resources (file handle, name list, class table) are released on
 *         both success and failure paths.
 */
ngram_model_t *
ngram_model_set_read(cmd_ln_t * config,
                     const char *lmctlfile, logmath_t * lmath)
{
    FILE *ctlfp;
    glist_t lms = NULL;
    glist_t lmnames = NULL;
    __BIGSTACKVARIABLE__ char str[1024];
    ngram_model_t *set = NULL;
    hash_table_t *classes;
    char *basedir, *c;

    /* Read all the class definition files to accumulate a mapping of
     * classnames to definitions. */
    classes = hash_table_new(0, FALSE);
    if ((ctlfp = fopen(lmctlfile, "r")) == NULL) {
        E_ERROR_SYSTEM("Failed to open %s", lmctlfile);
        /* FIX: free the (still empty) class table instead of leaking it. */
        hash_table_free(classes);
        return NULL;
    }

    /* Try to find the base directory to append to relative paths in
     * the lmctl file. */
    if ((c = strrchr(lmctlfile, '/')) || (c = strrchr(lmctlfile, '\\'))) {
        /* Include the trailing slash. */
        basedir = ckd_calloc(c - lmctlfile + 2, 1);
        memcpy(basedir, lmctlfile, c - lmctlfile + 1);
    }
    else {
        basedir = NULL;
    }
    E_INFO("Reading LM control file '%s'\n", lmctlfile);
    if (basedir)
        E_INFO("Will prepend '%s' to unqualified paths\n", basedir);

    if (fscanf(ctlfp, "%1023s", str) == 1) {
        if (strcmp(str, "{") == 0) {
            /* Load LMclass files */
            while ((fscanf(ctlfp, "%1023s", str) == 1)
                   && (strcmp(str, "}") != 0)) {
                char *deffile;
                if (basedir && !path_is_absolute(str))
                    deffile = string_join(basedir, str, NULL);
                else
                    deffile = ckd_salloc(str);
                E_INFO("Reading classdef from '%s'\n", deffile);
                if (read_classdef_file(classes, deffile) < 0) {
                    ckd_free(deffile);
                    goto error_out;
                }
                ckd_free(deffile);
            }
            if (strcmp(str, "}") != 0) {
                E_ERROR("Unexpected EOF in %s\n", lmctlfile);
                goto error_out;
            }
            /* This might be the first LM name. */
            if (fscanf(ctlfp, "%1023s", str) != 1)
                str[0] = '\0';
        }
    }
    else
        str[0] = '\0';

    /* Read in one LM at a time and add classes to them as necessary. */
    while (str[0] != '\0') {
        char *lmfile;
        ngram_model_t *lm;

        /* FIX: use path_is_absolute() as in the classdef loop above; the
         * old str[0] check missed drive-letter paths like "C:\...". */
        if (basedir && !path_is_absolute(str))
            lmfile = string_join(basedir, str, NULL);
        else
            lmfile = ckd_salloc(str);
        E_INFO("Reading lm from '%s'\n", lmfile);
        lm = ngram_model_read(config, lmfile, NGRAM_AUTO, lmath);
        if (lm == NULL) {
            ckd_free(lmfile);
            goto error_out;
        }
        if (fscanf(ctlfp, "%1023s", str) != 1) {
            E_ERROR("LMname missing after LMFileName '%s'\n", lmfile);
            ckd_free(lmfile);
            goto error_out;
        }
        ckd_free(lmfile);

        lms = glist_add_ptr(lms, lm);
        lmnames = glist_add_ptr(lmnames, ckd_salloc(str));

        if (fscanf(ctlfp, "%1023s", str) == 1) {
            if (strcmp(str, "{") == 0) {
                /* LM uses classes; read their names */
                while ((fscanf(ctlfp, "%1023s", str) == 1)
                       && (strcmp(str, "}") != 0)) {
                    void *val;
                    classdef_t *classdef;

                    if (hash_table_lookup(classes, str, &val) == -1) {
                        E_ERROR("Unknown class %s in control file\n", str);
                        goto error_out;
                    }
                    classdef = val;
                    if (ngram_model_add_class(lm, str, 1.0,
                                              classdef->words,
                                              classdef->weights,
                                              classdef->n_words) < 0) {
                        goto error_out;
                    }
                    E_INFO("Added class %s containing %d words\n",
                           str, classdef->n_words);
                }
                if (strcmp(str, "}") != 0) {
                    E_ERROR("Unexpected EOF in %s\n", lmctlfile);
                    goto error_out;
                }
                if (fscanf(ctlfp, "%1023s", str) != 1)
                    str[0] = '\0';
            }
        }
        else
            str[0] = '\0';
    }
    fclose(ctlfp);
    ctlfp = NULL;   /* mark closed so the shared cleanup won't close twice */

    /* Now construct arrays out of lms and lmnames, and build an
     * ngram_model_set. */
    lms = glist_reverse(lms);
    lmnames = glist_reverse(lmnames);
    {
        int32 n_models;
        ngram_model_t **lm_array;
        char **name_array;
        gnode_t *lm_node, *name_node;
        int32 i;

        n_models = glist_count(lms);
        lm_array = ckd_calloc(n_models, sizeof(*lm_array));
        name_array = ckd_calloc(n_models, sizeof(*name_array));
        lm_node = lms;
        name_node = lmnames;
        for (i = 0; i < n_models; ++i) {
            lm_array[i] = gnode_ptr(lm_node);
            name_array[i] = gnode_ptr(name_node);
            lm_node = gnode_next(lm_node);
            name_node = gnode_next(name_node);
        }
        set = ngram_model_set_init(config, lm_array, name_array,
                                   NULL, n_models);

        /* The set holds its own references; drop ours. */
        for (i = 0; i < n_models; ++i) {
            ngram_model_free(lm_array[i]);
        }
        ckd_free(lm_array);
        ckd_free(name_array);
    }
  error_out:
    /* Shared cleanup for both success and failure (set == NULL). */
    {
        gnode_t *gn;
        glist_t hlist;

        /* FIX: close the control file on error paths; every goto above
         * previously leaked the FILE handle. */
        if (ctlfp)
            fclose(ctlfp);
        if (set == NULL) {
            for (gn = lms; gn; gn = gnode_next(gn)) {
                ngram_model_free(gnode_ptr(gn));
            }
        }
        glist_free(lms);
        for (gn = lmnames; gn; gn = gnode_next(gn)) {
            ckd_free(gnode_ptr(gn));
        }
        glist_free(lmnames);
        hlist = hash_table_tolist(classes, NULL);
        for (gn = hlist; gn; gn = gnode_next(gn)) {
            hash_entry_t *he = gnode_ptr(gn);
            ckd_free((char *) he->key);
            classdef_free(he->val);
        }
        glist_free(hlist);
        hash_table_free(classes);
        ckd_free(basedir);
    }
    return set;
}