glist_t srch_FLAT_FWD_gen_hyp(void *srch /**< a pointer of srch_t */ ) { srch_t *s; srch_FLAT_FWD_graph_t *fwg; srch_hyp_t *tmph, *hyp; glist_t ghyp, rhyp; s = (srch_t *) srch; fwg = (srch_FLAT_FWD_graph_t *) s->grh->graph_struct; if (s->exit_id == -1) s->exit_id = lat_final_entry(fwg->lathist, kbcore_dict(s->kbc), fwg->n_frm, s->uttid); if (NOT_S3LATID(s->exit_id)) { E_INFO("lattice ID: %d\n", s->exit_id); E_ERROR("%s: NO RECOGNITION\n", s->uttid); return NULL; } else { /* BAD_S3WID => Any right context */ lattice_backtrace(fwg->lathist, s->exit_id, BAD_S3WID, &hyp, s->kbc->lmset->cur_lm, kbcore_dict(s->kbc), fwg->ctxt, s->kbc->fillpen); ghyp = NULL; for (tmph = hyp; tmph; tmph = tmph->next) { ghyp = glist_add_ptr(ghyp, (void *) tmph); } rhyp = glist_reverse(ghyp); return rhyp; } }
/**
 * Populate jsgf->searchpath.
 *
 * If the JSGF_PATH environment variable is set (not consulted on Windows CE),
 * its ':'-separated components become the search path, in order.  Otherwise
 * the search path gets "." when no filename is given, or the directory part
 * of filename.
 */
static void
jsgf_set_search_path(jsgf_t * jsgf, const char *filename)
{
    char *dir;

#if !defined(_WIN32_WCE)
    {
        const char *env = getenv("JSGF_PATH");

        if (env != NULL) {
            /* FIXME: This should be a function in libsphinxbase. */
            /* The list entries point into this single private copy. */
            char *entry = ckd_salloc(env);
            char *sep;

            for (sep = strchr(entry, ':'); sep != NULL;
                 sep = strchr(entry, ':')) {
                *sep = '\0';
                jsgf->searchpath = glist_add_ptr(jsgf->searchpath, entry);
                entry = sep + 1;
            }
            /* Last (or only) component, then restore left-to-right order. */
            jsgf->searchpath = glist_add_ptr(jsgf->searchpath, entry);
            jsgf->searchpath = glist_reverse(jsgf->searchpath);
            return;
        }
    }
#endif

    if (filename == NULL) {
        jsgf->searchpath =
            glist_add_ptr(jsgf->searchpath, ckd_salloc("."));
        return;
    }

    /* Copy of filename used as the output buffer for its directory part. */
    dir = ckd_salloc(filename);
    path2dirname(filename, dir);
    jsgf->searchpath = glist_add_ptr(jsgf->searchpath, dir);
}
/**
 * Build a finite-state grammar model from one rule of a JSGF grammar.
 *
 * Discards any links left on the grammar, expands the rule into a fresh set
 * of links starting from a new entry state, then turns each link into an FSG
 * transition.  When do_closure is non-zero the null-transition closure is
 * computed on the result.
 */
static fsg_model_t *
jsgf_build_fsg_internal(jsgf_t * grammar, jsgf_rule_t * rule,
                        logmath_t * lmath, float32 lw, int do_closure)
{
    fsg_model_t *model;
    gnode_t *node;
    int entry_state, exit_state;

    /* Clear previous links */
    for (node = grammar->links; node != NULL; node = gnode_next(node))
        ckd_free(gnode_ptr(node));
    glist_free(grammar->links);
    grammar->links = NULL;
    grammar->nstate = 0;

    /* Create the top-level entry state, and expand the top-level rule. */
    entry_state = grammar->nstate++;
    exit_state = expand_rule(grammar, rule, entry_state, NO_NODE);

    /* If no exit-state was created, create one. */
    if (exit_state == NO_NODE) {
        exit_state = grammar->nstate++;
        jsgf_add_link(grammar, NULL, entry_state, exit_state);
    }

    model = fsg_model_init(rule->name, lmath, lw, grammar->nstate);
    model->start_state = entry_state;
    model->final_state = exit_state;

    /* Links were accumulated in reverse; walk them in creation order. */
    grammar->links = glist_reverse(grammar->links);
    for (node = grammar->links; node != NULL; node = gnode_next(node)) {
        jsgf_link_t *lnk = gnode_ptr(node);

        if (lnk->atom == NULL) {
            /* Link without an atom: plain null transition. */
            fsg_model_null_trans_add(model, lnk->from, lnk->to, 0);
            continue;
        }

        if (jsgf_atom_is_rule(lnk->atom)) {
            /* Rule reference: weighted null transition. */
            fsg_model_null_trans_add(model, lnk->from, lnk->to,
                                     logmath_log(lmath, lnk->atom->weight));
        }
        else {
            /* Terminal: weighted word transition. */
            int word_id = fsg_model_word_add(model, lnk->atom->name);

            fsg_model_trans_add(model, lnk->from, lnk->to,
                                logmath_log(lmath, lnk->atom->weight),
                                word_id);
        }
    }

    if (do_closure) {
        glist_t closure = fsg_model_null_trans_closure(model, NULL);

        glist_free(closure);
    }

    return model;
}
/**
 * Run the best-path search over a DAG for the flat-forward search.
 *
 * Adds fudge edges to the DAG, bypasses filler nodes if that has not been
 * done yet, and runs dag_search().  Returns the best path as a glist of
 * srch_hyp_t pointers in chain order, or NULL if dag_search() found nothing.
 */
glist_t
srch_FLAT_FWD_bestpath_impl(void *srch, /**< A void pointer to a search structure */
                            dag_t * dag)
{
    srch_t *search = (srch_t *) srch;
    srch_FLAT_FWD_graph_t *graph =
        (srch_FLAT_FWD_graph_t *) search->grh->graph_struct;
    float32 bestpathlw;
    float64 lwf;
    srch_hyp_t *seg, *best;
    glist_t collected = NULL;

    assert(graph->lathist);

    /* Language-weight factor: ratio of -bestpathlw to -lw, or 1 if unset. */
    bestpathlw =
        cmd_ln_float32_r(kbcore_config(graph->kbcore), "-bestpathlw");
    if (bestpathlw)
        lwf = bestpathlw / cmd_ln_float32_r(kbcore_config(graph->kbcore),
                                            "-lw");
    else
        lwf = 1.0;

    flat_fwd_dag_add_fudge_edges(graph, dag,
                                 cmd_ln_int32_r(kbcore_config(graph->kbcore),
                                                "-dagfudge"),
                                 cmd_ln_int32_r(kbcore_config(graph->kbcore),
                                                "-min_endfr"),
                                 (void *) graph->lathist, search->kbc->dict);

    /* Bypass filler nodes */
    if (!dag->filler_removed) {
        int bypass_rv;

        /* If Viterbi search terminated in filler word coerce final DAG node to FINISH_WORD */
        if (dict_filler_word(search->kbc->dict, dag->end->wid))
            dag->end->wid = search->kbc->dict->finishwid;

        bypass_rv = dag_bypass_filler_nodes(dag, lwf, search->kbc->dict,
                                            search->kbc->fillpen);
        if (bypass_rv >= 0)
            dag->filler_removed = 1;
        else
            E_ERROR("maxedge limit (%d) exceeded\n", dag->maxedge);
    }

    best = dag_search(dag, search->uttid, lwf, dag->end, search->kbc->dict,
                      search->kbc->lmset->cur_lm, search->kbc->fillpen);
    if (best == NULL)
        return NULL;

    /* glist_add_ptr prepends, so reverse at the end to keep chain order. */
    for (seg = best; seg != NULL; seg = seg->next)
        collected = glist_add_ptr(collected, (void *) seg);

    return glist_reverse(collected);
}
/**
 * Add a word class to an N-gram model.
 *
 * @param model       Model to extend.
 * @param classname   Name of the class tag; added to the model as a word
 *                    (with weight classweight) if it is not already known.
 * @param classweight Weight used when the class tag has to be added.
 * @param words       The n_words member words of the class.
 * @param weights     The n_words in-class weights, parallel to words.
 * @param n_words     Number of entries in words / weights.
 * @return The new class ID, or -1 on failure.
 */
int32
ngram_model_add_class(ngram_model_t * model,
                      const char *classname,
                      float32 classweight,
                      char **words, const float32 * weights, int32 n_words)
{
    ngram_class_t *lmclass;
    glist_t classwords = NULL;
    int32 i, start_wid = -1;
    int32 classid, tag_wid;

    /* Check if classname already exists in model.  If not, add it. */
    if ((tag_wid = ngram_wid(model, classname)) == ngram_unknown_wid(model)) {
        tag_wid = ngram_model_add_word(model, classname, classweight);
        if (tag_wid == NGRAM_INVALID_WID)
            return -1;
    }

    if (model->n_classes == 128) {
        E_ERROR("Number of classes cannot exceed 128 (sorry)\n");
        return -1;
    }
    classid = model->n_classes;

    for (i = 0; i < n_words; ++i) {
        int32 wid;

        wid = ngram_add_word_internal(model, words[i], classid);
        if (wid == NGRAM_INVALID_WID) {
            /* Release the weights collected so far; nothing else owns them. */
            glist_free(classwords);
            return -1;
        }
        /* Remember the base word ID of the first class member. */
        if (start_wid == -1)
            start_wid = NGRAM_BASEWID(wid);
        classwords = glist_add_float32(classwords, weights[i]);
    }

    /* glist_add_float32 prepends; restore the caller's word order. */
    classwords = glist_reverse(classwords);
    lmclass = ngram_class_new(model, tag_wid, start_wid, classwords);
    glist_free(classwords);
    if (lmclass == NULL)
        return -1;

    /* Grow the class table by one slot and store the new class. */
    ++model->n_classes;
    if (model->classes == NULL)
        model->classes = ckd_calloc(1, sizeof(*model->classes));
    else
        model->classes = ckd_realloc(model->classes,
                                     model->n_classes *
                                     sizeof(*model->classes));
    model->classes[classid] = lmclass;
    return classid;
}
/**
 * Build a finite-state grammar model from one rule of a JSGF grammar.
 *
 * Discards any links left on the grammar, resets the rule's entry/exit
 * states and the grammar's state counter, expands the rule, then turns each
 * resulting link into an FSG transition.  When do_closure is non-zero the
 * null-transition closure is computed on the result.
 */
static fsg_model_t *
jsgf_build_fsg_internal(jsgf_t *grammar, jsgf_rule_t *rule,
                        logmath_t *lmath, float32 lw, int do_closure)
{
    fsg_model_t *model;
    gnode_t *node;

    /* Clear previous links */
    for (node = grammar->links; node != NULL; node = gnode_next(node))
        ckd_free(gnode_ptr(node));
    glist_free(grammar->links);
    grammar->links = NULL;

    rule->entry = rule->exit = 0;
    grammar->nstate = 0;
    expand_rule(grammar, rule);

    model = fsg_model_init(rule->name, lmath, lw, grammar->nstate);
    model->start_state = rule->entry;
    model->final_state = rule->exit;

    /* Links were accumulated in reverse; walk them in creation order. */
    grammar->links = glist_reverse(grammar->links);
    for (node = grammar->links; node != NULL; node = gnode_next(node)) {
        jsgf_link_t *lnk = gnode_ptr(node);

        if (lnk->atom == NULL) {
            /* Link without an atom: plain null transition. */
            fsg_model_null_trans_add(model, lnk->from, lnk->to, 0);
            continue;
        }

        if (jsgf_atom_is_rule(lnk->atom)) {
            /* Rule reference: weighted null transition. */
            fsg_model_null_trans_add(model, lnk->from, lnk->to,
                                     logmath_log(lmath, lnk->atom->weight));
        }
        else {
            /* Terminal: weighted word transition. */
            int word_id = fsg_model_word_add(model, lnk->atom->name);

            fsg_model_trans_add(model, lnk->from, lnk->to,
                                logmath_log(lmath, lnk->atom->weight),
                                word_id);
        }
    }

    if (do_closure) {
        glist_t closure = fsg_model_null_trans_closure(model, NULL);

        glist_free(closure);
    }

    return model;
}
/**
 * Allocate a new, zero-initialised JSGF grammar.
 *
 * A grammar created with a parent shares the parent's rules, imports and
 * search path.  A top-level grammar gets fresh hash tables and, except on
 * Windows CE, a search path taken from the ':'-separated JSGF_PATH
 * environment variable, or "." when that variable is unset.
 */
jsgf_t *
jsgf_grammar_new(jsgf_t *parent)
{
    jsgf_t *g = ckd_calloc(1, sizeof(*g));
    char *env_path;

    /* If this is an imported/subgrammar, then we will share a global
     * namespace with the parent grammar. */
    if (parent != NULL) {
        g->rules = parent->rules;
        g->imports = parent->imports;
        g->searchpath = parent->searchpath;
        g->parent = parent;
        return g;
    }

    g->rules = hash_table_new(64, 0);
    g->imports = hash_table_new(16, 0);

    /* Silvio Moioli: no getenv() in Windows CE */
#if !defined(_WIN32_WCE)
    env_path = getenv("JSGF_PATH");
    if (env_path == NULL) {
        /* Default to current directory. */
        g->searchpath = glist_add_ptr(g->searchpath, ckd_salloc("."));
    }
    else {
        /* FIXME: This should be a function in libsphinxbase. */
        /* FIXME: Also nextword() is totally useless... */
        /* The list entries point into this single private copy. */
        char *entry = ckd_salloc(env_path);
        char *sep;

        for (sep = strchr(entry, ':'); sep != NULL;
             sep = strchr(entry, ':')) {
            *sep = '\0';
            g->searchpath = glist_add_ptr(g->searchpath, entry);
            entry = sep + 1;
        }
        /* Last (or only) component, then restore left-to-right order. */
        g->searchpath = glist_add_ptr(g->searchpath, entry);
        g->searchpath = glist_reverse(g->searchpath);
    }
#endif

    return g;
}
glist_t srch_FSG_gen_hyp(void *srch /**< a pointer of srch_t */ ) { srch_t *s; fsg_search_t *fsgsrch; srch_hyp_t *tmph; glist_t ghyp, rhyp; s = (srch_t *) srch; fsgsrch = (fsg_search_t *) s->grh->graph_struct; fsg_search_history_backtrace(fsgsrch, TRUE); ghyp = NULL; for (tmph = fsgsrch->hyp; tmph; tmph = tmph->next) { ghyp = glist_add_ptr(ghyp, (void *) tmph); } rhyp = glist_reverse(ghyp); return rhyp; }
/**
 * Read word-class definitions from a file into a hash table.
 *
 * The file consists of blocks of the form
 *
 *     LMCLASS <name>
 *     <word> [<weight>]
 *     ...
 *     END <name>
 *
 * A missing weight defaults to 1.0.  Lines outside a class block are ignored.
 * Each completed class is entered into classes, keyed by its name, with a
 * newly allocated classdef_t as the value.
 *
 * @param classes   Hash table receiving class name -> classdef_t entries.
 * @param file_name File to read (opened through fopen_comp).
 * @return 0 on success, -1 if the file cannot be opened, an END marker does
 *         not match the open class, or the class cannot be entered into the
 *         table.
 */
int32
read_classdef_file(hash_table_t * classes, const char *file_name)
{
    FILE *fp;
    int32 is_pipe;
    int inclass;  /**< Are we currently reading a list of class words? */
    int32 rv = -1;
    gnode_t *gn;
    glist_t classwords = NULL;
    glist_t classprobs = NULL;
    char *classname = NULL;

    if ((fp = fopen_comp(file_name, "r", &is_pipe)) == NULL) {
        E_ERROR("File %s not found\n", file_name);
        return -1;
    }

    inclass = FALSE;
    while (!feof(fp)) {
        char line[512];
        char *wptr[2];
        int n_words;

        if (fgets(line, sizeof(line), fp) == NULL)
            break;

        n_words = str2words(line, wptr, 2);
        if (n_words <= 0)
            continue;

        if (inclass) {
            /* Look for an end of class marker. */
            if (n_words == 2 && 0 == strcmp(wptr[0], "END")) {
                classdef_t *classdef;
                gnode_t *word, *weight;
                int32 i;

                if (classname == NULL || 0 != strcmp(wptr[1], classname))
                    goto error_out;
                inclass = FALSE;

                /* Construct a class from the list of words collected. */
                classdef = ckd_calloc(1, sizeof(*classdef));
                classwords = glist_reverse(classwords);
                classprobs = glist_reverse(classprobs);
                classdef->n_words = glist_count(classwords);
                classdef->words = ckd_calloc(classdef->n_words,
                                             sizeof(*classdef->words));
                classdef->weights = ckd_calloc(classdef->n_words,
                                               sizeof(*classdef->weights));
                word = classwords;
                weight = classprobs;
                for (i = 0; i < classdef->n_words; ++i) {
                    classdef->words[i] = gnode_ptr(word);
                    classdef->weights[i] = gnode_float32(weight);
                    word = gnode_next(word);
                    weight = gnode_next(weight);
                }

                /* The word strings now belong to classdef.  Drop the list
                 * nodes right away so the cleanup code at error_out cannot
                 * free those strings a second time after classdef_free().
                 * NOTE(review): this relies on classdef_free() releasing the
                 * strings in classdef->words - confirm against its
                 * definition. */
                glist_free(classwords);
                glist_free(classprobs);
                classwords = NULL;
                classprobs = NULL;

                /* Add this class to the hash table. */
                if (hash_table_enter(classes, classname, classdef) !=
                    classdef) {
                    classdef_free(classdef);
                    goto error_out;
                }

                /* The table now references classname as its key. */
                classname = NULL;
            }
            else {
                float32 fprob;

                if (n_words == 2)
                    fprob = atof_c(wptr[1]);
                else
                    fprob = 1.0f;
                /* Add it to the list of words for this class. */
                classwords =
                    glist_add_ptr(classwords, ckd_salloc(wptr[0]));
                classprobs = glist_add_float32(classprobs, fprob);
            }
        }
        else {
            /* Start a new LM class if the LMCLASS marker is seen */
            if (n_words == 2 && 0 == strcmp(wptr[0], "LMCLASS")) {
                inclass = TRUE;
                classname = ckd_salloc(wptr[1]);
            }
            /* Otherwise, just ignore whatever junk we got */
        }
    }
    rv = 0;                     /* Success. */

  error_out:
    /* Free all the stuff we might have allocated. */
    fclose_comp(fp, is_pipe);
    for (gn = classwords; gn; gn = gnode_next(gn))
        ckd_free(gnode_ptr(gn));
    glist_free(classwords);
    glist_free(classprobs);
    ckd_free(classname);

    return rv;
}
/**
 * Read a set of language models described by an LM control file.
 *
 * The control file optionally starts with a brace-delimited list of class
 * definition files, followed by any number of entries of the form
 *
 *     <lmfile> <lmname> [ { <classname> ... } ]
 *
 * Relative paths are resolved against the directory of the control file.
 *
 * @param config    Configuration passed on to the model readers.
 * @param lmctlfile Path of the LM control file.
 * @param lmath     Log-math object passed on to the model readers.
 * @return The new model set, or NULL if the control file cannot be opened or
 *         parsed, or if any model or class fails to load.
 */
ngram_model_t *
ngram_model_set_read(cmd_ln_t * config,
                     const char *lmctlfile, logmath_t * lmath)
{
    FILE *ctlfp;
    glist_t lms = NULL;
    glist_t lmnames = NULL;
    __BIGSTACKVARIABLE__ char str[1024];
    ngram_model_t *set = NULL;
    hash_table_t *classes;
    char *basedir, *c;

    /* Read all the class definition files to accumulate a mapping of
     * classnames to definitions. */
    classes = hash_table_new(0, FALSE);
    if ((ctlfp = fopen(lmctlfile, "r")) == NULL) {
        E_ERROR_SYSTEM("Failed to open %s", lmctlfile);
        /* Nothing has been entered yet; just release the empty table. */
        hash_table_free(classes);
        return NULL;
    }

    /* Try to find the base directory to append to relative paths in
     * the lmctl file. */
    if ((c = strrchr(lmctlfile, '/')) || (c = strrchr(lmctlfile, '\\'))) {
        /* Include the trailing slash. */
        basedir = ckd_calloc(c - lmctlfile + 2, 1);
        memcpy(basedir, lmctlfile, c - lmctlfile + 1);
    }
    else {
        basedir = NULL;
    }
    E_INFO("Reading LM control file '%s'\n", lmctlfile);
    if (basedir)
        E_INFO("Will prepend '%s' to unqualified paths\n", basedir);

    if (fscanf(ctlfp, "%1023s", str) == 1) {
        if (strcmp(str, "{") == 0) {
            /* Load LMclass files */
            while ((fscanf(ctlfp, "%1023s", str) == 1)
                   && (strcmp(str, "}") != 0)) {
                char *deffile;

                if (basedir && !path_is_absolute(str))
                    deffile = string_join(basedir, str, NULL);
                else
                    deffile = ckd_salloc(str);
                E_INFO("Reading classdef from '%s'\n", deffile);
                if (read_classdef_file(classes, deffile) < 0) {
                    ckd_free(deffile);
                    goto error_out;
                }
                ckd_free(deffile);
            }

            if (strcmp(str, "}") != 0) {
                E_ERROR("Unexpected EOF in %s\n", lmctlfile);
                goto error_out;
            }

            /* This might be the first LM name. */
            if (fscanf(ctlfp, "%1023s", str) != 1)
                str[0] = '\0';
        }
    }
    else
        str[0] = '\0';

    /* Read in one LM at a time and add classes to them as necessary. */
    while (str[0] != '\0') {
        char *lmfile;
        ngram_model_t *lm;

        if (basedir && str[0] != '/' && str[0] != '\\')
            lmfile = string_join(basedir, str, NULL);
        else
            lmfile = ckd_salloc(str);
        E_INFO("Reading lm from '%s'\n", lmfile);
        lm = ngram_model_read(config, lmfile, NGRAM_AUTO, lmath);
        if (lm == NULL) {
            ckd_free(lmfile);
            goto error_out;
        }
        if (fscanf(ctlfp, "%1023s", str) != 1) {
            E_ERROR("LMname missing after LMFileName '%s'\n", lmfile);
            ckd_free(lmfile);
            /* lm is not on the lms list yet, so the common cleanup would
             * miss it. */
            ngram_model_free(lm);
            goto error_out;
        }
        ckd_free(lmfile);
        lms = glist_add_ptr(lms, lm);
        lmnames = glist_add_ptr(lmnames, ckd_salloc(str));

        if (fscanf(ctlfp, "%1023s", str) == 1) {
            if (strcmp(str, "{") == 0) {
                /* LM uses classes; read their names */
                while ((fscanf(ctlfp, "%1023s", str) == 1) &&
                       (strcmp(str, "}") != 0)) {
                    void *val;
                    classdef_t *classdef;

                    if (hash_table_lookup(classes, str, &val) == -1) {
                        E_ERROR("Unknown class %s in control file\n", str);
                        goto error_out;
                    }
                    classdef = val;
                    if (ngram_model_add_class(lm, str, 1.0,
                                              classdef->words,
                                              classdef->weights,
                                              classdef->n_words) < 0) {
                        goto error_out;
                    }
                    E_INFO("Added class %s containing %d words\n",
                           str, classdef->n_words);
                }
                if (strcmp(str, "}") != 0) {
                    E_ERROR("Unexpected EOF in %s\n", lmctlfile);
                    goto error_out;
                }
                if (fscanf(ctlfp, "%1023s", str) != 1)
                    str[0] = '\0';
            }
        }
        else
            str[0] = '\0';
    }
    fclose(ctlfp);
    ctlfp = NULL;               /* Tell the cleanup code it is closed. */

    /* Now construct arrays out of lms and lmnames, and build an
     * ngram_model_set. */
    lms = glist_reverse(lms);
    lmnames = glist_reverse(lmnames);
    {
        int32 n_models;
        ngram_model_t **lm_array;
        char **name_array;
        gnode_t *lm_node, *name_node;
        int32 i;

        n_models = glist_count(lms);
        lm_array = ckd_calloc(n_models, sizeof(*lm_array));
        name_array = ckd_calloc(n_models, sizeof(*name_array));
        lm_node = lms;
        name_node = lmnames;
        for (i = 0; i < n_models; ++i) {
            lm_array[i] = gnode_ptr(lm_node);
            name_array[i] = gnode_ptr(name_node);
            lm_node = gnode_next(lm_node);
            name_node = gnode_next(name_node);
        }
        set = ngram_model_set_init(config, lm_array, name_array,
                                   NULL, n_models);

        /* Release our references only when the set was created.  If it was
         * not, the cleanup below releases the models via the lms list, and
         * doing it here as well would release each model twice. */
        if (set != NULL) {
            for (i = 0; i < n_models; ++i) {
                ngram_model_free(lm_array[i]);
            }
        }
        ckd_free(lm_array);
        ckd_free(name_array);
    }

  error_out:
    {
        gnode_t *gn;
        glist_t hlist;

        /* Still open only when we got here through an error path. */
        if (ctlfp != NULL)
            fclose(ctlfp);

        if (set == NULL) {
            for (gn = lms; gn; gn = gnode_next(gn)) {
                ngram_model_free(gnode_ptr(gn));
            }
        }
        glist_free(lms);
        for (gn = lmnames; gn; gn = gnode_next(gn)) {
            ckd_free(gnode_ptr(gn));
        }
        glist_free(lmnames);

        /* The class table holds allocated names as keys and classdef_t
         * objects as values; free both before the table itself. */
        hlist = hash_table_tolist(classes, NULL);
        for (gn = hlist; gn; gn = gnode_next(gn)) {
            hash_entry_t *he = gnode_ptr(gn);

            ckd_free((char *) he->key);
            classdef_free(he->val);
        }
        glist_free(hlist);
        hash_table_free(classes);
        ckd_free(basedir);
    }
    return set;
}
/**
 * Convert a word's spelling to a phone string using a grapheme-to-phoneme
 * N-gram model.
 *
 * Starting from a "<s>" history, repeatedly asks dict_get_winner_wid() for
 * the best unit at the current offset and advances by the length it matched,
 * until the whole word is consumed.  The phones of the chosen units are then
 * concatenated, each followed by a single space; units whose phone is "_"
 * contribute nothing.
 *
 * @param word_grapheme    Spelling of the word to convert.
 * @param ngram_g2p_model  G2P N-gram model.
 * @return Newly allocated phone string (release with ckd_free), or NULL if
 *         no unit matches at some position of the word.
 */
char *
dict_g2p(char const *word_grapheme, ngram_model_t *ngram_g2p_model)
{
    char *final_phone = NULL;
    size_t increment = 1;
    int word_offset = 0;
    size_t j;
    size_t grapheme_len = 0, final_phoneme_len = 0;
    glist_t history_list = NULL;
    gnode_t *gn;
    const int32 *total_unigrams;
    struct winner_t winner;
    const char *word;
    unigram_t unigram;
    int32 wid_sentence;

    total_unigrams = ngram_model_get_counts(ngram_g2p_model);
    wid_sentence = ngram_wid(ngram_g2p_model, "<s>");   /* start with sentence */
    history_list = glist_add_int32(history_list, wid_sentence);
    grapheme_len = strlen(word_grapheme);

    for (j = 0; j < grapheme_len; j += increment) {
        winner = dict_get_winner_wid(ngram_g2p_model, word_grapheme,
                                     history_list, *total_unigrams,
                                     word_offset);
        increment = winner.length_match;
        if (increment == 0) {
            E_ERROR("Error trying to find matching phoneme (%s) Exiting.. \n" , word_grapheme);
            /* Do not leak the history collected so far. */
            glist_free(history_list);
            return NULL;
        }
        history_list = glist_add_int32(history_list, winner.winner_wid);
        word_offset += winner.length_match;
        final_phoneme_len += winner.len_phoneme;
    }

    /* glist_add_int32 prepends; put the history into left-to-right order. */
    history_list = glist_reverse(history_list);

    /* Each appended phone needs its own length plus one separating space,
     * and the result needs one terminating NUL; the "+ 1" covers the NUL,
     * which matters when every phone is a single character.
     * NOTE(review): assumes winner.len_phoneme is the string length of the
     * corresponding phone - confirm in dict_get_winner_wid(). */
    final_phone = ckd_calloc(1, final_phoneme_len * 2 + 1);

    /* The first entry is the "<s>" marker added above; skip it. */
    for (gn = gnode_next(history_list); gn; gn = gnode_next(gn)) {
        word = ngram_word(ngram_g2p_model, gnode_int32(gn));
        if (!word)
            continue;

        unigram = dict_split_unigram(word);

        /* Nothing to append for a missing phone or the "_" placeholder. */
        if (unigram.phone == NULL || strcmp(unigram.phone, "_") == 0) {
            if (unigram.word)
                ckd_free(unigram.word);
            if (unigram.phone)
                ckd_free(unigram.phone);
            continue;
        }

        strcat(final_phone, unigram.phone);
        strcat(final_phone, " ");

        if (unigram.word)
            ckd_free(unigram.word);
        ckd_free(unigram.phone);
    }

    glist_free(history_list);

    return final_phone;
}