/**
 * Read a class definition file and add every class it defines to a model.
 *
 * The file is parsed into a temporary hash table (class name -> classdef)
 * by read_classdef_file(); each entry is then passed to
 * ngram_model_add_class() with a class weight of 1.0.  The temporary table,
 * its keys and its classdefs are released before returning, on every path.
 *
 * @param model     Language model that receives the classes.
 * @param file_name Path of the class definition file.
 * @return 0 on success; -1 if the file could not be parsed or a class could
 *         not be added to the model.
 */
int32
ngram_model_read_classdef(ngram_model_t *model, const char *file_name)
{
    hash_table_t *classes;
    glist_t hl;
    gnode_t *gn;
    int32 parse_status;
    int32 rv = -1;

    classes = hash_table_new(0, FALSE);
    parse_status = read_classdef_file(classes, file_name);

    /* List the entries even when parsing failed: read_classdef_file() may
     * have entered complete classes before it hit the error, and their keys
     * and classdefs must still be released below. */
    hl = hash_table_tolist(classes, NULL);
    if (parse_status < 0)
        goto error_out;

    /* Create a new class in the language model for each classdef. */
    for (gn = hl; gn; gn = gnode_next(gn)) {
        hash_entry_t *he = gnode_ptr(gn);
        classdef_t *classdef = he->val;

        if (ngram_model_add_class(model, he->key, 1.0,
                                  classdef->words,
                                  classdef->weights,
                                  classdef->n_words) < 0)
            goto error_out;
    }
    rv = 0;

error_out:
    /* The table owns neither its keys nor its values, so free them here. */
    for (gn = hl; gn; gn = gnode_next(gn)) {
        hash_entry_t *he = gnode_ptr(gn);

        ckd_free((char *)he->key);
        classdef_free(he->val);
    }
    glist_free(hl);
    hash_table_free(classes);
    return rv;
}
/**
 * Parse a class definition file into a hash table of classdefs.
 *
 * Recognized layout, one item per line:
 *   - "LMCLASS <name>" opens a class;
 *   - inside a class, each line is "<word>" or "<word> <weight>"
 *     (the weight defaults to 1.0 when omitted);
 *   - "END <name>" closes the class; <name> must match the opening line.
 * Lines outside a class that are not an LMCLASS marker are ignored, as are
 * lines for which str2words() reports no usable words.
 *
 * Each completed class is entered into @p classes keyed by a newly
 * allocated copy of its name, with a newly allocated classdef_t as value.
 * Entries already made remain in the table if a later class fails; the
 * caller is responsible for releasing keys and values.
 *
 * NOTE(review): a class still open at end of file is discarded and the
 * function nevertheless returns 0; kept as-is for compatibility.
 *
 * @param classes   Hash table receiving name -> classdef_t entries.
 * @param file_name Path of the file, opened through fopen_comp().
 * @return 0 on success; -1 if the file cannot be opened, an END marker does
 *         not match the open class, or a class cannot be entered into the
 *         table.
 */
int32
read_classdef_file(hash_table_t * classes, const char *file_name)
{
    FILE *fp;
    int32 is_pipe;
    int inclass;  /**< Are we currently reading a list of class words? */
    int32 rv = -1;
    gnode_t *gn;
    glist_t classwords = NULL;
    glist_t classprobs = NULL;
    char *classname = NULL;

    if ((fp = fopen_comp(file_name, "r", &is_pipe)) == NULL) {
        E_ERROR("File %s not found\n", file_name);
        return -1;
    }

    inclass = FALSE;
    while (!feof(fp)) {
        char line[512];
        char *wptr[2];
        int n_words;

        if (fgets(line, sizeof(line), fp) == NULL)
            break;

        n_words = str2words(line, wptr, 2);
        if (n_words <= 0)
            continue;

        if (!inclass) {
            /* Start a new LM class if the LMCLASS marker is seen;
             * anything else outside a class is ignored. */
            if (n_words == 2 && 0 == strcmp(wptr[0], "LMCLASS")) {
                inclass = TRUE;
                classname = ckd_salloc(wptr[1]);
            }
            continue;
        }

        /* Look for an end of class marker. */
        if (n_words == 2 && 0 == strcmp(wptr[0], "END")) {
            classdef_t *classdef;
            gnode_t *word, *weight;
            int32 i;

            if (classname == NULL || 0 != strcmp(wptr[1], classname))
                goto error_out;
            inclass = FALSE;

            /* Construct a class from the list of words collected.  The
             * lists were built by prepending, so restore file order. */
            classdef = ckd_calloc(1, sizeof(*classdef));
            classwords = glist_reverse(classwords);
            classprobs = glist_reverse(classprobs);
            classdef->n_words = glist_count(classwords);
            classdef->words = ckd_calloc(classdef->n_words,
                                         sizeof(*classdef->words));
            classdef->weights = ckd_calloc(classdef->n_words,
                                           sizeof(*classdef->weights));
            word = classwords;
            weight = classprobs;
            for (i = 0; i < classdef->n_words; ++i) {
                classdef->words[i] = gnode_ptr(word);
                classdef->weights[i] = gnode_float32(weight);
                word = gnode_next(word);
                weight = gnode_next(weight);
            }

            /* Add this class to the hash table. */
            if (hash_table_enter(classes, classname, classdef) != classdef) {
                /* The word strings are still referenced by classwords and
                 * are freed once at error_out, so release only the arrays
                 * and the struct allocated above.  (classdef_free() is
                 * presumed to free the strings as well, which would free
                 * them a second time.) */
                ckd_free(classdef->words);
                ckd_free(classdef->weights);
                ckd_free(classdef);
                goto error_out;
            }

            /* Reset everything.  The strings now belong to the classdef
             * and the name to the table, so only the list nodes go. */
            glist_free(classwords);
            glist_free(classprobs);
            classwords = NULL;
            classprobs = NULL;
            classname = NULL;
        }
        else {
            float32 fprob;

            if (n_words == 2)
                fprob = atof_c(wptr[1]);
            else
                fprob = 1.0f;

            /* Add it to the list of words for this class. */
            classwords = glist_add_ptr(classwords, ckd_salloc(wptr[0]));
            classprobs = glist_add_float32(classprobs, fprob);
        }
    }
    rv = 0;                     /* Success. */

error_out:
    /* Free all the stuff we might have allocated. */
    fclose_comp(fp, is_pipe);
    for (gn = classwords; gn; gn = gnode_next(gn))
        ckd_free(gnode_ptr(gn));
    glist_free(classwords);
    glist_free(classprobs);
    ckd_free(classname);

    return rv;
}
/**
 * Build a language model set from an LM control file.
 *
 * Control file layout (whitespace-separated tokens, each at most 1023
 * characters):
 *   - optionally, "{ classdef-file ... }" listing class definition files;
 *   - then any number of "lm-file lm-name" pairs, each optionally followed
 *     by "{ class-name ... }" naming classes (from the definition files)
 *     to add to that model.
 * Relative paths are resolved against the directory part of @p lmctlfile,
 * if it has one.
 *
 * @param config    Configuration passed through to ngram_model_read() and
 *                  ngram_model_set_init().
 * @param lmctlfile Path of the control file.
 * @param lmath     Log-math object passed to ngram_model_read().
 * @return The value returned by ngram_model_set_init(), or NULL if the
 *         control file cannot be opened or is malformed, a class definition
 *         file or model fails to load, or a class is unknown or cannot be
 *         added.
 */
ngram_model_t *
ngram_model_set_read(cmd_ln_t * config,
                     const char *lmctlfile, logmath_t * lmath)
{
    FILE *ctlfp;
    glist_t lms = NULL;
    glist_t lmnames = NULL;
    __BIGSTACKVARIABLE__ char str[1024];
    ngram_model_t *set = NULL;
    hash_table_t *classes;
    char *basedir, *c;

    /* Open the control file before allocating anything, so that failure
     * here has nothing to clean up. */
    if ((ctlfp = fopen(lmctlfile, "r")) == NULL) {
        E_ERROR_SYSTEM("Failed to open %s", lmctlfile);
        return NULL;
    }

    /* Read all the class definition files to accumulate a mapping of
     * classnames to definitions. */
    classes = hash_table_new(0, FALSE);

    /* Try to find the base directory to append to relative paths in
     * the lmctl file. */
    if ((c = strrchr(lmctlfile, '/')) || (c = strrchr(lmctlfile, '\\'))) {
        /* Include the trailing slash. */
        basedir = ckd_calloc(c - lmctlfile + 2, 1);
        memcpy(basedir, lmctlfile, c - lmctlfile + 1);
    }
    else {
        basedir = NULL;
    }
    E_INFO("Reading LM control file '%s'\n", lmctlfile);
    if (basedir)
        E_INFO("Will prepend '%s' to unqualified paths\n", basedir);

    if (fscanf(ctlfp, "%1023s", str) == 1) {
        if (strcmp(str, "{") == 0) {
            /* Load LMclass files */
            while ((fscanf(ctlfp, "%1023s", str) == 1)
                   && (strcmp(str, "}") != 0)) {
                char *deffile;
                int32 status;

                if (basedir && !path_is_absolute(str))
                    deffile = string_join(basedir, str, NULL);
                else
                    deffile = ckd_salloc(str);
                E_INFO("Reading classdef from '%s'\n", deffile);
                status = read_classdef_file(classes, deffile);
                ckd_free(deffile);
                if (status < 0)
                    goto error_out;
            }

            if (strcmp(str, "}") != 0) {
                E_ERROR("Unexpected EOF in %s\n", lmctlfile);
                goto error_out;
            }

            /* This might be the first LM name. */
            if (fscanf(ctlfp, "%1023s", str) != 1)
                str[0] = '\0';
        }
    }
    else
        str[0] = '\0';

    /* Read in one LM at a time and add classes to them as necessary. */
    while (str[0] != '\0') {
        char *lmfile;
        ngram_model_t *lm;

        /* NOTE(review): classdef paths above use path_is_absolute(); this
         * check only looks at a leading slash or backslash.  Left as-is to
         * preserve behavior. */
        if (basedir && str[0] != '/' && str[0] != '\\')
            lmfile = string_join(basedir, str, NULL);
        else
            lmfile = ckd_salloc(str);
        E_INFO("Reading lm from '%s'\n", lmfile);
        lm = ngram_model_read(config, lmfile, NGRAM_AUTO, lmath);
        if (lm == NULL) {
            ckd_free(lmfile);
            goto error_out;
        }
        if (fscanf(ctlfp, "%1023s", str) != 1) {
            E_ERROR("LMname missing after LMFileName '%s'\n", lmfile);
            ckd_free(lmfile);
            /* lm is not yet on the lms list, so cleanup would miss it. */
            ngram_model_free(lm);
            goto error_out;
        }
        ckd_free(lmfile);
        lms = glist_add_ptr(lms, lm);
        lmnames = glist_add_ptr(lmnames, ckd_salloc(str));

        if (fscanf(ctlfp, "%1023s", str) == 1) {
            if (strcmp(str, "{") == 0) {
                /* LM uses classes; read their names */
                while ((fscanf(ctlfp, "%1023s", str) == 1) &&
                       (strcmp(str, "}") != 0)) {
                    void *val;
                    classdef_t *classdef;

                    if (hash_table_lookup(classes, str, &val) == -1) {
                        E_ERROR("Unknown class %s in control file\n", str);
                        goto error_out;
                    }
                    classdef = val;
                    if (ngram_model_add_class(lm, str, 1.0,
                                              classdef->words,
                                              classdef->weights,
                                              classdef->n_words) < 0) {
                        goto error_out;
                    }
                    E_INFO("Added class %s containing %d words\n",
                           str, classdef->n_words);
                }
                if (strcmp(str, "}") != 0) {
                    E_ERROR("Unexpected EOF in %s\n", lmctlfile);
                    goto error_out;
                }
                if (fscanf(ctlfp, "%1023s", str) != 1)
                    str[0] = '\0';
            }
        }
        else
            str[0] = '\0';
    }
    fclose(ctlfp);
    ctlfp = NULL;               /* Tell the cleanup it is already closed. */

    /* Now construct arrays out of lms and lmnames, and build an
     * ngram_model_set.  The lists were built by prepending, so reverse
     * them to restore control-file order. */
    lms = glist_reverse(lms);
    lmnames = glist_reverse(lmnames);
    {
        int32 n_models;
        ngram_model_t **lm_array;
        char **name_array;
        gnode_t *lm_node, *name_node;
        int32 i;

        n_models = glist_count(lms);
        lm_array = ckd_calloc(n_models, sizeof(*lm_array));
        name_array = ckd_calloc(n_models, sizeof(*name_array));
        lm_node = lms;
        name_node = lmnames;
        for (i = 0; i < n_models; ++i) {
            lm_array[i] = gnode_ptr(lm_node);
            name_array[i] = gnode_ptr(name_node);
            lm_node = gnode_next(lm_node);
            name_node = gnode_next(name_node);
        }
        set = ngram_model_set_init(config, lm_array, name_array,
                                   NULL, n_models);
        ckd_free(lm_array);
        ckd_free(name_array);
    }

error_out:
    {
        gnode_t *gn;
        glist_t hlist;

        /* Still open only when we arrived here through an error. */
        if (ctlfp != NULL)
            fclose(ctlfp);

        /* Release each model exactly once, whether or not a set was built.
         * Previously a NULL return from ngram_model_set_init() caused the
         * models to be freed both after the call and again here.  On
         * success the set is presumed to keep its own hold on the models,
         * as the earlier post-init free already assumed. */
        for (gn = lms; gn; gn = gnode_next(gn)) {
            ngram_model_free(gnode_ptr(gn));
        }
        glist_free(lms);
        for (gn = lmnames; gn; gn = gnode_next(gn)) {
            ckd_free(gnode_ptr(gn));
        }
        glist_free(lmnames);

        /* The class table owns neither keys nor values; free them here. */
        hlist = hash_table_tolist(classes, NULL);
        for (gn = hlist; gn; gn = gnode_next(gn)) {
            hash_entry_t *he = gnode_ptr(gn);

            ckd_free((char *) he->key);
            classdef_free(he->val);
        }
        glist_free(hlist);
        hash_table_free(classes);
        ckd_free(basedir);
    }
    return set;
}