int main(int argc, char *argv[]) { hash_table_t *h; hash_iter_t *itor; /* Test insertion */ TEST_ASSERT(h = hash_table_new(42, FALSE)); TEST_EQUAL((void*)0xdeadbeef, hash_table_enter(h, "foo", (void*)0xdeadbeef)); TEST_EQUAL((void*)0xdeadbeef, hash_table_enter(h, "foo", (void*)0xd0d0feed)); TEST_EQUAL((void*)0xcafec0de, hash_table_enter(h, "bar", (void*)0xcafec0de)); TEST_EQUAL((void*)0xeeefeeef, hash_table_enter(h, "baz", (void*)0xeeefeeef)); TEST_EQUAL((void*)0xbabababa, hash_table_enter(h, "quux", (void*)0xbabababa)); /* Now test iterators. */ for (itor = hash_table_iter(h); itor; itor = hash_table_iter_next(itor)) { printf("%s %p\n", itor->ent->key, itor->ent->val); if (0 == strcmp(itor->ent->key, "foo")) { TEST_EQUAL(itor->ent->val, (void*)0xdeadbeef); } else if (0 == strcmp(itor->ent->key, "bar")) { TEST_EQUAL(itor->ent->val, (void*)0xcafec0de); } else if (0 == strcmp(itor->ent->key, "baz")) { TEST_EQUAL(itor->ent->val, (void*)0xeeefeeef); } else if (0 == strcmp(itor->ent->key, "quux")) { TEST_EQUAL(itor->ent->val, (void*)0xbabababa); } } return 0; }
/*
 * Enter -hmmdump, -svq4svq, -lm, -beam and -lminmemory (in that order) into
 * a new hash table, all with the same value, then display the table and
 * free it.  Any insertion that does not hand back the value just entered is
 * reported through E_FATAL.
 */
int
main(int argc, char **argv)
{
    void *const marker = (void *)1;     /* value stored under every key */
    hash_table_t *table = hash_table_new(75, 0);

    if (marker != hash_table_enter(table, "-hmmdump", marker)) {
        E_FATAL("Insertion of -hmmdump failed\n");
    }
    if (marker != hash_table_enter(table, "-svq4svq", marker)) {
        E_FATAL("Insertion of -svq4svq failed\n");
    }
    if (marker != hash_table_enter(table, "-lm", marker)) {
        E_FATAL("Insertion of -lm failed\n");
    }
    if (marker != hash_table_enter(table, "-beam", marker)) {
        E_FATAL("Insertion of -beam failed\n");
    }
    if (marker != hash_table_enter(table, "-lminmemory", marker)) {
        E_FATAL("Insertion of -lminmemory failed\n");
    }

    hash_table_display(table, 1);
    hash_table_free(table);

    return 0;
}
/*
 * Create a new item called _name and register it with _cfg.
 *
 * The item takes the next free slot in _cfg->item_info.  Its id is that slot
 * index, with S3_CFG_TERM_BIT or-ed in unless the name starts with
 * S3_CFG_NONTERM_PREFIX.  A private copy of the name is stored in the item
 * and is also used as the key in _cfg->name2id.
 *
 * Returns the newly allocated item.
 */
static s3_cfg_item_t *
add_item(s3_cfg_t *_cfg, char *_name)
{
    s3_cfg_item_t *new_item;
    char *name_copy;
    int slot;

    assert(_cfg != NULL);
    assert(_name != NULL);

    /* The new item is appended after the items already present. */
    slot = s3_arraylist_count(&_cfg->item_info);

    new_item = (s3_cfg_item_t *)ckd_calloc(1, sizeof(s3_cfg_item_t));
    name_copy = (char *)ckd_salloc(_name);
    s3_arraylist_init(&new_item->rules);

    /* create item's new id */
    new_item->id = slot;
    if (name_copy[0] != S3_CFG_NONTERM_PREFIX)
        new_item->id |= S3_CFG_TERM_BIT;

    new_item->name = name_copy;
    new_item->nil_rule = NULL;

    hash_table_enter(_cfg->name2id, name_copy, (void *)new_item->id);
    s3_arraylist_set(&_cfg->item_info, slot, new_item);

    return new_item;
}
/**
 * Define a rule in a grammar.
 *
 * When @a name is NULL a name of the form "<grammar.gNNNNN>" is generated
 * from the grammar name and the current number of rules in use; otherwise
 * the name is passed through jsgf_fullname().  The resulting string is used
 * as the key in jsgf->rules, and the rule keeps its own copy of it.
 *
 * @param jsgf      grammar receiving the rule
 * @param name      rule name, or NULL to generate one
 * @param rhs       right-hand side stored in the rule
 * @param is_public value stored in the rule's is_public field
 * @return the newly allocated rule (refcnt 1).  A warning is logged when the
 *         table hands back something other than the new rule.
 */
jsgf_rule_t *
jsgf_define_rule(jsgf_t * jsgf, char *name, jsgf_rhs_t * rhs, int is_public)
{
    jsgf_rule_t *new_rule;
    char *key;

    if (name != NULL) {
        key = jsgf_fullname(jsgf, name);
    }
    else {
        /* Anonymous rule: build a name from the grammar name and a counter. */
        key = ckd_malloc(strlen(jsgf->name) + 16);
        sprintf(key, "<%s.g%05d>", jsgf->name,
                hash_table_inuse(jsgf->rules));
    }

    new_rule = ckd_calloc(1, sizeof(*new_rule));
    new_rule->refcnt = 1;
    new_rule->name = ckd_salloc(key);
    new_rule->rhs = rhs;
    new_rule->is_public = is_public;

    E_INFO("Defined rule: %s%s\n",
           new_rule->is_public ? "PUBLIC " : "", new_rule->name);

    if (hash_table_enter(jsgf->rules, key, new_rule) != (void *) new_rule) {
        E_WARN("Multiply defined symbol: %s\n", key);
    }

    return new_rule;
}
/*
 * Map a CI phone name to its id.
 *
 * If the dictionary has a model definition the lookup is delegated to
 * mdef_ciphone_id().  Otherwise the dictionary's own phone table (d->pht) is
 * consulted; an unknown name is assigned the next id, a copy of the name is
 * stored in d->ciphone_str and entered into the table.
 *
 * Running out of ids, or a failed table insertion, is fatal.
 */
static s3cipid_t
dict_ciphone_id(dict_t * d, const char *str)
{
    void *found;
    s3cipid_t new_id;

    if (d->mdef)
        return mdef_ciphone_id(d->mdef, str);

    /* Already known locally? */
    if (hash_table_lookup(d->pht, str, &found) >= 0)
        return (s3cipid_t)(long)found;

    /* New phone: allocate the next id. */
    new_id = (d->n_ciphone)++;
    if (new_id >= MAX_S3CIPID)
        E_FATAL
            ("Too many CIphones in dictionary; increase MAX_S3CIPID\n");

    d->ciphone_str[new_id] = (char *) ckd_salloc(str);  /* Freed in dict_free() */

    if (hash_table_enter(d->pht, d->ciphone_str[new_id],
                         (void *)(long)new_id) != (void *)(long)new_id)
        E_FATAL("hash_table_enter(local-phonetable, %s) failed\n", str);

    return new_id;
}
/*
 * Read the word-string section from fp into base.
 *
 * Layout read from the stream: a 32-bit byte count k, followed by k bytes
 * holding NUL-terminated words back to back.  Each word is copied into
 * base->word_str[i] and entered into base->wid under its index.
 *
 * Compared with the unchecked version this:
 *   - verifies both fread() calls and rejects a non-positive size;
 *   - allocates one extra zero byte so the last word is always terminated;
 *   - never walks past the k bytes that were read, even when the number of
 *     words found does not match base->n_counts[0].
 *
 * Errors are logged; the function has no return value, so on a read failure
 * it simply returns without filling in word strings.
 */
static void
read_word_str(ngram_model_t * base, FILE * fp)
{
    int32 k;
    uint32 i, j;
    char *tmp_word_str;

    /* read ascii word strings */
    base->writable = TRUE;
    if (fread(&k, sizeof(k), 1, fp) != 1 || k <= 0) {
        E_ERROR("Error reading word strings (missing or invalid size)\n");
        return;
    }

    /* The spare byte stays zero, so the buffer is always NUL-terminated. */
    tmp_word_str = (char *) ckd_calloc((size_t) k + 1, 1);
    if (fread(tmp_word_str, 1, (size_t) k, fp) != (size_t) k) {
        E_ERROR("Error reading word strings (unexpected end of data)\n");
        free(tmp_word_str);
        return;
    }

    /* First make sure string just read contains n_counts[0] words (PARANOIA!!) */
    for (i = 0, j = 0; i < (uint32) k; i++)
        if (tmp_word_str[i] == '\0')
            j++;
    if (j != base->n_counts[0]) {
        E_ERROR
            ("Error reading word strings (%d doesn't match n_unigrams %d)\n",
             j, base->n_counts[0]);
    }

    /* Break up string just read into words; stop at the end of the data
     * even if fewer words than expected were found. */
    j = 0;
    for (i = 0; i < base->n_counts[0] && j < (uint32) k; i++) {
        base->word_str[i] = ckd_salloc(tmp_word_str + j);
        if (hash_table_enter(base->wid, base->word_str[i],
                             (void *) (long) i) != (void *) (long) i) {
            E_WARN("Duplicate word in dictionary: %s\n",
                   base->word_str[i]);
        }
        j += strlen(base->word_str[i]) + 1;
    }

    free(tmp_word_str);
}
huff_code_t * huff_code_read(FILE *infh) { huff_code_t *hc; uint32 i, j; hc = (huff_code_t*)ckd_calloc(1, sizeof(*hc)); hc->refcount = 1; hc->maxbits = fgetc(infh); hc->type = fgetc(infh); /* Two bytes of padding. */ fgetc(infh); fgetc(infh); /* Allocate stuff. */ hc->firstcode = (uint32*)ckd_calloc(hc->maxbits + 1, sizeof(*hc->firstcode)); hc->numl = (uint32*)ckd_calloc(hc->maxbits + 1, sizeof(*hc->numl)); hc->syms = (huff_codeword_t**)ckd_calloc(hc->maxbits + 1, sizeof(*hc->syms)); /* Read the symbol tables. */ hc->codewords = hash_table_new(hc->maxbits, HASH_CASE_YES); for (i = 1; i <= hc->maxbits; ++i) { if (fread(&hc->firstcode[i], 4, 1, infh) != 1) goto error_out; SWAP_BE_32(&hc->firstcode[i]); if (fread(&hc->numl[i], 4, 1, infh) != 1) goto error_out; SWAP_BE_32(&hc->numl[i]); hc->syms[i] =(huff_codeword_t*) ckd_calloc(hc->numl[i], sizeof(**hc->syms)); for (j = 0; j < hc->numl[i]; ++j) { huff_codeword_t *cw = &hc->syms[i][j]; cw->nbits = i; cw->codeword = hc->firstcode[i] + j; if (hc->type == HUFF_CODE_INT) { if (fread(&cw->r.ival, 4, 1, infh) != 1) goto error_out; SWAP_BE_32(&cw->r.ival); hash_table_enter_bkey(hc->codewords, (char const *)&cw->r.ival, sizeof(cw->r.ival), (void *)cw); } else { size_t len; cw->r.sval = fread_line(infh, &len); cw->r.sval[len-1] = '\0'; hash_table_enter(hc->codewords, cw->r.sval, (void *)cw); } } } return hc; error_out: huff_code_free(hc); return 0; }
/*
 * Record CI phone `ci` under id `p`: store a copy of the name in
 * m->ciphone[p].name and enter that copy into m->ciphone_ht with p as the
 * value.  It is fatal if the table does not hand back p.
 */
static void
ciphone_add(mdef_t * m, const char *ci, s3pid_t p)
{
    void *const id_as_ptr = (void *)(long)p;
    char *name_copy;

    assert(p < m->n_ciphone);

    name_copy = (char *) ckd_salloc(ci);        /* freed in mdef_free */
    m->ciphone[p].name = name_copy;

    if (hash_table_enter(m->ciphone_ht, name_copy, id_as_ptr) != id_as_ptr)
        E_FATAL("hash_table_enter(%s) failed; duplicate CIphone?\n",
                m->ciphone[p].name);
}
/*
 * Return the id of word `w`, assigning the next free id (n_word) when the
 * word is not yet in dict_ht.  A copy of a new word is kept in word[] and
 * used as the table key.  It is fatal to exceed n_word_alloc entries.
 */
static size_t
word2id(char *w)
{
    void *found;

    /* Known word: its id is stored directly in the table value. */
    if (hash_table_lookup(dict_ht, w, &found) >= 0)
        return (size_t)found;

    if (n_word >= n_word_alloc)
        E_FATAL("Increase dictionary size\n");

    word[n_word] = ckd_salloc(w);
    hash_table_enter(dict_ht, word[n_word], (void *)n_word);

    return n_word++;
}
/*
 * Turn the Huffman tree rooted at `root` into the per-length tables of `hc`.
 *
 * Pass 1 walks the tree, stores each node's depth in nbits and counts the
 * leaves of every depth in hc->numl.  The first code of each length is then
 * derived from the next longer length, and pass 2 walks the tree again to
 * fill hc->syms and hc->codewords with one codeword per leaf.
 *
 * hc->maxbits and hc->type must already be set by the caller.
 *
 * Changes from the previous version: hc->numl is sized by its own element
 * type, and the code mask is built with unsigned arithmetic so that code
 * lengths of 31 bits or more no longer shift a signed int out of range.
 */
static void
huff_code_canonicalize(huff_code_t *hc, huff_node_t *root)
{
    glist_t agenda;
    uint32 *nextcode;
    int i, ncw;

    hc->firstcode =(uint32*) ckd_calloc(hc->maxbits+1, sizeof(*hc->firstcode));
    hc->syms = (huff_codeword_t**)ckd_calloc(hc->maxbits+1, sizeof(*hc->syms));
    hc->numl =(uint32*) ckd_calloc(hc->maxbits+1, sizeof(*hc->numl));
    nextcode =(uint32*) ckd_calloc(hc->maxbits+1, sizeof(*nextcode));

    /* Traverse the tree, annotating it with the actual bit
     * lengths, and histogramming them in numl. */
    root->nbits = 0;
    ncw = 0;
    agenda = glist_add_ptr(0, root);
    while (agenda) {
        huff_node_t *node = (huff_node_t*)gnode_ptr(agenda);
        agenda = gnode_free(agenda, 0);
        if (node->l) {
            node->l->nbits = node->nbits + 1;
            agenda = glist_add_ptr(agenda, node->l);
            node->r.r->nbits = node->nbits + 1;
            agenda = glist_add_ptr(agenda, node->r.r);
        }
        else {
            hc->numl[node->nbits]++;
            ncw++;
        }
    }

    /* Create starting codes and symbol tables for each bit length. */
    hc->syms[hc->maxbits] = (huff_codeword_t*)ckd_calloc(hc->numl[hc->maxbits], sizeof(**hc->syms));
    for (i = hc->maxbits - 1; i > 0; --i) {
        hc->firstcode[i] = (hc->firstcode[i+1] + hc->numl[i+1]) / 2;
        hc->syms[i] = (huff_codeword_t*)ckd_calloc(hc->numl[i], sizeof(**hc->syms));
    }
    memcpy(nextcode, hc->firstcode, (hc->maxbits + 1) * sizeof(*nextcode));

    /* Traverse the tree again to produce the codebook itself. */
    hc->codewords = hash_table_new(ncw, HASH_CASE_YES);
    agenda = glist_add_ptr(0, root);
    while (agenda) {
        huff_node_t *node = (huff_node_t*)gnode_ptr(agenda);
        agenda = gnode_free(agenda, 0);
        if (node->l) {
            agenda = glist_add_ptr(agenda, node->l);
            agenda = glist_add_ptr(agenda, node->r.r);
        }
        else {
            /* Initialize codebook entry, which also retains symbol pointer. */
            huff_codeword_t *cw;
            /* Keep the low nbits bits of the running code.  Unsigned shift,
             * with the full-width case handled separately, avoids undefined
             * behaviour for long codes. */
            uint32 mask = (node->nbits >= 32)
                ? 0xffffffffU
                : (((uint32)1 << node->nbits) - 1);
            uint32 codeword = nextcode[node->nbits] & mask;
            cw = hc->syms[node->nbits] + (codeword - hc->firstcode[node->nbits]);
            cw->nbits = node->nbits;
            cw->r.sval = node->r.sval; /* Will copy ints too... */
            cw->codeword = codeword;
            if (hc->type == HUFF_CODE_INT) {
                hash_table_enter_bkey(hc->codewords,
                                      (char const *)&cw->r.ival,
                                      sizeof(cw->r.ival),
                                      (void *)cw);
            }
            else {
                hash_table_enter(hc->codewords, cw->r.sval, (void *)cw);
            }
            ++nextcode[node->nbits];
        }
    }
    ckd_free(nextcode);
}
/**
 * Read word-class definitions from a file into a hash table.
 *
 * The file is scanned line by line.  A two-word line "LMCLASS <name>" opens
 * a class, a two-word line "END <name>" closes it, and every line in between
 * contributes one word with an optional probability (1.0 when absent).
 * Lines outside a class that are not an LMCLASS marker are ignored.  Each
 * completed class is entered into @a classes under its name.
 *
 * @param classes   table receiving (class name -> classdef_t *) entries
 * @param file_name file to read, opened through fopen_comp()
 * @return 0 when the end of the file is reached, -1 if the file cannot be
 *         opened, an END marker does not match the open class, or the table
 *         does not hand back the new class definition.
 */
int32
read_classdef_file(hash_table_t * classes, const char *file_name)
{
    FILE *fp;
    int32 is_pipe;
    int inclass;  /**< Are we currently reading a list of class words? */
    int32 rv = -1;
    gnode_t *gn;
    glist_t classwords = NULL;
    glist_t classprobs = NULL;
    char *classname = NULL;

    if ((fp = fopen_comp(file_name, "r", &is_pipe)) == NULL) {
        E_ERROR("File %s not found\n", file_name);
        return -1;
    }

    inclass = FALSE;
    while (!feof(fp)) {
        char line[512];
        char *wptr[2];
        int n_words;

        if (fgets(line, sizeof(line), fp) == NULL)
            break;

        /* Split into at most two words; blank lines are skipped. */
        n_words = str2words(line, wptr, 2);
        if (n_words <= 0)
            continue;

        if (inclass) {
            /* Look for an end of class marker. */
            if (n_words == 2 && 0 == strcmp(wptr[0], "END")) {
                classdef_t *classdef;
                gnode_t *word, *weight;
                int32 i;

                /* The END marker must name the class that is open. */
                if (classname == NULL || 0 != strcmp(wptr[1], classname))
                    goto error_out;
                inclass = FALSE;

                /* Construct a class from the list of words collected. */
                classdef = ckd_calloc(1, sizeof(*classdef));
                /* Words were prepended while reading; restore file order. */
                classwords = glist_reverse(classwords);
                classprobs = glist_reverse(classprobs);
                classdef->n_words = glist_count(classwords);
                classdef->words = ckd_calloc(classdef->n_words,
                                             sizeof(*classdef->words));
                classdef->weights = ckd_calloc(classdef->n_words,
                                               sizeof(*classdef->weights));
                word = classwords;
                weight = classprobs;
                for (i = 0; i < classdef->n_words; ++i) {
                    /* The string pointers are shared with the list nodes,
                     * not copied. */
                    classdef->words[i] = gnode_ptr(word);
                    classdef->weights[i] = gnode_float32(weight);
                    word = gnode_next(word);
                    weight = gnode_next(weight);
                }

                /* Add this class to the hash table. */
                /* NOTE(review): at this point classwords still holds the
                 * same string pointers as classdef->words, and error_out
                 * frees every string in classwords.  If classdef_free()
                 * also frees classdef->words[i], this path frees them
                 * twice -- confirm against classdef_free(). */
                if (hash_table_enter(classes, classname, classdef) !=
                    classdef) {
                    classdef_free(classdef);
                    goto error_out;
                }

                /* Reset everything. */
                /* Only the list nodes are released here; the strings now
                 * belong to classdef.  classname is dropped without being
                 * freed -- presumably the table keeps the key pointer
                 * (TODO confirm). */
                glist_free(classwords);
                glist_free(classprobs);
                classwords = NULL;
                classprobs = NULL;
                classname = NULL;
            }
            else {
                float32 fprob;

                if (n_words == 2)
                    fprob = atof_c(wptr[1]);
                else
                    fprob = 1.0f;
                /* Add it to the list of words for this class. */
                classwords = glist_add_ptr(classwords, ckd_salloc(wptr[0]));
                classprobs = glist_add_float32(classprobs, fprob);
            }
        }
        else {
            /* Start a new LM class if the LMCLASS marker is seen */
            if (n_words == 2 && 0 == strcmp(wptr[0], "LMCLASS")) {
                /* This branch is only entered when inclass is FALSE, so
                 * the check below can never fire. */
                if (inclass)
                    goto error_out;
                inclass = TRUE;
                classname = ckd_salloc(wptr[1]);
            }
            /* Otherwise, just ignore whatever junk we got */
        }
    }
    rv = 0;                     /* Success. */

  error_out:
    /* Free all the stuff we might have allocated. */
    /* Reached on success as well; a class still open at end of file is
     * discarded here without being reported. */
    fclose_comp(fp, is_pipe);
    for (gn = classwords; gn; gn = gnode_next(gn))
        ckd_free(gnode_ptr(gn));
    glist_free(classwords);
    glist_free(classprobs);
    ckd_free(classname);

    return rv;
}
/**
 * (Re)initialise a decoder from a configuration.
 *
 * If @a config is non-NULL and differs from the decoder's current
 * configuration, it replaces it.  The old searches, acoustic model,
 * dictionary and dict2pid are released and rebuilt, the log-math object is
 * recreated when missing or when "-logbase" changed, and the default search
 * is selected according to "-keyphrase", "-kws", "-fsg", "-jsgf",
 * "-allphone", "-lm" and "-lmctl".
 *
 * All options are read through ps->config.  Previously several of them were
 * read through the @a config argument, which the first test in this
 * function explicitly allows to be NULL.
 *
 * @param ps     decoder to reinitialise
 * @param config new configuration, or NULL to reuse ps->config
 * @return 0 on success, -1 if any component fails to initialise.
 */
int
ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
{
    const char *path;
    const char *keyphrase;
    /* NOTE(review): "-lw" is read as float32 but stored in an int32, so any
     * fractional part is discarded before it reaches fsg_model_readfile().
     * Left as is; confirm whether that is intended. */
    int32 lw;

    if (config && config != ps->config) {
        cmd_ln_free_r(ps->config);
        ps->config = cmd_ln_retain(config);
    }

    err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
    ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
    ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
    ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir");

    /* Fill in some default arguments. */
    ps_init_defaults(ps);

    /* Free old searches (do this before other reinit) */
    ps_free_searches(ps);
    ps->searches = hash_table_new(3, HASH_CASE_YES);

    /* Free old acmod. */
    acmod_free(ps->acmod);
    ps->acmod = NULL;

    /* Free old dictionary (must be done after the two things above) */
    dict_free(ps->dict);
    ps->dict = NULL;

    /* Free d2p */
    dict2pid_free(ps->d2p);
    ps->d2p = NULL;

    /* Logmath computation (used in acmod and search) */
    if (ps->lmath == NULL
        || (logmath_get_base(ps->lmath) !=
            (float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
        if (ps->lmath)
            logmath_free(ps->lmath);
        ps->lmath = logmath_init
            ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
             cmd_ln_boolean_r(ps->config, "-bestpath"));
    }

    /* Acoustic model (this is basically everything that
     * uttproc.c, senscr.c, and others used to do) */
    if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
        return -1;

    if (cmd_ln_int32_r(ps->config, "-pl_window") > 0) {
        /* Initialize an auxiliary phone loop search, which will run in
         * "parallel" with FSG or N-Gram search. */
        /* ps->dict is still NULL here; it is rebuilt just below. */
        if ((ps->phone_loop =
             phone_loop_search_init(ps->config, ps->acmod, ps->dict)) == NULL)
            return -1;
        hash_table_enter(ps->searches,
                         ckd_salloc(ps_search_name(ps->phone_loop)),
                         ps->phone_loop);
    }

    /* Dictionary and triphone mappings (depends on acmod). */
    /* FIXME: pass config, change arguments, implement LTS, etc. */
    if ((ps->dict = dict_init(ps->config, ps->acmod->mdef, ps->acmod->lmath)) == NULL)
        return -1;
    if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
        return -1;

    lw = cmd_ln_float32_r(ps->config, "-lw");

    /* Determine whether we are starting out in FSG or N-Gram search mode.
     * If neither is used skip search initialization. */

    /* Load KWS if one was specified in config */
    if ((keyphrase = cmd_ln_str_r(ps->config, "-keyphrase"))) {
        if (ps_set_keyphrase(ps, PS_DEFAULT_SEARCH, keyphrase))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    if ((path = cmd_ln_str_r(ps->config, "-kws"))) {
        if (ps_set_kws(ps, PS_DEFAULT_SEARCH, path))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    /* Load an FSG if one was specified in config */
    if ((path = cmd_ln_str_r(ps->config, "-fsg"))) {
        fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw);
        if (!fsg)
            return -1;
        if (ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    /* Or load a JSGF grammar */
    if ((path = cmd_ln_str_r(ps->config, "-jsgf"))) {
        if (ps_set_jsgf_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    if ((path = cmd_ln_str_r(ps->config, "-allphone"))) {
        if (ps_set_allphone_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    if ((path = cmd_ln_str_r(ps->config, "-lm")) &&
        !cmd_ln_boolean_r(ps->config, "-allphone")) {
        if (ps_set_lm_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    if ((path = cmd_ln_str_r(ps->config, "-lmctl"))) {
        const char *name;
        ngram_model_t *lmset;
        ngram_model_set_iter_t *lmset_it;

        if (!(lmset = ngram_model_set_read(ps->config, path, ps->lmath))) {
            E_ERROR("Failed to read language model control file: %s\n", path);
            return -1;
        }

        /* Register every model of the set as its own search. */
        for(lmset_it = ngram_model_set_iter(lmset);
            lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) {
            ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name);
            E_INFO("adding search %s\n", name);
            if (ps_set_lm(ps, name, lm)) {
                ngram_model_free(lm);
                ngram_model_set_iter_free(lmset_it);
                return -1;
            }
            ngram_model_free(lm);
        }

        name = cmd_ln_str_r(ps->config, "-lmname");
        if (name)
            ps_set_search(ps, name);
        else {
            E_ERROR("No default LM name (-lmname) for `-lmctl'\n");
            return -1;
        }
    }

    /* Initialize performance timer. */
    ps->perf.name = "decode";
    ptmr_init(&ps->perf);

    return 0;
}
/**
 * Import a rule (or every public rule of a package) into a grammar.
 *
 * @a name is a qualified rule name in angle brackets.  Everything before
 * its last '.' is turned into a file path (dots become slashes, ".gram" is
 * appended) and looked up on jsgf->searchpath.  The file is parsed unless it
 * is already present in jsgf->imports, and its public rules whose names
 * match are entered into jsgf->rules.
 *
 * @param jsgf grammar doing the import
 * @param name qualified rule name; a name ending in ".*>" imports all
 *             public rules of the package
 * @return the imported rule for a single-rule import; NULL if the name is
 *         not qualified, the grammar file is not found, nothing matched, or
 *         the import was a ".*" import.
 */
jsgf_rule_t *
jsgf_import_rule(jsgf_t * jsgf, char *name)
{
    char *c, *path, *newpath;
    size_t namelen, packlen;
    void *val;
    jsgf_t *imp;
    int import_all;

    /* Trim the leading and trailing <> */
    /* NOTE(review): namelen - 2 wraps around if name is shorter than two
     * characters; callers are presumably expected to pass "<...>". */
    namelen = strlen(name);
    path = ckd_malloc(namelen - 2 + 6); /* room for a trailing .gram */
    /* Only the leading '<' is skipped here; the trailing '>' goes away when
     * the string is cut at its last '.' below. */
    strcpy(path, name + 1);
    /* Split off the first part of the name */
    c = strrchr(path, '.');
    if (c == NULL) {
        E_ERROR("Imported rule is not qualified: %s\n", name);
        ckd_free(path);
        return NULL;
    }
    /* Length of the package part, used for prefix matching below. */
    packlen = c - path;
    *c = '\0';

    /* Look for import foo.* */
    import_all = (strlen(name) > 2
                  && 0 == strcmp(name + namelen - 3, ".*>"));

    /* Construct a filename. */
    for (c = path; *c; ++c)
        if (*c == '.')
            *c = '/';
    strcat(path, ".gram");
    newpath = path_list_search(jsgf->searchpath, path);
    if (newpath == NULL) {
        E_ERROR("Failed to find grammar %s\n", path);
        ckd_free(path);
        return NULL;
    }
    ckd_free(path);

    /* From here on path is the string returned by path_list_search(). */
    path = newpath;
    E_INFO("Importing %s from %s to %s\n", name, path, jsgf->name);

    /* FIXME: Also, we need to make sure that path is fully qualified
     * here, by adding any prefixes from jsgf->name to it. */
    /* See if we have parsed it already */
    if (hash_table_lookup(jsgf->imports, path, &val) == 0) {
        E_INFO("Already imported %s\n", path);
        imp = val;
        ckd_free(path);
    }
    else {
        /* If not, parse it. */
        /* path is not freed on this branch; it is handed to the imports
         * table as the key.  A NULL result from jsgf_parse_file() is
         * entered into the table as well. */
        imp = jsgf_parse_file(path, jsgf);
        val = hash_table_enter(jsgf->imports, path, imp);
        if (val != (void *) imp) {
            E_WARN("Multiply imported file: %s\n", path);
        }
    }
    if (imp != NULL) {
        hash_iter_t *itor;
        /* Look for public rules matching rulename. */
        for (itor = hash_table_iter(imp->rules); itor;
             itor = hash_table_iter_next(itor)) {
            hash_entry_t *he = itor->ent;
            jsgf_rule_t *rule = hash_entry_val(he);
            int rule_matches;
            /* Recomputed and freed on every iteration although it depends
             * only on name. */
            char *rule_name = importname2rulename(name);

            if (import_all) {
                /* Match package name (symbol table is shared) */
                rule_matches =
                    !strncmp(rule_name, rule->name, packlen + 1);
            }
            else {
                /* Exact match */
                rule_matches = !strcmp(rule_name, rule->name);
            }
            ckd_free(rule_name);
            if (rule->is_public && rule_matches) {
                /* This val shadows the function-level val. */
                void *val;
                char *newname;

                /* Link this rule into the current namespace. */
                c = strrchr(rule->name, '.');
                assert(c != NULL);
                newname = jsgf_fullname(jsgf, c);

                E_INFO("Imported %s\n", newname);
                val = hash_table_enter(jsgf->rules, newname,
                                       jsgf_rule_retain(rule));
                if (val != (void *) rule) {
                    E_WARN("Multiply defined symbol: %s\n", newname);
                }
                /* A single-rule import stops at the first match; the
                 * iterator is released explicitly because the loop is left
                 * early. */
                if (!import_all) {
                    hash_table_iter_free(itor);
                    return rule;
                }
            }
        }
    }

    /* Reached for ".*" imports even when rules were linked, and when
     * nothing matched. */
    return NULL;
}
/**
 * Load a corpus file whose lines end in a "(uttid)" tag.
 *
 * The file is read twice: once to count non-blank lines (used to size the
 * table and the string array), and once to load them.  Each line's tail id
 * is split off with sep_tailid() and entered into corp->ht with the entry's
 * index; the remaining text is stored in corp->str.
 *
 * @param file        path of the corpus file; failure to open is fatal
 * @param validate    optional filter; a line is skipped when it returns 0
 * @param dup_resolve optional resolver for duplicate ids, called with the
 *                    stored text and the new text: negative is fatal,
 *                    positive replaces the stored text, zero keeps it.
 *                    Without a resolver a duplicate id is fatal.
 * @return the loaded corpus; corp->n is the number of entries stored.
 */
corpus_t *
corpus_load_tailid(const char *file,
                   int32(*validate) (char *str),
                   int32(*dup_resolve) (char *s1, char *s2))
{
    FILE *fp;
    char line[16384], uttid[4096], *id;
    int32 j, m, n;
    corpus_t *corp;

    E_INFO("Loading corpus (%s)\n", file);

    if ((fp = fopen(file, "r")) == NULL)
        E_FATAL_SYSTEM("fopen(%s,r) failed\n", file);

    corp = (corpus_t *) ckd_calloc(1, sizeof(corpus_t));

    /* First pass: count the non-blank lines. */
    n = 0;
    while (fgets(line, sizeof(line), fp) != NULL) {
        /* Skip empty lines.  The field width keeps a long first token from
         * overflowing uttid, which is much smaller than line. */
        if (sscanf(line, "%4095s", uttid) == 1)
            n++;
    }
    rewind(fp);

    corp->ht = hash_table_new(n, HASH_CASE_YES);
    corp->n = 0;
    corp->str = (char **) ckd_calloc(n, sizeof(char *));

    /* Second pass: load the entries. */
    n = 0;
    while (fgets(line, sizeof(line), fp) != NULL) {
        /* Skip blank lines (same bounded scan as above) */
        if (sscanf(line, "%4095s", uttid) < 1)
            continue;

        /* Look for a (uttid) at the end */
        if (sep_tailid(line, uttid) < 0)
            E_FATAL("corpus_load_tailid(%s) failed; bad line: %s\n", file,
                    line);

        /* Validate if a validation function is given */
        if (validate && (!(*validate) (line))) {
            E_INFO("Corpus validation %s failed; skipping\n", uttid);
            continue;
        }

        id = ckd_salloc(uttid);
        if ((m = (long) hash_table_enter(corp->ht, id, (void *)(long)n)) != n) {
            /* Duplicate entry: m is the index of the entry stored earlier */
            if (!dup_resolve)
                E_FATAL
                    ("corpus_load_tailid(%s) failed; duplicate ID: %s\n",
                     file, id);
            else {
                /* Invoke the application provided duplicate resolver function */
                if ((j = (*dup_resolve) (corp->str[m], line)) < 0)
                    E_FATAL
                        ("corpus_load(tailid(%s) failed; duplicate ID: %s\n",
                         file, id);
                /* The table kept the earlier key, so this copy is unused. */
                ckd_free(id);

                if (j > 0) {
                    /* Overwrite the original with the new entry */
                    ckd_free(corp->str[m]);
                    corp->str[m] = ckd_salloc(line);
                }
                else {
                    /* Retain the original entry, discard the new one */
                }
            }
        }
        else {
            /* Fill in new entry */
            corp->str[n] = ckd_salloc(line);
            n++;
        }
    }
    corp->n = n;

    fclose(fp);

    E_INFO("%s: %d entries\n", file, n);

    return corp;
}
/*
 * Read the unigram section of an ARPA file.
 *
 * Advances *li to the "\1-grams:" marker and then reads `count` lines, each
 * holding a log10 probability, a word and an optional log10 back-off
 * weight.  Probabilities and back-offs are converted with
 * logmath_log10_to_log_float(); a positive probability is warned about and
 * replaced by 0.  The words are copied into base->word_str and afterwards
 * entered into base->wid under their index.
 *
 * Returns 0 on success, -1 if the marker or a unigram line is missing or a
 * line has fewer than two fields.
 */
static int
read_1grams_arpa(lineiter_t ** li,
                 uint32 count, ngram_model_t * base, unigram_t * unigrams)
{
    const int min_fields = 2;   /* probability and word; back-off optional */
    char *fields[3];
    uint32 idx;

    /* Skip ahead to the start of the unigram section. */
    while (*li != NULL && 0 != strcmp((*li)->buf, "\\1-grams:"))
        *li = lineiter_next(*li);
    if (*li == NULL) {
        E_ERROR_SYSTEM("Failed to read \\1-grams: mark");
        return -1;
    }

    for (idx = 0; idx < count; ++idx) {
        unigram_t *ug;
        int n_fields;

        *li = lineiter_next(*li);
        if (*li == NULL) {
            E_ERROR
                ("Unexpected end of ARPA file. Failed to read %dth unigram\n",
                 idx + 1);
            return -1;
        }

        n_fields = str2words((*li)->buf, fields, 3);
        if (n_fields < min_fields) {
            E_ERROR("Format error at line %s, Failed to read unigrams\n",
                    (*li)->buf);
            return -1;
        }

        ug = &unigrams[idx];
        ug->prob = logmath_log10_to_log_float(base->lmath, atof_c(fields[0]));
        if (ug->prob > 0) {
            E_WARN("Unigram '%s' has positive probability\n", fields[1]);
            ug->prob = 0;
        }

        /* A third field, when present, is the back-off weight. */
        if (n_fields == min_fields + 1)
            ug->bo = logmath_log10_to_log_float(base->lmath,
                                                atof_c(fields[2]));
        else
            ug->bo = 0.0f;

        /* TODO: classify float with fpclassify and warn if bad value occurred */
        base->word_str[idx] = ckd_salloc(fields[1]);
    }

    /* fill hash-table that maps unigram names to their word ids */
    for (idx = 0; idx < count; ++idx) {
        void *const id_ptr = (void *) (long) idx;

        if (hash_table_enter(base->wid, base->word_str[idx], id_ptr) != id_ptr) {
            E_WARN("Duplicate word in dictionary: %s\n",
                   base->word_str[idx]);
        }
    }

    return 0;
}
batch_decoder_t * batch_decoder_init_argv(int argc, char *argv[]) { batch_decoder_t *bd; char const *str; bd = ckd_calloc(1, sizeof(*bd)); bd->config = cmd_ln_parse_r(NULL, ms_args_def, argc, argv, FALSE); if ((str = cmd_ln_str_r(bd->config, "-ctl")) == NULL) { E_ERROR("-ctl argument not present, nothing to do in batch mode!\n"); goto error_out; } if ((bd->ctlfh = fopen(str, "r")) == NULL) { E_ERROR_SYSTEM("Failed to open control file '%s'", str); goto error_out; } if ((str = cmd_ln_str_r(bd->config, "-align")) != NULL) { if ((bd->alignfh = fopen(str, "r")) == NULL) { E_ERROR_SYSTEM("Failed to open align file '%s'", str); } } if ((str = cmd_ln_str_r(bd->config, "-hyp")) != NULL) { if ((bd->hypfh = fopen(str, "w")) == NULL) { E_ERROR_SYSTEM("Failed to open hypothesis file '%s'", str); } } if ((bd->sf = search_factory_init_cmdln(bd->config)) == NULL) goto error_out; if ((str = cmd_ln_str_r(bd->config, "-fwdtreelm")) != NULL) { if ((bd->fwdtree = search_factory_create(bd->sf, NULL, "fwdtree", "-fwdtreelm", str, NULL)) == NULL) goto error_out; if ((bd->fwdflat = search_factory_create(bd->sf, NULL, "fwdflat", NULL)) == NULL) goto error_out; } else { if ((bd->fwdtree = search_factory_create(bd->sf, NULL, "fwdtree", NULL)) == NULL) goto error_out; if ((bd->fwdflat = search_factory_create(bd->sf, bd->fwdtree, "fwdflat", NULL)) == NULL) goto error_out; } if ((str = cmd_ln_str_r(bd->config, "-vm")) != NULL) { vocab_map_t *vm = vocab_map_init(search_factory_d2p(bd->sf)->dict); FILE *vmfh; if (vm == NULL) goto error_out; if ((vmfh = fopen(str, "r")) == NULL) { vocab_map_free(vm); goto error_out; } if (vocab_map_read(vm, vmfh) < 0) { vocab_map_free(vm); goto error_out; } fclose(vmfh); fwdflat_search_set_vocab_map(bd->fwdflat, vm); } //if ((bd->latgen = search_factory_create(bd->sf, "latgen", NULL)) == NULL) //goto error_out; search_link(bd->fwdtree, bd->fwdflat, "fwdtree", FALSE); // search_link(bd->fwdflat, bd->latgen, "fwdflat", TRUE); search_set_cb(bd->fwdtree, search_cb, 
bd); search_set_cb(bd->fwdflat, search_cb, bd); bd->hypfiles = hash_table_new(0, FALSE); if ((str = cmd_ln_str_r(bd->config, "-hypprefix"))) { char *hypfile; FILE *hypfh; hypfile = string_join(str, ".fwdtree.hyp", NULL); hypfh = fopen(hypfile, "w"); if (hypfh == NULL) { E_ERROR_SYSTEM("Could not open %s", hypfile); } else { hash_table_enter(bd->hypfiles, "fwdtree", hypfh); } ckd_free(hypfile); hypfile = string_join(str, ".fwdflat.hyp", NULL); hypfh = fopen(hypfile, "w"); if (hypfh == NULL) { E_ERROR_SYSTEM("Could not open %s", hypfile); } else { hash_table_enter(bd->hypfiles, "fwdflat", hypfh); } ckd_free(hypfile); } return bd; error_out: return NULL; }
/**
 * Add a word and its pronunciation to the dictionary.
 *
 * The word table is grown by DICT_INC_SZ entries when it is full, after
 * which the word is added as usual.  (Previously the function returned
 * BAD_S3WID straight after growing the table, so the word that triggered
 * the growth was never added.)
 *
 * A copy of @a word is stored and entered into d->ht with the new id.  If
 * dict_word2basestr() reports that the word is an alternative
 * pronunciation, the new entry is linked into the base word's alt list.
 *
 * @param d    dictionary to add to
 * @param word word string; temporarily truncated in place while the base
 *             word is looked up, and restored before returning
 * @param p    phone id sequence, or NULL
 * @param np   number of phones in @a p
 * @return the id of the new word, or BAD_S3WID if the table does not hand
 *         back the new id.  A missing base word is fatal.
 */
s3wid_t
dict_add_word(dict_t * d, char *word, s3cipid_t * p, int32 np)
{
    int32 len;
    dictword_t *wordp;
    s3wid_t newwid;

    if (d->n_word >= d->max_words) {
        E_INFO
            ("Dictionary max size (%d) exceeded; reallocate another entries %d \n",
             d->max_words, DICT_INC_SZ);
        d->word =
            (dictword_t *) ckd_realloc(d->word,
                                       (d->max_words +
                                        DICT_INC_SZ) * sizeof(dictword_t));
        d->max_words = d->max_words + DICT_INC_SZ;
        /* Carry on: there is room for the new word now. */
    }

    wordp = d->word + d->n_word;
    wordp->word = (char *) ckd_salloc(word);    /* Freed in dict_free */

    /* Associate word string with d->n_word in hash table */
    if (hash_table_enter(d->ht, wordp->word, (void *)(long)d->n_word) != (void *)(long)d->n_word) {
        ckd_free(wordp->word);
        return (BAD_S3WID);
    }

    /* Fill in word entry, and set defaults */
    if (p && (np > 0)) {
        wordp->ciphone = (s3cipid_t *) ckd_malloc(np * sizeof(s3cipid_t));      /* Freed in dict_free */
        memcpy(wordp->ciphone, p, np * sizeof(s3cipid_t));
        wordp->pronlen = np;
    }
    else {
        wordp->ciphone = NULL;
        wordp->pronlen = 0;
    }
    wordp->alt = BAD_S3WID;
    wordp->basewid = d->n_word;
    wordp->n_comp = 0;
    wordp->comp = NULL;

    /* Determine base/alt wids */
    if ((len = dict_word2basestr(word)) > 0) {
        void *val;
        s3wid_t w;

        /* Truncated to a baseword string; find its ID */
        if (hash_table_lookup(d->ht, word, &val) < 0) {
            word[len] = '(';    /* Get back the original word */
            E_FATAL("Missing base word for: %s\n", word);
        }
        else
            word[len] = '(';    /* Get back the original word */

        /* Link into alt list */
        w = (s3wid_t)(long)val;
        wordp->basewid = w;
        wordp->alt = d->word[w].alt;
        d->word[w].alt = d->n_word;
    }

    newwid = d->n_word++;

    return (newwid);
}
/**
 * Parse name/value argument pairs into a command-line object.
 *
 * @param inout_cmdln existing object to add to, or NULL to allocate one
 * @param defn        argument definitions, ended by an entry with a NULL
 *                    name; may be NULL
 * @param argc        number of entries in argv
 * @param argv        arguments; argv[0] is skipped unless it starts with '-'
 * @param strict      when non-zero, duplicate definitions, unknown argument
 *                    names and duplicate arguments are errors
 * @return the command-line object, or NULL on error.  On error the object
 *         is released only if it was allocated here.
 */
cmd_ln_t *
cmd_ln_parse_r(cmd_ln_t *inout_cmdln, const arg_t * defn, int32 argc, char *argv[], int strict)
{
    int32 i, j, n, argstart;
    hash_table_t *defidx = NULL;
    cmd_ln_t *cmdln;

    /* Construct command-line object */
    if (inout_cmdln == NULL) {
        cmdln = ckd_calloc(1, sizeof(*cmdln));
        cmdln->refcount = 1;
    }
    else
        cmdln = inout_cmdln;

    /* Build a hash table for argument definitions */
    defidx = hash_table_new(50, 0);
    if (defn) {
        /* n ends up as the number of definitions. */
        for (n = 0; defn[n].name; n++) {
            void *v;

            v = hash_table_enter(defidx, defn[n].name, (void *)&defn[n]);
            if (strict && (v != &defn[n])) {
                E_ERROR("Duplicate argument name in definition: %s\n", defn[n].name);
                goto error;
            }
        }
    }
    else {
        /* No definitions. */
        n = 0;
    }

    /* Allocate memory for argument values */
    if (cmdln->ht == NULL)
        cmdln->ht = hash_table_new(n, 0 /* argument names are case-sensitive */ );

    /* skip argv[0] if it doesn't start with dash */
    argstart = 0;
    if (argc > 0 && argv[0][0] != '-') {
        argstart = 1;
    }

    /* Parse command line arguments (name-value pairs) */
    for (j = argstart; j < argc; j += 2) {
        arg_t *argdef;
        cmd_ln_val_t *val;
        void *v;

        if (hash_table_lookup(defidx, argv[j], &v) < 0) {
            if (strict) {
                E_ERROR("Unknown argument name '%s'\n", argv[j]);
                goto error;
            }
            else if (defn == NULL)
                /* Without definitions every argument is accepted and
                 * stored as a string (see below). */
                v = NULL;
            else
                /* Unknown name in non-strict mode: skip this pair. */
                continue;
        }
        argdef = v;

        /* Enter argument value */
        if (j + 1 >= argc) {
            cmd_ln_print_help_r(cmdln, stderr, defn);
            E_ERROR("Argument value for '%s' missing\n", argv[j]);
            goto error;
        }

        if (argdef == NULL)
            val = cmd_ln_val_init(ARG_STRING, argv[j + 1]);
        else {
            if ((val = cmd_ln_val_init(argdef->type, argv[j + 1])) == NULL) {
                cmd_ln_print_help_r(cmdln, stderr, defn);
                E_ERROR("Bad argument value for %s: %s\n", argv[j],
                        argv[j + 1]);
                goto error;
            }
        }

/* NOTE(review): the conditional region below starts inside the loop body
 * and also contains the brace that closes the loop, so the braces only
 * balance when the region is compiled in. */
#import "OpenEarsStaticAnalysisToggle.h"
#ifdef STATICANALYZEDEPENDENCIES
#define __clang_analyzer__ 1
#endif
#if !defined(__clang_analyzer__) || defined(STATICANALYZEDEPENDENCIES)
#undef __clang_analyzer__
        if ((v = hash_table_enter(cmdln->ht, argv[j], (void *)val)) != (void *)val) {
            if (strict) {
                cmd_ln_val_free(val);
                /* argdef is non-NULL here: in strict mode an unknown name
                 * has already jumped to error above. */
                E_ERROR("Duplicate argument name in arguments: %s\n",
                        argdef->name);
                goto error;
            }
            else {
                /* Non-strict: the later value replaces the earlier one,
                 * which is freed. */
                v = hash_table_replace(cmdln->ht, argv[j], (void *)val);
                cmd_ln_val_free((cmd_ln_val_t *)v);
            }
        }
    }
#endif

    /* Fill in default values, if any, for unspecified arguments */
    for (i = 0; i < n; i++) {
        cmd_ln_val_t *val;
        void *v;

        if (hash_table_lookup(cmdln->ht, defn[i].name, &v) < 0) {
            if ((val = cmd_ln_val_init(defn[i].type, defn[i].deflt)) == NULL) {
                E_ERROR
                    ("Bad default argument value for %s: %s\n",
                     defn[i].name, defn[i].deflt);
                goto error;
            }
            hash_table_enter(cmdln->ht, defn[i].name, (void *)val);
        }
    }

    /* Check for required arguments; exit if any missing */
    /* NOTE(review): j is reset to 0 and never incremented in this loop, so
     * the j > 0 test below cannot succeed and a missing required argument
     * is only logged.  The loop above has also already entered a value for
     * every defined name that was not given. */
    j = 0;
    for (i = 0; i < n; i++) {
        if (defn[i].type & ARG_REQUIRED) {
            void *v;
            if (hash_table_lookup(cmdln->ht, defn[i].name, &v) != 0)
                E_ERROR("Missing required argument %s\n", defn[i].name);
        }
    }
    if (j > 0) {
        cmd_ln_print_help_r(cmdln, stderr, defn);
        goto error;
    }

    /* In strict mode an argument list of length one is treated as
     * "no arguments given". */
    if (strict && argc == 1) {
        E_ERROR("No arguments given, available options are:\n");
        cmd_ln_print_help_r(cmdln, stderr, defn);
        if (defidx)
            hash_table_free(defidx);
        if (inout_cmdln == NULL)
            cmd_ln_free_r(cmdln);
        return NULL;
    }

#ifndef _WIN32_WCE
    /* Logging setup and the configuration dump only run when one of the
     * verbosity flags is set. */
    if(verbose_cmuclmtk == 1 || verbose_pocketsphinx == 1) {
        /* Set up logging. We need to do this earlier because we want to dump
         * the information to the configured log, not to the stderr. */
        if (cmd_ln_exists_r(cmdln, "-logfn") && cmd_ln_str_r(cmdln, "-logfn"))
            err_set_logfile(cmd_ln_str_r(cmdln, "-logfn"));

        /* Echo command line */
        E_INFO("Parsing command line:\n");
        for (i = 0; i < argc; i++) {
            if (argv[i][0] == '-')
                E_INFOCONT("\\\n\t");
            E_INFOCONT("%s ", argv[i]);
        }
        E_INFOCONT("\n\n");
        fflush(stderr);

        /* Print configuration */
        E_INFOCONT("Current configuration:\n");
        arg_dump_r(cmdln, err_get_logfp(), defn, 0);
    }
#endif

    /* The definition index is only needed while parsing. */
    hash_table_free(defidx);
    return cmdln;

  error:
    if (defidx)
        hash_table_free(defidx);
    /* Only release the object if it was allocated by this call. */
    if (inout_cmdln == NULL)
        cmd_ln_free_r(cmdln);
    E_ERROR("Failed to parse arguments list\n");
    return NULL;
}
/*
 * Convert every file listed in a control file.
 *
 * "-nskip" and "-runlen" select the range of lines to process; when
 * "-npart" is non-zero the range is instead derived from "-part"/"-npart"
 * and the number of lines in the file (the last part runs to the end).
 * Only the text before the first space on each line is used as the file id.
 * An input file that has already been processed in this run is skipped.
 *
 * The file names produced by build_filenames() are kept in a hash table
 * (input name -> output name) and released at the end.  Names built for a
 * duplicate entry are released immediately; they used to leak.
 *
 * Returns -1 if the control file cannot be opened, otherwise the result of
 * the last conversion attempted (0 if none was attempted).  Processing
 * stops at the first failed conversion.
 */
static int
run_control_file(sphinx_wave2feat_t *wtf, char const *ctlfile)
{
    hash_table_t *files;
    hash_iter_t *itor;
    lineiter_t *li;
    FILE *ctlfh;
    int nskip, runlen, npart, rv = 0;

    if ((ctlfh = fopen(ctlfile, "r")) == NULL) {
        E_ERROR_SYSTEM("Failed to open control file %s", ctlfile);
        return -1;
    }
    nskip = cmd_ln_int32_r(wtf->config, "-nskip");
    runlen = cmd_ln_int32_r(wtf->config, "-runlen");
    if ((npart = cmd_ln_int32_r(wtf->config, "-npart"))) {
        /* Count lines in the file. */
        int partlen, part, nlines = 0;
        part = cmd_ln_int32_r(wtf->config, "-part");
        for (li = lineiter_start(ctlfh); li; li = lineiter_next(li))
            ++nlines;
        fseek(ctlfh, 0, SEEK_SET);
        partlen = nlines / npart;
        nskip = partlen * (part - 1);
        /* The last part also takes the remainder of the division. */
        if (part == npart)
            runlen = -1;
        else
            runlen = partlen;
    }
    if (runlen != -1){
        E_INFO("Processing %d utterances at position %d\n", runlen, nskip);
        files = hash_table_new(runlen, HASH_CASE_YES);
    }
    else {
        E_INFO("Processing all remaining utterances at position %d\n", nskip);
        files = hash_table_new(1000, HASH_CASE_YES);
    }
    for (li = lineiter_start(ctlfh); li; li = lineiter_next(li)) {
        char *c, *infile, *outfile;

        if (nskip-- > 0)
            continue;
        if (runlen == 0) {
            lineiter_free(li);
            break;
        }
        --runlen;

        string_trim(li->buf, STRING_BOTH);
        /* Extract the file ID from the control line. */
        if ((c = strchr(li->buf, ' ')) != NULL)
            *c = '\0';
        if (strlen(li->buf) == 0) {
            E_WARN("Empty line %d in control file, skipping\n", li->lineno);
            continue;
        }
        build_filenames(wtf->config, li->buf, &infile, &outfile);
        if (hash_table_lookup(files, infile, NULL) == 0) {
            /* Already converted: these names never reach the table, so
             * release them here. */
            ckd_free(infile);
            ckd_free(outfile);
            continue;
        }
        rv = sphinx_wave2feat_convert_file(wtf, infile, outfile);
        /* The table now owns both names, whether or not the conversion
         * succeeded. */
        hash_table_enter(files, infile, outfile);
        if (rv != 0) {
            lineiter_free(li);
            break;
        }
    }
    for (itor = hash_table_iter(files); itor;
         itor = hash_table_iter_next(itor)) {
        ckd_free((void *)hash_entry_key(itor->ent));
        ckd_free(hash_entry_val(itor->ent));
    }
    hash_table_free(files);

    if (fclose(ctlfh) == EOF)
        E_ERROR_SYSTEM("Failed to close control file");
    return rv;
}
/*
 * Read the word-string section of an LM dump.
 *
 * Reads a 32-bit byte count followed by that many bytes of NUL-terminated
 * words, checks that the number of words equals lm->n_ug, copies each word
 * into lm->wordstr and enters it into lm->HT under its index.  The entries
 * for S3_START_WORD and S3_FINISH_WORD, when present, get their unigram
 * probability / back-off weight forced to MIN_PROB_F and their ids recorded
 * in lm->startlwid / lm->finishlwid.
 *
 * The temporary buffer is now released on the error returns as well.
 *
 * file is used only in error messages; is32bits is unused.
 *
 * Returns LM_SUCCESS, or LM_FAIL on a bad size, a short read or a word
 * count mismatch.
 */
static int32
lm_read_dump_wordstr(lm_t * lm, const char *file, int32 is32bits)
{
    int32 i, j, k;
    char *tmp_word_str;
    s3lmwid32_t startwid, endwid;

    /* Read word string names */
    k = lm_fread_int32(lm);
    if (k <= 0) {
        E_ERROR("Bad wordstrings size: %d\n", k);
        return LM_FAIL;
    }

    tmp_word_str = (char *) ckd_calloc(k, sizeof(char));
    if (fread(tmp_word_str, sizeof(char), k, lm->fp) != (size_t) k) {
        E_ERROR("fread(%s) failed\n", file);
        free(tmp_word_str);
        return LM_FAIL;
    }

    /* First make sure string just read contains n_ug words (PARANOIA!!) */
    for (i = 0, j = 0; i < k; i++)
        if (tmp_word_str[i] == '\0')
            j++;
    if (j != lm->n_ug) {
        E_ERROR("Bad #words: %d\n", j);
        free(tmp_word_str);
        return LM_FAIL;
    }

    /* Break up string just read into words.  The count check above means
     * each of the n_ug words ends at a NUL inside the buffer. */
    startwid = endwid = (s3lmwid32_t) BAD_LMWID(lm);

    lm->wordstr = (char **) ckd_calloc(lm->n_ug, sizeof(char *));
    j = 0;
    for (i = 0; i < lm->n_ug; i++) {
        if (strcmp(tmp_word_str + j, S3_START_WORD) == 0)
            startwid = i;
        else if (strcmp(tmp_word_str + j, S3_FINISH_WORD) == 0)
            endwid = i;

        lm->wordstr[i] = (char *) ckd_salloc(tmp_word_str + j);

        hash_table_enter(lm->HT, lm->wordstr[i], (void *)(long)i);

        j += strlen(tmp_word_str + j) + 1;
    }
    free(tmp_word_str);
    E_INFO("%8d word strings\n", i);

    /* Force ugprob(<s>) = MIN_PROB_F */
    if (IS_LMWID(lm, startwid)) {
        lm->ug[startwid].prob.f = MIN_PROB_F;
        lm->startlwid = startwid;
    }

    /* Force bowt(</s>) = MIN_PROB_F */
    if (IS_LMWID(lm, endwid)) {
        lm->ug[endwid].bowt.f = MIN_PROB_F;
        lm->finishlwid = endwid;
    }
    else {
        E_WARN("No </s> in LM!\n");
    }

    return LM_SUCCESS;
}