/**
 * Feed a batch of cepstral frames to the decoder.
 *
 * The frames are handed to the acoustic model through acmod_process_cep()
 * until none remain.  Unless @p no_search is set, ps_search_forward() is
 * invoked after every call to the acoustic model.
 *
 * @param ps        Decoder whose acoustic model and search are driven.
 * @param data      Frame array; passed by address to acmod_process_cep()
 *                  together with the remaining frame count.
 * @param n_frames  Number of frames available in @p data.
 * @param no_search If non-zero, the acmod feature buffer is made growable
 *                  and no search is performed.
 * @param full_utt  Forwarded unchanged to acmod_process_cep().
 * @return The sum of the values returned by ps_search_forward(), or the
 *         first negative value returned by acmod_process_cep() or
 *         ps_search_forward().
 */
int
ps_process_cep(ps_decoder_t *ps, mfcc_t **data, int32 n_frames,
               int no_search, int full_utt)
{
    int total_searched = 0;

    if (no_search)
        acmod_set_grow(ps->acmod, TRUE);

    while (n_frames != 0) {
        int rv;

        /* Turn some of the input into features. */
        rv = acmod_process_cep(ps->acmod, &data, &n_frames, full_utt);
        if (rv < 0)
            return rv;

        /* Score and search whatever is available, unless told not to. */
        if (!no_search) {
            rv = ps_search_forward(ps);
            if (rv < 0)
                return rv;
            total_searched += rv;
        }
    }

    return total_searched;
}
/**
 * Feed a buffer of raw audio samples to the decoder.
 *
 * If the acoustic model is in the ACMOD_IDLE state an error is logged and
 * nothing is processed.  Otherwise the samples are handed to
 * acmod_process_raw() until none remain and, unless @p no_search is set,
 * ps_search_forward() is invoked after every call to the acoustic model.
 *
 * @param ps        Decoder whose acoustic model and search are driven.
 * @param data      Sample buffer; passed by address to acmod_process_raw()
 *                  together with the remaining sample count.
 * @param n_samples Number of samples available in @p data.
 * @param no_search If non-zero, the acmod feature buffer is made growable
 *                  and no search is performed.
 * @param full_utt  Forwarded unchanged to acmod_process_raw().
 * @return The sum of the values returned by ps_search_forward(); 0 if the
 *         acoustic model is idle; or the first negative value returned by
 *         acmod_process_raw() or ps_search_forward().
 */
int
ps_process_raw(ps_decoder_t *ps, int16 const *data, size_t n_samples,
               int no_search, int full_utt)
{
    int total_searched = 0;

    /* Refuse to process anything while the acoustic model is idle. */
    if (ps->acmod->state == ACMOD_IDLE) {
        E_ERROR("Failed to process data, utterance is not started. Use start_utt to start it\n");
        return 0;
    }

    if (no_search)
        acmod_set_grow(ps->acmod, TRUE);

    while (n_samples > 0) {
        int rv;

        /* Turn some of the input into features. */
        rv = acmod_process_raw(ps->acmod, &data, &n_samples, full_utt);
        if (rv < 0)
            return rv;

        /* Score and search whatever is available, unless told not to. */
        if (!no_search) {
            rv = ps_search_forward(ps);
            if (rv < 0)
                return rv;
            total_searched += rv;
        }
    }

    return total_searched;
}
int main(int argc, char *argv[]) { ps_decoder_t *ps; cmd_ln_t *config; acmod_t *acmod; ngram_search_t *ngs; ps_lattice_t *dag; clock_t c; int i; TEST_ASSERT(config = cmd_ln_init(NULL, ps_args(), TRUE, "-hmm", MODELDIR "/en-us/en-us", "-lm", MODELDIR "/en-us/en-us.lm.bin", "-dict", MODELDIR "/en-us/cmudict-en-us.dict", "-fwdtree", "yes", "-fwdflat", "no", "-bestpath", "yes", "-samprate", "16000", NULL)); TEST_ASSERT(ps = ps_init(config)); ngs = (ngram_search_t *)ps->search; acmod = ps->acmod; acmod_set_grow(ps->acmod, TRUE); setbuf(stdout, NULL); c = clock(); for (i = 0; i < 5; ++i) { FILE *rawfh; int16 buf[2048]; size_t nread; int16 const *bptr; int nfr; ps_astar_t *nbest; ps_latpath_t *path; char *besthyp; const char *astar_besthyp = NULL; int32 astar_hyp_score; int i; /* PocketSphinx API would do this for us but we have to do it manually here. */ ps_lattice_free(ps->search->dag); ps->search->dag = NULL; TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb")); TEST_EQUAL(0, acmod_start_utt(acmod)); ngram_fwdtree_start(ngs); while (!feof(rawfh)) { nread = fread(buf, sizeof(*buf), 2048, rawfh); bptr = buf; while ((nfr = acmod_process_raw(acmod, &bptr, &nread, FALSE)) > 0) { while (acmod->n_feat_frame > 0) { ngram_fwdtree_search(ngs, acmod->output_frame); acmod_advance(acmod); } } } ngram_fwdtree_finish(ngs); printf("FWDTREE: %s\n", ngram_search_bp_hyp(ngs, ngram_search_find_exit(ngs, -1, NULL))); TEST_ASSERT(acmod_end_utt(acmod) >= 0); fclose(rawfh); dag = ngram_search_lattice(ps->search); if (dag == NULL) { E_ERROR("Failed to build DAG!\n"); return 1; } besthyp = ckd_salloc (ps_lattice_hyp(dag, ps_lattice_bestpath (dag, ngs->lmset, 9.5/6.5, 1.0))); printf("BESTPATH: %s\n", besthyp); TEST_ASSERT(nbest = ps_astar_start(dag, ngs->lmset, 9.5/6.5, 0, -1, -1, -1)); i = 0; astar_hyp_score = WORST_SCORE; while ((path = ps_astar_next(nbest))) { if (i < 10) printf("NBEST %d: %s (%d)\n", i, ps_astar_hyp(nbest, path), path->score); if (path->score > astar_hyp_score) { 
astar_hyp_score = path->score; astar_besthyp = ps_astar_hyp(nbest, path); } i++; } TEST_EQUAL(0, strcmp(besthyp, astar_besthyp)); ps_astar_finish(nbest); ckd_free(besthyp); } printf("%s\n", ngram_search_bp_hyp(ngs, ngram_search_find_exit(ngs, -1, NULL))); TEST_EQUAL(0, strcmp("go forward ten meters", ngram_search_bp_hyp(ngs, ngram_search_find_exit(ngs, -1, NULL)))); c = clock() - c; printf("5 * fwdtree + bestpath + N-best search in %.2f sec\n", (double)c / CLOCKS_PER_SEC); ps_free(ps); cmd_ln_free_r(config); return 0; }
/**
 * (Re)initialise a decoder from a configuration.
 *
 * Existing searches, the acoustic model and the dictionary are released
 * and rebuilt from ps->config.  The log-math object is rebuilt only when
 * it is missing or its base differs from -logbase.  Depending on which of
 * -fsg / -jsgf / -lm / -lmctl is set, an FSG or N-Gram search is created
 * and made current; otherwise only the dict2pid mapping is built.
 *
 * @param ps     Decoder to (re)initialise.
 * @param config New configuration, or NULL to keep ps->config.  When it
 *               is non-NULL and differs from ps->config, the previous
 *               configuration is released with cmd_ln_free_r() and
 *               @p config is stored in its place.
 * @return 0 on success, -1 as soon as any component fails to initialise
 *         (no cleanup of already-built components is done here).
 */
int
ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
{
    float64 logbase;
    int want_fsg, want_ngram;

    /* Adopt the new configuration, if one was supplied. */
    if (config != NULL && config != ps->config) {
        cmd_ln_free_r(ps->config);
        ps->config = config;
    }

#ifndef _WIN32_WCE
    /* Set up logging. */
    {
        char const *logfn = cmd_ln_str_r(ps->config, "-logfn");

        if (logfn != NULL)
            err_set_logfile(logfn);
    }
#endif
    err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
    ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
    ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");

    /* Fill in some default arguments. */
    ps_init_defaults(ps);

    /* Tear-down order matters: searches first, then the acoustic model,
     * and the dictionary only after both of those. */
    ps_free_searches(ps);
    acmod_free(ps->acmod);
    ps->acmod = NULL;
    dict_free(ps->dict);
    ps->dict = NULL;

    /* Logmath computation (used in acmod and search); rebuilt only if it
     * is absent or was created with a different base. */
    logbase = (float64)cmd_ln_float32_r(ps->config, "-logbase");
    if (ps->lmath == NULL || logmath_get_base(ps->lmath) != logbase) {
        if (ps->lmath != NULL)
            logmath_free(ps->lmath);
        ps->lmath = logmath_init(logbase, 0,
                                 cmd_ln_boolean_r(ps->config, "-bestpath"));
    }

    /* Acoustic model (this is basically everything that uttproc.c,
     * senscr.c, and others used to do). */
    ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL);
    if (ps->acmod == NULL)
        return -1;

    /* Two-pass search (fwdtree followed by fwdflat) needs a growable
     * acmod feature buffer. */
    if (cmd_ln_boolean_r(ps->config, "-fwdflat")
        && cmd_ln_boolean_r(ps->config, "-fwdtree"))
        acmod_set_grow(ps->acmod, TRUE);

    /* Optional auxiliary phone loop search, which will run in "parallel"
     * with FSG or N-Gram search.
     * NOTE(review): ps->phone_loop is not cleared when -pl_window is 0;
     * if an earlier initialisation created one and ps_free_searches()
     * released it, the pointer copied into the search below may be
     * stale -- confirm against ps_free_searches(). */
    ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window");
    if (ps->pl_window) {
        ps->phone_loop = phone_loop_search_init(ps->config, ps->acmod, ps->dict);
        if (ps->phone_loop == NULL)
            return -1;
        ps->searches = glist_add_ptr(ps->searches, ps->phone_loop);
    }

    /* Dictionary and triphone mappings (depends on acmod). */
    /* FIXME: pass config, change arguments, implement LTS, etc. */
    ps->dict = dict_init(ps->config, ps->acmod->mdef);
    if (ps->dict == NULL)
        return -1;

    /* Determine whether we are starting out in FSG or N-Gram search
     * mode; FSG/JSGF takes precedence over a language model. */
    want_fsg = cmd_ln_str_r(ps->config, "-fsg") != NULL
        || cmd_ln_str_r(ps->config, "-jsgf") != NULL;
    want_ngram = !want_fsg
        && (cmd_ln_str_r(ps->config, "-lm") != NULL
            || cmd_ln_str_r(ps->config, "-lmctl") != NULL);

    /* The dict2pid mapping is needed in every mode. */
    ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict);
    if (ps->d2p == NULL)
        return -1;

    /* With neither a grammar nor a language model configured, the search
     * is initialised later, whenever the user decides to load one. */
    if (want_fsg || want_ngram) {
        ps_search_t *srch;

        if (want_fsg)
            srch = fsg_search_init(ps->config, ps->acmod, ps->dict, ps->d2p);
        else
            srch = ngram_search_init(ps->config, ps->acmod, ps->dict, ps->d2p);
        if (srch == NULL)
            return -1;

        srch->pls = ps->phone_loop;
        ps->searches = glist_add_ptr(ps->searches, srch);
        ps->search = srch;
    }

    /* Initialize performance timer. */
    ps->perf.name = "decode";
    ptmr_init(&ps->perf);

    return 0;
}