/**
 * Populate the mel filterbank parameters and set up frequency warping.
 *
 * Sampling rate, FFT size and cepstrum count are copied from the front end;
 * everything else is read from the configuration.  As a side effect,
 * fe->feature_dimension is set to the number of filters when fe->log_spec is
 * non-zero, and to the number of cepstra otherwise.
 *
 * @param config Configuration to read the filterbank options from.
 * @param fe     Front end supplying the shared values; feature_dimension is written.
 * @param mel    Filterbank parameter structure to fill in.
 * @return 0 on success, -1 if fe_warp_set() does not report FE_SUCCESS.
 */
static int
fe_parse_melfb_params(cmd_ln_t *config, fe_t *fe, melfb_t * mel)
{
    /* Values shared with the front end. */
    mel->sampling_rate = fe->sampling_rate;
    mel->fft_size = fe->fft_size;
    mel->num_cepstra = fe->num_cepstra;

    mel->num_filters = cmd_ln_int32_r(config, "-nfilt");

    /* The output dimension depends on whether log spectra are produced. */
    if (!fe->log_spec)
        fe->feature_dimension = fe->num_cepstra;
    else
        fe->feature_dimension = mel->num_filters;

    /* Filter layout options. */
    mel->upper_filt_freq = cmd_ln_float32_r(config, "-upperf");
    mel->lower_filt_freq = cmd_ln_float32_r(config, "-lowerf");
    mel->doublewide = cmd_ln_boolean_r(config, "-doublebw");

    /* Warping options. */
    mel->warp_type = cmd_ln_str_r(config, "-warp_type");
    mel->warp_params = cmd_ln_str_r(config, "-warp_params");

    mel->lifter_val = cmd_ln_int32_r(config, "-lifter");
    mel->unit_area = cmd_ln_boolean_r(config, "-unit_area");
    mel->round_filters = cmd_ln_boolean_r(config, "-round_filters");

    if (fe_warp_set(mel, mel->warp_type) == FE_SUCCESS) {
        fe_warp_set_parameters(mel, mel->warp_params, mel->sampling_rate);
        return 0;
    }

    E_ERROR("Failed to initialize the warping function.\n");
    return -1;
}
/**
 * Read the general front-end parameters from the configuration into fe.
 *
 * Validates that the FFT size is a positive power of two and that it is at
 * least as large as the analysis window (window length times sampling rate),
 * and maps the "-transform" option onto the transform enumeration.
 *
 * @param config Configuration to read from; also stored in fe->config.
 * @param fe     Front-end structure to fill in.
 * @return 0 on success, -1 on an invalid FFT size or transform type.
 */
int
fe_parse_general_params(cmd_ln_t *config, fe_t * fe)
{
    int j;
    int frame_size;
    char const *transform;

    fe->config = config;
    fe->sampling_rate = cmd_ln_float32_r(config, "-samprate");
    fe->frame_rate = (int16)cmd_ln_int32_r(config, "-frate");
    if (cmd_ln_boolean_r(config, "-dither")) {
        fe->dither = 1;
        fe->seed = cmd_ln_int32_r(config, "-seed");
    }
    /* Swap only when the input byte order differs from the build's byte order. */
#ifdef WORDS_BIGENDIAN
    fe->swap = strcmp("big", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
#else
    fe->swap = strcmp("little", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
#endif
    fe->window_length = cmd_ln_float32_r(config, "-wlen");
    fe->pre_emphasis_alpha = cmd_ln_float32_r(config, "-alpha");

    fe->num_cepstra = (uint8)cmd_ln_int32_r(config, "-ncep");
    fe->fft_size = (int16)cmd_ln_int32_r(config, "-nfft");

    /*
     * Check FFT size, compute FFT order (log_2(n)).  Non-positive sizes are
     * rejected up front: the halving loop below never executes for them, so
     * a check placed inside the loop could not catch them.
     */
    fe->fft_order = 0;
    if (fe->fft_size <= 0) {
        E_ERROR("fft: number of points must be a power of 2 (is %d)\n",
                fe->fft_size);
        return -1;
    }
    for (j = fe->fft_size; j > 1; j >>= 1, fe->fft_order++) {
        if ((j % 2) != 0) {
            E_ERROR("fft: number of points must be a power of 2 (is %d)\n",
                    fe->fft_size);
            return -1;
        }
    }

    /* Verify that FFT size is greater or equal to window length. */
    frame_size = (int)(fe->window_length * fe->sampling_rate);
    if (fe->fft_size < frame_size) {
        E_ERROR("FFT: Number of points must be greater or equal to frame size (%d samples)\n",
                frame_size);
        return -1;
    }

    fe->remove_dc = cmd_ln_boolean_r(config, "-remove_dc");

    /* Look the option up once; a missing value is reported as invalid
     * rather than being handed to strcmp(). */
    transform = cmd_ln_str_r(config, "-transform");
    if (transform != NULL && 0 == strcmp(transform, "dct"))
        fe->transform = DCT_II;
    else if (transform != NULL && 0 == strcmp(transform, "legacy"))
        fe->transform = LEGACY_DCT;
    else if (transform != NULL && 0 == strcmp(transform, "htk"))
        fe->transform = DCT_HTK;
    else {
        E_ERROR("Invalid transform type (values are 'dct', 'legacy', 'htk')\n");
        return -1;
    }

    /* "-smoothspec" is tested last, so it wins when both flags are set. */
    if (cmd_ln_boolean_r(config, "-logspec"))
        fe->log_spec = RAW_LOG_SPEC;
    if (cmd_ln_boolean_r(config, "-smoothspec"))
        fe->log_spec = SMOOTH_LOG_SPEC;

    return 0;
}
/**
 * Run a best-path search over the DAG for the flat forward search.
 *
 * Fudge edges are added, filler nodes are bypassed if that has not been done
 * yet, and dag_search() is run from dag->end.
 *
 * @return A list of the hypothesis segments as built by glist_add_ptr() and
 *         glist_reverse() over the chain returned by dag_search(), or NULL
 *         when dag_search() returns no hypothesis.
 */
glist_t
srch_FLAT_FWD_bestpath_impl(void *srch, /**< A void pointer to a search structure */
                            dag_t * dag)
{
    srch_t *s = (srch_t *) srch;
    srch_FLAT_FWD_graph_t *fwg = (srch_FLAT_FWD_graph_t *) s->grh->graph_struct;
    float32 bestpathlw;
    float64 lwf;
    srch_hyp_t *seg, *best;
    glist_t hyplist;

    assert(fwg->lathist);

    /* Language weight factor: ratio of the best-path weight to "-lw",
     * or 1.0 when the best-path weight is zero. */
    bestpathlw = cmd_ln_float32_r(kbcore_config(fwg->kbcore), "-bestpathlw");
    if (bestpathlw)
        lwf = bestpathlw / cmd_ln_float32_r(kbcore_config(fwg->kbcore), "-lw");
    else
        lwf = 1.0;

    flat_fwd_dag_add_fudge_edges(fwg,
                                 dag,
                                 cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-dagfudge"),
                                 cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-min_endfr"),
                                 (void *) fwg->lathist,
                                 s->kbc->dict);

    /* Bypass filler nodes */
    if (!dag->filler_removed) {
        /* If Viterbi search terminated in filler word coerce final DAG node to FINISH_WORD */
        if (dict_filler_word(s->kbc->dict, dag->end->wid))
            dag->end->wid = s->kbc->dict->finishwid;

        if (dag_bypass_filler_nodes(dag, lwf, s->kbc->dict, s->kbc->fillpen) >= 0)
            dag->filler_removed = 1;
        else
            E_ERROR("maxedge limit (%d) exceeded\n", dag->maxedge);
    }

    best = dag_search(dag, s->uttid, lwf, dag->end,
                      s->kbc->dict, s->kbc->lmset->cur_lm, s->kbc->fillpen);
    if (best == NULL)
        return NULL;

    /* Collect the segments, then reverse the list. */
    hyplist = NULL;
    for (seg = best; seg != NULL; seg = seg->next)
        hyplist = glist_add_ptr(hyplist, (void *) seg);

    return glist_reverse(hyplist);
}
/**
 * Read an N-gram language model from a file.
 *
 * For NGRAM_AUTO the binary, ARPA and DMP readers are tried in that order;
 * for NGRAM_BIN the binary reader is tried first, then the DMP reader.
 * When a configuration is given, the "-lw" and "-wip" values (each
 * defaulting to 1.0 if absent) are applied to the loaded model.
 *
 * @param config    Optional configuration; may be NULL.
 * @param file_name Path of the model file.
 * @param file_type Requested file format.
 * @param lmath     Log-math object handed to the readers.
 * @return The loaded model, or NULL if no reader succeeded or the file type
 *         is not supported.
 */
ngram_model_t *
ngram_model_read(cmd_ln_t * config,
                 const char *file_name,
                 ngram_file_type_t file_type, logmath_t * lmath)
{
    ngram_model_t *model = NULL;

    switch (file_type) {
    case NGRAM_AUTO:
        if ((model = ngram_model_trie_read_bin(config, file_name, lmath)) != NULL)
            break;
        if ((model = ngram_model_trie_read_arpa(config, file_name, lmath)) != NULL)
            break;
        model = ngram_model_trie_read_dmp(config, file_name, lmath);
        break;
    case NGRAM_ARPA:
        model = ngram_model_trie_read_arpa(config, file_name, lmath);
        break;
    case NGRAM_BIN:
        if ((model = ngram_model_trie_read_bin(config, file_name, lmath)) != NULL)
            break;
        model = ngram_model_trie_read_dmp(config, file_name, lmath);
        break;
    default:
        E_ERROR("language model file type not supported\n");
        return NULL;
    }

    /* Every reader may fail (including the plain ARPA path); never apply
     * weights to a model that was not loaded. */
    if (model == NULL)
        return NULL;

    /* Now set weights based on config if present. */
    if (config) {
        float32 lw = 1.0;
        float32 wip = 1.0;

        if (cmd_ln_exists_r(config, "-lw"))
            lw = cmd_ln_float32_r(config, "-lw");
        if (cmd_ln_exists_r(config, "-wip"))
            wip = cmd_ln_float32_r(config, "-wip");

        ngram_model_apply_weights(model, lw, wip);
    }

    return model;
}
/**
 * Run an N-best search over the DAG and write the result to a file.
 *
 * Nothing is done unless "-nbestdir" exists in the configuration and has a
 * non-NULL value.  The output file name is built with ctl_outfile() from
 * "-nbestdir", "-nbestext" and the utterance file name or id.
 *
 * @return Always NULL; the result goes to the output file via nbest_search().
 */
glist_t
srch_FLAT_FWD_nbest_impl(void *srch, /**< A void pointer to a search structure */
                         dag_t * dag)
{
    srch_t *s = (srch_t *) srch;
    srch_FLAT_FWD_graph_t *fwg = (srch_FLAT_FWD_graph_t *) s->grh->graph_struct;
    float32 bestpathlw;
    float64 lwf;
    char outfile[2000];

    assert(fwg->lathist);

    /* N-best output is optional: skip when no output directory is configured. */
    if (!cmd_ln_exists_r(kbcore_config(fwg->kbcore), "-nbestdir")
        || !cmd_ln_str_r(kbcore_config(fwg->kbcore), "-nbestdir"))
        return NULL;

    ctl_outfile(outfile,
                cmd_ln_str_r(kbcore_config(fwg->kbcore), "-nbestdir"),
                cmd_ln_str_r(kbcore_config(fwg->kbcore), "-nbestext"),
                (s->uttfile ? s->uttfile : s->uttid),
                s->uttid,
                cmd_ln_boolean_r(kbcore_config(fwg->kbcore), "-build_outdirs"));

    /* Language weight factor: ratio of the best-path weight to "-lw",
     * or 1.0 when the best-path weight is zero. */
    bestpathlw = cmd_ln_float32_r(kbcore_config(fwg->kbcore), "-bestpathlw");
    if (bestpathlw)
        lwf = bestpathlw / cmd_ln_float32_r(kbcore_config(fwg->kbcore), "-lw");
    else
        lwf = 1.0;

    flat_fwd_dag_add_fudge_edges(fwg,
                                 dag,
                                 cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-dagfudge"),
                                 cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-min_endfr"),
                                 (void *) fwg->lathist,
                                 s->kbc->dict);

    /* Bypass filler nodes */
    if (!dag->filler_removed) {
        /* If Viterbi search terminated in filler word coerce final DAG node to FINISH_WORD */
        if (dict_filler_word(s->kbc->dict, dag->end->wid))
            dag->end->wid = s->kbc->dict->finishwid;

        dag_remove_unreachable(dag);

        if (dag_bypass_filler_nodes(dag, lwf, s->kbc->dict, s->kbc->fillpen) < 0)
            E_ERROR("maxedge limit (%d) exceeded\n", dag->maxedge);
    }

    dag_compute_hscr(dag, kbcore_dict(s->kbc), kbcore_lm(s->kbc), lwf);

    /* The bypass links are removed again and the flag is cleared. */
    dag_remove_bypass_links(dag);
    dag->filler_removed = 0;

    nbest_search(dag, outfile, s->uttid, lwf,
                 kbcore_dict(s->kbc), kbcore_lm(s->kbc), kbcore_fillpen(s->kbc));

    return NULL;
}
int main(int argc, char *argv[]) { cmd_ln_t *config; logmath_t *lmath; acmod_t *acmod[5]; sbthread_t *thr[5]; featbuf_t *fb; FILE *raw; int16 buf[2048]; int nsamp; int i; config = cmd_ln_init(NULL, ps_args(), TRUE, "-hmm", TESTDATADIR "/hub4wsj_sc_8k", "-lm", TESTDATADIR "/bn10000.3g.arpa", "-dict", TESTDATADIR "/bn10000.dic", "-compallsen", "yes", NULL); ps_init_defaults(config); fb = featbuf_init(config); TEST_ASSERT(fb); lmath = logmath_init(cmd_ln_float32_r(config, "-logbase"), 0, FALSE); acmod[0] = acmod_init(config, lmath, fb); TEST_ASSERT(acmod[0]); /* Create a couple threads to pull features out of it. */ for (i = 0; i < 5; ++i) { if (i != 0) acmod[i] = acmod_copy(acmod[0]); thr[i] = sbthread_start(NULL, consumer, acmod[i]); } /* Feed them some data. */ raw = fopen(TESTDATADIR "/chan3.raw", "rb"); featbuf_producer_start_utt(fb, "chan3"); while ((nsamp = fread(buf, 2, 2048, raw)) > 0) { int rv; rv = featbuf_producer_process_raw(fb, buf, nsamp, FALSE); printf("Producer processed %d samples\n", nsamp); TEST_ASSERT(rv > 0); } fclose(raw); printf("Waiting for consumers\n"); featbuf_producer_end_utt(fb); printf("Finished waiting\n"); /* Reap those threads. */ for (i = 0; i < 5; ++i) { sbthread_wait(thr[i]); sbthread_free(thr[i]); acmod_free(acmod[i]); printf("Reaped consumer %p\n", acmod[i]); } featbuf_free(fb); logmath_free(lmath); cmd_ln_free_r(config); return 0; }
/**
 * Parse a JSGF grammar from a string, build an FSG from its start rule and
 * register it with the decoder under the given name.
 *
 * The start rule is the one named by "-toprule" when that option is set,
 * otherwise the rule returned by jsgf_get_public_rule().
 *
 * @param ps          Decoder to register the grammar with.
 * @param name        Name to register the resulting FSG under.
 * @param jsgf_string JSGF grammar text.
 * @return The result of ps_set_fsg(), or -1 if the grammar cannot be parsed
 *         or no start rule is found.
 */
int
ps_set_jsgf_string(ps_decoder_t *ps, const char *name, const char *jsgf_string)
{
    fsg_model_t *fsg;
    jsgf_rule_t *rule;
    char const *toprule;
    jsgf_t *jsgf = jsgf_parse_string(jsgf_string, NULL);
    float lw;
    int result;

    if (!jsgf)
        return -1;

    rule = NULL;
    /* Take the -toprule if specified. */
    if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) {
        rule = jsgf_get_rule(jsgf, toprule);
        if (rule == NULL) {
            E_ERROR("Start rule %s not found\n", toprule);
            /* The parsed grammar is owned here; release it on every exit. */
            jsgf_grammar_free(jsgf);
            return -1;
        }
    }
    else {
        rule = jsgf_get_public_rule(jsgf);
        if (rule == NULL) {
            E_ERROR("No public rules found in input string\n");
            jsgf_grammar_free(jsgf);
            return -1;
        }
    }

    lw = cmd_ln_float32_r(ps->config, "-lw");
    fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw);
    result = ps_set_fsg(ps, name, fsg);
    fsg_model_free(fsg);
    /* The grammar is no longer needed once the FSG has been built. */
    jsgf_grammar_free(jsgf);
    return result;
}
/**
 * Set up dynamic feature computation for the acoustic model.
 *
 * Creates acmod->fcb from the "-feat", "-cmn", "-varnorm", "-agc" and
 * "-ceplen" options, then applies the optional LDA transform ("-lda"),
 * subvector specification ("-svspec"), AGC threshold ("-agcthresh") and
 * initial CMN mean values ("-cmninit", a comma-separated list).
 *
 * @return 0 on success, -1 if any of the initialisation steps fails.
 */
static int
acmod_init_feat(acmod_t *acmod)
{
    char const *lda_file;
    char const *svspec;

    acmod->fcb = feat_init(cmd_ln_str_r(acmod->config, "-feat"),
                           cmn_type_from_str(cmd_ln_str_r(acmod->config, "-cmn")),
                           cmd_ln_boolean_r(acmod->config, "-varnorm"),
                           agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")),
                           1,
                           cmd_ln_int32_r(acmod->config, "-ceplen"));
    if (acmod->fcb == NULL)
        return -1;

    /* Optional linear feature transformation. */
    lda_file = cmd_ln_str_r(acmod->config, "-lda");
    if (lda_file) {
        E_INFO("Reading linear feature transformation from %s\n", lda_file);
        if (feat_read_lda(acmod->fcb, lda_file,
                          cmd_ln_int32_r(acmod->config, "-ldadim")) < 0)
            return -1;
    }

    /* Optional subvector specification. */
    svspec = cmd_ln_str_r(acmod->config, "-svspec");
    if (svspec) {
        int32 **subvecs;

        E_INFO("Using subvector specification %s\n", svspec);
        subvecs = parse_subvecs(svspec);
        if (subvecs == NULL)
            return -1;
        if (feat_set_subvecs(acmod->fcb, subvecs) < 0)
            return -1;
    }

    /* The AGC threshold is applied only when AGC is not "none". */
    if (cmd_ln_exists_r(acmod->config, "-agcthresh")
        && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) {
        agc_set_threshold(acmod->fcb->agc_struct,
                          cmd_ln_float32_r(acmod->config, "-agcthresh"));
    }

    /* Seed the CMN mean vector from the comma-separated "-cmninit" list. */
    if (acmod->fcb->cmn_struct
        && cmd_ln_exists_r(acmod->config, "-cmninit")) {
        char *copy, *tok, *comma;
        int32 n;

        /* Work on a private copy: the separators are overwritten in place. */
        copy = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit"));

        /* Every comma-terminated value, up to the vector length. */
        for (n = 0, tok = copy;
             n < acmod->fcb->cmn_struct->veclen;
             ++n, tok = comma + 1) {
            comma = strchr(tok, ',');
            if (comma == NULL)
                break;
            *comma = '\0';
            acmod->fcb->cmn_struct->cmn_mean[n] = FLOAT2MFCC(atof_c(tok));
        }

        /* A trailing value with no comma after it, if there is room. */
        if (n < acmod->fcb->cmn_struct->veclen && *tok != '\0') {
            acmod->fcb->cmn_struct->cmn_mean[n] = FLOAT2MFCC(atof_c(tok));
        }
        ckd_free(copy);
    }
    return 0;
}
bool FSpeechRecognitionWorker::Init() { std::string modelPath = contentPath_str + "model/" + langStr + "/" + langStr; std::string languageModel = contentPath_str + "model/" + langStr + "/" + langStr + ".lm.bin"; std::string dictionaryPath = contentPath_str + "model/" + langStr + "/" + langStr + ".dict"; // load dictionary dictionaryMap.clear(); std::ifstream file(dictionaryPath); std::vector<std::string> words; std::string currentLine; while (file.good()) { std::getline(file, currentLine); std::string word = currentLine.substr(0, currentLine.find(" ")); std::string phrase = currentLine.substr(currentLine.find(" ") + 1, currentLine.size()); dictionaryMap.insert(make_pair(word, phrase)); } // Start Sphinx config = cmd_ln_init(NULL, ps_args(), 1, "-hmm", modelPath.c_str(), "-lm", languageModel.c_str(), NULL); ps = ps_init(config); if (!Manager | !ps) { ClientMessage(FString(TEXT("Speech Recognition Thread failed to start"))); initSuccess = false; return false; } // only include the words/phrases that have been added for (auto It = dictionaryList.CreateConstIterator(); It; ++It) { FString word = *It; std::string wordStr = std::string(TCHAR_TO_UTF8(*word)); if (dictionaryMap.find(wordStr) != dictionaryMap.end()) { std::string phraseStr = dictionaryMap.at(wordStr); ps_add_word(ps, wordStr.c_str(), phraseStr.c_str(), TRUE); } } // attempt to open the default recording device if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"), (int)cmd_ln_float32_r(config, "-samprate"))) == NULL) { ClientMessage(FString(TEXT("Failed to open audio device"))); initSuccess = false; return initSuccess; } utt_started = 0; return true; }
int main(int argc, char *argv[]) { cmd_ln_t *config; ngram_model_t *lm = NULL; logmath_t *lmath; const char *lmfn, *probdefn, *lsnfn, *text; if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL) return 1; verbose = cmd_ln_boolean_r(config, "-verbose"); /* Create log math object. */ if ((lmath = logmath_init (cmd_ln_float64_r(config, "-logbase"), 0, 0)) == NULL) { E_FATAL("Failed to initialize log math\n"); } /* Load the language model. */ lmfn = cmd_ln_str_r(config, "-lm"); if (lmfn == NULL || (lm = ngram_model_read(config, lmfn, NGRAM_AUTO, lmath)) == NULL) { E_FATAL("Failed to load language model from %s\n", cmd_ln_str_r(config, "-lm")); } if ((probdefn = cmd_ln_str_r(config, "-probdef")) != NULL) ngram_model_read_classdef(lm, probdefn); ngram_model_apply_weights(lm, cmd_ln_float32_r(config, "-lw"), cmd_ln_float32_r(config, "-wip"), cmd_ln_float32_r(config, "-uw")); /* Now evaluate some text. */ lsnfn = cmd_ln_str_r(config, "-lsn"); text = cmd_ln_str_r(config, "-text"); if (lsnfn) { evaluate_file(lm, lmath, lsnfn); } else if (text) { evaluate_string(lm, lmath, text); } return 0; }
static void models_init(void) { mdef = mdef_init(cmd_ln_str_r(config, "-mdef"), 1); dict = dict_init(mdef, cmd_ln_str_r(config, "-dict"), cmd_ln_str_r(config, "-fdict"), cmd_ln_boolean_r(config, "-lts_mismatch"), cmd_ln_boolean_r(config, "-mdef_fillers"), FALSE, TRUE); lmset = lmset_init(cmd_ln_str_r(config, "-lm"), cmd_ln_str_r(config, "-lmctlfn"), cmd_ln_str_r(config, "-ctl_lm"), cmd_ln_str_r(config, "-lmname"), cmd_ln_str_r(config, "-lmdumpdir"), cmd_ln_float32_r(config, "-lw"), cmd_ln_float32_r(config, "-wip"), cmd_ln_float32_r(config, "-uw"), dict, logmath); /* Filler penalties */ fpen = fillpen_init(dict, cmd_ln_str_r(config, "-fillpen"), cmd_ln_float32_r(config, "-silprob"), cmd_ln_float32_r(config, "-fillprob"), cmd_ln_float32_r(config, "-lw"), cmd_ln_float32_r(config, "-wip"), logmath); }
int srch_FLAT_FWD_srch_one_frame_lv2(void *srch) { int32 bestscr; /* Best state score for any whmm evaluated in this frame */ int32 whmm_thresh; /* Threshold for any whmm to stay alive in search */ int32 word_thresh; /* Threshold for a word-final whmm to succeed */ int32 phone_penalty; srch_FLAT_FWD_graph_t *fwg; srch_t *s; s = (srch_t *) srch; fwg = (srch_FLAT_FWD_graph_t *) s->grh->graph_struct; ptmr_start(&(fwg->tm_hmmeval)); bestscr = whmm_eval(fwg, s->ascr->senscr); /* E_INFO("bestscr %d RENORM_THRESH %d\n",bestscr, RENORM_THRESH); */ ptmr_stop(&(fwg->tm_hmmeval)); whmm_thresh = bestscr + s->beam->hmm; word_thresh = bestscr + s->beam->word; phone_penalty = logs3(kbcore_logmath(s->kbc), cmd_ln_float32_r(kbcore_config(fwg->kbcore), "-phonepen")); assert(s->ascr->senscr); /* E_INFO("fwg->n_frm %d\n",fwg->n_frm); */ dump_fwd_dbg_info(fwg, fwg->fwdDBG, s->ascr, bestscr, whmm_thresh, word_thresh, s->ascr->senscr); { ptmr_start(&(fwg->tm_hmmtrans)); fwg->lathist->frm_latstart[fwg->n_frm] = fwg->lathist->n_lat_entry; whmm_exit(fwg, fwg->whmm, fwg->lathist, whmm_thresh, word_thresh, phone_penalty); ptmr_stop(&(fwg->tm_hmmtrans)); /* Please read, the In whmm_exit, if word ends are reach, n_lat_entry will increase, see whmm_exit(). Then word_trans will be triggered. */ ptmr_start(&(fwg->tm_wdtrans)); if (fwg->lathist->frm_latstart[fwg->n_frm] < fwg->lathist->n_lat_entry) word_trans(fwg, fwg->whmm, fwg->lathist, whmm_thresh, phone_penalty); ptmr_stop(&(fwg->tm_wdtrans)); } if (bestscr < RENORM_THRESH) { E_INFO("Frame %d: bestscore= %d; renormalizing\n", fwg->n_frm, bestscr); whmm_renorm(fwg, fwg->whmm, bestscr); } fwg->lathist->n_frm++; fwg->n_frm++; return SRCH_SUCCESS; }
/**
 * Apply an MLLR transform to the Gaussian codebooks.
 *
 * The means and variances are re-read from the files named by "-mean" and
 * "-var", each mean vector is replaced by A * mean + b and each variance is
 * scaled by h (using regression class 0 only), and the result is passed to
 * gauden_dist_precompute() with the "-varfloor" value.
 *
 * NOTE(review): g->mean, g->var and g->featlen are overwritten here without
 * being released in this function - confirm whether earlier values are
 * freed elsewhere.
 *
 * @param g      Codebooks to transform in place.
 * @param mllr   Transform supplying the A, b and h tables.
 * @param config Configuration naming the parameter files and variance floor.
 * @return 0.
 */
int32
gauden_mllr_transform(gauden_t *g, ps_mllr_t *mllr, cmd_ln_t *config)
{
    int32 cb, feat, den;                        /* codebook / stream / density */
    int32 var_n_mgau, var_n_feat, var_n_density;
    int32 *var_featlen;
    float32 ****params;

    /* Reload means and variances (un-precomputed). */
    params = NULL;
    gauden_param_read(&params, &g->n_mgau, &g->n_feat, &g->n_density,
                      &g->featlen, cmd_ln_str_r(config, "-mean"));
    g->mean = (mfcc_t ****)params;

    params = NULL;
    gauden_param_read(&params, &var_n_mgau, &var_n_feat, &var_n_density,
                      &var_featlen, cmd_ln_str_r(config, "-var"));
    g->var = (mfcc_t ****)params;

    /* Verify mean and variance parameter dimensions */
    if (!(var_n_mgau == g->n_mgau
          && var_n_feat == g->n_feat
          && var_n_density == g->n_density))
        E_FATAL
            ("Mixture-gaussians dimensions for means and variances differ\n");
    for (feat = 0; feat < g->n_feat; feat++) {
        if (g->featlen[feat] != var_featlen[feat])
            E_FATAL("Feature lengths for means and variances differ\n");
    }
    ckd_free(var_featlen);

    /* Transform every codebook, one feature stream at a time. */
    for (cb = 0; cb < g->n_mgau; ++cb) {
        for (feat = 0; feat < g->n_feat; ++feat) {
            /* Scratch vector for the transformed mean of one density. */
            float64 *xformed =
                (float64 *) ckd_calloc(g->featlen[feat], sizeof(float64));

            for (den = 0; den < g->n_density; den++) {
                int row;
                int32 col;

                /* The whole product is computed before anything is written
                 * back, because every row reads the full original mean. */
                for (row = 0; row < g->featlen[feat]; row++) {
                    xformed[row] = 0.0;
                    for (col = 0; col < g->featlen[feat]; col++) {
                        /* FIXME: For now, only one class, hence the zeros below. */
                        xformed[row] += mllr->A[feat][0][row][col]
                            * g->mean[cb][feat][den][col];
                    }
                    xformed[row] += mllr->b[feat][0][row];
                }

                for (row = 0; row < g->featlen[feat]; row++) {
                    g->mean[cb][feat][den][row] = (float32) xformed[row];
                    g->var[cb][feat][den][row] *= mllr->h[feat][0][row];
                }
            }
            ckd_free(xformed);
        }
    }

    /* Re-precompute (if we aren't adapting variances this isn't
     * actually necessary...) */
    gauden_dist_precompute(g, g->lmath, cmd_ln_float32_r(config, "-varfloor"));
    return 0;
}
int main(int argc, char *argv[]) { ngram_trie_t *t; dict_t *dict; bin_mdef_t *mdef; logmath_t *lmath; cmd_ln_t *config; FILE *arpafh; config = cmd_ln_init(NULL, ps_args(), TRUE, "-hmm", TESTDATADIR "/hub4wsj_sc_8k", "-dict", TESTDATADIR "/bn10000.homos.dic", NULL); ps_init_defaults(config); lmath = logmath_init(cmd_ln_float32_r(config, "-logbase"), 0, FALSE); mdef = bin_mdef_read(config, cmd_ln_str_r(config, "-mdef")); dict = dict_init(config, mdef); t = ngram_trie_init(dict, lmath); arpafh = fopen(TESTDATADIR "/bn10000.3g.arpa", "r"); ngram_trie_read_arpa(t, arpafh); fclose(arpafh); /* Test 1, 2, 3-gram probs without backoff. */ test_lookups(t, lmath); arpafh = fopen("tmp.bn10000.3g.arpa", "w"); ngram_trie_write_arpa(t, arpafh); fclose(arpafh); ngram_trie_free(t); t = ngram_trie_init(dict, lmath); arpafh = fopen("tmp.bn10000.3g.arpa", "r"); ngram_trie_read_arpa(t, arpafh); fclose(arpafh); /* Test 1, 2, 3-gram probs without backoff. */ test_lookups(t, lmath); /* Test adding nodes. */ test_add_nodes(t, lmath); ngram_trie_free(t); dict_free(dict); logmath_free(lmath); bin_mdef_free(mdef); cmd_ln_free_r(config); return 0; }
/**
 * Load the acoustic model: model definition, transition matrices and the
 * Gaussian mixture computation module.
 *
 * When "-senmgau" is set the general multi-stream GMM module is used
 * directly; otherwise the SCHMM module is tried first, then the PTHMM
 * module, and the multi-stream module is the final fallback.
 *
 * @return 0 on success, -1 if a required file is not specified or any
 *         component fails to load.
 */
static int
acmod_init_am(acmod_t *acmod)
{
    char const *mdeffn, *tmatfn;

    /* Read model definition. */
    if ((mdeffn = cmd_ln_str_r(acmod->config, "-mdef")) == NULL) {
        E_ERROR("Must specify -mdef or -hmm\n");
        return -1;
    }

    if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) {
        E_ERROR("Failed to read model definition from %s\n", mdeffn);
        return -1;
    }

    /* Read transition matrices. */
    if ((tmatfn = cmd_ln_str_r(acmod->config, "-tmat")) == NULL) {
        E_ERROR("No tmat file specified\n");
        return -1;
    }
    acmod->tmat = tmat_init(tmatfn, acmod->lmath,
                            cmd_ln_float32_r(acmod->config, "-tmatfloor"),
                            TRUE);
    /* Treat a failed load like the other components: report it and fail
     * here rather than leaving a NULL table behind for later use. */
    if (acmod->tmat == NULL) {
        E_ERROR("Failed to read transition matrices from %s\n", tmatfn);
        return -1;
    }

    /* Read the acoustic models. */
    if ((cmd_ln_str_r(acmod->config, "-mean") == NULL)
        || (cmd_ln_str_r(acmod->config, "-var") == NULL)
        || (cmd_ln_str_r(acmod->config, "-tmat") == NULL)) {
        E_ERROR("No mean/var/tmat files specified\n");
        return -1;
    }

    if (cmd_ln_str_r(acmod->config, "-senmgau")) {
        E_INFO("Using general multi-stream GMM computation\n");
        acmod->mgau = ms_mgau_init(acmod->config, acmod->lmath, acmod->mdef);
        if (acmod->mgau == NULL)
            return -1;
    }
    else {
        E_INFO("Attempting to use SCHMM computation module\n");
        if ((acmod->mgau = s2_semi_mgau_init(acmod)) == NULL) {
            E_INFO("Attempting to use PTHMM computation module\n");
            if ((acmod->mgau = ptm_mgau_init(acmod)) == NULL) {
                E_INFO("Falling back to general multi-stream GMM computation\n");
                acmod->mgau = ms_mgau_init(acmod->config, acmod->lmath, acmod->mdef);
                if (acmod->mgau == NULL)
                    return -1;
            }
        }
    }

    return 0;
}
int main(int argc, char *argv[]) { print_appl_info(argv[0]); cmd_ln_appl_enter(argc, argv, "default.arg", defn); unlimit(); config = cmd_ln_get(); logmath = logs3_init(cmd_ln_float64_r(config, "-logbase"), 1, cmd_ln_int32_r(config, "-log3table")); E_INFO("Value of base %f \n", cmd_ln_float32_r(config, "-logbase")); models_init(); ptmr_init(&tm_utt); if ((inmatchsegfp = fopen(cmd_ln_str_r(config, "-inhypseg"), "r")) == NULL) E_ERROR("fopen(%s,r) failed\n", cmd_ln_str_r(config, "-inhypseg")); if ((outconfmatchsegfp = fopen(cmd_ln_str_r(config, "-output"), "w")) == NULL) E_ERROR("fopen(%s,w) failed\n", cmd_ln_str_r(config, "-output")); if (cmd_ln_str_r(config, "-ctl")) { ctl_process(cmd_ln_str_r(config, "-ctl"), cmd_ln_str_r(config, "-ctl_lm"), NULL, cmd_ln_int32_r(config, "-ctloffset"), cmd_ln_int32_r(config, "-ctlcount"), utt_confidence, NULL); } else { E_FATAL("-ctl is not specified\n"); } #if (! WIN32) system("ps auxwww | grep s3dag"); #endif fclose(outconfmatchsegfp); fclose(inmatchsegfp); models_free(); logmath_free(logmath); cmd_ln_free_r(config); return 0; }
/**
 * (Re)initialise a phone loop search for a new dictionary / dict2pid pair.
 *
 * Rebuilds the HMM context, the penalty storage and one HMM per CI phone,
 * releasing any previous instances, and reloads the beam, weight and
 * insertion-penalty settings from the configuration.
 *
 * @return 0 on success, -1 if the HMM context cannot be created.
 */
static int
phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
{
    phone_loop_search_t *loop = (phone_loop_search_t *)search;
    cmd_ln_t *cfg = ps_search_config(search);
    acmod_t *am = ps_search_acmod(search);
    int ph;

    /* Free old dict2pid, dict, if necessary. */
    ps_search_base_reinit(search, dict, d2p);

    /* Initialize HMM context. */
    if (loop->hmmctx)
        hmm_context_free(loop->hmmctx);
    loop->hmmctx = hmm_context_init(bin_mdef_n_emit_state(am->mdef),
                                    am->tmat->tp, NULL, am->mdef->sseq);
    if (loop->hmmctx == NULL)
        return -1;

    /* Initialize penalty storage */
    loop->n_phones = bin_mdef_n_ciphone(am->mdef);
    loop->window = cmd_ln_int32_r(cfg, "-pl_window");

    if (loop->penalties)
        ckd_free(loop->penalties);
    loop->penalties = (int32 *)ckd_calloc(loop->n_phones,
                                          sizeof(*loop->penalties));

    if (loop->pen_buf)
        ckd_free_2d(loop->pen_buf);
    loop->pen_buf = (int32 **)ckd_calloc_2d(loop->window, loop->n_phones,
                                            sizeof(**loop->pen_buf));

    /* Initialize phone HMMs.  Any existing HMMs are torn down first, using
     * the phone count that was just updated above. */
    if (loop->hmms) {
        for (ph = 0; ph < loop->n_phones; ++ph)
            hmm_deinit((hmm_t *)&loop->hmms[ph]);
        ckd_free(loop->hmms);
    }
    loop->hmms = (hmm_t *)ckd_calloc(loop->n_phones, sizeof(*loop->hmms));
    for (ph = 0; ph < loop->n_phones; ++ph) {
        hmm_init(loop->hmmctx,
                 (hmm_t *)&loop->hmms[ph],
                 FALSE,
                 bin_mdef_pid2ssid(am->mdef, ph),
                 bin_mdef_pid2tmatid(am->mdef, ph));
    }

    /* Pruning parameters; the log values are shifted right by SENSCR_SHIFT. */
    loop->penalty_weight = cmd_ln_float64_r(cfg, "-pl_weight");
    loop->beam = logmath_log(am->lmath,
                             cmd_ln_float64_r(cfg, "-pl_beam")) >> SENSCR_SHIFT;
    loop->pbeam = logmath_log(am->lmath,
                              cmd_ln_float64_r(cfg, "-pl_pbeam")) >> SENSCR_SHIFT;
    loop->pip = logmath_log(am->lmath,
                            cmd_ln_float32_r(cfg, "-pl_pip")) >> SENSCR_SHIFT;

    E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n",
           loop->beam, loop->pbeam, loop->pip);

    return 0;
}
/**
 * Read a language model from a file and add it to the decoder's search.
 *
 * The model is read with lm_read_advance() using the "-lw", "-wip", "-uw",
 * "-ugonly" and "-bgonly" settings, then passed to the search's add_lm hook
 * under the given name.
 *
 * NOTE(review): the result of lm_read_advance() is passed to add_lm without
 * a NULL check - confirm how a read failure is reported.
 *
 * @param _decode Decoder whose search receives the model.
 * @param lmpath  Path of the language model file.
 * @param lmname  Name to register the model under.
 */
void
s3_decode_read_lm(s3_decode_t * _decode,
                  const char *lmpath, const char *lmname)
{
    srch_t *s = (srch_t *) _decode->kb.srch;
    int32 ndict = dict_size(_decode->kb.kbcore->dict);
    lm_t *lm;

    lm = lm_read_advance(lmpath,
                         lmname,
                         cmd_ln_float32_r(kbcore_config(_decode->kbcore), "-lw"),
                         cmd_ln_float32_r(kbcore_config(_decode->kbcore), "-wip"),
                         cmd_ln_float32_r(kbcore_config(_decode->kbcore), "-uw"),
                         ndict,
                         NULL,
                         1,     /* Weight apply */
                         kbcore_logmath(s->kbc),
                         cmd_ln_boolean_r(kbcore_config(_decode->kbcore), "-ugonly"),
                         cmd_ln_boolean_r(kbcore_config(_decode->kbcore), "-bgonly"));

    s->funcs->add_lm(s, lm, lmname);
}
/* * Main utterance processing loop: * for (;;) { * start utterance and wait for speech to process * decoding till end-of-utterance silence will be detected * print utterance result; * } */ static void recognize_from_microphone() { ad_rec_t *ad; int16 adbuf[2048]; uint8 utt_started, in_speech; int32 k; char const *hyp; if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"), (int) cmd_ln_float32_r(config, "-samprate"))) == NULL) E_FATAL("Failed to open audio device\n"); if (ad_start_rec(ad) < 0) E_FATAL("Failed to start recording\n"); if (ps_start_utt(ps) < 0) E_FATAL("Failed to start utterance\n"); utt_started = FALSE; E_INFO("Ready....\n"); for (;;) { if ((k = ad_read(ad, adbuf, 2048)) < 0) E_FATAL("Failed to read audio\n"); ps_process_raw(ps, adbuf, k, FALSE, FALSE); in_speech = ps_get_in_speech(ps); if (in_speech && !utt_started) { utt_started = TRUE; E_INFO("Listening...\n"); } if (!in_speech && utt_started) { /* speech -> silence transition, time to start new utterance */ ps_end_utt(ps); hyp = ps_get_hyp(ps, NULL ); if (hyp != NULL) { printf("%s\n", hyp); fflush(stdout); } if (ps_start_utt(ps) < 0) E_FATAL("Failed to start utterance\n"); utt_started = FALSE; E_INFO("Ready....\n"); } sleep_msec(100); } ad_close(ad); }
/**
 * Parse a JSGF grammar file, build an FSG from its start rule and register
 * it with the decoder under the given name.
 *
 * The start rule is "<toprule>" when the "-toprule" option is set, otherwise
 * the first public rule found while iterating over the grammar.
 *
 * @param ps   Decoder to register the grammar with.
 * @param name Name to register the resulting FSG under.
 * @param path Path of the JSGF file.
 * @return The result of ps_set_fsg(), or -1 if the file cannot be parsed or
 *         no start rule is found.
 */
int
ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path)
{
    fsg_model_t *fsg;
    jsgf_rule_t *rule;
    char const *toprule;
    jsgf_t *jsgf = jsgf_parse_file(path, NULL);
    float lw;
    int result;

    if (!jsgf)
        return -1;

    rule = NULL;
    /* Take the -toprule if specified. */
    if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) {
        char *ruletok;
        ruletok = string_join("<", toprule, ">", NULL);
        rule = jsgf_get_rule(jsgf, ruletok);
        ckd_free(ruletok);
        if (rule == NULL) {
            E_ERROR("Start rule %s not found\n", toprule);
            /* The parsed grammar is owned here; release it on every exit. */
            jsgf_grammar_free(jsgf);
            return -1;
        }
    }
    else {
        /* Otherwise, take the first public rule. */
        jsgf_rule_iter_t *itor;

        for (itor = jsgf_rule_iter(jsgf); itor;
             itor = jsgf_rule_iter_next(itor)) {
            /* Use a separate candidate so that a grammar with only private
             * rules leaves 'rule' NULL instead of pointing at the last
             * private rule visited. */
            jsgf_rule_t *candidate = jsgf_rule_iter_rule(itor);
            if (jsgf_rule_public(candidate)) {
                rule = candidate;
                jsgf_rule_iter_free(itor);
                break;
            }
        }
        if (rule == NULL) {
            E_ERROR("No public rules found in %s\n", path);
            jsgf_grammar_free(jsgf);
            return -1;
        }
    }

    lw = cmd_ln_float32_r(ps->config, "-lw");
    fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw);
    result = ps_set_fsg(ps, name, fsg);
    fsg_model_free(fsg);
    /* The grammar is no longer needed once the FSG has been built. */
    jsgf_grammar_free(jsgf);
    return result;
}
int main(int argc, char *argv[]) { cmd_ln_t *config; char const *cfg; /* Make sure we exit cleanly (needed for profiling among other things) */ /* Signals seem to be broken in arm-wince-pe. */ #if !defined(GNUWINCE) && !defined(_WIN32_WCE) signal(SIGINT, &sighandler); #endif if (argc == 2) { config = cmd_ln_parse_file_r(NULL, cont_args_def, argv[1], TRUE); } else { config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, FALSE); } /* Handle argument file as -argfile. */ if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) { config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE); } if (config == NULL) return 1; ps = ps_init(config); if (ps == NULL) return 1; if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"), (int)cmd_ln_float32_r(config, "-samprate"))) == NULL) E_FATAL("ad_open_dev failed\n"); E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__); if (setjmp(jbuf) == 0) { utterance_loop(); } ps_free(ps); ad_close(ad); return 0; }
/**
 * Worker thread body: record from the audio device and decode continuously
 * until StopTaskCounter becomes non-zero, reporting each recognised
 * hypothesis through Manager->WordSpoken_method().
 *
 * @return 0 on a normal stop; 1 if the device cannot be opened, 2 if
 *         recording cannot be started, 3 if the first utterance cannot be
 *         started.
 */
uint32 FSpeechRecognitionWorker::Run() {
	char const *hyp;

	// attempt to open the default recording device
	if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
		(int)cmd_ln_float32_r(config,
			"-samprate"))) == NULL) {
		ClientMessage(FString(TEXT("Failed to open audio device")));
		return 1;
	}

	if (ad_start_rec(ad) < 0) {
		ClientMessage(FString(TEXT("Failed to start recording")));
		// Do not leak the device opened above.
		ad_close(ad);
		ad = NULL;
		return 2;
	}

	if (ps_start_utt(ps) < 0) {
		ClientMessage(FString(TEXT("Failed to start utterance")));
		ad_close(ad);
		ad = NULL;
		return 3;
	}

	while (StopTaskCounter.GetValue() == 0) {
		if ((k = ad_read(ad, adbuf, 1024)) < 0) {
			ClientMessage(FString(TEXT("Failed to read audio")));
			// A negative count must never reach the decoder as a sample
			// count; skip this block and try again.
			continue;
		}
		ps_process_raw(ps, adbuf, k, 0, 0);
		in_speech = ps_get_in_speech(ps);
		if (in_speech && !utt_started) {
			utt_started = 1;
		}
		if (!in_speech && utt_started) {
			/* speech -> silence transition, time to start new utterance */
			ps_end_utt(ps);
			hyp = ps_get_hyp(ps, NULL);
			if (hyp != NULL)
				Manager->WordSpoken_method(FString(hyp));

			if (ps_start_utt(ps) < 0)
				ClientMessage(FString(TEXT("Failed to start")));
			utt_started = 0;
		}
	}

	ad_close(ad);
	return 0;
}
/**
 * Load the FSG named by one control-file entry and make it the active search.
 *
 * The file path is fname, prefixed with "-fsgdir" and a slash when that
 * option is set, and suffixed with "-fsgext" when that option is set.
 *
 * @param ps     Decoder to load the grammar into.
 * @param config Configuration supplying "-fsgdir", "-fsgext" and "-lw".
 * @param fname  Grammar name from the control file; NULL means nothing to do.
 * @return 0 on success or when fname is NULL, -1 if the grammar cannot be
 *         read, registered or selected.
 */
static int
process_fsgctl_line(ps_decoder_t *ps, cmd_ln_t *config, char const *fname)
{
    const char *dir;
    const char *ext;
    char *fsgpath = NULL;
    fsg_model_t *model;
    int status = 0;

    if (fname == NULL)
        return 0;

    dir = cmd_ln_str_r(config, "-fsgdir");
    ext = cmd_ln_str_r(config, "-fsgext");

    /* Build the path from whichever of directory and extension are set. */
    if (dir)
        fsgpath = string_join(dir, "/", fname, ext ? ext : "", NULL);
    else if (ext)
        fsgpath = string_join(fname, ext, NULL);
    else
        fsgpath = ckd_salloc(fname);

    model = fsg_model_readfile(fsgpath, ps_get_logmath(ps),
                               cmd_ln_float32_r(config, "-lw"));

    if (!model || ps_set_fsg(ps, fname, model)) {
        status = -1;
    }
    else {
        E_INFO("Using FSG: %s\n", fname);
        if (ps_set_search(ps, fname))
            status = -1;
    }

    /* Common cleanup for the success and failure paths alike. */
    fsg_model_free(model);
    ckd_free(fsgpath);

    return status;
}
/**
 * Output HTK format header.
 *
 * Writes four fields to wtf->outfh in order: the value count (4 bytes), the
 * sample period in 100ns units (4 bytes), the sample size in bytes
 * (2 bytes) and the parameter kind (2 bytes).  Each field is byte-swapped
 * first when "-mach_endian" is "little".
 *
 * @param wtf    Converter state supplying the configuration, vector length
 *               and output file handle.
 * @param nfloat Value written as the first header field.
 * @return 0 on success, -1 if any write fails.
 */
static int
output_header_htk(sphinx_wave2feat_t *wtf, int32 nfloat)
{
    int32 samp_period;
    int16 samp_size;
    int16 param_kind;
    int swap = FALSE;

    /* HTK files are big-endian. */
    if (0 == strcmp("little", cmd_ln_str_r(wtf->config, "-mach_endian")))
        swap = TRUE;

    /* Same file size thing as in Sphinx files (I think) */
    if (swap)
        SWAP_INT32(&nfloat);
    if (fwrite(&nfloat, 4, 1, wtf->outfh) != 1)
        return -1;

    /* Sample period in 100ns units. */
    samp_period = (int32)(1e+7 / cmd_ln_float32_r(wtf->config, "-frate"));
    if (swap)
        SWAP_INT32(&samp_period);
    if (fwrite(&samp_period, 4, 1, wtf->outfh) != 1)
        return -1;

    /* Sample size - veclen * sizeof each sample. */
    samp_size = wtf->veclen * 4;
    if (swap)
        SWAP_INT16(&samp_size);
    if (fwrite(&samp_size, 2, 1, wtf->outfh) != 1)
        return -1;

    /* Format and flags. */
    if (cmd_ln_boolean_r(wtf->config, "-logspec")
        || cmd_ln_boolean_r(wtf->config, "-cep2spec"))
        param_kind = FBANK;     /* log mel-filter bank outputs */
    else
        param_kind = MFCC | _O; /* MFCC + CEP0 (note reordering...) */
    if (swap)
        SWAP_INT16(&param_kind);
    if (fwrite(&param_kind, 2, 1, wtf->outfh) != 1)
        return -1;

    return 0;
}
/* * Continuous recognition from a file */ static void recognize_from_file() { int16 adbuf[2048]; const char *fname; const char *hyp; int32 k; uint8 utt_started, in_speech; int32 print_times = cmd_ln_boolean_r(config, "-time"); fname = cmd_ln_str_r(config, "-infile"); if ((rawfd = fopen(fname, "rb")) == NULL) { E_FATAL_SYSTEM("Failed to open file '%s' for reading", fname); } if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) { char waveheader[44]; fread(waveheader, 1, 44, rawfd); if (!check_wav_header(waveheader, (int)cmd_ln_float32_r(config, "-samprate"))) E_FATAL("Failed to process file '%s' due to format mismatch.\n", fname); } if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".mp3") == 0) { E_FATAL("Can not decode mp3 files, convert input file to WAV 16kHz 16-bit mono before decoding.\n"); } ps_start_utt(ps); utt_started = FALSE; while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) { ps_process_raw(ps, adbuf, k, FALSE, FALSE); in_speech = ps_get_in_speech(ps); if (in_speech && !utt_started) { utt_started = TRUE; } if (!in_speech && utt_started) { ps_end_utt(ps); hyp = ps_get_hyp(ps, NULL); if (hyp != NULL) printf("%s\n", hyp); if (print_times) print_word_times(); fflush(stdout); ps_start_utt(ps); utt_started = FALSE; } } ps_end_utt(ps); if (utt_started) { hyp = ps_get_hyp(ps, NULL); if (hyp != NULL) { printf("%s\n", hyp); if (print_times) { print_word_times(); } } } fclose(rawfd); }
int main(int argc, char *argv[]) { char const *cfg; int i; int16 buf[2048]; if (argc == 2) { config = cmd_ln_parse_file_r(NULL, cont_args_def, argv[1], TRUE); } else { config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, FALSE); } /* Handle argument file as -argfile. */ if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) { config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE); } if (config == NULL) return 1; singlefile = cmd_ln_boolean_r(config, "-singlefile"); if ((infile_path = cmd_ln_str_r(config, "-infile")) != NULL) { if ((infile = fopen(infile_path, "rb")) == NULL) { E_FATAL_SYSTEM("Failed to read audio from '%s'", infile_path); return 1; } read_audio = &read_audio_file; /* skip wav header */ read_audio(buf, 44); } else { if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"), (int) cmd_ln_float32_r(config, "-samprate"))) == NULL) { E_FATAL("Failed to open audio device\n"); return 1; } read_audio = &read_audio_adev; printf("Start recording ...\n"); fflush(stdout); if (ad_start_rec(ad) < 0) E_FATAL("Failed to start recording\n"); /* TODO remove this thing */ for (i = 0; i < 5; i++) { sleep_msec(200); read_audio(buf, 2048); } printf("You may speak now\n"); fflush(stdout); } fe = fe_init_auto_r(config); if (fe == NULL) return 1; segment_audio(); if (ad) ad_close(ad); if (infile) fclose(infile); fe_free(fe); cmd_ln_free_r(config); return 0; }
void segment_audio() { FILE *file; int16 pcm_buf[BLOCKSIZE]; mfcc_t **cep_buf; int16 voiced_buf = NULL; int32 voiced_nsamps, out_frameidx, uttstart = 0; char file_name[1024]; uint8 cur_vad_state, vad_state, writing; int uttno, uttlen, sample_rate; int32 nframes, nframes_tmp; int16 frame_size, frame_shift, frame_rate; size_t k; sample_rate = (int) cmd_ln_float32_r(config, "-samprate"); frame_rate = cmd_ln_int32_r(config, "-frate"); frame_size = (int32) (cmd_ln_float32_r(config, "-wlen") * sample_rate + 0.5); frame_shift = (int32) (sample_rate / cmd_ln_int32_r(config, "-frate") + 0.5); nframes = (BLOCKSIZE - frame_size) / frame_shift; cep_buf = (mfcc_t **) ckd_calloc_2d(nframes, fe_get_output_size(fe), sizeof(mfcc_t)); uttno = 0; uttlen = 0; cur_vad_state = 0; voiced_nsamps = 0; writing = 0; file = NULL; fe_start_stream(fe); fe_start_utt(fe); while ((k = read_audio(pcm_buf, BLOCKSIZE)) > 0) { int16 const *pcm_buf_tmp; pcm_buf_tmp = &pcm_buf[0]; while (k) { nframes_tmp = nframes; fe_process_frames_ext(fe, &pcm_buf_tmp, &k, cep_buf, &nframes_tmp, voiced_buf, &voiced_nsamps, &out_frameidx); if (out_frameidx > 0) { uttstart = out_frameidx; } vad_state = fe_get_vad_state(fe); if (!cur_vad_state && vad_state) { /* silence->speech transition, time to start new file */ uttno++; if (!singlefile) { sprintf(file_name, "%s%04d.raw", infile_path, uttno); if ((file = fopen(file_name, "wb")) == NULL) E_FATAL_SYSTEM("Failed to open '%s' for writing", file_name); } else { sprintf(file_name, "%s.raw", infile_path); if ((file = fopen(file_name, "ab")) == NULL) E_FATAL_SYSTEM("Failed to open '%s' for writing", file_name); } writing = 1; } if (writing && file && voiced_nsamps > 0) { fwrite(voiced_buf, sizeof(int16), voiced_nsamps, file); uttlen += voiced_nsamps; } if (cur_vad_state && !vad_state) { /* speech -> silence transition, time to finish file */ fclose(file); printf("Utterance %04d: file %s start %.1f sec length %d samples ( %.2f sec )\n", uttno, file_name, ((double) uttstart) / 
frame_rate, uttlen, ((double) uttlen) / sample_rate); fflush(stdout); fe_end_utt(fe, cep_buf[0], &nframes_tmp); writing = 0; uttlen = 0; voiced_nsamps = 0; fe_start_utt(fe); } cur_vad_state = vad_state; } } if (writing) { fclose(file); printf("Utterance %04d: file %s start %.1f sec length %d samples ( %.2f sec )\n", uttno, file_name, ((double) uttstart) / frame_rate, uttlen, ((double) uttlen) / sample_rate); fflush(stdout); } fe_end_utt(fe, cep_buf[0], &nframes); ckd_free_2d(cep_buf); }
/*
 * Load the acoustic model pieces named in acmod->config into `acmod`.
 *
 * Steps, in order:
 *   1. model definition from "-mdef" (stored in acmod->mdef),
 *   2. transition matrices from "-tmat" (stored in acmod->tmat),
 *   3. a Gaussian computation module (stored in acmod->mgau): the general
 *      multi-stream module when "-senmgau" is set, otherwise PTM, then
 *      semi-continuous, then multi-stream as successive fallbacks,
 *   4. an optional MLLR transform from "-mllr".
 *
 * Returns 0 on success and -1 as soon as any step fails.
 */
static int
acmod_init_am(acmod_t *acmod)
{
    char const *mdeffn, *tmatfn, *mllrfn, *hmmdir;

    /* Read model definition. */
    if ((mdeffn = cmd_ln_str_r(acmod->config, "-mdef")) == NULL) {
        if ((hmmdir = cmd_ln_str_r(acmod->config, "-hmm")) == NULL)
            E_ERROR("Acoustic model definition is not specified either "
                    "with -mdef option or with -hmm\n");
        else
            E_ERROR("Folder '%s' does not contain acoustic model "
                    "definition 'mdef'\n", hmmdir);

        return -1;
    }

    if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) {
        E_ERROR("Failed to read acoustic model definition from %s\n", mdeffn);
        return -1;
    }

    /* Read transition matrices. */
    if ((tmatfn = cmd_ln_str_r(acmod->config, "-tmat")) == NULL) {
        E_ERROR("No tmat file specified\n");
        return -1;
    }
    acmod->tmat = tmat_init(tmatfn, acmod->lmath,
                            cmd_ln_float32_r(acmod->config, "-tmatfloor"),
                            TRUE);
    /*
     * Treat a NULL result like every other loader failure in this function.
     * NOTE(review): assumes tmat_init() signals failure by returning NULL --
     * confirm against its declaration.
     */
    if (acmod->tmat == NULL) {
        E_ERROR("Failed to read transition matrices from %s\n", tmatfn);
        return -1;
    }

    /* Read the acoustic models. */
    if ((cmd_ln_str_r(acmod->config, "-mean") == NULL)
        || (cmd_ln_str_r(acmod->config, "-var") == NULL)
        || (cmd_ln_str_r(acmod->config, "-tmat") == NULL)) {
        E_ERROR("No mean/var/tmat files specified\n");
        return -1;
    }

    if (cmd_ln_str_r(acmod->config, "-senmgau")) {
        E_INFO("Using general multi-stream GMM computation\n");
        acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
        if (acmod->mgau == NULL)
            return -1;
    }
    else {
        /* Try each module in turn; the first one that initialises wins. */
        E_INFO("Attempting to use PTM computation module\n");
        if ((acmod->mgau = ptm_mgau_init(acmod, acmod->mdef)) == NULL) {
            E_INFO("Attempting to use semi-continuous computation module\n");
            if ((acmod->mgau = s2_semi_mgau_init(acmod)) == NULL) {
                E_INFO("Falling back to general multi-stream GMM computation\n");
                acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
                if (acmod->mgau == NULL)
                    return -1;
            }
        }
    }

    /* If there is an MLLR transform, apply it. */
    if ((mllrfn = cmd_ln_str_r(acmod->config, "-mllr"))) {
        ps_mllr_t *mllr = ps_mllr_read(mllrfn);
        if (mllr == NULL)
            return -1;
        acmod_update_mllr(acmod, mllr);
    }

    return 0;
}
/* * Main utterance processing loop: * for (;;) { * wait for start of next utterance; * decode utterance until silence of at least 1 sec observed; * print utterance result; * } */ static void recognize_from_microphone() { ad_rec_t *ad; int16 adbuf[4096]; int32 k, ts, rem; char const *hyp; char const *uttid; cont_ad_t *cont; char word[256]; char c1[256], c2[256]; int tracking = 0; int halted = 0; int LEFT = 0; int RIGHT = 1; int MOVE_CENT = 100; //1 meter int numwords; setlinebuf(stdout); if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"), (int)cmd_ln_float32_r(config, "-samprate"))) == NULL) E_FATAL("Failed to open audio device\n"); /* Initialize continuous listening module */ if ((cont = cont_ad_init(ad, ad_read)) == NULL) E_FATAL("Failed to initialize voice activity detection\n"); if (ad_start_rec(ad) < 0) E_FATAL("Failed to start recording\n"); if (cont_ad_calib(cont) < 0) E_FATAL("Failed to calibrate voice activity detection\n"); printf("LEDON BLUE\n"); for (;;) { /* Indicate listening for next utterance */ fprintf(stderr, "READY....\n"); fflush(stderr); /* Wait data for next utterance */ while ((k = cont_ad_read(cont, adbuf, 4096)) == 0) sleep_msec(100); if (k < 0) E_FATAL("Failed to read audio\n"); /* * Non-zero amount of data received; start recognition of new utterance. * NULL argument to uttproc_begin_utt => automatic generation of utterance-id. */ if (ps_start_utt(ps, NULL) < 0) E_FATAL("Failed to start utterance\n"); ps_process_raw(ps, adbuf, k, FALSE, FALSE); fprintf(stderr, "Listening...\n"); /* Note timestamp for this first block of data */ ts = cont->read_ts; /* Decode utterance until end (marked by a "long" silence, >1sec) */ for (;;) { /* Read non-silence audio data, if any, from continuous listening module */ if ((k = cont_ad_read(cont, adbuf, 4096)) < 0) E_FATAL("Failed to read audio\n"); if (k == 0) { /* * No speech data available; check current timestamp with most recent * speech to see if more than 1 sec elapsed. If so, end of utterance. 
*/ if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC) break; } else { /* New speech data received; note current timestamp */ ts = cont->read_ts; } /* * Decode whatever data was read above. */ rem = ps_process_raw(ps, adbuf, k, FALSE, FALSE); /* If no work to be done, sleep a bit */ if ((rem == 0) && (k == 0)) sleep_msec(20); } /* * Utterance ended; flush any accumulated, unprocessed A/D data and stop * listening until current utterance completely decoded */ ad_stop_rec(ad); while (ad_read(ad, adbuf, 4096) >= 0); cont_ad_reset(cont); fprintf(stderr, "Stopped listening, please wait...\n"); fflush(stdout); /* Finish decoding, obtain and print result */ ps_end_utt(ps); hyp = ps_get_hyp(ps, NULL, &uttid); fprintf(stderr, "%s: %s\n", uttid, hyp); /* Exit if the first word spoken was GOODBYE */ if (hyp) { numwords = sscanf(hyp, "%s %s %s", word, c1, c2); if(strcmp(word, "GUGGUG") == 0) { if(strcmp(c1, "HALT") == 0) { printf("LEDOFF BLUE\n"); halted = 1; } else if(strcmp(c1, "RESUME") == 0) { printf("LEDON BLUE\n"); halted = 0; } if(strcmp(c1, "BEGIN") == 0 || strcmp(c1, "START") == 0) { if(strcmp(c2, "TRACKING") == 0 && !tracking) { printf("START TRACKING\n"); tracking = 1; halted = 0; } } else if(strcmp(c1, "STOP") == 0) { if(strcmp(c2, "TRACKING") == 0 && tracking) { printf("STOP TRACKING\n"); tracking = 0; } } if(!tracking && !halted && numwords == 3) { if(strcmp(c1, "TURN") == 0) { if(strcmp(c2, "AROUND") == 0) { printf("TURN %d 180\n", LEFT); } else if(strcmp(c2, "LEFT") == 0) { printf("TURN %d 90\n", LEFT); } else if(strcmp(c2, "RIGHT") == 0) { printf("TURN %d 90\n", RIGHT); } } else if(strcmp(c1, "MOVE") == 0) { if(strcmp(c2, "FORWARD") == 0) { printf("MOVE 0 %d\n", MOVE_CENT); } else if(strcmp(c2, "BACKWARD") == 0) { printf("MOVE 1 %d\n", MOVE_CENT); } } } } } /* Resume A/D recording for next utterance */ if (ad_start_rec(ad) < 0) E_FATAL("Failed to start recording\n"); } cont_ad_close(cont); ad_close(ad); }
/* * Continuous recognition from a file */ static void recognize_from_file() { cont_ad_t *cont; ad_rec_t file_ad = {0}; int16 adbuf[4096]; const char* hyp; const char* uttid; int32 k, ts, start; char waveheader[44]; if ((rawfd = fopen(cmd_ln_str_r(config, "-infile"), "rb")) == NULL) { E_FATAL_SYSTEM("Failed to open file '%s' for reading", cmd_ln_str_r(config, "-infile")); } fread(waveheader, 1, 44, rawfd); file_ad.sps = (int32)cmd_ln_float32_r(config, "-samprate"); file_ad.bps = sizeof(int16); if ((cont = cont_ad_init(&file_ad, ad_file_read)) == NULL) { E_FATAL("Failed to initialize voice activity detection"); } if (cont_ad_calib(cont) < 0) E_FATAL("Failed to calibrate voice activity detection\n"); rewind (rawfd); for (;;) { while ((k = cont_ad_read(cont, adbuf, 4096)) == 0); if (k < 0) { break; } if (ps_start_utt(ps, NULL) < 0) E_FATAL("ps_start_utt() failed\n"); ps_process_raw(ps, adbuf, k, FALSE, FALSE); ts = cont->read_ts; start = ((ts - k) * 100.0) / file_ad.sps; for (;;) { if ((k = cont_ad_read(cont, adbuf, 4096)) < 0) break; if (k == 0) { /* * No speech data available; check current timestamp with most recent * speech to see if more than 1 sec elapsed. If so, end of utterance. */ if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC) break; } else { /* New speech data received; note current timestamp */ ts = cont->read_ts; } ps_process_raw(ps, adbuf, k, FALSE, FALSE); } ps_end_utt(ps); if (cmd_ln_boolean_r(config, "-time")) { print_word_times(start); } else { hyp = ps_get_hyp(ps, NULL, &uttid); fprintf(stderr, "%s: %s\n", uttid, hyp); } fflush(stdout); } cont_ad_close(cont); fclose(rawfd); }