/**
 * Load a new main dictionary (and optionally a filler dictionary) into
 * a decoder, replacing the ones currently in use.
 *
 * @param ps        Decoder to update.
 * @param dictfile  Path stored as "-dict" in the scratch configuration.
 * @param fdictfile Path stored as "_fdict"; when NULL the decoder's
 *                  current "_fdict" value is reused.
 * @param format    Not used by this implementation.
 * @return 0 on success, -1 if the dictionary or dict2pid could not be
 *         built, or if any search failed to reinitialize.
 */
int
ps_load_dict(ps_decoder_t *ps, char const *dictfile,
             char const *fdictfile, char const *format)
{
    dict2pid_t *d2p;
    dict_t *dict;
    hash_iter_t *search_it;
    cmd_ln_t *newconfig;

    /* Create a new scratch config to load this dict (so the existing
     * one won't be affected if loading fails). */
    newconfig = cmd_ln_init(NULL, ps_args(), TRUE, NULL);
    cmd_ln_set_boolean_r(newconfig, "-dictcase",
                         cmd_ln_boolean_r(ps->config, "-dictcase"));
    cmd_ln_set_str_r(newconfig, "-dict", dictfile);
    if (fdictfile)
        cmd_ln_set_str_extra_r(newconfig, "_fdict", fdictfile);
    else
        cmd_ln_set_str_extra_r(newconfig, "_fdict",
                               cmd_ln_str_r(ps->config, "_fdict"));

    /* Try to load it. */
    if ((dict = dict_init(newconfig, ps->acmod->mdef)) == NULL) {
        cmd_ln_free_r(newconfig);
        return -1;
    }

    /* Reinit the dict2pid. */
    if ((d2p = dict2pid_build(ps->acmod->mdef, dict)) == NULL) {
        /* The freshly loaded dictionary is not installed anywhere
         * yet, so it must be released here or it leaks. */
        dict_free(dict);
        cmd_ln_free_r(newconfig);
        return -1;
    }

    /* Success! The scratch config is no longer needed; drop the new
     * dictionary and dict2pid into place. */
    cmd_ln_free_r(newconfig);
    dict_free(ps->dict);
    ps->dict = dict;
    dict2pid_free(ps->d2p);
    ps->d2p = d2p;

    /* And tell all searches to reconfigure themselves. */
    for (search_it = hash_table_iter(ps->searches); search_it;
         search_it = hash_table_iter_next(search_it)) {
        if (ps_search_reinit(hash_entry_val(search_it->ent), dict, d2p) < 0) {
            /* Leaving the loop early: the iterator must be freed by hand. */
            hash_table_iter_free(search_it);
            return -1;
        }
    }

    return 0;
}
/**
 * Write the header of a senone score dump file.
 *
 * The header records a version string, the "-mdef" path from the
 * acoustic model configuration, the number of senones and the log base.
 *
 * @param acmod Acoustic model whose parameters are described.
 * @param logfh Open file handle the header is written to.
 * @return Whatever bio_writehdr() returns.
 */
int
acmod_write_senfh_header(acmod_t *acmod, FILE *logfh)
{
    char nsenstr[64], logbasestr[64];

    /* Bounded formatting: "%f" of a large double can exceed 64 bytes,
     * so never write past the end of the stack buffers. */
    snprintf(nsenstr, sizeof(nsenstr), "%d", bin_mdef_n_sen(acmod->mdef));
    snprintf(logbasestr, sizeof(logbasestr), "%f",
             logmath_get_base(acmod->lmath));

    return bio_writehdr(logfh,
                        "version", "0.1",
                        "mdef_file", cmd_ln_str_r(acmod->config, "-mdef"),
                        "n_sen", nsenstr,
                        "logbase", logbasestr, NULL);
}
/**
 * Fill in acoustic-model file arguments from the "-hmm" directory,
 * parse the model's feature parameter file if one is configured, and
 * print the resulting configuration to the log file (if any).
 */
static void
ps_expand_model_config(ps_decoder_t *ps)
{
    char const *model_dir;
    char const *params_path;

    /* Disable memory mapping on Blackfin (FIXME: should be uClinux in general). */
#ifdef __ADSPBLACKFIN__
    E_INFO("Will not use mmap() on uClinux/Blackfin.");
    cmd_ln_set_boolean_r(ps->config, "-mmap", FALSE);
#endif

    /* Register each model file found under the -hmm directory. */
    model_dir = cmd_ln_str_r(ps->config, "-hmm");
    if (model_dir != NULL) {
        ps_add_file(ps, "-mdef", model_dir, "mdef");
        ps_add_file(ps, "-mean", model_dir, "means");
        ps_add_file(ps, "-var", model_dir, "variances");
        ps_add_file(ps, "-tmat", model_dir, "transition_matrices");
        ps_add_file(ps, "-mixw", model_dir, "mixture_weights");
        ps_add_file(ps, "-sendump", model_dir, "sendump");
        ps_add_file(ps, "-fdict", model_dir, "noisedict");
        ps_add_file(ps, "-lda", model_dir, "feature_transform");
        ps_add_file(ps, "-featparams", model_dir, "feat.params");
        ps_add_file(ps, "-senmgau", model_dir, "senmgau");
    }

    /* Merge model-specific feature parameters into the configuration. */
    params_path = cmd_ln_str_r(ps->config, "-featparams");
    if (params_path != NULL
        && cmd_ln_parse_file_r(ps->config, feat_defn, params_path, FALSE) != NULL) {
        E_INFO("Parsed model-specific feature parameters from %s\n", params_path);
    }

    /* Values are printed only after feat.params has been merged in. */
    if (err_get_logfp() != NULL)
        cmd_ln_print_values_r(ps->config, err_get_logfp(), ps_args());
}
int main(int argc, char *argv[]) { char const *cfg; config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE); /* Handle argument file as -argfile. */ if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) { config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE); } if (config == NULL || (cmd_ln_str_r(config, "-infile") == NULL && cmd_ln_boolean_r(config, "-inmic") == FALSE)) { E_INFO("Specify '-infile <file.wav>' to recognize from file or '-inmic yes' to recognize from microphone.\n"); cmd_ln_free_r(config); return 1; } ps_default_search_args(config); ps = ps_init(config); if (ps == NULL) { cmd_ln_free_r(config); return 1; } E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__); if (cmd_ln_str_r(config, "-infile") != NULL) { recognize_from_file(); } else if (cmd_ln_boolean_r(config, "-inmic")) { recognize_from_microphone(); } ps_free(ps); cmd_ln_free_r(config); return 0; }
/**
 * Entry point of the batch decoder: parse arguments (plus an optional
 * -argfile), open the -ctl control file, initialize the decoder and
 * process every entry of the control file.
 *
 * @return 0 on success, 1 when no configuration could be parsed.
 *         Other failures are reported through E_FATAL.
 */
int
main(int32 argc, char *argv[])
{
    ps_decoder_t *ps;
    cmd_ln_t *config;
    char const *ctl;
    FILE *ctlfh;

    config = cmd_ln_parse_r(NULL, ps_args_def, argc, argv, TRUE);

    /* Handle argument file as -argfile. */
    if (config && (ctl = cmd_ln_str_r(config, "-argfile")) != NULL) {
        config = cmd_ln_parse_file_r(config, ps_args_def, ctl, FALSE);
    }

    if (config == NULL) {
        /* This probably just means that we got no arguments. */
        return 1;
    }

    if ((ctl = cmd_ln_str_r(config, "-ctl")) == NULL) {
        E_FATAL("-ctl argument not present, nothing to do in batch mode!\n");
    }
    if ((ctlfh = fopen(ctl, "r")) == NULL) {
        E_FATAL_SYSTEM("Failed to open control file '%s'", ctl);
    }

    ps_default_search_args(config);
    if (!(ps = ps_init(config)))
        E_FATAL("PocketSphinx decoder init failed\n");

    process_ctl(ps, config, ctlfh);

    fclose(ctlfh);
    ps_free(ps);
    /* Release this function's reference to the configuration, as the
     * other command-line front ends do after ps_free(). */
    cmd_ln_free_r(config);
    return 0;
}
/**
 * Supply default model paths when the program was built with MODELDIR.
 *
 * Missing -hmm, -lm and -dict values are filled in with the stock
 * en_US models if those exist on disk; relative values that do not
 * exist as given are re-rooted under MODELDIR. Without MODELDIR this
 * function does nothing.
 */
void
ps_default_search_args(cmd_ln_t *config)
{
#ifdef MODELDIR
    const char *acoustic_dir;
    const char *lm_path;
    const char *dict_path;

    /* Set default acoustic and language models. */
    acoustic_dir = cmd_ln_str_r(config, "-hmm");
    if (acoustic_dir == NULL
        && hmmdir_exists(MODELDIR "/hmm/en_US/hub4wsj_sc_8k")) {
        acoustic_dir = MODELDIR "/hmm/en_US/hub4wsj_sc_8k";
        cmd_ln_set_str_r(config, "-hmm", acoustic_dir);
    }

    /* The default LM only applies when no other search type was requested. */
    lm_path = cmd_ln_str_r(config, "-lm");
    if (lm_path == NULL
        && !cmd_ln_str_r(config, "-fsg")
        && !cmd_ln_str_r(config, "-jsgf")
        && !cmd_ln_str_r(config, "-kws")
        && !cmd_ln_str_r(config, "-keyphrase")
        && file_exists(MODELDIR "/lm/en_US/hub4.5000.DMP")) {
        lm_path = MODELDIR "/lm/en_US/hub4.5000.DMP";
        cmd_ln_set_str_r(config, "-lm", lm_path);
    }

    dict_path = cmd_ln_str_r(config, "-dict");
    if (dict_path == NULL
        && file_exists(MODELDIR "/lm/en_US/cmu07a.dic")) {
        dict_path = MODELDIR "/lm/en_US/cmu07a.dic";
        cmd_ln_set_str_r(config, "-dict", dict_path);
    }

    /* Expand acoustic and language model filenames relative to
     * installation path. */
    if (acoustic_dir != NULL
        && !path_is_absolute(acoustic_dir)
        && !hmmdir_exists(acoustic_dir)) {
        char *expanded = string_join(MODELDIR "/hmm/", acoustic_dir, NULL);

        if (!hmmdir_exists(expanded)) {
            E_ERROR("Failed to find mdef file inside the model folder "
                    "specified with -hmm `%s'\n", acoustic_dir);
        }
        else {
            cmd_ln_set_str_r(config, "-hmm", expanded);
        }
        ckd_free(expanded);
    }

    if (lm_path != NULL
        && !path_is_absolute(lm_path)
        && !file_exists(lm_path)) {
        char *expanded = string_join(MODELDIR "/lm/", lm_path, NULL);

        cmd_ln_set_str_r(config, "-lm", expanded);
        ckd_free(expanded);
    }

    if (dict_path != NULL
        && !path_is_absolute(dict_path)
        && !file_exists(dict_path)) {
        char *expanded = string_join(MODELDIR "/lm/", dict_path, NULL);

        cmd_ln_set_str_r(config, "-dict", expanded);
        ckd_free(expanded);
    }
#endif
}
/**
 * Initialize a decoder object from a parsed configuration.
 *
 * @param _decode Decoder to initialize; must not be NULL.
 * @param _config Configuration the knowledge base and flags are read from.
 * @return S3_DECODE_SUCCESS, or S3_DECODE_ERROR_NULL_POINTER when
 *         _decode is NULL.
 */
int
s3_decode_init(s3_decode_t * _decode, cmd_ln_t *_config)
{
    const char *machine_endian;
    const char *input_endian;

    if (_decode == NULL)
        return S3_DECODE_ERROR_NULL_POINTER;

    /* capture decoder parameters */
    kb_init(&_decode->kb, _config);

    /* initialize decoder variables */
    _decode->kbcore = _decode->kb.kbcore;
    _decode->state = S3_DECODE_STATE_IDLE;
    _decode->hyp_frame_num = -1;
    _decode->uttid = NULL;
    _decode->hyp_str = NULL;
    _decode->hyp_segs = NULL;

    /* Swapping is needed exactly when the two endianness names differ. */
    machine_endian = cmd_ln_str_r(_config, "-machine_endian");
    input_endian = cmd_ln_str_r(_config, "-input_endian");
    _decode->swap = strcmp(machine_endian, input_endian);
    if (!_decode->swap)
        E_INFO("Input data will NOT be byte swapped\n");
    else
        E_INFO("Input data WILL be byte swapped\n");

    _decode->phypdump = cmd_ln_int32_r(_config, "-phypdump");
    if (!_decode->phypdump)
        E_INFO("Partial hypothesis will NOT be dumped\n");
    else
        E_INFO("Partial hypothesis WILL be dumped\n");

    _decode->rawext = cmd_ln_str_r(_config, "-rawext");

    return S3_DECODE_SUCCESS;
}
/* * Continuous recognition from mic */ int recognize_from_mic() { ad_rec_t *ad; int16 adbuf[2048]; const char *fname; const char* seg; int32 k; char str[1000]=""; uint8 utt_started, in_speech; if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),16000)) == NULL) perror("Failed to open audio device\n"); if (ad_start_rec(ad) < 0) perror("Failed to start recording\n"); ps_start_utt(ps); utt_started = FALSE; ps_seg_t *psegt; while (!finished) { if ((k = ad_read(ad, adbuf, 2048)) < 0) perror("Failed to read audio\n"); ps_process_raw(ps, adbuf, k, FALSE, FALSE); in_speech = ps_get_in_speech(ps); if (in_speech && !utt_started) { utt_started = TRUE; } if (!in_speech && utt_started) { ps_end_utt(ps); psegt = ps_seg_iter(ps, NULL); while (psegt!=NULL){ seg = ps_seg_word(psegt); strncpy_s( str, seg, strlen(seg)); listenCallback(str); printf("%s\n", seg); int prob = ps_seg_prob(psegt,NULL,NULL,NULL); printf("%d\n", prob); psegt = ps_seg_next(psegt); } ps_start_utt(ps); utt_started = FALSE; } Sleep(100); } ps_end_utt(ps); fclose(rawfd); return 0; }
int main(int argc, char *argv[]) { print_appl_info(argv[0]); cmd_ln_appl_enter(argc, argv, "default.arg", defn); unlimit(); config = cmd_ln_get(); logmath = logs3_init(cmd_ln_float64_r(config, "-logbase"), 1, cmd_ln_int32_r(config, "-log3table")); E_INFO("Value of base %f \n", cmd_ln_float32_r(config, "-logbase")); models_init(); ptmr_init(&tm_utt); if ((inmatchsegfp = fopen(cmd_ln_str_r(config, "-inhypseg"), "r")) == NULL) E_ERROR("fopen(%s,r) failed\n", cmd_ln_str_r(config, "-inhypseg")); if ((outconfmatchsegfp = fopen(cmd_ln_str_r(config, "-output"), "w")) == NULL) E_ERROR("fopen(%s,w) failed\n", cmd_ln_str_r(config, "-output")); if (cmd_ln_str_r(config, "-ctl")) { ctl_process(cmd_ln_str_r(config, "-ctl"), cmd_ln_str_r(config, "-ctl_lm"), NULL, cmd_ln_int32_r(config, "-ctloffset"), cmd_ln_int32_r(config, "-ctlcount"), utt_confidence, NULL); } else { E_FATAL("-ctl is not specified\n"); } #if (! WIN32) system("ps auxwww | grep s3dag"); #endif fclose(outconfmatchsegfp); fclose(inmatchsegfp); models_free(); logmath_free(logmath); cmd_ln_free_r(config); return 0; }
/* * Main utterance processing loop: * for (;;) { * start utterance and wait for speech to process * decoding till end-of-utterance silence will be detected * print utterance result; * } */ static void recognize_from_microphone() { ad_rec_t *ad; int16 adbuf[2048]; uint8 utt_started, in_speech; int32 k; char const *hyp; if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"), (int) cmd_ln_float32_r(config, "-samprate"))) == NULL) E_FATAL("Failed to open audio device\n"); if (ad_start_rec(ad) < 0) E_FATAL("Failed to start recording\n"); if (ps_start_utt(ps) < 0) E_FATAL("Failed to start utterance\n"); utt_started = FALSE; E_INFO("Ready....\n"); for (;;) { if ((k = ad_read(ad, adbuf, 2048)) < 0) E_FATAL("Failed to read audio\n"); ps_process_raw(ps, adbuf, k, FALSE, FALSE); in_speech = ps_get_in_speech(ps); if (in_speech && !utt_started) { utt_started = TRUE; E_INFO("Listening...\n"); } if (!in_speech && utt_started) { /* speech -> silence transition, time to start new utterance */ ps_end_utt(ps); hyp = ps_get_hyp(ps, NULL ); if (hyp != NULL) { printf("%s\n", hyp); fflush(stdout); } if (ps_start_utt(ps) < 0) E_FATAL("Failed to start utterance\n"); utt_started = FALSE; E_INFO("Ready....\n"); } sleep_msec(100); } ad_close(ad); }
int main(int argc, char *argv[]) { cmd_ln_t *config; config = cmd_ln_parse_r(NULL, defs, argc, argv, TRUE); if (config == NULL) return 1; printf("%d %s %d %f\n", cmd_ln_int32_r(config, "-a"), cmd_ln_str_r(config, "-b") ? cmd_ln_str_r(config, "-b") : "(null)", cmd_ln_boolean_r(config, "-c"), cmd_ln_float64_r(config, "-d")); cmd_ln_free_r(config); config = cmd_ln_init(NULL, NULL, FALSE, "-b", "foobie", NULL); if (config == NULL) return 1; cmd_ln_free_r(config); config = cmd_ln_init(NULL, defs, TRUE, "-b", "foobie", NULL); if (config == NULL) return 1; printf("%d %s %d %f\n", cmd_ln_int32_r(config, "-a"), cmd_ln_str_r(config, "-b") ? cmd_ln_str_r(config, "-b") : "(null)", cmd_ln_boolean_r(config, "-c"), cmd_ln_float64_r(config, "-d")); cmd_ln_free_r(config); config = cmd_ln_init(NULL, NULL, FALSE, "-b", "foobie", NULL); if (config == NULL) return 1; printf("%s\n", cmd_ln_str_r(config, "-b") ? cmd_ln_str_r(config, "-b") : "(null)"); cmd_ln_set_str_r(config, "-b", "blatz"); printf("%s\n", cmd_ln_str_r(config, "-b") ? cmd_ln_str_r(config, "-b") : "(null)"); cmd_ln_free_r(config); return 0; }
/**
 * Parse a JSGF grammar file, build an FSG from its start rule and
 * register it with the decoder under the given name.
 *
 * The start rule is "<toprule>" when -toprule is configured, otherwise
 * the first public rule of the grammar.
 *
 * @param ps   Decoder to register the search with.
 * @param name Name passed on to ps_set_fsg().
 * @param path Path of the JSGF file.
 * @return The result of ps_set_fsg(), or -1 if the file cannot be
 *         parsed or no suitable start rule exists.
 */
int
ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path)
{
    fsg_model_t *fsg;
    jsgf_rule_t *rule = NULL;
    char const *toprule;
    jsgf_t *jsgf;
    float lw;
    int result;

    jsgf = jsgf_parse_file(path, NULL);
    if (!jsgf)
        return -1;

    /* Take the -toprule if specified. */
    if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) {
        char *ruletok;

        ruletok = string_join("<", toprule, ">", NULL);
        rule = jsgf_get_rule(jsgf, ruletok);
        ckd_free(ruletok);
        if (rule == NULL) {
            E_ERROR("Start rule %s not found\n", toprule);
            jsgf_grammar_free(jsgf);
            return -1;
        }
    }
    else {
        /* Otherwise, take the first public rule. Only a rule that is
         * actually public is remembered, so a grammar without public
         * rules is reported instead of silently using its last rule. */
        jsgf_rule_iter_t *itor;

        for (itor = jsgf_rule_iter(jsgf); itor;
             itor = jsgf_rule_iter_next(itor)) {
            jsgf_rule_t *candidate = jsgf_rule_iter_rule(itor);

            if (jsgf_rule_public(candidate)) {
                rule = candidate;
                jsgf_rule_iter_free(itor);
                break;
            }
        }
        if (rule == NULL) {
            E_ERROR("No public rules found in %s\n", path);
            jsgf_grammar_free(jsgf);
            return -1;
        }
    }

    lw = cmd_ln_float32_r(ps->config, "-lw");
    fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw);
    result = ps_set_fsg(ps, name, fsg);
    fsg_model_free(fsg);
    /* The parsed grammar is no longer needed once the FSG is built. */
    jsgf_grammar_free(jsgf);
    return result;
}
/**
 * Load the acoustic model files named in the configuration: model
 * definition, transition matrices and the Gaussian mixture module.
 *
 * When -senmgau is set the general multi-stream GMM module is used
 * directly; otherwise the SCHMM, PTHMM and multi-stream modules are
 * tried in that order.
 *
 * @return 0 on success, -1 if a required file is missing or a
 *         component failed to initialize.
 */
static int
acmod_init_am(acmod_t *acmod)
{
    char const *mdeffn, *tmatfn;

    /* Read model definition. */
    if ((mdeffn = cmd_ln_str_r(acmod->config, "-mdef")) == NULL) {
        E_ERROR("Must specify -mdef or -hmm\n");
        return -1;
    }

    if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) {
        E_ERROR("Failed to read model definition from %s\n", mdeffn);
        return -1;
    }

    /* Read transition matrices. */
    if ((tmatfn = cmd_ln_str_r(acmod->config, "-tmat")) == NULL) {
        E_ERROR("No tmat file specified\n");
        return -1;
    }
    acmod->tmat = tmat_init(tmatfn, acmod->lmath,
                            cmd_ln_float32_r(acmod->config, "-tmatfloor"),
                            TRUE);
    /* Do not continue with a model that has no transition matrices. */
    if (acmod->tmat == NULL) {
        E_ERROR("Failed to read transition matrices from %s\n", tmatfn);
        return -1;
    }

    /* Read the acoustic models. */
    if ((cmd_ln_str_r(acmod->config, "-mean") == NULL)
        || (cmd_ln_str_r(acmod->config, "-var") == NULL)
        || (cmd_ln_str_r(acmod->config, "-tmat") == NULL)) {
        E_ERROR("No mean/var/tmat files specified\n");
        return -1;
    }

    if (cmd_ln_str_r(acmod->config, "-senmgau")) {
        E_INFO("Using general multi-stream GMM computation\n");
        acmod->mgau = ms_mgau_init(acmod->config, acmod->lmath, acmod->mdef);
        if (acmod->mgau == NULL)
            return -1;
    }
    else {
        /* Try the specialized modules first, most specific first. */
        E_INFO("Attempting to use SCHMM computation module\n");
        acmod->mgau = s2_semi_mgau_init(acmod);
        if (acmod->mgau == NULL) {
            E_INFO("Attempting to use PTHMM computation module\n");
            acmod->mgau = ptm_mgau_init(acmod);
        }
        if (acmod->mgau == NULL) {
            E_INFO("Falling back to general multi-stream GMM computation\n");
            acmod->mgau = ms_mgau_init(acmod->config, acmod->lmath, acmod->mdef);
            if (acmod->mgau == NULL)
                return -1;
        }
    }

    return 0;
}
/**
 * Process Sphinx MFCCs/logspectra from a filehandle. Assume that
 * wtf->infh is positioned just after the file header.
 *
 * Frames are read in batches, optionally byte-swapped and converted
 * (-spec2cep or -cep2spec), then handed to the output callback. The
 * input file is closed once it has been read to the end.
 *
 * @return Sum of the values returned by the output callback, or -1 on
 *         a truncated input vector or an output failure.
 */
static int
decode_sphinx_mfc(sphinx_wave2feat_t *wtf)
{
    int nfloat = 0, n;
    int featsize = wtf->featsize;
    int spec2cep, cep2spec, legacy = 0;

    /* The conversion mode cannot change while the file is processed,
     * so look it up once instead of once per frame. -transform is only
     * consulted when -spec2cep is on, and -cep2spec only when it is
     * off, mirroring the order in which they take effect below. */
    spec2cep = cmd_ln_boolean_r(wtf->config, "-spec2cep");
    cep2spec = !spec2cep && cmd_ln_boolean_r(wtf->config, "-cep2spec");
    if (spec2cep)
        legacy = (0 == strcmp(cmd_ln_str_r(wtf->config, "-transform"),
                              "legacy"));

    /* If the input vector length is less than the output length, we
     * need to do this one frame at a time, because there's empty
     * space at the end of each vector in wtf->feat. */
    if (wtf->in_veclen < wtf->veclen)
        featsize = 1;
    while ((n = fread(wtf->feat[0], sizeof(**wtf->feat),
                      featsize * wtf->in_veclen, wtf->infh)) != 0) {
        int i, nfr = n / wtf->in_veclen;

        if (n % wtf->in_veclen) {
            E_ERROR("Size of file %d not a multiple of veclen %d\n",
                    n, wtf->in_veclen);
            return -1;
        }
        /* Byteswap stuff here if necessary. */
        if (wtf->byteswap) {
            for (i = 0; i < n; ++i)
                SWAP_FLOAT32(wtf->feat[0] + i);
        }
        fe_float_to_mfcc(wtf->fe, (float32 **)wtf->feat, wtf->feat, nfr);

        if (spec2cep) {
            for (i = 0; i < nfr; ++i) {
                if (legacy)
                    fe_logspec_to_mfcc(wtf->fe, wtf->feat[i], wtf->feat[i]);
                else
                    fe_logspec_dct2(wtf->fe, wtf->feat[i], wtf->feat[i]);
            }
        }
        else if (cep2spec) {
            for (i = 0; i < nfr; ++i)
                fe_mfcc_dct3(wtf->fe, wtf->feat[i], wtf->feat[i]);
        }

        if ((n = (*wtf->ot->output_frames)(wtf, wtf->feat, nfr)) < 0)
            return -1;
        nfloat += n;
    }

    if (fclose(wtf->infh) == EOF)
        E_ERROR_SYSTEM("Failed to close input file");
    wtf->infh = NULL;
    return nfloat;
}
/**
 * Convert a JSGF grammar to an FSG.
 *
 * Reads the grammar named by -jsgf, builds the FSG for -rule (or the
 * default chosen by get_fsg() when -rule is absent), optionally applies
 * the null-transition closure (-compile), and writes the result either
 * in FSM format (-fsm, plus -symtab if given) or in FSG format (-fsg,
 * or stdout when -fsg is absent).
 *
 * @return 0 on success, 1 if arguments or the grammar cannot be parsed.
 */
int
main(int argc, char *argv[])
{
    jsgf_t *jsgf;
    fsg_model_t *fsg;
    cmd_ln_t *config;
    const char *fsmfile;

    if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL)
        return 1;

    if (cmd_ln_boolean_r(config, "-help")) {
        usagemsg(argv[0]);
    }

    jsgf = jsgf_parse_file(cmd_ln_str_r(config, "-jsgf"), NULL);
    if (jsgf == NULL) {
        /* Do not leak the configuration on the error path. */
        cmd_ln_free_r(config);
        return 1;
    }

    /* A missing -rule is already NULL, so pass the lookup through. */
    fsg = get_fsg(jsgf, cmd_ln_str_r(config, "-rule"));

    if (cmd_ln_boolean_r(config, "-compile")) {
        fsg_model_null_trans_closure(fsg, NULL);
    }

    fsmfile = cmd_ln_str_r(config, "-fsm");
    if (fsmfile != NULL) {
        const char *symfile = cmd_ln_str_r(config, "-symtab");

        fsg_model_writefile_fsm(fsg, fsmfile);
        if (symfile)
            fsg_model_writefile_symtab(fsg, symfile);
    }
    else {
        const char *outfile = cmd_ln_str_r(config, "-fsg");

        if (outfile)
            fsg_model_writefile(fsg, outfile);
        else
            fsg_model_write(fsg, stdout);
    }

    fsg_model_free(fsg);
    jsgf_grammar_free(jsgf);
    /* Strings obtained from config are no longer used past this point. */
    cmd_ln_free_r(config);

    return 0;
}
/**
 * Resolve one model file argument into its "extra" configuration slot.
 *
 * Precedence: an explicit value of `arg` wins; otherwise, if a model
 * directory is known and `hmmdir/file` exists, that path is used; in
 * every other case the slot is set to NULL.
 */
static void
ps_expand_file_config(ps_decoder_t *ps, const char *arg, const char *extra_arg,
                      const char *hmmdir, const char *file)
{
    const char *explicit_path;
    char *candidate;

    explicit_path = cmd_ln_str_r(ps->config, arg);
    if (explicit_path != NULL) {
        cmd_ln_set_str_extra_r(ps->config, extra_arg, explicit_path);
        return;
    }

    if (hmmdir == NULL) {
        cmd_ln_set_str_extra_r(ps->config, extra_arg, NULL);
        return;
    }

    /* Fall back to the conventional file name inside the model dir. */
    candidate = string_join(hmmdir, "/", file, NULL);
    cmd_ln_set_str_extra_r(ps->config, extra_arg,
                           file_exists(candidate) ? candidate : NULL);
    ckd_free(candidate);
}
/**
 * Worker loop: open the recording device named by -adcdev, then read
 * and decode audio until StopTaskCounter becomes non-zero. Each time
 * speech is followed by silence the hypothesis (if any) is forwarded to
 * Manager->WordSpoken_method() and a new utterance is started.
 *
 * @return 0 after a normal stop; 1 if the device cannot be opened,
 *         2 if recording cannot be started, 3 if the first utterance
 *         cannot be started.
 */
uint32 FSpeechRecognitionWorker::Run()
{
	char const *hyp;

	// attempt to open the default recording device
	if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
		(int)cmd_ln_float32_r(config, "-samprate"))) == NULL) {
		ClientMessage(FString(TEXT("Failed to open audio device")));
		return 1;
	}

	if (ad_start_rec(ad) < 0) {
		ClientMessage(FString(TEXT("Failed to start recording")));
		// the device was opened above; release it before bailing out
		ad_close(ad);
		return 2;
	}

	if (ps_start_utt(ps) < 0) {
		ClientMessage(FString(TEXT("Failed to start utterance")));
		ad_close(ad);
		return 3;
	}

	while (StopTaskCounter.GetValue() == 0) {
		if ((k = ad_read(ad, adbuf, 1024)) < 0) {
			ClientMessage(FString(TEXT("Failed to read audio")));
			// never hand a negative sample count to the decoder
			continue;
		}
		ps_process_raw(ps, adbuf, k, 0, 0);
		in_speech = ps_get_in_speech(ps);

		if (in_speech && !utt_started) {
			utt_started = 1;
		}
		if (!in_speech && utt_started) {
			/* speech -> silence transition, time to start new utterance */
			ps_end_utt(ps);
			hyp = ps_get_hyp(ps, NULL);
			if (hyp != NULL)
				Manager->WordSpoken_method(FString(hyp));

			if (ps_start_utt(ps) < 0)
				ClientMessage(FString(TEXT("Failed to start")));
			utt_started = 0;
		}
	}

	ad_close(ad);
	return 0;
}
/**
 * Parse a JSGF grammar file, build an FSG from its start rule and
 * register it with the decoder under `name`.
 *
 * The start rule is the one named by -toprule when that option is set,
 * otherwise the grammar's public rule.
 *
 * @return The result of ps_set_fsg(), or -1 if the file cannot be
 *         parsed or the start rule cannot be found.
 */
int
ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path)
{
    jsgf_t *grammar;
    jsgf_rule_t *start_rule;
    fsg_model_t *model;
    char const *top_name;
    float weight;
    int status = -1;

    grammar = jsgf_parse_file(path, NULL);
    if (grammar == NULL)
        return -1;

    top_name = cmd_ln_str_r(ps->config, "-toprule");
    if (top_name != NULL) {
        /* An explicitly requested start rule must exist. */
        start_rule = jsgf_get_rule(grammar, top_name);
        if (start_rule == NULL) {
            E_ERROR("Start rule %s not found\n", top_name);
            goto done;
        }
    }
    else {
        start_rule = jsgf_get_public_rule(grammar);
        if (start_rule == NULL) {
            E_ERROR("No public rules found in %s\n", path);
            goto done;
        }
    }

    weight = cmd_ln_float32_r(ps->config, "-lw");
    model = jsgf_build_fsg(grammar, start_rule, ps->lmath, weight);
    status = ps_set_fsg(ps, name, model);
    fsg_model_free(model);

done:
    /* Single exit: the parsed grammar is released on every path. */
    jsgf_grammar_free(grammar);
    return status;
}
/**
 * Allocate the debugging state of a flat forward search and fill it
 * from the configuration.
 *
 * -tracewhmm selects the word to trace (BAD_S3WID when unset). The four
 * dump frame numbers default to 0x7ffffff0 and are overridden by any
 * non-zero -worddumpsf / -worddumpef / -hmmdumpsf / -hmmdumpef value.
 */
static fwd_dbg_t *
init_fwd_dbg(srch_FLAT_FWD_graph_t * fwg)
{
    fwd_dbg_t *dbg;
    const char *traced_word;
    int32 frame;

    dbg = (fwd_dbg_t *) ckd_calloc(1, sizeof(fwd_dbg_t));
    assert(dbg);

    /* Word to be traced in detail */
    traced_word = cmd_ln_str_r(kbcore_config(fwg->kbcore), "-tracewhmm");
    if (traced_word == NULL) {
        dbg->trace_wid = BAD_S3WID;
    }
    else {
        dbg->trace_wid = dict_wordid(fwg->kbcore->dict, traced_word);
        if (NOT_S3WID(dbg->trace_wid))
            E_ERROR("%s not in dictionary; cannot be traced\n", traced_word);
    }

    /* Active words to be dumped for debugging after and before the
     * given frame nos, if any */
    frame = cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-worddumpsf");
    dbg->word_dump_sf = frame ? frame : (int32) 0x7ffffff0;

    frame = cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-worddumpef");
    dbg->word_dump_ef = frame ? frame : (int32) 0x7ffffff0;

    /* Active HMMs to be dumped for debugging after and before the
     * given frame nos, if any */
    frame = cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-hmmdumpsf");
    dbg->hmm_dump_sf = frame ? frame : (int32) 0x7ffffff0;

    frame = cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-hmmdumpef");
    dbg->hmm_dump_ef = frame ? frame : (int32) 0x7ffffff0;

    return dbg;
}
/**
 * Output HTK format header.
 *
 * Writes four fields to wtf->outfh: the value count passed in
 * `nfloat` (4 bytes), the sample period in 100ns units derived from
 * -frate (4 bytes), the sample size in bytes (2 bytes) and the
 * parameter kind (2 bytes). Fields are byte-swapped when -mach_endian
 * is "little".
 *
 * @return 0 on success, -1 if any field could not be written.
 */
static int
output_header_htk(sphinx_wave2feat_t *wtf, int32 nfloat)
{
    int32 samp_period;
    int16 samp_size;
    int16 param_kind;
    int swap = FALSE;

    /* HTK files are big-endian. */
    if (0 == strcmp("little", cmd_ln_str_r(wtf->config, "-mach_endian")))
        swap = TRUE;
    /* Same file size thing as in Sphinx files (I think) */
    if (swap)
        SWAP_INT32(&nfloat);
    if (fwrite(&nfloat, 4, 1, wtf->outfh) != 1)
        return -1;
    /* Sample period in 100ns units. */
    samp_period = (int32)(1e+7 / cmd_ln_float32_r(wtf->config, "-frate"));
    if (swap)
        SWAP_INT32(&samp_period);
    if (fwrite(&samp_period, 4, 1, wtf->outfh) != 1)
        return -1;
    /* Sample size - veclen * sizeof each sample. */
    samp_size = wtf->veclen * 4;
    if (swap)
        SWAP_INT16(&samp_size);
    if (fwrite(&samp_size, 2, 1, wtf->outfh) != 1)
        return -1;
    /* Format and flags. */
    if (cmd_ln_boolean_r(wtf->config, "-logspec")
        || cmd_ln_boolean_r(wtf->config, "-cep2spec"))
        param_kind = FBANK; /* log mel-filter bank outputs */
    else
        param_kind = MFCC | _O; /* MFCC + CEP0 (note reordering...) */
    /* The address-of operator had been corrupted in these two
     * statements, which made them invalid C. */
    if (swap)
        SWAP_INT16(&param_kind);
    if (fwrite(&param_kind, 2, 1, wtf->outfh) != 1)
        return -1;

    return 0;
}
/**
 * Apply platform defaults and register the acoustic model files found
 * under the -hmm directory (if one is configured) as the corresponding
 * command-line arguments.
 */
static void
ps_init_defaults(ps_decoder_t *ps)
{
    char const *model_dir;

    /* Disable memory mapping on Blackfin (FIXME: should be uClinux in general). */
#ifdef __ADSPBLACKFIN__
    E_INFO("Will not use mmap() on uClinux/Blackfin.");
    cmd_ln_set_boolean_r(ps->config, "-mmap", FALSE);
#endif

    /* Get acoustic model filenames and add them to the command-line */
    model_dir = cmd_ln_str_r(ps->config, "-hmm");
    if (model_dir == NULL)
        return;

    ps_add_file(ps, "-mdef", model_dir, "mdef");
    ps_add_file(ps, "-mean", model_dir, "means");
    ps_add_file(ps, "-var", model_dir, "variances");
    ps_add_file(ps, "-tmat", model_dir, "transition_matrices");
    ps_add_file(ps, "-mixw", model_dir, "mixture_weights");
    ps_add_file(ps, "-sendump", model_dir, "sendump");
    ps_add_file(ps, "-fdict", model_dir, "noisedict");
    ps_add_file(ps, "-lda", model_dir, "feature_transform");
    ps_add_file(ps, "-featparams", model_dir, "feat.params");
    ps_add_file(ps, "-senmgau", model_dir, "senmgau");
}
/**
 * Re-read the Gaussian means and variances from the -mean and -var
 * files and apply an MLLR transform to them.
 *
 * For every codebook, feature stream and density the mean becomes
 * A * mean + b and each variance component is scaled by h, using class
 * 0 of the transform. The distributions are then precomputed again
 * with the -varfloor value.
 *
 * @return Always 0; dimension mismatches are reported via E_FATAL.
 */
int32
gauden_mllr_transform(gauden_t *g, ps_mllr_t *mllr, cmd_ln_t *config)
{
    int32 cb, feat, den;
    int32 var_n_mgau, var_n_feat, var_n_density;
    int32 *var_featlen;
    float32 ****raw;

    /* Free data if already here */
    if (g->mean)
        gauden_param_free(g->mean);
    if (g->var)
        gauden_param_free(g->var);
    if (g->det)
        ckd_free_3d(g->det);
    if (g->featlen)
        ckd_free(g->featlen);
    g->mean = 0;
    g->var = 0;
    g->det = 0;
    g->featlen = 0;

    /* Reload means and variances (un-precomputed). */
    raw = 0;
    gauden_param_read(&raw, &g->n_mgau, &g->n_feat, &g->n_density,
                      &g->featlen, cmd_ln_str_r(config, "-mean"));
    g->mean = (mfcc_t ****) raw;

    raw = 0;
    gauden_param_read(&raw, &var_n_mgau, &var_n_feat, &var_n_density,
                      &var_featlen, cmd_ln_str_r(config, "-var"));
    g->var = (mfcc_t ****) raw;

    /* Verify mean and variance parameter dimensions */
    if ((var_n_mgau != g->n_mgau) || (var_n_feat != g->n_feat)
        || (var_n_density != g->n_density))
        E_FATAL
            ("Mixture-gaussians dimensions for means and variances differ\n");
    for (feat = 0; feat < g->n_feat; feat++) {
        if (g->featlen[feat] != var_featlen[feat])
            E_FATAL("Feature lengths for means and variances differ\n");
    }
    ckd_free(var_featlen);

    /* Transform codebook for each stream */
    for (cb = 0; cb < g->n_mgau; ++cb) {
        for (feat = 0; feat < g->n_feat; ++feat) {
            float64 *scratch;

            scratch = (float64 *) ckd_calloc(g->featlen[feat], sizeof(float64));

            /* Transform each density in selected codebook */
            for (den = 0; den < g->n_density; den++) {
                int row, col;

                /* scratch = A * mean + b, accumulated in float64 */
                for (row = 0; row < g->featlen[feat]; row++) {
                    scratch[row] = 0.0;
                    for (col = 0; col < g->featlen[feat]; col++) {
                        /* FIXME: For now, only one class, hence the zeros below. */
                        scratch[row] +=
                            mllr->A[feat][0][row][col] * g->mean[cb][feat][den][col];
                    }
                    scratch[row] += mllr->b[feat][0][row];
                }

                /* Write back only after the whole vector is computed. */
                for (row = 0; row < g->featlen[feat]; row++) {
                    g->mean[cb][feat][den][row] = (float32) scratch[row];
                    g->var[cb][feat][den][row] *= mllr->h[feat][0][row];
                }
            }
            ckd_free(scratch);
        }
    }

    /* Re-precompute (if we aren't adapting variances this isn't
     * actually necessary...) */
    gauden_dist_precompute(g, g->lmath, cmd_ln_float32_r(config, "-varfloor"));
    return 0;
}
/* * Main utterance processing loop: * for (;;) { * wait for start of next utterance; * decode utterance until silence of at least 1 sec observed; * print utterance result; * } */ static void recognize_from_microphone() { ad_rec_t *ad; int16 adbuf[4096]; int32 k, ts, rem; char const *hyp; char const *uttid; cont_ad_t *cont; char word[256]; char c1[256], c2[256]; int tracking = 0; int halted = 0; int LEFT = 0; int RIGHT = 1; int MOVE_CENT = 100; //1 meter int numwords; setlinebuf(stdout); if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"), (int)cmd_ln_float32_r(config, "-samprate"))) == NULL) E_FATAL("Failed to open audio device\n"); /* Initialize continuous listening module */ if ((cont = cont_ad_init(ad, ad_read)) == NULL) E_FATAL("Failed to initialize voice activity detection\n"); if (ad_start_rec(ad) < 0) E_FATAL("Failed to start recording\n"); if (cont_ad_calib(cont) < 0) E_FATAL("Failed to calibrate voice activity detection\n"); printf("LEDON BLUE\n"); for (;;) { /* Indicate listening for next utterance */ fprintf(stderr, "READY....\n"); fflush(stderr); /* Wait data for next utterance */ while ((k = cont_ad_read(cont, adbuf, 4096)) == 0) sleep_msec(100); if (k < 0) E_FATAL("Failed to read audio\n"); /* * Non-zero amount of data received; start recognition of new utterance. * NULL argument to uttproc_begin_utt => automatic generation of utterance-id. */ if (ps_start_utt(ps, NULL) < 0) E_FATAL("Failed to start utterance\n"); ps_process_raw(ps, adbuf, k, FALSE, FALSE); fprintf(stderr, "Listening...\n"); /* Note timestamp for this first block of data */ ts = cont->read_ts; /* Decode utterance until end (marked by a "long" silence, >1sec) */ for (;;) { /* Read non-silence audio data, if any, from continuous listening module */ if ((k = cont_ad_read(cont, adbuf, 4096)) < 0) E_FATAL("Failed to read audio\n"); if (k == 0) { /* * No speech data available; check current timestamp with most recent * speech to see if more than 1 sec elapsed. If so, end of utterance. 
*/ if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC) break; } else { /* New speech data received; note current timestamp */ ts = cont->read_ts; } /* * Decode whatever data was read above. */ rem = ps_process_raw(ps, adbuf, k, FALSE, FALSE); /* If no work to be done, sleep a bit */ if ((rem == 0) && (k == 0)) sleep_msec(20); } /* * Utterance ended; flush any accumulated, unprocessed A/D data and stop * listening until current utterance completely decoded */ ad_stop_rec(ad); while (ad_read(ad, adbuf, 4096) >= 0); cont_ad_reset(cont); fprintf(stderr, "Stopped listening, please wait...\n"); fflush(stdout); /* Finish decoding, obtain and print result */ ps_end_utt(ps); hyp = ps_get_hyp(ps, NULL, &uttid); fprintf(stderr, "%s: %s\n", uttid, hyp); /* Exit if the first word spoken was GOODBYE */ if (hyp) { numwords = sscanf(hyp, "%s %s %s", word, c1, c2); if(strcmp(word, "GUGGUG") == 0) { if(strcmp(c1, "HALT") == 0) { printf("LEDOFF BLUE\n"); halted = 1; } else if(strcmp(c1, "RESUME") == 0) { printf("LEDON BLUE\n"); halted = 0; } if(strcmp(c1, "BEGIN") == 0 || strcmp(c1, "START") == 0) { if(strcmp(c2, "TRACKING") == 0 && !tracking) { printf("START TRACKING\n"); tracking = 1; halted = 0; } } else if(strcmp(c1, "STOP") == 0) { if(strcmp(c2, "TRACKING") == 0 && tracking) { printf("STOP TRACKING\n"); tracking = 0; } } if(!tracking && !halted && numwords == 3) { if(strcmp(c1, "TURN") == 0) { if(strcmp(c2, "AROUND") == 0) { printf("TURN %d 180\n", LEFT); } else if(strcmp(c2, "LEFT") == 0) { printf("TURN %d 90\n", LEFT); } else if(strcmp(c2, "RIGHT") == 0) { printf("TURN %d 90\n", RIGHT); } } else if(strcmp(c1, "MOVE") == 0) { if(strcmp(c2, "FORWARD") == 0) { printf("MOVE 0 %d\n", MOVE_CENT); } else if(strcmp(c2, "BACKWARD") == 0) { printf("MOVE 1 %d\n", MOVE_CENT); } } } } } /* Resume A/D recording for next utterance */ if (ad_start_rec(ad) < 0) E_FATAL("Failed to start recording\n"); } cont_ad_close(cont); ad_close(ad); }
/**
 * Create and configure the dynamic feature computation object.
 *
 * Reads -feat/-cmn/-varnorm/-agc/-ceplen to build it, then optionally
 * loads an LDA transform (-lda, -ldadim), applies a subvector
 * specification (-svspec), sets the AGC threshold (-agcthresh, unless
 * -agc is "none") and seeds the CMN means from the comma-separated
 * -cmninit list.
 *
 * @return 0 on success, -1 if any of these steps fails.
 */
static int
acmod_init_feat(acmod_t *acmod)
{
    char const *lda_path;
    char const *svspec;

    acmod->fcb =
        feat_init(cmd_ln_str_r(acmod->config, "-feat"),
                  cmn_type_from_str(cmd_ln_str_r(acmod->config, "-cmn")),
                  cmd_ln_boolean_r(acmod->config, "-varnorm"),
                  agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")),
                  1, cmd_ln_int32_r(acmod->config, "-ceplen"));
    if (acmod->fcb == NULL)
        return -1;

    lda_path = cmd_ln_str_r(acmod->config, "-lda");
    if (lda_path != NULL) {
        E_INFO("Reading linear feature transformation from %s\n", lda_path);
        if (feat_read_lda(acmod->fcb, lda_path,
                          cmd_ln_int32_r(acmod->config, "-ldadim")) < 0)
            return -1;
    }

    svspec = cmd_ln_str_r(acmod->config, "-svspec");
    if (svspec != NULL) {
        int32 **subvecs;

        E_INFO("Using subvector specification %s\n", svspec);
        subvecs = parse_subvecs(svspec);
        if (subvecs == NULL)
            return -1;
        if (feat_set_subvecs(acmod->fcb, subvecs) < 0)
            return -1;
    }

    if (cmd_ln_exists_r(acmod->config, "-agcthresh")
        && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) {
        agc_set_threshold(acmod->fcb->agc_struct,
                          cmd_ln_float32_r(acmod->config, "-agcthresh"));
    }

    if (acmod->fcb->cmn_struct
        && cmd_ln_exists_r(acmod->config, "-cmninit")) {
        char *cursor, *comma, *vallist;
        int32 nvals = 0;

        /* Work on a private copy: commas are overwritten in place. */
        vallist = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit"));
        cursor = vallist;
        while (nvals < acmod->fcb->cmn_struct->veclen) {
            comma = strchr(cursor, ',');
            if (comma == NULL)
                break;
            *comma = '\0';
            acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(cursor));
            cursor = comma + 1;
            ++nvals;
        }
        /* Last value has no trailing comma. */
        if (nvals < acmod->fcb->cmn_struct->veclen && *cursor != '\0') {
            acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(cursor));
        }
        ckd_free(vallist);
    }
    return 0;
}
/* * Continuous recognition from a file */ static void recognize_from_file() { int16 adbuf[2048]; const char *fname; const char *hyp; int32 k; uint8 utt_started, in_speech; int32 print_times = cmd_ln_boolean_r(config, "-time"); fname = cmd_ln_str_r(config, "-infile"); if ((rawfd = fopen(fname, "rb")) == NULL) { E_FATAL_SYSTEM("Failed to open file '%s' for reading", fname); } if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) { char waveheader[44]; fread(waveheader, 1, 44, rawfd); if (!check_wav_header(waveheader, (int)cmd_ln_float32_r(config, "-samprate"))) E_FATAL("Failed to process file '%s' due to format mismatch.\n", fname); } if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".mp3") == 0) { E_FATAL("Can not decode mp3 files, convert input file to WAV 16kHz 16-bit mono before decoding.\n"); } ps_start_utt(ps); utt_started = FALSE; while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) { ps_process_raw(ps, adbuf, k, FALSE, FALSE); in_speech = ps_get_in_speech(ps); if (in_speech && !utt_started) { utt_started = TRUE; } if (!in_speech && utt_started) { ps_end_utt(ps); hyp = ps_get_hyp(ps, NULL); if (hyp != NULL) printf("%s\n", hyp); if (print_times) print_word_times(); fflush(stdout); ps_start_utt(ps); utt_started = FALSE; } } ps_end_utt(ps); if (utt_started) { hyp = ps_get_hyp(ps, NULL); if (hyp != NULL) { printf("%s\n", hyp); if (print_times) { print_word_times(); } } } fclose(rawfd); }
/**
 * GObject property getter for the PocketSphinx element.
 *
 * Most properties are read straight from the element's decoder
 * configuration; the decoder handle, the "configured" flag and the
 * lattice directory come from the element itself. Unknown property ids
 * raise the standard GObject warning.
 */
static void
gst_pocketsphinx_get_property(GObject * object, guint prop_id,
                              GValue * value, GParamSpec * pspec)
{
    GstPocketSphinx *element = GST_POCKETSPHINX(object);

    switch (prop_id) {
    /* Element state */
    case PROP_DECODER:
        g_value_set_boxed(value, element->ps);
        break;
    case PROP_CONFIGURED:
        g_value_set_boolean(value, element->ps != NULL);
        break;
    case PROP_LATDIR:
        g_value_set_string(value, element->latdir);
        break;

    /* String-valued configuration entries */
    case PROP_HMM_DIR:
        g_value_set_string(value, cmd_ln_str_r(element->config, "-hmm"));
        break;
    case PROP_LM_FILE:
        g_value_set_string(value, cmd_ln_str_r(element->config, "-lm"));
        break;
    case PROP_LMCTL_FILE:
        g_value_set_string(value, cmd_ln_str_r(element->config, "-lmctl"));
        break;
    case PROP_LM_NAME:
        g_value_set_string(value, cmd_ln_str_r(element->config, "-lmname"));
        break;
    case PROP_DICT_FILE:
        g_value_set_string(value, cmd_ln_str_r(element->config, "-dict"));
        break;
    case PROP_MLLR_FILE:
        g_value_set_string(value, cmd_ln_str_r(element->config, "-mllr"));
        break;
    case PROP_FSG_FILE:
        g_value_set_string(value, cmd_ln_str_r(element->config, "-fsg"));
        break;

    /* Boolean configuration entries */
    case PROP_FWDFLAT:
        g_value_set_boolean(value, cmd_ln_boolean_r(element->config, "-fwdflat"));
        break;
    case PROP_BESTPATH:
        g_value_set_boolean(value, cmd_ln_boolean_r(element->config, "-bestpath"));
        break;

    /* Integer configuration entries */
    case PROP_MAXHMMPF:
        g_value_set_int(value, cmd_ln_int32_r(element->config, "-maxhmmpf"));
        break;
    case PROP_MAXWPF:
        g_value_set_int(value, cmd_ln_int32_r(element->config, "-maxwpf"));
        break;
    case PROP_DSRATIO:
        g_value_set_int(value, cmd_ln_int32_r(element->config, "-ds"));
        break;

    /* Floating-point configuration entries */
    case PROP_BEAM:
        g_value_set_double(value, cmd_ln_float_r(element->config, "-beam"));
        break;
    case PROP_PBEAM:
        g_value_set_double(value, cmd_ln_float_r(element->config, "-pbeam"));
        break;
    case PROP_WBEAM:
        g_value_set_double(value, cmd_ln_float_r(element->config, "-wbeam"));
        break;

    default:
        G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
        break;
    }
}
int main(int argc, char *argv[]) { char const *cfg; int i; int16 buf[2048]; if (argc == 2) { config = cmd_ln_parse_file_r(NULL, cont_args_def, argv[1], TRUE); } else { config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, FALSE); } /* Handle argument file as -argfile. */ if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) { config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE); } if (config == NULL) return 1; singlefile = cmd_ln_boolean_r(config, "-singlefile"); if ((infile_path = cmd_ln_str_r(config, "-infile")) != NULL) { if ((infile = fopen(infile_path, "rb")) == NULL) { E_FATAL_SYSTEM("Failed to read audio from '%s'", infile_path); return 1; } read_audio = &read_audio_file; /* skip wav header */ read_audio(buf, 44); } else { if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"), (int) cmd_ln_float32_r(config, "-samprate"))) == NULL) { E_FATAL("Failed to open audio device\n"); return 1; } read_audio = &read_audio_adev; printf("Start recording ...\n"); fflush(stdout); if (ad_start_rec(ad) < 0) E_FATAL("Failed to start recording\n"); /* TODO remove this thing */ for (i = 0; i < 5; i++) { sleep_msec(200); read_audio(buf, 2048); } printf("You may speak now\n"); fflush(stdout); } fe = fe_init_auto_r(config); if (fe == NULL) return 1; segment_audio(); if (ad) ad_close(ad); if (infile) fclose(infile); fe_free(fe); cmd_ln_free_r(config); return 0; }
int main(int argc, char *argv[]) { cmd_ln_t *config; ngram_model_t *lm = NULL; logmath_t *lmath; int itype, otype; char const *kase; if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL) return 1; if (cmd_ln_boolean_r(config, "-help")) { usagemsg(argv[0]); } err_set_debug_level(cmd_ln_int32_r(config, "-debug")); /* Create log math object. */ if ((lmath = logmath_init (cmd_ln_float64_r(config, "-logbase"), 0, 0)) == NULL) { E_FATAL("Failed to initialize log math\n"); } if (cmd_ln_str_r(config, "-i") == NULL || cmd_ln_str_r(config, "-i") == NULL) { E_ERROR("Please specify both input and output models\n"); goto error_out; } /* Load the input language model. */ if (cmd_ln_str_r(config, "-ifmt")) { if ((itype = ngram_str_to_type(cmd_ln_str_r(config, "-ifmt"))) == NGRAM_INVALID) { E_ERROR("Invalid input type %s\n", cmd_ln_str_r(config, "-ifmt")); goto error_out; } lm = ngram_model_read(config, cmd_ln_str_r(config, "-i"), itype, lmath); } else { lm = ngram_model_read(config, cmd_ln_str_r(config, "-i"), NGRAM_AUTO, lmath); } /* Guess or set the output language model type. */ if (cmd_ln_str_r(config, "-ofmt")) { if ((otype = ngram_str_to_type(cmd_ln_str_r(config, "-ofmt"))) == NGRAM_INVALID) { E_ERROR("Invalid output type %s\n", cmd_ln_str_r(config, "-ofmt")); goto error_out; } } else { otype = ngram_file_name_to_type(cmd_ln_str_r(config, "-o")); } /* Recode the language model if desired. */ if (cmd_ln_str_r(config, "-ienc")) { if (ngram_model_recode(lm, cmd_ln_str_r(config, "-ienc"), cmd_ln_str_r(config, "-oenc")) != 0) { E_ERROR("Failed to recode language model from %s to %s\n", cmd_ln_str_r(config, "-ienc"), cmd_ln_str_r(config, "-oenc")); goto error_out; } } /* Case fold if requested. 
*/ if ((kase = cmd_ln_str_r(config, "-case"))) { if (0 == strcmp(kase, "lower")) { ngram_model_casefold(lm, NGRAM_LOWER); } else if (0 == strcmp(kase, "upper")) { ngram_model_casefold(lm, NGRAM_UPPER); } else { E_ERROR("Unknown value for -case: %s\n", kase); goto error_out; } } /* Write the output language model. */ if (ngram_model_write(lm, cmd_ln_str_r(config, "-o"), otype) != 0) { E_ERROR("Failed to write language model in format %s to %s\n", ngram_type_to_str(otype), cmd_ln_str_r(config, "-o")); goto error_out; } /* That's all folks! */ ngram_model_free(lm); return 0; error_out: ngram_model_free(lm); return 1; }
/*
 * Load the acoustic model parameters named in acmod->config.
 *
 * Reads the model definition ("-mdef") and the transition matrices
 * ("-tmat"), then selects a Gaussian computation module: the general
 * multi-stream one when "-senmgau" is set, otherwise PTM, then
 * semi-continuous, then multi-stream as the last resort.  If "-mllr" is
 * set, the transform is read and applied through acmod_update_mllr().
 *
 * Returns 0 on success and -1 on failure.  Objects already stored in acmod
 * are left in place on failure; nothing is released here.
 */
static int
acmod_init_am(acmod_t *acmod)
{
    char const *mdeffn, *tmatfn, *mllrfn, *hmmdir;

    /* Read model definition. */
    if ((mdeffn = cmd_ln_str_r(acmod->config, "-mdef")) == NULL) {
        if ((hmmdir = cmd_ln_str_r(acmod->config, "-hmm")) == NULL)
            E_ERROR("Acoustic model definition is not specified either "
                    "with -mdef option or with -hmm\n");
        else
            E_ERROR("Folder '%s' does not contain acoustic model "
                    "definition 'mdef'\n", hmmdir);

        return -1;
    }

    if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) {
        E_ERROR("Failed to read acoustic model definition from %s\n", mdeffn);
        return -1;
    }

    /* Read transition matrices. */
    if ((tmatfn = cmd_ln_str_r(acmod->config, "-tmat")) == NULL) {
        E_ERROR("No tmat file specified\n");
        return -1;
    }
    acmod->tmat = tmat_init(tmatfn, acmod->lmath,
                            cmd_ln_float32_r(acmod->config, "-tmatfloor"),
                            TRUE);
    /* Treat a NULL result like every other loader failure in this function
     * rather than carrying a missing transition matrix forward. */
    if (acmod->tmat == NULL) {
        E_ERROR("Failed to read transition matrices from %s\n", tmatfn);
        return -1;
    }

    /* Read the acoustic models. */
    if ((cmd_ln_str_r(acmod->config, "-mean") == NULL)
        || (cmd_ln_str_r(acmod->config, "-var") == NULL)
        || (cmd_ln_str_r(acmod->config, "-tmat") == NULL)) {
        E_ERROR("No mean/var/tmat files specified\n");
        return -1;
    }

    if (cmd_ln_str_r(acmod->config, "-senmgau")) {
        E_INFO("Using general multi-stream GMM computation\n");
        acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
        if (acmod->mgau == NULL)
            return -1;
    }
    else {
        /* Try the computation modules in turn; each later one is only
         * attempted when the previous initializer returned NULL. */
        E_INFO("Attempting to use PTM computation module\n");
        if ((acmod->mgau = ptm_mgau_init(acmod, acmod->mdef)) == NULL) {
            E_INFO("Attempting to use semi-continuous computation module\n");
            if ((acmod->mgau = s2_semi_mgau_init(acmod)) == NULL) {
                E_INFO("Falling back to general multi-stream GMM computation\n");
                acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
                if (acmod->mgau == NULL)
                    return -1;
            }
        }
    }

    /* If there is an MLLR transform, apply it. */
    if ((mllrfn = cmd_ln_str_r(acmod->config, "-mllr"))) {
        ps_mllr_t *mllr = ps_mllr_read(mllrfn);
        if (mllr == NULL)
            return -1;
        acmod_update_mllr(acmod, mllr);
    }

    return 0;
}
/*
 * Allocate and set up an acoustic model object.
 *
 * config  configuration to retain and read options from
 * lmath   log-math object stored in the new object
 * fe      front end to share, or NULL to create one from config
 * fcb     feature computation object to share, or NULL to create one
 *
 * A caller-supplied fe or fcb is accepted only if the corresponding
 * mismatch check passes, and is retained before being stored.  Returns the
 * new object, or NULL after releasing it with acmod_free() if any setup
 * step fails.
 */
acmod_t *
acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
{
    acmod_t *am;
    char const *params_path;

    am = ckd_calloc(1, sizeof(*am));
    am->config = cmd_ln_retain(config);
    am->lmath = lmath;
    am->state = ACMOD_IDLE;

    /* Model-specific feature parameters, if a feat.params file is named. */
    params_path = cmd_ln_str_r(am->config, "-featparams");
    if (params_path) {
        if (cmd_ln_parse_file_r(am->config, feat_defn, params_path, FALSE) != NULL)
            E_INFO("Parsed model-specific feature parameters from %s\n", params_path);
    }

    /* Front end: build our own when none was supplied, otherwise share
     * the caller's once it has been checked against the configuration. */
    if (fe == NULL) {
        am->fe = fe_init_auto_r(config);
        if (am->fe == NULL || acmod_fe_mismatch(am, am->fe))
            goto fail;
    }
    else {
        if (acmod_fe_mismatch(am, fe))
            goto fail;
        fe_retain(fe);
        am->fe = fe;
    }

    /* Dynamic feature computation: same policy as for the front end. */
    if (fcb == NULL) {
        if (acmod_init_feat(am) < 0)
            goto fail;
    }
    else {
        if (acmod_feat_mismatch(am, fcb))
            goto fail;
        feat_retain(fcb);
        am->fcb = fcb;
    }

    /* Acoustic model parameters. */
    if (acmod_init_am(am) < 0)
        goto fail;

    /* MFCC buffer: sized from the dynamic feature window
     * (twice window_size plus one frames). */
    am->n_mfc_alloc = am->fcb->window_size * 2 + 1;
    am->mfc_buf = (mfcc_t **)
        ckd_calloc_2d(am->n_mfc_alloc, am->fcb->cepsize,
                      sizeof(**am->mfc_buf));

    /* Feature buffer: MFCC buffer size plus the "-pl_window" setting. */
    am->n_feat_alloc = am->n_mfc_alloc + cmd_ln_int32_r(config, "-pl_window");
    am->feat_buf = feat_array_alloc(am->fcb, am->n_feat_alloc);
    am->framepos = ckd_calloc(am->n_feat_alloc, sizeof(*am->framepos));

    am->utt_start_frame = 0;

    /* Per-senone scoring state, one entry per senone in the model. */
    am->senone_scores = ckd_calloc(bin_mdef_n_sen(am->mdef),
                                   sizeof(*am->senone_scores));
    am->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(am->mdef));
    am->senone_active = ckd_calloc(bin_mdef_n_sen(am->mdef),
                                   sizeof(*am->senone_active));
    am->log_zero = logmath_get_zero(am->lmath);
    am->compallsen = cmd_ln_boolean_r(config, "-compallsen");

    return am;

fail:
    acmod_free(am);
    return NULL;
}