int main(int argc, char *argv[]) { char const *cfg; config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE); /* Handle argument file as -argfile. */ if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) { config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE); } if (config == NULL || (cmd_ln_str_r(config, "-infile") == NULL && cmd_ln_boolean_r(config, "-inmic") == FALSE)) { E_INFO("Specify '-infile <file.wav>' to recognize from file or '-inmic yes' to recognize from microphone."); cmd_ln_free_r(config); return 1; } ps_default_search_args(config); ps = ps_init(config); if (ps == NULL) { cmd_ln_free_r(config); return 1; } E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__); if (cmd_ln_boolean_r(config, "-inmic")) { recognize_from_microphone(); } ps_free(ps); cmd_ln_free_r(config); return 0; }
/**
 * Fill in the mel filterbank parameters from the front end and the
 * configuration, then set up the frequency warping function.
 *
 * Also sets fe->feature_dimension: the number of filters when
 * fe->log_spec is set, the number of cepstra otherwise.
 *
 * @return 0 on success, -1 if the warping function cannot be set.
 */
static int
fe_parse_melfb_params(cmd_ln_t *config, fe_t *fe, melfb_t * mel)
{
    /* Values copied from the front end. */
    mel->sampling_rate = fe->sampling_rate;
    mel->fft_size = fe->fft_size;
    mel->num_cepstra = fe->num_cepstra;

    mel->num_filters = cmd_ln_int32_r(config, "-nfilt");
    fe->feature_dimension = fe->log_spec ? mel->num_filters : fe->num_cepstra;

    /* Values read from the configuration. */
    mel->upper_filt_freq = cmd_ln_float32_r(config, "-upperf");
    mel->lower_filt_freq = cmd_ln_float32_r(config, "-lowerf");
    mel->doublewide = cmd_ln_boolean_r(config, "-doublebw");
    mel->warp_type = cmd_ln_str_r(config, "-warp_type");
    mel->warp_params = cmd_ln_str_r(config, "-warp_params");
    mel->lifter_val = cmd_ln_int32_r(config, "-lifter");
    mel->unit_area = cmd_ln_boolean_r(config, "-unit_area");
    mel->round_filters = cmd_ln_boolean_r(config, "-round_filters");

    if (fe_warp_set(mel, mel->warp_type) == FE_SUCCESS) {
        fe_warp_set_parameters(mel, mel->warp_params, mel->sampling_rate);
        return 0;
    }

    E_ERROR("Failed to initialize the warping function.\n");
    return -1;
}
static void models_init(void) { mdef = mdef_init(cmd_ln_str_r(config, "-mdef"), 1); dict = dict_init(mdef, cmd_ln_str_r(config, "-dict"), cmd_ln_str_r(config, "-fdict"), cmd_ln_boolean_r(config, "-lts_mismatch"), cmd_ln_boolean_r(config, "-mdef_fillers"), FALSE, TRUE); lmset = lmset_init(cmd_ln_str_r(config, "-lm"), cmd_ln_str_r(config, "-lmctlfn"), cmd_ln_str_r(config, "-ctl_lm"), cmd_ln_str_r(config, "-lmname"), cmd_ln_str_r(config, "-lmdumpdir"), cmd_ln_float32_r(config, "-lw"), cmd_ln_float32_r(config, "-wip"), cmd_ln_float32_r(config, "-uw"), dict, logmath); /* Filler penalties */ fpen = fillpen_init(dict, cmd_ln_str_r(config, "-fillpen"), cmd_ln_float32_r(config, "-silprob"), cmd_ln_float32_r(config, "-fillprob"), cmd_ln_float32_r(config, "-lw"), cmd_ln_float32_r(config, "-wip"), logmath); }
/**
 * Output frames in HTK format.
 *
 * The frames are converted to float32 in place, optionally reordered so
 * that the first coefficient (C0) becomes the last one, byte-swapped
 * when the machine is little-endian, and written to wtf->outfh.
 *
 * @param wtf    Conversion state (config, output type, veclen, outfh).
 * @param frames nfr vectors of wtf->veclen values; modified in place.
 * @param nfr    Number of frames in @p frames.
 * @return number of values written, or -1 if a write fails.
 */
static int
output_frames_htk(sphinx_wave2feat_t *wtf, mfcc_t **frames, int nfr)
{
    int i, j, swap, htk_reorder, nfloat = 0;

    fe_mfcc_to_float(wtf->fe, frames, (float32 **)frames, nfr);
    /* This is possibly inefficient, but probably not a big deal. */
    swap = (0 == strcmp("little", cmd_ln_str_r(wtf->config, "-mach_endian")));
    /* Reordering applies only to cepstra written with the "htk" type. */
    htk_reorder = (0 == strcmp("htk", wtf->ot->name)
                   && !(cmd_ln_boolean_r(wtf->config, "-logspec")
                        || cmd_ln_boolean_r(wtf->config, "-cep2spec")));

    for (i = 0; i < nfr; ++i) {
        if (htk_reorder && wtf->veclen > 1) {
            /* Rotate left by one: C1..Cn-1 move down, C0 goes last.
             * (Shifting right instead would duplicate C0 and drop the
             * final coefficients.) */
            mfcc_t c0 = frames[i][0];
            memmove(frames[i], frames[i] + 1,
                    (wtf->veclen - 1) * sizeof(frames[i][0]));
            frames[i][wtf->veclen - 1] = c0;
        }
        if (swap) {
            for (j = 0; j < wtf->veclen; ++j)
                SWAP_FLOAT32(frames[i] + j);
        }
        if (fwrite(frames[i], sizeof(float32), wtf->veclen, wtf->outfh)
            != (size_t)wtf->veclen) {
            E_ERROR_SYSTEM("Writing %d values to %s failed",
                           wtf->veclen, wtf->outfile);
            return -1;
        }
        nfloat += wtf->veclen;
    }
    return nfloat;
}
/**
 * Read the general front-end parameters from the configuration into
 * @p fe and validate the FFT size and transform type.
 *
 * @param config Configuration to read from; also stored in fe->config.
 * @param fe     Front end to fill in.
 * @return 0 on success, -1 if the FFT size is not a positive power of
 *         two, is smaller than the window length in samples, or the
 *         transform type is not one of 'dct', 'legacy', 'htk'.
 */
int
fe_parse_general_params(cmd_ln_t *config, fe_t * fe)
{
    int j;
    char const *transform;

    fe->config = config;
    fe->sampling_rate = cmd_ln_float32_r(config, "-samprate");
    fe->frame_rate = (int16)cmd_ln_int32_r(config, "-frate");
    if (cmd_ln_boolean_r(config, "-dither")) {
        fe->dither = 1;
        fe->seed = cmd_ln_int32_r(config, "-seed");
    }
    /* Swap only when the input byte order differs from the machine's. */
#ifdef WORDS_BIGENDIAN
    fe->swap = strcmp("big", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
#else
    fe->swap = strcmp("little", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
#endif
    fe->window_length = cmd_ln_float32_r(config, "-wlen");
    fe->pre_emphasis_alpha = cmd_ln_float32_r(config, "-alpha");

    fe->num_cepstra = (uint8)cmd_ln_int32_r(config, "-ncep");
    fe->fft_size = (int16)cmd_ln_int32_r(config, "-nfft");

    /* Check FFT size, compute FFT order (log_2(n)).  The non-positive
     * case must be tested before the loop: the loop body never runs
     * when fft_size <= 1. */
    fe->fft_order = 0;
    if (fe->fft_size <= 0) {
        E_ERROR("fft: number of points must be a power of 2 (is %d)\n",
                fe->fft_size);
        return -1;
    }
    for (j = fe->fft_size; j > 1; j >>= 1, fe->fft_order++) {
        if ((j % 2) != 0) {
            E_ERROR("fft: number of points must be a power of 2 (is %d)\n",
                    fe->fft_size);
            return -1;
        }
    }
    /* Verify that FFT size is greater or equal to window length. */
    if (fe->fft_size < (int)(fe->window_length * fe->sampling_rate)) {
        E_ERROR("FFT: Number of points must be greater or equal to frame size (%d samples)\n",
                (int)(fe->window_length * fe->sampling_rate));
        return -1;
    }

    fe->remove_dc = cmd_ln_boolean_r(config, "-remove_dc");

    /* A missing -transform value is reported as an invalid type rather
     * than being passed to strcmp(). */
    transform = cmd_ln_str_r(config, "-transform");
    if (transform != NULL && 0 == strcmp(transform, "dct"))
        fe->transform = DCT_II;
    else if (transform != NULL && 0 == strcmp(transform, "legacy"))
        fe->transform = LEGACY_DCT;
    else if (transform != NULL && 0 == strcmp(transform, "htk"))
        fe->transform = DCT_HTK;
    else {
        E_ERROR("Invalid transform type (values are 'dct', 'legacy', 'htk')\n");
        return -1;
    }

    /* -smoothspec takes precedence when both flags are set. */
    if (cmd_ln_boolean_r(config, "-logspec"))
        fe->log_spec = RAW_LOG_SPEC;
    if (cmd_ln_boolean_r(config, "-smoothspec"))
        fe->log_spec = SMOOTH_LOG_SPEC;

    return 0;
}
/**
 * Program entry point: parse a JSGF grammar, build an FSG for the rule
 * named by -toprule (or whatever get_fsg() selects when it is unset),
 * and write it out in FSM (-fsm, with optional -symtab) or FSG (-fsg,
 * default stdout) form.
 *
 * @return 0 on success, 1 on argument, parse or rule-lookup failure.
 */
int
main(int argc, char *argv[])
{
    jsgf_t *jsgf;
    fsg_model_t *fsg;
    cmd_ln_t *config;
    const char *rule;
    const char *outfile;

    if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL)
        return 1;

    if (cmd_ln_boolean_r(config, "-help")) {
        usagemsg(argv[0]);
    }

    jsgf = jsgf_parse_file(cmd_ln_str_r(config, "-jsgf"), NULL);
    if (jsgf == NULL) {
        cmd_ln_free_r(config);
        return 1;
    }

    rule = cmd_ln_str_r(config, "-toprule");
    if (!(fsg = get_fsg(jsgf, rule))) {
        /* rule may be NULL here; never hand NULL to %s. */
        E_ERROR("No fsg was built for the given rule '%s'.\n"
                "Check rule name; it should be qualified (with grammar name)\n"
                "and not enclosed in angle brackets (e.g. 'grammar.rulename').",
                rule ? rule : "(null)");
        jsgf_grammar_free(jsgf);
        cmd_ln_free_r(config);
        return 1;
    }

    if (cmd_ln_boolean_r(config, "-compile")) {
        fsg_model_null_trans_closure(fsg, NULL);
    }

    if ((outfile = cmd_ln_str_r(config, "-fsm")) != NULL) {
        const char *symfile = cmd_ln_str_r(config, "-symtab");

        fsg_model_writefile_fsm(fsg, outfile);
        if (symfile)
            fsg_model_writefile_symtab(fsg, symfile);
    }
    else {
        outfile = cmd_ln_str_r(config, "-fsg");
        if (outfile)
            fsg_model_writefile(fsg, outfile);
        else
            fsg_model_write(fsg, stdout);
    }

    fsg_model_free(fsg);
    jsgf_grammar_free(jsgf);
    /* The file names above point into config, so release it last. */
    cmd_ln_free_r(config);

    return 0;
}
/**
 * "Detect" Sphinx MFCC files, meaning verify their lousy headers, and
 * set up some parameters from the config object.
 *
 * The 32-bit header is compared with (file size / 4) - 1.  If they only
 * match after byte-swapping the header, -input_endian is set to the
 * opposite of -mach_endian.  On success the open file, positioned just
 * after the header, is stored in wtf->infh.
 *
 * @return TRUE, or -1 on error.
 */
static int
detect_sphinx_mfc(sphinx_wave2feat_t *wtf)
{
    FILE *fh;
    int32 len;
    long flen;

    if ((fh = fopen(wtf->infile, "rb")) == NULL) {
        E_ERROR_SYSTEM("Failed to open %s", wtf->infile);
        return -1;
    }
    if (fread(&len, 4, 1, fh) != 1) {
        E_ERROR_SYSTEM("Failed to read header from %s\n", wtf->infile);
        fclose(fh);
        return -1;
    }
    if (fseek(fh, 0, SEEK_END) != 0 || (flen = ftell(fh)) < 0) {
        E_ERROR_SYSTEM("Failed to determine size of %s", wtf->infile);
        fclose(fh);
        return -1;
    }

    /* figure out whether to byteswap */
    flen = (flen / 4) - 1;
    if (flen != len) {
        /* First make sure this is an endianness problem, otherwise fail. */
        SWAP_INT32(&len);
        if (flen != len) {
            /* Swap back so the message shows the header as read. */
            SWAP_INT32(&len);
            E_ERROR("Mismatch in header/file lengths: 0x%08x vs 0x%08lx\n",
                    (unsigned int)len, (unsigned long)flen);
            fclose(fh);
            return -1;
        }
        /* Set the input endianness to the opposite of the machine endianness... */
        cmd_ln_set_str_r(wtf->config, "-input_endian",
                         (0 == strcmp("big", cmd_ln_str_r(wtf->config, "-mach_endian"))
                          ? "little" : "big"));
    }

    if (fseek(fh, 4, SEEK_SET) != 0) {
        E_ERROR_SYSTEM("Failed to seek past header in %s", wtf->infile);
        fclose(fh);
        return -1;
    }
    wtf->infh = fh;

    if (cmd_ln_boolean_r(wtf->config, "-spec2cep")) {
        wtf->in_veclen = cmd_ln_int32_r(wtf->config, "-nfilt");
    }
    else if (cmd_ln_boolean_r(wtf->config, "-cep2spec")) {
        wtf->in_veclen = cmd_ln_int32_r(wtf->config, "-ncep");
        wtf->veclen = cmd_ln_int32_r(wtf->config, "-nfilt");
    }
    else {
        /* Should not happen. */
        E_ERROR("Sphinx MFCC file reading requested but -spec2cep/-cep2spec not given\n");
        assert(FALSE);
    }

    return TRUE;
}
/**
 * Work out which audio type describes the input file.
 *
 * With -spec2cep or -cep2spec the Sphinx MFCC type is used directly.
 * Otherwise the types whose name is enabled as a boolean option are
 * tried first, and if none of them accepts the file every known type
 * is tried in order.
 *
 * @return the matching type, or NULL on error or when nothing matches
 *         (in which case wtf->infh is closed and cleared).
 */
static audio_type_t const *
detect_audio_type(sphinx_wave2feat_t *wtf)
{
    audio_type_t const *candidate;
    int idx, status;

    /* Special case audio type for Sphinx MFCC inputs. */
    if (cmd_ln_boolean_r(wtf->config, "-spec2cep")
        || cmd_ln_boolean_r(wtf->config, "-cep2spec")) {
        status = mfcc_type.detect(wtf);
        if (status == -1)
            goto error_out;
        return &mfcc_type;
    }

    /* Try to use the type of infile given on the command line. */
    for (idx = 0; idx < ntypes; ++idx) {
        candidate = &types[idx];
        if (!cmd_ln_boolean_r(wtf->config, candidate->name))
            continue;
        status = (*candidate->detect)(wtf);
        if (status == -1)
            goto error_out;
        if (status == TRUE)
            return candidate;
    }

    /* Detect file type of infile and get parameters. */
    for (idx = 0; idx < ntypes; ++idx) {
        candidate = &types[idx];
        status = (*candidate->detect)(wtf);
        if (status == -1)
            goto error_out;
        if (status == TRUE)
            return candidate;
    }

    /* Nothing recognised the file: fall through to the error path. */
error_out:
    if (wtf->infh)
        fclose(wtf->infh);
    wtf->infh = NULL;
    return NULL;
}
/**
 * Program entry point: parse a JSGF grammar, build an FSG for the rule
 * named by -rule (or whatever get_fsg() selects when it is unset), and
 * write it out in FSM (-fsm, with optional -symtab) or FSG (-fsg,
 * default stdout) form.
 *
 * @return 0 on success, 1 on argument, parse or rule-lookup failure.
 */
int
main(int argc, char *argv[])
{
    jsgf_t *jsgf;
    fsg_model_t *fsg;
    cmd_ln_t *config;
    const char *rule;
    const char *outfile;

    if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL)
        return 1;

    if (cmd_ln_boolean_r(config, "-help")) {
        usagemsg(argv[0]);
    }

    jsgf = jsgf_parse_file(cmd_ln_str_r(config, "-jsgf"), NULL);
    if (jsgf == NULL) {
        cmd_ln_free_r(config);
        return 1;
    }

    rule = cmd_ln_str_r(config, "-rule");
    fsg = get_fsg(jsgf, rule);
    if (fsg == NULL) {
        /* Without this check the NULL model would be passed straight
         * to the closure and write routines below. */
        fprintf(stderr, "No fsg was built for the given rule '%s'.\n",
                rule ? rule : "(null)");
        jsgf_grammar_free(jsgf);
        cmd_ln_free_r(config);
        return 1;
    }

    if (cmd_ln_boolean_r(config, "-compile")) {
        fsg_model_null_trans_closure(fsg, NULL);
    }

    if ((outfile = cmd_ln_str_r(config, "-fsm")) != NULL) {
        const char *symfile = cmd_ln_str_r(config, "-symtab");

        fsg_model_writefile_fsm(fsg, outfile);
        if (symfile)
            fsg_model_writefile_symtab(fsg, symfile);
    }
    else {
        outfile = cmd_ln_str_r(config, "-fsg");
        if (outfile)
            fsg_model_writefile(fsg, outfile);
        else
            fsg_model_write(fsg, stdout);
    }

    fsg_model_free(fsg);
    jsgf_grammar_free(jsgf);
    /* The file names above point into config, so release it last. */
    cmd_ln_free_r(config);

    return 0;
}
/**
 * Write the DAG for the current utterance to a lattice file.
 *
 * The file name is built by ctl_outfile() from -outlatdir, -latext and
 * the utterance file name (or the utterance id when there is none).
 *
 * @param srch A srch_t, passed as void *.
 * @param dag  The DAG to write.
 * @return SRCH_SUCCESS.
 */
int32
srch_FLAT_FWD_dag_dump(void *srch, dag_t *dag)
{
    srch_t *search = (srch_t *) srch;
    srch_FLAT_FWD_graph_t *graph =
        (srch_FLAT_FWD_graph_t *) search->grh->graph_struct;
    cmd_ln_t *cfg;
    char latfile[2048];

    assert(graph->lathist);

    cfg = kbcore_config(graph->kbcore);
    ctl_outfile(latfile,
                cmd_ln_str_r(cfg, "-outlatdir"),
                cmd_ln_str_r(cfg, "-latext"),
                (search->uttfile ? search->uttfile : search->uttid),
                search->uttid,
                cmd_ln_boolean_r(cfg, "-build_outdirs"));

    E_INFO("Writing lattice file: %s\n", latfile);

    latticehist_dag_write(graph->lathist, latfile, dag,
                          kbcore_lm(search->kbc),
                          kbcore_dict(search->kbc),
                          graph->ctxt,
                          search->kbc->fillpen);

    return SRCH_SUCCESS;
}
/**
 * Build the input and output file names for @p basename.
 *
 * Each name is [dir "/"] basename ["." ext], where the directory and
 * extension come from -di/-ei (input) and -do/-eo (output) and are
 * omitted when unset.  When -build_outdirs is set the directory part
 * of the output name is created.
 *
 * @param out_infile  Receives the joined input file name.
 * @param out_outfile Receives the joined output file name.
 */
void
build_filenames(cmd_ln_t *config, char const *basename,
                char **out_infile, char **out_outfile)
{
    char const *in_dir = cmd_ln_str_r(config, "-di");
    char const *out_dir = cmd_ln_str_r(config, "-do");
    char const *in_ext = cmd_ln_str_r(config, "-ei");
    char const *out_ext = cmd_ln_str_r(config, "-eo");
    char const *in_slash = "/", *out_slash = "/";
    char const *in_dot = ".", *out_dot = ".";

    /* An unset component contributes neither itself nor its separator. */
    if (in_dir == NULL)
        in_dir = in_slash = "";
    if (in_ext == NULL)
        in_ext = in_dot = "";
    if (out_dir == NULL)
        out_dir = out_slash = "";
    if (out_ext == NULL)
        out_ext = out_dot = "";

    *out_infile = string_join(in_dir, in_slash, basename, in_dot, in_ext, NULL);
    *out_outfile = string_join(out_dir, out_slash, basename, out_dot, out_ext, NULL);

    /* Build output directory structure if possible/requested (it is
     * by default). */
    if (cmd_ln_boolean_r(config, "-build_outdirs")) {
        char *outdir = ckd_salloc(*out_outfile);

        path2dirname(*out_outfile, outdir);
        build_directory(outdir);
        ckd_free(outdir);
    }
}
/**
 * Create an acoustic model object.
 *
 * Retains @p config, @p lmath and @p fb, loads the acoustic model
 * parameters and allocates the senone scoring buffers.
 *
 * @return the new object, or NULL if the acoustic model fails to load.
 */
acmod_t *
acmod_init(cmd_ln_t *config, logmath_t *lmath, featbuf_t *fb)
{
    acmod_t *self = ckd_calloc(1, sizeof(*self));
    int n_sen;

    self->refcount = 1;
    self->config = cmd_ln_retain(config);
    self->lmath = logmath_retain(lmath);
    self->fb = featbuf_retain(fb);
    self->fcb = featbuf_get_fcb(self->fb);

    /* Load acoustic model parameters. */
    if (acmod_init_am(self) < 0) {
        acmod_free(self);
        return NULL;
    }

    /* Senone computation stuff. */
    n_sen = bin_mdef_n_sen(self->mdef);
    self->senone_scores = ckd_calloc(n_sen, sizeof(*self->senone_scores));
    self->senone_active_vec = bitvec_alloc(n_sen);
    self->senone_active = ckd_calloc(n_sen, sizeof(*self->senone_active));
    self->log_zero = logmath_get_zero(self->lmath);
    self->compallsen = cmd_ln_boolean_r(config, "-compallsen");
    self->feat_buf = feat_array_alloc(self->fcb, 1);

    return self;
}
/**
 * Create a new acoustic model object from an existing one.
 *
 * The config, log math, model definition, transition matrices and
 * feature buffer are retained from @p other; the Gaussian mixture
 * object comes from ps_mgau_copy(); the senone buffers are freshly
 * allocated.
 *
 * @return the new object.
 */
acmod_t *
acmod_copy(acmod_t *other)
{
    acmod_t *clone = ckd_calloc(1, sizeof(*clone));
    int n_sen;

    clone->refcount = 1;

    /* Resources taken from the source object. */
    clone->config = cmd_ln_retain(other->config);
    clone->lmath = logmath_retain(other->lmath);
    clone->mdef = bin_mdef_retain(other->mdef);
    clone->tmat = tmat_retain(other->tmat);
    clone->mgau = ps_mgau_copy(other->mgau);
    clone->fb = featbuf_retain(other->fb);
    /* Implicitly retained with fb, I think */
    clone->fcb = other->fcb;

    /* Senone computation stuff. */
    n_sen = bin_mdef_n_sen(clone->mdef);
    clone->senone_scores = ckd_calloc(n_sen, sizeof(*clone->senone_scores));
    clone->senone_active_vec = bitvec_alloc(n_sen);
    clone->senone_active = ckd_calloc(n_sen, sizeof(*clone->senone_active));
    clone->log_zero = logmath_get_zero(clone->lmath);
    clone->compallsen = cmd_ln_boolean_r(clone->config, "-compallsen");
    clone->feat_buf = feat_array_alloc(clone->fcb, 1);

    return clone;
}
/**
 * End the current utterance.
 *
 * Ends the utterance in the acoustic model, runs the remaining search
 * steps (forward search, phone loop finish, lookahead-window frames,
 * main search finish), stops the performance timer and, when
 * -backtrace is set and a hypothesis exists, logs a per-word backtrace.
 *
 * @return the result of the last search call made; negative on error.
 */
int
ps_end_utt(ps_decoder_t *ps)
{
    int status, frame;
    char const *uttid, *hyp;
    ps_seg_t *seg;
    int32 score;

    acmod_end_utt(ps->acmod);

    /* Search any remaining frames. */
    status = ps_search_forward(ps);

    /* Finish phone loop search. */
    if (status >= 0 && ps->phone_loop)
        status = ps_search_finish(ps->phone_loop);

    if (status >= 0) {
        /* Search any frames remaining in the lookahead window. */
        for (frame = ps->acmod->output_frame - ps->pl_window;
             frame < ps->acmod->output_frame; ++frame)
            ps_search_step(ps->search, frame);

        /* Finish main search. */
        status = ps_search_finish(ps->search);
    }

    /* The timer is stopped whether or not the search succeeded. */
    ptmr_stop(&ps->perf);
    if (status < 0)
        return status;

    /* Log a backtrace if requested. */
    if (!cmd_ln_boolean_r(ps->config, "-backtrace"))
        return status;

    hyp = ps_get_hyp(ps, &score, &uttid);
    if (hyp == NULL)
        return status;

    E_INFO("%s: %s (%d)\n", uttid, hyp, score);
    E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
                "word", "start", "end", "pprob", "ascr", "lscr", "lback");
    seg = ps_seg_iter(ps, &score);
    while (seg) {
        char const *word;
        int sf, ef;
        int32 post, lscr, ascr, lback;

        word = ps_seg_word(seg);
        ps_seg_frames(seg, &sf, &ef);
        post = ps_seg_prob(seg, &ascr, &lscr, &lback);
        E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
                    word, sf, ef, logmath_exp(ps_get_logmath(ps), post),
                    ascr, lscr, lback);
        seg = ps_seg_next(seg);
    }

    return status;
}
/**
 * Initialise the feature computation object (acmod->fcb) from the
 * configuration.
 *
 * Optionally loads an LDA transform (-lda), applies a subvector
 * specification (-svspec), sets the AGC threshold (-agcthresh, unless
 * -agc is "none") and seeds the CMN means from the comma-separated
 * list in -cmninit.
 *
 * @return 0 on success, -1 on failure.
 */
static int
acmod_init_feat(acmod_t *acmod)
{
    char const *lda_file;
    char const *svspec;

    acmod->fcb = feat_init(cmd_ln_str_r(acmod->config, "-feat"),
                           cmn_type_from_str(cmd_ln_str_r(acmod->config, "-cmn")),
                           cmd_ln_boolean_r(acmod->config, "-varnorm"),
                           agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")),
                           1,
                           cmd_ln_int32_r(acmod->config, "-ceplen"));
    if (acmod->fcb == NULL)
        return -1;

    lda_file = cmd_ln_str_r(acmod->config, "-lda");
    if (lda_file) {
        E_INFO("Reading linear feature transformation from %s\n", lda_file);
        if (feat_read_lda(acmod->fcb, lda_file,
                          cmd_ln_int32_r(acmod->config, "-ldadim")) < 0)
            return -1;
    }

    svspec = cmd_ln_str_r(acmod->config, "-svspec");
    if (svspec) {
        int32 **subvecs;

        E_INFO("Using subvector specification %s\n", svspec);
        subvecs = parse_subvecs(svspec);
        if (subvecs == NULL)
            return -1;
        if (feat_set_subvecs(acmod->fcb, subvecs) < 0)
            return -1;
    }

    if (cmd_ln_exists_r(acmod->config, "-agcthresh")
        && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) {
        agc_set_threshold(acmod->fcb->agc_struct,
                          cmd_ln_float32_r(acmod->config, "-agcthresh"));
    }

    if (acmod->fcb->cmn_struct
        && cmd_ln_exists_r(acmod->config, "-cmninit")) {
        char *values, *cursor, *comma;
        int32 count;

        /* Work on a private copy: the commas are overwritten with NULs. */
        values = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit"));
        for (count = 0, cursor = values;
             count < acmod->fcb->cmn_struct->veclen
                 && (comma = strchr(cursor, ',')) != NULL;
             ++count, cursor = comma + 1) {
            *comma = '\0';
            acmod->fcb->cmn_struct->cmn_mean[count] = FLOAT2MFCC(atof_c(cursor));
        }
        /* Whatever follows the last comma is the final value. */
        if (count < acmod->fcb->cmn_struct->veclen && *cursor != '\0') {
            acmod->fcb->cmn_struct->cmn_mean[count] = FLOAT2MFCC(atof_c(cursor));
        }
        ckd_free(values);
    }

    return 0;
}
/**
 * Process Sphinx MFCCs/logspectra from a filehandle.  Assume that
 * wtf->infh is positioned just after the file header.
 *
 * Reads blocks of values, byte-swaps them if wtf->byteswap is set,
 * applies the transform selected by -spec2cep / -cep2spec to every
 * frame and passes the frames to the output type's output_frames
 * callback.  The input file is closed once reading stops.
 *
 * @return total of the values returned by the output callback, or -1
 *         on error.
 */
static int
decode_sphinx_mfc(sphinx_wave2feat_t *wtf)
{
    int total = 0;
    int n_read, n_out, n_frames, k;
    int frames_per_read = wtf->featsize;

    /* If the input vector length is less than the output length, we
     * need to do this one frame at a time, because there's empty
     * space at the end of each vector in wtf->feat. */
    if (wtf->in_veclen < wtf->veclen)
        frames_per_read = 1;

    for (;;) {
        n_read = fread(wtf->feat[0], sizeof(**wtf->feat),
                       frames_per_read * wtf->in_veclen, wtf->infh);
        if (n_read == 0)
            break;

        n_frames = n_read / wtf->in_veclen;
        if (n_read % wtf->in_veclen) {
            E_ERROR("Size of file %d not a multiple of veclen %d\n",
                    n_read, wtf->in_veclen);
            return -1;
        }

        /* Byteswap stuff here if necessary. */
        if (wtf->byteswap) {
            for (k = 0; k < n_read; ++k)
                SWAP_FLOAT32(wtf->feat[0] + k);
        }

        fe_float_to_mfcc(wtf->fe, (float32 **)wtf->feat, wtf->feat, n_frames);

        for (k = 0; k < n_frames; ++k) {
            if (cmd_ln_boolean_r(wtf->config, "-spec2cep")) {
                if (0 == strcmp(cmd_ln_str_r(wtf->config, "-transform"), "legacy"))
                    fe_logspec_to_mfcc(wtf->fe, wtf->feat[k], wtf->feat[k]);
                else
                    fe_logspec_dct2(wtf->fe, wtf->feat[k], wtf->feat[k]);
            }
            else if (cmd_ln_boolean_r(wtf->config, "-cep2spec")) {
                fe_mfcc_dct3(wtf->fe, wtf->feat[k], wtf->feat[k]);
            }
        }

        n_out = (*wtf->ot->output_frames)(wtf, wtf->feat, n_frames);
        if (n_out < 0)
            return -1;
        total += n_out;
    }

    if (fclose(wtf->infh) == EOF)
        E_ERROR_SYSTEM("Failed to close input file");
    wtf->infh = NULL;

    return total;
}
int main(int argc, char *argv[]) { cmd_ln_t *config; config = cmd_ln_parse_r(NULL, defs, argc, argv, TRUE); if (config == NULL) return 1; printf("%d %s %d %f\n", cmd_ln_int32_r(config, "-a"), cmd_ln_str_r(config, "-b") ? cmd_ln_str_r(config, "-b") : "(null)", cmd_ln_boolean_r(config, "-c"), cmd_ln_float64_r(config, "-d")); cmd_ln_free_r(config); config = cmd_ln_init(NULL, NULL, FALSE, "-b", "foobie", NULL); if (config == NULL) return 1; cmd_ln_free_r(config); config = cmd_ln_init(NULL, defs, TRUE, "-b", "foobie", NULL); if (config == NULL) return 1; printf("%d %s %d %f\n", cmd_ln_int32_r(config, "-a"), cmd_ln_str_r(config, "-b") ? cmd_ln_str_r(config, "-b") : "(null)", cmd_ln_boolean_r(config, "-c"), cmd_ln_float64_r(config, "-d")); cmd_ln_free_r(config); config = cmd_ln_init(NULL, NULL, FALSE, "-b", "foobie", NULL); if (config == NULL) return 1; printf("%s\n", cmd_ln_str_r(config, "-b") ? cmd_ln_str_r(config, "-b") : "(null)"); cmd_ln_set_str_r(config, "-b", "blatz"); printf("%s\n", cmd_ln_str_r(config, "-b") ? cmd_ln_str_r(config, "-b") : "(null)"); cmd_ln_free_r(config); return 0; }
/**
 * Replace the decoder's dictionary (and filler dictionary).
 *
 * The new dictionary is loaded through a scratch configuration so the
 * decoder's own configuration is untouched if loading fails.  On
 * success the decoder's -dict/-fdict settings, dictionary and dict2pid
 * are replaced and every search is asked to reinitialise itself.
 *
 * @param ps        Decoder to update.
 * @param dictfile  Path of the main dictionary.
 * @param fdictfile Path of the filler dictionary, or NULL to keep the
 *                  decoder's current -fdict setting.
 * @param format    Not used by this implementation.
 * @return 0 on success, -1 on failure.
 */
int
ps_load_dict(ps_decoder_t *ps, char const *dictfile,
             char const *fdictfile, char const *format)
{
    cmd_ln_t *newconfig;
    dict2pid_t *d2p;
    dict_t *dict;
    hash_iter_t *search_it;

    /* Create a new scratch config to load this dict (so existing one
     * won't be affected if it fails) */
    newconfig = cmd_ln_init(NULL, ps_args(), TRUE, NULL);
    cmd_ln_set_boolean_r(newconfig, "-dictcase",
                         cmd_ln_boolean_r(ps->config, "-dictcase"));
    cmd_ln_set_str_r(newconfig, "-dict", dictfile);
    if (fdictfile)
        cmd_ln_set_str_r(newconfig, "-fdict", fdictfile);
    else
        cmd_ln_set_str_r(newconfig, "-fdict",
                         cmd_ln_str_r(ps->config, "-fdict"));

    /* Try to load it. */
    if ((dict = dict_init(newconfig, ps->acmod->mdef, ps->acmod->lmath)) == NULL) {
        cmd_ln_free_r(newconfig);
        return -1;
    }

    /* Reinit the dict2pid. */
    if ((d2p = dict2pid_build(ps->acmod->mdef, dict)) == NULL) {
        /* The freshly loaded dictionary is not installed anywhere yet,
         * so it has to be released here. */
        dict_free(dict);
        cmd_ln_free_r(newconfig);
        return -1;
    }

    /* Success!  Update the existing config to reflect new dicts and
     * drop everything into place. */
    cmd_ln_free_r(newconfig);
    cmd_ln_set_str_r(ps->config, "-dict", dictfile);
    if (fdictfile)
        cmd_ln_set_str_r(ps->config, "-fdict", fdictfile);
    dict_free(ps->dict);
    ps->dict = dict;
    dict2pid_free(ps->d2p);
    ps->d2p = d2p;

    /* And tell all searches to reconfigure themselves. */
    for (search_it = hash_table_iter(ps->searches); search_it;
         search_it = hash_table_iter_next(search_it)) {
        if (ps_search_reinit(hash_entry_val(search_it->ent), dict, d2p) < 0) {
            hash_table_iter_free(search_it);
            return -1;
        }
    }

    return 0;
}
/**
 * Replace the decoder's dictionary (and filler dictionary).
 *
 * The new dictionary is loaded through a scratch configuration so the
 * decoder's own configuration is untouched if loading fails.  On
 * success the decoder's -dict/-fdict settings, dictionary and dict2pid
 * are replaced and every search is asked to reinitialise itself.
 *
 * @param ps        Decoder to update.
 * @param dictfile  Path of the main dictionary.
 * @param fdictfile Path of the filler dictionary, or NULL to keep the
 *                  decoder's current -fdict setting.
 * @param format    Not used by this implementation.
 * @return 0 on success, -1 if loading fails, or the negative result of
 *         the first search that fails to reinitialise.
 */
int
ps_load_dict(ps_decoder_t *ps, char const *dictfile,
             char const *fdictfile, char const *format)
{
    cmd_ln_t *newconfig;
    dict2pid_t *d2p;
    dict_t *dict;
    gnode_t *gn;
    int rv;

    /* Create a new scratch config to load this dict (so existing one
     * won't be affected if it fails) */
    newconfig = cmd_ln_init(NULL, ps_args(), TRUE, NULL);
    cmd_ln_set_boolean_r(newconfig, "-dictcase",
                         cmd_ln_boolean_r(ps->config, "-dictcase"));
    cmd_ln_set_str_r(newconfig, "-dict", dictfile);
    if (fdictfile)
        cmd_ln_set_str_r(newconfig, "-fdict", fdictfile);
    else
        cmd_ln_set_str_r(newconfig, "-fdict",
                         cmd_ln_str_r(ps->config, "-fdict"));

    /* Try to load it. */
    if ((dict = dict_init(newconfig, ps->acmod->mdef)) == NULL) {
        cmd_ln_free_r(newconfig);
        return -1;
    }

    /* Reinit the dict2pid. */
    if ((d2p = dict2pid_build(ps->acmod->mdef, dict)) == NULL) {
        /* The freshly loaded dictionary is not installed anywhere yet,
         * so it has to be released here. */
        dict_free(dict);
        cmd_ln_free_r(newconfig);
        return -1;
    }

    /* Success!  Update the existing config to reflect new dicts and
     * drop everything into place. */
    cmd_ln_free_r(newconfig);
    cmd_ln_set_str_r(ps->config, "-dict", dictfile);
    if (fdictfile)
        cmd_ln_set_str_r(ps->config, "-fdict", fdictfile);
    dict_free(ps->dict);
    ps->dict = dict;
    dict2pid_free(ps->d2p);
    ps->d2p = d2p;

    /* And tell all searches to reconfigure themselves. */
    for (gn = ps->searches; gn; gn = gnode_next(gn)) {
        ps_search_t *search = gnode_ptr(gn);

        if ((rv = ps_search_reinit(search, dict, d2p)) < 0)
            return rv;
    }

    return 0;
}
/**
 * Set (or clear) the file handle from which senone scores are read.
 *
 * With a non-NULL handle, compallsen is forced to TRUE and the senone
 * file header is read.  With NULL, the frame count is reset and
 * compallsen is restored from the -compallsen option.
 *
 * @return 0 when clearing; otherwise the result of reading the header.
 */
int
acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
{
    acmod->insenfh = senfh;

    if (senfh != NULL) {
        acmod->compallsen = TRUE;
        return acmod_read_senfh_header(acmod);
    }

    acmod->n_feat_frame = 0;
    acmod->compallsen = cmd_ln_boolean_r(acmod->config, "-compallsen");
    return 0;
}
/**
 * Decode one utterance from a file.
 *
 * The input path is -cepdir "/" file -cepext (unset parts are empty).
 * The file is decoded as raw audio when -adcin is set and as feature
 * data otherwise.  If a hypothesis file is open, one line with the
 * hypothesis, utterance id and score is appended to it.
 *
 * @param bd    Batch decoder.
 * @param file  File name relative to -cepdir, without extension.
 * @param uttid Utterance id, or NULL to use @p file.
 * @param sf    Start frame.
 * @param ef    End frame, or -1 for no explicit end.
 * @param al    Passed through to the decoding routine.
 * @return result of the decoding routine, or -1 if the frame range is
 *         invalid or the input file cannot be opened.
 */
int
batch_decoder_decode(batch_decoder_t *bd, char *file, char *uttid,
                     int32 sf, int32 ef, alignment_t *al)
{
    featbuf_t *fb;
    FILE *infh;
    char const *cepdir, *cepext;
    char *infile;
    int rv;

    if (ef != -1 && ef < sf) {
        E_ERROR("End frame %d is < start frame %d\n", ef, sf);
        return -1;
    }

    cepdir = cmd_ln_str_r(bd->config, "-cepdir");
    cepext = cmd_ln_str_r(bd->config, "-cepext");

    /* Build input filename. */
    infile = string_join(cepdir ? cepdir : "",
                         "/", file,
                         cepext ? cepext : "", NULL);
    if (uttid == NULL)
        uttid = file;

    if ((infh = fopen(infile, "rb")) == NULL) {
        E_ERROR_SYSTEM("Failed to open %s", infile);
        /* Release the joined name on the failure path as well. */
        ckd_free(infile);
        return -1;
    }

    fb = search_factory_featbuf(bd->sf);
    gettimeofday(&bd->utt_start, NULL);
    featbuf_producer_start_utt(fb, uttid);

    if (cmd_ln_boolean_r(bd->config, "-adcin"))
        rv = batch_decoder_decode_adc(bd, infh, sf, ef, al);
    else
        rv = batch_decoder_decode_mfc(bd, infh, sf, ef, al);

    featbuf_producer_end_utt(fb);

    if (bd->hypfh) {
        char const *hyp;
        int32 score;

        hyp = search_hyp(bd->fwdflat, &score);
        /* Guard against a missing hypothesis: NULL must not reach %s. */
        fprintf(bd->hypfh, "%s (%s %d)\n", hyp ? hyp : "", uttid, score);
    }

    fclose(infh);
    ckd_free(infile);

    return rv;
}
/**
 * Run an N-best search over the DAG and write the result to a file
 * under -nbestdir.  Nothing is done when -nbestdir is absent or unset.
 *
 * @param srch A void pointer to a search structure.
 * @param dag  The DAG to search; fudge edges are added to it and
 *             filler nodes are bypassed if that has not been done yet.
 * @return always NULL.
 */
glist_t
srch_FLAT_FWD_nbest_impl(void *srch, dag_t * dag)
{
    srch_t *search = (srch_t *) srch;
    srch_FLAT_FWD_graph_t *graph =
        (srch_FLAT_FWD_graph_t *) search->grh->graph_struct;
    cmd_ln_t *cfg;
    float32 bestpathlw;
    float64 lwf;
    char nbestfile[2000];

    assert(graph->lathist);

    cfg = kbcore_config(graph->kbcore);
    if (!cmd_ln_exists_r(cfg, "-nbestdir"))
        return NULL;
    if (!cmd_ln_str_r(cfg, "-nbestdir"))
        return NULL;

    ctl_outfile(nbestfile,
                cmd_ln_str_r(cfg, "-nbestdir"),
                cmd_ln_str_r(cfg, "-nbestext"),
                (search->uttfile ? search->uttfile : search->uttid),
                search->uttid,
                cmd_ln_boolean_r(cfg, "-build_outdirs"));

    /* Language weight factor: -bestpathlw relative to -lw, or 1.0 when
     * -bestpathlw is zero. */
    bestpathlw = cmd_ln_float32_r(cfg, "-bestpathlw");
    if (bestpathlw)
        lwf = bestpathlw / cmd_ln_float32_r(cfg, "-lw");
    else
        lwf = 1.0;

    flat_fwd_dag_add_fudge_edges(graph, dag,
                                 cmd_ln_int32_r(cfg, "-dagfudge"),
                                 cmd_ln_int32_r(cfg, "-min_endfr"),
                                 (void *) graph->lathist,
                                 search->kbc->dict);

    /* Bypass filler nodes */
    if (!dag->filler_removed) {
        /* If Viterbi search terminated in filler word coerce final DAG node to FINISH_WORD */
        if (dict_filler_word(search->kbc->dict, dag->end->wid))
            dag->end->wid = search->kbc->dict->finishwid;

        dag_remove_unreachable(dag);
        if (dag_bypass_filler_nodes(dag, lwf, search->kbc->dict,
                                    search->kbc->fillpen) < 0)
            E_ERROR("maxedge limit (%d) exceeded\n", dag->maxedge);
    }

    dag_compute_hscr(dag, kbcore_dict(search->kbc), kbcore_lm(search->kbc), lwf);
    dag_remove_bypass_links(dag);
    dag->filler_removed = 0;

    nbest_search(dag, nbestfile, search->uttid, lwf,
                 kbcore_dict(search->kbc),
                 kbcore_lm(search->kbc),
                 kbcore_fillpen(search->kbc));

    return NULL;
}
/**
 * Output HTK format header.
 *
 * Writes four fields: the value count, the sample period in 100ns
 * units, the sample size in bytes, and the parameter kind.  Each field
 * is byte-swapped first when -mach_endian is "little".
 *
 * @param wtf    Conversion state (config, veclen, outfh).
 * @param nfloat Value written as the first header field.
 * @return 0 on success, -1 if a write fails or -frate is not positive.
 */
static int
output_header_htk(sphinx_wave2feat_t *wtf, int32 nfloat)
{
    int32 samp_period;
    int32 frate;
    int16 samp_size;
    int16 param_kind;
    int swap = FALSE;

    /* HTK files are big-endian. */
    if (0 == strcmp("little", cmd_ln_str_r(wtf->config, "-mach_endian")))
        swap = TRUE;

    /* Same file size thing as in Sphinx files (I think) */
    if (swap)
        SWAP_INT32(&nfloat);
    if (fwrite(&nfloat, 4, 1, wtf->outfh) != 1)
        return -1;

    /* Sample period in 100ns units.  -frate is read as an integer here,
     * matching how the front end reads it; reading an integer argument
     * through the float accessor does not yield its numeric value.
     * NOTE(review): assumes -frate is declared as an integer argument
     * -- confirm against the argument table. */
    frate = cmd_ln_int32_r(wtf->config, "-frate");
    if (frate <= 0)
        return -1;
    samp_period = (int32)(1e+7 / frate);
    if (swap)
        SWAP_INT32(&samp_period);
    if (fwrite(&samp_period, 4, 1, wtf->outfh) != 1)
        return -1;

    /* Sample size - veclen * sizeof each sample. */
    samp_size = wtf->veclen * 4;
    if (swap)
        SWAP_INT16(&samp_size);
    if (fwrite(&samp_size, 2, 1, wtf->outfh) != 1)
        return -1;

    /* Format and flags. */
    if (cmd_ln_boolean_r(wtf->config, "-logspec")
        || cmd_ln_boolean_r(wtf->config, "-cep2spec"))
        param_kind = FBANK; /* log mel-filter bank outputs */
    else
        param_kind = MFCC | _O; /* MFCC + CEP0 (note reordering...) */
    if (swap)
        SWAP_INT16(&param_kind);
    if (fwrite(&param_kind, 2, 1, wtf->outfh) != 1)
        return -1;

    return 0;
}
/**
 * Read a language model from @p lmpath and register it with the
 * decoder's search under the name @p lmname.
 *
 * The language weight, word insertion penalty and unigram weight come
 * from -lw, -wip and -uw; -ugonly and -bgonly are passed through to the
 * reader.
 */
void
s3_decode_read_lm(s3_decode_t * _decode,
                  const char *lmpath, const char *lmname)
{
    srch_t *search = (srch_t *) _decode->kb.srch;
    int32 dict_words = dict_size(_decode->kb.kbcore->dict);
    cmd_ln_t *cfg = kbcore_config(_decode->kbcore);
    lm_t *model;

    model = lm_read_advance(lmpath, lmname,
                            cmd_ln_float32_r(cfg, "-lw"),
                            cmd_ln_float32_r(cfg, "-wip"),
                            cmd_ln_float32_r(cfg, "-uw"),
                            dict_words,
                            NULL,
                            1,      /* Weight apply */
                            kbcore_logmath(search->kbc),
                            cmd_ln_boolean_r(cfg, "-ugonly"),
                            cmd_ln_boolean_r(cfg, "-bgonly"));

    search->funcs->add_lm(search, model, lmname);
}
int main(int argc, char *argv[]) { cmd_ln_t *config; ngram_model_t *lm = NULL; logmath_t *lmath; const char *lmfn, *probdefn, *lsnfn, *text; if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL) return 1; verbose = cmd_ln_boolean_r(config, "-verbose"); /* Create log math object. */ if ((lmath = logmath_init (cmd_ln_float64_r(config, "-logbase"), 0, 0)) == NULL) { E_FATAL("Failed to initialize log math\n"); } /* Load the language model. */ lmfn = cmd_ln_str_r(config, "-lm"); if (lmfn == NULL || (lm = ngram_model_read(config, lmfn, NGRAM_AUTO, lmath)) == NULL) { E_FATAL("Failed to load language model from %s\n", cmd_ln_str_r(config, "-lm")); } if ((probdefn = cmd_ln_str_r(config, "-probdef")) != NULL) ngram_model_read_classdef(lm, probdefn); ngram_model_apply_weights(lm, cmd_ln_float32_r(config, "-lw"), cmd_ln_float32_r(config, "-wip"), cmd_ln_float32_r(config, "-uw")); /* Now evaluate some text. */ lsnfn = cmd_ln_str_r(config, "-lsn"); text = cmd_ln_str_r(config, "-text"); if (lsnfn) { evaluate_file(lm, lmath, lsnfn); } else if (text) { evaluate_string(lm, lmath, text); } return 0; }
/* * Continuous recognition from a file */ static void recognize_from_file() { int16 adbuf[2048]; const char *fname; const char *hyp; int32 k; uint8 utt_started, in_speech; int32 print_times = cmd_ln_boolean_r(config, "-time"); fname = cmd_ln_str_r(config, "-infile"); if ((rawfd = fopen(fname, "rb")) == NULL) { E_FATAL_SYSTEM("Failed to open file '%s' for reading", fname); } if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) { char waveheader[44]; fread(waveheader, 1, 44, rawfd); if (!check_wav_header(waveheader, (int)cmd_ln_float32_r(config, "-samprate"))) E_FATAL("Failed to process file '%s' due to format mismatch.\n", fname); } if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".mp3") == 0) { E_FATAL("Can not decode mp3 files, convert input file to WAV 16kHz 16-bit mono before decoding.\n"); } ps_start_utt(ps); utt_started = FALSE; while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) { ps_process_raw(ps, adbuf, k, FALSE, FALSE); in_speech = ps_get_in_speech(ps); if (in_speech && !utt_started) { utt_started = TRUE; } if (!in_speech && utt_started) { ps_end_utt(ps); hyp = ps_get_hyp(ps, NULL); if (hyp != NULL) printf("%s\n", hyp); if (print_times) print_word_times(); fflush(stdout); ps_start_utt(ps); utt_started = FALSE; } } ps_end_utt(ps); if (utt_started) { hyp = ps_get_hyp(ps, NULL); if (hyp != NULL) { printf("%s\n", hyp); if (print_times) { print_word_times(); } } } fclose(rawfd); }
/**
 * GObject property getter.
 *
 * Properties held directly on the element (decoder, configured flag,
 * lattice directory) are answered immediately.  Every other known
 * property is mapped to a configuration option and read from
 * ps->config using the accessor for its value type.  Unknown ids
 * trigger the standard invalid-property warning.
 */
static void
gst_pocketsphinx_get_property(GObject * object,
                              guint prop_id,
                              GValue * value, GParamSpec * pspec)
{
    GstPocketSphinx *self = GST_POCKETSPHINX(object);
    const char *str_opt = NULL;
    const char *bool_opt = NULL;
    const char *int_opt = NULL;
    const char *float_opt = NULL;

    switch (prop_id) {
    /* Values stored on the element itself. */
    case PROP_DECODER:
        g_value_set_boxed(value, self->ps);
        return;
    case PROP_CONFIGURED:
        g_value_set_boolean(value, self->ps != NULL);
        return;
    case PROP_LATDIR:
        g_value_set_string(value, self->latdir);
        return;

    /* String-valued options. */
    case PROP_HMM_DIR:    str_opt = "-hmm";    break;
    case PROP_LM_FILE:    str_opt = "-lm";     break;
    case PROP_LMCTL_FILE: str_opt = "-lmctl";  break;
    case PROP_LM_NAME:    str_opt = "-lmname"; break;
    case PROP_DICT_FILE:  str_opt = "-dict";   break;
    case PROP_MLLR_FILE:  str_opt = "-mllr";   break;
    case PROP_FSG_FILE:   str_opt = "-fsg";    break;

    /* Boolean options. */
    case PROP_FWDFLAT:    bool_opt = "-fwdflat";  break;
    case PROP_BESTPATH:   bool_opt = "-bestpath"; break;

    /* Integer options. */
    case PROP_MAXHMMPF:   int_opt = "-maxhmmpf"; break;
    case PROP_MAXWPF:     int_opt = "-maxwpf";   break;
    case PROP_DSRATIO:    int_opt = "-ds";       break;

    /* Floating-point options. */
    case PROP_BEAM:       float_opt = "-beam";  break;
    case PROP_PBEAM:      float_opt = "-pbeam"; break;
    case PROP_WBEAM:      float_opt = "-wbeam"; break;

    default:
        G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
        return;
    }

    /* Exactly one of the option names was set above. */
    if (str_opt != NULL)
        g_value_set_string(value, cmd_ln_str_r(self->config, str_opt));
    else if (bool_opt != NULL)
        g_value_set_boolean(value, cmd_ln_boolean_r(self->config, bool_opt));
    else if (int_opt != NULL)
        g_value_set_int(value, cmd_ln_int32_r(self->config, int_opt));
    else
        g_value_set_double(value, cmd_ln_float_r(self->config, float_opt));
}
int main(int argc, char *argv[]) { char const *cfg; int i; int16 buf[2048]; if (argc == 2) { config = cmd_ln_parse_file_r(NULL, cont_args_def, argv[1], TRUE); } else { config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, FALSE); } /* Handle argument file as -argfile. */ if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) { config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE); } if (config == NULL) return 1; singlefile = cmd_ln_boolean_r(config, "-singlefile"); if ((infile_path = cmd_ln_str_r(config, "-infile")) != NULL) { if ((infile = fopen(infile_path, "rb")) == NULL) { E_FATAL_SYSTEM("Failed to read audio from '%s'", infile_path); return 1; } read_audio = &read_audio_file; /* skip wav header */ read_audio(buf, 44); } else { if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"), (int) cmd_ln_float32_r(config, "-samprate"))) == NULL) { E_FATAL("Failed to open audio device\n"); return 1; } read_audio = &read_audio_adev; printf("Start recording ...\n"); fflush(stdout); if (ad_start_rec(ad) < 0) E_FATAL("Failed to start recording\n"); /* TODO remove this thing */ for (i = 0; i < 5; i++) { sleep_msec(200); read_audio(buf, 2048); } printf("You may speak now\n"); fflush(stdout); } fe = fe_init_auto_r(config); if (fe == NULL) return 1; segment_audio(); if (ad) ad_close(ad); if (infile) fclose(infile); fe_free(fe); cmd_ln_free_r(config); return 0; }
int main(int argc, char *argv[]) { cmd_ln_t *config; ngram_model_t *lm = NULL; logmath_t *lmath; int itype, otype; char const *kase; if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL) return 1; if (cmd_ln_boolean_r(config, "-help")) { usagemsg(argv[0]); } err_set_debug_level(cmd_ln_int32_r(config, "-debug")); /* Create log math object. */ if ((lmath = logmath_init (cmd_ln_float64_r(config, "-logbase"), 0, 0)) == NULL) { E_FATAL("Failed to initialize log math\n"); } if (cmd_ln_str_r(config, "-i") == NULL || cmd_ln_str_r(config, "-i") == NULL) { E_ERROR("Please specify both input and output models\n"); goto error_out; } /* Load the input language model. */ if (cmd_ln_str_r(config, "-ifmt")) { if ((itype = ngram_str_to_type(cmd_ln_str_r(config, "-ifmt"))) == NGRAM_INVALID) { E_ERROR("Invalid input type %s\n", cmd_ln_str_r(config, "-ifmt")); goto error_out; } lm = ngram_model_read(config, cmd_ln_str_r(config, "-i"), itype, lmath); } else { lm = ngram_model_read(config, cmd_ln_str_r(config, "-i"), NGRAM_AUTO, lmath); } /* Guess or set the output language model type. */ if (cmd_ln_str_r(config, "-ofmt")) { if ((otype = ngram_str_to_type(cmd_ln_str_r(config, "-ofmt"))) == NGRAM_INVALID) { E_ERROR("Invalid output type %s\n", cmd_ln_str_r(config, "-ofmt")); goto error_out; } } else { otype = ngram_file_name_to_type(cmd_ln_str_r(config, "-o")); } /* Recode the language model if desired. */ if (cmd_ln_str_r(config, "-ienc")) { if (ngram_model_recode(lm, cmd_ln_str_r(config, "-ienc"), cmd_ln_str_r(config, "-oenc")) != 0) { E_ERROR("Failed to recode language model from %s to %s\n", cmd_ln_str_r(config, "-ienc"), cmd_ln_str_r(config, "-oenc")); goto error_out; } } /* Case fold if requested. 
*/ if ((kase = cmd_ln_str_r(config, "-case"))) { if (0 == strcmp(kase, "lower")) { ngram_model_casefold(lm, NGRAM_LOWER); } else if (0 == strcmp(kase, "upper")) { ngram_model_casefold(lm, NGRAM_UPPER); } else { E_ERROR("Unknown value for -case: %s\n", kase); goto error_out; } } /* Write the output language model. */ if (ngram_model_write(lm, cmd_ln_str_r(config, "-o"), otype) != 0) { E_ERROR("Failed to write language model in format %s to %s\n", ngram_type_to_str(otype), cmd_ln_str_r(config, "-o")); goto error_out; } /* That's all folks! */ ngram_model_free(lm); return 0; error_out: ngram_model_free(lm); return 1; }
/**
 * Create and initialize an acoustic model object.
 *
 * @param config  configuration to use; a reference is retained
 * @param lmath   log-math object stored in the new acmod
 * @param fe      existing front end to share (retained), or NULL to
 *                create one from the configuration
 * @param fcb     existing feature computation object to share
 *                (retained), or NULL to create one
 * @return the new acmod, or NULL if any initialization step fails
 *         (the partially built object is released with acmod_free()).
 */
acmod_t *
acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
{
    acmod_t *acmod;
    char const *params_file;

    acmod = ckd_calloc(1, sizeof(*acmod));
    acmod->config = cmd_ln_retain(config);
    acmod->lmath = lmath;
    acmod->state = ACMOD_IDLE;

    /* Merge in the feature parameter file named by -featparams, if any. */
    params_file = cmd_ln_str_r(acmod->config, "-featparams");
    if (params_file != NULL) {
        if (cmd_ln_parse_file_r(acmod->config, feat_defn,
                                params_file, FALSE) != NULL)
            E_INFO("Parsed model-specific feature parameters from %s\n", params_file);
    }

    /* Front end: build our own when none was supplied, otherwise share
     * the caller's.  Either way it must pass the mismatch check. */
    if (fe == NULL) {
        acmod->fe = fe_init_auto_r(config);
        if (acmod->fe == NULL)
            goto error_out;
        if (acmod_fe_mismatch(acmod, acmod->fe))
            goto error_out;
    }
    else {
        if (acmod_fe_mismatch(acmod, fe))
            goto error_out;
        fe_retain(fe);
        acmod->fe = fe;
    }

    /* Feature computation: same pattern as for the front end. */
    if (fcb == NULL) {
        if (acmod_init_feat(acmod) < 0)
            goto error_out;
    }
    else {
        if (acmod_feat_mismatch(acmod, fcb))
            goto error_out;
        feat_retain(fcb);
        acmod->fcb = fcb;
    }

    /* Acoustic model parameters. */
    if (acmod_init_am(acmod) < 0)
        goto error_out;

    /* The MFCC buffer must cover at least the dynamic feature window. */
    acmod->n_mfc_alloc = acmod->fcb->window_size * 2 + 1;
    acmod->mfc_buf = (mfcc_t **)
        ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize,
                      sizeof(**acmod->mfc_buf));

    /* The feature buffer is the MFCC buffer size plus -pl_window frames. */
    acmod->n_feat_alloc = acmod->n_mfc_alloc + cmd_ln_int32_r(config, "-pl_window");
    acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc);
    acmod->framepos = ckd_calloc(acmod->n_feat_alloc, sizeof(*acmod->framepos));

    acmod->utt_start_frame = 0;

    /* Per-senone storage, sized by the number of senones in the model
     * definition. */
    acmod->senone_scores = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
                                      sizeof(*acmod->senone_scores));
    acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef));
    acmod->senone_active = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
                                      sizeof(*acmod->senone_active));
    acmod->log_zero = logmath_get_zero(acmod->lmath);
    acmod->compallsen = cmd_ln_boolean_r(config, "-compallsen");

    return acmod;

error_out:
    acmod_free(acmod);
    return NULL;
}