/**
 * Populate the mel filterbank description from the front-end state and
 * the configuration, then set up the frequency warping function.
 *
 * As a side effect fe->feature_dimension is set: to the number of
 * filters when fe->log_spec is non-zero, otherwise to the number of
 * cepstra.
 *
 * @param config Configuration to read the filterbank options from.
 * @param fe     Front-end whose sampling rate, FFT size and cepstrum
 *               count are copied into the filterbank.
 * @param mel    Filterbank structure to fill in.
 * @return 0 on success, -1 if fe_warp_set() does not return FE_SUCCESS.
 */
static int
fe_parse_melfb_params(cmd_ln_t *config, fe_t *fe, melfb_t * mel)
{
    /* Values inherited from the front-end. */
    mel->sampling_rate = fe->sampling_rate;
    mel->fft_size = fe->fft_size;
    mel->num_cepstra = fe->num_cepstra;

    /* The output dimension depends on whether log spectra are emitted. */
    mel->num_filters = cmd_ln_int32_r(config, "-nfilt");
    fe->feature_dimension = fe->log_spec ? mel->num_filters : fe->num_cepstra;

    /* Remaining filterbank options come straight from the configuration. */
    mel->upper_filt_freq = cmd_ln_float32_r(config, "-upperf");
    mel->lower_filt_freq = cmd_ln_float32_r(config, "-lowerf");
    mel->doublewide = cmd_ln_boolean_r(config, "-doublebw");
    mel->warp_type = cmd_ln_str_r(config, "-warp_type");
    mel->warp_params = cmd_ln_str_r(config, "-warp_params");
    mel->lifter_val = cmd_ln_int32_r(config, "-lifter");
    mel->unit_area = cmd_ln_boolean_r(config, "-unit_area");
    mel->round_filters = cmd_ln_boolean_r(config, "-round_filters");

    /* Warping parameters are only applied once the warp type is accepted. */
    if (fe_warp_set(mel, mel->warp_type) == FE_SUCCESS) {
        fe_warp_set_parameters(mel, mel->warp_params, mel->sampling_rate);
        return 0;
    }

    E_ERROR("Failed to initialize the warping function.\n");
    return -1;
}
/**
 * Batch decoder entry point.
 *
 * Parses the command line, initializes the knowledge base, decodes
 * either the control file given by -ctl or the utterance source given
 * by -utt, reports corpus statistics and releases resources.  The
 * process terminates through exit(0).
 */
int32
main(int32 argc, char *argv[])
{
    kb_t kb;
    stat_t *st;
    cmd_ln_t *config;
    const char *ctlfile;
    const char *uttfile;

    print_appl_info(argv[0]);
    cmd_ln_appl_enter(argc, argv, "default.arg", arg);
    unlimit();

    config = cmd_ln_get();
    kb_init(&kb, config);
    st = kb.stat;
    fprintf(stdout, "\n");

    ctlfile = cmd_ln_str_r(config, "-ctl");
    if (ctlfile != NULL) {
        /* When -ctl is specified, corpus.c will look at -ctl_lm and
           -ctl_mllr to get the corresponding LM and MLLR for the
           utterance */
        st->tm = ctl_process(ctlfile,
                             cmd_ln_str_r(config, "-ctl_lm"),
                             cmd_ln_str_r(config, "-ctl_mllr"),
                             cmd_ln_int32_r(config, "-ctloffset"),
                             cmd_ln_int32_r(config, "-ctlcount"),
                             utt_decode, &kb);
    }
    else if ((uttfile = cmd_ln_str_r(config, "-utt")) != NULL) {
        /* When -utt is specified, corpus.c will wait for the utterance
           to change */
        st->tm = ctl_process_utt(uttfile,
                                 cmd_ln_int32_r(config, "-ctlcount"),
                                 utt_decode, &kb);
    }
    else {
        /* Neither input source was given; nothing can be decoded. */
        E_FATAL("Both -utt and -ctl are not specified.\n");
    }

    /* Close the hypothesis output files if they were opened. */
    if (kb.matchsegfp)
        fclose(kb.matchsegfp);
    if (kb.matchfp)
        fclose(kb.matchfp);

    stat_report_corpus(kb.stat);
    kb_free(&kb);

    /* Dump the process table entry for this program on non-Windows builds. */
#if (! WIN32)
#if defined(_SUN4)
    system("ps -el | grep sphinx3_decode");
#else
    system("ps aguxwww | grep sphinx3_decode");
#endif
#endif

    cmd_ln_free_r(config);
    exit(0);
}
/**
 * Create and configure the dynamic feature computation object of an
 * acoustic model from its configuration.
 *
 * Steps, in order: build acmod->fcb with feat_init(); optionally load
 * an LDA transform (-lda / -ldadim); optionally install a subvector
 * specification (-svspec); optionally set the AGC threshold
 * (-agcthresh, unless -agc is "none"); optionally seed the CMN mean
 * vector from the comma-separated list in -cmninit.
 *
 * @return 0 on success, -1 if feature initialization, LDA loading or
 *         subvector setup fails.
 */
static int
acmod_init_feat(acmod_t *acmod)
{
    cmd_ln_t *config = acmod->config;
    char const *lda;
    char const *svspec;

    acmod->fcb = feat_init(cmd_ln_str_r(config, "-feat"),
                           cmn_type_from_str(cmd_ln_str_r(config, "-cmn")),
                           cmd_ln_boolean_r(config, "-varnorm"),
                           agc_type_from_str(cmd_ln_str_r(config, "-agc")),
                           1, cmd_ln_int32_r(config, "-ceplen"));
    if (acmod->fcb == NULL)
        return -1;

    /* Optional linear feature transformation. */
    lda = cmd_ln_str_r(config, "-lda");
    if (lda != NULL) {
        E_INFO("Reading linear feature transformation from %s\n", lda);
        if (feat_read_lda(acmod->fcb, lda,
                          cmd_ln_int32_r(config, "-ldadim")) < 0)
            return -1;
    }

    /* Optional subvector specification. */
    svspec = cmd_ln_str_r(config, "-svspec");
    if (svspec != NULL) {
        int32 **subvecs;

        E_INFO("Using subvector specification %s\n", svspec);
        subvecs = parse_subvecs(svspec);
        if (subvecs == NULL)
            return -1;
        if (feat_set_subvecs(acmod->fcb, subvecs) < 0)
            return -1;
    }

    /* The AGC threshold only applies when some form of AGC is enabled. */
    if (cmd_ln_exists_r(config, "-agcthresh")
        && strcmp(cmd_ln_str_r(config, "-agc"), "none") != 0) {
        agc_set_threshold(acmod->fcb->agc_struct,
                          cmd_ln_float32_r(config, "-agcthresh"));
    }

    /* Seed the CMN means from a comma-separated list of values. */
    if (acmod->fcb->cmn_struct && cmd_ln_exists_r(config, "-cmninit")) {
        char *copy, *field, *comma;
        int32 idx = 0;

        /* Work on a private copy because commas are overwritten in place. */
        copy = ckd_salloc(cmd_ln_str_r(config, "-cmninit"));
        field = copy;
        for (;;) {
            if (idx >= acmod->fcb->cmn_struct->veclen)
                break;
            comma = strchr(field, ',');
            if (comma == NULL)
                break;
            *comma = '\0';
            acmod->fcb->cmn_struct->cmn_mean[idx] = FLOAT2MFCC(atof_c(field));
            field = comma + 1;
            ++idx;
        }
        /* Last field has no trailing comma; take it if there is room. */
        if (idx < acmod->fcb->cmn_struct->veclen && *field != '\0') {
            acmod->fcb->cmn_struct->cmn_mean[idx] = FLOAT2MFCC(atof_c(field));
        }
        ckd_free(copy);
    }

    return 0;
}
/**
 * Read the general front-end parameters from the configuration into
 * the front-end object and validate the FFT size and transform type.
 *
 * @param config Configuration to read from; also stored in fe->config.
 * @param fe     Front-end object to fill in.
 * @return 0 on success, -1 if the FFT size is not a positive power of
 *         two, if it is smaller than the frame size in samples, or if
 *         -transform is not one of 'dct', 'legacy', 'htk'.
 */
int
fe_parse_general_params(cmd_ln_t *config, fe_t * fe)
{
    int j;
    int frame_size;
    char const *transform;

    fe->config = config;
    fe->sampling_rate = cmd_ln_float32_r(config, "-samprate");
    fe->frame_rate = (int16)cmd_ln_int32_r(config, "-frate");
    if (cmd_ln_boolean_r(config, "-dither")) {
        fe->dither = 1;
        fe->seed = cmd_ln_int32_r(config, "-seed");
    }

    /* Swap only when the input byte order differs from the build's. */
#ifdef WORDS_BIGENDIAN
    fe->swap = strcmp("big", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
#else
    fe->swap = strcmp("little", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
#endif

    fe->window_length = cmd_ln_float32_r(config, "-wlen");
    fe->pre_emphasis_alpha = cmd_ln_float32_r(config, "-alpha");
    fe->num_cepstra = (uint8)cmd_ln_int32_r(config, "-ncep");
    fe->fft_size = (int16)cmd_ln_int32_r(config, "-nfft");

    /* Check FFT size, compute FFT order (log_2(n)).  The non-positive
     * case has to be rejected before the loop: the loop body never
     * runs when the size is <= 1, so a check placed inside it cannot
     * catch it. */
    fe->fft_order = 0;
    if (fe->fft_size <= 0) {
        E_ERROR("fft: number of points must be a power of 2 (is %d)\n",
                fe->fft_size);
        return -1;
    }
    for (j = fe->fft_size; j > 1; j >>= 1, fe->fft_order++) {
        if ((j % 2) != 0) {
            E_ERROR("fft: number of points must be a power of 2 (is %d)\n",
                    fe->fft_size);
            return -1;
        }
    }

    /* Verify that FFT size is greater or equal to window length. */
    frame_size = (int)(fe->window_length * fe->sampling_rate);
    if (fe->fft_size < frame_size) {
        E_ERROR("FFT: Number of points must be greater or equal to frame size (%d samples)\n",
                frame_size);
        return -1;
    }

    fe->remove_dc = cmd_ln_boolean_r(config, "-remove_dc");

    /* Map the transform name onto the corresponding constant. */
    transform = cmd_ln_str_r(config, "-transform");
    if (0 == strcmp(transform, "dct"))
        fe->transform = DCT_II;
    else if (0 == strcmp(transform, "legacy"))
        fe->transform = LEGACY_DCT;
    else if (0 == strcmp(transform, "htk"))
        fe->transform = DCT_HTK;
    else {
        E_ERROR("Invalid transform type (values are 'dct', 'legacy', 'htk')\n");
        return -1;
    }

    /* -smoothspec is tested last, so it wins when both flags are set. */
    if (cmd_ln_boolean_r(config, "-logspec"))
        fe->log_spec = RAW_LOG_SPEC;
    if (cmd_ln_boolean_r(config, "-smoothspec"))
        fe->log_spec = SMOOTH_LOG_SPEC;

    return 0;
}
/**
 * Run the best-path search over a DAG for the flat forward search.
 *
 * Fudge edges are added to the DAG, filler nodes are bypassed if that
 * has not been done yet, and dag_search() is run.  The resulting
 * hypothesis chain is returned as a list.
 *
 * @return A glist_t holding the hypothesis segments in the order of
 *         the chain returned by dag_search(), or NULL if dag_search()
 *         returned no hypothesis.
 */
glist_t
srch_FLAT_FWD_bestpath_impl(void *srch,   /**< A void pointer to a search structure */
                            dag_t * dag)
{
    srch_t *s;
    srch_FLAT_FWD_graph_t *fwg;
    cmd_ln_t *config;
    float32 bestpathlw;
    float64 lwf;
    srch_hyp_t *seg, *bph;
    glist_t ghyp;

    s = (srch_t *) srch;
    fwg = (srch_FLAT_FWD_graph_t *) s->grh->graph_struct;
    assert(fwg->lathist);

    config = kbcore_config(fwg->kbcore);

    /* Language weight factor: ratio of -bestpathlw to -lw, or 1.0 when
       -bestpathlw is zero. */
    bestpathlw = cmd_ln_float32_r(config, "-bestpathlw");
    lwf = bestpathlw ? (bestpathlw / cmd_ln_float32_r(config, "-lw")) : 1.0;

    flat_fwd_dag_add_fudge_edges(fwg, dag,
                                 cmd_ln_int32_r(config, "-dagfudge"),
                                 cmd_ln_int32_r(config, "-min_endfr"),
                                 (void *) fwg->lathist, s->kbc->dict);

    /* Bypass filler nodes */
    if (!dag->filler_removed) {
        /* If Viterbi search terminated in filler word coerce final DAG
           node to FINISH_WORD */
        if (dict_filler_word(s->kbc->dict, dag->end->wid))
            dag->end->wid = s->kbc->dict->finishwid;

        if (dag_bypass_filler_nodes(dag, lwf, s->kbc->dict, s->kbc->fillpen) >= 0)
            dag->filler_removed = 1;
        else
            E_ERROR("maxedge limit (%d) exceeded\n", dag->maxedge);
    }

    bph = dag_search(dag, s->uttid, lwf, dag->end,
                     s->kbc->dict, s->kbc->lmset->cur_lm, s->kbc->fillpen);
    if (bph == NULL)
        return NULL;

    /* glist_add_ptr() is applied per segment and the list reversed
       afterwards so that it follows the chain order. */
    ghyp = NULL;
    for (seg = bph; seg; seg = seg->next)
        ghyp = glist_add_ptr(ghyp, (void *) seg);

    return glist_reverse(ghyp);
}
/**
 * "Detect" Sphinx MFCC files, meaning verify their lousy headers, and
 * set up some parameters from the config object.
 *
 * The first 4 bytes of the file are compared against the number of
 * 4-byte values that follow them.  If they only match after byte
 * swapping, -input_endian is set to the opposite of -mach_endian.  On
 * success the open file, positioned just after the header, is stored
 * in wtf->infh.  On every error return the file is closed again.
 *
 * @return TRUE, or -1 on error.
 */
static int
detect_sphinx_mfc(sphinx_wave2feat_t *wtf)
{
    FILE *fh;
    int32 len;
    long flen;

    if ((fh = fopen(wtf->infile, "rb")) == NULL) {
        E_ERROR_SYSTEM("Failed to open %s", wtf->infile);
        return -1;
    }
    if (fread(&len, 4, 1, fh) != 1) {
        E_ERROR_SYSTEM("Failed to read header from %s\n", wtf->infile);
        fclose(fh);
        return -1;
    }

    /* Determine the file size; a failing seek/tell must not be
     * mistaken for a valid length. */
    if (fseek(fh, 0, SEEK_END) != 0 || (flen = ftell(fh)) < 0) {
        E_ERROR_SYSTEM("Failed to determine size of %s\n", wtf->infile);
        fclose(fh);
        return -1;
    }

    /* figure out whether to byteswap */
    flen = (flen / 4) - 1;
    if (flen != len) {
        /* First make sure this is an endianness problem, otherwise fail. */
        SWAP_INT32(&len);
        if (flen != len) {
            /* Swap back so the header is reported as it was read. */
            SWAP_INT32(&len);
            E_ERROR("Mismatch in header/file lengths: 0x%08x vs 0x%08lx\n",
                    (unsigned int)len, (unsigned long)flen);
            fclose(fh);
            return -1;
        }
        /* Set the input endianness to the opposite of the machine endianness... */
        cmd_ln_set_str_r(wtf->config, "-input_endian",
                         (0 == strcmp("big", cmd_ln_str_r(wtf->config, "-mach_endian"))
                          ? "little" : "big"));
    }

    /* Leave the stream positioned just after the 4-byte header. */
    fseek(fh, 4, SEEK_SET);
    wtf->infh = fh;

    if (cmd_ln_boolean_r(wtf->config, "-spec2cep")) {
        wtf->in_veclen = cmd_ln_int32_r(wtf->config, "-nfilt");
    }
    else if (cmd_ln_boolean_r(wtf->config, "-cep2spec")) {
        wtf->in_veclen = cmd_ln_int32_r(wtf->config, "-ncep");
        wtf->veclen = cmd_ln_int32_r(wtf->config, "-nfilt");
    }
    else {
        /* Should not happen. */
        E_ERROR("Sphinx MFCC file reading requested but -spec2cep/-cep2spec not given\n");
        assert(FALSE);
    }

    return TRUE;
}
/**
 * Process PCM audio from a filehandle.  Assume that wtf->infh is
 * positioned just after the file header.
 *
 * Audio is read in blocks of wtf->blocksize 2-byte samples, optionally
 * byte-swapped and channel-mixed, and pushed through the front-end;
 * every batch of frames produced is handed to the output callback.
 * On the normal path the input file is closed and wtf->infh reset.
 *
 * @return The accumulated return values of the output callback, or -1
 *         as soon as the callback reports a negative value (the input
 *         file is left open in that case).
 */
static int
decode_pcm(sphinx_wave2feat_t *wtf)
{
    size_t nsamp;
    int32 n, nfr, nchans, whichchan;
    uint32 nfloat;

    nchans = cmd_ln_int32_r(wtf->config, "-nchans");
    whichchan = cmd_ln_int32_r(wtf->config, "-whichchan");

    fe_start_utt(wtf->fe);
    nfloat = 0;

    for (;;) {
        size_t nvec;
        int16 const *inspeech;

        nsamp = fread(wtf->audio, 2, wtf->blocksize, wtf->infh);
        if (nsamp == 0)
            break;

        /* Byteswap stuff here if necessary. */
        if (wtf->byteswap) {
            for (n = 0; n < nsamp; ++n)
                SWAP_INT16(wtf->audio + n);
        }

        /* Mix or pick channels. */
        if (nchans > 1)
            nsamp = mixnpick_channels(wtf->audio, nsamp, nchans, whichchan);

        inspeech = wtf->audio;
        nvec = wtf->featsize;

        /* Consume all samples. */
        while (nsamp) {
            nfr = nvec;
            fe_process_frames(wtf->fe, &inspeech, &nsamp, wtf->feat, &nfr, NULL);
            if (!nfr)
                continue;
            n = (*wtf->ot->output_frames)(wtf, wtf->feat, nfr);
            if (n < 0)
                return -1;
            nfloat += n;
        }
    }

    /* Now process any leftover audio frames. */
    fe_end_utt(wtf->fe, wtf->feat[0], &nfr);
    if (nfr) {
        n = (*wtf->ot->output_frames)(wtf, wtf->feat, nfr);
        if (n < 0)
            return -1;
        nfloat += n;
    }

    if (fclose(wtf->infh) == EOF)
        E_ERROR_SYSTEM("Failed to close input file");
    wtf->infh = NULL;

    return nfloat;
}
int main(int argc, char *argv[]) { print_appl_info(argv[0]); cmd_ln_appl_enter(argc, argv, "default.arg", defn); unlimit(); config = cmd_ln_get(); logmath = logs3_init(cmd_ln_float64_r(config, "-logbase"), 1, cmd_ln_int32_r(config, "-log3table")); E_INFO("Value of base %f \n", cmd_ln_float32_r(config, "-logbase")); models_init(); ptmr_init(&tm_utt); if ((inmatchsegfp = fopen(cmd_ln_str_r(config, "-inhypseg"), "r")) == NULL) E_ERROR("fopen(%s,r) failed\n", cmd_ln_str_r(config, "-inhypseg")); if ((outconfmatchsegfp = fopen(cmd_ln_str_r(config, "-output"), "w")) == NULL) E_ERROR("fopen(%s,w) failed\n", cmd_ln_str_r(config, "-output")); if (cmd_ln_str_r(config, "-ctl")) { ctl_process(cmd_ln_str_r(config, "-ctl"), cmd_ln_str_r(config, "-ctl_lm"), NULL, cmd_ln_int32_r(config, "-ctloffset"), cmd_ln_int32_r(config, "-ctlcount"), utt_confidence, NULL); } else { E_FATAL("-ctl is not specified\n"); } #if (! WIN32) system("ps auxwww | grep s3dag"); #endif fclose(outconfmatchsegfp); fclose(inmatchsegfp); models_free(); logmath_free(logmath); cmd_ln_free_r(config); return 0; }
/**
 * Run the N-best search over a DAG for the flat forward search.
 *
 * Nothing is done unless -nbestdir is defined and non-NULL.  The
 * output file name is built with ctl_outfile(), fudge edges are added,
 * filler nodes are bypassed if needed, heuristic scores are computed
 * and nbest_search() is run with the output file name.
 *
 * @return Always NULL.
 */
glist_t
srch_FLAT_FWD_nbest_impl(void *srch,   /**< A void pointer to a search structure */
                         dag_t * dag)
{
    srch_t *s;
    srch_FLAT_FWD_graph_t *fwg;
    cmd_ln_t *config;
    const char *nbestdir;
    float32 bestpathlw;
    float64 lwf;
    char str[2000];

    s = (srch_t *) srch;
    fwg = (srch_FLAT_FWD_graph_t *) s->grh->graph_struct;
    assert(fwg->lathist);

    config = kbcore_config(fwg->kbcore);

    /* No output directory, no N-best lists. */
    if (!cmd_ln_exists_r(config, "-nbestdir"))
        return NULL;
    nbestdir = cmd_ln_str_r(config, "-nbestdir");
    if (nbestdir == NULL)
        return NULL;

    ctl_outfile(str, nbestdir,
                cmd_ln_str_r(config, "-nbestext"),
                (s->uttfile ? s->uttfile : s->uttid),
                s->uttid,
                cmd_ln_boolean_r(config, "-build_outdirs"));

    /* Language weight factor: ratio of -bestpathlw to -lw, or 1.0 when
       -bestpathlw is zero. */
    bestpathlw = cmd_ln_float32_r(config, "-bestpathlw");
    lwf = bestpathlw ? (bestpathlw / cmd_ln_float32_r(config, "-lw")) : 1.0;

    flat_fwd_dag_add_fudge_edges(fwg, dag,
                                 cmd_ln_int32_r(config, "-dagfudge"),
                                 cmd_ln_int32_r(config, "-min_endfr"),
                                 (void *) fwg->lathist, s->kbc->dict);

    /* Bypass filler nodes */
    if (!dag->filler_removed) {
        /* If Viterbi search terminated in filler word coerce final DAG
           node to FINISH_WORD */
        if (dict_filler_word(s->kbc->dict, dag->end->wid))
            dag->end->wid = s->kbc->dict->finishwid;

        dag_remove_unreachable(dag);
        if (dag_bypass_filler_nodes(dag, lwf, s->kbc->dict, s->kbc->fillpen) < 0)
            E_ERROR("maxedge limit (%d) exceeded\n", dag->maxedge);
    }

    dag_compute_hscr(dag, kbcore_dict(s->kbc), kbcore_lm(s->kbc), lwf);
    dag_remove_bypass_links(dag);
    dag->filler_removed = 0;

    nbest_search(dag, str, s->uttid, lwf,
                 kbcore_dict(s->kbc), kbcore_lm(s->kbc), kbcore_fillpen(s->kbc));

    return NULL;
}
int main(int _argc, char **_argv) { char *ctrlfn; char *cfgfn; cmd_ln_t *config = NULL; print_appl_info(_argv[0]); if (_argc != 4) { printf("\nUSAGE: %s <ctrlfile> <rawdir> <cfgfile>\n", _argv[0]); return -1; } ctrlfn = _argv[1]; rawdirfn = _argv[2]; cfgfn = _argv[3]; if ((config = cmd_ln_parse_file_r(config, S3_DECODE_ARG_DEFS, cfgfn, TRUE)) == NULL) E_FATAL("Bad configuration file %s.\n", cfgfn); if (s3_decode_init(&decoder, config) != S3_DECODE_SUCCESS) E_FATAL("Failed to initialize live-decoder.\n"); fe = fe_init_auto_r(config); st = decoder.kb.stat; ptmr_init(&(st->tm)); if (ctrlfn) { /* When -ctlfile is speicified, corpus.c will look at -ctl_lm and -ctl_mllr to get the corresponding LM and MLLR for the utterance */ st->tm = ctl_process(ctrlfn, cmd_ln_str_r(config, "-ctl_lm"), cmd_ln_str_r(config, "-ctl_mllr"), cmd_ln_int32_r(config, "-ctloffset"), cmd_ln_int32_r(config, "-ctlcount"), utt_livepretend, &(decoder.kb)); } else { E_FATAL("control file is not specified.\n"); } stat_report_corpus(decoder.kb.stat); s3_decode_close(&decoder); fe_free(fe); return 0; }
/**
 * Check a front-end object against the acoustic model configuration.
 *
 * Only the output vector dimension is compared at present: the
 * configured -ceplen against fe_get_output_size().
 *
 * @return TRUE if the two disagree (an error is logged), FALSE otherwise.
 */
int
acmod_fe_mismatch(acmod_t *acmod, fe_t *fe)
{
    int32 ceplen = cmd_ln_int32_r(acmod->config, "-ceplen");
    int fe_size = fe_get_output_size(fe);

    /* Output vector dimension needs to be the same. */
    if (ceplen == fe_size) {
        /* Feature parameters need to be the same. */
        /* ... */
        return FALSE;
    }

    E_ERROR("Configured feature length %d doesn't match feature extraction output size %d\n",
            ceplen, fe_size);
    return TRUE;
}
/**
 * Write the N-best hypotheses of the current utterance to a file.
 *
 * The file name is nbestdir + "/" + uttid + the value of -nbestext.
 * Up to -nbest hypotheses are written, one per line, as
 * "<hypothesis> <score>".
 *
 * @param ps       Decoder to take the hypotheses and configuration from.
 * @param nbestdir Directory the output file is created in.
 * @param uttid    Utterance ID used as the file's base name.
 * @return 0 on success, -1 if the output file cannot be opened.
 */
static int
write_nbest(ps_decoder_t *ps, char const *nbestdir, char const *uttid)
{
    cmd_ln_t *config;
    char *outfile;
    FILE *fh;
    ps_nbest_t *nbest;
    int32 i, n, score;
    const char* hyp;

    config = ps_get_config(ps);
    outfile = string_join(nbestdir, "/", uttid,
                          cmd_ln_str_r(config, "-nbestext"), NULL);
    n = cmd_ln_int32_r(config, "-nbest");

    fh = fopen(outfile, "w");
    if (fh == NULL) {
        E_ERROR_SYSTEM("Failed to write a lattice to file %s\n", outfile);
        ckd_free(outfile);
        return -1;
    }
    /* The joined name is heap-allocated and only needed to open the
     * file; release it here so it is not leaked. */
    ckd_free(outfile);

    nbest = ps_nbest(ps, 0, -1, NULL, NULL);
    /* ps_nbest_next() is called before each hypothesis is read; the
     * loop ends when it returns NULL or n hypotheses were written. */
    for (i = 0; i < n && nbest && (nbest = ps_nbest_next(nbest)); i++) {
        hyp = ps_nbest_hyp(nbest, &score);
        fprintf(fh, "%s %d\n", hyp, score);
    }
    /* Still non-NULL if the loop stopped because of the count limit. */
    if (nbest)
        ps_nbest_free(nbest);
    fclose(fh);

    return 0;
}
/**
 * Allocate the filter buffer of a context and attach it to ctx->filtbuf.
 *
 * The frame length in samples is the audio rate from the options
 * divided by the frame rate (-frate) of the first decoder's
 * configuration.
 *
 * @param ctx Context providing the options and the decoder set.
 * @return 0 on success; -1 if any required pointer is NULL, if -frate
 *         is not positive, or if the allocation fails.
 */
int filter_buffer_create(context_t *ctx)
{
    options_t *opts;
    decoder_set_t *decset;
    decoder_t *dec;
    cmd_ln_t *cfg;
    uint32_t rate;
    int32 fps;
    int32_t frlen;
    filter_buf_t *filtbuf;

    if (!ctx || !(opts = ctx->opts) || !(decset = ctx->decset) ||
        !(dec = decset->decs) || !(cfg = dec->cfg))
        return -1;

    rate = opts->rate;
    fps = cmd_ln_int32_r(cfg, "-frate");

    /* A zero or negative frame rate has no usable frame length:
     * dividing by zero yields infinity, and converting that to an
     * integer is undefined.  Reject it before allocating anything. */
    if (fps <= 0)
        return -1;

    if (!(filtbuf = mrp_allocz(sizeof(filter_buf_t))))
        return -1;

    frlen = rate / (double)fps;

    filtbuf->len = 0;
    filtbuf->frlen = frlen;
    filtbuf->fdrec = open_file_for_recording(opts->audio);

    ctx->filtbuf = filtbuf;

    return 0;
}
/**
 * Process PCM audio from a libsndfile file.  FIXME: looks a lot like
 * decode_pcm!  Also needs stereo support (as does decode_pcm).
 *
 * Audio is read in blocks of wtf->blocksize samples, optionally
 * channel-mixed, and pushed through the front-end; every batch of
 * frames produced is handed to the output callback.  On the normal
 * path the sound file is closed and wtf->insfh reset.
 *
 * @return The accumulated return values of the output callback, or -1
 *         as soon as the callback reports a negative value (the sound
 *         file is left open in that case).
 */
static int
decode_sndfile(sphinx_wave2feat_t *wtf)
{
    size_t nsamp;
    int32 nfr, nchans, whichchan;
    int nfloat, n;

    nchans = cmd_ln_int32_r(wtf->config, "-nchans");
    whichchan = cmd_ln_int32_r(wtf->config, "-whichchan");

    fe_start_utt(wtf->fe);
    nfloat = 0;

    for (;;) {
        int16 const *inspeech;
        size_t nvec;

        nsamp = sf_read_short(wtf->insfh, wtf->audio, wtf->blocksize);
        if (nsamp == 0)
            break;

        /* Mix or pick channels. */
        if (nchans > 1)
            nsamp = mixnpick_channels(wtf->audio, nsamp, nchans, whichchan);

        inspeech = wtf->audio;
        nvec = wtf->featsize;

        /* Consume all samples. */
        while (nsamp) {
            nfr = nvec;
            fe_process_frames(wtf->fe, &inspeech, &nsamp, wtf->feat, &nfr, NULL);
            if (!nfr)
                continue;
            n = (*wtf->ot->output_frames)(wtf, wtf->feat, nfr);
            if (n < 0)
                return -1;
            nfloat += n;
        }
    }

    /* Now process any leftover audio frames. */
    fe_end_utt(wtf->fe, wtf->feat[0], &nfr);
    if (nfr) {
        n = (*wtf->ot->output_frames)(wtf, wtf->feat, nfr);
        if (n < 0)
            return -1;
        nfloat += n;
    }

    sf_close(wtf->insfh);
    wtf->insfh = NULL;

    return nfloat;
}
int main(int argc, char *argv[]) { cmd_ln_t *config; config = cmd_ln_parse_r(NULL, defs, argc, argv, TRUE); if (config == NULL) return 1; printf("%d %s %d %f\n", cmd_ln_int32_r(config, "-a"), cmd_ln_str_r(config, "-b") ? cmd_ln_str_r(config, "-b") : "(null)", cmd_ln_boolean_r(config, "-c"), cmd_ln_float64_r(config, "-d")); cmd_ln_free_r(config); config = cmd_ln_init(NULL, NULL, FALSE, "-b", "foobie", NULL); if (config == NULL) return 1; cmd_ln_free_r(config); config = cmd_ln_init(NULL, defs, TRUE, "-b", "foobie", NULL); if (config == NULL) return 1; printf("%d %s %d %f\n", cmd_ln_int32_r(config, "-a"), cmd_ln_str_r(config, "-b") ? cmd_ln_str_r(config, "-b") : "(null)", cmd_ln_boolean_r(config, "-c"), cmd_ln_float64_r(config, "-d")); cmd_ln_free_r(config); config = cmd_ln_init(NULL, NULL, FALSE, "-b", "foobie", NULL); if (config == NULL) return 1; printf("%s\n", cmd_ln_str_r(config, "-b") ? cmd_ln_str_r(config, "-b") : "(null)"); cmd_ln_set_str_r(config, "-b", "blatz"); printf("%s\n", cmd_ln_str_r(config, "-b") ? cmd_ln_str_r(config, "-b") : "(null)"); cmd_ln_free_r(config); return 0; }
/**
 * Entry point of the A* N-best tool.
 *
 * Initializes the log-math tables and models, remembers the N-best
 * output directory, and processes every utterance of the control file
 * (-ctl) with utt_astar().  A missing -ctl is a fatal error.
 */
int
main(int32 argc, char *argv[])
{
    const char *ctlfile;

    cmd_ln_appl_enter(argc, argv, "default.arg", defn);

    config = cmd_ln_get();

    logmath = logs3_init(cmd_ln_float64_r(config, "-logbase"), 1,
                         cmd_ln_int32_r(config, "-log3table"));

    /* Read in input databases */
    models_init();

    ptmr_init(&tm_utt);

    nbestdir = cmd_ln_str_r(config, "-nbestdir");

    ctlfile = cmd_ln_str_r(config, "-ctl");
    if (ctlfile == NULL) {
        E_FATAL("-ctl is not specified\n");
    }
    else {
        ctl_process(ctlfile,
                    cmd_ln_str_r(config, "-ctl_lm"),
                    NULL,
                    cmd_ln_int32_r(config, "-ctloffset"),
                    cmd_ln_int32_r(config, "-ctlcount"),
                    utt_astar, NULL);
    }

    models_free();
    logmath_free(logmath);

#if (! WIN32)
    system("ps aguxwww | grep s3astar");
#endif

    cmd_ln_free_r(config);
    return 0;
}
/**
 * Report the decoder's accumulated timing figures.
 *
 * @param ps          Decoder.
 * @param out_nspeech Receives ps->n_frame divided by the -frate value.
 * @param out_ncpu    Receives ps->perf.t_tot_cpu.
 * @param out_nwall   Receives ps->perf.t_tot_elapsed.
 */
void
ps_get_all_time(ps_decoder_t *ps, double *out_nspeech,
                double *out_ncpu, double *out_nwall)
{
    int32 frames_per_sec = cmd_ln_int32_r(ps->config, "-frate");

    /* Frame count divided by the frame rate. */
    *out_nspeech = (double)ps->n_frame / frames_per_sec;
    *out_ncpu = ps->perf.t_tot_cpu;
    *out_nwall = ps->perf.t_tot_elapsed;
}
/**
 * Check a feature computation object against the acoustic model
 * configuration.
 *
 * Compares the configured -feat name with feat_name() and the
 * configured -ceplen with feat_cepsize().
 *
 * @return TRUE if either differs, FALSE otherwise.
 */
int
acmod_feat_mismatch(acmod_t *acmod, feat_t *fcb)
{
    /* Feature type needs to be the same; so does the input vector
       dimension.  The dimension is only looked at when the type matches. */
    if (strcmp(cmd_ln_str_r(acmod->config, "-feat"), feat_name(fcb)) != 0
        || cmd_ln_int32_r(acmod->config, "-ceplen") != feat_cepsize(fcb))
        return TRUE;

    /* FIXME: Need to check LDA and stuff too. */
    return FALSE;
}
/**
 * (Re)initialize a phone loop search for a new dictionary.
 *
 * The base search is re-initialized first; then the HMM context, the
 * penalty storage and the per-phone HMMs are rebuilt from the acoustic
 * model (anything left from a previous initialization is released
 * first), and the beams and insertion penalty are read from the
 * configuration.
 *
 * @return 0 on success, -1 if the HMM context cannot be created.
 */
static int
phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
{
    phone_loop_search_t *pls = (phone_loop_search_t *)search;
    cmd_ln_t *config = ps_search_config(search);
    acmod_t *acmod = ps_search_acmod(search);
    int phone;

    /* Free old dict2pid, dict, if necessary. */
    ps_search_base_reinit(search, dict, d2p);

    /* Initialize HMM context. */
    if (pls->hmmctx != NULL)
        hmm_context_free(pls->hmmctx);
    pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
                                   acmod->tmat->tp, NULL, acmod->mdef->sseq);
    if (pls->hmmctx == NULL)
        return -1;

    /* Initialize penalty storage */
    pls->n_phones = bin_mdef_n_ciphone(acmod->mdef);
    pls->window = cmd_ln_int32_r(config, "-pl_window");

    if (pls->penalties != NULL)
        ckd_free(pls->penalties);
    pls->penalties = (int32 *)ckd_calloc(pls->n_phones, sizeof(*pls->penalties));

    if (pls->pen_buf != NULL)
        ckd_free_2d(pls->pen_buf);
    pls->pen_buf = (int32 **)ckd_calloc_2d(pls->window, pls->n_phones,
                                           sizeof(**pls->pen_buf));

    /* Initialize phone HMMs, tearing down any existing ones first. */
    if (pls->hmms != NULL) {
        phone = 0;
        while (phone < pls->n_phones) {
            hmm_deinit((hmm_t *)&pls->hmms[phone]);
            ++phone;
        }
        ckd_free(pls->hmms);
    }
    pls->hmms = (hmm_t *)ckd_calloc(pls->n_phones, sizeof(*pls->hmms));
    phone = 0;
    while (phone < pls->n_phones) {
        hmm_init(pls->hmmctx, (hmm_t *)&pls->hmms[phone], FALSE,
                 bin_mdef_pid2ssid(acmod->mdef, phone),
                 bin_mdef_pid2tmatid(acmod->mdef, phone));
        ++phone;
    }

    /* Beams and the insertion penalty go through logmath_log() and are
       shifted right by SENSCR_SHIFT. */
    pls->penalty_weight = cmd_ln_float64_r(config, "-pl_weight");
    pls->beam =
        logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_beam")) >> SENSCR_SHIFT;
    pls->pbeam =
        logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_pbeam")) >> SENSCR_SHIFT;
    pls->pip =
        logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pl_pip")) >> SENSCR_SHIFT;

    E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n",
           pls->beam, pls->pbeam, pls->pip);

    return 0;
}
static int search_cb(search_t *search, search_event_t *evt, void *udata) { batch_decoder_t *bd = (batch_decoder_t *) udata; dict_t *d = search_factory_d2p(bd->sf)->dict; double delta = get_time_delta(bd); double frate = cmd_ln_int32_r(search_config(search), "-frate"); FILE *hypfh = NULL; void *val; if (hash_table_lookup(bd->hypfiles, search_name(search), &val) == 0) hypfh = val; else hypfh = stdout; fprintf(hypfh, "time delta %f ", delta); switch (evt->event) { case SEARCH_PARTIAL_RESULT: { int32 score; seg_iter_t *seg = search_seg_iter(search, &score); fprintf(hypfh, "partial: "); for (; seg; seg = seg_iter_next(seg)) { int sf, ef; seg_iter_times(seg, &sf, &ef); fprintf(hypfh, "%s:%.3f ", dict_basestr(d, seg_iter_wid(seg)), (double) ef / frate); } fprintf(hypfh, "(%s)\n", search_uttid(search)); break; } case SEARCH_START_UTT: fprintf(hypfh, "start %s\n", search_uttid(search)); break; case SEARCH_END_UTT: fprintf(hypfh, "end %s\n", search_uttid(search)); break; case SEARCH_FINAL_RESULT: { int32 score; seg_iter_t *seg = search_seg_iter(search, &score); fprintf(hypfh, "full: "); for (; seg; seg = seg_iter_next(seg)) { int sf, ef; seg_iter_times(seg, &sf, &ef); fprintf(hypfh, "%s:%.3f ", dict_basestr(d, seg_iter_wid(seg)), (double) ef / frate); } fprintf(hypfh, "(%s)\n", search_uttid(search)); break; } } return 0; }
/**
 * Allocate the working storage of the flat-lexicon search pass and
 * read its window parameters from the configuration.
 *
 * When no tree search is configured (ngs->fwdtree is zero) the full
 * expansion list is pre-built from the LM words and the single-phone
 * words are allocated.
 */
void
ngram_fwdflat_init(ngram_search_t *ngs)
{
    int n_words = ps_search_n_words(ngs);
    cmd_ln_t *config;

    /* Word lists hold n_words + 1 entries. */
    ngs->fwdflat_wordlist =
        ckd_calloc(n_words + 1, sizeof(*ngs->fwdflat_wordlist));
    ngs->expand_word_flag = bitvec_alloc(n_words);
    ngs->expand_word_list =
        ckd_calloc(n_words + 1, sizeof(*ngs->expand_word_list));
    ngs->frm_wordlist =
        ckd_calloc(ngs->n_frame_alloc, sizeof(*ngs->frm_wordlist));

    config = ps_search_config(ngs);
    ngs->min_ef_width = cmd_ln_int32_r(config, "-fwdflatefwid");
    ngs->max_sf_win = cmd_ln_int32_r(config, "-fwdflatsfwin");
    E_INFO("fwdflat: min_ef_width = %d, max_sf_win = %d\n",
           ngs->min_ef_width, ngs->max_sf_win);

    /* With a tree search there is nothing more to prepare here. */
    if (ngs->fwdtree)
        return;

    /* No tree-search; pre-build the expansion list, including all LM
       words, and allocate single phone words. */
    ngram_fwdflat_expand_all(ngs);
    ngram_fwdflat_allocate_1ph(ngs);
}
/**
 * Decode one utterance from an MFCC file.
 *
 * Frames sf..ef are read with read_mfc_file() using the configured
 * -ceplen, passed to the feature buffer as a complete utterance, and
 * the frame array is released again.
 *
 * @param bd   Batch decoder.
 * @param infh Open MFCC file.
 * @param sf   Start frame passed to read_mfc_file().
 * @param ef   End frame passed to read_mfc_file().
 * @param al   Not used by this function.
 * @return The result of featbuf_producer_process_cep(), or -1 if the
 *         file cannot be read.
 */
static int
batch_decoder_decode_mfc(batch_decoder_t *bd, FILE *infh,
                         int sf, int ef, alignment_t *al)
{
    featbuf_t *fb = search_factory_featbuf(bd->sf);
    mfcc_t **mfcs;
    int nfr, rv;

    mfcs = read_mfc_file(infh, sf, ef, &nfr,
                         cmd_ln_int32_r(bd->config, "-ceplen"));
    if (mfcs == NULL)
        return -1;

    rv = featbuf_producer_process_cep(fb, mfcs, nfr, TRUE);
    ckd_free_2d(mfcs);

    return rv;
}
/**
 * Make the search registered under the given name the active one.
 *
 * The phone-loop window is set from -pl_window when the search's name
 * equals PS_SEARCH_NGRAM, and to 0 otherwise.
 *
 * @param ps   Decoder.
 * @param name Name of the search to activate.
 * @return 0 on success, -1 if no search with that name exists; the
 *         decoder is left untouched in that case.
 */
int
ps_set_search(ps_decoder_t *ps, const char *name)
{
    ps_search_t *search = ps_find_search(ps, name);

    /* Bail out before touching the search: it must not be passed to
     * ps_search_name() when the lookup failed. */
    if (search == NULL)
        return -1;

    ps->search = search;

    /* Set pl window depending on the search */
    if (!strcmp(PS_SEARCH_NGRAM, ps_search_name(search))) {
        ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window");
    }
    else {
        ps->pl_window = 0;
    }

    return 0;
}
/**
 * Print every word segment of the current hypothesis as
 * "word start end confidence", one per line.
 *
 * Start and end are the segment's frame numbers divided by the -frate
 * value; the confidence is logmath_exp() of the value returned by
 * ps_seg_prob().
 */
static void
print_word_times()
{
    int frame_rate = cmd_ln_int32_r(config, "-frate");
    ps_seg_t *iter;

    for (iter = ps_seg_iter(ps); iter != NULL; iter = ps_seg_next(iter)) {
        int32 sf, ef, pprob;
        float conf;

        ps_seg_frames(iter, &sf, &ef);
        pprob = ps_seg_prob(iter, NULL, NULL, NULL);
        conf = logmath_exp(ps_get_logmath(ps), pprob);

        printf("%s %.3f %.3f %f\n", ps_seg_word(iter),
               ((float)sf / frame_rate),
               ((float) ef / frame_rate),
               conf);
    }
}
void ofApp::process_result() { int frame_rate = cmd_ln_int32_r(config, "-frate"); ps_seg_t *iter = ps_seg_iter(ps, NULL); printf("\n\n"); while (iter != NULL) { int32 sf, ef, pprob; float conf; ps_seg_frames(iter, &sf, &ef); pprob = ps_seg_prob(iter, NULL, NULL, NULL); conf = logmath_exp(ps_get_logmath(ps), pprob); //here is where we process the word new_utterance new_utt; new_utt.conf = conf; new_utt.sf = sf; new_utt.st = (float)sf / frame_rate; new_utt.ef = ef; new_utt.et = (float) ef / frame_rate; new_utt.utt = ps_seg_word(iter); printf("Recognised: %s %.3f %.3f %f\n", ps_seg_word(iter), new_utt.st, new_utt.et, new_utt.conf); std::string word = ps_seg_word(iter); result.push_back(new_utt); iter = ps_seg_next(iter); } printf("\n\n"); engineExit(); }
static void test_set_search() { cmd_ln_t *config = default_config(); ps_decoder_t *ps = ps_init(config); ps_search_iter_t *itor; jsgf_t *jsgf = jsgf_parse_file(DATADIR "/goforward.gram", NULL); fsg_model_t *fsg = jsgf_build_fsg(jsgf, jsgf_get_rule(jsgf, "goforward.move"), ps->lmath, cmd_ln_int32_r(config, "-lw")); TEST_ASSERT(!ps_set_fsg(ps, "goforward", fsg)); fsg_model_free(fsg); TEST_ASSERT(!ps_set_jsgf_file(ps, "goforward_other", DATADIR "/goforward.gram")); ngram_model_t *lm = ngram_model_read(config, DATADIR "/tidigits/lm/tidigits.lm.dmp", NGRAM_AUTO, ps->lmath); TEST_ASSERT(!ps_set_lm(ps, "tidigits", lm)); ngram_model_free(lm); TEST_ASSERT(!ps_set_search(ps, "tidigits")); TEST_ASSERT(!ps_set_search(ps, "goforward")); itor = ps_search_iter(ps); TEST_EQUAL(0, strcmp("goforward_other", ps_search_iter_val(itor))); itor = ps_search_iter_next(itor); TEST_EQUAL(0, strcmp("tidigits", ps_search_iter_val(itor))); itor = ps_search_iter_next(itor); TEST_EQUAL(0, strcmp("goforward", ps_search_iter_val(itor))); itor = ps_search_iter_next(itor); TEST_EQUAL(0, strcmp("phone_loop", ps_search_iter_val(itor))); itor = ps_search_iter_next(itor); TEST_EQUAL(NULL, itor); TEST_ASSERT(!ps_start_utt(ps)); TEST_ASSERT(!ps_end_utt(ps)); ps_free(ps); cmd_ln_free_r(config); }
/**
 * Allocate the debugging options of the flat forward search and fill
 * them in from the configuration.
 *
 * The word named by -tracewhmm (if any) is looked up in the
 * dictionary.  Each of the four dump frame numbers defaults to
 * 0x7ffffff0 and is replaced by the corresponding option when that
 * option is non-zero.
 *
 * @return The newly allocated structure.
 */
static fwd_dbg_t *
init_fwd_dbg(srch_FLAT_FWD_graph_t * fwg)
{
    const char *tmpstr;
    fwd_dbg_t *fd;
    cmd_ln_t *config;
    int32 frame;

    fd = (fwd_dbg_t *) ckd_calloc(1, sizeof(fwd_dbg_t));
    assert(fd);

    config = kbcore_config(fwg->kbcore);

    /* Word to be traced in detail */
    tmpstr = cmd_ln_str_r(config, "-tracewhmm");
    if (tmpstr == NULL) {
        fd->trace_wid = BAD_S3WID;
    }
    else {
        fd->trace_wid = dict_wordid(fwg->kbcore->dict, tmpstr);
        if (NOT_S3WID(fd->trace_wid))
            E_ERROR("%s not in dictionary; cannot be traced\n", tmpstr);
    }

    /* Active words to be dumped for debugging after and before the
       given frame nos, if any */
    frame = cmd_ln_int32_r(config, "-worddumpsf");
    fd->word_dump_sf = frame ? frame : (int32) 0x7ffffff0;

    frame = cmd_ln_int32_r(config, "-worddumpef");
    fd->word_dump_ef = frame ? frame : (int32) 0x7ffffff0;

    /* Active HMMs to be dumped for debugging after and before the
       given frame nos, if any */
    frame = cmd_ln_int32_r(config, "-hmmdumpsf");
    fd->hmm_dump_sf = frame ? frame : (int32) 0x7ffffff0;

    frame = cmd_ln_int32_r(config, "-hmmdumpef");
    fd->hmm_dump_ef = frame ? frame : (int32) 0x7ffffff0;

    return fd;
}
/**
 * Report the accumulated timing of the flat-lexicon pass and release
 * its working storage.
 *
 * The xRT figures are the total CPU and elapsed times divided by the
 * total frame count expressed in units of the -frate value.
 */
void
ngram_fwdflat_deinit(ngram_search_t *ngs)
{
    int32 frate = cmd_ln_int32_r(ps_search_config(ngs), "-frate");
    double n_speech = (double)ngs->n_tot_frame / frate;

    E_INFO("TOTAL fwdflat %.2f CPU %.3f xRT\n",
           ngs->fwdflat_perf.t_tot_cpu,
           ngs->fwdflat_perf.t_tot_cpu / n_speech);
    E_INFO("TOTAL fwdflat %.2f wall %.3f xRT\n",
           ngs->fwdflat_perf.t_tot_elapsed,
           ngs->fwdflat_perf.t_tot_elapsed / n_speech);

    /* Free single-phone words if we allocated them. */
    if (!ngs->fwdtree)
        ngram_fwdflat_free_1ph(ngs);

    ckd_free(ngs->fwdflat_wordlist);
    bitvec_free(ngs->expand_word_flag);
    ckd_free(ngs->expand_word_list);
    ckd_free(ngs->frm_wordlist);
}
/**
 * Initialize a decoder object from a configuration.
 *
 * The knowledge base is initialized, the per-utterance state is reset,
 * and the byte-swap flag, the partial-hypothesis dump flag and the raw
 * file extension are read from the configuration.
 *
 * @param _decode Decoder to initialize.
 * @param _config Configuration to initialize it from.
 * @return S3_DECODE_SUCCESS, or S3_DECODE_ERROR_NULL_POINTER if
 *         _decode is NULL.
 */
int
s3_decode_init(s3_decode_t * _decode, cmd_ln_t *_config)
{
    if (_decode == NULL)
        return S3_DECODE_ERROR_NULL_POINTER;

    /* capture decoder parameters */
    kb_init(&_decode->kb, _config);

    /* initialize decoder variables */
    _decode->kbcore = _decode->kb.kbcore;
    _decode->hyp_frame_num = -1;
    _decode->uttid = NULL;
    _decode->state = S3_DECODE_STATE_IDLE;
    _decode->hyp_str = NULL;
    _decode->hyp_segs = NULL;

    /* Non-zero when the machine and input byte orders are named
       differently. */
    _decode->swap = strcmp(cmd_ln_str_r(_config, "-machine_endian"),
                           cmd_ln_str_r(_config, "-input_endian"));
    if (!_decode->swap)
        E_INFO("Input data will NOT be byte swapped\n");
    else
        E_INFO("Input data WILL be byte swapped\n");

    _decode->phypdump = (cmd_ln_int32_r(_config, "-phypdump"));
    if (!_decode->phypdump)
        E_INFO("Partial hypothesis will NOT be dumped\n");
    else
        E_INFO("Partial hypothesis WILL be dumped\n");

    _decode->rawext = (cmd_ln_str_r(_config, "-rawext"));

    return S3_DECODE_SUCCESS;
}
void ngram_fwdflat_finish(ngram_search_t *ngs) { int32 cf; destroy_fwdflat_chan(ngs); destroy_fwdflat_wordlist(ngs); bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs)); /* This is the number of frames processed. */ cf = ps_search_acmod(ngs)->output_frame; /* Add a mark in the backpointer table for one past the final frame. */ ngram_search_mark_bptable(ngs, cf); ptmr_stop(&ngs->fwdflat_perf); /* Print out some statistics. */ if (cf > 0) { double n_speech = (double)(cf + 1) / cmd_ln_int32_r(ps_search_config(ngs), "-frate"); E_INFO("%8d words recognized (%d/fr)\n", ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1)); E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt, (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1)); E_INFO("%8d channels searched (%d/fr)\n", ngs->st.n_fwdflat_chan, ngs->st.n_fwdflat_chan / (cf + 1)); E_INFO("%8d words searched (%d/fr)\n", ngs->st.n_fwdflat_words, ngs->st.n_fwdflat_words / (cf + 1)); E_INFO("%8d word transitions (%d/fr)\n", ngs->st.n_fwdflat_word_transition, ngs->st.n_fwdflat_word_transition / (cf + 1)); E_INFO("fwdflat %.2f CPU %.3f xRT\n", ngs->fwdflat_perf.t_cpu, ngs->fwdflat_perf.t_cpu / n_speech); E_INFO("fwdflat %.2f wall %.3f xRT\n", ngs->fwdflat_perf.t_elapsed, ngs->fwdflat_perf.t_elapsed / n_speech); }