/**
 * Create a wave-to-feature converter bound to a configuration.
 *
 * Retains @a config, builds a front end from it with fe_init_auto_r() and
 * selects the entry of the outtypes table whose name equals the "-ofmt"
 * option.
 *
 * @param config Parsed configuration; a reference is retained by the result.
 * @return New converter with refcount 1, or NULL when "-ofmt" is unset or
 *         does not match any known output type.  Failure to create the
 *         front end is reported through E_FATAL().
 */
sphinx_wave2feat_t *
sphinx_wave2feat_init(cmd_ln_t *config)
{
    sphinx_wave2feat_t *wtf;
    char const *ofmt;
    int i;

    wtf = (sphinx_wave2feat_t *)ckd_calloc(1, sizeof(*wtf));
    wtf->refcount = 1;
    wtf->config = cmd_ln_retain(config);
    wtf->fe = fe_init_auto_r(wtf->config);
    if (!wtf->fe) {
        E_FATAL("Failed to create feature extraction\n");
    }

    /* Look the option up once; strcmp() and "%s" must never see NULL. */
    ofmt = cmd_ln_str_r(config, "-ofmt");
    if (ofmt == NULL) {
        E_ERROR("No output type specified (-ofmt)\n");
        sphinx_wave2feat_free(wtf);
        return NULL;
    }

    wtf->ot = outtypes; /* Default (sphinx) type. */
    for (i = 0; i < nouttypes; ++i) {
        output_type_t const *otype = &outtypes[i];
        if (0 == strcmp(ofmt, otype->name)) {
            wtf->ot = otype;
            break;
        }
    }
    /* Loop ran to completion: nothing in the table matched. */
    if (i == nouttypes) {
        E_ERROR("Unknown output type: '%s'\n", ofmt);
        sphinx_wave2feat_free(wtf);
        return NULL;
    }

    return wtf;
}
int main(int _argc, char **_argv) { char *ctrlfn; char *cfgfn; cmd_ln_t *config = NULL; print_appl_info(_argv[0]); if (_argc != 4) { printf("\nUSAGE: %s <ctrlfile> <rawdir> <cfgfile>\n", _argv[0]); return -1; } ctrlfn = _argv[1]; rawdirfn = _argv[2]; cfgfn = _argv[3]; if ((config = cmd_ln_parse_file_r(config, S3_DECODE_ARG_DEFS, cfgfn, TRUE)) == NULL) E_FATAL("Bad configuration file %s.\n", cfgfn); if (s3_decode_init(&decoder, config) != S3_DECODE_SUCCESS) E_FATAL("Failed to initialize live-decoder.\n"); fe = fe_init_auto_r(config); st = decoder.kb.stat; ptmr_init(&(st->tm)); if (ctrlfn) { /* When -ctlfile is speicified, corpus.c will look at -ctl_lm and -ctl_mllr to get the corresponding LM and MLLR for the utterance */ st->tm = ctl_process(ctrlfn, cmd_ln_str_r(config, "-ctl_lm"), cmd_ln_str_r(config, "-ctl_mllr"), cmd_ln_int32_r(config, "-ctloffset"), cmd_ln_int32_r(config, "-ctlcount"), utt_livepretend, &(decoder.kb)); } else { E_FATAL("control file is not specified.\n"); } stat_report_corpus(decoder.kb.stat); s3_decode_close(&decoder); fe_free(fe); return 0; }
/**
 * Thread body: run the front end over chan3.raw, logging to "<arg>.log".
 *
 * Opens a per-thread log file named after the thread argument and stores
 * it under logfp_index, creates a front end from the thread's
 * configuration, reads the whole raw file into memory and passes it to
 * fe_process_utt().
 *
 * @param th Thread handle supplying the argument and configuration.
 * @return 0 on success, -1 if any file, front-end or processing step
 *         fails.  All resources acquired here are released on every path.
 */
static int
process(sbthread_t *th)
{
    FILE *raw = NULL;
    FILE *logfile;
    int16 *buf = NULL;
    mfcc_t **cepbuf = NULL;
    fe_t *fe = NULL;
    size_t nsamps;
    long fsize;
    int32 nfr = 0;
    int rv = -1;
    char outfile[32]; /* room for a 64-bit long plus ".log" */

    snprintf(outfile, sizeof(outfile), "%03ld.log", (long) sbthread_arg(th));
    if ((logfile = fopen(outfile, "w")) == NULL)
        return -1;
    pthread_setspecific(logfp_index, (void *)logfile);

    if ((fe = fe_init_auto_r(sbthread_config(th))) == NULL)
        goto done;
    if ((raw = fopen(TESTDATADIR "/chan3.raw", "rb")) == NULL)
        goto done;

    /* Determine the file size so it can be read in one go. */
    if (fseek(raw, 0, SEEK_END) != 0)
        goto done;
    if ((fsize = ftell(raw)) < 0)
        goto done;
    if (fseek(raw, 0, SEEK_SET) != 0)
        goto done;

    buf = ckd_malloc(fsize);
    if (fread(buf, 1, fsize, raw) != (size_t) fsize)
        goto done;

    /* Two bytes per sample. */
    nsamps = fsize / 2;
    if (fe_process_utt(fe, buf, nsamps, &cepbuf, &nfr) < 0)
        goto done;
    E_INFO("nfr = %d\n", nfr);
    rv = 0;

done:
    if (cepbuf)
        fe_free_2d(cepbuf);
    if (buf)
        ckd_free(buf);
    if (raw)
        fclose(raw);
    if (fe)
        fe_free(fe);
    fclose(logfile);

    return rv;
}
int main(int argc, char *argv[]) { char const *cfg; int i; int16 buf[2048]; if (argc == 2) { config = cmd_ln_parse_file_r(NULL, cont_args_def, argv[1], TRUE); } else { config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, FALSE); } /* Handle argument file as -argfile. */ if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) { config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE); } if (config == NULL) return 1; singlefile = cmd_ln_boolean_r(config, "-singlefile"); if ((infile_path = cmd_ln_str_r(config, "-infile")) != NULL) { if ((infile = fopen(infile_path, "rb")) == NULL) { E_FATAL_SYSTEM("Failed to read audio from '%s'", infile_path); return 1; } read_audio = &read_audio_file; /* skip wav header */ read_audio(buf, 44); } else { if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"), (int) cmd_ln_float32_r(config, "-samprate"))) == NULL) { E_FATAL("Failed to open audio device\n"); return 1; } read_audio = &read_audio_adev; printf("Start recording ...\n"); fflush(stdout); if (ad_start_rec(ad) < 0) E_FATAL("Failed to start recording\n"); /* TODO remove this thing */ for (i = 0; i < 5; i++) { sleep_msec(200); read_audio(buf, 2048); } printf("You may speak now\n"); fflush(stdout); } fe = fe_init_auto_r(config); if (fe == NULL) return 1; segment_audio(); if (ad) ad_close(ad); if (infile) fclose(infile); fe_free(fe); cmd_ln_free_r(config); return 0; }
/**
 * Allocate and initialize an acoustic-model object.
 *
 * @param config Configuration; a reference is retained.
 * @param lmath  Log-math object stored in the result.
 * @param fe     Existing front end to share (retained after a mismatch
 *               check), or NULL to create one from @a config.
 * @param fcb    Existing feature-computation object to share (retained
 *               after a mismatch check), or NULL to create one.
 * @return The new object, or NULL if any mismatch check or
 *         initialization step fails (the partial object is released
 *         with acmod_free()).
 */
acmod_t *
acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
{
    acmod_t *am;
    char const *fparams;

    am = ckd_calloc(1, sizeof(*am));
    am->config = cmd_ln_retain(config);
    am->lmath = lmath;
    am->state = ACMOD_IDLE;

    /* Overlay feat.params from the acoustic model directory, if named. */
    fparams = cmd_ln_str_r(am->config, "-featparams");
    if (fparams
        && cmd_ln_parse_file_r(am->config, feat_defn, fparams, FALSE) != NULL)
        E_INFO("Parsed model-specific feature parameters from %s\n",
               fparams);

    /* Front end: build our own, or share the caller's. */
    if (fe == NULL) {
        am->fe = fe_init_auto_r(config);
        if (am->fe == NULL)
            goto error_out;
        if (acmod_fe_mismatch(am, am->fe))
            goto error_out;
    }
    else {
        if (acmod_fe_mismatch(am, fe))
            goto error_out;
        fe_retain(fe);
        am->fe = fe;
    }

    /* Dynamic feature computation: likewise. */
    if (fcb == NULL) {
        if (acmod_init_feat(am) < 0)
            goto error_out;
    }
    else {
        if (acmod_feat_mismatch(am, fcb))
            goto error_out;
        feat_retain(fcb);
        am->fcb = fcb;
    }

    /* Acoustic model parameters. */
    if (acmod_init_am(am) < 0)
        goto error_out;

    /* MFCC buffer: one full dynamic-feature window (both sides plus
     * the centre frame). */
    am->n_mfc_alloc = 2 * am->fcb->window_size + 1;
    am->mfc_buf = (mfcc_t **)
        ckd_calloc_2d(am->n_mfc_alloc, am->fcb->cepsize,
                      sizeof(**am->mfc_buf));

    /* Feature buffer: the MFCC buffer size plus -pl_window frames. */
    am->n_feat_alloc = am->n_mfc_alloc
        + cmd_ln_int32_r(config, "-pl_window");
    am->feat_buf = feat_array_alloc(am->fcb, am->n_feat_alloc);
    am->framepos = ckd_calloc(am->n_feat_alloc, sizeof(*am->framepos));
    am->utt_start_frame = 0;

    /* Per-senone score and activity storage. */
    am->senone_scores = ckd_calloc(bin_mdef_n_sen(am->mdef),
                                   sizeof(*am->senone_scores));
    am->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(am->mdef));
    am->senone_active = ckd_calloc(bin_mdef_n_sen(am->mdef),
                                   sizeof(*am->senone_active));
    am->log_zero = logmath_get_zero(am->lmath);
    am->compallsen = cmd_ln_boolean_r(config, "-compallsen");

    return am;

error_out:
    acmod_free(am);
    return NULL;
}
int ofxSphinxASR::engineInit(ofAsrEngineArgs *e) { #if defined TARGET_WIN32 char cfg_filename[] = "sphinx.cfg"; char grammarJSGF_filename[] = "grammar.jsgf"; char grammarFSG_filename[] = "grammar.fsg"; #else char cfg_filename[] = "/tmp/sphinx.cfg"; char grammarJSGF_filename[] = "/tmp/grammar.jsgf"; char grammarFSG_filename[] = "/tmp/grammar.fsg"; #endif FILE *cfg_fp = fopen(cfg_filename, "wt"); if (cfg_fp==NULL) return OFXASR_FAIL_WRITE_CONFIG; if ( access(e->sphinxmodel_am.c_str(), 0) != 0 ) return OFXASR_FAIL_READ_FILES; if ( access(e->sphinxmodel_lm.c_str(), 0) != 0 ) return OFXASR_FAIL_READ_FILES; if ( access(e->sphinxmodel_dict.c_str(), 0) != 0 ) return OFXASR_FAIL_READ_FILES; if ( access(e->sphinxmodel_fdict.c_str(), 0) != 0 ) return OFXASR_FAIL_READ_FILES; char cur_path[1024]; getcwd(cur_path, 1024); fprintf(cfg_fp, "-samprate %d\n", e->samplerate); fprintf(cfg_fp, "-hmm %s/%s\n", cur_path, e->sphinxmodel_am.c_str()); fprintf(cfg_fp, "-dict %s/%s\n", cur_path, e->sphinxmodel_dict.c_str()); fprintf(cfg_fp, "-fdict %s/%s\n", cur_path, e->sphinxmodel_fdict.c_str()); fprintf(cfg_fp, "-lm %s/%s\n", cur_path, e->sphinxmodel_lm.c_str()); if (e->sphinx_mode == 2) { if(e->sphinx_candidate_sentences.size() < 1) { printf("Warning: The word list is empty! 
Use mode 4.\n"); e->sphinx_mode = 4; } else { FILE *gram_fp = fopen(grammarJSGF_filename, "wt"); if (gram_fp==NULL) return OFXASR_FAIL_WRITE_CONFIG; fprintf(gram_fp, "#JSGF V1.0;\n\ngrammar cca_gram;\n\npublic <cca_gram> = (\n"); for (int i=0; i<e->sphinx_candidate_sentences.size()-1; i++) { fprintf(gram_fp, "%s |\n", e->sphinx_candidate_sentences[i].c_str()); } fprintf(gram_fp, "%s );\n\n", e->sphinx_candidate_sentences[e->sphinx_candidate_sentences.size()-1].c_str()); fclose(gram_fp); } jsgf_t *jsgf = jsgf_parse_file(grammarJSGF_filename, NULL); if (jsgf == NULL) { printf("Bad jsgf file %s.\n", grammarJSGF_filename); return OFXASR_INVALID_JSGF_GRAMMAR; } fsg_model_t *fsg = get_fsg(jsgf, NULL); fsg_model_writefile(fsg, grammarFSG_filename); fsg_model_free(fsg); jsgf_grammar_free(jsgf); fprintf(cfg_fp, "-fsg %s\n", grammarFSG_filename); } fprintf(cfg_fp, "-op_mode %d\n", e->sphinx_mode); fclose(cfg_fp); err_set_logfp(NULL); // disable logs cmd_ln_t *config = NULL; config = cmd_ln_parse_file_r(config, S3_DECODE_ARG_DEFS, cfg_filename, TRUE); if (config == NULL) { return OFXASR_INVALID_CONFIG; } decoder = new s3_decode_t; if (s3_decode_init(decoder, config) != S3_DECODE_SUCCESS) { return OFXASR_FAIL_INIT_DECODER; } fe = fe_init_auto_r(config); if (fe == NULL) { return OFXASR_FAIL_INIT_FRONTEND; } bEngineInitialed = true; return OFXASR_SUCCESS; }
/**
 * Initialize a knowledge-base structure from a configuration.
 *
 * Zeroes @a kb, creates its kbcore and then, depending on which options
 * exist in @a config, the beam, fast-GMM, phoneme-lookahead, acoustic
 * score, front-end, feature, statistics, adaptation and search
 * structures, and opens the hypothesis output files.
 *
 * @param kb     Structure to fill in; any previous contents are
 *               overwritten.
 * @param config Configuration to read options from.
 *
 * Failures of kbcore, front-end, feature-array and search
 * initialization are reported through E_FATAL().
 */
void
kb_init(kb_t * kb, cmd_ln_t *config)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict2pid_t *d2p;
    int32 cisencnt;

    /* STRUCTURE: Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));
    kb->kbcore = kbcore_init(config);
    if (kb->kbcore == NULL)
        E_FATAL("Initialization of kb failed\n");

    kbcore = kb->kbcore;
    mdef = kbcore_mdef(kbcore);
    d2p = kbcore_dict2pid(kbcore);

    err_set_debug_level(cmd_ln_int32_r(config, "-debug"));

    /* STRUCTURE INITIALIZATION: Initialize the beam data structure */
    if (cmd_ln_exists_r(config, "-ptranskip")) {
        kb->beam = beam_init(cmd_ln_float64_r(config, "-beam"),
                             cmd_ln_float64_r(config, "-pbeam"),
                             cmd_ln_float64_r(config, "-wbeam"),
                             cmd_ln_float64_r(config, "-wend_beam"),
                             cmd_ln_int32_r(config, "-ptranskip"),
                             mdef_n_ciphone(mdef),
                             kbcore->logmath);

        /* REPORT : Report the parameters in the beam data structure */
        if (REPORT_KB)
            beam_report(kb->beam);
    }

    /* STRUCTURE INITIALIZATION: Initialize the fast GMM computation data structure */
    if (cmd_ln_exists_r(config, "-ci_pbeam")) {
        kb->fastgmm = fast_gmm_init(cmd_ln_int32_r(config, "-ds"),
                                    cmd_ln_int32_r(config, "-cond_ds"),
                                    cmd_ln_int32_r(config, "-dist_ds"),
                                    cmd_ln_int32_r(config, "-gs4gs"),
                                    cmd_ln_int32_r(config, "-svq4svq"),
                                    cmd_ln_float64_r(config, "-subvqbeam"),
                                    cmd_ln_float64_r(config, "-ci_pbeam"),
                                    cmd_ln_float64_r(config, "-tighten_factor"),
                                    cmd_ln_int32_r(config, "-maxcdsenpf"),
                                    mdef->n_ci_sen,
                                    kbcore->logmath);

        /* REPORT : Report the parameters in the fast_gmm_t data structure */
        if (REPORT_KB)
            fast_gmm_report(kb->fastgmm);
    }

    /* STRUCTURE INITIALIZATION: Initialize the phoneme lookahead data structure */
    if (cmd_ln_exists_r(config, "-pl_beam")) {
        kb->pl = pl_init(cmd_ln_int32_r(config, "-pheurtype"),
                         cmd_ln_float64_r(config, "-pl_beam"),
                         mdef_n_ciphone(mdef),
                         kbcore->logmath);

        /* REPORT : Report the parameters in the pl_t data structure */
        if (REPORT_KB)
            pl_report(kb->pl);
    }

    /* STRUCTURE INITIALIZATION: Initialize the acoustic score data structure */
    {
        int32 pl_window = 1;

        if (cmd_ln_exists_r(config, "-pl_window"))
            pl_window = cmd_ln_int32_r(config, "-pl_window");

        /* Count the leading senones that map to themselves in cd2cisen.
         * The bound keeps the scan inside the table when every senone
         * does. */
        for (cisencnt = 0;
             cisencnt < mdef->n_sen
             && cisencnt == mdef->cd2cisen[cisencnt];
             cisencnt++)
            ;

        kb->ascr = ascr_init(kbcore_n_mgau(kbcore),
                             kb->kbcore->dict2pid->n_comstate,
                             mdef_n_sseq(mdef),
                             dict2pid_n_comsseq(d2p),
                             pl_window, cisencnt);

        if (REPORT_KB)
            ascr_report(kb->ascr);
    }

    /* Initialize the front end if -adcin is specified */
    if (cmd_ln_exists_r(config, "-adcin") && cmd_ln_boolean_r(config, "-adcin")) {
        if ((kb->fe = fe_init_auto_r(config)) == NULL) {
            E_FATAL("fe_init_auto_r() failed\n");
        }
    }

    /* STRUCTURE INITIALIZATION : The feature vector */
    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore), S3_MAX_FRAMES)) == NULL)
        E_FATAL("feat_array_alloc() failed\n");

    /* STRUCTURE INITIALIZATION : The statistics for the search */
    kb->stat = stat_init();

    /* STRUCTURE INITIALIZATION : The adaptation routines of the search */
    kb->adapt_am = adapt_am_init();

    if (cmd_ln_str_r(config, "-mllr")) {
        kb_setmllr(cmd_ln_str_r(config, "-mllr"),
                   cmd_ln_str_r(config, "-cb2mllr"), kb);
    }

    /* CHECK: make sure when (-cond_ds) is specified, a Gaussian map is also specified */
    if (cmd_ln_int32_r(config, "-cond_ds") > 0 && kb->kbcore->gs == NULL)
        E_FATAL
            ("Conditional Down Sampling require the use of Gaussian Selection map\n");

    /* MEMORY ALLOCATION : Word best score and exit */
    /* Open hypseg file if specified */
    kb->matchsegfp = kb->matchfp = NULL;
    kb->matchsegfp = file_open(cmd_ln_str_r(config, "-hypseg"));
    kb->matchfp = file_open(cmd_ln_str_r(config, "-hyp"));

    if (cmd_ln_exists_r(config, "-hmmdump"))
        kb->hmmdumpfp = cmd_ln_int32_r(config, "-hmmdump") ? stderr : NULL;

    /* STRUCTURE INITIALIZATION : The search data structure, done only
       after kb is initialized kb is acted as a clipboard. */
    if (cmd_ln_exists_r(config, "-op_mode")) {
        /* -op_mode, if set (i.e. not -1), takes precedence over -mode. */
        if (cmd_ln_int32_r(config, "-op_mode") != -1)
            kb->op_mode = cmd_ln_int32_r(config, "-op_mode");
        else
            kb->op_mode = srch_mode_str_to_index(cmd_ln_str_r(config, "-mode"));
        E_INFO("SEARCH MODE INDEX %d\n", kb->op_mode);
        if ((kb->srch = (srch_t *) srch_init(kb, kb->op_mode)) == NULL) {
            E_FATAL("Search initialization failed. Forced exit\n");
        }
        if (REPORT_KB) {
            srch_report(kb->srch);
        }
    }
}
int main(int argc, char *argv[]) { static const arg_t fe_args[] = { waveform_to_cepstral_command_line_macro(), { NULL, 0, NULL, NULL } }; FILE *raw; cmd_ln_t *config; fe_t *fe; int16 buf[1024]; int16 const *inptr; int32 frame_shift, frame_size; mfcc_t **cepbuf1, **cepbuf2, **cptr; int32 nfr, i; size_t nsamp; TEST_ASSERT(config = cmd_ln_parse_r(NULL, fe_args, argc, argv, FALSE)); TEST_ASSERT(fe = fe_init_auto_r(config)); TEST_EQUAL(fe_get_output_size(fe), DEFAULT_NUM_CEPSTRA); fe_get_input_size(fe, &frame_shift, &frame_size); TEST_EQUAL(frame_shift, DEFAULT_FRAME_SHIFT); TEST_EQUAL(frame_size, (int)(DEFAULT_WINDOW_LENGTH*DEFAULT_SAMPLING_RATE)); TEST_ASSERT(raw = fopen(TESTDATADIR "/chan3.raw", "rb")); TEST_EQUAL(0, fe_start_utt(fe)); TEST_EQUAL(1024, fread(buf, sizeof(int16), 1024, raw)); nsamp = 1024; TEST_ASSERT(fe_process_frames(fe, NULL, &nsamp, NULL, &nfr) >= 0); TEST_EQUAL(1024, nsamp); TEST_EQUAL(4, nfr); cepbuf1 = ckd_calloc_2d(5, DEFAULT_NUM_CEPSTRA, sizeof(**cepbuf1)); inptr = &buf[0]; nfr = 1; printf("frame_size %d frame_shift %d\n", frame_size, frame_shift); /* Process the first frame. */ TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[0], &nfr) >= 0); printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, nfr); TEST_EQUAL(nfr, 1); /* Note that this next one won't actually consume any frames * of input, because it already got sufficient overflow * samples last time around. This is implementation-dependent * so we shouldn't actually test for it. 
*/ TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[1], &nfr) >= 0); printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, nfr); TEST_EQUAL(nfr, 1); TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[2], &nfr) >= 0); printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, nfr); TEST_EQUAL(nfr, 1); TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[3], &nfr) >= 0); printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, nfr); TEST_EQUAL(nfr, 1); TEST_ASSERT(fe_end_utt(fe, cepbuf1[4], &nfr) >= 0); printf("nfr %d\n", nfr); TEST_EQUAL(nfr, 1); /* What we *should* test is that the output we get by * processing one frame at a time is exactly the same as what * we get from doing them all at once. So let's do that */ cepbuf2 = ckd_calloc_2d(5, DEFAULT_NUM_CEPSTRA, sizeof(**cepbuf2)); inptr = &buf[0]; nfr = 5; nsamp = 1024; TEST_EQUAL(0, fe_start_utt(fe)); TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cepbuf2, &nfr) >= 0); printf("nfr %d\n", nfr); TEST_EQUAL(nfr, 4); nfr = 1; TEST_ASSERT(fe_end_utt(fe, cepbuf2[4], &nfr) >= 0); printf("nfr %d\n", nfr); TEST_EQUAL(nfr, 1); for (i = 0; i < 5; ++i) { int j; printf("%d: ", i); for (j = 0; j < DEFAULT_NUM_CEPSTRA; ++j) { printf("%.2f,%.2f ", MFCC2FLOAT(cepbuf1[i][j]), MFCC2FLOAT(cepbuf2[i][j])); TEST_EQUAL_FLOAT(cepbuf1[i][j], cepbuf2[i][j]); } printf("\n"); } /* Now, also test to make sure that even if we feed data in * little tiny bits we can still make things work. 
*/ memset(cepbuf2[0], 0, 5 * DEFAULT_NUM_CEPSTRA * sizeof(**cepbuf2)); inptr = &buf[0]; cptr = &cepbuf2[0]; nfr = 5; i = 5; nsamp = 256; TEST_EQUAL(0, fe_start_utt(fe)); TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i) >= 0); printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, i); cptr += i; nfr -= i; i = nfr; nsamp = 256; TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i) >= 0); printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, i); cptr += i; nfr -= i; i = nfr; nsamp = 256; TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i) >= 0); printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, i); cptr += i; nfr -= i; i = nfr; nsamp = 256; TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i) >= 0); printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, i); cptr += i; nfr -= i; TEST_ASSERT(fe_end_utt(fe, *cptr, &nfr) >= 0); printf("nfr %d\n", nfr); TEST_EQUAL(nfr, 1); for (i = 0; i < 5; ++i) { int j; printf("%d: ", i); for (j = 0; j < DEFAULT_NUM_CEPSTRA; ++j) { printf("%.2f,%.2f ", MFCC2FLOAT(cepbuf1[i][j]), MFCC2FLOAT(cepbuf2[i][j])); TEST_EQUAL_FLOAT(cepbuf1[i][j], cepbuf2[i][j]); } printf("\n"); } /* And now, finally, test fe_process_utt() */ inptr = &buf[0]; i = 0; TEST_EQUAL(0, fe_start_utt(fe)); TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0); printf("i %d nfr %d\n", i, nfr); if (nfr) memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr)); ckd_free_2d(cptr); i += nfr; inptr += 256; TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0); printf("i %d nfr %d\n", i, nfr); if (nfr) memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr)); ckd_free_2d(cptr); i += nfr; inptr += 256; TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0); printf("i %d nfr %d\n", i, nfr); if (nfr) memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr)); ckd_free_2d(cptr); i += nfr; inptr += 256; TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0); 
printf("i %d nfr %d\n", i, nfr); if (nfr) memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr)); ckd_free_2d(cptr); i += nfr; inptr += 256; TEST_ASSERT(fe_end_utt(fe, cepbuf2[i], &nfr) >= 0); printf("i %d nfr %d\n", i, nfr); TEST_EQUAL(nfr, 1); for (i = 0; i < 5; ++i) { int j; printf("%d: ", i); for (j = 0; j < DEFAULT_NUM_CEPSTRA; ++j) { printf("%.2f,%.2f ", MFCC2FLOAT(cepbuf1[i][j]), MFCC2FLOAT(cepbuf2[i][j])); TEST_EQUAL_FLOAT(cepbuf1[i][j], cepbuf2[i][j]); } printf("\n"); } ckd_free_2d(cepbuf1); ckd_free_2d(cepbuf2); fclose(raw); fe_free(fe); return 0; }
/**
 * Create a front end from the configuration returned by cmd_ln_get().
 *
 * Convenience wrapper around fe_init_auto_r().
 *
 * @return Whatever fe_init_auto_r() returns for that configuration.
 */
fe_t *
fe_init_auto(void)
{
    return fe_init_auto_r(cmd_ln_get());
}
/**
 * Convert one input file to a feature file.
 *
 * Detects the input type, raises "-nfft" (re-creating the front end)
 * when it is too small for the configured window, sizes the audio and
 * feature buffers, writes a placeholder header, runs the type-specific
 * decoder and finally rewrites the header with the real count.
 *
 * @param wtf     Converter; its per-file fields (infile, outfile, audio,
 *                feat, outfh) are set during the call and reset to NULL
 *                before returning, on success and on failure.
 * @param infile  Path of the file to read.
 * @param outfile Path of the file to write.
 * @return 0 on success, -1 on any failure.
 */
int
sphinx_wave2feat_convert_file(sphinx_wave2feat_t *wtf,
                              char const *infile, char const *outfile)
{
    int nchans, minfft, nfft, nfloat, veclen;
    audio_type_t const *atype;
    int fshift, fsize;
    int rv = -1;

    if (cmd_ln_boolean_r(wtf->config, "-verbose"))
        E_INFO("Converting %s to %s\n", infile, outfile);

    wtf->infile = ckd_salloc(infile);

    /* Detect input file type. */
    if ((atype = detect_audio_type(wtf)) == NULL)
        goto cleanup;

    /* Determine whether to byteswap input. */
    wtf->byteswap = strcmp(cmd_ln_str_r(wtf->config, "-mach_endian"),
                           cmd_ln_str_r(wtf->config, "-input_endian"));

    /* Make sure the FFT size is sufficiently large: the smallest power
     * of two not below the window length in samples. */
    minfft = (int)(cmd_ln_float32_r(wtf->config, "-samprate")
                   * cmd_ln_float32_r(wtf->config, "-wlen") + 0.5);
    for (nfft = 1; nfft < minfft; nfft <<= 1)
        ;
    if (nfft > cmd_ln_int32_r(wtf->config, "-nfft")) {
        E_WARN("Value of -nfft = %d is too small, increasing to %d\n",
               cmd_ln_int32_r(wtf->config, "-nfft"), nfft);
        cmd_ln_set_int32_r(wtf->config, "-nfft", nfft);
        fe_free(wtf->fe);
        wtf->fe = fe_init_auto_r(wtf->config);
        if (wtf->fe == NULL) {
            E_ERROR("Failed to re-create feature extraction\n");
            goto cleanup;
        }
    }

    /* Get the output frame size (if not already set). */
    if (wtf->veclen == 0)
        wtf->veclen = fe_get_output_size(wtf->fe);

    /* Set up the input and output buffers. */
    fe_get_input_size(wtf->fe, &fshift, &fsize);
    /* Want to get at least a whole frame plus shift in here.  Also we
       will either pick or mix multiple channels so we need to read
       them all at once. */
    nchans = cmd_ln_int32_r(wtf->config, "-nchans");
    wtf->blocksize = cmd_ln_int32_r(wtf->config, "-blocksize") * nchans;
    if (wtf->blocksize < (fsize + fshift) * nchans) {
        E_INFO("Block size of %d too small, increasing to %d\n",
               wtf->blocksize,
               (fsize + fshift) * nchans);
        wtf->blocksize = (fsize + fshift) * nchans;
    }
    wtf->audio = (short *)ckd_calloc(wtf->blocksize, sizeof(*wtf->audio));
    wtf->featsize = (wtf->blocksize / nchans - fsize) / fshift;

    /* Use the maximum of the input and output frame sizes to allocate this. */
    veclen = wtf->veclen;
    if (wtf->in_veclen > veclen)
        veclen = wtf->in_veclen;
    wtf->feat = (mfcc_t**)ckd_calloc_2d(wtf->featsize, veclen, sizeof(**wtf->feat));

    /* Let's go! */
    if ((wtf->outfh = fopen(outfile, "wb")) == NULL) {
        E_ERROR_SYSTEM("Failed to open %s for writing", outfile);
        goto cleanup;
    }
    /* Write an empty header, which we'll fill in later. */
    if (wtf->ot->output_header &&
        (*wtf->ot->output_header)(wtf, 0) < 0) {
        E_ERROR_SYSTEM("Failed to write empty header to %s\n", outfile);
        goto cleanup;
    }
    wtf->outfile = ckd_salloc(outfile);

    if ((nfloat = (*atype->decode)(wtf)) < 0) {
        E_ERROR("Failed to convert");
        goto cleanup;
    }

    /* Go back and fill in the header now that the count is known. */
    if (wtf->ot->output_header) {
        if (fseek(wtf->outfh, 0, SEEK_SET) < 0) {
            E_ERROR_SYSTEM("Failed to seek to beginning of %s\n", outfile);
            goto cleanup;
        }
        if ((*wtf->ot->output_header)(wtf, nfloat) < 0) {
            E_ERROR_SYSTEM("Failed to write header to %s\n", outfile);
            goto cleanup;
        }
    }
    rv = 0;

cleanup:
    /* Shared teardown for success and failure. */
    if (wtf->audio)
        ckd_free(wtf->audio);
    if (wtf->feat)
        ckd_free_2d(wtf->feat);
    if (wtf->infile)
        ckd_free(wtf->infile);
    if (wtf->outfile)
        ckd_free(wtf->outfile);

    wtf->audio = NULL;
    wtf->infile = NULL;
    wtf->feat = NULL;
    wtf->outfile = NULL;

    if (wtf->outfh)
        if (fclose(wtf->outfh) == EOF)
            E_ERROR_SYSTEM("Failed to close output file");
    wtf->outfh = NULL;

    return rv;
}
/**
 * Set up the file-scope model objects from a configuration.
 *
 * Creates kbc with its log-math and feature objects, loads the acoustic
 * model with s3_am_init(), optionally creates the front end (when
 * "-adcin" is set), then the dictionary, acoustic-score, fast-GMM and
 * adaptation structures.
 *
 * @param config Configuration to read options from.
 */
static void
models_init(cmd_ln_t *config)
{
    int32 cisencnt;

    kbc = New_kbcore(config);
    /* Check before the first dereference, not after. */
    assert(kbc);

    kbc->logmath = logs3_init(cmd_ln_float64_r(config, "-logbase"), 1,
                              cmd_ln_int32_r(config, "-log3table"));

    /* Initialize feature stream type */
    kbc->fcb = feat_init(cmd_ln_str_r(config, "-feat"),
                         cmn_type_from_str(cmd_ln_str_r(config, "-cmn")),
                         cmd_ln_boolean_r(config, "-varnorm"),
                         agc_type_from_str(cmd_ln_str_r(config, "-agc")),
                         1, cmd_ln_int32_r(config, "-ceplen"));

    s3_am_init(kbc);
    assert(kbc->mdef);
    assert(kbc->tmat);

    /* Initialize the front end if -adcin is specified */
    if (cmd_ln_exists_r(config, "-adcin") && cmd_ln_boolean_r(config, "-adcin")) {
        if ((fe = fe_init_auto_r(config)) == NULL) {
            E_FATAL("fe_init_auto_r() failed\n");
        }
    }

    /* Dictionary */
    dict = dict_init(kbc->mdef,
                     cmd_ln_str_r(config, "-dict"),
                     cmd_ln_str_r(config, "-fdict"),
                     cmd_ln_int32_r(config, "-lts_mismatch"),
                     cmd_ln_boolean_r(config, "-mdef_fillers"),
                     /* Never do mdef filler phones. */
                     FALSE,
                     TRUE);

    /* Count the leading senones that map to themselves in cd2cisen.
     * The bound keeps the scan inside the table when every senone does. */
    for (cisencnt = 0;
         cisencnt < kbc->mdef->n_sen
         && cisencnt == kbc->mdef->cd2cisen[cisencnt];
         cisencnt++)
        ;

    ascr = ascr_init(kbc->mdef->n_sen,
                     0, /* No composite senone */
                     mdef_n_sseq(kbc->mdef),
                     0, /* No composite senone sequence */
                     1, /* Phoneme lookahead window =1. Not enabled phoneme lookahead at this moment */
                     cisencnt);

    fastgmm = fast_gmm_init(cmd_ln_int32_r(config, "-ds"),
                            cmd_ln_int32_r(config, "-cond_ds"),
                            cmd_ln_int32_r(config, "-dist_ds"),
                            cmd_ln_int32_r(config, "-gs4gs"),
                            cmd_ln_int32_r(config, "-svq4svq"),
                            cmd_ln_float64_r(config, "-subvqbeam"),
                            cmd_ln_float64_r(config, "-ci_pbeam"),
                            cmd_ln_float64_r(config, "-tighten_factor"),
                            cmd_ln_int32_r(config, "-maxcdsenpf"),
                            kbc->mdef->n_ci_sen,
                            kbc->logmath);
    adapt_am = adapt_am_init();
}