int32 live_utt_decode_block (int16 *samples, int32 nsamples, int32 live_endutt, partialhyp_t **ohyp) { static int32 live_begin_new_utt = 1; static int32 frmno; float32 **live_feat; int32 live_nfr, live_nfeatvec; int32 nwds; /* int32 id; */ /* unreferenced variable */ /* glist_t hyp; */ /* unreferenced variable */ /* gnode_t *gn; */ /* unreferenced variable */ /* hyp_t *h; */ /* unreferenced variable */ /* dict_t *dict; */ /* unreferenced variable */ float32 **mfcbuf; if (live_begin_new_utt){ fe_start_utt(fe); utt_begin (kb); frmno = 0; kb->nfr = 0; kb->utt_hmm_eval = 0; kb->utt_sen_eval = 0; kb->utt_gau_eval = 0; live_begin_new_utt = 0; } sample_blk++; /* 10.jan.01 RAH, fe_process_utt now requires ***mfcbuf and it allocates the memory internally) */ mfcbuf = NULL; live_nfr = fe_process_utt(fe, samples, nsamples, &mfcbuf); /* */ if (live_endutt) /* RAH, It seems that we shouldn't throw out this data */ fe_end_utt(fe,dummyframe); /* Flush out the fe */ /* Compute feature vectors */ live_nfeatvec = feat_s2mfc2feat_block(kbcore_fcb(kbcore), mfcbuf, live_nfr, live_begin_new_utt, live_endutt, &live_feat); E_INFO ("live_nfeatvec: %ld\n",live_nfeatvec); /* decode the block */ if (sample_blk<=START_BLOCK) single_utt_decode_block (live_feat, live_nfeatvec, &frmno, kb, maxwpf, maxhistpf, maxhmmpf, ptranskip, hmmdumpfp); else utt_decode_block (live_feat, live_nfeatvec, &frmno, kb, maxwpf, maxhistpf, maxhmmpf, ptranskip, hmmdumpfp); /* Pull out partial hypothesis */ nwds = live_get_partialhyp(live_endutt); *ohyp = parthyp; /* Clean up */ if (live_endutt) { live_begin_new_utt = 1; kb->tot_fr += kb->nfr; utt_end(kb); } else { live_begin_new_utt = 0; } /* I'm starting to think that fe_process_utt should not be allocating its memory, that or it should allocate some max and just keep on going, this idea of constantly allocating freeing memory seems dangerous to me.*/ ckd_free_2d((void **) mfcbuf); /* RAH, this must be freed since fe_process_utt allocates it */ return(nwds); }
/*
 * Create and fully initialise a front end from a parsed configuration.
 *
 * Returns the new front end (refcount 1), or NULL when the general
 * parameters cannot be parsed or the FFT size is smaller than the frame
 * size.  On those failures the partially built object is released with
 * fe_free().
 */
fe_t *
fe_init_auto_r(cmd_ln_t *config)
{
    fe_t *fe;

    fe = ckd_calloc(1, sizeof(*fe));
    fe->refcount = 1;

    /* General parameters first; they are read through a retained
     * reference to the configuration. */
    if (fe_parse_general_params(cmd_ln_retain(config), fe) < 0) {
        fe_free(fe);
        return NULL;
    }

    /* Derived parameters.  Adding 0.5 before the cast rounds to the
     * nearest integer instead of truncating (2.3 -> 2, 3.7 -> 4). */
    fe->frame_shift = (int32) (fe->sampling_rate / fe->frame_rate + 0.5);
    fe->frame_size = (int32) (fe->window_length * fe->sampling_rate + 0.5);
    fe->prior = 0;
    fe->frame_counter = 0;

    assert (fe->frame_shift > 1);

    /* A frame must fit into the FFT. */
    if (fe->fft_size < fe->frame_size) {
        E_WARN
            ("Number of FFT points has to be a power of 2 higher than %d\n",
             (fe->frame_size));
        fe_free(fe);
        return NULL;
    }

    if (fe->dither) {
        fe_init_dither(fe->seed);
    }

    /* Overflow sample buffer and the window table (half a frame of
     * entries is allocated for the window). */
    fe->overflow_samps = ckd_calloc(fe->frame_size, sizeof(int16));
    fe->hamming_window = ckd_calloc(fe->frame_size / 2, sizeof(window_t));
    fe_create_hamming(fe->hamming_window, fe->frame_size);

    /* Mel filter bank: allocate, fill in its parameters, build it. */
    fe->mel_fb = ckd_calloc(1, sizeof(*fe->mel_fb));
    fe_parse_melfb_params(config, fe, fe->mel_fb);
    fe_build_melfilters(fe->mel_fb);
    fe_compute_melcosine(fe->mel_fb);

    /* Scratch buffers for speech, FFT frame, spectrum and mel spectrum. */
    /* FIXME: Gosh there are a lot of these. */
    fe->spch = ckd_calloc(fe->frame_size, sizeof(*fe->spch));
    fe->frame = ckd_calloc(fe->fft_size, sizeof(*fe->frame));
    fe->spec = ckd_calloc(fe->fft_size, sizeof(*fe->spec));
    fe->mfspec = ckd_calloc(fe->mel_fb->num_filters, sizeof(*fe->mfspec));

    /* Twiddle-factor tables for the FFT. */
    fe->ccc = ckd_calloc(fe->fft_size / 4, sizeof(*fe->ccc));
    fe->sss = ckd_calloc(fe->fft_size / 4, sizeof(*fe->sss));
    fe_create_twiddle(fe);

    if (cmd_ln_boolean_r(config, "-verbose")) {
        fe_print_current(fe);
    }

    /*** Z.A.B. ***/
    /*** Initialize the overflow buffers ***/
    fe_start_utt(fe);

    return fe;
}
/*
 * Create and fully initialise a front end (including VAD state) from a
 * parsed configuration.
 *
 * Returns the new front end (refcount 1).  Returns NULL, after releasing
 * the partially built object with fe_free(), when the general parameters
 * cannot be parsed, when the FFT size is smaller than the frame size, or
 * when the upper filter frequency exceeds half the sampling rate (plus a
 * tolerance of 1.0).
 */
fe_t *
fe_init_auto_r(cmd_ln_t *config)
{
    fe_t *fe;
    int prespch_frame_len;

    fe = (fe_t *) ckd_calloc(1, sizeof(*fe));
    fe->refcount = 1;

    /* General parameters first; they are read through a retained
     * reference to the configuration. */
    if (fe_parse_general_params(cmd_ln_retain(config), fe) < 0) {
        fe_free(fe);
        return NULL;
    }

    /* Derived parameters.  Adding 0.5 before the cast rounds to the
     * nearest integer instead of truncating (2.3 -> 2, 3.7 -> 4). */
    fe->frame_shift = (int32) (fe->sampling_rate / fe->frame_rate + 0.5);
    fe->frame_size = (int32) (fe->window_length * fe->sampling_rate + 0.5);
    fe->prior = 0;

    fe_start_stream(fe);

    assert (fe->frame_shift > 1);

    /* A frame must fit into the FFT. */
    if (fe->fft_size < fe->frame_size) {
        E_ERROR
            ("Number of FFT points has to be a power of 2 higher than %d, it is %d\n",
             fe->frame_size, fe->fft_size);
        fe_free(fe);
        return NULL;
    }

    if (fe->dither) {
        fe_init_dither(fe->seed);
    }

    /* Overflow sample buffer and the window table (half a frame of
     * entries is allocated for the window). */
    fe->overflow_samps = ckd_calloc(fe->frame_size, sizeof(int16));
    fe->hamming_window = ckd_calloc(fe->frame_size / 2, sizeof(window_t));
    fe_create_hamming(fe->hamming_window, fe->frame_size);

    /* Mel filter bank: allocate and fill in its parameters. */
    fe->mel_fb = ckd_calloc(1, sizeof(*fe->mel_fb));
    fe_parse_melfb_params(config, fe, fe->mel_fb);

    /* Reject a filter bank that reaches above samprate/2. */
    if (fe->mel_fb->upper_filt_freq > fe->sampling_rate / 2 + 1.0) {
        E_ERROR("Upper frequency %.1f is higher than samprate/2 (%.1f)\n",
                fe->mel_fb->upper_filt_freq, fe->sampling_rate / 2);
        fe_free(fe);
        return NULL;
    }

    fe_build_melfilters(fe->mel_fb);
    fe_compute_melcosine(fe->mel_fb);

    /* Noise statistics are only needed for noise or silence removal. */
    if (fe->remove_noise || fe->remove_silence) {
        fe->noise_stats = fe_init_noisestats(fe->mel_fb->num_filters);
    }

    /* VAD state and its pre-speech buffer.  The buffered frame length is
     * the filter count for raw log-spectrum output, the cepstrum count
     * otherwise. */
    fe->vad_data = (vad_data_t *) ckd_calloc(1, sizeof(*fe->vad_data));
    if (fe->log_spec == RAW_LOG_SPEC) {
        prespch_frame_len = fe->mel_fb->num_filters;
    }
    else {
        prespch_frame_len = fe->num_cepstra;
    }
    fe->vad_data->prespch_buf =
        fe_prespch_init(fe->prespch_len + 1, prespch_frame_len,
                        fe->frame_shift);

    /* Scratch buffers for speech, FFT frame, spectrum and mel spectrum. */
    /* FIXME: Gosh there are a lot of these. */
    fe->spch = ckd_calloc(fe->frame_size, sizeof(*fe->spch));
    fe->frame = ckd_calloc(fe->fft_size, sizeof(*fe->frame));
    fe->spec = ckd_calloc(fe->fft_size, sizeof(*fe->spec));
    fe->mfspec = ckd_calloc(fe->mel_fb->num_filters, sizeof(*fe->mfspec));

    /* Twiddle-factor tables for the FFT. */
    fe->ccc = ckd_calloc(fe->fft_size / 4, sizeof(*fe->ccc));
    fe->sss = ckd_calloc(fe->fft_size / 4, sizeof(*fe->sss));
    fe_create_twiddle(fe);

    if (cmd_ln_boolean_r(config, "-verbose")) {
        fe_print_current(fe);
    }

    /*** Initialize the overflow buffers ***/
    fe_start_utt(fe);

    return fe;
}
/*
 * Align a single utterance against its transcript.
 *
 * data    the cmd_ln_t configuration (passed as an opaque pointer).
 * ur      per-utterance resources (input file, optional MLLR names).
 * sf, ef  start/end frames, forwarded to feat_s2mfc2feat() for cepstral
 *         input.
 * uttid   utterance id from the control file.
 *
 * One line is read from the transcript file per call.  A trailing
 * "(uttid)" is cut off the transcript and compared with the control-file
 * id; a mismatch is reported but does not stop processing.  Features come
 * either from waveform input (-adcin) or from a cepstrum file.  The result
 * is handed to align_utt() when at least one frame was produced.
 */
static void
utt_align(void *data, utt_res_t * ur, int32 sf, int32 ef, char *uttid)
{
    int32 nfr;
    int k, i;
    const char *cepdir;
    const char *cepext;
    char sent[16384];
    cmd_ln_t *config = (cmd_ln_t *) data;

    cepdir = cmd_ln_str_r(kbc->config, "-cepdir");
    cepext = cmd_ln_str_r(kbc->config, "-cepext");

    /* UGLY! */
    /* Read utterance transcript and match it with the control file. */
    if (fgets(sent, sizeof(sent), sentfp) == NULL) {
        E_FATAL("EOF(%s) of the transcription\n", sentfile);
    }
    /*  E_INFO("SENT %s\n",sent); */

    /* Strip utterance id from the end of the transcript: first skip
     * trailing whitespace, then look for a closing ')' and its '('. */
    for (k = strlen(sent) - 1;
         (k > 0) && ((sent[k] == '\n') || (sent[k] == '\t')
                     || (sent[k] == ' ')); --k);
    if ((k > 0) && (sent[k] == ')')) {
        for (--k; (k >= 0) && (sent[k] != '('); --k);
        if ((k >= 0) && (sent[k] == '(')) {
            /* Terminate the transcript where the "(uttid)" starts. */
            sent[k] = '\0';

            /* Check that uttid in transcript and control file match */
            for (i = ++k;
                 sent[i] && (sent[i] != ')') &&
                 (sent[i] != '\n') && (sent[i] != '\t')
                 && (sent[i] != ' '); i++);
            sent[i] = '\0';
            if (id_cmp(sent + k, uttid) != 0)
                E_ERROR
                    ("Uttid mismatch: ctlfile = \"%s\"; transcript = \"%s\"\n",
                     uttid, sent + k);
        }
    }

    /* Convert input file to cepstra if waveform input is selected */
    if (cmd_ln_boolean_r(config, "-adcin")) {
        int16 *adcdata;
        int32 nsamps = 0;
        mfcc_t **mfcc;

        if ((adcdata = bio_read_wavfile(cmd_ln_str_r(config, "-cepdir"),
                                        ur->uttfile,
                                        cmd_ln_str_r(config, "-cepext"),
                                        cmd_ln_int32_r(config, "-adchdr"),
                                        strcmp(cmd_ln_str_r(config, "-input_endian"), "big"),
                                        &nsamps)) == NULL) {
            E_FATAL("Cannot read file %s\n", ur->uttfile);
        }
        fe_start_utt(fe);
        if (fe_process_utt(fe, adcdata, nsamps, &mfcc, &nfr) < 0) {
            /* The file name was passed without a conversion specifier and
             * therefore never printed; name the file in the message. */
            E_FATAL("MFCC calculation failed for %s\n", ur->uttfile);
        }
        ckd_free(adcdata);
        if (nfr > S3_MAX_FRAMES) {
            E_FATAL("Maximum number of frames (%d) exceeded\n",
                    S3_MAX_FRAMES);
        }
        if ((nfr = feat_s2mfc2feat_live(kbcore_fcb(kbc),
                                        mfcc,
                                        &nfr, TRUE, TRUE, feat)) < 0) {
            E_FATAL("Feature computation failed\n");
        }
        if (mfcc)
            ckd_free_2d((void **) mfcc);
    }
    else {
        nfr = feat_s2mfc2feat(kbcore_fcb(kbc), ur->uttfile, cepdir, cepext,
                              sf, ef, feat, S3_MAX_FRAMES);
    }

    /* Apply per-utterance MLLR to whichever acoustic model type is loaded. */
    if (ur->regmatname) {
        if (kbc->mgau)
            adapt_set_mllr(adapt_am, kbc->mgau, ur->regmatname,
                           ur->cb2mllrname, kbc->mdef, kbc->config);
        else if (kbc->ms_mgau)
            model_set_mllr(kbc->ms_mgau, ur->regmatname, ur->cb2mllrname,
                           kbcore_fcb(kbc), kbc->mdef, kbc->config);
        else
            E_WARN("Can't use MLLR matrices with .s2semi. yet\n");
    }

    if (nfr <= 0) {
        if (cepdir != NULL) {
            E_ERROR
                ("Utt %s: Input file read (%s) with dir (%s) and extension (%s) failed \n",
                 uttid, ur->uttfile, cepdir, cepext);
        }
        else {
            E_ERROR
                ("Utt %s: Input file read (%s) with extension (%s) failed \n",
                 uttid, ur->uttfile, cepext);
        }
    }
    else {
        E_INFO("%s: %d input frames\n", uttid, nfr);
        align_utt(sent, nfr, ur->uttfile, uttid);
    }
}
void segment_audio() { FILE *file; int16 pcm_buf[BLOCKSIZE]; mfcc_t **cep_buf; int16 *voiced_buf; int32 voiced_nsamps, out_frameidx, uttstart = 0; char file_name[1024]; uint8 cur_vad_state, vad_state, writing; int uttno, uttlen, sample_rate; int32 nframes, nframes_tmp; int16 frame_size, frame_shift, frame_rate; size_t k; sample_rate = (int) cmd_ln_float32_r(config, "-samprate"); frame_rate = cmd_ln_int32_r(config, "-frate"); frame_size = (int32) (cmd_ln_float32_r(config, "-wlen") * sample_rate + 0.5); frame_shift = (int32) (sample_rate / cmd_ln_int32_r(config, "-frate") + 0.5); nframes = (BLOCKSIZE - frame_size) / frame_shift; cep_buf = (mfcc_t **) ckd_calloc_2d(nframes, fe_get_output_size(fe), sizeof(mfcc_t)); uttno = 0; uttlen = 0; cur_vad_state = 0; voiced_nsamps = 0; writing = 0; file = NULL; voiced_buf = NULL; fe_start_stream(fe); fe_start_utt(fe); while ((k = read_audio(pcm_buf, BLOCKSIZE)) > 0) { int16 const *pcm_buf_tmp; pcm_buf_tmp = &pcm_buf[0]; while (k) { nframes_tmp = nframes; fe_process_frames_ext(fe, &pcm_buf_tmp, &k, cep_buf, &nframes_tmp, &voiced_buf, &voiced_nsamps, &out_frameidx); if (out_frameidx > 0) { uttstart = out_frameidx; } vad_state = fe_get_vad_state(fe); if (!cur_vad_state && vad_state) { /* silence->speech transition, time to start new file */ uttno++; if (!singlefile) { sprintf(file_name, "%s%04d.raw", infile_path, uttno); if ((file = fopen(file_name, "wb")) == NULL) E_FATAL_SYSTEM("Failed to open '%s' for writing", file_name); } else { sprintf(file_name, "%s.raw", infile_path); if ((file = fopen(file_name, "ab")) == NULL) E_FATAL_SYSTEM("Failed to open '%s' for writing", file_name); } writing = 1; } if (writing && file && voiced_nsamps > 0) { fwrite(voiced_buf, sizeof(int16), voiced_nsamps, file); uttlen += voiced_nsamps; } if (cur_vad_state && !vad_state) { /* speech -> silence transition, time to finish file */ fclose(file); printf("Utterance %04d: file %s start %.1f sec length %d samples ( %.2f sec )\n", uttno, file_name, 
((double) uttstart) / frame_rate, uttlen, ((double) uttlen) / sample_rate); fflush(stdout); fe_end_utt(fe, cep_buf[0], &nframes_tmp); writing = 0; uttlen = 0; voiced_nsamps = 0; fe_start_utt(fe); } cur_vad_state = vad_state; } } if (writing) { fclose(file); printf("Utterance %04d: file %s start %.1f sec length %d samples ( %.2f sec )\n", uttno, file_name, ((double) uttstart) / frame_rate, uttlen, ((double) uttlen) / sample_rate); fflush(stdout); } fe_end_utt(fe, cep_buf[0], &nframes); ckd_free_2d(cep_buf); }
/*
 * Decode DATADIR "/goforward.raw" three ways and check each result against
 * the expected hypothesis:
 *   1. ps_decode_raw() on the open file,
 *   2. ps_process_raw() fed in 2048-sample blocks,
 *   3. ps_process_cep() fed one MFCC frame at a time, with the MFCCs
 *      computed here through the decoder's front end.
 *
 * config    decoder configuration; released at the end of this function.
 * sname     label used in the printed result lines.
 * expected  hypothesis string every pass must produce.
 *
 * Returns 0; the TEST_* macros handle failures.
 */
int
ps_decoder_test(cmd_ln_t *config, char const *sname, char const *expected)
{
    ps_decoder_t *ps;
    mfcc_t **cepbuf;
    FILE *rawfh;
    int16 *buf;
    int16 const *bptr;
    size_t nread;
    size_t nsamps;
    int32 nfr, i, score, prob;
    char const *hyp;
    char const *uttid;
    double n_speech, n_cpu, n_wall;
    ps_seg_t *seg;

    TEST_ASSERT(ps = ps_init(config));

    /* Test it first with pocketsphinx_decode_raw() */
    TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
    ps_decode_raw(ps, rawfh, "goforward", -1);
    hyp = ps_get_hyp(ps, &score, &uttid);
    /* Fail cleanly instead of handing NULL to printf("%s") / strcmp(). */
    TEST_ASSERT(hyp != NULL);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);
    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    /* Test it with ps_process_raw() */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_SET);
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    nsamps = 2048;
    buf = ckd_calloc(nsamps, sizeof(*buf));
    /* Loop on the read count rather than feof(): a read error returns 0
     * without setting EOF, which would otherwise spin forever. */
    while ((nread = fread(buf, sizeof(*buf), nsamps, rawfh)) > 0) {
        ps_process_raw(ps, buf, nread, FALSE, FALSE);
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    TEST_ASSERT(hyp != NULL);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000000"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    /* Now read the whole file and produce an MFCC buffer. */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_END);
    nsamps = ftell(rawfh) / sizeof(*buf);
    fseek(rawfh, 0, SEEK_SET);
    bptr = buf = ckd_realloc(buf, nsamps * sizeof(*buf));
    TEST_EQUAL(nsamps, fread(buf, sizeof(*buf), nsamps, rawfh));
    /* First call has no output buffer: it only fills in nfr, which sizes
     * cepbuf (one extra row is reserved for fe_end_utt()). */
    fe_process_frames(ps->acmod->fe, &bptr, &nsamps, NULL, &nfr, NULL);
    cepbuf = ckd_calloc_2d(nfr + 1,
                           fe_get_output_size(ps->acmod->fe),
                           sizeof(**cepbuf));
    fe_start_utt(ps->acmod->fe);
    fe_process_frames(ps->acmod->fe, &bptr, &nsamps, cepbuf, &nfr, NULL);
    fe_end_utt(ps->acmod->fe, cepbuf[nfr], &i);

    /* Decode it with process_cep() */
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    for (i = 0; i < nfr; ++i) {
        ps_process_cep(ps, cepbuf + i, 1, FALSE, FALSE);
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    TEST_ASSERT(hyp != NULL);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000001"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);

    /* Dump every segment of the last result. */
    for (seg = ps_seg_iter(ps, &score); seg; seg = ps_seg_next(seg)) {
        char const *word;
        int sf, ef;
        int32 post, lscr, ascr, lback;

        word = ps_seg_word(seg);
        ps_seg_frames(seg, &sf, &ef);
        post = ps_seg_prob(seg, &ascr, &lscr, &lback);
        printf("%s (%d:%d) P(w|o) = %f ascr = %d lscr = %d lback = %d\n",
               word, sf, ef, logmath_exp(ps_get_logmath(ps), post),
               ascr, lscr, lback);
        /* Due to numerical errors with float it sometimes could go out of 0 */
        TEST_ASSERT(post <= 2);
    }

    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);
    ps_get_all_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("TOTAL: %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("TOTAL: %.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    fclose(rawfh);
    ps_free(ps);
    cmd_ln_free_r(config);
    ckd_free_2d(cepbuf);
    ckd_free(buf);

    return 0;
}
/*
 * Decode one complete utterance.
 *
 * data    the kb_t knowledge base (passed as an opaque pointer).
 * ur      per-utterance resources (input file, optional LM and MLLR names).
 * sf, ef  start/end frames, forwarded to feat_s2mfc2feat() for cepstral
 *         input.
 * uttid   utterance id; recorded in the kb and used in log output.
 *
 * Features are computed for the whole utterance, either from waveform
 * input (-adcin) or from a cepstrum file, and then decoded in one
 * utt_decode_block() call bracketed by utt_begin()/utt_end().
 */
void
utt_decode(void *data, utt_res_t * ur, int32 sf, int32 ef, char *uttid)
{
    kb_t *kb;
    kbcore_t *kbcore;
    cmd_ln_t *config;
    int32 num_decode_frame;
    int32 total_frame;
    stat_t *st;
    srch_t *s;

    num_decode_frame = 0;
    E_INFO("Processing: %s\n", uttid);

    kb = (kb_t *) data;
    kbcore = kb->kbcore;
    config = kbcore_config(kbcore);
    kb_set_uttid(uttid, ur->uttfile, kb);
    st = kb->stat;

    /* Convert input file to cepstra if waveform input is selected */
    if (cmd_ln_boolean_r(config, "-adcin")) {
        int16 *adcdata;
        int32 nsamps = 0;

        if ((adcdata = bio_read_wavfile(cmd_ln_str_r(config, "-cepdir"),
                                        ur->uttfile,
                                        cmd_ln_str_r(config, "-cepext"),
                                        cmd_ln_int32_r(config, "-adchdr"),
                                        strcmp(cmd_ln_str_r(config, "-input_endian"), "big"),
                                        &nsamps)) == NULL) {
            E_FATAL("Cannot read file %s\n", ur->uttfile);
        }
        /* Drop the MFCC buffer kept from the previous utterance. */
        if (kb->mfcc) {
            ckd_free_2d((void **) kb->mfcc);
        }
        fe_start_utt(kb->fe);
        if (fe_process_utt(kb->fe, adcdata, nsamps,
                           &kb->mfcc, &total_frame) < 0) {
            /* The file name was passed without a conversion specifier and
             * therefore never printed; name the file in the message. */
            E_FATAL("MFCC calculation failed for %s\n", ur->uttfile);
        }
        ckd_free(adcdata);
        if (total_frame > S3_MAX_FRAMES) {
            E_FATAL("Maximum number of frames (%d) exceeded\n",
                    S3_MAX_FRAMES);
        }
        if ((total_frame = feat_s2mfc2feat_live(kbcore_fcb(kbcore),
                                                kb->mfcc,
                                                &total_frame,
                                                TRUE, TRUE,
                                                kb->feat)) < 0) {
            E_FATAL("Feature computation failed\n");
        }
    }
    else {
        /* Read mfc file and build feature vectors for entire utterance */
        if ((total_frame = feat_s2mfc2feat(kbcore_fcb(kbcore), ur->uttfile,
                                           cmd_ln_str_r(config, "-cepdir"),
                                           cmd_ln_str_r(config, "-cepext"),
                                           sf, ef, kb->feat,
                                           S3_MAX_FRAMES)) < 0) {
            E_FATAL("Cannot read file %s. Forced exit\n", ur->uttfile);
        }
    }

    /* Also need to make sure we don't set resource if it is the same. Well, this
       mechanism could be provided inside the following function.
     */
    s = kb->srch;
    if (ur->lmname != NULL)
        srch_set_lm(s, ur->lmname);
    if (ur->regmatname != NULL)
        kb_setmllr(ur->regmatname, ur->cb2mllrname, kb);

    /* These are necessary! */
    s->uttid = kb->uttid;
    s->uttfile = kb->uttfile;

    utt_begin(kb);
    utt_decode_block(kb->feat, total_frame, &num_decode_frame, kb);
    utt_end(kb);

    /* Accumulate this utterance's frame count into the running total. */
    st->tot_fr += st->nfr;
}
/*
 * Acoustic-model regression test.
 *
 * DATADIR "/goforward.raw" is scored through the acoustic model in several
 * ways.  The best senone score of each of the first 190 frames from the
 * first (incremental raw) pass is recorded in bestsen1[]; every later pass
 * must reproduce those scores (checked with TEST_EQUAL_LOG) and must
 * deliver frames with consecutive indices starting at 0.
 *
 * Passes, in order: incremental raw audio in 2048-sample blocks, whole
 * utterance raw audio, incremental MFCC, whole utterance MFCC, and a
 * rewind of the last pass.
 */
int
main(int argc, char *argv[])
{
    acmod_t *acmod;
    logmath_t *lmath;
    cmd_ln_t *config;
    FILE *rawfh;
    int16 *buf;
    int16 const *bptr;
    mfcc_t **cepbuf, **cptr;
    size_t nread, nsamps;
    int nfr;
    int frame_counter;
    /* Reference scores from the first pass; only indices below 190 are
     * ever written or compared. */
    int bestsen1[270];

    lmath = logmath_init(1.0001, 0, 0);
    config = cmd_ln_init(NULL, ps_args(), TRUE,
                         "-mdef", MODELDIR "/en-us/en-us/mdef",
                         "-mean", MODELDIR "/en-us/en-us/means",
                         "-var", MODELDIR "/en-us/en-us/variances",
                         "-tmat", MODELDIR "/en-us/en-us/transition_matrices",
                         "-sendump", MODELDIR "/en-us/en-us/sendump",
                         "-compallsen", "true",
                         "-cmn", "prior",
                         "-tmatfloor", "0.0001",
                         "-mixwfloor", "0.001",
                         "-varfloor", "0.0001",
                         "-mmap", "no",
                         "-topn", "4",
                         "-ds", "1",
                         "-input_endian", "little",
                         "-samprate", "16000", NULL);
    TEST_ASSERT(config);
    cmd_ln_parse_file_r(config, ps_args(), MODELDIR "/en-us/en-us/feat.params", FALSE);

    TEST_ASSERT(acmod = acmod_init(config, lmath, NULL, NULL));
    /* The CMN prior is reset before every pass below. */
    cmn_prior_set(acmod->fcb->cmn_struct, prior);

    nsamps = 2048;
    frame_counter = 0;
    buf = ckd_calloc(nsamps, sizeof(*buf));
    TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
    TEST_EQUAL(0, acmod_start_utt(acmod));

    /* Pass 1: feed raw audio block by block and record the reference
     * scores. */
    E_INFO("Incremental(2048):\n");
    while (!feof(rawfh)) {
        nread = fread(buf, sizeof(*buf), nsamps, rawfh);
        bptr = buf;
        /* Keep calling until the block is consumed and no frames remain. */
        while ((nfr = acmod_process_raw(acmod, &bptr, &nread, FALSE)) > 0 || nread > 0) {
            int16 best_score;
            int frame_idx = -1, best_senid;
            while (acmod->n_feat_frame > 0) {
                acmod_score(acmod, &frame_idx);
                acmod_advance(acmod);
                best_score = acmod_best_score(acmod, &best_senid);
                E_INFO("Frame %d best senone %d score %d\n",
                       frame_idx, best_senid, best_score);
                TEST_EQUAL(frame_counter, frame_idx);
                if (frame_counter < 190)
                    bestsen1[frame_counter] = best_score;
                ++frame_counter;
                /* -1 is passed in again on every iteration; acmod_score()
                 * fills in the index of the frame it scored. */
                frame_idx = -1;
            }
        }
    }
    TEST_EQUAL(0, acmod_end_utt(acmod));
    nread = 0;
    /* Drain the frames still queued after the utterance was ended. */
    {
        int16 best_score;
        int frame_idx = -1, best_senid;
        while (acmod->n_feat_frame > 0) {
            acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            if (frame_counter < 190)
                bestsen1[frame_counter] = best_score;
            TEST_EQUAL(frame_counter, frame_idx);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* Now try to process the whole thing at once. */
    E_INFO("Whole utterance:\n");
    cmn_prior_set(acmod->fcb->cmn_struct, prior);
    /* The stream is at end of file here, so ftell() gives the file size. */
    nsamps = ftell(rawfh) / sizeof(*buf);
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_SET);
    buf = ckd_realloc(buf, nsamps * sizeof(*buf));
    TEST_EQUAL(nsamps, fread(buf, sizeof(*buf), nsamps, rawfh));
    bptr = buf;
    TEST_EQUAL(0, acmod_start_utt(acmod));
    acmod_process_raw(acmod, &bptr, &nsamps, TRUE);
    TEST_EQUAL(0, acmod_end_utt(acmod));
    {
        int16 best_score;
        int frame_idx = -1, best_senid;
        /* Restart the count; it doubles as the index into bestsen1[]. */
        frame_counter = 0;
        while (acmod->n_feat_frame > 0) {
            acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            TEST_EQUAL(frame_counter, frame_idx);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* Now process MFCCs and make sure we get the same results. */
    /* frame_counter still holds the frame count of the previous pass and
     * sizes the cepstrum buffer. */
    cepbuf = ckd_calloc_2d(frame_counter,
                           fe_get_output_size(acmod->fe),
                           sizeof(**cepbuf));
    fe_start_utt(acmod->fe);
    nsamps = ftell(rawfh) / sizeof(*buf);
    bptr = buf;
    nfr = frame_counter;
    fe_process_frames(acmod->fe, &bptr, &nsamps, cepbuf, &nfr, NULL);
    /* Output of fe_end_utt() goes into the last row of cepbuf. */
    fe_end_utt(acmod->fe, cepbuf[frame_counter-1], &nfr);

    E_INFO("Incremental(MFCC):\n");
    cmn_prior_set(acmod->fcb->cmn_struct, prior);
    TEST_EQUAL(0, acmod_start_utt(acmod));
    cptr = cepbuf;
    nfr = frame_counter;
    frame_counter = 0;
    while ((acmod_process_cep(acmod, &cptr, &nfr, FALSE)) > 0) {
        int16 best_score;
        int frame_idx = -1, best_senid;
        while (acmod->n_feat_frame > 0) {
            acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            TEST_EQUAL(frame_counter, frame_idx);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            ++frame_counter;
            frame_idx = -1;
        }
    }
    TEST_EQUAL(0, acmod_end_utt(acmod));
    /* One more call with zero frames after the utterance was ended, then
     * drain whatever is still queued. */
    nfr = 0;
    acmod_process_cep(acmod, &cptr, &nfr, FALSE);
    {
        int16 best_score;
        int frame_idx = -1, best_senid;
        while (acmod->n_feat_frame > 0) {
            acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            TEST_EQUAL(frame_counter, frame_idx);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* Note that we have to process the whole thing again because
     * !#@$@ s2mfc2feat modifies its argument (not for long) */
    fe_start_utt(acmod->fe);
    nsamps = ftell(rawfh) / sizeof(*buf);
    bptr = buf;
    nfr = frame_counter;
    fe_process_frames(acmod->fe, &bptr, &nsamps, cepbuf, &nfr, NULL);
    fe_end_utt(acmod->fe, cepbuf[frame_counter-1], &nfr);

    E_INFO("Whole utterance (MFCC):\n");
    cmn_prior_set(acmod->fcb->cmn_struct, prior);
    TEST_EQUAL(0, acmod_start_utt(acmod));
    cptr = cepbuf;
    nfr = frame_counter;
    acmod_process_cep(acmod, &cptr, &nfr, TRUE);
    TEST_EQUAL(0, acmod_end_utt(acmod));
    {
        int16 best_score;
        int frame_idx = -1, best_senid;
        frame_counter = 0;
        while (acmod->n_feat_frame > 0) {
            acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            TEST_EQUAL(frame_counter, frame_idx);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* Rewind the utterance just processed and score it once more; the
     * results must match again. */
    E_INFO("Rewound (MFCC):\n");
    TEST_EQUAL(0, acmod_rewind(acmod));
    {
        int16 best_score;
        int frame_idx = -1, best_senid;
        frame_counter = 0;
        while (acmod->n_feat_frame > 0) {
            acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            TEST_EQUAL(frame_counter, frame_idx);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* Clean up, go home. */
    ckd_free_2d(cepbuf);
    fclose(rawfh);
    ckd_free(buf);
    acmod_free(acmod);
    logmath_free(lmath);
    cmd_ln_free_r(config);
    return 0;
}
int32 live_utt_decode_block (int16 *samples, int32 nsamples, int32 live_endutt, partialhyp_t **ohyp) { static int32 live_begin_new_utt = 1; static int32 frmno; static float32 ***live_feat = NULL; int32 live_nfr, live_nfeatvec; int32 nwds =0; float32 **mfcbuf; /* int i,j;*/ /* 2004/08/27 L Galescu <*****@*****.**> -- added raw audio file saving */ static char uttfn[1024]; static FILE *rawfp = NULL; int16 block_peak_amplitude; if(live_feat==NULL) live_feat = feat_array_alloc (kbcore_fcb(kbcore), LIVEBUFBLOCKSIZE); if (live_begin_new_utt){ fe_start_utt(fe); utt_begin (kb); frmno = 0; kb->nfr = 0; kb->utt_hmm_eval = 0; kb->utt_sen_eval = 0; kb->utt_gau_eval = 0; live_begin_new_utt = 0; sprintf(uttfn, "%s/%s.raw", cmd_ln_str("-outrawdir"), kb->uttid); rawfp = fopen(uttfn, "wb"); } /* 10.jan.01 RAH, fe_process_utt now requires ***mfcbuf and it allocates the memory internally) */ mfcbuf = NULL; /* LG 20080613 */ block_peak_amplitude = get_peak_amplitude(samples, nsamples); if (block_peak_amplitude > peak_amplitude) peak_amplitude = block_peak_amplitude; E_INFO("segment peak %d\n",peak_amplitude); live_nfr = fe_process_utt(fe, samples, nsamples, &mfcbuf); /**/ if (rawfp != NULL) { fwrite(samples, sizeof(int16), nsamples, rawfp); if (live_endutt) fclose(rawfp); } if (live_endutt) { /* RAH, It seems that we shouldn't throw out this data */ fe_end_utt(fe,dummyframe); /* Flush out the fe */ } #if 0 E_INFO("Number frame after fe_process_utt %d\n",live_nfr); for(i=0;i<live_nfr;i++){ printf("%d ",i); for(j=0;j<13;j++){ printf("%f ",mfcbuf[i][j]); fflush(stdout); } printf("\n"); fflush(stdout); } #endif /* lgalescu 2004/08/22 -- i am under the impression that * feat_s2mfc2feat_block() needs to be called at the end of utt * even if no frames need processing */ /* lgalescu 2004/10/13 -- rescinded the above */ if(live_nfr>0){ /* Compute feature vectors */ live_nfeatvec = feat_s2mfc2feat_block(kbcore_fcb(kbcore), mfcbuf, live_nfr, live_begin_new_utt, live_endutt, live_feat); #if 0 
E_INFO ("live_nfeatvec: %ld\n",live_nfeatvec); #endif #if 0 E_INFO("Current frame number %d, Number of frames %d, Number frame after feat_s2mfcfeat_block %d\n",frmno,live_nfr,live_nfeatvec); for(i=0;i<live_nfeatvec;i++){ printf("%d\n",i); printf("Cep: "); fflush(stdout); for(j=0;j<13;j++){ printf("%f ",live_feat[i][0][j]); fflush(stdout); } printf("\n"); fflush(stdout); printf("Del: "); fflush(stdout); for(j=13;j<26;j++){ printf("%f ",live_feat[i][0][j]); fflush(stdout); } printf("\n"); fflush(stdout); printf("Acc: "); fflush(stdout); for(j=26;j<39;j++){ printf("%f ",live_feat[i][0][j]); fflush(stdout); } printf("\n"); fflush(stdout); } #endif /* decode the block */ utt_decode_block (live_feat, live_nfeatvec, &frmno, kb, maxwpf, maxhistpf, maxhmmpf, ptranskip, hmmdumpfp); /* lgalescu 2004/08/21 * moved the following block out of the previous if(){} because we need * the output even when no feature computation has to be done. */ /* lgalescu 2004/10/13 -- rescinded */ /* Pull out partial hypothesis */ nwds = live_get_partialhyp(live_endutt); *ohyp = parthyp; parthyplen = nwds; } /* Clean up */ if (live_endutt) { live_begin_new_utt = 1; kb->tot_fr += kb->nfr; utt_end(kb); } else { live_begin_new_utt = 0; } /* I'm starting to think that fe_process_utt should not be allocating its * memory, that or it should allocate some max and just keep on going, * this idea of constantly allocating freeing memory seems dangerous to me. */ /* 20040318 ARCHAN : It sounds extremely dangerous to me and I will * eliminate it sometime. */ /* lgalescu: i second that! the memory issue needs to be investigated: after a run on linux, i noticed some 1.6M of memory having "disappeared"! */ if(live_nfr>0){ ckd_free_2d((void **) mfcbuf); /* RAH, this must be freed since fe_process_utt allocates it */ } return(parthyplen); }
/*
 * Allocate and initialise a front end from the parameter block P.
 *
 * Returns the new front end, or NULL when an allocation fails, when the
 * FFT size is smaller than the frame size, or when the filter bank type
 * is not MEL_SCALE.  On every failure path the memory allocated here is
 * released before returning.
 */
fe_t *
fe_init(param_t const *P)
{
    fe_t *FE = (fe_t *) calloc(1, sizeof(fe_t));

    if (FE == NULL) {
        E_WARN("memory alloc failed in fe_init()\n");
        return (NULL);
    }

    /* transfer params to front end */
    fe_parse_general_params(P, FE);

    /* compute remaining FE parameters */
    /* We add 0.5 so approximate the float with the closest
     * integer. E.g., 2.3 is truncate to 2, whereas 3.7 becomes 4
     */
    FE->FRAME_SHIFT = (int32) (FE->SAMPLING_RATE / FE->FRAME_RATE + 0.5);
    FE->FRAME_SIZE = (int32) (FE->WINDOW_LENGTH * FE->SAMPLING_RATE + 0.5);
    FE->PRIOR = 0;
    FE->FRAME_COUNTER = 0;

    /* A frame must fit into the FFT. */
    if (FE->FRAME_SIZE > (FE->FFT_SIZE)) {
        E_WARN
            ("Number of FFT points has to be a power of 2 higher than %d\n",
             (FE->FRAME_SIZE));
        goto fail;
    }

    if (FE->dither) {
        fe_init_dither(FE->seed);
    }

    /* establish buffers for overflow samps and hamming window */
    FE->OVERFLOW_SAMPS = (int16 *) calloc(FE->FRAME_SIZE, sizeof(int16));
    FE->HAMMING_WINDOW =
        (window_t *) calloc(FE->FRAME_SIZE, sizeof(window_t));

    if (FE->OVERFLOW_SAMPS == NULL || FE->HAMMING_WINDOW == NULL) {
        E_WARN("memory alloc failed in fe_init()\n");
        goto fail;
    }

    /* create hamming window */
    fe_create_hamming(FE->HAMMING_WINDOW, FE->FRAME_SIZE);

    /* init and fill appropriate filter structure */
    if (FE->FB_TYPE == MEL_SCALE) {
        if ((FE->MEL_FB = (melfb_t *) calloc(1, sizeof(melfb_t))) == NULL) {
            E_WARN("memory alloc failed in fe_init()\n");
            goto fail;
        }
        /* transfer params to mel fb */
        fe_parse_melfb_params(P, FE->MEL_FB);

        fe_build_melfilters(FE->MEL_FB);
        fe_compute_melcosine(FE->MEL_FB);
    }
    else {
        E_WARN("MEL SCALE IS CURRENTLY THE ONLY IMPLEMENTATION!\n");
        goto fail;
    }

    if (P->verbose) {
        fe_print_current(FE);
    }

    /*** Z.A.B. ***/
    /*** Initialize the overflow buffers ***/
    fe_start_utt(FE);

    return (FE);

  fail:
    /* FE came from calloc(), so members not yet assigned are still zero
     * and free() accepts them.  Only memory allocated in this function is
     * released here. */
    free(FE->MEL_FB);
    free(FE->HAMMING_WINDOW);
    free(FE->OVERFLOW_SAMPS);
    free(FE);
    return (NULL);
}
int main(int argc, char *argv[]) { static const arg_t fe_args[] = { waveform_to_cepstral_command_line_macro(), { NULL, 0, NULL, NULL } }; FILE *raw; cmd_ln_t *config; fe_t *fe; int16 buf[1024]; int16 const *inptr; int32 frame_shift, frame_size; mfcc_t **cepbuf1, **cepbuf2, **cptr; int32 nfr, i; size_t nsamp; TEST_ASSERT(config = cmd_ln_parse_r(NULL, fe_args, argc, argv, FALSE)); TEST_ASSERT(fe = fe_init_auto_r(config)); TEST_EQUAL(fe_get_output_size(fe), DEFAULT_NUM_CEPSTRA); fe_get_input_size(fe, &frame_shift, &frame_size); TEST_EQUAL(frame_shift, DEFAULT_FRAME_SHIFT); TEST_EQUAL(frame_size, (int)(DEFAULT_WINDOW_LENGTH*DEFAULT_SAMPLING_RATE)); TEST_ASSERT(raw = fopen(TESTDATADIR "/chan3.raw", "rb")); TEST_EQUAL(0, fe_start_utt(fe)); TEST_EQUAL(1024, fread(buf, sizeof(int16), 1024, raw)); nsamp = 1024; TEST_ASSERT(fe_process_frames(fe, NULL, &nsamp, NULL, &nfr, NULL) >= 0); TEST_EQUAL(1024, nsamp); TEST_EQUAL(4, nfr); cepbuf1 = ckd_calloc_2d(5, DEFAULT_NUM_CEPSTRA, sizeof(**cepbuf1)); inptr = &buf[0]; nfr = 1; printf("frame_size %d frame_shift %d\n", frame_size, frame_shift); /* Process the first frame. */ TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[0], &nfr, NULL) >= 0); printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, nfr); /* First frame assumed to be unvoiced to init noise reduction */ TEST_EQUAL(nfr, 0); /* Note that this next one won't actually consume any frames * of input, because it already got sufficient overflow * samples last time around. This is implementation-dependent * so we shouldn't actually test for it. 
* First 1024 samples of chan3.raw is silence, nfr is expected to stay 0 */ nfr = 1; TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[1], &nfr, NULL) >= 0); printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, nfr); TEST_EQUAL(nfr, 0); nfr = 1; TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[2], &nfr, NULL) >= 0); printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, nfr); TEST_EQUAL(nfr, 0); nfr = 1; TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[3], &nfr, NULL) >= 0); printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, nfr); TEST_EQUAL(nfr, 0); nfr = 1; TEST_ASSERT(fe_end_utt(fe, cepbuf1[4], &nfr) >= 0); printf("nfr %d\n", nfr); TEST_EQUAL(nfr, 0); /* What we *should* test is that the output we get by * processing one frame at a time is exactly the same as what * we get from doing them all at once. So let's do that */ cepbuf2 = ckd_calloc_2d(5, DEFAULT_NUM_CEPSTRA, sizeof(**cepbuf2)); inptr = &buf[0]; nfr = 5; nsamp = 1024; TEST_EQUAL(0, fe_start_utt(fe)); TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cepbuf2, &nfr, NULL) >= 0); /* First 1024 samples of chan3.raw is silence, nfr is expected to stay 0 */ printf("nfr %d\n", nfr); TEST_EQUAL(nfr, 0); nfr = 1; TEST_ASSERT(fe_end_utt(fe, cepbuf2[4], &nfr) >= 0); printf("nfr %d\n", nfr); TEST_EQUAL(nfr, 0); /* fe_process_frames overwrites features if frame is unvoiced, * so for cepbuf2 last frame is at 0 and previous are lost */ printf("%d: ", 3); for (i = 0; i < DEFAULT_NUM_CEPSTRA; ++i) { printf("%.2f,%.2f ", MFCC2FLOAT(cepbuf1[3][i]), MFCC2FLOAT(cepbuf2[0][i])); TEST_EQUAL_FLOAT(cepbuf1[3][i], cepbuf2[0][i]); } printf("\n"); /* output features stored in cepbuf[4] by fe_end_utt * should be the same */ printf("%d: ", 4); for (i = 0; i < DEFAULT_NUM_CEPSTRA; ++i) { printf("%.2f,%.2f ", MFCC2FLOAT(cepbuf1[4][i]), MFCC2FLOAT(cepbuf2[4][i])); TEST_EQUAL_FLOAT(cepbuf1[4][i], cepbuf2[4][i]); } printf("\n"); /* Now, also test to make sure that even if we feed data in * little 
tiny bits we can still make things work. */ memset(cepbuf2[0], 0, 5 * DEFAULT_NUM_CEPSTRA * sizeof(**cepbuf2)); inptr = &buf[0]; cptr = &cepbuf2[0]; nfr = 5; i = 5; nsamp = 256; TEST_EQUAL(0, fe_start_utt(fe)); TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i, NULL) >= 0); printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, i); cptr += i; nfr -= i; i = nfr; nsamp = 256; TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i, NULL) >= 0); printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, i); cptr += i; nfr -= i; i = nfr; nsamp = 256; TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i, NULL) >= 0); printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, i); cptr += i; nfr -= i; i = nfr; nsamp = 256; TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i, NULL) >= 0); printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, i); cptr += i; nfr -= i; printf("nfr %d\n", nfr); TEST_EQUAL(nfr, 5); /* inptr contains unvoiced audio, * no out feature frames will be produced */ TEST_ASSERT(fe_end_utt(fe, *cptr, &nfr) >= 0); printf("nfr %d\n", nfr); TEST_EQUAL(nfr, 0); /* fe_process_frames overwrites features if frame is unvoiced, * so for cepbuf2 last frame is at 0 and previous are lost */ printf("%d: ", 4); for (i = 0; i < DEFAULT_NUM_CEPSTRA; ++i) { printf("%.2f,%.2f ", MFCC2FLOAT(cepbuf1[4][i]), MFCC2FLOAT(cepbuf2[0][i])); TEST_EQUAL_FLOAT(cepbuf1[4][i], cepbuf2[0][i]); } printf("\n"); /* And now, finally, test fe_process_utt() */ inptr = &buf[0]; i = 0; TEST_EQUAL(0, fe_start_utt(fe)); TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0); printf("i %d nfr %d\n", i, nfr); if (nfr) memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr)); ckd_free_2d(cptr); i += nfr; inptr += 256; TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0); printf("i %d nfr %d\n", i, nfr); if (nfr) memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr)); ckd_free_2d(cptr); i += nfr; inptr += 256; 
TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0); printf("i %d nfr %d\n", i, nfr); if (nfr) memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr)); ckd_free_2d(cptr); i += nfr; inptr += 256; TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0); printf("i %d nfr %d\n", i, nfr); if (nfr) memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr)); ckd_free_2d(cptr); i += nfr; inptr += 256; TEST_ASSERT(fe_end_utt(fe, cepbuf2[i], &nfr) >= 0); printf("i %d nfr %d\n", i, nfr); TEST_EQUAL(nfr, 0); /* fe_process_utt overwrites features if frame is unvoiced, * so for cepbuf2 last frame is at 0 and previous are lost */ printf("%d: ", 4); for (i = 0; i < DEFAULT_NUM_CEPSTRA; ++i) { printf("%.2f,%.2f ", MFCC2FLOAT(cepbuf1[4][i]), MFCC2FLOAT(cepbuf2[0][i])); TEST_EQUAL_FLOAT(cepbuf1[4][i], cepbuf2[0][i]); } printf("\n"); ckd_free_2d(cepbuf1); ckd_free_2d(cepbuf2); fclose(raw); fe_free(fe); cmd_ln_free_r(config); return 0; }
/**
 * Push one block of raw audio through the front end, feature computation
 * and the decoder.
 *
 * @param _decoder     Live decoder state; must not be NULL.
 * @param samples      Audio samples.  Byte-swapped IN PLACE when
 *                     _decoder->swap is set.
 * @param num_samples  Number of entries in samples.
 * @param end_utt      Non-zero when this is the last block of the
 *                     utterance; the front end is then flushed with
 *                     fe_end_utt().
 *
 * The utterance is considered to begin when no frames have been entered
 * yet (_decoder->num_frames_entered == 0).
 */
void
ld_process_raw_impl(live_decoder_t * _decoder,
                    int16 * samples, int32 num_samples, int32 end_utt)
{
    float32 dummy_frame[MAX_CEP_LEN];
    float32 **frames = NULL;
    int32 num_frames = 0;
    int32 num_features = 0;
    int32 begin_utt;
    int32 return_value;
    int32 zero_energy;
    int i;

    /* Check the pointer before its first dereference. */
    assert(_decoder != NULL);
    begin_utt = _decoder->num_frames_entered == 0;

    if (begin_utt) {
        fe_start_utt(_decoder->fe);
    }

    if (_decoder->swap) {
        for (i = 0; i < num_samples; i++) {
            SWAP_INT16(samples + i);
        }
    }

    return_value =
        fe_process_utt(_decoder->fe, samples, num_samples, &frames,
                       &num_frames);
    /* Remember the status now: fe_end_utt() below reuses return_value. */
    zero_energy = (FE_ZERO_ENERGY_ERROR == return_value);

    if (end_utt) {
        /* NOTE(review): fe_end_utt() overwrites num_frames, so any frames
         * fe_process_utt() produced for this same block are not passed on
         * to feature computation.  That matches the previous behaviour;
         * confirm whether they should be appended instead. */
        return_value =
            fe_end_utt(_decoder->fe, dummy_frame, &num_frames);
        if (FE_ZERO_ENERGY_ERROR == return_value) {
            zero_energy = 1;
        }
        if (num_frames != 0) {
            /* ARCHAN: If num_frames !=0, assign this last ending frame to
               frames again.  The computation will then be correct.  Should
               clean up the finite state logic in fe_interface layer. */
            /* Release the buffer handed back by fe_process_utt() before
             * replacing it, otherwise it is leaked. */
            if (frames != NULL) {
                ckd_free_2d((void **) frames);
            }
            frames =
                (float32 **) ckd_calloc_2d(1, _decoder->fe->NUM_CEPSTRA,
                                           sizeof(float32));
            memcpy(frames[0], dummy_frame,
                   _decoder->fe->NUM_CEPSTRA * sizeof(float32));
        }
    }

    if (zero_energy) {
        E_WARN("Zero energy frame(s). Consider using dither\n");
    }

    if (num_frames > 0) {
        num_features = feat_s2mfc2feat_block(kbcore_fcb(_decoder->kbcore),
                                             frames,
                                             num_frames,
                                             begin_utt,
                                             end_utt, _decoder->features);
        _decoder->num_frames_entered += num_frames;
    }

    if (num_features > 0) {
        utt_decode_block(_decoder->features,
                         num_features,
                         &_decoder->num_frames_decoded, &_decoder->kb);
    }

    if (frames != NULL) {
        ckd_free_2d((void **) frames);
    }
}
/**
 * Batch driver: decode every utterance named in a control file read from
 * stdin, applying CDCN normalisation to the cepstra first.
 *
 * For each control-file line the raw audio file is converted to cepstra
 * with the front end, the CDCN module is updated and applied, and the
 * normalised cepstra are handed to uttproc_cepdata().
 *
 * @return 0 on normal completion; exits with -1 if the front end cannot be
 *         initialised.
 */
int
main (int32 argc, char *argv[])
{
    char line[4096], filename[4096], idspec[4096], *uttid, *result;
    int32 sf, ef, sps, adcin, nf;
    int16 adbuf[4096];
    int32 i, k;
    float32 **mfcbuf;
    CDCN_type *cdcn;
    param_t param;
    fe_t *fe = NULL;

    fbs_init (argc, argv);
    /* Assume that cdcn_init is part of the above fbs_init() */
    cdcn = uttproc_get_cdcn_ptr();

    adcin = query_adc_input();
    assert (adcin);	/* Limited to processing audio input files (not cep) */
    sps = query_sampling_rate();

    fe_init_params(&param);
    param.SAMPLING_RATE = (float)sps;

    if ((fe = fe_init (&param)) == NULL) {
        E_ERROR("fe_init() failed to initialize\n");
        exit (-1);
    }

    /* Room for 8192 frames of 13 cepstral coefficients each. */
    mfcbuf = (float32 **) ckd_calloc_2d (8192, 13, sizeof(float32));

    /* Process "control file" input through stdin */
    while (fgets (line, sizeof(line), stdin) != NULL) {
        if (uttproc_parse_ctlfile_entry (line, filename, &sf, &ef, idspec) < 0)
            continue;
        assert ((sf < 0) && (ef < 0));	/* Processing entire input file */

        uttid = build_uttid (idspec);

        uttproc_begin_utt (uttid);

        /* Convert raw data file to cepstra */
        if (uttfile_open (filename) < 0) {
            E_ERROR("uttfile_open(%s) failed\n", filename);
            continue;
        }
        fe_start_utt(fe);
        nf = 0;
        while ((k = adc_file_read (adbuf, 4096)) >= 0) {
            if (fe_process_utt (fe, adbuf, k, mfcbuf+nf, &k) == FE_ZERO_ENERGY_ERROR) {
                E_WARN("Frames with zero energy. Consider using dither\n");
            }
            nf += k;
            /* WARNING!! No check for mfcbuf overflow (8192 frames) */
        }
        /* The frame count returned in k here is not added to nf. */
        fe_end_utt(fe, mfcbuf[nf], &k);
        uttfile_close ();

        if (nf <= 0) {
            E_ERROR("Empty utterance\n");
            continue;
        } else
            E_INFO("%d frames\n", nf);

        /* Update CDCN module */
        cdcn_converged_update (mfcbuf,	/* cepstra buffer */
                               nf,	/* Number of frames */
                               cdcn,	/* The CDCN wrapper */
                               1	/* One iteration */
                               );

        /* CDCN */
        for (i = 0; i < nf; i++)
            cdcn_norm (mfcbuf[i], cdcn);

        /* Process normalized cepstra */
        uttproc_cepdata (mfcbuf, nf, 1);
        uttproc_end_utt ();
        uttproc_result (&k, &result, 1);
        printf ("\n");
        fflush (stdout);
    }

    /* Close the front end once, after the last utterance: it is reused by
     * fe_start_utt()/fe_process_utt() on every loop iteration. */
    fe_close(fe);
    ckd_free_2d((void **)mfcbuf);

    fbs_end ();
    return 0;
}