/* * Continuous recognition from a file */ static void recognize_from_file() { int16 adbuf[2048]; const char *fname; const char *hyp; int32 k; uint8 utt_started, in_speech; int32 print_times = cmd_ln_boolean_r(config, "-time"); fname = cmd_ln_str_r(config, "-infile"); if ((rawfd = fopen(fname, "rb")) == NULL) { E_FATAL_SYSTEM("Failed to open file '%s' for reading", fname); } if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) { char waveheader[44]; fread(waveheader, 1, 44, rawfd); if (!check_wav_header(waveheader, (int)cmd_ln_float32_r(config, "-samprate"))) E_FATAL("Failed to process file '%s' due to format mismatch.\n", fname); } if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".mp3") == 0) { E_FATAL("Can not decode mp3 files, convert input file to WAV 16kHz 16-bit mono before decoding.\n"); } ps_start_utt(ps); utt_started = FALSE; while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) { ps_process_raw(ps, adbuf, k, FALSE, FALSE); in_speech = ps_get_in_speech(ps); if (in_speech && !utt_started) { utt_started = TRUE; } if (!in_speech && utt_started) { ps_end_utt(ps); hyp = ps_get_hyp(ps, NULL); if (hyp != NULL) printf("%s\n", hyp); if (print_times) print_word_times(); fflush(stdout); ps_start_utt(ps); utt_started = FALSE; } } ps_end_utt(ps); if (utt_started) { hyp = ps_get_hyp(ps, NULL); if (hyp != NULL) { printf("%s\n", hyp); if (print_times) { print_word_times(); } } } fclose(rawfd); }
/* * Continuous recognition from a file */ static void recognize_from_file() { cont_ad_t *cont; ad_rec_t file_ad = {0}; int16 adbuf[4096]; const char* hyp; const char* uttid; int32 k, ts, start; char waveheader[44]; if ((rawfd = fopen(cmd_ln_str_r(config, "-infile"), "rb")) == NULL) { E_FATAL_SYSTEM("Failed to open file '%s' for reading", cmd_ln_str_r(config, "-infile")); } fread(waveheader, 1, 44, rawfd); file_ad.sps = (int32)cmd_ln_float32_r(config, "-samprate"); file_ad.bps = sizeof(int16); if ((cont = cont_ad_init(&file_ad, ad_file_read)) == NULL) { E_FATAL("Failed to initialize voice activity detection"); } if (cont_ad_calib(cont) < 0) E_FATAL("Failed to calibrate voice activity detection\n"); rewind (rawfd); for (;;) { while ((k = cont_ad_read(cont, adbuf, 4096)) == 0); if (k < 0) { break; } if (ps_start_utt(ps, NULL) < 0) E_FATAL("ps_start_utt() failed\n"); ps_process_raw(ps, adbuf, k, FALSE, FALSE); ts = cont->read_ts; start = ((ts - k) * 100.0) / file_ad.sps; for (;;) { if ((k = cont_ad_read(cont, adbuf, 4096)) < 0) break; if (k == 0) { /* * No speech data available; check current timestamp with most recent * speech to see if more than 1 sec elapsed. If so, end of utterance. */ if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC) break; } else { /* New speech data received; note current timestamp */ ts = cont->read_ts; } ps_process_raw(ps, adbuf, k, FALSE, FALSE); } ps_end_utt(ps); if (cmd_ln_boolean_r(config, "-time")) { print_word_times(start); } else { hyp = ps_get_hyp(ps, NULL, &uttid); fprintf(stderr, "%s: %s\n", uttid, hyp); } fflush(stdout); } cont_ad_close(cont); fclose(rawfd); }