/**
 * Write one utterance's hypothesis segmentation to fh as a single line.
 *
 * Layout, as produced by the fprintf calls below:
 *
 *   <uttid> S 0 T <score> A <score - lscr> L <lscr> { <sf> <ascr> <lscr> <word>}... <ef>
 *
 * The leading <lscr> is the sum of the per-segment language model scores
 * reported by ps_seg_prob(); the trailing <ef> is the end frame of the last
 * segment visited.
 *
 * @param fh    Stream to write to.
 * @param ps    Decoder whose current hypothesis is dumped.
 * @param uttid Utterance ID; "(null)" is printed when this is NULL.
 * @return Always 0.
 */
static int
write_hypseg(FILE *fh, ps_decoder_t *ps, char const *uttid)
{
    /* Everything that is printed is initialised up front: with an empty
     * segmentation neither loop body runs, and the trailing end frame (and
     * possibly the score) would otherwise be read while indeterminate. */
    int32 score = 0, lscr = 0, sf = 0, ef = 0;
    ps_seg_t *itor;

    /* Accumulate language model scores. */
    for (itor = ps_seg_iter(ps, &score); itor; itor = ps_seg_next(itor)) {
        int32 ascr, wlscr;

        ps_seg_prob(itor, &ascr, &wlscr, NULL);
        lscr += wlscr;
    }

    fprintf(fh, "%s S %d T %d A %d L %d",
            uttid ? uttid : "(null)",
            0, /* "scaling factor" which is mostly useless anyway */
            score, score - lscr, lscr);

    /* Now print out words. */
    for (itor = ps_seg_iter(ps, &score); itor; itor = ps_seg_next(itor)) {
        char const *w = ps_seg_word(itor);
        int32 ascr, wlscr;

        ps_seg_prob(itor, &ascr, &wlscr, NULL);
        ps_seg_frames(itor, &sf, &ef);
        fprintf(fh, " %d %d %d %s", sf, ascr, wlscr, w);
    }

    /* End frame of the last segment (0 when there were none). */
    fprintf(fh, " %d\n", ef);
    return 0;
}
/**
 * Finish decoding the current utterance.
 *
 * Ends acoustic-model processing, runs the search over any frames still
 * pending, finishes the phone loop search (when one is configured) and the
 * main search, stops the performance timer, and optionally logs a
 * word-by-word backtrace when the "-backtrace" option is set.
 *
 * @param ps Decoder.
 * @return The negative value returned by the first step that fails;
 *         otherwise the value returned by ps_search_finish() for the main
 *         search.
 */
int
ps_end_utt(ps_decoder_t *ps)
{
    int rv, i, first;

    acmod_end_utt(ps->acmod);

    /* Search any remaining frames. */
    if ((rv = ps_search_forward(ps)) < 0) {
        ptmr_stop(&ps->perf);
        return rv;
    }
    /* Finish phone loop search. */
    if (ps->phone_loop) {
        if ((rv = ps_search_finish(ps->phone_loop)) < 0) {
            ptmr_stop(&ps->perf);
            return rv;
        }
    }
    /* Search any frames remaining in the lookahead window.  When the
     * utterance is shorter than the window, output_frame - pl_window is
     * negative; clamp the start so that only real frame indices (>= 0) are
     * handed to the search. */
    first = ps->acmod->output_frame - ps->pl_window;
    if (first < 0)
        first = 0;
    for (i = first; i < ps->acmod->output_frame; ++i)
        ps_search_step(ps->search, i);

    /* Finish main search. */
    if ((rv = ps_search_finish(ps->search)) < 0) {
        ptmr_stop(&ps->perf);
        return rv;
    }
    ptmr_stop(&ps->perf);

    /* Log a backtrace if requested. */
    if (cmd_ln_boolean_r(ps->config, "-backtrace")) {
        char const *uttid, *hyp;
        ps_seg_t *seg;
        int32 score;

        hyp = ps_get_hyp(ps, &score, &uttid);
        if (hyp != NULL) {
            E_INFO("%s: %s (%d)\n", uttid, hyp, score);
            E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
                        "word", "start", "end", "pprob", "ascr", "lscr", "lback");
            for (seg = ps_seg_iter(ps, &score); seg; seg = ps_seg_next(seg)) {
                char const *word;
                int sf, ef;
                int32 post, lscr, ascr, lback;

                word = ps_seg_word(seg);
                ps_seg_frames(seg, &sf, &ef);
                post = ps_seg_prob(seg, &ascr, &lscr, &lback);
                /* The value from ps_seg_prob() is passed through
                 * logmath_exp() before printing in the "pprob" column. */
                E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
                            word, sf, ef,
                            logmath_exp(ps_get_logmath(ps), post),
                            ascr, lscr, lback);
            }
        }
    }
    return rv;
}
/**
 * Write the segmentation starting at itor to fh, one line per real word:
 *
 *   <show> <channel> <start> <duration> <word> <confidence>
 *
 * The utterance ID is split on commas: the first field is used as the show,
 * the second as the channel ("1" is printed when there is no comma at all),
 * and the text following the third comma, if present, is parsed as the
 * utterance start time that is added to every word start.
 *
 * @param fh    Stream to write to.
 * @param ps    Decoder; supplies the dictionary and the log-math object.
 * @param itor  First segment to write; advanced with ps_seg_next() until NULL.
 * @param uttid Utterance ID, may be NULL (then "(null)" is used).
 * @param frate Divisor applied to frame counts to obtain times.
 * @return Always 0.
 */
static int
write_ctm(FILE *fh, ps_decoder_t *ps, ps_seg_t *itor, char const *uttid, int32 frate)
{
    logmath_t *lmath = ps_get_logmath(ps);
    char *idcopy, *show, *channel = NULL, *sep;
    double ustart = 0.0;

    /* Work on a private copy because the separators are overwritten. */
    idcopy = ckd_salloc(uttid ? uttid : "(null)");
    show = idcopy;

    sep = strchr(idcopy, ',');
    if (sep != NULL) {
        /* Terminate the show field; the channel starts right after it. */
        *sep++ = '\0';
        channel = sep;
        sep = strchr(sep, ',');
        if (sep != NULL) {
            /* Terminate the channel field. */
            *sep++ = '\0';
            /* The start time follows the next comma, when there is one. */
            sep = strchr(sep, ',');
            if (sep != NULL)
                ustart = atof_c(sep + 1);
        }
    }

    for (; itor; itor = ps_seg_next(itor)) {
        char const *w = ps_seg_word(itor);
        int32 wid = dict_wordid(ps->dict, w);

        /* Skip things that aren't "real words" (FIXME: currently
         * requires s3kr3t h34d3rz...) */
        if (wid >= 0 && dict_real_word(ps->dict, wid)) {
            int32 prob, sf, ef;
            double start, duration;

            prob = ps_seg_prob(itor, NULL, NULL, NULL);
            ps_seg_frames(itor, &sf, &ef);
            start = ustart + (double)sf / frate;
            duration = (double)(ef - sf) / frate;
            fprintf(fh, "%s %s %.2f %.2f %s %.3f\n",
                    show,
                    channel ? channel : "1",
                    start,
                    duration,
                    /* FIXME: More s3kr3tz */
                    dict_basestr(ps->dict, wid),
                    logmath_exp(lmath, prob));
        }
    }

    ckd_free(idcopy);
    return 0;
}
/* * Continuous recognition from mic */ int recognize_from_mic() { ad_rec_t *ad; int16 adbuf[2048]; const char *fname; const char* seg; int32 k; char str[1000]=""; uint8 utt_started, in_speech; if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),16000)) == NULL) perror("Failed to open audio device\n"); if (ad_start_rec(ad) < 0) perror("Failed to start recording\n"); ps_start_utt(ps); utt_started = FALSE; ps_seg_t *psegt; while (!finished) { if ((k = ad_read(ad, adbuf, 2048)) < 0) perror("Failed to read audio\n"); ps_process_raw(ps, adbuf, k, FALSE, FALSE); in_speech = ps_get_in_speech(ps); if (in_speech && !utt_started) { utt_started = TRUE; } if (!in_speech && utt_started) { ps_end_utt(ps); psegt = ps_seg_iter(ps, NULL); while (psegt!=NULL){ seg = ps_seg_word(psegt); strncpy_s( str, seg, strlen(seg)); listenCallback(str); printf("%s\n", seg); int prob = ps_seg_prob(psegt,NULL,NULL,NULL); printf("%d\n", prob); psegt = ps_seg_next(psegt); } ps_start_utt(ps); utt_started = FALSE; } Sleep(100); } ps_end_utt(ps); fclose(rawfd); return 0; }
/**
 * Print one line per segment of the current hypothesis to stderr:
 *
 *   <word> <start> <end> <confidence>
 *
 * Start and end are the segment's frame numbers offset by `start` and
 * divided by 100.0 (presumably a fixed rate of 100 frames per second --
 * confirm against the decoder configuration).  The confidence is the value
 * from ps_seg_prob() passed through logmath_exp().
 *
 * @param start Frame offset added to every segment boundary.
 */
static void
print_word_times(int32 start)
{
    ps_seg_t *seg;

    for (seg = ps_seg_iter(ps, NULL); seg != NULL; seg = ps_seg_next(seg)) {
        int32 first_frame, last_frame, log_prob;
        float confidence;

        ps_seg_frames(seg, &first_frame, &last_frame);
        log_prob = ps_seg_prob(seg, NULL, NULL, NULL);
        confidence = logmath_exp(ps_get_logmath(ps), log_prob);

        fprintf(stderr, "%s %f %f %f\n",
                ps_seg_word(seg),
                (first_frame + start) / 100.0,
                (last_frame + start) / 100.0,
                confidence);
    }
}
/**
 * Collect (word, confidence) pairs for every segment of the decoder's
 * current hypothesis and pass them to the mCb callback.
 *
 * The confidence is the value from ps_seg_prob() passed through
 * logmath_exp() and stored as a float.  The callback is not invoked when
 * the hypothesis has no segments.
 *
 * @param decoder Decoder to read the segmentation from.
 */
void EventHandlerSegmentConfidence::event(ps_decoder_t *decoder)
{
    std::vector<std::pair<std::string, float> > segments;

    for (ps_seg_t *seg = ps_seg_iter(decoder, NULL); seg != NULL;
         seg = ps_seg_next(seg)) {
        const int32 logProb = ps_seg_prob(seg, NULL, NULL, NULL);
        const std::string word(ps_seg_word(seg));
        const float confidence =
            static_cast<float>(logmath_exp(ps_get_logmath(decoder), logProb));

        segments.emplace_back(word, confidence);
    }

    if (segments.empty())
        return;

    mCb(segments);
}
/**
 * Print one line per segment of the current hypothesis to stdout:
 *
 *   <word> <start> <end> <confidence>
 *
 * Start and end are the segment's frame numbers divided by the value of the
 * "-frate" configuration option.  The confidence is the value from
 * ps_seg_prob() passed through logmath_exp().
 */
static void
print_word_times()
{
    int frame_rate = cmd_ln_int32_r(config, "-frate");
    ps_seg_t *seg;

    for (seg = ps_seg_iter(ps); seg != NULL; seg = ps_seg_next(seg)) {
        int32 first_frame, last_frame, log_prob;
        float confidence;

        ps_seg_frames(seg, &first_frame, &last_frame);
        log_prob = ps_seg_prob(seg, NULL, NULL, NULL);
        confidence = logmath_exp(ps_get_logmath(ps), log_prob);

        printf("%s %.3f %.3f %f\n",
               ps_seg_word(seg),
               ((float)first_frame / frame_rate),
               ((float)last_frame / frame_rate),
               confidence);
    }
}
void ofApp::process_result() { int frame_rate = cmd_ln_int32_r(config, "-frate"); ps_seg_t *iter = ps_seg_iter(ps, NULL); printf("\n\n"); while (iter != NULL) { int32 sf, ef, pprob; float conf; ps_seg_frames(iter, &sf, &ef); pprob = ps_seg_prob(iter, NULL, NULL, NULL); conf = logmath_exp(ps_get_logmath(ps), pprob); //here is where we process the word new_utterance new_utt; new_utt.conf = conf; new_utt.sf = sf; new_utt.st = (float)sf / frame_rate; new_utt.ef = ef; new_utt.et = (float) ef / frame_rate; new_utt.utt = ps_seg_word(iter); printf("Recognised: %s %.3f %.3f %f\n", ps_seg_word(iter), new_utt.st, new_utt.et, new_utt.conf); std::string word = ps_seg_word(iter); result.push_back(new_utt); iter = ps_seg_next(iter); } printf("\n\n"); engineExit(); }
int main(int argc, char *argv[]) { ps_decoder_t *ps; cmd_ln_t *config; acmod_t *acmod; fsg_search_t *fsgs; ps_lattice_t *dag; ps_seg_t *seg; int32 score; TEST_ASSERT(config = cmd_ln_init(NULL, ps_args(), TRUE, "-hmm", DATADIR "/tidigits/hmm", "-fsg", DATADIR "/tidigits/lm/tidigits.fsg", "-dict", DATADIR "/tidigits/lm/tidigits.dic", "-bestpath", "no", "-input_endian", "little", "-samprate", "16000", NULL)); TEST_ASSERT(ps = ps_init(config)); fsgs = (fsg_search_t *)ps->search; acmod = ps->acmod; setbuf(stdout, NULL); { FILE *rawfh; int16 buf[2048]; size_t nread; int16 const *bptr; char const *hyp; int nfr; TEST_ASSERT(rawfh = fopen(DATADIR "/numbers.raw", "rb")); TEST_EQUAL(0, acmod_start_utt(acmod)); fsg_search_start(ps_search_base(fsgs)); while (!feof(rawfh)) { nread = fread(buf, sizeof(*buf), 2048, rawfh); bptr = buf; while ((nfr = acmod_process_raw(acmod, &bptr, &nread, FALSE)) > 0) { while (acmod->n_feat_frame > 0) { fsg_search_step(ps_search_base(fsgs), acmod->output_frame); acmod_advance(acmod); } } } fsg_search_finish(ps_search_base(fsgs)); hyp = fsg_search_hyp(ps_search_base(fsgs), &score, NULL); printf("FSG: %s (%d)\n", hyp, score); TEST_ASSERT(acmod_end_utt(acmod) >= 0); fclose(rawfh); } for (seg = ps_seg_iter(ps); seg; seg = ps_seg_next(seg)) { char const *word; int sf, ef; int32 post, lscr, ascr, lback; word = ps_seg_word(seg); ps_seg_frames(seg, &sf, &ef); post = ps_seg_prob(seg, &ascr, &lscr, &lback); printf("%s (%d:%d) P(w|o) = %f ascr = %d lscr = %d lback = %d\n", word, sf, ef, logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback); } /* Now get the DAG and play with it. */ dag = ps_get_lattice(ps); ps_lattice_write(dag, "test_fsg3.lat"); printf("BESTPATH: %s\n", ps_lattice_hyp(dag, ps_lattice_bestpath(dag, NULL, 1.0, 15.0))); ps_lattice_posterior(dag, NULL, 15.0); ps_free(ps); cmd_ln_free_r(config); return 0; }
int ps_decoder_test(cmd_ln_t *config, char const *sname, char const *expected) { ps_decoder_t *ps; mfcc_t **cepbuf; FILE *rawfh; int16 *buf; int16 const *bptr; size_t nread; size_t nsamps; int32 nfr, i, score, prob; char const *hyp; char const *uttid; double n_speech, n_cpu, n_wall; ps_seg_t *seg; TEST_ASSERT(ps = ps_init(config)); /* Test it first with pocketsphinx_decode_raw() */ TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb")); ps_decode_raw(ps, rawfh, "goforward", -1); hyp = ps_get_hyp(ps, &score, &uttid); prob = ps_get_prob(ps, &uttid); printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob); TEST_EQUAL(0, strcmp(hyp, expected)); TEST_ASSERT(prob <= 0); ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall); printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n", n_speech, n_cpu, n_wall); printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n", n_cpu / n_speech, n_wall / n_speech); /* Test it with ps_process_raw() */ clearerr(rawfh); fseek(rawfh, 0, SEEK_END); nsamps = ftell(rawfh) / sizeof(*buf); fseek(rawfh, 0, SEEK_SET); TEST_EQUAL(0, ps_start_utt(ps, NULL)); nsamps = 2048; buf = ckd_calloc(nsamps, sizeof(*buf)); while (!feof(rawfh)) { nread = fread(buf, sizeof(*buf), nsamps, rawfh); ps_process_raw(ps, buf, nread, FALSE, FALSE); } TEST_EQUAL(0, ps_end_utt(ps)); hyp = ps_get_hyp(ps, &score, &uttid); prob = ps_get_prob(ps, &uttid); printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob); TEST_EQUAL(0, strcmp(uttid, "000000000")); TEST_EQUAL(0, strcmp(hyp, expected)); ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall); printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n", n_speech, n_cpu, n_wall); printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n", n_cpu / n_speech, n_wall / n_speech); /* Now read the whole file and produce an MFCC buffer. 
*/ clearerr(rawfh); fseek(rawfh, 0, SEEK_END); nsamps = ftell(rawfh) / sizeof(*buf); fseek(rawfh, 0, SEEK_SET); bptr = buf = ckd_realloc(buf, nsamps * sizeof(*buf)); TEST_EQUAL(nsamps, fread(buf, sizeof(*buf), nsamps, rawfh)); fe_process_frames(ps->acmod->fe, &bptr, &nsamps, NULL, &nfr); cepbuf = ckd_calloc_2d(nfr + 1, fe_get_output_size(ps->acmod->fe), sizeof(**cepbuf)); fe_start_utt(ps->acmod->fe); fe_process_frames(ps->acmod->fe, &bptr, &nsamps, cepbuf, &nfr); fe_end_utt(ps->acmod->fe, cepbuf[nfr], &i); /* Decode it with process_cep() */ TEST_EQUAL(0, ps_start_utt(ps, NULL)); for (i = 0; i < nfr; ++i) { ps_process_cep(ps, cepbuf + i, 1, FALSE, FALSE); } TEST_EQUAL(0, ps_end_utt(ps)); hyp = ps_get_hyp(ps, &score, &uttid); prob = ps_get_prob(ps, &uttid); printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob); TEST_EQUAL(0, strcmp(uttid, "000000001")); TEST_EQUAL(0, strcmp(hyp, expected)); TEST_ASSERT(prob <= 0); for (seg = ps_seg_iter(ps, &score); seg; seg = ps_seg_next(seg)) { char const *word; int sf, ef; int32 post, lscr, ascr, lback; word = ps_seg_word(seg); ps_seg_frames(seg, &sf, &ef); post = ps_seg_prob(seg, &ascr, &lscr, &lback); printf("%s (%d:%d) P(w|o) = %f ascr = %d lscr = %d lback = %d\n", word, sf, ef, logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback); TEST_ASSERT(post <= 2); // Due to numerical errors with float it sometimes could go out of 0 } ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall); printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n", n_speech, n_cpu, n_wall); printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n", n_cpu / n_speech, n_wall / n_speech); ps_get_all_time(ps, &n_speech, &n_cpu, &n_wall); printf("TOTAL: %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n", n_speech, n_cpu, n_wall); printf("TOTAL: %.2f xRT (CPU), %.2f xRT (elapsed)\n", n_cpu / n_speech, n_wall / n_speech); fclose(rawfh); ps_free(ps); cmd_ln_free_r(config); ckd_free_2d(cepbuf); ckd_free(buf); return 0; }