/**
 * Prune the decoder's current word lattice and write it to the file
 * "<latdir>/<uttid><ext>", where <ext> is the value of "-outlatext".
 *
 * The pruning beam is the "-outlatbeam" setting converted with
 * logmath_log().  ps_lattice_write_htk() is used when "-outlatfmt" is
 * "htk"; any other value selects ps_lattice_write().
 *
 * @param ps     Decoder that holds the lattice.
 * @param latdir Directory part of the output path.
 * @param uttid  Utterance ID, used as the base of the file name.
 * @return 0 on success, -1 if no lattice is available or writing fails.
 */
static int
write_lattice(ps_decoder_t *ps, char const *latdir, char const *uttid)
{
    ps_lattice_t *lat;
    logmath_t *lmath;
    cmd_ln_t *config;
    char *outfile;
    int32 beam;
    int rv;

    if ((lat = ps_get_lattice(ps)) == NULL) {
        E_ERROR("Failed to obtain word lattice for utterance %s\n", uttid);
        return -1;
    }
    config = ps_get_config(ps);
    outfile = string_join(latdir, "/", uttid,
                          cmd_ln_str_r(config, "-outlatext"), NULL);

    /* Prune lattice. */
    lmath = ps_get_logmath(ps);
    beam = logmath_log(lmath, cmd_ln_float64_r(config, "-outlatbeam"));
    ps_lattice_posterior_prune(lat, beam);

    if (0 == strcmp("htk", cmd_ln_str_r(config, "-outlatfmt")))
        rv = ps_lattice_write_htk(lat, outfile);
    else
        rv = ps_lattice_write(lat, outfile);

    if (rv < 0) {
        E_ERROR("Failed to write lattice to %s\n", outfile);
        rv = -1;
    }
    else {
        rv = 0;
    }

    /* The joined path is heap-allocated; release it on every path. */
    ckd_free(outfile);
    return rv;
}
std::tuple<std::string, double> TwitchStreamChunk::process(std::string body){ int pos = _uri.find_last_of('/'); std::string fileName = _uri.substr(pos + 1); std::ofstream file(fileName); file << body; file.flush(); file.close(); std::stringstream cmd; std::string audioFile = boost::filesystem::unique_path().native(); audioFile.append(".wav"); cmd << "ffmpeg -i " << fileName << " -vn -ac 1 " << audioFile << " > /dev/null 2>&1"; system(cmd.str().c_str()); FILE *aFile = fopen(audioFile.c_str(), "r"); ps_decode_raw(getDecoder(), aFile, -1); fclose(aFile); auto logarithm = ps_get_logmath(getDecoder()); int confidence = 1; const char * result = ps_get_hyp(getDecoder(), &confidence); double tmp = logmath_exp(logarithm, confidence); std::remove(fileName.c_str()); std::remove(audioFile.c_str()); return std::make_tuple(result == nullptr ? "" : std::string(result), tmp); }
/**
 * End the current utterance.
 *
 * Ends the acoustic model's utterance, runs the search over the remaining
 * frames, finishes the phone loop search (when present) and the main
 * search, and stops the performance timer.  When "-backtrace" is enabled
 * the hypothesis and a per-segment table are logged.
 *
 * @param ps Decoder.
 * @return A negative value from the first failing search call, otherwise
 *         the result of finishing the main search.
 */
int
ps_end_utt(ps_decoder_t *ps)
{
    int status, frame;

    acmod_end_utt(ps->acmod);

    /* Run the search over whatever frames are still pending. */
    status = ps_search_forward(ps);
    if (status < 0) {
        ptmr_stop(&ps->perf);
        return status;
    }

    /* Close out the phone loop search, if there is one. */
    if (ps->phone_loop) {
        status = ps_search_finish(ps->phone_loop);
        if (status < 0) {
            ptmr_stop(&ps->perf);
            return status;
        }
    }

    /* Step the main search through the frames left in the lookahead
     * window. */
    frame = ps->acmod->output_frame - ps->pl_window;
    while (frame < ps->acmod->output_frame) {
        ps_search_step(ps->search, frame);
        ++frame;
    }

    /* Finish the main search; the timer stops whether or not it failed. */
    status = ps_search_finish(ps->search);
    ptmr_stop(&ps->perf);
    if (status < 0)
        return status;

    /* Everything below is optional backtrace logging. */
    if (!cmd_ln_boolean_r(ps->config, "-backtrace"))
        return status;

    {
        char const *uttid, *hyp;
        ps_seg_t *seg;
        int32 score;

        hyp = ps_get_hyp(ps, &score, &uttid);
        if (hyp == NULL)
            return status;

        E_INFO("%s: %s (%d)\n", uttid, hyp, score);
        E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
                    "word", "start", "end", "pprob", "ascr", "lscr", "lback");

        seg = ps_seg_iter(ps, &score);
        while (seg) {
            char const *word;
            int sf, ef;
            int32 post, lscr, ascr, lback;

            word = ps_seg_word(seg);
            ps_seg_frames(seg, &sf, &ef);
            post = ps_seg_prob(seg, &ascr, &lscr, &lback);
            E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
                        word, sf, ef,
                        logmath_exp(ps_get_logmath(ps), post),
                        ascr, lscr, lback);

            seg = ps_seg_next(seg);
        }
    }

    return status;
}
/**
 * Write one line per real dictionary word of a segmentation to a file.
 *
 * The utterance ID may be comma-separated.  The text before the first
 * comma is printed as the show and the second field as the channel
 * ("1" when there is no comma).  When a third comma exists, the text after
 * it is parsed with atof_c() and added to every word start time.
 *
 * @param fh    Output stream.
 * @param ps    Decoder; its dictionary and log-math object are used.
 * @param itor  Segment iterator to consume.
 * @param uttid Utterance ID, may be NULL (then "(null)" is printed).
 * @param frate Frame rate used to turn frame indices into time values.
 * @return Always 0.
 */
static int
write_ctm(FILE *fh, ps_decoder_t *ps, ps_seg_t *itor, char const *uttid, int32 frate)
{
    logmath_t *lmath = ps_get_logmath(ps);
    char *idcopy, *show, *channel, *sep;
    double utt_start = 0.0;

    /* Work on a private copy because the separators are overwritten. */
    idcopy = ckd_salloc(uttid ? uttid : "(null)");
    show = idcopy;
    channel = NULL;

    sep = strchr(idcopy, ',');
    if (sep != NULL) {
        *sep++ = '\0';
        channel = sep;
        sep = strchr(sep, ',');
        if (sep != NULL) {
            *sep++ = '\0';
            sep = strchr(sep, ',');
            if (sep != NULL)
                utt_start = atof_c(sep + 1);
        }
    }

    for (; itor; itor = ps_seg_next(itor)) {
        int32 prob, sf, ef, wid;
        char const *w;

        /* Only entries that the dictionary reports as real words are
         * written. */
        w = ps_seg_word(itor);
        wid = dict_wordid(ps->dict, w);
        if (wid < 0 || !dict_real_word(ps->dict, wid))
            continue;

        prob = ps_seg_prob(itor, NULL, NULL, NULL);
        ps_seg_frames(itor, &sf, &ef);
        fprintf(fh, "%s %s %.2f %.2f %s %.3f\n",
                show,
                channel ? channel : "1",
                utt_start + (double)sf / frate,
                (double)(ef - sf) / frate,
                dict_basestr(ps->dict, wid),
                logmath_exp(lmath, prob));
    }

    ckd_free(idcopy);
    return 0;
}
/**
 * Print every segment of the current hypothesis to stderr as
 * "<word> <start> <end> <confidence>".
 *
 * Start and end are (frame + start) / 100.0; the confidence is
 * logmath_exp() of the value returned by ps_seg_prob().
 *
 * @param start Offset added to each segment's start and end frame.
 */
static void
print_word_times(int32 start)
{
    ps_seg_t *seg;

    for (seg = ps_seg_iter(ps, NULL); seg != NULL; seg = ps_seg_next(seg)) {
        int32 first_frame, last_frame, log_post;
        float confidence;

        ps_seg_frames(seg, &first_frame, &last_frame);
        log_post = ps_seg_prob(seg, NULL, NULL, NULL);
        confidence = logmath_exp(ps_get_logmath(ps), log_post);

        fprintf(stderr, "%s %f %f %f\n",
                ps_seg_word(seg),
                (first_frame + start) / 100.0,
                (last_frame + start) / 100.0,
                confidence);
    }
}
/**
 * Collect (word, confidence) for every segment of the decoder's current
 * hypothesis and pass the list to mCb.
 *
 * The confidence is logmath_exp() of the value returned by ps_seg_prob().
 * mCb is not invoked when there are no segments.
 *
 * @param decoder Decoder to read the segmentation from.
 */
void EventHandlerSegmentConfidence::event(ps_decoder_t *decoder)
{
    std::vector<std::pair<std::string, float> > segments;

    for (ps_seg_t *seg = ps_seg_iter(decoder, NULL); seg != NULL; seg = ps_seg_next(seg)) {
        const int32 logProb = ps_seg_prob(seg, NULL, NULL, NULL);
        const std::string word(ps_seg_word(seg));
        const float confidence =
            static_cast<float>(logmath_exp(ps_get_logmath(decoder), logProb));

        segments.emplace_back(word, confidence);
    }

    if (segments.empty())
        return;

    mCb(segments);
}
void Recognizer::addModelJsgf(const std::string& key, const std::string& jsgfData, bool setActive) { // Create model: fsg_model_t* model = jsgf_read_string( jsgfData.c_str(), ps_get_logmath( mDecoder ), 7.5 ); // Verify model creation: if( model == NULL ) throw std::runtime_error( "Could not parse JSGF model" ); // Add entry: mModelMap[ key ] = ModelRef( new ModelFsg( model ) ); // Add model to decoder: ps_set_fsg( mDecoder, key.c_str(), model ); // Set active, if flagged: if( setActive ) ps_set_search( mDecoder, key.c_str() ); }
/**
 * Print every segment of the current hypothesis to stdout as
 * "<word> <start> <end> <confidence>".
 *
 * Start and end are the segment's frame indices divided by the "-frate"
 * setting; the confidence is logmath_exp() of the value returned by
 * ps_seg_prob().
 */
static void
print_word_times()
{
    int frame_rate = cmd_ln_int32_r(config, "-frate");
    ps_seg_t *seg;

    for (seg = ps_seg_iter(ps); seg != NULL; seg = ps_seg_next(seg)) {
        int32 first_frame, last_frame, log_post;
        float confidence;

        ps_seg_frames(seg, &first_frame, &last_frame);
        log_post = ps_seg_prob(seg, NULL, NULL, NULL);
        confidence = logmath_exp(ps_get_logmath(ps), log_post);

        printf("%s %.3f %.3f %f\n",
               ps_seg_word(seg),
               ((float)first_frame / frame_rate),
               ((float)last_frame / frame_rate),
               confidence);
    }
}
/**
 * Load the FSG named by one control-file entry and make it the active
 * search.
 *
 * The file path is built from "-fsgdir", the entry name and "-fsgext",
 * using whichever of the two settings are present.  The model is read with
 * the "-lw" setting, registered under the entry name and selected with
 * ps_set_search().  The local model handle and the path are released on
 * every path once a name was given.
 *
 * @param ps     Decoder.
 * @param config Configuration to read the settings from.
 * @param fname  Entry name; NULL means there is nothing to do.
 * @return 0 on success or when fname is NULL, -1 if reading, registering
 *         or selecting the FSG fails.
 */
static int
process_fsgctl_line(ps_decoder_t *ps, cmd_ln_t *config, char const *fname)
{
    const char *dir = cmd_ln_str_r(config, "-fsgdir");
    const char *ext = cmd_ln_str_r(config, "-fsgext");
    fsg_model_t *model;
    char *fsgpath = NULL;
    int status = -1;

    if (fname == NULL)
        return 0;

    if (dir != NULL)
        fsgpath = string_join(dir, "/", fname, ext ? ext : "", NULL);
    else if (ext != NULL)
        fsgpath = string_join(fname, ext, NULL);
    else
        fsgpath = ckd_salloc(fname);

    model = fsg_model_readfile(fsgpath, ps_get_logmath(ps),
                               cmd_ln_float32_r(config, "-lw"));

    /* Each step runs only if the previous one succeeded. */
    if (model != NULL && ps_set_fsg(ps, fname, model) == 0) {
        E_INFO("Using FSG: %s\n", fname);
        if (ps_set_search(ps, fname) == 0)
            status = 0;
    }

    fsg_model_free(model);
    ckd_free(fsgpath);

    return status;
}
void ofApp::process_result() { int frame_rate = cmd_ln_int32_r(config, "-frate"); ps_seg_t *iter = ps_seg_iter(ps, NULL); printf("\n\n"); while (iter != NULL) { int32 sf, ef, pprob; float conf; ps_seg_frames(iter, &sf, &ef); pprob = ps_seg_prob(iter, NULL, NULL, NULL); conf = logmath_exp(ps_get_logmath(ps), pprob); //here is where we process the word new_utterance new_utt; new_utt.conf = conf; new_utt.sf = sf; new_utt.st = (float)sf / frame_rate; new_utt.ef = ef; new_utt.et = (float) ef / frame_rate; new_utt.utt = ps_seg_word(iter); printf("Recognised: %s %.3f %.3f %f\n", ps_seg_word(iter), new_utt.st, new_utt.et, new_utt.conf); std::string word = ps_seg_word(iter); result.push_back(new_utt); iter = ps_seg_next(iter); } printf("\n\n"); engineExit(); }
/**
 * Build a finite state grammar that accepts each of the given phrases.
 *
 * Every phrase is split on single spaces; empty tokens (from repeated,
 * leading or trailing spaces) are dropped.  State 0 is the start state and
 * the last state is the final state.  Each phrase becomes a chain of
 * transitions, one per word, leaving state 0 and entering the final state
 * on its last word.  All transitions are added with a log probability
 * argument of 0.
 *
 * @param sink    Sink whose decoder supplies the log-math object.
 * @param phrases List whose data elements are phrase strings.
 * @return The model returned by fsg_model_init(), with the transitions
 *         added.
 */
fsg_model_t *
gst_sphinx_construct_fsg (GstSphinxSink *sink, GSList *phrases)
{
  fsg_model_t *fsg;
  GSList *l, *word_list;
  gchar **words;
  int n_states, i, j, k;

  n_states = 2; /* Final and initial state */
  word_list = NULL;
  for (l = phrases; l; l = l->next) {
    words = g_strsplit (l->data, " ", 0);

    /* Compact the vector in place so that empty tokens never reach the
     * grammar as "" words. */
    for (j = 0, k = 0; words[j] != NULL; j++) {
      if (*words[j] != '\0')
        words[k++] = words[j];
      else
        g_free (words[j]);
    }
    words[k] = NULL;

    /* Prepend and reverse afterwards: appending walks the whole list on
     * every call. */
    word_list = g_slist_prepend (word_list, words);
    n_states += k;
  }
  word_list = g_slist_reverse (word_list);

  fsg = fsg_model_init ("desktop-control",
                        ps_get_logmath (sink->decoder), 10.0, n_states);
  fsg->start_state = 0;
  fsg->final_state = n_states - 1;

  /* i counts words over all phrases; the state reached after word i is
   * i + 2 unless that word ends its phrase. */
  for (i = 0, l = word_list; l; l = l->next) {
    words = l->data;
    for (j = 0; words[j] != NULL; j++, i++) {
      int wid = fsg_model_word_add (fsg, words[j]);
      int from_state = (j == 0) ? 0 : i + 1;
      int to_state = (words[j + 1] == NULL) ? n_states - 1 : i + 2;

      fsg_model_trans_add (fsg, from_state, to_state, 0, wid);
    }
  }

  for (l = word_list; l; l = l->next)
    g_strfreev (l->data);
  g_slist_free (word_list);

  return fsg;
}
/*
 * Fill in the utterance result for the current decoder from its word
 * lattice, producing exactly one candidate.
 *
 * The candidate (cands[0]) gets score 1.0 and one token per accepted
 * lattice node.  Nodes whose word starts with '<' are ignored.  The walk
 * stops at the first accepted node that starts before the end of the
 * previously stored token.  The utterance score is logmath_exp() of the
 * score from ps_get_hyp(), with a floor of 0.00001.
 *
 * Returns without touching the outputs when ctx, its filter buffer, its
 * decoder set or the current decoder is missing.
 *
 * NOTE(review): nothing here bounds cand->ntoken against the capacity of
 * cand->tokens - confirm that the caller guarantees enough room.
 */
static void
fsg_processor(context_t *ctx, srs_srec_utterance_t *utt, srs_srec_candidate_t *cands, srs_srec_candidate_t **sorted)
{
    filter_buf_t *filtbuf;
    decoder_set_t *decset;
    decoder_t *dec;
    logmath_t *lmath;
    const char *uttid;
    int32_t score;
    double prob;
    srs_srec_candidate_t *cand;
    srs_srec_token_t *last;
    ps_lattice_t *dag;
    ps_latlink_t *edge;
    ps_latnode_t *node;
    const char *word;
    int32_t frlen;
    int32_t start, end;
    int16 fef, lef;

    if (!ctx)
        return;
    if (!(filtbuf = ctx->filtbuf))
        return;
    if (!(decset = ctx->decset))
        return;
    if (!(dec = decset->curdec))
        return;

    frlen = filtbuf->frlen;
    lmath = ps_get_logmath(dec->ps);
    ps_get_hyp(dec->ps, &score, &uttid);
    prob = logmath_exp(lmath, score);

    cand = cands;
    cand->score = 1.0;
    cand->ntoken = 0;
    last = NULL;

    dag = ps_get_lattice(dec->ps);
    edge = dag ? ps_lattice_traverse_edges(dag, NULL, NULL) : NULL;

    if (edge) {
        /* The second argument of ps_latlink_nodes() receives the other
         * end of the first edge; it is examined once, before the loop. */
        ps_latlink_nodes(edge, &node);

        if (node && (word = ps_latnode_word(dag, node)) && *word != '<') {
            last = &cand->tokens[cand->ntoken];
            cand->ntoken++;
            last->token = tknbase(word);
            last->start = ps_latnode_times(node, &fef, &lef) * frlen;
            last->end = ((fef + lef) / 2) * frlen;
        }

        /* Then the node returned by ps_latlink_nodes() is examined for
         * the first edge and for every following one. */
        do {
            node = ps_latlink_nodes(edge, NULL);

            if (node && (word = ps_latnode_word(dag, node)) && *word != '<') {
                start = ps_latnode_times(node, &fef, &lef) * frlen;
                end = fef * frlen;

                if (last && start < (int32_t)last->end)
                    break;      /* just take one candidate */

                if (!last || !tkneq(word, last->token)) {
                    last = &cand->tokens[cand->ntoken];
                    cand->ntoken++;
                    last->token = tknbase(word);
                    last->start = start;
                    last->end = end + frlen;
                }
            }
        } while ((edge = ps_lattice_traverse_next(dag, NULL)));
    }

    sorted[0] = cands;
    sorted[1] = NULL;

    if (prob < 0.00001)
        prob = 0.00001;

    utt->id = uttid;
    utt->score = prob;
    /* The length comes from the filter buffer, not from the lattice. */
    utt->length = filtbuf->len;
    utt->ncand = 1;
    utt->cands = sorted;
}
/*
 * Fill in the utterance result for the current decoder from its N-best
 * list.
 *
 * For each N-best entry the segmentation is scanned for "<s>".  The words
 * after it become the candidate's tokens until "</s>" is seen or the
 * token limit is reached; "<sil>" is skipped.  A candidate's score is
 * logmath_exp() of its N-best score divided by the utterance probability,
 * which is floored at 0.00000001.  A candidate that has tokens but ran out
 * of segments without reaching "</s>" is kept with its score multiplied
 * by 0.9.  Collection stops once CANDIDATE_MAX-1 candidates exist.
 *
 * The slot after the last accepted candidate is zeroed before the
 * candidates are passed to candidate_sort().
 *
 * Returns without touching the outputs when ctx, its filter buffer, its
 * decoder set or the current decoder is missing.
 */
static void
acoustic_processor(context_t *ctx, srs_srec_utterance_t *utt, srs_srec_candidate_t *cands, srs_srec_candidate_t **sorted)
{
    filter_buf_t *filtbuf;
    decoder_set_t *decset;
    decoder_t *dec;
    logmath_t *lmath;
    const char *uttid;
    const char *hyp;
    int32 score;
    double prob;
    ps_nbest_t *nb;
    ps_seg_t *seg;
    int32_t frlen;
    int32 start, end;
    size_t ncand;
    srs_srec_candidate_t *cand;
    srs_srec_token_t *tkn;
    int32_t length;

    if (!ctx || !(filtbuf = ctx->filtbuf) ||
        !(decset = ctx->decset) || !(dec = decset->curdec))
        return;

    frlen = filtbuf->frlen;
    lmath = ps_get_logmath(dec->ps);
    uttid = "<unknown>";
    hyp = ps_get_hyp(dec->ps, &score, &uttid);
    prob = logmath_exp(lmath, score);
    length = 0;

    if (prob < 0.00000001)
        prob = 0.00000001;

    for (nb = ps_nbest(dec->ps, 0,-1, NULL,NULL), ncand = 0;
         nb != NULL;
         nb = ps_nbest_next(nb))
    {
        if (ncand >= CANDIDATE_MAX-1) {
            /* Free the iterator before leaving early.  The call used to
             * sit after the break and was never reached. */
            ps_nbest_free(nb);
            break;
        }

        if ((seg = ps_nbest_seg(nb, &score))) {
            /* Skip ahead to the sentence start marker. */
            while (seg && strcmp(ps_seg_word(seg), "<s>"))
                seg = ps_seg_next(seg);

            if (!seg)
                continue;

            ps_seg_frames(seg, &start, &end);

            cand = cands + ncand;
            cand->score = logmath_exp(lmath, score) / prob;
            cand->ntoken = 0;

            length = 0;

            while ((seg = ps_seg_next(seg))) {
                if ((hyp = ps_seg_word(seg))) {
                    if (!strcmp(hyp, "</s>") ||
                        cand->ntoken >= CANDIDATE_TOKEN_MAX)
                    {
                        /* Candidate complete (or full): accept it and
                         * drop the rest of the segmentation. */
                        ncand++;
                        ps_seg_frames(seg, &start, &end);
                        ps_seg_free(seg);
                        length = (end + 1) * frlen;
                        break;
                    }
                    else if (!strcmp(hyp, "<sil>")) {
                        ps_seg_frames(seg, &start, &end);
                    }
                    else {
                        tkn = cand->tokens + cand->ntoken++;
                        tkn->token = tknbase(hyp);
                        ps_seg_frames(seg, &start, &end);
                        tkn->start = start * frlen;
                        tkn->end = (end + 1) * frlen;
                    }
                }
            } /* while seg */

            if (!seg && cand->ntoken > 0) {
                ncand++;
                cand->score *= 0.9; /* some penalty */
            }

            /* Fall back to the end of the last token; with no tokens
             * there is nothing to read. */
            if (!length && cand->ntoken > 0) {
                tkn = cand->tokens + (cand->ntoken - 1);
                length = tkn->end;
            }
        }
    } /* for nb */

    /* Zero the slot after the last accepted candidate.  Indexing by ncand
     * avoids reading `cand`, which is unset when no entry got that far. */
    memset(cands + ncand, 0, sizeof(srs_srec_candidate_t));

    utt->id = uttid;
    utt->score = prob;
    /* The length comes from the filter buffer; `length` is not used. */
    utt->length = filtbuf->len;
    utt->ncand = candidate_sort(cands, sorted);
    utt->cands = sorted;
}
int main(int argc, char *argv[]) { ps_decoder_t *ps; cmd_ln_t *config; acmod_t *acmod; fsg_search_t *fsgs; ps_lattice_t *dag; ps_seg_t *seg; int32 score; TEST_ASSERT(config = cmd_ln_init(NULL, ps_args(), TRUE, "-hmm", DATADIR "/tidigits/hmm", "-fsg", DATADIR "/tidigits/lm/tidigits.fsg", "-dict", DATADIR "/tidigits/lm/tidigits.dic", "-bestpath", "no", "-input_endian", "little", "-samprate", "16000", NULL)); TEST_ASSERT(ps = ps_init(config)); fsgs = (fsg_search_t *)ps->search; acmod = ps->acmod; setbuf(stdout, NULL); { FILE *rawfh; int16 buf[2048]; size_t nread; int16 const *bptr; char const *hyp; int nfr; TEST_ASSERT(rawfh = fopen(DATADIR "/numbers.raw", "rb")); TEST_EQUAL(0, acmod_start_utt(acmod)); fsg_search_start(ps_search_base(fsgs)); while (!feof(rawfh)) { nread = fread(buf, sizeof(*buf), 2048, rawfh); bptr = buf; while ((nfr = acmod_process_raw(acmod, &bptr, &nread, FALSE)) > 0) { while (acmod->n_feat_frame > 0) { fsg_search_step(ps_search_base(fsgs), acmod->output_frame); acmod_advance(acmod); } } } fsg_search_finish(ps_search_base(fsgs)); hyp = fsg_search_hyp(ps_search_base(fsgs), &score, NULL); printf("FSG: %s (%d)\n", hyp, score); TEST_ASSERT(acmod_end_utt(acmod) >= 0); fclose(rawfh); } for (seg = ps_seg_iter(ps); seg; seg = ps_seg_next(seg)) { char const *word; int sf, ef; int32 post, lscr, ascr, lback; word = ps_seg_word(seg); ps_seg_frames(seg, &sf, &ef); post = ps_seg_prob(seg, &ascr, &lscr, &lback); printf("%s (%d:%d) P(w|o) = %f ascr = %d lscr = %d lback = %d\n", word, sf, ef, logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback); } /* Now get the DAG and play with it. */ dag = ps_get_lattice(ps); ps_lattice_write(dag, "test_fsg3.lat"); printf("BESTPATH: %s\n", ps_lattice_hyp(dag, ps_lattice_bestpath(dag, NULL, 1.0, 15.0))); ps_lattice_posterior(dag, NULL, 15.0); ps_free(ps); cmd_ln_free_r(config); return 0; }
/**
 * Decode DATADIR "/goforward.raw" three ways and compare each hypothesis
 * with the expected text.
 *
 * The file is decoded with ps_decode_raw(), then in 2048-sample blocks
 * through ps_process_raw(), and finally frame by frame through
 * ps_process_cep() on cepstra computed with the decoder's front end.
 * Hypotheses, scores and timing figures are printed along the way.  The
 * decoder and the configuration passed in are freed before returning.
 *
 * @param config   Decoder configuration; freed by this function.
 * @param sname    Label printed in front of each result line.
 * @param expected Hypothesis text every pass must produce.
 * @return 0 when the end is reached.
 */
int
ps_decoder_test(cmd_ln_t *config, char const *sname, char const *expected)
{
    ps_decoder_t *ps;
    mfcc_t **cepstra;
    FILE *rawfh;
    int16 *buf;
    int16 const *cursor;
    size_t nread;
    size_t nsamps;
    int32 n_frames, frame, score, prob;
    char const *hyp;
    char const *uttid;
    double t_speech, t_cpu, t_wall;
    ps_seg_t *cur;

    TEST_ASSERT(ps = ps_init(config));

    /* Pass 1: whole file through ps_decode_raw(). */
    TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
    ps_decode_raw(ps, rawfh, "goforward", -1);
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);
    ps_get_utt_time(ps, &t_speech, &t_cpu, &t_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           t_speech, t_cpu, t_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           t_cpu / t_speech, t_wall / t_speech);

    /* Pass 2: fixed-size blocks through ps_process_raw(). */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_END);
    nsamps = ftell(rawfh) / sizeof(*buf);
    fseek(rawfh, 0, SEEK_SET);
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    /* The size computed above is replaced by the block size. */
    nsamps = 2048;
    buf = ckd_calloc(nsamps, sizeof(*buf));
    while (!feof(rawfh)) {
        nread = fread(buf, sizeof(*buf), nsamps, rawfh);
        ps_process_raw(ps, buf, nread, FALSE, FALSE);
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000000"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    ps_get_utt_time(ps, &t_speech, &t_cpu, &t_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           t_speech, t_cpu, t_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           t_cpu / t_speech, t_wall / t_speech);

    /* Pass 3: read the whole file and turn it into an MFCC buffer. */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_END);
    nsamps = ftell(rawfh) / sizeof(*buf);
    fseek(rawfh, 0, SEEK_SET);
    cursor = buf = ckd_realloc(buf, nsamps * sizeof(*buf));
    TEST_EQUAL(nsamps, fread(buf, sizeof(*buf), nsamps, rawfh));
    /* First call has no output buffer; n_frames is used to size one. */
    fe_process_frames(ps->acmod->fe, &cursor, &nsamps, NULL, &n_frames);
    cepstra = ckd_calloc_2d(n_frames + 1,
                            fe_get_output_size(ps->acmod->fe),
                            sizeof(**cepstra));
    fe_start_utt(ps->acmod->fe);
    fe_process_frames(ps->acmod->fe, &cursor, &nsamps, cepstra, &n_frames);
    fe_end_utt(ps->acmod->fe, cepstra[n_frames], &frame);

    /* Decode the cepstra one frame at a time with ps_process_cep(). */
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    frame = 0;
    while (frame < n_frames) {
        ps_process_cep(ps, &cepstra[frame], 1, FALSE, FALSE);
        ++frame;
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000001"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);

    cur = ps_seg_iter(ps, &score);
    while (cur) {
        char const *word;
        int sf, ef;
        int32 post, lscr, ascr, lback;

        word = ps_seg_word(cur);
        ps_seg_frames(cur, &sf, &ef);
        post = ps_seg_prob(cur, &ascr, &lscr, &lback);
        printf("%s (%d:%d) P(w|o) = %f ascr = %d lscr = %d lback = %d\n",
               word, sf, ef,
               logmath_exp(ps_get_logmath(ps), post),
               ascr, lscr, lback);
        /* Small slack above 0 for numerical error in float arithmetic. */
        TEST_ASSERT(post <= 2);

        cur = ps_seg_next(cur);
    }

    ps_get_utt_time(ps, &t_speech, &t_cpu, &t_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           t_speech, t_cpu, t_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           t_cpu / t_speech, t_wall / t_speech);
    ps_get_all_time(ps, &t_speech, &t_cpu, &t_wall);
    printf("TOTAL: %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           t_speech, t_cpu, t_wall);
    printf("TOTAL: %.2f xRT (CPU), %.2f xRT (elapsed)\n",
           t_cpu / t_speech, t_wall / t_speech);

    fclose(rawfh);
    ps_free(ps);
    cmd_ln_free_r(config);
    ckd_free_2d(cepstra);
    ckd_free(buf);

    return 0;
}