/*
 * Write a finite state grammar to fp in text form.
 *
 * The output is the begin declaration followed by the grammar name (an
 * empty string when fsg->name is NULL), the state count, the start and
 * final states, one transition line per arc of every state, and the end
 * declaration.  Each transition probability is printed in the linear
 * domain after the stored log score has been divided by fsg->lw.  The
 * stream is flushed before returning.
 */
void
fsg_model_write(fsg_model_t * fsg, FILE * fp)
{
    int32 state;

    /* Header section. */
    fprintf(fp, "%s %s\n", FSG_MODEL_BEGIN_DECL,
            fsg->name ? fsg->name : "");
    fprintf(fp, "%s %d\n", FSG_MODEL_NUM_STATES_DECL, fsg->n_state);
    fprintf(fp, "%s %d\n", FSG_MODEL_START_STATE_DECL, fsg->start_state);
    fprintf(fp, "%s %d\n", FSG_MODEL_FINAL_STATE_DECL, fsg->final_state);

    /* One line per outgoing arc, state by state. */
    for (state = 0; state < fsg->n_state; state++) {
        fsg_arciter_t *arc = fsg_model_arcs(fsg, state);

        while (arc) {
            fsg_link_t *link = fsg_arciter_get(arc);

            fprintf(fp, "%s %d %d %f %s\n", FSG_MODEL_TRANSITION_DECL,
                    link->from_state, link->to_state,
                    logmath_exp(fsg->lmath,
                                (int32) (link->logs2prob / fsg->lw)),
                    /* Arcs with a negative word ID are written without a word. */
                    (link->wid < 0) ? "" : fsg_model_word_str(fsg, link->wid));

            arc = fsg_arciter_next(arc);
        }
    }

    fprintf(fp, "%s\n", FSG_MODEL_END_DECL);
    fflush(fp);
}
std::tuple<std::string, double> TwitchStreamChunk::process(std::string body){ int pos = _uri.find_last_of('/'); std::string fileName = _uri.substr(pos + 1); std::ofstream file(fileName); file << body; file.flush(); file.close(); std::stringstream cmd; std::string audioFile = boost::filesystem::unique_path().native(); audioFile.append(".wav"); cmd << "ffmpeg -i " << fileName << " -vn -ac 1 " << audioFile << " > /dev/null 2>&1"; system(cmd.str().c_str()); FILE *aFile = fopen(audioFile.c_str(), "r"); ps_decode_raw(getDecoder(), aFile, -1); fclose(aFile); auto logarithm = ps_get_logmath(getDecoder()); int confidence = 1; const char * result = ps_get_hyp(getDecoder(), &confidence); double tmp = logmath_exp(logarithm, confidence); std::remove(fileName.c_str()); std::remove(audioFile.c_str()); return std::make_tuple(result == nullptr ? "" : std::string(result), tmp); }
/*
 * Evaluate a single line of text against a language model and print the
 * results to standard output.
 *
 * The text is split into words with str2words(), scored with
 * calc_entropy(), and the cross-entropy (in bits), perplexity, language
 * model score, word count, and OOV / context-cue counts are printed.
 * Text that contains no words produces no output.
 */
static void
evaluate_string(ngram_model_t *lm, logmath_t *lmath, const char *text)
{
    char *textfoo;
    char **words;
    int32 n, ch, noovs, nccs, lscr;

    /* Split it into an array of strings (str2words writes into the copy). */
    textfoo = ckd_salloc(text);
    n = str2words(textfoo, NULL, 0);
    if (n < 0)
        E_FATAL("str2words(textfoo, NULL, 0) = %d, should not happen\n", n);
    if (n == 0) {
        /* Nothing to evaluate; release the copy before leaving. */
        ckd_free(textfoo);
        return;
    }
    words = ckd_calloc(n, sizeof(*words));
    str2words(textfoo, words, n);

    ch = calc_entropy(lm, words, n, &nccs, &noovs, &lscr);

    printf("input: %s\n", text);
    /* Convert from the log base of lmath to bits. */
    printf("cross-entropy: %f bits\n",
           ch * log(logmath_get_base(lmath)) / log(2));

    /* Calculate perplexity pplx = exp CH */
    printf("perplexity: %f\n", logmath_exp(lmath, ch));
    printf("lm score: %d\n", lscr);

    /* Report OOVs and CCs */
    printf("%d words evaluated\n", n);
    printf("%d OOVs, %d context cues removed\n",
           noovs, nccs);

    ckd_free(textfoo);
    ckd_free(words);
}
/*
 * Add a unigram for word ID wid (already present in the master model) to
 * the active submodels of a model set and extend the word ID map.
 *
 * When no single model is selected (set->cur == -1) every submodel is
 * active and the returned probability is the log-sum of each submodel's
 * unigram probability plus its interpolation weight.  When one model is
 * selected only that model is touched and its unigram probability is
 * returned.  Returns base->log_zero if the word could not be added to an
 * active submodel.
 */
static int32
ngram_model_set_add_ug(ngram_model_t * base, int32 wid, int32 lweight)
{
    ngram_model_set_t *set = (ngram_model_set_t *) base;
    int32 *sub_wids;
    int32 m, w, total;

    /* One slot per submodel for the word's ID in that submodel. */
    sub_wids = ckd_calloc(set->n_models, sizeof(*sub_wids));
    total = base->log_zero;

    for (m = 0; m < set->n_models; ++m) {
        int32 uni, n_used;

        /* Inactive submodels get no entry for this word. */
        if (set->cur != -1 && set->cur != m) {
            sub_wids[m] = NGRAM_INVALID_WID;
            continue;
        }

        /* Reuse the word if the submodel already knows it, else add it. */
        sub_wids[m] = ngram_wid(set->lms[m], base->word_str[wid]);
        if (sub_wids[m] == NGRAM_INVALID_WID) {
            sub_wids[m] =
                ngram_model_add_word(set->lms[m], base->word_str[wid],
                                     (float32) logmath_exp(base->lmath,
                                                           lweight));
            if (sub_wids[m] == NGRAM_INVALID_WID) {
                ckd_free(sub_wids);
                return base->log_zero;
            }
        }

        /* Take the submodel's unigram probability as-is when it is the
         * selected model, otherwise fold it into the interpolated sum. */
        uni = ngram_ng_prob(set->lms[m], sub_wids[m], NULL, 0, &n_used);
        if (set->cur == m)
            total = uni;
        else
            total = logmath_add(base->lmath, total,
                                set->lweights[m] + uni);
    }

    /* Grow the word ID map: the row pointer array first, then the single
     * contiguous block holding all rows, then re-point every row into the
     * (possibly moved) block. */
    set->widmap = ckd_realloc(set->widmap,
                              base->n_words * sizeof(*set->widmap));
    set->widmap[0] = ckd_realloc(set->widmap[0],
                                 base->n_words * set->n_models *
                                 sizeof(**set->widmap));
    for (w = 0; w < base->n_words; ++w)
        set->widmap[w] = set->widmap[0] + w * set->n_models;

    /* Install the new word's row. */
    memcpy(set->widmap[wid], sub_wids, set->n_models * sizeof(*sub_wids));
    ckd_free(sub_wids);

    return total;
}
/*
 * End the current utterance.
 *
 * Ends acoustic model processing, searches any frames that are still
 * pending, finishes the phone loop search (if there is one), runs the main
 * search over the frames still held back by the lookahead window, and
 * finishes the main search.  The performance timer is stopped on every
 * exit path.  When the "-backtrace" option is set, the hypothesis and a
 * per-word table are logged.
 *
 * Returns the result of finishing the main search, or the first negative
 * value returned by one of the earlier steps.
 */
int
ps_end_utt(ps_decoder_t *ps)
{
    int rv, i;

    acmod_end_utt(ps->acmod);

    /* Search any remaining frames. */
    if ((rv = ps_search_forward(ps)) < 0) {
        ptmr_stop(&ps->perf);
        return rv;
    }
    /* Finish phone loop search. */
    if (ps->phone_loop) {
        if ((rv = ps_search_finish(ps->phone_loop)) < 0) {
            ptmr_stop(&ps->perf);
            return rv;
        }
    }
    /* Search any frames remaining in the lookahead window.  If fewer
     * frames than the window size were produced, the start index would be
     * negative, so the flush is skipped in that case. */
    if (ps->acmod->output_frame >= ps->pl_window) {
        for (i = ps->acmod->output_frame - ps->pl_window;
             i < ps->acmod->output_frame; ++i)
            ps_search_step(ps->search, i);
    }
    /* Finish main search. */
    if ((rv = ps_search_finish(ps->search)) < 0) {
        ptmr_stop(&ps->perf);
        return rv;
    }
    ptmr_stop(&ps->perf);

    /* Log a backtrace if requested. */
    if (cmd_ln_boolean_r(ps->config, "-backtrace")) {
        char const *uttid, *hyp;
        ps_seg_t *seg;
        int32 score;

        hyp = ps_get_hyp(ps, &score, &uttid);
        if (hyp != NULL) {
            E_INFO("%s: %s (%d)\n", uttid, hyp, score);
            E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
                        "word", "start", "end", "pprob", "ascr", "lscr", "lback");
            for (seg = ps_seg_iter(ps, &score); seg;
                 seg = ps_seg_next(seg)) {
                char const *word;
                int sf, ef;
                int32 post, lscr, ascr, lback;

                word = ps_seg_word(seg);
                ps_seg_frames(seg, &sf, &ef);
                post = ps_seg_prob(seg, &ascr, &lscr, &lback);
                /* The posterior is logged in the linear domain. */
                E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
                            word, sf, ef, logmath_exp(ps_get_logmath(ps), post),
                            ascr, lscr, lback);
            }
        }
    }
    return rv;
}
/*
 * Remove the submodel called name from a language model set.
 *
 * The remaining interpolation weights are renormalized, the submodel,
 * name and weight arrays are compacted (without shrinking them), and the
 * word ID map is either compacted in place (reuse_widmap non-zero) or
 * rebuilt with build_widmap().
 *
 * Returns the removed submodel, or NULL if no submodel has that name.
 */
ngram_model_t *
ngram_model_set_remove(ngram_model_t * base,
                       const char *name, int reuse_widmap)
{
    ngram_model_set_t *set = (ngram_model_set_t *) base;
    ngram_model_t *submodel;
    int32 lmidx, scale, n, i;
    float32 fprob;

    /* Locate the submodel by name. */
    for (lmidx = 0; lmidx < set->n_models; ++lmidx)
        if (0 == strcmp(name, set->names[lmidx]))
            break;
    if (lmidx == set->n_models)
        return NULL;
    submodel = set->lms[lmidx];

    /* Renormalize the interpolation weights by scaling them by
     * 1/(1-fprob) */
    fprob = (float32) logmath_exp(base->lmath, set->lweights[lmidx]);
    scale = logmath_log(base->lmath, 1.0 - fprob);

    /* Remove it from the array of lms, renormalize remaining weights,
     * and recalculate n. */
    --set->n_models;
    n = 0;
    ckd_free(set->names[lmidx]);
    set->names[lmidx] = NULL;
    for (i = 0; i < set->n_models; ++i) {
        /* Entries at or after the removed index move down by one. */
        if (i >= lmidx) {
            set->lms[i] = set->lms[i + 1];
            set->names[i] = set->names[i + 1];
            set->lweights[i] = set->lweights[i + 1];
        }
        set->lweights[i] -= scale;
        if (set->lms[i]->n > n)
            n = set->lms[i]->n;
    }
    /* There's no need to shrink these arrays, but clear the vacated slot
     * in all three so that no stale copy of a shifted pointer remains. */
    set->lms[set->n_models] = NULL;
    set->names[set->n_models] = NULL;
    set->lweights[set->n_models] = base->log_zero;
    /* No need to shrink maphist either. */

    /* Reuse the existing word ID mapping if requested. */
    if (reuse_widmap) {
        /* Just go through and shrink each row. */
        for (i = 0; i < base->n_words; ++i) {
            memmove(set->widmap[i] + lmidx, set->widmap[i] + lmidx + 1,
                    (set->n_models - lmidx) * sizeof(**set->widmap));
        }
    }
    else {
        build_widmap(base, base->lmath, n);
    }
    return submodel;
}
/*
 * Write the word segmentation starting at itor to fh, one line per word:
 * show, channel, start time, duration, base word string and confidence.
 *
 * The utterance ID is treated as a comma-separated list: the first field
 * is the show, the second the channel, and the text after the third comma
 * is parsed as the utterance start time in seconds.  Without any comma the
 * whole ID is the show and the channel is printed as "1".  Words that the
 * dictionary does not know, or does not consider real words, are skipped.
 * Always returns 0.
 */
static int
write_ctm(FILE *fh, ps_decoder_t *ps, ps_seg_t *itor, char const *uttid, int32 frate)
{
    logmath_t *lmath = ps_get_logmath(ps);
    char *idcopy, *show, *channel, *sep;
    double ustart = 0.0;

    /* Work on a private copy, since the fields are cut apart in place. */
    idcopy = ckd_salloc(uttid ? uttid : "(null)");
    show = idcopy;
    channel = NULL;

    sep = strchr(idcopy, ',');
    if (sep != NULL) {
        *sep++ = '\0';
        channel = sep;
        sep = strchr(sep, ',');
        if (sep != NULL) {
            *sep++ = '\0';
            sep = strchr(sep, ',');
            if (sep != NULL)
                ustart = atof_c(sep + 1);
        }
    }

    for (; itor; itor = ps_seg_next(itor)) {
        /* Skip things that aren't "real words" (FIXME: currently
         * requires s3kr3t h34d3rz...) */
        char const *w = ps_seg_word(itor);
        int32 wid = dict_wordid(ps->dict, w);
        int32 prob, sf, ef;

        if (wid < 0 || !dict_real_word(ps->dict, wid))
            continue;

        prob = ps_seg_prob(itor, NULL, NULL, NULL);
        ps_seg_frames(itor, &sf, &ef);

        /* Frame indices are converted to seconds using frate. */
        fprintf(fh, "%s %s %.2f %.2f %s %.3f\n",
                show,
                channel ? channel : "1",
                ustart + (double)sf / frate,
                (double)(ef - sf) / frate,
                /* FIXME: More s3kr3tz */
                dict_basestr(ps->dict, wid),
                logmath_exp(lmath, prob));
    }

    ckd_free(idcopy);
    return 0;
}
/*
 * Compute the "raw" score of word wid given up to two words of history,
 * i.e. the stored score with the insertion penalty subtracted and the
 * language weight divided back out.
 *
 * With no history the unigram score is used and, for every word except
 * "<s>", the uniform-distribution interpolation is subtracted again in the
 * linear domain.  With one word of history the bigram score is used; any
 * other n_hist is scored as a trigram.  *n_used receives the number of
 * words actually used.
 */
static int32
lm3g_template_raw_score(ngram_model_t *base, int32 wid,
                        int32 *history, int32 n_hist,
                        int32 *n_used)
{
    NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base;
    int32 score;

    if (n_hist == 0) {
        /* Access mode: unigram */
        *n_used = 1;
        /* Undo insertion penalty. */
        score = model->lm3g.unigrams[wid].prob1.l - base->log_wip;
        /* Undo language weight. */
        score = (int32)(score / base->lw);
        /* Undo unigram interpolation.  This operation is numerically
         * unstable, so it is only performed when the uniform term is
         * above log-zero. */
        if (strcmp(base->word_str[wid], "<s>") != 0 /* FIXME: configurable start_sym */
            && base->log_uniform + base->log_uniform_weight
               > logmath_get_zero(base->lmath)) {
            score = logmath_log(base->lmath,
                                logmath_exp(base->lmath, score)
                                - logmath_exp(base->lmath,
                                              base->log_uniform
                                              + base->log_uniform_weight));
        }
        return score;
    }

    if (n_hist == 1)
        score = lm3g_bg_score(model, history[0], wid, n_used);
    else
        /* Anything greater than 2 is the same as a trigram for now. */
        score = lm3g_tg_score(model, history[1], history[0], wid, n_used);

    /* FIXME (maybe): This doesn't undo unigram weighting in backoff cases. */
    return (int32)((score - base->log_wip) / base->lw);
}
/*
 * Create the trie node for the N-gram given as "FOUR", "POINT", "ZERO",
 * assign it parameters, and check that looking the same N-gram up again
 * yields the log probability that was set.  Returns 0.
 */
static int
test_add_nodes(ngram_trie_t *t, logmath_t *lmath)
{
    ngram_trie_node_t *ng;
    int32 prob, n_used;

    ng = ngram_trie_ngram_init(t, "FOUR", "POINT", "ZERO", NULL);
    TEST_ASSERT(ng != NULL);
    ngram_trie_node_set_params(t, ng, -25776, -42);

    /* The lookup must now return the first parameter set above. */
    prob = ngram_trie_prob(t, &n_used, "FOUR", "POINT", "ZERO", NULL);
    printf("P(ZERO POINT FOUR) = %d = %g = %f\n",
           prob,
           logmath_exp(lmath, prob),
           logmath_log_to_log10(lmath, prob));
    TEST_EQUAL_LOG(prob, -25776);

    return 0;
}
/*
 * Print every word segment of the current hypothesis to stderr as
 * "word start end confidence".  Start and end are the segment's frame
 * indices offset by start and divided by 100.0 (presumably a rate of 100
 * frames per second -- confirm against the decoder configuration).
 */
static void
print_word_times(int32 start)
{
    ps_seg_t *seg;

    for (seg = ps_seg_iter(ps, NULL); seg != NULL; seg = ps_seg_next(seg)) {
        int32 first, last, logprob;
        float conf;

        ps_seg_frames(seg, &first, &last);
        logprob = ps_seg_prob(seg, NULL, NULL, NULL);
        /* Posterior converted from the log domain to a linear value. */
        conf = logmath_exp(ps_get_logmath(ps), logprob);

        fprintf(stderr, "%s %f %f %f\n",
                ps_seg_word(seg),
                (first + start) / 100.0,
                (last + start) / 100.0,
                conf);
    }
}
/**
 * Collect every word segment of the decoder's current hypothesis together
 * with its posterior probability (converted to the linear domain) and pass
 * the list to the registered callback.  The callback is not invoked when
 * there are no segments.
 */
void EventHandlerSegmentConfidence::event(ps_decoder_t *decoder)
{
    std::vector<std::pair<std::string, float> > segments;

    for (ps_seg_t *seg = ps_seg_iter(decoder, NULL);
         seg != NULL;
         seg = ps_seg_next(seg)) {
        const int32 logProb = ps_seg_prob(seg, NULL, NULL, NULL);
        const std::string word(ps_seg_word(seg));
        const float confidence =
            static_cast<float>(logmath_exp(ps_get_logmath(decoder), logProb));

        segments.emplace_back(word, confidence);
    }

    if (segments.empty())
        return;

    mCb(segments);
}
/*
 * Print every word segment of the current hypothesis to stdout as
 * "word start end confidence", with start and end in seconds computed
 * from the frame indices and the configured "-frate" value.
 */
static void
print_word_times()
{
    int frame_rate = cmd_ln_int32_r(config, "-frate");
    ps_seg_t *seg;

    for (seg = ps_seg_iter(ps); seg != NULL; seg = ps_seg_next(seg)) {
        int32 first, last, logprob;
        float conf;

        ps_seg_frames(seg, &first, &last);
        logprob = ps_seg_prob(seg, NULL, NULL, NULL);
        /* Posterior converted from the log domain to a linear value. */
        conf = logmath_exp(ps_get_logmath(ps), logprob);

        printf("%s %.3f %.3f %f\n",
               ps_seg_word(seg),
               ((float)first / frame_rate),
               ((float) last / frame_rate),
               conf);
    }
}
void ofApp::process_result() { int frame_rate = cmd_ln_int32_r(config, "-frate"); ps_seg_t *iter = ps_seg_iter(ps, NULL); printf("\n\n"); while (iter != NULL) { int32 sf, ef, pprob; float conf; ps_seg_frames(iter, &sf, &ef); pprob = ps_seg_prob(iter, NULL, NULL, NULL); conf = logmath_exp(ps_get_logmath(ps), pprob); //here is where we process the word new_utterance new_utt; new_utt.conf = conf; new_utt.sf = sf; new_utt.st = (float)sf / frame_rate; new_utt.ef = ef; new_utt.et = (float) ef / frame_rate; new_utt.utt = ps_seg_word(iter); printf("Recognised: %s %.3f %.3f %f\n", ps_seg_word(iter), new_utt.st, new_utt.et, new_utt.conf); std::string word = ps_seg_word(iter); result.push_back(new_utt); iter = ps_seg_next(iter); } printf("\n\n"); engineExit(); }
/*
 * Look up a series of N-grams in the trie, print each log probability
 * (raw, linear and log10), and compare it with the expected value.
 * Returns 0.
 */
static int
test_lookups(ngram_trie_t *t, logmath_t *lmath)
{
    int32 prob, n_used;

    /* Lookups with three, two and one word. */
    prob = ngram_trie_prob(t, &n_used, "THREE", "POINT", "ZERO", NULL);
    printf("P(ZERO POINT THREE) = %d = %g = %f\n",
           prob,
           logmath_exp(lmath, prob),
           logmath_log_to_log10(lmath, prob));
    TEST_EQUAL_LOG(prob, -25776);

    prob = ngram_trie_prob(t, &n_used, "THREE", "POINT", NULL);
    printf("P(POINT THREE) = %d = %g = %f\n",
           prob,
           logmath_exp(lmath, prob),
           logmath_log_to_log10(lmath, prob));
    TEST_EQUAL_LOG(prob, -38960);

    prob = ngram_trie_prob(t, &n_used, "THREE", NULL);
    printf("P(THREE) = %d = %g = %f\n",
           prob,
           logmath_exp(lmath, prob),
           logmath_log_to_log10(lmath, prob));
    TEST_EQUAL_LOG(prob, -69328);

    /* Test 3-gram probs with backoff. */

    /* Backoff to 2-gram POINT FOUR + alpha(ZERO POINT) */
    prob = ngram_trie_prob(t, &n_used, "FOUR", "POINT", "ZERO", NULL);
    printf("P(ZERO POINT FOUR) = %d = %g = %f\n",
           prob,
           logmath_exp(lmath, prob),
           logmath_log_to_log10(lmath, prob));
    TEST_EQUAL_LOG(prob, -35600);

    /* Backoff to 2-gram SIX FOUR + alpha(ZERO) */
    prob = ngram_trie_prob(t, &n_used, "FOUR", "SIX", "ZERO", NULL);
    printf("P(ZERO SIX FOUR) = %d = %g = %f\n",
           prob,
           logmath_exp(lmath, prob),
           logmath_log_to_log10(lmath, prob));
    TEST_EQUAL_LOG(prob, -56608);

    /* Backoff to 1-gram FOUR + alpha(ZERO SEVEN) */
    prob = ngram_trie_prob(t, &n_used, "FOUR", "SEVEN", "ZERO", NULL);
    printf("P(ZERO SEVEN FOUR) = %d = %g = %f\n",
           prob,
           logmath_exp(lmath, prob),
           logmath_log_to_log10(lmath, prob));
    TEST_EQUAL_LOG(prob, -76496);

    return 0;
}
/*
 * Exercise class-based language model support on an already loaded model:
 * load a class definition file, check word IDs and unigram / bigram /
 * trigram scores for in-class and out-of-class words, add words to an
 * existing class (one, then 129 more), and finally add a new class.
 */
void
run_tests(logmath_t *lmath, ngram_model_t *model)
{
    int32 rv, i;

    TEST_ASSERT(model);

    TEST_EQUAL(ngram_wid(model, "scylla"), 285);
    TEST_EQUAL(strcmp(ngram_word(model, 285), "scylla"), 0);

    rv = ngram_model_read_classdef(model, LMDIR "/100.probdef");
    TEST_EQUAL(rv, 0);

    /* Verify that class word IDs remain the same. */
    TEST_EQUAL(ngram_wid(model, "scylla"), 285);
    TEST_EQUAL(strcmp(ngram_word(model, 285), "scylla"), 0);

    /* Verify in-class word IDs. */
    TEST_EQUAL(ngram_wid(model, "scylla:scylla"), 0x80000000 | 400);

    /* Verify in-class and out-class unigram scores. */
    TEST_EQUAL_LOG(ngram_score(model, "scylla:scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.4));
    TEST_EQUAL_LOG(ngram_score(model, "scooby:scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.1));
    TEST_EQUAL_LOG(ngram_score(model, "scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884));
    TEST_EQUAL_LOG(ngram_score(model, "oh:zero", NULL),
                   logmath_log10_to_log(lmath, -1.9038) + logmath_log(lmath, 0.7));
    TEST_EQUAL_LOG(ngram_score(model, "zero", NULL),
                   logmath_log10_to_log(lmath, -1.9038));

    /* Verify class bigram scores. */
    TEST_EQUAL_LOG(ngram_score(model, "scylla", "on", NULL),
                   logmath_log10_to_log(lmath, -1.2642));
    TEST_EQUAL_LOG(ngram_score(model, "scylla:scylla", "on", NULL),
                   logmath_log10_to_log(lmath, -1.2642) + logmath_log(lmath, 0.4));
    TEST_EQUAL_LOG(ngram_score(model, "apparently", "scylla", NULL),
                   logmath_log10_to_log(lmath, -0.5172));
    TEST_EQUAL_LOG(ngram_score(model, "apparently", "karybdis:scylla", NULL),
                   logmath_log10_to_log(lmath, -0.5172));
    TEST_EQUAL_LOG(ngram_score(model, "apparently", "scooby:scylla", NULL),
                   logmath_log10_to_log(lmath, -0.5172));

    /* Verify class trigram scores. */
    TEST_EQUAL_LOG(ngram_score(model, "zero", "be", "will", NULL),
                   logmath_log10_to_log(lmath, -0.5725));
    TEST_EQUAL_LOG(ngram_score(model, "oh:zero", "be", "will", NULL),
                   logmath_log10_to_log(lmath, -0.5725) + logmath_log(lmath, 0.7));
    TEST_EQUAL_LOG(ngram_score(model, "should", "variance", "zero", NULL),
                   logmath_log10_to_log(lmath, -0.9404));
    TEST_EQUAL_LOG(ngram_score(model, "should", "variance", "zero:zero", NULL),
                   logmath_log10_to_log(lmath, -0.9404));

    /* Add words to classes. */
    rv = ngram_model_add_class_word(model, "scylla", "scrappy:scylla", 1.0);
    TEST_ASSERT(rv >= 0);
    TEST_EQUAL(ngram_wid(model, "scrappy:scylla"), 0x80000196);
    TEST_EQUAL_LOG(ngram_score(model, "scrappy:scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.2));
    printf("scrappy:scylla %08x %d %f\n",
           ngram_wid(model, "scrappy:scylla"),
           ngram_score(model, "scrappy:scylla", NULL),
           logmath_exp(lmath, ngram_score(model, "scrappy:scylla", NULL)));

    /* Add a lot of words to a class.  Each new word is expected to get
     * the next consecutive in-class word ID. */
    for (i = 0; i < 129; ++i) {
        char word[32];

        sprintf(word, "%d:scylla", i);
        rv = ngram_model_add_class_word(model, "scylla", word, 1.0);
        printf("%s %08x %d %f\n", word,
               ngram_wid(model, word),
               ngram_score(model, word, NULL),
               logmath_exp(lmath, ngram_score(model, word, NULL)));
        TEST_ASSERT(rv >= 0);
        TEST_EQUAL(ngram_wid(model, word), 0x80000197 + i);
    }

    /* Add a new class. */
    {
        const char *words[] = { "blatz:foobie", "hurf:foobie" };
        float32 weights[] = { 0.6, 0.4 };
        int32 foobie_prob;

        rv = ngram_model_add_class(model, "[foobie]", 1.0,
                                   words, weights, 2);
        TEST_ASSERT(rv >= 0);

        /* Member scores are the class score plus the member weight. */
        foobie_prob = ngram_score(model, "[foobie]", NULL);
        TEST_EQUAL_LOG(ngram_score(model, "blatz:foobie", NULL),
                       foobie_prob + logmath_log(lmath, 0.6));
        TEST_EQUAL_LOG(ngram_score(model, "hurf:foobie", NULL),
                       foobie_prob + logmath_log(lmath, 0.4));
    }
}
/*
 * Test of the log-math table: create a table with base 1.0001, check
 * log / exp round trips and log-domain addition, write the table to
 * "tmp.logadd", read it back, and repeat the same checks on the table
 * that was read.  Returns 0.
 */
int
main(int argc, char *argv[])
{
    logmath_t *lmath;
    int32 rv;

    /* ---- Freshly initialized table ---- */
    lmath = logmath_init(1.0001, 0, 1);
    TEST_ASSERT(lmath);

    printf("log(1e-150) = %d\n", logmath_log(lmath, 1e-150));
    TEST_EQUAL_LOG(logmath_log(lmath, 1e-150), -3454050);
    printf("exp(log(1e-150)) = %e\n",logmath_exp(lmath, logmath_log(lmath, 1e-150)));
    TEST_EQUAL_FLOAT(logmath_exp(lmath, logmath_log(lmath, 1e-150)), 1e-150);

    printf("log(1e-48) = %d\n", logmath_log(lmath, 1e-48));
    printf("exp(log(1e-48)) = %e\n",logmath_exp(lmath, logmath_log(lmath, 1e-48)));
    TEST_EQUAL_FLOAT(logmath_exp(lmath, logmath_log(lmath, 1e-48)), 1e-48);

    printf("log(42) = %d\n", logmath_log(lmath, 42));
    TEST_EQUAL_LOG(logmath_log(lmath, 42), 37378);
    printf("exp(log(42)) = %f\n",logmath_exp(lmath, logmath_log(lmath, 42)));
    TEST_EQUAL_FLOAT(logmath_exp(lmath, logmath_log(lmath, 42)), 42);

    printf("log(1e-3 + 5e-3) = %d l+ %d = %d\n",
           logmath_log(lmath, 1e-3),
           logmath_log(lmath, 5e-3),
           logmath_add(lmath, logmath_log(lmath, 1e-3),
                       logmath_log(lmath, 5e-3)));
    printf("log(1e-3 + 5e-3) = %e + %e = %e\n",
           logmath_exp(lmath, logmath_log(lmath, 1e-3)),
           logmath_exp(lmath, logmath_log(lmath, 5e-3)),
           logmath_exp(lmath, logmath_add(lmath, logmath_log(lmath, 1e-3),
                                          logmath_log(lmath, 5e-3))));

    TEST_EQUAL_LOG(logmath_add(lmath, logmath_log(lmath, 1e-48),
                               logmath_log(lmath, 5e-48)),
                   logmath_log(lmath, 6e-48));
    TEST_EQUAL_LOG(logmath_add(lmath, logmath_log(lmath, 1e-48),
                               logmath_log(lmath, 42)),
                   logmath_log(lmath, 42));

    /* ---- Round trip through a file ---- */
    rv = logmath_write(lmath, "tmp.logadd");
    TEST_EQUAL(rv, 0);
    logmath_free(lmath);
    lmath = logmath_read("tmp.logadd");
    TEST_ASSERT(lmath);

    /* ---- Same checks on the table that was read back ---- */
    printf("log(1e-150) = %d\n", logmath_log(lmath, 1e-150));
    TEST_EQUAL_LOG(logmath_log(lmath, 1e-150), -3454050);
    printf("exp(log(1e-150)) = %e\n",logmath_exp(lmath, logmath_log(lmath, 1e-150)));
    TEST_EQUAL_FLOAT(logmath_exp(lmath, logmath_log(lmath, 1e-150)), 1e-150);

    printf("log(1e-48) = %d\n", logmath_log(lmath, 1e-48));
    printf("exp(log(1e-48)) = %e\n",logmath_exp(lmath, logmath_log(lmath, 1e-48)));
    TEST_EQUAL_FLOAT(logmath_exp(lmath, logmath_log(lmath, 1e-48)), 1e-48);

    printf("log(42) = %d\n", logmath_log(lmath, 42));
    TEST_EQUAL_LOG(logmath_log(lmath, 42), 37378);
    printf("exp(log(42)) = %f\n",logmath_exp(lmath, logmath_log(lmath, 42)));
    /* NOTE(review): the first pass compares against 42, this one against
     * 41.99 -- confirm whether the difference is intentional. */
    TEST_EQUAL_FLOAT(logmath_exp(lmath, logmath_log(lmath, 42)), 41.99);

    printf("log(1e-3 + 5e-3) = %d l+ %d = %d\n",
           logmath_log(lmath, 1e-3),
           logmath_log(lmath, 5e-3),
           logmath_add(lmath, logmath_log(lmath, 1e-3),
                       logmath_log(lmath, 5e-3)));
    printf("log(1e-3 + 5e-3) = %e + %e = %e\n",
           logmath_exp(lmath, logmath_log(lmath, 1e-3)),
           logmath_exp(lmath, logmath_log(lmath, 5e-3)),
           logmath_exp(lmath, logmath_add(lmath, logmath_log(lmath, 1e-3),
                                          logmath_log(lmath, 5e-3))));

    TEST_EQUAL_LOG(logmath_add(lmath, logmath_log(lmath, 1e-48),
                               logmath_log(lmath, 5e-48)),
                   logmath_log(lmath, 6e-48));
    TEST_EQUAL_LOG(logmath_add(lmath, logmath_log(lmath, 1e-48),
                               logmath_log(lmath, 42)),
                   logmath_log(lmath, 42));

    return 0;
}
int main(int argc, char *argv[]) { ps_decoder_t *ps; cmd_ln_t *config; acmod_t *acmod; fsg_search_t *fsgs; ps_lattice_t *dag; ps_seg_t *seg; int32 score; TEST_ASSERT(config = cmd_ln_init(NULL, ps_args(), TRUE, "-hmm", DATADIR "/tidigits/hmm", "-fsg", DATADIR "/tidigits/lm/tidigits.fsg", "-dict", DATADIR "/tidigits/lm/tidigits.dic", "-bestpath", "no", "-input_endian", "little", "-samprate", "16000", NULL)); TEST_ASSERT(ps = ps_init(config)); fsgs = (fsg_search_t *)ps->search; acmod = ps->acmod; setbuf(stdout, NULL); { FILE *rawfh; int16 buf[2048]; size_t nread; int16 const *bptr; char const *hyp; int nfr; TEST_ASSERT(rawfh = fopen(DATADIR "/numbers.raw", "rb")); TEST_EQUAL(0, acmod_start_utt(acmod)); fsg_search_start(ps_search_base(fsgs)); while (!feof(rawfh)) { nread = fread(buf, sizeof(*buf), 2048, rawfh); bptr = buf; while ((nfr = acmod_process_raw(acmod, &bptr, &nread, FALSE)) > 0) { while (acmod->n_feat_frame > 0) { fsg_search_step(ps_search_base(fsgs), acmod->output_frame); acmod_advance(acmod); } } } fsg_search_finish(ps_search_base(fsgs)); hyp = fsg_search_hyp(ps_search_base(fsgs), &score, NULL); printf("FSG: %s (%d)\n", hyp, score); TEST_ASSERT(acmod_end_utt(acmod) >= 0); fclose(rawfh); } for (seg = ps_seg_iter(ps); seg; seg = ps_seg_next(seg)) { char const *word; int sf, ef; int32 post, lscr, ascr, lback; word = ps_seg_word(seg); ps_seg_frames(seg, &sf, &ef); post = ps_seg_prob(seg, &ascr, &lscr, &lback); printf("%s (%d:%d) P(w|o) = %f ascr = %d lscr = %d lback = %d\n", word, sf, ef, logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback); } /* Now get the DAG and play with it. */ dag = ps_get_lattice(ps); ps_lattice_write(dag, "test_fsg3.lat"); printf("BESTPATH: %s\n", ps_lattice_hyp(dag, ps_lattice_bestpath(dag, NULL, 1.0, 15.0))); ps_lattice_posterior(dag, NULL, 15.0); ps_free(ps); cmd_ln_free_r(config); return 0; }
/*
 * Decode DATADIR "/goforward.raw" three ways with a decoder created from
 * config and check that each hypothesis equals expected:
 *   1. ps_decode_raw() on the open file,
 *   2. ps_process_raw() in 2048-sample blocks,
 *   3. ps_process_cep() one frame at a time, on features computed by
 *      running the decoder's front end over the whole file.
 * Timing statistics are printed after each pass, and a per-word
 * segmentation after the last one.  The decoder and config are freed
 * before returning 0.
 */
int
ps_decoder_test(cmd_ln_t *config, char const *sname, char const *expected)
{
    ps_decoder_t *ps;
    mfcc_t **cepbuf;
    FILE *rawfh;
    int16 *buf;
    int16 const *bptr;
    size_t nread;
    size_t nsamps;
    int32 nfr, i, score, prob;
    char const *hyp;
    char const *uttid;
    double n_speech, n_cpu, n_wall;
    ps_seg_t *seg;

    TEST_ASSERT(ps = ps_init(config));

    /* Test it first with pocketsphinx_decode_raw() */
    TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
    ps_decode_raw(ps, rawfh, "goforward", -1);
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);
    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    /* Test it with ps_process_raw() */
    /* Clear the EOF state and rewind; the sample count computed here is
     * replaced by the block size just below. */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_END);
    nsamps = ftell(rawfh) / sizeof(*buf);
    fseek(rawfh, 0, SEEK_SET);
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    nsamps = 2048;
    buf = ckd_calloc(nsamps, sizeof(*buf));
    while (!feof(rawfh)) {
        nread = fread(buf, sizeof(*buf), nsamps, rawfh);
        ps_process_raw(ps, buf, nread, FALSE, FALSE);
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000000"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    /* Now read the whole file and produce an MFCC buffer. */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_END);
    nsamps = ftell(rawfh) / sizeof(*buf);
    fseek(rawfh, 0, SEEK_SET);
    bptr = buf = ckd_realloc(buf, nsamps * sizeof(*buf));
    TEST_EQUAL(nsamps, fread(buf, sizeof(*buf), nsamps, rawfh));
    /* First call, with no output buffer, yields the frame count in nfr;
     * one extra row is allocated and handed to fe_end_utt(). */
    fe_process_frames(ps->acmod->fe, &bptr, &nsamps, NULL, &nfr);
    cepbuf = ckd_calloc_2d(nfr + 1,
                           fe_get_output_size(ps->acmod->fe),
                           sizeof(**cepbuf));
    fe_start_utt(ps->acmod->fe);
    fe_process_frames(ps->acmod->fe, &bptr, &nsamps, cepbuf, &nfr);
    fe_end_utt(ps->acmod->fe, cepbuf[nfr], &i);

    /* Decode it with process_cep() */
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    for (i = 0; i < nfr; ++i) {
        ps_process_cep(ps, cepbuf + i, 1, FALSE, FALSE);
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000001"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);

    /* Per-word segmentation of the last pass. */
    for (seg = ps_seg_iter(ps, &score); seg;
         seg = ps_seg_next(seg)) {
        char const *word;
        int sf, ef;
        int32 post, lscr, ascr, lback;

        word = ps_seg_word(seg);
        ps_seg_frames(seg, &sf, &ef);
        post = ps_seg_prob(seg, &ascr, &lscr, &lback);
        printf("%s (%d:%d) P(w|o) = %f ascr = %d lscr = %d lback = %d\n", word, sf, ef,
               logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback);
        TEST_ASSERT(post <= 2); // Due to numerical errors with float it sometimes could go out of 0
    }

    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);
    ps_get_all_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("TOTAL: %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("TOTAL: %.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    fclose(rawfh);
    ps_free(ps);
    cmd_ln_free_r(config);
    ckd_free_2d(cepbuf);
    ckd_free(buf);

    return 0;
}
/*
 * Build the list of recognition candidates for the utterance just decoded
 * by the current decoder, using its N-best hypotheses.
 *
 * For every N-best entry the segmentation is scanned from "<s>" up to
 * "</s>"; "<sil>" is skipped and every other word becomes a token with
 * start/end positions of frame index times filtbuf->frlen.  The candidate
 * score is the entry's linear-domain score relative to that of the best
 * hypothesis (which is floored at 1e-8); entries whose segmentation ends
 * without "</s>" are kept with their score multiplied by 0.9.  At most
 * CANDIDATE_MAX-1 candidates are collected, the entry after the last
 * accepted one is zeroed, and the list is passed to candidate_sort().
 *
 * Does nothing if the context, its filter buffer, decoder set or current
 * decoder is missing.
 */
static void acoustic_processor(context_t *ctx,
                               srs_srec_utterance_t *utt,
                               srs_srec_candidate_t *cands,
                               srs_srec_candidate_t **sorted)
{
    filter_buf_t *filtbuf;
    decoder_set_t *decset;
    decoder_t *dec;
    logmath_t *lmath;
    const char *uttid;
    const char *hyp;
    int32 score;
    double prob;
    ps_nbest_t *nb;
    ps_seg_t *seg;
    int32_t frlen;
    int32 start, end;
    size_t ncand;
    srs_srec_candidate_t *cand;
    srs_srec_token_t *tkn;
    int32_t length;

    if (!ctx || !(filtbuf = ctx->filtbuf) ||
        !(decset = ctx->decset) || !(dec = decset->curdec))
        return;

    frlen = filtbuf->frlen;
    lmath = ps_get_logmath(dec->ps);
    uttid = "<unknown>";
    hyp = ps_get_hyp(dec->ps, &score, &uttid);
    prob = logmath_exp(lmath, score);
    length = 0;

    /* Floor the reference score so the division below stays finite. */
    if (prob < 0.00000001)
        prob = 0.00000001;

    for (nb = ps_nbest(dec->ps, 0,-1, NULL,NULL), ncand = 0;
         nb != NULL;
         nb = ps_nbest_next(nb))
    {
        if (ncand >= CANDIDATE_MAX-1) {
            /* Leaving early: release the iterator before breaking out. */
            ps_nbest_free(nb);
            break;
        }

        if ((seg = ps_nbest_seg(nb, &score))) {
            /* Skip ahead to the sentence start marker. */
            while (seg && strcmp(ps_seg_word(seg), "<s>"))
                seg = ps_seg_next(seg);

            if (!seg)
                continue;

            ps_seg_frames(seg, &start, &end);

            cand = cands + ncand;

            cand->score = logmath_exp(lmath, score) / prob;
            cand->ntoken = 0;

            length = 0;

            while ((seg = ps_seg_next(seg))) {
                if ((hyp = ps_seg_word(seg))) {
                    if (!strcmp(hyp, "</s>") ||
                        cand->ntoken >= CANDIDATE_TOKEN_MAX)
                    {
                        /* End of sentence, or token array full: accept
                         * the candidate and stop scanning this entry. */
                        ncand++;
                        ps_seg_frames(seg, &start, &end);
                        ps_seg_free(seg);
                        length = (end + 1) * frlen;
                        break;
                    }
                    else if (!strcmp(hyp, "<sil>")) {
                        ps_seg_frames(seg, &start, &end);
                    }
                    else {
                        tkn = cand->tokens + cand->ntoken++;
                        tkn->token = tknbase(hyp);
                        ps_seg_frames(seg, &start, &end);
                        tkn->start = start * frlen;
                        tkn->end = (end + 1) * frlen;
                    }
                }
            } /* while seg */

            /* Segmentation ran out without "</s>": keep it, penalized. */
            if (!seg && cand->ntoken > 0) {
                ncand++;
                cand->score *= 0.9; /* some penalty */
            }

            /* Only look at the last token when there is one. */
            if (!length && cand->ntoken > 0) {
                tkn = cand->tokens + (cand->ntoken - 1);
                length = tkn->end;
            }
        }
    } /* for nb */

    /* Terminate the list right after the last accepted candidate.  This
     * is indexed from ncand so it is also valid when no candidate was
     * built at all. */
    memset(cands + ncand, 0, sizeof(srs_srec_candidate_t));

    utt->id = uttid;
    utt->score = prob;
    //utt->length = length;
    utt->length = filtbuf->len;
    utt->ncand = candidate_sort(cands, sorted);
    utt->cands = sorted;
}
/*
 * Build a single recognition candidate for the utterance just decoded by
 * the current decoder, by walking the edges of its word lattice.
 *
 * The source node of the first edge and then the destination node of each
 * edge contribute a token unless the node's word starts with '<'.  The
 * walk stops at the first node that starts before the end of the token
 * collected last, and a node whose word equals the previous token does not
 * add a new token.  The candidate's score is fixed at 1.0; the utterance
 * score is the linear-domain hypothesis score, floored at 0.00001.
 *
 * Does nothing if the context, its filter buffer, decoder set or current
 * decoder is missing.
 */
static void fsg_processor(context_t *ctx,
                          srs_srec_utterance_t *utt,
                          srs_srec_candidate_t *cands,
                          srs_srec_candidate_t **sorted)
{
    filter_buf_t *filtbuf;
    decoder_set_t *decset;
    decoder_t *dec;
    logmath_t *lmath;
    const char *uttid;
    int32_t score;
    double prob;
    srs_srec_candidate_t *cand;
    srs_srec_token_t *tkn;
    ps_lattice_t *dag;
    ps_latlink_t *lnk;
    ps_latnode_t *nod;
    const char *token;
    int32_t frlen;
    int32_t start, end;
    int16 fef, lef;

    if (!ctx)
        return;
    if (!(filtbuf = ctx->filtbuf))
        return;
    if (!(decset = ctx->decset))
        return;
    if (!(dec = decset->curdec))
        return;

    frlen = filtbuf->frlen;
    lmath = ps_get_logmath(dec->ps);
    ps_get_hyp(dec->ps, &score, &uttid);
    prob = logmath_exp(lmath, score);

    cand = cands;
    cand->score = 1.0;
    cand->ntoken = 0;

    tkn = NULL;

    dag = ps_get_lattice(dec->ps);
    lnk = dag ? ps_lattice_traverse_edges(dag, NULL, NULL) : NULL;

    if (lnk) {
        /* Source node of the very first edge. */
        ps_latlink_nodes(lnk, &nod);
        if (nod && (token = ps_latnode_word(dag, nod)) && *token != '<') {
            tkn = cand->tokens + cand->ntoken++;
            tkn->token = tknbase(token);
            tkn->start = ps_latnode_times(nod, &fef, &lef) * frlen;
            tkn->end = ((fef + lef) / 2) * frlen;
        }

        /* Destination node of every edge, beginning with the first. */
        do {
            nod = ps_latlink_nodes(lnk, NULL);
            if (nod && (token = ps_latnode_word(dag, nod)) && *token != '<') {
                start = ps_latnode_times(nod, &fef, &lef) * frlen;
                end = fef * frlen;

                if (tkn && start < (int32_t)tkn->end)
                    break;  /* just take one candidate */

                if (!tkn || !tkneq(token, tkn->token)) {
                    tkn = cand->tokens + cand->ntoken++;
                    tkn->token = tknbase(token);
                    tkn->start = start;
                    tkn->end = end + frlen;
                }
            }
        } while ((lnk = ps_lattice_traverse_next(dag, NULL)));
    }

    sorted[0] = cands;
    sorted[1] = NULL;

    utt->id = uttid;
    utt->score = prob < 0.00001 ? 0.00001 : prob;
    //utt->length = dag ? ps_lattice_n_frames(dag) * frlen : 0;
    utt->length = filtbuf->len;
    utt->ncand = 1;
    utt->cands = sorted;
}
/*
 * Decode DATADIR "/goforward.raw" with the forward tree search, build the
 * word lattice, and run consistency checks on it: equal edge counts in
 * forward and reverse traversal, the same links reachable through node
 * entries and exits, correct ordering of both traversals, and agreement
 * between the lattice normalizer and the sum of alpha+beta over the start
 * node's exits.  The best path and per-link posteriors are printed.
 * Returns 0, or -1 if no lattice could be built.
 */
int
test_decode(ps_decoder_t *ps)
{
    FILE *rawfh;
    int16 buf[2048];
    size_t nread;
    int16 const *bptr;
    int nfr;
    ps_lattice_t *dag;
    acmod_t *acmod;
    ngram_search_t *ngs;
    int i, j;
    ps_latlink_t *link;
    ps_latnode_t *node;
    latlink_list_t *x;
    int32 norm, post;

    ngs = (ngram_search_t *)ps->search;
    acmod = ps->acmod;

    /* Decode stuff and build a DAG. */
    TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
    TEST_EQUAL(0, acmod_start_utt(acmod));
    ngram_fwdtree_start(ngs);
    while (!feof(rawfh)) {
        nread = fread(buf, sizeof(*buf), 2048, rawfh);
        bptr = buf;
        while ((nfr = acmod_process_raw(acmod, &bptr, &nread, FALSE)) > 0) {
            while (acmod->n_feat_frame > 0) {
                ngram_fwdtree_search(ngs, acmod->output_frame);
                acmod_advance(acmod);
            }
        }
    }
    ngram_fwdtree_finish(ngs);
    printf("FWDTREE: %s\n",
           ngram_search_bp_hyp(ngs, ngram_search_find_exit(ngs, -1, NULL, NULL)));

    TEST_ASSERT(acmod_end_utt(acmod) >= 0);
    fclose(rawfh);

    dag = ngram_search_lattice(ps->search);
    if (dag == NULL) {
        E_ERROR("Failed to build DAG!\n");
        return -1;
    }

    /* Write lattice to disk. */
    TEST_EQUAL(0, ps_lattice_write(dag, "test_posterior.lat"));

    /* Do a bunch of checks on the DAG generation and traversal code: */

    /* Verify that forward and backward iteration give the same number of edges. */
    i = j = 0;
    for (link = ps_lattice_traverse_edges(dag, NULL, NULL);
         link; link = ps_lattice_traverse_next(dag, NULL)) {
        ++i;
    }
    for (link = ps_lattice_reverse_edges(dag, NULL, NULL);
         link; link = ps_lattice_reverse_next(dag, NULL)) {
        ++j;
    }
    printf("%d forward edges, %d reverse edges\n", i, j);
    TEST_EQUAL(i,j);

    /* Verify that the same links are reachable via entries and exits:
     * mark every exit link, then expect the mark on every entry link. */
    for (node = dag->nodes; node; node = node->next) {
        for (x = node->exits; x; x = x->next)
            x->link->alpha = -42;
    }
    for (node = dag->nodes; node; node = node->next) {
        for (x = node->entries; x; x = x->next)
            TEST_EQUAL(x->link->alpha, -42);
    }

    /* Verify that forward iteration is properly ordered: by the time a
     * link is visited, all links entering its source node have been. */
    for (link = ps_lattice_traverse_edges(dag, NULL, NULL);
         link; link = ps_lattice_traverse_next(dag, NULL)) {
        link->alpha = 0;
        for (x = link->from->entries; x; x = x->next) {
            TEST_EQUAL(x->link->alpha, 0);
        }
    }

    /* Verify that backward iteration is properly ordered. */
    for (node = dag->nodes; node; node = node->next) {
        for (x = node->exits; x; x = x->next)
            x->link->alpha = -42;
    }
    for (link = ps_lattice_reverse_edges(dag, NULL, NULL);
         link; link = ps_lattice_reverse_next(dag, NULL)) {
        link->alpha = 0;
        for (x = link->to->exits; x; x = x->next) {
            TEST_EQUAL(x->link->alpha, 0);
        }
    }

    /* Find and print best path. */
    link = ps_lattice_bestpath(dag, ngs->lmset, 1.0, 1.0/20.0);
    printf("BESTPATH: %s\n", ps_lattice_hyp(dag, link));

    /* Calculate betas. */
    post = ps_lattice_posterior(dag, ngs->lmset, 1.0/20.0);
    printf("Best path score: %d\n",
           link->path_scr + dag->final_node_ascr);
    printf("P(S|O) = %d\n", post);

    /* Verify that sum of final alphas and initial alphas+betas is
     * sufficiently similar. */
    norm = logmath_get_zero(acmod->lmath);
    for (x = dag->start->exits; x; x = x->next)
        norm = logmath_add(acmod->lmath, norm,
                           x->link->beta + x->link->alpha);
    E_INFO("Sum of final alphas+betas = %d\n", dag->norm);
    E_INFO("Sum of initial alphas+betas = %d\n", norm);
    TEST_EQUAL_LOG(dag->norm, norm);

    /* Print posterior probabilities for each link in best path,
     * following the best_prev chain back from the final link. */
    while (link) {
        printf("P(%s,%d) = %d = %f\n",
               dict_wordstr(ps->search->dict, link->from->wid),
               link->ef,
               link->alpha + link->beta - dag->norm,
               logmath_exp(acmod->lmath, link->alpha + link->beta - dag->norm));
        link = link->best_prev;
    }

    return 0;
}