/**
 * Decode the "goforward" test recording three different ways and check
 * that each pass yields the expected hypothesis.
 *
 * Pass 1 hands the open file to ps_decode_raw().  Pass 2 streams the raw
 * samples in 2048-sample chunks through ps_process_raw().  Pass 3 converts
 * the whole file to feature frames with the decoder's front end and feeds
 * them one frame at a time to ps_process_cep(), then walks the word
 * segmentation.  Timing statistics are printed after every pass.
 *
 * @param config   Decoder configuration.  It is released here with
 *                 cmd_ln_free_r() before returning.
 * @param sname    Label printed in front of each hypothesis line.
 * @param expected Hypothesis string that every pass must reproduce.
 * @return 0 when the end of the function is reached; what happens on a
 *         failed check is decided by the TEST_* macros (defined elsewhere).
 */
int ps_decoder_test(cmd_ln_t *config, char const *sname, char const *expected)
{
    ps_decoder_t *ps;
    mfcc_t **cepbuf;
    FILE *rawfh;
    int16 *buf;
    int16 const *bptr;
    size_t nread;
    size_t nsamps;
    long fsize;
    int32 nfr, i, score, prob;
    char const *hyp;
    char const *uttid;
    double n_speech, n_cpu, n_wall;
    ps_seg_t *seg;

    TEST_ASSERT(ps = ps_init(config));

    /* Test it first with pocketsphinx_decode_raw() */
    TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
    ps_decode_raw(ps, rawfh, "goforward", -1);
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    /* Fail cleanly instead of passing NULL to printf("%s") / strcmp(). */
    TEST_ASSERT(hyp != NULL);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);
    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    /* Test it with ps_process_raw() */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_SET);
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    nsamps = 2048; /* chunk size, in samples */
    buf = ckd_calloc(nsamps, sizeof(*buf));
    /* Drive the loop from fread()'s result: a read error then ends the
     * loop (feof() alone would never become true) and no empty chunk is
     * handed to the decoder. */
    while ((nread = fread(buf, sizeof(*buf), nsamps, rawfh)) > 0) {
        ps_process_raw(ps, buf, nread, FALSE, FALSE);
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    TEST_ASSERT(hyp != NULL);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000000"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    /* Now read the whole file and produce an MFCC buffer. */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_END);
    fsize = ftell(rawfh);
    /* ftell() reports failure as -1, which must not become a sample count. */
    TEST_ASSERT(fsize >= 0);
    nsamps = (size_t)fsize / sizeof(*buf);
    fseek(rawfh, 0, SEEK_SET);
    bptr = buf = ckd_realloc(buf, nsamps * sizeof(*buf));
    TEST_EQUAL(nsamps, fread(buf, sizeof(*buf), nsamps, rawfh));
    /* First call passes no output buffer; its result in nfr is used to size
     * cepbuf (presumably a frame-count query -- confirm against the
     * front-end API).  One extra row is allocated for fe_end_utt(). */
    fe_process_frames(ps->acmod->fe, &bptr, &nsamps, NULL, &nfr);
    cepbuf = ckd_calloc_2d(nfr + 1,
                           fe_get_output_size(ps->acmod->fe),
                           sizeof(**cepbuf));
    fe_start_utt(ps->acmod->fe);
    fe_process_frames(ps->acmod->fe, &bptr, &nsamps, cepbuf, &nfr);
    /* i only receives fe_end_utt()'s output here; it is reset by the loop. */
    fe_end_utt(ps->acmod->fe, cepbuf[nfr], &i);

    /* Decode it with process_cep() */
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    for (i = 0; i < nfr; ++i) {
        ps_process_cep(ps, cepbuf + i, 1, FALSE, FALSE);
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    TEST_ASSERT(hyp != NULL);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000001"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);

    /* Print every word segment with its posterior and component scores. */
    for (seg = ps_seg_iter(ps, &score); seg; seg = ps_seg_next(seg)) {
        char const *word;
        int sf, ef;
        int32 post, lscr, ascr, lback;

        word = ps_seg_word(seg);
        ps_seg_frames(seg, &sf, &ef);
        post = ps_seg_prob(seg, &ascr, &lscr, &lback);
        printf("%s (%d:%d) P(w|o) = %f ascr = %d lscr = %d lback = %d\n",
               word, sf, ef,
               logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback);
        /* Due to numerical errors with float it sometimes could go out of 0 */
        TEST_ASSERT(post <= 2);
    }

    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);
    ps_get_all_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("TOTAL: %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("TOTAL: %.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    fclose(rawfh);
    ps_free(ps);
    cmd_ln_free_r(config);
    ckd_free_2d(cepbuf);
    ckd_free(buf);

    return 0;
}
/**
 * Decode every utterance listed in a control file and emit the outputs
 * selected in the configuration.
 *
 * Each control line holds up to four whitespace-separated words: the file
 * to decode, an optional start frame, an optional end frame and an optional
 * utterance id.  When -mllrctl, -lmnamectl or -fsgctl are configured, one
 * line is read from each of those files for every control line, and the
 * trimmed text is handed to the matching process_*_line() helper before the
 * utterance is decoded.
 *
 * Line selection uses a counter that starts at 0: lines are skipped while
 * the counter is below -ctloffset (the counter still advances by -ctlincr),
 * and, unless -ctlcount is -1, once it reaches -ctloffset + -ctlcount.
 * A line whose processing fails does not advance the counter.
 *
 * Results go to the -hyp, -hypseg and -ctm files and the -outlatdir and
 * -nbestdir directories when those options are set.  All files opened here
 * are closed before returning; ctlfh stays open and owned by the caller.
 *
 * @param ps     Decoder used for every utterance.
 * @param config Configuration the options above are read from.
 * @param ctlfh  Open control file, read line by line until exhausted.
 */
static void process_ctl(ps_decoder_t *ps, cmd_ln_t *config, FILE *ctlfh)
{
    int32 ctloffset, ctlcount, ctlincr;
    int32 i;
    char *line;
    size_t len;
    FILE *hypfh = NULL, *hypsegfh = NULL, *ctmfh = NULL;
    FILE *mllrfh = NULL, *lmfh = NULL, *fsgfh = NULL;
    double n_speech, n_cpu, n_wall;
    char const *outlatdir;
    char const *nbestdir;
    char const *str;
    int frate;

    ctloffset = cmd_ln_int32_r(config, "-ctloffset");
    ctlcount = cmd_ln_int32_r(config, "-ctlcount");
    ctlincr = cmd_ln_int32_r(config, "-ctlincr");
    outlatdir = cmd_ln_str_r(config, "-outlatdir");
    nbestdir = cmd_ln_str_r(config, "-nbestdir");
    frate = cmd_ln_int32_r(config, "-frate");

    /* Optional per-utterance control files, read in step with ctlfh. */
    if ((str = cmd_ln_str_r(config, "-mllrctl"))) {
        mllrfh = fopen(str, "r");
        if (mllrfh == NULL) {
            E_ERROR_SYSTEM("Failed to open MLLR control file file %s", str);
            goto done;
        }
    }
    if ((str = cmd_ln_str_r(config, "-fsgctl"))) {
        fsgfh = fopen(str, "r");
        if (fsgfh == NULL) {
            E_ERROR_SYSTEM("Failed to open FSG control file file %s", str);
            goto done;
        }
    }
    if ((str = cmd_ln_str_r(config, "-lmnamectl"))) {
        lmfh = fopen(str, "r");
        if (lmfh == NULL) {
            E_ERROR_SYSTEM("Failed to open LM name control file file %s", str);
            goto done;
        }
    }

    /* Optional output files; setbuf(..., NULL) makes them unbuffered. */
    if ((str = cmd_ln_str_r(config, "-hyp"))) {
        hypfh = fopen(str, "w");
        if (hypfh == NULL) {
            E_ERROR_SYSTEM("Failed to open hypothesis file %s for writing", str);
            goto done;
        }
        setbuf(hypfh, NULL);
    }
    if ((str = cmd_ln_str_r(config, "-hypseg"))) {
        hypsegfh = fopen(str, "w");
        if (hypsegfh == NULL) {
            E_ERROR_SYSTEM("Failed to open hypothesis file %s for writing", str);
            goto done;
        }
        setbuf(hypsegfh, NULL);
    }
    if ((str = cmd_ln_str_r(config, "-ctm"))) {
        ctmfh = fopen(str, "w");
        if (ctmfh == NULL) {
            E_ERROR_SYSTEM("Failed to open hypothesis file %s for writing", str);
            goto done;
        }
        setbuf(ctmfh, NULL);
    }

    i = 0;
    while ((line = fread_line(ctlfh, &len))) {
        char *wptr[4];
        int32 nf, sf, ef;
        char *mllrline = NULL, *lmline = NULL, *fsgline = NULL;
        char *fsgfile = NULL, *lmname = NULL, *mllrfile = NULL;

        /* Read the companion lines.  On a premature end of any companion
         * file, release everything read so far in this iteration before
         * bailing out. */
        if (mllrfh) {
            mllrline = fread_line(mllrfh, &len);
            if (mllrline == NULL) {
                E_ERROR("File size mismatch between control and MLLR control\n");
                ckd_free(line);
                goto done;
            }
            mllrfile = string_trim(mllrline, STRING_BOTH);
        }
        if (lmfh) {
            lmline = fread_line(lmfh, &len);
            if (lmline == NULL) {
                E_ERROR("File size mismatch between control and LM control\n");
                ckd_free(mllrline);
                ckd_free(line);
                goto done;
            }
            lmname = string_trim(lmline, STRING_BOTH);
        }
        if (fsgfh) {
            fsgline = fread_line(fsgfh, &len);
            if (fsgline == NULL) {
                E_ERROR("File size mismatch between control and FSG control\n");
                ckd_free(lmline);
                ckd_free(mllrline);
                ckd_free(line);
                goto done;
            }
            fsgfile = string_trim(fsgline, STRING_BOTH);
        }

        /* Apply the offset/count window. */
        if (i < ctloffset) {
            i += ctlincr;
            goto nextline;
        }
        if (ctlcount != -1 && i >= ctloffset + ctlcount) {
            goto nextline;
        }

        sf = 0;
        ef = -1;
        nf = str2words(line, wptr, 4);
        if (nf == 0) {
            /* Do nothing. */
        }
        else if (nf < 0) {
            E_ERROR("Unexpected extra data in control file at line %d\n", i);
        }
        else {
            char const *hyp, *file, *uttid;
            int32 score;

            file = wptr[0];
            uttid = NULL;
            if (nf > 1)
                sf = atoi(wptr[1]);
            if (nf > 2)
                ef = atoi(wptr[2]);
            if (nf > 3)
                uttid = wptr[3];

            E_INFO("Decoding '%s'\n", uttid ? uttid : file);

            /* Do actual decoding.  A failing step abandons this line; jump
             * to the cleanup label (rather than `continue`) so the line
             * buffers are freed.  The counter is not advanced in that case. */
            if (process_mllrctl_line(ps, config, mllrfile) < 0)
                goto nextline;
            if (process_lmnamectl_line(ps, config, lmname) < 0)
                goto nextline;
            if (process_fsgctl_line(ps, config, fsgfile) < 0)
                goto nextline;
            if (process_ctl_line(ps, config, file, uttid, sf, ef) < 0)
                goto nextline;

            hyp = ps_get_hyp(ps, &score, &uttid);

            /* Write out results and such. */
            if (hypfh) {
                fprintf(hypfh, "%s (%s %d)\n", hyp ? hyp : "", uttid, score);
            }
            if (hypsegfh) {
                write_hypseg(hypsegfh, ps, uttid);
            }
            if (ctmfh) {
                ps_seg_t *itor = ps_seg_iter(ps, &score);
                write_ctm(ctmfh, ps, itor, uttid, frate);
            }
            if (outlatdir) {
                write_lattice(ps, outlatdir, uttid);
            }
            if (nbestdir) {
                write_nbest(ps, nbestdir, uttid);
            }
            ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
            E_INFO("%s: %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
                   uttid, n_speech, n_cpu, n_wall);
            E_INFO("%s: %.2f xRT (CPU), %.2f xRT (elapsed)\n",
                   uttid, n_cpu / n_speech, n_wall / n_speech);
            E_INFO_NOFN("%s (%s %d)\n", hyp ? hyp : "", uttid, score);
        }
        i += ctlincr;

    nextline:
        ckd_free(mllrline);
        ckd_free(fsgline);
        ckd_free(lmline);
        ckd_free(line);
    }

    ps_get_all_time(ps, &n_speech, &n_cpu, &n_wall);
    E_INFO("TOTAL %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    E_INFO("AVERAGE %.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

done:
    /* Close everything this function opened, inputs as well as outputs. */
    if (mllrfh)
        fclose(mllrfh);
    if (lmfh)
        fclose(lmfh);
    if (fsgfh)
        fclose(fsgfh);
    if (hypfh)
        fclose(hypfh);
    if (hypsegfh)
        fclose(hypsegfh);
    if (ctmfh)
        fclose(ctmfh);
}