Пример #1
0
int
ps_decoder_test(cmd_ln_t *config, char const *sname, char const *expected)
{
    ps_decoder_t *ps;
    mfcc_t **cepbuf;
    FILE *rawfh;
    int16 *buf;
    int16 const *bptr;
    size_t nread;
    size_t nsamps;
    int32 nfr, i, score, prob;
    char const *hyp;
    char const *uttid;
    double n_speech, n_cpu, n_wall;
    ps_seg_t *seg;

    TEST_ASSERT(ps = ps_init(config));
    /* Test it first with pocketsphinx_decode_raw() */
    TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
    ps_decode_raw(ps, rawfh, "goforward", -1);
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);
    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    /* Test it with ps_process_raw() */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_END);
    nsamps = ftell(rawfh) / sizeof(*buf);
    fseek(rawfh, 0, SEEK_SET);
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    nsamps = 2048;
    buf = ckd_calloc(nsamps, sizeof(*buf));
    while (!feof(rawfh)) {
        nread = fread(buf, sizeof(*buf), nsamps, rawfh);
        ps_process_raw(ps, buf, nread, FALSE, FALSE);
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000000"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    /* Now read the whole file and produce an MFCC buffer. */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_END);
    nsamps = ftell(rawfh) / sizeof(*buf);
    fseek(rawfh, 0, SEEK_SET);
    bptr = buf = ckd_realloc(buf, nsamps * sizeof(*buf));
    TEST_EQUAL(nsamps, fread(buf, sizeof(*buf), nsamps, rawfh));
    fe_process_frames(ps->acmod->fe, &bptr, &nsamps, NULL, &nfr);
    cepbuf = ckd_calloc_2d(nfr + 1,
                   fe_get_output_size(ps->acmod->fe),
                   sizeof(**cepbuf));
    fe_start_utt(ps->acmod->fe);
    fe_process_frames(ps->acmod->fe, &bptr, &nsamps, cepbuf, &nfr);
    fe_end_utt(ps->acmod->fe, cepbuf[nfr], &i);

    /* Decode it with process_cep() */
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    for (i = 0; i < nfr; ++i) {
        ps_process_cep(ps, cepbuf + i, 1, FALSE, FALSE);
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000001"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);
    for (seg = ps_seg_iter(ps, &score); seg;
         seg = ps_seg_next(seg)) {
        char const *word;
        int sf, ef;
        int32 post, lscr, ascr, lback;

        word = ps_seg_word(seg);
        ps_seg_frames(seg, &sf, &ef);
        post = ps_seg_prob(seg, &ascr, &lscr, &lback);
        printf("%s (%d:%d) P(w|o) = %f ascr = %d lscr = %d lback = %d\n", word, sf, ef,
               logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback);
        TEST_ASSERT(post <= 2); // Due to numerical errors with float it sometimes could go out of 0
    }

    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);
    ps_get_all_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("TOTAL: %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("TOTAL: %.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    fclose(rawfh);
    ps_free(ps);
    cmd_ln_free_r(config);
    ckd_free_2d(cepbuf);
    ckd_free(buf);

    return 0;
}
Пример #2
0
static void
process_ctl(ps_decoder_t *ps, cmd_ln_t *config, FILE *ctlfh)
{
    int32 ctloffset, ctlcount, ctlincr;
    int32 i;
    char *line;
    size_t len;
    FILE *hypfh = NULL, *hypsegfh = NULL, *ctmfh = NULL;
    FILE *mllrfh = NULL, *lmfh = NULL, *fsgfh = NULL;
    double n_speech, n_cpu, n_wall;
    char const *outlatdir;
    char const *nbestdir;
    char const *str;
    int frate;

    ctloffset = cmd_ln_int32_r(config, "-ctloffset");
    ctlcount = cmd_ln_int32_r(config, "-ctlcount");
    ctlincr = cmd_ln_int32_r(config, "-ctlincr");
    outlatdir = cmd_ln_str_r(config, "-outlatdir");
    nbestdir = cmd_ln_str_r(config, "-nbestdir");
    frate = cmd_ln_int32_r(config, "-frate");

    if ((str = cmd_ln_str_r(config, "-mllrctl"))) {
        mllrfh = fopen(str, "r");
        if (mllrfh == NULL) {
            E_ERROR_SYSTEM("Failed to open MLLR control file file %s", str);
            goto done;
        }
    }
    if ((str = cmd_ln_str_r(config, "-fsgctl"))) {
        fsgfh = fopen(str, "r");
        if (fsgfh == NULL) {
            E_ERROR_SYSTEM("Failed to open FSG control file file %s", str);
            goto done;
        }
    }
    if ((str = cmd_ln_str_r(config, "-lmnamectl"))) {
        lmfh = fopen(str, "r");
        if (lmfh == NULL) {
            E_ERROR_SYSTEM("Failed to open LM name control file file %s", str);
            goto done;
        }
    }
    if ((str = cmd_ln_str_r(config, "-hyp"))) {
        hypfh = fopen(str, "w");
        if (hypfh == NULL) {
            E_ERROR_SYSTEM("Failed to open hypothesis file %s for writing", str);
            goto done;
        }
        setbuf(hypfh, NULL);
    }
    if ((str = cmd_ln_str_r(config, "-hypseg"))) {
        hypsegfh = fopen(str, "w");
        if (hypsegfh == NULL) {
            E_ERROR_SYSTEM("Failed to open hypothesis file %s for writing", str);
            goto done;
        }
        setbuf(hypsegfh, NULL);
    }
    if ((str = cmd_ln_str_r(config, "-ctm"))) {
        ctmfh = fopen(str, "w");
        if (ctmfh == NULL) {
            E_ERROR_SYSTEM("Failed to open hypothesis file %s for writing", str);
            goto done;
        }
        setbuf(ctmfh, NULL);
    }

    i = 0;
    while ((line = fread_line(ctlfh, &len))) {
        char *wptr[4];
        int32 nf, sf, ef;
        char *mllrline = NULL, *lmline = NULL, *fsgline = NULL;
        char *fsgfile = NULL, *lmname = NULL, *mllrfile = NULL;

        if (mllrfh) {
            mllrline = fread_line(mllrfh, &len);
            if (mllrline == NULL) {
                E_ERROR("File size mismatch between control and MLLR control\n");
                ckd_free(line);
                ckd_free(mllrline);
                goto done;
            }
            mllrfile = string_trim(mllrline, STRING_BOTH);
        }
        if (lmfh) {
            lmline = fread_line(lmfh, &len);
            if (lmline == NULL) {
                E_ERROR("File size mismatch between control and LM control\n");
                ckd_free(line);
                ckd_free(lmline);
                goto done;
            }
            lmname = string_trim(lmline, STRING_BOTH);
        }
        if (fsgfh) {
            fsgline = fread_line(fsgfh, &len);
            if (fsgline == NULL) {
                E_ERROR("File size mismatch between control and FSG control\n");
                ckd_free(line);
                ckd_free(fsgline);
                goto done;
            }
            fsgfile = string_trim(fsgline, STRING_BOTH);
        }

        if (i < ctloffset) {
            i += ctlincr;
            goto nextline;
        }
        if (ctlcount != -1 && i >= ctloffset + ctlcount) {
            goto nextline;
        }

        sf = 0;
        ef = -1;
        nf = str2words(line, wptr, 4);
        if (nf == 0) {
            /* Do nothing. */
        }
        else if (nf < 0) {
            E_ERROR("Unexpected extra data in control file at line %d\n", i);
        }
        else {
            char const *hyp, *file, *uttid;
            int32 score;

            file = wptr[0];
            uttid = NULL;
            if (nf > 1)
                sf = atoi(wptr[1]);
            if (nf > 2)
                ef = atoi(wptr[2]);
            if (nf > 3)
                uttid = wptr[3];

            E_INFO("Decoding '%s'\n", uttid ? uttid : file);

            /* Do actual decoding. */
            if(process_mllrctl_line(ps, config, mllrfile) < 0)
                continue;
            if(process_lmnamectl_line(ps, config, lmname) < 0)
                continue;
            if(process_fsgctl_line(ps, config, fsgfile) < 0)
                continue;
            if(process_ctl_line(ps, config, file, uttid, sf, ef) < 0)
                continue;
            hyp = ps_get_hyp(ps, &score, &uttid);
            
            /* Write out results and such. */
            if (hypfh) {
                fprintf(hypfh, "%s (%s %d)\n", hyp ? hyp : "", uttid, score);
            }
            if (hypsegfh) {
                write_hypseg(hypsegfh, ps, uttid);
            }
            if (ctmfh) {
                ps_seg_t *itor = ps_seg_iter(ps, &score);
                write_ctm(ctmfh, ps, itor, uttid, frate);
            }
            if (outlatdir) {
                write_lattice(ps, outlatdir, uttid);
            }
            if (nbestdir) {
                write_nbest(ps, nbestdir, uttid);
            }
            ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
            E_INFO("%s: %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
                   uttid, n_speech, n_cpu, n_wall);
            E_INFO("%s: %.2f xRT (CPU), %.2f xRT (elapsed)\n",
                   uttid, n_cpu / n_speech, n_wall / n_speech);
            E_INFO_NOFN("%s (%s %d)\n", hyp ? hyp : "", uttid, score);
        }
        i += ctlincr;
    nextline:
        ckd_free(mllrline);
        ckd_free(fsgline);
        ckd_free(lmline);
        ckd_free(line);
    }

    ps_get_all_time(ps, &n_speech, &n_cpu, &n_wall);
    E_INFO("TOTAL %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    E_INFO("AVERAGE %.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

done:
    if (hypfh)
        fclose(hypfh);
    if (hypsegfh)
        fclose(hypsegfh);
    if (ctmfh)
        fclose(ctmfh);
}