Beispiel #1
0
static int
write_hypseg(FILE *fh, ps_decoder_t *ps, char const *uttid)
{
    int32 score, lscr, sf, ef;
    ps_seg_t *itor = ps_seg_iter(ps, &score);

    /* Accumulate language model scores. */
    lscr = 0;
    while (itor) {
        int32 ascr, wlscr;
        ps_seg_prob(itor, &ascr, &wlscr, NULL);
        lscr += wlscr;
        itor = ps_seg_next(itor);
    }
    fprintf(fh, "%s S %d T %d A %d L %d", uttid,
            0, /* "scaling factor" which is mostly useless anyway */
            score, score - lscr, lscr);
    /* Now print out words. */
    itor = ps_seg_iter(ps, &score);
    while (itor) {
        char const *w = ps_seg_word(itor);
        int32 ascr, wlscr;

        ps_seg_prob(itor, &ascr, &wlscr, NULL);
        ps_seg_frames(itor, &sf, &ef);
        fprintf(fh, " %d %d %d %s", sf, ascr, wlscr, w);
        itor = ps_seg_next(itor);
    }
    fprintf(fh, " %d\n", ef);

    return 0;
}
int
ps_end_utt(ps_decoder_t *ps)
{
    int rv, i;

    acmod_end_utt(ps->acmod);

    /* Search any remaining frames. */
    if ((rv = ps_search_forward(ps)) < 0) {
        ptmr_stop(&ps->perf);
        return rv;
    }
    /* Finish phone loop search. */
    if (ps->phone_loop) {
        if ((rv = ps_search_finish(ps->phone_loop)) < 0) {
            ptmr_stop(&ps->perf);
            return rv;
        }
    }
    /* Search any frames remaining in the lookahead window. */
    for (i = ps->acmod->output_frame - ps->pl_window;
         i < ps->acmod->output_frame; ++i)
        ps_search_step(ps->search, i);
    /* Finish main search. */
    if ((rv = ps_search_finish(ps->search)) < 0) {
        ptmr_stop(&ps->perf);
        return rv;
    }
    ptmr_stop(&ps->perf);

    /* Log a backtrace if requested. */
    if (cmd_ln_boolean_r(ps->config, "-backtrace")) {
        char const *uttid, *hyp;
        ps_seg_t *seg;
        int32 score;

        hyp = ps_get_hyp(ps, &score, &uttid);
        
        if (hyp != NULL) {
    	    E_INFO("%s: %s (%d)\n", uttid, hyp, score);
    	    E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
                    "word", "start", "end", "pprob", "ascr", "lscr", "lback");
    	    for (seg = ps_seg_iter(ps, &score); seg;
        	 seg = ps_seg_next(seg)) {
    	        char const *word;
        	int sf, ef;
        	int32 post, lscr, ascr, lback;

        	word = ps_seg_word(seg);
        	ps_seg_frames(seg, &sf, &ef);
        	post = ps_seg_prob(seg, &ascr, &lscr, &lback);
        	E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
                    	    word, sf, ef, logmath_exp(ps_get_logmath(ps), post),
                    	ascr, lscr, lback);
    	    }
        }
    }
    return rv;
}
Beispiel #3
0
static int
write_ctm(FILE *fh, ps_decoder_t *ps, ps_seg_t *itor, char const *uttid, int32 frate)
{
    logmath_t *lmath = ps_get_logmath(ps);
    char *dupid, *show, *channel, *c;
    double ustart = 0.0;

    /* We have semi-standardized on comma-separated uttids which
     * correspond to the fields of the STM file.  So if there's a
     * comma in the uttid, take the first two fields as show and
     * channel, and also try to find the start time. */
    show = dupid = ckd_salloc(uttid ? uttid : "(null)");
    if ((c = strchr(dupid, ',')) != NULL) {
        *c++ = '\0';
        channel = c;
        if ((c = strchr(c, ',')) != NULL) {
            *c++ = '\0';
            if ((c = strchr(c, ',')) != NULL) {
                ustart = atof_c(c + 1);
            }
        }
    }
    else {
        channel = NULL;
    }

    while (itor) {
        int32 prob, sf, ef, wid;
        char const *w;

        /* Skip things that aren't "real words" (FIXME: currently
         * requires s3kr3t h34d3rz...) */
        w = ps_seg_word(itor);
        wid = dict_wordid(ps->dict, w);
        if (wid >= 0 && dict_real_word(ps->dict, wid)) {
            prob = ps_seg_prob(itor, NULL, NULL, NULL);
            ps_seg_frames(itor, &sf, &ef);
        
            fprintf(fh, "%s %s %.2f %.2f %s %.3f\n",
                    show,
                    channel ? channel : "1",
                    ustart + (double)sf / frate,
                    (double)(ef - sf) / frate,
                    /* FIXME: More s3kr3tz */
                    dict_basestr(ps->dict, wid),
                    logmath_exp(lmath, prob));
        }
        itor = ps_seg_next(itor);
    }
    ckd_free(dupid);

    return 0;
}
Beispiel #4
0
/*
 * Continuous recognition from mic
 */
int
recognize_from_mic()
{
	ad_rec_t *ad;
    int16 adbuf[2048];
    const char *fname;
	const char* seg;
    int32 k;
	char str[1000]="";
    uint8 utt_started, in_speech;
	
    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),16000)) == NULL)
		perror("Failed to open audio device\n");
	if (ad_start_rec(ad) < 0)
		perror("Failed to start recording\n");
    
    ps_start_utt(ps);
    utt_started = FALSE;
	ps_seg_t *psegt;
    while (!finished) {
		if ((k = ad_read(ad, adbuf, 2048)) < 0)
			perror("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
        } 
        if (!in_speech && utt_started) {
            ps_end_utt(ps);
			psegt = ps_seg_iter(ps, NULL);
			while (psegt!=NULL){
				seg = ps_seg_word(psegt);
				strncpy_s( str, seg, strlen(seg));
				listenCallback(str);
				printf("%s\n", seg);
				int prob = ps_seg_prob(psegt,NULL,NULL,NULL);
				printf("%d\n", prob);
				psegt = ps_seg_next(psegt);
			}
            ps_start_utt(ps);
            utt_started = FALSE;
        }
		Sleep(100);
    }
	
    ps_end_utt(ps);
    fclose(rawfd);
	return 0;
}
Beispiel #5
0
static void
print_word_times(int32 start)
{
	ps_seg_t *iter = ps_seg_iter(ps, NULL);
	while (iter != NULL) {
		int32 sf, ef, pprob;
		float conf;

		ps_seg_frames (iter, &sf, &ef);
		pprob = ps_seg_prob (iter, NULL, NULL, NULL);
		conf = logmath_exp(ps_get_logmath(ps), pprob);
		fprintf (stderr, "%s %f %f %f\n", ps_seg_word (iter), (sf + start) / 100.0, (ef + start) / 100.0, conf);
		iter = ps_seg_next (iter);
	}
}
Beispiel #6
0
void EventHandlerSegmentConfidence::event(ps_decoder_t *decoder)
{
    std::vector<std::pair<std::string,float> > segments;

    ps_seg_t* iter = ps_seg_iter( decoder, NULL );

    while( iter != NULL ) {
        int32 prob = ps_seg_prob( iter, NULL, NULL, NULL );
        segments.push_back( { std::string( ps_seg_word( iter ) ), logmath_exp( ps_get_logmath( decoder ), prob ) } );
        iter = ps_seg_next( iter );
    }

    if( ! segments.empty() )
        mCb( segments );
}
Beispiel #7
0
static void
print_word_times()
{
    int frame_rate = cmd_ln_int32_r(config, "-frate");
    ps_seg_t *iter = ps_seg_iter(ps);
    while (iter != NULL) {
        int32 sf, ef, pprob;
        float conf;

        ps_seg_frames(iter, &sf, &ef);
        pprob = ps_seg_prob(iter, NULL, NULL, NULL);
        conf = logmath_exp(ps_get_logmath(ps), pprob);
        printf("%s %.3f %.3f %f\n", ps_seg_word(iter), ((float)sf / frame_rate),
               ((float) ef / frame_rate), conf);
        iter = ps_seg_next(iter);
    }
}
Beispiel #8
0
void ofApp::process_result()
{
    int frame_rate = cmd_ln_int32_r(config, "-frate");
    ps_seg_t *iter = ps_seg_iter(ps, NULL);
    printf("\n\n");
    
    while (iter != NULL)
    {
        int32 sf, ef, pprob;
        float conf;
        
        ps_seg_frames(iter, &sf, &ef);
        pprob = ps_seg_prob(iter, NULL, NULL, NULL);
        conf = logmath_exp(ps_get_logmath(ps), pprob);
        
        //here is where we process the word
        
        new_utterance new_utt;
        new_utt.conf = conf;
        new_utt.sf = sf;
        new_utt.st = (float)sf / frame_rate;
        new_utt.ef = ef;
        new_utt.et = (float) ef / frame_rate;
        new_utt.utt = ps_seg_word(iter);
        
        printf("Recognised: %s %.3f %.3f %f\n", ps_seg_word(iter), new_utt.st,
               new_utt.et, new_utt.conf);
    
        std::string word = ps_seg_word(iter);
        
        result.push_back(new_utt);
        
        iter = ps_seg_next(iter);
        
    }
    
    printf("\n\n");
    
    engineExit();

}
Beispiel #9
0
int
main(int argc, char *argv[])
{
	ps_decoder_t *ps;
	cmd_ln_t *config;
	acmod_t *acmod;
	fsg_search_t *fsgs;
	ps_lattice_t *dag;
	ps_seg_t *seg;
	int32 score;

	TEST_ASSERT(config =
		    cmd_ln_init(NULL, ps_args(), TRUE,
				"-hmm", DATADIR "/tidigits/hmm",
				"-fsg", DATADIR "/tidigits/lm/tidigits.fsg",
				"-dict", DATADIR "/tidigits/lm/tidigits.dic",
				"-bestpath", "no",
				"-input_endian", "little",
				"-samprate", "16000", NULL));
	TEST_ASSERT(ps = ps_init(config));

	fsgs = (fsg_search_t *)ps->search;
	acmod = ps->acmod;

	setbuf(stdout, NULL);
	{
		FILE *rawfh;
		int16 buf[2048];
		size_t nread;
		int16 const *bptr;
		char const *hyp;
		int nfr;

		TEST_ASSERT(rawfh = fopen(DATADIR "/numbers.raw", "rb"));
		TEST_EQUAL(0, acmod_start_utt(acmod));
		fsg_search_start(ps_search_base(fsgs));
		while (!feof(rawfh)) {
			nread = fread(buf, sizeof(*buf), 2048, rawfh);
			bptr = buf;
			while ((nfr = acmod_process_raw(acmod, &bptr, &nread, FALSE)) > 0) {
				while (acmod->n_feat_frame > 0) {
					fsg_search_step(ps_search_base(fsgs),
							acmod->output_frame);
					acmod_advance(acmod);
				}
			}
		}
		fsg_search_finish(ps_search_base(fsgs));
		hyp = fsg_search_hyp(ps_search_base(fsgs), &score, NULL);
		printf("FSG: %s (%d)\n", hyp, score);

		TEST_ASSERT(acmod_end_utt(acmod) >= 0);
		fclose(rawfh);
	}
	for (seg = ps_seg_iter(ps); seg;
	     seg = ps_seg_next(seg)) {
		char const *word;
		int sf, ef;
		int32 post, lscr, ascr, lback;

		word = ps_seg_word(seg);
		ps_seg_frames(seg, &sf, &ef);
		post = ps_seg_prob(seg, &ascr, &lscr, &lback);
		printf("%s (%d:%d) P(w|o) = %f ascr = %d lscr = %d lback = %d\n", word, sf, ef,
		       logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback);
	}

	/* Now get the DAG and play with it. */
	dag = ps_get_lattice(ps);
	ps_lattice_write(dag, "test_fsg3.lat");
	printf("BESTPATH: %s\n",
	       ps_lattice_hyp(dag, ps_lattice_bestpath(dag, NULL, 1.0, 15.0)));
	ps_lattice_posterior(dag, NULL, 15.0);
	ps_free(ps);
	cmd_ln_free_r(config);

	return 0;
}
Beispiel #10
0
int
ps_decoder_test(cmd_ln_t *config, char const *sname, char const *expected)
{
    ps_decoder_t *ps;
    mfcc_t **cepbuf;
    FILE *rawfh;
    int16 *buf;
    int16 const *bptr;
    size_t nread;
    size_t nsamps;
    int32 nfr, i, score, prob;
    char const *hyp;
    char const *uttid;
    double n_speech, n_cpu, n_wall;
    ps_seg_t *seg;

    TEST_ASSERT(ps = ps_init(config));
    /* Test it first with pocketsphinx_decode_raw() */
    TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
    ps_decode_raw(ps, rawfh, "goforward", -1);
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);
    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    /* Test it with ps_process_raw() */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_END);
    nsamps = ftell(rawfh) / sizeof(*buf);
    fseek(rawfh, 0, SEEK_SET);
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    nsamps = 2048;
    buf = ckd_calloc(nsamps, sizeof(*buf));
    while (!feof(rawfh)) {
        nread = fread(buf, sizeof(*buf), nsamps, rawfh);
        ps_process_raw(ps, buf, nread, FALSE, FALSE);
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000000"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    /* Now read the whole file and produce an MFCC buffer. */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_END);
    nsamps = ftell(rawfh) / sizeof(*buf);
    fseek(rawfh, 0, SEEK_SET);
    bptr = buf = ckd_realloc(buf, nsamps * sizeof(*buf));
    TEST_EQUAL(nsamps, fread(buf, sizeof(*buf), nsamps, rawfh));
    fe_process_frames(ps->acmod->fe, &bptr, &nsamps, NULL, &nfr);
    cepbuf = ckd_calloc_2d(nfr + 1,
                   fe_get_output_size(ps->acmod->fe),
                   sizeof(**cepbuf));
    fe_start_utt(ps->acmod->fe);
    fe_process_frames(ps->acmod->fe, &bptr, &nsamps, cepbuf, &nfr);
    fe_end_utt(ps->acmod->fe, cepbuf[nfr], &i);

    /* Decode it with process_cep() */
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    for (i = 0; i < nfr; ++i) {
        ps_process_cep(ps, cepbuf + i, 1, FALSE, FALSE);
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000001"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);
    for (seg = ps_seg_iter(ps, &score); seg;
         seg = ps_seg_next(seg)) {
        char const *word;
        int sf, ef;
        int32 post, lscr, ascr, lback;

        word = ps_seg_word(seg);
        ps_seg_frames(seg, &sf, &ef);
        post = ps_seg_prob(seg, &ascr, &lscr, &lback);
        printf("%s (%d:%d) P(w|o) = %f ascr = %d lscr = %d lback = %d\n", word, sf, ef,
               logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback);
        TEST_ASSERT(post <= 2); // Due to numerical errors with float it sometimes could go out of 0
    }

    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);
    ps_get_all_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("TOTAL: %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("TOTAL: %.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    fclose(rawfh);
    ps_free(ps);
    cmd_ln_free_r(config);
    ckd_free_2d(cepbuf);
    ckd_free(buf);

    return 0;
}