Example #1
0
/*! function to read results from the ASR*/
static switch_status_t pocketsphinx_asr_get_results(switch_asr_handle_t *ah, char **xmlstr, switch_asr_flag_t *flags)
{
	pocketsphinx_t *ps = (pocketsphinx_t *) ah->private_info;
	switch_status_t status = SWITCH_STATUS_SUCCESS;
	int32_t conf;

	if (switch_test_flag(ps, PSFLAG_BARGE)) {
		switch_clear_flag_locked(ps, PSFLAG_BARGE);
		status = SWITCH_STATUS_BREAK;
	}

	if (switch_test_flag(ps, PSFLAG_HAS_TEXT)) {
		switch_mutex_lock(ps->flag_mutex);
		switch_clear_flag(ps, PSFLAG_HAS_TEXT);
		conf = ps_get_prob(ps->ps, &ps->uttid);

		ps->confidence = (conf + 20000) / 200;

		if (ps->confidence < 0) {
			ps->confidence = 0;
		}

		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Recognized: %s, Confidence: %d\n", ps->hyp, ps->confidence);
		switch_mutex_unlock(ps->flag_mutex);

		*xmlstr = switch_mprintf("<?xml version=\"1.0\"?>\n"
								 "<result grammar=\"%s\">\n"
								 "  <interpretation grammar=\"%s\" confidence=\"%d\">\n"
								 "    <input mode=\"speech\">%s</input>\n"
								 "  </interpretation>\n" "</result>\n", ps->grammar, ps->grammar, ps->confidence, ps->hyp);

		if (switch_test_flag(ps, SWITCH_ASR_FLAG_AUTO_RESUME)) {
			switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Auto Resuming\n");
			switch_set_flag(ps, PSFLAG_READY);

			ps_start_utt(ps->ps, NULL);
		}

		status = SWITCH_STATUS_SUCCESS;
	}

	return status;
}
Example #2
0
int
ps_decoder_test(cmd_ln_t *config, char const *sname, char const *expected)
{
    ps_decoder_t *ps;
    mfcc_t **cepbuf;
    FILE *rawfh;
    int16 *buf;
    int16 const *bptr;
    size_t nread;
    size_t nsamps;
    int32 nfr, i, score, prob;
    char const *hyp;
    char const *uttid;
    double n_speech, n_cpu, n_wall;
    ps_seg_t *seg;

    TEST_ASSERT(ps = ps_init(config));
    /* Test it first with pocketsphinx_decode_raw() */
    TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
    ps_decode_raw(ps, rawfh, "goforward", -1);
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);
    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    /* Test it with ps_process_raw() */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_END);
    nsamps = ftell(rawfh) / sizeof(*buf);
    fseek(rawfh, 0, SEEK_SET);
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    nsamps = 2048;
    buf = ckd_calloc(nsamps, sizeof(*buf));
    while (!feof(rawfh)) {
        nread = fread(buf, sizeof(*buf), nsamps, rawfh);
        ps_process_raw(ps, buf, nread, FALSE, FALSE);
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000000"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    /* Now read the whole file and produce an MFCC buffer. */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_END);
    nsamps = ftell(rawfh) / sizeof(*buf);
    fseek(rawfh, 0, SEEK_SET);
    bptr = buf = ckd_realloc(buf, nsamps * sizeof(*buf));
    TEST_EQUAL(nsamps, fread(buf, sizeof(*buf), nsamps, rawfh));
    fe_process_frames(ps->acmod->fe, &bptr, &nsamps, NULL, &nfr);
    cepbuf = ckd_calloc_2d(nfr + 1,
                   fe_get_output_size(ps->acmod->fe),
                   sizeof(**cepbuf));
    fe_start_utt(ps->acmod->fe);
    fe_process_frames(ps->acmod->fe, &bptr, &nsamps, cepbuf, &nfr);
    fe_end_utt(ps->acmod->fe, cepbuf[nfr], &i);

    /* Decode it with process_cep() */
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    for (i = 0; i < nfr; ++i) {
        ps_process_cep(ps, cepbuf + i, 1, FALSE, FALSE);
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000001"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);
    for (seg = ps_seg_iter(ps, &score); seg;
         seg = ps_seg_next(seg)) {
        char const *word;
        int sf, ef;
        int32 post, lscr, ascr, lback;

        word = ps_seg_word(seg);
        ps_seg_frames(seg, &sf, &ef);
        post = ps_seg_prob(seg, &ascr, &lscr, &lback);
        printf("%s (%d:%d) P(w|o) = %f ascr = %d lscr = %d lback = %d\n", word, sf, ef,
               logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback);
        TEST_ASSERT(post <= 2); // Due to numerical errors with float it sometimes could go out of 0
    }

    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);
    ps_get_all_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("TOTAL: %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("TOTAL: %.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    fclose(rawfh);
    ps_free(ps);
    cmd_ln_free_r(config);
    ckd_free_2d(cepbuf);
    ckd_free(buf);

    return 0;
}
/*! function to feed audio to the ASR */
static switch_status_t pocketsphinx_asr_feed(switch_asr_handle_t *ah, void *data, unsigned int len, switch_asr_flag_t *flags)
{
	pocketsphinx_t *ps = (pocketsphinx_t *) ah->private_info;
	int rv = 0;

	if (switch_test_flag(ah, SWITCH_ASR_FLAG_CLOSED))
		return SWITCH_STATUS_BREAK;

	if (!switch_test_flag(ps, PSFLAG_NOMATCH) && !switch_test_flag(ps, PSFLAG_NOINPUT) && !switch_test_flag(ps, PSFLAG_HAS_TEXT) && switch_test_flag(ps, PSFLAG_READY)) {
		if (stop_detect(ps, (int16_t *) data, len / 2)) {
			char const *hyp;

			switch_mutex_lock(ps->flag_mutex);
			if ((hyp = ps_get_hyp(ps->ps, &ps->score, &ps->uttid))) {
				if (!zstr(hyp)) {
					ps_end_utt(ps->ps);
					switch_clear_flag(ps, PSFLAG_READY);
					if ((hyp = ps_get_hyp(ps->ps, &ps->score, &ps->uttid))) {
						if (zstr(hyp)) {
							if (!switch_test_flag(ps, PSFLAG_SPEECH_TIMEOUT)) {
								switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Lost the text, never mind....\n");
								ps_start_utt(ps->ps, NULL);
								switch_set_flag(ps, PSFLAG_READY);
							}
						} else {
							/* get match and confidence */
							int32_t conf;

							conf = ps_get_prob(ps->ps, &ps->uttid);

							ps->confidence = (conf + 20000) / 200;

							if (ps->confidence < 0) {
								ps->confidence = 0;
							}

							if (ps->confidence_threshold <= 0 || ps->confidence >= ps->confidence_threshold) {
								ps->hyp = switch_core_strdup(ah->memory_pool, hyp);
								switch_set_flag(ps, PSFLAG_HAS_TEXT);
							} else {
								/* have match, but below confidence threshold */
								switch_set_flag(ps, PSFLAG_NOMATCH);
							}
						}
					}
				}
			}
			if (switch_test_flag(ps, PSFLAG_SPEECH_TIMEOUT) && !switch_test_flag(ps, PSFLAG_HAS_TEXT)) {
				/* heard something, but doesn't match anything */
				switch_clear_flag(ps, PSFLAG_READY);
				switch_set_flag(ps, PSFLAG_NOMATCH);
			}
			switch_mutex_unlock(ps->flag_mutex);
		}

		/* only feed ps_process_raw when we are listening */
		if (ps->listening) {
			switch_mutex_lock(ps->flag_mutex);
			rv = ps_process_raw(ps->ps, (int16 *) data, len / 2, FALSE, FALSE);
			switch_mutex_unlock(ps->flag_mutex);
		}

		if (rv < 0) {
			return SWITCH_STATUS_FALSE;
		}
	} else if (switch_test_flag(ps, PSFLAG_NOINPUT_TIMEOUT)) {
		/* never heard anything */
		switch_clear_flag_locked(ps, PSFLAG_READY);
	}

	return SWITCH_STATUS_SUCCESS;
}
Example #4
0
int
main(int argc, char *argv[])
{
    ps_decoder_t *ps;
    cmd_ln_t *config;
    acmod_t *acmod;
    fsg_search_t *fsgs;
    jsgf_t *jsgf;
    jsgf_rule_t *rule;
    fsg_model_t *fsg;
    ps_seg_t *seg;
    ps_lattice_t *dag;
    FILE *rawfh;
    char const *hyp, *uttid;
    int32 score, prob;
    clock_t c;
    int i;

    TEST_ASSERT(config =
            cmd_ln_init(NULL, ps_args(), TRUE,
                "-hmm", MODELDIR "/hmm/en_US/hub4wsj_sc_8k",
                "-dict", MODELDIR "/lm/en/turtle.dic",
                "-input_endian", "little",
                "-samprate", "16000", NULL));
    TEST_ASSERT(ps = ps_init(config));

    jsgf = jsgf_parse_file(DATADIR "/goforward.gram", NULL);
    TEST_ASSERT(jsgf);
    rule = jsgf_get_rule(jsgf, "<goforward.move2>");
    TEST_ASSERT(rule);
    fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, 7.5);
    TEST_ASSERT(fsg);
    fsg_model_write(fsg, stdout);
  ps_set_fsg(ps, "<goforward.move2>", fsg);
  ps_set_search(ps, "<goforward.move2>"); 

    acmod = ps->acmod;
  fsgs = (fsg_search_t *) fsg_search_init(fsg, config, acmod, ps->dict, ps->d2p);

    setbuf(stdout, NULL);
    c = clock();
    for (i = 0; i < 5; ++i) {
        int16 buf[2048];
        size_t nread;
        int16 const *bptr;
        int nfr;
        int is_final;

        TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
        TEST_EQUAL(0, acmod_start_utt(acmod));
        fsg_search_start(ps_search_base(fsgs));
        is_final = FALSE;
        while (!feof(rawfh)) {
            nread = fread(buf, sizeof(*buf), 2048, rawfh);
            bptr = buf;
            while ((nfr = acmod_process_raw(acmod, &bptr, &nread, FALSE)) > 0) {
                while (acmod->n_feat_frame > 0) {
                    fsg_search_step(ps_search_base(fsgs),
                            acmod->output_frame);
                    acmod_advance(acmod);
                }
            }
            hyp = fsg_search_hyp(ps_search_base(fsgs), &score, &is_final);
            printf("FSG: %s (%d) frame %d final %s\n", hyp, score, acmod->output_frame, is_final ? "FINAL" : "");
            TEST_EQUAL (is_final, (acmod->output_frame > 170));
        }
        fsg_search_finish(ps_search_base(fsgs));
        hyp = fsg_search_hyp(ps_search_base(fsgs), &score, NULL);
        printf("FSG: %s (%d)\n", hyp, score);

        TEST_ASSERT(acmod_end_utt(acmod) >= 0);
        fclose(rawfh);
    }
    TEST_EQUAL(0, strcmp("go forward ten meters",
                 fsg_search_hyp(ps_search_base(fsgs), &score, NULL)));
    ps->search = (ps_search_t *)fsgs;
    for (seg = ps_seg_iter(ps, &score); seg;
         seg = ps_seg_next(seg)) {
        char const *word;
        int sf, ef;

        word = ps_seg_word(seg);
        ps_seg_frames(seg, &sf, &ef);
        printf("%s %d %d\n", word, sf, ef);
    }
    c = clock() - c;
    printf("5 * fsg search in %.2f sec\n", (double)c / CLOCKS_PER_SEC);

    dag = ps_get_lattice(ps);
    ps_lattice_write(dag, "test_jsgf.lat");
    jsgf_grammar_free(jsgf);
    fsg_search_free(ps_search_base(fsgs));
    ps_free(ps);
    cmd_ln_free_r(config);

    TEST_ASSERT(config =
            cmd_ln_init(NULL, ps_args(), TRUE,
                "-hmm", MODELDIR "/hmm/en_US/hub4wsj_sc_8k",
                "-dict", MODELDIR "/lm/en/turtle.dic",
                "-jsgf", DATADIR "/goforward.gram",
                "-input_endian", "little",
                "-samprate", "16000", NULL));
    TEST_ASSERT(ps = ps_init(config));
    TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
    ps_decode_raw(ps, rawfh, "goforward", -1);
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s: %s (%d, %d)\n", uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp("go forward ten meters", hyp));
    ps_free(ps);
    fclose(rawfh);
    cmd_ln_free_r(config);

    TEST_ASSERT(config =
            cmd_ln_init(NULL, ps_args(), TRUE,
                "-hmm", MODELDIR "/hmm/en_US/hub4wsj_sc_8k",
                "-dict", MODELDIR "/lm/en/turtle.dic",
                "-jsgf", DATADIR "/goforward.gram",
                "-toprule", "goforward.move2",
                "-input_endian", "little",
                "-samprate", "16000", NULL));
    TEST_ASSERT(ps = ps_init(config));
    TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
    ps_decode_raw(ps, rawfh, "goforward", -1);
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s: %s (%d, %d)\n", uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp("go forward ten meters", hyp));
    ps_free(ps);
    cmd_ln_free_r(config);
    fclose(rawfh);

    TEST_ASSERT(config =
            cmd_ln_init(NULL, ps_args(), TRUE,
                "-hmm", MODELDIR "/hmm/en_US/hub4wsj_sc_8k",
                "-dict", MODELDIR "/lm/en/turtle.dic",
                "-jsgf", DATADIR "/defective.gram",
                NULL));
    TEST_ASSERT(NULL == ps_init(config));
    cmd_ln_free_r(config);

    return 0;
}
Example #5
0
static void recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[4096];
    int32 k, ts, rem;
    char const *hyp;
    char const *uttid;
    cont_ad_t *cont;
    char word[256];
	int32 score;

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int)cmd_ln_float32_r(config, "-samprate"))) == NULL)
        E_FATAL("Failed top open audio device\n");

    /* Initialize continuous listening module */
    if ((cont = cont_ad_init(ad, ad_read)) == NULL)
        E_FATAL("Failed to initialize voice activity detection\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");
    if (cont_ad_calib(cont) < 0)
        E_FATAL("Failed to calibrate voice activity detection\n");

    for (;;) {
        /* Indicate listening for next utterance */
        printf("READY....\n");
        fflush(stdout);
        fflush(stderr);

        /* Wait data for next utterance */
        while ((k = cont_ad_read(cont, adbuf, 4096)) == 0)
            sleep_msec(100);

        if (k < 0)
            E_FATAL("Failed to read audio\n");

        /*
         * Non-zero amount of data received; start recognition of new utterance.
         * NULL argument to uttproc_begin_utt => automatic generation of utterance-id.
         */
        if (ps_start_utt(ps, NULL) < 0)
            E_FATAL("Failed to start utterance\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        printf("Listening...\n");
        fflush(stdout);

        /* Note timestamp for this first block of data */
        ts = cont->read_ts;

        /* Decode utterance until end (marked by a "long" silence, >1sec) */
        for (;;) {
            /* Read non-silence audio data, if any, from continuous listening module */
            if ((k = cont_ad_read(cont, adbuf, 4096)) < 0)
                E_FATAL("Failed to read audio\n");
            if (k == 0) {
                /*
                 * No speech data available; check current timestamp with most recent
                 * speech to see if more than 1 sec elapsed.  If so, end of utterance.
                 */
                if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC)
                    break;
            }
            else {
                /* New speech data received; note current timestamp */
                ts = cont->read_ts;
            }

            /*
             * Decode whatever data was read above.
             */
            rem = ps_process_raw(ps, adbuf, k, FALSE, FALSE);

            /* If no work to be done, sleep a bit */
            if ((rem == 0) && (k == 0))
                sleep_msec(20);
        }

        /*
         * Utterance ended; flush any accumulated, unprocessed A/D data and stop
         * listening until current utterance completely decoded
         */
        ad_stop_rec(ad);
        while (ad_read(ad, adbuf, 4096) >= 0);
        cont_ad_reset(cont);

        printf("Stopped listening, please wait...\n");
        fflush(stdout);
        /* Finish decoding, obtain and print result */
        ps_end_utt(ps);
        hyp = ps_get_hyp(ps, &score, &uttid);
    	if ( hyp == NULL )
        	return;
    	printf("Recognized: %s (%d %s) with prob %d\n", hyp, score, uttid, ps_get_prob(ps, NULL));
        //printf("%s: %s\n", uttid, hyp);
        fflush(stdout);

        /* Exit if the first word spoken was GOODBYE */
        if (hyp) {
            sscanf(hyp, "%s", word);
            if (strcmp(word, "goodbye") == 0)
                break;
        }

        /* Resume A/D recording for next utterance */
        if (ad_start_rec(ad) < 0)
            E_FATAL("Failed to start recording\n");
    }

    cont_ad_close(cont);
    ad_close(ad);
}