Example #1
0
int
main(int argc, char *argv[])
{
	ps_decoder_t *ps;
	cmd_ln_t *config;
	FILE *fh;
	char const *hyp, *uttid;
	int16 buf[512];
	int rv;
	int32 score;
	//int i;

	config = cmd_ln_init(NULL, ps_args(), TRUE,
		"-hmm", MODELDIR "/hmm/en_US/hub4wsj_sc_8k",
		"-lm", MODELDIR "/lm/en/turtle.DMP",
		"-dict", MODELDIR "/lm/en/turtle.dic",
		NULL);
	if (config == NULL)
		return 1;
	ps = ps_init(config);
	if (ps == NULL)
		return 1;

	fh = fopen("goforward.raw", "rb");
	if (fh == NULL) {
		perror("Failed to open goforward.raw");
		return 1;
	}

	rv = ps_decode_raw(ps, fh, "goforward", -1);
	if (rv < 0)
		return 1;
	hyp = ps_get_hyp(ps, &score, &uttid);
	if (hyp == NULL)
		return 1;
	printf("Recognized: %s\n", hyp);

	fseek(fh, 0, SEEK_SET);
	rv = ps_start_utt(ps, "goforward");
	if (rv < 0)
		return 1;
	while (!feof(fh)) {
		size_t nsamp;
		nsamp = fread(buf, 2, 512, fh);
		rv = ps_process_raw(ps, buf, nsamp, FALSE, FALSE);
	}
	rv = ps_end_utt(ps);
	if (rv < 0)
		return 1;
	hyp = ps_get_hyp(ps, &score, &uttid);
	if (hyp == NULL)
		return 1;
	printf("Recognized: %s\n", hyp);

	fclose(fh);
	ps_free(ps);
	return 0;
}
Example #2
0
void EventHandlerBasic::event(ps_decoder_t* decoder)
{
    char const* message = ps_get_hyp( decoder, NULL );

    if( mCb != nullptr && message != NULL && strlen( message ) > 0 )
        mCb( std::string( message ) );
}
Example #3
0
std::string PocketSphinxServer::decode(const PocketSphinxIce::sample& signal, const Ice::Current& c)
{
	std::cout << "Decode\n";
	int rv;
	const char* hyp;
	const char* uttid;
	int32 score;

	rv = ps_start_utt(ps);
	if (rv < 0)
		throw PocketSphinxIce::Error("Error in ps_start_utt");

	ps_process_raw(ps, signal.data(), signal.size(), FALSE, FALSE);

	rv = ps_end_utt(ps);
	if (rv < 0)
		throw PocketSphinxIce::Error("Error in ps_end_utt");

	hyp = ps_get_hyp(ps, &score);
	if (!hyp)
		throw PocketSphinxIce::Error("ps_get_hyp returned NULL");

	std::cout << "return:" << hyp << '\n';
	return hyp;
}
Example #4
0
int
main(int argc, char *argv[])
{
	cmd_ln_t *config;
	ps_decoder_t *ps;
	FILE *rawfh;
	char const *hyp;
	char const *uttid;
	int32 score;

	TEST_ASSERT(config =
		    cmd_ln_init(NULL, ps_args(), TRUE,
				"-hmm", DATADIR "/an4_ci_cont",
				"-lm", MODELDIR "/lm/en/turtle.DMP",
				"-dict", MODELDIR "/lm/en/turtle.dic",
				"-mllr", DATADIR "/mllr_matrices",
				"-samprate", "16000", NULL));

	TEST_ASSERT(ps = ps_init(config));
	TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
	ps_decode_raw(ps, rawfh, "goforward", -1);
	fclose(rawfh);
	hyp = ps_get_hyp(ps, &score, &uttid);
	printf("FWDFLAT (%s): %s (%d)\n", uttid, hyp, score);
	ps_free(ps);
	cmd_ln_free_r(config);
	return 0;
}
Example #5
0
int processRaw(const char *rawFile) {
    const char *hyp, *uttid;
    int16 buf[512];
    int rv;
    int32 score;

    // Open the wav file passed from argument
    printf("file: %s\n", rawFile);
    fh = fopen(rawFile, "rb");
    if (fh == NULL) {
        fprintf(stderr, "Unable to open input file %s\n", rawFile);
        return -1;
    }

    // Start utterance
    rv = ps_start_utt(ps);
    
    // Process buffer, 512 samples at a time
    while (!feof(fh)) {
        size_t nsamp;
        nsamp = fread(buf, 2, 512, fh);
        rv = ps_process_raw(ps, buf, nsamp, FALSE, FALSE);
    }
    
    // Recieve the recognized string
    rv = ps_end_utt(ps);
    hyp = ps_get_hyp(ps, &score);
    printf("Recognized: |%s|\n", hyp);
    fflush(stdout);

    // Close file
    fclose(fh);

    return 0;
}
std::tuple<std::string, double> TwitchStreamChunk::process(std::string body){
  int pos = _uri.find_last_of('/');
  std::string fileName = _uri.substr(pos + 1);
  std::ofstream file(fileName);
  file << body;
  file.flush();
  file.close();

  std::stringstream cmd;
  std::string audioFile = boost::filesystem::unique_path().native();
  audioFile.append(".wav");
  cmd << "ffmpeg -i " << fileName << " -vn -ac 1 " << audioFile << " > /dev/null 2>&1";

  system(cmd.str().c_str());

  FILE *aFile = fopen(audioFile.c_str(), "r");
  ps_decode_raw(getDecoder(), aFile, -1);
  fclose(aFile);

  auto logarithm = ps_get_logmath(getDecoder());

  int confidence = 1;
  const char * result = ps_get_hyp(getDecoder(), &confidence);
  double tmp = logmath_exp(logarithm, confidence);

  std::remove(fileName.c_str());
  std::remove(audioFile.c_str());
  return std::make_tuple(result == nullptr ? "" : std::string(result), tmp);
}
Example #7
0
int ofApp::engineClose()
{
    char const *uttid;
    int rv;
    int32 score;
    
    rv = ps_end_utt(ps);
    if (rv < 0)
    {
        return 1;
    }
    hyp = ps_get_hyp(ps, &score);
    if (hyp == NULL)
    {
        return 1;
    }
    
    printf("NEWLINE_________\n\n\nRecognized: %s\n", hyp);
    
    sentence = hyp;
    
    process_result();

    
}
int
ps_end_utt(ps_decoder_t *ps)
{
    int rv, i;

    acmod_end_utt(ps->acmod);

    /* Search any remaining frames. */
    if ((rv = ps_search_forward(ps)) < 0) {
        ptmr_stop(&ps->perf);
        return rv;
    }
    /* Finish phone loop search. */
    if (ps->phone_loop) {
        if ((rv = ps_search_finish(ps->phone_loop)) < 0) {
            ptmr_stop(&ps->perf);
            return rv;
        }
    }
    /* Search any frames remaining in the lookahead window. */
    for (i = ps->acmod->output_frame - ps->pl_window;
         i < ps->acmod->output_frame; ++i)
        ps_search_step(ps->search, i);
    /* Finish main search. */
    if ((rv = ps_search_finish(ps->search)) < 0) {
        ptmr_stop(&ps->perf);
        return rv;
    }
    ptmr_stop(&ps->perf);

    /* Log a backtrace if requested. */
    if (cmd_ln_boolean_r(ps->config, "-backtrace")) {
        char const *uttid, *hyp;
        ps_seg_t *seg;
        int32 score;

        hyp = ps_get_hyp(ps, &score, &uttid);
        
        if (hyp != NULL) {
    	    E_INFO("%s: %s (%d)\n", uttid, hyp, score);
    	    E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
                    "word", "start", "end", "pprob", "ascr", "lscr", "lback");
    	    for (seg = ps_seg_iter(ps, &score); seg;
        	 seg = ps_seg_next(seg)) {
    	        char const *word;
        	int sf, ef;
        	int32 post, lscr, ascr, lback;

        	word = ps_seg_word(seg);
        	ps_seg_frames(seg, &sf, &ef);
        	post = ps_seg_prob(seg, &ascr, &lscr, &lback);
        	E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
                    	    word, sf, ef, logmath_exp(ps_get_logmath(ps), post),
                    	ascr, lscr, lback);
    	    }
        }
    }
    return rv;
}
Example #9
0
/*! function to feed audio to the ASR */
static switch_status_t pocketsphinx_asr_feed(switch_asr_handle_t *ah, void *data, unsigned int len, switch_asr_flag_t *flags)
{
	pocketsphinx_t *ps = (pocketsphinx_t *) ah->private_info;
	int rv = 0;

	if (switch_test_flag(ah, SWITCH_ASR_FLAG_CLOSED))
		return SWITCH_STATUS_BREAK;

	if (!switch_test_flag(ps, PSFLAG_HAS_TEXT) && switch_test_flag(ps, PSFLAG_READY)) {
		if (stop_detect(ps, (int16_t *) data, len / 2)) {
			char const *hyp;

			switch_mutex_lock(ps->flag_mutex);
			if ((hyp = ps_get_hyp(ps->ps, &ps->score, &ps->uttid))) {
				if (!zstr(hyp)) {
					ps_end_utt(ps->ps);
					switch_clear_flag(ps, PSFLAG_READY);
					if ((hyp = ps_get_hyp(ps->ps, &ps->score, &ps->uttid))) {
						if (zstr(hyp)) {
							switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Lost the text, never mind....\n");
							ps_start_utt(ps->ps, NULL);
							switch_set_flag(ps, PSFLAG_READY);
						} else {
							ps->hyp = switch_core_strdup(ah->memory_pool, hyp);
							switch_set_flag(ps, PSFLAG_HAS_TEXT);
						}
					}
				}
			}
			switch_mutex_unlock(ps->flag_mutex);
		}

		/* only feed ps_process_raw when we are listening */
		if (ps->listening) {
			switch_mutex_lock(ps->flag_mutex);
			rv = ps_process_raw(ps->ps, (int16 *) data, len / 2, FALSE, FALSE);
			switch_mutex_unlock(ps->flag_mutex);
		}

		if (rv < 0) {
			return SWITCH_STATUS_FALSE;
		}
	}

	return SWITCH_STATUS_SUCCESS;
}
 ReturnType Recognizer::process(const std::vector<int16_t>& buffer) {
   if ((decoder == NULL) || (!is_recording)) return BAD_STATE;
   if (buffer.size() == 0)
     return RUNTIME_ERROR;
   ps_process_raw(decoder, (short int *) &buffer[0], buffer.size(), 0, 0);
   const char* h = ps_get_hyp(decoder, &score, &sentence_id);
   current_hyp = (h == NULL) ? "" : h;
   return SUCCESS;
 }
Example #11
0
int main(int argc, char *argv[]) {
    ps_decoder_t *ps;
    cmd_ln_t *config;
    FILE *fh;
    char const *hyp, *uttid;
    int16 buf[512];
    int rv;
    int32 score;

    config = cmd_ln_init(NULL, ps_args(), TRUE,
            "-hmm",    MODELDIR "/en-us/en-us",
            "-lm",     MODELDIR "/en-us/en-us.lm.bin",
            "-dict",   MODELDIR "/en-us/cmudict-en-us.dict",
            NULL);

    if (config == NULL) {
        fprintf(stderr, "Failed to create config object, see log for details\n");
        return -1;
    }
    
    // Initialize pocketsphinx
    ps = ps_init(config);
    if (ps == NULL) {
        fprintf(stderr, "Failed to create recognizer, see log for details\n");
        return -1;
    }

    // Open the wav file passed from argument
    printf("file: %s\n", argv[1]);
    fh = fopen(argv[1], "rb");
    if (fh == NULL) {
        fprintf(stderr, "Unable to open input file %s\n", argv[1]);
        return -1;
    }

    // Start utterance
    rv = ps_start_utt(ps);
    
    // Process buffer, 512 samples at a time
    while (!feof(fh)) {
        size_t nsamp;
        nsamp = fread(buf, 2, 512, fh);
        rv = ps_process_raw(ps, buf, nsamp, FALSE, FALSE);
    }
    
    // Recieve the recognized string
    rv = ps_end_utt(ps);
    hyp = ps_get_hyp(ps, &score);
    printf("Recognized: |%s|\n", hyp);

    // free memory
    fclose(fh);
    ps_free(ps);
    cmd_ln_free_r(config);
    
    return 0;
}
SWIGINTERN Hypothesis *ps_decoder_s_getHyp(struct ps_decoder_s *self){
		char const *hyp, *uttid;
		int32 best_score;
		hyp = ps_get_hyp(self, &best_score, &uttid);
		if (hyp == NULL)
			return NULL;
		else
			return new_Hypothesis(hyp, uttid, best_score);
	}
/**
 * You must sucessfully call spInitListener
 * once before using this function.
 *
 * Reads the next block of audio from the microphone
 * up to SPLBUFSIZE number of samples
 * (defined in splistener.h).
 * If an utterance was completed in that block,
 * the transcription is stored in the string words.
 *
 * When calling this function in a realtime loop, delay
 * by some amount of time between calls keeping in mind
 * your recording's sample rate and maximum samples read
 * per call (ex. sleep the thread for 100 milliseconds)
 * so that some audio can be recorded for the next call.
 *
 * @return true if a speech session was completed and
 *         transcribed this block, otherwise false.
 */
static bool spDecode() {
    static bool uttered = false;

    // lock pocketsphinx resources to make sure they 
    // don't get freed by main thread while in use
    std::lock_guard<std::mutex> ps_lock(ps_mtx);
    if(!mic || !ps)
        return false;

    int samples_read = ad_read(mic, buf, SPLBUFSIZE);
    if (samples_read <= 0) {
        spError("failed to read audio :(");
        return false;
    }

    ps_process_raw(ps, buf, samples_read, FALSE, FALSE);
    bool talking = ps_get_in_speech(ps);

    // Just started talking
    if (talking && !uttered) {
        uttered = true;
        return false;
    }

    // Stopped talking, so transcribe what was said
    // and begin the next utterance
    if (!talking && uttered) {
        ps_end_utt(ps);
        const char *trans = ps_get_hyp(ps, NULL);

        if (ps_start_utt(ps) < 0) {
            spError("failed to start utterance :(");
        }
        uttered = false;

        int l = strlen(trans);
        if (trans && l > 0) {
            std::lock_guard<std::mutex> lock(words_mtx);
            if (words && l + 1 > words_buf_size) {
                delete words;
                words = NULL;
            }
            if (!words) {
                words = new char[l + 1];
                words_buf_size = l + 1;
            }

            std::copy(trans, trans + l, words);
            words[l] = '\0';
            
            return true;
        }
    }

    return false;
}
 ReturnType Recognizer::stop() {
   if ((decoder == NULL) || (!is_recording)) return BAD_STATE;
   if (ps_end_utt(decoder) < 0) {
     return RUNTIME_ERROR;
   }
   const char* h = ps_get_hyp(decoder, &score, &sentence_id);
   current_hyp = (h == NULL) ? "" : h;
   is_recording = false;
   return SUCCESS;
 }
Example #15
0
int
main(int argc, char *argv[]) {
	ps_decoder_t *ps;
	cmd_ln_t *config;
	FILE *fh;
	const char *filename = "goforward.raw";
	const char *word = "goforward";
	char const *hyp, *uttid;
	int rv;
	int32 score;

	/* setup the sphinx config */
	config = cmd_ln_init(NULL, ps_args(), TRUE,
			"-hmm", MODELDIR "/hmm/en_US/hub4wsj_sc_8k", 
			"-lm", MODELDIR "/lm/en/turtle.DMP",
			"-dict", MODELDIR "/lm/en/turtle.dic",
			NULL);
	if(config == NULL) {
		EXIT_ERROR;
	}

	/* initialize the config */
	ps = ps_init(config);
	if(ps == NULL) {
		EXIT_ERROR;
	}

	/* open the audio file (stream?) */
	fh = fopen(filename, "rb");
	if(fh == NULL) {
		perror(filename);
		exit(1);
	}

	/* decode the file */
	rv = ps_decode_raw(ps, fh, word, -1);
	if(rv < 0) {
		EXIT_ERROR;
	}

	/* get hypothesis */
	hyp = ps_get_hyp(ps, &score, &uttid);
	if(hyp == NULL) {
		EXIT_ERROR;
	}
	printf("Recognized: %s; score: %d; uttid: %s\n", 
			hyp, score, uttid);

	/* clean up */
	fclose(fh);
	ps_free(ps);

	return 0;
}
Example #16
0
int
main(int argc, char *argv[])
{
    ps_decoder_t *ps;
    cmd_ln_t *config;
    FILE *fh;
    char const *hyp, *uttid;
    int16 buf[512];
    int rv;
    int32 score;

    config = cmd_ln_init(NULL, ps_args(), TRUE,
                 "-hmm", MODELDIR "/en-us/en-us",
                 "-keyphrase", "marieta",
                 "-dict", MODELDIR "/en-us/cmudict-en-us.dict",
                 "-kws_threshold", "1e-30",
                 NULL);
    if (config == NULL) {
        fprintf(stderr, "Failed to create config object, see log for details\n");
        return -1;
    }
    
    ps = ps_init(config);
    if (ps == NULL) {
        fprintf(stderr, "Failed to create recognizer, see log for details\n");
        return -1;
    }

    fh = fopen("data/marieta.raw", "rb");
    if (fh == NULL) {
        fprintf(stderr, "Unable to open input file goforward.raw\n");
        return -1;
    }

    rv = ps_start_utt(ps);
    
    while (!feof(fh)) {
        size_t nsamp;
        nsamp = fread(buf, 2, 512, fh);
        rv = ps_process_raw(ps, buf, nsamp, FALSE, FALSE);
    }
    
    rv = ps_end_utt(ps);
    hyp = ps_get_hyp(ps, &score);
    printf("Recognized: %s\n", hyp);

    fclose(fh);
    ps_free(ps);
    cmd_ln_free_r(config);
    
    return 0;
}
static void gst_sphinx_sink_process_chunk (GstSphinxSink *sphinxsink)
{
	int32 k;
	int16 adbuf[REQUIRED_FRAME_SAMPLES];
	
	k = cont_ad_read (sphinxsink->cont, adbuf, REQUIRED_FRAME_SAMPLES);
	
	if (k == 0 && sphinxsink->last_ts == 0) {
	      return;
	} else if (k == 0 && sphinxsink->cont->read_ts - sphinxsink->last_ts > 
			    DEFAULT_SAMPLES_PER_SEC) {
	      int32 score;
	      const char *hyp;
    	      char *stripped_hyp;
    	      int i, j;
  
	      ps_end_utt (sphinxsink->decoder);
   	      if ((hyp = ps_get_hyp (sphinxsink->decoder, &score, NULL)) == NULL) {
	    	
	    	      gst_sphinx_sink_send_message (sphinxsink, "message", "");
                      g_message("Not Recognized");
	      
	      } else {
	    	      stripped_hyp = 
		           g_malloc (strlen (hyp) + 1);
	    	      for (i=0, j=0; hyp[i] != 0; i++) {
	    	    	    if (hyp[i] != '(' && hyp[i] != ')' && (hyp[i] < '0' || hyp[i] > '9')) {
	    	    		    stripped_hyp[j++] = hyp[i];
	    	    	    }
	    	      }
	    	      stripped_hyp [j] = 0;
	    	      
	    	      gst_sphinx_sink_send_message (sphinxsink, "message", stripped_hyp);
                      g_message("Recognized: %s", stripped_hyp);
	      }

	      sphinxsink->last_ts = 0;
	      sphinxsink->ad.listening = 0;

	} else if (k != 0) {
	     if (sphinxsink->ad.listening == 0) {
	    	    ps_start_utt (sphinxsink->decoder, NULL);
	    	    sphinxsink->ad.listening = 1;
		    gst_sphinx_sink_send_message (sphinxsink, "listening", NULL);
	    }
	
	     ps_process_raw (sphinxsink->decoder, adbuf, k, 0, 0);
	     sphinxsink->last_ts = sphinxsink->cont->read_ts;
	}
}
Example #18
0
/*
 * Main utterance processing loop:
 *     for (;;) {
 *        start utterance and wait for speech to process
 *        decoding till end-of-utterance silence will be detected
 *        print utterance result;
 *     }
 */
static void
recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[2048];
    uint8 utt_started, in_speech;
    int32 k;
    char const *hyp;

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int) cmd_ln_float32_r(config,
                                                 "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");

    if (ps_start_utt(ps) < 0)
        E_FATAL("Failed to start utterance\n");
    utt_started = FALSE;
    E_INFO("Ready....\n");

    for (;;) {
        if ((k = ad_read(ad, adbuf, 2048)) < 0)
            E_FATAL("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
            E_INFO("Listening...\n");
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition, time to start new utterance  */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL );
            if (hyp != NULL) {
                printf("%s\n", hyp);
                fflush(stdout);
            }

            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
            E_INFO("Ready....\n");
        }
        sleep_msec(100);
    }
    ad_close(ad);
}
Example #19
0
/*
 * Continuous recognition from a file
 */
int
recognize_from_file()
{
    int16 adbuf[2048];
    const char *fname;
    const char *hyp;
    int32 k;
	char str[1000]="";
    uint8 utt_started, in_speech;
	
    fname = "C:/Users/Reza/Documents/GitHub/speech_agent/presentation_samples/italy1_reza.wav"; 
    rawfd = fopen(fname, "rb");
    if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) {
        char waveheader[44];
		fread(waveheader, 1, 44, rawfd);
    }
    
    ps_start_utt(ps);
    utt_started = FALSE;
	
    while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) {
		 

        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
        } 
        if (!in_speech && utt_started) {
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL);

            if (hyp != NULL){
				strncpy_s( str, hyp, strlen(hyp));
				printf("%s\n", hyp);
				listenCallback(str);
			}
            ps_start_utt(ps);
            utt_started = FALSE;
        }
    }
	
    ps_end_utt(ps);
    fclose(rawfd);
	return 0;
	
}
Example #20
0
int
main(int argc, char *argv[])
{
	ps_decoder_t *ps;
	ps_nbest_t *nbest;
	cmd_ln_t *config;
	FILE *rawfh;
	char const *hyp;
	int32 score, n;

	TEST_ASSERT(config =
		    cmd_ln_init(NULL, ps_args(), TRUE,
				"-hmm", MODELDIR "/en-us/en-us",
				"-lm", MODELDIR "/en-us/en-us.lm.bin",
				"-dict", MODELDIR "/en-us/cmudict-en-us.dict",
				"-fwdtree", "yes",
				"-fwdflat", "yes",
				"-bestpath", "yes",
				"-input_endian", "little",
				"-samprate", "16000", NULL));
	TEST_ASSERT(ps = ps_init(config));
	TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
	ps_decode_raw(ps, rawfh, -1);
	fclose(rawfh);
	hyp = ps_get_hyp(ps, &score);
	printf("BESTPATH: %s (%d)\n", hyp, score);

	for (n = 1, nbest = ps_nbest(ps); nbest && n < 10; nbest = ps_nbest_next(nbest), n++) {
		ps_seg_t *seg;
		hyp = ps_nbest_hyp(nbest, &score);
		printf("NBEST %d: %s (%d)\n", n, hyp, score);
		for (seg = ps_nbest_seg(nbest); seg;
		     seg = ps_seg_next(seg)) {
			char const *word;
			int sf, ef;

			word = ps_seg_word(seg);
			ps_seg_frames(seg, &sf, &ef);
			printf("%s %d %d\n", word, sf, ef);
		}
	}
	if (nbest)
	    ps_nbest_free(nbest);
	ps_free(ps);
	cmd_ln_free_r(config);
	return 0;
}
Example #21
0
static void
recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[2048];
    uint8 utt_started, in_speech;
    int32 k;
    char const *hyp;

    if ((ad = ad_open_dev(AUDIO_DEVICE_NAME,
                          (int) SAMPLE_RATE )) == NULL) {
        E_FATAL("Failed to open audio device\n");
	}
    if (ad_start_rec(ad) < 0) {
        E_FATAL("Failed to start recording\n");
    }
    if (ps_start_utt(ps) < 0) {
        E_FATAL("Failed to start utterance\n");
    }
    utt_started = FALSE;
    printf("READY....\n");

    for (;;) {
        if ((k = ad_read(ad, adbuf, 2048)) < 0)
            E_FATAL("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
            printf("Listening...\n");
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition, time to start new utterance  */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL );
            if (hyp != NULL)
                printf("%s\n", hyp);

            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
            printf("READY....\n");
        }
        sleep_msec(100);
    }
    ad_close(ad);
}
Example #22
0
int
main(int argc, char *argv[])
{
  ps_decoder_t *ps;
  cmd_ln_t *config;
  FILE *fh;
  char const *hyp, *uttid;
  int16 buf[512];
  int rv;
  int32 score;

  config = cmd_ln_init(NULL, ps_args(), TRUE,
		       "-hmm", MODELDIR "/en-us/en-us",
		       "-lm", MODELDIR "/en-us/en-us.lm.dmp",
		       "-dict", MODELDIR "/en-us/cmudict-en-us.dict",
		       NULL);
  if (config == NULL)
    return 1;
  ps = ps_init(config);
  if (ps == NULL)
    return 1;

  fh = fopen("goforward.raw", "rb");
  if (fh == NULL)
    return -1;
  rv = ps_start_utt(ps);
  if (rv < 0)
    return 1;
  while (!feof(fh)) {
    size_t nsamp;
    nsamp = fread(buf, 2, 512, fh);
    rv = ps_process_raw(ps, buf, nsamp, FALSE, FALSE);
  }
  rv = ps_end_utt(ps);
  if (rv < 0)
    return 1;
  hyp = ps_get_hyp(ps, &score);
  if (hyp == NULL)
    return 1;
  printf("Recognized: %s\n", hyp);

  fclose(fh);
  ps_free(ps);
  cmd_ln_free_r(config);
  return 0;
}
int printResult()
{
    char const *hyp, *uttid;
    int32 score;

    hyp = ps_get_hyp(ps, &score, &uttid);
    if (hyp == NULL) {
        printf("Error getting result.\n");
        return 0;
    }

    printf("Result => %s, uttid: %s, score: %d\n", hyp, uttid, score);

    if (strcmp(hyp, "left") == 0)
        return 0;

    return 1;
}
uint32 FSpeechRecognitionWorker::Run() {

	char const *hyp;
	// attempt to open the default recording device
	if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
		(int)cmd_ln_float32_r(config,
		"-samprate"))) == NULL) {
		ClientMessage(FString(TEXT("Failed to open audio device")));
		return 1;
	}
	if (ad_start_rec(ad) < 0) {
		ClientMessage(FString(TEXT("Failed to start recording")));
		return 2;
	}
	if (ps_start_utt(ps) < 0) {
		ClientMessage(FString(TEXT("Failed to start utterance")));
		return 3;
	}

	while (StopTaskCounter.GetValue() == 0) {
		if ((k = ad_read(ad, adbuf, 1024)) < 0)
			ClientMessage(FString(TEXT("Failed to read audio")));
		ps_process_raw(ps, adbuf, k, 0, 0);
		in_speech = ps_get_in_speech(ps);
		if (in_speech && !utt_started) {
			utt_started = 1;
		}
		if (!in_speech && utt_started) {
			/* speech -> silence transition, time to start new utterance  */
			ps_end_utt(ps);
			hyp = ps_get_hyp(ps, NULL);
			if (hyp != NULL)
				Manager->WordSpoken_method(FString(hyp));

			if (ps_start_utt(ps) < 0)
				ClientMessage(FString(TEXT("Failed to start")));
			utt_started = 0;
		}
	}

	ad_close(ad);
	return 0;
}
static GstFlowReturn
gst_pocketsphinx_chain(GstPad * pad, GstBuffer * buffer)
{
    GstPocketSphinx *ps;

    ps = GST_POCKETSPHINX(GST_OBJECT_PARENT(pad));

    /* Start an utterance for the first buffer we get (i.e. we assume
     * that the VADER is "leaky") */
    if (!ps->listening) {
        ps->listening = TRUE;
        ps_start_utt(ps->ps, NULL);
    }
    ps_process_raw(ps->ps,
                   (short *)GST_BUFFER_DATA(buffer),
                   GST_BUFFER_SIZE(buffer) / sizeof(short),
                   FALSE, FALSE);

    /* Get a partial result every now and then, see if it is different. */
    if (ps->last_result_time == 0
        /* Check every 100 milliseconds. */
        || (GST_BUFFER_TIMESTAMP(buffer) - ps->last_result_time) > 100*10*1000) {
        int32 score;
        char const *hyp;
        char const *uttid;

        hyp = ps_get_hyp(ps->ps, &score, &uttid);
        ps->last_result_time = GST_BUFFER_TIMESTAMP(buffer);
        if (hyp && strlen(hyp) > 0) {
            if (ps->last_result == NULL || 0 != strcmp(ps->last_result, hyp)) {
                g_free(ps->last_result);
                ps->last_result = g_strdup(hyp);
                /* Emit a signal for applications. */
                g_signal_emit(ps, gst_pocketsphinx_signals[SIGNAL_PARTIAL_RESULT],
                              0, hyp, uttid);
            }
        }
    }
    gst_buffer_unref(buffer);
    return GST_FLOW_OK;
}
int processCommands()
{
    int32 samples;
    int16 audioBuf[BUFFER_SIZE];
    char const *uttid;
    char const *hyp;
    
    while(run)
    {
        printf("Waiting for utterance...\n");
        samples = waitForNextUtterance();
        if(samples < 0)
            return -1;
        
        if(ps_start_utt(psDecoder, NULL) < 0) {
            fprintf(stderr, "Failed to start next utterance\n");
            return -1;
        }
        ps_process_raw(psDecoder, audioBuf, samples, FALSE, FALSE);
        
        printf("Recording...\n");
        fflush(stdout);
        record();
        
        ad_stop_rec(audioDevice);
        while(ad_read(audioDevice, audioBuf, BUFFER_SIZE) >= 0);
        cont_ad_reset(continousAudoDevice);
        ps_end_utt(psDecoder);
        
        hyp = ps_get_hyp(psDecoder, NULL, &uttid);
        printf("Heard: %s\n", hyp);
        
        if (ad_start_rec(audioDevice) < 0) {
            fprintf(stderr, "Failed to start audio device.\n");
            return -1;
        }
    }
    
    return 0;
}
Example #27
0
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    AVDictionary **metadata = &in->metadata;
    ASRContext *s = ctx->priv;
    int have_speech;
    const char *speech;

    ps_process_raw(s->ps, (const int16_t *)in->data[0], in->nb_samples, 0, 0);
    have_speech = ps_get_in_speech(s->ps);
    if (have_speech && !s->utt_started)
        s->utt_started = 1;
    if (!have_speech && s->utt_started) {
        ps_end_utt(s->ps);
        speech = ps_get_hyp(s->ps, NULL);
        if (speech != NULL)
            av_dict_set(metadata, "lavfi.asr.text", speech, 0);
        ps_start_utt(s->ps);
        s->utt_started = 0;
    }

    return ff_filter_frame(ctx->outputs[0], in);
}
Example #28
0
static void fsg_processor(context_t *ctx,
                          srs_srec_utterance_t *utt,
                          srs_srec_candidate_t *cands,
                          srs_srec_candidate_t **sorted)
{
    filter_buf_t *filtbuf;
    decoder_set_t *decset;
    decoder_t *dec;
    logmath_t *lmath;
    const char *uttid;
    int32_t score;
    double prob;
    srs_srec_candidate_t *cand;
    srs_srec_token_t *tkn;
    ps_lattice_t *dag;
    ps_latlink_t *lnk;
    ps_latnode_t *nod;
    const char *token;
    int32_t frlen;
    int32_t start, end;
    int16 fef, lef;

    if (!ctx || !(filtbuf = ctx->filtbuf) ||
        !(decset = ctx->decset) || !(dec = decset->curdec))
        return;

    frlen = filtbuf->frlen;
    lmath = ps_get_logmath(dec->ps);
    ps_get_hyp(dec->ps, &score, &uttid);
    prob = logmath_exp(lmath, score);

    cand = cands;
    cand->score = 1.0;
    cand->ntoken = 0;

    tkn = NULL;

    if ((dag = ps_get_lattice(dec->ps))) {

        if ((lnk = ps_lattice_traverse_edges(dag, NULL, NULL))) {

            ps_latlink_nodes(lnk, &nod);

            if (nod && (token = ps_latnode_word(dag, nod)) && *token != '<') {
                tkn = cand->tokens + cand->ntoken++;
                tkn->token = tknbase(token);
                tkn->start = ps_latnode_times(nod, &fef, &lef) * frlen;
                tkn->end = ((fef + lef) / 2) * frlen;
            }

            goto handle_destination_node;

            while ((lnk = ps_lattice_traverse_next(dag, NULL))) {

              handle_destination_node:
                nod = ps_latlink_nodes(lnk, NULL);

                if (nod && (token = ps_latnode_word(dag,nod)) && *token != '<')
                {
                    start = ps_latnode_times(nod, &fef, &lef) * frlen;
                    end = fef * frlen;

                    if (tkn && start < (int32_t)tkn->end)
                        break;  /* just take one candidate */

                    if (!tkn || !tkneq(token, tkn->token)) {
                        tkn = cand->tokens + cand->ntoken++;
                        tkn->token = tknbase(token);
                        tkn->start = start;
                        tkn->end = end + frlen;
                    }
                }
            }
        }
    }

    sorted[0] = cands;
    sorted[1] = NULL;

    utt->id = uttid;
    utt->score = prob < 0.00001 ? 0.00001 : prob;
    //utt->length = dag ? ps_lattice_n_frames(dag) * frlen : 0;
    utt->length = filtbuf->len;
    utt->ncand = 1;
    utt->cands = sorted;
}
Example #29
0
static void acoustic_processor(context_t *ctx,
                               srs_srec_utterance_t *utt,
                               srs_srec_candidate_t *cands,
                               srs_srec_candidate_t **sorted)
{
    filter_buf_t *filtbuf;
    decoder_set_t *decset;
    decoder_t *dec;
    logmath_t *lmath;
    const char *uttid;
    const char *hyp;
    int32 score;
    double prob;
    ps_nbest_t *nb;
    ps_seg_t *seg;
    int32_t frlen;
    int32 start, end;
    size_t ncand;
    srs_srec_candidate_t *cand;
    srs_srec_token_t *tkn;
    int32_t length;

    if (!ctx || !(filtbuf = ctx->filtbuf) ||
        !(decset = ctx->decset) || !(dec = decset->curdec))
        return;

    frlen = filtbuf->frlen;
    lmath = ps_get_logmath(dec->ps);
    uttid = "<unknown>";
    hyp = ps_get_hyp(dec->ps, &score, &uttid);
    prob = logmath_exp(lmath, score);
    length = 0;

    if (prob < 0.00000001)
        prob = 0.00000001;

    for (nb  = ps_nbest(dec->ps, 0,-1, NULL,NULL), ncand = 0;
         nb != NULL;
         nb  = ps_nbest_next(nb))
    {
        if (ncand >= CANDIDATE_MAX-1) {
            break;
            ps_nbest_free(nb);
        }

        if ((seg  = ps_nbest_seg(nb, &score))) {
            while (seg && strcmp(ps_seg_word(seg), "<s>"))
                seg = ps_seg_next(seg);

            if (!seg)
                continue;

            ps_seg_frames(seg, &start, &end);

            cand = cands + ncand;

            cand->score = logmath_exp(lmath, score) / prob;
            cand->ntoken = 0;

            length = 0;

            while ((seg = ps_seg_next(seg))) {
                if ((hyp = ps_seg_word(seg))) {
                    if (!strcmp(hyp, "</s>") ||
                        cand->ntoken >= CANDIDATE_TOKEN_MAX)
                    {
                        ncand++;
                        //memset(cand+1, 0, sizeof(srs_srec_candidate_t));
                        ps_seg_frames(seg, &start, &end);
                        ps_seg_free(seg);
                        //printf("hyp=</s> ncand=%d\n", ncand);
                        length = (end + 1) * frlen;
                        break;
                    }
                    else if (!strcmp(hyp, "<sil>")) {
                        ps_seg_frames(seg, &start, &end);
                        //printf("hyp=<sil> skip it\n");
                    }
                    else {
                        tkn = cand->tokens + cand->ntoken++;
                        tkn->token = tknbase(hyp);
                        ps_seg_frames(seg, &start, &end);
                        tkn->start = start * frlen;
                        tkn->end = (end + 1) * frlen;
                        //printf("hyp=%s (%d, %d) tkn count %d\n",
                        //      tkn->token, tkn->start,tkn->end, cand->ntoken);
                    }
                }
            } /* while seg */

            if (!seg && cand->ntoken > 0) {
                ncand++;
                cand->score *= 0.9; /* some penalty */
                //memset(cand+1, 0, sizeof(srs_srec_candidate_t));
            }

            if (!length) {
                tkn = cand->tokens + (cand->ntoken - 1);
                length = tkn->end;
            }
        }
    } /* for nb */

    memset(cand+1, 0, sizeof(srs_srec_candidate_t));

    utt->id = uttid;
    utt->score = prob;
    //utt->length = length;
    utt->length = filtbuf->len;
    utt->ncand = candidate_sort(cands, sorted);
    utt->cands = sorted;
}
Example #30
0
/*
 * Continuous recognition from a file
 */
static void
recognize_from_file()
{
    int16 adbuf[2048];
    const char *fname;
    const char *hyp;
    int32 k;
    uint8 utt_started, in_speech;
    int32 print_times = cmd_ln_boolean_r(config, "-time");

    fname = cmd_ln_str_r(config, "-infile");
    if ((rawfd = fopen(fname, "rb")) == NULL) {
        E_FATAL_SYSTEM("Failed to open file '%s' for reading",
                       fname);
    }
    
    if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) {
        char waveheader[44];
	fread(waveheader, 1, 44, rawfd);
	if (!check_wav_header(waveheader, (int)cmd_ln_float32_r(config, "-samprate")))
    	    E_FATAL("Failed to process file '%s' due to format mismatch.\n", fname);
    }

    if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".mp3") == 0) {
	E_FATAL("Can not decode mp3 files, convert input file to WAV 16kHz 16-bit mono before decoding.\n");
    }
    
    ps_start_utt(ps);
    utt_started = FALSE;

    while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) {
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
        } 
        if (!in_speech && utt_started) {
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL);
            if (hyp != NULL)
        	printf("%s\n", hyp);
            if (print_times)
        	print_word_times();
            fflush(stdout);

            ps_start_utt(ps);
            utt_started = FALSE;
        }
    }
    ps_end_utt(ps);
    if (utt_started) {
        hyp = ps_get_hyp(ps, NULL);
        if (hyp != NULL) {
    	    printf("%s\n", hyp);
    	    if (print_times) {
    		print_word_times();
	    }
	}
    }
    
    fclose(rawfd);
}