Example #1
/*
 * Continuous recognition from mic
 */
int
recognize_from_mic()
{
	ad_rec_t *ad;
    int16 adbuf[2048];
    const char *fname;
	const char* seg;
    int32 k;
	char str[1000]="";
    uint8 utt_started, in_speech;
	
    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"), 16000)) == NULL) {
        fprintf(stderr, "Failed to open audio device\n");
        return -1;
    }
    if (ad_start_rec(ad) < 0) {
        fprintf(stderr, "Failed to start recording\n");
        return -1;
    }
    
    ps_start_utt(ps);
    utt_started = FALSE;
	ps_seg_t *psegt;
    while (!finished) {
        if ((k = ad_read(ad, adbuf, 2048)) < 0) {
            fprintf(stderr, "Failed to read audio\n");
            break;
        }
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
        } 
        if (!in_speech && utt_started) {
            ps_end_utt(ps);
			psegt = ps_seg_iter(ps, NULL);
			while (psegt!=NULL){
				seg = ps_seg_word(psegt);
				snprintf(str, sizeof(str), "%s", seg);
				listenCallback(str);
				printf("%s\n", seg);
				int prob = ps_seg_prob(psegt,NULL,NULL,NULL);
				printf("%d\n", prob);
				psegt = ps_seg_next(psegt);
			}
            ps_start_utt(ps);
            utt_started = FALSE;
        }
		Sleep(100);
    }
	
    ps_end_utt(ps);
    ad_close(ad);
	return 0;
}
Example #2
int processRaw(const char *rawFile) {
    const char *hyp, *uttid;
    int16 buf[512];
    int rv;
    int32 score;

    // Open the wav file passed from argument
    printf("file: %s\n", rawFile);
    fh = fopen(rawFile, "rb");
    if (fh == NULL) {
        fprintf(stderr, "Unable to open input file %s\n", rawFile);
        return -1;
    }

    // Start utterance
    rv = ps_start_utt(ps);
    
    // Process buffer, 512 samples at a time
    while (!feof(fh)) {
        size_t nsamp;
        nsamp = fread(buf, 2, 512, fh);
        rv = ps_process_raw(ps, buf, nsamp, FALSE, FALSE);
    }
    
    // Retrieve the recognized string
    rv = ps_end_utt(ps);
    hyp = ps_get_hyp(ps, &score);
    if (hyp != NULL)
        printf("Recognized: |%s|\n", hyp);
    fflush(stdout);

    // Close file
    fclose(fh);

    return 0;
}
Example #3
std::string PocketSphinxServer::decode(const PocketSphinxIce::sample& signal, const Ice::Current& c)
{
	std::cout << "Decode\n";
	int rv;
	const char* hyp;
	const char* uttid;
	int32 score;

	rv = ps_start_utt(ps);
	if (rv < 0)
		throw PocketSphinxIce::Error("Error in ps_start_utt");

	ps_process_raw(ps, signal.data(), signal.size(), FALSE, FALSE);

	rv = ps_end_utt(ps);
	if (rv < 0)
		throw PocketSphinxIce::Error("Error in ps_end_utt");

	hyp = ps_get_hyp(ps, &score);
	if (!hyp)
		throw PocketSphinxIce::Error("ps_get_hyp returned NULL");

	std::cout << "return:" << hyp << '\n';
	return hyp;
}
Example #4
int ofApp::engineClose()
{
    char const *uttid;
    int rv;
    int32 score;
    
    rv = ps_end_utt(ps);
    if (rv < 0)
    {
        return 1;
    }
    hyp = ps_get_hyp(ps, &score);
    if (hyp == NULL)
    {
        return 1;
    }
    
    printf("NEWLINE_________\n\n\nRecognized: %s\n", hyp);
    
    sentence = hyp;
    
    process_result();

    return 0;
}
Example #5
/*! function to close the asr interface */
static switch_status_t pocketsphinx_asr_close(switch_asr_handle_t *ah, switch_asr_flag_t *flags)
{
	pocketsphinx_t *ps = (pocketsphinx_t *) ah->private_info;

    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, ">>>>>>>>pocketsphinx_asr_close<<<<<<<<<\n");

	switch_mutex_lock(ps->flag_mutex);
	if (switch_test_flag(ps, PSFLAG_ALLOCATED)) {
		if (switch_test_flag(ps, PSFLAG_READY)) {
			ps_end_utt(ps->ps);
		}
		ps_free(ps->ps);
		ps->ps = NULL;
	}
	switch_safe_free(ps->grammar);
	switch_mutex_unlock(ps->flag_mutex);
	switch_clear_flag(ps, PSFLAG_HAS_TEXT);
	switch_clear_flag(ps, PSFLAG_READY);
	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Port Closed.\n");
	switch_set_flag(ah, SWITCH_ASR_FLAG_CLOSED);
    
    /* ifly  end session */
    
	QISRSessionEnd(ps->ifly_session_id, ps->ifly_hints);
    
	return SWITCH_STATUS_SUCCESS;
}
Example #6
/*
 * Continuous recognition from a file
 */
int
recognize_from_file()
{
    int16 adbuf[2048];
    const char *fname;
    const char *hyp;
    int32 k;
	char str[1000]="";
    uint8 utt_started, in_speech;
	
    fname = "C:/Users/Reza/Documents/GitHub/speech_agent/presentation_samples/italy1_reza.wav"; 
    rawfd = fopen(fname, "rb");
    if (rawfd == NULL) {
        fprintf(stderr, "Failed to open file '%s' for reading\n", fname);
        return -1;
    }
    if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) {
        char waveheader[44];
        fread(waveheader, 1, 44, rawfd);
    }
    
    ps_start_utt(ps);
    utt_started = FALSE;
	
    while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) {
		 

        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
        } 
        if (!in_speech && utt_started) {
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL);

            if (hyp != NULL){
				snprintf(str, sizeof(str), "%s", hyp);
				printf("%s\n", hyp);
				listenCallback(str);
			}
            ps_start_utt(ps);
            utt_started = FALSE;
        }
    }
	
    ps_end_utt(ps);
    fclose(rawfd);
	return 0;
	
}
Example #7
int
main(int argc, char *argv[])
{
	ps_decoder_t *ps;
	cmd_ln_t *config;
	FILE *fh;
	char const *hyp, *uttid;
	int16 buf[512];
	int rv;
	int32 score;
	//int i;

	config = cmd_ln_init(NULL, ps_args(), TRUE,
		"-hmm", MODELDIR "/hmm/en_US/hub4wsj_sc_8k",
		"-lm", MODELDIR "/lm/en/turtle.DMP",
		"-dict", MODELDIR "/lm/en/turtle.dic",
		NULL);
	if (config == NULL)
		return 1;
	ps = ps_init(config);
	if (ps == NULL)
		return 1;

	fh = fopen("goforward.raw", "rb");
	if (fh == NULL) {
		perror("Failed to open goforward.raw");
		return 1;
	}

	rv = ps_decode_raw(ps, fh, "goforward", -1);
	if (rv < 0)
		return 1;
	hyp = ps_get_hyp(ps, &score, &uttid);
	if (hyp == NULL)
		return 1;
	printf("Recognized: %s\n", hyp);

	fseek(fh, 0, SEEK_SET);
	rv = ps_start_utt(ps, "goforward");
	if (rv < 0)
		return 1;
	while (!feof(fh)) {
		size_t nsamp;
		nsamp = fread(buf, 2, 512, fh);
		rv = ps_process_raw(ps, buf, nsamp, FALSE, FALSE);
	}
	rv = ps_end_utt(ps);
	if (rv < 0)
		return 1;
	hyp = ps_get_hyp(ps, &score, &uttid);
	if (hyp == NULL)
		return 1;
	printf("Recognized: %s\n", hyp);

	fclose(fh);
	ps_free(ps);
	cmd_ln_free_r(config);
	return 0;
}
Example #8
int main(int argc, char *argv[]) {
    ps_decoder_t *ps;
    cmd_ln_t *config;
    FILE *fh;
    char const *hyp, *uttid;
    int16 buf[512];
    int rv;
    int32 score;

    config = cmd_ln_init(NULL, ps_args(), TRUE,
            "-hmm",    MODELDIR "/en-us/en-us",
            "-lm",     MODELDIR "/en-us/en-us.lm.bin",
            "-dict",   MODELDIR "/en-us/cmudict-en-us.dict",
            NULL);

    if (config == NULL) {
        fprintf(stderr, "Failed to create config object, see log for details\n");
        return -1;
    }
    
    // Initialize pocketsphinx
    ps = ps_init(config);
    if (ps == NULL) {
        fprintf(stderr, "Failed to create recognizer, see log for details\n");
        return -1;
    }

    // Open the wav file passed from argument
    printf("file: %s\n", argv[1]);
    fh = fopen(argv[1], "rb");
    if (fh == NULL) {
        fprintf(stderr, "Unable to open input file %s\n", argv[1]);
        return -1;
    }

    // Start utterance
    rv = ps_start_utt(ps);
    
    // Process buffer, 512 samples at a time
    while (!feof(fh)) {
        size_t nsamp;
        nsamp = fread(buf, 2, 512, fh);
        rv = ps_process_raw(ps, buf, nsamp, FALSE, FALSE);
    }
    
    // Retrieve the recognized string
    rv = ps_end_utt(ps);
    hyp = ps_get_hyp(ps, &score);
    if (hyp != NULL)
        printf("Recognized: |%s|\n", hyp);

    // free memory
    fclose(fh);
    ps_free(ps);
    cmd_ln_free_r(config);
    
    return 0;
}
Example #9
/**
 * You must successfully call spInitListener
 * once before using this function.
 *
 * Reads the next block of audio from the microphone
 * up to SPLBUFSIZE number of samples
 * (defined in splistener.h).
 * If an utterance was completed in that block,
 * the transcription is stored in the string words.
 *
 * When calling this function in a realtime loop, delay
 * by some amount of time between calls keeping in mind
 * your recording's sample rate and maximum samples read
 * per call (ex. sleep the thread for 100 milliseconds)
 * so that some audio can be recorded for the next call.
 *
 * @return true if a speech session was completed and
 *         transcribed this block, otherwise false.
 */
static bool spDecode() {
    static bool uttered = false;

    // lock pocketsphinx resources to make sure they 
    // don't get freed by main thread while in use
    std::lock_guard<std::mutex> ps_lock(ps_mtx);
    if(!mic || !ps)
        return false;

    int samples_read = ad_read(mic, buf, SPLBUFSIZE);
    if (samples_read <= 0) {
        spError("failed to read audio :(");
        return false;
    }

    ps_process_raw(ps, buf, samples_read, FALSE, FALSE);
    bool talking = ps_get_in_speech(ps);

    // Just started talking
    if (talking && !uttered) {
        uttered = true;
        return false;
    }

    // Stopped talking, so transcribe what was said
    // and begin the next utterance
    if (!talking && uttered) {
        ps_end_utt(ps);
        const char *trans = ps_get_hyp(ps, NULL);

        if (ps_start_utt(ps) < 0) {
            spError("failed to start utterance :(");
        }
        uttered = false;

        int l = trans ? (int) strlen(trans) : 0;
        if (l > 0) {
            std::lock_guard<std::mutex> lock(words_mtx);
            if (words && l + 1 > words_buf_size) {
                delete[] words;
                words = NULL;
            }
            if (!words) {
                words = new char[l + 1];
                words_buf_size = l + 1;
            }

            std::copy(trans, trans + l, words);
            words[l] = '\0';
            
            return true;
        }
    }

    return false;
}
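The comment block above describes how spDecode() is meant to be driven from a realtime loop. A minimal sketch of such a caller is shown below; it reuses spInitListener(), spDecode(), and the shared words/words_mtx globals from the snippet, while the keep_running flag, the printf reporting, and the exact 100 ms sleep are illustrative assumptions rather than part of the original code.

// Hypothetical caller for spDecode(), not part of the original source.
#include <chrono>
#include <cstdio>
#include <mutex>
#include <thread>

static void spListenLoop(volatile bool &keep_running) {
    // spInitListener() is assumed to have completed successfully once already.
    while (keep_running) {
        if (spDecode()) {
            // An utterance was completed in this block; `words` holds the text.
            std::lock_guard<std::mutex> lock(words_mtx);
            if (words != NULL)
                std::printf("transcribed: %s\n", words);
        }
        // Leave time for more audio to be recorded before the next read
        // (roughly 100 ms, as suggested in the comment above).
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }
}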
Example #10
bool GqAndroidSphinx::end_recognize_from_mic() {
	m_precord->stop_record();
	pthread_mutex_lock(&m_pt_mutex);
	int rv = ps_end_utt(m_pdecoder);
	pthread_mutex_unlock(&m_pt_mutex);
	m_fout.close();
	if (rv < 0)
		return false;
	return true;
}
Example #11
 ReturnType Recognizer::stop() {
   if ((decoder == NULL) || (!is_recording)) return BAD_STATE;
   if (ps_end_utt(decoder) < 0) {
     return RUNTIME_ERROR;
   }
   const char* h = ps_get_hyp(decoder, &score, &sentence_id);
   current_hyp = (h == NULL) ? "" : h;
   is_recording = false;
   return SUCCESS;
 }
Example #12
int decode()
{
    int ret;
    int16 buf[BUFFER_SIZE];
    
    printf("Listening for input...\n");
    
    if (ad_start_rec(ad) < 0) {
        printf("Error starting recording.\n");
        return 0;
    }
    
    // wait until some non-silent audio is available
    while ((ret = cont_ad_read(c_ad, buf, BUFFER_SIZE)) == 0)
        usleep(1000);
    if (ret < 0) {
        printf("Failed to record audio.\n");
        return 0;
    }

    if (ps_start_utt(ps, NULL) < 0) {
        printf("Failed to start utterance.\n");
        return 0;
    }

    // process only the samples actually read, not the full buffer
    ret = ps_process_raw(ps, buf, ret, 0, 0);
    if (ret < 0) {
        printf("Error decoding.\n");
        return 0;
    }
    
    do
    {
        ret = cont_ad_read(c_ad, buf, BUFFER_SIZE);

        if (ret < 0) {
            printf("Failed to record audio.\n");
            return 0;
        } else if(ret > 0) {
            // Valid speech data read.
            ret = ps_process_raw(ps, buf, ret, 0, 0);
            if (ret < 0) {
                printf("Error decoding.\n");
                return 0;
            }
        } else {
            //no data
            usleep(1000);
        }
    } while(getRun());
    
    ad_stop_rec(ad);
    while (ad_read(ad, buf, BUFFER_SIZE) >= 0);
    cont_ad_reset(c_ad);

    ps_end_utt(ps);
    return 1;
}
Example #13
int
ps_decode_senscr(ps_decoder_t *ps, FILE *senfh)
{
    int nfr, n_searchfr;

    ps_start_utt(ps);
    n_searchfr = 0;
    acmod_set_insenfh(ps->acmod, senfh);
    while ((nfr = acmod_read_scores(ps->acmod)) > 0) {
        if ((nfr = ps_search_forward(ps)) < 0) {
            ps_end_utt(ps);
            return nfr;
        }
        n_searchfr += nfr;
    }
    ps_end_utt(ps);
    acmod_set_insenfh(ps->acmod, NULL);

    return n_searchfr;
}
Example #14
int
main(int argc, char *argv[])
{
    ps_decoder_t *ps;
    cmd_ln_t *config;
    FILE *fh;
    char const *hyp, *uttid;
    int16 buf[512];
    int rv;
    int32 score;

    config = cmd_ln_init(NULL, ps_args(), TRUE,
                 "-hmm", MODELDIR "/en-us/en-us",
                 "-keyphrase", "marieta",
                 "-dict", MODELDIR "/en-us/cmudict-en-us.dict",
                 "-kws_threshold", "1e-30",
                 NULL);
    if (config == NULL) {
        fprintf(stderr, "Failed to create config object, see log for details\n");
        return -1;
    }
    
    ps = ps_init(config);
    if (ps == NULL) {
        fprintf(stderr, "Failed to create recognizer, see log for details\n");
        return -1;
    }

    fh = fopen("data/marieta.raw", "rb");
    if (fh == NULL) {
        fprintf(stderr, "Unable to open input file goforward.raw\n");
        return -1;
    }

    rv = ps_start_utt(ps);
    
    while (!feof(fh)) {
        size_t nsamp;
        nsamp = fread(buf, 2, 512, fh);
        rv = ps_process_raw(ps, buf, nsamp, FALSE, FALSE);
    }
    
    rv = ps_end_utt(ps);
    hyp = ps_get_hyp(ps, &score);
    if (hyp != NULL)
        printf("Recognized: %s\n", hyp);

    fclose(fh);
    ps_free(ps);
    cmd_ln_free_r(config);
    
    return 0;
}
Example #15
void Recognizer::run()
{
    // Create audio converter:
    auto converter = ci::audio::dsp::Converter::create( mMonitorNode->getSampleRate(), 16000, mMonitorNode->getNumChannels(), 1, mMonitorNode->getFramesPerBlock() );
    // Create buffer for converted audio:
    ci::audio::Buffer destBuffer( converter->getDestMaxFramesPerBlock(), converter->getDestNumChannels() );

    bool utt_started, in_speech;

    if( ps_start_utt( mDecoder ) < 0 )
        throw std::runtime_error( "Could not start utterance" );

    utt_started = false;

    while( ! mStop ) {
        // Convert buffer:
        std::pair<size_t,size_t> convertResult = converter->convert( &( mMonitorNode->getBuffer() ), &destBuffer );

        // Convert buffer data:
        int16_t* data = new int16_t[ convertResult.second ];
        convertFloatToInt16( destBuffer.getData(), data, convertResult.second );

        // Process buffer:
        ps_process_raw( mDecoder, data, convertResult.second, false, false );

        // Cleanup buffer data:
        delete[] data;

        in_speech = static_cast<bool>( ps_get_in_speech( mDecoder ) );

        if( in_speech && ! utt_started ) {
            utt_started = true;
        }

        if( ! in_speech && utt_started ) {
            // Start new utterance on speech to silence transition:
            ps_end_utt( mDecoder );

            // Pass to handler:
            if( mHandler )
                mHandler->event( mDecoder );

            // Prepare for next utterance:
            if( ps_start_utt( mDecoder ) < 0 )
                throw std::runtime_error( "Could not start utterance" );

            utt_started = false;
        }

        std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
    }
}
Example #16
void utterance_end(context_t *ctx)
{
    decoder_set_t *decset;
    decoder_t *dec;

    if (ctx && (decset = ctx->decset) && (dec = decset->curdec)) {
        if (dec->utter) {
            ps_end_utt(dec->ps);
            dec->utter = false;
            process_utterance(ctx);
        }
    }
}
Example #17
static void gst_sphinx_sink_process_chunk (GstSphinxSink *sphinxsink)
{
	int32 k;
	int16 adbuf[REQUIRED_FRAME_SAMPLES];
	
	k = cont_ad_read (sphinxsink->cont, adbuf, REQUIRED_FRAME_SAMPLES);
	
	if (k == 0 && sphinxsink->last_ts == 0) {
	      return;
	} else if (k == 0 && sphinxsink->cont->read_ts - sphinxsink->last_ts > 
			    DEFAULT_SAMPLES_PER_SEC) {
	      int32 score;
	      const char *hyp;
    	      char *stripped_hyp;
    	      int i, j;
  
	      ps_end_utt (sphinxsink->decoder);
   	      if ((hyp = ps_get_hyp (sphinxsink->decoder, &score, NULL)) == NULL) {
	    	
	    	      gst_sphinx_sink_send_message (sphinxsink, "message", "");
                      g_message("Not Recognized");
	      
	      } else {
	    	      stripped_hyp = 
		           g_malloc (strlen (hyp) + 1);
	    	      for (i=0, j=0; hyp[i] != 0; i++) {
	    	    	    if (hyp[i] != '(' && hyp[i] != ')' && (hyp[i] < '0' || hyp[i] > '9')) {
	    	    		    stripped_hyp[j++] = hyp[i];
	    	    	    }
	    	      }
	    	      stripped_hyp [j] = 0;
	    	      
	    	      gst_sphinx_sink_send_message (sphinxsink, "message", stripped_hyp);
                      g_message("Recognized: %s", stripped_hyp);
	      }

	      sphinxsink->last_ts = 0;
	      sphinxsink->ad.listening = 0;

	} else if (k != 0) {
	     if (sphinxsink->ad.listening == 0) {
	    	    ps_start_utt (sphinxsink->decoder, NULL);
	    	    sphinxsink->ad.listening = 1;
		    gst_sphinx_sink_send_message (sphinxsink, "listening", NULL);
	    }
	
	     ps_process_raw (sphinxsink->decoder, adbuf, k, 0, 0);
	     sphinxsink->last_ts = sphinxsink->cont->read_ts;
	}
}
Example #18
/*! function to pause recognizer */
static switch_status_t pocketsphinx_asr_pause(switch_asr_handle_t *ah)
{
	pocketsphinx_t *ps = (pocketsphinx_t *) ah->private_info;
	switch_status_t status = SWITCH_STATUS_FALSE;

	switch_mutex_lock(ps->flag_mutex);
	if (switch_test_flag(ps, PSFLAG_READY)) {
		ps_end_utt(ps->ps);
		switch_clear_flag(ps, PSFLAG_READY);
		status = SWITCH_STATUS_SUCCESS;
	}
	switch_mutex_unlock(ps->flag_mutex);

	return status;
}
Example #19
/*
 * Main utterance processing loop:
 *     for (;;) {
 *        start utterance and wait for speech to process
 *        decoding till end-of-utterance silence will be detected
 *        print utterance result;
 *     }
 */
static void
recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[2048];
    uint8 utt_started, in_speech;
    int32 k;
    char const *hyp;

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int) cmd_ln_float32_r(config,
                                                 "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");

    if (ps_start_utt(ps) < 0)
        E_FATAL("Failed to start utterance\n");
    utt_started = FALSE;
    E_INFO("Ready....\n");

    for (;;) {
        if ((k = ad_read(ad, adbuf, 2048)) < 0)
            E_FATAL("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
            E_INFO("Listening...\n");
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition, time to start new utterance  */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL );
            if (hyp != NULL) {
                printf("%s\n", hyp);
                fflush(stdout);
            }

            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
            E_INFO("Ready....\n");
        }
        sleep_msec(100);
    }
    ad_close(ad);
}
Example #20
static void
recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[2048];
    uint8 utt_started, in_speech;
    int32 k;
    char const *hyp;

    if ((ad = ad_open_dev(AUDIO_DEVICE_NAME,
                          (int) SAMPLE_RATE )) == NULL) {
        E_FATAL("Failed to open audio device\n");
	}
    if (ad_start_rec(ad) < 0) {
        E_FATAL("Failed to start recording\n");
    }
    if (ps_start_utt(ps) < 0) {
        E_FATAL("Failed to start utterance\n");
    }
    utt_started = FALSE;
    printf("READY....\n");

    for (;;) {
        if ((k = ad_read(ad, adbuf, 2048)) < 0)
            E_FATAL("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
            printf("Listening...\n");
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition, time to start new utterance  */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL );
            if (hyp != NULL)
                printf("%s\n", hyp);

            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
            printf("READY....\n");
        }
        sleep_msec(100);
    }
    ad_close(ad);
}
Example #21
int
main(int argc, char *argv[])
{
  ps_decoder_t *ps;
  cmd_ln_t *config;
  FILE *fh;
  char const *hyp, *uttid;
  int16 buf[512];
  int rv;
  int32 score;

  config = cmd_ln_init(NULL, ps_args(), TRUE,
		       "-hmm", MODELDIR "/en-us/en-us",
		       "-lm", MODELDIR "/en-us/en-us.lm.dmp",
		       "-dict", MODELDIR "/en-us/cmudict-en-us.dict",
		       NULL);
  if (config == NULL)
    return 1;
  ps = ps_init(config);
  if (ps == NULL)
    return 1;

  fh = fopen("goforward.raw", "rb");
  if (fh == NULL)
    return -1;
  rv = ps_start_utt(ps);
  if (rv < 0)
    return 1;
  while (!feof(fh)) {
    size_t nsamp;
    nsamp = fread(buf, 2, 512, fh);
    rv = ps_process_raw(ps, buf, nsamp, FALSE, FALSE);
  }
  rv = ps_end_utt(ps);
  if (rv < 0)
    return 1;
  hyp = ps_get_hyp(ps, &score);
  if (hyp == NULL)
    return 1;
  printf("Recognized: %s\n", hyp);

  fclose(fh);
  ps_free(ps);
  cmd_ln_free_r(config);
  return 0;
}
Example #22
/*! function to feed audio to the ASR */
static switch_status_t pocketsphinx_asr_feed(switch_asr_handle_t *ah, void *data, unsigned int len, switch_asr_flag_t *flags)
{
	pocketsphinx_t *ps = (pocketsphinx_t *) ah->private_info;
	int rv = 0;

	if (switch_test_flag(ah, SWITCH_ASR_FLAG_CLOSED))
		return SWITCH_STATUS_BREAK;

	if (!switch_test_flag(ps, PSFLAG_HAS_TEXT) && switch_test_flag(ps, PSFLAG_READY)) {
		if (stop_detect(ps, (int16_t *) data, len / 2)) {
			char const *hyp;

			switch_mutex_lock(ps->flag_mutex);
			if ((hyp = ps_get_hyp(ps->ps, &ps->score, &ps->uttid))) {
				if (!zstr(hyp)) {
					ps_end_utt(ps->ps);
					switch_clear_flag(ps, PSFLAG_READY);
					if ((hyp = ps_get_hyp(ps->ps, &ps->score, &ps->uttid))) {
						if (zstr(hyp)) {
							switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Lost the text, never mind....\n");
							ps_start_utt(ps->ps, NULL);
							switch_set_flag(ps, PSFLAG_READY);
						} else {
							ps->hyp = switch_core_strdup(ah->memory_pool, hyp);
							switch_set_flag(ps, PSFLAG_HAS_TEXT);
						}
					}
				}
			}
			switch_mutex_unlock(ps->flag_mutex);
		}

		/* only feed ps_process_raw when we are listening */
		if (ps->listening) {
			switch_mutex_lock(ps->flag_mutex);
			rv = ps_process_raw(ps->ps, (int16 *) data, len / 2, FALSE, FALSE);
			switch_mutex_unlock(ps->flag_mutex);
		}

		if (rv < 0) {
			return SWITCH_STATUS_FALSE;
		}
	}

	return SWITCH_STATUS_SUCCESS;
}
Example #23
uint32 FSpeechRecognitionWorker::Run() {

	char const *hyp;
	// attempt to open the default recording device
	if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
		(int)cmd_ln_float32_r(config,
		"-samprate"))) == NULL) {
		ClientMessage(FString(TEXT("Failed to open audio device")));
		return 1;
	}
	if (ad_start_rec(ad) < 0) {
		ClientMessage(FString(TEXT("Failed to start recording")));
		return 2;
	}
	if (ps_start_utt(ps) < 0) {
		ClientMessage(FString(TEXT("Failed to start utterance")));
		return 3;
	}

	while (StopTaskCounter.GetValue() == 0) {
		if ((k = ad_read(ad, adbuf, 1024)) < 0)
			ClientMessage(FString(TEXT("Failed to read audio")));
		ps_process_raw(ps, adbuf, k, 0, 0);
		in_speech = ps_get_in_speech(ps);
		if (in_speech && !utt_started) {
			utt_started = 1;
		}
		if (!in_speech && utt_started) {
			/* speech -> silence transition, time to start new utterance  */
			ps_end_utt(ps);
			hyp = ps_get_hyp(ps, NULL);
			if (hyp != NULL)
				Manager->WordSpoken_method(FString(hyp));

			if (ps_start_utt(ps) < 0)
				ClientMessage(FString(TEXT("Failed to start")));
			utt_started = 0;
		}
	}

	ad_close(ad);
	return 0;
}
Example #24
long
ps_decode_raw(ps_decoder_t *ps, FILE *rawfh,
              long maxsamps)
{
    int16 *data;
    long total, pos, endpos;

    ps_start_stream(ps);
    ps_start_utt(ps);

    /* If this file is seekable or maxsamps is specified, then decode
     * the whole thing at once. */
    if (maxsamps != -1) {
        data = ckd_calloc(maxsamps, sizeof(*data));
        total = fread(data, sizeof(*data), maxsamps, rawfh);
        ps_process_raw(ps, data, total, FALSE, TRUE);
        ckd_free(data);
    } else if ((pos = ftell(rawfh)) >= 0) {
        fseek(rawfh, 0, SEEK_END);
        endpos = ftell(rawfh);
        fseek(rawfh, pos, SEEK_SET);
        maxsamps = endpos - pos;

        data = ckd_calloc(maxsamps, sizeof(*data));
        total = fread(data, sizeof(*data), maxsamps, rawfh);
        ps_process_raw(ps, data, total, FALSE, TRUE);
        ckd_free(data);
    } else {
        /* Otherwise decode it in a stream. */
        total = 0;
        while (!feof(rawfh)) {
            int16 data[256];
            size_t nread;

            nread = fread(data, sizeof(*data), sizeof(data)/sizeof(*data), rawfh);
            ps_process_raw(ps, data, nread, FALSE, FALSE);
            total += nread;
        }
    }
    ps_end_utt(ps);
    return total;
}
Example #25
static void
test_set_search()
{
    cmd_ln_t *config = default_config();
    ps_decoder_t *ps = ps_init(config);
    ps_search_iter_t *itor;

    jsgf_t *jsgf = jsgf_parse_file(DATADIR "/goforward.gram", NULL);
    fsg_model_t *fsg = jsgf_build_fsg(jsgf,
                                      jsgf_get_rule(jsgf, "goforward.move"),
                                      ps->lmath, cmd_ln_int32_r(config, "-lw"));
    TEST_ASSERT(!ps_set_fsg(ps, "goforward", fsg));
    fsg_model_free(fsg);

    TEST_ASSERT(!ps_set_jsgf_file(ps, "goforward_other", DATADIR "/goforward.gram"));

    ngram_model_t *lm = ngram_model_read(config, DATADIR "/tidigits/lm/tidigits.lm.dmp",
                                         NGRAM_AUTO, ps->lmath);
    TEST_ASSERT(!ps_set_lm(ps, "tidigits", lm));
    ngram_model_free(lm);

    TEST_ASSERT(!ps_set_search(ps, "tidigits"));

    TEST_ASSERT(!ps_set_search(ps, "goforward"));
    
    itor = ps_search_iter(ps);
    TEST_EQUAL(0, strcmp("goforward_other", ps_search_iter_val(itor)));
    itor = ps_search_iter_next(itor);
    TEST_EQUAL(0, strcmp("tidigits", ps_search_iter_val(itor)));
    itor = ps_search_iter_next(itor);
    TEST_EQUAL(0, strcmp("goforward", ps_search_iter_val(itor)));
    itor = ps_search_iter_next(itor);
    TEST_EQUAL(0, strcmp("phone_loop", ps_search_iter_val(itor)));
    itor = ps_search_iter_next(itor);
    TEST_EQUAL(NULL, itor);
    
    TEST_ASSERT(!ps_start_utt(ps));
    TEST_ASSERT(!ps_end_utt(ps));
    ps_free(ps);
    cmd_ln_free_r(config);
}
Example #26
int processCommands()
{
    int32 samples;
    int16 audioBuf[BUFFER_SIZE];
    char const *uttid;
    char const *hyp;
    
    while(run)
    {
        printf("Waiting for utterance...\n");
        samples = waitForNextUtterance();
        if(samples < 0)
            return -1;
        
        if(ps_start_utt(psDecoder, NULL) < 0) {
            fprintf(stderr, "Failed to start next utterance\n");
            return -1;
        }
        ps_process_raw(psDecoder, audioBuf, samples, FALSE, FALSE);
        
        printf("Recording...\n");
        fflush(stdout);
        record();
        
        ad_stop_rec(audioDevice);
        while(ad_read(audioDevice, audioBuf, BUFFER_SIZE) >= 0);
        cont_ad_reset(continousAudoDevice);
        ps_end_utt(psDecoder);
        
        hyp = ps_get_hyp(psDecoder, NULL, &uttid);
        printf("Heard: %s\n", hyp);
        
        if (ad_start_rec(audioDevice) < 0) {
            fprintf(stderr, "Failed to start audio device.\n");
            return -1;
        }
    }
    
    return 0;
}
Example #27
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    AVDictionary **metadata = &in->metadata;
    ASRContext *s = ctx->priv;
    int have_speech;
    const char *speech;

    ps_process_raw(s->ps, (const int16_t *)in->data[0], in->nb_samples, 0, 0);
    have_speech = ps_get_in_speech(s->ps);
    if (have_speech && !s->utt_started)
        s->utt_started = 1;
    if (!have_speech && s->utt_started) {
        ps_end_utt(s->ps);
        speech = ps_get_hyp(s->ps, NULL);
        if (speech != NULL)
            av_dict_set(metadata, "lavfi.asr.text", speech, 0);
        ps_start_utt(s->ps);
        s->utt_started = 0;
    }

    return ff_filter_frame(ctx->outputs[0], in);
}
Example #28
/*
 * Continuous recognition from a file
 */
static void
recognize_from_file()
{
    int16 adbuf[2048];
    const char *fname;
    const char *hyp;
    int32 k;
    uint8 utt_started, in_speech;
    int32 print_times = cmd_ln_boolean_r(config, "-time");

    fname = cmd_ln_str_r(config, "-infile");
    if ((rawfd = fopen(fname, "rb")) == NULL) {
        E_FATAL_SYSTEM("Failed to open file '%s' for reading",
                       fname);
    }
    
    if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) {
        char waveheader[44];
	fread(waveheader, 1, 44, rawfd);
	if (!check_wav_header(waveheader, (int)cmd_ln_float32_r(config, "-samprate")))
    	    E_FATAL("Failed to process file '%s' due to format mismatch.\n", fname);
    }

    if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".mp3") == 0) {
	E_FATAL("Can not decode mp3 files, convert input file to WAV 16kHz 16-bit mono before decoding.\n");
    }
    
    ps_start_utt(ps);
    utt_started = FALSE;

    while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) {
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
        } 
        if (!in_speech && utt_started) {
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL);
            if (hyp != NULL)
        	printf("%s\n", hyp);
            if (print_times)
        	print_word_times();
            fflush(stdout);

            ps_start_utt(ps);
            utt_started = FALSE;
        }
    }
    ps_end_utt(ps);
    if (utt_started) {
        hyp = ps_get_hyp(ps, NULL);
        if (hyp != NULL) {
    	    printf("%s\n", hyp);
    	    if (print_times) {
    		print_word_times();
	    }
	}
    }
    
    fclose(rawfd);
}
Example #29
/*
 * Main utterance processing loop:
 *     for (;;) {
 * 	   wait for start of next utterance;
 * 	   decode utterance until silence of at least 1 sec observed;
 * 	   print utterance result;
 *     }
 */
static void
recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[4096];
    int32 k, ts, rem;
    char const *hyp;
    char const *uttid;
    cont_ad_t *cont;
    char word[256];
	char c1[256], c2[256];

	int tracking = 0;
	int halted = 0;
	int LEFT = 0;
	int RIGHT = 1;
	int MOVE_CENT = 100; //1 meter
	int numwords;

	setlinebuf(stdout);

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int)cmd_ln_float32_r(config, "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");

    /* Initialize continuous listening module */
    if ((cont = cont_ad_init(ad, ad_read)) == NULL)
        E_FATAL("Failed to initialize voice activity detection\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");
    if (cont_ad_calib(cont) < 0)
        E_FATAL("Failed to calibrate voice activity detection\n");


	printf("LEDON BLUE\n");
    for (;;) {
        /* Indicate listening for next utterance */
        fprintf(stderr, "READY....\n");
        fflush(stderr);

        /* Wait data for next utterance */
        while ((k = cont_ad_read(cont, adbuf, 4096)) == 0)
            sleep_msec(100);

        if (k < 0)
            E_FATAL("Failed to read audio\n");

        /*
         * Non-zero amount of data received; start recognition of new utterance.
         * NULL argument to uttproc_begin_utt => automatic generation of utterance-id.
         */
        if (ps_start_utt(ps, NULL) < 0)
            E_FATAL("Failed to start utterance\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        fprintf(stderr, "Listening...\n");

        /* Note timestamp for this first block of data */
        ts = cont->read_ts;

        /* Decode utterance until end (marked by a "long" silence, >1sec) */
        for (;;) {
            /* Read non-silence audio data, if any, from continuous listening module */
            if ((k = cont_ad_read(cont, adbuf, 4096)) < 0)
                E_FATAL("Failed to read audio\n");
            if (k == 0) {
                /*
                 * No speech data available; check current timestamp with most recent
                 * speech to see if more than 1 sec elapsed.  If so, end of utterance.
                 */
                if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC)
                    break;
            }
            else {
                /* New speech data received; note current timestamp */
                ts = cont->read_ts;
            }

            /*
             * Decode whatever data was read above.
             */
            rem = ps_process_raw(ps, adbuf, k, FALSE, FALSE);

            /* If no work to be done, sleep a bit */
            if ((rem == 0) && (k == 0))
                sleep_msec(20);
        }

        /*
         * Utterance ended; flush any accumulated, unprocessed A/D data and stop
         * listening until current utterance completely decoded
         */
        ad_stop_rec(ad);
        while (ad_read(ad, adbuf, 4096) >= 0);
        cont_ad_reset(cont);

        fprintf(stderr, "Stopped listening, please wait...\n");
        fflush(stdout);
        /* Finish decoding, obtain and print result */
        ps_end_utt(ps);
        hyp = ps_get_hyp(ps, NULL, &uttid);
        fprintf(stderr, "%s: %s\n", uttid, hyp);

        /* Parse the command if the first word spoken was GUGGUG */
        if (hyp) {
            word[0] = c1[0] = c2[0] = '\0';
            numwords = sscanf(hyp, "%s %s %s", word, c1, c2);
			if(strcmp(word, "GUGGUG") == 0) {
				if(strcmp(c1, "HALT") == 0) {
					printf("LEDOFF BLUE\n");
					halted = 1;
				} else if(strcmp(c1, "RESUME") == 0) {
					printf("LEDON BLUE\n");
					halted = 0;
				}
				if(strcmp(c1, "BEGIN") == 0 || strcmp(c1, "START") == 0) {
					if(strcmp(c2, "TRACKING") == 0 && !tracking) {
						printf("START TRACKING\n");
						tracking = 1;
						halted = 0;
					}
				} else if(strcmp(c1, "STOP") == 0) {
					if(strcmp(c2, "TRACKING") == 0 && tracking) {
						printf("STOP TRACKING\n");
						tracking = 0;
					}
				}
				if(!tracking && !halted && numwords == 3) {
					if(strcmp(c1, "TURN") == 0) {
						if(strcmp(c2, "AROUND") == 0) {
							printf("TURN %d 180\n", LEFT);
						} else if(strcmp(c2, "LEFT") == 0) {
							printf("TURN %d 90\n", LEFT);
						} else if(strcmp(c2, "RIGHT") == 0) {
							printf("TURN %d 90\n", RIGHT);
						}
					} else if(strcmp(c1, "MOVE") == 0) {
						if(strcmp(c2, "FORWARD") == 0) {
							printf("MOVE 0 %d\n", MOVE_CENT);
						} else if(strcmp(c2, "BACKWARD") == 0) {
							printf("MOVE 1 %d\n", MOVE_CENT);
						}
					}
				}
			}
        }

        /* Resume A/D recording for next utterance */
        if (ad_start_rec(ad) < 0)
            E_FATAL("Failed to start recording\n");
    }

    cont_ad_close(cont);
    ad_close(ad);
}
Example #30
/*
 * Continuous recognition from a file
 */
static void
recognize_from_file() {
    cont_ad_t *cont;
    ad_rec_t file_ad = {0};
    int16 adbuf[4096];
    const char* hyp;
    const char* uttid;
    int32 k, ts, start;

    char waveheader[44];
    if ((rawfd = fopen(cmd_ln_str_r(config, "-infile"), "rb")) == NULL) {
	E_FATAL_SYSTEM("Failed to open file '%s' for reading",
			cmd_ln_str_r(config, "-infile"));
    }

    fread(waveheader, 1, 44, rawfd);

    file_ad.sps = (int32)cmd_ln_float32_r(config, "-samprate");
    file_ad.bps = sizeof(int16);

    if ((cont = cont_ad_init(&file_ad, ad_file_read)) == NULL) {
        E_FATAL("Failed to initialize voice activity detection");
    }
    if (cont_ad_calib(cont) < 0)
        E_FATAL("Failed to calibrate voice activity detection\n");
    rewind (rawfd);

    for (;;) {

	while ((k = cont_ad_read(cont, adbuf, 4096)) == 0);

        if (k < 0) {
    	    break;
    	}

        if (ps_start_utt(ps, NULL) < 0)
            E_FATAL("ps_start_utt() failed\n");

        ps_process_raw(ps, adbuf, k, FALSE, FALSE);

        ts = cont->read_ts;
        start = ((ts - k) * 100.0) / file_ad.sps;

        for (;;) {
            if ((k = cont_ad_read(cont, adbuf, 4096)) < 0)
            	break;

            if (k == 0) {
                /*
                 * No speech data available; check current timestamp with most recent
                 * speech to see if more than 1 sec elapsed.  If so, end of utterance.
                 */
                if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC)
                    break;
            }
            else {
                /* New speech data received; note current timestamp */
                ts = cont->read_ts;
            }


            ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        }

        ps_end_utt(ps);

        if (cmd_ln_boolean_r(config, "-time")) {
	    print_word_times(start);
	} else {
	    hyp = ps_get_hyp(ps, NULL, &uttid);
            fprintf(stderr, "%s: %s\n", uttid, hyp);
        }
        fflush(stdout);
    }

    cont_ad_close(cont);
    fclose(rawfd);
}