int decode()
{
    int ret;
    int16 buf[BUFFER_SIZE];
    
    printf("Listening for input...\n");
    
    if (ad_start_rec(ad) < 0) {
        printf("Error starting recording.\n");
        return 0;
    }
    
	//check if not silent
	while ((ret = cont_ad_read(c_ad, buf, BUFFER_SIZE)) == 0)
        usleep(1000);
	
    if (ps_start_utt(ps, NULL) < 0) {
        printf("Failed to start utterance.\n");
        return 0;
    }
	
	ret = ps_process_raw(ps, buf, BUFFER_SIZE, 0, 0);
    if (ret < 0) {
        printf("Error decoding.\n");
        return 0;
    }
    
    do
    {
        ret = cont_ad_read(c_ad, buf, BUFFER_SIZE);

        if (ret < 0) {
            printf("Failed to record audio.\n");
            return 0;
        } else if(ret > 0) {
            // Valid speech data read.
            ret = ps_process_raw(ps, buf, 4096, 0, 0);
            if (ret < 0) {
                printf("Error decoding.\n");
                return 0;
            }
        } else {
            //no data
            usleep(1000);
        }
    } while(getRun());
    
    ad_stop_rec(ad);
    while (ad_read(ad, buf, BUFFER_SIZE) >= 0);
    cont_ad_reset(c_ad);

    ps_end_utt(ps);
    return 1;
}
Ejemplo n.º 2
0
/**
 * Decodes one complete signal as a single utterance and returns the
 * recognized text.
 *
 * @param signal samples handed to ps_process_raw in a single call.
 * @param c      Ice request context; not used by this function.
 * @return the hypothesis string reported by ps_get_hyp.
 * @throws PocketSphinxIce::Error if starting, processing or ending the
 *         utterance fails, or if ps_get_hyp returns NULL.
 */
std::string PocketSphinxServer::decode(const PocketSphinxIce::sample& signal, const Ice::Current& c)
{
	std::cout << "Decode\n";
	int32 score;

	if (ps_start_utt(ps) < 0)
		throw PocketSphinxIce::Error("Error in ps_start_utt");

	const int processed = ps_process_raw(ps, signal.data(), signal.size(), FALSE, FALSE);

	// Always end the utterance, even when processing failed, so the decoder
	// is not left mid-utterance for the next request.
	const int ended = ps_end_utt(ps);

	if (processed < 0)
		throw PocketSphinxIce::Error("Error in ps_process_raw");
	if (ended < 0)
		throw PocketSphinxIce::Error("Error in ps_end_utt");

	const char* hyp = ps_get_hyp(ps, &score);
	if (!hyp)
		throw PocketSphinxIce::Error("ps_get_hyp returned NULL");

	std::cout << "return:" << hyp << '\n';
	return hyp;
}
Ejemplo n.º 3
0
int processRaw(const char *rawFile) {
    const char *hyp, *uttid;
    int16 buf[512];
    int rv;
    int32 score;

    // Open the wav file passed from argument
    printf("file: %s\n", rawFile);
    fh = fopen(rawFile, "rb");
    if (fh == NULL) {
        fprintf(stderr, "Unable to open input file %s\n", rawFile);
        return -1;
    }

    // Start utterance
    rv = ps_start_utt(ps);
    
    // Process buffer, 512 samples at a time
    while (!feof(fh)) {
        size_t nsamp;
        nsamp = fread(buf, 2, 512, fh);
        rv = ps_process_raw(ps, buf, nsamp, FALSE, FALSE);
    }
    
    // Recieve the recognized string
    rv = ps_end_utt(ps);
    hyp = ps_get_hyp(ps, &score);
    printf("Recognized: |%s|\n", hyp);
    fflush(stdout);

    // Close file
    fclose(fh);

    return 0;
}
Ejemplo n.º 4
0
/**
 * Decodes "goforward.raw" twice: once in a single ps_decode_raw call and
 * once by feeding the file to the decoder 512 samples at a time, printing
 * the hypothesis after each pass.
 *
 * @return 0 on success, 1 on any failure.
 */
int
main(int argc, char *argv[])
{
	ps_decoder_t *ps;
	cmd_ln_t *config;
	FILE *fh;
	char const *hyp, *uttid;
	int16 buf[512];
	size_t nsamp;
	int rv;
	int32 score;

	config = cmd_ln_init(NULL, ps_args(), TRUE,
		"-hmm", MODELDIR "/hmm/en_US/hub4wsj_sc_8k",
		"-lm", MODELDIR "/lm/en/turtle.DMP",
		"-dict", MODELDIR "/lm/en/turtle.dic",
		NULL);
	if (config == NULL)
		return 1;
	ps = ps_init(config);
	if (ps == NULL)
		return 1;

	fh = fopen("goforward.raw", "rb");
	if (fh == NULL) {
		perror("Failed to open goforward.raw");
		return 1;
	}

	/* First pass: let the library read the whole file. */
	rv = ps_decode_raw(ps, fh, "goforward", -1);
	if (rv < 0)
		return 1;
	hyp = ps_get_hyp(ps, &score, &uttid);
	if (hyp == NULL)
		return 1;
	printf("Recognized: %s\n", hyp);

	/* Second pass: rewind and feed the same data block by block. */
	fseek(fh, 0, SEEK_SET);
	rv = ps_start_utt(ps, "goforward");
	if (rv < 0)
		return 1;
	/* Loop on the read count rather than on feof(): this also stops when a
	 * read error occurs and never feeds an empty trailing block. */
	while ((nsamp = fread(buf, 2, 512, fh)) > 0)
		rv = ps_process_raw(ps, buf, nsamp, FALSE, FALSE);
	rv = ps_end_utt(ps);
	if (rv < 0)
		return 1;
	hyp = ps_get_hyp(ps, &score, &uttid);
	if (hyp == NULL)
		return 1;
	printf("Recognized: %s\n", hyp);

	fclose(fh);
	ps_free(ps);
	return 0;
}
Ejemplo n.º 5
0
/**
 * Decodes the audio file named by the first command-line argument as one
 * utterance and prints the hypothesis.
 *
 * @return 0 on success, -1 if the argument is missing, or if the
 *         configuration, recognizer or input file cannot be set up.
 */
int main(int argc, char *argv[]) {
    ps_decoder_t *ps;
    cmd_ln_t *config;
    FILE *fh;
    char const *hyp;
    int16 buf[512];
    size_t nsamp;
    int32 score;

    // argv[1] is only valid when an argument was actually supplied.
    if (argc < 2) {
        fprintf(stderr, "Usage: <program> <audio file>\n");
        return -1;
    }

    config = cmd_ln_init(NULL, ps_args(), TRUE,
            "-hmm",    MODELDIR "/en-us/en-us",
            "-lm",     MODELDIR "/en-us/en-us.lm.bin",
            "-dict",   MODELDIR "/en-us/cmudict-en-us.dict",
            NULL);

    if (config == NULL) {
        fprintf(stderr, "Failed to create config object, see log for details\n");
        return -1;
    }

    // Initialize pocketsphinx
    ps = ps_init(config);
    if (ps == NULL) {
        fprintf(stderr, "Failed to create recognizer, see log for details\n");
        return -1;
    }

    // Open the audio file passed as argument
    printf("file: %s\n", argv[1]);
    fh = fopen(argv[1], "rb");
    if (fh == NULL) {
        fprintf(stderr, "Unable to open input file %s\n", argv[1]);
        return -1;
    }

    // Start utterance
    ps_start_utt(ps);

    // Process buffer, 512 samples at a time. Looping on the read count
    // (rather than on feof) also terminates when a read error occurs.
    while ((nsamp = fread(buf, 2, 512, fh)) > 0)
        ps_process_raw(ps, buf, nsamp, FALSE, FALSE);

    // Receive the recognized string; NULL must not be passed to "%s".
    ps_end_utt(ps);
    hyp = ps_get_hyp(ps, &score);
    printf("Recognized: |%s|\n", hyp != NULL ? hyp : "");

    // free memory
    fclose(fh);
    ps_free(ps);
    cmd_ln_free_r(config);

    return 0;
}
 /**
  * Feeds one buffer of samples to the decoder and refreshes current_hyp
  * with the decoder's current hypothesis (empty when there is none).
  *
  * @param buffer samples to process; must not be empty.
  * @return BAD_STATE when there is no decoder or recording is not active,
  *         RUNTIME_ERROR for an empty buffer, SUCCESS otherwise.
  */
 ReturnType Recognizer::process(const std::vector<int16_t>& buffer) {
   if (decoder == NULL) return BAD_STATE;
   if (!is_recording) return BAD_STATE;
   if (buffer.empty()) return RUNTIME_ERROR;

   ps_process_raw(decoder, (short int *) &buffer.front(), buffer.size(), 0, 0);

   const char* hypothesis = ps_get_hyp(decoder, &score, &sentence_id);
   if (hypothesis != NULL) {
     current_hyp = hypothesis;
   } else {
     current_hyp = "";
   }
   return SUCCESS;
 }
/**
 * You must successfully call spInitListener
 * once before using this function.
 *
 * Reads the next block of audio from the microphone
 * up to SPLBUFSIZE number of samples
 * (defined in splistener.h).
 * If an utterance was completed in that block,
 * the transcription is stored in the string words.
 *
 * When calling this function in a realtime loop, delay
 * by some amount of time between calls keeping in mind
 * your recording's sample rate and maximum samples read
 * per call (ex. sleep the thread for 100 milliseconds)
 * so that some audio can be recorded for the next call.
 *
 * @return true if a speech session was completed and
 *         transcribed this block, otherwise false.
 */
static bool spDecode() {
    static bool uttered = false;

    // lock pocketsphinx resources to make sure they 
    // don't get freed by main thread while in use
    std::lock_guard<std::mutex> ps_lock(ps_mtx);
    if(!mic || !ps)
        return false;

    int samples_read = ad_read(mic, buf, SPLBUFSIZE);
    if (samples_read <= 0) {
        spError("failed to read audio :(");
        return false;
    }

    ps_process_raw(ps, buf, samples_read, FALSE, FALSE);
    bool talking = ps_get_in_speech(ps);

    // Just started talking
    if (talking && !uttered) {
        uttered = true;
        return false;
    }

    // Stopped talking, so transcribe what was said
    // and begin the next utterance
    if (!talking && uttered) {
        bool transcribed = false;

        ps_end_utt(ps);
        const char *trans = ps_get_hyp(ps, NULL);

        // Copy the hypothesis before the next utterance is started: the
        // string is handed out by the decoder, so it is not assumed to stay
        // valid once ps_start_utt has been called. It may also be NULL, so
        // check it before measuring its length.
        if (trans) {
            int l = strlen(trans);
            if (l > 0) {
                std::lock_guard<std::mutex> lock(words_mtx);
                if (words && l + 1 > words_buf_size) {
                    // words is allocated with new[] below, so release it
                    // with the matching delete[].
                    delete[] words;
                    words = NULL;
                }
                if (!words) {
                    words = new char[l + 1];
                    words_buf_size = l + 1;
                }

                std::copy(trans, trans + l, words);
                words[l] = '\0';

                transcribed = true;
            }
        }

        if (ps_start_utt(ps) < 0) {
            spError("failed to start utterance :(");
        }
        uttered = false;

        return transcribed;
    }

    return false;
}
Ejemplo n.º 8
0
/**
 * Decodes raw audio from a file handle as a single utterance.
 *
 * @param ps       decoder to use.
 * @param rawfh    file to read 16-bit samples from, starting at its current
 *                 position.
 * @param maxsamps maximum number of samples to read, or -1 to read to the
 *                 end of the file.
 * @return the number of samples read and passed to the decoder.
 */
long
ps_decode_raw(ps_decoder_t *ps, FILE *rawfh,
              long maxsamps)
{
    int16 *data;
    long total, pos, endpos;

    ps_start_stream(ps);
    ps_start_utt(ps);

    /* If this file is seekable or maxsamps is specified, then decode
     * the whole thing at once. */
    if (maxsamps != -1) {
        data = (int16 *) ckd_calloc(maxsamps, sizeof(*data));
        total = fread(data, sizeof(*data), maxsamps, rawfh);
        ps_process_raw(ps, data, total, FALSE, TRUE);
        ckd_free(data);
    } else if ((pos = ftell(rawfh)) >= 0) {
        fseek(rawfh, 0, SEEK_END);
        endpos = ftell(rawfh);
        fseek(rawfh, pos, SEEK_SET);
        /* File positions are in bytes; convert the remaining length to a
         * sample count so the buffer is not allocated at twice the size. */
        maxsamps = (endpos - pos) / (long) sizeof(*data);

        data = (int16 *) ckd_calloc(maxsamps, sizeof(*data));
        total = fread(data, sizeof(*data), maxsamps, rawfh);
        ps_process_raw(ps, data, total, FALSE, TRUE);
        ckd_free(data);
    } else {
        /* Otherwise decode it in a stream. */
        int16 chunk[256];
        size_t nread;

        total = 0;
        /* Loop on the read count rather than on feof(), so that a read
         * error ends the loop instead of spinning forever. */
        while ((nread = fread(chunk, sizeof(*chunk),
                              sizeof(chunk)/sizeof(*chunk), rawfh)) > 0) {
            ps_process_raw(ps, chunk, nread, FALSE, FALSE);
            total += nread;
        }
    }
    ps_end_utt(ps);
    return total;
}
/**
 * Handles one buffer delivered by the recorder: writes the raw bytes to
 * m_fout, then passes the samples to ps_process_raw while holding
 * m_pt_mutex.
 *
 * @param record_buf       recorded samples.
 * @param buf_size_in_byte size of record_buf in bytes.
 */
void GqAndroidSphinx::received_buf_from_recorder(short *record_buf,
		unsigned long buf_size_in_byte) {
	const unsigned long sample_count = buf_size_in_byte / sizeof(int16);

	m_fout.write(reinterpret_cast<char *>(record_buf), buf_size_in_byte);

	pthread_mutex_lock(&m_pt_mutex);
	LOGD("received_buf_from_recorder before");
	ps_process_raw(m_pdecoder, record_buf, sample_count, TRUE, FALSE);
	LOGD("received_buf_from_recorder end");
	pthread_mutex_unlock(&m_pt_mutex);
}
Ejemplo n.º 10
0
/**
 * Runs the decoder configured with the keyphrase "marieta" over
 * "data/marieta.raw" and prints the resulting hypothesis.
 *
 * @return 0 on success, -1 if the configuration, recognizer or input file
 *         cannot be set up.
 */
int
main(int argc, char *argv[])
{
    ps_decoder_t *ps;
    cmd_ln_t *config;
    FILE *fh;
    char const *hyp;
    int16 buf[512];
    size_t nsamp;
    int32 score;

    config = cmd_ln_init(NULL, ps_args(), TRUE,
                 "-hmm", MODELDIR "/en-us/en-us",
                 "-keyphrase", "marieta",
                 "-dict", MODELDIR "/en-us/cmudict-en-us.dict",
                 "-kws_threshold", "1e-30",
                 NULL);
    if (config == NULL) {
        fprintf(stderr, "Failed to create config object, see log for details\n");
        return -1;
    }
    
    ps = ps_init(config);
    if (ps == NULL) {
        fprintf(stderr, "Failed to create recognizer, see log for details\n");
        return -1;
    }

    fh = fopen("data/marieta.raw", "rb");
    if (fh == NULL) {
        /* Report the file that was actually opened. */
        fprintf(stderr, "Unable to open input file data/marieta.raw\n");
        return -1;
    }

    ps_start_utt(ps);

    /* Loop on the read count rather than on feof(), so that a read error
     * ends the loop instead of spinning forever. */
    while ((nsamp = fread(buf, 2, 512, fh)) > 0)
        ps_process_raw(ps, buf, nsamp, FALSE, FALSE);

    ps_end_utt(ps);
    /* The hypothesis is NULL when nothing was recognized; NULL must not be
     * passed to "%s". */
    hyp = ps_get_hyp(ps, &score);
    printf("Recognized: %s\n", hyp != NULL ? hyp : "");

    fclose(fh);
    ps_free(ps);
    cmd_ln_free_r(config);
    
    return 0;
}
Ejemplo n.º 11
0
void Recognizer::run()
{
    // Create audio converter:
    auto converter = ci::audio::dsp::Converter::create( mMonitorNode->getSampleRate(), 16000, mMonitorNode->getNumChannels(), 1, mMonitorNode->getFramesPerBlock() );
    // Create buffer for converted audio:
    ci::audio::Buffer destBuffer( converter->getDestMaxFramesPerBlock(), converter->getDestNumChannels() );

    bool utt_started, in_speech;

    if( ps_start_utt( mDecoder ) < 0 )
        throw std::runtime_error( "Could not start utterance" );

    utt_started = false;

    while( ! mStop ) {
        // Convert buffer:
        std::pair<size_t,size_t> convertResult = converter->convert( &( mMonitorNode->getBuffer() ), &destBuffer );

        // Convert buffer data:
        int16_t* data = new int16_t[ convertResult.second ];
        convertFloatToInt16( destBuffer.getData(), data, convertResult.second );

        // Process buffer:
        ps_process_raw( mDecoder, data, convertResult.second, false, false );

        // Cleanup buffer data:
        delete[] data;

        in_speech = static_cast<bool>( ps_get_in_speech( mDecoder ) );

        if( in_speech && ! utt_started ) {
            utt_started = true;
        }

        if( ! in_speech && utt_started ) {
            // Start new utterance on speech to silence transition:
            ps_end_utt( mDecoder );

            // Pass to handler:
            if( mHandler )
                mHandler->event( mDecoder );

            // Prepare for next utterance:
            if( ps_start_utt( mDecoder ) < 0 )
                throw std::runtime_error( "Could not start utterance" );

            utt_started = false;
        }

        std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
    }
}
Ejemplo n.º 12
0
/*
 * Reads one chunk from the continuous-listening module and drives the
 * decoder with it:
 *   - data available: start an utterance if none is in progress, then feed
 *     the samples;
 *   - no data and more than DEFAULT_SAMPLES_PER_SEC of timestamp elapsed
 *     since the last data: end the utterance and send the hypothesis (with
 *     parentheses and digits stripped) as a "message".
 */
static void gst_sphinx_sink_process_chunk (GstSphinxSink *sphinxsink)
{
	int32 k;
	int16 adbuf[REQUIRED_FRAME_SAMPLES];

	k = cont_ad_read (sphinxsink->cont, adbuf, REQUIRED_FRAME_SAMPLES);

	/* A negative count is a read error; there is nothing to decode and it
	 * must not be passed on as a sample count. */
	if (k < 0)
		return;

	if (k == 0 && sphinxsink->last_ts == 0) {
		return;
	} else if (k == 0 && sphinxsink->cont->read_ts - sphinxsink->last_ts >
			    DEFAULT_SAMPLES_PER_SEC) {
		int32 score;
		const char *hyp;
		char *stripped_hyp;
		int i, j;

		ps_end_utt (sphinxsink->decoder);
		if ((hyp = ps_get_hyp (sphinxsink->decoder, &score, NULL)) == NULL) {

			gst_sphinx_sink_send_message (sphinxsink, "message", "");
			g_message("Not Recognized");

		} else {
			/* Copy the hypothesis without '(' , ')' and digits. */
			stripped_hyp = g_malloc (strlen (hyp) + 1);
			for (i=0, j=0; hyp[i] != 0; i++) {
				if (hyp[i] != '(' && hyp[i] != ')' && (hyp[i] < '0' || hyp[i] > '9')) {
					stripped_hyp[j++] = hyp[i];
				}
			}
			stripped_hyp [j] = 0;

			gst_sphinx_sink_send_message (sphinxsink, "message", stripped_hyp);
			g_message("Recognized: %s", stripped_hyp);

			/* Release the temporary copy.
			 * NOTE(review): assumes gst_sphinx_sink_send_message does not
			 * keep the pointer it is given - confirm. */
			g_free (stripped_hyp);
		}

		sphinxsink->last_ts = 0;
		sphinxsink->ad.listening = 0;

	} else if (k != 0) {
		if (sphinxsink->ad.listening == 0) {
			ps_start_utt (sphinxsink->decoder, NULL);
			sphinxsink->ad.listening = 1;
			gst_sphinx_sink_send_message (sphinxsink, "listening", NULL);
		}

		ps_process_raw (sphinxsink->decoder, adbuf, k, 0, 0);
		sphinxsink->last_ts = sphinxsink->cont->read_ts;
	}
}
Ejemplo n.º 13
0
/*
 * Continuous recognition from mic
 */
int
recognize_from_mic()
{
	ad_rec_t *ad;
    int16 adbuf[2048];
    const char *fname;
	const char* seg;
    int32 k;
	char str[1000]="";
    uint8 utt_started, in_speech;
	
    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),16000)) == NULL)
		perror("Failed to open audio device\n");
	if (ad_start_rec(ad) < 0)
		perror("Failed to start recording\n");
    
    ps_start_utt(ps);
    utt_started = FALSE;
	ps_seg_t *psegt;
    while (!finished) {
		if ((k = ad_read(ad, adbuf, 2048)) < 0)
			perror("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
        } 
        if (!in_speech && utt_started) {
            ps_end_utt(ps);
			psegt = ps_seg_iter(ps, NULL);
			while (psegt!=NULL){
				seg = ps_seg_word(psegt);
				strncpy_s( str, seg, strlen(seg));
				listenCallback(str);
				printf("%s\n", seg);
				int prob = ps_seg_prob(psegt,NULL,NULL,NULL);
				printf("%d\n", prob);
				psegt = ps_seg_next(psegt);
			}
            ps_start_utt(ps);
            utt_started = FALSE;
        }
		Sleep(100);
    }
	
    ps_end_utt(ps);
    fclose(rawfd);
	return 0;
}
Ejemplo n.º 14
0
/*
 * Microphone recognition loop.
 *
 * Opens the device named by "-adcdev" at the "-samprate" rate, starts an
 * utterance and then loops forever:
 *     read a block of audio and feed it to the decoder;
 *     on a speech -> silence transition, end the utterance, print the
 *     hypothesis (if any) and start the next utterance.
 * Every failure is reported through E_FATAL.
 */
static void
recognize_from_microphone()
{
    int16 adbuf[2048];
    ad_rec_t *ad;
    char const *hyp;
    int32 k;
    uint8 in_speech;
    uint8 utt_started = FALSE;

    ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                     (int) cmd_ln_float32_r(config, "-samprate"));
    if (ad == NULL)
        E_FATAL("Failed to open audio device\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");

    if (ps_start_utt(ps) < 0)
        E_FATAL("Failed to start utterance\n");
    E_INFO("Ready....\n");

    while (1) {
        k = ad_read(ad, adbuf, 2048);
        if (k < 0)
            E_FATAL("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);

        in_speech = ps_get_in_speech(ps);
        if (in_speech) {
            if (!utt_started) {
                /* silence -> speech transition */
                utt_started = TRUE;
                E_INFO("Listening...\n");
            }
        }
        else if (utt_started) {
            /* speech -> silence transition, time to start new utterance  */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL );
            if (hyp != NULL) {
                printf("%s\n", hyp);
                fflush(stdout);
            }

            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
            E_INFO("Ready....\n");
        }
        sleep_msec(100);
    }
    ad_close(ad);
}
Ejemplo n.º 15
0
/*
 * Microphone recognition loop using AUDIO_DEVICE_NAME at SAMPLE_RATE.
 *
 * Starts an utterance and then loops forever: each block read from the
 * device is fed to the decoder; when speech is followed by silence the
 * utterance is ended, its hypothesis (if any) is printed and a new
 * utterance is started. Every failure is reported through E_FATAL.
 */
static void
recognize_from_microphone()
{
    int16 adbuf[2048];
    ad_rec_t *ad;
    char const *hyp;
    int32 k;
    uint8 in_speech;
    uint8 utt_started;

    ad = ad_open_dev(AUDIO_DEVICE_NAME, (int) SAMPLE_RATE );
    if (ad == NULL) {
        E_FATAL("Failed to open audio device\n");
    }
    if (ad_start_rec(ad) < 0) {
        E_FATAL("Failed to start recording\n");
    }
    if (ps_start_utt(ps) < 0) {
        E_FATAL("Failed to start utterance\n");
    }
    utt_started = FALSE;
    printf("READY....\n");

    while (1) {
        k = ad_read(ad, adbuf, 2048);
        if (k < 0)
            E_FATAL("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);

        in_speech = ps_get_in_speech(ps);
        if (in_speech) {
            if (!utt_started) {
                /* silence -> speech transition */
                utt_started = TRUE;
                printf("Listening...\n");
            }
        }
        else if (utt_started) {
            /* speech -> silence transition, time to start new utterance  */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL );
            if (hyp != NULL)
                printf("%s\n", hyp);

            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
            printf("READY....\n");
        }
        sleep_msec(100);
    }
    ad_close(ad);
}
Ejemplo n.º 16
0
/*
 * Continuous recognition from a file
 *
 * Reads the hard-coded input file through the file-level handle `rawfd`,
 * skipping the first 44 bytes when the name ends in ".wav". Each hypothesis
 * is printed and passed to listenCallback.
 *
 * Returns 0 after processing the file, -1 if it cannot be opened.
 */
int
recognize_from_file()
{
    int16 adbuf[2048];
    const char *fname;
    const char *hyp;
    int32 k;
    char str[1000]="";
    uint8 utt_started, in_speech;

    fname = "C:/Users/Reza/Documents/GitHub/speech_agent/presentation_samples/italy1_reza.wav"; 
    rawfd = fopen(fname, "rb");
    if (rawfd == NULL) {
        /* Without this check the reads below would use a NULL stream. */
        perror("Failed to open input file");
        return -1;
    }
    if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) {
        /* Skip the 44-byte header so it is not decoded as audio. */
        char waveheader[44];
        fread(waveheader, 1, 44, rawfd);
    }

    ps_start_utt(ps);
    utt_started = FALSE;

    while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) {
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition: report and start over */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL);

            if (hyp != NULL){
                strncpy_s( str, hyp, strlen(hyp));
                printf("%s\n", hyp);
                listenCallback(str);
            }
            ps_start_utt(ps);
            utt_started = FALSE;
        }
    }

    ps_end_utt(ps);
    /* The file may end while speech is still in progress; report that last
     * utterance too instead of silently dropping it. */
    if (utt_started) {
        hyp = ps_get_hyp(ps, NULL);
        if (hyp != NULL){
            strncpy_s( str, hyp, strlen(hyp));
            printf("%s\n", hyp);
            listenCallback(str);
        }
    }

    fclose(rawfd);
    return 0;
}
Ejemplo n.º 17
0
/**
 * Decodes "goforward.raw" as one utterance, 512 samples at a time, and
 * prints the hypothesis.
 *
 * @return 0 on success, -1 if the input file cannot be opened, 1 on any
 *         other failure.
 */
int
main(int argc, char *argv[])
{
  ps_decoder_t *ps;
  cmd_ln_t *config;
  FILE *fh;
  char const *hyp;
  int16 buf[512];
  size_t nsamp;
  int rv;
  int32 score;

  config = cmd_ln_init(NULL, ps_args(), TRUE,
		       "-hmm", MODELDIR "/en-us/en-us",
		       "-lm", MODELDIR "/en-us/en-us.lm.dmp",
		       "-dict", MODELDIR "/en-us/cmudict-en-us.dict",
		       NULL);
  if (config == NULL)
    return 1;
  ps = ps_init(config);
  if (ps == NULL)
    return 1;

  fh = fopen("goforward.raw", "rb");
  if (fh == NULL)
    return -1;
  rv = ps_start_utt(ps);
  if (rv < 0)
    return 1;
  /* Loop on the read count rather than on feof(), so that a read error
   * ends the loop instead of spinning forever. */
  while ((nsamp = fread(buf, 2, 512, fh)) > 0)
    rv = ps_process_raw(ps, buf, nsamp, FALSE, FALSE);
  rv = ps_end_utt(ps);
  if (rv < 0)
    return 1;
  hyp = ps_get_hyp(ps, &score);
  if (hyp == NULL)
    return 1;
  printf("Recognized: %s\n", hyp);

  fclose(fh);
  ps_free(ps);
  cmd_ln_free_r(config);
  return 0;
}
Ejemplo n.º 18
0
/*! function to feed audio to the ASR
 *
 * \param ah    ASR handle; its private_info holds the pocketsphinx_t state
 * \param data  audio data, handed to the decoder as 16-bit samples
 * \param len   size of data in bytes (len / 2 is used as the sample count)
 * \param flags not used by this function
 * \return SWITCH_STATUS_BREAK if the handle is flagged closed,
 *         SWITCH_STATUS_FALSE if ps_process_raw reports an error,
 *         SWITCH_STATUS_SUCCESS otherwise
 */
static switch_status_t pocketsphinx_asr_feed(switch_asr_handle_t *ah, void *data, unsigned int len, switch_asr_flag_t *flags)
{
	pocketsphinx_t *ps = (pocketsphinx_t *) ah->private_info;
	int rv = 0;

	if (switch_test_flag(ah, SWITCH_ASR_FLAG_CLOSED))
		return SWITCH_STATUS_BREAK;

	/* Only do work while the recognizer is ready and no result is pending. */
	if (!switch_test_flag(ps, PSFLAG_HAS_TEXT) && switch_test_flag(ps, PSFLAG_READY)) {
		if (stop_detect(ps, (int16_t *) data, len / 2)) {
			char const *hyp;

			switch_mutex_lock(ps->flag_mutex);
			/* Only end the utterance if there is a non-empty partial hypothesis. */
			if ((hyp = ps_get_hyp(ps->ps, &ps->score, &ps->uttid))) {
				if (!zstr(hyp)) {
					ps_end_utt(ps->ps);
					switch_clear_flag(ps, PSFLAG_READY);
					/* Fetch the hypothesis again now that the utterance has ended. */
					if ((hyp = ps_get_hyp(ps->ps, &ps->score, &ps->uttid))) {
						if (zstr(hyp)) {
							/* The final hypothesis is empty: start a new utterance and become ready again. */
							switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Lost the text, never mind....\n");
							ps_start_utt(ps->ps, NULL);
							switch_set_flag(ps, PSFLAG_READY);
						} else {
							/* Keep a copy allocated from the handle's memory pool and flag the result. */
							ps->hyp = switch_core_strdup(ah->memory_pool, hyp);
							switch_set_flag(ps, PSFLAG_HAS_TEXT);
						}
					}
				}
			}
			switch_mutex_unlock(ps->flag_mutex);
		}

		/* only feed ps_process_raw when we are listening */
		if (ps->listening) {
			switch_mutex_lock(ps->flag_mutex);
			rv = ps_process_raw(ps->ps, (int16 *) data, len / 2, FALSE, FALSE);
			switch_mutex_unlock(ps->flag_mutex);
		}

		/* rv stays 0 when nothing was fed above. */
		if (rv < 0) {
			return SWITCH_STATUS_FALSE;
		}
	}

	return SWITCH_STATUS_SUCCESS;
}
Ejemplo n.º 19
0
/**
 * Worker body: opens the recording device, then feeds audio to the decoder
 * until StopTaskCounter becomes non-zero. After each speech-to-silence
 * transition the hypothesis (if any) is passed to
 * Manager->WordSpoken_method.
 *
 * @return 0 after a normal stop; 1 if the device cannot be opened, 2 if
 *         recording cannot be started, 3 if the utterance cannot be
 *         started, 4 if reading audio fails.
 */
uint32 FSpeechRecognitionWorker::Run() {

	char const *hyp;
	uint32 exit_code = 0;
	// attempt to open the default recording device
	if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
		(int)cmd_ln_float32_r(config,
		"-samprate"))) == NULL) {
		ClientMessage(FString(TEXT("Failed to open audio device")));
		return 1;
	}
	if (ad_start_rec(ad) < 0) {
		ClientMessage(FString(TEXT("Failed to start recording")));
		// Release the device opened above before bailing out.
		ad_close(ad);
		return 2;
	}
	if (ps_start_utt(ps) < 0) {
		ClientMessage(FString(TEXT("Failed to start utterance")));
		ad_close(ad);
		return 3;
	}

	while (StopTaskCounter.GetValue() == 0) {
		if ((k = ad_read(ad, adbuf, 1024)) < 0) {
			ClientMessage(FString(TEXT("Failed to read audio")));
			// A negative count must not be passed on as a sample count.
			exit_code = 4;
			break;
		}
		ps_process_raw(ps, adbuf, k, 0, 0);
		in_speech = ps_get_in_speech(ps);
		if (in_speech && !utt_started) {
			utt_started = 1;
		}
		if (!in_speech && utt_started) {
			/* speech -> silence transition, time to start new utterance  */
			ps_end_utt(ps);
			hyp = ps_get_hyp(ps, NULL);
			if (hyp != NULL)
				Manager->WordSpoken_method(FString(hyp));

			if (ps_start_utt(ps) < 0)
				ClientMessage(FString(TEXT("Failed to start")));
			utt_started = 0;
		}
	}

	ad_close(ad);
	return exit_code;
}
/*
 * Chain function: feeds each incoming buffer to the decoder and, when a
 * partial-result check is due, emits SIGNAL_PARTIAL_RESULT if the
 * hypothesis is non-empty and differs from the last one emitted.
 * The buffer is unreferenced before returning GST_FLOW_OK.
 */
static GstFlowReturn
gst_pocketsphinx_chain(GstPad * pad, GstBuffer * buffer)
{
    GstPocketSphinx *element = GST_POCKETSPHINX(GST_OBJECT_PARENT(pad));
    int check_due;

    /* Start an utterance for the first buffer we get (i.e. we assume
     * that the VADER is "leaky") */
    if (!element->listening) {
        element->listening = TRUE;
        ps_start_utt(element->ps, NULL);
    }
    ps_process_raw(element->ps,
                   (short *)GST_BUFFER_DATA(buffer),
                   GST_BUFFER_SIZE(buffer) / sizeof(short),
                   FALSE, FALSE);

    /* Get a partial result every now and then, see if it is different.
     * The original comment says "check every 100 milliseconds".
     * NOTE(review): the literal below is 1,000,000 timestamp units - confirm
     * that this matches the intended 100 ms interval. */
    check_due = element->last_result_time == 0
        || (GST_BUFFER_TIMESTAMP(buffer) - element->last_result_time) > 100*10*1000;

    if (check_due) {
        int32 score;
        char const *uttid;
        char const *hyp = ps_get_hyp(element->ps, &score, &uttid);

        element->last_result_time = GST_BUFFER_TIMESTAMP(buffer);

        /* Only report a non-empty hypothesis that differs from the last one. */
        if (hyp != NULL && hyp[0] != '\0'
            && (element->last_result == NULL
                || strcmp(element->last_result, hyp) != 0)) {
            g_free(element->last_result);
            element->last_result = g_strdup(hyp);
            /* Emit a signal for applications. */
            g_signal_emit(element, gst_pocketsphinx_signals[SIGNAL_PARTIAL_RESULT],
                          0, hyp, uttid);
        }
    }
    gst_buffer_unref(buffer);
    return GST_FLOW_OK;
}
/**
 * Command loop: while `run` is set, waits for an utterance, decodes it via
 * record(), prints what was heard and restarts recording.
 *
 * @return 0 when the loop ends because `run` was cleared, -1 if waiting
 *         for an utterance, starting an utterance or restarting the audio
 *         device fails.
 */
int processCommands()
{
    int32 samples;
    // Zero-initialised so the decoder is never handed indeterminate data.
    // NOTE(review): nothing in this function fills audioBuf before the first
    // ps_process_raw call below; the samples counted by
    // waitForNextUtterance() are presumably held elsewhere - confirm and
    // pass that data here instead.
    int16 audioBuf[BUFFER_SIZE] = {0};
    char const *uttid;
    char const *hyp;
    
    while(run)
    {
        printf("Waiting for utterance...\n");
        samples = waitForNextUtterance();
        if(samples < 0)
            return -1;
        
        if(ps_start_utt(psDecoder, NULL) < 0) {
            fprintf(stderr, "Failed to start next utterance\n");
            return -1;
        }
        ps_process_raw(psDecoder, audioBuf, samples, FALSE, FALSE);
        
        printf("Recording...\n");
        fflush(stdout);
        record();
        
        // Stop recording, read until the device reports a negative value,
        // then reset the listening module and close the utterance.
        ad_stop_rec(audioDevice);
        while(ad_read(audioDevice, audioBuf, BUFFER_SIZE) >= 0);
        cont_ad_reset(continousAudoDevice);
        ps_end_utt(psDecoder);
        
        // The hypothesis is NULL when nothing was recognized; NULL must not
        // be passed to "%s".
        hyp = ps_get_hyp(psDecoder, NULL, &uttid);
        printf("Heard: %s\n", hyp != NULL ? hyp : "");
        
        if (ad_start_rec(audioDevice) < 0) {
            fprintf(stderr, "Failed to start audio device.\n");
            return -1;
        }
    }
    
    return 0;
}
Ejemplo n.º 22
0
/**
 * Feeds the named file (relative to directoryString) to the decoder as one
 * utterance, 512 samples at a time, then calls engineClose().
 *
 * @param filename name of the file inside directoryString.
 * @return 0 after the file has been processed, -1 if it cannot be opened,
 *         1 if the utterance cannot be started.
 */
int ofApp::engineOpen(string filename)
{
    FILE *fh;
    int16 buf[512];
    size_t nsamp;

    fh = fopen((directoryString + filename).c_str(), "rb");
    if (fh == NULL)
    {
        return -1;
    }
    if (ps_start_utt(ps) < 0)
    {
        fclose(fh);
        return 1;
    }
    // Loop on the read count rather than on feof(), so that a read error
    // ends the loop instead of spinning forever.
    while ((nsamp = fread(buf, 2, 512, fh)) > 0)
    {
        ps_process_raw(ps, buf, nsamp, FALSE, FALSE);
    }

    // The handle is local to this function, so it has to be closed here.
    fclose(fh);

    engineClose();

    // The original fell off the end of a non-void function.
    return 0;
}
/**
 * Feeds audio from the continuous-listening module to the decoder while
 * `run` is set, stopping once no data has been read for more than
 * DEFAULT_SAMPLES_PER_SEC timestamp units or when a read fails.
 */
void record()
{
    int32 samples, timeStamp, rem;
    int16 audioBuf[BUFFER_SIZE];
    
    timeStamp = continousAudoDevice->read_ts;
    while(run)
    {
        samples = cont_ad_read(continousAudoDevice, audioBuf, BUFFER_SIZE);
        if (samples < 0) {
            // A negative count is a read error and must not be passed on
            // as a sample count.
            fprintf(stderr, "Failed to read audio.\n");
            break;
        }
        if (samples == 0) {
            // Nothing read: stop once the silence has lasted long enough.
            if ((continousAudoDevice->read_ts - timeStamp) > DEFAULT_SAMPLES_PER_SEC)
                break;
        } else {
            // Remember when data was last seen.
            timeStamp = continousAudoDevice->read_ts;
        }
        
        rem = ps_process_raw(psDecoder, audioBuf, samples, FALSE, FALSE);
        
        // Nothing read and nothing reported by the decoder: back off briefly.
        if ((rem == 0) && (samples == 0))
            usleep(20000);
    }
}
Ejemplo n.º 24
0
/*
 * Passes the samples held in the context's filter buffer to the current
 * decoder, first writing them to the recording descriptor when one is open
 * (fdrec >= 0).
 *
 * ctx            - context; the call is a no-op if it, its decoder set,
 *                  the current decoder or the filter buffer is missing.
 * full_utterance - forwarded as the last argument of ps_process_raw.
 */
void filter_buffer_utter(context_t *ctx, bool full_utterance)
{
    decoder_set_t *decset;
    decoder_t *dec;
    filter_buf_t *filtbuf;
    int sts, cnt, size;

    if (!ctx)
        return;

    decset = ctx->decset;
    if (!decset)
        return;

    dec = decset->curdec;
    if (!dec)
        return;

    filtbuf = ctx->filtbuf;
    if (!filtbuf)
        return;

    mrp_debug("utterance length %d samples", filtbuf->len);

    if (filtbuf->len <= 0)
        return;

    if (filtbuf->fdrec >= 0) {
        size = filtbuf->len * sizeof(int16);

        /* Retry only while the write was interrupted (EINTR). */
        do {
            cnt = write(filtbuf->fdrec, filtbuf->buf, size);
        } while (cnt != size && cnt < 0 && errno == EINTR);

        if (cnt != size) {
            mrp_log_error("failed to record samples (fd %d): %s",
                          filtbuf->fdrec, strerror(errno));
        }
    }

    sts = ps_process_raw(dec->ps, filtbuf->buf, filtbuf->len,
                         FALSE, full_utterance);
    if (sts < 0)
        mrp_log_error("Failed to process %d samples", filtbuf->len);
}
Ejemplo n.º 25
0
/*
 * Feeds the frame's first data plane to the decoder. On a speech -> silence
 * transition the utterance is ended, its hypothesis (if any) is stored in
 * the frame metadata under "lavfi.asr.text", and a new utterance is
 * started. The frame is then passed on to the first output.
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    ASRContext *asr = ctx->priv;
    const char *text;

    ps_process_raw(asr->ps, (const int16_t *)in->data[0], in->nb_samples, 0, 0);

    if (ps_get_in_speech(asr->ps)) {
        /* silence -> speech: remember that an utterance is in progress */
        if (!asr->utt_started)
            asr->utt_started = 1;
    } else if (asr->utt_started) {
        /* speech -> silence: publish the result and start over */
        ps_end_utt(asr->ps);
        text = ps_get_hyp(asr->ps, NULL);
        if (text != NULL)
            av_dict_set(&in->metadata, "lavfi.asr.text", text, 0);
        ps_start_utt(asr->ps);
        asr->utt_started = 0;
    }

    return ff_filter_frame(ctx->outputs[0], in);
}
Ejemplo n.º 26
0
/*
 * Continuous recognition from a file
 *
 * Reads the file named by the "-infile" option through the file-level
 * handle `rawfd` and prints one hypothesis per detected utterance (plus
 * word times when "-time" is set). A ".wav" name has its 44-byte header
 * read and checked against "-samprate"; a ".mp3" name is rejected.
 * Failures are reported through E_FATAL / E_FATAL_SYSTEM.
 */
static void
recognize_from_file()
{
    int16 adbuf[2048];
    const char *fname;
    const char *hyp;
    int32 k;
    uint8 utt_started, in_speech;
    int32 print_times = cmd_ln_boolean_r(config, "-time");

    fname = cmd_ln_str_r(config, "-infile");
    if ((rawfd = fopen(fname, "rb")) == NULL) {
        E_FATAL_SYSTEM("Failed to open file '%s' for reading",
                       fname);
    }

    if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) {
        char waveheader[44];
        /* Make sure the whole header was read before inspecting it, so a
         * truncated file never gets checked against indeterminate bytes. */
        if (fread(waveheader, 1, 44, rawfd) != 44)
            E_FATAL("Failed to read WAV header from file '%s'.\n", fname);
        if (!check_wav_header(waveheader, (int)cmd_ln_float32_r(config, "-samprate")))
            E_FATAL("Failed to process file '%s' due to format mismatch.\n", fname);
    }

    if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".mp3") == 0) {
        E_FATAL("Can not decode mp3 files, convert input file to WAV 16kHz 16-bit mono before decoding.\n");
    }

    ps_start_utt(ps);
    utt_started = FALSE;

    while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) {
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition: report and start over */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL);
            if (hyp != NULL)
                printf("%s\n", hyp);
            if (print_times)
                print_word_times();
            fflush(stdout);

            ps_start_utt(ps);
            utt_started = FALSE;
        }
    }
    ps_end_utt(ps);
    /* The file may end while speech is still in progress; report that last
     * utterance as well. */
    if (utt_started) {
        hyp = ps_get_hyp(ps, NULL);
        if (hyp != NULL) {
            printf("%s\n", hyp);
            if (print_times) {
                print_word_times();
            }
        }
    }

    fclose(rawfd);
}
Ejemplo n.º 27
0
/*
 * Main utterance processing loop:
 *     for (;;) {
 * 	   wait for start of next utterance;
 * 	   decode utterance until silence of at least 1 sec observed;
 * 	   print utterance result;
 * 	   translate a "GUGGUG <command> [<argument>]" hypothesis into a
 * 	   command line on stdout;
 *     }
 *
 * Status messages go to stderr; the command lines (LEDON/LEDOFF, START/STOP
 * TRACKING, TURN, MOVE) go to stdout, which is switched to line buffering.
 */
static void
recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[4096];
    int32 k, ts, rem;
    char const *hyp;
    char const *uttid;
    cont_ad_t *cont;
    char word[256];
    char c1[256], c2[256];

    int tracking = 0;
    int halted = 0;
    int LEFT = 0;
    int RIGHT = 1;
    int MOVE_CENT = 100; //1 meter
    int numwords;

    setlinebuf(stdout);

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int)cmd_ln_float32_r(config, "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");

    /* Initialize continuous listening module */
    if ((cont = cont_ad_init(ad, ad_read)) == NULL)
        E_FATAL("Failed to initialize voice activity detection\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");
    if (cont_ad_calib(cont) < 0)
        E_FATAL("Failed to calibrate voice activity detection\n");


    printf("LEDON BLUE\n");
    for (;;) {
        /* Indicate listening for next utterance */
        fprintf(stderr, "READY....\n");
        fflush(stderr);

        /* Wait data for next utterance */
        while ((k = cont_ad_read(cont, adbuf, 4096)) == 0)
            sleep_msec(100);

        if (k < 0)
            E_FATAL("Failed to read audio\n");

        /*
         * Non-zero amount of data received; start recognition of new utterance.
         * NULL argument to uttproc_begin_utt => automatic generation of utterance-id.
         */
        if (ps_start_utt(ps, NULL) < 0)
            E_FATAL("Failed to start utterance\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        fprintf(stderr, "Listening...\n");

        /* Note timestamp for this first block of data */
        ts = cont->read_ts;

        /* Decode utterance until end (marked by a "long" silence, >1sec) */
        for (;;) {
            /* Read non-silence audio data, if any, from continuous listening module */
            if ((k = cont_ad_read(cont, adbuf, 4096)) < 0)
                E_FATAL("Failed to read audio\n");
            if (k == 0) {
                /*
                 * No speech data available; check current timestamp with most recent
                 * speech to see if more than 1 sec elapsed.  If so, end of utterance.
                 */
                if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC)
                    break;
            }
            else {
                /* New speech data received; note current timestamp */
                ts = cont->read_ts;
            }

            /*
             * Decode whatever data was read above.
             */
            rem = ps_process_raw(ps, adbuf, k, FALSE, FALSE);

            /* If no work to be done, sleep a bit */
            if ((rem == 0) && (k == 0))
                sleep_msec(20);
        }

        /*
         * Utterance ended; flush any accumulated, unprocessed A/D data and stop
         * listening until current utterance completely decoded
         */
        ad_stop_rec(ad);
        while (ad_read(ad, adbuf, 4096) >= 0);
        cont_ad_reset(cont);

        fprintf(stderr, "Stopped listening, please wait...\n");
        fflush(stdout);
        /* Finish decoding, obtain and print result */
        ps_end_utt(ps);
        hyp = ps_get_hyp(ps, NULL, &uttid);
        /* hyp is NULL-checked below, so never hand a NULL pointer to %s here. */
        fprintf(stderr, "%s: %s\n", uttid, hyp ? hyp : "(null)");

        /* Interpret the hypothesis as "GUGGUG <command> [<argument>]" */
        if (hyp) {
            /*
             * Reset the tokens first: sscanf() leaves unmatched fields
             * untouched, so a short hypothesis would otherwise be compared
             * against uninitialized data or words from the previous
             * utterance.  The field widths keep long words inside the
             * 256-byte buffers.
             */
            word[0] = '\0';
            c1[0] = '\0';
            c2[0] = '\0';
            numwords = sscanf(hyp, "%255s %255s %255s", word, c1, c2);
            if (strcmp(word, "GUGGUG") == 0) {
                if (strcmp(c1, "HALT") == 0) {
                    printf("LEDOFF BLUE\n");
                    halted = 1;
                } else if (strcmp(c1, "RESUME") == 0) {
                    printf("LEDON BLUE\n");
                    halted = 0;
                }
                if (strcmp(c1, "BEGIN") == 0 || strcmp(c1, "START") == 0) {
                    if (strcmp(c2, "TRACKING") == 0 && !tracking) {
                        printf("START TRACKING\n");
                        tracking = 1;
                        halted = 0;
                    }
                } else if (strcmp(c1, "STOP") == 0) {
                    if (strcmp(c2, "TRACKING") == 0 && tracking) {
                        printf("STOP TRACKING\n");
                        tracking = 0;
                    }
                }
                /* Motion commands are only emitted while neither tracking nor halted. */
                if (!tracking && !halted && numwords == 3) {
                    if (strcmp(c1, "TURN") == 0) {
                        if (strcmp(c2, "AROUND") == 0) {
                            printf("TURN %d 180\n", LEFT);
                        } else if (strcmp(c2, "LEFT") == 0) {
                            printf("TURN %d 90\n", LEFT);
                        } else if (strcmp(c2, "RIGHT") == 0) {
                            printf("TURN %d 90\n", RIGHT);
                        }
                    } else if (strcmp(c1, "MOVE") == 0) {
                        if (strcmp(c2, "FORWARD") == 0) {
                            printf("MOVE 0 %d\n", MOVE_CENT);
                        } else if (strcmp(c2, "BACKWARD") == 0) {
                            printf("MOVE 1 %d\n", MOVE_CENT);
                        }
                    }
                }
            }
        }

        /* Resume A/D recording for next utterance */
        if (ad_start_rec(ad) < 0)
            E_FATAL("Failed to start recording\n");
    }

    /* Not reached: the loop above has no exit path. */
    cont_ad_close(cont);
    ad_close(ad);
}
Ejemplo n.º 28
0
/*
 * Continuous recognition from a file
 *
 * Opens the file named by "-infile", runs it through the continuous
 * listening module to split it into utterances, and decodes each one.
 * With "-time" set, print_word_times() is called with the utterance start
 * offset; otherwise "<uttid>: <hypothesis>" is written to stderr.
 */
static void
recognize_from_file() {
    cont_ad_t *cont;
    ad_rec_t file_ad = {0};
    int16 adbuf[4096];
    const char* hyp;
    const char* uttid;
    int32 k, ts, start;

    char waveheader[44];
    if ((rawfd = fopen(cmd_ln_str_r(config, "-infile"), "rb")) == NULL) {
        E_FATAL_SYSTEM("Failed to open file '%s' for reading",
                       cmd_ln_str_r(config, "-infile"));
    }

    /* Consume the first 44 bytes; their content is not inspected here. */
    fread(waveheader, 1, 44, rawfd);

    file_ad.sps = (int32)cmd_ln_float32_r(config, "-samprate");
    file_ad.bps = sizeof(int16);

    if ((cont = cont_ad_init(&file_ad, ad_file_read)) == NULL) {
        E_FATAL("Failed to initialize voice activity detection");
    }
    if (cont_ad_calib(cont) < 0)
        E_FATAL("Failed to calibrate voice activity detection\n");
    /*
     * NOTE(review): rewinding puts the stream back at offset 0, so the
     * 44 bytes skipped above are presumably read again as audio by
     * ad_file_read -- confirm this is intended.
     */
    rewind (rawfd);

    for (;;) {

        /* Skip forward until the listening module delivers data */
        while ((k = cont_ad_read(cont, adbuf, 4096)) == 0);

        /* A negative count ends the whole run */
        if (k < 0) {
            break;
        }

        if (ps_start_utt(ps, NULL) < 0)
            E_FATAL("ps_start_utt() failed\n");

        ps_process_raw(ps, adbuf, k, FALSE, FALSE);

        ts = cont->read_ts;
        /* Offset of this utterance's first sample, in hundredths of a second */
        start = ((ts - k) * 100.0) / file_ad.sps;

        for (;;) {
            if ((k = cont_ad_read(cont, adbuf, 4096)) < 0)
                break;

            if (k == 0) {
                /*
                 * No speech data available; check current timestamp with most recent
                 * speech to see if more than 1 sec elapsed.  If so, end of utterance.
                 */
                if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC)
                    break;
            }
            else {
                /* New speech data received; note current timestamp */
                ts = cont->read_ts;
            }


            ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        }

        ps_end_utt(ps);

        if (cmd_ln_boolean_r(config, "-time")) {
            print_word_times(start);
        } else {
            hyp = ps_get_hyp(ps, NULL, &uttid);
            /* The decoder may have no hypothesis; never pass NULL to %s. */
            fprintf(stderr, "%s: %s\n", uttid, hyp ? hyp : "(null)");
        }
        fflush(stdout);
    }

    cont_ad_close(cont);
    fclose(rawfd);
}
Ejemplo n.º 29
0
void listen::recognize_from_microphone(){

    ad_rec_t *ad;
    int16 adbuf[4096];
    int32 k, ts, rem;
    char buffer[128];
    char const *hyp;
    char const *uttid;
    cont_ad_t *cont;
    state = SLEEPING;
    FILE* pipe = popen(c.getValue("[General]", "Hcidump").c_str(), "r");
    std::string bufferStr;
    std::size_t found;
    
    if((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"), (int)cmd_ln_float32_r(config, "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");

    /* Initialize continuous listening module */
    if((cont = cont_ad_init(ad, ad_read)) == NULL)
        E_FATAL("Failed to initialize voice activity detection\n");
    if(ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");
    if(cont_ad_calib(cont) < 0)
        E_FATAL("Failed to calibrate voice activity detection\n");

    while(!feof(pipe) || state == SLEEPING){
        fgets(buffer, 128, pipe);
        bufferStr = buffer;

        found = bufferStr.find(c.getValue("[General]", "KeyPress"));
        if(found!=std::string::npos){
            i.pauseIfPlaying();
            s.speakThis(c.getValue("[General]", "WakeUpPhrase"));
            state = ACTIVE;
            while(state != SLEEPING){
            
                /* Indicate listening for next utterance */
                printf("READY....\n");
                fflush(stdout);
                fflush(stderr);

                /* Wait data for next utterance */
                while ((k = cont_ad_read(cont, adbuf, 4096)) == 0){
                    sleep_msec(100);
                }

                if (k < 0)
                    E_FATAL("Failed to read audio\n");

                /*
                 * Non-zero amount of data received; start recognition of new utterance.
                 * NULL argument to uttproc_begin_utt => automatic generation of utterance-id.
                 */
                if (ps_start_utt(ps, NULL) < 0)
                    E_FATAL("Failed to start utterance\n");
                ps_process_raw(ps, adbuf, k, FALSE, FALSE);
                printf("Listening...\n");
                fflush(stdout);

                /* Note timestamp for this first block of data */
                ts = cont->read_ts;

                /* Decode utterance until end (marked by a "long" silence, >1sec) */
                for(;;){
                //while(sleep(2)){
                    /* Read non-silence audio data, if any, from continuous listening module */
                    if ((k = cont_ad_read(cont, adbuf, 4096)) < 0)
                        E_FATAL("Failed to read audio\n");
                    if (k == 0){
                        /*
                         * No speech data available; check current timestamp with most recent
                         * speech to see if more than 1 sec elapsed.  If so, end of utterance.
                         */
                        if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC)
                            break;
                    }
                    else {
                        /* New speech data received; note current timestamp */
                        ts = cont->read_ts;
                    }

                    /*
                     * Decode whatever data was read above.
                     */
                    rem = ps_process_raw(ps, adbuf, k, FALSE, FALSE);

                    /* If no work to be done, sleep a bit */
                    if ((rem == 0) && (k == 0))
                        sleep_msec(20);
                }

                /*
                 * Utterance ended; flush any accumulated, unprocessed A/D data and stop
                 * listening until current utterance completely decoded
                 */
                ad_stop_rec(ad);
                while (ad_read(ad, adbuf, 4096) >= 0);
                cont_ad_reset(cont);

                printf("Stopped listening, please wait...\n");
                fflush(stdout);

                /* Finish decoding, obtain and print result */
                ps_end_utt(ps);
                hyp = ps_get_hyp(ps, NULL, &uttid);

                fflush(stdout);

                /* Resume A/D recording for next utterance */
                if (ad_start_rec(ad) < 0)
                    E_FATAL("Failed to start recording\n");

                if(hyp != NULL){
                    
                    if(hyp == c.getValue("[General]", "Sleep")){
                        state = SLEEPING;
                        s.speakThis(c.getValue("[General]", "SleepPhrase"));
                    }else{
                        if(state != SLEEPING){
                            i.parse(hyp);
                        }
                        
                        //Hack for play/pause/select
                        std::string hypStr = hyp;
                        if(hypStr == "PLAY ITEM" || hypStr == "PAUSE ITEM" || hypStr == "SELECT ITEM"){
                            state = SLEEPING;
                        }
                    }
                }
            }
        }
    }
    pclose(pipe);
    cont_ad_close(cont);
    ad_close(ad);
}
SWIGINTERN int Decoder_processRaw__SWIG_1(Decoder *self,short const shorts[],size_t nshorts,bool no_search,bool full_utt){
		/* Forward the sample buffer and both flags unchanged to ps_process_raw() and hand back its result. */
		int const status = ps_process_raw(self, shorts, nshorts, no_search, full_utt);
		return status;
	}