/// <summary>
/// Start recognizing speech asynchronously.
/// </summary>
/// <returns>
/// <para>S_OK on success, otherwise failure code.</para>
/// </returns>
HRESULT KinectReader::StartSpeechRecognition()
{
    // Begin pulling audio from the Kinect into the stream the recognizer reads.
    HRESULT hr = m_pKinectAudioStream->StartCapture();

    if (SUCCEEDED(hr))
    {
        // Specify that all top level rules in grammar are now active.
        // BUGFIX: this and the next two calls previously discarded their
        // HRESULTs, so a failure left the recognizer silently misconfigured
        // while the function still reported success.
        hr = m_pSpeechGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE);
    }

    if (SUCCEEDED(hr))
    {
        // Specify that engine should always be reading audio
        hr = m_pSpeechRecognizer->SetRecoState(SPRST_ACTIVE_ALWAYS);
    }

    if (SUCCEEDED(hr))
    {
        // Specify that we're only interested in receiving recognition events
        hr = m_pSpeechContext->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION));
    }

    if (SUCCEEDED(hr))
    {
        // Ensure that engine is recognizing speech and not in paused state
        hr = m_pSpeechContext->Resume(0);
        if (SUCCEEDED(hr))
        {
            // Event handle callers wait on to learn about recognitions.
            m_hSpeechEvent = m_pSpeechContext->GetNotifyEventHandle();
        }
    }

    return hr;
}
// Puts the recognizer into the active state, limits notifications to
// recognition events, and resumes the recognition context.
// Returns true only when every SAPI call succeeded.
bool KinectSpeechRecognizer::StartRecognition()
{
    // Specify that engine should always be reading audio
    // (FAILED() used consistently; the original mixed !SUCCEEDED and FAILED).
    HRESULT hr = speechRecognizer->SetRecoState(SPRST_ACTIVE);
    if (FAILED(hr))
    {
        return false;
    }

    // Specify that we're only interested in receiving recognition events
    hr = speechContext->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION));
    if (FAILED(hr))
    {
        return false;
    }

    // Ensure that engine is recognizing speech and not in paused state
    hr = speechContext->Resume(0);
    if (FAILED(hr))
    {
        return false;
    }

    // hr is known to be a success code here, so the original trailing
    // `if (SUCCEEDED(hr))` / `else return false` was dead code — removed.
    isPaused = false;
    return true;
}
Esempio n. 3
0
// One-shot initializer for the KeeperSpeech engine: creates the shared SAPI
// recognizer, its context, callback notification, event interests, and the
// command grammar from this module's resources. On any failure the partial
// state is torn down via KeeperSpeechExit() and the failing step is reported.
KEEPERSPEECH_API KEEPERSPEECH_REASON __cdecl KeeperSpeechInit(void)
{
	HRESULT hr;
	KEEPERSPEECH_REASON failReason;

	failReason = KSR_NOT_KNOWN;

	// Single-pass block: error paths break out, success returns directly.
	do {
		// Shared (cross-process) recognizer instance.
		hr = state.engine.CoCreateInstance(CLSID_SpSharedRecognizer);
		if (FAILED(hr)) {
			failReason = KSR_CREATE_ENGINE;
			break;
		}

		// Context through which events and grammars are managed.
		hr = state.engine->CreateRecoContext(&state.recog);
		if (FAILED(hr)) {
			failReason = KSR_CREATE_RECOG_CONTEXT;
			break;
		}

		// Deliver events through a callback rather than a window message.
		hr = state.recog->SetNotifyCallbackFunction(recognitionCallback, 0, 0);
		if (FAILED(hr)) {
			failReason = KSR_SET_NOTIFY;
			break;
		}

		// Only recognition events are of interest.
		hr = state.recog->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION));
		if (FAILED(hr)) {
			failReason = KSR_SET_INTEREST;
			break;
		}

		hr = state.recog->CreateGrammar(1, &state.grammar);
		if (FAILED(hr)) {
			failReason = KSR_CREATE_GRAMMAR;
			break;
		}

		// The command grammar is compiled into this module's resources.
		hr = state.grammar->LoadCmdFromResource(hModule, MAKEINTRESOURCEW(IDR_COMMAND_GRAMMAR),
			L"SRGRAMMAR", MAKELANGID(LANG_NEUTRAL, SUBLANG_NEUTRAL), SPLO_DYNAMIC);
		if (FAILED(hr)) {
			failReason = KSR_LOAD_GRAMMAR;
			break;
		}

		// Activate every top-level rule in the grammar.
		hr = state.grammar->SetRuleState(NULL, NULL, SPRS_ACTIVE);
		if (FAILED(hr)) {
			failReason = KSR_ACTIVATE_GRAMMAR;
			break;
		}

		return KSR_OK;
	} while (0);

	// Partially initialized: tear everything down before reporting why.
	KeeperSpeechExit();
	return failReason;
}
Esempio n. 4
0
/* Ask SAPI to post PNM_SOUND_ENDS to hwnd when a spoken stream finishes.
 * pV is an ISpVoice* passed through an opaque void*; a NULL voice is a no-op. */
void SetVoiceNotification(void * pV, HWND hwnd){
	HRESULT			hr;
	ISpVoice		*pVoice = pV;
	if(pVoice){
		/* Only the end-of-input-stream TTS event matters here. */
		hr = ISpVoice_SetInterest(pVoice, SPFEI(SPEI_END_INPUT_STREAM), SPFEI(SPEI_END_INPUT_STREAM));
		/* BUGFIX: test with SUCCEEDED() rather than == S_OK — COM calls may
		 * return success codes other than S_OK. */
		if(SUCCEEDED(hr)){
			hr = ISpVoice_SetNotifyWindowMessage(pVoice, hwnd, PNM_SOUND_ENDS, 0, 0);
		}
	}
}
Esempio n. 5
0
// Initializes the SAPI TTS backend: creates the ISpVoice, subscribes to the
// speech progress events, registers the voices, and installs the event
// callback. Returns false (leaving mInitialized unset) on any failure or when
// the backend is disabled by preference.
bool
SapiService::Init()
{
  PROFILER_LABEL_FUNC(js::ProfileEntry::Category::OTHER);

  MOZ_ASSERT(!mInitialized);

  if (Preferences::GetBool("media.webspeech.synth.test") ||
      !Preferences::GetBool("media.webspeech.synth.enabled")) {
    // When enabled, we shouldn't add OS backend (Bug 1160844)
    return false;
  }

  if (FAILED(CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL, IID_ISpVoice,
                              getter_AddRefs(mSapiClient)))) {
    return false;
  }

  // Set interest for all the events we are interested in
  ULONGLONG eventMask =
    SPFEI(SPEI_START_INPUT_STREAM) |
    SPFEI(SPEI_TTS_BOOKMARK) |
    SPFEI(SPEI_WORD_BOUNDARY) |
    SPFEI(SPEI_SENTENCE_BOUNDARY) |
    SPFEI(SPEI_END_INPUT_STREAM);

  if (FAILED(mSapiClient->SetInterest(eventMask, eventMask))) {
    return false;
  }

  // Get all the voices from sapi and register in the SynthVoiceRegistry
  if (!RegisterVoices()) {
    return false;
  }

  // Set the callback function for receiving the events.
  // BUGFIX: the return value was previously ignored; if this call fails no
  // events would ever be delivered, so treat it as an init failure like the
  // other SAPI calls above.
  if (FAILED(mSapiClient->SetNotifyCallbackFunction(
        (SPNOTIFYCALLBACK*) SapiService::SpeechEventCallback, (WPARAM) this, 0))) {
    return false;
  }

  mInitialized = true;
  return true;
}
Esempio n. 6
0
// Minimal SAPI TTS demo: initializes COM, creates a voice, subscribes to the
// progress events, speaks one sentence asynchronously, and pumps messages
// until speech completes. Returns TRUE on success, FALSE on setup failure.
int main(int argc, char* argv[])
{

	CComPtr<IEnumSpObjectTokens> voice_tokens;

	if (FAILED(::CoInitialize(NULL)))
		return FALSE;

	std::cout << "initialized\n";

	//	cout << "creating voice\n";

	if (S_OK != cpVoice.CoCreateInstance(CLSID_SpVoice))
	{
		// BUGFIX: balance the successful CoInitialize above before bailing
		// out; the original early return leaked the COM initialization.
		::CoUninitialize();
		return FALSE;
	}
	std::cout << "voice initialized" << endl;

	// Ask for the full set of progress events while speaking.
	ULONGLONG event_mask =
		SPFEI(SPEI_START_INPUT_STREAM) |
		SPFEI(SPEI_TTS_BOOKMARK) |
		SPFEI(SPEI_WORD_BOUNDARY) |
		SPFEI(SPEI_SENTENCE_BOUNDARY) |
		SPFEI(SPEI_END_INPUT_STREAM);

	cpVoice->SetInterest(event_mask, event_mask);

	std::cout << "interests set" << endl;

	cpVoice->SetNotifyCallbackFunction(SpeechEventCallback, 0, 0);

	std::cout << "callback function set" << endl;

	// Speak asynchronously, then pump messages until the utterance is done.
	cpVoice->Speak(L"This should work", SPF_ASYNC, &stream_number_);
	HANDLE hWait = cpVoice->SpeakCompleteEvent();
	WaitAndPumpMessagesWithTimeout(hWait, INFINITE);


	cpVoice.Release();
	::CoUninitialize();
	return TRUE;
}
// Speaks Qtext through the voice associated with a shared SAPI recognition
// context, then sleeps sleepTime milliseconds so the asynchronous Speak call
// has time to play out.
// NOTE(review): a full reco context + dictation grammar is set up here only
// to obtain its voice — presumably copied from a recognition sample; confirm
// whether a bare ISpVoice would suffice.
void speak(string Qtext,int sleepTime)
{
	HRESULT hr = E_FAIL;

	CSpDynamicString Qtextout;


	if (SUCCEEDED(hr = ::CoInitialize(NULL)))
	{
		{
			// Inner scope so the smart pointers release before CoUninitialize.
			CComPtr<ISpRecoContext> cpRecoCtxt;
			CComPtr<ISpRecoGrammar> cpGrammar;
			CComPtr<ISpVoice> cpVoice;
			hr = cpRecoCtxt.CoCreateInstance(CLSID_SpSharedRecoContext);

			if (SUCCEEDED(hr))
			{
				// Voice tied to the recognition context.
				hr = cpRecoCtxt->GetVoice(&cpVoice);
			}

			// Setup chain; short-circuits on the first failing call.
			if (cpRecoCtxt && cpVoice &&
				SUCCEEDED(hr = cpRecoCtxt->SetNotifyWin32Event()) &&
				SUCCEEDED(hr = cpRecoCtxt->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION))) &&
				SUCCEEDED(hr = cpRecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO, NULL, NULL)) &&
				SUCCEEDED(hr = cpRecoCtxt->CreateGrammar(0, &cpGrammar)) &&
				SUCCEEDED(hr = cpGrammar->LoadDictation(NULL, SPLO_STATIC)) &&
				SUCCEEDED(hr = cpGrammar->SetDictationState(SPRS_ACTIVE)))
			{
				USES_CONVERSION;
				CComPtr<ISpRecoResult> cpResult;
				// Widen the narrow input string into the form SAPI expects.
				Qtextout.operator=(Qtext.c_str());
				cpVoice->Speak(Qtextout, SPF_ASYNC, NULL);
				// Fixed sleep instead of waiting on a completion event —
				// fragile if the utterance runs longer than sleepTime.
				Sleep(sleepTime);
			}
		}
		::CoUninitialize();
	}
}
// Creates a shared SAPI recognizer and recognition context, loads the grammar
// returned by InitGrammar(), activates RULENAME, and then blocks in Update()
// waiting for a recognition.
void SpeechRecognition::InitContext()
{
	//init com
	if (FAILED(::CoInitialize(nullptr))) 	{
		Log("Failed to initialize com library", LogEntry::Error);
		return;
	}

	HRESULT hr;
	// NOTE(review): 'recognizer' is never Release()d (nor is 'recoGrammar'
	// below) — both interface references leak; confirm whether cleanup is
	// handled elsewhere or CheckReturn is expected to abort.
	ISpRecognizer* recognizer;
	hr = CoCreateInstance(CLSID_SpSharedRecognizer, nullptr, CLSCTX_ALL, IID_ISpRecognizer, reinterpret_cast<void**>(&recognizer));
	CheckReturn(hr);

	hr = recognizer->CreateRecoContext(&recoContext);
	CheckReturn(hr);
	
	//pause context while grammar/interest setup happens below
	hr = recoContext->Pause(0);
	CheckReturn(hr);

	//make grammar library
	ISpRecoGrammar* recoGrammar = InitGrammar();

	// Handle the caller (Update) waits on for recognition notifications.
	handleEvent = recoContext->GetNotifyEventHandle();
	if (handleEvent == INVALID_HANDLE_VALUE)
		CheckReturn(E_FAIL);

	// Only recognition events trigger notifications.
	ULONGLONG interest = SPFEI(SPEI_RECOGNITION);
	hr = recoContext->SetInterest(interest, interest);
	CheckReturn(hr);

	//Activate grammar
	hr = recoGrammar->SetRuleState(RULENAME, 0, SPRS_ACTIVE);
	CheckReturn(hr);

	//enable context again
	hr = recoContext->Resume(0);
	CheckReturn(hr);
	
	std::cout << "Waiting for mocha...." << std::endl;

	// Blocks until a recognition arrives (see Update's implementation).
	Update();	

	std::cout << "Hello!" << std::endl;

}
Esempio n. 9
0
// CMyDlg 메시지 처리기입니다.
BOOL CMyDlg::OnInitSpeech()
{
	HRESULT hr = S_OK;

	hr = cpRecoEngine.CoCreateInstance(CLSID_SpInprocRecognizer);

	if (SUCCEEDED(hr))
	{
		hr = cpRecoEngine->CreateRecoContext(&m_cpRecoCtxt);
	}

	if (SUCCEEDED(hr))
	{
		hr = m_cpRecoCtxt->SetNotifyWindowMessage(m_hWnd, WM_RECOEVENT, 0, 0);
	}

	if (SUCCEEDED(hr))
	{
		const ULONGLONG ullInterest = SPFEI(SPEI_RECOGNITION);
		hr = m_cpRecoCtxt->SetInterest(ullInterest, ullInterest);
	}

	CComPtr<ISpAudio> cpAudio;
	hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &cpAudio);

	hr = cpRecoEngine->SetInput(cpAudio, TRUE);
	hr = cpRecoEngine->SetRecoState(SPRST_ACTIVE);

	if (SUCCEEDED(hr))
		hr = m_cpRecoCtxt->CreateGrammar(GID_DICTATION, &m_cpDictationGrammar);

	if (SUCCEEDED(hr))
		hr = m_cpDictationGrammar->LoadDictation(NULL, SPLO_STATIC);

	if (SUCCEEDED(hr))
		hr = m_cpDictationGrammar->SetDictationState(SPRS_ACTIVE);

	if (FAILED(hr))
		m_cpDictationGrammar.Release();

	return (hr == S_OK);
}
Esempio n. 10
0
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//Module implementation
// Module entry point: reads configuration from the ResourceFinder, builds the
// SAPI in-proc recognizer with file/runtime/dictation grammars, opens the
// module's YARP ports, connects to iSpeak, and activates the file grammar.
// Returns true only when every SAPI call and port open succeeded.
bool SpeechRecognizerModule::configure(ResourceFinder &rf )
{
    setName( rf.check("name",Value("speechRecognizer")).asString().c_str() );
    m_timeout = rf.check("timeout",Value(10000)).asInt();
    USE_LEGACY = !rf.check("noLegacy");
    m_forwardSound = rf.check("forwardSound");
    m_tmpFileFolder = rf.getHomeContextPath().c_str();
    interruptRecognition = false;

    //Deal with speech recognition
    string grammarFile = rf.check("grammarFile",Value("defaultGrammar.grxml")).asString().c_str();
	grammarFile = rf.findFile(grammarFile).c_str();

	std::wstring tmp = s2ws(grammarFile);
    // NOTE(review): cwgrammarfile points into 'tmp' — valid only while tmp lives.
    LPCWSTR cwgrammarfile = tmp.c_str();

    m_useTalkBack = rf.check("talkback");

    //Initialise the speech crap
    bool everythingIsFine = true;
    HRESULT hr;
    everythingIsFine = SUCCEEDED( m_cpRecoEngine.CoCreateInstance(CLSID_SpInprocRecognizer));
    everythingIsFine &= SUCCEEDED( SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &m_cpAudio));
    everythingIsFine &= SUCCEEDED( m_cpRecoEngine->CreateRecoContext( &m_cpRecoCtxt ));

    // Only recognition events trigger notifications (despite what the old
    // comment said, sound start/end events are not requested here).
    const ULONGLONG ullInterest = SPFEI(SPEI_RECOGNITION);
    everythingIsFine &= SUCCEEDED(m_cpRecoCtxt->SetInterest(ullInterest, ullInterest));

    // set the input for the engine
    everythingIsFine &= SUCCEEDED( m_cpRecoEngine->SetInput(m_cpAudio, TRUE));
    everythingIsFine &= SUCCEEDED( m_cpRecoEngine->SetRecoState( SPRST_ACTIVE ));

    //Load grammar from file
    everythingIsFine &= SUCCEEDED( m_cpRecoCtxt->CreateGrammar( 1, &m_cpGrammarFromFile ));
    everythingIsFine &= SUCCEEDED( m_cpGrammarFromFile->SetGrammarState(SPGS_DISABLED));
    everythingIsFine &= SUCCEEDED( m_cpGrammarFromFile->LoadCmdFromFile(cwgrammarfile, SPLO_DYNAMIC));
//  everythingIsFine &= loadGrammarFromRf(rf);

    //Create a runtime grammar
    everythingIsFine &= SUCCEEDED( m_cpRecoCtxt->CreateGrammar( 2, &m_cpGrammarRuntime ));
    everythingIsFine &= SUCCEEDED( m_cpGrammarRuntime->SetGrammarState(SPGS_DISABLED));

    //Create a dictation grammar
    everythingIsFine &= SUCCEEDED(m_cpRecoCtxt->CreateGrammar( GID_DICTATION, &m_cpGrammarDictation ));
    everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->LoadDictation(NULL, SPLO_STATIC));
    everythingIsFine &= SUCCEEDED(m_cpGrammarDictation->SetDictationState(SPRS_INACTIVE));
    
    //Setup thing for the raw audio processing
    everythingIsFine &= SUCCEEDED(m_cAudioFmt.AssignFormat(SPSF_22kHz16BitMono));
    // NOTE(review): this hr is not folded into everythingIsFine — a
    // SetAudioOptions failure is silently ignored; confirm if intentional.
    hr = m_cpRecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO, &m_cAudioFmt.FormatId(), m_cAudioFmt.WaveFormatExPtr());
    //everythingIsFine &= SUCCEEDED(hr = SPBindToFile((const WCHAR *)"C:\\temp.wav", SPFM_CREATE_ALWAYS, &m_streamFormat, &m_cAudioFmt.FormatId(), m_cAudioFmt.WaveFormatExPtr()));

    //CComPtr <ISpStream>     cpStream = NULL;
    //CSpStreamFormat         cAudioFmt;
    //hr = cAudioFmt.AssignFormat(SPSF_22kHz16BitMono);
    //hr = SPBindToFile((const WCHAR *)"c:\\ttstemp.wav", SPFM_CREATE_ALWAYS, &cpStream,  &cAudioFmt.FormatId(), cAudioFmt.WaveFormatExPtr());

    // Only open the module's ports once all of SAPI initialized.
    if( everythingIsFine )
    {
        string pName = "/";
        pName += getName().c_str();
        pName += "/recog/continuous:o";
        m_portContinuousRecognition.open( pName.c_str() );

        pName = "/";
		pName += getName().c_str();
        pName += "/recog/continuousGrammar:o";
        m_portContinuousRecognitionGrammar.open( pName.c_str() );

        pName = "/";
		pName += getName().c_str();
        pName += "/recog/sound:o";   
        m_portSound.open(pName.c_str());

        //iSpeak
        pName = "/";
		pName += getName().c_str();
        pName += "/tts/iSpeak:o"; 
        m_port2iSpeak.open( pName.c_str() );
                
        pName = "/";
		pName += getName().c_str();
        pName += "/tts/iSpeak/rpc"; 
        m_port2iSpeakRpc.open( pName.c_str() );
        // Best-effort connection to iSpeak; a failure is only a warning.
        if (Network::connect(m_port2iSpeak.getName().c_str(),"/iSpeak")&&Network::connect(m_port2iSpeakRpc.getName().c_str(),"/iSpeak/rpc"))
            yInfo() <<"Connection to iSpeak succesfull" ;
        else
            yWarning() <<"Unable to connect to iSpeak. Connect manually." ;

        pName = "/";
		pName += getName().c_str();
        pName += "/rpc";
        m_portRPC.open( pName.c_str() );
        attach(m_portRPC);

        //Start recognition
        //everythingIsFine &= SUCCEEDED(m_cpRecoEngine->SetRecoState(SPRST_ACTIVE_ALWAYS));
        everythingIsFine &= SUCCEEDED(m_cpGrammarFromFile->SetRuleState(NULL, NULL, SPRS_ACTIVE));
        everythingIsFine &= SUCCEEDED( m_cpGrammarFromFile->SetGrammarState(SPGS_ENABLED));
    }

    return (everythingIsFine);
}
// Speaks Qtext, then blocks for one dictation recognition result and maps the
// recognized phrase (including common misrecognitions) onto a command code:
//   1 = true/two, 2 = false/falls/follows, 3 = A/eight, 4 = B/bee,
//   5 = C/see, 6 = fire, 7 = leave/quit variants, 8 = switch/change.
// Returns 0 when nothing was recognized or the phrase is unknown.
int voiceRecognition(string Qtext)
{
	HRESULT hr = E_FAIL;
	int word = 0;

	CSpDynamicString Qtextout;


	if (SUCCEEDED(hr = ::CoInitialize(NULL)))
	{
		{
			// Inner scope so smart pointers release before CoUninitialize.
			CComPtr<ISpRecoContext> cpRecoCtxt;
			CComPtr<ISpRecoGrammar> cpGrammar;
			CComPtr<ISpVoice> cpVoice;
			hr = cpRecoCtxt.CoCreateInstance(CLSID_SpSharedRecoContext);

			if (SUCCEEDED(hr))
			{
				hr = cpRecoCtxt->GetVoice(&cpVoice);
			}

			// Setup chain; short-circuits on the first failing call.
			if (cpRecoCtxt && cpVoice &&
				SUCCEEDED(hr = cpRecoCtxt->SetNotifyWin32Event()) &&
				SUCCEEDED(hr = cpRecoCtxt->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION))) &&
				SUCCEEDED(hr = cpRecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO, NULL, NULL)) &&
				SUCCEEDED(hr = cpRecoCtxt->CreateGrammar(0, &cpGrammar)) &&
				SUCCEEDED(hr = cpGrammar->LoadDictation(NULL, SPLO_STATIC)) &&
				SUCCEEDED(hr = cpGrammar->SetDictationState(SPRS_ACTIVE)))
			{
				USES_CONVERSION;

				CComPtr<ISpRecoResult> cpResult;

				// Speak the prompt, then wait for a recognition.
				Qtextout.operator=(Qtext.c_str());
				cpVoice->Speak(Qtextout, SPF_ASYNC, NULL);

				if (SUCCEEDED(hr = BlockForResult(cpRecoCtxt, &cpResult)))
				{
					cpGrammar->SetDictationState(SPRS_INACTIVE);

					CSpDynamicString dstrText;

					if (SUCCEEDED(cpResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE,
						TRUE, &dstrText, NULL)))
					{
						cpResult.Release();
					}

					// BUGFIX: only inspect dstrText when GetText actually
					// produced it — the original passed a possibly-NULL
					// pointer to _wcsicmp (undefined behavior).
					if ((WCHAR*)dstrText != NULL)
					{
						// Phrase -> command table replaces the long if-chain;
						// comparisons are case-insensitive and exact, so at
						// most one entry can match.
						static const struct { const WCHAR* phrase; int code; } kPhraseMap[] =
						{
							{ L"True", 1 }, { L"Two", 1 },
							{ L"False", 2 }, { L"Falls", 2 }, { L"Follows", 2 },
							{ L"A", 3 }, { L"Eight", 3 },
							{ L"B", 4 }, { L"Bee", 4 },
							{ L"C", 5 }, { L"See", 5 },
							{ L"Fire", 6 },
							{ L"Leave", 7 }, { L"Leave it", 7 }, { L"We've", 7 },
							{ L"Quit", 7 }, { L"Quits", 7 },
							{ L"Switch", 8 }, { L"Change", 8 },
						};
						for (size_t i = 0; i < sizeof(kPhraseMap) / sizeof(kPhraseMap[0]); ++i)
						{
							if (_wcsicmp(dstrText, kPhraseMap[i].phrase) == 0)
							{
								word = kPhraseMap[i].code;
								break;
							}
						}

						// NOTE(review): CopyToChar() allocates a narrow copy
						// that is never freed — confirm and plug the leak.
						cout << dstrText.CopyToChar();
					}
					cpGrammar->SetDictationState(SPRS_ACTIVE);
				}
			}
		}
		::CoUninitialize();
	}
	return word;
}
Esempio n. 12
0
// Initializes voice-command recognition: in-proc engine, reco context routed
// to hWnd via event_id, a command grammar loaded from data\phrases.xml (or
// the compiled-in SRGRAMMAR resource), and the default audio input. On any
// failure the partially built SAPI state is torn down. Returns hr == S_OK.
bool VOICEREC_init(HWND hWnd, int event_id, int grammar_id, int command_resource)
{
	HRESULT hr = S_OK;

	while (true)
	{
		// create a recognition engine
		hr = p_recogEngine.CoCreateInstance(CLSID_SpInprocRecognizer);
		if (FAILED(hr))
		{
			os::dialogs::Message(os::dialogs::MESSAGEBOX_ERROR, "Failed to create a recognition engine\n","Error");
			printf("Failed to create a recognition engine\n");
			break;
		}

		// create the command recognition context
		hr = p_recogEngine->CreateRecoContext( &p_recogContext );
		if (FAILED(hr))
		{
			os::dialogs::Message(os::dialogs::MESSAGEBOX_ERROR,"Failed to create the command recognition context\n","Error");
			printf("Failed to create the command recognition context\n");
			break;
		}

		// Let SR know that window we want it to send event information to, and using
		// what message
		hr = p_recogContext->SetNotifyWindowMessage( hWnd, event_id, 0, 0 );
		if (FAILED(hr))
		{
			os::dialogs::Message(os::dialogs::MESSAGEBOX_ERROR,"Failed to SetNotifyWindowMessage\n","Error");
			break;
		}

		// Tell SR what types of events interest us.  Here we only care about command
		// recognition.
		hr = p_recogContext->SetInterest( SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION) );
		if (FAILED(hr))
		{
			os::dialogs::Message(os::dialogs::MESSAGEBOX_ERROR,"Failed to set events\n","Error");
			break;
		}

		// Create a grammar
		hr = p_recogContext->CreateGrammar(grammar_id, &p_grammarObject);
		if (FAILED(hr))
		{
			os::dialogs::Message(os::dialogs::MESSAGEBOX_ERROR,"Failed to create grammar\n","Error");
			break;
		}

		// Load our grammar from data\phrases.xml, or if that doesn't exist, from the compiled in 
		// user defined ("SRGRAMMAR") resource type.
		hr = p_grammarObject->LoadCmdFromFile(L"data\\phrases.xml", SPLO_STATIC);
		if (FAILED(hr))
		{
			hr = p_grammarObject->LoadCmdFromResource(NULL, MAKEINTRESOURCEW(command_resource),
												L"SRGRAMMAR", MAKELANGID(LANG_NEUTRAL, SUBLANG_NEUTRAL),
												SPLO_STATIC);
			if (FAILED(hr))
			{
				os::dialogs::Message(os::dialogs::MESSAGEBOX_ERROR,"Failed to load resource SRGRAMMAR\n","Error");
				break;
			}
		}

		hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &cpAudio);
		if (FAILED(hr))
		{
			os::dialogs::Message(os::dialogs::MESSAGEBOX_ERROR,"Failed to get default audio input\n", "Error");
			break;
		}

		// Set the audio input to our token.
		hr = p_recogEngine->SetInput(cpAudio, TRUE);
		if (FAILED(hr))
		{
			os::dialogs::Message(os::dialogs::MESSAGEBOX_ERROR,"Failed to set audio input\n", "Error");
			// BUGFIX: this branch was missing its break, so execution fell
			// through to SetRuleState and the later hr assignment could mask
			// the SetInput failure, making init report success.
			break;
		}

		// Set rules to active, we are now listening for commands
		hr = p_grammarObject->SetRuleState(NULL, NULL, SPRS_ACTIVE );
		if (FAILED(hr))
		{
			os::dialogs::Message(os::dialogs::MESSAGEBOX_ERROR,"Failed to set listening for commands\n","Error");
			break;
		}

		break;
	}

	// if we failed and have a partially setup SAPI, close it all down
	if (FAILED(hr))
	{
		VOICEREC_deinit();
	}

	os::events::addEventListener(SDL_SYSWMEVENT, os::events::DEFAULT_LISTENER_WEIGHT, system_event_handler);

	return ( hr == S_OK);
}
Esempio n. 13
0
/**********************************************************
* COperator::InitializeSapi *
*---------------------------*
*   Description:
*       Performs the operator's SAPI setup: local TTS
*       voice, inproc phone recognizer and context, event
*       interests, retained audio, dictation grammar, and
*       the outgoing phone voice.
*   Return:
*       S_OK if SAPI initialized successfully
*       Otherwise, the HRESULT of the first failing call
***********************************************************/
HRESULT COperator::InitializeSapi()
{
    HRESULT hr;

    // TTS voice for announcements on this machine
    if ( FAILED( hr = m_cpLocalVoice.CoCreateInstance( CLSID_SpVoice ) ) )
    {
        DoMessage( L"Could not create a TTS voice on the local machine" );
        return hr;
    }

    // Inproc recognizer for phone audio — likely using a non-default format,
    // which a shared recognizer would not allow
    if ( FAILED( hr = m_cpIncomingRecognizer.CoCreateInstance( CLSID_SpInprocRecognizer ) ) )
    {
        DoMessage(L"CoCreateInstance on inproc reco engine failed");
        return hr;
    }

    // Recognition context on that engine
    if ( FAILED( hr = m_cpIncomingRecognizer->CreateRecoContext( &m_cpIncomingRecoCtxt ) ) )
    {
        DoMessage(L"Could not create recognition context");
        return hr;
    }

    // Only PHRASE_START, RECOGNITION and FALSE_RECOGNITION events matter
    const ULONGLONG ullEvents = SPFEI(SPEI_PHRASE_START) | SPFEI(SPEI_RECOGNITION) |
                                SPFEI(SPEI_FALSE_RECOGNITION);
    if ( FAILED( hr = m_cpIncomingRecoCtxt->SetInterest( ullEvents, ullEvents ) ) )
    {
        DoMessage(L"Could not set interest in SAPI events");
        return hr;
    }

    // Keep the audio of recognized phrases for later playback
    if ( FAILED( hr = m_cpIncomingRecoCtxt->SetAudioOptions( SPAO_RETAIN_AUDIO, NULL, NULL ) ) )
    {
        DoMessage(L"Could not set audio options to retain recognized audio");
        return hr;
    }

    // Dictation grammar: create it, then load the dictation topic
    if ( FAILED( hr = m_cpIncomingRecoCtxt->CreateGrammar( 0, &m_cpDictGrammar ) ) )
    {
        DoMessage(L"Could not create dictation grammar");
        return hr;
    }

    if ( FAILED( hr = m_cpDictGrammar->LoadDictation( NULL, SPLO_STATIC ) ) )
    {
        DoMessage(L"Could not load dictation");
        return hr;
    }

    // Voice used for talking over the phone, tied to the reco context
    if ( FAILED( hr = m_cpIncomingRecoCtxt->GetVoice( &m_cpOutgoingVoice ) ) )
    {
        DoMessage(L"Could not create a TTS voice for speaking over the phone");
        return hr;
    }

    return S_OK;
}   /* COperator::InitializeSapi */
Esempio n. 14
0
// Builds a SAPI-based caption source: finds the best recognizer for `lang`,
// creates an inproc recognizer + context, subscribes to recognition and
// end-of-stream events, feeds it audio via a CaptionStream, loads a static
// dictation grammar, and starts the worker thread. Errors are reported by
// throwing from the function-try-block; both handlers log and rethrow
// CAPTIONS_ERROR_GENERIC_FAIL.
mssapi_captions::mssapi_captions(
		captions_cb callback,
		const std::string &lang) try
	: captions_handler(callback, AUDIO_FORMAT_16BIT, 16000)
{
	HRESULT hr;

	// Naive narrow->wide conversion; locale names are ASCII so this is safe.
	std::wstring wlang;
	wlang.resize(lang.size());

	for (size_t i = 0; i < lang.size(); i++)
		wlang[i] = (wchar_t)lang[i];

	LCID lang_id = LocaleNameToLCID(wlang.c_str(), 0);

	// SAPI token attribute query, e.g. L"language=409".
	wchar_t lang_str[32];
	_snwprintf(lang_str, 31, L"language=%x", (int)lang_id);

	// Manual-reset=false event used to signal the worker thread to stop.
	stop = CreateEvent(nullptr, false, false, nullptr);
	if (!stop.Valid())
		throw "Failed to create event";

	hr = SpFindBestToken(SPCAT_RECOGNIZERS, lang_str, nullptr, &token);
	if (FAILED(hr))
		throw HRError("SpFindBestToken failed", hr);

	hr = CoCreateInstance(CLSID_SpInprocRecognizer, nullptr, CLSCTX_ALL,
			__uuidof(ISpRecognizer), (void**)&recognizer);
	if (FAILED(hr))
		throw HRError("CoCreateInstance for recognizer failed", hr);

	hr = recognizer->SetRecognizer(token);
	if (FAILED(hr))
		throw HRError("SetRecognizer failed", hr);

	// Keep the engine inactive while the context/grammar are configured.
	hr = recognizer->SetRecoState(SPRST_INACTIVE);
	if (FAILED(hr))
		throw HRError("SetRecoState(SPRST_INACTIVE) failed", hr);

	hr = recognizer->CreateRecoContext(&context);
	if (FAILED(hr))
		throw HRError("CreateRecoContext failed", hr);

	// Recognitions plus end-of-stream, so the worker knows when audio stops.
	ULONGLONG interest = SPFEI(SPEI_RECOGNITION) |
	                     SPFEI(SPEI_END_SR_STREAM);
	hr = context->SetInterest(interest, interest);
	if (FAILED(hr))
		throw HRError("SetInterest failed", hr);

	hr = context->SetNotifyWin32Event();
	if (FAILED(hr))
		throw HRError("SetNotifyWin32Event", hr);

	notify = context->GetNotifyEventHandle();
	if (notify == INVALID_HANDLE_VALUE)
		throw HRError("GetNotifyEventHandle failed", E_NOINTERFACE);

	// Custom stream that feeds OBS audio into the recognizer; the recognizer
	// keeps its own reference, so drop ours immediately after SetInput.
	size_t sample_rate = audio_output_get_sample_rate(obs_get_audio());
	audio = new CaptionStream((DWORD)sample_rate, this);
	audio->Release();

	hr = recognizer->SetInput(audio, false);
	if (FAILED(hr))
		throw HRError("SetInput failed", hr);

	hr = context->CreateGrammar(1, &grammar);
	if (FAILED(hr))
		throw HRError("CreateGrammar failed", hr);

	hr = grammar->LoadDictation(nullptr, SPLO_STATIC);
	if (FAILED(hr))
		throw HRError("LoadDictation failed", hr);

	// Worker thread drives recognition; see main_thread().
	try {
		t = std::thread([this] () {main_thread();});
	} catch (...) {
		throw "Failed to create thread";
	}

} catch (const char *err) {
	blog(LOG_WARNING, "%s: %s", __FUNCTION__, err);
	throw CAPTIONS_ERROR_GENERIC_FAIL;

} catch (HRError err) {
	blog(LOG_WARNING, "%s: %s (%lX)", __FUNCTION__, err.str, err.hr);
	throw CAPTIONS_ERROR_GENERIC_FAIL;
}
Esempio n. 15
0
/******************************************************************************
* InitSAPI *
*----------*
*   Description:
*       One-time SAPI startup: shared recognizer, reco context, window
*       notifications, the command grammar bound in as an "SRGRAMMAR"
*       resource, initial rule states, and the context's default voice.
*       On any failure all partially created objects are torn down via
*       CleanupSAPI().
*
******************************************************************************/
HRESULT InitSAPI( HWND hWnd )
{
    HRESULT hr = S_OK;

    do
    {
        // Shared recognition engine (out of proc, shared across apps)
        hr = g_cpEngine.CoCreateInstance(CLSID_SpSharedRecognizer);
        if ( FAILED( hr ) )
            break;

        // Recognition context for our commands
        hr = g_cpEngine->CreateRecoContext( &g_cpRecoCtxt );
        if ( FAILED( hr ) )
            break;

        // Route SR event notifications to hWnd as WM_RECOEVENT messages
        hr = g_cpRecoCtxt->SetNotifyWindowMessage( hWnd, WM_RECOEVENT, 0, 0 );
        if ( FAILED( hr ) )
            break;

        // Only command-recognition events are of interest
        hr = g_cpRecoCtxt->SetInterest( SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION) );
        if ( FAILED( hr ) )
            break;

        // Grammar: compiled form of simple.xml, bound into this executable as
        // a user-defined ("SRGRAMMAR") resource
        hr = g_cpRecoCtxt->CreateGrammar(GRAMMARID1, &g_cpCmdGrammar);
        if ( FAILED( hr ) )
            break;

        hr = g_cpCmdGrammar->LoadCmdFromResource(NULL, MAKEINTRESOURCEW(IDR_CMD_CFG),
                                                 L"SRGRAMMAR", MAKELANGID(LANG_NEUTRAL, SUBLANG_NEUTRAL),
                                                 SPLO_DYNAMIC);
        if ( FAILED( hr ) )
            break;

        // Navigation rule active, espresso-order rule inactive to start
        hr = g_cpCmdGrammar->SetRuleIdState( VID_EspressoDrinks, SPRS_INACTIVE );
        if ( FAILED( hr ) )
            break;

        hr = g_cpCmdGrammar->SetRuleIdState( VID_Navigation, SPRS_ACTIVE );
        if ( FAILED( hr ) )
            break;

        // Default voice associated with our reco context
        hr = g_cpRecoCtxt->GetVoice(&g_cpVoice);
    } while ( 0 );

    // if we failed and have a partially setup SAPI, close it all down
    if ( FAILED( hr ) )
    {
        CleanupSAPI();
    }

    return ( hr );
}
Esempio n. 16
0
// The main AISA initializer: builds the inproc recognizer, routes recognition
// events to vHwnd as WM_RECOGEVENT, picks the default audio-in token, loads a
// static dictation grammar, and creates a TTS voice posting WM_TTSEVENT.
// Any failure jumps to `exit`, which tears everything down via destroy().
bool AISALib::init(HWND vHwnd)
{
	HRESULT hr = CoInitialize(NULL);
	AL_FAILED_GOTO_MSG(hr, logger, "CoInitialize..failed\n", exit);

	hr = CoCreateInstance(CLSID_SpInprocRecognizer, NULL, CLSCTX_INPROC_SERVER, IID_ISpRecognizer, (void **) &recognizer);
	AL_FAILED_GOTO_MSG(hr, logger, "CoCreateInstance CLSID_SpInprocRecognizer -> IID_ISpRecognizer..failed\n", exit);

	hr = recognizer->CreateRecoContext(&recogCtx);
	AL_FAILED_GOTO_MSG(hr, logger, "recognizer->CreateRecoContext..failed\n", exit);

	hr = recogCtx->SetNotifyWindowMessage(vHwnd, WM_RECOGEVENT, 0, 0 );
	AL_FAILED_GOTO_MSG(hr, logger, "recogCtx->SetNotifyWindowMessage..failed\n", exit);

	// Only recognition events are of interest on the SR side.
    const ULONGLONG ullInterest = SPFEI(SPEI_RECOGNITION);
    hr = recogCtx->SetInterest(ullInterest, ullInterest);
	AL_FAILED_GOTO_MSG(hr, logger, "recogCtx->SetInterest..failed\n", exit);

	// Enumerate the audio-in category and take the first (default) token.
	hr = CoCreateInstance(CLSID_SpObjectTokenCategory, NULL, CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (void **) &audioInCat);
	AL_FAILED_GOTO_MSG(hr, logger, "CoCreateInstance(CLSID_SpObjectTokenCategory..failed\n", exit);

	hr = audioInCat->SetId(SPCAT_AUDIOIN, false);
	AL_FAILED_GOTO_MSG(hr, logger, "audioInCat->SetId(SPCAT_AUDIOIN\n", exit);
	
	hr = audioInCat->EnumTokens(NULL, NULL, &cpEnum);
	AL_FAILED_GOTO_MSG(hr, logger, "audioInCat->EnumTokens..failed\n", exit);

	hr = cpEnum->Next(1, &token, NULL);
	AL_FAILED_GOTO_MSG(hr, logger, "cpEnum->Next..failed\n", exit);

	hr= token->CreateInstance(NULL, CLSCTX_INPROC_SERVER, IID_ISpAudio, (void **) &audio);
	AL_FAILED_GOTO_MSG(hr, logger, "token->CreateInstance IID_ISpAudio..failed\n", exit);

	// Enumeration helpers are no longer needed once the audio object exists.
	AL_RELEASE(token, logger, "Release token..\n");
	AL_RELEASE(cpEnum, logger, "Release cpEnum..\n");
	AL_RELEASE(audioInCat, logger, "Release audioInCat..\n");

	hr = recognizer->SetInput(audio, TRUE);
	AL_FAILED_GOTO_MSG(hr, logger, "recognizer->SetInput..failed\n", exit);

	hr = recognizer->SetRecoState(SPRST_ACTIVE);
	AL_FAILED_GOTO_MSG(hr, logger, "recognizer->SetRecoState SPRST_ACTIVE..failed\n", exit);

	hr = recogCtx->CreateGrammar(GID_DICTATION, &grammar);
	AL_FAILED_GOTO_MSG(hr, logger, "recogCtx->CreateGrammar..failed\n", exit);

	hr = grammar->LoadDictation(NULL, SPLO_STATIC);
	AL_FAILED_GOTO_MSG(hr, logger, "grammar->LoadDictation SPLO_STATIC..failed\n", exit);

	hr = grammar->SetDictationState(SPRS_ACTIVE);
	AL_FAILED_GOTO_MSG(hr, logger, "grammar->SetDictationState SPRS_ACTIVE..failed\n", exit);

	hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_INPROC_SERVER, IID_ISpVoice, (void **) &voice);
	AL_FAILED_GOTO_MSG(hr, logger, "CoCreateInstance(CLSID_SpVoice..failed.\n", exit);

	// BUGFIX: the result of this call was previously discarded; check it like
	// every other SAPI call so a wiring failure is not silently ignored.
	hr = voice->SetNotifyWindowMessage(vHwnd, WM_TTSEVENT, 0, 0);
	AL_FAILED_GOTO_MSG(hr, logger, "voice->SetNotifyWindowMessage..failed\n", exit);

	hr = voice->SetInterest( SPFEI_ALL_TTS_EVENTS, SPFEI_ALL_TTS_EVENTS );
	AL_FAILED_GOTO_MSG(hr, logger, "voice->SetInterest SPFEI_ALL_TTS_EVENTS..failed \n", exit);

	return true;
exit:
	destroy();
	return false;

}
Esempio n. 17
0
// Window procedure: WM_CREATE sets up a shared SAPI recognizer with a command
// grammar loaded from er.xml; WM_RECOEVENT drains recognition events and
// dispatches the recognized QQ-control command (open/close/hide/show/move).
LRESULT CALLBACK WndProc(HWND hwnd, UINT message, WPARAM wParam, LPARAM lParam)
{
	HDC           hdc;
	PAINTSTRUCT   ps;

	switch (message)
	{
	case WM_CREATE:
	{
		// Initialize COM for this thread.
		::CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);
		// Create the recognition engine as a shared recognizer instance.
		HRESULT hr = m_cpRecoEngine.CoCreateInstance(CLSID_SpSharedRecognizer);
		// Create the recognition context interface.
		if (SUCCEEDED(hr))
		{
			hr = m_cpRecoEngine->CreateRecoContext(&m_cpRecoCtxt);
		}
		else MessageBox(hwnd, TEXT("error1"), TEXT("error"), S_OK);
		// Route recognition events to this window so speech is monitored continuously.
		if (SUCCEEDED(hr))
		{
			hr = m_cpRecoCtxt->SetNotifyWindowMessage(hwnd, WM_RECOEVENT, 0, 0);
		}
		else MessageBox(hwnd, TEXT("error2"), TEXT("error"), S_OK);
		// Select the events we are interested in.
		if (SUCCEEDED(hr))
		{
			ULONGLONG ullMyEvents = SPFEI(SPEI_SOUND_START) | SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_SOUND_END);
			hr = m_cpRecoCtxt->SetInterest(ullMyEvents, ullMyEvents);
		}
		else MessageBox(hwnd, TEXT("error3"), TEXT("error"), S_OK);
		// Create the grammar rules.
		b_Cmd_Grammar = TRUE;
		if (FAILED(hr))
		{
			MessageBox(hwnd, TEXT("error4"), TEXT("error"), S_OK);
		}
		hr = m_cpRecoCtxt->CreateGrammar(GID_CMD_GR, &m_cpCmdGramma);
		// BUGFIX: the buffer was WCHAR[20] while MultiByteToWideChar was told
		// it could hold 256 characters — a potential stack buffer overrun.
		WCHAR wszXMLFile[256] = L"er.xml";
		MultiByteToWideChar(CP_ACP, 0, (LPCSTR)"er.xml", -1, wszXMLFile, 256);
		hr = m_cpCmdGramma->LoadCmdFromFile(wszXMLFile, SPLO_DYNAMIC);
		if (FAILED(hr))
		{
			MessageBox(hwnd, TEXT("error5"), TEXT("error"), S_OK);
		}
		b_initSR = TRUE;
		// Activate the grammar so recognition begins.
		hr = m_cpCmdGramma->SetRuleState(NULL, NULL, SPRS_ACTIVE);
		return 0;
	}
	case WM_RECOEVENT:
	{
		RECT rect;
		GetClientRect(hwnd, &rect);
		hdc = GetDC(hwnd);
		USES_CONVERSION;
		CSpEvent event;
		// Drain every queued SAPI event for this context.
		while (event.GetFrom(m_cpRecoCtxt) == S_OK)
		{
			switch (event.eEventId)
			{
			case SPEI_RECOGNITION:
			{
				static const WCHAR wszUnrecognized[] = L"<Unrecognized>";
				CSpDynamicString dstrText;
				// Fetch the recognized phrase text.
				if (FAILED(event.RecoResult()->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL)))
				{
					dstrText = wszUnrecognized;
				}
				BSTR SRout;
				dstrText.CopyToBSTR(&SRout);
				char* lpszText2 = _com_util::ConvertBSTRToString(SRout);

				if (b_Cmd_Grammar)
				{
					// NOTE(review): strstr arguments look reversed (this tests
					// whether the recognized text occurs inside the command
					// literal) — kept as-is to preserve behavior; confirm intent.
					if (strstr("打开企鹅", lpszText2) != NULL)
					{
						DrawText(hdc, TEXT("打开企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
						openQQ();

					}

					if (strstr("关闭企鹅", lpszText2) != NULL)
					{
						DrawText(hdc, TEXT("关闭企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
						closeQQ();
					}

					if (strstr("隐藏企鹅", lpszText2) != NULL)
					{
						DrawText(hdc, TEXT("隐藏企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
						yincangQQ();
					}

					if (strstr("显示企鹅", lpszText2) != NULL)
					{
						DrawText(hdc, TEXT("显示企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
						showQQ();
					}

					if (strstr("上移企鹅", lpszText2) != NULL)
					{
						DrawText(hdc, TEXT("上移企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
						top();
					}

					if (strstr("下移企鹅", lpszText2) != NULL)
					{
						DrawText(hdc, TEXT("下移企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
						bottom();
					}

					if (strstr("左移企鹅", lpszText2) != NULL)
					{
						DrawText(hdc, TEXT("左移企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
						left();
					}

					if (strstr("右移企鹅", lpszText2) != NULL)
					{
						DrawText(hdc, TEXT("右移企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
						right();
					}
				}
				// BUGFIX: free the BSTR and the converted narrow string; the
				// original leaked both on every recognition event.
				SysFreeString(SRout);
				delete[] lpszText2;
			}
			}
		}
		// BUGFIX: release the DC obtained with GetDC above (was leaked).
		ReleaseDC(hwnd, hdc);
		return TRUE;
	}
	case WM_PAINT:
		hdc = BeginPaint(hwnd, &ps);
		EndPaint(hwnd, &ps);
		return 0;
	case WM_DESTROY:
		PostQuitMessage(0);
		return 0;
	}
	return DefWindowProc(hwnd, message, wParam, lParam);
}
Esempio n. 18
0
// Speech Initialization is done here.
//
// Creates an in-process SAPI recognizer and a recognition context, registers
// a Win32-event notification for SPEI_RECOGNITION, loads a static dictation
// grammar, and binds the audio input to either a wav source (file path or
// in-memory IStream) or the default microphone.
//
// Parameters:
//   sPathToFile - path to a wav file to recognize from; may be empty.
//   pMemStream  - in-memory wav stream; takes precedence over sPathToFile
//                 when non-NULL. If both are empty/NULL, the default
//                 audio-in device is used instead.
//
// Returns S_OK on success. On any failure the dictation grammar is released
// and the failing HRESULT is returned; each step runs only if all previous
// steps succeeded (hr-chaining), so the first error is the one reported.
HRESULT CASRwrapper::InitSpeech(std::wstring sPathToFile, IStream * pMemStream)
{
	HRESULT hr = S_OK;

	// In-process recognizer: engine runs inside this application,
	// not the shared system recognizer.
	hr = cpRecoEngine.CoCreateInstance(CLSID_SpInprocRecognizer);

	if (SUCCEEDED(hr))
	{
		hr = cpRecoEngine->CreateRecoContext(&m_cpRecoCtxt);
	}

	if (SUCCEEDED(hr))
	{
		// Use a Win32 event for notifications; the alternative callback /
		// window-message mechanisms are kept below for reference.
		// NOTE(review): wparam/lparam are only used by the commented-out
		// callback variant and are otherwise unused here.
		WPARAM wparam = NULL;
		LPARAM lparam = NULL;
		hr = m_cpRecoCtxt->SetNotifyWin32Event();
		//hr = m_cpRecoCtxt->SetNotifyCallbackFunction(SpRecCallback,wparam,lparam);
		//	hr = m_cpRecoCtxt->SetNotifyWindowMessage(m_hWnd, WM_RECOEVENT, 0, 0);
	}

	if (SUCCEEDED(hr))
	{
		// This specifies which of the recognition events are going to
		// trigger notifications. Here all we are interested in is when the
		// engine has recognized something (SPEI_RECOGNITION), set on the
		// ISpRecoContext.
		const ULONGLONG ullInterest = SPFEI(SPEI_RECOGNITION);
		hr = m_cpRecoCtxt->SetInterest(ullInterest, ullInterest);
	}

	if (SUCCEEDED(hr))
	{
		// Specifies that the grammar we want is a dictation grammar.
		// Initializes the grammar (m_cpDictationGrammar)
		// using ISpRecoContext.
		hr = m_cpRecoCtxt->CreateGrammar(GID_DICTATION, &m_cpDictationGrammar);
	}

	if (SUCCEEDED(hr))
	{
		// Load the dictation topic for the grammar (static: grammar will
		// not be edited at runtime) using ISpRecoGrammar.
		hr = m_cpDictationGrammar->LoadDictation(NULL, SPLO_STATIC);
	}

	// Wav input requested (file path or memory stream)?
	if (!sPathToFile.empty() || pMemStream != NULL)
	{
		CComPtr<ISpStream> cpInputStream;
		if (SUCCEEDED(hr))
		{
			// Create basic SAPI stream object
			// NOTE: The helper SpBindToFile can be used to perform the following operations
			hr = cpInputStream.CoCreateInstance(CLSID_SpStream);
		}

		CSpStreamFormat sInputFormat;
		// generate WaveFormatEx structure, assuming the wav format is
		// 44kHz, 16-bit, Mono (the actual source must match this)
		if (SUCCEEDED(hr))
		{
			hr = sInputFormat.AssignFormat(SPSF_44kHz16BitMono);
		}

		if (pMemStream != NULL)
		{
			if (SUCCEEDED(hr))
			{
				// Wrap the caller's IStream as the SAPI stream's base stream.
				hr = cpInputStream->SetBaseStream(pMemStream, SPDFID_WaveFormatEx, sInputFormat.WaveFormatExPtr());
			}
		}
		else
		{
			if (SUCCEEDED(hr))
			{
				// Bind the wav file for read-only access, since it will
				// only be accessed by the SR engine.
				hr = cpInputStream->BindToFile(sPathToFile.c_str(),
					SPFM_OPEN_READONLY,
					&(sInputFormat.FormatId()),
					sInputFormat.WaveFormatExPtr(),
					SPFEI_ALL_EVENTS);
			}
		}

		if (SUCCEEDED(hr))
		{
			// connect wav input to recognizer
			// SAPI will negotiate mismatched engine/input audio formats using system audio codecs, so second parameter is not important - use default of TRUE
			hr = cpRecoEngine->SetInput(cpInputStream, TRUE);
		}

	}
	else //connect to mic
	{
		// create default audio object (default audio-in device)
		CComPtr<ISpAudio> cpAudio;
		if (SUCCEEDED(hr))
		{
			hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &cpAudio);
		}

		// set the input for the engine
		if (SUCCEEDED(hr))
		{
			hr = cpRecoEngine->SetInput(cpAudio, TRUE);
		}

		// Start the engine listening. NOTE(review): only done on the mic
		// path; the wav path presumably relies on the caller to activate
		// recognition - confirm against callers.
		if (SUCCEEDED(hr))
		{
			hr = cpRecoEngine->SetRecoState(SPRST_ACTIVE);
		}
	}


	if (FAILED(hr))
	{
		// Release the grammar using ISpRecoGrammar so a failed init does
		// not leave a half-initialized grammar behind.
		m_cpDictationGrammar.Release();
	}

	return hr;
}
Esempio n. 19
0
/*********************************************************************************************
* CSimpleDict::InitDialog()
*   Creates the in-proc recognizer and recognition context, routes recognition
*   notifications to hDlg as WM_RECOEVENT, binds the default microphone as
*   input, and loads/activates a static dictation grammar.
*
*   BUG FIX: the audio-object creation, SetInput and SetRecoState calls used
*   to overwrite 'hr' unconditionally, masking any earlier failure (and then
*   dereferencing a NULL m_cpRecoCtxt). Every step is now hr-chained.
*
*   Returns TRUE iff successful.
**********************************************************************************************/
bool CSimpleDict::InitDialog( HWND hDlg )
{
    m_hDlg = hDlg;
    
    HRESULT hr = S_OK;
    CComPtr<ISpRecognizer> cpRecoEngine;
    hr = cpRecoEngine.CoCreateInstance(CLSID_SpInprocRecognizer);

    if( SUCCEEDED( hr ) )
    {
        hr = cpRecoEngine->CreateRecoContext( &m_cpRecoCtxt );
    }

    // Set recognition notification for dictation
    if (SUCCEEDED(hr))
    {
        hr = m_cpRecoCtxt->SetNotifyWindowMessage( hDlg, WM_RECOEVENT, 0, 0 );
    }

    if (SUCCEEDED(hr))
    {
        // This specifies which of the recognition events are going to trigger notifications.
        // Here, all we are interested in is when the engine has recognized something.
        const ULONGLONG ullInterest = SPFEI(SPEI_RECOGNITION);
        hr = m_cpRecoCtxt->SetInterest(ullInterest, ullInterest);
    }

    // create default audio object (default audio-in device)
    CComPtr<ISpAudio> cpAudio;
    if (SUCCEEDED(hr))
    {
        hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &cpAudio);
    }

    // set the input for the engine
    if (SUCCEEDED(hr))
    {
        hr = cpRecoEngine->SetInput(cpAudio, TRUE);
    }

    // start the engine listening
    if (SUCCEEDED(hr))
    {
        hr = cpRecoEngine->SetRecoState( SPRST_ACTIVE );
    }

    if (SUCCEEDED(hr))
    {
        // Specifies that the grammar we want is a dictation grammar.
        // Initializes the grammar (m_cpDictationGrammar)
        hr = m_cpRecoCtxt->CreateGrammar( GID_DICTATION, &m_cpDictationGrammar );
    }
    if  (SUCCEEDED(hr))
    {
        // static dictation topic: grammar is not edited at runtime
        hr = m_cpDictationGrammar->LoadDictation(NULL, SPLO_STATIC);
    }
    if (SUCCEEDED(hr))
    {
        // begin receiving dictation results
        hr = m_cpDictationGrammar->SetDictationState( SPRS_ACTIVE );
    }
    if (FAILED(hr))
    {
        // don't leave a half-initialized grammar behind
        m_cpDictationGrammar.Release();
    }

    return (hr == S_OK);
}
// Initialize SAPI: create an in-process recognizer, bind the default
// microphone as input, create a recognition context, and route speech
// events to the given window as message 'Msg'.
//
// Parameters:
//   hWnd - window that will receive recognition-event notifications.
//   Msg  - window message posted to hWnd for each speech event.
//
// Returns S_OK on success; on any failure a QMessageBox is shown and the
// failing HRESULT is returned immediately.
HRESULT SREngine::InitializeSapi(WId hWnd, UINT Msg)
{
	HRESULT hr = S_OK;

	//FOR ONE NOT FOR ALL
	/* in-process (exclusive) recognizer configuration */
	hr = m_cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);  // in-proc mode (not the shared system recognizer)
	if (FAILED(hr))
	{
		QMessageBox::information(NULL, "Error", "Create recognizer error", MB_OK);
		return hr;
	}

	hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &m_cpAudio); // create the default audio-in object (microphone)
	if (FAILED(hr))
	{
		QMessageBox::information(NULL, "Error", "Create default audio object error", MB_OK);
		return hr;
	}

	hr = m_cpRecognizer->SetInput(m_cpAudio, TRUE);  // set the recognizer's input source
	if (FAILED(hr))
	{
		QMessageBox::information(NULL, "Error", "Error setINPUT", MB_OK);
		return hr;
	}

	hr = m_cpRecognizer->CreateRecoContext(&m_cpRecoContext);   // create the recognition context interface
	if (FAILED(hr))
	{
		QMessageBox::information(NULL, "Error", "Error CreateRecoContext", MB_OK);
		return hr;
	}

	hr = m_cpRecoContext->SetNotifyWindowMessage((HWND)hWnd, Msg, 0, 0);  // deliver notifications as window messages
	if (FAILED(hr))
	{
		QMessageBox::information(NULL, "Error", "Error SetNotifyWindowMessage", MB_OK);
		return hr;
	}

	// Subscribe to the full set of engine events (sound/phrase boundaries,
	// hypotheses, recognitions, interference, UI requests, state and
	// property changes).
	const ULONGLONG ullInterest = SPFEI(SPEI_SOUND_START) | SPFEI(SPEI_SOUND_END) |
		SPFEI(SPEI_PHRASE_START) | SPFEI(SPEI_RECOGNITION) |
		SPFEI(SPEI_FALSE_RECOGNITION) | SPFEI(SPEI_HYPOTHESIS) |
		SPFEI(SPEI_INTERFERENCE) | SPFEI(SPEI_RECO_OTHER_CONTEXT) |
		SPFEI(SPEI_REQUEST_UI) | SPFEI(SPEI_RECO_STATE_CHANGE) |
		SPFEI(SPEI_PROPERTY_NUM_CHANGE) | SPFEI(SPEI_PROPERTY_STRING_CHANGE);
	hr = m_cpRecoContext->SetInterest(ullInterest, ullInterest);   // register the events of interest
	if (FAILED(hr))
	{
		QMessageBox::information(NULL, "Error", "Error set interest", MB_OK);
	}

	return hr;
}
Esempio n. 21
0
// Dictation echo sample: repeats everything the user says, optionally via
// TTS ("-noTTS" disables) and/or by replaying the retained audio
// ("-noReplay" disables). Exits when the stop word is spoken.
//
// BUG FIXES vs. original:
//  * the stop-word comparison ran even when GetText failed, passing a NULL
//    CSpDynamicString to _wcsicmp (undefined behavior) - now only compared
//    when a phrase was actually retrieved;
//  * cpResult is now released on every iteration, not just on GetText
//    success, so the next BlockForResult(&cpResult) never sees a non-empty
//    CComPtr (whose operator& asserts in debug builds).
int main(int argc, char* argv[])
{
    HRESULT hr = E_FAIL;
    bool fUseTTS = true;            // turn TTS play back on or off
    bool fReplay = true;            // turn Audio replay on or off

    // Process optional arguments
    if (argc > 1)
    {
        int i;

        for (i = 1; i < argc; i++)
        {
            if (_stricmp(argv[i], "-noTTS") == 0)
            {
                fUseTTS = false;
                continue;
            }
            if (_stricmp(argv[i], "-noReplay") == 0)
            {
                fReplay = false;
                continue;
            }       
            printf ("Usage: %s [-noTTS] [-noReplay] \n", argv[0]);
            return hr;
        }
    }

    if (SUCCEEDED(hr = ::CoInitialize(NULL)))
    {
        {
            CComPtr<ISpRecoContext> cpRecoCtxt;
            CComPtr<ISpRecoGrammar> cpGrammar;
            CComPtr<ISpVoice> cpVoice;
            hr = cpRecoCtxt.CoCreateInstance(CLSID_SpSharedRecoContext);
            if(SUCCEEDED(hr))
            {
                hr = cpRecoCtxt->GetVoice(&cpVoice);
            }

            // Win32-event notification, recognition events only, retain the
            // audio so it can be replayed, then load and activate a static
            // dictation grammar.
            if (cpRecoCtxt && cpVoice &&
                SUCCEEDED(hr = cpRecoCtxt->SetNotifyWin32Event()) &&
                SUCCEEDED(hr = cpRecoCtxt->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION))) &&
                SUCCEEDED(hr = cpRecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO, NULL, NULL)) &&
                SUCCEEDED(hr = cpRecoCtxt->CreateGrammar(0, &cpGrammar)) &&
                SUCCEEDED(hr = cpGrammar->LoadDictation(NULL, SPLO_STATIC)) &&
                SUCCEEDED(hr = cpGrammar->SetDictationState(SPRS_ACTIVE)))
            {
                USES_CONVERSION;
                            
                const WCHAR * const pchStop = StopWord();
                CComPtr<ISpRecoResult> cpResult;

                printf( "I will repeat everything you say.\nSay \"%s\" to exit.\n", W2A(pchStop) );

                while (SUCCEEDED(hr = BlockForResult(cpRecoCtxt, &cpResult)))
                {
                    // Pause dictation while we speak, so we don't hear ourselves.
                    cpGrammar->SetDictationState( SPRS_INACTIVE );

                    bool fStop = false;
                    CSpDynamicString dstrText;

                    if (SUCCEEDED(cpResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, 
                                                    TRUE, &dstrText, NULL)))
                    {
                        printf("I heard:  %s\n", W2A(dstrText));

                        if (fUseTTS)
                        {
                            cpVoice->Speak( L"I heard", SPF_ASYNC, NULL);
                            cpVoice->Speak( dstrText, SPF_ASYNC, NULL );
                        }

                        if (fReplay)
                        {
                            if (fUseTTS)
                                cpVoice->Speak( L"when you said", SPF_ASYNC, NULL);
                            else
                                printf ("\twhen you said...\n");
                            // replay the retained audio of the recognition
                            cpResult->SpeakAudio(NULL, 0, NULL, NULL);
                        }

                        // Compare only while dstrText is known to be valid.
                        fStop = (_wcsicmp(dstrText, pchStop) == 0);
                    }

                    // Always drop the result before the next BlockForResult call.
                    cpResult.Release();

                    if (fStop)
                    {
                        break;
                    }
                    
                    cpGrammar->SetDictationState( SPRS_ACTIVE );
                } 
            }
        }
        ::CoUninitialize();
    }
    return hr;
}
Esempio n. 22
0
// Construct the objects needed for speech recognition.
//
// Parameters:
//   inToWave     - path to a wav file to recognize from; empty = use the
//                  default microphone.
//   inGrammarXML - path to a command grammar XML file; empty = free
//                  dictation instead of a rule grammar.
//
// Throws RComException (with the failing HRESULT) on any SAPI error.
// NOTE: the exception messages are intentionally left in the original
// Japanese - they are runtime strings, not comments.
void RSpeechRecognition::Create(const string & inToWave,const string & inGrammarXML) throw(RComException)
{
	USES_CONVERSION;

	HRESULT hr;

	// Create the recognition engine object:
	//	CLSID_SpSharedRecognizer		shared (system-wide) recognizer
	//	CLSID_SpInprocRecognizer		runs inside this application
	
	if ( inToWave.empty() )
	{
//		hr = this->Engine.CoCreateInstance(CLSID_SpSharedRecognizer);
//		if(FAILED(hr))	throw RComException(hr , "CLSID_SpSharedRecognizer 構築 に失敗");
		hr = this->Engine.CoCreateInstance(CLSID_SpInprocRecognizer);
		if(FAILED(hr))	throw RComException(hr , "CLSID_SpInprocRecognizer 構築 に失敗");


		// Default audio-in object (microphone).
		CComPtr<ISpAudio> cpAudio;
		hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &cpAudio);
		if(FAILED(hr))	throw RComException(hr , "SpCreateDefaultObjectFromCategoryId に失敗");

		// Set it as the engine's input.
		hr = this->Engine->SetInput(cpAudio, TRUE);
		if(FAILED(hr))	throw RComException(hr , "SetInput に失敗");

//		hr = this->Engine->SetRecoState( SPRST_ACTIVE );
//		if(FAILED(hr))	throw RComException(hr , "SetRecoState に失敗");
	}
	else
	{
		// Wav-file input: bind the file read-only and feed it to the engine.
		CComPtr<ISpStream> cpStream;

		hr = this->Engine.CoCreateInstance(CLSID_SpInprocRecognizer);
		if(FAILED(hr))	throw RComException(hr , "CLSID_SpInprocRecognizer 構築 に失敗");

		hr = cpStream.CoCreateInstance(CLSID_SpStream);
		if(FAILED(hr))	throw RComException(hr , "CoCreateInstance  CLSID_SpStream に失敗");

		hr = cpStream->BindToFile( A2W( inToWave.c_str() ) , SPFM_OPEN_READONLY , NULL , NULL,  SPFEI_ALL_EVENTS);  
	    if(FAILED(hr))	throw RComException(hr , "BindToFile に失敗");

		hr = this->Engine->SetInput( cpStream, TRUE);  
	    if(FAILED(hr))	throw RComException( this->Engine , CLSID_SpSharedRecognizer , hr , "SetInput に失敗");
	}

	// Create the recognition context object.
	hr = this->Engine->CreateRecoContext(&this->RecoCtxt);
	if(FAILED(hr))	throw RComException(hr , "CreateRecoContext に失敗");

	// Notify via a Win32 event; only recognition events are of interest,
	// and the audio is retained so it can be replayed later.
	hr = this->RecoCtxt->SetNotifyWin32Event();
	if ( FAILED(hr) )	throw RComException(hr , "SetNotifyWin32Event に失敗");

	hr = this->RecoCtxt->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION));
	if ( FAILED(hr) )	throw RComException(hr , "SetInterest に失敗");

	hr = this->RecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO, NULL, NULL);
	if ( FAILED(hr) )	throw RComException(hr , "SetAudioOptions に失敗");


	// Create the main grammar.
	hr = this->RecoCtxt->CreateGrammar(0, &this->DictationGrammar);
	if ( FAILED(hr) )	throw RComException(hr , "CreateGrammar に失敗");

	hr = this->DictationGrammar->LoadDictation(NULL, SPLO_STATIC);
	if ( FAILED(hr) )	throw RComException(hr , "LoadDictation に失敗");

	if ( inGrammarXML.empty() )
	{
		// No rule grammar given: start free dictation.
		hr = this->DictationGrammar->SetDictationState( SPRS_ACTIVE );
		if ( FAILED(hr) )	throw RComException(hr , "SetDictationState に失敗");
	}
	else
	{
		// Load the command grammar from the user-specified XML file.
		hr = this->DictationGrammar->LoadCmdFromFile( A2W( inGrammarXML.c_str() ) ,SPLO_STATIC);
		if ( FAILED(hr) )	throw RComException(hr , "LoadCmdFromFile に失敗");

		// Activate all top-level rules to start recognition.
		hr = this->DictationGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE );
		if ( FAILED(hr) )	throw RComException(hr , "SetRuleState に失敗");
	}
}
*/


#include "stdafx.h"
#include "sapi_lipsync.h"
#include "phone_estimate.h"
#include "sapi_util.h"

///////////////////////////////////////////////////////////////////////////////
/// constants
///////////////////////////////////////////////////////////////////////////////

#define GID_LIPSYNC   0   // grammar identifier passed to ISpRecoContext::CreateGrammar

/// Interest mask for SAPI events: subscribe to sound/phrase boundaries,
/// hypotheses, (false) recognitions, interference, UI requests, end of the
/// SR input stream, and recognizer state/property changes.
const ULONGLONG ullInterest = SPFEI(SPEI_SOUND_START) | SPFEI(SPEI_SOUND_END) |
                                      SPFEI(SPEI_PHRASE_START) | SPFEI(SPEI_RECOGNITION) |
                                      SPFEI(SPEI_FALSE_RECOGNITION) | SPFEI(SPEI_HYPOTHESIS) |
                                      SPFEI(SPEI_INTERFERENCE) | SPFEI(SPEI_RECO_OTHER_CONTEXT) |
                                      SPFEI(SPEI_REQUEST_UI) | SPFEI(SPEI_RECO_STATE_CHANGE) |
                                      SPFEI(SPEI_END_SR_STREAM) | 
                                      SPFEI(SPEI_PROPERTY_NUM_CHANGE) | SPFEI(SPEI_PROPERTY_STRING_CHANGE);




///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// sapi_lipsync class implementation
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
Esempio n. 24
0
// Initialize speech recognition: wire the Kinect 16-bit PCM audio stream
// into an in-process SAPI recognizer configured for mainland Chinese,
// load a static SRGS grammar, and start recognition.
//
// BUG FIXES vs. original: the HRESULTs of SetInput and SetRecognizer were
// ignored (failures were silently swallowed) and a stray ";;" followed the
// first CoCreateInstance. Both are corrected; every step is hr-chained so
// the first failing HRESULT is the one returned.
//
// Returns S_OK on success; on success also stores the recognition-event
// handle in m_hSpeechEvent and enables the audio stream.
HRESULT ThisApp::init_speech_recognizer(){
    HRESULT hr = S_OK;
    // Create the SAPI audio input stream object
    if (SUCCEEDED(hr)){
        hr = CoCreateInstance(CLSID_SpStream, nullptr, CLSCTX_INPROC_SERVER, __uuidof(ISpStream), (void**)&m_pSpeechStream);
    }
    // Connect it to the Kinect audio source
    if (SUCCEEDED(hr)){
        WAVEFORMATEX wft = {
            WAVE_FORMAT_PCM, // PCM encoding
            1, // mono
            16000,  // 16 kHz sample rate
            32000, // average bytes/sec = sample rate * block align
            2, // block align: mono * 16-bit sample = 2 bytes
            16, // 16 bits per sample
            0 // no extra format data
        };
        // attach the Kinect 16-bit PCM stream as the base stream
        hr = m_pSpeechStream->SetBaseStream(m_p16BitPCMAudioStream, SPDFID_WaveFormatEx, &wft);
    }
    // Create the speech recognizer
    if (SUCCEEDED(hr)){
        ISpObjectToken *pEngineToken = nullptr;
        // in-process recognizer (engine runs inside this application)
        hr = CoCreateInstance(CLSID_SpInprocRecognizer, nullptr, CLSCTX_INPROC_SERVER, __uuidof(ISpRecognizer), (void**)&m_pSpeechRecognizer);
        if (SUCCEEDED(hr)) {
            // Hook up the input stream created above (result now checked).
            hr = m_pSpeechRecognizer->SetInput(m_pSpeechStream, TRUE);
            // Select mainland Chinese (zh-cn, LANGID 0x804) as the language.
            // There is currently no Kinect-specific Chinese acoustic model;
            // with one installed this could be "language=804;Kinect=True".
            if (SUCCEEDED(hr)) {
                hr = SpFindBestToken(SPCAT_RECOGNIZERS, L"Language=804", nullptr, &pEngineToken);
            }
            if (SUCCEEDED(hr)) {
                // Apply the chosen engine token (result now checked).
                hr = m_pSpeechRecognizer->SetRecognizer(pEngineToken);
            }
            // Create the recognition context
            if (SUCCEEDED(hr)) {
                hr = m_pSpeechRecognizer->CreateRecoContext(&m_pSpeechContext);
            }
            // Disable acoustic adaptation (0 = off) so recognition quality
            // does not degrade over a long-running session.
            if (SUCCEEDED(hr))  {
                hr = m_pSpeechRecognizer->SetPropertyNum(L"AdaptationOn", 0);
            }
        }
        SafeRelease(pEngineToken);
    }
    // Create the grammar container
    if (SUCCEEDED(hr)){
        hr = m_pSpeechContext->CreateGrammar(1, &m_pSpeechGrammar);
    }
    // Load the static SRGS grammar file
    if (SUCCEEDED(hr)){
        hr = m_pSpeechGrammar->LoadCmdFromFile(s_GrammarFileName, SPLO_STATIC);
    }
    // Activate all top-level grammar rules
    if (SUCCEEDED(hr)){
        hr = m_pSpeechGrammar->SetRuleState(nullptr, nullptr, SPRS_ACTIVE);
    }
    // Keep the recognizer reading audio at all times
    if (SUCCEEDED(hr)){
        hr = m_pSpeechRecognizer->SetRecoState(SPRST_ACTIVE_ALWAYS);
    }
    // Only recognition events are of interest
    if (SUCCEEDED(hr)){
        hr = m_pSpeechContext->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION));
    }
    // Make sure the engine is recognizing and not paused
    if (SUCCEEDED(hr)){
        hr = m_pSpeechContext->Resume(0);
    }
    // Grab the event handle used to wait for recognitions
    if (SUCCEEDED(hr)){
        m_p16BitPCMAudioStream->SetSpeechState(TRUE);
        m_hSpeechEvent = m_pSpeechContext->GetNotifyEventHandle();
		printf_s("init_speech_recognizer succeeded\n");
    }
#ifdef _DEBUG
    else
        printf_s("init_speech_recognizer failed\n");
#endif
    return hr;
}
Esempio n. 25
0
/******************************************************************************
* CounterPaneProc *
*-----------------*
*   Description:
*       Handles messages specifically for the counter (order) pane.
*
*       WM_ESPRESSOORDER carries (in lParam) a heap-allocated ID_TEXT array
*       terminated by an entry with ulId == 0. This proc takes ownership of
*       that array AND of each pwstrCoMemText string (CoTaskMem allocated by
*       ISpPhrase::GetText) and frees both before returning.
*
*       Returns 1 when the message was handled, 0 otherwise.
*
******************************************************************************/
LRESULT CounterPaneProc( HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam )
{
    USES_CONVERSION;
    HRESULT hr;

    switch ( message )
    {
        case WM_ESPRESSOORDER:
        {
            _ASSERTE( lParam );
            // Stop the revert-to-welcome timer while we show this order.
            KillTimer( hWnd, 0 );
            ID_TEXT *pulIds = (ID_TEXT *) lParam;
            int i = 0, ilen = 0;
            TCHAR szTempBuf[NORMAL_LOADSTRING];
            TCHAR szSpace[] =  _T(" ");
            int iTemplen;
            
            g_szCounterDisplay[0] = '\0';

            // Sort the array by ulId (selection sort); first count the
            // entries up to the 0 terminator.
            while ( 0 != pulIds[i].ulId )
            {
                i++;
            }
            for ( int j = 0; j < i; j++ )
            {
                int iminIndex = j;               
                for ( int k = j; k < i; k++ )
                {
                    if ( pulIds[iminIndex].ulId > pulIds[k].ulId )
                    {
                        iminIndex = k;
                    }
                }
                // Swap entry j with the smallest remaining entry.
                ULONG ulId = pulIds[iminIndex].ulId;
                WCHAR *pwstr = pulIds[iminIndex].pwstrCoMemText;
                pulIds[iminIndex].pwstrCoMemText = pulIds[j].pwstrCoMemText;
                pulIds[j].pwstrCoMemText = pwstr;
                pulIds[iminIndex].ulId = pulIds[j].ulId;
                pulIds[j].ulId = ulId;
            }
            
            i = 0;
            // Put in the first order words if we actually have an order
            if ( 0 != pulIds[0].ulId )
            {
                iTemplen = LoadString( g_hInst, IDS_ORDERBEGIN, szTempBuf, NORMAL_LOADSTRING );                
                lstrcat( g_szCounterDisplay + ilen, szTempBuf );
                ilen += iTemplen;
            }
            // Append the sorted order words, space separated, guarding
            // against overflowing g_szCounterDisplay.
            while ( i < MAX_ID_ARRAY && 0 != pulIds[i].ulId )
            {
                TCHAR *pTempStr = W2T( pulIds[i].pwstrCoMemText );

                iTemplen = lstrlen( pTempStr );
                // We'll quit now so we dont overrun the buffer
                if ( ilen + iTemplen >= MAX_LOADSTRING )
                {
                    break;
                }
                if ( i > 0 )
                {
                    lstrcat( g_szCounterDisplay + ilen, szSpace );
                    ilen += 1;
                }
                lstrcat( g_szCounterDisplay, pTempStr );
                ilen += iTemplen;
                i++;
            }
            // Put the thank you on this order
            if ( 0 < i )
            {
                iTemplen = LoadString( g_hInst, IDS_ORDEREND, szTempBuf, NORMAL_LOADSTRING );                
                if ( ilen + iTemplen < MAX_LOADSTRING )
                {
                    lstrcat( g_szCounterDisplay + ilen, szTempBuf );
                    ilen += iTemplen;
                }
            }

            // Repaint with the order text, then revert after TIMEOUT.
            InvalidateRect( hWnd, NULL, TRUE );
            SetTimer( hWnd, 0, TIMEOUT, NULL );

            // Speak the order
		    g_cpVoice->Speak( T2W(g_szCounterDisplay), SPF_ASYNC, NULL);

            // Delete the CoTaskMem we were given initially by ISpPhrase->GetText
            i = 0;
            while ( i < MAX_ID_ARRAY && 0 != pulIds[i].ulId )
            {
                CoTaskMemFree( pulIds[i].pwstrCoMemText );
                i++;
            }
            delete [] pulIds;
            return ( 1 );

        }

        case WM_PAINT:
            CounterPanePaint( hWnd, g_szCounterDisplay );
            return ( 1 );

        case WM_INITPANE:
            LoadString( g_hInst, IDS_PLEASEORDER, g_szCounterDisplay, MAX_LOADSTRING );
            // Set the rule recognizing an espresso order to active, now that we are ready for it
            g_cpCmdGrammar->SetRuleIdState( VID_EspressoDrinks, SPRS_ACTIVE );

            // Set our interests to include false recognitions
            hr = g_cpRecoCtxt->SetInterest( SPFEI(SPEI_RECOGNITION)|SPFEI(SPEI_FALSE_RECOGNITION),
                                                    SPFEI(SPEI_RECOGNITION)|SPFEI(SPEI_FALSE_RECOGNITION) );
            _ASSERTE( SUCCEEDED( hr ) );

            // Speak the welcome string
		    g_cpVoice->Speak( T2W(g_szCounterDisplay), SPF_ASYNC, NULL);

            return ( 1 );

        case WM_TIMER:
            // Revert back to 'go ahead and order' message
            LoadString( g_hInst, IDS_PLEASEORDER, g_szCounterDisplay, MAX_LOADSTRING );
            InvalidateRect( hWnd, NULL, TRUE );

            // Speak the welcome string
		    g_cpVoice->Speak( T2W(g_szCounterDisplay), SPF_ASYNC, NULL);

            // One-shot: the timer re-arms only on the next order.
            KillTimer( hWnd, 0 );
            return ( 1 );

        case WM_GOTOOFFICE:
            KillTimer( hWnd, 0 );
            // Set the rule recognizing an espresso order to inactive
            // since you cant order from the office
            g_cpCmdGrammar->SetRuleIdState( VID_EspressoDrinks, SPRS_INACTIVE );

            // Set our interests to include only recognitions
            hr = g_cpRecoCtxt->SetInterest( SPFEI(SPEI_RECOGNITION),SPFEI(SPEI_RECOGNITION) );
            _ASSERTE( SUCCEEDED( hr ) );

            // Set the right message handler and repaint
            g_fpCurrentPane = OfficePaneProc;
            PostMessage( hWnd, WM_INITPANE, NULL, NULL );
            InvalidateRect( hWnd, NULL, TRUE );
            return ( 1 );

        case WM_DIDNTUNDERSTAND:
            KillTimer( hWnd, 0 );
            LoadString( g_hInst, IDS_DIDNTUNDERSTAND, g_szCounterDisplay, MAX_LOADSTRING );
            InvalidateRect( hWnd, NULL, TRUE );
            // Speak the didn't understand string
		    g_cpVoice->Speak( T2W(g_szCounterDisplay), SPF_ASYNC, NULL);
            SetTimer( hWnd, 0, TIMEOUT, NULL );
            return ( 1 );

    }
    return ( 0 );
}
Esempio n. 26
0
//-----------------------------------------------------------------------------
// Purpose: Given a wave file and a string of words "text", creates a CFG from the
//  sentence and stores the resulting words/phonemes in CSentence
// Input  : *wavname - 
//			text - 
//			sentence - 
//			(*pfnPrint - 
// Output : SR_RESULT
//-----------------------------------------------------------------------------
SR_RESULT ExtractPhonemes( const char *wavname, CSpDynamicString& text, CSentence& sentence, void (*pfnPrint)( const char *fmt, ...) )
{
	// Assume failure
	SR_RESULT result = SR_RESULT_ERROR;

	if ( text.Length() <= 0 )
	{
		pfnPrint( "Error:  no rule / text specified\n" );
		return result;
	}

	USES_CONVERSION;
	HRESULT hr;
	
	CUtlVector < WORDRULETYPE > wordRules;

	CComPtr<ISpStream> cpInputStream;
	CComPtr<ISpRecognizer> cpRecognizer;
	CComPtr<ISpRecoContext> cpRecoContext;
	CComPtr<ISpRecoGrammar> cpRecoGrammar;
	CComPtr<ISpPhoneConverter>  cpPhoneConv;
    
	// Create basic SAPI stream object
	// NOTE: The helper SpBindToFile can be used to perform the following operations
	hr = cpInputStream.CoCreateInstance(CLSID_SpStream);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Stream object not installed?\n" );
		return result;
	}

	CSpStreamFormat sInputFormat;
	
	// setup stream object with wav file MY_WAVE_AUDIO_FILENAME
	//   for read-only access, since it will only be access by the SR engine
	hr = cpInputStream->BindToFile(
		T2W(wavname),
		SPFM_OPEN_READONLY,
		NULL,
		sInputFormat.WaveFormatExPtr(),
		SPFEI_ALL_EVENTS );

	if ( FAILED( hr ) )
	{
		pfnPrint( "Error: couldn't open wav file %s\n", wavname );
		return result;
	}
	
	// Create in-process speech recognition engine
	hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 In process recognizer object not installed?\n" );
		return result;
	}

	// Create recognition context to receive events
	hr = cpRecognizer->CreateRecoContext(&cpRecoContext);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to create recognizer context\n" );
		return result;
	}
	
	// Create a grammar
	hr = cpRecoContext->CreateGrammar( EP_GRAM_ID, &cpRecoGrammar );
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to create recognizer grammar\n" );
		return result;
	}

	LANGID englishID = 0x409; // 1033 decimal

	bool userSpecified = false;
	LANGID langID = SpGetUserDefaultUILanguage();

	// Allow commandline override
	if ( CommandLine()->FindParm( "-languageid" ) != 0 )
	{
		userSpecified = true;
		langID = CommandLine()->ParmValue( "-languageid", langID );
	}

	// Create a phoneme converter ( so we can convert to IPA codes )
	hr = SpCreatePhoneConverter( langID, NULL, NULL, &cpPhoneConv );
	if ( FAILED( hr ) )
	{
		if ( langID != englishID )
		{
			if ( userSpecified )
			{
				pfnPrint( "Warning:  SAPI 5.1 Unable to create phoneme converter for command line override -languageid %i\n", langID );
			}
			else
			{
				pfnPrint( "Warning:  SAPI 5.1 Unable to create phoneme converter for default UI language %i\n",langID );
			}

			// Try english!!!
			langID = englishID;
			hr = SpCreatePhoneConverter( langID, NULL, NULL, &cpPhoneConv );
		}

		if ( FAILED( hr ) )
		{
			pfnPrint( "Error:  SAPI 5.1 Unable to create phoneme converter for English language id %i\n", langID );
			return result;
		}
		else
		{
			pfnPrint( "Note:  SAPI 5.1 Falling back to use english -languageid %i\n", langID );
		}
	}
	else if ( userSpecified )
	{
		pfnPrint( "Note:  SAPI 5.1 Using user specified -languageid %i\n",langID );
	}

	SPSTATEHANDLE hStateRoot;
	// create/re-create Root level rule of grammar
	hr = cpRecoGrammar->GetRule(L"Root", 0, SPRAF_TopLevel | SPRAF_Active, TRUE, &hStateRoot);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to create root rule\n" );
		return result;
	}

	// Inactivate it so we can alter it
	hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_INACTIVE );
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to deactivate grammar rules\n" );
		return result;
	}

	// Create the rule set from the words in text
	{
		CSpDynamicString currentWord;
		WCHAR *pos = ( WCHAR * )text;
		WCHAR str[ 2 ];
		str[1]= 0;

		while ( *pos )
		{
			if ( *pos == L' ' /*|| *pos == L'.' || *pos == L'-'*/ )
			{
				// Add word to rule set
				if ( currentWord.Length() > 0 )
				{
					AddWordRule( cpRecoGrammar, &hStateRoot, &wordRules, currentWord );
					currentWord.Clear();
				}
				pos++;
				continue;
			}

			// Skip anything that's inside a [ xxx ] pair.
			if ( *pos == L'[' )
			{
				while ( *pos && *pos != L']' )
				{
					pos++;
				}

				if ( *pos )
				{
					pos++;
				}
				continue;
			}

			str[ 0 ] = *pos;

			currentWord.Append( str );
			pos++;
		}

		if ( currentWord.Length() > 0 )
		{
			AddWordRule( cpRecoGrammar, &hStateRoot, &wordRules, currentWord );
		}

		if ( wordRules.Size() <= 0 )
		{
			pfnPrint( "Error:  Text %s contained no usable words\n", text );
			return result;
		}

		// Build all word to word transitions in the grammar
		if ( !BuildRules( cpRecoGrammar, &hStateRoot, &wordRules ) )
		{
			pfnPrint( "Error:  Rule set for %s could not be generated\n", text );
			return result;
		}
	}

	// check for recognitions and end of stream event
	const ULONGLONG ullInterest = 
		SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM) | SPFEI(SPEI_FALSE_RECOGNITION) | 
		SPFEI(SPEI_PHRASE_START ) | SPFEI(SPEI_HYPOTHESIS ) | SPFEI(SPEI_INTERFERENCE) ;
	hr = cpRecoContext->SetInterest( ullInterest, ullInterest );
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to set interest level\n" );
		return result;
	}	
	// use Win32 events for command-line style application
	hr = cpRecoContext->SetNotifyWin32Event();
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to set win32 notify event\n" );
		return result;
	}
	// connect wav input to recognizer
	// SAPI will negotiate mismatched engine/input audio formats using system audio codecs, so second parameter is not important - use default of TRUE
	hr = cpRecognizer->SetInput(cpInputStream, TRUE);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to associate input stream\n" );
		return result;
	}	

	// Activate the CFG ( rather than using dictation )
	hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_ACTIVE );
	if ( FAILED( hr ) )
	{
		switch ( hr )
		{
		case E_INVALIDARG:
			pfnPrint( "pszName is invalid or bad. Alternatively, pReserved is non-NULL\n" );
			break;
		case SP_STREAM_UNINITIALIZED:
			pfnPrint( "ISpRecognizer::SetInput has not been called with the InProc recognizer\n" );
			break;
		case SPERR_UNINITIALIZED:
			pfnPrint( "The object has not been properly initialized.\n");
			break;
		case SPERR_UNSUPPORTED_FORMAT:
			pfnPrint( "Audio format is bad or is not recognized. Alternatively, the device driver may be busy by another application and cannot be accessed.\n" );
			break;
		case SPERR_NOT_TOPLEVEL_RULE:
			pfnPrint( "The rule pszName exists, but is not a top-level rule.\n" );
			break;
		default:
			pfnPrint( "Unknown error\n" );
			break;
		}
		pfnPrint( "Error:  SAPI 5.1 Unable to activate rule set\n" );
		return result;
	}

	// while events occur, continue processing
	// timeout should be greater than the audio stream length, or a reasonable amount of time expected to pass before no more recognitions are expected in an audio stream
	BOOL fEndStreamReached = FALSE;
	while (!fEndStreamReached && S_OK == cpRecoContext->WaitForNotifyEvent( SR_WAVTIMEOUT ))
	{
		CSpEvent spEvent;
		// pull all queued events from the reco context's event queue
		
		while (!fEndStreamReached && S_OK == spEvent.GetFrom(cpRecoContext))
		{
			// Check event type
			switch (spEvent.eEventId)
			{
			case SPEI_INTERFERENCE:
				{
					SPINTERFERENCE interference = spEvent.Interference();

					switch ( interference )
					{
					case SPINTERFERENCE_NONE:
						pfnPrint( "[ I None ]\r\n" );
						break;
					case SPINTERFERENCE_NOISE:
						pfnPrint( "[ I Noise ]\r\n" );
						break;
					case SPINTERFERENCE_NOSIGNAL:
						pfnPrint( "[ I No Signal ]\r\n" );
						break;
					case SPINTERFERENCE_TOOLOUD:
						pfnPrint( "[ I Too Loud ]\r\n" );
						break;
					case SPINTERFERENCE_TOOQUIET:
						pfnPrint( "[ I Too Quiet ]\r\n" );
						break;
					case SPINTERFERENCE_TOOFAST:
						pfnPrint( "[ I Too Fast ]\r\n" );
						break;
					case SPINTERFERENCE_TOOSLOW:
						pfnPrint( "[ I Too Slow ]\r\n" );
						break;
					default:
						break;
					}
				}
				break;
			case SPEI_PHRASE_START:
				pfnPrint( "Phrase Start\r\n" );
				sentence.MarkNewPhraseBase();
				break;

			case SPEI_HYPOTHESIS:
			case SPEI_RECOGNITION:
			case SPEI_FALSE_RECOGNITION:
				{
                    CComPtr<ISpRecoResult> cpResult;
                    cpResult = spEvent.RecoResult();

                    CSpDynamicString dstrText;
                    if (spEvent.eEventId == SPEI_FALSE_RECOGNITION)
                    {
                        dstrText = L"(Unrecognized)";

						result = SR_RESULT_FAILED;

						// It's possible that the failed recog might have more words, so see if that's the case
						EnumeratePhonemes( cpPhoneConv, cpResult, sentence );
					}
                    else
                    {
						// Hypothesis or recognition success
                        cpResult->GetText( (ULONG)SP_GETWHOLEPHRASE, (ULONG)SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL);

						EnumeratePhonemes( cpPhoneConv, cpResult, sentence );

						if ( spEvent.eEventId == SPEI_RECOGNITION )
						{
							result = SR_RESULT_SUCCESS;
						}

						pfnPrint( va( "%s%s\r\n", spEvent.eEventId == SPEI_HYPOTHESIS ? "[ Hypothesis ] " : "", dstrText.CopyToChar() ) );
					}
                    
                    cpResult.Release();
				}
				break;
				// end of the wav file was reached by the speech recognition engine
            case SPEI_END_SR_STREAM:
				fEndStreamReached = TRUE;
				break;
			}
			
			// clear any event data/object references
			spEvent.Clear();
		}// END event pulling loop - break on empty event queue OR end stream
	}// END event polling loop - break on event timeout OR end stream
	
	// Deactivate rule
	hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_INACTIVE );
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to deactivate rule set\n" );
		return result;
	}

	// close the input stream, since we're done with it
	// NOTE: smart pointer will call SpStream's destructor, and consequently ::Close, but code may want to check for errors on ::Close operation
	hr = cpInputStream->Close();
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to close input stream\n" );
		return result;
	}

	return result;
}
Esempio n. 27
0
	/// <summary>
	/// SAPI smoke test. Sequentially: (1) runs an in-process recognizer on the
	/// default microphone and speaks retained audio back, (2) renders TTS to a
	/// .wav file, (3) builds a programmatic "Travel" grammar, (4) re-runs
	/// recognition reading from a .wav file, (5) enumerates female voices and
	/// speaks SSML with each. Demo code: intermediate failures only
	/// short-circuit later steps through the SUCCEEDED(hr) chain.
	/// </summary>
	void Sound::test() {

		ISpVoice * pVoice = NULL;
		ISpObjectToken*        pVoiceToken = nullptr;
		// BUGFIX: pEnum was previously uninitialized; it is only assigned when
		// SpEnumTokens succeeds, so it must start out null.
		IEnumSpObjectTokens*   pEnum = nullptr;
		ULONG                  ulCount = 0;

		if (FAILED(::CoInitialize(NULL)))
		{
			return;
		}
		HRESULT hr = S_OK;

		// Find the best matching installed en-us recognizer.
		CComPtr<ISpObjectToken> cpRecognizerToken;

		if (SUCCEEDED(hr))
		{
			hr = SpFindBestToken(SPCAT_RECOGNIZERS, L"language=409", NULL, &cpRecognizerToken);
		}

		// Create the in-process recognizer and immediately set its state to inactive.
		CComPtr<ISpRecognizer> cpRecognizer;

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecognizer(cpRecognizerToken);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecoState(SPRST_INACTIVE);
		}

		// Create a new recognition context from the recognizer.
		CComPtr<ISpRecoContext> cpContext;

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->CreateRecoContext(&cpContext);
		}

		// Subscribe to the speech recognition event.
		if (SUCCEEDED(hr))
		{
			ULONGLONG ullEventInterest = SPFEI(SPEI_RECOGNITION);
			hr = cpContext->SetInterest(ullEventInterest, ullEventInterest);
		}

		// Establish a Win32 event to signal when speech events are available.
		HANDLE hSpeechNotifyEvent = INVALID_HANDLE_VALUE;

		if (SUCCEEDED(hr))
		{
			hr = cpContext->SetNotifyWin32Event();
		}

		if (SUCCEEDED(hr))
		{
			hSpeechNotifyEvent = cpContext->GetNotifyEventHandle();

			if (INVALID_HANDLE_VALUE == hSpeechNotifyEvent)
			{
				// Notification handle unsupported.
				hr = E_NOINTERFACE;
			}
		}

		// Initialize an audio object to use the default audio input of the system and set the recognizer to use it.
		CComPtr<ISpAudio> cpAudioIn;

		if (SUCCEEDED(hr))
		{
			hr = cpAudioIn.CoCreateInstance(CLSID_SpMMAudioIn);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetInput(cpAudioIn, TRUE);
		}

		// Populate a WAVEFORMATEX struct with our desired output audio format information.
		WAVEFORMATEX* pWfexCoMemRetainedAudioFormat = NULL;
		GUID guidRetainedAudioFormat = GUID_NULL;

		if (SUCCEEDED(hr))
		{
			hr = SpConvertStreamFormatEnum(SPSF_16kHz16BitMono, &guidRetainedAudioFormat, &pWfexCoMemRetainedAudioFormat);
		}

		// Instruct the recognizer to retain the audio from its recognition results.
		if (SUCCEEDED(hr))
		{
			hr = cpContext->SetAudioOptions(SPAO_RETAIN_AUDIO, &guidRetainedAudioFormat, pWfexCoMemRetainedAudioFormat);
		}

		if (NULL != pWfexCoMemRetainedAudioFormat)
		{
			CoTaskMemFree(pWfexCoMemRetainedAudioFormat);
		}

		// Create a new grammar and load an SRGS grammar from file.
		CComPtr<ISpRecoGrammar> cpGrammar;

		if (SUCCEEDED(hr))
		{
			hr = cpContext->CreateGrammar(0, &cpGrammar);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpGrammar->LoadCmdFromFile(L"grammar.grxml", SPLO_STATIC);
		}

		// Set all top-level rules in the new grammar to the active state.
		if (SUCCEEDED(hr))
		{
			hr = cpGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE);
		}

		// Set the recognizer state to active to begin recognition.
		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecoState(SPRST_ACTIVE_ALWAYS);
		}

		// Establish a separate Win32 event to signal the event loop exit.
		HANDLE hExitEvent = CreateEventW(NULL, FALSE, FALSE, NULL);

		// Collect the events listened for to pump the speech event loop.
		HANDLE rghEvents[] = { hSpeechNotifyEvent, hExitEvent };

		// Speech recognition event loop.
		BOOL fContinue = TRUE;

		while (fContinue && SUCCEEDED(hr))
		{
			// Wait for either a speech event or an exit event, with a 15 second timeout.
			DWORD dwMessage = WaitForMultipleObjects(sp_countof(rghEvents), rghEvents, FALSE, 15000);

			switch (dwMessage)
			{
				// With the WaitForMultipleObjects call above, WAIT_OBJECT_0 is a speech event from hSpeechNotifyEvent.
			case WAIT_OBJECT_0:
			{
				// Sequentially grab the available speech events from the speech event queue.
				CSpEvent spevent;

				while (S_OK == spevent.GetFrom(cpContext))
				{
					switch (spevent.eEventId)
					{
					case SPEI_RECOGNITION:
					{
						// Retrieve the recognition result and output the text of that result.
						ISpRecoResult* pResult = spevent.RecoResult();

						LPWSTR pszCoMemResultText = NULL;
						hr = pResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &pszCoMemResultText, NULL);

						if (SUCCEEDED(hr))
						{
							wprintf(L"Recognition event received, text=\"%s\"\r\n", pszCoMemResultText);
						}

						// Also retrieve the retained audio we requested.
						CComPtr<ISpStreamFormat> cpRetainedAudio;

						if (SUCCEEDED(hr))
						{
							hr = pResult->GetAudio(0, 0, &cpRetainedAudio);
						}

						// To demonstrate, we'll speak the retained audio back using ISpVoice.
						CComPtr<ISpVoice> cpVoice;

						if (SUCCEEDED(hr))
						{
							hr = cpVoice.CoCreateInstance(CLSID_SpVoice);
						}

						if (SUCCEEDED(hr))
						{
							hr = cpVoice->SpeakStream(cpRetainedAudio, SPF_DEFAULT, 0);
						}

						if (NULL != pszCoMemResultText)
						{
							CoTaskMemFree(pszCoMemResultText);
						}

						break;
					}
					}
				}

				break;
			}
			case WAIT_OBJECT_0 + 1:
			case WAIT_TIMEOUT:
			{
				// Exit event or timeout; discontinue the speech loop.
				fContinue = FALSE;
				break;
			}
			}
		}

		// BUGFIX: the original called CoUninitialize() at this point, tearing
		// down COM while the objects below still had to be created and used
		// (every subsequent CoCreateInstance would fail), and leaving the
		// final ::CoUninitialize() at the end of the function unbalanced.
		// The single balancing ::CoUninitialize() at the end is kept instead.

		CComPtr <ISpVoice>		cpVoice;
		CComPtr <ISpStream>		cpStream;
		CSpStreamFormat			cAudioFmt;

		//Create a SAPI Voice
		hr = cpVoice.CoCreateInstance(CLSID_SpVoice);

		//Set the audio format
		if (SUCCEEDED(hr))
		{
			hr = cAudioFmt.AssignFormat(SPSF_22kHz16BitMono);
		}

		//Call SPBindToFile, a SAPI helper method, to bind the audio stream to the file
		if (SUCCEEDED(hr))
		{
			hr = SPBindToFile(L"c:\\ttstemp.wav", SPFM_CREATE_ALWAYS,
				&cpStream, &cAudioFmt.FormatId(), cAudioFmt.WaveFormatExPtr());
		}

		//set the output to cpStream so that the output audio data will be stored in cpStream
		if (SUCCEEDED(hr))
		{
			hr = cpVoice->SetOutput(cpStream, TRUE);
		}

		//Speak the text "hello world" synchronously
		if (SUCCEEDED(hr))
		{
			hr = cpVoice->Speak(L"Hello World", SPF_DEFAULT, NULL);
		}

		//close the stream
		if (SUCCEEDED(hr))
		{
			hr = cpStream->Close();
		}

		//Release the stream and voice object
		cpStream.Release();
		cpVoice.Release();

		CComPtr<ISpGrammarBuilder>    cpGrammarBuilder;
		SPSTATEHANDLE                 hStateTravel = NULL;

		// BUGFIX: the original dereferenced cpGrammarBuilder without ever
		// creating it (guaranteed crash on an empty CComPtr). ISpRecoGrammar
		// derives from ISpGrammarBuilder, so obtain the interface from the
		// grammar created earlier in this function.
		if (SUCCEEDED(hr))
		{
			hr = cpGrammar.QueryInterface(&cpGrammarBuilder);
		}

		// Create (if rule does not already exist)
		// top-level Rule, defaulting to Active.
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->GetRule(L"Travel", 0, SPRAF_TopLevel | SPRAF_Active, TRUE, &hStateTravel);
		}

		// Approach 1: List all possible phrases.
		// This is the most intuitive approach, and it does not sacrifice efficiency
		// because the grammar builder will merge shared sub-phrases when possible.
		// There is only one root state, hStateTravel, and the terminal NULL state,
		// and there are six unique transitions between root state and NULL state.

		/* XML Approximation:
		<rule id="Travel">
		<item> fly to Seattle </item>
		<item> fly to New York </item>
		<item> fly to Washington DC </item>
		<item> drive to Seattle </item>
		<item> drive to New York </item>
		<item> drive to Washington DC </item>
		</rule>
		*/

		// Create set of peer phrases, each containing complete phrase.
		// Note: the word delimiter is set as " ", so that the text we
		// attach to the transition can be multiple words (for example,
		// "fly to Seattle" is implicitly "fly" + "to" + "Seattle"):
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to Seattle", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to New York", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to Washington DC", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to Seattle", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to New York", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to Washington DC", L" ", SPWT_LEXICAL, 1, NULL);
		}

		// Find the best matching installed en-US recognizer.
		if (SUCCEEDED(hr))
		{
			hr = SpFindBestToken(SPCAT_RECOGNIZERS, L"language=409", NULL, &cpRecognizerToken);
		}

		// Create the in-process recognizer and immediately set its state to inactive.
		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecognizer(cpRecognizerToken);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecoState(SPRST_INACTIVE);
		}

		// Create a new recognition context from the recognizer.
		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->CreateRecoContext(&cpContext);
		}

		// Subscribe to the speech recognition event and end stream event.
		if (SUCCEEDED(hr))
		{
			ULONGLONG ullEventInterest = SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM);
			hr = cpContext->SetInterest(ullEventInterest, ullEventInterest);
		}

		// Establish a Win32 event to signal when speech events are available.
		// BUGFIX: SetNotifyWin32Event() was previously called twice in a row;
		// the duplicate call has been removed.
		if (SUCCEEDED(hr))
		{
			hr = cpContext->SetNotifyWin32Event();
		}

		if (SUCCEEDED(hr))
		{
			hSpeechNotifyEvent = cpContext->GetNotifyEventHandle();

			if (INVALID_HANDLE_VALUE == hSpeechNotifyEvent)
			{
				// Notification handle unsupported
				//hr = SPERR_UNITIALIZED;
			}
		}

		// Set up an audio input stream using a .wav file and set the recognizer's input.
		CComPtr<ISpStream> cpInputStream;

		if (SUCCEEDED(hr))
		{
			hr = SPBindToFile(L"Test.wav", SPFM_OPEN_READONLY, &cpInputStream);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetInput(cpInputStream, TRUE);
		}

		// Create a new grammar and load an SRGS grammar from file.
		if (SUCCEEDED(hr))
		{
			hr = cpContext->CreateGrammar(0, &cpGrammar);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpGrammar->LoadCmdFromFile(L"grammar.grxml", SPLO_STATIC);
		}

		// Set all top-level rules in the new grammar to the active state.
		if (SUCCEEDED(hr))
		{
			hr = cpGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE);
		}

		// Finally, set the recognizer state to active to begin recognition.
		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecoState(SPRST_ACTIVE_ALWAYS);
		}

		hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void**)&pVoice);
		if (SUCCEEDED(hr)) {
			hr = SpEnumTokens(SPCAT_VOICES, L"Gender=Female", NULL, &pEnum);
			if (SUCCEEDED(hr))
			{
				// Get the number of voices.
				hr = pEnum->GetCount(&ulCount);
			}

			// Obtain a list of available voice tokens, set
			// the voice to the token, and call Speak.
			while (SUCCEEDED(hr) && ulCount--)
			{
				if (pVoiceToken != nullptr) {
					pVoiceToken->Release();
					pVoiceToken = nullptr;
				}

				if (SUCCEEDED(hr))
				{
					hr = pEnum->Next(1, &pVoiceToken, NULL);
				}

				if (SUCCEEDED(hr))
				{
					hr = pVoice->SetVoice(pVoiceToken);
				}

				if (SUCCEEDED(hr))
				{
					// BUGFIX: string literals must bind to const wchar_t*.
					const wchar_t* start = L"<?xml version=\"1.0\" encoding=\"ISO - 8859 - 1\"?><speak version = \"1.0\" xmlns = \"http://www.w3.org/2001/10/synthesis\"	xml:lang = \"en-US\">";
					const wchar_t* end = L"</speak>";
					const wchar_t *xml = L"<voice required = \"Gender=Male\"> hi! <prosody pitch=\"fast\"> This is low pitch. </prosody><prosody volume=\"x - loud\"> This is extra loud volume. </prosody>";
					// NOTE(review): s is assembled from start+xml+end but only
					// xml is spoken below — confirm whether s.c_str() was the
					// intended payload.
					wstring s = start;
					s += xml;
					s += end;

					hr = pVoice->Speak(xml, SPF_IS_XML | SPF_ASYNC, 0);
				}
			}

			// BUGFIX: release the last enumerated token and the enumerator
			// (both were previously leaked).
			if (pVoiceToken != nullptr) {
				pVoiceToken->Release();
				pVoiceToken = nullptr;
			}
			if (pEnum != nullptr) {
				pEnum->Release();
				pEnum = nullptr;
			}
			pVoice->Release();
		}

		// BUGFIX: close the exit event handle (it was previously leaked).
		if (NULL != hExitEvent)
		{
			CloseHandle(hExitEvent);
		}

		::CoUninitialize();
	}
// Entry point: captures audio from the Kinect microphone array, routes it
// through an in-process SAPI recognizer loaded with a Japanese SRGS grammar,
// and paints an OpenCV window a solid color when one of the grammar's phrase
// tags is recognized with sufficient confidence; Esc or the exit phrase quits.
// NOTE(review): most HRESULTs from the DMO/SAPI setup calls below are never
// checked; only the Kinect sensor/audio-source creation is validated.
int _tmain( int argc, _TCHAR* argv[] )
{
	cv::setUseOptimized( true );

	// Create and initialize the Kinect sensor instance (audio only).
	INuiSensor* pSensor;
	HRESULT hResult = S_OK;
	hResult = NuiCreateSensorByIndex( 0, &pSensor );
	if( FAILED( hResult ) ){
		std::cerr << "Error : NuiCreateSensorByIndex" << std::endl;
		return -1;
	}

	hResult = pSensor->NuiInitialize( NUI_INITIALIZE_FLAG_USES_AUDIO );
	if( FAILED( hResult ) ){
		std::cerr << "Error : NuiInitialize" << std::endl;
		return -1;
	}

	// Initialize the audio stream (InitializeAudioStream).
	std::cout << "InitializeAudioStream" << std::endl;
	INuiAudioBeam* pNuiAudioSource;
	hResult = pSensor->NuiGetAudioSource( &pNuiAudioSource );
	if( FAILED( hResult ) ){
		std::cerr << "Error : NuiGetAudioSource" << std::endl;
		return -1;
	}

	// The Kinect audio beam exposes both a DirectX Media Object and a
	// property store; grab both interfaces.
	IMediaObject* pMediaObject = nullptr;
	IPropertyStore* pPropertyStore = nullptr;
	pNuiAudioSource->QueryInterface( IID_IMediaObject, reinterpret_cast<void**>( &pMediaObject ) );
	pNuiAudioSource->QueryInterface( IID_IPropertyStore, reinterpret_cast<void**>( &pPropertyStore ) );

	// Configure the DMO's system mode.
	// NOTE(review): the magic value 4 is presumably OPTIBEAM_ARRAY_ONLY
	// (mic-array beamforming, no echo cancellation) — confirm against the
	// MFPKEY_WMAAECMA_SYSTEM_MODE documentation.
	PROPVARIANT propvariant;
	PropVariantInit( &propvariant );
	propvariant.vt = VT_I4;
	propvariant.lVal = static_cast<LONG>( 4 );
	pPropertyStore->SetValue( MFPKEY_WMAAECMA_SYSTEM_MODE, propvariant );
	PropVariantClear( &propvariant );

	// Describe the DMO's output as fixed-size PCM using the wave format
	// constants defined elsewhere in this file.
	WAVEFORMATEX waveFormat = { AudioFormat, AudioChannels, AudioSamplesPerSecond, AudioAverageBytesPerSecond, AudioBlockAlign, AudioBitsPerSample, 0 };
	DMO_MEDIA_TYPE mediaType = { 0 };
	MoInitMediaType( &mediaType, sizeof( WAVEFORMATEX ) );

	mediaType.majortype = MEDIATYPE_Audio;
	mediaType.subtype = MEDIASUBTYPE_PCM;
	mediaType.lSampleSize = 0;
	mediaType.bFixedSizeSamples = true;
	mediaType.bTemporalCompression = false;
	mediaType.formattype = FORMAT_WaveFormatEx;
	memcpy( mediaType.pbFormat, &waveFormat, sizeof( WAVEFORMATEX ) );

	pMediaObject->SetOutputType( 0, &mediaType, 0 ); 

	// Wrap the DMO in the project's KinectAudioStream adapter and expose it
	// as an IStream for SAPI consumption.
	KinectAudioStream* audioStream = new KinectAudioStream( pMediaObject );

	IStream* pStream = nullptr;
	audioStream->QueryInterface( IID_IStream, reinterpret_cast<void**>( &pStream ) );

	CoInitialize( nullptr );
	ISpStream* pSpeechStream = nullptr;
	CoCreateInstance( CLSID_SpStream, NULL, CLSCTX_INPROC_SERVER, __uuidof(ISpStream), reinterpret_cast<void**>( &pSpeechStream ) );

	// Bind the Kinect stream (with our PCM format) as the SAPI base stream.
	pSpeechStream->SetBaseStream( pStream, SPDFID_WaveFormatEx, &waveFormat );

	// Release intermediate objects no longer needed directly.
	MoFreeMediaType( &mediaType );
	pStream->Release();
	pPropertyStore->Release();
	pMediaObject->Release();
	pNuiAudioSource->Release();

	// Create the speech recognizer (CreateSpeechRecognizer).
	std::cout << "CreateSpeechRecognizer" << std::endl;
	ISpRecognizer* pSpeechRecognizer;
	CoCreateInstance( CLSID_SpInprocRecognizer, nullptr, CLSCTX_INPROC_SERVER, __uuidof(ISpRecognizer), reinterpret_cast<void**>( &pSpeechRecognizer ) );

	pSpeechRecognizer->SetInput( pSpeechStream, false );

	/*
	// If can use ATL, easier to using SpFindBestToken(sphelper.h). When using Professional or more.
	ISpObjectToken* pEngineToken = nullptr;
	SpFindBestToken( SPCAT_RECOGNIZERS, L"Language=411;Kinect=True", NULL, &pEngineToken ); // Japanese "Language=411;Kinect=True" English "Language=409;Kinect=True"
	*/

	///*
	// If can't use ATL, alternative to using SpFIndBestToken(sphelper.h). When using Express.
	// Manually enumerate recognizer tokens matching the Kinect-aware Japanese
	// engine, preferring the vendor's own engine ("VendorPreferred").
	const wchar_t* pVendorPreferred = L"VendorPreferred";
	const unsigned long lengthVendorPreferred = static_cast<unsigned long>( wcslen( pVendorPreferred ) );
	unsigned long length;
	ULongAdd( lengthVendorPreferred, 1, &length );
	wchar_t* pAttribsVendorPreferred = new wchar_t[ length ];
	StringCchCopyW( pAttribsVendorPreferred, length, pVendorPreferred );

	ISpObjectTokenCategory* pTokenCategory = nullptr;
	CoCreateInstance( CLSID_SpObjectTokenCategory, nullptr, CLSCTX_ALL, __uuidof(ISpObjectTokenCategory), reinterpret_cast<void**>( &pTokenCategory ) );

	pTokenCategory->SetId( SPCAT_RECOGNIZERS, false );

	IEnumSpObjectTokens* pEnumTokens = nullptr;
	CoCreateInstance( CLSID_SpMMAudioEnum, nullptr, CLSCTX_ALL, __uuidof(IEnumSpObjectTokens), reinterpret_cast<void**>( &pEnumTokens ) );

	pTokenCategory->EnumTokens( L"Language=411;Kinect=True", pAttribsVendorPreferred, &pEnumTokens ); // Japanese "Language=411;Kinect=True" English "Language=409;Kinect=True"

	delete[] pAttribsVendorPreferred;
	
	ISpObjectToken* pEngineToken = nullptr;
	pEnumTokens->Next( 1, &pEngineToken, nullptr );
	//*/

	pSpeechRecognizer->SetRecognizer( pEngineToken );
	
	ISpRecoContext* pSpeechContext;
	pSpeechRecognizer->CreateRecoContext( &pSpeechContext );

	pEngineToken->Release();
	///*
	pTokenCategory->Release();
	pEnumTokens->Release();
	//*/

	// Create/load the speech recognition grammar (LoadSpeechGrammar).
	std::cout << "LoadSpeechGrammar" << std::endl;
	ISpRecoGrammar* pSpeechGrammar;
	pSpeechContext->CreateGrammar( 1, &pSpeechGrammar );

	pSpeechGrammar->LoadCmdFromFile( L"SpeechRecognition_Ja.grxml", /*SPLO_STATIC*/SPLO_DYNAMIC ); // http://www.w3.org/TR/speech-grammar/ (UTF-8/CRLF)
	
	// Start capturing audio and activate recognition.
	audioStream->StartCapture();
	pSpeechGrammar->SetRuleState( nullptr, nullptr, SPRS_ACTIVE );
	pSpeechRecognizer->SetRecoState( SPRST_ACTIVE_ALWAYS );
	pSpeechContext->SetInterest( SPFEI( SPEI_RECOGNITION ), SPFEI( SPEI_RECOGNITION ) );
	pSpeechContext->Resume( 0 );

	// Win32 event that SAPI signals when recognition events are queued.
	HANDLE hSpeechEvent = INVALID_HANDLE_VALUE;
	hSpeechEvent = pSpeechContext->GetNotifyEventHandle();
	HANDLE hEvents[1] = { hSpeechEvent };

	int width = 640;
	int height = 480;

	cv::Mat audioMat = cv::Mat::zeros( height, width, CV_8UC3 );
	cv::namedWindow( "Audio" );

	bool exit = false;

	std::cout << std::endl << "Speech Recognition Start..." << std::endl << std::endl;

	while( 1 ){
		// Wait for the next speech notification (or window messages).
		ResetEvent( hSpeechEvent );
		unsigned long waitObject = MsgWaitForMultipleObjectsEx( ARRAYSIZE( hEvents ), hEvents, INFINITE, QS_ALLINPUT, MWMO_INPUTAVAILABLE );

		if( waitObject == WAIT_OBJECT_0 ){
			// Drain the recognition context's event queue.
			const float confidenceThreshold = 0.3f;
			SPEVENT eventStatus;
			unsigned long eventFetch = 0;
			pSpeechContext->GetEvents( 1, &eventStatus, &eventFetch );
			while( eventFetch > 0 ){
				switch( eventStatus.eEventId ){
					// Speech recognition events (SPEI_HYPOTHESIS: tentative
					// guess, SPEI_RECOGNITION: final recognition).
					case SPEI_HYPOTHESIS:
					case SPEI_RECOGNITION:
						if( eventStatus.elParamType == SPET_LPARAM_IS_OBJECT ){
							// Retrieve the recognized phrase.
							ISpRecoResult* pRecoResult = reinterpret_cast<ISpRecoResult*>( eventStatus.lParam );
							SPPHRASE* pPhrase = nullptr;
							hResult = pRecoResult->GetPhrase( &pPhrase );
							if( SUCCEEDED( hResult ) ){
								if( ( pPhrase->pProperties != nullptr ) && ( pPhrase->pProperties->pFirstChild != nullptr ) ){
									// Compare against the grammar's phrase tags.
									// (The string literals below are grammar
									// tag values and must not be changed.)
									const SPPHRASEPROPERTY* pSemantic = pPhrase->pProperties->pFirstChild;
									if( pSemantic->SREngineConfidence > confidenceThreshold ){
										if( wcscmp( L"あか", pSemantic->pszValue ) == 0 ){
											std::cout << "あか" << std::endl;
											audioMat = cv::Scalar( 0, 0, 255 );
										}
										else if( wcscmp( L"みどり", pSemantic->pszValue ) == 0 ){
											std::cout << "みどり" << std::endl;
											audioMat = cv::Scalar( 0, 255, 0 );
										}
										else if( wcscmp( L"あお", pSemantic->pszValue ) == 0 ){
											std::cout << "あお" << std::endl;
											audioMat = cv::Scalar( 255, 0, 0 );
										}
										else if( wcscmp( L"おわり", pSemantic->pszValue ) == 0 ){
											exit = true;
										}
									}
								}
								CoTaskMemFree( pPhrase );
							}
						}
						break;

					default:
						break;
				}
				pSpeechContext->GetEvents( 1, &eventStatus, &eventFetch );
			}
		}

		// Render the current color.
		cv::imshow( "Audio", audioMat );

		// Loop exit check (Esc key or the exit phrase).
		if( cv::waitKey( 30 ) == VK_ESCAPE || exit ){
			break;
		}
	}

	// Shutdown.
	// NOTE(review): the SAPI COM objects (pSpeechContext, pSpeechGrammar,
	// pSpeechRecognizer, pSpeechStream, audioStream) are never Released, and
	// CoUninitialize runs before CloseHandle. Also, hSpeechEvent came from
	// GetNotifyEventHandle and is owned by the recognition context — closing
	// it here may be incorrect; confirm against the SAPI documentation.
	audioStream->StopCapture();
	pSpeechRecognizer->SetRecoState( SPRST_INACTIVE );
	CoUninitialize();
	pSensor->NuiShutdown();
	CloseHandle( hSpeechEvent );

	cv::destroyAllWindows();

	return 0;
}
Esempio n. 29
0
/// Enables or disables speech recognition. No-op if the state is unchanged
/// or COM failed to initialize. On enable, creates a dedicated in-process
/// recognizer bound to the default audio-in device, a recognition context
/// with Win32-event notification, and a grammar; on disable, releases the
/// context and recognizer. Emits enabledUpdated(_enabled) in both cases.
void SpeechRecognizer::setEnabled(bool enabled) {
    if (enabled == _enabled || !_comInitialized) {
        return;
    }

    _enabled = enabled;

    if (_enabled) {

        HRESULT hr = S_OK;

        // Set up dedicated recognizer instead of using shared Windows recognizer.
        // - By default, shared recognizer's commands like "move left" override any added here.
        // - Unless do SetGrammarState(SPGS_EXCLUSIVE) on shared recognizer but then non-Interface commands don't work at all.
        // - With dedicated recognizer, user can choose whether to have Windows recognizer running in addition to Interface's.
        if (SUCCEEDED(hr)) {
            hr = CoCreateInstance(CLSID_SpInprocRecognizer, NULL, CLSCTX_ALL, IID_ISpRecognizer, (void**)&_speechRecognizer);
        }
        if (SUCCEEDED(hr)) {
            // Resolve the default audio-in token and attach it as the
            // recognizer's input.
            // BUGFIX: both pointers were previously uninitialized (read if an
            // intermediate step failed) and were never Released (COM leak).
            ISpObjectToken* audioToken = NULL;
            ISpObjectTokenCategory* audioTokenCategory = NULL;
            hr = CoCreateInstance(CLSID_SpObjectTokenCategory, NULL, CLSCTX_ALL, IID_ISpObjectTokenCategory, 
                (void**)&audioTokenCategory);
            if (SUCCEEDED(hr)) {
                hr = audioTokenCategory->SetId(SPCAT_AUDIOIN, TRUE);
            }
            if (SUCCEEDED(hr)) {
                WCHAR* tokenID = NULL;
                hr = audioTokenCategory->GetDefaultTokenId(&tokenID);
                if (SUCCEEDED(hr)) {
                    hr = CoCreateInstance(CLSID_SpObjectToken, NULL, CLSCTX_ALL, IID_ISpObjectToken, (void**)&audioToken);
                    if (SUCCEEDED(hr)) {
                        hr = audioToken->SetId(NULL, tokenID, FALSE);
                    }
                    ::CoTaskMemFree(tokenID);
                }
            }
            if (SUCCEEDED(hr)) {
                hr = static_cast<ISpRecognizer*>(_speechRecognizer)->SetInput(audioToken, TRUE);
            }
            // Drop local references; SetInput holds its own reference.
            if (audioToken) {
                audioToken->Release();
            }
            if (audioTokenCategory) {
                audioTokenCategory->Release();
            }
        }
        if (SUCCEEDED(hr)) {
            hr = static_cast<ISpRecognizer*>(_speechRecognizer)
                ->CreateRecoContext(reinterpret_cast<ISpRecoContext**>(&_speechRecognizerContext));
            if (FAILED(hr)) {
                static_cast<ISpRecognizer*>(_speechRecognizer)->Release();
                _speechRecognizer = NULL;  // BUGFIX: don't leave a dangling pointer
            }
        }

        // Set up event notification mechanism.
        if (SUCCEEDED(hr)) {
            hr = static_cast<ISpRecoContext*>(_speechRecognizerContext)->SetNotifyWin32Event();
        }
        if (SUCCEEDED(hr)) {
            _commandRecognizedEvent = static_cast<ISpRecoContext*>(_speechRecognizerContext)->GetNotifyEventHandle();
            if (_commandRecognizedEvent) {
                _commandRecognizedNotifier->setHandle(_commandRecognizedEvent);
                _commandRecognizedNotifier->setEnabled(true);
            } else {
                hr = S_FALSE;
            }
        }
        
        // Set which events to be notified of.
        if (SUCCEEDED(hr)) {
            hr = static_cast<ISpRecoContext*>(_speechRecognizerContext)
                ->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION));
        }

        // Create grammar and load commands.
        if (SUCCEEDED(hr)) {
            hr = static_cast<ISpRecoContext*>(_speechRecognizerContext)
                ->CreateGrammar(NULL, reinterpret_cast<ISpRecoGrammar**>(&_speechRecognizerGrammar));
        }
        if (SUCCEEDED(hr)) {
            reloadCommands();
        }
        
        _enabled = SUCCEEDED(hr);

        qDebug() << "Speech recognition" << (_enabled ? "enabled" : "enable failed");

    } else {
        _commandRecognizedNotifier->setEnabled(false);
        static_cast<ISpRecoContext*>(_speechRecognizerContext)->Release();
        _speechRecognizerContext = NULL;  // BUGFIX: clear released pointers
        static_cast<ISpRecognizer*>(_speechRecognizer)->Release();
        _speechRecognizer = NULL;
        qDebug() << "Speech recognition disabled";
    }

    emit enabledUpdated(_enabled);
}
Esempio n. 30
0
// Builds the objects needed for speech recognition.
//
// inDicticationFilterWord : stored in DicticationFilterWord (its exact use is
//                           not visible in this method — see callers).
// inGrammarXML            : path to the rule-grammar XML file (converted to
//                           wide chars via A2W for LoadCmdFromFile).
// inWindow / inCallbackMesage : window handle and message id stored for
//                           posting recognition notifications back to the UI.
//
// Creates two in-process SAPI recognizers: a dictation engine and a
// rule-based engine. Throws via AfxThrowOleException on any SAPI failure.
void RSpeechRecognition::Create(const std::string & inDicticationFilterWord , const std::string & inGrammarXML , HWND inWindow , UINT inCallbackMesage )
{
	USES_CONVERSION;

	HRESULT hr;

	this->DicticationFilterWord = inDicticationFilterWord;
	this->CallbackWindowHandle = inWindow;
	this->CallbackWindowMesage = inCallbackMesage;

	//Dictation
	{
		// NOTE(review): cpAudio is created here but never attached to
		// DictationEngine (no SetInput call, unlike the rule engine below) —
		// confirm whether the dictation input is configured elsewhere.
		CComPtr<ISpAudio> cpAudio;
		hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &cpAudio);
		if(FAILED(hr))	 AfxThrowOleException(hr);

		hr = this->DictationEngine.CoCreateInstance(CLSID_SpInprocRecognizer);
		if(FAILED(hr))	 AfxThrowOleException(hr);

		hr = this->DictationEngine->CreateRecoContext(&this->DictationRecoCtxt);
		if(FAILED(hr))	 AfxThrowOleException(hr);

		// Only deliver recognition events.
		hr = this->DictationRecoCtxt->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION));
		if(FAILED(hr))	 AfxThrowOleException(hr);

		// Keep the recognized audio so it can be retrieved from results.
		hr = this->DictationRecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO, NULL, NULL);
		if(FAILED(hr))	 AfxThrowOleException(hr);

		// Create the grammar and load the dictation topic.
		hr = this->DictationRecoCtxt->CreateGrammar(0, &this->DictationGrammar);
		if(FAILED(hr))	 AfxThrowOleException(hr);

		hr = this->DictationGrammar->LoadDictation(NULL, SPLO_STATIC);
		if(FAILED(hr))	 AfxThrowOleException(hr);

		// Notify via a Win32 event (polled elsewhere).
		hr = this->DictationRecoCtxt->SetNotifyWin32Event();
		if(FAILED(hr))	 AfxThrowOleException(hr);
	}
	// Create the rule-based engine.
	{
		CComPtr<ISpAudio> cpAudio;
		hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &cpAudio);
		if(FAILED(hr))	 AfxThrowOleException(hr);

		hr = this->RuleEngine.CoCreateInstance(CLSID_SpInprocRecognizer);
		if(FAILED(hr))	 AfxThrowOleException(hr);

		// Read input from the default audio-in device.
		hr = this->RuleEngine->SetInput( cpAudio, TRUE);  
		if(FAILED(hr))	 AfxThrowOleException(hr);

		hr = this->RuleEngine->CreateRecoContext(&this->RuleRecoCtxt);
		if(FAILED(hr))	 AfxThrowOleException(hr);

		// Only deliver recognition events.
		hr = this->RuleRecoCtxt->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION));
		if(FAILED(hr))	 AfxThrowOleException(hr);

		// Keep the recognized audio so it can be retrieved from results.
		hr = this->RuleRecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO, NULL, NULL);
		if(FAILED(hr))	 AfxThrowOleException(hr);

		// Create the grammar for this context.
		hr = this->RuleRecoCtxt->CreateGrammar(0, &this->RuleGrammar);
		if(FAILED(hr))	 AfxThrowOleException(hr);

		// NOTE(review): this grammar loads BOTH a dictation topic and the
		// command file below — confirm both are intended on the rule engine.
		hr = this->RuleGrammar->LoadDictation(NULL, SPLO_STATIC);
		if(FAILED(hr))	 AfxThrowOleException(hr);

		hr = this->RuleGrammar->LoadCmdFromFile( A2W( inGrammarXML.c_str() ) ,SPLO_STATIC);
		if(FAILED(hr))	 AfxThrowOleException(hr);

		// Deliver notifications through a callback function instead of an event.
		hr = this->RuleRecoCtxt->SetNotifyCallbackFunction(__callbackRule , (WPARAM)this , 0);
		if(FAILED(hr))	 AfxThrowOleException(hr);

		// Activate all top-level rules: recognition starts now.
		hr = this->RuleGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE );
		if(FAILED(hr))	 AfxThrowOleException(hr);
	}
	this->FlagCleanup();
}